diff --git a/.DS_Store b/.DS_Store index 156ca34711eaf46fe3192a05010af95e03ebcf38..ac17a7f7de5586de8c95a9a1930065647cf24fba 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/Data_Base.xlsx b/Data_Base.xlsx index 03aeda0b44da3c8f933e563c8d2f81b30e256c49..0fe14da3727fdc3fee8012c728e0c4064045add1 100644 Binary files a/Data_Base.xlsx and b/Data_Base.xlsx differ diff --git a/adrian.xlsx b/adrian.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..28be9191fb04c3a0d4fb986f355e4890a68e45bb Binary files /dev/null and b/adrian.xlsx differ diff --git a/matrice_categories.xlsx b/matrice_categories.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..079b1e51632cd27e4393fb9c47ee5661eca5fb1e Binary files /dev/null and b/matrice_categories.xlsx differ diff --git a/projet_en_groupe/.DS_Store b/projet_en_groupe/.DS_Store index d61b1e5fc16293879ed2e29cb960a6608b268bef..68e71897315fb61ef3125ffa17d5459815a9f12c 100644 Binary files a/projet_en_groupe/.DS_Store and b/projet_en_groupe/.DS_Store differ diff --git a/projet_en_groupe/algorithme_netflix.py b/projet_en_groupe/algorithme_netflix.py index 5e801f301cdac3b039799693cd0cdc7a3de9f84c..4538b7b207d1b4117ddcd507e8b9ac91546d6d93 100644 --- a/projet_en_groupe/algorithme_netflix.py +++ b/projet_en_groupe/algorithme_netflix.py @@ -1,6 +1,6 @@ # ALL the imports import pandas as pd -import tabulate +from tabulate import tabulate import os import csv from fuzzywuzzy import process @@ -20,105 +20,98 @@ data_2 = pd.read_csv(file_path_2) # Show the catalog def catalog(data_1): - print(data_1.head(100)) - + # display the head of catalog for more you can export it in a csv file + subset_data = data_1.head(50) + table_data = [list(row) for row in subset_data.itertuples(index=False)] + headers = list(subset_data.columns) + print(tabulate(table_data, headers=headers, tablefmt="grid")) save_to_csv(data_1) - return - # Be careful, you need to ask each time if they want to save the list to a .csv + return -def movies(data_1): # register - films = data_1[data_1['type'] == 'Movie'] # Filter the data to include only movies - print(films) # Display movie titles +def movies(data_1): + # display the head of films for more you can export it in a csv file + films = data_1[data_1['type'] == 'Movie'] + subset_data = films.head(50) + table_data = [list(row) for row in subset_data.itertuples(index=False)] + headers = list(subset_data.columns) + print(tabulate(table_data, headers=headers, tablefmt="grid")) save_to_csv(films) - return # Be careful, you need to ask each time if they want to save the list to a .csv + return def series(data_1): - - series = data_1[data_1['type'] == 'TV Show'] # Filter the data to include only series - - print(series) # Display series titles - + # display the head of series for more you can export it in a csv file + series = data_1[data_1['type'] == 'TV Show'] + subset_data = series.head(50) + table_data = [list(row) for row in subset_data.itertuples(index=False)] + headers = list(subset_data.columns) + print(tabulate(table_data, headers=headers, tablefmt="grid")) save_to_csv(series) - return # Be careful, you need to ask each time if they want to save the list to a .csv - + return -def by_year(data_1): # be careful and/or !!!!! +def by_year(data_1): # this function is used to display the data (movies, series or both) by ascending or descending the release year filtered_data = filter_media_type(data_1) + sort_order = get_sort_order() + sorted_data = sort_data_by_year(filtered_data, sort_order) - if filtered_data is None: - return # Exit the function if filter_media_type returns None + table_data = [list(row) for row in sorted_data.head(50).itertuples(index=False)] + headers = list(sorted_data.columns) - while True: - print("1. Ascending") - print("2. Descending") - sort_type = input("Do you want to sort the years in ascending or descending order? enter the number : ") - - if sort_type == "1": - sorted_data = filtered_data.sort_values(by='release_year', ascending=True) - break # Sort type is valid, exit the loop - elif sort_type == "2": - sorted_data = filtered_data.sort_values(by='release_year', ascending=False) - break # Sort type is valid, exit the loop - else: - print("Invalid choice. Please enter 1 for ascending or 2 for descending.") - # Repeat the loop to ask for a valid input + print(tabulate(table_data, headers=headers, tablefmt="grid")) - print(sorted_data) - save_to_csv(sorted_data) + save_to_csv(sorted_data) # ask if the user want to save the data just shown - return # Be careful, you need to ask each time if they want to save the list to a .csv + return -def by_country(data_1): +def by_country(data_1) : filtered_data = filter_media_type(data_1) country_list = [] - for countries in filtered_data['country'].dropna().str.split(','): + for countries in filtered_data['country'].dropna().str.split(',') : for country in countries: cleaned_country = country.strip() # Remove leading and trailing spaces - if cleaned_country and cleaned_country not in country_list: + if cleaned_country and cleaned_country not in country_list : country_list.append(cleaned_country) - print("List of all available countries:") country_list.sort() - print(country_list) + print("List of all available countries:") + print(tabulate(enumerate(country_list, start=1), headers=['No.', 'Country'], tablefmt='pretty')) - while True: + + while True : country_input = input("Enter the name of the country to display movies and/or series: ") - + # Use FuzzyWuzzy to find the closest match matches = process.extractOne(country_input, country_list) - if matches[1] >= 80: # Adjust the similarity threshold as needed - country_input = matches[0] + if matches[1] >= 80 : # 80 is the threshold to compare the fuzzywuzzy used in precedent line + country_input = matches[0] # give the name of the country break - else: - closest_match = matches[0] + else : + closest_match = matches[0] # give the closest match of the value entered (wich is a country) print(f"Invalid country name. The closest match is: {closest_match}") - # Check if the entered country is correct - if country_input in country_list: + if country_input in country_list : # Check if the entered country is correct print(f"You selected: {country_input}") - else: + else : print(f"You entered: {country_input}, which is not in the list.") - country_data = filtered_data[filtered_data['country'].str.lower().str.contains(country_input.lower(), case=False, na=False)] + country_data = filtered_data[filtered_data['country'].str.lower().str.contains(country_input.lower(), case=False, na=False)] # view all the data ( of movies, series or both) for a coutnry given - if not country_data.empty: - print(country_data) + if not country_data.empty : + print(tabulate(country_data.head(50), headers='keys', tablefmt='pretty')) save_to_csv(country_data) - else: + else : print(f"No movies or series found for the country {country_input}.") return - # Be careful, you need to ask each time if they want to save the list to a .csv def genre(data_1): filtered_data = filter_media_type(data_1) @@ -126,287 +119,246 @@ def genre(data_1): genre_list = [] for genres in data_1['listed_in'].dropna().str.split(', '): for genre in genres: - if genre not in genre_list and genre != '': + if genre not in genre_list and genre != '' : genre_list.append(genre) - print("List of all possible genres:") genre_list.sort() - print(genre_list) + print("List of all possible genres:") + print(tabulate(enumerate(genre_list, start=1), headers=['No.', 'Genre'], tablefmt='pretty')) - while True: + while True : type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ") - # Use FuzzyWuzzy to find the closest match - matches = process.extractOne(type_input, genre_list) + matches = process.extractOne(type_input, genre_list) # Use FuzzyWuzzy to find the closest match - if matches[1] >= 80: # Adjust the similarity threshold as needed + if matches[1] >= 80 : # 80 is the threshold to compare the fuzzywuzzy used in precedent line type_input = matches[0] break - else: - closest_match = matches[0] + else : + closest_match = matches[0] # give the closest match of the value entered (wich is a type of movie/series) print(f"Invalid genre. The closest match is: {closest_match}") # Check if the entered genre is correct - if type_input in genre_list: + if type_input in genre_list : print(f"You selected: {type_input}") - else: + else : print(f"You entered: {type_input}, which is not in the list.") - type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)] + type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)] # used to filter the type in listed_in and display the data - if not type_data.empty: - print(type_data) + if not type_data.empty : + print(tabulate(type_data.head(50), headers='keys', tablefmt='pretty')) save_to_csv(type_data) - else: + else : print(f"No movies or series found for the type {type_input}.") return def duration(data_1): - # Filtrer par type de média - filtered_data = filter_media_type(data_1) + filtered_data = filter_media_type(data_1) # filter by media type (movies, series or both) - # Obtenir la liste de tous les genres possibles - genre_list = [] - for genres in data_1['listed_in'].dropna().str.split(', '): + genre_list = [] # Get list of all possible genres + for genres in data_1['listed_in'].dropna().str.split(', ') : for genre in genres: - if genre not in genre_list and genre != '': + if genre not in genre_list and genre != '' : genre_list.append(genre) - # Afficher la liste des genres possibles - print("List of all possible genres:") genre_list.sort() - print(genre_list) + print("List of all possible genres:") + print(tabulate(enumerate(genre_list, start=1), headers=['No.', 'Genre'], tablefmt='pretty')) - # Demander à l'utilisateur de saisir le genre - while True: - type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ") + while True : # loop until the user enter a right type + type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ") # Ask user to enter gender - # Utiliser FuzzyWuzzy pour trouver la correspondance la plus proche - matches = process.extractOne(type_input, genre_list) + matches = process.extractOne(type_input, genre_list) # Use FuzzyWuzzy to find the closest match - if matches[1] >= 80: # Ajuster le seuil de similarité si nécessaire + if matches[1] >= 80 : type_input = matches[0] break - else: + else : closest_match = matches[0] print(f"Invalid genre. The closest match is: {closest_match}") - # Vérifier si le genre saisi est correct - if type_input in genre_list: + if type_input in genre_list : # Check if the type entered is correct print(f"You selected: {type_input}") - else: + else : print(f"You entered: {type_input}, which is not in the list.") - # Demander le type de tri - print("What type of sorting do you want? ") - print("1. Ascending") - print("2. Descending") - - while True: - sort_order = input("Enter the sort type number (1/2): ") + sort_order = get_sort_order() - if sort_order in ['1', '2']: - break - else: - print("Invalid sort order. Please enter 1 for ascending or 2 for descending.") - - # Filtrer les données en fonction du genre saisi type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)].copy() - if not type_data.empty: + if not type_data.empty : print(f"\nDisplaying data for {type_input} sorted in {'ascending' if sort_order == '1' else 'descending'} order:") - # Extraire les valeurs numériques de la colonne 'duration' - type_data['duration'] = type_data['duration'].str.extract('(\\d+)').astype(float) + type_data['duration'] = type_data['duration'].str.extract('(\\d+)').astype(float) # Extract numeric values ​​from the 'duration' column - # Trier les données en fonction du type et de la durée - type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, sort_order == '1']) + type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, sort_order == '1']) # Sort data based on type and duration - # Convertir les valeurs de durée en texte formaté type_data_sorted['duration'] = type_data_sorted.apply( lambda row: f"{int(row['duration'])} min" if row['type'].lower() == 'movie' else f"{int(row['duration'])} Season", axis=1 - ) + ) # Convert duration values ​​to formatted text - print(type_data_sorted) + print(tabulate(type_data_sorted.head(50), headers='keys', tablefmt='pretty')) save_to_csv(type_data_sorted) - else: + else : print(f"No movies or series found for the type {type_input}.") return -def director(data_1): +def director(data_1) : filtered_data = filter_media_type(data_1) - director_list = [] - for dirs in data_1['director'].dropna().str.split(', '): - for director_name in dirs: - if director_name not in director_list and director_name != '': + director_list = [] # get all the director possible + for dirs in data_1['director'].dropna().str.split(', ') : + for director_name in dirs : + if director_name not in director_list and director_name != '' : director_list.append(director_name) - # Sort the director_list in alphabetical order - print("List of all possible directors: ") - director_list = sorted(director_list) - print(director_list) + print("List of all possible directors: ") # Sort the director_list in alphabetical order + director_list = sorted(director_list) # not displayed with tabulate because the number of director is too big + print(', '.join(director_list)) - while True: + while True : director_input = input("Enter the name of the director to display movies and/or series: ") - # Use FuzzyWuzzy to find the closest match - matches = process.extractOne(director_input, director_list) + matches = process.extractOne(director_input, director_list) # Use FuzzyWuzzy to find the closest match - if matches[1] >= 80: # Adjust the similarity threshold as needed + if matches[1] >= 80 : # comparison with the line fuzzyWuzzy before director_input = matches[0] break - else: + else : closest_match = matches[0] print(f"Invalid director name. The closest match is: {closest_match}") - # Check if the entered director is correct - if director_input in director_list: + if director_input in director_list : # Check if the entered director is correct print(f"You selected: {director_input}") - else: + else : print(f"You entered: {director_input}, which is not in the list.") - # Menu for sorting order - while True: - print("Select sorting order:") - print("1. Ascending") - print("2. Descending") - sort_order_input = input("Enter the number of your choice: ") - - if sort_order_input == '1': - sort_order_bool = True - break - elif sort_order_input == '2': - sort_order_bool = False - break - else: - print("Invalid input. Please enter '1' for Ascending or '2' for Descending.") + sort_order = get_sort_order() - order_text = 'ascending' if sort_order_bool else 'descending' + order_text = 'ascending' if sort_order == '1' else 'descending' director_data = filtered_data[filtered_data['director'].str.lower().str.contains(director_input.lower(), case=False, na=False)] - if not director_data.empty: + if not director_data.empty : print(f"\nDisplaying data for movies and/or series directed by {director_input} sorted by release year in {order_text} order:") - director_data_sorted = director_data.sort_values(by='release_year', ascending=sort_order_bool) - print(director_data_sorted) - save_to_csv(director_data_sorted) # Uncomment this line if you want to save to CSV - else: + director_data_sorted = sort_data_by_year(director_data, sort_order) + print(tabulate(director_data_sorted.head(50), headers='keys', tablefmt='pretty')) + save_to_csv(director_data_sorted) + else : print(f"No person found with the name {director_input}.") return + def actor(data_1): filtered_data = filter_media_type(data_1) actor_list = [] - for actors in data_1['cast'].dropna().str.split(', '): + for actors in data_1['cast'].dropna().str.split(', ') : for actor_name in actors: - if actor_name not in actor_list and actor_name != '': + if actor_name not in actor_list and actor_name != '' : actor_list.append(actor_name) actor_list = sorted(actor_list) - print("List of all possible actors: ") - print(actor_list) + print("List of all possible actors: ") # not displayed with tabulate because the number of actor is too big + print(', '.join(actor_list)) - while True: + while True : actor_input = input("Enter the name of the actor to display movies and/or series: ") - # Use FuzzyWuzzy to find the closest match - matches = process.extractOne(actor_input, actor_list) + matches = process.extractOne(actor_input, actor_list) # Use FuzzyWuzzy to find the closest match - if matches[1] >= 80: # Adjust the similarity threshold as needed + if matches[1] >= 80: actor_input = matches[0] break - else: + else : closest_match = matches[0] print(f"Invalid actor name. The closest match is: {closest_match}") - # Check if the entered actor is correct - if actor_input in actor_list: + if actor_input in actor_list : # Check if the entered actor is correct print(f"You selected: {actor_input}") - else: + else : print(f"You entered: {actor_input}, which is not in the list.") + sort_order = get_sort_order() + + order_text = 'ascending' if sort_order == '1' else 'descending' + actor_data = filtered_data[filtered_data['cast'].str.lower().str.contains(actor_input.lower(), case=False, na=False)] - if not actor_data.empty: - print(f"\nDisplaying data for movies and/or series featuring {actor_input} sorted by release year in ascending order:") + if not actor_data.empty : + print(f"\nDisplaying data for movies and/or series featuring {actor_input} sorted by release year in {order_text} order:") - actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True) - print(actor_data_sorted) + actor_data_sorted = sort_data_by_year(actor_data, sort_order) + print(tabulate(actor_data_sorted.head(50), headers='keys', tablefmt='pretty')) save_to_csv(actor_data_sorted) - else: + else : print(f"No actor found with the name {actor_input}.") return -def specific_genre_director(data_1): - filtered_data = filter_media_type(data_1) - unique_directors = filtered_data['director'].unique() - # Convert elements to strings to handle potential float values - unique_directors = [str(director) for director in unique_directors] +def specific_genre_director(data_1) : + filtered_data = filter_media_type(data_1) - # Sort the unique_directors in alphabetical order - unique_directors = sorted(unique_directors) + director_list = [] # get all the director possible + for dirs in data_1['director'].dropna().str.split(', ') : + for director_name in dirs : + if director_name not in director_list and director_name != '' : + director_list.append(director_name) - print("List of all available directors:") - print(', '.join(unique_directors)) + director_list = sorted(director_list) + print("List of all available directors:") # not displayed with tabulate because number too big + print(', '.join(director_list)) - while True: + while True : director_input = input("Enter the name of the director to display movies and/or series: ") - # Use FuzzyWuzzy to find the closest match - director_matches = process.extractOne(director_input, unique_directors) + director_matches = process.extractOne(director_input, director_list) # Use FuzzyWuzzy to find the closest match - if director_matches[1] >= 80: # Adjust the similarity threshold as needed + if director_matches[1] >= 80 : director_input = director_matches[0] break - else: + else : closest_match = director_matches[0] print(f"Invalid director name. The closest match is: {closest_match}") - # Check if the entered director is correct - if director_input in unique_directors: + if director_input in director_list : # Check if the entered director is correct print(f"You selected: {director_input}") - else: + else : print(f"You entered: {director_input}, which is not in the list.") - # Get a list of all available types without duplicates - unique_types = filtered_data['listed_in'].str.split(', ').explode().unique() + unique_types = filtered_data['listed_in'].str.split(', ').explode().unique() # Get a list of all available types without duplicates - # Sort the unique_types in alphabetical order - unique_types = sorted(unique_types) + unique_types = sorted(unique_types) # Sort the unique_types in alphabetical order - print("\nList of all available types:") - print(', '.join(unique_types)) + print("\nList of all available types:") # display with tabulate + print(tabulate(enumerate(unique_types, start=1), headers=['No.', 'Genre'], tablefmt='pretty')) - while True: + while True : type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize() - # Use FuzzyWuzzy to find the closest match - type_matches = process.extractOne(type_input, unique_types) + type_matches = process.extractOne(type_input, unique_types) # Use FuzzyWuzzy to find the closest match - if type_matches[1] >= 80: # Adjust the similarity threshold as needed + if type_matches[1] >= 80 : type_input = type_matches[0] break - else: + else : closest_match = type_matches[0] print(f"Invalid type. The closest match is: {closest_match}") - # Check if the entered type is correct - if type_input in unique_types: + if type_input in unique_types : # Check if the entered type is correct print(f"You selected: {type_input}") - else: + else : print(f"You entered: {type_input}, which is not in the list.") director_type_data = filtered_data[ @@ -414,11 +366,11 @@ def specific_genre_director(data_1): (filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)) ] - if not director_type_data.empty: - # Display the count - count = len(director_type_data) + if not director_type_data.empty : + + count = len(director_type_data) # Display the count print(f"The director {director_input} has directed {count} movie(s) or series of type {type_input}.") - print(director_type_data) + print(tabulate(director_type_data.head(50), headers='keys', tablefmt='pretty')) save_to_csv(director_type_data) else: @@ -427,234 +379,220 @@ def specific_genre_director(data_1): return -def specific_genre_actor(data_1): +def specific_genre_actor(data_1) : filtered_data = filter_media_type(data_1) - # Get a list of unique actors - unique_actors = filtered_data['cast'].str.split(', ').explode().unique() + unique_actors = filtered_data['cast'].str.split(', ').explode().unique() # Get a list of unique actors unique_actors = [str(actor) for actor in unique_actors] - # Sort the unique_actors in alphabetical order - unique_actors = sorted(unique_actors) + unique_actors = sorted(unique_actors) # Sort the unique_actors in alphabetical order print("List of all available actors:") print(', '.join(unique_actors)) - # Input actor name with fuzzy matching - while True: + while True : # Input actor name with fuzzy matching actor_input = input("Enter the name of the actor to display movies and/or series: ") actor_matches = process.extractOne(actor_input, unique_actors) - if actor_matches[1] >= 80: + if actor_matches[1] >= 80 : # verify the fuzzy matching actor_input = actor_matches[0] break - else: + else : closest_match = actor_matches[0] print(f"Invalid actor name. The closest match is: {closest_match}") - if actor_input in unique_actors: + if actor_input in unique_actors : print(f"You selected: {actor_input}") - else: + else : print(f"You entered: {actor_input}, which is not in the list.") - # Get a list of all available types without duplicates - unique_types = filtered_data['listed_in'].str.split(', ').explode().unique() + unique_types = filtered_data['listed_in'].str.split(', ').explode().unique() # Get a list of all available types without duplicates - # Sort the unique_types in alphabetical order - unique_types = sorted(unique_types) + unique_types = sorted(unique_types) # Sort the unique_types in alphabetical order print("\nList of all available types:") print(', '.join(unique_types)) - # Input type with fuzzy matching - while True: + while True : # Input type with fuzzy matching type_input = input("Enter the type (romantic, action, drama, etc.): ") type_matches = process.extractOne(type_input, unique_types) - if type_matches[1] >= 80: + if type_matches[1] >= 80 : type_input = type_matches[0] break - else: + else : closest_match = type_matches[0] print(f"Invalid type. The closest match is: {closest_match}") - if type_input in unique_types: + if type_input in unique_types : print(f"You selected: {type_input}") - else: + else : print(f"You entered: {type_input}, which is not in the list.") - # Filter the data based on actor and type actor_type_data = filtered_data[ (filtered_data['cast'].str.lower().str.contains(actor_input.lower(), case=False, na=False)) & (filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)) - ] + ] # Filter the data based on actor and type - if not actor_type_data.empty: + if not actor_type_data.empty : count = len(actor_type_data) print(f"The actor {actor_input} has acted in {count} movie(s) or series of type {type_input}.") - print(actor_type_data) + print(tabulate(actor_type_data.head(50), headers='keys', tablefmt='pretty')) save_to_csv(actor_type_data) - else: + else : print(f"No movies or series found for the actor {actor_input} and type {type_input}.") return - -# rating +# RATING FUNCTIONS # these are variables that needs to be registered in general not in a local function -notes = data_2.drop('show_id', axis = 1) -mean_type = notes.mean(axis = 1) * 100 -data_2['appreciation (%)'] = mean_type +data_2['appreciation (%)'] = 0 +notes = data_2.drop('show_id', axis=1) +sum_vals = notes.sum(axis=1) +data_2['appreciation (%)'] = round((sum_vals / notes.shape[1]) * 100, 2) def most_rated(data_1, data_2) : filtered_data = filter_media_type(data_1) - link_between = pd.merge(filtered_data,data_2, on='show_id') - link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False) + table_headers = ['show_id', 'title', 'type', 'appreciation (%)'] + table_data = link_between_sorted[table_headers] + print("Films et séries les mieux notés :") - print(link_between_sorted[['show_id', 'title', 'type', 'appreciation (%)']]) + print(tabulate(table_data.head(50), headers='keys', tablefmt='pretty')) save_to_csv(link_between_sorted) return -def most_rated_year(data_1, data_2): - # Display all available unique release years +def most_rated_year(data_1, data_2): # Display all available unique release years available_years = sorted(data_1['release_year'].unique()) print("Available years: ", available_years) - # Input year with validation - while True: + while True: # Input year with validation year_input = input("Enter a release year: ") try: - # Convert the input year to an integer year = int(year_input) - # Check if the entered year is in the available years if year in available_years: break else: print("Please enter a valid year from the available options.") + except ValueError: - print("Please enter a valid year.") + print("Please enter a valid year.") # Filter the data based on the release year - # Filter the data based on the release year filtered_data = filter_media_type(data_1[data_1['release_year'] == year]) + link_between = pd.merge(filtered_data, data_2, on='show_id') # Merge the dataframes on the 'show_id' key + link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False) - # Merge the DataFrames on the 'show_id' key - link_between = pd.merge(filtered_data, data_2, on='show_id') + table_headers = ['show_id', 'title', 'type', 'release_year', 'appreciation (%)'] # Define headers for the tabulated table - # Sort the DataFrame by the 'appreciation' column (in descending order) - link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False) + table_data = link_between_sorted[table_headers] # Extract relevant data for tabulation + + print(f"Top-rated shows for the year {year}:") # Print the top-rated shows for the year in a tabular format + print(tabulate(table_data.head(50), headers='keys', tablefmt='pretty')) + + save_to_csv(link_between_sorted) # Save the sorted data to CSV - print(f"Top-rated shows for the year {year}:") - print(link_between_sorted[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']]) - save_to_csv(link_between_sorted) return -def most_rated_recent(data_1, data_2): - # Merge the DataFrames on the 'show_id' key - merged_data = pd.merge(data_1, data_2, on='show_id') +def most_rated_recent(data_1, data_2) : # we chose to display the 20 newest and highest rated movies + merged_data = pd.merge(data_1, data_2, on='show_id') # merge the dataframes on the 'show_id' key + sorted_data = merged_data.sort_values(by=['release_year', 'appreciation (%)'], ascending=[False, False]) # Sort the DataFrame by the 'appreciation' column (in descending order) and 'release_year' (in descending order) + top_50_data = sorted_data.head(50) # Display the most rated and recent shows + + table_headers = ['show_id', 'title', 'type', 'release_year', 'appreciation (%)'] + table_data = top_50_data[table_headers] - # Sort the DataFrame by the 'appreciation' column (in descending order) and 'release_year' (in descending order) - sorted_data = merged_data.sort_values(by=['release_year', 'appreciation (%)'], ascending=[False, False]) + print("Top 50 most rated and recent shows:") + print(tabulate(table_data, headers='keys', tablefmt='pretty')) - # Display the most rated and recent shows - top_20_data = sorted_data.head(20) - print("Top 20 most rated and recent shows:") - print(top_20_data[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']]) - save_to_csv(top_20_data) + save_to_csv(top_50_data) return -# Example usage -def parental_code(data_1): +# PARENTAL CODE FUNCTION +def parental_code(data_1) : valid_codes = set(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R', 'TV-G', 'G', 'NC-17', 'NR', 'TV-Y7-FV', 'UR']) - # there is a problem in the csv (values ​​which should not be there). So we sorted + # there is an issue in the csv (values ​​which should not be there) more explanation with the next lines. So we sorted cause we are not allowed to modify the csv files. + + filtered_data = data_1[data_1['rating'].isin(valid_codes)] # Filter out entries that are not valid parental codes + # the data needed to be filtered because there is a bug in the csv file with the comma. There were minutes values in the parental codes - # Filter out entries that are not valid parental codes - filtered_data = data_1[data_1['rating'].isin(valid_codes)] print("Valid parental codes:") - print(', '.join(valid_codes)) + print(tabulate(enumerate(valid_codes, start=1), headers=['No.', 'Parental Code'], tablefmt='pretty')) while True: - # Ask the user to enter a parental code - selected_code = input("Enter a parental code to display movies and/or series: ") + selected_code = input("Enter a parental code to display movies and/or series: ") # Ask the user to enter a parental code - # Filter the data based on the selected parental code - if selected_code in valid_codes: + if selected_code in valid_codes: # Filter the data based on the selected parental code result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)] if not result_data.empty: - print(result_data) + print(tabulate(result_data.head(50), headers='keys', tablefmt='pretty')) save_to_csv(result_data) else: print(f"No movies or series found for the parental code {selected_code}.") break else: print("Invalid parental code entered. Please enter a valid code.") - + return -def directors_nationality(data_1): +def directors_nationality(data_1) : - # Extract unique directors and their respective nationalities - directors_nationality_dict = {} + directors_nationality_dict = {} # Extract unique directors and their respective nationalities country_nationalities_set = set() for index, row in data_1.iterrows(): directors = str(row['director']).split(', ') if pd.notna(row['director']) else [] nationality = str(row['country']).split(',') - # Add unique nationalities from 'country' column to the set, excluding 'nan' - unique_nationalities = set(filter(lambda x: pd.notna(x) and x.lower() != 'nan', map(str.strip, nationality))) + unique_nationalities = set(filter(lambda x: pd.notna(x) and x.lower() != 'nan', map(str.strip, nationality))) # Add unique nationalities from 'country' column to the set, excluding 'nan' country_nationalities_set.update(unique_nationalities) - for director in directors: + for director in directors : director = director.strip() - if director in directors_nationality_dict: - # Add unique nationalities only if they are not already present - directors_nationality_dict[director]['nationalities'].update(unique_nationalities) + if director in directors_nationality_dict : + directors_nationality_dict[director]['nationalities'].update(unique_nationalities) # Add unique nationalities only if they are not already present directors_nationality_dict[director]['number of movies or series'] += 1 - else: + else : directors_nationality_dict[director] = {'nationalities': set(unique_nationalities), 'number of movies or series': 1} - # Sort the directors by the number of movies and series produced - sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['number of movies or series'], reverse=True) + sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['number of movies or series'], reverse=True) # sort the directors by the number of movies and series produced - # Create a DataFrame - columns = ['director', 'nationalities', 'number of movies or series'] + columns = ['director', 'nationalities', 'number of movies or series'] # Create a DataFrame with pandas to "SHOW" the output to the user. directors_df = pd.DataFrame([[director, ', '.join(info['nationalities']), info['number of movies or series']] for director, info in sorted_directors], columns=columns) - # Display the list of directors and their nationalities + columns = ['Director', 'Nationalities', 'Number of Movies or Series'] + directors_df = pd.DataFrame([[str(director) if pd.notna(director) else 'Unknown', + ', '.join(str(n) for n in info['nationalities']), + info['number of movies or series']] for director, info in sorted_directors], + columns=columns) + print("Directors and their nationalities, sorted by the number of movies and series produced:") - for i, (director, info) in enumerate(sorted_directors[:25]): - director_name = str(director) if pd.notna(director) else 'Unknown' - nationalities_str = ', '.join(str(n) for n in info['nationalities']) - print(f"{i+1}. {director_name}: {nationalities_str} - {info['number of movies or series']} movies/series") + print(tabulate(directors_df.head(25), headers='keys', tablefmt='grid')) # Display using tabulate - # Save to CSV using the DataFrame - save_to_csv(directors_df) + save_to_csv(directors_df) # Save to CSV using the pandas DataFrame return directors_df - -# Allow to filter if we want movie, tv show or both -def filter_media_type(data): +# Allow to filter if the user want movie, tv show or both +def filter_media_type(data) : while True: print("Select the type of media:") print("1. Movie") print("2. TV Show") print("3. Both") - + media_choice = input("Enter the corresponding number : ") if media_choice in ['1', '2', '3']: @@ -667,22 +605,35 @@ def filter_media_type(data): else: print("Invalid choice. Please enter a valid number.") +# Used to sort by ascending or descending (depending on the preference of the user) +def get_sort_order() : + while True : + print("1. Ascending") + print("2. Descending") + sort_type = input("Enter the number of sort order : ") + + if sort_type in ['1', '2'] : + return sort_type + else: + print("Invalid choice. Please enter 1 for ascending or 2 for descending.") + +def sort_data_by_year(data, sort_order) : + sorted_data = data.sort_values(by='release_year', ascending=(sort_order == '1')) + return sorted_data -# Example usage +# STATISTICS def basic_statistics(data_1): - # Check if the 'type' and 'country' columns exist in the dataset + if 'type' not in data_1.columns or 'country' not in data_1.columns: print("The dataset does not contain the necessary columns.") return - # Count the number of movies and series movies_count = len(data_1[data_1['type'] == 'Movie']) series_count = len(data_1[data_1['type'] == 'TV Show']) print(f"Number of movies in the catalog: {movies_count}") print(f"Number of series in the catalog: {series_count}") - # Compare the number of movies and series if movies_count > series_count: print("There are more movies than series in the catalog.") elif movies_count < series_count: @@ -690,40 +641,39 @@ def basic_statistics(data_1): else: print("The catalog has an equal number of movies and series.") - # List countries that produced movies/series from most productive to least country_counts = data_1['country'].str.split(', ').explode().value_counts() + country_table = tabulate(country_counts.reset_index().head(50), headers=['Country', 'Count'], tablefmt='grid') + print("\nCountries that produced movies/series, sorted from most to least productive:") - print(country_counts) + print(country_table) + return - # attention il faut demander à chaque fois, s'il désire enregistrer la liste sur un .csv +# Allow to register a CSV file each time there was a 'show' as instruction def save_to_csv(data, default_filename='output.csv'): while True: - # Ask if the user wants to save to a CSV file + save_choice = input("Do you want to save the data to a CSV file? (YES/NO): ").upper() if save_choice == 'YES': - # Prompt for a file name - file_name = input("Enter the file name (DO NOT include .csv extension, or press Enter for the default): ") + + file_name = input("Enter the file name (DO NOT include .csv extension, or press Enter for the default): ") # Prompt for a file name file_name = file_name + ".csv" if not file_name: file_name = default_filename - # Check if the file already exists - if os.path.exists(file_name): + if os.path.exists(file_name): # Check if the file already exists while True: - # Ask if the user wants to overwrite or create a new file - overwrite_choice = input(f"The file '{file_name}' already exists. Do you want to overwrite it? (YES/NO): ").upper() + overwrite_choice = input(f"The file '{file_name}' already exists. Do you want to overwrite it? (YES/NO): ").upper() # Ask if the user wants to overwrite or create a new file if overwrite_choice == 'YES': - # Overwrite the existing file - data.to_csv(file_name, index=False) + data.to_csv(file_name, index=False) # Overwrite the existing file print(f"Data saved to {file_name}") break - elif overwrite_choice == 'NO': - # Prompt for a new file name + + elif overwrite_choice == 'NO': # Prompt for a new file name new_filename = input("Enter a new file name (DO NOT include .csv extension): ") new_filename = new_filename + ".csv" data.to_csv(new_filename, index=False) @@ -733,8 +683,7 @@ def save_to_csv(data, default_filename='output.csv'): print("Invalid choice. Please enter either 'YES' or 'NO'.") else: - # Save to a new file - data.to_csv(file_name, index=False) + data.to_csv(file_name, index=False) # Save to a new file .csv print(f"Data saved to {file_name}") break @@ -746,18 +695,15 @@ def save_to_csv(data, default_filename='output.csv'): print("Invalid choice. Please enter either 'YES' or 'NO.'") - -# début de l'algorithme de recommandation - -# Load the CSV file -categories = [] +# ALGO RECOMMENDATION +categories = [] # categories are defined on a global level def read_movie_series_info(file_path): catalog = {} with open(file_path, 'r', encoding='utf-8') as info_file: info_reader = csv.reader(info_file) - next(info_reader) # Skip header row + next(info_reader) # Skip header row. for row in info_reader: show_id, show_type, title, director, cast, country, date_added, release_year, rating, duration, listed_in, description = row catalog[show_id] = [title, listed_in.split(', ')] @@ -769,104 +715,97 @@ def read_user_ratings(file_path): ratings_reader = csv.reader(ratings_file) header = next(ratings_reader) # Skip header row user_ids = list(map(int, header[1:])) - + for row in ratings_reader: show_id = row[0] user_ratings = list(map(int, row[1:])) ratings[show_id] = dict(zip(user_ids, user_ratings)) - + return ratings def create_category_matrix(catalog, categories, output_file_path): - # Créez la matrice sans les noms de catégories - category_matrix = [[0 for _ in range(len(categories))] for _ in range(len(categories))] + category_matrix = [[0 for _ in range(len(categories))] for _ in range(len(categories))] # create the matrix category without the names - # Remplissez la matrice avec les données - for show_id, movie_categories in catalog.items(): + for show_id, movie_categories in catalog.items(): # fill up the matrix for i in range(len(categories)): if categories[i] in movie_categories[1]: for j in range(len(categories)): - if categories[j] in movie_categories[1]: - # Assurez-vous que les indices sont valides avant d'incrémenter + if categories[j] in movie_categories[1]: # verify if index is correct if i < len(category_matrix) and j < len(category_matrix[i]): category_matrix[i][j] += 1 - # Ajoutez les noms de catégories à la première ligne et à la première colonne du DataFrame - category_matrix_with_names = [[category] + row for category, row in zip(categories, category_matrix)] + category_matrix_with_names = [[category] + row for category, row in zip(categories, category_matrix)] # ADD names of categories df = pd.DataFrame(category_matrix_with_names, columns=[''] + categories) - # Enregistrez le DataFrame dans un fichier Excel avec les noms de colonnes et de lignes - df.to_excel(output_file_path, index=False) + df.to_excel(output_file_path, index=False) # register the dataframe in an excel because it's more readable than a matrix return category_matrix -def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=0.9999): - global categories # Déclarer categories en tant que variable globale - categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1])) # permet d'actualiser la variable catégories global au niveau local +def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=0.9999): # Give recommended movies according to a threshold of similarity. + # The threshold is very high to be very restrictive because all the users have shown loads of series and movies. allows us to be stricter on the recommendations + global categories # Declare categories as a global variable + categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1])) # allows you to update the global categories variable at the local level user_id = int(user_id) # Convertir user_id en entier suggestions = {} category_index = {} - # Créer le dictionnaire pour stocker les indices des catégories - category_index = {category: i for i, category in enumerate(categories)} + category_index = {category: i for i, category in enumerate(categories)} # Create the dictionary to store category indices - # Ajout de l'affectation manquante - user_categories = categories + user_categories = categories # Added missing assignment - for show_id, categories in catalog.items(): - # Check if the user has rated the show - if show_id in user_ratings and user_id in user_ratings[show_id] and user_ratings[show_id][user_id] == 0: - # Liste des catégories communes entre le film/série et les films/séries notés par l'utilisateur + for show_id, categories in catalog.items(): # Check if the user has rated the movie of series + + if show_id in user_ratings and user_id in user_ratings[show_id] and user_ratings[show_id][user_id] == 0: # verify the ratings given by the user + # List of categories common between the film/series and the films/series rated by the user common_categories = [category for category in categories[1] if category in user_categories] if common_categories: - # Calculez la similarité entre le film/série et les films/séries notés par l'utilisateur + # Calculate the similarity between the movie/series and the movies/series rated by the user similarity = sum( min(category_matrix[category_index[category]][category_index[user_category]] for user_category in common_categories) for category in categories[1] ) - # Ne recommandez que des films/séries dont la similarité dépasse le seuil spécifié + # Only recommend movies/series whose similarity exceeds the specified threshold if similarity > threshold: suggestions[show_id] = {'title': catalog[show_id][0], 'similarity': similarity} - # Triez les suggestions par similarité décroissante + # Sort suggestions by decreasing similarity sorted_suggestions = sorted(suggestions.items(), key=lambda x: x[1]['similarity'], reverse=True) - return sorted_suggestions[:5] + return sorted_suggestions[:5] # choosen 5 based on the instructions given def recommandation_algorithm() : # Replace file_path_1 and file_path_2 with the actual file paths - file_path_1 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv" - file_path_2 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv" + file_path_1 = "/content/drive/MyDrive/Coding_project_netflix_2023/netflix_titles-2.csv" + file_path_2 = "/content/drive/MyDrive/Coding_project_netflix_2023/ratings.csv" - while True: + while True : user_id = input("Quel est ton user ? ") try: user_id = int(user_id) - # Check if user_id is between 1 and 100 - if 1 <= user_id <= 100: - break # Sort de la boucle si l'identifiant est valide + if 1 <= user_id <= 100 : # Check if user_id is between 1 and 100 + break # break if user id is valid else: print("L'identifiant de l'utilisateur doit être compris entre 1 et 100.") - except ValueError as e: + except ValueError as e : print(f"Veuillez entrer un identifiant d'utilisateur valide. Erreur: {e}") - # Read data from CSV files - catalog = read_movie_series_info(file_path_1) - ratings = read_user_ratings(file_path_2) + catalog = read_movie_series_info(file_path_1) # call the function read_movie_series_info + ratings = read_user_ratings(file_path_2) # call the function read_user_ratings + # Create category matrix categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1])) output_file_path = "matrice_categories.xlsx" category_matrix = create_category_matrix(catalog, categories, output_file_path) - + # Display movies already viewed by the user print("Films déjà vus par l'utilisateur:") @@ -874,7 +813,7 @@ def recommandation_algorithm() : if user_id in user_rating and user_rating[user_id] > 0: print(f"- {catalog[show_id][0]}") - # Recommend movies + # Recommend movies that the user hasn't seen yet recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5) # Display top 5 recommendations @@ -883,73 +822,77 @@ def recommandation_algorithm() : print(f"Title: {info['title']}, Similarity: {info['similarity']}") -# Création du menu -def action() : - print("Here are the different options available:") - print("1. View the entire catalog") - print("2. View all movies in the catalog") - print("3. View all series in the catalog") - print("4. View all series, movies or both by year") - print("5. View all series, movies or both by country") - print("6. View all series, movies or both by type") - print("7. View all series, movies or both by type sorted by duration") - print("8. View series, movies or both directed by a specific director and sorted by year") - print("9. View series, movies or both featuring a specific actor and sorted by year") - print("10. View how many series, movies or both and series directed by a director in a specific genre") - print("11. View how many series, movies or both an actor has played in") - print("12. Display the highest-rated series, movies or both") - print("13. Display the highest-rated series, movies or both for a specific year") - print("14. Display recent highest-rated series, movies or both") - print("15. Display movies and series based on parental control code") - print("16. Display the nationalities of directors and sort the list based on the number of movies and series directed") - print("17. Display basic statistics") - print("18. Get Personalized Recommendations") - print("STOP to stop") - command = input("Enter the number of what you want to do: ") - - if command == "1" : - catalog(data_1) - elif command == "2" : - movies(data_1) - elif command == "3" : - series(data_1) - elif command == "4" : - by_year(data_1) - elif command == "5" : - by_country(data_1) - elif command == "6" : - genre(data_1) - elif command == "7" : - duration(data_1) - elif command == "8" : - director(data_1) - elif command == "9" : - actor(data_1) - elif command == "10" : - specific_genre_director(data_1) - elif command == "11" : - specific_genre_actor(data_1) - elif command == "12" : - most_rated(data_1, data_2) - elif command == "13" : - most_rated_year(data_1, data_2) - elif command == "14" : - most_rated_recent(data_1, data_2) - elif command == "15" : - parental_code(data_1) - elif command == "16" : - directors_nationality(data_1) - elif command == "17" : - basic_statistics(data_1) - elif command == "18" : - recommandation_algorithm() - elif command.upper() == "STOP" : - return False - - - - # attention il faut demander à chaque fois, s'il désire enregistrer la liste sur un .csv +# Menu +def action(): + print("Here are the different options available:") + options = [ + "View the entire catalog", + "View all movies in the catalog", + "View all series in the catalog", + "View all series, movies or both by year", + "View all series, movies or both by country", + "View all series, movies or both by type", + "View all series, movies or both by type sorted by duration", + "View series, movies or both directed by a specific director and sorted by year", + "View series, movies or both featuring a specific actor and sorted by year", + "View how many series, movies or both and series directed by a director in a specific genre", + "View how many series, movies or both an actor has played in", + "Display the highest-rated series, movies or both", + "Display the highest-rated series, movies or both for a specific year", + "Display recent highest-rated series, movies or both", + "Display movies and series based on parental control code", + "Display the nationalities of directors and sort the list based on the number of movies and series directed", + "Display basic statistics", + "Get Personalized Recommendations", + "STOP to stop" + ] + # Create a list of lists for tabulate + table = [[i + 1, option] for i, option in enumerate(options)] + + # Print the tabulated menu + print(tabulate(table, headers=["Options", "Descriptions"], tablefmt="grid", colalign=("center", "left"))) + + command = input("Enter the number of what you want to do: ") + + if command == "1" : + catalog(data_1) + elif command == "2" : + movies(data_1) + elif command == "3" : + series(data_1) + elif command == "4" : + by_year(data_1) + elif command == "5" : + by_country(data_1) + elif command == "6" : + genre(data_1) + elif command == "7" : + duration(data_1) + elif command == "8" : + director(data_1) + elif command == "9" : + actor(data_1) + elif command == "10" : + specific_genre_director(data_1) + elif command == "11" : + specific_genre_actor(data_1) + elif command == "12" : + most_rated(data_1, data_2) + elif command == "13" : + most_rated_year(data_1, data_2) + elif command == "14" : + most_rated_recent(data_1, data_2) + elif command == "15" : + parental_code(data_1) + elif command == "16" : + directors_nationality(data_1) + elif command == "17" : + basic_statistics(data_1) + elif command == "18" : + recommandation_algorithm() + elif command.upper() == "STOP" or "19" : + return False menu = [] diff --git a/projet_personnel/algorithme_gestion_etudiants.py b/projet_personnel/algorithme_gestion_etudiants.py index 208ace9e1804a8fd1c0a0b0c3c13620083429790..86b4be3457566deb7ac72f195353e4e731a175e0 100644 --- a/projet_personnel/algorithme_gestion_etudiants.py +++ b/projet_personnel/algorithme_gestion_etudiants.py @@ -38,27 +38,29 @@ campus_pattern = re.compile(r'^(Louvain-la-Neuve|Mons)$', re.IGNORECASE) curriculum_pattern = re.compile(r'^(INGM1BA|INGM2M)$', re.IGNORECASE) + +# function to reload data each time something is modified def reload_data(file_path): return pd.read_excel(file_path) -# REGISTER + +# function to REGISTER a new student in the data base def register_student(data): - # Setting up the patterns to follow for entering information while True: - user_input = input("Enter 'q' to quit or any other key to continue: ") + user_input = input("Enter 'q' to quit or any other key to continue: ") # manage issue if you have entered the wrong number if user_input.lower() == 'q': print("Operation canceled.") return - while True: - Firstname = input("What is the student's firstname? ") # Ask the user for the name + while True: # loop to verify if errors + Firstname = input("What is the student's firstname? ") if name_pattern.match(Firstname): # Check if the name matches the specified format print(f"The firstname is valid : {Firstname}") break # Exit the loop if the name is valid else: print("The firstname is not valid. Make sure to follow the specified format.") - while True: + while True: # loop to verify if errors Lastname = input("What is the student's lastname? ") # Ask the user for the surname if name_pattern.match(Lastname): print(f"The lastname is valid : {Lastname}") @@ -66,15 +68,15 @@ def register_student(data): else: print("The lastname is not valid. Make sure to follow the specified format.") - while True: - Date_of_birth = input("Please enter a date in the format dd/mm/yyyy: ") - if date_pattern.match(Date_of_birth): + while True: # loop to verify if errors + Date_of_birth = input("Please enter a date in the format dd/mm/yyyy: ") # Ask the user for the birthdate + if date_pattern.match(Date_of_birth): # check if the format is respected print(f"The date is valid : {Date_of_birth}") break # Exit the loop if the date is valid else: print("The date is not valid. Make sure to follow the format dd/mm/yyyy.") - while True: + while True: # loop to verify if errors Place_of_birth = input("What is the city of birth? ") if place_of_birth_pattern.match(Place_of_birth): print(f"The place of birth is valid : {Place_of_birth}") @@ -82,7 +84,7 @@ def register_student(data): else: print("The city of birth is not valid. Make sure to use only letters and spaces.") - while True: + while True: # loop to verify if errors Address = input("Please enter an address in the format 'street number, city': ") if address_pattern.match(Address): print(f"The address is valid : {Address}") @@ -90,7 +92,7 @@ def register_student(data): else: print("The address is not valid. Make sure to follow the format 'street number, city'.") - while True: + while True: # loop to verify if errors Telephone = input("What is the telephone number? (in the format 0000/00.00.00) ") if telephone_pattern.match(Telephone): print(f"The telephone number is valid : {Telephone}") @@ -99,11 +101,11 @@ def register_student(data): print("The telephone number is not valid. Make sure to follow the requested format.") print("The email address was completed automatically. If it is not suitable you can modify it later : ") - email_of_student = f"{Firstname}{'.'}{Lastname}{'@student.uclouvain.be'}" + email_of_student = f"{Firstname}{'.'}{Lastname}{'@student.uclouvain.be'}" # this is automaticaly attributed based on the firstname and lastname Email = email_of_student.lower() print(Email) - while True: + while True: # loop to verify if errors Gender = input("What is your gender? (For Male enter M, for Female enter F, and for another type enter O): ") if gender_pattern.match(Gender): print(f"The gender is valid : {Gender}") @@ -111,7 +113,7 @@ def register_student(data): else: print("The gender is not valid. Make sure to follow the requested format.") - while True: + while True: # loop to verify if errors Academic_year = input("What is your academic year? (BAC1/BAC2/BAC3/MA1/MA2): ") if academic_year_pattern.match(Academic_year): print(f"The academic year is valid : {Academic_year}") @@ -119,26 +121,28 @@ def register_student(data): else: print("The academic year is not valid. Make sure to follow the requested format.") - if Academic_year in ['BAC1', 'BAC2', 'BAC3'] : + if Academic_year in ['BAC1', 'BAC2', 'BAC3'] : # Curriculum is automatically attributed depeding on the academic year. Curriculum = "INGM1BA" else : Curriculum = "INGM2M" Courses_and_grade = {} if Academic_year == 'MA2': - for course in all_course_BAC1 + all_course_BAC2 + all_course_BAC3 + all_course_MA1 + all_course_MA2: + for course in all_course_BAC1 + all_course_BAC2 + all_course_BAC3 + all_course_MA1 + all_course_MA2: # for instance, if you are in MA2 you need to enter every gardes of each courses. grade = input(f"What is the grade for {course}? ") while not grade.isdigit() or int(grade) not in range(0, 21): print("The grade must be an integer between 0 and 20.") grade = input(f"What is the grade for {course}? ") Courses_and_grade[course] = int(grade) + elif Academic_year == 'MA1': - for course in all_course_BAC1 + all_course_BAC2 + all_course_BAC3 + all_course_MA1: + for course in all_course_BAC1 + all_course_BAC2 + all_course_BAC3 + all_course_MA1: # same as before every course unless MA2 because you are not in yet. grade = input(f"What is the grade for {course}? ") while not grade.isdigit() or int(grade) not in range(0, 21): print("The grade must be an integer between 0 and 20.") grade = input(f"What is the grade for {course}? ") Courses_and_grade[course] = int(grade) + elif Academic_year == 'BAC3': for course in all_course_BAC1 + all_course_BAC2 + all_course_BAC3: grade = input(f"What is the grade for {course}? ") @@ -146,6 +150,7 @@ def register_student(data): print("The grade must be an integer between 0 and 20.") grade = input(f"What is the grade for {course}? ") Courses_and_grade[course] = int(grade) + elif Academic_year == 'BAC2': for course in all_course_BAC1 + all_course_BAC2: grade = input(f"What is the grade for {course}? ") @@ -153,6 +158,7 @@ def register_student(data): print("The grade must be an integer between 0 and 20.") grade = input(f"What is the grade for {course}? ") Courses_and_grade[course] = int(grade) + elif Academic_year == 'BAC1': for course in all_course_BAC1: grade = input(f"What is the grade for {course}? ") @@ -162,18 +168,18 @@ def register_student(data): Courses_and_grade[course] = int(grade) while True: - Campus = input("Enter 'Louvain-la-Neuve' or 'Mons' depending on the campus: ") + Campus = input("Enter 'Louvain-la-Neuve' or 'Mons' depending on the campus: ") # Only 2 campus possible but it must respect the pattern of the campus if campus_pattern.match(Campus): print("The campus choice is valid.") break # Exit the loop if the campus choice is valid else: print("The campus choice is not valid. Make sure to follow the requested format.") - matricule = generate_matricule(Firstname, Lastname, Date_of_birth) + matricule = generate_matricule(Firstname, Lastname, Date_of_birth) # call the function matricule to generate a matricule - register(data, Firstname, Lastname, Curriculum, Date_of_birth, Place_of_birth, Address, Telephone, Email, Gender, Academic_year, Courses_and_grade, Campus, matricule) + register(data, Firstname, Lastname, Curriculum, Date_of_birth, Place_of_birth, Address, Telephone, Email, Gender, Academic_year, Courses_and_grade, Campus, matricule) # Call the function register to register a student with these informations - # Display the entered information + # Display the entered information by the user. print("\nEntered Information:") print(f"Firstname: {Firstname}") print(f"Lastname: {Lastname}") @@ -190,22 +196,22 @@ def register_student(data): print(f"Matricule: {matricule}") while True: - # Ask the user if they want to modify any information before registration + # Ask the user if they want to modify any information before registration because mistakes are always possible modify_info = input("Do you want to modify any information before registration? (YES/NO): ").upper() if modify_info == "YES": data = reload_data(file_path) - modify(data) # Call the modify function to update information + modify(data) # Call the modify function to update information (which may be false or incorrectly entered) break # Exit the loop after modification elif modify_info == "NO": - break # Exit the loop if no modification is needed + break # Exit the loop if no modification is needed. else: print("Invalid input. Please enter either 'YES' or 'NO'.") - return - + return +# the function is used to register the data in the file and to reload the file. def register(data, firstname, lastname, Curriculum, date_of_birth, place_of_birth, address, telephone, email, gender, academic_year, courses_and_grade, campus, matricule): student = { @@ -237,10 +243,11 @@ def register(data, firstname, lastname, Curriculum, date_of_birth, place_of_birt return data +# Used to generate the matricule of the student. def generate_matricule(firstname, lastname, date_of_birth): consonant_of_firstname = ''.join([c for c in firstname if c.lower() not in 'aeiou'])[:3] consonant_of_lastname = ''.join([c for c in lastname if c.lower() not in 'aeiou'])[:2] - last_consonant_of_lastname = ''.join([c for c in lastname if c.lower() not in 'aeiou']).lower()[-1] # ne sait pas si c'est la consonne du prénom ou du nom + last_consonant_of_lastname = ''.join([c for c in lastname if c.lower() not in 'aeiou']).lower()[-1] year_of_birth_string = str(date_of_birth)[-4:] random_integer = random.randint(0, 10) matricule = f"{consonant_of_lastname }{consonant_of_firstname}{last_consonant_of_lastname}{year_of_birth_string}{random_integer}" @@ -251,32 +258,32 @@ def generate_matricule(firstname, lastname, date_of_birth): # MODIFYING THE DATA OF A STUDENT def modify(data): while True: - matricule_to_modify = input("Enter the matricule of the student you want to modify (or 'q' to quit): ") - if matricule_to_modify.lower() == 'q': + matricule_to_modify = input("Enter the matricule of the student you want to modify (or 'q' to quit): ") # matricule is unique, that's why I based my function modify on it + if matricule_to_modify.lower() == 'q': # back to the menu if you entered the wrong number. print("Operation canceled.") break student_row = data[data['Matricule'] == matricule_to_modify.lower()] if student_row.empty: - print("No student found with the specified matricule.") + print("No student found with the specified matricule.") # the matricule encoded can be False. I didn't used the fuzzywuzzy because I wanted the matricule to be written correctly to modify information. return print("\nDetails of the student before modification:") - print(student_row[['Matricule', 'Firstname', 'Lastname']]) + print(student_row[['Matricule', 'Firstname', 'Lastname']]) # Better to give important information about the student before modifying it, to be sure it is him or her. - confirmation = input("Do you really want to modify this student? (YES/NO) ").upper() + confirmation = input("Do you really want to modify this student? (YES/NO) ").upper() # to make sure it's the right student if confirmation == "YES": student_index = student_row.index[0] - print("\nDetails of the student before modification:") + print("\nDetails of the student before modification:") # Give all the information about the student. for col in ['Matricule', 'Firstname', 'Lastname', 'Date of Birth', 'Place of Birth', 'Address', 'Telephone', 'Email', 'Gender', 'Academic Year', 'Curriculum', 'Campus']: print(f"{col}: {data.at[student_index, col]}") - for course, grade in data.iloc[student_index].items(): + for course, grade in data.iloc[student_index].items(): # Display all the course that the student has already done if course not in field_mapping.values(): print(f"{course}: {grade}") - print(f"\nModifying the student with matricule {matricule_to_modify}:\n") + print(f"\nModifying the student with matricule {matricule_to_modify}:\n") # display the information and what can be modified print("1. Firstname") print("2. Lastname") print("3. Date of Birth") @@ -300,9 +307,9 @@ def modify(data): if field_to_modify == 1: # If the field to modify is the name (Name) while True: firstname = input("What is the name you want to modify? ") - if name_pattern.match(firstname): + if name_pattern.match(firstname): # respect the pattern, same as register function print(f"The firstname is valid : {firstname}") - if field_to_modify in (1, 2, 3): + if field_to_modify in (1, 2, 3): # if you are modifying one of the 3 informations it will modify the matricule. matricule = generate_matricule(firstname, data.at[student_index, 'Lastname'], data.at[student_index, 'Date of Birth']) data.at[student_index, 'Firstname'] = firstname break # Exit the loop if the name is valid @@ -311,9 +318,9 @@ def modify(data): elif field_to_modify == 2: while True: # Ask the user for the surname lastname = input("What is the surname? ") - if name_pattern.match(lastname): + if name_pattern.match(lastname): # respect the pattern, same as register function print(f"The lastname is valid : {lastname}") - if field_to_modify in (1, 2, 3): + if field_to_modify in (1, 2, 3): # if you are modifying one of the 3 informations it will modify the matricule. matricule = generate_matricule(data.at[student_index, 'Firstname'], lastname, data.at[student_index, 'Date of Birth']) data.at[student_index, 'Lastname'] = lastname break # Exit the loop if the surname is valid @@ -322,9 +329,9 @@ def modify(data): elif field_to_modify == 3: while True: date_of_birth = input("Please enter a date in the format dd/mm/yyyy: ") - if date_pattern.match(date_of_birth): + if date_pattern.match(date_of_birth): # respect the pattern, same as register function print(f"The date is valid : {date_of_birth}") - if field_to_modify in (1, 2, 3): + if field_to_modify in (1, 2, 3): # if you are modifying one of the 3 informations it will modify the matricule. matricule = generate_matricule(data.at[student_index, 'Firstname'], data.at[student_index, 'Lastname'], date_of_birth) data.at[student_index, 'Date of Birth'] = date_of_birth break # Exit the loop if the date is valid @@ -333,7 +340,7 @@ def modify(data): elif field_to_modify == 4: while True: place_of_birth = input("What is the city of birth? ") - if place_of_birth_pattern.match(place_of_birth): + if place_of_birth_pattern.match(place_of_birth): # respect the pattern, same as register function print(f"The city of birth is valid : {place_of_birth}") data.at[student_index, 'Place of Birth'] = place_of_birth break # Exit the loop if the city of birth is valid @@ -342,7 +349,7 @@ def modify(data): elif field_to_modify == 5: while True: address = input("Please enter an address in the format 'street number, city': ") - if address_pattern.match(address): + if address_pattern.match(address): # respect the pattern, same as register function print(f"The address is valid : {address}") data.at[student_index, 'Address'] = address break # Exit the loop if the address is valid @@ -351,7 +358,7 @@ def modify(data): elif field_to_modify == 6: while True: telephone = input("What is the telephone number? (in the format 000/00.00.00) ") - if telephone_pattern.match(telephone): + if telephone_pattern.match(telephone): # respect the pattern, same as register function print(f"The telephone number is valid : {telephone}") data.at[student_index, 'Telephone'] = telephone break # Exit the loop if the telephone number is valid @@ -382,7 +389,7 @@ def modify(data): if academic_year_pattern.match(academic_year): print(f"The academic year is valid: {academic_year}") - if academic_year in ['BAC1', 'BAC2', 'BAC3']: + if academic_year in ['BAC1', 'BAC2', 'BAC3']: # it attributes automatically the curriculum based on the academic year curriculum = "INGM1BA" elif academic_year in ['MA1', 'MA2']: curriculum = "INGM2M" @@ -390,7 +397,7 @@ def modify(data): print("Invalid academic year for curriculum assignment.") continue - data.at[student_index, 'Academic Year'] = academic_year + data.at[student_index, 'Academic Year'] = academic_year # attributes the values in the column for the student concerned data.at[student_index, 'Curriculum'] = curriculum break # Exit the loop if the academic year is valid else: @@ -401,13 +408,13 @@ def modify(data): if curriculum_pattern.match(curriculum): if curriculum == 'INGM1BA': - valid_academic_years = ['BAC1', 'BAC2', 'BAC3'] + valid_academic_years = ['BAC1', 'BAC2', 'BAC3'] # defines the valid academic years elif curriculum == 'INGM2M': valid_academic_years = ['MA1', 'MA2'] academic_year = input(f"What is your academic year? ({'/'.join(valid_academic_years)}): ") - if academic_year in valid_academic_years and academic_year_pattern.match(academic_year): + if academic_year in valid_academic_years and academic_year_pattern.match(academic_year): # verify if the academic year and curriculum correspond print(f"The academic year for {curriculum} is valid: {academic_year}") data.at[student_index, 'Academic Year'] = academic_year data.at[student_index, 'Curriculum'] = curriculum @@ -418,25 +425,36 @@ def modify(data): print("Invalid curriculum. Make sure to follow the requested format.") elif field_to_modify == 11: # Courses and grades print("Courses already passed and their grade:\n") - for course, grade in data.iloc[student_index].items(): - if course not in field_mapping.values(): - print(f"{course}: {grade}") + courses = [course for course in data.iloc[student_index].index if course not in field_mapping.values()] + for i, course in enumerate(courses, start=1): + grade = data.at[student_index, course] + print(f"{i}. {course}: {grade}") while True: - course_to_modify = input("Enter the course you want to modify: ") - if course_to_modify in data.columns: # The course has been found, you can continue with the rest of your code - print("The specified course has been found.") - break # Exit the loop since the course has been found + course_number_to_modify = input("Enter the number of the course you want to modify: ") + + try: + course_number_to_modify = int(course_number_to_modify) + except ValueError: + print("Please enter a valid number.") + continue + + + if 1 <= course_number_to_modify <= len(courses): + course_to_modify = courses[course_number_to_modify - 1] + print(f"The specified course ({course_number_to_modify}) has been found: {course_to_modify}.") + break else: - print("The specified course has not been found. Please select a new one.") + print("Invalid course number. Please select a valid one.") new_grade = input(f"Enter the new grade for {course_to_modify}: ") + while not new_grade.isdigit() or int(new_grade) not in range(0, 21): print("The grade must be an integer between 0 and 20.") new_grade = input(f"Enter the new grade for {course_to_modify}: ") - # Update the grade data.at[student_index, course_to_modify] = int(new_grade) + elif field_to_modify == 12: while True: campus = input("Enter 'Louvain-la-Neuve' or 'Mons' depending on the campus: ") @@ -453,14 +471,14 @@ def modify(data): if matricule is not None: data.at[student_index, 'Matricule'] = matricule # Update the Matricule column with the new matricule - - print("Be careful because of a modification the registration number has changed :") - print(f"the new matricule is : {matricule}") + print("Be careful because of a modification the registration number has changed :") # Tell the user that the firstname,lastname or birthdate has been modified + + print(f"the new matricule is : {matricule}") # give the new matricule # Save the modified data to the Excel file - data.to_excel(file_path, index=False) - data = reload_data(file_path) + data.to_excel(file_path, index=False) # append the file with data modified + data = reload_data(file_path) # reload the data to update the data base. print("Modification successfully done.") # Mapping for field names to DataFrame column names @@ -480,16 +498,16 @@ field_mapping = { } -# DELETION OF A STUDENT BASED ON HIS MATRICULE BECAUSE IT'S UNIQUE +# DELETION OF A STUDENT based on his matricule because it's unique as the modification function. def delete(data): while True: user_input = input("Enter 'q' to quit or any other key to continue: ") if user_input.lower() == 'q': print("Operation canceled.") return - + matricule_to_delete = input("Enter the matricule of the student you want to delete: ") - student_index = data[data['Matricule'] == matricule_to_delete.lower()].index.tolist() + student_index = data[data['Matricule'] == matricule_to_delete.lower()].index.tolist() # no fuzzywuzzy because I wanted the matricule to be entered correctly if not student_index: print("No student found with the specified matricule.") @@ -498,24 +516,20 @@ def delete(data): student_index = student_index[0] print(f"\nDeleting the student with matricule {matricule_to_delete}:\n") - # Display the details of the student before deletion - print("Details of the student before deletion:") - print(data.iloc[student_index]) + print("Details of the student before deletion:") # Display the details of the student before deletion + print(data.iloc[student_index]) # give all the data of the student - confirmation = input("Do you really want to delete this student? (YES/NO) ").upper() + confirmation = input("Do you really want to delete this student? (YES/NO) ").upper() # to make sure the user really want to delete the student if confirmation == "YES": - # Delete the student - data = data.drop(index=student_index) + data = data.drop(index=student_index) # .drop allows the deletion - # Save the modified data to the Excel file - data.to_excel(file_path, index=False) + data.to_excel(file_path, index=False) # Save the modified data to the Excel file print("Deletion successful.") else: print("Deletion canceled.") - -# FIND +# FIND the student based on different search criteria def find_student(data): while True: user_input = input("Enter 'q' to quit or any other key to continue: ") @@ -526,10 +540,10 @@ def find_student(data): print("1. By his/her Lastname") print("2. By his/her Firstname") print("3. By his/her Matricule") - + while True: search_criteria = input("Enter the number of what you want to do: ") - + if search_criteria == "1": surname_search = input("Enter the lastname of the student: ") results = find_similar_data(data['Lastname'], surname_search, 'Lastname') @@ -544,7 +558,7 @@ def find_student(data): break else: print("Invalid search criteria. Please enter a valid number.") - + if results.empty: print("No student found with the specified criteria.") else: @@ -553,36 +567,46 @@ def find_student(data): print(f"Firstname: {row['Firstname']}, Lastname: {row['Lastname']}, Matricule: {row['Matricule']}") def find_similar_data(column, search_term, column_name): - # Using fuzzywuzzy to find similar values in the specified column - results = process.extractBests(search_term, column, score_cutoff=80) # You can adjust the score_cutoff as needed + + results = process.extractBests(search_term, column, score_cutoff=80) # Using fuzzywuzzy to find similar values in the specified column with a specified threshold. similar_values = [result[0] for result in results] - # Display suggestions if there are similar values - if similar_values: + if similar_values: # Display suggestions if there are similar values print(f"Suggestions for {column_name}: {', '.join(similar_values)}") - # Filter the DataFrame based on similar values - filtered_data = column.isin(similar_values) + filtered_data = column.isin(similar_values) # Filter the DataFrame based on similar values return data[filtered_data] -# SHOW +# SHOW all the student def filter_students(data): if data.empty: print("The list of students is empty.") else: table_data = [] for index, row in data.iterrows(): - table_data.append([row['Firstname'], row['Lastname'], row['Matricule']]) + table_data.append([row['Firstname'], row['Lastname'], row['Matricule']]) # the data that are displayed headers = ["Firstname", "Lastname", "Matricule"] - print(tabulate(table_data, headers=headers, tablefmt="pretty")) - - export_choice = input("Do you want to export the list to an Excel file? (YES/NO): ").upper() - if export_choice == "YES": - export_to_excel(data) + print(tabulate(table_data, headers=headers, tablefmt="pretty")) # using tabulate is better in term of display + + export_to_excel(data) # call the finction to export read_choice = input("Do you want to read the exported list from an Excel file? (YES/NO): ").upper() if read_choice == "YES": - read_exported_list() + read_exported_list() # read an exported file on the terminal + +def export_option(data): + while True: + export = input("Do you want to export in a file? (YES/NO) ").upper() + if export == "YES": + filename = input("Enter the filename without .xlsx: ") + filename = filename + '.xlsx' + export_data_to_excel(data, filename) + break + elif export == "NO": + print("Export canceled.") + break + else: + print("Invalid input. Please enter either 'YES' or 'NO'.") def export_to_excel(data): @@ -590,41 +614,47 @@ def export_to_excel(data): excel_filename += ".xlsx" try: - data.to_excel(excel_filename, index=False) + data.to_excel(excel_filename, index=False) # print(f"List successfully exported to {excel_filename}.") except Exception as e: print(f"An error occurred during export: {e}") -def read_exported_list(): +def read_exported_list(): # Enter the file name with the extension to display it in the terminal excel_filename = input("Enter the name of the Excel file to read (include .xlsx extension): ") try: - exported_data = pd.read_excel(excel_filename) + exported_data = pd.read_excel(excel_filename) # try to open the doc print("\nExported List:") print(exported_data) print("\nExported list successfully read from Excel.") - except FileNotFoundError: + except FileNotFoundError: # the doc can't be found that's an exception print(f"File not found: {excel_filename}") - except Exception as e: + except Exception as e: # sometimes the type of error is different and must be display to understand the issue print(f"An error occurred during reading: {e}") # SORTING def sort(data): - print('1. Sort in ascending alphabetical order') - print('2. Sort in descending alphabetical order') - print('3. Sort by date of birth') - print('4. Sort by age') - print('5. Sort by matricule') - print('6. Sort by academic year') - print('7. Get all people who passed a course') - print('8. Get all people who failed a course') - print('9. Get all Bachelor students') - print('10. Get all Master students') + print("Below are the sort criteria:") + sort_options = [ + ["1", "Sort in ascending alphabetical order"], + ["2", "Sort in descending alphabetical order"], + ["3", "Sort by date of birth"], + ["4", "Sort by age"], + ["5", "Sort by matricule"], + ["6", "Sort by academic year"], + ["7", "Get all people who passed a course"], + ["8", "Get all people who failed a course"], + ["9", "Get all Bachelor students"], + ["10", "Get all Master students"] + ] + + print(tabulate(sort_options, headers=["Option", "Description"], tablefmt="pretty", colalign=("center", "left"))) + sorting_choice = input("Enter the number of what you want to do: ") if sorting_choice == "1": - sort_ascending(data) + sort_ascending(data) # all the differents call / type of sorting elif sorting_choice == "2": sort_descending(data) elif sorting_choice == "3": @@ -645,20 +675,21 @@ def sort(data): sort_master(data) return +# all the export option is done with all the data (Firstname,...,Address, and all the course) sorting with a specific type +# all the show option is done with the most important informations about the students. def sort_ascending(data): - sorted_data = data.sort_values(by='Firstname', ascending=True) + sorted_data = data.sort_values(by=['Firstname', 'Lastname'], ascending=[True, True]) see_the_data(sorted_data, columns_to_show=['Firstname', 'Lastname', 'Matricule']) - export_option(sorted_data) + export_option(sorted_data) # asking if the user want to export def sort_descending(data): - sorted_data = data.sort_values(by='Firstname', ascending=False) + sorted_data = data.sort_values(by=['Firstname', 'Lastname'], ascending=[False,True]) see_the_data(sorted_data, columns_to_show=['Firstname', 'Lastname', 'Matricule']) export_option(sorted_data) def sort_by_date(data): data['Date of Birth'] = pd.to_datetime(data['Date of Birth'], format='%d/%m/%Y') - sorted_data = data.sort_values(by='Date of Birth') - sorted_data['Date of Birth'] = sorted_data['Date of Birth'].dt.strftime('%d/%m/%Y') + sorted_data = data.sort_values(by=['Date of Birth', 'Firstname', 'Lastname'])# also sort by firstname and lastname to be more precise see_the_data(sorted_data, columns_to_show=['Firstname', 'Lastname', 'Date of Birth']) export_option(sorted_data) @@ -666,28 +697,28 @@ def sort_by_age(data): data['Date of Birth'] = pd.to_datetime(data['Date of Birth'], format='%d/%m/%Y') today = pd.to_datetime('today') age_timedelta = today - data['Date of Birth'] - data['Age'] = (age_timedelta / pd.Timedelta(days=365.25)).astype(int) - sorted_data = data.sort_values(by='Age', ascending=True) + data['Age'] = (age_timedelta / pd.Timedelta(days=365.25)).astype(int) # allow us to transform the data by age + sorted_data = data.sort_values(by=['Age', 'Firstname', 'Lastname'], ascending=[True,True,True]) # also sort by firstname and lastname to be more precise see_the_data(sorted_data, columns_to_show=['Firstname', 'Lastname', 'Age']) export_option(sorted_data) def sort_by_matricule(data): - sorted_data = data.sort_values(by='Matricule', ascending=True) + sorted_data = data.sort_values(by='Matricule', ascending=True) # matricule is unique so no other sorting type needed see_the_data(sorted_data, columns_to_show=['Firstname', 'Lastname', 'Matricule']) export_option(sorted_data) def sort_by_academic_year(data): - sorted_data = data.sort_values(by='Academic Year', ascending=True) + sorted_data = data.sort_values(by=['Academic Year', 'Firstname', 'Lastname'], ascending=[True,True,True]) # also sort by firstname and lastname to be more precise see_the_data(sorted_data, columns_to_show=['Firstname', 'Lastname', 'Academic Year']) export_option(sorted_data) def sort_passed(data): - course_columns = data.columns[11:] + course_columns = data.columns[12:] # take the data from the 12 to the last print("Available Courses:") for i, course in enumerate(course_columns, 1): - print(f"{i}. {course}") - + print(f"{i}. {course}") # print all the course + while True: try: choice = int(input("Enter the number of the course you want to display: ")) @@ -696,20 +727,21 @@ def sort_passed(data): except (ValueError, IndexError): print("Invalid choice. Please enter a valid number.") - passed_data = data[data[selected_course] >= 10] - - # Notez que 'selected_course' doit être passé comme une liste pour inclure la note du cours dans 'columns_to_show' + passed_data = data[data[selected_course] >= 10] # course transfer in a list for the course with note more than 10 or equal to + + passed_data = passed_data.sort_values(by=['Firstname', 'Lastname', selected_course], ascending=[True, True,True]) + see_the_data(passed_data, columns_to_show=['Firstname', 'Lastname', selected_course]) export_option(passed_data) def sort_failed(data): - course_columns = data.columns[11:] + course_columns = data.columns[12:] # take the data from the 12 to the last print("Available Courses:") for i, course in enumerate(course_columns, 1): print(f"{i}. {course}") - + while True: try: choice = int(input("Enter the number of the course you want to display: ")) @@ -718,23 +750,25 @@ def sort_failed(data): except (ValueError, IndexError): print("Invalid choice. Please enter a valid number.") - failed_data = data[data[selected_course] < 10] - + failed_data = data[data[selected_course] < 10] # course transfer in a list for the course with note less than 10 + + failed_data = failed_data.sort_values(by=['Firstname', 'Lastname', selected_course], ascending=[True, True,True]) + see_the_data( failed_data, columns_to_show=['Firstname', 'Lastname', selected_course]) - export_option( failed_data) + export_option(failed_data) -def sort_bachelor(data): - bachelor_data = data[data['Academic Year'].isin(['BAC1', 'BAC2', 'BAC3'])].sort_values(by=['Academic Year', 'Lastname'], ascending=[True, True]) +def sort_bachelor(data): # search all the student of bachelor and sort them by academic year to have a better view + bachelor_data = data[data['Academic Year'].isin(['BAC1', 'BAC2', 'BAC3'])].sort_values(by=['Academic Year', 'Firstname', 'Lastname'], ascending=[True, True,True]) see_the_data(bachelor_data, columns_to_show=['Firstname', 'Lastname', 'Academic Year']) export_option(bachelor_data) def sort_master(data): - master_data = data[data['Academic Year'].isin(['MA1', 'MA2'])].sort_values(by=['Academic Year', 'Lastname'], ascending=[True, True]) + master_data = data[data['Academic Year'].isin(['MA1', 'MA2'])].sort_values(by=['Academic Year', 'Firstname', 'Lastname'], ascending=[True, True,True]) see_the_data(master_data, columns_to_show=['Firstname', 'Lastname', 'Academic Year']) export_option(master_data) -# export the data into an excel +# exporting the data def export_option(data): export = input("Do you want to export in a file? (YES/NO) ").upper() if export == "YES": @@ -759,11 +793,10 @@ def see_the_data(data, columns_to_show=None): # STATS - def statistics_analysis(data): - print_menu(["Get basic statistics of a student", "Get all grades of a student", "Get all grades of a course"]) + print_menu(["Get basic statistics of a student", "Get all grades of a student", "Get all grades of a course"]) # call the function pritnt menu - stats_choice = get_valid_input("Enter the number of what you want to do: ", 1, 3) + stats_choice = get_valid_input("Enter the number of what you want to do: ", 1, 3) # user enter the number he want to do. if stats_choice == 1: results = search_and_display_stats(data, "Statistics") @@ -776,7 +809,7 @@ def print_menu(options): for index, option in enumerate(options, start=1): print(f"{index}. {option}") -def get_valid_input(prompt, lower_limit, upper_limit): +def get_valid_input(prompt, lower_limit, upper_limit): # verify if the input is correct while True: user_input = input(prompt) if user_input.isdigit(): @@ -803,7 +836,7 @@ def search_and_display_stats(data, stats_type): return search_term = input(f"Enter the {field_name.lower()} of the student: ") - results = data[data[field_name].str.contains(search_term, case=False, na=False)] + results = data[data[field_name].str.contains(search_term, case=False, na=False)] # search the stat with the search criteria if not results.empty: if stats_type == "Statistics": @@ -814,39 +847,30 @@ def search_and_display_stats(data, stats_type): print(f"No students found with the specified {field_name}.") -# STATISTICAL_STUDENT +# STATISTICAL_STUDENT selected via the menu def display_statistics(results, data): numeric_columns = results.select_dtypes(include=['number']).columns - for index, row in results.iterrows(): - student_name = f"{row['Firstname']} {row['Lastname']}" - student_grades = [row[column] for column in numeric_columns if not pd.isnull(row[column])] - - if student_grades: - print_student_statistics(student_name, student_grades) - # Move the export_statistics call outside the loop - export_statistics(results, data, numeric_columns) + with pd.ExcelWriter('student_statistics.xlsx', engine='xlsxwriter') as excel_writer: # Create a pandas excel writer + for index, row in results.iterrows(): + student_name = f"{row['Firstname']} {row['Lastname']}" + student_grades = [row[column] for column in numeric_columns if not pd.isnull(row[column])] + if student_grades: + # Print and save student statistics + print_student_statistics(student_name, student_grades, excel_writer) -def export_statistics(results, data, numeric_columns): - export_choice = input("Do you want to export these statistics? (YES/NO): ").upper() - if export_choice == 'YES': - file_name = input("Enter the Excel file name (without extension): ") + '.xlsx' - export_path = file_name - export_df = calculate_student_stats(results, numeric_columns) - export_df.to_excel(export_path, index=False) - print(f"Statistics exported to {export_path}.") - + print("Statistics exported to student_statistics.xlsx.") -def calculate_student_stats(results, numeric_columns): - student_grades = pd.Series(results[numeric_columns].stack().dropna()) +def export_statistics(export_path, overwrite) : + if overwrite == "NO": + new_file_name = input("Enter a new Excel file name (without extension): ") + '.xlsx' + export_path = new_file_name - # Check if the student_grades Series is empty - if student_grades.empty: - # If no grades are found, return an empty DataFrame - return pd.DataFrame(columns=["Metric", "Value"]) + return export_path +def calculate_student_stats(student_grades) : student_stats = { "Lowest grade": min(student_grades), "Highest grade": max(student_grades), @@ -857,8 +881,7 @@ def calculate_student_stats(results, numeric_columns): return pd.DataFrame(list(student_stats.items()), columns=["Metric", "Value"]) - -def print_student_statistics(student_name, student_grades): +def print_student_statistics(student_name, student_grades, excel_writer) : lowest_grade = min(student_grades) highest_grade = max(student_grades) average_grade = statistics.mean(student_grades) @@ -873,90 +896,145 @@ def print_student_statistics(student_name, student_grades): ["Standard deviation of grades", std_deviation] ] + # Print student statistics print(f"\nStatistics for student {student_name}:") print(tabulate(table, headers=["Metric", "Value"], tablefmt="pretty")) + + export_df = calculate_student_stats(student_grades) # Save student statistics to a separate Excel sheet + export_df.to_excel(excel_writer, sheet_name=student_name, index=False) + + + # STUDENT_GRADES -def display_student_grades(results, data): - for index, row in results.iterrows(): +def display_student_grades(results, data) : + student_data_dict = {}# Create a dictionary to store data for each student + + for index, row in results.iterrows() : student_name = f"{row['Firstname']} {row['Lastname']}" print(f"\nGrades for student {student_name}:") table = [[column, grade] for column, grade in row.items() if pd.notna(grade) and column not in ['Firstname', 'Lastname', 'Academic Year', 'Curriculum', 'Place of Birth', 'Telephone', 'Address', 'Gender', 'Email', 'Campus', 'Date of Birth', 'Matricule']] print(tabulate(table, headers=["Course", "Grade"], tablefmt="pretty")) - export_student_grades(results, data) -def export_student_grades(results, data): - export_choice = input("Do you want to export these statistics? (YES/NO): ").upper() - if export_choice == 'YES': - file_name = input("Enter the Excel file name (without extension): ") + '.xlsx' - export_path = file_name - columns_to_remove = ['Firstname', 'Lastname', 'Academic Year', 'Curriculum', 'Place of Birth', 'Telephone', 'Address', 'Gender', 'Email', 'Campus', 'Date of Birth', 'Matricule'] - export_df = results.drop(columns=columns_to_remove) - - # Melt the DataFrame to have data in columns with courses and grades - export_df_melted = pd.melt(export_df, id_vars=[], value_vars=export_df.columns, var_name='Course', value_name='Grade') - - export_df_melted.to_excel(export_path, index=False) - print(f"Data exported to {export_path}.") + export_choice = input(f"Do you want to export these statistics for {student_name}? (YES/NO): ").upper() + + while export_choice not in ["YES", "NO"] : + print("Please enter a valid response (YES or NO).") + export_choice = input(f"Do you want to export these statistics for {student_name}? (YES/NO): ").upper() + + if export_choice == 'YES': + student_data_dict[student_name] = row # Store data for each student in the dictionary + + if student_data_dict: + export_student_grades(student_data_dict) + +def export_student_grades(student_data_dict) : + file_name = input("Enter the Excel file name (without extension): ") + '.xlsx' + export_path = file_name + + + while os.path.exists(export_path): # Verify if the file already exists + overwrite = input("File already exists. Do you want to overwrite it? (YES/NO) ").upper() + if overwrite == "YES": + break + else : + new_file_name = input("Enter a new Excel file name (without extension): ") + '.xlsx' + export_path = new_file_name + + + with pd.ExcelWriter(export_path, engine='xlsxwriter') as writer: # Create a pandas excel writer using xlsxwriter as the engine + for student_name, student_data in student_data_dict.items(): # Iterate through the dictionary and write each student's data to a separate sheet + columns_to_remove = ['Firstname', 'Lastname', 'Academic Year', 'Curriculum', 'Place of Birth', 'Telephone', 'Address', 'Gender', 'Email', 'Campus', 'Date of Birth', 'Matricule'] + export_df = pd.DataFrame([student_data], columns=student_data.index).drop(columns=columns_to_remove) + + # Melt the DataFrame to have data in columns with courses and grades + export_df_melted = pd.melt(export_df, id_vars=[], value_vars=export_df.columns, var_name='Course', value_name='Grade') + + # Write each student's data to a separate sheet + export_df_melted.to_excel(writer, sheet_name=student_name, index=False) + + print(f"Data exported to {export_path}.") # COURSE_GRADES def course_grades(data): + course_dict = {} print("Here is the list of courses:") - for course in data.columns[13:]: - print(f"- {course}") + for i, course in enumerate(data.columns[12:], 1): + course_dict[i] = course + print(f"{i}. {course}") - course_name = input("For which course do you want to display grades? ") + while True: + try: + choice = int(input("Enter the number of the course you want to display grades for: ")) + selected_course = course_dict.get(choice) + if selected_course is not None: + break + else: + print("Invalid choice. Please enter a valid number.") + except ValueError: + print("Invalid choice. Please enter a valid number.") - if course_name in data.columns: - students_in_course = data[data[course_name].notnull()] + if selected_course in data.columns: + students_in_course = data[data[selected_course].notnull()] if not students_in_course.empty: - display_course_grades(students_in_course, course_name, data) - export_course_grades(students_in_course, course_name) + + students_in_course = students_in_course.sort_values(by=['Firstname', 'Lastname', selected_course]) + + display_course_grades(students_in_course, selected_course, data) + export_course_grades(students_in_course, selected_course) else: - print(f"No students participated in the course {course_name}.") + print(f"No students participated in the course {selected_course}.") else: - print(f"The specified course ({course_name}) was not found.") + print(f"The specified course ({selected_course}) was not found.") return def display_course_grades(students_in_course, course_name, data): - print(f"\nGrades of students for the course {course_name}:") + print(f"\nGrades of students for the course {course_name} (sorted):") table = [["Firstname", "Lastname", "Matricule", "Grade"], *[[row['Firstname'], row['Lastname'], row['Matricule'], row[course_name]] for _, row in students_in_course.iterrows() if not pd.isnull(row[course_name])] ] print(tabulate(table, headers="firstrow", tablefmt="pretty")) - def export_course_grades(students_in_course, course_name): export_choice = input("Do you want to export these statistics? (YES/NO): ").upper() + + while export_choice not in ["YES", "NO"]: + print("Please enter a valid response (YES or NO).") + export_choice = input("Do you want to export these statistics? (YES/NO): ").upper() + if export_choice == 'YES': file_name = input("Enter the Excel file name (without extension): ") + '.xlsx' export_path = file_name - export_df = students_in_course[['Firstname', 'Lastname','Matricule', course_name]] - export_df.to_excel(export_path, index=False) - print(f"Data exported to {export_path}.") - + while os.path.exists(export_path): + overwrite = input("File already exists. Do you want to overwrite it? (YES/NO) ").upper() + if overwrite == "YES": + break + else : + new_file_name = input("Enter a new Excel file name (without extension): ") + '.xlsx' + export_path = new_file_name + export_df = students_in_course[['Firstname', 'Lastname', 'Matricule', course_name]] # export the data + export_df.to_excel(export_path, index=False) + print(f"Data exported to {export_path}.") -# Example of usage -# Make sure you have defined and loaded your DataFrame 'data' before calling this function -# Replace 'Course_Name' with the actual name of the course you are looking for -# display_course_grades(data, 'Course_Name') -# Action +def action(data): # This is the menu (called everytime to do smth on the app) + menu_options = [ + ["1", "Register a student"], + ["2", "Modify one or more fields"], + ["3", "Delete a student"], + ["4", "Find a student"], + ["5", "Show"], + ["6", "Sort, display, or export the list"], + ["7", "View statistics"], + ["8", "To stop the program"] + ] -def action(data): - print("What do you want to do?\nBelow, you will find what is possible followed by the commands to type.") - print("1. Register a student") - print("2. Modify one or more fields") - print("3. Delete a student") - print("4. Find a student") - print("5. Show") - print("6. Sort, display, or export the list") - print("7. View statistics") - print("8. To stop the program") + print("What do you want to do?\nBelow, you will find what is possible followed by the commands to type:") + print(tabulate(menu_options, headers=["Option", "Description"], tablefmt="pretty", colalign=("center", "left"))) while True: command = input("Enter the number of what you want to do: ") # Check if the command is an integer @@ -993,8 +1071,7 @@ while True: if response is False: break else: - if response: - # Reload the menu if the file was modified + if response: # Reload the menu if the file was modified menu = [] else: - menu.append(response) + menu.append(response) \ No newline at end of file diff --git a/projet_personnel/algorithme_student_generate.py b/projet_personnel/algorithme_student_generate.py index 4fc8ab83d50e183aff61b7faefb30a71f835369f..f71974148b70a6403a3004443e9d922ddce4c6ed 100644 --- a/projet_personnel/algorithme_student_generate.py +++ b/projet_personnel/algorithme_student_generate.py @@ -3,55 +3,7 @@ import pandas as pd import random import numpy as np - -firstname_of_students = ['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', - 'Anderson', 'Thomas', 'Jackson', 'White', 'Harris', 'Martin', 'Thompson', 'Garcia', 'Martinez', 'Robinson', - 'Clark', 'Rodriguez', 'Lewis', 'Lee', 'Walker', 'Hall', 'Allen', 'Young', 'Hernandez', 'King', - 'Wright', 'Lopez', 'Hill', 'Scott', 'Green', 'Adams', 'Baker', 'Gonzalez', 'Nelson', 'Carter', - 'Mitchell', 'Perez', 'Roberts', 'Turner', 'Phillips', 'Campbell', 'Parker', 'Evans', 'Edwards', 'Collins', - 'Stewart', 'Sanchez', 'Morris', 'Rogers', 'Reed', 'Cook', 'Morgan', 'Bell', 'Murphy', 'Bailey', - 'Rivera', 'Cooper', 'Richardson', 'Cox', 'Howard', 'Ward', 'Torres', 'Peterson', 'Gray', 'Ramirez', - 'James', 'Watson', 'Brooks', 'Kelly', 'Sanders', 'Price', 'Bennett', 'Wood', 'Barnes', 'Ross', - 'Henderson', 'Coleman', 'Jenkins', 'Perry', 'Powell', 'Long', 'Patterson', 'Hughes', 'Flores', 'Washington', - 'Butler', 'Simmons', 'Foster', 'Gonzales', 'Bryant', 'Alexander', 'Russell', 'Griffin', 'Diaz', 'Hayes', - 'Myers', 'Ford', 'Hamilton', 'Graham', 'Sullivan', 'Wallace', 'Woods', 'Cole', 'West', 'Jordan', - 'Owens', 'Reynolds', 'Fisher', 'Ellis', 'Harrison', 'Gibson', 'McDonald', 'Cruz', 'Marshall', 'Ortiz', - 'Gomez', 'Murray', 'Freeman', 'Wells', 'Webb', 'Simpson', 'Stevens', 'Tucker', 'Porter', 'Hunter', - 'Hicks', 'Crawford', 'Henry', 'Boyd', 'Mason', 'Morales', 'Kennedy', 'Warren', 'Dixon', 'Ramos', - 'Reyes', 'Burns', 'Gordon', 'Shaw', 'Holmes', 'Rice', 'Robertson', 'Hunt', 'Black', 'Daniels', - 'Palmer', 'Mills', 'Nichols', 'Grant', 'Knight', 'Ferguson', 'Rose', 'Stone', 'Hawkins', 'Dunn', - 'Perkins', 'Hudson', 'Spencer', 'Gardner', 'Stephens', 'Payne', 'Pierce', 'Berry', 'Matthews', 'Arnold', - 'Wagner', 'Willis', 'Ray', 'Watkins', 'Olson', 'Carroll', 'Duncan', 'Snyder', 'Hart', 'Cunningham', - 'Bradley', 'Lane', 'Andrews', 'Ruiz', 'Harper', 'Fox', 'Riley', 'Armstrong', 'Carpenter', 'Weaver', - 'Greene', 'Lawrence', 'Elliott', 'Chavez', 'Sims', 'Austin', 'Peters', 'Kelley', 'Franklin', 'Lawson', - 'Fields', 'Gutierrez', 'Ryan', 'Schmidt', 'Carr', 'Vasquez', 'Castillo', 'Wheeler', 'Chapman', 'Oliver', - 'Montgomery', 'Richards', 'Williamson', 'Johnston', 'Banks', 'Meyer', 'Bishop', 'McCoy', 'Howell', 'Alvarez', - 'Morrison', 'Hansen', 'Fernandez', 'Garza', 'Harvey', 'Little', 'Burton', 'Stanley', 'Nguyen', 'George', - 'Jacobs', 'Reid', 'Kim', 'Fuller', 'Lynch', 'Dean', 'Gilbert', 'Garrett', 'Romero', 'Welch'] - -last_name_of_students = ['Emma', 'Liam', 'Olivia', 'Noah', 'Ava', 'Isabella', 'Sophia', 'Jackson', 'Mia', 'Lucas', - 'Aiden', 'Ethan', 'Luna', 'Mason', 'Harper', 'Evelyn', 'Oliver', 'Aria', 'Amelia', 'Caden', - 'Grayson', 'Charlotte', 'Chloe', 'Ella', 'Sebastian', 'Carter', 'Abigail', 'Scarlett', 'Madison', - 'Lily', 'Avery', 'Ella', 'Aubrey', 'Mila', 'Layla', 'Riley', 'Zoe', 'Sofia', 'Jackson', 'Liam', - 'Leah', 'Zoe', 'Henry', 'Gabriel', 'Grace', 'Hannah', 'Mila', 'Alexa', 'Eva', 'Liam', 'Mia', - 'Logan', 'Benjamin', 'Emily', 'Nora', 'Elijah', 'Lucy', 'Owen', 'Elizabeth', 'Stella', 'Levi', - 'William', 'Lily', 'Zoey', 'Parker', 'Brooklyn', 'Samuel', 'Luke', 'Madelyn', 'Eleanor', 'Nathan', - 'Isaac', 'Ryan', 'Ariana', 'Addison', 'Scarlett', 'Julian', 'Natalie', 'Victoria', 'Adam', 'Alexis', - 'Aaron', 'Aaliyah', 'Robert', 'Samantha', 'Christopher', 'Hazel', 'Serenity', 'Jordan', 'Leah', 'Chase', - 'Lillian', 'Liam', 'Skylar', 'Bella', 'Zachary', 'Caleb', 'Tyler', 'Connor', 'Eli', 'Aaliyah', - 'Ezekiel', 'Eva', 'Landon', 'Grace', 'Makayla', 'Nicholas', 'Natalie', 'Nathan', 'Avery', 'Cameron', - 'Hudson', 'Violet', 'Claire', 'Gavin', 'Eva', 'Zachary', 'Alexa', 'Brooklyn', 'Kylie', 'Peyton', - 'Emma', 'John', 'Aubrey', 'Carter', 'Mila', 'Elena', 'Paisley', 'Grayson', 'Hailey', 'Elijah', - 'Ellie', 'Julian', 'Adrian', 'Aria', 'Liliana', 'Bentley', 'Kinsley', 'Savannah', 'Eliana', 'Sofia', - 'Liam', 'Ella', 'Evelyn', 'Avery', 'Austin', 'Scarlett', 'Leah', 'Aiden', 'Audrey', 'Amelie', - 'Julian', 'Mila', 'Olivia', 'Anthony', 'Caleb', 'Zoe', 'Autumn', 'Samantha', 'Aria', 'Cooper', - 'Eva', 'Charlie', 'Madison', 'Landon', 'Penelope', 'Kaylee', 'Lily', 'Ryder', 'Hannah', 'Maya', - 'Christopher', 'Eleanor', 'Levi', 'Aubree', 'Emma', 'Sofia', 'Evan', 'Ella', 'Gabriel', 'Zachary', - 'Taylor', 'Liam', 'Makayla', 'Zoe', 'Oliver', 'Isaiah', 'Brooklyn', 'Lily', 'Victoria', 'Cameron', - 'Madison', 'Eva', 'Gabriel', 'Zoe', 'Brooklyn', 'Sophie', 'Ella', 'Logan', 'Madison', 'Julian', - 'Alexis', 'Landon', 'Grace', 'Lucas', 'Aria', 'Aaliyah', 'Adrian', 'Riley', 'Nora', 'Isabella', - 'Eva', 'Layla', 'Aiden', 'Amelia', 'Chloe', 'Levi', 'Lillian', 'Liam', 'Zoey', 'Hazel', 'Carter', - 'Grace', 'Peyton', 'Julian', 'Eliana', 'Eva', 'Harper', 'Henry', 'Olivia', 'Lily', 'Liam', 'Abigail'] +from faker import Faker school_years = ['BAC1', 'BAC2', 'BAC3', 'MA1', 'MA2'] @@ -93,15 +45,17 @@ school_years = ['BAC1', 'BAC2', 'BAC3', 'MA1', 'MA2'] # Générer des combinaisons aléatoires de noms et prénoms pour plus de 1000 personnes data_generated = [] +fake = Faker() + number_of_students = 1000 for each in range(number_of_students): # générer des données # nom de la personnes - last_name = random.choice(last_name_of_students) + last_name = fake.last_name() # prenom de la personnes - first_name = random.choice(firstname_of_students) + first_name = fake.first_name() # année de cours academic_year = np.random.choice(school_years, p=proportions) @@ -193,7 +147,7 @@ for each in range(number_of_students): # générer des données grades[courses] = random.randint(0, 20) - data_generated.append({"Firstname": last_name, "Lastname": first_name, "Academic Year" : academic_year, "Curriculum" : curriculum, "Place of Birth" : city_of_birth , "Telephone": phone, "Address": adress_of_student, "Gender" : gender_of_student, "Email" : email_formated, "Campus" : campus, "Date of Birth" : complete_date_of_birth, "Matricule" : matricule, **grades}) + data_generated.append({"Firstname": first_name, "Lastname": last_name, "Academic Year" : academic_year, "Curriculum" : curriculum, "Place of Birth" : city_of_birth , "Telephone": phone, "Address": adress_of_student, "Gender" : gender_of_student, "Email" : email_formated, "Campus" : campus, "Date of Birth" : complete_date_of_birth, "Matricule" : matricule, **grades}) # Créer un DataFrame pandas df = pd.DataFrame(data_generated) diff --git a/pse.xlsx b/pse.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..db6f99a4e0a298f05827e6c1d391f4620c1c1e03 Binary files /dev/null and b/pse.xlsx differ