diff --git a/.DS_Store b/.DS_Store index 8c4326e544aff75bdc18e95cd4563353d7bdd5eb..e67dba760cf882cd80f4290e831fa2069d1f19c4 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/matrice_cat.xlsx b/matrice_cat.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..47c49b89444b6029c80e2635215ec292111e0bbb Binary files /dev/null and b/matrice_cat.xlsx differ diff --git a/matrix_category.xlsx b/matrix_category.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c3cd0da546301fe7326d79c65e0b9a36fe63d3a9 Binary files /dev/null and b/matrix_category.xlsx differ diff --git a/projet_en_groupe/.DS_Store b/projet_en_groupe/.DS_Store index f62eb354c0eca2ffb7a9666b921257d47fa7d7e2..0bebdd61ccf544d5f88a54eb3b651bfcf9fc1fef 100644 Binary files a/projet_en_groupe/.DS_Store and b/projet_en_groupe/.DS_Store differ diff --git a/projet_en_groupe/algorithme_netflix.py b/projet_en_groupe/algorithme_netflix.py index fb3c724f8d20587c592059673bba6dc51ee0aee1..b8065a774543ae84871233ec469d4081b9e28d4d 100644 --- a/projet_en_groupe/algorithme_netflix.py +++ b/projet_en_groupe/algorithme_netflix.py @@ -31,10 +31,12 @@ def movies(data_1): # enregistrement films = data_1[data_1['type'] == 'Movie'] # Filter the data to include only movies movie_titles = films['title'].tolist() # Extract movie titles + movie_df = pd.DataFrame({'Movie Titles': movie_titles}) + # afficher que les 100 premiers - print(movie_titles) # Display movie titles - save_to_csv(movie_titles) + print(movie_df) # Display movie titles + save_to_csv(movie_df) return # Be careful, you need to ask each time if they want to save the list to a .csv @@ -44,9 +46,11 @@ def series(data_1): series = data_1[data_1['type'] == 'TV Show'] # Filter the data to include only series series_titles = series['title'].tolist() # Extract series titles - print(series_titles) # Display series titles + series_df = pd.DataFrame({'Movie Titles': series_titles}) + + print(series_df) # Display series titles - save_to_csv(series_titles) + save_to_csv(series_df) return # Be careful, you need to ask each time if they want to save the list to a .csv @@ -77,10 +81,11 @@ def by_country(data_1): filtered_data = filter_media_type(data_1) country_list = [] - for countries in filtered_data['country'].dropna().str.split(', '): + for countries in filtered_data['country'].dropna().str.split(','): for country in countries: - if country not in country_list and country != '': - country_list.append(country) + cleaned_country = country.strip() # Remove leading and trailing spaces + if cleaned_country and cleaned_country not in country_list: + country_list.append(cleaned_country) print("List of all available countries:") country_list.sort() @@ -94,9 +99,9 @@ def by_country(data_1): save_to_csv(country_data) else: print(f"No movies or series found for the country {country_input}.") - return + # Be careful, you need to ask each time if they want to save the list to a .csv @@ -139,9 +144,19 @@ def duration(data_1): type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize() type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)] - if not type_data.empty: + print("Quel type de tri voulez-vous ? ") + print("1. Croissant") + print("2. Décroissant") + sort_order = str(input("Entrez le numéro du type de tri : ")) - type_data_sorted = type_data.sort_values(by='duration', ascending=True) # see if we do in ascending or descending + if not type_data.empty: + if sort_order.lower() == '1': + type_data_sorted = type_data.sort_values(by='duration', ascending=True) + elif sort_order.lower() == '2': + type_data_sorted = type_data.sort_values(by='duration', ascending=False) + else: + print("Invalid sort order. Defaulting to ascending order.") + type_data_sorted = type_data.sort_values(by='duration', ascending=True) print(type_data_sorted) save_to_csv(type_data_sorted) @@ -149,6 +164,7 @@ def duration(data_1): print(f"No movies or series found for the type {type_input}.") + def director(data_1): filtered_data = filter_media_type(data_1) @@ -202,6 +218,9 @@ def specific_genre_director(data_1): filtered_data = filter_media_type(data_1) unique_directors = filtered_data['director'].unique() + # Convert elements to strings to handle potential float values + unique_directors = [str(director) for director in unique_directors] + print("List of all available directors:") print(', '.join(unique_directors)) @@ -233,6 +252,9 @@ def specific_genre_actor(data_1): filtered_data = filter_media_type(data_1) unique_actors = filtered_data['cast'].unique() + # Convert elements to strings to handle potential float values + unique_actors = [str(actor) for actor in unique_actors] + print("List of all available actors:") print(', '.join(unique_actors)) @@ -260,6 +282,7 @@ def specific_genre_actor(data_1): return + # rating # these are variables that needs to be registered in general not in a local function @@ -283,7 +306,7 @@ def most_rated(data_1, data_2) : def most_rated_year(data_1, data_2): # Display all available unique release years - available_years = data_1['release_year'].unique() + available_years = sorted(data_1['release_year'].unique()) print("Available years: ", available_years) # tri des dates # Ask the user to enter a release year @@ -328,52 +351,74 @@ def most_rated_recent(data_1, data_2): # Example usage -def parental_code(data_1): # s5542 bug concernant les virgules +def parental_code(data_1): + valid_codes = set(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R', 'TV-G', 'G', 'NC-17', 'NR', 'TV-Y7-FV', 'UR']) - code_list = [] - for codes in data_1['rating'].dropna().str.split(', '): - for code in codes: - if code not in code_list and code != '': - code_list.append(code) - print("Here are the parental codes: ") - print(code_list) - save_to_csv(code_list) - return + # Filter out entries that are not valid parental codes + filtered_data = data_1[data_1['rating'].isin(valid_codes)] + + print("Valid parental codes:") + print(', '.join(valid_codes)) + # Ask the user to enter a parental code + selected_code = input("Enter a parental code to display movies and/or series: ") + # Filter the data based on the selected parental code + if selected_code in valid_codes: + result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)] + if not result_data.empty: + print(result_data) + save_to_csv(result_data) + else: + print(f"No movies or series found for the parental code {selected_code}.") + else: + print("Invalid parental code entered.") + + return - #code_parental = input("Entrez le code de contrôle parental : PG-13, TV-MA") def directors_nationality(data_1): - # Check if the 'director' column exists in the dataset - if 'director' not in data_1.columns: - print("The dataset does not contain a 'director' column.") + # Check if the 'director' and 'country' columns exist in the dataset + if 'director' not in data_1.columns or 'country' not in data_1.columns: + print("The dataset does not contain 'director' or 'country' columns.") return # Extract unique directors and their respective nationalities directors_nationality_dict = {} for index, row in data_1.iterrows(): - directors = row['director'].split(', ') + directors = str(row['director']).split(', ') if pd.notna(row['director']) else [] nationality = row['country'] for director in directors: if director in directors_nationality_dict: - directors_nationality_dict[director]['nationalities'].add(nationality) - directors_nationality_dict[director]['count'] += 1 + # Use a set to store unique nationalities for each director + directors_nationality_dict[director]['nationalities'].add(str(nationality)) + directors_nationality_dict[director]['number of movies or series'] += 1 else: - directors_nationality_dict[director] = {'nationalities': {nationality}, 'count': 1} + directors_nationality_dict[director] = {'nationalities': set(), 'number of movies or series': 1} + directors_nationality_dict[director]['nationalities'].add(str(nationality)) + + # Remove duplicates from the nationalities list for each director + for director_info in directors_nationality_dict.values(): + director_info['nationalities'] = list(set(director_info['nationalities'])) # Sort the directors by the number of movies and series produced - sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['count'], reverse=True) + sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['number of movies or series'], reverse=True) + + columns = ['director', 'nationalities', 'number of movies or series'] + directors_df = pd.DataFrame([[director, ', '.join(info['nationalities']), info['number of movies or series']] for director, info in sorted_directors], columns=columns) # Display the list of directors and their nationalities print("Directors and their nationalities, sorted by the number of movies and series produced:") for director, info in sorted_directors: - print(f"{director}: {', '.join(info['nationalities'])} - {info['count']} movies/series") - save_to_csv(sorted_directors) - return + director_name = str(director) if pd.notna(director) else 'Unknown' + nationalities_str = ', '.join(str(n) for n in info['nationalities']) + print(f"{director_name}: {nationalities_str} - {info['number of movies or series']} movies/series") + # Save to CSV using the DataFrame + save_to_csv(directors_df) # Commenting this out since the 'save_to_csv' function is not provided + return directors_df @@ -457,26 +502,16 @@ def save_to_csv(data, default_filename='output.csv'): new_filename = new_filename + ".csv" data.to_csv(new_filename, index=False) print(f"Data saved to {new_filename}") - - # Ask if the user wants to open the file - open_choice = input("Do you want to open the saved file? (YES/NO): ").upper() - if open_choice == 'YES': - os.system(file_name) else: # Save to a new file data.to_csv(file_name, index=False) print(f"Data saved to {file_name}") - # Ask if the user wants to open the file - open_choice = input("Do you want to open the saved file? (YES/NO): ").upper() - if open_choice == 'YES': - os.system(file_name) else: print("Data not saved.") - # début de l'algorithme de recommandation # Load the CSV file