"""Interactive command-line explorer for a Netflix catalog and its ratings.

The script loads two CSV files (the catalog of titles and a ratings table
keyed by ``show_id``), then shows a numbered menu.  Every menu entry prints a
view of the data and offers to save that view to a CSV file.
"""

import os
import subprocess
import sys

import pandas as pd

# `tabulate` is not used by any function in this file; keep the import for
# backward compatibility but do not let its absence prevent the script from
# running.
try:
    import tabulate
except ImportError:
    tabulate = None

# Uncomment to display every row when printing a DataFrame:
# pd.set_option('display.max_rows', None)

# Location of the two datasets.
# On Google Colab, mount Drive (google.colab.drive.mount('/content/drive'))
# and point these at /content/drive/MyDrive/Coding_project_2023/ instead.
file_path_1 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv"
file_path_2 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv"


# --------------------------------------------------------------------------
# Small reusable helpers
# --------------------------------------------------------------------------

def unique_values(column, sep=', '):
    """Return the sorted, de-duplicated entries of a delimited text column.

    Each cell is split on *sep*; missing cells and empty entries are ignored.
    """
    values = set()
    for entries in column.dropna().astype(str).str.split(sep):
        values.update(entry for entry in entries if entry != '')
    return sorted(values)


def contains_text(column, text):
    """Return a boolean mask of the cells of *column* that contain *text*.

    The match is case-insensitive and literal: the user's input is NOT
    interpreted as a regular expression, so characters such as ``(`` or ``+``
    cannot crash the search.  Missing cells never match.
    """
    lowered = column.fillna('').astype(str).str.lower()
    return column.notna() & lowered.str.contains(text.lower(), regex=False)


def duration_number(column):
    """Return the leading number of each duration cell ('90 min' -> 90).

    Cells without a number (including missing cells) become NaN.
    """
    digits = column.astype(str).str.extract(r'(\d+)', expand=False)
    return pd.to_numeric(digits, errors='coerce')


def as_dataframe(data):
    """Return *data* as a DataFrame so that it can always be written to CSV.

    DataFrames are returned unchanged, Series become one-column frames and any
    other iterable (for example a list of titles) becomes a frame with a
    single ``value`` column.
    """
    if isinstance(data, pd.DataFrame):
        return data
    if isinstance(data, pd.Series):
        return data.to_frame()
    return pd.DataFrame({'value': list(data)})


def resolve_csv_name(raw_name, default_filename='output.csv'):
    """Turn what the user typed into a CSV file name.

    An empty answer selects *default_filename*; otherwise ``.csv`` is appended
    unless the user already typed it.
    """
    name = raw_name.strip()
    if not name:
        return default_filename
    if name.lower().endswith('.csv'):
        return name
    return name + ".csv"


def open_file(path):
    """Open *path* with the operating system's default application."""
    try:
        if sys.platform.startswith('win'):
            os.startfile(path)
        elif sys.platform == 'darwin':
            subprocess.run(['open', path], check=False)
        else:
            subprocess.run(['xdg-open', path], check=False)
    except OSError as error:
        print(f"Could not open {path}: {error}")


def add_appreciation(data_2):
    """Add the 'appreciation (%)' column to the ratings table and return it.

    The value is the row-wise mean of every numeric column other than
    'show_id', multiplied by 100.  The table is modified in place.
    """
    notes = data_2.drop(columns=['show_id', 'appreciation (%)'], errors='ignore')
    data_2['appreciation (%)'] = notes.mean(axis=1, numeric_only=True) * 100
    return data_2


def load_data(titles_path=file_path_1, ratings_path=file_path_2):
    """Read both CSV files and return ``(catalog, ratings)``.

    The ratings table is returned with its 'appreciation (%)' column added.
    """
    titles = pd.read_csv(titles_path)
    ratings = add_appreciation(pd.read_csv(ratings_path))
    return titles, ratings


# --------------------------------------------------------------------------
# Menu entries.  Each one offers to save what it displayed to a .csv file.
# --------------------------------------------------------------------------

def catalog(data_1):
    """Show the catalog (first 100 rows) and offer to save all of it."""
    print(data_1.head(100))
    save_to_csv(data_1)


def movies(data_1):
    """Print the titles of all movies and offer to save them."""
    films = data_1[data_1['type'] == 'Movie']
    print(films['title'].tolist())
    save_to_csv(films[['title']])


def series(data_1):
    """Print the titles of all TV shows and offer to save them."""
    shows = data_1[data_1['type'] == 'TV Show']
    print(shows['title'].tolist())
    save_to_csv(shows[['title']])


def by_year(data_1):
    """Show movies and/or series sorted by release year."""
    filtered_data = filter_media_type(data_1)
    sort_type = input("Do you want to sort the years in ascending or descending order? (ascending/descending)").strip().lower()
    if sort_type not in ("ascending", "descending"):
        print("Invalid choice. The dataset could not be sorted!")
        return
    sorted_data = filtered_data.sort_values(by='release_year', ascending=(sort_type == "ascending"))
    print(sorted_data)
    save_to_csv(sorted_data)


def by_country(data_1):
    """Show movies and/or series produced in a country chosen by the user."""
    filtered_data = filter_media_type(data_1)
    print("List of all available countries:")
    print(unique_values(filtered_data['country']))
    country_input = input("Enter the name of the country to display movies and/or series: ").strip().capitalize()
    country_data = filtered_data[contains_text(filtered_data['country'], country_input)]
    if country_data.empty:
        print(f"No movies or series found for the country {country_input}.")
        return
    print(country_data)
    save_to_csv(country_data)


def select_by_genre(data_1):
    """Ask for a media type and a genre; return ``(genre_text, matching_rows)``."""
    filtered_data = filter_media_type(data_1)
    print("List of all possible genres:")
    print(unique_values(filtered_data['listed_in']))
    type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").strip().capitalize()
    type_data = filtered_data[contains_text(filtered_data['listed_in'], type_input)]
    return type_input, type_data


def genre(data_1):
    """Show movies and/or series of a genre chosen by the user."""
    type_input, type_data = select_by_genre(data_1)
    if type_data.empty:
        print(f"No movies or series found for the type {type_input}.")
        return
    print(type_data)
    save_to_csv(type_data)


def duration(data_1):
    """Show movies and/or series of a chosen genre, shortest first.

    The 'duration' column holds text such as '90 min', so the rows are ordered
    by the number it contains rather than alphabetically (which would place
    '100 min' before '90 min').  Units are not converted: when both media
    types are shown, whatever units the column uses are compared by their
    number only.
    """
    type_input, type_data = select_by_genre(data_1)
    if type_data.empty:
        print(f"No movies or series found for the type {type_input}.")
        return
    order_column = '_duration_number'
    type_data_sorted = (
        type_data.assign(**{order_column: duration_number(type_data['duration'])})
        .sort_values(by=order_column, ascending=True, kind='mergesort')
        .drop(columns=order_column)
    )
    print(type_data_sorted)
    save_to_csv(type_data_sorted)


def director(data_1):
    """Show the titles of a director chosen by the user, sorted by year."""
    filtered_data = filter_media_type(data_1)
    print("List of all possible directors: ")
    print(unique_values(filtered_data['director']))
    director_input = input("Enter the name of the director to display movies and/or series: ").strip()
    director_data = filtered_data[contains_text(filtered_data['director'], director_input)]
    if director_data.empty:
        print(f"No person found with the name {director_input}.")
        return
    director_data_sorted = director_data.sort_values(by='release_year', ascending=True)
    print(director_data_sorted)
    save_to_csv(director_data_sorted)


def actor(data_1):
    """Show the titles featuring an actor chosen by the user, sorted by year."""
    filtered_data = filter_media_type(data_1)
    print("List of all possible actors: ")
    print(unique_values(filtered_data['cast']))
    actor_input = input("Enter the name of the actor to display movies and/or series: ").strip()
    actor_data = filtered_data[contains_text(filtered_data['cast'], actor_input)]
    if actor_data.empty:
        print(f"No actor found with the name {actor_input}.")
        return
    actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True)
    print(actor_data_sorted)
    save_to_csv(actor_data_sorted)


def specific_genre_director(data_1):
    """Count and show the titles a director made in a given genre."""
    filtered_data = filter_media_type(data_1)
    print("List of all available directors:")
    print(', '.join(unique_values(filtered_data['director'])))
    director_input = input("Enter the name of the director to display movies and/or series: ").strip()
    print("\nList of all available types:")
    print(', '.join(unique_values(filtered_data['listed_in'])))
    type_input = input("Enter the type (romantic, action, drama, etc.): ").strip().capitalize()
    director_type_data = filtered_data[
        contains_text(filtered_data['director'], director_input)
        & contains_text(filtered_data['listed_in'], type_input)
    ]
    if director_type_data.empty:
        print(f"No movies or series found for the director {director_input} and type {type_input}.")
        return
    count = len(director_type_data)
    print(f"The director {director_input} has directed {count} movie(s) or series of type {type_input}.")
    print(director_type_data)
    save_to_csv(director_type_data)


def specific_genre_actor(data_1):
    """Count and show the titles an actor played in for a given genre."""
    filtered_data = filter_media_type(data_1)
    print("List of all available actors:")
    print(', '.join(unique_values(filtered_data['cast'])))
    actor_input = input("Enter the name of the actor to display movies and/or series: ").strip()
    print("\nList of all available types:")
    print(', '.join(unique_values(filtered_data['listed_in'])))
    type_input = input("Enter the type (romantic, action, drama, etc.): ").strip().capitalize()
    actor_type_data = filtered_data[
        contains_text(filtered_data['cast'], actor_input)
        & contains_text(filtered_data['listed_in'], type_input)
    ]
    if actor_type_data.empty:
        print(f"No movies or series found for the actor {actor_input} and type {type_input}.")
        return
    count = len(actor_type_data)
    print(f"The actor {actor_input} has acted in {count} movie(s) or series of type {type_input}.")
    print(actor_type_data)
    save_to_csv(actor_type_data)


# --------------------------------------------------------------------------
# Ratings.  These functions expect data_2 to carry the 'appreciation (%)'
# column created by add_appreciation().
# --------------------------------------------------------------------------

def most_rated(data_1, data_2):
    """Show movies and/or series ordered from best to worst rated."""
    filtered_data = filter_media_type(data_1)
    link_between = pd.merge(filtered_data, data_2, on='show_id')
    link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)
    print("Films et séries les mieux notés :")
    print(link_between_sorted[['show_id', 'title', 'type', 'appreciation (%)']])
    save_to_csv(link_between_sorted)


def most_rated_year(data_1, data_2):
    """Show the best rated movies and/or series released in a given year."""
    available_years = data_1['release_year'].unique()
    print("Available years: ", available_years)
    year = input("Enter a release year: ")
    try:
        year = int(year)
    except ValueError:
        print("Please enter a valid year.")
        return
    filtered_data = filter_media_type(data_1[data_1['release_year'] == year])
    # Join each title with its rating through the shared 'show_id' key.
    link_between = pd.merge(filtered_data, data_2, on='show_id')
    link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)
    print(f"Top-rated shows for the year {year}:")
    print(link_between_sorted[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
    save_to_csv(link_between_sorted)


def most_rated_recent(data_1, data_2):
    """Show the 20 most recent titles, best rated first within each year."""
    filtered_data = filter_media_type(data_1)
    merged_data = pd.merge(filtered_data, data_2, on='show_id')
    # Newest release year first; ties broken by the highest appreciation.
    sorted_data = merged_data.sort_values(by=['release_year', 'appreciation (%)'], ascending=[False, False])
    top_20_data = sorted_data.head(20)
    print("Top 20 most rated and recent shows:")
    print(top_20_data[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
    save_to_csv(top_20_data)


def parental_code(data_1):
    """List the parental codes found in the 'rating' column.

    TODO: the menu entry promises a display of titles for one chosen code;
    only the list of codes is implemented so far.
    """
    code_list = []
    for codes in data_1['rating'].dropna().astype(str).str.split(', '):
        for code in codes:
            if code not in code_list and code != '':
                code_list.append(code)
    print("Here are the parental codes: ")
    print(code_list)
    save_to_csv(pd.DataFrame({'rating': code_list}))


def collect_director_stats(data_1):
    """Return one row per director: name, countries and number of titles.

    Rows without a director are skipped.  Each director cell may name several
    directors and each country cell several countries (both separated by
    ', ').  The 'nationalities' are the countries of the director's titles,
    joined in alphabetical order.  The result is ordered by the number of
    titles, largest first.
    """
    stats = {}
    if 'country' in data_1.columns:
        country_cells = data_1['country']
    else:
        country_cells = [None] * len(data_1)
    for director_cell, country_cell in zip(data_1['director'], country_cells):
        if pd.isna(director_cell):
            continue
        if pd.isna(country_cell):
            countries = set()
        else:
            countries = {name for name in str(country_cell).split(', ') if name}
        for director_name in str(director_cell).split(', '):
            if not director_name:
                continue
            entry = stats.setdefault(director_name, {'nationalities': set(), 'count': 0})
            entry['nationalities'].update(countries)
            entry['count'] += 1
    ranked = sorted(stats.items(), key=lambda item: item[1]['count'], reverse=True)
    rows = [
        (name, ', '.join(sorted(info['nationalities'])), info['count'])
        for name, info in ranked
    ]
    return pd.DataFrame(rows, columns=['director', 'nationalities', 'count'])


def directors_nationality(data_1):
    """Print every director with their countries, most productive first."""
    if 'director' not in data_1.columns:
        print("The dataset does not contain a 'director' column.")
        return
    sorted_directors = collect_director_stats(data_1)
    print("Directors and their nationalities, sorted by the number of movies and series produced:")
    for name, nationalities, count in zip(
        sorted_directors['director'],
        sorted_directors['nationalities'],
        sorted_directors['count'],
    ):
        print(f"{name}: {nationalities} - {count} movies/series")
    save_to_csv(sorted_directors)


def filter_media_type(data):
    """Ask whether to keep movies, TV shows or both, and return those rows.

    An unrecognised answer keeps every row (after telling the user).
    """
    media_type = input("What type of media do you want to display? (Movie/TV Show/Both): ").strip().lower()
    if media_type in ('movie', 'tv show'):
        return data[data['type'].str.lower() == media_type]
    if media_type != 'both':
        print("Invalid choice. Displaying all types of media.")
    return data


def basic_statistics(data_1):
    """Print how many movies and series exist and the titles per country."""
    if 'type' not in data_1.columns or 'country' not in data_1.columns:
        print("The dataset does not contain the necessary columns.")
        return
    movies_count = len(data_1[data_1['type'] == 'Movie'])
    series_count = len(data_1[data_1['type'] == 'TV Show'])
    print(f"Number of movies in the catalog: {movies_count}")
    print(f"Number of series in the catalog: {series_count}")
    if movies_count > series_count:
        print("There are more movies than series in the catalog.")
    elif movies_count < series_count:
        print("There are more series than movies in the catalog.")
    else:
        print("The catalog has an equal number of movies and series.")
    # A title co-produced by several countries counts once for each of them.
    country_counts = data_1['country'].str.split(', ').explode().value_counts()
    print("\nCountries that produced movies/series, sorted from most to least productive:")
    print(country_counts)


def save_to_csv(data, default_filename='output.csv'):
    """Ask the user whether to save *data* to a CSV file, and do so.

    Parameters:
        data: a DataFrame, a Series or any iterable of values (such as a list
            of titles); non-DataFrame input is converted before writing.
        default_filename: name used when the user just presses Enter.

    If the chosen file already exists the user may overwrite it or pick
    another name (asked again while that name is taken as well).  After
    saving, the user may open the file that was actually written.
    """
    save_choice = input("Do you want to save the data to a CSV file? (YES/NO): ").strip().upper()
    if save_choice != 'YES':
        print("Data not saved.")
        return

    file_name = resolve_csv_name(
        input("Enter the file name (DO NOT include .csv extension, or press Enter for the default): "),
        default_filename,
    )
    while os.path.exists(file_name):
        overwrite_choice = input(f"The file '{file_name}' already exists. Do you want to overwrite it? (YES/NO): ").strip().upper()
        if overwrite_choice == 'YES':
            break
        file_name = resolve_csv_name(
            input("Enter a new file name (DO NOT include .csv extension): "),
            default_filename,
        )

    as_dataframe(data).to_csv(file_name, index=False)
    print(f"Data saved to {file_name}")

    open_choice = input("Do you want to open the saved file? (YES/NO): ").strip().upper()
    if open_choice == 'YES':
        open_file(file_name)


def action():
    """Display the menu, run the chosen entry and report whether to go on.

    Reads the module-level ``data_1`` (catalog) and ``data_2`` (ratings).

    Returns:
        False when the user asks to stop ('STOP' in any case, or '...');
        None after any other command, valid or not.
    """
    print("Here are the different options available:")
    print("1. View the entire catalog")
    print("2. View all movies in the catalog")
    print("3. View all series")
    print("4. View all series, movies or both by year")
    print("5. View all series, movies or both by country")
    print("6. View all series, movies or both by type")
    print("7. View all series, movies or both by type sorted by duration")
    print("8. View series, movies or both directed by a specific director and sorted by year")
    print("9. View series, movies or both featuring a specific actor and sorted by year")
    print("10. View how many series, movies or both and series directed by a director in a specific genre")
    print("11. View how many series, movies or both an actor has played in")
    print("12. Display the highest-rated series, movies or both")
    print("13. Display the highest-rated series, movies or both for a specific year")
    print("14. Display recent highest-rated series, movies or both")
    print("15. Display movies and series based on parental control code")
    print("16. Display the nationalities of directors and sort the list based on the number of movies and series directed")
    print("17. Display basic statistics")
    print("... Enter STOP to stop")

    # Entries that only need the catalog.
    catalog_commands = {
        "1": catalog,
        "2": movies,
        "3": series,
        "4": by_year,
        "5": by_country,
        "6": genre,
        "7": duration,
        "8": director,
        "9": actor,
        "10": specific_genre_director,
        "11": specific_genre_actor,
        "15": parental_code,
        "16": directors_nationality,
        "17": basic_statistics,
    }
    # Entries that also need the ratings table.
    rating_commands = {
        "12": most_rated,
        "13": most_rated_year,
        "14": most_rated_recent,
    }

    command = input("Enter the number of what you want to do: ").strip()
    if command.upper() == "STOP" or command == "...":
        return False
    if command in catalog_commands:
        catalog_commands[command](data_1)
    elif command in rating_commands:
        rating_commands[command](data_1, data_2)
    else:
        print("Invalid choice. Please enter a number from the menu or STOP.")
    return None


if __name__ == "__main__":
    # Load the datasets once, then keep showing the menu until the user stops.
    data_1, data_2 = load_data()
    while action() is not False:
        pass