diff --git a/Dramas.csv b/Dramas.csv new file mode 100644 index 0000000000000000000000000000000000000000..0e95f87503354eb0f54da8c011a3a2dc2eadd031 --- /dev/null +++ b/Dramas.csv @@ -0,0 +1,3 @@ +show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description +s1237,Movie,Sentinelle,Julien Leclercq,"Olga Kurylenko, Marilyn Lima, Michel Nabokoff, Martin Swabey, Carole Weyers, Andrey Gorlenko, Antonia Malinova, Gabriel Almaer, Blaise Afonso, Guillaume Duhesme, Michel Biel",France,"March 5, 2021",2021,TV-MA,81 min,"Action & Adventure, Dramas, International Movies","Transferred home after a traumatizing combat mission, a highly trained French soldier uses her lethal skills to hunt down the man who hurt her sister." +s2669,Movie,Earth and Blood,Julien Leclercq,"Sami Bouajila, Eriq Ebouaney, Samy Seghir, Sofia Lesaffre","France, Belgium","April 17, 2020",2020,TV-MA,81 min,"Dramas, International Movies, Thrillers",A sawmill owner and his teenage daughter become tangled in a deadly feud when a drug dealer stashes stolen cocaine on their remote property. diff --git a/projet_en_groupe/algorithme_netflix.py b/projet_en_groupe/algorithme_netflix.py index 95d8900573be2b48770b596b2c0a4728786e9afd..95acc016786039dbfb6aa9bc06258aeca84a8103 100644 --- a/projet_en_groupe/algorithme_netflix.py +++ b/projet_en_groupe/algorithme_netflix.py @@ -4,6 +4,7 @@ import pandas as pd import tabulate import os import csv +from fuzzywuzzy import process #display(data_2) @@ -62,14 +63,20 @@ def by_year(data_1): # be careful and/or !!!!! if filtered_data is None: return # Exit the function if filter_media_type returns None - sort_type = input("Do you want to sort the years in ascending or descending order? (ascending/descending)") - if sort_type == "ascending": - sorted_data = filtered_data.sort_values(by='release_year', ascending=True) - elif sort_type == "descending": - sorted_data = filtered_data.sort_values(by='release_year', ascending=False) - else: - print("Invalid choice. The dataset could not be sorted!") - return # Exit the function if the sort type is invalid + while True: + print("1. Ascending") + print("2. Descending") + sort_type = input("Do you want to sort the years in ascending or descending order? enter the number : ") + + if sort_type == "1": + sorted_data = filtered_data.sort_values(by='release_year', ascending=True) + break # Sort type is valid, exit the loop + elif sort_type == "2": + sorted_data = filtered_data.sort_values(by='release_year', ascending=False) + break # Sort type is valid, exit the loop + else: + print("Invalid choice. Please enter 1 for ascending or 2 for descending.") + # Repeat the loop to ask for a valid input print(sorted_data) save_to_csv(sorted_data) @@ -91,7 +98,25 @@ def by_country(data_1): country_list.sort() print(country_list) - country_input = input("Enter the name of the country to display movies and/or series: ").capitalize() + while True: + country_input = input("Enter the name of the country to display movies and/or series: ") + + # Use FuzzyWuzzy to find the closest match + matches = process.extractOne(country_input, country_list) + + if matches[1] >= 80: # Adjust the similarity threshold as needed + country_input = matches[0] + break + else: + closest_match = matches[0] + print(f"Invalid country name. The closest match is: {closest_match}") + + # Check if the entered country is correct + if country_input in country_list: + print(f"You selected: {country_input}") + else: + print(f"You entered: {country_input}, which is not in the list.") + country_data = filtered_data[filtered_data['country'].str.lower().str.contains(country_input.lower(), case=False, na=False)] if not country_data.empty: @@ -119,7 +144,25 @@ def genre(data_1): genre_list.sort() print(genre_list) - type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize() + while True: + type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ") + + # Use FuzzyWuzzy to find the closest match + matches = process.extractOne(type_input, genre_list) + + if matches[1] >= 80: # Adjust the similarity threshold as needed + type_input = matches[0] + break + else: + closest_match = matches[0] + print(f"Invalid genre. The closest match is: {closest_match}") + + # Check if the entered genre is correct + if type_input in genre_list: + print(f"You selected: {type_input}") + else: + print(f"You entered: {type_input}, which is not in the list.") + type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)] if not type_data.empty: @@ -128,9 +171,14 @@ def genre(data_1): else: print(f"No movies or series found for the type {type_input}.") + return + + +from fuzzywuzzy import process def duration(data_1): filtered_data = filter_media_type(data_1) + genre_list = [] for genres in data_1['listed_in'].dropna().str.split(', '): for genre in genres: @@ -141,35 +189,61 @@ def duration(data_1): genre_list.sort() print(genre_list) - type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize() - type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)] + while True: + type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ") + + # Use FuzzyWuzzy to find the closest match + matches = process.extractOne(type_input, genre_list) + + if matches[1] >= 80: # Adjust the similarity threshold as needed + type_input = matches[0] + break + else: + closest_match = matches[0] + print(f"Invalid genre. The closest match is: {closest_match}") + + # Check if the entered genre is correct + if type_input in genre_list: + print(f"You selected: {type_input}") + else: + print(f"You entered: {type_input}, which is not in the list.") + + print("What type of sorting do you want? ") + print("1. Ascending") + print("2. Descending") + + while True: + sort_order = input("Enter the sort type number (1/2): ") - print("Quel type de tri voulez-vous ? ") - print("1. Croissant") - print("2. Décroissant") - sort_order = str(input("Entrez le numéro du type de tri : ")) + if sort_order in ['1', '2']: + break + else: + print("Invalid sort order. Please enter 1 for ascending or 2 for descending.") + + type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)] if not type_data.empty: - type_data['duration'] = type_data['duration'].str.extract('(\d+)').astype(int) + print(f"\nDisplaying data for {type_input} sorted in {'ascending' if sort_order == '1' else 'descending'} order:") + + type_data.loc[:, 'duration'] = type_data['duration'].str.extract('(\\d+)').astype(int) - if sort_order.lower() == '1': + if sort_order == '1': type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, True]) - elif sort_order.lower() == '2': + elif sort_order == '2': type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, False]) - else: - print("Invalid sort order. Defaulting to ascending order.") - type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, True]) type_data_sorted['duration'] = type_data_sorted.apply( lambda row: f"{row['duration']} min" if row['type'].lower() == 'movie' else f"{row['duration']} Season", axis=1 ) - print(type_data_sorted) save_to_csv(type_data_sorted) else: print(f"No movies or series found for the type {type_input}.") + return + + def director(data_1): filtered_data = filter_media_type(data_1) @@ -183,18 +257,37 @@ def director(data_1): print("List of all possible directors: ") print(director_list) - director_input = input("Enter the name of the director to display movies and/or series: ") + while True: + director_input = input("Enter the name of the director to display movies and/or series: ") + + # Use FuzzyWuzzy to find the closest match + matches = process.extractOne(director_input, director_list) + + if matches[1] >= 80: # Adjust the similarity threshold as needed + director_input = matches[0] + break + else: + closest_match = matches[0] + print(f"Invalid director name. The closest match is: {closest_match}") + + # Check if the entered director is correct + if director_input in director_list: + print(f"You selected: {director_input}") + else: + print(f"You entered: {director_input}, which is not in the list.") + director_data = filtered_data[filtered_data['director'].str.lower().str.contains(director_input.lower(), case=False, na=False)] if not director_data.empty: + print(f"\nDisplaying data for movies and/or series directed by {director_input} sorted by release year in ascending order:") - director_data_sorted = director_data.sort_values(by='release_year', ascending=True) # see if we do in ascending or descending - + director_data_sorted = director_data.sort_values(by='release_year', ascending=True) print(director_data_sorted) save_to_csv(director_data_sorted) else: print(f"No person found with the name {director_input}.") + return def actor(data_1): filtered_data = filter_media_type(data_1) @@ -208,17 +301,38 @@ def actor(data_1): print("List of all possible actors: ") print(actor_list) - actor_input = input("Enter the name of the actor to display movies and/or series: ") + while True: + actor_input = input("Enter the name of the actor to display movies and/or series: ") + + # Use FuzzyWuzzy to find the closest match + matches = process.extractOne(actor_input, actor_list) + + if matches[1] >= 80: # Adjust the similarity threshold as needed + actor_input = matches[0] + break + else: + closest_match = matches[0] + print(f"Invalid actor name. The closest match is: {closest_match}") + + # Check if the entered actor is correct + if actor_input in actor_list: + print(f"You selected: {actor_input}") + else: + print(f"You entered: {actor_input}, which is not in the list.") + actor_data = filtered_data[filtered_data['cast'].str.lower().str.contains(actor_input.lower(), case=False, na=False)] if not actor_data.empty: - actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True) + print(f"\nDisplaying data for movies and/or series featuring {actor_input} sorted by release year in ascending order:") + actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True) print(actor_data_sorted) save_to_csv(actor_data_sorted) else: print(f"No actor found with the name {actor_input}.") + return + def specific_genre_director(data_1): filtered_data = filter_media_type(data_1) @@ -230,13 +344,49 @@ def specific_genre_director(data_1): print("List of all available directors:") print(', '.join(unique_directors)) - director_input = input("Enter the name of the director to display movies and/or series: ") + while True: + director_input = input("Enter the name of the director to display movies and/or series: ") + + # Use FuzzyWuzzy to find the closest match + director_matches = process.extractOne(director_input, unique_directors) + + if director_matches[1] >= 80: # Adjust the similarity threshold as needed + director_input = director_matches[0] + break + else: + closest_match = director_matches[0] + print(f"Invalid director name. The closest match is: {closest_match}") + + # Check if the entered director is correct + if director_input in unique_directors: + print(f"You selected: {director_input}") + else: + print(f"You entered: {director_input}, which is not in the list.") + + # Get a list of all available types without duplicates + unique_types = filtered_data['listed_in'].str.split(', ').explode().unique() - unique_types = filtered_data['listed_in'].unique() print("\nList of all available types:") print(', '.join(unique_types)) - type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize() + while True: + type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize() + + # Use FuzzyWuzzy to find the closest match + type_matches = process.extractOne(type_input, unique_types) + + if type_matches[1] >= 80: # Adjust the similarity threshold as needed + type_input = type_matches[0] + break + else: + closest_match = type_matches[0] + print(f"Invalid type. The closest match is: {closest_match}") + + # Check if the entered type is correct + if type_input in unique_types: + print(f"You selected: {type_input}") + else: + print(f"You entered: {type_input}, which is not in the list.") director_type_data = filtered_data[ (filtered_data['director'].str.lower().str.contains(director_input.lower(), case=False, na=False)) & @@ -253,38 +403,73 @@ def specific_genre_director(data_1): else: print(f"No movies or series found for the director {director_input} and type {type_input}.") + return + def specific_genre_actor(data_1): filtered_data = filter_media_type(data_1) - unique_actors = filtered_data['cast'].unique() - # Convert elements to strings to handle potential float values + # Get a list of unique actors + unique_actors = filtered_data['cast'].str.split(', ').explode().unique() unique_actors = [str(actor) for actor in unique_actors] print("List of all available actors:") print(', '.join(unique_actors)) - actor_input = input("Enter the name of the actor to display movies and/or series: ") + # Input actor name with fuzzy matching + while True: + actor_input = input("Enter the name of the actor to display movies and/or series: ") + actor_matches = process.extractOne(actor_input, unique_actors) + + if actor_matches[1] >= 80: + actor_input = actor_matches[0] + break + else: + closest_match = actor_matches[0] + print(f"Invalid actor name. The closest match is: {closest_match}") + + if actor_input in unique_actors: + print(f"You selected: {actor_input}") + else: + print(f"You entered: {actor_input}, which is not in the list.") + + # Get a list of all available types without duplicates + unique_types = filtered_data['listed_in'].str.split(', ').explode().unique() - unique_types = filtered_data['listed_in'].unique() print("\nList of all available types:") print(', '.join(unique_types)) - type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize() + # Input type with fuzzy matching + while True: + type_input = input("Enter the type (romantic, action, drama, etc.): ") + type_matches = process.extractOne(type_input, unique_types) + + if type_matches[1] >= 80: + type_input = type_matches[0] + break + else: + closest_match = type_matches[0] + print(f"Invalid type. The closest match is: {closest_match}") + + if type_input in unique_types: + print(f"You selected: {type_input}") + else: + print(f"You entered: {type_input}, which is not in the list.") + # Filter the data based on actor and type actor_type_data = filtered_data[ (filtered_data['cast'].str.lower().str.contains(actor_input.lower(), case=False, na=False)) & (filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)) ] if not actor_type_data.empty: - # Display the count count = len(actor_type_data) print(f"The actor {actor_input} has acted in {count} movie(s) or series of type {type_input}.") print(actor_type_data) save_to_csv(actor_type_data) else: print(f"No movies or series found for the actor {actor_input} and type {type_input}.") + return @@ -313,17 +498,23 @@ def most_rated(data_1, data_2) : def most_rated_year(data_1, data_2): # Display all available unique release years available_years = sorted(data_1['release_year'].unique()) - print("Available years: ", available_years) # tri des dates + print("Available years: ", available_years) + + # Input year with validation + while True: + year_input = input("Enter a release year: ") - # Ask the user to enter a release year - year = input("Enter a release year: ") + try: + # Convert the input year to an integer + year = int(year_input) - try: - # Convert the year to an integer - year = int(year) - except ValueError: - print("Please enter a valid year.") - return + # Check if the entered year is in the available years + if year in available_years: + break + else: + print("Please enter a valid year from the available options.") + except ValueError: + print("Please enter a valid year.") # Filter the data based on the release year filtered_data = filter_media_type(data_1[data_1['release_year'] == year]) @@ -366,20 +557,22 @@ def parental_code(data_1): print("Valid parental codes:") print(', '.join(valid_codes)) - # Ask the user to enter a parental code - selected_code = input("Enter a parental code to display movies and/or series: ") - - # Filter the data based on the selected parental code - if selected_code in valid_codes: - result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)] - if not result_data.empty: - print(result_data) - save_to_csv(result_data) + while True: + # Ask the user to enter a parental code + selected_code = input("Enter a parental code to display movies and/or series: ") + + # Filter the data based on the selected parental code + if selected_code in valid_codes: + result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)] + if not result_data.empty: + print(result_data) + save_to_csv(result_data) + else: + print(f"No movies or series found for the parental code {selected_code}.") + break else: - print(f"No movies or series found for the parental code {selected_code}.") - else: - print("Invalid parental code entered.") - + print("Invalid parental code entered. Please enter a valid code.") + return @@ -428,16 +621,23 @@ def directors_nationality(data_1): # Allow to filter if we want movie, tv show or both def filter_media_type(data): - media_type = input("What type of media do you want to display? (Movie/TV Show/Both): ").lower() - - if media_type in ['movie', 'tv show', 'both']: - if media_type == 'both': - return data + while True: + print("Select the type of media:") + print("1. Movie") + print("2. TV Show") + print("3. Both") + + media_choice = input("Enter the corresponding number : ") + + if media_choice in ['1', '2', '3']: + if media_choice == '1': + return data[data['type'].str.lower() == 'movie'] + elif media_choice == '2': + return data[data['type'].str.lower() == 'tv show'] + else: + return data else: - return data[data['type'].str.lower() == media_type] - else: - print("Invalid choice. Displaying all types of media.") - return data # Return the original data if the media type choice is invalid + print("Invalid choice. Please enter a valid number.") diff --git a/~$Data_Base.xlsx b/~$Data_Base.xlsx deleted file mode 100644 index d10daf560a7f8247050905285b3e618c244966d6..0000000000000000000000000000000000000000 Binary files a/~$Data_Base.xlsx and /dev/null differ