From fc16e100a5651823627097c4af351dafa32ef51f Mon Sep 17 00:00:00 2001
From: Adrienucl <adrien.payen@student.uclouvain.be>
Date: Tue, 26 Dec 2023 11:25:01 +0100
Subject: [PATCH] last update

---
 Dramas.csv                             |   3 +
 projet_en_groupe/algorithme_netflix.py | 336 ++++++++++++++++++++-----
 ~$Data_Base.xlsx                       | Bin 165 -> 0 bytes
 3 files changed, 271 insertions(+), 68 deletions(-)
 create mode 100644 Dramas.csv
 delete mode 100644 ~$Data_Base.xlsx

diff --git a/Dramas.csv b/Dramas.csv
new file mode 100644
index 0000000..0e95f87
--- /dev/null
+++ b/Dramas.csv
@@ -0,0 +1,3 @@
+show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
+s1237,Movie,Sentinelle,Julien Leclercq,"Olga Kurylenko, Marilyn Lima, Michel Nabokoff, Martin Swabey, Carole Weyers, Andrey Gorlenko, Antonia Malinova, Gabriel Almaer, Blaise Afonso, Guillaume Duhesme, Michel Biel",France,"March 5, 2021",2021,TV-MA,81 min,"Action & Adventure, Dramas, International Movies","Transferred home after a traumatizing combat mission, a highly trained French soldier uses her lethal skills to hunt down the man who hurt her sister."
+s2669,Movie,Earth and Blood,Julien Leclercq,"Sami Bouajila, Eriq Ebouaney, Samy Seghir, Sofia Lesaffre","France, Belgium","April 17, 2020",2020,TV-MA,81 min,"Dramas, International Movies, Thrillers",A sawmill owner and his teenage daughter become tangled in a deadly feud when a drug dealer stashes stolen cocaine on their remote property.
diff --git a/projet_en_groupe/algorithme_netflix.py b/projet_en_groupe/algorithme_netflix.py
index 95d8900..95acc01 100644
--- a/projet_en_groupe/algorithme_netflix.py
+++ b/projet_en_groupe/algorithme_netflix.py
@@ -4,6 +4,7 @@ import pandas as pd
 import tabulate
 import os
 import csv
+from fuzzywuzzy import process
 
 #display(data_2)
 
@@ -62,14 +63,20 @@ def by_year(data_1):  # be careful and/or !!!!!
     if filtered_data is None:
         return  # Exit the function if filter_media_type returns None
 
-    sort_type = input("Do you want to sort the years in ascending or descending order? (ascending/descending)")
-    if sort_type == "ascending":
-        sorted_data = filtered_data.sort_values(by='release_year', ascending=True)
-    elif sort_type == "descending":
-        sorted_data = filtered_data.sort_values(by='release_year', ascending=False)
-    else:
-        print("Invalid choice. The dataset could not be sorted!")
-        return  # Exit the function if the sort type is invalid
+    while True:
+        print("1. Ascending")
+        print("2. Descending")
+        sort_type = input("Do you want to sort the years in ascending or descending order? enter the number : ")
+
+        if sort_type == "1":
+            sorted_data = filtered_data.sort_values(by='release_year', ascending=True)
+            break  # Sort type is valid, exit the loop
+        elif sort_type == "2":
+            sorted_data = filtered_data.sort_values(by='release_year', ascending=False)
+            break  # Sort type is valid, exit the loop
+        else:
+            print("Invalid choice. Please enter 1 for ascending or 2 for descending.")
+            # Repeat the loop to ask for a valid input
 
     print(sorted_data)
     save_to_csv(sorted_data)
@@ -91,7 +98,25 @@ def by_country(data_1):
     country_list.sort()
     print(country_list)
 
-    country_input = input("Enter the name of the country to display movies and/or series: ").capitalize()
+    while True:
+        country_input = input("Enter the name of the country to display movies and/or series: ")
+        
+        # Use FuzzyWuzzy to find the closest match
+        matches = process.extractOne(country_input, country_list)
+
+        if matches[1] >= 80:  # Adjust the similarity threshold as needed
+            country_input = matches[0]
+            break
+        else:
+            closest_match = matches[0]
+            print(f"Invalid country name. The closest match is: {closest_match}")
+
+    # Check if the entered country is correct
+    if country_input in country_list:
+        print(f"You selected: {country_input}")
+    else:
+        print(f"You entered: {country_input}, which is not in the list.")
+
     country_data = filtered_data[filtered_data['country'].str.lower().str.contains(country_input.lower(), case=False, na=False)]
 
     if not country_data.empty:
@@ -119,7 +144,25 @@ def genre(data_1):
     genre_list.sort()
     print(genre_list)
 
-    type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize()
+    while True:
+        type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ")
+
+        # Use FuzzyWuzzy to find the closest match
+        matches = process.extractOne(type_input, genre_list)
+
+        if matches[1] >= 80:  # Adjust the similarity threshold as needed
+            type_input = matches[0]
+            break
+        else:
+            closest_match = matches[0]
+            print(f"Invalid genre. The closest match is: {closest_match}")
+
+    # Check if the entered genre is correct
+    if type_input in genre_list:
+        print(f"You selected: {type_input}")
+    else:
+        print(f"You entered: {type_input}, which is not in the list.")
+
     type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)]
 
     if not type_data.empty:
@@ -128,9 +171,14 @@ def genre(data_1):
     else:
         print(f"No movies or series found for the type {type_input}.")
 
+    return
+
+
+from fuzzywuzzy import process
 
 def duration(data_1):
     filtered_data = filter_media_type(data_1)
+
     genre_list = []
     for genres in data_1['listed_in'].dropna().str.split(', '):
         for genre in genres:
@@ -141,35 +189,61 @@ def duration(data_1):
     genre_list.sort()
     print(genre_list)
 
-    type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize()
-    type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)]
+    while True:
+        type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ")
+
+        # Use FuzzyWuzzy to find the closest match
+        matches = process.extractOne(type_input, genre_list)
+
+        if matches[1] >= 80:  # Adjust the similarity threshold as needed
+            type_input = matches[0]
+            break
+        else:
+            closest_match = matches[0]
+            print(f"Invalid genre. The closest match is: {closest_match}")
+
+    # Check if the entered genre is correct
+    if type_input in genre_list:
+        print(f"You selected: {type_input}")
+    else:
+        print(f"You entered: {type_input}, which is not in the list.")
+
+    print("What type of sorting do you want? ")
+    print("1. Ascending")
+    print("2. Descending")
+    
+    while True:
+        sort_order = input("Enter the sort type number (1/2): ")
 
-    print("Quel type de tri voulez-vous ? ")
-    print("1. Croissant")
-    print("2. Décroissant")
-    sort_order = str(input("Entrez le numéro du type de tri : "))
+        if sort_order in ['1', '2']:
+            break
+        else:
+            print("Invalid sort order. Please enter 1 for ascending or 2 for descending.")
+
+    type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)]
 
     if not type_data.empty:
-        type_data['duration'] = type_data['duration'].str.extract('(\d+)').astype(int)
+        print(f"\nDisplaying data for {type_input} sorted in {'ascending' if sort_order == '1' else 'descending'} order:")
+        
+        type_data.loc[:, 'duration'] = type_data['duration'].str.extract('(\\d+)').astype(int)
 
-        if sort_order.lower() == '1':
+        if sort_order == '1':
             type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, True])
-        elif sort_order.lower() == '2':
+        elif sort_order == '2':
             type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, False])
-        else:
-            print("Invalid sort order. Defaulting to ascending order.")
-            type_data_sorted = type_data.sort_values(by=['type', 'duration'], ascending=[True, True])
         
         type_data_sorted['duration'] = type_data_sorted.apply(
             lambda row: f"{row['duration']} min" if row['type'].lower() == 'movie' else f"{row['duration']} Season", axis=1
         )
 
-
         print(type_data_sorted)
         save_to_csv(type_data_sorted)
     else:
         print(f"No movies or series found for the type {type_input}.")
 
+    return
+
+
 
 def director(data_1):
     filtered_data = filter_media_type(data_1)
@@ -183,18 +257,37 @@ def director(data_1):
     print("List of all possible directors: ")
     print(director_list)
 
-    director_input = input("Enter the name of the director to display movies and/or series: ")
+    while True:
+        director_input = input("Enter the name of the director to display movies and/or series: ")
+
+        # Use FuzzyWuzzy to find the closest match
+        matches = process.extractOne(director_input, director_list)
+
+        if matches[1] >= 80:  # Adjust the similarity threshold as needed
+            director_input = matches[0]
+            break
+        else:
+            closest_match = matches[0]
+            print(f"Invalid director name. The closest match is: {closest_match}")
+
+    # Check if the entered director is correct
+    if director_input in director_list:
+        print(f"You selected: {director_input}")
+    else:
+        print(f"You entered: {director_input}, which is not in the list.")
+
     director_data = filtered_data[filtered_data['director'].str.lower().str.contains(director_input.lower(), case=False, na=False)]
 
     if not director_data.empty:
+        print(f"\nDisplaying data for movies and/or series directed by {director_input} sorted by release year in ascending order:")
 
-        director_data_sorted = director_data.sort_values(by='release_year', ascending=True)  # see if we do in ascending or descending
-
+        director_data_sorted = director_data.sort_values(by='release_year', ascending=True)
         print(director_data_sorted)
         save_to_csv(director_data_sorted)
     else:
         print(f"No person found with the name {director_input}.")
 
+    return
 
 def actor(data_1):
     filtered_data = filter_media_type(data_1)
@@ -208,17 +301,38 @@ def actor(data_1):
     print("List of all possible actors: ")
     print(actor_list)
 
-    actor_input = input("Enter the name of the actor to display movies and/or series: ")
+    while True:
+        actor_input = input("Enter the name of the actor to display movies and/or series: ")
+
+        # Use FuzzyWuzzy to find the closest match
+        matches = process.extractOne(actor_input, actor_list)
+
+        if matches[1] >= 80:  # Adjust the similarity threshold as needed
+            actor_input = matches[0]
+            break
+        else:
+            closest_match = matches[0]
+            print(f"Invalid actor name. The closest match is: {closest_match}")
+
+    # Check if the entered actor is correct
+    if actor_input in actor_list:
+        print(f"You selected: {actor_input}")
+    else:
+        print(f"You entered: {actor_input}, which is not in the list.")
+
     actor_data = filtered_data[filtered_data['cast'].str.lower().str.contains(actor_input.lower(), case=False, na=False)]
 
     if not actor_data.empty:
-        actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True)
+        print(f"\nDisplaying data for movies and/or series featuring {actor_input} sorted by release year in ascending order:")
 
+        actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True)
         print(actor_data_sorted)
         save_to_csv(actor_data_sorted)
     else:
         print(f"No actor found with the name {actor_input}.")
 
+    return
+
 
 def specific_genre_director(data_1):
     filtered_data = filter_media_type(data_1)
@@ -230,13 +344,49 @@ def specific_genre_director(data_1):
     print("List of all available directors:")
     print(', '.join(unique_directors))
 
-    director_input = input("Enter the name of the director to display movies and/or series: ")
+    while True:
+        director_input = input("Enter the name of the director to display movies and/or series: ")
+
+        # Use FuzzyWuzzy to find the closest match
+        director_matches = process.extractOne(director_input, unique_directors)
+
+        if director_matches[1] >= 80:  # Adjust the similarity threshold as needed
+            director_input = director_matches[0]
+            break
+        else:
+            closest_match = director_matches[0]
+            print(f"Invalid director name. The closest match is: {closest_match}")
+
+    # Check if the entered director is correct
+    if director_input in unique_directors:
+        print(f"You selected: {director_input}")
+    else:
+        print(f"You entered: {director_input}, which is not in the list.")
+
+    # Get a list of all available types without duplicates
+    unique_types = filtered_data['listed_in'].str.split(', ').explode().unique()
 
-    unique_types = filtered_data['listed_in'].unique()
     print("\nList of all available types:")
     print(', '.join(unique_types))
 
-    type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize()
+    while True:
+        type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize()
+
+        # Use FuzzyWuzzy to find the closest match
+        type_matches = process.extractOne(type_input, unique_types)
+
+        if type_matches[1] >= 80:  # Adjust the similarity threshold as needed
+            type_input = type_matches[0]
+            break
+        else:
+            closest_match = type_matches[0]
+            print(f"Invalid type. The closest match is: {closest_match}")
+
+    # Check if the entered type is correct
+    if type_input in unique_types:
+        print(f"You selected: {type_input}")
+    else:
+        print(f"You entered: {type_input}, which is not in the list.")
 
     director_type_data = filtered_data[
         (filtered_data['director'].str.lower().str.contains(director_input.lower(), case=False, na=False)) &
@@ -253,38 +403,73 @@ def specific_genre_director(data_1):
     else:
         print(f"No movies or series found for the director {director_input} and type {type_input}.")
 
+    return
+
 
 def specific_genre_actor(data_1):
     filtered_data = filter_media_type(data_1)
 
-    unique_actors = filtered_data['cast'].unique()
-    # Convert elements to strings to handle potential float values
+    # Get a list of unique actors
+    unique_actors = filtered_data['cast'].str.split(', ').explode().unique()
     unique_actors = [str(actor) for actor in unique_actors]
     
     print("List of all available actors:")
     print(', '.join(unique_actors))
 
-    actor_input = input("Enter the name of the actor to display movies and/or series: ")
+    # Input actor name with fuzzy matching
+    while True:
+        actor_input = input("Enter the name of the actor to display movies and/or series: ")
+        actor_matches = process.extractOne(actor_input, unique_actors)
+
+        if actor_matches[1] >= 80:
+            actor_input = actor_matches[0]
+            break
+        else:
+            closest_match = actor_matches[0]
+            print(f"Invalid actor name. The closest match is: {closest_match}")
+
+    if actor_input in unique_actors:
+        print(f"You selected: {actor_input}")
+    else:
+        print(f"You entered: {actor_input}, which is not in the list.")
+
+    # Get a list of all available types without duplicates
+    unique_types = filtered_data['listed_in'].str.split(', ').explode().unique()
 
-    unique_types = filtered_data['listed_in'].unique()
     print("\nList of all available types:")
     print(', '.join(unique_types))
 
-    type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize()
+    # Input type with fuzzy matching
+    while True:
+        type_input = input("Enter the type (romantic, action, drama, etc.): ")
+        type_matches = process.extractOne(type_input, unique_types)
+
+        if type_matches[1] >= 80:
+            type_input = type_matches[0]
+            break
+        else:
+            closest_match = type_matches[0]
+            print(f"Invalid type. The closest match is: {closest_match}")
+
+    if type_input in unique_types:
+        print(f"You selected: {type_input}")
+    else:
+        print(f"You entered: {type_input}, which is not in the list.")
 
+    # Filter the data based on actor and type
     actor_type_data = filtered_data[
         (filtered_data['cast'].str.lower().str.contains(actor_input.lower(), case=False, na=False)) &
         (filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False))
     ]
 
     if not actor_type_data.empty:
-        # Display the count
         count = len(actor_type_data)
         print(f"The actor {actor_input} has acted in {count} movie(s) or series of type {type_input}.")
         print(actor_type_data)
         save_to_csv(actor_type_data)
     else:
         print(f"No movies or series found for the actor {actor_input} and type {type_input}.")
+
     return
 
 
@@ -313,17 +498,23 @@ def most_rated(data_1, data_2) :
 def most_rated_year(data_1, data_2):
     # Display all available unique release years
     available_years = sorted(data_1['release_year'].unique())
-    print("Available years: ", available_years) # tri des dates 
+    print("Available years: ", available_years)
+
+    # Input year with validation
+    while True:
+        year_input = input("Enter a release year: ")
 
-    # Ask the user to enter a release year
-    year = input("Enter a release year: ")
+        try:
+            # Convert the input year to an integer
+            year = int(year_input)
 
-    try:
-        # Convert the year to an integer
-        year = int(year)
-    except ValueError:
-        print("Please enter a valid year.")
-        return
+            # Check if the entered year is in the available years
+            if year in available_years:
+                break
+            else:
+                print("Please enter a valid year from the available options.")
+        except ValueError:
+            print("Please enter a valid year.")
 
     # Filter the data based on the release year
     filtered_data = filter_media_type(data_1[data_1['release_year'] == year])
@@ -366,20 +557,22 @@ def parental_code(data_1):
     print("Valid parental codes:")
     print(', '.join(valid_codes))
 
-    # Ask the user to enter a parental code
-    selected_code = input("Enter a parental code to display movies and/or series: ")
-
-    # Filter the data based on the selected parental code
-    if selected_code in valid_codes:
-        result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)]
-        if not result_data.empty:
-            print(result_data)
-            save_to_csv(result_data)
+    while True:
+        # Ask the user to enter a parental code
+        selected_code = input("Enter a parental code to display movies and/or series: ")
+
+        # Filter the data based on the selected parental code
+        if selected_code in valid_codes:
+            result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)]
+            if not result_data.empty:
+                print(result_data)
+                save_to_csv(result_data)
+            else:
+                print(f"No movies or series found for the parental code {selected_code}.")
+            break
         else:
-            print(f"No movies or series found for the parental code {selected_code}.")
-    else:
-        print("Invalid parental code entered.")
-
+            print("Invalid parental code entered. Please enter a valid code.")
+    
     return
 
 
@@ -428,16 +621,23 @@ def directors_nationality(data_1):
 
 # Allow to filter if we want movie, tv show or both 
 def filter_media_type(data):
-    media_type = input("What type of media do you want to display? (Movie/TV Show/Both): ").lower()
-    
-    if media_type in ['movie', 'tv show', 'both']:
-        if media_type == 'both':
-            return data
+    while True:
+        print("Select the type of media:")
+        print("1. Movie")
+        print("2. TV Show")
+        print("3. Both")
+        
+        media_choice = input("Enter the corresponding number : ")
+
+        if media_choice in ['1', '2', '3']:
+            if media_choice == '1':
+                return data[data['type'].str.lower() == 'movie']
+            elif media_choice == '2':
+                return data[data['type'].str.lower() == 'tv show']
+            else:
+                return data
         else:
-            return data[data['type'].str.lower() == media_type]
-    else:
-        print("Invalid choice. Displaying all types of media.")
-        return data  # Return the original data if the media type choice is invalid
+            print("Invalid choice. Please enter a valid number.")
 
 
 
diff --git a/~$Data_Base.xlsx b/~$Data_Base.xlsx
deleted file mode 100644
index d10daf560a7f8247050905285b3e618c244966d6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 165
icmZQ~OexAt%~K!|uroL^q%agQWHO{O<S{7F7y<yg{SdVP

-- 
GitLab