last update

db25e628 · Adrien Payen · 76a0ab2a · db25e628 · db25e628 · db25e628
--- a/.DS_Store
+++ b/.DS_Store
--- a/matrice_cat.xlsx
+++ b/matrice_cat.xlsx
--- a/matrix_category.xlsx
+++ b/matrix_category.xlsx
--- a/projet_en_groupe/.DS_Store
+++ b/projet_en_groupe/.DS_Store
--- a/projet_en_groupe/algorithme_netflix.py
+++ b/projet_en_groupe/algorithme_netflix.py
@@ -31,10 +31,12 @@ def movies(data_1): # enregistrement
    films = data_1[data_1['type'] == 'Movie']  # Filter the data to include only movies
    movie_titles = films['title'].tolist()  # Extract movie titles

+    movie_df = pd.DataFrame({'Movie Titles': movie_titles})
+
    # afficher que les 100 premiers

-    print(movie_titles)  # Display movie titles
-    save_to_csv(movie_titles)
+    print(movie_df)  # Display movie titles
+    save_to_csv(movie_df)

    return  # Be careful, you need to ask each time if they want to save the list to a .csv

@@ -44,9 +46,11 @@ def series(data_1):
    series = data_1[data_1['type'] == 'TV Show']  # Filter the data to include only series
    series_titles = series['title'].tolist()  # Extract series titles

-    print(series_titles)  # Display series titles
+    series_df = pd.DataFrame({'Movie Titles': series_titles})
+
+    print(series_df)  # Display series titles

-    save_to_csv(series_titles)
+    save_to_csv(series_df)

    return  # Be careful, you need to ask each time if they want to save the list to a .csv

@@ -77,10 +81,11 @@ def by_country(data_1):
    filtered_data = filter_media_type(data_1)

    country_list = []
-    for countries in filtered_data['country'].dropna().str.split(', '):
+    for countries in filtered_data['country'].dropna().str.split(','):
        for country in countries:
-            if country not in country_list and country != '':
-                country_list.append(country)
+            cleaned_country = country.strip()  # Remove leading and trailing spaces
+            if cleaned_country and cleaned_country not in country_list:
+                country_list.append(cleaned_country)

    print("List of all available countries:")
    country_list.sort()
@@ -94,9 +99,9 @@ def by_country(data_1):
        save_to_csv(country_data)
    else:
        print(f"No movies or series found for the country {country_input}.")
-    

    return
+
 # Be careful, you need to ask each time if they want to save the list to a .csv


@@ -139,9 +144,19 @@ def duration(data_1):
    type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize()
    type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)]

-    if not type_data.empty:
+    print("Quel type de tri voulez-vous ? ")
+    print("1. Croissant")
+    print("2. Décroissant")
+    sort_order = str(input("Entrez le numéro du type de tri : "))

-        type_data_sorted = type_data.sort_values(by='duration', ascending=True)  # see if we do in ascending or descending
+    if not type_data.empty:
+        if sort_order.lower() == '1':
+            type_data_sorted = type_data.sort_values(by='duration', ascending=True)
+        elif sort_order.lower() == '2':
+            type_data_sorted = type_data.sort_values(by='duration', ascending=False)
+        else:
+            print("Invalid sort order. Defaulting to ascending order.")
+            type_data_sorted = type_data.sort_values(by='duration', ascending=True)

        print(type_data_sorted)
        save_to_csv(type_data_sorted)
@@ -149,6 +164,7 @@ def duration(data_1):
        print(f"No movies or series found for the type {type_input}.")


+
 def director(data_1):
    filtered_data = filter_media_type(data_1)

@@ -202,6 +218,9 @@ def specific_genre_director(data_1):
    filtered_data = filter_media_type(data_1)

    unique_directors = filtered_data['director'].unique()
+    # Convert elements to strings so ', '.join() below does not raise on non-string entries (e.g. float NaN)
+    unique_directors = [str(director) for director in unique_directors]
+    
    print("List of all available directors:")
    print(', '.join(unique_directors))

@@ -233,6 +252,9 @@ def specific_genre_actor(data_1):
    filtered_data = filter_media_type(data_1)

    unique_actors = filtered_data['cast'].unique()
+    # Convert elements to strings so ', '.join() below does not raise on non-string entries (e.g. float NaN)
+    unique_actors = [str(actor) for actor in unique_actors]
+    
    print("List of all available actors:")
    print(', '.join(unique_actors))

@@ -260,6 +282,7 @@ def specific_genre_actor(data_1):
    return


+
 # rating

 # these are variables that needs to be registered in general not in a local function
@@ -283,7 +306,7 @@ def most_rated(data_1, data_2) :

 def most_rated_year(data_1, data_2):
    # Display all available unique release years
-    available_years = data_1['release_year'].unique()
+    available_years = sorted(data_1['release_year'].unique())
    print("Available years: ", available_years) # tri des dates 

    # Ask the user to enter a release year
@@ -328,52 +351,74 @@ def most_rated_recent(data_1, data_2):

 # Example usage

-def parental_code(data_1): # s5542 bug concernant les virgules
+def parental_code(data_1):
+    valid_codes = set(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R', 'TV-G', 'G', 'NC-17', 'NR', 'TV-Y7-FV', 'UR'])

-    code_list = []
-    for codes in data_1['rating'].dropna().str.split(', '):
-        for code in codes:
-            if code not in code_list and code != '':
-                code_list.append(code)
-    print("Here are the parental codes: ")
-    print(code_list)
-    save_to_csv(code_list)
-    return
+    # Filter out entries that are not valid parental codes
+    filtered_data = data_1[data_1['rating'].isin(valid_codes)]
+
+    print("Valid parental codes:")
+    print(', '.join(valid_codes))

+    # Ask the user to enter a parental code
+    selected_code = input("Enter a parental code to display movies and/or series: ")

+    # Filter the data based on the selected parental code
+    if selected_code in valid_codes:
+        result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)]
+        if not result_data.empty:
+            print(result_data)
+            save_to_csv(result_data)
+        else:
+            print(f"No movies or series found for the parental code {selected_code}.")
+    else:
+        print("Invalid parental code entered.")
+
+    return

-  #code_parental = input("Entrez le code de contrôle parental : PG-13, TV-MA")

 def directors_nationality(data_1):
-    # Check if the 'director' column exists in the dataset
-    if 'director' not in data_1.columns:
-        print("The dataset does not contain a 'director' column.")
+    # Check if the 'director' and 'country' columns exist in the dataset
+    if 'director' not in data_1.columns or 'country' not in data_1.columns:
+        print("The dataset does not contain 'director' or 'country' columns.")
        return

    # Extract unique directors and their respective nationalities
    directors_nationality_dict = {}
    for index, row in data_1.iterrows():
-        directors = row['director'].split(', ')
+        directors = str(row['director']).split(', ') if pd.notna(row['director']) else []
        nationality = row['country']

        for director in directors:
            if director in directors_nationality_dict:
-                directors_nationality_dict[director]['nationalities'].add(nationality)
-                directors_nationality_dict[director]['count'] += 1
+                # Use a set to store unique nationalities for each director
+                directors_nationality_dict[director]['nationalities'].add(str(nationality))
+                directors_nationality_dict[director]['number of movies or series'] += 1
            else:
-                directors_nationality_dict[director] = {'nationalities': {nationality}, 'count': 1}
+                directors_nationality_dict[director] = {'nationalities': set(), 'number of movies or series': 1}
+                directors_nationality_dict[director]['nationalities'].add(str(nationality))
+
+    # Convert each director's nationality set (already de-duplicated) to a list
+    for director_info in directors_nationality_dict.values():
+        director_info['nationalities'] = list(set(director_info['nationalities']))

    # Sort the directors by the number of movies and series produced
-    sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['count'], reverse=True)
+    sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['number of movies or series'], reverse=True)
+
+    columns = ['director', 'nationalities', 'number of movies or series']
+    directors_df = pd.DataFrame([[director, ', '.join(info['nationalities']), info['number of movies or series']] for director, info in sorted_directors], columns=columns)

    # Display the list of directors and their nationalities
    print("Directors and their nationalities, sorted by the number of movies and series produced:")
    for director, info in sorted_directors:
-        print(f"{director}: {', '.join(info['nationalities'])} - {info['count']} movies/series")
-    save_to_csv(sorted_directors)
-    return
+        director_name = str(director) if pd.notna(director) else 'Unknown'
+        nationalities_str = ', '.join(str(n) for n in info['nationalities'])
+        print(f"{director_name}: {nationalities_str} - {info['number of movies or series']} movies/series")

+    # Save to CSV using the DataFrame
+    save_to_csv(directors_df)  # save_to_csv is defined further down in this file

+    return directors_df



@@ -457,26 +502,16 @@ def save_to_csv(data, default_filename='output.csv'):
                new_filename = new_filename + ".csv"
                data.to_csv(new_filename, index=False)
                print(f"Data saved to {new_filename}")
-
-                # Ask if the user wants to open the file
-                open_choice = input("Do you want to open the saved file? (YES/NO): ").upper()
-                if open_choice == 'YES':
-                    os.system(file_name)
        else:
            # Save to a new file
            data.to_csv(file_name, index=False)
            print(f"Data saved to {file_name}")

-            # Ask if the user wants to open the file
-            open_choice = input("Do you want to open the saved file? (YES/NO): ").upper()
-            if open_choice == 'YES':
-                os.system(file_name)
    else:
        print("Data not saved.")



-
 # début de l'algorithme de recommandation 

 # Load the CSV file