first part 90% done

f14b2969 · Adrien Payen · 343eefbf · f14b2969
--- a/projet_en_groupe/algorithme_netflix.py
+++ b/projet_en_groupe/algorithme_netflix.py
@@ -273,80 +273,73 @@ def specific_genre_actor(data_1):
        save_to_csv(actor_type_data)
    else:
        print(f"No movies or series found for the actor {actor_input} and type {type_input}.")
+    return
-def most_rated(data_1):
+# rating
-    filtered_data = filter_media_type(data_1)
-    # Check if the 'rating' column exists in the dataset
+# These variables need to be defined at module level, not inside a local function
-    if 'rating' not in filtered_data.columns:
+notes = data_2.drop('show_id', axis = 1)
-        print("The dataset does not contain a 'rating' column.")
+mean_type = notes.mean(axis = 1) * 100
-        return
+data_2['appreciation (%)'] = mean_type
-    # Sort the data by the 'rating' column in descending order
+def most_rated(data_1, data_2) :
-    sorted_data = filtered_data.sort_values(by='rating', ascending=False)
-    # Display the top-rated movies and/or series
+    filtered_data = filter_media_type(data_1)
-    if not sorted_data.empty:
-        print("Top-rated movies and/or series:")
-        print(sorted_data[['title', 'type', 'rating']].head(10))  # Display the top 10
-        save_to_csv(sorted_data)
-    else:
-        print("No rated movies or series found.")
+    link_between =  pd.merge(filtered_data,data_2, on='show_id')
-def most_rated_year(data_1):
+    link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)
-    filtered_data = filter_media_type(data_1)
-    # Check if the 'rating' and 'release_year' columns exist in the dataset
+    print("Films et séries les mieux notés :")
-    if 'rating' not in filtered_data.columns or 'release_year' not in filtered_data.columns:
+    print(link_between_sorted[['show_id', 'title', 'type', 'appreciation (%)']])
-        print("The dataset does not contain the necessary columns.")
+    save_to_csv(link_between_sorted)
-        return
+    return
-    year_input = input("Enter the year to show the most rated movies and/or series: ")
+def most_rated_year(data_1, data_2):
+    # Display all available unique release years
+    available_years = data_1['release_year'].unique()
+    print("Available years: ", available_years)
+    # Ask the user to enter a release year
+    year = input("Enter a release year: ")
    try:
-        year_input = int(year_input)
+        # Convert the year to an integer
+        year = int(year)
    except ValueError:
-        print("Invalid input. Please enter a valid year.")
+        print("Please enter a valid year.")
        return
-    # Filter the data for the specified year
+    # Filter the data based on the release year
-    year_data = filtered_data[filtered_data['release_year'] == year_input]
+    filtered_data = filter_media_type(data_1[data_1['release_year'] == year])
-    if not year_data.empty:
-        # Sort the data by the 'rating' column in descending order
-        sorted_data = year_data.sort_values(by='rating', ascending=False)
-        # Display the top-rated movies and/or series in the specified year
+    # Merge the DataFrames on the 'show_id' key
-        print(f"\nTop-rated movies and/or series in {year_input}:")
+    link_between = pd.merge(filtered_data, data_2, on='show_id')
-        print(sorted_data[['title', 'type', 'rating']].head(10))  # Display the top 10
-        save_to_csv(sorted_data)
-    else:
-        print(f"No rated movies or series found for the year {year_input}.")
-def most_rated_recent(data_1):
-    filtered_data = filter_media_type(data_1)
-    # Check if the 'rating' and 'date_added' columns exist in the dataset
+    # Sort the DataFrame by the 'appreciation (%)' column (in descending order)
-    if 'rating' not in filtered_data.columns or 'date_added' not in filtered_data.columns:
+    link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)
-        print("The dataset does not contain the necessary columns.")
-        return
-    # Sort the data by the 'rating' column in descending order and 'date_added' in descending order
+    print(f"Top-rated shows for the year {year}:")
-    sorted_data = filtered_data.sort_values(by=['rating', 'date_added'], descending=[False, True])
+    print(link_between_sorted[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
+    save_to_csv(link_between_sorted)
+    return
-    # Display the most rated and recent movies and/or series
-    if not sorted_data.empty:
-        print("Most rated and recent movies and/or series:")
-        print(sorted_data[['title', 'type', 'rating', 'date_added']].head(10))  # Display the top 10
-        save_to_csv(sorted_data)
-    else:
-        print("No rated and recent movies or series found.")
+def most_rated_recent(data_1, data_2):
+    # Merge the DataFrames on the 'show_id' key
+    merged_data = pd.merge(data_1, data_2, on='show_id')
+    # Sort the DataFrame by 'release_year' first, then by 'appreciation (%)' (both in descending order)
+    sorted_data = merged_data.sort_values(by=['release_year', 'appreciation (%)'], ascending=[False, False])
+    # Display the most rated and recent shows
+    top_20_data = sorted_data.head(20)
+    print("Top 20 most rated and recent shows:")
+    print(top_20_data[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
+    save_to_csv(top_20_data)
+    return
 # Example usage
@@ -360,6 +353,7 @@ def parental_code(data_1):
                code_list.append(code)
    print("Here are the parental codes: ")
    print(code_list)
+    save_to_csv(code_list)
@@ -440,9 +434,7 @@ def basic_statistics(data_1):
    country_counts = data_1['country'].str.split(', ').explode().value_counts()
    print("\nCountries that produced movies/series, sorted from most to least productive:")
    print(country_counts)
-    save_to_csv(pd.DataFrame({'Country Count' : [country_counts], 'Movies Count': [movies_count], 'Series Count': [series_count]}), 'counts.csv')
+    return
 # attention il faut demander à chaque fois, s'il désire enregistrer la liste sur un .csv
@@ -545,11 +537,11 @@ def action() :
  elif command == "11" :
    specific_genre_actor(data_1)
  elif command == "12" :
-    most_rated(data_1)
+    most_rated(data_1, data_2)
  elif command == "13" :
-    most_rated_year(data_1)
+    most_rated_year(data_1, data_2)
  elif command == "14" :
-    most_rated_recent(data_1)
+    most_rated_recent(data_1, data_2)
  elif command == "15" :
    parental_code(data_1)
  elif command == "16" :