From f14b2969b1d0db09541c6204c2be7e59a7215228 Mon Sep 17 00:00:00 2001
From: Adrienucl <adrien.payen@student.uclouvain.be>
Date: Mon, 4 Dec 2023 13:20:24 +0100
Subject: [PATCH] first part 90% done

---
 projet_en_groupe/algorithme_netflix.py | 110 ++++++++++++-------------
 1 file changed, 51 insertions(+), 59 deletions(-)

diff --git a/projet_en_groupe/algorithme_netflix.py b/projet_en_groupe/algorithme_netflix.py
index c4392c5..051bb9a 100644
--- a/projet_en_groupe/algorithme_netflix.py
+++ b/projet_en_groupe/algorithme_netflix.py
@@ -273,80 +273,73 @@ def specific_genre_actor(data_1):
         save_to_csv(actor_type_data)
     else:
         print(f"No movies or series found for the actor {actor_input} and type {type_input}.")
+    return
 
 
-def most_rated(data_1):
-    filtered_data = filter_media_type(data_1)
+# rating
 
-    # Check if the 'rating' column exists in the dataset
-    if 'rating' not in filtered_data.columns:
-        print("The dataset does not contain a 'rating' column.")
-        return
+# these variables need to be defined at module level, not inside a local function
+notes = data_2.drop('show_id', axis = 1)
+mean_type = notes.mean(axis = 1) * 100
+data_2['appreciation (%)'] = mean_type
 
-    # Sort the data by the 'rating' column in descending order
-    sorted_data = filtered_data.sort_values(by='rating', ascending=False)
+def most_rated(data_1, data_2) :
 
-    # Display the top-rated movies and/or series
-    if not sorted_data.empty:
-        print("Top-rated movies and/or series:")
-        print(sorted_data[['title', 'type', 'rating']].head(10))  # Display the top 10
-        save_to_csv(sorted_data)
-    else:
-        print("No rated movies or series found.")
+    filtered_data = filter_media_type(data_1)
 
+    link_between =  pd.merge(filtered_data,data_2, on='show_id')
 
-def most_rated_year(data_1):
-    filtered_data = filter_media_type(data_1)
+    link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)
 
-    # Check if the 'rating' and 'release_year' columns exist in the dataset
-    if 'rating' not in filtered_data.columns or 'release_year' not in filtered_data.columns:
-        print("The dataset does not contain the necessary columns.")
-        return
+    print("Films et séries les mieux notés :")
+    print(link_between_sorted[['show_id', 'title', 'type', 'appreciation (%)']])
+    save_to_csv(link_between_sorted)
+    return
 
-    year_input = input("Enter the year to show the most rated movies and/or series: ")
+
+def most_rated_year(data_1, data_2):
+    # Display all available unique release years
+    available_years = data_1['release_year'].unique()
+    print("Available years: ", available_years)
+
+    # Ask the user to enter a release year
+    year = input("Enter a release year: ")
 
     try:
-        year_input = int(year_input)
+        # Convert the year to an integer
+        year = int(year)
     except ValueError:
-        print("Invalid input. Please enter a valid year.")
+        print("Please enter a valid year.")
         return
 
-    # Filter the data for the specified year
-    year_data = filtered_data[filtered_data['release_year'] == year_input]
-
-    if not year_data.empty:
-        # Sort the data by the 'rating' column in descending order
-        sorted_data = year_data.sort_values(by='rating', ascending=False)
+    # Filter the data based on the release year
+    filtered_data = filter_media_type(data_1[data_1['release_year'] == year])
 
-        # Display the top-rated movies and/or series in the specified year
-        print(f"\nTop-rated movies and/or series in {year_input}:")
-        print(sorted_data[['title', 'type', 'rating']].head(10))  # Display the top 10
-        save_to_csv(sorted_data)
-    else:
-        print(f"No rated movies or series found for the year {year_input}.")
-
-
-def most_rated_recent(data_1):
-    filtered_data = filter_media_type(data_1)
+    # Merge the DataFrames on the 'show_id' key
+    link_between = pd.merge(filtered_data, data_2, on='show_id')
 
-    # Check if the 'rating' and 'date_added' columns exist in the dataset
-    if 'rating' not in filtered_data.columns or 'date_added' not in filtered_data.columns:
-        print("The dataset does not contain the necessary columns.")
-        return
+    # Sort the DataFrame by the 'appreciation (%)' column (in descending order)
+    link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)
 
-    # Sort the data by the 'rating' column in descending order and 'date_added' in descending order
-    sorted_data = filtered_data.sort_values(by=['rating', 'date_added'], descending=[False, True])
+    print(f"Top-rated shows for the year {year}:")
+    print(link_between_sorted[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
+    save_to_csv(link_between_sorted)
+    return
 
-    # Display the most rated and recent movies and/or series
-    if not sorted_data.empty:
-        print("Most rated and recent movies and/or series:")
-        print(sorted_data[['title', 'type', 'rating', 'date_added']].head(10))  # Display the top 10
-        save_to_csv(sorted_data)
-    else:
-        print("No rated and recent movies or series found.")
 
+def most_rated_recent(data_1, data_2):
+    # Merge the DataFrames on the 'show_id' key
+    merged_data = pd.merge(data_1, data_2, on='show_id')
 
+    # Sort the DataFrame by 'release_year' first, then by 'appreciation (%)' (both in descending order)
+    sorted_data = merged_data.sort_values(by=['release_year', 'appreciation (%)'], ascending=[False, False])
 
+    # Display the most rated and recent shows
+    top_20_data = sorted_data.head(20)
+    print("Top 20 most rated and recent shows:")
+    print(top_20_data[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
+    save_to_csv(top_20_data)
+    return
 
 
 # Example usage
@@ -360,6 +353,7 @@ def parental_code(data_1):
                 code_list.append(code)
     print("Here are the parental codes: ")
     print(code_list)
+    save_to_csv(code_list)
 
 
 
@@ -440,9 +434,7 @@ def basic_statistics(data_1):
     country_counts = data_1['country'].str.split(', ').explode().value_counts()
     print("\nCountries that produced movies/series, sorted from most to least productive:")
     print(country_counts)
-    save_to_csv(pd.DataFrame({'Country Count' : [country_counts], 'Movies Count': [movies_count], 'Series Count': [series_count]}), 'counts.csv')
-
-
+    return
 
 
  # attention il faut demander à chaque fois, s'il désire enregistrer la liste sur un .csv
@@ -545,11 +537,11 @@ def action() :
   elif command == "11" :
     specific_genre_actor(data_1)
   elif command == "12" :
-    most_rated(data_1)
+    most_rated(data_1, data_2)
   elif command == "13" :
-    most_rated_year(data_1)
+    most_rated_year(data_1, data_2)
   elif command == "14" :
-    most_rated_recent(data_1)
+    most_rated_recent(data_1, data_2)
   elif command == "15" :
     parental_code(data_1)
   elif command == "16" :
-- 
GitLab