"""Category-based recommendation helpers for the Netflix catalogue.

These functions are the recommendation code added to
``projet_en_groupe/algorithme_netflix.py``:

* ``print_matrix`` builds (and optionally saves) the category
  co-occurrence matrix of the catalogue.
* ``recommend_movies`` ranks categories from one user's ratings and picks
  catalogue titles the user has not rated yet.
* ``print_user_matrix`` ties both together for one user and saves the
  user/title rating matrix.
"""

import pandas as pd


# Start of the recommendation algorithm


def _split_categories(data_1):
    """Return one list of category names per row of ``data_1['listed_in']``.

    Cells that are not strings (for example NaN for a missing value) give
    an empty list instead of raising while being iterated.
    """
    rows = []
    for cell in data_1['listed_in']:
        if isinstance(cell, str):
            rows.append([part.strip() for part in cell.split(',') if part.strip()])
        else:
            rows.append([])
    return rows


def _build_category_matrix(data_1):
    """Count, for every pair of categories, the rows that list both."""
    rows = _split_categories(data_1)
    # Sorted so the matrix (and the exported sheet) has a stable order.
    unique_categories = sorted({category for row in rows for category in row})
    category_matrix = {
        category: {other_category: 0 for other_category in unique_categories}
        for category in unique_categories
    }
    for row in rows:
        for category1 in row:
            for category2 in row:
                category_matrix[category1][category2] += 1
    return category_matrix


def _title_categories(catalog):
    """Map each catalogue title to its categories, in catalogue order."""
    if catalog is None:
        return {}
    mapping = {}
    for title, categories in zip(catalog['title'], _split_categories(catalog)):
        if not isinstance(title, str):
            continue
        known = mapping.setdefault(title, [])
        # Several rows may share a title; merge their categories.
        known.extend(c for c in categories if c not in known)
    return mapping


def print_matrix(data_1, output_file='category_matrix.xlsx'):
    """Build the category co-occurrence matrix and optionally save it.

    Args:
        data_1: Catalogue DataFrame with a ``listed_in`` column holding
            comma-separated category names.
        output_file: Path of the Excel file to write. Pass ``None`` to
            skip writing and printing and only get the matrix back.

    Returns:
        A dict of dicts: ``matrix[a][b]`` is the number of rows whose
        ``listed_in`` contains both ``a`` and ``b`` (``matrix[a][a]`` is
        the number of rows listing ``a``).
    """
    category_matrix = _build_category_matrix(data_1)

    if output_file is not None:
        category_df = pd.DataFrame(category_matrix)
        category_df.to_excel(output_file, index_label='Categories')
        print(f"Category matrix saved to {output_file}")

    return category_matrix


def recommend_movies(user_ratings, category_matrix, unique_categories,
                     num_recommendations=5, catalog=None):
    """Recommend catalogue titles the user has not rated yet.

    Args:
        user_ratings: Mapping or ``pandas.Series`` of ``label -> rating``.
            A label that is a key of ``category_matrix`` is a rating for
            that category. Otherwise, a label that is a title of
            ``catalog`` is a rating for that title and is applied to each
            of the title's categories. The label ``'title'`` and missing
            (NaN) or non-numeric ratings are ignored.
            NOTE(review): NaN is taken to mean "not rated"; confirm that
            the ratings file does not encode "not rated" as 0.
        category_matrix: Co-occurrence matrix as returned by
            ``print_matrix``.
        unique_categories: Iterable of the category names to rank.
        num_recommendations: Maximum number of top categories considered
            and maximum number of titles returned.
        catalog: Catalogue DataFrame with ``title`` and ``listed_in``
            columns. Without it no title can be looked up and the result
            is an empty list.

    Returns:
        A list of at most ``num_recommendations`` titles, ordered by
        category score and then by catalogue order.
    """
    if num_recommendations <= 0:
        return []

    categories = sorted(set(unique_categories))
    titles = _title_categories(catalog)

    # Fold the ratings into one score per category.
    category_ratings = {}
    rated_titles = set()
    for label, rating in user_ratings.items():
        if label == 'title' or pd.isna(rating):
            continue
        try:
            value = float(rating)
        except (TypeError, ValueError):
            continue
        if label in category_matrix:
            targets = [label]
        elif label in titles:
            rated_titles.add(label)
            targets = titles[label]
        else:
            continue
        for category in targets:
            category_ratings[category] = category_ratings.get(category, 0.0) + value

    # Weighted sum of the co-occurrence rows of the rated categories.
    weighted_sum = {category: 0.0 for category in categories}
    for category, value in category_ratings.items():
        row = category_matrix.get(category)
        if row is None:
            continue
        for other_category in categories:
            weighted_sum[other_category] += value * row.get(other_category, 0)

    # Best categories first; ties are broken by name so the result is stable.
    # Categories without a positive score say nothing about the user's taste.
    ranked = sorted(categories, key=lambda c: (-weighted_sum[c], c))
    top_categories = [c for c in ranked if weighted_sum[c] > 0][:num_recommendations]

    recommended_movies = []
    excluded = set(rated_titles)
    for category in top_categories:
        for title, title_categories in titles.items():
            if title in excluded or category not in title_categories:
                continue
            recommended_movies.append(title)
            excluded.add(title)
            if len(recommended_movies) == num_recommendations:
                return recommended_movies

    return recommended_movies


def print_user_matrix(data_1, data_2, user_id_to_recommend, category_matrix=None,
                      num_recommendations=5, output_file="user_item_matrix.xlsx"):
    """Print recommendations for one user and save the user/title matrix.

    Args:
        data_1: Catalogue DataFrame with ``show_id``, ``title`` and
            ``listed_in`` columns.
        data_2: Ratings DataFrame with ``show_id`` and
            ``appreciation (%)`` columns; every remaining column is
            treated as one user's ratings.
            NOTE(review): this layout is inferred from the transposed
            matrix being saved with ``index_label='user_id'``; confirm
            against ratings.csv.
        user_id_to_recommend: Name of the user's column in ``data_2``;
            compared as text, so ``"12"`` matches a column named ``12``.
        category_matrix: Matrix from ``print_matrix``; built (and saved)
            from ``data_1`` when omitted.
        num_recommendations: Maximum number of titles to print.
        output_file: Path of the Excel file for the transposed user/title
            matrix. Pass ``None`` to skip writing it.

    Returns:
        None. Results are printed.
    """
    # Load the category matrix if not provided
    if category_matrix is None:
        category_matrix = print_matrix(data_1)

    # Attach titles through show_id rather than by row position, so the
    # two files do not have to be in the same order. Ratings whose show
    # is not in the catalogue are dropped.
    titled = pd.merge(data_2, data_1[['show_id', 'title']], on='show_id')
    data_of_users = titled.drop(['show_id', 'appreciation (%)'], axis=1)

    # One row per user, one column per title.
    user_item_matrix = data_of_users.set_index('title').transpose()

    unique_categories = sorted(
        {category for row in _split_categories(data_1) for category in row}
    )

    # The id usually comes from input(), so compare labels as text.
    labels = {str(label): label for label in user_item_matrix.index}
    wanted = str(user_id_to_recommend).strip()
    if wanted not in labels:
        print(f"Unknown user {user_id_to_recommend}.")
        return

    user_ratings = user_item_matrix.loc[labels[wanted]]

    recommendations = recommend_movies(
        user_ratings, category_matrix, unique_categories,
        num_recommendations, catalog=data_1,
    )

    print(f"Top {num_recommendations} recommendations for user {user_id_to_recommend}:")
    for movie_title in recommendations:
        print(movie_title)

    if output_file is not None:
        user_item_matrix.to_excel(output_file, index_label='user_id')
        print(f"Transposed User-item matrix saved to {output_file}")
    return


# Menu wiring in action():
#   command "A" -> print_matrix(data_1)
#   command "B" -> asks "Quel utilisateur es-tu ? " and calls
#                  print_user_matrix(data_1, data_2, user_id_to_recommend)