diff --git a/.DS_Store b/.DS_Store index 476cff3271cdbbab862a81ad2a8e56af759ca6b7..a05bdbf17207db37fd9f7d4ef9dab08547ed1654 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/Data_Base.xlsx b/Data_Base.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..467cf6659b1278f0a87da721fbdd0895369da3f1 Binary files /dev/null and b/Data_Base.xlsx differ diff --git a/projet_en_groupe/.DS_Store b/projet_en_groupe/.DS_Store index cee0b65dddcd5835972253a55dd2037bd4548c61..002561828bb5df759e0eb963818a54096c654419 100644 Binary files a/projet_en_groupe/.DS_Store and b/projet_en_groupe/.DS_Store differ diff --git a/projet_en_groupe/algorithme_netflix.py b/projet_en_groupe/algorithme_netflix.py index a1dfdc15e5d426f822eac5a4cc104d8cce503bcd..911459a9517246c68a6a59a26f7a54a49017658f 100644 --- a/projet_en_groupe/algorithme_netflix.py +++ b/projet_en_groupe/algorithme_netflix.py @@ -474,32 +474,12 @@ def save_to_csv(data, default_filename='output.csv'): # début de l'algorithme de recommandation # Load the CSV file -""" -def print_matrix(data_1): - # Initialize the category matrix with zeros - unique_categories = set(category for categories in data_1['listed_in'].str.split(', ') for category in categories) - category_matrix = {category: {other_category: 0 for other_category in unique_categories} for category in unique_categories} +categories = [] - # Fill the matrix with the number of movies/series for each category pair - for categories in data_1['listed_in'].str.split(', '): - for category1 in categories: - for category2 in categories: - category_matrix[category1][category2] += 1 - category_df = pd.DataFrame(category_matrix) - - # Save the DataFrame to an Excel file - output_file = 'category_matrix.xlsx' - category_df.to_excel(output_file, index_label='Categories') - - print(f"Category matrix saved to {output_file}") - return category_matrix""" - - - -def read_movie_series_info(file_path_1): +def read_movie_series_info(file_path): catalog = {} - with open(file_path_1, 'r') as info_file: + with open(file_path, 'r', encoding='utf-8') as info_file: info_reader = csv.reader(info_file) next(info_reader) # Skip header row for row in info_reader: @@ -507,57 +487,106 @@ def read_movie_series_info(file_path_1): catalog[show_id] = [title, listed_in.split(', ')] return catalog -def read_user_ratings(file_path_2): +def read_user_ratings(file_path): ratings = {} - user_ids = [] # Assurez-vous d'initialiser la liste user_ids - with open(file_path_2, 'r') as ratings_file: + with open(file_path, 'r', encoding='utf-8') as ratings_file: ratings_reader = csv.reader(ratings_file) - header = next(ratings_reader) # Lire la première ligne pour obtenir les ID des utilisateurs - user_ids = header[1:] # Les ID des utilisateurs sont dans la première ligne, sauf la première colonne (show_id) + header = next(ratings_reader) # Skip header row + user_ids = list(map(int, header[1:])) + for row in ratings_reader: show_id = row[0] user_ratings = list(map(int, row[1:])) - ratings[show_id] = user_ratings - return ratings, user_ids + ratings[show_id] = dict(zip(user_ids, user_ratings)) + + return ratings -def create_category_matrix(catalog, ratings, categories): - category_matrix = [[0 for _ in range(len(categories))] for _ in range(len(categories))] +def create_category_matrix(catalog, categories): + # Créez la matrice avec une rangée et une colonne supplémentaires pour les noms de catégories + category_matrix = [[0 for _ in range(len(categories) + 1)] for _ in range(len(categories) + 1)] + # Ajoutez les noms de catégories à la première ligne et à la première colonne + for i in range(len(categories)): + category_matrix[0][i + 1] = categories[i] # Ajoutez les noms de catégories à la première ligne + category_matrix[i + 1][0] = categories[i] # Ajoutez les noms de catégories à la première colonne + + # Remplissez la matrice avec les données for show_id, movie_categories in catalog.items(): for i in range(len(categories)): if categories[i] in movie_categories[1]: for j in range(len(categories)): if categories[j] in movie_categories[1]: - category_matrix[i][j] += ratings[show_id][j] - + category_matrix[i + 1][j + 1] += 1 # Commencez à remplir à partir de la deuxième ligne et de la deuxième colonne + # Ajoutez les noms de catégories à la première colonne et les données de la matrice return category_matrix -def calculate_user_preferences(category_matrix, user_preferences, categories): - user_vector = [] - print("Length of user_preferences:", len(user_preferences)) - print("Length of categories:", len(categories)) +def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=0.5): + global categories # Déclarer categories en tant que variable globale + categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1])) # permet d'actualiser la variable catégories global au niveau local - # Check if user_preferences is not empty and has the same length as categories - if len(user_preferences) != len(categories) or not user_preferences: - print("Error: Invalid user preferences") - return [] + user_id = int(user_id) # Convertir user_id en entier - for i in range(len(categories)): - user_vector.append(sum(category_matrix[i][j] * user_preferences[j] for j in range(len(categories)))) + suggestions = {} + category_index = {} + + # Créer le dictionnaire pour stocker les indices des catégories + category_index = {category: i + 1 for i, category in enumerate(categories)} + for show_id, categories in catalog.items(): + # Check if the user has rated the show + if show_id in user_ratings and user_id in user_ratings[show_id] and user_ratings[show_id][user_id] == 0: + # Liste des catégories communes entre le film/série et les films/séries notés par l'utilisateur + common_categories = [category for category in categories[1] if category in catalog[show_id][1]] + if common_categories: + # Calculez la similarité entre le film/série et les films/séries notés par l'utilisateur + similarity = sum( + min(category_matrix[category_index[category]][category_index[user_category]] for user_category in common_categories) + for category in categories[1] + ) + + # Ne recommandez que des films/séries dont la similarité dépasse le seuil spécifié + if similarity > threshold: + suggestions[show_id] = {'title': catalog[show_id][0], 'similarity': similarity} + + # Triez les suggestions par similarité décroissante + sorted_suggestions = sorted(suggestions.items(), key=lambda x: x[1]['similarity'], reverse=True) + + return sorted_suggestions[:5] + +def recommandation_algorithm() : + + # Replace file_path_1 and file_path_2 with the actual file paths + file_path_1 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv" + file_path_2 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv" + + user_id = input("quel est ton user ? ") + + try: + user_id = int(user_id) + except ValueError: + print("Veuillez entrer un identifiant d'utilisateur valide.") + exit() - return user_vector + # Read data from CSV files + catalog = read_movie_series_info(file_path_1) + ratings = read_user_ratings(file_path_2) + # Create category matrix + categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1])) + category_matrix = create_category_matrix(catalog, categories) -def recommend_movies(catalog, ratings, category_matrix, user_preferences, categories): - recommended_movies = [] + # Display movies already viewed by the user + print("Films déjà vus par l'utilisateur:") + for show_id, user_rating in ratings.items(): + if user_id in user_rating and user_rating[user_id] > 0: + print(f"- {catalog[show_id][0]}") - for show_id, (title, movie_categories) in catalog.items(): - if show_id not in ratings: - movie_score = sum(category_matrix[i][j] * user_preferences[j] for i, j in enumerate(categories) if categories[i] in movie_categories[1]) - recommended_movies.append((title, movie_score)) + # Recommend movies + recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5) - recommended_movies.sort(key=lambda x: x[1], reverse=True) - return recommended_movies[:5] + # Display top 5 recommendations + print("\nTop 5 recommandations:") + for show_id, info in recommended_movies: + print(f"Title: {info['title']}, Similarity: {info['similarity']}") # Création du menu @@ -618,55 +647,8 @@ def action() : directors_nationality(data_1) elif command == "17" : basic_statistics(data_1) - elif command == "test" : - # Read data from CSV files - catalog = read_movie_series_info(file_path_1) - ratings, user_ids = read_user_ratings(file_path_2) - - transposed_ratings = {user_id: {} for user_id in user_ids} - for show_id, user_ratings in ratings.items(): - for i, user_id in enumerate(user_ids): - transposed_ratings[user_id][show_id] = user_ratings[i] - - print("User IDs in ratings:", transposed_ratings.keys()) - # Check if the user ID exists in the ratings dictionary - user_id_to_recommend = input("Enter your user ID: ") - user_id_to_recommend = str(user_id_to_recommend) - - # Check if the user ID exists in the ratings dictionary - if user_id_to_recommend not in transposed_ratings: - print(f"User with ID '{user_id_to_recommend}' not found in ratings.") - else: - # Get categories that exist in both catalog and user ratings - unique_types = file_path_1['listed_in'].unique() - - print("Common Categories:", unique_types) - - # Create category matrix - categories = list(common_categories) - print("Categories:", categories) - - category_matrix = create_category_matrix(catalog, transposed_ratings, categories) - - - # Use user ratings as preferences - user_preferences = [transposed_ratings[user_id_to_recommend][category] for category in categories] - print("User Preferences:", user_preferences) - - # Calculate user preferences - user_vector = calculate_user_preferences(category_matrix, user_preferences, categories) - - # Recommend movies - recommended_movies = recommend_movies(catalog, transposed_ratings, category_matrix, user_vector, categories) - - # Display top 5 recommendations - print(recommended_movies) - elif command == "18" : - user_id_to_recommend = input("Enter your user ID: ") - # Assuming 'catalog', 'ratings', 'category_matrix', 'categories' are available from your data loading - recommended_movies = recommend_movies(catalog, ratings, category_matrix, user_preferences, categories) - print(recommended_movies) + recommandation_algorithm() elif command == "..." : return False diff --git a/projet_en_groupe/recom_sys.py b/projet_en_groupe/recom_sys.py index f61d2a2d742f4a0f599285328ba22d2b3bb9edf4..28a982b399fcdad34ea2a053e59b729c669b1c82 100644 --- a/projet_en_groupe/recom_sys.py +++ b/projet_en_groupe/recom_sys.py @@ -56,6 +56,7 @@ def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold= # Créer le dictionnaire pour stocker les indices des catégories category_index = {category: i + 1 for i, category in enumerate(categories)} + print(category_index) for show_id, categories in catalog.items(): # Check if the user has rated the show @@ -100,10 +101,10 @@ if __name__ == "__main__": category_matrix = create_category_matrix(catalog, categories) # Display movies already viewed by the user - print("Films déjà vus par l'utilisateur:") + """print("Films déjà vus par l'utilisateur:") for show_id, user_rating in ratings.items(): if user_id in user_rating and user_rating[user_id] > 0: - print(f"- {catalog[show_id][0]}") + print(f"- {catalog[show_id][0]}")""" # Recommend movies recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5)