Skip to content
Extraits de code Groupes Projets
Valider bf128f1a rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

last update with the recommendation system

parent 20aebc34
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
Fichier ajouté
Aucun aperçu pour ce type de fichier
......@@ -474,32 +474,12 @@ def save_to_csv(data, default_filename='output.csv'):
# début de l'algorithme de recommandation
# Load the CSV file
"""
def print_matrix(data_1):
# Initialize the category matrix with zeros
unique_categories = set(category for categories in data_1['listed_in'].str.split(', ') for category in categories)
category_matrix = {category: {other_category: 0 for other_category in unique_categories} for category in unique_categories}
categories = []
# Fill the matrix with the number of movies/series for each category pair
for categories in data_1['listed_in'].str.split(', '):
for category1 in categories:
for category2 in categories:
category_matrix[category1][category2] += 1
category_df = pd.DataFrame(category_matrix)
# Save the DataFrame to an Excel file
output_file = 'category_matrix.xlsx'
category_df.to_excel(output_file, index_label='Categories')
print(f"Category matrix saved to {output_file}")
return category_matrix"""
def read_movie_series_info(file_path_1):
def read_movie_series_info(file_path):
catalog = {}
with open(file_path_1, 'r') as info_file:
with open(file_path, 'r', encoding='utf-8') as info_file:
info_reader = csv.reader(info_file)
next(info_reader) # Skip header row
for row in info_reader:
......@@ -507,57 +487,106 @@ def read_movie_series_info(file_path_1):
catalog[show_id] = [title, listed_in.split(', ')]
return catalog
def read_user_ratings(file_path):
    """Load the ratings CSV into a nested ``{show_id: {user_id: rating}}`` dict.

    The first row of the file is a header: its first cell labels the show-id
    column and every following cell is a user id. Each later row holds a show
    id followed by one integer rating per user, in header order.

    Args:
        file_path: Path of the ratings CSV file (read as UTF-8).

    Returns:
        A dict mapping each show id (str) to a dict mapping each user id (int)
        to that user's rating (int). An empty file yields an empty dict.

    Raises:
        ValueError: If a user id or a rating cell is not an integer.
        OSError: If the file cannot be opened.
    """
    ratings = {}
    # newline='' is what the csv module asks for so that quoted fields with
    # embedded newlines are parsed correctly.
    with open(file_path, 'r', encoding='utf-8', newline='') as ratings_file:
        ratings_reader = csv.reader(ratings_file)
        header = next(ratings_reader, None)
        if header is None:
            # Completely empty file: nothing to load.
            return ratings
        user_ids = [int(cell) for cell in header[1:]]
        for row in ratings_reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no rating.
                continue
            show_id = row[0]
            user_ratings = [int(cell) for cell in row[1:]]
            ratings[show_id] = dict(zip(user_ids, user_ratings))
    return ratings
def create_category_matrix(catalog, categories):
    """Build a labelled category co-occurrence matrix.

    The returned matrix has one extra row and one extra column: row 0 and
    column 0 carry the category names (cell ``[0][0]`` stays ``0``), and cell
    ``[i + 1][j + 1]`` counts the catalog entries listed in both
    ``categories[i]`` and ``categories[j]``. The diagonal therefore counts the
    entries listed in a single category.

    Args:
        catalog: Dict mapping a show id to ``[title, list_of_categories]``.
        categories: Ordered list of category names defining rows and columns.

    Returns:
        A list of lists of size ``len(categories) + 1`` squared.
    """
    size = len(categories) + 1
    category_matrix = [[0] * size for _ in range(size)]

    # Label the first row and the first column with the category names.
    for position, name in enumerate(categories, start=1):
        category_matrix[0][position] = name
        category_matrix[position][0] = name

    # Counts start at row/column 1 so the labels are never overwritten.
    for movie_info in catalog.values():
        show_categories = set(movie_info[1])
        # Resolve the matrix positions of this show's categories once, instead
        # of re-testing every category pair against the show.
        present = [
            position
            for position, name in enumerate(categories, start=1)
            if name in show_categories
        ]
        for row in present:
            for column in present:
                category_matrix[row][column] += 1
    return category_matrix
def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=0.5):
    """Suggest up to five shows the user has not rated yet.

    A show is a candidate when the user's rating for it is exactly ``0``.
    Candidates are scored against the categories of the shows the user rated
    above ``0``: for every category of the candidate, the smallest
    co-occurrence count with the categories it shares with the user's rated
    shows is taken, and these minima are summed.

    Args:
        user_id: User identifier; converted with ``int()``.
        catalog: Dict mapping a show id to ``[title, list_of_categories]``.
        user_ratings: Dict mapping a show id to ``{user_id: rating}``.
        category_matrix: Labelled co-occurrence matrix whose first row holds
            the category names from column 1 onwards.
        threshold: A candidate is kept only if its score is strictly greater.

    Returns:
        A list of at most five ``(show_id, {'title': ..., 'similarity': ...})``
        tuples sorted by decreasing similarity. Empty when the user is unknown
        or shares no category with any unrated show.
    """
    user_id = int(user_id)

    # Read the column positions from the matrix's own header row, so the
    # lookup cannot drift from the order the matrix was built with and no
    # module-level `categories` variable is needed (or overwritten).
    header = category_matrix[0][1:] if category_matrix else []
    category_index = {name: column for column, name in enumerate(header, start=1)}

    # Categories of every show this user rated above 0.
    liked_categories = set()
    for rated_id, show_ratings in user_ratings.items():
        if rated_id in catalog and show_ratings.get(user_id, 0) > 0:
            liked_categories.update(catalog[rated_id][1])

    suggestions = {}
    for show_id, movie_info in catalog.items():
        show_ratings = user_ratings.get(show_id)
        # Only shows the user is known for and has rated 0 are candidates.
        if show_ratings is None or show_ratings.get(user_id) != 0:
            continue

        # Ignore categories the matrix does not know about.
        known_categories = [name for name in movie_info[1] if name in category_index]
        common_categories = [name for name in known_categories if name in liked_categories]
        if not common_categories:
            continue

        similarity = sum(
            min(
                category_matrix[category_index[name]][category_index[liked]]
                for liked in common_categories
            )
            for name in known_categories
        )
        if similarity > threshold:
            suggestions[show_id] = {'title': movie_info[0], 'similarity': similarity}

    # sorted() is stable, so ties keep catalog order.
    sorted_suggestions = sorted(
        suggestions.items(), key=lambda item: item[1]['similarity'], reverse=True
    )
    return sorted_suggestions[:5]
def recommandation_algorithm(
    file_path_1="/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv",
    file_path_2="/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv",
):
    """Ask for a user id, then print that user's seen shows and suggestions.

    Args:
        file_path_1: Path of the catalog CSV. Defaults to the original
            hard-coded location so existing no-argument calls keep working.
        file_path_2: Path of the ratings CSV, defaulted the same way.

    Returns:
        None. Results are printed. When the typed id is not an integer a
        message is printed and the function returns without reading any file.
    """
    raw_user_id = input("quel est ton user ? ")
    try:
        user_id = int(raw_user_id)
    except ValueError:
        print("Veuillez entrer un identifiant d'utilisateur valide.")
        # Return instead of exit(): this runs from the menu loop, and a typo
        # should not terminate the whole program.
        return

    # Read data from the CSV files.
    catalog = read_movie_series_info(file_path_1)
    ratings = read_user_ratings(file_path_2)

    # Build the category matrix. The category list is constructed exactly as
    # recommend_movies constructs its own, so both agree on the ordering.
    categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1]))
    category_matrix = create_category_matrix(catalog, categories)

    # Display the shows the user already rated above 0.
    print("Films déjà vus par l'utilisateur:")
    for show_id, user_rating in ratings.items():
        # Skip rated shows missing from the catalog rather than raising KeyError.
        if user_rating.get(user_id, 0) > 0 and show_id in catalog:
            print(f"- {catalog[show_id][0]}")

    recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5)

    # recommend_movies already returns the list sorted and capped at five.
    print("\nTop 5 recommandations:")
    for show_id, info in recommended_movies:
        print(f"Title: {info['title']}, Similarity: {info['similarity']}")
# Création du menu
......@@ -618,55 +647,8 @@ def action() :
directors_nationality(data_1)
elif command == "17" :
basic_statistics(data_1)
elif command == "test" :
# Read data from CSV files
catalog = read_movie_series_info(file_path_1)
ratings, user_ids = read_user_ratings(file_path_2)
transposed_ratings = {user_id: {} for user_id in user_ids}
for show_id, user_ratings in ratings.items():
for i, user_id in enumerate(user_ids):
transposed_ratings[user_id][show_id] = user_ratings[i]
print("User IDs in ratings:", transposed_ratings.keys())
# Check if the user ID exists in the ratings dictionary
user_id_to_recommend = input("Enter your user ID: ")
user_id_to_recommend = str(user_id_to_recommend)
# Check if the user ID exists in the ratings dictionary
if user_id_to_recommend not in transposed_ratings:
print(f"User with ID '{user_id_to_recommend}' not found in ratings.")
else:
# Get categories that exist in both catalog and user ratings
unique_types = file_path_1['listed_in'].unique()
print("Common Categories:", unique_types)
# Create category matrix
categories = list(common_categories)
print("Categories:", categories)
category_matrix = create_category_matrix(catalog, transposed_ratings, categories)
# Use user ratings as preferences
user_preferences = [transposed_ratings[user_id_to_recommend][category] for category in categories]
print("User Preferences:", user_preferences)
# Calculate user preferences
user_vector = calculate_user_preferences(category_matrix, user_preferences, categories)
# Recommend movies
recommended_movies = recommend_movies(catalog, transposed_ratings, category_matrix, user_vector, categories)
# Display top 5 recommendations
print(recommended_movies)
elif command == "18" :
user_id_to_recommend = input("Enter your user ID: ")
# Assuming 'catalog', 'ratings', 'category_matrix', 'categories' are available from your data loading
recommended_movies = recommend_movies(catalog, ratings, category_matrix, user_preferences, categories)
print(recommended_movies)
recommandation_algorithm()
elif command == "..." :
return False
......
......@@ -56,6 +56,7 @@ def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=
# Créer le dictionnaire pour stocker les indices des catégories
category_index = {category: i + 1 for i, category in enumerate(categories)}
print(category_index)
for show_id, categories in catalog.items():
# Check if the user has rated the show
......@@ -100,10 +101,10 @@ if __name__ == "__main__":
category_matrix = create_category_matrix(catalog, categories)
# Display movies already viewed by the user
print("Films déjà vus par l'utilisateur:")
"""print("Films déjà vus par l'utilisateur:")
for show_id, user_rating in ratings.items():
if user_id in user_rating and user_rating[user_id] > 0:
print(f"- {catalog[show_id][0]}")
print(f"- {catalog[show_id][0]}")"""
# Recommend movies
recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5)
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter