# Members added to ``RecommenderSystem_KNN`` in recommender.py, followed by the
# demo entry point. Only the members touched by this change are shown here.
# The code relies on module-level names that recommender.py brings into scope:
# ``KNNWithMeans`` and ``accuracy`` (from surprise) and ``load_ratings``.


class RecommenderSystem_KNN:

    def evaluate_rmse(self, testset=None):
        """
        Evaluate the RMSE of the fitted model on a test set.

        Args:
            testset (list, optional): Testset entries as (user, item, rating)
                tuples. When omitted, ``self.testset`` is used.

        Returns:
            float: RMSE of the model on the evaluated test set.
        """
        # Score the test set the caller passed in; fall back to the stored
        # one only when no argument is supplied.
        if testset is None:
            testset = self.testset
        predictions = self.model.test(testset)
        return accuracy.rmse(predictions, verbose=True)

    def evaluate_knn_rmse_for_different_k(self, k_values=None, show_plot=True):
        """
        Evaluate the RMSE of a user-based KNNWithMeans model for several k.

        The ratings are split into a training part and a held-out test part,
        one model is fitted per value of k, and the RMSE on the held-out part
        is recorded for each.

        Side effects: overwrites ``self.trainset`` and ``self.testset`` with
        the split used for the sweep, and prints progress for every k.

        Args:
            k_values (iterable of int, optional): Neighbourhood sizes to try.
                Defaults to ``range(1, 81, 10)``.
            show_plot (bool): When True, plot RMSE against k with matplotlib.

        Returns:
            list[tuple[int, float]]: ``(k, rmse)`` pairs in evaluation order.
        """
        # Local import: only this method needs the splitter.
        from surprise.model_selection import train_test_split

        if k_values is None:
            k_values = range(1, 81, 10)
        k_values = list(k_values)

        # Split data into training and testing sets. The test part must hold
        # real ratings: an anti-testset only contains (user, item) pairs with
        # no known rating, filled with the global mean, so an RMSE computed
        # against it does not measure prediction accuracy.
        surprise_data = load_ratings(surprise_format=True)
        self.trainset, self.testset = train_test_split(
            surprise_data, test_size=0.25, random_state=0
        )

        # Similarity settings do not depend on k, so build them once.
        sim_options = {
            'name': 'msd',
            'user_based': True
        }

        # Evaluate the model for each value of k.
        rmse_values = []
        for k in k_values:
            print(f"Évaluating for k={k}")
            # NOTE(review): min_k=k means neighbours are only aggregated when
            # a full set of k is available; confirm this coupling is intended.
            algo = KNNWithMeans(sim_options=sim_options, k=k, min_k=k)
            algo.fit(self.trainset)
            predictions = algo.test(self.testset)
            rmse = accuracy.rmse(predictions, verbose=False)
            rmse_values.append(rmse)
            print(f"k={k}, RMSE={rmse}")

        if show_plot:
            # Imported lazily so the sweep can run without a plotting backend.
            import matplotlib.pyplot as plt

            # Plot how the RMSE evolves as k grows.
            plt.figure(figsize=(10, 6))
            plt.plot(k_values, rmse_values, marker='o')
            plt.title('Évolution du RMSE en fonction de k')
            plt.xlabel('Nombre de voisins (k)')
            plt.ylabel('RMSE')
            plt.grid(True)
            plt.show()

        return list(zip(k_values, rmse_values))


# Example usage of the RecommenderSystem_KNN class. Guarded so that importing
# the module does not train models or open a blocking plot window.
if __name__ == "__main__":
    recommender = RecommenderSystem_KNN(ratings_path='data/small/evidence/ratings.csv')
    recommender.evaluate_knn_rmse_for_different_k()

###########################################################################################################################
################################################# OTHER USER-BASED MODEL ##################################################
###########################################################################################################################