diff --git a/configs.py b/configs.py deleted file mode 100644 index c0a0002c921ae0d97bf1206549529f3ea8821f17..0000000000000000000000000000000000000000 --- a/configs.py +++ /dev/null @@ -1,32 +0,0 @@ -# local imports -from models import * - - -class EvalConfig: - - """Configuration settings for evaluation.""" - - # List of models to evaluate, each tuple containing model_name, model class, and model parameters (dict) - - models = [ - ("baseline_1", ModelBaseline1, {}), - ("baseline_2", ModelBaseline2, {}), - ("baseline_3", ModelBaseline3, {}), - ("baseline_4", ModelBaseline4, {}) - # model_name, model class, model parameters (dict) - ] - - # Metrics to compute for split evaluation - split_metrics = ["mae", "rmse"] - - # Metrics to compute for Leave-One-Out (LOO) evaluation - loo_metrics = ["hit_rate"] - - # Metrics to compute for full dataset evaluation - full_metrics = ["novelty"] - - # Split parameters - test_size = 0.25 # -- configure the test_size (from 0 to 1) -- - - # Loo parameters - top_n_value = 10 # -- configure the number of recommendations (> 1) -- diff --git a/constants.py b/constants.py deleted file mode 100644 index 3d4d92211e487ded9dc4c873cb52e52e41a7bed4..0000000000000000000000000000000000000000 --- a/constants.py +++ /dev/null @@ -1,31 +0,0 @@ -# third-party imports -from pathlib import Path - - -class Constant: - - """Constants for dataset paths and column names.""" - - DATA_PATH = Path('data/tiny') # -- fill in here the dataset size to use -- - - # Content - CONTENT_PATH = DATA_PATH / 'content' # Path to content data - # - item - ITEMS_FILENAME = 'movies.csv' # Filename for items data - ITEM_ID_COL = 'movieId' # Column name for item IDs - LABEL_COL = 'title' # Column name for item labels - GENRES_COL = 'genres' # Column name for item genres - - # Evidence - EVIDENCE_PATH = DATA_PATH / 'evidence' # Path to evidence data - # - ratings - RATINGS_FILENAME = 'ratings.csv' # Filename for ratings data - USER_ID_COL = 'userId' # Column name for user IDs - RATING_COL = 'rating' # Column name for ratings - TIMESTAMP_COL = 'timestamp' # Column name for timestamps - USER_ITEM_RATINGS = [USER_ID_COL, ITEM_ID_COL, RATING_COL] # List of columns for user-item ratings - - EVALUATION_PATH = DATA_PATH / 'evaluations' # Path to evaluation data - - # Rating scale - RATINGS_SCALE = (0.5, 5.0) # -- fill in here the ratings scale as a tuple (min_value, max_value) -- diff --git a/evaluator.ipynb b/evaluator.ipynb deleted file mode 100644 index c6e61513a1ea1a1ff88286fa764f36f771de2175..0000000000000000000000000000000000000000 --- a/evaluator.ipynb +++ /dev/null @@ -1,419 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a665885b", - "metadata": {}, - "source": [ - "# Evaluator Module\n", - "The Evaluator module creates evaluation reports.\n", - "\n", - "Reports contain evaluation metrics depending on the models specified in the evaluation config."
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "6aaf9140", - "metadata": {}, - "outputs": [], - "source": [ - "# reloads modules automatically before entering the execution of code\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "# imports\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "# local imports\n", - "from configs import EvalConfig\n", - "from constants import Constant as C\n", - "from loaders import export_evaluation_report\n", - "from loaders import load_ratings\n", - "\n", - "# New imports\n", - "from surprise.model_selection import train_test_split\n", - "from surprise import accuracy\n", - "from surprise.model_selection import LeaveOneOut\n", - "from collections import Counter" - ] - }, - { - "cell_type": "markdown", - "id": "d47c24a4", - "metadata": {}, - "source": [ - "# 1. Model validation functions\n", - "Validation functions are a way to perform cross-validation on recommender system models. " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d6d82188", - "metadata": {}, - "outputs": [], - "source": [ - "# -- implement the function generate_split_predictions --\n", - "def generate_split_predictions(algo, ratings_dataset, eval_config):\n", - " \"\"\"Generate predictions on a random test set specified in eval_config\"\"\"\n", - " \n", - " # Splitting the data into train and test sets\n", - " trainset, testset = train_test_split(ratings_dataset, test_size=eval_config.test_size)\n", - "\n", - " # Training the algorithm on the train set\n", - " algo.fit(trainset)\n", - "\n", - " # Predict ratings for the testset\n", - " predictions = algo.test(testset)\n", - " \n", - " return predictions\n", - "\n", - "# -- implement the function generate_loo_top_n --\n", - "def generate_loo_top_n(algo, ratings_dataset, eval_config):\n", - " \"\"\"Generate top-n recommendations for each user on a random Leave-one-out split (LOO)\"\"\"\n", - " \n", - " # Create a LeaveOneOut split\n", - " loo = LeaveOneOut(n_splits=1)\n", - " \n", - " for trainset, testset in loo.split(ratings_dataset):\n", - " algo.fit(trainset) # Train the algorithm on the training set\n", - " anti_testset = trainset.build_anti_testset() # Build the anti test-set\n", - " predictions = algo.test(anti_testset) # Get predictions on the anti test-set\n", - " top_n = {}\n", - " for uid, iid, _, est, _ in predictions:\n", - " if uid not in top_n:\n", - " top_n[uid] = []\n", - " top_n[uid].append((iid, est))\n", - " for uid, user_ratings in top_n.items():\n", - " user_ratings.sort(key=lambda x: x[1], reverse=True)\n", - " top_n[uid] = user_ratings[:eval_config.top_n_value] # Get top-N recommendations\n", - " anti_testset_top_n = top_n\n", - " return anti_testset_top_n, testset\n", - "\n", - "def generate_full_top_n(algo, ratings_dataset, eval_config):\n", - " \"\"\"Generate top-n recommendations for each user with the full training set\"\"\"\n", - "\n", - " full_trainset = ratings_dataset.build_full_trainset() # Build the full training set\n", - " algo.fit(full_trainset) # Train the algorithm on the full training set\n", - " anti_testset = full_trainset.build_anti_testset() # Build the anti test-set\n", - " predictions = algo.test(anti_testset) # Get predictions on the anti test-set\n", - " top_n = {}\n", - " for uid, iid, _, est, _ in predictions:\n", - " if uid not in top_n:\n", - " top_n[uid] = []\n", - " top_n[uid].append((iid, est))\n", - " for uid, user_ratings in top_n.items():\n", - " user_ratings.sort(key=lambda x: x[1], reverse=True)\n", - " 
top_n[uid] = user_ratings[:eval_config.top_n_value] # Get top-N recommendations\n", - " anti_testset_top_n = top_n\n", - " return anti_testset_top_n\n", - "\n", - "def precomputed_information(movie_data):\n", - "\n", - " \"\"\" Returns a dictionary that precomputes relevant information for evaluating in full mode\n", - " \n", - " Dictionary keys:\n", - " - precomputed_dict[\"item_to_rank\"] : contains a dictionary mapping movie ids to rankings\n", - " - (-- for your project, add other relevant information here -- )\n", - " \"\"\"\n", - "\n", - " # Initialize an empty dictionary to store item_id to rank mapping\n", - " item_to_rank = {}\n", - " \n", - " # Calculate popularity rank for each movie\n", - " ratings_count = movie_data.groupby('movieId').size().sort_values(ascending=False)\n", - " \n", - " # Assign ranks to movies based on their popularity\n", - " for rank, (movie_id, _) in enumerate(ratings_count.items(), start=1):\n", - " item_to_rank[movie_id] = rank\n", - " \n", - " # Create the precomputed dictionary\n", - " precomputed_dict = {}\n", - " precomputed_dict[\"item_to_rank\"] = item_to_rank\n", - " \n", - " return precomputed_dict\n", - "\n", - "def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):\n", - "\n", - " \"\"\" Create a DataFrame evaluating various models on metrics specified in an evaluation config. \n", - " \"\"\"\n", - " \n", - " evaluation_dict = {}\n", - " for model_name, model, arguments in eval_config.models:\n", - " print(f'Handling model {model_name}')\n", - " algo = model(**arguments)\n", - " evaluation_dict[model_name] = {}\n", - " \n", - " # Type 1 : split evaluations\n", - " if len(eval_config.split_metrics) > 0:\n", - " print('Training split predictions')\n", - " predictions = generate_split_predictions(algo, sp_ratings, eval_config)\n", - " for metric in eval_config.split_metrics:\n", - " print(f'- computing metric {metric}')\n", - " assert metric in available_metrics['split']\n", - " evaluation_function, parameters = available_metrics[\"split\"][metric]\n", - " evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters) \n", - " \n", - " # Type 2 : loo evaluations\n", - " if len(eval_config.loo_metrics) > 0:\n", - " print('Training loo predictions')\n", - " anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)\n", - " for metric in eval_config.loo_metrics:\n", - " assert metric in available_metrics['loo']\n", - " evaluation_function, parameters = available_metrics[\"loo\"][metric]\n", - " evaluation_dict[model_name][metric] = evaluation_function(anti_testset_top_n, testset, **parameters)\n", - " \n", - " # Type 3 : full evaluations\n", - " if len(eval_config.full_metrics) > 0:\n", - " print('Training full predictions')\n", - " anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)\n", - " for metric in eval_config.full_metrics:\n", - " assert metric in available_metrics['full']\n", - " evaluation_function, parameters = available_metrics[\"full\"][metric]\n", - " evaluation_dict[model_name][metric] = evaluation_function(\n", - " anti_testset_top_n,\n", - " **precomputed_dict,\n", - " **parameters\n", - " )\n", - " \n", - " return pd.DataFrame.from_dict(evaluation_dict).T" - ] - }, - { - "cell_type": "markdown", - "id": "f7e83d1d", - "metadata": {}, - "source": [ - "# 2. 
Evaluation metrics\n", - "Implement evaluation metrics either for rating predictions (split metrics) or for top-n recommendations (loo and full metrics)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f1849e55", - "metadata": {}, - "outputs": [], - "source": [ - "# -- implement the function get_hit_rate --\n", - "def get_hit_rate(anti_testset_top_n, testset):\n", - " \n", - " \"\"\"Compute the average hit rate over the users (loo metric)\n", - " \n", - " A hit (1) happens when the movie in the testset has been picked by the top-n recommender\n", - " A fail (0) happens when the movie in the testset has not been picked by the top-n recommender\n", - " \"\"\"\n", - "\n", - " hits = 0\n", - " total_users = len(testset)\n", - " for uid, true_iid, _ in testset:\n", - " if uid in anti_testset_top_n and true_iid in {iid for iid, _ in anti_testset_top_n[uid]}:\n", - " hits += 1\n", - " hit_rate = hits / total_users\n", - "\n", - " return hit_rate\n", - "\n", - "# -- implement the function get_novelty --\n", - "def get_novelty(anti_testset_top_n, item_to_rank):\n", - "\n", - " \"\"\"Compute the average novelty of the top-n recommendations over the users (full metric)\n", - " \n", - " The novelty is defined as the average popularity rank of the recommended movies\n", - " \"\"\"\n", - "\n", - " total_rank_sum = 0\n", - " total_recommendations = 0\n", - " for uid, recommendations in anti_testset_top_n.items():\n", - " for iid, _ in recommendations:\n", - " if iid in item_to_rank:\n", - " total_rank_sum += item_to_rank[iid]\n", - " total_recommendations += 1\n", - " if total_recommendations == 0:\n", - " return 0 # Avoid division by zero\n", - " average_rank_sum = total_rank_sum / total_recommendations\n", - " \n", - " return average_rank_sum" - ] - }, - { - "cell_type": "markdown", - "id": "1a9855b3", - "metadata": {}, - "source": [ - "# 3. 
Evaluation workflow\n", - "Load data, evaluate models and save the experimental outcomes" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "704f4d2a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Handling model baseline_1\n", - "Training split predictions\n", - "- computing metric mae\n", - "- computing metric rmse\n", - "Training loo predictions\n", - "Training full predictions\n", - "Handling model baseline_2\n", - "Training split predictions\n", - "- computing metric mae\n", - "- computing metric rmse\n", - "Training loo predictions\n", - "Training full predictions\n", - "Handling model baseline_3\n", - "Training split predictions\n", - "- computing metric mae\n", - "- computing metric rmse\n", - "Training loo predictions\n", - "Training full predictions\n", - "Handling model baseline_4\n", - "Training split predictions\n", - "- computing metric mae\n", - "- computing metric rmse\n", - "Training loo predictions\n", - "Training full predictions\n", - "The data has been exported to the evaluation report\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>mae</th>\n", - " <th>rmse</th>\n", - " <th>hit_rate</th>\n", - " <th>novelty</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>baseline_1</th>\n", - " <td>1.517749</td>\n", - " <td>1.745787</td>\n", - " <td>0.056075</td>\n", - " <td>99.405607</td>\n", - " </tr>\n", - " <tr>\n", - " <th>baseline_2</th>\n", - " <td>1.472806</td>\n", - " <td>1.805674</td>\n", - " <td>0.000000</td>\n", - " <td>429.942991</td>\n", - " </tr>\n", - " <tr>\n", - " <th>baseline_3</th>\n", - " <td>0.868666</td>\n", - " <td>1.076227</td>\n", - " <td>0.093458</td>\n", - " <td>99.405607</td>\n", - " </tr>\n", - " <tr>\n", - " <th>baseline_4</th>\n", - " <td>0.713063</td>\n", - " <td>0.912046</td>\n", - " <td>0.074766</td>\n", - " <td>60.349533</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " mae rmse hit_rate novelty\n", - "baseline_1 1.517749 1.745787 0.056075 99.405607\n", - "baseline_2 1.472806 1.805674 0.000000 429.942991\n", - "baseline_3 0.868666 1.076227 0.093458 99.405607\n", - "baseline_4 0.713063 0.912046 0.074766 60.349533" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "AVAILABLE_METRICS = {\n", - " \"split\": {\n", - " \"mae\": (accuracy.mae, {'verbose': False}),\n", - " \"rmse\": (accuracy.rmse, {'verbose': False})\n", - " },\n", - " \"loo\": {\n", - " \"hit_rate\": (get_hit_rate, {}),\n", - " },\n", - " \"full\": {\n", - " \"novelty\": (get_novelty, {}),\n", - " }\n", - "}\n", - "\n", - "sp_ratings = load_ratings(surprise_format=True)\n", - "precomputed_dict = precomputed_information(pd.read_csv(\"data/tiny/evidence/ratings.csv\"))\n", - "evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n", - "export_evaluation_report(evaluation_report)" - ] - }, - { - "cell_type": "markdown", - "id": "9fbf23fd", - "metadata": {}, - "source": [ - "Analyzing the provided data on different baselines, 
several observations can be made across the metrics.\n", - "\n", - "Firstly, looking at the Mean Absolute Error (MAE), baseline_4 stands out with the lowest value of 0.713063, indicating the most accurate predictions among the baselines. Following closely behind is baseline_3 with an MAE of 0.868666.\n", - "\n", - "Next, considering the Root Mean Square Error (RMSE), baseline_4 again performs best with a value of 0.912046, suggesting the smallest overall prediction errors. Baseline_3 maintains strong performance here as well, with an RMSE of 1.076227.\n", - "\n", - "Examining the Hit Rate, baseline_3 leads with 9.35%, signifying a higher success rate in recommendations than the other baselines. Meanwhile, baseline_1 and baseline_4 show lower hit rates of 5.61% and 7.48% respectively, and baseline_2 records no hits at all (0.00%).\n", - "\n", - "Lastly, looking at the Novelty metric, baseline_4 scores the lowest at 60.35, indicating that its recommendations are the least novel, i.e. concentrated on popular movies. At the other end, baseline_2 scores by far the highest at 429.94, implying that its recommendations are the least conventional, while baseline_1 and baseline_3 sit in between at 99.41.\n", - "\n", - "In summary, baseline_4 excels on the accuracy metrics (MAE and RMSE) while keeping novelty relatively low, i.e. recommending mostly popular movies. Baseline_3 stands out with the highest hit rate, showcasing effectiveness in recommendation success. Baseline_2, despite not excelling on the other metrics, exhibits an exceptionally high novelty score, indicating that it recommends far less popular movies than the rest." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/loaders.py b/loaders.py deleted file mode 100644 index d4cc224fd56f45809d239eeec6b820a953246f24..0000000000000000000000000000000000000000 --- a/loaders.py +++ /dev/null @@ -1,51 +0,0 @@ -# Third-party imports -import pandas as pd -import os - - -# Local imports -from constants import Constant as C -from surprise import Reader, Dataset - -def load_ratings(surprise_format=False): - """Loads ratings data. - - Parameters: - surprise_format (bool): If True, returns data in Surprise format. - - Returns: - DataFrame or surprise_data: Ratings data. - """ - df_ratings = pd.read_csv(C.EVIDENCE_PATH / C.RATINGS_FILENAME) - if surprise_format: - reader = Reader(rating_scale=C.RATINGS_SCALE) # the scale starts at 0.5, the smallest possible rating - surprise_data = Dataset.load_from_df(df_ratings[['userId', 'movieId', 'rating']], reader) - return surprise_data - else: - return df_ratings - - -def load_items(): - """Loads items data. - - Returns: - DataFrame: Items data. - """ - df_items = pd.read_csv(C.CONTENT_PATH / C.ITEMS_FILENAME) # the contents of the movies csv - df_items = df_items.set_index(C.ITEM_ID_COL) # index by movie id - return df_items - -def export_evaluation_report(report): - """Exports the evaluation report to a CSV file. - - Parameters: - report: Evaluation report. - - Returns: - DataFrame: The exported evaluation report. 
- """ - report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv" - export_path = os.path.join("data", "tiny", "evaluations", report_name) - report.to_csv(export_path, index=False) - print("The data has been exported to the evaluation report") - return report \ No newline at end of file diff --git a/models.py b/models.py deleted file mode 100644 index 18a45b259b3faea5f4451c93ced9e310f6bf4398..0000000000000000000000000000000000000000 --- a/models.py +++ /dev/null @@ -1,83 +0,0 @@ -# standard library imports -from collections import defaultdict - -# third-party imports -import numpy as np -import random as rd -from surprise import AlgoBase -from surprise import KNNWithMeans -from surprise import SVD - - -def get_top_n(predictions, n): - """Return the top-N recommendations for each user from a set of predictions. - Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py - and modified by cvandekerckh for random tie breaking - - Args: - predictions(list of Prediction objects): The list of predictions, as - returned by the test method of an algorithm. - n(int): The number of recommendations to output for each user. - Returns: - A dict where keys are user (raw) ids and values are lists of tuples: - [(raw item id, rating estimation), ...] of size n. - """ - - rd.seed(0) - - # First map the predictions to each user. - top_n = defaultdict(list) - for uid, iid, true_r, est, _ in predictions: - top_n[uid].append((iid, est)) - - # Then sort the predictions for each user and retrieve the n highest ones. - for uid, user_ratings in top_n.items(): - rd.shuffle(user_ratings) - user_ratings.sort(key=lambda x: x[1], reverse=True) - top_n[uid] = user_ratings[:n] - - return top_n - - -# First algorithm -class ModelBaseline1(AlgoBase): - def __init__(self): - AlgoBase.__init__(self) - - def estimate(self, u, i): - return 2 - - -# Second algorithm -class ModelBaseline2(AlgoBase): - def __init__(self): - AlgoBase.__init__(self) - - def fit(self, trainset): - AlgoBase.fit(self, trainset) - rd.seed(0) - - def estimate(self, u, i): - return rd.uniform(self.trainset.rating_scale[0], self.trainset.rating_scale[1]) - - -# Third algorithm -class ModelBaseline3(AlgoBase): - def __init__(self): - AlgoBase.__init__(self) - - def fit(self, trainset): - AlgoBase.fit(self, trainset) - self.the_mean = np.mean([r for (_, _, r) in self.trainset.all_ratings()]) - - return self - - def estimate(self, u, i): - return self.the_mean - - -# Fourth algorithm -class ModelBaseline4(SVD): - def __init__(self): - SVD.__init__(self, n_factors=100)
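
For reference, the removed modules were wired together roughly as in the minimal sketch below. It is illustrative only, not a drop-in replacement for the deleted code: it assumes the data/tiny layout and (0.5, 5.0) rating scale declared in constants.py, reuses the Surprise calls that appear in evaluator.ipynb (Dataset.load_from_df, train_test_split, accuracy.mae/rmse), and stands in for ModelBaseline4 with a plain SVD(n_factors=100).

# Minimal sketch of the deleted split-evaluation flow (assumptions noted above).
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split

# Load ratings the way loaders.load_ratings(surprise_format=True) did,
# assuming the data/tiny layout and the (0.5, 5.0) scale from constants.py.
df_ratings = pd.read_csv("data/tiny/evidence/ratings.csv")
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(df_ratings[["userId", "movieId", "rating"]], reader)

# Split evaluation as in generate_split_predictions: fit on the train split,
# predict the held-out ratings, then compute the split metrics from EvalConfig.
trainset, testset = train_test_split(data, test_size=0.25)
algo = SVD(n_factors=100)  # stand-in for ModelBaseline4
algo.fit(trainset)
predictions = algo.test(testset)
print("mae :", accuracy.mae(predictions, verbose=False))
print("rmse:", accuracy.rmse(predictions, verbose=False))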