{
"cells": [
{
"cell_type": "markdown",
"id": "a665885b",
"metadata": {},
"source": [
"# Evaluator Module\n",
"The Evaluator module creates evaluation reports.\n",
"\n",
"Reports contain evaluation metrics depending on models specified in the evaluation config."
]
},
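{
"cell_type": "markdown",
"metadata": {},
"source": [
"The actual `EvalConfig` is defined in the local `configs` module and is not shown in this notebook. As a hedged, illustrative sketch only, the next cell lays out the attributes the evaluator relies on (`models`, `split_metrics`, `loo_metrics`, `full_metrics`, `test_size`, `top_n_value`); the class name, model choices and values are assumptions, not the project's actual configuration."
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: the real EvalConfig lives in configs.py.\n",
"# The attribute names mirror how eval_config is used later in this notebook;\n",
"# the models and values below are placeholder assumptions.\n",
"from surprise import SVD, KNNBasic\n",
"\n",
"class ExampleEvalConfig:\n",
"    # (model_name, model_class, keyword_arguments) triples\n",
"    models = [\n",
"        ('svd_example', SVD, {'n_factors': 50}),\n",
"        ('knn_example', KNNBasic, {'k': 40}),\n",
"    ]\n",
"    split_metrics = ['mae']     # metrics computed on a random train/test split\n",
"    loo_metrics = ['hit_rate']  # metrics computed on a leave-one-out split\n",
"    full_metrics = ['novelty']  # metrics computed with the full trainset\n",
"    test_size = 0.25            # fraction of ratings held out in split mode\n",
"    top_n_value = 10            # number of recommendations kept per user"
]
},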
{
"cell_type": "code",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"# reloads modules automatically before entering the execution of code\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"# third parties imports\n",
"import numpy as np \n",
"import pandas as pd\n",
"# -- add new imports here --\n",
"\n",
"# local imports\n",
"from configs import EvalConfig\n",
"from constants import Constant as C\n",
"from loaders import export_evaluation_report\n",
"from loaders import load_ratings\n",
"# -- add new imports here --\n",
"from surprise.model_selection import train_test_split\n",
"from surprise import accuracy\n",

Adrien Payen
a validé
"from surprise.model_selection import LeaveOneOut\n",
"from collections import Counter"
]
},
{
"cell_type": "markdown",
"id": "d47c24a4",
"metadata": {},
"source": [
"# 1. Model validation functions\n",
"Validation functions are a way to perform crossvalidation on recommender system models. "
]
},
{
"cell_type": "code",
"id": "d6d82188",
"metadata": {},
"outputs": [],
"source": [
"def generate_split_predictions(algo, ratings_dataset, eval_config):\n",
" \"\"\"Generate predictions on a random test set specified in eval_config\"\"\"\n",
" # -- implement the function generate_split_predictions --\n",
" trainset, testset = train_test_split(ratings_dataset, test_size=eval_config.test_size)\n",
" # Training the algorithm on the train data set\n",
" algo.fit(trainset)\n",
" # Predict ratings for the testset\n",
" predictions = algo.test(testset)\n",
" return predictions\n",
"\n",
"\n",
"def generate_loo_top_n(algo, ratings_dataset, eval_config):\n",
" \"\"\"Generate top-n recommendations for each user on a random Leave-one-out split (LOO)\"\"\"\n",
" # -- implement the function generate_loo_top_n --\n",
" # Create a LeaveOneOut split\n",
" loo = LeaveOneOut(n_splits=1)\n",
" for trainset, testset in loo.split(ratings_dataset):\n",
" algo.fit(trainset) # Train the algorithm on the training set\n",
" anti_testset = trainset.build_anti_testset() # Build the anti test-set\n",
" predictions = algo.test(anti_testset) # Get predictions on the anti test-set\n",
" top_n = {}\n",
" for uid, iid, _, est, _ in predictions:\n",
" if uid not in top_n:\n",
" top_n[uid] = []\n",
" top_n[uid].append((iid, est))\n",
" for uid, user_ratings in top_n.items():\n",
" user_ratings.sort(key=lambda x: x[1], reverse=True)\n",
" top_n[uid] = user_ratings[:eval_config.top_n_value] # Get top-N recommendations\n",
" anti_testset_top_n = top_n\n",
" return anti_testset_top_n, testset\n",
"\n",
"def generate_full_top_n(algo, ratings_dataset, eval_config):\n",
" \"\"\"Generate top-n recommendations for each user with full training set (LOO)\"\"\"\n",
" full_trainset = ratings_dataset.build_full_trainset() # Build the full training set\n",
" algo.fit(full_trainset) # Train the algorithm on the full training set\n",
" anti_testset = full_trainset.build_anti_testset() # Build the anti test-set\n",
" predictions = algo.test(anti_testset) # Get predictions on the anti test-set\n",
" top_n = {}\n",
" for uid, iid, _, est, _ in predictions:\n",
" if uid not in top_n:\n",
" top_n[uid] = []\n",
" top_n[uid].append((iid, est))\n",
" for uid, user_ratings in top_n.items():\n",
" user_ratings.sort(key=lambda x: x[1], reverse=True)\n",
" top_n[uid] = user_ratings[:eval_config.top_n_value] # Get top-N recommendations\n",
" anti_testset_top_n = top_n\n",
"def precomputed_information(movie_data):\n",
" \"\"\" Returns a dictionary that precomputes relevant information for evaluating in full mode\n",
" \n",
" Dictionary keys:\n",
" - precomputed_dict[\"item_to_rank\"] : contains a dictionary mapping movie ids to rankings\n",
" - (-- for your project, add other relevant information here -- )\n",
" \"\"\"\n",

Adrien Payen
a validé
" # Initialize an empty dictionary to store item_id to rank mapping\n",
" item_to_rank = {}\n",

Adrien Payen
a validé
" # Calculate popularity rank for each movie\n",
" ratings_count = movie_data.groupby('movieId').size().sort_values(ascending=False)\n",
" \n",
" # Assign ranks to movies based on their popularity\n",
" for rank, (movie_id, _) in enumerate(ratings_count.items(), start=1):\n",
" item_to_rank[movie_id] = rank\n",
" \n",
" # Create the precomputed dictionary\n",
" precomputed_dict[\"item_to_rank\"] = item_to_rank\n",

Adrien Payen
a validé
" \n",
" return precomputed_dict\n",
"\n",
"def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):\n",
" \"\"\" Create a DataFrame evaluating various models on metrics specified in an evaluation config. \n",
" \"\"\"\n",
" evaluation_dict = {}\n",
" for model_name, model, arguments in eval_config.models:\n",
" print(f'Handling model {model_name}')\n",
" algo = model(**arguments)\n",
" evaluation_dict[model_name] = {}\n",
" \n",
" # Type 1 : split evaluations\n",
" if len(eval_config.split_metrics) > 0:\n",
" print('Training split predictions')\n",
" predictions = generate_split_predictions(algo, sp_ratings, eval_config)\n",
" for metric in eval_config.split_metrics:\n",
" print(f'- computing metric {metric}')\n",
" assert metric in available_metrics['split']\n",
" evaluation_function, parameters = available_metrics[\"split\"][metric]\n",
" evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters) \n",
" # Type 2 : loo evaluations\n",
" if len(eval_config.loo_metrics) > 0:\n",
" print('Training loo predictions')\n",
" anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)\n",
" for metric in eval_config.loo_metrics:\n",
" assert metric in available_metrics['loo']\n",
" evaluation_function, parameters = available_metrics[\"loo\"][metric]\n",
" evaluation_dict[model_name][metric] = evaluation_function(anti_testset_top_n, testset, **parameters)\n",
" \n",
" # Type 3 : full evaluations\n",
" if len(eval_config.full_metrics) > 0:\n",
" print('Training full predictions')\n",
" anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)\n",
" for metric in eval_config.full_metrics:\n",
" assert metric in available_metrics['full']\n",
" evaluation_function, parameters = available_metrics[\"full\"][metric]\n",
" evaluation_dict[model_name][metric] = evaluation_function(\n",
" anti_testset_top_n,\n",
" **precomputed_dict,\n",
" **parameters\n",
" )\n",
" \n",
" return pd.DataFrame.from_dict(evaluation_dict).T"
]
},
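{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, the next cell sketches how these validation functions can be exercised on a tiny hand-made ratings frame. The toy ratings, the `SVD` model and the `_ToyEvalConfig` stub are assumptions for illustration only; the real evaluation uses the ratings loaded in section 3 together with `EvalConfig`."
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"# Hedged usage sketch on hand-made data (all values are illustrative assumptions)\n",
"from surprise import Dataset, Reader, SVD\n",
"\n",
"toy_ratings = pd.DataFrame({\n",
"    'userId':  [1, 1, 2, 2, 3, 3, 3],\n",
"    'movieId': [10, 20, 10, 30, 20, 30, 40],\n",
"    'rating':  [4.0, 3.5, 5.0, 2.0, 4.5, 3.0, 4.0],\n",
"})\n",
"toy_dataset = Dataset.load_from_df(toy_ratings[['userId', 'movieId', 'rating']],\n",
"                                   Reader(rating_scale=(1, 5)))\n",
"\n",
"class _ToyEvalConfig:\n",
"    test_size = 0.25  # assumed to match the attribute names of the real EvalConfig\n",
"    top_n_value = 2\n",
"\n",
"# Split mode: predictions on a held-out test set\n",
"toy_predictions = generate_split_predictions(SVD(), toy_dataset, _ToyEvalConfig)\n",
"print(f'{len(toy_predictions)} predictions on the held-out toy ratings')\n",
"\n",
"# Leave-one-out mode: top-n recommendations per user plus the left-out ratings\n",
"toy_top_n, toy_loo_testset = generate_loo_top_n(SVD(), toy_dataset, _ToyEvalConfig)\n",
"print({uid: [iid for iid, _ in recs] for uid, recs in toy_top_n.items()})"
]
},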
{
"cell_type": "markdown",
"id": "f7e83d1d",
"metadata": {},
"source": [
"# 2. Evaluation metrics\n",
"Implement evaluation metrics for either rating predictions (split metrics) or for top-n recommendations (loo metric, full metric)"
]
},
{
"cell_type": "code",
"id": "f1849e55",
"metadata": {},
"outputs": [],
"source": [
"def get_hit_rate(anti_testset_top_n, testset):\n",
" \"\"\"Compute the average hit over the users (loo metric)\n",
" \n",
" A hit (1) happens when the movie in the testset has been picked by the top-n recommender\n",
" A fail (0) happens when the movie in the testset has not been picked by the top-n recommender\n",
" \"\"\"\n",
" # -- implement the function get_hit_rate --\n",
"\n",
" hits = 0\n",
" total_users = len(testset)\n",
" for uid, true_iid, _ in testset:\n",
" if uid in anti_testset_top_n and true_iid in {iid for iid, _ in anti_testset_top_n[uid]}:\n",
" hits += 1\n",
" hit_rate = hits / total_users\n",
"\n",
" return hit_rate\n",
"\n",
"def get_novelty(anti_testset_top_n, item_to_rank):\n",
" \"\"\"Compute the average novelty of the top-n recommendation over the users (full metric)\n",
" \n",
" The novelty is defined as the average ranking of the movies recommended\n",
" \"\"\"\n",
" # -- implement the function get_novelty --\n",
" total_rank_sum = 0\n",
" total_recommendations = 0\n",
" for uid, recommendations in anti_testset_top_n.items():\n",
" for iid, _ in recommendations:\n",
" if iid in item_to_rank:\n",
" total_rank_sum += item_to_rank[iid]\n",
" total_recommendations += 1\n",
" if total_recommendations == 0:\n",
" return 0 # Avoid division by zero\n",
" average_rank_sum = total_rank_sum / total_recommendations \n",
" \n",
" return average_rank_sum"
]
},
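{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small hand-made example makes the two top-n metrics concrete. The dictionaries below are illustrative assumptions that mirror the structures produced by `generate_loo_top_n` and `generate_full_top_n`."
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"# Hedged worked example on hand-made inputs (values are illustrative assumptions)\n",
"# User 1's left-out movie (20) appears in their top-n (hit); user 2's (50) does not (miss)\n",
"toy_top_n = {\n",
"    1: [(20, 4.8), (30, 4.1)],\n",
"    2: [(10, 4.6), (40, 3.9)],\n",
"}\n",
"toy_loo_testset = [(1, 20, 4.0), (2, 50, 3.5)]\n",
"print(get_hit_rate(toy_top_n, toy_loo_testset))  # 1 hit out of 2 users -> 0.5\n",
"\n",
"# Popularity ranks: rank 1 = most rated, so a higher average rank means more novel recommendations\n",
"toy_item_to_rank = {10: 1, 20: 2, 30: 3, 40: 4}\n",
"print(get_novelty(toy_top_n, toy_item_to_rank))  # (2 + 3 + 1 + 4) / 4 = 2.5"
]
},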
{
"cell_type": "markdown",
"id": "1a9855b3",
"metadata": {},
"source": [
"# 3. Evaluation workflow\n",
"Load data, evaluate models and save the experimental outcomes"
]
},
{
"cell_type": "code",
"ename": "NameError",
"evalue": "name 'accuracy' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m AVAILABLE_METRICS \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msplit\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmae\u001b[39m\u001b[38;5;124m\"\u001b[39m: (\u001b[43maccuracy\u001b[49m\u001b[38;5;241m.\u001b[39mmae, {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mverbose\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m}),\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrmse\u001b[39m\u001b[38;5;124m\"\u001b[39m: (accuracy\u001b[38;5;241m.\u001b[39mrmse, {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mverbose\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m})\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Add new split metrics here if needed\u001b[39;00m\n\u001b[1;32m 6\u001b[0m },\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloo\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhit_rate\u001b[39m\u001b[38;5;124m\"\u001b[39m: (get_hit_rate, {}),\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Add new loo metrics here if needed\u001b[39;00m\n\u001b[1;32m 10\u001b[0m },\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfull\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnovelty\u001b[39m\u001b[38;5;124m\"\u001b[39m: (get_novelty, {}),\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# Add new full metrics here if needed\u001b[39;00m\n\u001b[1;32m 14\u001b[0m }\n\u001b[1;32m 15\u001b[0m }\n\u001b[1;32m 17\u001b[0m sp_ratings \u001b[38;5;241m=\u001b[39m load_ratings(surprise_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 18\u001b[0m precomputed_dict \u001b[38;5;241m=\u001b[39m precomputed_information(pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata/tiny/evidence/ratings.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n",
"\u001b[0;31mNameError\u001b[0m: name 'accuracy' is not defined"
"source": [
"AVAILABLE_METRICS = {\n",
" \"split\": {\n",
" \"mae\": (accuracy.mae, {'verbose': False}),\n",
" # Add new split metrics here if needed\n",
" },\n",
" \"loo\": {\n",
" \"hit_rate\": (get_hit_rate, {}),\n",
" # Add new loo metrics here if needed\n",
" \"full\": {\n",
" \"novelty\": (get_novelty, {}),\n",
" # Add new full metrics here if needed\n",
" }\n",
"}\n",
"\n",
"sp_ratings = load_ratings(surprise_format=True)\n",
"precomputed_dict = precomputed_information(pd.read_csv(\"data/tiny/evidence/ratings.csv\"))\n",
"evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n",
"export_evaluation_report(evaluation_report)"
]
}
],
"metadata": {
"kernelspec": {
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
}
},
"nbformat": 4,
"nbformat_minor": 5
}