commit main

47eaca7b · Adrien Payen · be00dfce · 47eaca7b · 47eaca7b · 47eaca7b
--- a/evaluator.ipynb
+++ b/evaluator.ipynb
@@ -13,7 +13,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 16,
   "id": "6aaf9140",
   "metadata": {},
   "outputs": [
@@ -59,7 +59,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 17,
   "id": "d6d82188",
   "metadata": {},
   "outputs": [],
@@ -193,7 +193,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 18,
   "id": "f1849e55",
   "metadata": {},
   "outputs": [],
@@ -246,7 +246,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 20,
   "id": "704f4d2a",
   "metadata": {},
   "outputs": [
@@ -311,31 +311,31 @@
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>baseline_1</th>\n",
-       "      <td>1.544940</td>\n",
+       "      <td>1.567221</td>\n",
-       "      <td>1.776982</td>\n",
+       "      <td>1.788369</td>\n",
-       "      <td>0.112150</td>\n",
+       "      <td>0.074766</td>\n",
       "      <td>99.405607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>baseline_2</th>\n",
-       "      <td>1.491063</td>\n",
+       "      <td>1.502872</td>\n",
-       "      <td>1.844761</td>\n",
+       "      <td>1.840696</td>\n",
-       "      <td>0.009346</td>\n",
+       "      <td>0.056075</td>\n",
       "      <td>429.942991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>baseline_3</th>\n",
-       "      <td>0.868139</td>\n",
+       "      <td>0.873993</td>\n",
-       "      <td>1.066303</td>\n",
+       "      <td>1.076982</td>\n",
-       "      <td>0.074766</td>\n",
+       "      <td>0.065421</td>\n",
       "      <td>99.405607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>baseline_4</th>\n",
-       "      <td>0.727803</td>\n",
+       "      <td>0.730657</td>\n",
-       "      <td>0.927636</td>\n",
+       "      <td>0.938814</td>\n",
-       "      <td>0.158879</td>\n",
+       "      <td>0.186916</td>\n",
-       "      <td>57.328037</td>\n",
+       "      <td>57.465421</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
@@ -343,13 +343,13 @@
      ],
      "text/plain": [
       "                 mae      rmse  hit_rate     novelty\n",
-       "baseline_1  1.544940  1.776982  0.112150   99.405607\n",
+       "baseline_1  1.567221  1.788369  0.074766   99.405607\n",
-       "baseline_2  1.491063  1.844761  0.009346  429.942991\n",
+       "baseline_2  1.502872  1.840696  0.056075  429.942991\n",
-       "baseline_3  0.868139  1.066303  0.074766   99.405607\n",
+       "baseline_3  0.873993  1.076982  0.065421   99.405607\n",
-       "baseline_4  0.727803  0.927636  0.158879   57.328037"
+       "baseline_4  0.730657  0.938814  0.186916   57.465421"
      ]
     },
-     "execution_count": 61,
+     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -372,7 +372,7 @@
    "}\n",
    "\n",
    "sp_ratings = load_ratings(surprise_format=True)\n",
-    "precomputed_dict = precomputed_information(pd.read_csv(\"../data/tiny/evidence/ratings.csv\"))\n",
+    "precomputed_dict = precomputed_information(pd.read_csv(\"data/tiny/evidence/ratings.csv\"))\n",
    "evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n",
    "export_evaluation_report(evaluation_report)"
   ]
@@ -394,7 +394,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.12.2"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id:a665885b tags:
 # Evaluator Module
 The Evaluator module creates evaluation reports.
 Reports contain evaluation metrics depending on models specified in the evaluation config.
 %% Cell type:code id:6aaf9140 tags:
 ``` python
 # reloads modules automatically before entering the execution of code
 %load_ext autoreload
 %autoreload 2
 # third parties imports
 import numpy as np
 import pandas as pd
 # -- add new imports here --
 # local imports
 from configs import EvalConfig
 from constants import Constant as C
 from loaders import export_evaluation_report
 from loaders import load_ratings
 # -- add new imports here --
 from surprise.model_selection import train_test_split
 from surprise import accuracy
 from surprise.model_selection import LeaveOneOut
 from collections import Counter
 ```
 %% Output
    The autoreload extension is already loaded. To reload it, use:
      %reload_ext autoreload
 %% Cell type:markdown id:d47c24a4 tags:
 # 1. Model validation functions
 Validation functions are a way to perform cross-validation on recommender system models.
 %% Cell type:code id:d6d82188 tags:
 ``` python
 def generate_split_predictions(algo, ratings_dataset, eval_config):
    """Fit ``algo`` on a random train split and predict the held-out ratings.

    The share of the data kept aside for testing is read from
    ``eval_config.test_size``. The value returned is whatever ``algo.test``
    produces for the held-out part.
    """
    # Random partition of the ratings into a fitting part and a held-out part
    train_part, held_out = train_test_split(
        ratings_dataset,
        test_size=eval_config.test_size,
    )
    # Only the fitting part is shown to the algorithm
    algo.fit(train_part)
    # Predictions are requested for the held-out ratings only
    return algo.test(held_out)
 def generate_loo_top_n(algo, ratings_dataset, eval_config):
    """Build per-user top-n recommendations from one random leave-one-out split.

    Returns a pair ``(top_n, testset)``: ``top_n`` maps each user id to at most
    ``eval_config.top_n_value`` ``(item id, estimated rating)`` pairs, highest
    estimate first, and ``testset`` is the test part yielded by the split.
    """
    splitter = LeaveOneOut(n_splits=1)
    for trainset, testset in splitter.split(ratings_dataset):
        algo.fit(trainset)
        # Score every (user, item) pair of the anti test-set built from the train part
        scored = algo.test(trainset.build_anti_testset())
        # Group the scored items by user
        per_user = {}
        for uid, iid, _, est, _ in scored:
            per_user.setdefault(uid, []).append((iid, est))
        # Keep the best-scored items of each user, highest estimate first
        ranked = {
            uid: sorted(candidates, key=lambda pair: pair[1], reverse=True)[:eval_config.top_n_value]
            for uid, candidates in per_user.items()
        }
        # Only one split is requested, so the first one is returned right away
        return ranked, testset
 def generate_full_top_n(algo, ratings_dataset, eval_config):
    """Build per-user top-n recommendations after fitting on the full training set.

    Returns a dictionary mapping each user id to at most
    ``eval_config.top_n_value`` ``(item id, estimated rating)`` pairs, highest
    estimate first.
    """
    # Fit on everything that is available
    everything = ratings_dataset.build_full_trainset()
    algo.fit(everything)
    # Score every (user, item) pair of the anti test-set
    scored = algo.test(everything.build_anti_testset())
    # Group the scored items by user
    per_user = {}
    for uid, iid, _, est, _ in scored:
        per_user.setdefault(uid, []).append((iid, est))
    # Keep the best-scored items of each user, highest estimate first
    return {
        uid: sorted(candidates, key=lambda pair: pair[1], reverse=True)[:eval_config.top_n_value]
        for uid, candidates in per_user.items()
    }
 def precomputed_information(movie_data):
    """Precompute the information needed when evaluating in full mode.

    Returned dictionary keys:
    - "item_to_rank": maps each movie id to its popularity rank, rank 1 being
      the movie with the most rows in ``movie_data``
    - (-- for your project, add other relevant information here -- )
    """
    # Number of rows per movie, most frequent movie first
    popularity = movie_data.groupby('movieId').size().sort_values(ascending=False)
    # The position in that ordering, counted from 1, is the rank
    ranks = {
        movie_id: position
        for position, movie_id in enumerate(popularity.index, start=1)
    }
    return {"item_to_rank": ranks}
 def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):
    """Evaluate every model of ``eval_config`` and gather the scores in a DataFrame.

    Each entry of ``eval_config.models`` is a ``(name, model class, arguments)``
    triple. For every model, the metrics listed in ``eval_config.split_metrics``,
    ``eval_config.loo_metrics`` and ``eval_config.full_metrics`` are looked up in
    ``available_metrics`` and computed. The returned DataFrame has one row per
    model name and one column per metric.
    """
    report = {}
    for model_name, model, arguments in eval_config.models:
        print(f'Handling model {model_name}')
        algo = model(**arguments)
        scores = {}
        report[model_name] = scores

        # Split mode: metrics computed on rating predictions of a random test set
        if len(eval_config.split_metrics) > 0:
            print('Training split predictions')
            predictions = generate_split_predictions(algo, sp_ratings, eval_config)
            for metric in eval_config.split_metrics:
                print(f'- computing metric {metric}')
                assert metric in available_metrics['split']
                metric_fn, metric_kwargs = available_metrics["split"][metric]
                scores[metric] = metric_fn(predictions, **metric_kwargs)

        # Leave-one-out mode: metrics comparing the top-n lists with the left-out ratings
        if len(eval_config.loo_metrics) > 0:
            print('Training loo predictions')
            loo_top_n, loo_testset = generate_loo_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.loo_metrics:
                assert metric in available_metrics['loo']
                metric_fn, metric_kwargs = available_metrics["loo"][metric]
                scores[metric] = metric_fn(loo_top_n, loo_testset, **metric_kwargs)

        # Full mode: metrics computed on the top-n lists plus the precomputed information
        if len(eval_config.full_metrics) > 0:
            print('Training full predictions')
            full_top_n = generate_full_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.full_metrics:
                assert metric in available_metrics['full']
                metric_fn, metric_kwargs = available_metrics["full"][metric]
                scores[metric] = metric_fn(full_top_n, **precomputed_dict, **metric_kwargs)

    # Models become rows, metrics become columns
    return pd.DataFrame.from_dict(report).T
 ```
 %% Cell type:markdown id:f7e83d1d tags:
 # 2. Evaluation metrics
 Implement evaluation metrics for either rating predictions (split metrics) or for top-n recommendations (loo metric, full metric)
 %% Cell type:code id:f1849e55 tags:
 ``` python
 def get_hit_rate(anti_testset_top_n, testset):
    """Compute the average hit over the users (loo metric).

    A hit (1) happens when the movie in the testset has been picked by the top-n recommender.
    A fail (0) happens when the movie in the testset has not been picked by the top-n recommender.

    ``anti_testset_top_n`` maps a user id to a list of ``(item id, estimate)``
    pairs; ``testset`` is a sequence of ``(user id, item id, rating)`` triples.
    Users of the testset that have no top-n list count as a fail.
    Returns 0 when ``testset`` is empty.
    """
    total_users = len(testset)
    if total_users == 0:
        return 0  # Nothing to look for; avoid division by zero
    hits = sum(
        1
        for uid, true_iid, _ in testset
        # A user without recommendations is treated as having an empty list
        if any(iid == true_iid for iid, _ in anti_testset_top_n.get(uid, ()))
    )
    return hits / total_users
 def get_novelty(anti_testset_top_n, item_to_rank):
    """Compute the average novelty of the top-n recommendations over the users (full metric).

    The novelty is the mean rank, taken from ``item_to_rank``, of all recommended
    movies. Recommended movies that have no entry in ``item_to_rank`` are left out
    of the mean. Returns 0 when no recommended movie has a rank.
    """
    # Ranks of every recommended movie that is known to item_to_rank
    ranks = [
        item_to_rank[iid]
        for recommendations in anti_testset_top_n.values()
        for iid, _ in recommendations
        if iid in item_to_rank
    ]
    if not ranks:
        return 0  # Avoid division by zero
    return sum(ranks) / len(ranks)
 ```
 %% Cell type:markdown id:1a9855b3 tags:
 # 3. Evaluation workflow
 Load data, evaluate models and save the experimental outcomes
 %% Cell type:code id:704f4d2a tags:
 ``` python
 # Registry of the metrics that an evaluation config may request, grouped by
 # evaluation type ("split", "loo", "full"). Each metric name maps to a tuple
 # (evaluation function, dict of extra keyword arguments); create_evaluation_report
 # unpacks that tuple and forwards the dict as keyword arguments to the function.
 AVAILABLE_METRICS = {
    # Metrics computed on the rating predictions of a random test set
    "split": {
        "mae": (accuracy.mae, {'verbose': False}),
        "rmse": (accuracy.rmse, {'verbose': False})
        # Add new split metrics here if needed
    },
    # Metrics computed from the top-n lists and the testset of a leave-one-out split
    "loo": {
        "hit_rate": (get_hit_rate, {}),
        # Add new loo metrics here if needed
    },
    # Metrics computed from the top-n lists obtained with the full training set
    "full": {
        "novelty": (get_novelty, {}),
        # Add new full metrics here if needed
    }
 }
 sp_ratings = load_ratings(surprise_format=True)
-precomputed_dict = precomputed_information(pd.read_csv("../data/tiny/evidence/ratings.csv"))
+precomputed_dict = precomputed_information(pd.read_csv("data/tiny/evidence/ratings.csv"))
 evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)
 export_evaluation_report(evaluation_report)
 ```
 %% Output
    Handling model baseline_1
    Training split predictions
    - computing metric mae
    - computing metric rmse
    Training loo predictions
    Training full predictions
    Handling model baseline_2
    Training split predictions
    - computing metric mae
    - computing metric rmse
    Training loo predictions
    Training full predictions
    Handling model baseline_3
    Training split predictions
    - computing metric mae
    - computing metric rmse
    Training loo predictions
    Training full predictions
    Handling model baseline_4
    Training split predictions
    - computing metric mae
    - computing metric rmse
    Training loo predictions
    Training full predictions
    The data has been exported to the evaluation report
                     mae      rmse  hit_rate     novelty
-    baseline_1  1.544940  1.776982  0.112150   99.405607
+    baseline_1  1.567221  1.788369  0.074766   99.405607
-    baseline_2  1.491063  1.844761  0.009346  429.942991
+    baseline_2  1.502872  1.840696  0.056075  429.942991
-    baseline_3  0.868139  1.066303  0.074766   99.405607
+    baseline_3  0.873993  1.076982  0.065421   99.405607
-    baseline_4  0.727803  0.927636  0.158879   57.328037
+    baseline_4  0.730657  0.938814  0.186916   57.465421

--- a/loaders.py
+++ b/loaders.py
 # Third-party imports
 import pandas as pd
 import os
-from pprint import pprint as pp
-# import display
 # Local imports
 from constants import Constant as C
@@ -24,7 +23,8 @@ def load_ratings(surprise_format=False):
        return surprise_data
    else:
        return df_ratings
-print(load_ratings())
 def load_items():
    """Loads items data.

--- a/user_based.ipynb
+++ b/user_based.ipynb