Commit e78fa579 authored by Nathanaël Kindidi

corrected evaluator assignment 2, data tiny

parent 92ed75ad
configs.py
@@ -26,7 +26,7 @@ class EvalConfig:
     full_metrics = ["novelty"]
     # Split parameters
-    test_size = 0.3 # -- configure the test_size (from 0 to 1) --
+    test_size = 0.25 # -- configure the test_size (from 0 to 1) --
     # Loo parameters
     top_n_value = 10 # -- configure the number of recommendations (> 1) --
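For context, a minimal sketch of the surrounding `EvalConfig`, reconstructed from the attributes the notebook reads (`models`, `split_metrics`, `loo_metrics`, `full_metrics`, `test_size`, `top_n_value`). Only the lines in the hunk above are confirmed; the model classes listed are stock Surprise algorithms used as stand-ins, not necessarily the assignment's baselines:

``` python
# Hypothetical reconstruction of configs.py -- the model list and the split/loo
# metric lists are inferred from the notebook and the report columns (mae, rmse,
# hit_rate, novelty); the model classes are stand-ins.
from surprise import NormalPredictor, BaselineOnly, KNNBasic, SVD


class EvalConfig:
    # (model_name, model_class, keyword_arguments) triples consumed by create_evaluation_report
    models = [
        ("baseline_1", NormalPredictor, {}),
        ("baseline_2", BaselineOnly, {}),
        ("baseline_3", KNNBasic, {}),
        ("baseline_4", SVD, {}),
    ]

    # Metrics per evaluation type
    split_metrics = ["mae", "rmse"]
    loo_metrics = ["hit_rate"]
    full_metrics = ["novelty"]

    # Split parameters
    test_size = 0.25  # -- configure the test_size (from 0 to 1) --

    # Loo parameters
    top_n_value = 10  # -- configure the number of recommendations (> 1) --
```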
constants.py
@@ -6,7 +6,7 @@ class Constant:
     """Constants for dataset paths and column names."""
-    DATA_PATH = Path('../data/small') # -- fill here the dataset size to use
+    DATA_PATH = Path('../data/tiny') # -- fill here the dataset size to use
     # Content
     CONTENT_PATH = DATA_PATH / 'content' # Path to content data
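For reference, the surrounding `Constant` class presumably looks roughly like the sketch below; only `DATA_PATH` and `CONTENT_PATH` appear in the hunk, and the evidence/evaluations paths are hypothetical names inferred from the folders used elsewhere in this commit:

``` python
# Hypothetical sketch of constants.py; EVIDENCE_PATH and EVALUATIONS_PATH are
# assumed names based on the '../data/tiny/evidence' and '../data/tiny/evaluations'
# paths used in the notebook and in loaders.py, not lines shown in this commit.
from pathlib import Path


class Constant:
    """Constants for dataset paths and column names."""

    DATA_PATH = Path('../data/tiny')  # -- fill here the dataset size to use

    # Content
    CONTENT_PATH = DATA_PATH / 'content'  # Path to content data

    # Evidence (assumed)
    EVIDENCE_PATH = DATA_PATH / 'evidence'  # Path to ratings data

    # Evaluations (assumed)
    EVALUATIONS_PATH = DATA_PATH / 'evaluations'  # Where evaluation reports are written
```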
%% Cell type:markdown id:a665885b tags:
# Evaluator Module
The Evaluator module creates evaluation reports.
Each report contains the evaluation metrics computed for the models specified in the evaluation config.
%% Cell type:code id:6aaf9140 tags:
``` python
# Reload modules automatically before executing code
%load_ext autoreload
%autoreload 2
# third-party imports
import numpy as np
import pandas as pd
# -- add new imports here --
# local imports
from configs import EvalConfig
from constants import Constant as C
from loaders import export_evaluation_report
from loaders import load_ratings
# -- add new imports here --
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.model_selection import LeaveOneOut
from collections import Counter
```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
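The notebook relies on `loaders.load_ratings`; here is a minimal sketch of what that helper plausibly does when `surprise_format=True`, assuming a MovieLens-style `ratings.csv` under `DATA_PATH / 'evidence'`. The actual loader in `loaders.py`, including the rating scale, may differ; only the signature and the Surprise/plain-DataFrame switch are confirmed by the loaders.py hunk at the bottom of this commit.

``` python
# Plausible sketch of loaders.load_ratings -- body and rating scale are assumptions.
import pandas as pd
from surprise import Dataset, Reader

from constants import Constant as C


def load_ratings(surprise_format=False):
    df_ratings = pd.read_csv(C.DATA_PATH / 'evidence' / 'ratings.csv')
    if surprise_format:
        reader = Reader(rating_scale=(0.5, 5))  # assumed MovieLens scale
        return Dataset.load_from_df(df_ratings[['userId', 'movieId', 'rating']], reader)
    else:
        return df_ratings
```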
%% Cell type:markdown id:d47c24a4 tags:
# 1. Model validation functions
Validation functions are a way to perform cross-validation on recommender system models.
%% Cell type:code id:d6d82188 tags:
``` python
def generate_split_predictions(algo, ratings_dataset, eval_config):
    """Generate predictions on a random test set specified in eval_config"""
    # -- implement the function generate_split_predictions --
    # Split the data into train and test sets
    trainset, testset = train_test_split(ratings_dataset, test_size=eval_config.test_size)
    # Train the algorithm on the train set
    algo.fit(trainset)
    # Predict ratings for the test set
    predictions = algo.test(testset)
    return predictions


def generate_loo_top_n(algo, ratings_dataset, eval_config):
    """Generate top-n recommendations for each user on a random Leave-one-out split (LOO)"""
    # -- implement the function generate_loo_top_n --
    # Create a LeaveOneOut split
    loo = LeaveOneOut(n_splits=1)
    for trainset, testset in loo.split(ratings_dataset):
        algo.fit(trainset)  # Train the algorithm on the training set
        anti_testset = trainset.build_anti_testset()  # Build the anti test-set
        predictions = algo.test(anti_testset)  # Get predictions on the anti test-set
        top_n = {}
        for uid, iid, _, est, _ in predictions:
            if uid not in top_n:
                top_n[uid] = []
            top_n[uid].append((iid, est))
        for uid, user_ratings in top_n.items():
            user_ratings.sort(key=lambda x: x[1], reverse=True)
            top_n[uid] = user_ratings[:eval_config.top_n_value]  # Keep the top-n recommendations
        anti_testset_top_n = top_n
        return anti_testset_top_n, testset


def generate_full_top_n(algo, ratings_dataset, eval_config):
    """Generate top-n recommendations for each user with the full training set"""
    full_trainset = ratings_dataset.build_full_trainset()  # Build the full training set
    algo.fit(full_trainset)  # Train the algorithm on the full training set
    anti_testset = full_trainset.build_anti_testset()  # Build the anti test-set
    predictions = algo.test(anti_testset)  # Get predictions on the anti test-set
    top_n = {}
    for uid, iid, _, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:eval_config.top_n_value]  # Keep the top-n recommendations
    anti_testset_top_n = top_n
    return anti_testset_top_n

def precomputed_information(movie_data):
    """Return a dictionary that precomputes relevant information for evaluating in full mode

    Dictionary keys:
    - precomputed_dict["item_to_rank"] : contains a dictionary mapping movie ids to rankings
    - (-- for your project, add other relevant information here -- )
    """
    # Initialize an empty dictionary to store the item_id to rank mapping
    item_to_rank = {}
    # Count ratings per movie to measure popularity
    ratings_count = movie_data.groupby('movieId').size().sort_values(ascending=False)
    # Assign ranks to movies based on their popularity (rank 1 = most rated)
    for rank, (movie_id, _) in enumerate(ratings_count.items(), start=1):
        item_to_rank[movie_id] = rank
    # Create the precomputed dictionary
    precomputed_dict = {}
    precomputed_dict["item_to_rank"] = item_to_rank
    return precomputed_dict


def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):
    """Create a DataFrame evaluating various models on metrics specified in an evaluation config."""
    evaluation_dict = {}
    for model_name, model, arguments in eval_config.models:
        print(f'Handling model {model_name}')
        algo = model(**arguments)
        evaluation_dict[model_name] = {}

        # Type 1 : split evaluations
        if len(eval_config.split_metrics) > 0:
            print('Training split predictions')
            predictions = generate_split_predictions(algo, sp_ratings, eval_config)
            for metric in eval_config.split_metrics:
                print(f'- computing metric {metric}')
                assert metric in available_metrics['split']
                evaluation_function, parameters = available_metrics["split"][metric]
                evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters)

        # Type 2 : loo evaluations
        if len(eval_config.loo_metrics) > 0:
            print('Training loo predictions')
            anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.loo_metrics:
                assert metric in available_metrics['loo']
                evaluation_function, parameters = available_metrics["loo"][metric]
                evaluation_dict[model_name][metric] = evaluation_function(anti_testset_top_n, testset, **parameters)

        # Type 3 : full evaluations
        if len(eval_config.full_metrics) > 0:
            print('Training full predictions')
            anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.full_metrics:
                assert metric in available_metrics['full']
                evaluation_function, parameters = available_metrics["full"][metric]
                evaluation_dict[model_name][metric] = evaluation_function(
                    anti_testset_top_n,
                    **precomputed_dict,
                    **parameters
                )

    return pd.DataFrame.from_dict(evaluation_dict).T
```
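As a quick sanity check, the helper functions above can be exercised directly with any Surprise algorithm before running the full report. A minimal sketch, using `SVD` purely as a stand-in for the models configured in `EvalConfig`:

``` python
# Smoke test of the split and LOO helpers with an arbitrary Surprise algorithm.
# SVD is only a stand-in here; the real models come from EvalConfig.models.
from surprise import SVD

sp_ratings = load_ratings(surprise_format=True)
algo = SVD()

predictions = generate_split_predictions(algo, sp_ratings, EvalConfig)
print('rmse:', accuracy.rmse(predictions, verbose=False))

anti_testset_top_n, loo_testset = generate_loo_top_n(algo, sp_ratings, EvalConfig)
print(len(anti_testset_top_n), 'users with top-n lists,', len(loo_testset), 'held-out ratings')
```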
%% Cell type:markdown id:f7e83d1d tags:
# 2. Evaluation metrics
Implement evaluation metrics either for rating predictions (split metrics) or for top-n recommendations (loo metrics, full metrics).
%% Cell type:code id:f1849e55 tags:
``` python
def get_hit_rate(anti_testset_top_n, testset):
    """Compute the average hit over the users (loo metric)

    A hit (1) happens when the movie in the testset has been picked by the top-n recommender
    A fail (0) happens when the movie in the testset has not been picked by the top-n recommender
    """
    # -- implement the function get_hit_rate --
    hits = 0
    total_users = len(testset)
    for uid, true_iid, _ in testset:
        if uid in anti_testset_top_n and true_iid in {iid for iid, _ in anti_testset_top_n[uid]}:
            hits += 1
    hit_rate = hits / total_users
    return hit_rate


def get_novelty(anti_testset_top_n, item_to_rank):
    """Compute the average novelty of the top-n recommendations over the users (full metric)

    The novelty is defined as the average popularity rank of the recommended movies
    """
    # -- implement the function get_novelty --
    total_rank_sum = 0
    total_recommendations = 0
    for uid, recommendations in anti_testset_top_n.items():
        for iid, _ in recommendations:
            if iid in item_to_rank:
                total_rank_sum += item_to_rank[iid]
                total_recommendations += 1
    if total_recommendations == 0:
        return 0  # Avoid division by zero
    average_rank_sum = total_rank_sum / total_recommendations
    return average_rank_sum
```
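A tiny hand-built example shows what the two metrics return; the user and movie ids below are made up purely for illustration.

``` python
# Toy example of the metrics above; ids and scores are invented for illustration.
toy_top_n = {
    'u1': [('m1', 4.8), ('m2', 4.5)],  # u1's top-n recommendations
    'u2': [('m3', 4.9), ('m1', 4.2)],  # u2's top-n recommendations
}
toy_testset = [('u1', 'm2', 5.0), ('u2', 'm9', 4.0)]  # one held-out rating per user

print(get_hit_rate(toy_top_n, toy_testset))                 # 0.5: only u1's held-out movie was recommended
print(get_novelty(toy_top_n, {'m1': 1, 'm2': 2, 'm3': 3}))  # (1 + 2 + 3 + 1) / 4 = 1.75
```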
%% Cell type:markdown id:1a9855b3 tags:
# 3. Evaluation workflow
Load data, evaluate models and save the experimental outcomes
%% Cell type:code id:704f4d2a tags:
``` python
AVAILABLE_METRICS = {
    "split": {
        "mae": (accuracy.mae, {'verbose': False}),
        "rmse": (accuracy.rmse, {'verbose': False})
        # Add new split metrics here if needed
    },
    "loo": {
        "hit_rate": (get_hit_rate, {}),
        # Add new loo metrics here if needed
    },
    "full": {
        "novelty": (get_novelty, {}),
        # Add new full metrics here if needed
    }
}

sp_ratings = load_ratings(surprise_format=True)
precomputed_dict = precomputed_information(pd.read_csv("../data/tiny/evidence/ratings.csv"))
evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)
export_evaluation_report(evaluation_report)
```
%% Output
Handling model baseline_1
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
Handling model baseline_2
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
Handling model baseline_3
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
Handling model baseline_4
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
The data has been exported to the evaluation report
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[185], line 20
18 precomputed_dict = precompute_information(pd.read_csv("../data/small/evidence/ratings.csv"))
19 evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)
---> 20 export_evaluation_report(evaluation_report)
File ~/vscodeworkspace/recomsys/Analytics_UI/loaders.py:49, in export_evaluation_report(report)
47 report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv"
48 export_path = os.path.join("../data", "small", "evaluations", report_name)
---> 49 df_items.to_csv(export_path, index=False)
50 print("The data has been exported to the evaluation report")
51 return df_items
AttributeError: 'dict' object has no attribute 'to_csv'
                 mae      rmse  hit_rate     novelty
baseline_1  1.544940  1.776982  0.112150   99.405607
baseline_2  1.491063  1.844761  0.009346  429.942991
baseline_3  0.868139  1.066303  0.074766   99.405607
baseline_4  0.727803  0.927636  0.158879   57.328037
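If needed, the exported CSV can be read back with pandas for later analysis. This sketch assumes it is read the same day it was generated, since the export in loaders.py (diffed below) embeds the date in the file name:

``` python
# Read today's exported report back for inspection. Note that the export uses
# index=False, so the model names (the DataFrame index) are not stored in the CSV.
report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv"
report = pd.read_csv(f"../data/tiny/evaluations/{report_name}")
print(report)
```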
loaders.py
@@ -24,7 +24,7 @@ def load_ratings(surprise_format=False):
         return surprise_data
     else:
         return df_ratings
-print(load_ratings())
 
 def load_items():
     """Loads items data.
@@ -44,12 +44,8 @@ def export_evaluation_report(report):
     Returns:
         DataFrame: Merged ratings and items data.
     """
-    df_ratings = load_ratings()
-    df_items = load_items()
-    df_merge = pd.merge(df_ratings, df_items, on='movieId')
     report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv"
-    export_path = os.path.join("../data", "small", "evaluations", report_name)
-    df_merge.to_csv(export_path, index=False)
+    export_path = os.path.join("../data", "tiny", "evaluations", report_name)
+    report.to_csv(export_path, index=False)
     print("The data has been exported to the evaluation report")
-    return df_merge
\ No newline at end of file
+    return report
\ No newline at end of file
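Putting the changed lines together, the corrected `export_evaluation_report` plausibly ends up as sketched below; the exported lines themselves come straight from the hunk above, while the imports and the docstring wording are filled in by assumption.

``` python
# Approximate post-commit version of export_evaluation_report, assembled from the
# diff above. The os/pandas imports and the docstring wording are assumptions.
import os

import pandas as pd


def export_evaluation_report(report):
    """Export the evaluation report DataFrame to a dated CSV file and return it."""
    report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv"
    export_path = os.path.join("../data", "tiny", "evaluations", report_name)
    report.to_csv(export_path, index=False)
    print("The data has been exported to the evaluation report")
    return report
```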