    {
     "cells": [
      {
       "cell_type": "markdown",
       "id": "a665885b",
       "metadata": {},
       "source": [
        "# Evaluator Module\n",
        "The Evaluator module creates evaluation reports.\n",
        "\n",
        "Reports contain the evaluation metrics computed for each model specified in the evaluation config."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 9,
       "id": "6aaf9140",
       "metadata": {},
    
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "The autoreload extension is already loaded. To reload it, use:\n",
          "  %reload_ext autoreload\n"
         ]
        }
       ],
       "source": [
        "# reload modules automatically before executing code\n",
        "%load_ext autoreload\n",
        "%autoreload 2\n",
        "\n",
        "# third-party imports\n",
        "import numpy as np \n",
        "import pandas as pd\n",
        "# -- add new imports here --\n",
        "\n",
        "# local imports\n",
        "from configs import EvalConfig\n",
        "from constants import Constant as C\n",
        "from loaders import export_evaluation_report\n",
        "from loaders import load_ratings\n",
        "# -- add new imports here --\n",
        "from surprise.model_selection import train_test_split\n",
        "from surprise import accuracy\n",
    
        "from surprise.model_selection import LeaveOneOut\n",
        "from collections import Counter"
       ]
      },
      {
       "cell_type": "markdown",
       "id": "d47c24a4",
       "metadata": {},
       "source": [
        "# 1. Model validation functions\n",
        "Validation functions perform cross-validation on recommender system models."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 2,
       "id": "d6d82188",
       "metadata": {},
       "outputs": [],
       "source": [
        "def generate_split_predictions(algo, ratings_dataset, eval_config):\n",
        "    \"\"\"Generate predictions on a random test set specified in eval_config\"\"\"\n",
        "    # -- implement the function generate_split_predictions --\n",
        "    \n",
        "    # Splitting the data into train and test sets\n",
        "    trainset, testset = train_test_split(ratings_dataset, test_size=eval_config.test_size)\n",
        "    # Training the algorithm on the train data set\n",
        "    algo.fit(trainset)\n",
        "    # Predict ratings for the testset\n",
        "    predictions = algo.test(testset)\n",
        "    return predictions\n",
        "\n",
        "\n",
        "def generate_loo_top_n(algo, ratings_dataset, eval_config):\n",
        "    \"\"\"Generate top-n recommendations for each user on a random Leave-one-out split (LOO)\"\"\"\n",
        "    # -- implement the function generate_loo_top_n --\n",
        "    # Create a LeaveOneOut split\n",
        "    loo = LeaveOneOut(n_splits=1)\n",
        "    for trainset, testset in loo.split(ratings_dataset):\n",
        "        algo.fit(trainset)  # Train the algorithm on the training set\n",
        "        anti_testset = trainset.build_anti_testset()  # Build the anti test-set\n",
        "        predictions = algo.test(anti_testset)  # Get predictions on the anti test-set\n",
        "        top_n = {}\n",
        "        for uid, iid, _, est, _ in predictions:\n",
        "            if uid not in top_n:\n",
        "                top_n[uid] = []\n",
        "            top_n[uid].append((iid, est))\n",
        "        for uid, user_ratings in top_n.items():\n",
        "            user_ratings.sort(key=lambda x: x[1], reverse=True)\n",
        "            top_n[uid] = user_ratings[:eval_config.top_n_value]  # Get top-N recommendations\n",
        "        anti_testset_top_n = top_n\n",
        "        return anti_testset_top_n, testset\n",
        "\n",
        "def generate_full_top_n(algo, ratings_dataset, eval_config):\n",
        "    \"\"\"Generate top-n recommendations for each user using the full training set (full mode)\"\"\"\n",
        "    full_trainset = ratings_dataset.build_full_trainset()  # Build the full training set\n",
        "    algo.fit(full_trainset)  # Train the algorithm on the full training set\n",
        "    anti_testset = full_trainset.build_anti_testset()  # Build the anti test-set\n",
        "    predictions = algo.test(anti_testset)  # Get predictions on the anti test-set\n",
        "    top_n = {}\n",
        "    for uid, iid, _, est, _ in predictions:\n",
        "        if uid not in top_n:\n",
        "            top_n[uid] = []\n",
        "        top_n[uid].append((iid, est))\n",
        "    for uid, user_ratings in top_n.items():\n",
        "        user_ratings.sort(key=lambda x: x[1], reverse=True)\n",
        "        top_n[uid] = user_ratings[:eval_config.top_n_value]  # Get top-N recommendations\n",
        "    anti_testset_top_n = top_n\n",
        "    return anti_testset_top_n\n",
        "\n",
    
        "def precomputed_information(movie_data):\n",
        "    \"\"\" Returns a dictionary that precomputes relevant information for evaluating in full mode\n",
        "    \n",
        "    Dictionary keys:\n",
        "    - precomputed_dict[\"item_to_rank\"] : contains a dictionary mapping movie ids to rankings\n",
        "    - (-- for your project, add other relevant information here -- )\n",
        "    \"\"\"\n",
    
        "    # Initialize an empty dictionary to store item_id to rank mapping\n",
        "    item_to_rank = {}\n",
        "    \n",
    
        "    # Calculate popularity rank for each movie\n",
        "    ratings_count = movie_data.groupby('movieId').size().sort_values(ascending=False)\n",
        "    \n",
        "    # Assign ranks to movies based on their popularity\n",
        "    for rank, (movie_id, _) in enumerate(ratings_count.items(), start=1):\n",
        "        item_to_rank[movie_id] = rank\n",
        "    \n",
        "    # Create the precomputed dictionary\n",
        "    precomputed_dict = {}\n",
        "    precomputed_dict[\"item_to_rank\"] = item_to_rank\n",
        "\n",
        "    return precomputed_dict\n",
        "\n",
        "def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):\n",
        "    \"\"\" Create a DataFrame evaluating various models on metrics specified in an evaluation config.  \n",
        "    \"\"\"\n",
        "    evaluation_dict = {}\n",
        "    for model_name, model, arguments in eval_config.models:\n",
        "        print(f'Handling model {model_name}')\n",
        "        algo = model(**arguments)\n",
        "        evaluation_dict[model_name] = {}\n",
        "        \n",
        "        # Type 1 : split evaluations\n",
        "        if len(eval_config.split_metrics) > 0:\n",
        "            print('Training split predictions')\n",
        "            predictions = generate_split_predictions(algo, sp_ratings, eval_config)\n",
        "            for metric in eval_config.split_metrics:\n",
        "                print(f'- computing metric {metric}')\n",
        "                assert metric in available_metrics['split']\n",
        "                evaluation_function, parameters =  available_metrics[\"split\"][metric]\n",
        "                evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters) \n",
        "        # Type 2 : loo evaluations\n",
        "        if len(eval_config.loo_metrics) > 0:\n",
        "            print('Training loo predictions')\n",
        "            anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)\n",
        "            for metric in eval_config.loo_metrics:\n",
        "                assert metric in available_metrics['loo']\n",
        "                evaluation_function, parameters =  available_metrics[\"loo\"][metric]\n",
        "                evaluation_dict[model_name][metric] = evaluation_function(anti_testset_top_n, testset, **parameters)\n",
        "        \n",
        "        # Type 3 : full evaluations\n",
        "        if len(eval_config.full_metrics) > 0:\n",
        "            print('Training full predictions')\n",
        "            anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)\n",
        "            for metric in eval_config.full_metrics:\n",
        "                assert metric in available_metrics['full']\n",
        "                evaluation_function, parameters =  available_metrics[\"full\"][metric]\n",
        "                evaluation_dict[model_name][metric] = evaluation_function(\n",
        "                    anti_testset_top_n,\n",
        "                    **precomputed_dict,\n",
        "                    **parameters\n",
        "                )\n",
        "        \n",
        "    return pd.DataFrame.from_dict(evaluation_dict).T"
       ]
      },
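      {
       "cell_type": "markdown",
       "id": "b3f1c2a9",
       "metadata": {},
       "source": [
        "An optional sanity check of the split pipeline above: a minimal sketch that builds a tiny hand-made ratings DataFrame, wraps it in a Surprise `Dataset`, and runs `generate_split_predictions` with an `SVD` model. The toy data and the `SimpleNamespace` config are hypothetical stand-ins for the real ratings and `EvalConfig`."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "id": "c4d2e1f8",
       "metadata": {},
       "outputs": [],
       "source": [
        "# Sketch: exercise generate_split_predictions on a tiny hand-made dataset.\n",
        "# The toy ratings and the SimpleNamespace config are hypothetical, not project data.\n",
        "from types import SimpleNamespace\n",
        "\n",
        "from surprise import SVD, Dataset, Reader\n",
        "\n",
        "toy_ratings = pd.DataFrame({\n",
        "    'userId':  [1, 1, 1, 2, 2, 2, 3, 3, 3],\n",
        "    'movieId': [10, 20, 30, 10, 20, 40, 20, 30, 40],\n",
        "    'rating':  [4.0, 3.5, 5.0, 2.0, 4.5, 3.0, 5.0, 4.0, 3.5],\n",
        "})\n",
        "toy_dataset = Dataset.load_from_df(toy_ratings, Reader(rating_scale=(1, 5)))\n",
        "\n",
        "toy_config = SimpleNamespace(test_size=0.25, top_n_value=2)\n",
        "\n",
        "toy_predictions = generate_split_predictions(SVD(), toy_dataset, toy_config)\n",
        "print(len(toy_predictions), 'test predictions, first one:', toy_predictions[0])"
       ]
      },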
      {
       "cell_type": "markdown",
       "id": "f7e83d1d",
       "metadata": {},
       "source": [
        "# 2. Evaluation metrics\n",
        "Implement evaluation metrics either for rating predictions (split metrics) or for top-n recommendations (loo and full metrics)."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 5,
       "id": "f1849e55",
       "metadata": {},
       "outputs": [],
       "source": [
        "def get_hit_rate(anti_testset_top_n, testset):\n",
        "    \"\"\"Compute the average hit over the users (loo metric)\n",
        "    \n",
        "    A hit (1) happens when the movie in the testset has been picked by the top-n recommender\n",
        "    A fail (0) happens when the movie in the testset has not been picked by the top-n recommender\n",
        "    \"\"\"\n",
        "    # -- implement the function get_hit_rate --\n",
        "\n",
        "    hits = 0\n",
        "    total_users = len(testset)\n",
        "    for uid, true_iid, _ in testset:\n",
        "        if uid in anti_testset_top_n and true_iid in {iid for iid, _ in anti_testset_top_n[uid]}:\n",
        "            hits += 1\n",
        "    hit_rate = hits / total_users\n",
        "\n",
        "    return hit_rate\n",
        "\n",
        "def get_novelty(anti_testset_top_n, item_to_rank):\n",
        "    \"\"\"Compute the average novelty of the top-n recommendation over the users (full metric)\n",
        "    \n",
        "    The novelty is defined as the average ranking of the movies recommended\n",
        "    \"\"\"\n",
        "    # -- implement the function get_novelty --\n",
        "    total_rank_sum = 0\n",
        "    total_recommendations = 0\n",
        "    for uid, recommendations in anti_testset_top_n.items():\n",
        "        for iid, _ in recommendations:\n",
        "            if iid in item_to_rank:\n",
        "                total_rank_sum += item_to_rank[iid]\n",
        "                total_recommendations += 1\n",
        "    if total_recommendations == 0:\n",
        "        return 0  # Avoid division by zero\n",
        "    average_rank_sum = total_rank_sum / total_recommendations \n",
        "    \n",
        "    return average_rank_sum"
       ]
      },
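      {
       "cell_type": "markdown",
       "id": "d5e3f2a1",
       "metadata": {},
       "source": [
        "A quick self-contained check of the two metrics above on hand-made inputs (all values below are invented for illustration). One user's left-out movie appears in its top-n list and the other's does not, so the expected hit rate is 0.5; the expected novelty is the mean popularity rank of the four recommended movies, (1 + 5 + 2 + 10) / 4 = 4.5."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "id": "e6f4a3b2",
       "metadata": {},
       "outputs": [],
       "source": [
        "# Sketch: call get_hit_rate and get_novelty on tiny hand-made structures (hypothetical values).\n",
        "toy_top_n = {\n",
        "    'u1': [(101, 4.8), (102, 4.5)],  # recommended (movie_id, estimated_rating) pairs\n",
        "    'u2': [(103, 4.9), (104, 4.1)],\n",
        "}\n",
        "toy_testset = [('u1', 101, 5.0), ('u2', 999, 4.0)]  # left-out (user, movie, rating) triples\n",
        "toy_item_to_rank = {101: 1, 102: 5, 103: 2, 104: 10}  # popularity ranks (1 = most rated)\n",
        "\n",
        "print('hit rate:', get_hit_rate(toy_top_n, toy_testset))      # expected 0.5\n",
        "print('novelty :', get_novelty(toy_top_n, toy_item_to_rank))  # expected 4.5"
       ]
      },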
      {
       "cell_type": "markdown",
       "id": "1a9855b3",
       "metadata": {},
       "source": [
        "# 3. Evaluation workflow\n",
        "Load data, evaluate models and save the experimental outcomes"
       ]
      },
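      {
       "cell_type": "markdown",
       "id": "f7a5b4c3",
       "metadata": {},
       "source": [
        "The workflow below imports `EvalConfig` from `configs`, which is not shown in this notebook. As a reference only, here is a hypothetical sketch of the attributes that `create_evaluation_report` and the helpers above expect such a config to expose; the real `configs.EvalConfig` may differ."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "id": "a8b6c5d4",
       "metadata": {},
       "outputs": [],
       "source": [
        "# Hypothetical sketch of the config shape used above (not the project's configs.EvalConfig).\n",
        "from surprise import SVD, NormalPredictor\n",
        "\n",
        "class SketchEvalConfig:\n",
        "    # (model_name, model_class, constructor_kwargs) triples\n",
        "    models = [\n",
        "        ('baseline_random', NormalPredictor, {}),\n",
        "        ('svd', SVD, {'n_factors': 50}),\n",
        "    ]\n",
        "    split_metrics = ['mae', 'rmse']  # keys of AVAILABLE_METRICS['split']\n",
        "    loo_metrics = ['hit_rate']       # keys of AVAILABLE_METRICS['loo']\n",
        "    full_metrics = ['novelty']       # keys of AVAILABLE_METRICS['full']\n",
        "    test_size = 0.25                 # fraction held out by generate_split_predictions\n",
        "    top_n_value = 10                 # length of the top-n recommendation lists"
       ]
      },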
      {
       "cell_type": "code",
       "execution_count": 6,
       "id": "704f4d2a",
       "metadata": {},
       "outputs": [
        {
         "ename": "NameError",
         "evalue": "name 'accuracy' is not defined",
         "output_type": "error",
         "traceback": [
          "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
          "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
          "Cell \u001b[0;32mIn[6], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m AVAILABLE_METRICS \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msplit\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[0;32m----> 3\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmae\u001b[39m\u001b[38;5;124m\"\u001b[39m: (\u001b[43maccuracy\u001b[49m\u001b[38;5;241m.\u001b[39mmae, {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mverbose\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m}),\n\u001b[1;32m      4\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrmse\u001b[39m\u001b[38;5;124m\"\u001b[39m: (accuracy\u001b[38;5;241m.\u001b[39mrmse, {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mverbose\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m})\n\u001b[1;32m      5\u001b[0m         \u001b[38;5;66;03m# Add new split metrics here if needed\u001b[39;00m\n\u001b[1;32m      6\u001b[0m     },\n\u001b[1;32m      7\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloo\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m      8\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhit_rate\u001b[39m\u001b[38;5;124m\"\u001b[39m: (get_hit_rate, {}),\n\u001b[1;32m      9\u001b[0m         \u001b[38;5;66;03m# Add new loo metrics here if needed\u001b[39;00m\n\u001b[1;32m     10\u001b[0m     },\n\u001b[1;32m     11\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfull\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[1;32m     12\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnovelty\u001b[39m\u001b[38;5;124m\"\u001b[39m: (get_novelty, {}),\n\u001b[1;32m     13\u001b[0m         \u001b[38;5;66;03m# Add new full metrics here if needed\u001b[39;00m\n\u001b[1;32m     14\u001b[0m     }\n\u001b[1;32m     15\u001b[0m }\n\u001b[1;32m     17\u001b[0m sp_ratings \u001b[38;5;241m=\u001b[39m load_ratings(surprise_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m     18\u001b[0m precomputed_dict \u001b[38;5;241m=\u001b[39m precomputed_information(pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata/tiny/evidence/ratings.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n",
          "\u001b[0;31mNameError\u001b[0m: name 'accuracy' is not defined"
         ]
        }
       ],
       "source": [
        "AVAILABLE_METRICS = {\n",
        "    \"split\": {\n",
        "        \"mae\": (accuracy.mae, {'verbose': False}),\n",
        "        \"rmse\": (accuracy.rmse, {'verbose': False})\n",
        "        # Add new split metrics here if needed\n",
        "    },\n",
        "    \"loo\": {\n",
        "        \"hit_rate\": (get_hit_rate, {}),\n",
        "        # Add new loo metrics here if needed\n",
        "    },\n",
        "    \"full\": {\n",
        "        \"novelty\": (get_novelty, {}),\n",
        "        # Add new full metrics here if needed\n",
        "    }\n",
        "}\n",
        "\n",
        "sp_ratings = load_ratings(surprise_format=True)\n",
        "precomputed_dict = precomputed_information(pd.read_csv(\"data/tiny/evidence/ratings.csv\"))\n",
        "evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n",
        "export_evaluation_report(evaluation_report)"
       ]
      }
     ],
     "metadata": {
      "kernelspec": {
    
       "display_name": "Python 3",
       "language": "python",
    
       "name": "python3"
      },
      "language_info": {
       "codemirror_mode": {
        "name": "ipython",
        "version": 3
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
       "version": "3.12.2"
      }
     },
     "nbformat": 4,
     "nbformat_minor": 5
    }