Commit a442a7bf authored by Adrien Payen

update Analytics

parent 7dd59e1b
%% Cell type:code id: tags:
``` python
# Reload modules automatically before entering the execution of code
%load_ext autoreload
%autoreload 2
# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
# Constants and functions
from constants import Constant as C
from loaders import load_ratings
from loaders import load_items
from tabulate import tabulate
# Call the load_items() function and create a variable df_movies
df_movies = load_items()
# Display the DataFrame
print("Display The Movies : ")
display(df_movies)
# Call the load_ratings() function and create a variable df_ratings
df_ratings = load_ratings()
# Display the DataFrame
print("Display The Ratings : ")
display(df_ratings)
```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
Display The Movies : 
Display The Ratings : 
%% Cell type:code id: tags:
``` python
# NUMBER OF MOVIES
n_movies = df_movies['title'].nunique()
print(f"Number of movies: {n_movies}")
```
%% Output
Number of movies: 912
%% Cell type:code id: tags:
``` python
# THE YEAR RANGE
df_movies['annee'] = df_movies['title'].str.extract(r'\((.{4})\)')
df_movies['annee'] = pd.to_numeric(df_movies['annee'], errors='coerce')
min_range = int(df_movies['annee'].min())
max_range = int(df_movies['annee'].max())
print("Minimum range:", min_range)
print("Maximum range:", max_range)
```
%% Output
Minimum range: 1921
Maximum range: 2016
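%% Cell type:markdown id: tags:
The pattern `\((.{4})\)` captures any four characters between parentheses and relies on `errors='coerce'` to discard non-numeric captures. A slightly stricter sketch, assuming release years are always written as four digits:
%% Cell type:code id: tags:
``` python
# Stricter year extraction (sketch): require exactly four digits inside the parentheses.
# errors='coerce' is kept so titles without a year still become NaN.
df_movies['annee'] = pd.to_numeric(
    df_movies['title'].str.extract(r'\((\d{4})\)', expand=False),
    errors='coerce'
)
```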
%% Cell type:code id: tags:
``` python
# LIST OF MOVIE GENRES
def tabulate_genres(df_movies):
    """Tabulate list of movie genres."""
    # Split genres and explode
    df_movies['genres'] = df_movies['genres'].str.split('|')
    df_movies = df_movies.explode('genres')
    unique_genres = sorted(df_movies['genres'].unique())
    # Tabulate
    print("\nList of all genres:")
    genres_table = [[genre, "|"] for genre in unique_genres]
    print(tabulate(genres_table, tablefmt="plain", numalign="left"))
# Call the tabulate_genres function
tabulate_genres(df_movies)
```
%% Output
List of all genres:
(no genres listed) |
Action |
Adventure |
Animation |
Children |
Comedy |
Crime |
Documentary |
Drama |
Fantasy |
Film-Noir |
Horror |
IMAX |
Musical |
Mystery |
Romance |
Sci-Fi |
Thriller |
War |
Western |
%% Cell type:code id: tags:
``` python
# THE TOTAL NUMBER OF RATINGS
n_ratings = df_ratings['rating'].count()
print(f"Number of ratings: {n_ratings}")
```
%% Output
Number of ratings: 5296
%% Cell type:code id: tags:
``` python
# THE NUMBER OF UNIQUE USERS
n_users = df_ratings['userId'].nunique()
print(f"Number of users: {n_users}")
```
%% Output
Number of users: 107
%% Cell type:code id: tags:
``` python
# THE NUMBER OF UNIQUE MOVIES (IN THE RATING MATRIX)
unique_movies = df_ratings["movieId"].unique()
num_unique_movies = len(unique_movies)
print(f"Number of unique movies : {num_unique_movies}")
```
%% Output
Number of unique movies : 834
%% Cell type:code id: tags:
``` python
# THE NUMBER OF RATINGS OF THE MOST RATED MOVIES
def most_rated_movies_ratings_count(df_ratings):
    movie_ratings_count = df_ratings.groupby('movieId')['rating'].count()
    most_rated_movies = movie_ratings_count[movie_ratings_count == movie_ratings_count.max()]
    print(f"Number of ratings of the most rated movie(s): {most_rated_movies.max()}")
most_rated_movies_ratings_count(df_ratings)
```
%% Output
Number of ratings of the most rated movie(s): 75
%% Cell type:code id: tags:
``` python
# THE NUMBER OF RATINGS OF THE LEAST RATED MOVIES
def least_rated_movies_ratings_count(df_ratings):
    movie_ratings_count = df_ratings.groupby('movieId')['rating'].count()
    least_rated_movies = movie_ratings_count[movie_ratings_count == movie_ratings_count.min()]
    print("Number of ratings of the least rated movie(s):", least_rated_movies.min())
least_rated_movies_ratings_count(df_ratings)
```
%% Output
Number of ratings of the least rated movie(s): 1
%% Cell type:code id: tags:
``` python
# ALL THE POSSIBLE RATING VALUES, FROM THE SMALLEST TO THE HIGHEST
def all_possible_ratings(df_ratings):
    rating_values = sorted(df_ratings['rating'].unique())
    print("All possible rating values, from smallest to highest:")
    for rating in rating_values:
        print(rating)
all_possible_ratings(df_ratings)
```
%% Output
All possible rating values, from smallest to highest:
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
%% Cell type:code id: tags:
``` python
# THE NUMBER OF MOVIES THAT WERE NOT RATED AT ALL
def unrated_movies_count(df_ratings, df_movies):
    rated_movies = df_ratings['movieId'].unique() if 'movieId' in df_ratings.columns else []
    unrated_movies_count = df_movies[~df_movies.index.isin(rated_movies)].shape[0]
    print("Number of movies that were not rated at all:", unrated_movies_count)
unrated_movies_count(df_ratings, df_movies)
```
%% Output
Number of movies that were not rated at all: 78
%% Cell type:markdown id: tags:
LONG-TAIL PROPERTY
%% Cell type:code id: tags:
``` python
# Rating Frequency Distribution
merged_df = pd.merge(df_ratings, df_movies, on='movieId')
rating_counts = merged_df['movieId'].value_counts()
value_counts = rating_counts.value_counts().sort_index()
plt.figure(figsize=(20, 6))
plt.plot(value_counts.values, value_counts.index, marker='o', color='skyblue', linestyle='-')  # number of movies on x, rating count on y
plt.title('Rating Frequency Distribution')
plt.xlabel('Number of Movies')
plt.ylabel('Number of Ratings')
plt.xticks(rotation=45)
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
```
%% Output
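%% Cell type:markdown id: tags:
The plot above illustrates the long-tail property: a small head of popular movies concentrates a large share of the ratings, while most movies receive only a few. A minimal sketch of one way to quantify this with the data already loaded (the 20% cut-off is an illustrative choice, not part of the original analysis):
%% Cell type:code id: tags:
``` python
# Share of all ratings captured by the most rated 20% of movies (illustrative threshold).
counts = df_ratings['movieId'].value_counts()           # ratings per movie, most rated first
top_k = max(1, int(0.2 * len(counts)))                  # size of the "head"
head_share = counts.iloc[:top_k].sum() / counts.sum()   # fraction of ratings falling in the head
print(f"The most rated 20% of movies receive {head_share:.1%} of all ratings")
```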
%% Cell type:code id: tags:
``` python
M = df_ratings['userId'].nunique()
N = df_ratings['movieId'].nunique()
user_mapper = dict(zip(np.unique(df_ratings["userId"]), list(range(M))))
movie_mapper = dict(zip(np.unique(df_ratings["movieId"]), list(range(N))))
user_inv_mapper = dict(zip(list(range(M)), np.unique(df_ratings["userId"])))
movie_inv_mapper = dict(zip(list(range(N)), np.unique(df_ratings["movieId"])))
user_index = [user_mapper[i] for i in df_ratings['userId']]
item_index = [movie_mapper[i] for i in df_ratings['movieId']]
X = csr_matrix((df_ratings["rating"], (user_index,item_index)), shape=(M,N))
```
%% Cell type:code id: tags:
``` python
def create_X(df):
    """
    Generates a sparse matrix from ratings dataframe.
    Args:
        df: pandas dataframe containing 3 columns (userId, movieId, rating)
    Returns:
        X: sparse matrix
        user_mapper: dict that maps user id's to user indices
        user_inv_mapper: dict that maps user indices to user id's
        movie_mapper: dict that maps movie id's to movie indices
        movie_inv_mapper: dict that maps movie indices to movie id's
    """
    M = df['userId'].nunique()
    N = df['movieId'].nunique()
    user_mapper = dict(zip(np.unique(df["userId"]), list(range(M))))
    movie_mapper = dict(zip(np.unique(df["movieId"]), list(range(N))))
    user_inv_mapper = dict(zip(list(range(M)), np.unique(df["userId"])))
    movie_inv_mapper = dict(zip(list(range(N)), np.unique(df["movieId"])))
    user_index = [user_mapper[i] for i in df['userId']]
    item_index = [movie_mapper[i] for i in df['movieId']]
    X = csr_matrix((df["rating"], (user_index,item_index)), shape=(M,N))
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
# Assuming df_ratings contains your ratings dataframe
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_X(df_ratings)
# Extract the 100 first users and 100 first items
X_sub = X[:100, :100]
# Plot the non-zero values of the sparse matrix
plt.figure(figsize=(8, 6))
plt.spy(X_sub, markersize=1)
plt.title('Non-zero values of a sparse matrix')
plt.xlabel('Movie Index')
plt.ylabel('User Index')
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
n_total = X.shape[0]*X.shape[1]
n_ratings = X.nnz
sparsity = n_ratings/n_total
print(f"Matrix sparsity: {round(sparsity*100,2)}%")
```
%% Output
Matrix sparsity: 5.93%
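%% Cell type:markdown id: tags:
A small complement to the cell above: the printed figure is the fill ratio (density) of the matrix, i.e. the fraction of user-movie cells that actually hold a rating; the sparsity in the strict sense is its complement.
%% Cell type:code id: tags:
``` python
# Density vs. sparsity (sketch): density is the fraction of filled cells,
# sparsity is the fraction of empty cells.
density = X.nnz / (X.shape[0] * X.shape[1])
print(f"Density:  {density:.2%}")
print(f"Sparsity: {1 - density:.2%}")
```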
...
%% Cell type:code id: tags:
``` python
# Reload modules automatically before entering the execution of code
%load_ext autoreload
%autoreload 2
# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.linear_model import LinearRegression
# Constants and functions
from constants import Constant as C
# We use a pd.read_csv() so importing the loaders is not necessary
# from loaders import load_ratings
# from loaders import load_items
from tabulate import tabulate
# Read the movies CSV file and create a variable df_movies
df_movies = pd.read_csv("data/tiny/content/movies.csv")
# Display the DataFrame
print("Display The Movies : ")
display(df_movies)
# Read the ratings CSV file and create a variable df_ratings
df_ratings = pd.read_csv("data/tiny/evidence/ratings.csv")
# Display the DataFrame
print("Display The Ratings : ")
display(df_ratings)
```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
Display The Movies : 
Display The Ratings : 
%% Cell type:code id: tags:
``` python
# NUMBER OF MOVIES
n_movies = df_movies['title'].nunique()
print(f"Number of movies: {n_movies}")
```
%% Output
Number of movies: 912
%% Cell type:code id: tags:
``` python
# THE YEAR RANGE
df_movies['annee'] = df_movies['title'].str.extract(r'\((.{4})\)')
df_movies['annee'] = pd.to_numeric(df_movies['annee'], errors='coerce')
min_range = int(df_movies['annee'].min())
max_range = int(df_movies['annee'].max())
print("Minimum range:", min_range)
print("Maximum range:", max_range)
```
%% Output
Minimum range: 1921
Maximum range: 2016
%% Cell type:code id: tags:
``` python
# LIST OF MOVIE GENRES
def tabulate_genres(df_movies):
    """Tabulate list of movie genres."""
    # Split genres and explode
    df_movies['genres'] = df_movies['genres'].str.split('|')
    df_movies = df_movies.explode('genres')
    unique_genres = sorted(df_movies['genres'].unique())
    # Tabulate
    print("\nList of all genres:")
    genres_table = [[genre, "|"] for genre in unique_genres]
    print(tabulate(genres_table, tablefmt="plain", numalign="left"))
# Call the tabulate_genres function
tabulate_genres(df_movies)
```
%% Output
List of all genres:
(no genres listed) |
Action |
Adventure |
Animation |
Children |
Comedy |
Crime |
Documentary |
Drama |
Fantasy |
Film-Noir |
Horror |
IMAX |
Musical |
Mystery |
Romance |
Sci-Fi |
Thriller |
War |
Western |
%% Cell type:code id: tags:
``` python
# THE TOTAL NUMBER OF RATINGS
n_ratings = df_ratings['rating'].count()
print(f"Number of ratings: {n_ratings}")
```
%% Output
Number of ratings: 5296
%% Cell type:code id: tags:
``` python
# THE NUMBER OF UNIQUE USERS
n_users = df_ratings['userId'].nunique()
print(f"Number of users: {n_users}")
```
%% Output
Number of users: 107
%% Cell type:code id: tags:
``` python
# THE NUMBER OF UNIQUE MOVIES (IN THE RATING MATRIX)
unique_movies = df_ratings["movieId"].unique()
num_unique_movies = len(unique_movies)
print(f"Number of unique movies : {num_unique_movies}")
```
%% Output
Number of unique movies : 834
%% Cell type:code id: tags:
``` python
# THE NUMBER OF RATINGS OF THE MOST RATED MOVIES
def most_rated_movies_ratings_count(df_ratings):
    movie_ratings_count = df_ratings.groupby('movieId')['rating'].count()
    most_rated_movies = movie_ratings_count[movie_ratings_count == movie_ratings_count.max()]
    print(f"Number of ratings of the most rated movie(s): {most_rated_movies.max()}")
most_rated_movies_ratings_count(df_ratings)
```
%% Output
Number of ratings of the most rated movie(s): 75
%% Cell type:code id: tags:
``` python
# THE NUMBER OF RATINGS OF THE LEAST RATED MOVIES
def least_rated_movies_ratings_count(df_ratings):
    movie_ratings_count = df_ratings.groupby('movieId')['rating'].count()
    least_rated_movies = movie_ratings_count[movie_ratings_count == movie_ratings_count.min()]
    print("Number of ratings of the least rated movie(s):", least_rated_movies.min())
least_rated_movies_ratings_count(df_ratings)
```
%% Output
Number of ratings of the least rated movie(s): 1
%% Cell type:code id: tags:
``` python
# ALL THE POSSIBLE RATING VALUES, FROM THE SMALLEST TO THE HIGHEST
def all_possible_ratings(df_ratings):
    rating_values = sorted(df_ratings['rating'].unique())
    print("All possible rating values, from smallest to highest:")
    for rating in rating_values:
        print(rating)
all_possible_ratings(df_ratings)
```
%% Output
All possible rating values, from smallest to highest:
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5
5.0
%% Cell type:code id: tags:
``` python
# THE NUMBER OF MOVIES THAT WERE NOT RATED AT ALL
def unrated_movies_count(df_ratings, df_movies):
    rated_movies = df_ratings['movieId'].unique() if 'movieId' in df_ratings.columns else []
    unrated_movies_count = df_movies[~df_movies.index.isin(rated_movies)].shape[0]
    print("Number of movies that were not rated at all:", unrated_movies_count)
unrated_movies_count(df_ratings, df_movies)
```
%% Output
Number of movies that were not rated at all: 846
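%% Cell type:markdown id: tags:
Note that the count differs from the 78 obtained in the first run: here `df_movies` comes from `pd.read_csv`, so its index is a plain RangeIndex rather than `movieId`, and `df_movies.index.isin(rated_movies)` no longer compares movie ids. A minimal sketch that compares on the `movieId` column instead (assuming, as the merge below does, that the CSV exposes a `movieId` column):
%% Cell type:code id: tags:
``` python
# Sketch: when movieId is a regular column (as after pd.read_csv),
# compare on that column rather than on the DataFrame index.
rated_movies = df_ratings['movieId'].unique()
n_unrated = (~df_movies['movieId'].isin(rated_movies)).sum()
print("Number of movies that were not rated at all:", n_unrated)
```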
%% Cell type:markdown id: tags:
### LONG-TAIL PROPERTY
%% Cell type:code id: tags:
``` python
# Rating Frequency Distribution
merged_df = pd.merge(df_ratings, df_movies, on='movieId')
rating_counts = merged_df['movieId'].value_counts()
value_counts = rating_counts.value_counts().sort_index()
plt.figure(figsize=(20, 6))
plt.plot(value_counts.values, value_counts.index, marker='o', color='skyblue', linestyle='-')  # number of movies on x, rating count on y
plt.title('Rating Frequency Distribution')
plt.xlabel('Number of Movies')
plt.ylabel('Number of Ratings')
plt.xticks(rotation=45)
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
def create_X(df):
    """
    Generates a sparse matrix from ratings dataframe.
    Args:
        df: pandas dataframe containing 3 columns (userId, movieId, rating)
    Returns:
        X: sparse matrix
        user_mapper: dict that maps user id's to user indices
        user_inv_mapper: dict that maps user indices to user id's
        movie_mapper: dict that maps movie id's to movie indices
        movie_inv_mapper: dict that maps movie indices to movie id's
    """
    M = df['userId'].nunique()
    N = df['movieId'].nunique()
    user_mapper = dict(zip(np.unique(df["userId"]), list(range(M))))
    movie_mapper = dict(zip(np.unique(df["movieId"]), list(range(N))))
    user_inv_mapper = dict(zip(list(range(M)), np.unique(df["userId"])))
    movie_inv_mapper = dict(zip(list(range(N)), np.unique(df["movieId"])))
    user_index = [user_mapper[i] for i in df['userId']]
    item_index = [movie_mapper[i] for i in df['movieId']]
    X = csr_matrix((df["rating"], (user_index,item_index)), shape=(M,N))
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
# Assuming df_ratings contains your ratings dataframe
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_X(df_ratings)
# Extract the 100 first users and 100 first items
X_sub = X[:100, :100]
# Plot the non-zero values of the sparse matrix
plt.figure(figsize=(8, 6))
plt.spy(X_sub, markersize=1)
plt.title('Non-zero values of a sparse matrix')
plt.xlabel('Movie Index')
plt.ylabel('User Index')
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
n_total = X.shape[0]*X.shape[1]
n_ratings = X.nnz
sparsity = n_ratings/n_total
print(f"Matrix sparsity: {round(sparsity*100,2)}%")
```
%% Output
Matrix sparsity: 5.93%
...
# local imports
from models import *


class EvalConfig:
    """Configuration settings for evaluation."""

    # List of models to evaluate, each tuple containing model_name, model class, and model parameters (dict)
    models = [
        ("baseline_1", ModelBaseline1, {}),
        ("baseline_2", ModelBaseline2, {}),
        ("baseline_3", ModelBaseline3, {}),
        ("baseline_4", ModelBaseline4, {})
        # model_name, model class, model parameters (dict)
    ]

    # Metrics to compute for split evaluation
    split_metrics = ["mae", "rmse"]

    # Metrics to compute for Leave-One-Out (LOO) evaluation
    loo_metrics = ["hit_rate"]

    # Metrics to compute for full dataset evaluation
    full_metrics = ["novelty"]

    # Split parameters
    test_size = 0.25  # -- configure the test_size (from 0 to 1) --

    # Loo parameters
    top_n_value = 10  # -- configure the number of recommendations (> 1) --
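A hedged example of how an additional model could be registered in the same list, using `KNNWithMeans`, which `models.py` already imports from Surprise. The name `"knn_means"`, `k=40` and the pearson similarity are illustrative choices, not project settings:

``` python
# Illustrative only: appending an extra Surprise model to the evaluation config.
from surprise import KNNWithMeans
from configs import EvalConfig

EvalConfig.models.append(
    ("knn_means", KNNWithMeans, {"k": 40, "sim_options": {"name": "pearson"}})
)
```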
%% Cell type:markdown id:a665885b tags:
# Evaluator Module
The Evaluator module creates evaluation reports.
A report contains the evaluation metrics computed for each model specified in the evaluation config.
%% Cell type:code id:6aaf9140 tags:
``` python
# reloads modules automatically before entering the execution of code
%load_ext autoreload
%autoreload 2
# third-party imports
import numpy as np
import pandas as pd
# -- add new imports here --
# local imports
from configs import EvalConfig
from constants import Constant as C
from loaders import export_evaluation_report
from loaders import load_ratings
# -- add new imports here --
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.model_selection import LeaveOneOut
from collections import Counter
```
%% Cell type:markdown id:d47c24a4 tags:
# 1. Model validation functions
Validation functions perform cross-validation on recommender system models.
%% Cell type:code id:d6d82188 tags:
``` python
def generate_split_predictions(algo, ratings_dataset, eval_config):
    """Generate predictions on a random test set specified in eval_config"""
    # -- implement the function generate_split_predictions --
    # Splitting the data into train and test sets
    trainset, testset = train_test_split(ratings_dataset, test_size=eval_config.test_size)
    # Training the algorithm on the train data set
    algo.fit(trainset)
    # Predict ratings for the testset
    predictions = algo.test(testset)
    return predictions


def generate_loo_top_n(algo, ratings_dataset, eval_config):
    """Generate top-n recommendations for each user on a random Leave-one-out split (LOO)"""
    # -- implement the function generate_loo_top_n --
    # Create a LeaveOneOut split
    loo = LeaveOneOut(n_splits=1)
    for trainset, testset in loo.split(ratings_dataset):
        algo.fit(trainset)  # Train the algorithm on the training set
        anti_testset = trainset.build_anti_testset()  # Build the anti test-set
        predictions = algo.test(anti_testset)  # Get predictions on the anti test-set
        top_n = {}
        for uid, iid, _, est, _ in predictions:
            if uid not in top_n:
                top_n[uid] = []
            top_n[uid].append((iid, est))
        for uid, user_ratings in top_n.items():
            user_ratings.sort(key=lambda x: x[1], reverse=True)
            top_n[uid] = user_ratings[:eval_config.top_n_value]  # Get top-N recommendations
        anti_testset_top_n = top_n
        return anti_testset_top_n, testset


def generate_full_top_n(algo, ratings_dataset, eval_config):
    """Generate top-n recommendations for each user with the full training set"""
    full_trainset = ratings_dataset.build_full_trainset()  # Build the full training set
    algo.fit(full_trainset)  # Train the algorithm on the full training set
    anti_testset = full_trainset.build_anti_testset()  # Build the anti test-set
    predictions = algo.test(anti_testset)  # Get predictions on the anti test-set
    top_n = {}
    for uid, iid, _, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:eval_config.top_n_value]  # Get top-N recommendations
    anti_testset_top_n = top_n
    return anti_testset_top_n


def precomputed_information(movie_data):
    """ Returns a dictionary that precomputes relevant information for evaluating in full mode
    Dictionary keys:
    - precomputed_dict["item_to_rank"] : contains a dictionary mapping movie ids to rankings
    - (-- for your project, add other relevant information here -- )
    """
    # Initialize an empty dictionary to store item_id to rank mapping
    item_to_rank = {}
    # Calculate popularity rank for each movie
    ratings_count = movie_data.groupby('movieId').size().sort_values(ascending=False)
    # Assign ranks to movies based on their popularity
    for rank, (movie_id, _) in enumerate(ratings_count.items(), start=1):
        item_to_rank[movie_id] = rank
    # Create the precomputed dictionary
    precomputed_dict = {}
    precomputed_dict["item_to_rank"] = item_to_rank
    return precomputed_dict


def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):
    """ Create a DataFrame evaluating various models on metrics specified in an evaluation config.
    """
    evaluation_dict = {}
    for model_name, model, arguments in eval_config.models:
        print(f'Handling model {model_name}')
        algo = model(**arguments)
        evaluation_dict[model_name] = {}
        # Type 1 : split evaluations
        if len(eval_config.split_metrics) > 0:
            print('Training split predictions')
            predictions = generate_split_predictions(algo, sp_ratings, eval_config)
            for metric in eval_config.split_metrics:
                print(f'- computing metric {metric}')
                assert metric in available_metrics['split']
                evaluation_function, parameters = available_metrics["split"][metric]
                evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters)
        # Type 2 : loo evaluations
        if len(eval_config.loo_metrics) > 0:
            print('Training loo predictions')
            anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.loo_metrics:
                assert metric in available_metrics['loo']
                evaluation_function, parameters = available_metrics["loo"][metric]
                evaluation_dict[model_name][metric] = evaluation_function(anti_testset_top_n, testset, **parameters)
        # Type 3 : full evaluations
        if len(eval_config.full_metrics) > 0:
            print('Training full predictions')
            anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.full_metrics:
                assert metric in available_metrics['full']
                evaluation_function, parameters = available_metrics["full"][metric]
                evaluation_dict[model_name][metric] = evaluation_function(
                    anti_testset_top_n,
                    **precomputed_dict,
                    **parameters
                )
    return pd.DataFrame.from_dict(evaluation_dict).T
```
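%% Cell type:markdown id: tags:
A minimal usage sketch for the three generators above, run outside of `create_evaluation_report` and assuming the Surprise-format loader and `ModelBaseline3` already used elsewhere in the project:
%% Cell type:code id: tags:
``` python
# Sketch: exercising the generators directly with a single baseline model.
from models import ModelBaseline3

algo = ModelBaseline3()
sp_ratings = load_ratings(surprise_format=True)

split_predictions = generate_split_predictions(algo, sp_ratings, EvalConfig)
loo_top_n, loo_testset = generate_loo_top_n(algo, sp_ratings, EvalConfig)
full_top_n = generate_full_top_n(algo, sp_ratings, EvalConfig)

print(len(split_predictions), "split predictions,", len(full_top_n), "users with top-n lists")
```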
%% Cell type:markdown id:f7e83d1d tags:
# 2. Evaluation metrics
Implement evaluation metrics for either rating predictions (split metrics) or top-n recommendations (loo and full metrics).
%% Cell type:code id:f1849e55 tags:
``` python
def get_hit_rate(anti_testset_top_n, testset):
    """Compute the average hit over the users (loo metric)
    A hit (1) happens when the movie in the testset has been picked by the top-n recommender
    A fail (0) happens when the movie in the testset has not been picked by the top-n recommender
    """
    # -- implement the function get_hit_rate --
    hits = 0
    total_users = len(testset)
    for uid, true_iid, _ in testset:
        if uid in anti_testset_top_n and true_iid in {iid for iid, _ in anti_testset_top_n[uid]}:
            hits += 1
    hit_rate = hits / total_users
    return hit_rate


def get_novelty(anti_testset_top_n, item_to_rank):
    """Compute the average novelty of the top-n recommendation over the users (full metric)
    The novelty is defined as the average ranking of the movies recommended
    """
    # -- implement the function get_novelty --
    total_rank_sum = 0
    total_recommendations = 0
    for uid, recommendations in anti_testset_top_n.items():
        for iid, _ in recommendations:
            if iid in item_to_rank:
                total_rank_sum += item_to_rank[iid]
                total_recommendations += 1
    if total_recommendations == 0:
        return 0  # Avoid division by zero
    average_rank_sum = total_rank_sum / total_recommendations
    return average_rank_sum
```
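%% Cell type:markdown id: tags:
A tiny hand-built example (values invented purely for illustration) of what the two metrics measure: the hit rate checks whether each user's held-out movie appears in that user's top-n list, and the novelty averages the popularity ranks of everything recommended.
%% Cell type:code id: tags:
``` python
# Toy data, invented for illustration only.
toy_top_n = {"u1": [("m1", 4.8), ("m2", 4.5)],
             "u2": [("m3", 4.9), ("m4", 4.1)]}
toy_testset = [("u1", "m2", 4.0),   # m2 is in u1's top-n  -> hit
               ("u2", "m9", 3.0)]   # m9 is not in u2's top-n -> miss
toy_ranks = {"m1": 1, "m2": 2, "m3": 10, "m4": 50}

print(get_hit_rate(toy_top_n, toy_testset))   # 0.5
print(get_novelty(toy_top_n, toy_ranks))      # (1 + 2 + 10 + 50) / 4 = 15.75
```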
%% Cell type:markdown id:1a9855b3 tags:
# 3. Evaluation workflow
Load data, evaluate models and save the experimental outcomes
%% Cell type:code id:704f4d2a tags:
``` python
AVAILABLE_METRICS = {
    "split": {
        "mae": (accuracy.mae, {'verbose': False}),
        "rmse": (accuracy.rmse, {'verbose': False})
        # Add new split metrics here if needed
    },
    "loo": {
        "hit_rate": (get_hit_rate, {}),
        # Add new loo metrics here if needed
    },
    "full": {
        "novelty": (get_novelty, {}),
        # Add new full metrics here if needed
    }
}
sp_ratings = load_ratings(surprise_format=True)
precomputed_dict = precomputed_information(pd.read_csv("data/tiny/evidence/ratings.csv"))
evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)
export_evaluation_report(evaluation_report)
```
%% Output
Handling model baseline_1
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
Handling model baseline_2
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
Handling model baseline_3
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
Handling model baseline_4
Training split predictions
- computing metric mae
- computing metric rmse
Training loo predictions
Training full predictions
The data has been exported to the evaluation report
mae rmse hit_rate novelty
baseline_1 1.567221 1.788369 0.074766 99.405607
baseline_2 1.502872 1.840696 0.056075 429.942991
baseline_3 0.873993 1.076982 0.065421 99.405607
baseline_4 0.730657 0.938814 0.186916 57.465421
# standard library imports
from collections import defaultdict

# third-party imports
import numpy as np
import random as rd
from surprise import AlgoBase
from surprise import KNNWithMeans
from surprise import SVD


def get_top_n(predictions, n):
    """Return the top-N recommendations for each user from a set of predictions.
    Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
    and modified by cvandekerckh for random tie breaking

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendations to output for each user. Default
            is 10.
    Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """
    rd.seed(0)

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the n highest ones.
    for uid, user_ratings in top_n.items():
        rd.shuffle(user_ratings)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n


# First algorithm
class ModelBaseline1(AlgoBase):
    def __init__(self):
        AlgoBase.__init__(self)

    def estimate(self, u, i):
        return 2


# Second algorithm
class ModelBaseline2(AlgoBase):
    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        rd.seed(0)

    def estimate(self, u, i):
        return rd.uniform(self.trainset.rating_scale[0], self.trainset.rating_scale[1])


# Third algorithm
class ModelBaseline3(AlgoBase):
    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        self.the_mean = np.mean([r for (_, _, r) in self.trainset.all_ratings()])
        return self

    def estimate(self, u, i):
        return self.the_mean


# Fourth Model
class ModelBaseline4(SVD):
    def __init__(self):
        SVD.__init__(self, n_factors=100)
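A short sketch of how these pieces fit together outside the evaluator, assuming the Surprise-format ratings loader used earlier in the project:

``` python
# Sketch: train one baseline on the full trainset and build top-10 lists with get_top_n.
from loaders import load_ratings

sp_ratings = load_ratings(surprise_format=True)
trainset = sp_ratings.build_full_trainset()

algo = ModelBaseline4()
algo.fit(trainset)

predictions = algo.test(trainset.build_anti_testset())
top_10 = get_top_n(predictions, n=10)
print(len(top_10), "users with recommendations")
```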