diff --git a/configs.py b/configs.py new file mode 100644 index 0000000000000000000000000000000000000000..06dd89bde99d75a7e84859d504aaea21eab2675e --- /dev/null +++ b/configs.py @@ -0,0 +1,36 @@ +# local imports +from models import * + + +class EvalConfig: + + """Configuration settings for evaluation.""" + + # List of models to evaluate, each tuple containing model_name, model class, and model parameters (dict) + + models = [ + ("baseline_1", ModelBaseline1, {}), + ("baseline_2", ModelBaseline2, {}), + ("baseline_3", ModelBaseline3, {}), + ("baseline_4", ModelBaseline4, {}), + ("ContentBased_sample", ContentBased, {"features_method" : "title_length", "regressor_method" : "random_sample"}), + ("ContentBased_score", ContentBased, {"features_method" : "title_length", "regressor_method" : "random_score"}), + ("ContentBased_Lr", ContentBased, {"features_method" : "title_length", "regressor_method" : "linear_regression"}) + + # model_name, model class, model parameters (dict) + ] + + # Metrics to compute for split evaluation + split_metrics = ["mae", "rmse"] + + # Metrics to compute for Leave-One-Out (LOO) evaluation + loo_metrics = ["hit_rate"] + + # Metrics to compute for full dataset evaluation + full_metrics = ["novelty"] + + # Split parameters + test_size = 0.25 # -- configure the test_size (from 0 to 1) -- + + # Loo parameters + top_n_value = 10 # -- configure the numer of recommendations (> 1) -- diff --git a/content_based.ipynb b/content_based.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..da98e312cd3f2968f0e5586cc3c6caafa3585869 --- /dev/null +++ b/content_based.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "82d5ca82", + "metadata": {}, + "source": [ + "# Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "277473a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import random as rd\n", + "from surprise import AlgoBase\n", + "from surprise.prediction_algorithms.predictions import PredictionImpossible\n", + "\n", + "from loaders import load_ratings\n", + "from loaders import load_items\n", + "from constants import Constant as C\n", + "\n", + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "markdown", + "id": "a42c16bf", + "metadata": {}, + "source": [ + "# Explore and select content features" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e8378976", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>n_character_title</th>\n", + " </tr>\n", + " <tr>\n", + " <th>movieId</th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>23</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>23</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>44</td>\n", + " </tr>\n", + " <tr>\n", + " <th>64</th>\n", + " <td>20</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " n_character_title\n", + "movieId \n", + "3 23\n", + "15 23\n", + "34 11\n", + "59 44\n", + "64 20" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_items = load_items()\n", + "df_ratings = load_ratings()\n", + "\n", + "# Example 1 : create title_length features\n", + "df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n", + "display(df_features.head())\n", + "\n", + "# (explore here other features)\n" + ] + }, + { + "cell_type": "markdown", + "id": "a2c9a2b6", + "metadata": {}, + "source": [ + "# Build a content-based model\n", + "When ready, move the following class in the *models.py* script" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "16b0a602", + "metadata": {}, + "outputs": [], + "source": [ + "class ContentBased(AlgoBase):\n", + " def __init__(self, features_method, regressor_method):\n", + " AlgoBase.__init__(self)\n", + " self.regressor_method = regressor_method\n", + " self.content_features = self.create_content_features(features_method)\n", + "\n", + " def create_content_features(self, features_method):\n", + " \"\"\"Content Analyzer\"\"\"\n", + " df_items = load_items()\n", + " if features_method is None:\n", + " df_features = None\n", + " elif features_method == \"title_length\": # a naive method that creates only 1 feature based on title length\n", + " df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n", + " else: # (implement other feature creations here)\n", + " raise NotImplementedError(f'Feature method {features_method} not yet implemented')\n", + " return df_features\n", + " \n", + "\n", + " def fit(self, trainset):\n", + " \"\"\"Profile Learner\"\"\"\n", + " AlgoBase.fit(self, 
trainset)\n",
+    "        \n",
+    "        # Preallocate user profiles\n",
+    "        self.user_profile = {u: None for u in trainset.all_users()}\n",
+    "\n",
+    "        if self.regressor_method == 'random_score':\n",
+    "            for u in self.user_profile:\n",
+    "                self.user_profile[u] = rd.uniform(0.5,5)\n",
+    "        \n",
+    "        elif self.regressor_method == 'random_sample':\n",
+    "            for u in self.user_profile:\n",
+    "                self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]\n",
+    "        else:\n",
+    "            for u in self.user_profile:\n",
+    "\n",
+    "                # trainset.ur[u] holds inner item ids; convert them to raw ids once here\n",
+    "                user_ratings = [(trainset.to_raw_iid(iid), rating) for (iid, rating) in trainset.ur[u]]\n",
+    "\n",
+    "                df_user = pd.DataFrame(user_ratings, columns = [\"item_id\", \"user_ratings\"])\n",
+    "\n",
+    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
+    "\n",
+    "                X = df_user['n_character_title'].values.reshape(-1,1)\n",
+    "\n",
+    "                y = df_user['user_ratings'].values\n",
+    "\n",
+    "                linear_regressor = LinearRegression(fit_intercept = False)\n",
+    "\n",
+    "                linear_regressor.fit(X,y)\n",
+    "\n",
+    "                # Store the computed user profile\n",
+    "                self.user_profile[u] = linear_regressor\n",
+    "\n",
+    "        # (implement here the regressor fitting)\n",
+    "        \n",
+    "    def estimate(self, u, i):\n",
+    "        \"\"\"Scoring component used for item filtering\"\"\"\n",
+    "        # First, handle cases for unknown users and items\n",
+    "        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):\n",
+    "            raise PredictionImpossible('User and/or item is unknown.')\n",
+    "\n",
+    "\n",
+    "        if self.regressor_method == 'random_score':\n",
+    "            rd.seed()\n",
+    "            score = rd.uniform(0.5,5)\n",
+    "\n",
+    "        elif self.regressor_method == 'random_sample':\n",
+    "            rd.seed()\n",
+    "            score = rd.choice(self.user_profile[u])\n",
+    "        \n",
+    "        else:\n",
+    "\n",
+    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
+    "\n",
+    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
+    "\n",
+    "            linear_regressor = self.user_profile[u]\n",
+    "\n",
+    "            score = linear_regressor.predict(item_features)[0]\n",
+    "\n",
+    "\n",
+    "        # (implement here the regressor prediction)\n",
+    "\n",
+    "        return score\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ffd75b7e",
+   "metadata": {},
+   "source": [
+    "The following script tests the ContentBased class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "69d12f7d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "user: 15 item: 942 r_ui = None est = 3.59 {'was_impossible': False}\n",
+      "user: 15 item: 942 r_ui = None est = 3.00 {'was_impossible': False}\n"
+     ]
+    }
+   ],
+   "source": [
+    "def test_contentbased_class(feature_method, regressor_method):\n",
+    "    \"\"\"Test the ContentBased class.\n",
+    "    Tries to make a prediction on the first (user, item) tuple of the anti_test_set\n",
+    "    \"\"\"\n",
+    "    sp_ratings = load_ratings(surprise_format=True)\n",
+    "    train_set = sp_ratings.build_full_trainset()\n",
+    "    content_algo = ContentBased(feature_method, regressor_method)\n",
+    "    content_algo.fit(train_set)\n",
+    "    anti_test_set_first = train_set.build_anti_testset()[0]\n",
+    "    prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])\n",
+    "    print(prediction)\n",
+    "\n",
+    "# (call here the test functions with different regressor methods)\n",
+    "\n",
+    "test_contentbased_class(feature_method = \"title_length\", regressor_method = \"random_score\")\n",
+    
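"# (hypothetical extra check, kept commented out so the recorded output above stays accurate:\n",
+    "#  the linear_regression profile can be exercised the same way once the class is moved to models.py)\n",
+    "# test_contentbased_class(feature_method = \"title_length\", regressor_method = \"linear_regression\")\n",
+    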
"test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/evaluator.ipynb b/evaluator.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b88bfe44a4e7d2898edb216abecfd1b673f059b6 --- /dev/null +++ b/evaluator.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a665885b", + "metadata": {}, + "source": [ + "# Evaluator Module\n", + "The Evaluator module creates evaluation reports.\n", + "\n", + "Reports contain evaluation metrics depending on models specified in the evaluation config." + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "6aaf9140", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "# reloads modules automatically before entering the execution of code\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "# imports\n", + "import numpy as np \n", + "import pandas as pd\n", + "\n", + "# local imports\n", + "from configs import EvalConfig\n", + "from constants import Constant as C\n", + "from loaders import export_evaluation_report\n", + "from loaders import load_ratings\n", + "\n", + "# New imports\n", + "from surprise.model_selection import train_test_split\n", + "from surprise import accuracy\n", + "from surprise.model_selection import LeaveOneOut\n", + "from collections import Counter" + ] + }, + { + "cell_type": "markdown", + "id": "d47c24a4", + "metadata": {}, + "source": [ + "# 1. Model validation functions\n", + "Validation functions are a way to perform crossvalidation on recommender system models. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "d6d82188", + "metadata": {}, + "outputs": [], + "source": [ + "# -- implement the function generate_split_predictions --\n", + "def generate_split_predictions(algo, ratings_dataset, eval_config):\n", + " \"\"\"Generate predictions on a random test set specified in eval_config\"\"\"\n", + " \n", + " # Spliting the data into train and test sets\n", + " trainset, testset = train_test_split(ratings_dataset, test_size=eval_config.test_size)\n", + "\n", + " # Training the algorithm on the train data set\n", + " algo.fit(trainset)\n", + "\n", + " # Predict ratings for the testset\n", + " predictions = algo.test(testset)\n", + " \n", + " return predictions\n", + "\n", + "# -- implement the function generate_loo_top_n --\n", + "def generate_loo_top_n(algo, ratings_dataset, eval_config):\n", + " \"\"\"Generate top-n recommendations for each user on a random Leave-one-out split (LOO)\"\"\"\n", + " \n", + " # Create a LeaveOneOut split\n", + " loo = LeaveOneOut(n_splits=1)\n", + " \n", + " for trainset, testset in loo.split(ratings_dataset):\n", + " algo.fit(trainset) # Train the algorithm on the training set\n", + " anti_testset = trainset.build_anti_testset() # Build the anti test-set\n", + " predictions = algo.test(anti_testset) # Get predictions on the anti test-set\n", + " top_n = {}\n", + " for uid, iid, _, est, _ in predictions:\n", + " if uid not in top_n:\n", + " top_n[uid] = []\n", + " top_n[uid].append((iid, est))\n", + " for uid, user_ratings in top_n.items():\n", + " user_ratings.sort(key=lambda x: x[1], reverse=True)\n", + " top_n[uid] = user_ratings[:eval_config.top_n_value] # Get top-N recommendations\n", + " anti_testset_top_n = top_n\n", + " return anti_testset_top_n, testset\n", + "\n", + "def generate_full_top_n(algo, ratings_dataset, eval_config):\n", + " \"\"\"Generate top-n recommendations for each user with full training set (LOO)\"\"\"\n", + "\n", + " full_trainset = ratings_dataset.build_full_trainset() # Build the full training set\n", + " algo.fit(full_trainset) # Train the algorithm on the full training set\n", + " anti_testset = full_trainset.build_anti_testset() # Build the anti test-set\n", + " predictions = algo.test(anti_testset) # Get predictions on the anti test-set\n", + " top_n = {}\n", + " for uid, iid, _, est, _ in predictions:\n", + " if uid not in top_n:\n", + " top_n[uid] = []\n", + " top_n[uid].append((iid, est))\n", + " for uid, user_ratings in top_n.items():\n", + " user_ratings.sort(key=lambda x: x[1], reverse=True)\n", + " top_n[uid] = user_ratings[:eval_config.top_n_value] # Get top-N recommendations\n", + " anti_testset_top_n = top_n\n", + " return anti_testset_top_n\n", + "\n", + "def precomputed_information(movie_data):\n", + "\n", + " \"\"\" Returns a dictionary that precomputes relevant information for evaluating in full mode\n", + " \n", + " Dictionary keys:\n", + " - precomputed_dict[\"item_to_rank\"] : contains a dictionary mapping movie ids to rankings\n", + " - (-- for your project, add other relevant information here -- )\n", + " \"\"\"\n", + "\n", + " # Initialize an empty dictionary to store item_id to rank mapping\n", + " item_to_rank = {}\n", + " \n", + " # Calculate popularity rank for each movie\n", + " ratings_count = movie_data.groupby('movieId').size().sort_values(ascending=False)\n", + " \n", + " # Assign ranks to movies based on their popularity\n", + " for rank, (movie_id, _) in enumerate(ratings_count.items(), start=1):\n", + " item_to_rank[movie_id] = 
rank\n", + " \n", + " # Create the precomputed dictionary\n", + " precomputed_dict = {}\n", + " precomputed_dict[\"item_to_rank\"] = item_to_rank\n", + " \n", + " return precomputed_dict\n", + "\n", + "def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):\n", + "\n", + " \"\"\" Create a DataFrame evaluating various models on metrics specified in an evaluation config. \n", + " \"\"\"\n", + " \n", + " evaluation_dict = {}\n", + " for model_name, model, arguments in eval_config.models:\n", + " print(f'Handling model {model_name}')\n", + " algo = model(**arguments)\n", + " evaluation_dict[model_name] = {}\n", + " \n", + " # Type 1 : split evaluations\n", + " if len(eval_config.split_metrics) > 0:\n", + " print('Training split predictions')\n", + " predictions = generate_split_predictions(algo, sp_ratings, eval_config)\n", + " for metric in eval_config.split_metrics:\n", + " print(f'- computing metric {metric}')\n", + " assert metric in available_metrics['split']\n", + " evaluation_function, parameters = available_metrics[\"split\"][metric]\n", + " evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters) \n", + " \n", + " # Type 2 : loo evaluations\n", + " if len(eval_config.loo_metrics) > 0:\n", + " print('Training loo predictions')\n", + " anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)\n", + " for metric in eval_config.loo_metrics:\n", + " assert metric in available_metrics['loo']\n", + " evaluation_function, parameters = available_metrics[\"loo\"][metric]\n", + " evaluation_dict[model_name][metric] = evaluation_function(anti_testset_top_n, testset, **parameters)\n", + " \n", + " # Type 3 : full evaluations\n", + " if len(eval_config.full_metrics) > 0:\n", + " print('Training full predictions')\n", + " anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)\n", + " for metric in eval_config.full_metrics:\n", + " assert metric in available_metrics['full']\n", + " evaluation_function, parameters = available_metrics[\"full\"][metric]\n", + " evaluation_dict[model_name][metric] = evaluation_function(\n", + " anti_testset_top_n,\n", + " **precomputed_dict,\n", + " **parameters\n", + " )\n", + " \n", + " return pd.DataFrame.from_dict(evaluation_dict).T" + ] + }, + { + "cell_type": "markdown", + "id": "f7e83d1d", + "metadata": {}, + "source": [ + "# 2. 
Evaluation metrics\n", + "Implement evaluation metrics for either rating predictions (split metrics) or for top-n recommendations (loo metric, full metric)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "f1849e55", + "metadata": {}, + "outputs": [], + "source": [ + "# -- implement the function get_hit_rate --\n", + "def get_hit_rate(anti_testset_top_n, testset):\n", + " \n", + " \"\"\"Compute the average hit over the users (loo metric)\n", + " \n", + " A hit (1) happens when the movie in the testset has been picked by the top-n recommender\n", + " A fail (0) happens when the movie in the testset has not been picked by the top-n recommender\n", + " \"\"\"\n", + "\n", + " hits = 0\n", + " total_users = len(testset)\n", + " for uid, true_iid, _ in testset:\n", + " if uid in anti_testset_top_n and true_iid in {iid for iid, _ in anti_testset_top_n[uid]}:\n", + " hits += 1\n", + " hit_rate = hits / total_users\n", + "\n", + " return hit_rate\n", + "\n", + "# -- implement the function get_novelty --\n", + "def get_novelty(anti_testset_top_n, item_to_rank):\n", + "\n", + " \"\"\"Compute the average novelty of the top-n recommendation over the users (full metric)\n", + " \n", + " The novelty is defined as the average ranking of the movies recommended\n", + " \"\"\"\n", + "\n", + " total_rank_sum = 0\n", + " total_recommendations = 0\n", + " for uid, recommendations in anti_testset_top_n.items():\n", + " for iid, _ in recommendations:\n", + " if iid in item_to_rank:\n", + " total_rank_sum += item_to_rank[iid]\n", + " total_recommendations += 1\n", + " if total_recommendations == 0:\n", + " return 0 # Avoid division by zero\n", + " average_rank_sum = total_rank_sum / total_recommendations \n", + " \n", + " return average_rank_sum" + ] + }, + { + "cell_type": "markdown", + "id": "1a9855b3", + "metadata": {}, + "source": [ + "# 3. 
Evaluation workflow\n", + "Load data, evaluate models and save the experimental outcomes" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "704f4d2a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Handling model baseline_1\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model baseline_2\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model baseline_3\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model baseline_4\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model ContentBased_sample\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model ContentBased_score\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model ContentBased_Lr\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "The data has been exported to the evaluation report\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>mae</th>\n", + " <th>rmse</th>\n", + " <th>hit_rate</th>\n", + " <th>novelty</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>baseline_1</th>\n", + " <td>1.561178</td>\n", + " <td>1.792482</td>\n", + " <td>0.074766</td>\n", + " <td>99.405607</td>\n", + " </tr>\n", + " <tr>\n", + " <th>baseline_2</th>\n", + " <td>1.471412</td>\n", + " <td>1.819364</td>\n", + " <td>0.000000</td>\n", + " <td>429.942991</td>\n", + " </tr>\n", + " <tr>\n", + " <th>baseline_3</th>\n", + " <td>0.878270</td>\n", + " <td>1.085591</td>\n", + " <td>0.074766</td>\n", + " <td>99.405607</td>\n", + " </tr>\n", + " <tr>\n", + " <th>baseline_4</th>\n", + " <td>0.705673</td>\n", + " <td>0.912313</td>\n", + " <td>0.130841</td>\n", + " <td>60.202804</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ContentBased_sample</th>\n", + " <td>1.013747</td>\n", + " <td>1.350417</td>\n", + " <td>0.084112</td>\n", + " <td>178.048598</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ContentBased_score</th>\n", + " <td>1.461846</td>\n", + " <td>1.803067</td>\n", + " <td>0.018692</td>\n", + " <td>437.222430</td>\n", + " </tr>\n", + " <tr>\n", + " <th>ContentBased_Lr</th>\n", + " <td>1.202626</td>\n", + " <td>1.460273</td>\n", + " <td>0.084112</td>\n", + " <td>278.046729</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " mae rmse hit_rate novelty\n", + 
"baseline_1 1.561178 1.792482 0.074766 99.405607\n", + "baseline_2 1.471412 1.819364 0.000000 429.942991\n", + "baseline_3 0.878270 1.085591 0.074766 99.405607\n", + "baseline_4 0.705673 0.912313 0.130841 60.202804\n", + "ContentBased_sample 1.013747 1.350417 0.084112 178.048598\n", + "ContentBased_score 1.461846 1.803067 0.018692 437.222430\n", + "ContentBased_Lr 1.202626 1.460273 0.084112 278.046729" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "AVAILABLE_METRICS = {\n", + " \"split\": {\n", + " \"mae\": (accuracy.mae, {'verbose': False}),\n", + " \"rmse\": (accuracy.rmse, {'verbose': False})\n", + " },\n", + " \"loo\": {\n", + " \"hit_rate\": (get_hit_rate, {}),\n", + " },\n", + " \"full\": {\n", + " \"novelty\": (get_novelty, {}),\n", + " }\n", + "}\n", + "\n", + "sp_ratings = load_ratings(surprise_format=True)\n", + "precomputed_dict = precomputed_information(pd.read_csv(\"data/tiny/evidence/ratings.csv\"))\n", + "evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n", + "export_evaluation_report(evaluation_report)" + ] + }, + { + "cell_type": "markdown", + "id": "6f8b6d19", + "metadata": {}, + "source": [ + "dire quel modèle est meilleur ?\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/hackathon_make_predictions.ipynb b/hackathon_make_predictions.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..78103bf4c6417bd66655669cf1eae37b93728c0c --- /dev/null +++ b/hackathon_make_predictions.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8090f896", + "metadata": {}, + "source": [ + "# Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dac6dae8", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import pandas as pd\n", + "\n", + "from constants import Constant as C\n", + "from loaders import load_ratings\n", + "from models import ContentBased" + ] + }, + { + "cell_type": "markdown", + "id": "c2f9e553", + "metadata": {}, + "source": [ + "# How to generate predictions for the hackathon ?\n", + "To submit predictions to the hackathon: \n", + "- be sure to consider your latest version of ContentBased. Use your evaluator notebook to assess the quality of your model\n", + "- run the make_hackathon_prediction() function in the present notebook with your best feature_method and regressor_method. 
This will generate a ratings_predictions.csv file\n",
+    "- download ratings_predictions.csv and upload it to your group's OneDrive"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ae7eadd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_hackathon_prediction(feature_method, regressor_method):\n",
+    "    \"\"\"Generate a prediction file on the test set\"\"\"\n",
+    "    # 1) load train data - make sure to redirect the DATA_PATH to 'data/hackathon'\n",
+    "    assert str(C.DATA_PATH) == 'data/hackathon'\n",
+    "    sp_ratings = load_ratings(surprise_format=True)\n",
+    "    train_set = sp_ratings.build_full_trainset()\n",
+    "    \n",
+    "    # 2) train your ContentBased model on the train set\n",
+    "    content_knn = ContentBased(feature_method, regressor_method)\n",
+    "    content_knn.fit(train_set)\n",
+    "    \n",
+    "    # 3) make predictions on the test set\n",
+    "    df_test = pd.read_csv('data/hackathon/evidence/ratings_test.csv')[C.USER_ITEM_RATINGS]\n",
+    "    test_records = list(df_test.to_records(index=False))\n",
+    "    predictions = content_knn.test(test_records)\n",
+    "    output_predictions = []\n",
+    "    for uid, iid, _, est, _ in predictions:\n",
+    "        output_predictions.append([uid, iid, est])\n",
+    "    df_predictions = pd.DataFrame(data=output_predictions, columns = df_test.columns)\n",
+    "\n",
+    "    # 4) dump predictions\n",
+    "    df_predictions.to_csv('ratings_predictions.csv', index=False)\n",
+    "\n",
+    "    \n",
+    "make_hackathon_prediction(None, \"random_score\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f546c18",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models.py b/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..c288a5b8f7812d2b4187ec75540a77012b0997b3
--- /dev/null
+++ b/models.py
@@ -0,0 +1,181 @@
+# standard library imports
+from collections import defaultdict
+
+# third-party imports
+import pandas as pd
+import numpy as np
+import random as rd
+from surprise import AlgoBase, SVD, KNNWithMeans
+from surprise import PredictionImpossible
+
+# local imports
+from loaders import load_items, load_ratings
+from constants import Constant as C
+from sklearn.linear_model import LinearRegression
+
+
+def get_top_n(predictions, n):
+    """Return the top-N recommendation for each user from a set of predictions.
+    Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
+    and modified by cvandekerckh for random tie breaking
+
+    Args:
+        predictions(list of Prediction objects): The list of predictions, as
+            returned by the test method of an algorithm.
+        n(int): The number of recommendations to output for each user. Default
+            is 10.
+    Returns:
+        A dict where keys are user (raw) ids and values are lists of tuples:
+        [(raw item id, rating estimation), ...] of size n.
+    """
+
+    rd.seed(0)
+
+    # First map the predictions to each user.
+    top_n = defaultdict(list)
+    for uid, iid, true_r, est, _ in predictions:
+        top_n[uid].append((iid, est))
+
+    # Then sort the predictions for each user and retrieve the k highest ones.
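+    # Shuffling before the (stable) sort breaks ties between equal estimated ratings at random;
+    # rd.seed(0) above keeps that tie breaking reproducible from run to run.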
+ for uid, user_ratings in top_n.items(): + rd.shuffle(user_ratings) + user_ratings.sort(key=lambda x: x[1], reverse=True) + top_n[uid] = user_ratings[:n] + + return top_n + + +# First algorithm +class ModelBaseline1(AlgoBase): + def __init__(self): + AlgoBase.__init__(self) + + def estimate(self, u, i): + return 2 + + +# Second algorithm +class ModelBaseline2(AlgoBase): + def __init__(self): + AlgoBase.__init__(self) + + def fit(self, trainset): + AlgoBase.fit(self, trainset) + rd.seed(0) + + def estimate(self, u, i): + return rd.uniform(self.trainset.rating_scale[0], self.trainset.rating_scale[1]) + + +# Third algorithm +class ModelBaseline3(AlgoBase): + def __init__(self): + AlgoBase.__init__(self) + + def fit(self, trainset): + AlgoBase.fit(self, trainset) + self.the_mean = np.mean([r for (_, _, r) in self.trainset.all_ratings()]) + + return self + + def estimate(self, u, i): + return self.the_mean + + +# Fourth Model +class ModelBaseline4(SVD): + def __init__(self): + SVD.__init__(self, n_factors=100) + + +class ContentBased(AlgoBase): + def __init__(self, features_method, regressor_method): + AlgoBase.__init__(self) + self.regressor_method = regressor_method + self.content_features = self.create_content_features(features_method) + + def create_content_features(self, features_method): + """Content Analyzer""" + df_items = load_items() + if features_method is None: + df_features = None + elif features_method == "title_length": # a naive method that creates only 1 feature based on title length + df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title') + else: # (implement other feature creations here) + raise NotImplementedError(f'Feature method {features_method} not yet implemented') + return df_features + + + def fit(self, trainset): + """Profile Learner""" + AlgoBase.fit(self, trainset) + + # Preallocate user profiles + self.user_profile = {u: None for u in trainset.all_users()} + + if self.regressor_method == 'random_score': + for u in self.user_profile : + self.user_profile[u] = rd.uniform(0.5,5) + + elif self.regressor_method == 'random_sample': + for u in self.user_profile: + self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]] + elif self.regressor_method == 'linear_regression' : + for u in self.user_profile: + + user_ratings = [rating for _, rating in trainset.ur[u]] + item_ids = [iid for iid, _ in trainset.ur[u]] + + df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings}) + + df_user["item_id"] = df_user["item_id"].map(trainset.to_raw_iid) + + df_user = df_user.merge(self.content_features, left_on = "item_id", right_index = True, how = 'left') + + X = df_user['n_character_title'].values.reshape(-1,1) + + y = df_user['user_ratings'].values + + linear_regressor = LinearRegression(fit_intercept = False) + + linear_regressor.fit(X,y) + + # Store the computed user profile + self.user_profile[u] = linear_regressor + else : + pass + + # (implement here the regressor fitting) + + def estimate(self, u, i): + """Scoring component used for item filtering""" + # First, handle cases for unknown users and items + if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)): + raise PredictionImpossible('User and/or item is unkown.') + + + if self.regressor_method == 'random_score': + rd.seed() + score = rd.uniform(0.5,5) + + elif self.regressor_method == 'random_sample': + rd.seed() + score = rd.choice(self.user_profile[u]) + + elif self.regressor_method == 'linear_regression': + + raw_item_id = 
self.trainset.to_raw_iid(i)
+
+            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values
+
+            linear_regressor = self.user_profile[u]
+
+            score = linear_regressor.predict(item_features)[0]
+        else:
+            score = None
+
+        # (implement here the regressor prediction)
+
+        return score
+
+
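+# Reviewer's note: a minimal, hypothetical smoke test for ContentBased, mirroring
+# test_contentbased_class in content_based.ipynb. It assumes C.DATA_PATH points to a
+# dataset that load_ratings() and load_items() can read; it is a sketch, not part of
+# the original course template.
+if __name__ == '__main__':
+    sp_ratings = load_ratings(surprise_format=True)
+    train_set = sp_ratings.build_full_trainset()
+    content_algo = ContentBased("title_length", "linear_regression")
+    content_algo.fit(train_set)
+    # Predict the first (user, item) pair of the anti test set and print the Prediction object
+    first_uid, first_iid, _ = train_set.build_anti_testset()[0]
+    print(content_algo.predict(first_uid, first_iid))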