diff --git a/Home.py b/Home.py
index cedd90e4e9d4e58730645f37dc6f51a7966bea35..fc42a2fca243e92a0246ec1aefa28d44f00df0a2 100644
--- a/Home.py
+++ b/Home.py
@@ -4,7 +4,7 @@ import pandas as pd
 from content import fetch_movie_info, df_links
 from content import df_audrey, df_adrien, df_nathanael, df_charles
 from surprise import Dataset, Reader
-from recommender import OtherUserBased, UserBased, RecommenderSystem_KNN, LatentFactorModel # Importer la classe OtherUserBased
+from recommender import OtherUserBased, UserBased, RecommenderSystem_KNN, LatentFactorModel,test_contentbased_class # Importer la classe OtherUserBased
 from loaders import load_ratings
 
 
@@ -46,10 +46,10 @@ def display_user_movies(df, title, column_name):
     </div>
     """, unsafe_allow_html=True)
 
-def display_recommendations(user_name, user_id, csv_file):
+def display_recommendations_tm(user_name, user_id, csv_file):
     recommender = OtherUserBased(user_name, user_id)
     recommender.load_model()
-    top_10_predictions = recommender.get_top_10_predictions_for_user(csv_file)
+    top_10_predictions = recommender.get_top_n_predictions_for_user(csv_file)
 
     if top_10_predictions is not None:
         st.subheader(f"Top 10 Recommendations for {user_name}")
@@ -73,6 +73,7 @@ def display_recommendations(user_name, user_id, csv_file):
     else:
         st.write("No recommendations found.")
 
+
 def display_recommendations_ub(user_name, user_id):
     
     # Charger les données et préparer l'ensemble de données pour l'entraînement et le test
@@ -84,7 +85,7 @@ def display_recommendations_ub(user_name, user_id):
     recommender = UserBased(k=340, min_k=340)
     recommender.fit(trainset)
     
-    top_10_predictions = recommender.get_top_10_pred_ub(testset, user_id)
+    top_10_predictions = recommender.get_top_n_pred_ub(testset, user_id)
 
     if top_10_predictions is not None:
         st.subheader(f"Top 10 based on similar users of {user_name}")
@@ -138,6 +139,7 @@ def display_recommendations_knn(user_name, user_id):
     else:
         st.write("No recommendations found.")
 
+
 def display_recommendations_latent_factor(user_name, user_id):
     st.subheader(f"Top 10 Recommendations for {user_name}")
     cols_html = ""
@@ -174,6 +176,33 @@ def display_recommendations_latent_factor(user_name, user_id):
 
 
 
+def display_content_based_recommendations(user_name, user_id=-1, n=10):
+    """Render the top-``n`` content-based recommendations for one user as a scrollable poster strip.
+
+    ``user_name`` only labels the heading; ``user_id`` and ``n`` are forwarded to ``test_contentbased_class``.
+    """
+    cols_html = ""
+    # Forward the caller's user_id and n (they used to be hard-coded to -1 and 10); expects (movieId, estimate) pairs back.
+    top_n_recommendations = test_contentbased_class(["title_length", "movie_year", "genre", "avg_rating"], "ridge_regression", user_id=user_id, n=n)
+    if top_n_recommendations:
+        st.write(f"Top {n} recommendations for User {user_name}:")
+        for iid, _est in top_n_recommendations:
+            tmdbId = df_links.loc[df_links['movieId'] == iid, 'tmdbId'].values[0]
+            title_dict, poster_url = fetch_movie_info(tmdbId)
+            movie_title = title_dict.get("title", "Unknown Title") if isinstance(title_dict, dict) else title_dict
+            if poster_url:
+                html_file_url = f"http://localhost:8501/{movie_title.replace(' ', '_')}.html"
+                cols_html += f'<div style="display: inline-block; margin-right: 20px;"><a href="{html_file_url}" target="_blank"><img src="{poster_url}" alt="{movie_title}" style="width:150px;height:225px;"></a><div style="color: white; text-decoration: none; font-size: 14px; text-align: center; max-width: 150px; word-wrap: break-word; white-space: normal;"><b>{movie_title}</b></div></a></div>'
+            else:
+                cols_html += f"<p>{movie_title}</p>"
+        st.markdown(f"""
+        <div style="overflow-x: scroll; white-space: nowrap; height: 300px; display: flex; flex-direction: row;">
+            {cols_html}
+        </div>
+        """, unsafe_allow_html=True)
+    else:
+        st.write("No recommendations found.")
+
 
 def main():
 
@@ -209,15 +238,17 @@ def main():
     if selected_user:
         user_df = user_options[selected_user]
 
+        display_content_based_recommendations(selected_user, user_id_options[selected_user], n=10)
+
         display_recommendations_latent_factor(selected_user, user_id_options[selected_user])
         # Afficher les recommandations basées sur l'algorithme OtherUserBased
-        display_recommendations(selected_user, user_id_options[selected_user], csv_file)
+        
+        display_recommendations_tm(selected_user, user_id_options[selected_user], csv_file)
         # Afficher les recommandations basées sur l'algorithme UserBased
         display_recommendations_ub(selected_user, user_id_options[selected_user])
 
         display_recommendations_knn(selected_user, user_id_options[selected_user])
 
-        display_recommendations_latent_factor(selected_user, user_id_options[selected_user])
 
         dataframe_links = df_links.copy()
         user_df['movieId'] = user_df['movieId'].astype(int)
diff --git a/user_based.ipynb b/backend/user_based.ipynb
similarity index 100%
rename from user_based.ipynb
rename to backend/user_based.ipynb
diff --git a/configs.py b/configs.py
index 29e2937909d84b6c039b4e0de73882589d74f869..f9de902be256cac165fb82a3b3598454af185830 100644
--- a/configs.py
+++ b/configs.py
@@ -1,35 +1,33 @@
-# local imports
 from models import *
 from itertools import combinations
 
-# # Méthodes de caractéristiques disponibles
-# features_methods = [
-#     'genre', 'movie_year', 'avg_rating', 
-#     'title_length'
-# ]
-
-# # Méthodes de régression disponibles
-# regressor_methods = [
-#     'linear_regression','random_forest', 'lasso_regression',
-#     'ridge_regression','elastic_net','decision_tree','adaboost'
-# ]
-
-# # Générer toutes les combinaisons possibles de méthodes de caractéristiques
-# feature_combinations = []
-# for r in range(1, len(features_methods) + 1):
-#     feature_combinations.extend(combinations(features_methods, r))
-
-# # Générer toutes les combinaisons possibles de méthodes de régression et de caractéristiques
-# model_combinations = []
-# for feature_set in feature_combinations:
-#     for regressor in regressor_methods:
-#         model_name = f"combination_{regressor}_{'_'.join(feature_set)}"
-#         arguments = {
-#             "features_method": list(feature_set),
-#             "regressor_method": regressor
-#         }
-#         model_combinations.append((model_name, ContentBased, arguments))
-
+# Available feature-extraction methods
+features_methods = [
+    'genre', 'movie_year', 'avg_rating', 
+    'title_length'
+]
+
+# Available regression methods
+regressor_methods = [
+    'linear_regression','random_forest', 'lasso_regression','gradient_boosting',
+    'ridge_regression', 'svr_regression'
+]
+
+# Build every non-empty combination of feature methods (subset sizes 1..len(features_methods))
+feature_combinations = []
+for r in range(1, len(features_methods) + 1):
+    feature_combinations.extend(combinations(features_methods, r))
+
+# Pair every feature combination with every regressor as a (model_name, ContentBased, arguments) tuple
+model_combinations = []
+for feature_set in feature_combinations:
+    for regressor in regressor_methods:
+        model_name = f"combination_{regressor}_{'_'.join(feature_set)}"
+        arguments = {
+            "features_method": list(feature_set),
+            "regressor_method": regressor
+        }
+        model_combinations.append((model_name, ContentBased, arguments))
 
 
 class EvalConfig:
@@ -37,45 +35,39 @@ class EvalConfig:
     """Configuration settings for evaluation."""
     
     # List of models to evaluate, each tuple containing model_name, model class, and model parameters (dict)
-    #Modèles de base
     models = [
         ("baseline_1", ModelBaseline1, {}),
         ("baseline_2", ModelBaseline2, {}),
         ("baseline_3", ModelBaseline3, {}),
         ("baseline_4", ModelBaseline4, {}),
-
-        #("1", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'linear_regression'}),
-        ("2", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'random_forest'})
-        
-        #("3", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'lasso_regression'}),
-        #("4", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'elastic_net'}),
-        # ("2", ContentBased, {"features_method": ['genre', 'avg_rating'], "regressor_method":'ridge_regression'}),
-        # ("3", ContentBased, {"features_method":['movie_year', 'avg_rating', 'title_length'], "regressor_method":'lasso_regression'}),
-        # ("4", ContentBased, {"features_method":['title_length'], "regressor_method":'random_forest'}),
-        # ("5", ContentBased, {"features_method":['genre', 'title_length'], "regressor_method":'lasso_regression'}),
-        # ("6", ContentBased, {"features_method":['genre', 'title_length'], "regressor_method":'linear_regression'}),
-        # ("7", ContentBased, {"features_method":['genre', 'avg_rating'], "regressor_method":'lasso_regression'}),
-        # ("8", ContentBased, {"features_method":['avg_rating', 'title_length'], "regressor_method":'adaboost'}),
-        # ("9", ContentBased, {"features_method":['genre', 'movie_year', 'avg_rating'], "regressor_method":'decision_tree'}),
-        # ("10", ContentBased, {"features_method":['genre', 'movie_year'], "regressor_method":'decision_tree'}),
-        # ("11", ContentBased, {"features_method":['genre', 'movie_year', 'avg_rating'], "regressor_method":'elastic_net'}),
-        # ("12", ContentBased, {"features_method":['movie_year', 'avg_rating', 'title_length'], "regressor_method":'elastic_net'})
-
-
-
-
+        ("1", ContentBased, {"features_method": ['movie_year', 'avg_rating'], "regressor_method": 'linear_regression'}),
+        # ("2", ContentBased, {"features_method": ['genre', 'movie_year', 'avg_rating'], "regressor_method": 'gradient_boosting'}),
+        # ("3", ContentBased, {"features_method": ['avg_rating'], "regressor_method": 'gradient_boosting'}),
+        # ("4", ContentBased, {"features_method": ['avg_rating'], "regressor_method": 'lasso_regression'}),
+        # ("5", ContentBased, {"features_method": ['genre'], "regressor_method": 'random_forest'}),
+        # ("6", ContentBased, {"features_method": ['genre'], "regressor_method": 'lasso_regression'}),
+        # ("7", ContentBased, {"features_method": ['avg_rating', 'title_length'], "regressor_method": 'ridge_regression'}),
+        # ("8", ContentBased, {"features_method": ['avg_rating'], "regressor_method": 'svr_regression'}),
+        # ("9", ContentBased, {"features_method": ['genre', 'movie_year', 'title_length'], "regressor_method": 'gradient_boosting'}),
+        # ("10", ContentBased, {"features_method": ['genre', 'title_length'], "regressor_method": 'svr_regression'}),
+        # ("11", ContentBased, {"features_method": ['genre', 'avg_rating', 'title_length'], "regressor_method": 'linear_regression'}),
+        # ("12", ContentBased, {"features_method": ['genre', 'avg_rating'], "regressor_method": 'linear_regression'}),
+        # ("13", ContentBased, {"features_method": ['genre', 'avg_rating', 'title_length'], "regressor_method": 'knn_regression'}),
+        # ("14", ContentBased, {"features_method": ['genre', 'movie_year', 'avg_rating'], "regressor_method": 'xgboost'}),
+        # ("15", ContentBased, {"features_method": ['genre', 'title_length'], "regressor_method": 'decision_tree'}),
+        # ("16", ContentBased, {"features_method": ['title_length'], "regressor_method": 'random_forest'}),
+        # ("17", ContentBased, {"features_method": ['genre', 'title_length'], "regressor_method": 'gradient_boosting'}),
+        # ("18", ContentBased, {"features_method": ['movie_year', 'title_length'], "regressor_method": 'lightgbm'}),
+        # ("19", ContentBased, {"features_method": ['avg_rating', 'title_length'], "regressor_method": 'decision_tree'})
     ]
 
     # # Ajouter les combinaisons de ContentBased à la liste des modèles
-    # models.extend(combinations)
+    # models.extend(model_combinations)
 
-    # Affichage des modèles pour vérification
+    # # Affichage des modèles pour vérification
     # for model in models:
-    #     print(model)
-
-    #models = model_combinations
+    #      print(model)
     
-
     # Metrics to compute for split evaluation
     split_metrics = ["mae", "rmse"]
 
@@ -91,3 +83,24 @@ class EvalConfig:
     # Loo parameters
     top_n_value =  10 # -- configure the numer of recommendations (> 1) --
 
+
+
+    #     #("1", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'linear_regression'}),
+    #     ("2", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'random_forest'})
+        
+    #     #("3", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'lasso_regression'}),
+    #     #("4", ContentBased, {"features_method": ['movie_year', 'avg_rating', 'genre'], "regressor_method":'elastic_net'}),
+    #     # ("2", ContentBased, {"features_method": ['genre', 'avg_rating'], "regressor_method":'ridge_regression'}),
+    #     # ("3", ContentBased, {"features_method":['movie_year', 'avg_rating', 'title_length'], "regressor_method":'lasso_regression'}),
+    #     # ("4", ContentBased, {"features_method":['title_length'], "regressor_method":'random_forest'}),
+    #     # ("5", ContentBased, {"features_method":['genre', 'title_length'], "regressor_method":'lasso_regression'}),
+    #     # ("6", ContentBased, {"features_method":['genre', 'title_length'], "regressor_method":'linear_regression'}),
+    #     # ("7", ContentBased, {"features_method":['genre', 'avg_rating'], "regressor_method":'lasso_regression'}),
+    #     # ("8", ContentBased, {"features_method":['avg_rating', 'title_length'], "regressor_method":'adaboost'}),
+    #     # ("9", ContentBased, {"features_method":['genre', 'movie_year', 'avg_rating'], "regressor_method":'decision_tree'}),
+    #     # ("10", ContentBased, {"features_method":['genre', 'movie_year'], "regressor_method":'decision_tree'}),
+    #     # ("11", ContentBased, {"features_method":['genre', 'movie_year', 'avg_rating'], "regressor_method":'elastic_net'}),
+    #     # ("12", ContentBased, {"features_method":['movie_year', 'avg_rating', 'title_length'], "regressor_method":'elastic_net'})
+
+
+
diff --git a/content_based.ipynb b/content_based.ipynb
index 22f076dd1f3a34d39c20299ad6a52bad1c18268f..fffea99ab7e327cbe56f9b267cd486ef0a2fd8c1 100644
--- a/content_based.ipynb
+++ b/content_based.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 6,
    "id": "277473a3",
    "metadata": {},
    "outputs": [
@@ -27,20 +27,30 @@
     "%load_ext autoreload\n",
     "%autoreload 2\n",
     "\n",
-    "import numpy as np\n",
+    "\n",
+    "# third parties imports\n",
     "import pandas as pd\n",
+    "import numpy as np\n",
     "import random as rd\n",
-    "from surprise import AlgoBase\n",
-    "from surprise.prediction_algorithms.predictions import PredictionImpossible\n",
+    "from surprise import AlgoBase, SVD\n",
+    "from surprise import PredictionImpossible\n",
     "\n",
-    "from loaders import load_ratings\n",
-    "from loaders import load_items\n",
+    "# import local\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from loaders import load_items, load_ratings\n",
     "from constants import Constant as C\n",
-    "\n",
     "from sklearn.linear_model import LinearRegression\n",
     "from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor\n",
     "from sklearn.svm import SVR\n",
-    "from sklearn.feature_extraction.text import TfidfVectorizer"
+    "\n",
+    "from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet\n",
+    "from sklearn.svm import SVR\n",
+    "from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, AdaBoostRegressor\n",
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from xgboost import XGBRegressor\n",
+    "from lightgbm import LGBMRegressor\n",
+    "\n"
    ]
   },
   {
@@ -53,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 7,
    "id": "e8378976",
    "metadata": {},
    "outputs": [
@@ -87,24 +97,24 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>23</td>\n",
+       "      <th>1</th>\n",
+       "      <td>16</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>23</td>\n",
+       "      <th>2</th>\n",
+       "      <td>14</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>34</th>\n",
-       "      <td>11</td>\n",
+       "      <th>3</th>\n",
+       "      <td>23</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>59</th>\n",
-       "      <td>44</td>\n",
+       "      <th>4</th>\n",
+       "      <td>24</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>64</th>\n",
-       "      <td>20</td>\n",
+       "      <th>5</th>\n",
+       "      <td>34</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -113,11 +123,11 @@
       "text/plain": [
        "         n_character_title\n",
        "movieId                   \n",
+       "1                       16\n",
+       "2                       14\n",
        "3                       23\n",
-       "15                      23\n",
-       "34                      11\n",
-       "59                      44\n",
-       "64                      20"
+       "4                       24\n",
+       "5                       34"
       ]
      },
      "metadata": {},
@@ -126,11 +136,11 @@
     {
      "data": {
       "text/plain": [
-       "0                       Russian\n",
-       "1    Trilogy of the Imagination\n",
-       "2                 Takashi Miike\n",
-       "3                        action\n",
-       "4                      bad plot\n",
+       "0    sandra 'boring' bullock\n",
+       "1                    dentist\n",
+       "2                   Cambodia\n",
+       "3                    Russian\n",
+       "4                forgettable\n",
        "Name: tag, dtype: object"
       ]
      },
@@ -169,177 +179,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "id": "bf27365c",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "None\n",
-      "None\n",
-      "{'n_character_title': array([2.33593365e-04, 2.03124665e-05, 1.01562333e-05]), 'avg_rating': array([2.33593365e-04, 2.03124665e-05, 1.01562333e-05]), 'tags': array([1.11718566e-04, 3.04686998e-05, 1.01562333e-05])}\n",
-      "{'n_character_title': array([1.08454062e-04, 9.43078803e-06, 4.71539402e-06]), 'avg_rating': array([1.08454062e-04, 9.43078803e-06, 4.71539402e-06]), 'tags': array([5.18693342e-05, 1.41461820e-05, 4.71539402e-06])}\n",
-      "None\n",
-      "{'n_character_title': array([5.24102880e-05, 4.55741635e-06, 2.27870817e-06]), 'avg_rating': array([5.24102880e-05, 4.55741635e-06, 2.27870817e-06]), 'tags': array([2.50657899e-05, 6.83612452e-06, 2.27870817e-06])}\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "class ContentBased(AlgoBase):\n",
-    "    def __init__(self, features_method, regressor_method, combination_methods=None):\n",
-    "        AlgoBase.__init__(self)\n",
-    "        self.regressor_method = regressor_method\n",
-    "        self.features_method = features_method\n",
-    "        self.combination_methods = combination_methods\n",
-    "        self.content_features = self.create_content_features(features_method)\n",
-    "        self.user_profile = {}\n",
-    "        self.user_profile_explain = {}\n",
-    "\n",
-    "    def create_content_features(self, features_method):\n",
-    "        df_items = load_items()\n",
-    "        df_ratings = load_ratings()\n",
-    "        df_tag = pd.read_csv(C.CONTENT_PATH / C.TAGS_FILENAME)\n",
-    "        df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
-    "        df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
-    "\n",
-    "        def get_features(method):\n",
-    "            if method == \"relevance\":\n",
-    "                return df_genome_score.groupby('movieId')[\"relevance\"].mean().to_frame('avg_relevance')\n",
-    "\n",
-    "            elif method == \"title_length\":\n",
-    "                return df_items[C.LABEL_COL].apply(len).to_frame('n_character_title')\n",
-    "\n",
-    "            elif method == \"movie_year\":\n",
-    "                return df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
-    "\n",
-    "            elif method == \"genres\":\n",
-    "                genres_list = df_items['genres'].str.split('|').explode().unique()\n",
-    "                df_features = pd.DataFrame()\n",
-    "                for genre in genres_list:\n",
-    "                    df_features[genre] = df_items['genres'].str.contains(genre).astype(int)\n",
-    "                return df_features\n",
-    "\n",
-    "            elif method == \"rating\":\n",
-    "                return df_ratings.groupby('movieId')['rating'].mean().to_frame('avg_rating')\n",
-    "\n",
-    "            elif method == \"tags\":\n",
-    "                return df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0).to_frame('tags')\n",
-    "\n",
-    "            elif method == \"tags_length\":\n",
-    "                return df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(',')) if isinstance(x, str) else 0).to_frame('n_character_tags')\n",
-    "\n",
-    "            else:\n",
-    "                raise NotImplementedError(f'Feature method {method} not yet implemented')\n",
-    "\n",
-    "        if features_method == \"combination\":\n",
-    "            if not self.combination_methods:\n",
-    "                raise ValueError('No combination methods provided for \"combination\" feature method')\n",
-    "            df_features = pd.DataFrame()\n",
-    "            for method in self.combination_methods:\n",
-    "                df_method_features = get_features(method)\n",
-    "                df_features = pd.concat([df_features, df_method_features], axis=1)\n",
-    "        else:\n",
-    "            df_features = get_features(features_method)\n",
-    "\n",
-    "        if df_features is not None:\n",
-    "            df_features.fillna(0, inplace=True)\n",
-    "\n",
-    "        return df_features\n",
-    "\n",
-    "    def fit(self, trainset):\n",
-    "        AlgoBase.fit(self, trainset)\n",
-    "        self.user_profile = {u: None for u in trainset.all_users()}\n",
-    "        self.user_profile_explain = {}\n",
-    "\n",
-    "        for u in trainset.all_users():\n",
-    "            raw_user_id = trainset.to_raw_uid(u)\n",
-    "            self.user_profile_explain[raw_user_id] = {}\n",
-    "\n",
-    "            user_ratings = np.array([rating for _, rating in trainset.ur[u]])\n",
-    "            feature_values = self.content_features.values.astype(int)\n",
-    "            weighted_features = feature_values / np.linalg.norm(feature_values)\n",
-    "            feature_importance = weighted_features / np.sum(user_ratings)\n",
-    "\n",
-    "            self.user_profile_explain[raw_user_id] = dict(zip(self.content_features.columns, feature_importance))\n",
-    "\n",
-    "        self._fit_regressors(trainset)\n",
-    "\n",
-    "    def _fit_regressors(self, trainset):\n",
-    "        for u in self.user_profile:\n",
-    "            user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "            item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "            df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "            df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "            df_user = df_user.merge(self.content_features, left_on=\"item_id\", right_index=True, how='left')\n",
-    "\n",
-    "            if df_user.empty:\n",
-    "                continue\n",
-    "\n",
-    "            X = df_user.iloc[:, 2:].values  # Assuming features start from the third column\n",
-    "            y = df_user['user_ratings'].values\n",
-    "\n",
-    "            if self.regressor_method == 'linear_regression':\n",
-    "                regressor = LinearRegression(fit_intercept=False)\n",
-    "            elif self.regressor_method == 'svr_regression':\n",
-    "                regressor = SVR(kernel='rbf', C=10, epsilon=0.2)\n",
-    "            elif self.regressor_method == 'gradient_boosting':\n",
-    "                regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
-    "            elif self.regressor_method == 'random_forest':\n",
-    "                regressor = RandomForestRegressor(n_estimators=100)\n",
-    "            else:\n",
-    "                regressor = None\n",
-    "\n",
-    "            if regressor is not None:\n",
-    "                regressor.fit(X, y)\n",
-    "                self.user_profile[u] = regressor\n",
-    "\n",
-    "    def estimate(self, u, i):\n",
-    "        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):\n",
-    "            raise PredictionImpossible('User and/or item is unknown.')\n",
-    "\n",
-    "        raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "        item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "        regressor = self.user_profile[u]\n",
-    "        if regressor is None:\n",
-    "            raise PredictionImpossible('Regressor not found.')\n",
-    "\n",
-    "        score = regressor.predict(item_features)[0]\n",
-    "        return score\n",
-    "    \n",
-    "\n",
-    "    def explain(self, u) :    \n",
-    "        if u in self.user_profile_explain :\n",
-    "            return self.user_profile_explain[u]\n",
-    "        else :\n",
-    "            return None\n",
-    "\n",
-    "# Example usage\n",
-    "cb = ContentBased(\"combination\", \"svr_regression\", combination_methods=[\"title_length\", \"rating\", \"tags\"])\n",
-    "sp_ratings = load_ratings(surprise_format=True)\n",
-    "train_set = sp_ratings.build_full_trainset()\n",
-    "cb.fit(train_set)\n",
-    "\n",
-    "\n",
-    "print(cb.explain(11))\n",
-    "\n",
-    "print(cb.explain(13))\n",
-    "\n",
-    "print(cb.explain(17))\n",
-    "\n",
-    "print(cb.explain(23))\n",
-    "\n",
-    "print(cb.explain(27))\n",
-    "\n",
-    "print(cb.explain(73))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
    "id": "16b0a602",
    "metadata": {},
    "outputs": [
@@ -347,401 +187,170 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "None\n",
-      "None\n",
-      "{'movie_year': array([0.00030032])}\n",
-      "{'movie_year': array([0.00013943])}\n",
-      "None\n",
-      "{'movie_year': array([6.73812161e-05])}\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/7z/lpyksh0x59x6l5_mf048x9tc0000gn/T/ipykernel_23482/2546010458.py:36: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.\n",
-      "  df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n"
+      "{'title_length': 0.1497645139703848, 'movie_year': 0.16218667420100635, '(no genres listed)': 0.0, 'action': 0.09449072815753193, 'adventure': 0.08778978776313201, 'animation': 0.0, 'children': 0.038431411145366176, 'comedy': 0.07268129109348041, 'crime': 0.09469516433772891, 'documentary': 0.0611428358670058, 'drama': 0.10494783392380302, 'fantasy': 0.025806451608591505, 'film-noir': 0.025806451609512046, 'horror': 0.018342712153336858, 'imax': 0.06947533670577526, 'musical': 0.0, 'mystery': 0.06234903350217154, 'romance': 0.036771716124540825, 'sci-fi': 0.059571001735546115, 'thriller': 0.0993122803165238, 'war': 0.04002978709072218, 'western': 0.04547648227079719, 'avg_rating': 0.16263357553020436}\n",
+      "{'title_length': 0.12975573389578626, 'movie_year': 0.13738555574364605, '(no genres listed)': 0.0, 'action': 0.0640388318396414, 'adventure': 0.0827515664964472, 'animation': 0.05686854568650957, 'children': 0.06799492283569505, 'comedy': 0.07354182680364503, 'crime': 0.05543740962624167, 'documentary': 0.0, 'drama': 0.09170589087803577, 'fantasy': 0.061481521263689595, 'film-noir': 0.0, 'horror': 0.015113350123518238, 'imax': 0.04592205020685974, 'musical': 0.03201459126079391, 'mystery': 0.03412706135338736, 'romance': 0.05989121250223656, 'sci-fi': 0.04370793816378273, 'thriller': 0.045800659191095036, 'war': 0.04907194751877139, 'western': 0.027287416762806844, 'avg_rating': 0.13740560847192132}\n",
+      "{'title_length': 0.04702378569892371, 'movie_year': 0.052440003628289225, '(no genres listed)': 0.0, 'action': 0.020439581335728367, 'adventure': 0.015593308332521032, 'animation': 0.004256286923052558, 'children': 0.003520723090188317, 'comedy': 0.018972762464944913, 'crime': 0.028340544273099223, 'documentary': 0.005823989517206729, 'drama': 0.037415345194166824, 'fantasy': 0.013643903080149476, 'film-noir': 0.015390183296279798, 'horror': 0.01926898253629829, 'imax': 0.0014716703456143566, 'musical': 0.0061519348279224124, 'mystery': 0.02847033164163413, 'romance': 0.019827342468818163, 'sci-fi': 0.022573488552024915, 'thriller': 0.03522231545147593, 'war': 0.010339617301415098, 'western': 0.005663885036293055, 'avg_rating': 0.05327750989412312}\n",
+      "{'title_length': 0.033402138126294736, 'movie_year': 0.03710065977291947, '(no genres listed)': 0.0, 'action': 0.014528522669579273, 'adventure': 0.013963913494241694, 'animation': 0.005764814103226412, 'children': 0.006513197483932152, 'comedy': 0.017763201411495646, 'crime': 0.016002513666599556, 'documentary': 0.004292962983778595, 'drama': 0.027458210593047847, 'fantasy': 0.009302633945770895, 'film-noir': 0.006823368830454359, 'horror': 0.007391689869010394, 'imax': 0.004855154663168369, 'musical': 0.0058909467772061425, 'mystery': 0.012191560732760487, 'romance': 0.01723631022081761, 'sci-fi': 0.010817269433255231, 'thriller': 0.01658593988724716, 'war': 0.010193212979882352, 'western': 0.0052038255339472966, 'avg_rating': 0.03742403427834079}\n",
+      "{'title_length': 0.20154225634108316, 'movie_year': 0.20848962267389695, '(no genres listed)': 0.0, 'action': 0.04545454544645529, 'adventure': 0.04545454544730129, 'animation': 0.0, 'children': 0.0, 'comedy': 0.07177284969293253, 'crime': 0.1145252645738102, 'documentary': 0.0, 'drama': 0.16778172557550536, 'fantasy': 0.0, 'film-noir': 0.0, 'horror': 0.06315936177961773, 'imax': 0.0, 'musical': 0.0, 'mystery': 0.08510520557533159, 'romance': 0.09754755529442835, 'sci-fi': 0.045454545449454146, 'thriller': 0.12542163704872258, 'war': 0.08035304331050673, 'western': 0.0, 'avg_rating': 0.21152969571139305}\n",
+      "{'title_length': 0.021927486954368552, 'movie_year': 0.02488786702116846, '(no genres listed)': 0.0007363092498113207, 'action': 0.013836432470735639, 'adventure': 0.011610617815573265, 'animation': 0.007520799115717832, 'children': 0.006287966766754299, 'comedy': 0.012951125615087338, 'crime': 0.011084119744598393, 'documentary': 0.0018287715645832062, 'drama': 0.015221252640276463, 'fantasy': 0.008631010164284143, 'film-noir': 0.0024629052522566544, 'horror': 0.008816299251739122, 'imax': 0.005347204099216887, 'musical': 0.0038827346462235236, 'mystery': 0.0068652812039576095, 'romance': 0.008086664541950757, 'sci-fi': 0.010304269379559203, 'thriller': 0.013200133984104478, 'war': 0.005127335699821772, 'western': 0.0036215200349232765, 'avg_rating': 0.025470698706944836}\n"
      ]
     }
    ],
    "source": [
+    "\n",
+    "# ContentBased\n",
     "class ContentBased(AlgoBase):\n",
     "    def __init__(self, features_method, regressor_method):\n",
     "        AlgoBase.__init__(self)\n",
     "        self.regressor_method = regressor_method\n",
+    "        self.features_methods = features_method\n",
     "        self.content_features = self.create_content_features(features_method)\n",
+    "        self.user_profile = {}\n",
     "        self.user_profile_explain = {}\n",
     "\n",
-    "    def create_content_features(self, features_method):\n",
+    "    def create_content_features(self, features_methods):\n",
     "        \"\"\"Content Analyzer\"\"\"\n",
     "        df_items = load_items()\n",
     "        df_ratings = load_ratings()\n",
-    "        df_tag = df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
+    "        df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
     "        df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
     "        df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
     "\n",
-    "        if features_method is None:\n",
-    "            df_features = None\n",
+    "        df_features = pd.DataFrame(index=df_items.index)\n",
     "\n",
-    "        elif features_method == \"relevance\" :\n",
-    "            df_features = df_genome_score.groupby('movieId')[\"relevance\"].transform('mean').to_frame('avg_relevance')\n",
-    "\n",
-    "        elif features_method == \"title_length\": # a naive method that creates only 1 feature based on title length\n",
-    "            df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
+    "        for method in features_methods:\n",
+    "            if method == \"title_length\":\n",
+    "                df_title_length = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('title_length')\n",
+    "                df_features = pd.concat([df_features, df_title_length], axis=1)\n",
     "            \n",
-    "        elif features_method == \"movie_year\" :\n",
-    "            df_features = df_items['movie_year'] = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
-    "\n",
-    "        elif features_method == \"genres\" :\n",
-    "            genres_list = df_items['genres'].str.split('|').explode().unique()\n",
-    "            for genre in genres_list:\n",
-    "                df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
-    "        \n",
-    "        elif features_method == \"combination\" :\n",
-    "            genres_list = df_items['genres'].str.split('|').explode().unique()\n",
-    "            for genre in genres_list:\n",
-    "                df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
+    "            elif method == \"movie_year\":\n",
+    "                df_movie_year = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
+    "                df_features = pd.concat([df_features, df_movie_year.astype(float).fillna(0)], axis=1)\n",
     "            \n",
-    "            df_features = df_items['movie_year'] = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
+    "            elif method == \"genre\":\n",
+    "                tfidf_vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split('|'), token_pattern=None)\n",
+    "                tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])\n",
+    "                df_tfidf_genres = pd.DataFrame(tfidf_matrix.toarray(), index=df_items.index, columns=tfidf_vectorizer.get_feature_names_out())\n",
+    "                df_features = pd.concat([df_features, df_tfidf_genres], axis=1)\n",
     "\n",
-    "        \n",
-    "        elif features_method == \"rating\" :\n",
-    "            df_features = df_ratings.groupby('movieId')['rating'].transform('mean').to_frame('avg_rating')\n",
+    "            elif method == \"avg_rating\":\n",
+    "                df_avg_rating = df_ratings.groupby('movieId')['rating'].mean().to_frame('avg_rating')\n",
+    "                df_features = df_features.join(df_avg_rating, on='movieId')\n",
     "\n",
-    "        elif features_method == \"tags\" :\n",
-    "            df_features = df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0).to_frame('tags')\n",
-    "\n",
-    "        elif features_method == \"tags_length\" :\n",
-    "            \n",
-    "            df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(','))if isinstance(x, str) else 0).to_frame('n_character_tags')\n",
-    "\n",
-    "        else: # (implement other feature creations here)\n",
-    "            raise NotImplementedError(f'Feature method {features_method} not yet implemented')\n",
+    "            else:\n",
+    "                raise NotImplementedError(f'Feature method {method} not yet implemented')\n",
     "\n",
     "        # Handle missing values in df_features\n",
-    "        if df_features is not None:\n",
-    "            df_features.fillna(0, inplace=True) \n",
+    "        df_features.fillna(0, inplace=True)\n",
     "\n",
     "        return df_features\n",
-    "    \n",
     "\n",
     "    def fit(self, trainset):\n",
     "        \"\"\"Profile Learner\"\"\"\n",
     "        AlgoBase.fit(self, trainset)\n",
-    "        \n",
+    "\n",
     "        # Preallocate user profiles\n",
     "        self.user_profile = {u: None for u in trainset.all_users()}\n",
-    "\n",
     "        self.user_profile_explain = {}\n",
     "\n",
-    "        # Loop over all internal user IDs in the trainset\n",
+    "        epsilon = 1e-10  # Small value to prevent division by zero\n",
+    "\n",
     "        for u in trainset.all_users():\n",
-    "            # Convert internal user ID to raw user ID\n",
     "            raw_user_id = trainset.to_raw_uid(u)\n",
-    "\n",
-    "            # Initialize feature importance dictionary for the raw user ID\n",
     "            self.user_profile_explain[raw_user_id] = {}\n",
     "\n",
-    "            # Extract user ratings for the current user\n",
-    "            user_ratings = np.array([rating for _, rating in trainset.ur[u]])\n",
+    "            user_ratings = np.array([rating for (_, rating) in trainset.ur[u]])\n",
+    "            item_ids = [iid for (iid, _) in trainset.ur[u]]\n",
+    "            raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]\n",
     "\n",
-    "            # Compute feature importance based on content features and user ratings\n",
-    "            feature_values = self.content_features.values.astype(int)\n",
-    "            weighted_features = feature_values / np.linalg.norm(feature_values)\n",
-    "            feature_importance = weighted_features / np.sum(user_ratings)\n",
+    "            feature_values = self.content_features.loc[raw_item_ids].values\n",
+    "            norms = np.linalg.norm(feature_values, axis=0) + epsilon\n",
+    "            weighted_features = feature_values / norms\n",
+    "            feature_importance = weighted_features.T @ user_ratings\n",
+    "            feature_importance /= np.sum(user_ratings)\n",
     "\n",
-    "            # Map feature importance scores to feature names and store in user_profile_explain\n",
     "            self.user_profile_explain[raw_user_id] = dict(zip(self.content_features.columns, feature_importance))\n",
-    "            \n",
     "\n",
     "        if self.regressor_method == 'random_score':\n",
-    "            for u in self.user_profile :\n",
-    "                self.user_profile[u] = rd.uniform(0.5,5)\n",
-    "            \n",
-    "        elif self.regressor_method == 'random_sample':\n",
-    "            for u in self.user_profile:\n",
-    "                self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]\n",
-    "\n",
-    "        elif self.regressor_method == 'linear_regression' :\n",
     "            for u in self.user_profile:\n",
+    "                self.user_profile[u] = rd.uniform(0.5, 5)\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "                \n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'combination' in df_user.columns :\n",
-    "                    X = df_user['movie_year','genres' ].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "\n",
-    "                y = df_user['user_ratings'].values\n",
-    "\n",
-    "                linear_regressor = LinearRegression(fit_intercept = False)\n",
-    "\n",
-    "                linear_regressor.fit(X,y)\n",
-    "                \n",
-    "                # Store the computed user profile\n",
-    "                self.user_profile[u] = linear_regressor\n",
-    "\n",
-    "        elif self.regressor_method == 'svr_regression':\n",
-    "            for u in self.user_profile:\n",
-    "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "\n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "                \n",
-    "                y = df_user['user_ratings'].values\n",
-    "                svr_regressor = SVR(kernel='rbf', C=10, epsilon=0.2)\n",
-    "                svr_regressor.fit(X, y)\n",
-    "                self.user_profile[u] = svr_regressor\n",
-    "\n",
-    "        elif self.regressor_method == 'gradient_boosting':\n",
+    "        elif self.regressor_method == 'random_sample':\n",
     "            for u in self.user_profile:\n",
+    "                self.user_profile[u] = [rating for (_, rating) in trainset.ur[u]]\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "\n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "            \n",
-    "                y = df_user['user_ratings'].values\n",
-    "                gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
-    "                gb_regressor.fit(X, y)\n",
-    "                self.user_profile[u] = gb_regressor\n",
-    "\n",
+    "        else:\n",
+    "            regressor_models = {\n",
+    "                'linear_regression': LinearRegression(fit_intercept=False),\n",
+    "                'svr_regression': SVR(kernel='rbf', C=10, epsilon=0.2),\n",
+    "                'gradient_boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),\n",
+    "                'random_forest': RandomForestRegressor(n_estimators=100),\n",
+    "                'lasso_regression': Lasso(alpha=0.1),\n",
+    "                'ridge_regression': Ridge(alpha=1.0),\n",
+    "                'elastic_net': ElasticNet(alpha=1.0, l1_ratio=0.5),\n",
+    "                'knn_regression': KNeighborsRegressor(n_neighbors=1),\n",
+    "                'decision_tree': DecisionTreeRegressor(max_depth=5),\n",
+    "                'adaboost': AdaBoostRegressor(n_estimators=50),\n",
+    "                'xgboost': XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),\n",
+    "                'lightgbm': LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
+    "            }\n",
+    "\n",
+    "            if self.regressor_method not in regressor_models:\n",
+    "                raise NotImplementedError(f'Regressor method {self.regressor_method} not yet implemented')\n",
     "\n",
-    "        elif self.regressor_method == 'random_forest':\n",
     "            for u in self.user_profile:\n",
+    "                user_ratings = [rating for (_, rating) in trainset.ur[u]]\n",
+    "                item_ids = [iid for (iid, _) in trainset.ur[u]]\n",
+    "                raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
+    "                df_user = pd.DataFrame({'item_id': raw_item_ids, 'user_ratings': user_ratings})\n",
+    "                df_user = df_user.merge(self.content_features, left_on=\"item_id\", right_index=True, how='left')\n",
     "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
+    "                X = df_user.drop(columns=['item_id', 'user_ratings'])\n",
+    "                y = df_user['user_ratings']\n",
     "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "\n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "\n",
-    "                y = df_user['user_ratings'].values\n",
-    "                rf_regressor = RandomForestRegressor(n_estimators=100)\n",
-    "                rf_regressor.fit(X, y)\n",
-    "                self.user_profile[u] = rf_regressor\n",
+    "                regressor = regressor_models[self.regressor_method]\n",
+    "                regressor.fit(X, y)\n",
     "\n",
-    "        else : \n",
-    "            pass\n",
+    "                self.user_profile[u] = regressor\n",
     "\n",
-    "            # (implement here the regressor fitting)  \n",
-    "        \n",
     "    def estimate(self, u, i):\n",
     "        \"\"\"Scoring component used for item filtering\"\"\"\n",
-    "        # First, handle cases for unknown users and items\n",
     "        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):\n",
-    "            raise PredictionImpossible('User and/or item is unkown.')\n",
-    "\n",
+    "            raise PredictionImpossible('User and/or item is unknown.')\n",
     "\n",
     "        if self.regressor_method == 'random_score':\n",
-    "            rd.seed()\n",
-    "            score = rd.uniform(0.5,5)\n",
+    "            return rd.uniform(0.5, 5)\n",
     "\n",
     "        elif self.regressor_method == 'random_sample':\n",
-    "            rd.seed()\n",
-    "            score = rd.choice(self.user_profile[u])\n",
-    "        \n",
-    "        elif self.regressor_method == 'linear_regression':\n",
-    "\n",
-    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            linear_regressor = self.user_profile[u]\n",
-    "\n",
-    "            score= linear_regressor.predict(item_features)[0]\n",
-    "        \n",
-    "        elif self.regressor_method == 'svr_regression':\n",
-    "\n",
-    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            svr_regressor = self.user_profile[u]\n",
-    "            score = svr_regressor.predict(item_features)[0]\n",
-    "        \n",
-    "        elif self.regressor_method == 'gradient_boosting':\n",
-    "\n",
-    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            gradient_boosting = self.user_profile[u]\n",
-    "            score = gradient_boosting.predict(item_features)[0]\n",
-    "        \n",
-    "        elif self.regressor_method == 'random_forest':\n",
+    "            return rd.choice(self.user_profile[u])\n",
     "\n",
+    "        else:\n",
     "            raw_item_id = self.trainset.to_raw_iid(i)\n",
+    "            item_features = self.content_features.loc[raw_item_id, :].values.reshape(1, -1)\n",
+    "            regressor = self.user_profile[u]\n",
+    "            item_features_df = pd.DataFrame(item_features, columns=self.content_features.columns)\n",
+    "            return regressor.predict(item_features_df)[0]\n",
     "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            randomforest = self.user_profile[u]\n",
-    "            score = randomforest.predict(item_features)[0]\n",
-    "        \n",
-    "        else : \n",
-    "            score = None\n",
+    "    def explain(self, u):\n",
+    "        if u in self.user_profile_explain:\n",
+    "            return self.user_profile_explain[u]\n",
+    "        else:\n",
+    "            return None\n",
     "\n",
-    "            # (implement here the regressor prediction)\n",
     "\n",
-    "        return score\n",
+    "#Example usage:\n",
+    "cb = ContentBased([\"title_length\", \"movie_year\",\"genre\",\"avg_rating\"], \"ridge_regression\")\n",
+    "surprise_data = load_ratings(surprise_format=True)\n",
+    "trainset = surprise_data.build_full_trainset()\n",
+    "testset = trainset.build_anti_testset()\n",
+    "cb.fit(trainset)\n",
     "\n",
-    "    def explain(self, u) :    \n",
-    "        if u in self.user_profile_explain :\n",
-    "            return self.user_profile_explain[u]\n",
-    "        else :\n",
-    "            return None\n",
     "\n",
+    "#print(\"RMSE: \", cb.rmse(testset))\n",
     "\n",
-    "cb = ContentBased(\"combination\", \"svr_regression\")\n",
-    "sp_ratings = load_ratings(surprise_format=True)\n",
-    "train_set = sp_ratings.build_full_trainset()\n",
-    "cb.fit(train_set)\n",
     "\n",
+    "#Example explanations for users:\n",
     "print(cb.explain(11))\n",
     "\n",
     "print(cb.explain(13))\n",
@@ -752,400 +361,7 @@
     "\n",
     "print(cb.explain(27))\n",
     "\n",
-    "print(cb.explain(73))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "baab88b7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Matrice TF-IDF des genres :\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>action</th>\n",
-       "      <th>adventure</th>\n",
-       "      <th>animation</th>\n",
-       "      <th>children</th>\n",
-       "      <th>comedy</th>\n",
-       "      <th>crime</th>\n",
-       "      <th>documentary</th>\n",
-       "      <th>drama</th>\n",
-       "      <th>fantasy</th>\n",
-       "      <th>fi</th>\n",
-       "      <th>...</th>\n",
-       "      <th>listed</th>\n",
-       "      <th>musical</th>\n",
-       "      <th>mystery</th>\n",
-       "      <th>no</th>\n",
-       "      <th>noir</th>\n",
-       "      <th>romance</th>\n",
-       "      <th>sci</th>\n",
-       "      <th>thriller</th>\n",
-       "      <th>war</th>\n",
-       "      <th>western</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.589275</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.807933</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>0.553377</td>\n",
-       "      <td>0.612756</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.564185</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.9065</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.422206</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.422206</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.9065</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.589275</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.807933</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>907</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.403927</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.914791</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>908</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.57735</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.57735</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>909</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>910</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>911</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0000</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>912 rows × 24 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "       action  adventure  animation  children    comedy  crime  documentary  \\\n",
-       "0    0.000000   0.000000        0.0    0.0000  0.589275    0.0          0.0   \n",
-       "1    0.553377   0.612756        0.0    0.0000  0.000000    0.0          0.0   \n",
-       "2    0.000000   0.000000        0.0    0.9065  0.000000    0.0          0.0   \n",
-       "3    0.000000   0.000000        0.0    0.0000  0.000000    0.0          0.0   \n",
-       "4    0.000000   0.000000        0.0    0.0000  0.589275    0.0          0.0   \n",
-       "..        ...        ...        ...       ...       ...    ...          ...   \n",
-       "907  0.000000   0.000000        0.0    0.0000  0.403927    0.0          0.0   \n",
-       "908  0.000000   0.000000        0.0    0.0000  0.000000    0.0          0.0   \n",
-       "909  0.000000   0.000000        0.0    0.0000  0.000000    0.0          0.0   \n",
-       "910  0.000000   0.000000        0.0    0.0000  0.000000    0.0          0.0   \n",
-       "911  0.000000   0.000000        0.0    0.0000  0.000000    0.0          0.0   \n",
-       "\n",
-       "        drama  fantasy   fi  ...   listed  musical  mystery       no  noir  \\\n",
-       "0    0.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "1    0.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "2    0.422206      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "3    0.422206      0.0  0.0  ...  0.00000      0.0   0.9065  0.00000   0.0   \n",
-       "4    0.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "..        ...      ...  ...  ...      ...      ...      ...      ...   ...   \n",
-       "907  0.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "908  0.000000      0.0  0.0  ...  0.57735      0.0   0.0000  0.57735   0.0   \n",
-       "909  0.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "910  0.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "911  1.000000      0.0  0.0  ...  0.00000      0.0   0.0000  0.00000   0.0   \n",
-       "\n",
-       "      romance  sci  thriller  war   western  \n",
-       "0    0.807933  0.0       0.0  0.0  0.000000  \n",
-       "1    0.564185  0.0       0.0  0.0  0.000000  \n",
-       "2    0.000000  0.0       0.0  0.0  0.000000  \n",
-       "3    0.000000  0.0       0.0  0.0  0.000000  \n",
-       "4    0.807933  0.0       0.0  0.0  0.000000  \n",
-       "..        ...  ...       ...  ...       ...  \n",
-       "907  0.000000  0.0       0.0  0.0  0.914791  \n",
-       "908  0.000000  0.0       0.0  0.0  0.000000  \n",
-       "909  0.000000  0.0       0.0  0.0  0.000000  \n",
-       "910  0.000000  0.0       1.0  0.0  0.000000  \n",
-       "911  0.000000  0.0       0.0  0.0  0.000000  \n",
-       "\n",
-       "[912 rows x 24 columns]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from pprint import pprint\n",
-    "\n",
-    "# Créer une instance de TfidfVectorizer pour les genres\n",
-    "tfidf_vectorizer = TfidfVectorizer()\n",
-    "\n",
-    "# Fit et transform pour calculer la matrice TF-IDF des genres\n",
-    "tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])\n",
-    "\n",
-    "# Obtenir les noms des genres (features)\n",
-    "genre_names = tfidf_vectorizer.get_feature_names_out()\n",
-    "\n",
-    "# Créer un DataFrame à partir de la matrice TF-IDF des genres\n",
-    "df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=genre_names)\n",
-    "\n",
-    "print(\"Matrice TF-IDF des genres :\")\n",
-    "display(df_tfidf)"
+    "print(cb.explain(73))\n"
    ]
   },
   {
@@ -1158,10 +374,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
    "id": "69d12f7d",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "user: 1          item: 10         r_ui = None   est = 0.72   {'was_impossible': False}\n"
+     ]
+    }
+   ],
    "source": [
     "def test_contentbased_class(feature_method, regressor_method):\n",
     "    \"\"\"Test the ContentBased class.\n",
@@ -1175,72 +399,7 @@
     "    prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])\n",
     "    print(prediction)\n",
     "\n",
-    "\n",
-    "\n",
-    "# print(\"title_length :\")\n",
-    "# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_score\")\n",
-    "# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")\n",
-    "# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"movie_year : \")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_sample\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"relevance : \") \n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_sample\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"genres : \") \n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_sample\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"rating : \")\n",
-    "# test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_sample\")\n",
-    "# # test_contentbased_class(feature_method= \"rating\", regressor_method=\"linear_regression\")\n",
-    "# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"svr_regression\")\n",
-    "# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"gradient_boosting\")\n",
-    "# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"tags : \")\n",
-    "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_sample\")\n",
-    "# #test_contentbased_class(feature_method=\"tags\", regressor_method=\"linear_regression\")\n",
-    "# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"svr_regression\")\n",
-    "# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"gradient_boosting\")\n",
-    "# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"tags_length : \")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_sample\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"linear_regression\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"svr_regression\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_forest\")\n",
-    "\n",
-    "# print(\"\\n\")\n",
-    "# print(\"combination : \")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_sample\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"linear_regression\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"svr_regression\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_forest\")\n"
+    "test_contentbased_class([\"title_length\", \"movie_year\",\"genre\",\"avg_rating\"], \"ridge_regression\")"
    ]
   }
  ],
diff --git a/data/small/evaluations/evaluation_report_2024-05-22.csv b/data/small/evaluations/evaluation_report_2024-05-22.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5f8f0c651c6debc5911f80ba5ce99f2a16d49b3b
--- /dev/null
+++ b/data/small/evaluations/evaluation_report_2024-05-22.csv
@@ -0,0 +1,6 @@
+mae,rmse,hit_rate,novelty
+1.6571763025276776,1.8653776413082341,0.014814814814814815,538.5435555555556
+1.5157635089877097,1.859210427636794,0.0014814814814814814,4648.247407407407
+0.8632653600233939,1.0824195251647628,0.005925925925925926,538.5435555555556
+0.6877531258252827,0.8936032538534392,0.01925925925925926,533.9954074074074
+0.7429268968557247,0.9807166886090721,0.0,6516.658222222222
diff --git a/data/test/evaluations/evaluation_report_2024-05-22_test.csv b/data/test/evaluations/evaluation_report_2024-05-22_test.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a5678a7e4530b855d30286aae9de3d58659a1879
--- /dev/null
+++ b/data/test/evaluations/evaluation_report_2024-05-22_test.csv
@@ -0,0 +1,185 @@
+features_method,regressor_method,mae,rmse,hit_rate,novelty
+Unknown features,Unknown regressor,1.4375,1.704772712123232,1.0,6.033333333333333
+Unknown features,Unknown regressor,1.1047906549123014,1.2718982459540529,1.0,6.033333333333333
+Unknown features,Unknown regressor,1.1590909090909092,1.3636363636363635,1.0,6.033333333333333
+Unknown features,Unknown regressor,1.411912797135983,1.591814183342947,1.0,6.033333333333333
+['genre'],linear_regression,1.4473754873073326,1.6776813783581366,0.6666666666666666,6.033333333333333
+['genre'],random_forest,0.8532954545454545,0.9835795175558364,1.0,6.033333333333333
+['genre'],lasso_regression,0.7954545454545454,1.0041237288352056,1.0,6.033333333333333
+['genre'],svr_regression,1.7957362529964096,2.0608964237898526,1.0,6.033333333333333
+['genre'],gradient_boosting,1.25,1.5811388300841898,1.0,6.033333333333333
+['genre'],ridge_regression,1.6875,1.976423537605237,1.0,6.033333333333333
+['genre'],elastic_net,1.5,1.6583123951777,1.0,6.033333333333333
+['genre'],decision_tree,1.25,1.4187717304557825,1.0,6.033333333333333
+['genre'],adaboost,1.5625,2.023301757029831,1.0,6.033333333333333
+['genre'],knn_regression,1.5625,2.084166500066633,1.0,6.033333333333333
+['genre'],xgboost,1.8114174157381058,2.1121808891918574,1.0,6.033333333333333
+['genre'],lightgbm,1.25,1.3900844257611844,1.0,6.033333333333333
+['movie_year'],linear_regression,1.1793559588121263,1.3141843299213294,0.3333333333333333,6.033333333333333
+['movie_year'],random_forest,1.685,1.738001726121122,1.0,6.033333333333333
+['movie_year'],lasso_regression,2.033290207006363,2.2940804500068475,1.0,6.033333333333333
+['movie_year'],svr_regression,2.0,2.384848003542364,0.5,6.033333333333333
+['movie_year'],gradient_boosting,1.6875,2.038688303787511,1.0,6.033333333333333
+['movie_year'],ridge_regression,2.1079545454545454,2.4100019718108596,1.0,6.033333333333333
+['movie_year'],elastic_net,2.0227272727272725,2.6096560601094763,1.0,6.033333333333333
+['movie_year'],decision_tree,2.125,2.318404623873926,1.0,6.033333333333333
+['movie_year'],adaboost,1.3920454545454546,1.71594671843151,1.0,6.033333333333333
+['movie_year'],knn_regression,1.5,1.6583123951777,1.0,6.033333333333333
+['movie_year'],xgboost,2.1235196590423584,2.678605209338892,1.0,6.033333333333333
+['movie_year'],lightgbm,1.8863636363636362,2.408956900217901,1.0,6.033333333333333
+['avg_rating'],linear_regression,1.4044117647058827,1.5860506663643976,0.5,6.033333333333333
+['avg_rating'],random_forest,1.915,2.1330403067952615,1.0,6.033333333333333
+['avg_rating'],lasso_regression,0.7025858070500931,0.9815772285932589,1.0,6.033333333333333
+['avg_rating'],svr_regression,0.8063143949253345,1.060586228071386,0.5,6.033333333333333
+['avg_rating'],gradient_boosting,0.749980078950834,0.9353877265415008,1.0,6.033333333333333
+['avg_rating'],ridge_regression,2.0454545454545454,2.358987999156691,1.0,6.033333333333333
+['avg_rating'],elastic_net,2.125,2.5860201081971503,1.0,6.033333333333333
+['avg_rating'],decision_tree,2.5,2.7950849718747373,1.0,6.033333333333333
+['avg_rating'],adaboost,2.1875,2.5031230493125984,1.0,6.033333333333333
+['avg_rating'],knn_regression,2.5,2.8939592256975564,0.6666666666666666,6.033333333333333
+['avg_rating'],xgboost,1.438937783241272,1.6884888622389684,1.0,6.033333333333333
+['avg_rating'],lightgbm,1.6875,2.0512858797180313,1.0,6.033333333333333
+['title_length'],linear_regression,1.2958333333333332,1.8809186421192186,1.0,6.033333333333333
+['title_length'],random_forest,0.9375,1.2149263658264091,1.0,6.033333333333333
+['title_length'],lasso_regression,1.9375,2.099106952968333,1.0,6.033333333333333
+['title_length'],svr_regression,2.5,2.8722813232690143,1.0,6.033333333333333
+['title_length'],gradient_boosting,1.75,2.2472401663219115,1.0,6.033333333333333
+['title_length'],ridge_regression,1.953251787924632,2.096761794400394,1.0,6.033333333333333
+['title_length'],elastic_net,1.8125,2.404423007708918,1.0,6.033333333333333
+['title_length'],decision_tree,1.25,1.6770509831248424,1.0,6.033333333333333
+['title_length'],adaboost,1.375,1.9525624189766635,1.0,6.033333333333333
+['title_length'],knn_regression,1.125,1.3693063937629153,1.0,6.033333333333333
+['title_length'],xgboost,1.6263536214828491,1.879353438588852,1.0,6.033333333333333
+['title_length'],lightgbm,1.1742424242424243,1.7479449759985175,1.0,6.033333333333333
+"['genre', 'movie_year']",linear_regression,1.1654492777443672,1.4386998725766413,1.0,6.033333333333333
+"['genre', 'movie_year']",random_forest,1.355,1.5987727085341803,1.0,6.033333333333333
+"['genre', 'movie_year']",lasso_regression,1.6922151766529794,2.1026562882679745,1.0,6.033333333333333
+"['genre', 'movie_year']",svr_regression,1.2653420920250695,1.4938026536014781,1.0,6.033333333333333
+"['genre', 'movie_year']",gradient_boosting,1.4034001355292514,1.7108770020792687,1.0,6.033333333333333
+"['genre', 'movie_year']",ridge_regression,1.4984061821373587,1.8243627906442716,1.0,6.033333333333333
+"['genre', 'movie_year']",elastic_net,1.4127246300211427,1.5953292965261576,1.0,6.033333333333333
+"['genre', 'movie_year']",decision_tree,3.1875,3.254804141572884,1.0,6.033333333333333
+"['genre', 'movie_year']",adaboost,2.125,2.3048861143232218,1.0,6.033333333333333
+"['genre', 'movie_year']",knn_regression,2.4375,2.7894892005526746,1.0,6.033333333333333
+"['genre', 'movie_year']",xgboost,1.3125,1.8114220932736798,1.0,6.033333333333333
+"['genre', 'movie_year']",lightgbm,1.3625,1.509552913945053,1.0,6.033333333333333
+"['genre', 'avg_rating']",linear_regression,0.8810197834402558,1.1633623833190734,1.0,6.033333333333333
+"['genre', 'avg_rating']",random_forest,1.47875,1.6431714761399678,1.0,6.033333333333333
+"['genre', 'avg_rating']",lasso_regression,1.3125,1.5512092057488571,1.0,6.033333333333333
+"['genre', 'avg_rating']",svr_regression,1.4030189396223172,1.7226838917059923,1.0,6.033333333333333
+"['genre', 'avg_rating']",gradient_boosting,2.9375,3.1770662567847086,1.0,6.033333333333333
+"['genre', 'avg_rating']",ridge_regression,1.5941614495865424,1.7614218970767976,1.0,6.033333333333333
+"['genre', 'avg_rating']",elastic_net,1.4791666666666667,1.6442448396591178,1.0,6.033333333333333
+"['genre', 'avg_rating']",decision_tree,1.5625,1.9598566843370195,0.5,6.033333333333333
+"['genre', 'avg_rating']",adaboost,1.5,1.713913650100261,1.0,6.033333333333333
+"['genre', 'avg_rating']",knn_regression,1.5,1.8874586088176875,1.0,6.033333333333333
+"['genre', 'avg_rating']",xgboost,1.6083768904209137,2.1469591124834313,1.0,6.033333333333333
+"['genre', 'avg_rating']",lightgbm,1.3125,1.4469796128487782,1.0,6.033333333333333
+"['genre', 'title_length']",linear_regression,1.7095903990166095,1.9079361689052665,1.0,6.033333333333333
+"['genre', 'title_length']",random_forest,2.16125,2.3690438472092494,1.0,6.033333333333333
+"['genre', 'title_length']",lasso_regression,1.9095085221807393,2.2649506463335594,1.0,6.033333333333333
+"['genre', 'title_length']",svr_regression,0.9028659166948557,1.0922570883925715,1.0,6.033333333333333
+"['genre', 'title_length']",gradient_boosting,0.9652308740417632,1.216094887841987,1.0,6.033333333333333
+"['genre', 'title_length']",ridge_regression,1.097871043098452,1.728983663512694,1.0,6.033333333333333
+"['genre', 'title_length']",elastic_net,1.6542403043691702,1.7556295027609992,1.0,6.033333333333333
+"['genre', 'title_length']",decision_tree,0.8125,1.2119199643540823,1.0,6.033333333333333
+"['genre', 'title_length']",adaboost,1.375,1.6393596310755,1.0,6.033333333333333
+"['genre', 'title_length']",knn_regression,1.6875,2.2150056433336687,1.0,6.033333333333333
+"['genre', 'title_length']",xgboost,1.3717930614948273,1.7417813022095112,1.0,6.033333333333333
+"['genre', 'title_length']",lightgbm,1.75,2.25,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",linear_regression,0.6734228085797149,0.8239517444490464,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",random_forest,1.5,1.984313483298443,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",lasso_regression,1.5007812500000028,1.8480867957667793,0.6666666666666666,6.033333333333333
+"['movie_year', 'avg_rating']",svr_regression,1.5,2.1360009363293826,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",gradient_boosting,2.0052481525699046,2.692573423919177,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",ridge_regression,1.5289909638554189,1.6544569693334048,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",elastic_net,1.9375,2.143303524935281,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",decision_tree,2.1875,2.481179155159901,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",adaboost,1.8125,2.143303524935281,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",knn_regression,1.9375,2.3251344047172844,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",xgboost,1.6889803409576416,1.897769890072298,1.0,6.033333333333333
+"['movie_year', 'avg_rating']",lightgbm,2.0,2.328741546432889,1.0,6.033333333333333
+"['movie_year', 'title_length']",linear_regression,1.2499217118997914,1.5122313117996493,1.0,6.033333333333333
+"['movie_year', 'title_length']",random_forest,1.4825,2.073179924656806,1.0,6.033333333333333
+"['movie_year', 'title_length']",lasso_regression,1.8143768982923731,2.26125435323686,1.0,6.033333333333333
+"['movie_year', 'title_length']",svr_regression,1.8125,2.069118169655856,1.0,6.033333333333333
+"['movie_year', 'title_length']",gradient_boosting,1.5,1.7853571071357126,1.0,6.033333333333333
+"['movie_year', 'title_length']",ridge_regression,1.6804379446793671,2.1523028399883026,1.0,6.033333333333333
+"['movie_year', 'title_length']",elastic_net,2.4326635034049033,2.780356782965454,1.0,6.033333333333333
+"['movie_year', 'title_length']",decision_tree,2.125,2.48746859276655,1.0,6.033333333333333
+"['movie_year', 'title_length']",adaboost,1.6875,1.8624580532189174,0.6666666666666666,6.033333333333333
+"['movie_year', 'title_length']",knn_regression,1.9375,2.143303524935281,1.0,6.033333333333333
+"['movie_year', 'title_length']",xgboost,1.687782883644104,1.97009092981722,1.0,6.033333333333333
+"['movie_year', 'title_length']",lightgbm,1.0056818181818183,1.2201556637960094,1.0,6.033333333333333
+"['avg_rating', 'title_length']",linear_regression,1.4443069306930694,1.841000186873917,1.0,6.033333333333333
+"['avg_rating', 'title_length']",random_forest,1.84125,2.1687525216123666,1.0,6.033333333333333
+"['avg_rating', 'title_length']",lasso_regression,2.090909090909091,2.6987141106904358,1.0,6.033333333333333
+"['avg_rating', 'title_length']",svr_regression,2.2179796680695123,2.5951310459929013,1.0,6.033333333333333
+"['avg_rating', 'title_length']",gradient_boosting,1.4999983399125698,2.0184285217460998,1.0,6.033333333333333
+"['avg_rating', 'title_length']",ridge_regression,0.7230706317708891,1.019417759512073,1.0,6.033333333333333
+"['avg_rating', 'title_length']",elastic_net,1.3125,1.5967021151824468,1.0,6.033333333333333
+"['avg_rating', 'title_length']",decision_tree,0.8181818181818182,1.2374890432773151,1.0,6.033333333333333
+"['avg_rating', 'title_length']",adaboost,1.5,2.0766559657295187,1.0,6.033333333333333
+"['avg_rating', 'title_length']",knn_regression,2.625,2.883140648667699,1.0,6.033333333333333
+"['avg_rating', 'title_length']",xgboost,1.4955596327781677,1.7791506491564508,0.6666666666666666,6.033333333333333
+"['avg_rating', 'title_length']",lightgbm,1.4375,1.704772712123232,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",linear_regression,1.9452454483098371,2.1507860231787106,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",random_forest,2.375,2.839454172900137,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",lasso_regression,2.107954545454546,2.425171460241574,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",svr_regression,2.1647727272727275,2.3927943655970645,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",gradient_boosting,0.696366266134975,0.9245168688215822,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",ridge_regression,1.9375,2.1578345627040085,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",elastic_net,1.7329545454545454,1.9673864653728386,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",decision_tree,1.625,1.984313483298443,0.5,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",adaboost,1.0,1.299038105676658,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",knn_regression,2.125,2.4325614836835734,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",xgboost,0.7976747588677839,1.1996512624250524,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating']",lightgbm,1.8125,2.0077973005261263,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",linear_regression,1.1511554727356577,1.4283269907432299,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",random_forest,0.9375,1.286953767623375,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",lasso_regression,1.9654476796952167,2.057096575823434,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",svr_regression,1.0626623467862377,1.3111671549042816,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",gradient_boosting,0.9615501326314708,1.063826267065753,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",ridge_regression,1.8955865714529594,2.0412170823459532,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",elastic_net,2.6875,3.0771334062727926,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",decision_tree,1.375,1.6007810593582121,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",adaboost,2.25,2.5495097567963922,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",knn_regression,2.875,3.0516389039334255,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",xgboost,1.375,1.7677669529663689,1.0,6.033333333333333
+"['genre', 'movie_year', 'title_length']",lightgbm,2.104166666666667,2.3884851079944194,0.6666666666666666,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",linear_regression,0.8154928335413847,1.1606281170743262,0.6666666666666666,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",random_forest,1.34875,1.8100362565429458,0.6666666666666666,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",lasso_regression,1.2187500000000002,1.4027874037073473,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",svr_regression,1.127125408284661,1.4485854886669336,0.5,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",gradient_boosting,1.75,2.03100960115899,0.6666666666666666,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",ridge_regression,1.8125,2.069118169655856,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",elastic_net,1.6303571428571428,1.835915775490226,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",decision_tree,2.4375,2.5805965019322805,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",adaboost,1.9375,2.2150056433336687,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",knn_regression,0.9403409090909091,1.1835378659617484,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",xgboost,2.6875,2.8777161083053344,1.0,6.033333333333333
+"['genre', 'avg_rating', 'title_length']",lightgbm,1.5700757575757576,1.7515652844875222,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",linear_regression,1.7557054258493372,1.9887479913562542,0.6666666666666666,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",random_forest,1.600625,1.7266559081067656,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",lasso_regression,0.8366370842413322,1.368810888302608,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",svr_regression,1.5629929403189624,2.039055332090993,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",gradient_boosting,1.9520765758783163,2.3984081852018986,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",ridge_regression,2.1802325581395365,2.563201914841398,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",elastic_net,1.9801136363636365,2.298925705808356,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",decision_tree,1.3977272727272727,1.7712697971271558,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",adaboost,1.6420454545454546,1.9221012305536531,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",knn_regression,2.8125,2.942150573984955,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",xgboost,1.5625,1.704772712123232,1.0,6.033333333333333
+"['movie_year', 'avg_rating', 'title_length']",lightgbm,1.4005681818181819,1.7874353424126381,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",linear_regression,2.2618903144827494,2.3066045260646746,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",random_forest,1.9375,2.2980970388562794,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",lasso_regression,1.6222469135802469,1.7958401377770798,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",svr_regression,1.313013214298001,1.4569356767282977,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",gradient_boosting,1.1875,1.5309310892394863,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",ridge_regression,1.375,1.9525624189766635,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",elastic_net,1.1515325148823656,1.3471236231883315,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",decision_tree,1.8125,2.1286732957408003,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",adaboost,1.5,1.9364916731037085,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",knn_regression,1.8125,2.069118169655856,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",xgboost,1.875,2.1937410968480306,1.0,6.033333333333333
+"['genre', 'movie_year', 'avg_rating', 'title_length']",lightgbm,1.8585227272727276,1.9982010245995632,1.0,6.033333333333333
diff --git a/data/tiny/evaluations/evaluation_report_2024-05-22.csv b/data/tiny/evaluations/evaluation_report_2024-05-22.csv
new file mode 100644
index 0000000000000000000000000000000000000000..79a775cae4a41aa5e68ebfec95881014ebd0e1e2
--- /dev/null
+++ b/data/tiny/evaluations/evaluation_report_2024-05-22.csv
@@ -0,0 +1,24 @@
+features_method,regressor_method,mae,rmse,hit_rate,novelty
+Unknown features,Unknown regressor,1.5649546827794563,1.7774604240073495,0.08411214953271028,99.40560747663551
+Unknown features,Unknown regressor,1.506078052886924,1.8397201791089532,0.0,429.94299065420563
+Unknown features,Unknown regressor,0.8746908115113954,1.0897093001331002,0.06542056074766354,99.40560747663551
+Unknown features,Unknown regressor,0.7138366989023895,0.9313405510567094,0.16822429906542055,61.73271028037383
+"['movie_year', 'avg_rating']",linear_regression,0.8064711208383942,0.983824686370847,0.009345794392523364,675.9383177570094
+"['genre', 'movie_year', 'avg_rating']",gradient_boosting,0.9711461895227331,1.248394836559089,0.028037383177570093,308.4906542056075
+['avg_rating'],gradient_boosting,0.8590351163143807,1.121240295642085,0.102803738317757,182.49252336448598
+['avg_rating'],lasso_regression,0.7125829169303501,0.9338850255246349,0.0,724.6280373831776
+['genre'],random_forest,0.9199064200916992,1.235119018631717,0.056074766355140186,327.0514018691589
+['genre'],lasso_regression,1.0440014586729254,1.239127258979977,0.056074766355140186,99.40560747663551
+"['avg_rating', 'title_length']",ridge_regression,0.8454994573520899,1.0365690571406192,0.0,571.8523364485982
+['avg_rating'],svr_regression,0.9622470020847163,1.286461375966794,0.018691588785046728,290.4084112149533
+"['genre', 'movie_year', 'title_length']",gradient_boosting,0.9829050110032581,1.23532414575894,0.04672897196261682,363.6448598130841
+"['genre', 'title_length']",svr_regression,0.9590937921698368,1.2054205241611384,0.0,527.6280373831776
+"['genre', 'avg_rating', 'title_length']",linear_regression,0.8261378328560118,1.0311965608643556,0.056074766355140186,115.61495327102804
+"['genre', 'avg_rating']",linear_regression,0.9911160053318097,1.266697175630553,0.018691588785046728,119.30934579439253
+"['genre', 'avg_rating', 'title_length']",knn_regression,1.037088167018069,1.3090384914907294,0.09345794392523364,141.0981308411215
+"['genre', 'movie_year', 'avg_rating']",xgboost,0.9579212706198381,1.2303094916039912,0.08411214953271028,291.59532710280376
+"['genre', 'title_length']",decision_tree,0.9076211919995862,1.1905523032159628,0.0,239.76822429906542
+['title_length'],random_forest,0.9242463275468562,1.1381511361285654,0.018691588785046728,481.17289719626166
+"['genre', 'title_length']",gradient_boosting,1.068975407551646,1.3464061489576002,0.009345794392523364,413.4607476635514
+"['movie_year', 'title_length']",lightgbm,1.1591172193268287,1.4711169690740502,0.06542056074766354,99.40560747663551
+"['avg_rating', 'title_length']",decision_tree,0.7793934429222078,1.0481330138327167,0.06542056074766354,88.16168224299065
diff --git a/evaluator.ipynb b/evaluator.ipynb
index 3f594379da37b248b5392890b82736a41a832f09..5c9d6e6c12f67fe890b0b357181eedd0b2c7c3cf 100644
--- a/evaluator.ipynb
+++ b/evaluator.ipynb
@@ -288,7 +288,7 @@
       "- computing metric rmse\n",
       "Training loo predictions\n",
       "Training full predictions\n",
-      "Handling model 2\n",
+      "Handling model 1\n",
       "Training split predictions\n",
       "- computing metric mae\n",
       "- computing metric rmse\n",
@@ -327,50 +327,50 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>baseline_1</th>\n",
-       "      <td>1.596677</td>\n",
-       "      <td>1.814364</td>\n",
-       "      <td>0.102804</td>\n",
-       "      <td>99.405607</td>\n",
+       "      <td>1.657176</td>\n",
+       "      <td>1.865378</td>\n",
+       "      <td>0.014815</td>\n",
+       "      <td>538.543556</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>baseline_2</th>\n",
-       "      <td>1.504517</td>\n",
-       "      <td>1.836713</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>429.942991</td>\n",
+       "      <td>1.515764</td>\n",
+       "      <td>1.859210</td>\n",
+       "      <td>0.001481</td>\n",
+       "      <td>4648.247407</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>baseline_3</th>\n",
-       "      <td>0.878197</td>\n",
-       "      <td>1.080797</td>\n",
-       "      <td>0.084112</td>\n",
-       "      <td>99.405607</td>\n",
+       "      <td>0.863265</td>\n",
+       "      <td>1.082420</td>\n",
+       "      <td>0.005926</td>\n",
+       "      <td>538.543556</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>baseline_4</th>\n",
-       "      <td>0.721185</td>\n",
-       "      <td>0.918754</td>\n",
-       "      <td>0.112150</td>\n",
-       "      <td>54.942056</td>\n",
+       "      <td>0.687753</td>\n",
+       "      <td>0.893603</td>\n",
+       "      <td>0.019259</td>\n",
+       "      <td>533.995407</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0.849388</td>\n",
-       "      <td>1.037533</td>\n",
-       "      <td>0.028037</td>\n",
-       "      <td>453.141121</td>\n",
+       "      <th>1</th>\n",
+       "      <td>0.742927</td>\n",
+       "      <td>0.980717</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>6516.658222</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                 mae      rmse  hit_rate     novelty\n",
-       "baseline_1  1.596677  1.814364  0.102804   99.405607\n",
-       "baseline_2  1.504517  1.836713  0.000000  429.942991\n",
-       "baseline_3  0.878197  1.080797  0.084112   99.405607\n",
-       "baseline_4  0.721185  0.918754  0.112150   54.942056\n",
-       "2           0.849388  1.037533  0.028037  453.141121"
+       "                 mae      rmse  hit_rate      novelty\n",
+       "baseline_1  1.657176  1.865378  0.014815   538.543556\n",
+       "baseline_2  1.515764  1.859210  0.001481  4648.247407\n",
+       "baseline_3  0.863265  1.082420  0.005926   538.543556\n",
+       "baseline_4  0.687753  0.893603  0.019259   533.995407\n",
+       "1           0.742927  0.980717  0.000000  6516.658222"
       ]
      },
      "execution_count": 52,
diff --git a/models.py b/models.py
index 66fdfee8dab3004586972c90eaa04fd95a947153..14424b62ddea3a3bd20774c6bf49b90d674a7cd5 100644
--- a/models.py
+++ b/models.py
@@ -25,6 +25,7 @@ from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, A
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.neighbors import KNeighborsRegressor
 from xgboost import XGBRegressor
+from lightgbm import LGBMRegressor
 
 
 # All the dataframes
@@ -200,17 +201,18 @@ class ContentBased(AlgoBase):
 
         else:
             regressor_models = {
-                'linear_regression': LinearRegression(fit_intercept=True),  # Fit intercept might help
-                'svr_regression': SVR(kernel='rbf', C=1.0, epsilon=0.1),  # Adjusted C and epsilon for better performance
-                'gradient_boosting': GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, max_depth=4),  # More estimators and smaller learning rate
-                'random_forest': RandomForestRegressor(n_estimators=200, max_depth=10, min_samples_split=5),  # More estimators and added max_depth and min_samples_split
-                'lasso_regression': Lasso(alpha=0.01),  # Lower alpha for less regularization
-                'ridge_regression': Ridge(alpha=0.5),  # Lower alpha for less regularization
-                'elastic_net': ElasticNet(alpha=0.5, l1_ratio=0.7),  # Adjusted l1_ratio for better balance
-                'knn_regression': KNeighborsRegressor(n_neighbors=5),  # Increased neighbors for better smoothing
-                'decision_tree': DecisionTreeRegressor(max_depth=10, min_samples_split=4),  # Increased max_depth and added min_samples_split
-                'adaboost': AdaBoostRegressor(n_estimators=100, learning_rate=0.1),  # More estimators and added learning rate
-                'xgboost': XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=4)  # More estimators and smaller learning rate
+                'linear_regression': LinearRegression(fit_intercept=False),
+                'svr_regression': SVR(kernel='rbf', C=10, epsilon=0.2),
+                'gradient_boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
+                'random_forest': RandomForestRegressor(n_estimators=100),
+                'lasso_regression': Lasso(alpha=0.1),
+                'ridge_regression': Ridge(alpha=1.0),
+                'elastic_net': ElasticNet(alpha=1.0, l1_ratio=0.5),
+                'knn_regression': KNeighborsRegressor(n_neighbors=1),
+                'decision_tree': DecisionTreeRegressor(max_depth=5),
+                'adaboost': AdaBoostRegressor(n_estimators=50),
+                'xgboost': XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
+                'lightgbm': LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
             }
 
             if self.regressor_method not in regressor_models:
@@ -255,20 +257,6 @@ class ContentBased(AlgoBase):
             return self.user_profile_explain[u]
         else:
             return None
-
-    def explain2(self, u):
-        if u in self.user_profile_explain:
-            user_explanation = self.user_profile_explain[u]
-            unique_explanation = {}
-            for feature in self.features_methods:
-                if feature == "genre":
-                    genre_weights = {genre: user_explanation[genre] for genre in self.content_features.columns if genre in user_explanation}
-                    unique_explanation[feature] = genre_weights
-                else:
-                    unique_explanation[feature] = user_explanation[feature]
-            return unique_explanation
-        else:
-            return None
     
     def rmse(self, testset):
         """Compute RMSE on the testset"""
@@ -289,8 +277,7 @@ class ContentBased(AlgoBase):
 
 
 # Example usage:
-# cb = ContentBased(["title_length", "movie_year", "tags"], "svr_regression")
-# cb = ContentBased(["movie_year","Romance","avg_rating"], "random_forest")
+# cb = ContentBased(["title_length", "movie_year","genre","avg_rating"], "ridge_regression")
 # surprise_data = load_ratings(surprise_format=True)
 # trainset = surprise_data.build_full_trainset()
 # testset = trainset.build_anti_testset()
@@ -301,19 +288,17 @@ class ContentBased(AlgoBase):
 
 
 # # Example explanations for users:
-# print(cb.explain(11))
+# #print(cb.explain(11))
 
-# print(cb.explain(13))
+# #print(cb.explain(13))
 
-# # print(cb.explain(17))
-# print(cb.explain2(17))
-# print("-----\n")
+# print(cb.explain(17))
 
-# print(cb.explain(23))
+#print(cb.explain(23))
 
-# print(cb.explain(27))
+#print(cb.explain(27))
 
-# print(cb.explain(73))
+#print(cb.explain(73))
 
 
 
diff --git a/recommender.py b/recommender.py
index 0d687d75aeccb6d234ae34d68f0bbc770c869fef..0778db72658c9fcc31b65075449092178222285f 100644
--- a/recommender.py
+++ b/recommender.py
@@ -1,33 +1,31 @@
 # Standard library imports
-import numpy as np 
-import pandas as pd
-import requests
-from collections import defaultdict
 import heapq
 import pickle
 import random as rd
+from collections import defaultdict
 
 # Third-party imports
-from sklearn.metrics import mean_squared_error
-from sklearn.metrics.pairwise import pairwise_distances
-from sklearn.preprocessing import MultiLabelBinarizer
+import numpy as np
+import pandas as pd
+
+from scipy.stats import pearsonr
+from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
 from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
-from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, AdaBoostRegressor
-from sklearn.tree import DecisionTreeRegressor
+from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge
+from sklearn.metrics import mean_squared_error
+
 from sklearn.neighbors import KNeighborsRegressor
+from sklearn.preprocessing import MultiLabelBinarizer
 from sklearn.svm import SVR
-from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible, KNNBasic, Reader, Dataset, SVD
-from surprise.model_selection import train_test_split
-from surprise.similarities import cosine, msd
-import xgboost as xgb
+from sklearn.tree import DecisionTreeRegressor
+from surprise import AlgoBase, KNNWithMeans, accuracy, PredictionImpossible
 from xgboost import XGBRegressor
-from scipy.stats import pearsonr
-
+from lightgbm import LGBMRegressor
 
 # Local imports
-from loaders import load_items, load_ratings
 from constants import Constant as C
+from loaders import load_items, load_ratings
+
 
 #################################################################################################################
 ################################################# load the data #################################################
@@ -46,7 +44,7 @@ testset = trainset.build_anti_testset()
 
 
 class UserBased(AlgoBase):
-    def __init__(self, k=340, min_k=340, sim_options={}, **kwargs):
+    def __init__(self, k=20, min_k=20, sim_options={}, **kwargs):
         """
         Initialize the UserBased collaborative filtering algorithm.
 
@@ -172,17 +170,20 @@ class UserBased(AlgoBase):
 
         self.mean_ratings = mean_ratings
 
-    def get_top_10_pred_ub(self, testset, target_user):
+    def get_top_n_pred_ub(self, testset, target_user, n=10):
         """
-        Get the top 10 predictions for a specific target user.
+        Get the top N predictions for a specific target user.
 
         Args:
             testset (list): List of testset entries containing (user, item, rating).
             target_user (int): Target user for whom predictions are needed.
+            n (int): Number of predictions to return (default: 10).
 
+        Returns:
+            list: Top N predictions for the target user.
         """
-        self.min_k = 340
-        self.k = 340
+        self.min_k = 20
+        self.k = 20
 
         # Get the items the target user has already rated
         rated_items = set([item for item, rating in self.trainset.ur[self.trainset.to_inner_uid(target_user)]])
@@ -202,13 +203,43 @@ class UserBased(AlgoBase):
 
         # Sort the predictions by estimated rating in descending order
         user_based_predictions.sort(key=lambda x: x[1], reverse=True)
-        top_10_predictions_ub = user_based_predictions[:10]
+        top_n_predictions_ub = user_based_predictions[:n]
 
-        # Print the top 10 predictions for the target user
-        print(f"Top 10 predictions for user {target_user}:")
-        for movie_id, pred in top_10_predictions_ub:
+        # Print the top N predictions for the target user
+        print(f"Top {n} predictions for user {target_user}:")
+        for movie_id, pred in top_n_predictions_ub:
             print(f"MovieId {movie_id}: {pred}")
-        return top_10_predictions_ub
+        return top_n_predictions_ub
+    
+    def inter_user_diversity(self, top_n_recommendations):
+        """
+        Calculate the inter-user diversity (IUD) of the recommender system.
+
+        Args:
+            top_n_recommendations (dict): Maps each user to a list of (item_id, value) pairs.
+
+        Returns:
+            float: Mean pairwise Jaccard distance between the users' recommended item sets,
+                or 0.0 when fewer than two users are given.
+        """
+        # Only the item ids matter for set overlap; the second tuple element is dropped.
+        recommendation_sets = [
+            {item_id for item_id, _ in recommendations}
+            for recommendations in top_n_recommendations.values()
+        ]
+
+        jaccard_distances = []
+        for index, first in enumerate(recommendation_sets):
+            for second in recommendation_sets[index + 1:]:
+                union_size = len(first | second)
+                # Two empty lists are identical: similarity 1.0 instead of a ZeroDivisionError.
+                similarity = len(first & second) / union_size if union_size else 1.0
+                jaccard_distances.append(1 - similarity)
+
+        # No pairs to compare (zero or one user) means no measurable diversity.
+        if not jaccard_distances:
+            return 0.0
+        return sum(jaccard_distances) / len(jaccard_distances)
     
     def evaluate_rmse(self, testset):
         """
@@ -241,20 +272,25 @@ class UserBased(AlgoBase):
         Calculate catalog coverage based on the top N recommendations.
 
         Args:
-            top_n_recommendations (list): List of top N recommendations for each user.
+            top_n_recommendations (list or dict): List or dictionary containing top N recommendations for each user.
 
         Returns:
             float: Catalog coverage ratio.
         """
-        all_items = set()
         recommended_items = set()
-        for user_recommendations in top_n_recommendations.values():
-            for item_id, _ in user_recommendations:
-                all_items.add(item_id)
+        all_items = set(range(self.trainset.n_items))
+
+        if isinstance(top_n_recommendations, dict):
+            for user_recommendations in top_n_recommendations.values():
+                for item_id, _ in user_recommendations:
+                    recommended_items.add(item_id)
+        elif isinstance(top_n_recommendations, list):
+            for item_id, _ in top_n_recommendations:
                 recommended_items.add(item_id)
+
         coverage = len(recommended_items) / len(all_items)
         return coverage
-
+    
 
 ###########################################################################################################################
 ####################################################### KNN MODEL  ########################################################
@@ -292,11 +328,11 @@ class RecommenderSystem_KNN :
         sim_options = {
             'name': 'msd',        # Mean Squared Difference (Mean Square Error)
             'user_based': True,   # User-based collaborative filtering
-            'min_support': 340    # Minimum number of common ratings required
+            'min_support': 20    # Minimum number of common ratings required
         }
 
         # Build and train the KNN model
-        self.model = KNNWithMeans(sim_options=sim_options, k=340, min_k=340)
+        self.model = KNNWithMeans(sim_options=sim_options, k=20, min_k=20)
         self.model.fit(self.trainset)
 
         # Evaluate the model
@@ -345,22 +381,58 @@ class RecommenderSystem_KNN :
 
         return top_n.get(userid, [])
     
+
+    def inter_user_diversity(self, top_n_recommendations):
+        """
+        Calculate the inter-user diversity (IUD) of the recommender system.
+
+        Args:
+            top_n_recommendations (dict): Maps each user to a list of (item_id, value) pairs.
+
+        Returns:
+            float: Mean pairwise Jaccard distance between the users' recommended item sets,
+                or 0.0 when fewer than two users are given.
+        """
+        # Only the item ids matter for set overlap; the second tuple element is dropped.
+        recommendation_sets = [
+            {item_id for item_id, _ in recommendations}
+            for recommendations in top_n_recommendations.values()
+        ]
+
+        jaccard_distances = []
+        for index, first in enumerate(recommendation_sets):
+            for second in recommendation_sets[index + 1:]:
+                union_size = len(first | second)
+                # Two empty lists are identical: similarity 1.0 instead of a ZeroDivisionError.
+                similarity = len(first & second) / union_size if union_size else 1.0
+                jaccard_distances.append(1 - similarity)
+
+        # No pairs to compare (zero or one user) means no measurable diversity.
+        if not jaccard_distances:
+            return 0.0
+        return sum(jaccard_distances) / len(jaccard_distances)
+    
     def catalog_coverage(self, top_n_recommendations):
         """
         Calculate catalog coverage based on the top N recommendations.
 
         Args:
-            top_n_recommendations (defaultdict(list)): Dictionary containing top N recommendations for each user.
+            top_n_recommendations (list or dict): List or dictionary containing top N recommendations for each user.
 
         Returns:
             float: Catalog coverage ratio.
         """
-        all_items = set()
         recommended_items = set()
-        for user_recommendations in top_n_recommendations.values():
-            for item_id, _ in user_recommendations:
-                all_items.add(item_id)
+        all_items = set(range(self.trainset.n_items))
+
+        if isinstance(top_n_recommendations, dict):
+            for user_recommendations in top_n_recommendations.values():
+                for item_id, _ in user_recommendations:
+                    recommended_items.add(item_id)
+        elif isinstance(top_n_recommendations, list):
+            for item_id, _ in top_n_recommendations:
                 recommended_items.add(item_id)
+
         coverage = len(recommended_items) / len(all_items)
         return coverage
     
@@ -404,12 +476,16 @@ class OtherUserBased:
         data = pd.read_csv(csv_file)
         return data['movieId'].unique()
 
-    def get_top_10_predictions_for_user(self, csv_file):
+    def get_top_n_predictions_for_user(self, csv_file, n=10):
         """
-        Get the top 100 predictions for the user.
+        Get the top N predictions for all users.
 
         Args:
             csv_file (str): Path to the CSV file containing item data.
+            n (int): Number of predictions to return for each user (default: 10).
+
+        Returns:
+            dict: Dictionary containing top N predictions for each user.
         """
         if hasattr(self, 'model') and self.model is not None:
             all_item_ids = self.get_all_item_ids_from_csv(csv_file)
@@ -426,6 +502,7 @@ class OtherUserBased:
             print(f"Model for user {self.user_id} ({self.user_name}) could not be loaded.")
             return None
 
+
     def evaluate_rmse(self):
         """
         Evaluate the RMSE of the model on the test data.
@@ -468,6 +545,36 @@ class OtherUserBased:
             print(f"Model for user {self.user_id} ({self.user_name}) could not be loaded.")
             return None
     
+    def inter_user_diversity(self, top_n_recommendations):
+        """
+        Calculate the inter-user diversity (IUD) of the recommender system.
+
+        Args:
+            top_n_recommendations (dict): Maps each user to a list of (item_id, value) pairs.
+
+        Returns:
+            float: Mean pairwise Jaccard distance between the users' recommended item sets,
+                or 0.0 when fewer than two users are given.
+        """
+        # Only the item ids matter for set overlap; the second tuple element is dropped.
+        recommendation_sets = [
+            {item_id for item_id, _ in recommendations}
+            for recommendations in top_n_recommendations.values()
+        ]
+
+        jaccard_distances = []
+        for index, first in enumerate(recommendation_sets):
+            for second in recommendation_sets[index + 1:]:
+                union_size = len(first | second)
+                # Two empty lists are identical: similarity 1.0 instead of a ZeroDivisionError.
+                similarity = len(first & second) / union_size if union_size else 1.0
+                jaccard_distances.append(1 - similarity)
+
+        # No pairs to compare (zero or one user) means no measurable diversity.
+        if not jaccard_distances:
+            return 0.0
+        return sum(jaccard_distances) / len(jaccard_distances)
+    
     def catalog_coverage(self, top_n_predictions):
         """
         Calculate catalog coverage based on the top N predictions.
@@ -491,7 +598,7 @@ class OtherUserBased:
 ###########################################################################################################################
 
 class CustomUserBased(UserBased):
-    def __init__(self, k=340, min_k=340, sim_options={}, **kwargs):
+    def __init__(self, k=20, min_k=20, sim_options={}, **kwargs):
         """
         Initialize the CustomUserBased collaborative filtering algorithm.
 
@@ -633,73 +740,127 @@ def compare_similarity_measures(trainset,testset):
 
     return results
 
-# # Example usage:
+# # # Example usage:
 # comparison_results = compare_similarity_measures(trainset ,testset)
 # print(comparison_results)
 
 
 def evaluate_models(trainset, testset, ratings_path, user_name, user_id):
     # Entraînement et évaluation du modèle UserBased
-    user_based_model = UserBased(k=340, min_k=340)
+    user_based_model = UserBased(k=20, min_k=20)
     user_based_model.fit(trainset)
-    top_n_recommendations_ub = user_based_model.get_top_10_pred_ub(testset, user_id)
-    diversity_ub = user_based_model.catalog_coverage(top_n_recommendations_ub)
+    top_n_predictions_ub = user_based_model.get_top_n_pred_ub(testset, user_id, n=5000)
+    diversity_ub = user_based_model.catalog_coverage(top_n_predictions_ub)
     print("Diversity for UserBased model:", diversity_ub)
 
     # Entraînement et évaluation du modèle KNN
     knn_model = RecommenderSystem_KNN(ratings_path)
     knn_model.train_knn_model()
-    top_n_recommendations_knn = knn_model.get_top_n_recommendations(userid=user_id, n=10)
-    diversity_knn = knn_model.catalog_coverage(top_n_recommendations_knn)
+    all_predictions_knn = knn_model.get_top_n_recommendations(userid=user_id, n=5000)  # n is set high, presumably to retrieve every prediction; adjust as needed
+    diversity_knn = knn_model.catalog_coverage(all_predictions_knn)
     print("Diversity for KNN model:", diversity_knn)
 
     # Entraînement et évaluation du modèle OtherUserBased
     other_user_based_model = OtherUserBased(user_name, user_id)
     other_user_based_model.load_model()
-    top_n_predictions_other = other_user_based_model.get_top_10_predictions_for_user(ratings_path)
-    diversity_other = other_user_based_model.catalog_coverage(top_n_predictions_other)
+    top_n_predictions = other_user_based_model.get_top_n_predictions_for_user(ratings_path, n=10)
+    diversity_other = other_user_based_model.catalog_coverage(top_n_predictions)
     print("Diversity for OtherUserBased model:", diversity_other)
 
-# Utilisation de la fonction
+# # Utilisation de la fonction
+# evaluate_models(trainset, testset, "data/small/evidence/ratings.csv", "Adrien", -1)
+
+def evaluate_inter_user_diversity(user_based_model, ratings_path, other_user_based, trainset, testset):
+    """
+    Evaluate the inter-user diversity of different recommender models.
+
+    Args:
+        user_based_model (UserBased): Instance of the UserBased model (fitted here).
+        ratings_path (str): Path to the ratings data, used by the KNN and OtherUserBased models.
+        other_user_based (OtherUserBased): Instance of the OtherUserBased model.
+        trainset (Trainset): Training dataset containing user-item ratings.
+        testset (list): List of testset entries containing (user, item, rating).
+
+    Returns:
+        dict: Dictionary containing inter-user diversity scores for each model.
+    """
+    inter_user_diversity_scores = {}
+
+    # UserBased model
+    user_based_model.fit(trainset)
+    all_top_n_recommendations_ub = {}
+    for user_id in range(user_based_model.trainset.n_users):
+        # Resolve the raw id before the try block so the except branch can always name the user.
+        trainset_user_id = user_based_model.trainset.to_raw_uid(user_id)
+        try:
+            top_n_recommendations_ub = user_based_model.get_top_n_pred_ub(testset, target_user=trainset_user_id, n=10)
+            all_top_n_recommendations_ub[trainset_user_id] = top_n_recommendations_ub
+        except ValueError:
+            print(f"User {trainset_user_id} is not part of the training set for UserBased model. Skipping...")
+    inter_user_diversity_scores['UserBased'] = user_based_model.inter_user_diversity(all_top_n_recommendations_ub)
+
+    # KNN model
+    # NOTE(review): elsewhere get_top_n_recommendations is called with userid=<id>; confirm it accepts a testset.
+    knn_model = RecommenderSystem_KNN(ratings_path)
+    knn_model.train_knn_model()
+    knn_top_n_recommendations = knn_model.get_top_n_recommendations(testset, n=10)
+    inter_user_diversity_scores['KNN'] = knn_model.inter_user_diversity(knn_top_n_recommendations)
+
+    # OtherUserBased model (reads the caller-supplied ratings file instead of a hard-coded path)
+    other_user_based.load_model()
+    other_top_n_recommendations = other_user_based.get_top_n_predictions_for_user(ratings_path, n=10)
+    inter_user_diversity_scores['OtherUserBased'] = other_user_based.inter_user_diversity(other_top_n_recommendations)
+    return inter_user_diversity_scores
+
+
+# # Example usage:
+# user_based_model = UserBased(k=40, min_k=40)
+# ratings = "data/small/evidence/ratings.csv"
+# other_user_based = OtherUserBased("Adrien", -1)
+# other_user_based_2 = OtherUserBased("Audrey", -2)
+# other_user_based_3 = OtherUserBased("Nathanael", -3)
+# other_user_based_4 = OtherUserBased("Charles", -4)
+
+# inter_user_diversity_scores = evaluate_inter_user_diversity(user_based_model, ratings, other_user_based, trainset, testset)
+# print("Inter-user Diversity Scores:")
+# for model_name, score in inter_user_diversity_scores.items():
+#     print(f"{model_name}: {score}")
+
 
-evaluate_models(trainset, testset, "data/small/evidence/ratings.csv", "Adrien", -1)
 
 
 ###########################################################################################################################
 ###################################################### CONTENT-BASED MODEL ################################################
 ###########################################################################################################################
 
-def get_top_n(predictions, n):
-    """Return the top-N recommendation for each user from a set of predictions.
-    Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
-    and modified by cvandekerckh for random tie breaking
 
+def get_top_n(predictions, user_id, n=10):
+    """
+    Return the top-N recommendations for one user from a set of predictions.
     Args:
-        predictions(list of Prediction objects): The list of predictions, as
-            returned by the test method of an algorithm.
-        n(int): The number of recommendation to output for each user. Default
-            is 10.
+        predictions(list of Prediction objects): The list of predictions, as returned by the test method of an algorithm.
+        user_id: The raw user id to keep; compared with == against each prediction's uid.
+        n(int): The maximum number of recommendations to output. Default is 10.
     Returns:
-    A dict where keys are user (raw) ids and values are lists of tuples:
-        [(raw item id, rating estimation), ...] of size n.
+    A list of at most n tuples [(raw item id, rating estimation), ...], highest estimate first.
     """
 
     rd.seed(0)
 
-    # First map the predictions to each user.
-    top_n = defaultdict(list)
+    # Keep only the predictions made for the requested user.
+    user_ratings = []
     for uid, iid, true_r, est, _ in predictions:
-        top_n[uid].append((iid, est))
+        if uid == user_id:
+            user_ratings.append((iid, est))
 
-    # Then sort the predictions for each user and retrieve the k highest ones.
-    for uid, user_ratings in top_n.items():
-        rd.shuffle(user_ratings)
-        user_ratings.sort(key=lambda x: x[1], reverse=True)
-        top_n[uid] = user_ratings[:n]
+    # Shuffle before the (stable) sort so ties are not resolved by input order, then keep the n highest.
+    rd.shuffle(user_ratings)
+    user_ratings.sort(key=lambda x: x[1], reverse=True)
+    top_n = user_ratings[:n]
 
     return top_n
 
-
+# Define your ContentBased class
 class ContentBased(AlgoBase):
     def __init__(self, features_method, regressor_method):
         AlgoBase.__init__(self)
@@ -713,13 +874,11 @@ class ContentBased(AlgoBase):
         """Content Analyzer"""
         df_items = load_items()
         df_ratings = load_ratings()
-        df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)
-
         df_features = pd.DataFrame(index=df_items.index)
 
         for method in features_methods:
             if method == "title_length":
-                df_title_length = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('title_length')
+                df_title_length = df_items['title'].apply(lambda x: len(x)).to_frame('title_length')
                 df_features = pd.concat([df_features, df_title_length], axis=1)
             
             elif method == "movie_year":
@@ -739,35 +898,27 @@ class ContentBased(AlgoBase):
             else:
                 raise NotImplementedError(f'Feature method {method} not yet implemented')
 
-        # Handle missing values in df_features
         df_features.fillna(0, inplace=True)
-
         return df_features
 
     def fit(self, trainset):
         """Profile Learner"""
         AlgoBase.fit(self, trainset)
-
-        # Preallocate user profiles
         self.user_profile = {u: None for u in trainset.all_users()}
         self.user_profile_explain = {}
-
-        epsilon = 1e-10  # Small value to prevent division by zero
+        epsilon = 1e-10
 
         for u in trainset.all_users():
             raw_user_id = trainset.to_raw_uid(u)
             self.user_profile_explain[raw_user_id] = {}
-
             user_ratings = np.array([rating for (_, rating) in trainset.ur[u]])
             item_ids = [iid for (iid, _) in trainset.ur[u]]
             raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]
-
             feature_values = self.content_features.loc[raw_item_ids].values
             norms = np.linalg.norm(feature_values, axis=0) + epsilon
             weighted_features = feature_values / norms
             feature_importance = weighted_features.T @ user_ratings
             feature_importance /= np.sum(user_ratings)
-
             self.user_profile_explain[raw_user_id] = dict(zip(self.content_features.columns, feature_importance))
 
         if self.regressor_method == 'random_score':
@@ -778,19 +929,21 @@ class ContentBased(AlgoBase):
             for u in self.user_profile:
                 self.user_profile[u] = [rating for (_, rating) in trainset.ur[u]]
 
+
         else:
             regressor_models = {
-                'linear_regression': LinearRegression(fit_intercept=True),  # Fit intercept might help
-                'svr_regression': SVR(kernel='rbf', C=1.0, epsilon=0.1),  # Adjusted C and epsilon for better performance
-                'gradient_boosting': GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, max_depth=4),  # More estimators and smaller learning rate
-                'random_forest': RandomForestRegressor(n_estimators=200, max_depth=10, min_samples_split=5),  # More estimators and added max_depth and min_samples_split
-                'lasso_regression': Lasso(alpha=0.01),  # Lower alpha for less regularization
-                'ridge_regression': Ridge(alpha=0.5),  # Lower alpha for less regularization
-                'elastic_net': ElasticNet(alpha=0.5, l1_ratio=0.7),  # Adjusted l1_ratio for better balance
-                'knn_regression': KNeighborsRegressor(n_neighbors=5),  # Increased neighbors for better smoothing
-                'decision_tree': DecisionTreeRegressor(max_depth=10, min_samples_split=4),  # Increased max_depth and added min_samples_split
-                'adaboost': AdaBoostRegressor(n_estimators=100, learning_rate=0.1),  # More estimators and added learning rate
-                'xgboost': XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=4)  # More estimators and smaller learning rate
+                'linear_regression': LinearRegression(fit_intercept=False),
+                'svr_regression': SVR(kernel='rbf', C=10, epsilon=0.2),
+                'gradient_boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
+                'random_forest': RandomForestRegressor(n_estimators=100),
+                'lasso_regression': Lasso(alpha=0.1),
+                'ridge_regression': Ridge(alpha=1.0),
+                'elastic_net': ElasticNet(alpha=1.0, l1_ratio=0.5),
+                'knn_regression': KNeighborsRegressor(n_neighbors=1),
+                'decision_tree': DecisionTreeRegressor(max_depth=5),
+                'adaboost': AdaBoostRegressor(n_estimators=50),
+                'xgboost': XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),
+                'lightgbm': LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
             }
 
             if self.regressor_method not in regressor_models:
@@ -800,20 +953,15 @@ class ContentBased(AlgoBase):
                 user_ratings = [rating for (_, rating) in trainset.ur[u]]
                 item_ids = [iid for (iid, _) in trainset.ur[u]]
                 raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]
-
                 df_user = pd.DataFrame({'item_id': raw_item_ids, 'user_ratings': user_ratings})
                 df_user = df_user.merge(self.content_features, left_on="item_id", right_index=True, how='left')
-
                 X = df_user.drop(columns=['item_id', 'user_ratings'])
                 y = df_user['user_ratings']
-
                 regressor = regressor_models[self.regressor_method]
                 regressor.fit(X, y)
-
                 self.user_profile[u] = regressor
 
     def estimate(self, u, i):
-        """Scoring component used for item filtering"""
         if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
             raise PredictionImpossible('User and/or item is unknown.')
 
@@ -829,26 +977,6 @@ class ContentBased(AlgoBase):
             regressor = self.user_profile[u]
             item_features_df = pd.DataFrame(item_features, columns=self.content_features.columns)
             return regressor.predict(item_features_df)[0]
-
-    def explain(self, u):
-        if u in self.user_profile_explain:
-            return self.user_profile_explain[u]
-        else:
-            return None
-
-    def explain2(self, u):
-        if u in self.user_profile_explain:
-            user_explanation = self.user_profile_explain[u]
-            unique_explanation = {}
-            for feature in self.features_methods:
-                if feature == "genre":
-                    genre_weights = {genre: user_explanation[genre] for genre in self.content_features.columns if genre in user_explanation}
-                    unique_explanation[feature] = genre_weights
-                else:
-                    unique_explanation[feature] = user_explanation[feature]
-            return unique_explanation
-        else:
-            return None
     
     def rmse(self, testset):
         """Compute RMSE on the testset"""
@@ -867,13 +995,48 @@ class ContentBased(AlgoBase):
         rmse_value = np.sqrt(mse)
         return rmse_value
 
+    def explain(self, u):
+        """Return the feature-importance profile stored for raw user id `u`, or None if absent."""
+        # dict.get already yields None for a user that has no stored profile.
+        profile = self.user_profile_explain.get(u)
+        return profile
 
 
+def test_contentbased_class(feature_method, regressor_method, user_id=-1, n=10):
+    """Fit a ContentBased model on all ratings and return the top-N unseen items for one user.
+
+    Prints the recommendations and returns them as a list of (raw item id, estimate) tuples;
+    the list is empty when `user_id` has no ratings in the training set.
+    """
+    sp_ratings = load_ratings(surprise_format=True)
+    train_set = sp_ratings.build_full_trainset()
+    content_algo = ContentBased(feature_method, regressor_method)
+    content_algo.fit(train_set)
+
+    # Score only this user's unrated items: build_anti_testset() would materialise
+    # every (user, unrated item) pair of the whole dataset just to discard most of them.
+    try:
+        rated = {iid for iid, _ in train_set.ur[train_set.to_inner_uid(user_id)]}
+        unseen = [train_set.to_raw_iid(iid) for iid in train_set.all_items() if iid not in rated]
+    except ValueError:  # unknown user: same empty result as filtering the full anti-testset
+        unseen = []
+    top_n_recommendations = get_top_n([content_algo.predict(user_id, iid) for iid in unseen], user_id=user_id, n=n)
+    print(f"Top {n} recommendations for User {user_id}:")
+    for iid, est in top_n_recommendations:
+        print(f"Item {iid}: {est:.2f}")
+    return top_n_recommendations
 
+# Example usage
+#test_contentbased_class(["title_length", "movie_year", "genre"], "gradient_boosting", user_id=-1, n=10)
 
+if __name__ == "__main__":  # keep `import recommender` (e.g. from Home.py) free of training side effects
 
+    surprise_data = load_ratings(surprise_format=True)
+    trainset = surprise_data.build_full_trainset()
+    testset = trainset.build_anti_testset()  # NOTE(review): anti-testset ratings are fill values, so this RMSE is not a real accuracy measure
+    cb = ContentBased(["title_length", "movie_year","genre","avg_rating"], "ridge_regression")
+    cb.fit(trainset)
+    print("RMSE: ", cb.rmse(testset))
 
 
 ###########################################################################################################################
@@ -979,9 +1142,11 @@ class LatentFactorModel:
 
 
 
-# # Example usage:
+# Example usage:
+
+
 # # Load the data
-# ratings = pd.read_csv('data/small/evidence/ratings.csv')  # Make sure your CSV has columns 'userId', 'movieId', 'rating'
+# ratings = pd.read_csv('data/small/evidence/ratings.csv') 
 # # Charger les données des films
 # movies = pd.read_csv('data/small/content/movies.csv')
 
@@ -994,7 +1159,7 @@ class LatentFactorModel:
 
 # # Predict a rating for a specific user and movie
 # user_id = -1
-# movie_id = 4306
+# movie_id = 5218
 # predicted_rating = lfm.predict(user_id, movie_id)
 # print(f"Predicted rating for user {user_id} and movie {movie_id}: {predicted_rating}")