From 6456e17aad30d5649d8968a91b69b06f9484e7d3 Mon Sep 17 00:00:00 2001
From: Nathanael <nathanael.kindidi@student.uclouvain.be>
Date: Thu, 23 May 2024 16:35:17 +0200
Subject: [PATCH] Modify the ContentBased class

---
 content_based.ipynb | 896 ++++++--------------------------------------
 1 file changed, 121 insertions(+), 775 deletions(-)

diff --git a/content_based.ipynb b/content_based.ipynb
index 24950d71..e2c88030 100644
--- a/content_based.ipynb
+++ b/content_based.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 756,
+   "execution_count": 15,
    "id": "277473a3",
    "metadata": {},
    "outputs": [
@@ -53,89 +53,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 757,
+   "execution_count": 16,
    "id": "e8378976",
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>n_character_title</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>movieId</th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>4993</th>\n",
-       "      <td>57</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5952</th>\n",
-       "      <td>45</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>527</th>\n",
-       "      <td>23</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2028</th>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4308</th>\n",
-       "      <td>19</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "         n_character_title\n",
-       "movieId                   \n",
-       "4993                    57\n",
-       "5952                    45\n",
-       "527                     23\n",
-       "2028                    26\n",
-       "4308                    19"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0         long\n",
-       "1       boring\n",
-       "2         long\n",
-       "3      romance\n",
-       "4    stupidity\n",
-       "Name: tag, dtype: object"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: 'data/test/content/movies.csv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[16], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# All the dataframes\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m df_items \u001b[38;5;241m=\u001b[39m \u001b[43mload_items\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      3\u001b[0m df_ratings \u001b[38;5;241m=\u001b[39m load_ratings()\n\u001b[1;32m      4\u001b[0m df_tag \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(C\u001b[38;5;241m.\u001b[39mCONTENT_PATH\u001b[38;5;241m/\u001b[39mC\u001b[38;5;241m.\u001b[39mTAGS_FILENAME)\n",
+      "File \u001b[0;32m~/Desktop/Université/Recommender Systems/recomsys/loaders.py:34\u001b[0m, in \u001b[0;36mload_items\u001b[0;34m()\u001b[0m\n\u001b[1;32m     28\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_items\u001b[39m():\n\u001b[1;32m     29\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Loads items data.\u001b[39;00m\n\u001b[1;32m     30\u001b[0m \n\u001b[1;32m     31\u001b[0m \u001b[38;5;124;03m    Returns:\u001b[39;00m\n\u001b[1;32m     32\u001b[0m \u001b[38;5;124;03m        DataFrame: Items data.\u001b[39;00m\n\u001b[1;32m     33\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m     df_items \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mC\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCONTENT_PATH\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mC\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mITEMS_FILENAME\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# ce qui se trouve dans le movie csv\u001b[39;00m\n\u001b[1;32m     35\u001b[0m     df_items \u001b[38;5;241m=\u001b[39m df_items\u001b[38;5;241m.\u001b[39mset_index(C\u001b[38;5;241m.\u001b[39mITEM_ID_COL) \u001b[38;5;66;03m# movie id\u001b[39;00m\n\u001b[1;32m     36\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m df_items\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m   1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m   1014\u001b[0m     dialect,\n\u001b[1;32m   1015\u001b[0m     delimiter,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1022\u001b[0m     dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m   1023\u001b[0m )\n\u001b[1;32m   1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m    619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m    623\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m   1617\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m   1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m   1878\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m   1879\u001b[0m         mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1881\u001b[0m \u001b[43m    \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1882\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1883\u001b[0m \u001b[43m    \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1884\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1885\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1886\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1887\u001b[0m \u001b[43m    \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1888\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m    868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    869\u001b[0m     \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m    870\u001b[0m     \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m    871\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m    872\u001b[0m         \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m    874\u001b[0m \u001b[43m            \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    875\u001b[0m \u001b[43m            \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    876\u001b[0m \u001b[43m            \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    877\u001b[0m \u001b[43m            \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    878\u001b[0m \u001b[43m            \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    879\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    880\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    881\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m    882\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'data/test/content/movies.csv'"
+     ]
     }
    ],
    "source": [
@@ -169,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 758,
+   "execution_count": null,
    "id": "16b0a602",
    "metadata": {},
    "outputs": [
@@ -193,697 +130,171 @@
     }
    ],
    "source": [
+    "\n",
+    "# ContetnBased\n",
     "class ContentBased(AlgoBase):\n",
     "    def __init__(self, features_method, regressor_method):\n",
     "        AlgoBase.__init__(self)\n",
     "        self.regressor_method = regressor_method\n",
+    "        self.features_methods = features_method\n",
     "        self.content_features = self.create_content_features(features_method)\n",
+    "        self.user_profile = {}\n",
     "        self.user_profile_explain = {}\n",
     "\n",
-    "    def create_content_features(self, features_method):\n",
+    "    def create_content_features(self, features_methods):\n",
     "        \"\"\"Content Analyzer\"\"\"\n",
     "        df_items = load_items()\n",
     "        df_ratings = load_ratings()\n",
-    "        df_tag = df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
+    "        df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
     "        df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
     "        df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
     "\n",
-    "        if features_method is None:\n",
-    "            df_features = None\n",
-    "\n",
-    "        elif features_method == \"relevance\" :\n",
-    "            df_features = df_genome_score.groupby('movieId')[\"relevance\"].transform('mean').to_frame('avg_relevance')\n",
+    "        df_features = pd.DataFrame(index=df_items.index)\n",
     "\n",
-    "        elif features_method == \"title_length\": # a naive method that creates only 1 feature based on title length\n",
-    "            df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
+    "        for method in features_methods:\n",
+    "            if method == \"title_length\":\n",
+    "                df_title_length = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('title_length')\n",
+    "                df_features = pd.concat([df_features, df_title_length], axis=1)\n",
     "            \n",
-    "        elif features_method == \"movie_year\" :\n",
-    "            df_features = df_items['movie_year'] = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
-    "\n",
-    "        elif features_method == \"genres\" :\n",
-    "            genres_list = df_items['genres'].str.split('|').explode().unique()\n",
-    "            for genre in genres_list:\n",
-    "                df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
-    "        \n",
-    "        elif features_method == \"combination\": \n",
-    "            df_length = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
-    "            df_movie = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
-    "            genres_list = df_items['genres'].str.split('|').explode().unique()\n",
-    "            for genre in genres_list:\n",
-    "                df_genre = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
-    "        \n",
-    "            df_features = pd.concat([df_genre, df_length, df_movie], axis=1)\n",
-    "        \n",
-    "        elif features_method == \"rating\" :\n",
-    "            df_features = df_ratings.groupby('movieId')['rating'].transform('mean').to_frame('avg_rating')\n",
-    "\n",
-    "        elif features_method == \"tags\" :\n",
-    "            df_features = df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0).to_frame('tags')\n",
-    "\n",
-    "        elif features_method == \"tags_length\" :\n",
+    "            elif method == \"movie_year\":\n",
+    "                df_movie_year = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
+    "                df_features = pd.concat([df_features, df_movie_year.astype(float).fillna(0)], axis=1)\n",
     "            \n",
-    "            df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(','))if isinstance(x, str) else 0).to_frame('n_character_tags')\n",
+    "            elif method == \"genre\":\n",
+    "                tfidf_vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split('|'), token_pattern=None)\n",
+    "                tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])\n",
+    "                df_tfidf_genres = pd.DataFrame(tfidf_matrix.toarray(), index=df_items.index, columns=tfidf_vectorizer.get_feature_names_out())\n",
+    "                df_features = pd.concat([df_features, df_tfidf_genres], axis=1)\n",
     "\n",
+    "            elif method == \"avg_rating\":\n",
+    "                df_avg_rating = df_ratings.groupby('movieId')['rating'].mean().to_frame('avg_rating')\n",
+    "                df_features = df_features.join(df_avg_rating, on='movieId')\n",
+    "\n",
+    "            else:\n",
+    "                raise NotImplementedError(f'Feature method {method} not yet implemented')\n",
+    "\n",
+    "        # Handle missing values in df_features\n",
+    "        df_features.fillna(0, inplace=True)\n",
     "\n",
-    "        else: # (implement other feature creations here)\n",
-    "            raise NotImplementedError(f'Feature method {features_method} not yet implemented')\n",
     "        return df_features\n",
-    "    \n",
     "\n",
     "    def fit(self, trainset):\n",
     "        \"\"\"Profile Learner\"\"\"\n",
     "        AlgoBase.fit(self, trainset)\n",
-    "        \n",
+    "\n",
     "        # Preallocate user profiles\n",
     "        self.user_profile = {u: None for u in trainset.all_users()}\n",
+    "        self.user_profile_explain = {}\n",
     "\n",
-    "        self.user_profile_explain = {u: {} for u in trainset.all_users()}\n",
-    "\n",
-    "        for u in self.user_profile_explain :\n",
-    "            print(u)\n",
-    "            user_ratings = np.array([rating for _, rating in trainset.ur[u]])\n",
-    "\n",
-    "            feature_values = self.content_features.values\n",
-    "\n",
-    "            fv = feature_values.astype(int)\n",
-    "\n",
-    "            weighted_features = fv/np.linalg.norm(fv)\n",
-    "\n",
-    "            feature_importance = weighted_features / np.sum(user_ratings)\n",
+    "        epsilon = 1e-10  # Small value to prevent division by zero\n",
     "\n",
-    "            self.user_profile_explain[u] = dict(zip(self.content_features.columns, feature_importance))\n",
-    "        \n",
+    "        for u in trainset.all_users():\n",
+    "            raw_user_id = trainset.to_raw_uid(u)\n",
+    "            self.user_profile_explain[raw_user_id] = {}\n",
     "\n",
+    "            user_ratings = np.array([rating for (_, rating) in trainset.ur[u]])\n",
+    "            item_ids = [iid for (iid, _) in trainset.ur[u]]\n",
+    "            raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]\n",
     "\n",
+    "            feature_values = self.content_features.loc[raw_item_ids].values\n",
+    "            norms = np.linalg.norm(feature_values, axis=0) + epsilon\n",
+    "            weighted_features = feature_values / norms\n",
+    "            feature_importance = weighted_features.T @ user_ratings\n",
+    "            feature_importance /= np.sum(user_ratings)\n",
     "\n",
+    "            self.user_profile_explain[raw_user_id] = dict(zip(self.content_features.columns, feature_importance))\n",
     "\n",
     "        if self.regressor_method == 'random_score':\n",
-    "            for u in self.user_profile :\n",
-    "                self.user_profile[u] = rd.uniform(0.5,5)\n",
-    "            \n",
-    "        elif self.regressor_method == 'random_sample':\n",
     "            for u in self.user_profile:\n",
-    "                self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]\n",
+    "                self.user_profile[u] = rd.uniform(0.5, 5)\n",
     "\n",
-    "        elif self.regressor_method == 'linear_regression' :\n",
+    "        elif self.regressor_method == 'random_sample':\n",
     "            for u in self.user_profile:\n",
+    "                self.user_profile[u] = [rating for (_, rating) in trainset.ur[u]]\n",
+    "\n",
+    "        else:\n",
+    "            regressor_models = {\n",
+    "                'linear_regression': LinearRegression(fit_intercept=False),\n",
+    "                'svr_regression': SVR(kernel='rbf', C=10, epsilon=0.2),\n",
+    "                'gradient_boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),\n",
+    "                'random_forest': RandomForestRegressor(n_estimators=100),\n",
+    "                'lasso_regression': Lasso(alpha=0.1),\n",
+    "                'ridge_regression': Ridge(alpha=1.0),\n",
+    "                'elastic_net': ElasticNet(alpha=1.0, l1_ratio=0.5),\n",
+    "                'knn_regression': KNeighborsRegressor(n_neighbors=1),\n",
+    "                'decision_tree': DecisionTreeRegressor(max_depth=5),\n",
+    "                'adaboost': AdaBoostRegressor(n_estimators=50),\n",
+    "                'xgboost': XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),\n",
+    "                'lightgbm': LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
+    "            }\n",
+    "\n",
+    "            if self.regressor_method not in regressor_models:\n",
+    "                raise NotImplementedError(f'Regressor method {self.regressor_method} not yet implemented')\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "                \n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'combination' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['combination'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "\n",
-    "                y = df_user['user_ratings'].values\n",
-    "\n",
-    "                linear_regressor = LinearRegression(fit_intercept = False)\n",
-    "\n",
-    "                linear_regressor.fit(X,y)\n",
-    "                \n",
-    "                # Store the computed user profile\n",
-    "                self.user_profile[u] = linear_regressor\n",
-    "\n",
-    "        elif self.regressor_method == 'svr_regression':\n",
     "            for u in self.user_profile:\n",
+    "                user_ratings = [rating for (_, rating) in trainset.ur[u]]\n",
+    "                item_ids = [iid for (iid, _) in trainset.ur[u]]\n",
+    "                raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "\n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'combination' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['combination'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "                \n",
-    "                y = df_user['user_ratings'].values\n",
-    "                svr_regressor = SVR(kernel='rbf', C=10, epsilon=0.2)\n",
-    "                svr_regressor.fit(X, y)\n",
-    "                self.user_profile[u] = svr_regressor\n",
-    "\n",
-    "        elif self.regressor_method == 'gradient_boosting':\n",
-    "            for u in self.user_profile:\n",
+    "                df_user = pd.DataFrame({'item_id': raw_item_ids, 'user_ratings': user_ratings})\n",
+    "                df_user = df_user.merge(self.content_features, left_on=\"item_id\", right_index=True, how='left')\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "\n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'combination' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['combination'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "            \n",
-    "                y = df_user['user_ratings'].values\n",
-    "                gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
-    "                gb_regressor.fit(X, y)\n",
-    "                self.user_profile[u] = gb_regressor\n",
+    "                X = df_user.drop(columns=['item_id', 'user_ratings'])\n",
+    "                y = df_user['user_ratings']\n",
     "\n",
+    "                regressor = regressor_models[self.regressor_method]\n",
+    "                regressor.fit(X, y)\n",
     "\n",
-    "        elif self.regressor_method == 'random_forest':\n",
-    "            for u in self.user_profile:\n",
+    "                self.user_profile[u] = regressor\n",
     "\n",
-    "                user_ratings = [rating for _, rating in trainset.ur[u]]\n",
-    "                item_ids = [iid for iid, _ in trainset.ur[u]]\n",
-    "\n",
-    "                df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
-    "\n",
-    "                df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
-    "\n",
-    "                df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
-    "\n",
-    "                if 'n_character_title' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['n_character_title'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'avg_relevance' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'movie_year' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['movie_year'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'genres' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['genres'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'combination' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['combination'].values.reshape(-1, 1)\n",
-    "                \n",
-    "                elif 'avg_rating' in df_user.columns:\n",
-    "                    # Si 'n_character_title' est disponible comme caractéristique\n",
-    "                    X = df_user['avg_rating'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                elif 'n_character_tags' in df_user.columns:\n",
-    "                    # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
-    "                    X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
-    "\n",
-    "                else:\n",
-    "                    # Si aucune caractéristique appropriée n'est disponible\n",
-    "                    continue  # Ou gère le cas d'erreur/exception ici\n",
-    "\n",
-    "                y = df_user['user_ratings'].values\n",
-    "                rf_regressor = RandomForestRegressor(n_estimators=100)\n",
-    "                rf_regressor.fit(X, y)\n",
-    "                self.user_profile[u] = rf_regressor\n",
-    "\n",
-    "        else : \n",
-    "            pass\n",
-    "\n",
-    "            # (implement here the regressor fitting)  \n",
-    "        \n",
     "    def estimate(self, u, i):\n",
     "        \"\"\"Scoring component used for item filtering\"\"\"\n",
-    "        # First, handle cases for unknown users and items\n",
     "        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):\n",
-    "            raise PredictionImpossible('User and/or item is unkown.')\n",
-    "\n",
+    "            raise PredictionImpossible('User and/or item is unknown.')\n",
     "\n",
     "        if self.regressor_method == 'random_score':\n",
-    "            rd.seed()\n",
-    "            score = rd.uniform(0.5,5)\n",
+    "            return rd.uniform(0.5, 5)\n",
     "\n",
     "        elif self.regressor_method == 'random_sample':\n",
-    "            rd.seed()\n",
-    "            score = rd.choice(self.user_profile[u])\n",
-    "        \n",
-    "        elif self.regressor_method == 'linear_regression':\n",
-    "\n",
-    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            linear_regressor = self.user_profile[u]\n",
-    "\n",
-    "            score= linear_regressor.predict(item_features)[0]\n",
-    "        \n",
-    "        elif self.regressor_method == 'svr_regression':\n",
+    "            return rd.choice(self.user_profile[u])\n",
     "\n",
+    "        else:\n",
     "            raw_item_id = self.trainset.to_raw_iid(i)\n",
+    "            item_features = self.content_features.loc[raw_item_id, :].values.reshape(1, -1)\n",
+    "            regressor = self.user_profile[u]\n",
+    "            item_features_df = pd.DataFrame(item_features, columns=self.content_features.columns)\n",
+    "            return regressor.predict(item_features_df)[0]\n",
     "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            svr_regressor = self.user_profile[u]\n",
-    "            score = svr_regressor.predict(item_features)[0]\n",
-    "        \n",
-    "        elif self.regressor_method == 'gradient_boosting':\n",
-    "\n",
-    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            gradient_boosting = self.user_profile[u]\n",
-    "            score = gradient_boosting.predict(item_features)[0]\n",
-    "        \n",
-    "        elif self.regressor_method == 'random_forest':\n",
-    "\n",
-    "            raw_item_id = self.trainset.to_raw_iid(i)\n",
-    "\n",
-    "            item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
-    "\n",
-    "            randomforest = self.user_profile[u]\n",
-    "            score = randomforest.predict(item_features)[0]\n",
-    "        \n",
-    "        else : \n",
-    "            score = None\n",
-    "\n",
-    "            # (implement here the regressor prediction)\n",
-    "\n",
-    "        return score\n",
-    "\n",
-    "    def explain(self, u) :        \n",
-    "        if u in self.user_profile_explain :\n",
+    "    def explain(self, u):\n",
+    "        if u in self.user_profile_explain:\n",
     "            return self.user_profile_explain[u]\n",
-    "        else :\n",
+    "        else:\n",
     "            return None\n",
     "\n",
     "\n",
-    "cb = ContentBased(\"title_length\", \"random_sample\")\n",
-    "sp_ratings = load_ratings(surprise_format=True)\n",
-    "train_set = sp_ratings.build_full_trainset()\n",
-    "print(cb.fit(train_set))\n",
+    "#Example usage:\n",
+    "cb = ContentBased([\"title_length\", \"movie_year\",\"genre\",\"avg_rating\"], \"ridge_regression\")\n",
+    "surprise_data = load_ratings(surprise_format=True)\n",
+    "trainset = surprise_data.build_full_trainset()\n",
+    "testset = trainset.build_anti_testset()\n",
+    "cb.fit(trainset)\n",
     "\n",
-    "print(cb.explain(0))\n",
     "\n",
-    "print(cb.explain(1))\n",
+    "#print(\"RMSE: \", cb.rmse(testset))\n",
     "\n",
-    "print(cb.explain(2))\n",
     "\n",
-    "print(cb.explain(3))\n",
+    "#Example explanations for users:\n",
+    "print(cb.explain(11))\n",
     "\n",
-    "print(cb.explain(4))\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 759,
-   "id": "baab88b7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Matrice TF-IDF des genres :\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>action</th>\n",
-       "      <th>adventure</th>\n",
-       "      <th>animation</th>\n",
-       "      <th>children</th>\n",
-       "      <th>comedy</th>\n",
-       "      <th>drama</th>\n",
-       "      <th>fantasy</th>\n",
-       "      <th>fi</th>\n",
-       "      <th>horror</th>\n",
-       "      <th>imax</th>\n",
-       "      <th>musical</th>\n",
-       "      <th>mystery</th>\n",
-       "      <th>romance</th>\n",
-       "      <th>sci</th>\n",
-       "      <th>war</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.658454</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.752621</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.658454</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.752621</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.572658</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.819795</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.694164</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.412209</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.590102</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.465343</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.582818</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.666168</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.572658</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.819795</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.647689</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.761905</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.606043</td>\n",
-       "      <td>0.515192</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.606043</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.563507</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.662879</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.493002</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.363703</td>\n",
-       "      <td>0.415716</td>\n",
-       "      <td>0.489026</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.290394</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.489026</td>\n",
-       "      <td>0.363703</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "     action  adventure  animation  children    comedy     drama   fantasy  \\\n",
-       "0  0.000000   0.658454   0.000000  0.000000  0.000000  0.000000  0.752621   \n",
-       "1  0.000000   0.658454   0.000000  0.000000  0.000000  0.000000  0.752621   \n",
-       "2  0.000000   0.000000   0.000000  0.000000  0.000000  0.572658  0.000000   \n",
-       "3  0.694164   0.000000   0.000000  0.000000  0.000000  0.412209  0.000000   \n",
-       "4  0.000000   0.000000   0.000000  0.000000  0.000000  0.465343  0.000000   \n",
-       "5  0.000000   0.000000   0.000000  0.000000  0.000000  0.572658  0.000000   \n",
-       "6  0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000   \n",
-       "7  0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000   \n",
-       "8  0.000000   0.000000   0.563507  0.000000  0.662879  0.000000  0.000000   \n",
-       "9  0.000000   0.363703   0.415716  0.489026  0.000000  0.290394  0.000000   \n",
-       "\n",
-       "         fi    horror      imax   musical   mystery   romance       sci  \\\n",
-       "0  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   \n",
-       "1  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   \n",
-       "2  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   \n",
-       "3  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   \n",
-       "4  0.000000  0.000000  0.000000  0.582818  0.000000  0.666168  0.000000   \n",
-       "5  0.000000  0.000000  0.000000  0.000000  0.000000  0.819795  0.000000   \n",
-       "6  0.000000  0.647689  0.000000  0.000000  0.761905  0.000000  0.000000   \n",
-       "7  0.606043  0.515192  0.000000  0.000000  0.000000  0.000000  0.606043   \n",
-       "8  0.000000  0.000000  0.000000  0.493002  0.000000  0.000000  0.000000   \n",
-       "9  0.000000  0.000000  0.489026  0.363703  0.000000  0.000000  0.000000   \n",
-       "\n",
-       "        war  \n",
-       "0  0.000000  \n",
-       "1  0.000000  \n",
-       "2  0.819795  \n",
-       "3  0.590102  \n",
-       "4  0.000000  \n",
-       "5  0.000000  \n",
-       "6  0.000000  \n",
-       "7  0.000000  \n",
-       "8  0.000000  \n",
-       "9  0.000000  "
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from pprint import pprint\n",
+    "print(cb.explain(13))\n",
     "\n",
-    "# Créer une instance de TfidfVectorizer pour les genres\n",
-    "tfidf_vectorizer = TfidfVectorizer()\n",
+    "print(cb.explain(17))\n",
     "\n",
-    "# Fit et transform pour calculer la matrice TF-IDF des genres\n",
-    "tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])\n",
+    "print(cb.explain(23))\n",
     "\n",
-    "# Obtenir les noms des genres (features)\n",
-    "genre_names = tfidf_vectorizer.get_feature_names_out()\n",
+    "print(cb.explain(27))\n",
     "\n",
-    "# Créer un DataFrame à partir de la matrice TF-IDF des genres\n",
-    "df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=genre_names)\n",
-    "\n",
-    "print(\"Matrice TF-IDF des genres :\")\n",
-    "display(df_tfidf)"
+    "print(cb.explain(73))\n"
    ]
   },
   {
@@ -896,7 +307,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 760,
+   "execution_count": null,
    "id": "69d12f7d",
    "metadata": {},
    "outputs": [],
@@ -913,72 +324,7 @@
     "    prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])\n",
     "    print(prediction)\n",
     "\n",
-    "\n",
-    "\n",
-    "# print(\"title_length :\")\n",
-    "# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_score\")\n",
-    "# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")\n",
-    "# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"movie_year : \")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_sample\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"relevance : \") \n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_sample\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"genres : \") \n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_sample\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"linear_regression\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"svr_regression\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"rating : \")\n",
-    "# test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_sample\")\n",
-    "# # test_contentbased_class(feature_method= \"rating\", regressor_method=\"linear_regression\")\n",
-    "# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"svr_regression\")\n",
-    "# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"gradient_boosting\")\n",
-    "# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"tags : \")\n",
-    "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_sample\")\n",
-    "# #test_contentbased_class(feature_method=\"tags\", regressor_method=\"linear_regression\")\n",
-    "# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"svr_regression\")\n",
-    "# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"gradient_boosting\")\n",
-    "# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_forest\")\n",
-    "# print(\"\\n\")\n",
-    "# print(\"tags_length : \")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_sample\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"linear_regression\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"svr_regression\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_forest\")\n",
-    "\n",
-    "# print(\"\\n\")\n",
-    "# print(\"combination : \")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_score\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_sample\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"linear_regression\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"svr_regression\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"gradient_boosting\")\n",
-    "# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_forest\")\n"
+    "test_contentbased_class([\"title_length\", \"movie_year\",\"genre\",\"avg_rating\"], \"ridge_regression\")"
    ]
   }
  ],
@@ -998,7 +344,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.2"
+   "version": "3.12.0"
   }
  },
  "nbformat": 4,
-- 
GitLab