Skip to content
GitLab
Explorer
Connexion
S'inscrire
Navigation principale
Rechercher ou aller à…
Projet
R
recomsys
Gestion
Activité
Membres
Labels
Programmation
Tickets
Tableaux des tickets
Jalons
Wiki
Code
Requêtes de fusion
Dépôt
Branches
Validations
Étiquettes
Graphe du dépôt
Comparer les révisions
Extraits de code
Compilation
Pipelines
Jobs
Planifications de pipeline
Artéfacts
Déploiement
Releases
Registre de paquets
Registre de conteneur
Registre de modèles
Opération
Environnements
Modules Terraform
Surveillance
Incidents
Analyse
Données d'analyse des chaînes de valeur
Analyse des contributeurs
Données d'analyse CI/CD
Données d'analyse du dépôt
Expériences du modèle
Aide
Aide
Support
Documentation de GitLab
Comparer les forfaits GitLab
Forum de la communauté
Contribuer à GitLab
Donner votre avis
Conditions générales et politique de confidentialité
Raccourcis clavier
?
Extraits de code
Groupes
Projets
Afficher davantage de fils d'Ariane
recommender_system
recomsys
Validations
6456e17a
Valider
6456e17a
rédigé
1 year ago
par
Nathanaël Kindidi
Parcourir les fichiers
Options
Téléchargements
Correctifs
Plain Diff
modif class content based
parent
d549c086
Aucune branche associée trouvée
Branches contenant la validation
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Modifications
1
Masquer les modifications d'espaces
En ligne
Côte à côte
Affichage de
1 fichier modifié
content_based.ipynb
+121
-775
121 ajouts, 775 suppressions
content_based.ipynb
avec
121 ajouts
et
775 suppressions
content_based.ipynb
+
121
−
775
Voir le fichier @
6456e17a
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
756
,
"execution_count":
15
,
"id": "277473a3",
"id": "277473a3",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
...
@@ -53,89 +53,26 @@
...
@@ -53,89 +53,26 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
757
,
"execution_count":
16
,
"id": "e8378976",
"id": "e8378976",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"data": {
"ename": "FileNotFoundError",
"text/html": [
"evalue": "[Errno 2] No such file or directory: 'data/test/content/movies.csv'",
"<div>\n",
"output_type": "error",
"<style scoped>\n",
"traceback": [
" .dataframe tbody tr th:only-of-type {\n",
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
" vertical-align: middle;\n",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
" }\n",
"Cell \u001b[0;32mIn[16], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# All the dataframes\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m df_items \u001b[38;5;241m=\u001b[39m \u001b[43mload_items\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m df_ratings \u001b[38;5;241m=\u001b[39m load_ratings()\n\u001b[1;32m 4\u001b[0m df_tag \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(C\u001b[38;5;241m.\u001b[39mCONTENT_PATH\u001b[38;5;241m/\u001b[39mC\u001b[38;5;241m.\u001b[39mTAGS_FILENAME)\n",
"\n",
"File \u001b[0;32m~/Desktop/Université/Recommender Systems/recomsys/loaders.py:34\u001b[0m, in \u001b[0;36mload_items\u001b[0;34m()\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_items\u001b[39m():\n\u001b[1;32m 29\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Loads items data.\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m Returns:\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;124;03m DataFrame: Items data.\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m df_items \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mC\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mCONTENT_PATH\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mC\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mITEMS_FILENAME\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# ce qui se trouve dans le movie csv\u001b[39;00m\n\u001b[1;32m 35\u001b[0m df_items \u001b[38;5;241m=\u001b[39m df_items\u001b[38;5;241m.\u001b[39mset_index(C\u001b[38;5;241m.\u001b[39mITEM_ID_COL) \u001b[38;5;66;03m# movie id\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df_items\n",
" .dataframe tbody tr th {\n",
"File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1014\u001b[0m dialect,\n\u001b[1;32m 1015\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1023\u001b[0m )\n\u001b[1;32m 1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
" vertical-align: top;\n",
"File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 623\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
" }\n",
"File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
"\n",
"File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1879\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1890\u001b[0m 
\u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
" .dataframe thead th {\n",
"File \u001b[0;32m~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
" text-align: right;\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'data/test/content/movies.csv'"
" }\n",
]
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_character_title</th>\n",
" </tr>\n",
" <tr>\n",
" <th>movieId</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4993</th>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5952</th>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>527</th>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2028</th>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4308</th>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_character_title\n",
"movieId \n",
"4993 57\n",
"5952 45\n",
"527 23\n",
"2028 26\n",
"4308 19"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 long\n",
"1 boring\n",
"2 long\n",
"3 romance\n",
"4 stupidity\n",
"Name: tag, dtype: object"
]
},
"metadata": {},
"output_type": "display_data"
}
}
],
],
"source": [
"source": [
...
@@ -169,7 +106,7 @@
...
@@ -169,7 +106,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
758
,
"execution_count":
null
,
"id": "16b0a602",
"id": "16b0a602",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
...
@@ -193,697 +130,171 @@
...
@@ -193,697 +130,171 @@
}
}
],
],
"source": [
"source": [
"\n",
"# ContetnBased\n",
"class ContentBased(AlgoBase):\n",
"class ContentBased(AlgoBase):\n",
" def __init__(self, features_method, regressor_method):\n",
" def __init__(self, features_method, regressor_method):\n",
" AlgoBase.__init__(self)\n",
" AlgoBase.__init__(self)\n",
" self.regressor_method = regressor_method\n",
" self.regressor_method = regressor_method\n",
" self.features_methods = features_method\n",
" self.content_features = self.create_content_features(features_method)\n",
" self.content_features = self.create_content_features(features_method)\n",
" self.user_profile = {}\n",
" self.user_profile_explain = {}\n",
" self.user_profile_explain = {}\n",
"\n",
"\n",
" def create_content_features(self, features_method):\n",
" def create_content_features(self, features_method
s
):\n",
" \"\"\"Content Analyzer\"\"\"\n",
" \"\"\"Content Analyzer\"\"\"\n",
" df_items = load_items()\n",
" df_items = load_items()\n",
" df_ratings = load_ratings()\n",
" df_ratings = load_ratings()\n",
" df_tag =
df_tag =
pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
" df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
" df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
" df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
" df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
" df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
"\n",
"\n",
" if features_method is None:\n",
" df_features = pd.DataFrame(index=df_items.index)\n",
" df_features = None\n",
"\n",
" elif features_method == \"relevance\" :\n",
" df_features = df_genome_score.groupby('movieId')[\"relevance\"].transform('mean').to_frame('avg_relevance')\n",
"\n",
"\n",
" elif features_method == \"title_length\": # a naive method that creates only 1 feature based on title length\n",
" for method in features_methods:\n",
" df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
" if method == \"title_length\":\n",
" df_title_length = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('title_length')\n",
" df_features = pd.concat([df_features, df_title_length], axis=1)\n",
" \n",
" \n",
" elif features_method == \"movie_year\" :\n",
" elif method == \"movie_year\":\n",
" df_features = df_items['movie_year'] = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
" df_movie_year = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
"\n",
" df_features = pd.concat([df_features, df_movie_year.astype(float).fillna(0)], axis=1)\n",
" elif features_method == \"genres\" :\n",
" genres_list = df_items['genres'].str.split('|').explode().unique()\n",
" for genre in genres_list:\n",
" df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
" \n",
" elif features_method == \"combination\": \n",
" df_length = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
" df_movie = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
" genres_list = df_items['genres'].str.split('|').explode().unique()\n",
" for genre in genres_list:\n",
" df_genre = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
" \n",
" df_features = pd.concat([df_genre, df_length, df_movie], axis=1)\n",
" \n",
" elif features_method == \"rating\" :\n",
" df_features = df_ratings.groupby('movieId')['rating'].transform('mean').to_frame('avg_rating')\n",
"\n",
" elif features_method == \"tags\" :\n",
" df_features = df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0).to_frame('tags')\n",
"\n",
" elif features_method == \"tags_length\" :\n",
" \n",
" \n",
" df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(','))if isinstance(x, str) else 0).to_frame('n_character_tags')\n",
" elif method == \"genre\":\n",
" tfidf_vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split('|'), token_pattern=None)\n",
" tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])\n",
" df_tfidf_genres = pd.DataFrame(tfidf_matrix.toarray(), index=df_items.index, columns=tfidf_vectorizer.get_feature_names_out())\n",
" df_features = pd.concat([df_features, df_tfidf_genres], axis=1)\n",
"\n",
"\n",
" elif method == \"avg_rating\":\n",
" df_avg_rating = df_ratings.groupby('movieId')['rating'].mean().to_frame('avg_rating')\n",
" df_features = df_features.join(df_avg_rating, on='movieId')\n",
"\n",
" else:\n",
" raise NotImplementedError(f'Feature method {method} not yet implemented')\n",
"\n",
" # Handle missing values in df_features\n",
" df_features.fillna(0, inplace=True)\n",
"\n",
"\n",
" else: # (implement other feature creations here)\n",
" raise NotImplementedError(f'Feature method {features_method} not yet implemented')\n",
" return df_features\n",
" return df_features\n",
" \n",
"\n",
"\n",
" def fit(self, trainset):\n",
" def fit(self, trainset):\n",
" \"\"\"Profile Learner\"\"\"\n",
" \"\"\"Profile Learner\"\"\"\n",
" AlgoBase.fit(self, trainset)\n",
" AlgoBase.fit(self, trainset)\n",
"
\n",
"\n",
" # Preallocate user profiles\n",
" # Preallocate user profiles\n",
" self.user_profile = {u: None for u in trainset.all_users()}\n",
" self.user_profile = {u: None for u in trainset.all_users()}\n",
" self.user_profile_explain = {}\n",
"\n",
"\n",
" self.user_profile_explain = {u: {} for u in trainset.all_users()}\n",
" epsilon = 1e-10 # Small value to prevent division by zero\n",
"\n",
" for u in self.user_profile_explain :\n",
" print(u)\n",
" user_ratings = np.array([rating for _, rating in trainset.ur[u]])\n",
"\n",
" feature_values = self.content_features.values\n",
"\n",
" fv = feature_values.astype(int)\n",
"\n",
" weighted_features = fv/np.linalg.norm(fv)\n",
"\n",
" feature_importance = weighted_features / np.sum(user_ratings)\n",
"\n",
"\n",
" self.user_profile_explain[u] = dict(zip(self.content_features.columns, feature_importance))\n",
" for u in trainset.all_users():\n",
" \n",
" raw_user_id = trainset.to_raw_uid(u)\n",
" self.user_profile_explain[raw_user_id] = {}\n",
"\n",
"\n",
" user_ratings = np.array([rating for (_, rating) in trainset.ur[u]])\n",
" item_ids = [iid for (iid, _) in trainset.ur[u]]\n",
" raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]\n",
"\n",
"\n",
" feature_values = self.content_features.loc[raw_item_ids].values\n",
" norms = np.linalg.norm(feature_values, axis=0) + epsilon\n",
" weighted_features = feature_values / norms\n",
" feature_importance = weighted_features.T @ user_ratings\n",
" feature_importance /= np.sum(user_ratings)\n",
"\n",
"\n",
" self.user_profile_explain[raw_user_id] = dict(zip(self.content_features.columns, feature_importance))\n",
"\n",
"\n",
" if self.regressor_method == 'random_score':\n",
" if self.regressor_method == 'random_score':\n",
" for u in self.user_profile :\n",
" self.user_profile[u] = rd.uniform(0.5,5)\n",
" \n",
" elif self.regressor_method == 'random_sample':\n",
" for u in self.user_profile:\n",
" for u in self.user_profile:\n",
" self.user_profile[u] =
[rating for _, rating in self.trainset.ur[u]]
\n",
" self.user_profile[u] =
rd.uniform(0.5, 5)
\n",
"\n",
"\n",
" elif self.regressor_method == '
linear_regression'
:\n",
" elif self.regressor_method == '
random_sample'
:\n",
" for u in self.user_profile:\n",
" for u in self.user_profile:\n",
" self.user_profile[u] = [rating for (_, rating) in trainset.ur[u]]\n",
"\n",
" else:\n",
" regressor_models = {\n",
" 'linear_regression': LinearRegression(fit_intercept=False),\n",
" 'svr_regression': SVR(kernel='rbf', C=10, epsilon=0.2),\n",
" 'gradient_boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),\n",
" 'random_forest': RandomForestRegressor(n_estimators=100),\n",
" 'lasso_regression': Lasso(alpha=0.1),\n",
" 'ridge_regression': Ridge(alpha=1.0),\n",
" 'elastic_net': ElasticNet(alpha=1.0, l1_ratio=0.5),\n",
" 'knn_regression': KNeighborsRegressor(n_neighbors=1),\n",
" 'decision_tree': DecisionTreeRegressor(max_depth=5),\n",
" 'adaboost': AdaBoostRegressor(n_estimators=50),\n",
" 'xgboost': XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3),\n",
" 'lightgbm': LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
" }\n",
"\n",
" if self.regressor_method not in regressor_models:\n",
" raise NotImplementedError(f'Regressor method {self.regressor_method} not yet implemented')\n",
"\n",
"\n",
" user_ratings = [rating for _, rating in trainset.ur[u]]\n",
" item_ids = [iid for iid, _ in trainset.ur[u]]\n",
"\n",
" df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
"\n",
" df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
"\n",
" df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
" \n",
" if 'n_character_title' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['n_character_title'].values.reshape(-1, 1)\n",
"\n",
" elif 'avg_relevance' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
" \n",
" elif 'movie_year' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['movie_year'].values.reshape(-1, 1)\n",
" \n",
" elif 'genres' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['genres'].values.reshape(-1, 1)\n",
" \n",
" elif 'combination' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['combination'].values.reshape(-1, 1)\n",
" \n",
" elif 'avg_rating' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_rating'].values.reshape(-1, 1)\n",
"\n",
" elif 'tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['tags'].values.reshape(-1, 1)\n",
"\n",
" elif 'n_character_tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
"\n",
" else:\n",
" # Si aucune caractéristique appropriée n'est disponible\n",
" continue # Ou gère le cas d'erreur/exception ici\n",
"\n",
" y = df_user['user_ratings'].values\n",
"\n",
" linear_regressor = LinearRegression(fit_intercept = False)\n",
"\n",
" linear_regressor.fit(X,y)\n",
" \n",
" # Store the computed user profile\n",
" self.user_profile[u] = linear_regressor\n",
"\n",
" elif self.regressor_method == 'svr_regression':\n",
" for u in self.user_profile:\n",
" for u in self.user_profile:\n",
" user_ratings = [rating for (_, rating) in trainset.ur[u]]\n",
" item_ids = [iid for (iid, _) in trainset.ur[u]]\n",
" raw_item_ids = [trainset.to_raw_iid(iid) for iid in item_ids]\n",
"\n",
"\n",
" user_ratings = [rating for _, rating in trainset.ur[u]]\n",
" df_user = pd.DataFrame({'item_id': raw_item_ids, 'user_ratings': user_ratings})\n",
" item_ids = [iid for iid, _ in trainset.ur[u]]\n",
" df_user = df_user.merge(self.content_features, left_on=\"item_id\", right_index=True, how='left')\n",
"\n",
" df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
"\n",
" df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
"\n",
" df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
"\n",
" if 'n_character_title' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['n_character_title'].values.reshape(-1, 1)\n",
"\n",
" elif 'avg_relevance' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
" \n",
" elif 'movie_year' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['movie_year'].values.reshape(-1, 1)\n",
" \n",
" elif 'genres' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['genres'].values.reshape(-1, 1)\n",
" \n",
" elif 'combination' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['combination'].values.reshape(-1, 1)\n",
" \n",
" elif 'avg_rating' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_rating'].values.reshape(-1, 1)\n",
"\n",
" elif 'tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['tags'].values.reshape(-1, 1)\n",
"\n",
" elif 'n_character_tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
"\n",
" else:\n",
" # Si aucune caractéristique appropriée n'est disponible\n",
" continue # Ou gère le cas d'erreur/exception ici\n",
" \n",
" y = df_user['user_ratings'].values\n",
" svr_regressor = SVR(kernel='rbf', C=10, epsilon=0.2)\n",
" svr_regressor.fit(X, y)\n",
" self.user_profile[u] = svr_regressor\n",
"\n",
" elif self.regressor_method == 'gradient_boosting':\n",
" for u in self.user_profile:\n",
"\n",
"\n",
" user_ratings = [rating for _, rating in trainset.ur[u]]\n",
" X = df_user.drop(columns=['item_id', 'user_ratings'])\n",
" item_ids = [iid for iid, _ in trainset.ur[u]]\n",
" y = df_user['user_ratings']\n",
"\n",
" df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
"\n",
" df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
"\n",
" df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
"\n",
" if 'n_character_title' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['n_character_title'].values.reshape(-1, 1)\n",
"\n",
" elif 'avg_relevance' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
" \n",
" elif 'movie_year' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['movie_year'].values.reshape(-1, 1)\n",
" \n",
" elif 'genres' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['genres'].values.reshape(-1, 1)\n",
" \n",
" elif 'combination' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['combination'].values.reshape(-1, 1)\n",
" \n",
" elif 'avg_rating' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_rating'].values.reshape(-1, 1)\n",
"\n",
" elif 'tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['tags'].values.reshape(-1, 1)\n",
"\n",
" elif 'n_character_tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
"\n",
" else:\n",
" # Si aucune caractéristique appropriée n'est disponible\n",
" continue # Ou gère le cas d'erreur/exception ici\n",
" \n",
" y = df_user['user_ratings'].values\n",
" gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
" gb_regressor.fit(X, y)\n",
" self.user_profile[u] = gb_regressor\n",
"\n",
"\n",
" regressor = regressor_models[self.regressor_method]\n",
" regressor.fit(X, y)\n",
"\n",
"\n",
" elif self.regressor_method == 'random_forest':\n",
" self.user_profile[u] = regressor\n",
" for u in self.user_profile:\n",
"\n",
"\n",
" user_ratings = [rating for _, rating in trainset.ur[u]]\n",
" item_ids = [iid for iid, _ in trainset.ur[u]]\n",
"\n",
" df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
"\n",
" df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
"\n",
" df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
"\n",
" if 'n_character_title' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['n_character_title'].values.reshape(-1, 1)\n",
"\n",
" elif 'avg_relevance' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
" \n",
" elif 'movie_year' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['movie_year'].values.reshape(-1, 1)\n",
" \n",
" elif 'genres' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['genres'].values.reshape(-1, 1)\n",
" \n",
" elif 'combination' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['combination'].values.reshape(-1, 1)\n",
" \n",
" elif 'avg_rating' in df_user.columns:\n",
" # Si 'n_character_title' est disponible comme caractéristique\n",
" X = df_user['avg_rating'].values.reshape(-1, 1)\n",
"\n",
" elif 'tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['tags'].values.reshape(-1, 1)\n",
"\n",
" elif 'n_character_tags' in df_user.columns:\n",
" # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)\n",
" X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
"\n",
" else:\n",
" # Si aucune caractéristique appropriée n'est disponible\n",
" continue # Ou gère le cas d'erreur/exception ici\n",
"\n",
" y = df_user['user_ratings'].values\n",
" rf_regressor = RandomForestRegressor(n_estimators=100)\n",
" rf_regressor.fit(X, y)\n",
" self.user_profile[u] = rf_regressor\n",
"\n",
" else : \n",
" pass\n",
"\n",
" # (implement here the regressor fitting) \n",
" \n",
" def estimate(self, u, i):\n",
" def estimate(self, u, i):\n",
" \"\"\"Scoring component used for item filtering\"\"\"\n",
" \"\"\"Scoring component used for item filtering\"\"\"\n",
" # First, handle cases for unknown users and items\n",
" if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):\n",
" if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):\n",
" raise PredictionImpossible('User and/or item is unkown.')\n",
" raise PredictionImpossible('User and/or item is unknown.')\n",
"\n",
"\n",
"\n",
" if self.regressor_method == 'random_score':\n",
" if self.regressor_method == 'random_score':\n",
" rd.seed()\n",
" return rd.uniform(0.5, 5)\n",
" score = rd.uniform(0.5,5)\n",
"\n",
"\n",
" elif self.regressor_method == 'random_sample':\n",
" elif self.regressor_method == 'random_sample':\n",
" rd.seed()\n",
" return rd.choice(self.user_profile[u])\n",
" score = rd.choice(self.user_profile[u])\n",
" \n",
" elif self.regressor_method == 'linear_regression':\n",
"\n",
" raw_item_id = self.trainset.to_raw_iid(i)\n",
"\n",
" item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
"\n",
" linear_regressor = self.user_profile[u]\n",
"\n",
" score= linear_regressor.predict(item_features)[0]\n",
" \n",
" elif self.regressor_method == 'svr_regression':\n",
"\n",
"\n",
" else:\n",
" raw_item_id = self.trainset.to_raw_iid(i)\n",
" raw_item_id = self.trainset.to_raw_iid(i)\n",
" item_features = self.content_features.loc[raw_item_id, :].values.reshape(1, -1)\n",
" regressor = self.user_profile[u]\n",
" item_features_df = pd.DataFrame(item_features, columns=self.content_features.columns)\n",
" return regressor.predict(item_features_df)[0]\n",
"\n",
"\n",
" item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
" def explain(self, u):\n",
"\n",
" if u in self.user_profile_explain:\n",
" svr_regressor = self.user_profile[u]\n",
" score = svr_regressor.predict(item_features)[0]\n",
" \n",
" elif self.regressor_method == 'gradient_boosting':\n",
"\n",
" raw_item_id = self.trainset.to_raw_iid(i)\n",
"\n",
" item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
"\n",
" gradient_boosting = self.user_profile[u]\n",
" score = gradient_boosting.predict(item_features)[0]\n",
" \n",
" elif self.regressor_method == 'random_forest':\n",
"\n",
" raw_item_id = self.trainset.to_raw_iid(i)\n",
"\n",
" item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
"\n",
" randomforest = self.user_profile[u]\n",
" score = randomforest.predict(item_features)[0]\n",
" \n",
" else : \n",
" score = None\n",
"\n",
" # (implement here the regressor prediction)\n",
"\n",
" return score\n",
"\n",
" def explain(self, u) : \n",
" if u in self.user_profile_explain :\n",
" return self.user_profile_explain[u]\n",
" return self.user_profile_explain[u]\n",
" else
:\n",
" else:\n",
" return None\n",
" return None\n",
"\n",
"\n",
"\n",
"\n",
"cb = ContentBased(\"title_length\", \"random_sample\")\n",
"#Example usage:\n",
"sp_ratings = load_ratings(surprise_format=True)\n",
"cb = ContentBased([\"title_length\", \"movie_year\",\"genre\",\"avg_rating\"], \"ridge_regression\")\n",
"train_set = sp_ratings.build_full_trainset()\n",
"surprise_data = load_ratings(surprise_format=True)\n",
"print(cb.fit(train_set))\n",
"trainset = surprise_data.build_full_trainset()\n",
"testset = trainset.build_anti_testset()\n",
"cb.fit(trainset)\n",
"\n",
"\n",
"print(cb.explain(0))\n",
"\n",
"\n",
"print(
cb.explain(1
))\n",
"
#
print(
\"RMSE: \", cb.rmse(testset
))\n",
"\n",
"\n",
"print(cb.explain(2))\n",
"\n",
"\n",
"print(cb.explain(3))\n",
"#Example explanations for users:\n",
"print(cb.explain(11))\n",
"\n",
"\n",
"print(cb.explain(4))\n"
"print(cb.explain(13))\n",
]
},
{
"cell_type": "code",
"execution_count": 759,
"id": "baab88b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Matrice TF-IDF des genres :\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>action</th>\n",
" <th>adventure</th>\n",
" <th>animation</th>\n",
" <th>children</th>\n",
" <th>comedy</th>\n",
" <th>drama</th>\n",
" <th>fantasy</th>\n",
" <th>fi</th>\n",
" <th>horror</th>\n",
" <th>imax</th>\n",
" <th>musical</th>\n",
" <th>mystery</th>\n",
" <th>romance</th>\n",
" <th>sci</th>\n",
" <th>war</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.000000</td>\n",
" <td>0.658454</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.752621</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.000000</td>\n",
" <td>0.658454</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.752621</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.572658</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.819795</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.694164</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.412209</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.590102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.465343</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.582818</td>\n",
" <td>0.000000</td>\n",
" <td>0.666168</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.572658</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.819795</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.647689</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.761905</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.606043</td>\n",
" <td>0.515192</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.606043</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.563507</td>\n",
" <td>0.000000</td>\n",
" <td>0.662879</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.493002</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.000000</td>\n",
" <td>0.363703</td>\n",
" <td>0.415716</td>\n",
" <td>0.489026</td>\n",
" <td>0.000000</td>\n",
" <td>0.290394</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.489026</td>\n",
" <td>0.363703</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" action adventure animation children comedy drama fantasy \\\n",
"0 0.000000 0.658454 0.000000 0.000000 0.000000 0.000000 0.752621 \n",
"1 0.000000 0.658454 0.000000 0.000000 0.000000 0.000000 0.752621 \n",
"2 0.000000 0.000000 0.000000 0.000000 0.000000 0.572658 0.000000 \n",
"3 0.694164 0.000000 0.000000 0.000000 0.000000 0.412209 0.000000 \n",
"4 0.000000 0.000000 0.000000 0.000000 0.000000 0.465343 0.000000 \n",
"5 0.000000 0.000000 0.000000 0.000000 0.000000 0.572658 0.000000 \n",
"6 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"7 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"8 0.000000 0.000000 0.563507 0.000000 0.662879 0.000000 0.000000 \n",
"9 0.000000 0.363703 0.415716 0.489026 0.000000 0.290394 0.000000 \n",
"\n",
" fi horror imax musical mystery romance sci \\\n",
"0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"3 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"4 0.000000 0.000000 0.000000 0.582818 0.000000 0.666168 0.000000 \n",
"5 0.000000 0.000000 0.000000 0.000000 0.000000 0.819795 0.000000 \n",
"6 0.000000 0.647689 0.000000 0.000000 0.761905 0.000000 0.000000 \n",
"7 0.606043 0.515192 0.000000 0.000000 0.000000 0.000000 0.606043 \n",
"8 0.000000 0.000000 0.000000 0.493002 0.000000 0.000000 0.000000 \n",
"9 0.000000 0.000000 0.489026 0.363703 0.000000 0.000000 0.000000 \n",
"\n",
" war \n",
"0 0.000000 \n",
"1 0.000000 \n",
"2 0.819795 \n",
"3 0.590102 \n",
"4 0.000000 \n",
"5 0.000000 \n",
"6 0.000000 \n",
"7 0.000000 \n",
"8 0.000000 \n",
"9 0.000000 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from pprint import pprint\n",
"\n",
"\n",
"# Créer une instance de TfidfVectorizer pour les genres\n",
"print(cb.explain(17))\n",
"tfidf_vectorizer = TfidfVectorizer()\n",
"\n",
"\n",
"# Fit et transform pour calculer la matrice TF-IDF des genres\n",
"print(cb.explain(23))\n",
"tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])\n",
"\n",
"\n",
"# Obtenir les noms des genres (features)\n",
"print(cb.explain(27))\n",
"genre_names = tfidf_vectorizer.get_feature_names_out()\n",
"\n",
"\n",
"# Créer un DataFrame à partir de la matrice TF-IDF des genres\n",
"print(cb.explain(73))\n"
"df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=genre_names)\n",
"\n",
"print(\"Matrice TF-IDF des genres :\")\n",
"display(df_tfidf)"
]
]
},
},
{
{
...
@@ -896,7 +307,7 @@
...
@@ -896,7 +307,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
760
,
"execution_count":
null
,
"id": "69d12f7d",
"id": "69d12f7d",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
...
@@ -913,72 +324,7 @@
...
@@ -913,72 +324,7 @@
" prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])\n",
" prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])\n",
" print(prediction)\n",
" print(prediction)\n",
"\n",
"\n",
"\n",
"test_contentbased_class([\"title_length\", \"movie_year\",\"genre\",\"avg_rating\"], \"ridge_regression\")"
"\n",
"# print(\"title_length :\")\n",
"# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_score\")\n",
"# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")\n",
"# test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"linear_regression\")\n",
"# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"svr_regression\")\n",
"# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"gradient_boosting\")\n",
"# test_contentbased_class(feature_method= \"title_length\", regressor_method= \"random_forest\")\n",
"# print(\"\\n\")\n",
"# print(\"movie_year : \")\n",
"# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_score\")\n",
"# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_sample\")\n",
"# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"linear_regression\")\n",
"# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"svr_regression\")\n",
"# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"gradient_boosting\")\n",
"# test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_forest\")\n",
"# print(\"\\n\")\n",
"# print(\"relevance : \") \n",
"# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_score\")\n",
"# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_sample\")\n",
"# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"linear_regression\")\n",
"# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"svr_regression\")\n",
"# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"gradient_boosting\")\n",
"# test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_forest\")\n",
"# print(\"\\n\")\n",
"# print(\"genres : \") \n",
"# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_score\")\n",
"# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_sample\")\n",
"# test_contentbased_class(feature_method= \"genres\", regressor_method= \"linear_regression\")\n",
"# test_contentbased_class(feature_method= \"genres\", regressor_method= \"svr_regression\")\n",
"# test_contentbased_class(feature_method= \"genres\", regressor_method= \"gradient_boosting\")\n",
"# test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_forest\")\n",
"# print(\"\\n\")\n",
"# print(\"rating : \")\n",
"# test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_score\")\n",
"# test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_sample\")\n",
"# # test_contentbased_class(feature_method= \"rating\", regressor_method=\"linear_regression\")\n",
"# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"svr_regression\")\n",
"# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"gradient_boosting\")\n",
"# #test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_forest\")\n",
"# print(\"\\n\")\n",
"# print(\"tags : \")\n",
"# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_score\")\n",
"# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_sample\")\n",
"# #test_contentbased_class(feature_method=\"tags\", regressor_method=\"linear_regression\")\n",
"# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"svr_regression\")\n",
"# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"gradient_boosting\")\n",
"# # test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_forest\")\n",
"# print(\"\\n\")\n",
"# print(\"tags_length : \")\n",
"# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_score\")\n",
"# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_sample\")\n",
"# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"linear_regression\")\n",
"# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"svr_regression\")\n",
"# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"gradient_boosting\")\n",
"# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_forest\")\n",
"\n",
"# print(\"\\n\")\n",
"# print(\"combination : \")\n",
"# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_score\")\n",
"# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_sample\")\n",
"# test_contentbased_class(feature_method=\"combination\", regressor_method=\"linear_regression\")\n",
"# test_contentbased_class(feature_method=\"combination\", regressor_method=\"svr_regression\")\n",
"# test_contentbased_class(feature_method=\"combination\", regressor_method=\"gradient_boosting\")\n",
"# test_contentbased_class(feature_method=\"combination\", regressor_method=\"random_forest\")\n"
]
]
}
}
],
],
...
@@ -998,7 +344,7 @@
...
@@ -998,7 +344,7 @@
"name": "python",
"name": "python",
"nbconvert_exporter": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"pygments_lexer": "ipython3",
"version": "3.12.
2
"
"version": "3.12.
0
"
}
}
},
},
"nbformat": 4,
"nbformat": 4,
...
...
%% Cell type:markdown id:82d5ca82 tags:
%% Cell type:markdown id:82d5ca82 tags:
# Packages
# Packages
%% Cell type:code id:277473a3 tags:
%% Cell type:code id:277473a3 tags:
```
python
```
python
%
load_ext
autoreload
%
load_ext
autoreload
%
autoreload
2
%
autoreload
2
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
import
random
as
rd
import
random
as
rd
from
surprise
import
AlgoBase
from
surprise
import
AlgoBase
from
surprise.prediction_algorithms.predictions
import
PredictionImpossible
from
surprise.prediction_algorithms.predictions
import
PredictionImpossible
from
loaders
import
load_ratings
from
loaders
import
load_ratings
from
loaders
import
load_items
from
loaders
import
load_items
from
constants
import
Constant
as
C
from
constants
import
Constant
as
C
from
sklearn.linear_model
import
LinearRegression
from
sklearn.linear_model
import
LinearRegression
from
sklearn.ensemble
import
GradientBoostingRegressor
,
RandomForestRegressor
from
sklearn.ensemble
import
GradientBoostingRegressor
,
RandomForestRegressor
from
sklearn.svm
import
SVR
from
sklearn.svm
import
SVR
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.feature_extraction.text
import
TfidfVectorizer
```
```
%% Output
%% Output
The autoreload extension is already loaded. To reload it, use:
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%reload_ext autoreload
%% Cell type:markdown id:a42c16bf tags:
%% Cell type:markdown id:a42c16bf tags:
# Explore and select content features
# Explore and select content features
%% Cell type:code id:e8378976 tags:
%% Cell type:code id:e8378976 tags:
```
python
```
python
# All the dataframes
# All the dataframes
df_items
=
load_items
()
df_items
=
load_items
()
df_ratings
=
load_ratings
()
df_ratings
=
load_ratings
()
df_tag
=
pd
.
read_csv
(
C
.
CONTENT_PATH
/
C
.
TAGS_FILENAME
)
df_tag
=
pd
.
read_csv
(
C
.
CONTENT_PATH
/
C
.
TAGS_FILENAME
)
#df_genome_score = pd.read_csv("data/hackathon/content/genome-scores.csv")
#df_genome_score = pd.read_csv("data/hackathon/content/genome-scores.csv")
# df_genome_tag = pd.read_csv("data/hackathon/content/genome-tags.csv")
# df_genome_tag = pd.read_csv("data/hackathon/content/genome-tags.csv")
# Example 1 : create title_length features
# Example 1 : create title_length features
df_features
=
df_items
[
C
.
LABEL_COL
].
apply
(
lambda
x
:
len
(
x
)).
to_frame
(
'
n_character_title
'
)
df_features
=
df_items
[
C
.
LABEL_COL
].
apply
(
lambda
x
:
len
(
x
)).
to_frame
(
'
n_character_title
'
)
display
(
df_features
.
head
())
display
(
df_features
.
head
())
df_tag
=
pd
.
read_csv
(
C
.
CONTENT_PATH
/
C
.
TAGS_FILENAME
)
df_tag
=
pd
.
read_csv
(
C
.
CONTENT_PATH
/
C
.
TAGS_FILENAME
)
df_features
=
df_tag
[
C
.
TAG
]
df_features
=
df_tag
[
C
.
TAG
]
display
(
df_features
.
head
())
display
(
df_features
.
head
())
# (explore here other features)
# (explore here other features)
```
```
%% Output
%% Output
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[16], line 2
1 # All the dataframes
----> 2 df_items = load_items()
3 df_ratings = load_ratings()
4 df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)
File ~/Desktop/Université/Recommender Systems/recomsys/loaders.py:34, in load_items()
28 def load_items():
29 """Loads items data.
30
31 Returns:
32 DataFrame: Items data.
33 """
---> 34 df_items = pd.read_csv(C.CONTENT_PATH / C.ITEMS_FILENAME) # ce qui se trouve dans le movie csv
35 df_items = df_items.set_index(C.ITEM_ID_COL) # movie id
36 return df_items
File ~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1026, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
1013 kwds_defaults = _refine_defaults_read(
1014 dialect,
1015 delimiter,
(...)
1022 dtype_backend=dtype_backend,
1023 )
1024 kwds.update(kwds_defaults)
-> 1026 return _read(filepath_or_buffer, kwds)
File ~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:620, in _read(filepath_or_buffer, kwds)
617 _validate_names(kwds.get("names", None))
619 # Create the parser.
--> 620 parser = TextFileReader(filepath_or_buffer,
**
kwds)
622 if chunksize or iterator:
623 return parser
File ~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1620, in TextFileReader.__init__(self, f, engine,
**
kwds)
1617 self.options["has_index_names"] = kwds["has_index_names"]
1619 self.handles: IOHandles | None = None
-> 1620 self._engine = self._make_engine(f, self.engine)
File ~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1880, in TextFileReader._make_engine(self, f, engine)
1878 if "b" not in mode:
1879 mode += "b"
-> 1880 self.handles = get_handle(
1881 f,
1882 mode,
1883 encoding=self.options.get("encoding", None),
1884 compression=self.options.get("compression", None),
1885 memory_map=self.options.get("memory_map", False),
1886 is_text=is_text,
1887 errors=self.options.get("encoding_errors", "strict"),
1888 storage_options=self.options.get("storage_options", None),
1889 )
1890 assert self.handles is not None
1891 f = self.handles.handle
File ~/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
868 elif isinstance(handle, str):
869 # Check whether the filename is to be opened in binary mode.
870 # Binary mode does not support 'encoding' and 'newline'.
871 if ioargs.encoding and "b" not in ioargs.mode:
872 # Encoding
--> 873 handle = open(
874 handle,
875 ioargs.mode,
876 encoding=ioargs.encoding,
877 errors=errors,
878 newline="",
879 )
880 else:
881 # Binary mode
882 handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'data/test/content/movies.csv'
%% Cell type:markdown id:a2c9a2b6 tags:
%% Cell type:markdown id:a2c9a2b6 tags:
# Build a content-based model
# Build a content-based model
When ready, move the following class in the
*models.py*
script
When ready, move the following class in the
*models.py*
script
%% Cell type:code id:16b0a602 tags:
%% Cell type:code id:16b0a602 tags:
```
python
```
python
# ContetnBased
class
ContentBased
(
AlgoBase
):
class
ContentBased
(
AlgoBase
):
def
__init__
(
self
,
features_method
,
regressor_method
):
def
__init__
(
self
,
features_method
,
regressor_method
):
AlgoBase
.
__init__
(
self
)
AlgoBase
.
__init__
(
self
)
self
.
regressor_method
=
regressor_method
self
.
regressor_method
=
regressor_method
self
.
features_methods
=
features_method
self
.
content_features
=
self
.
create_content_features
(
features_method
)
self
.
content_features
=
self
.
create_content_features
(
features_method
)
self
.
user_profile
=
{}
self
.
user_profile_explain
=
{}
self
.
user_profile_explain
=
{}
def
create_content_features
(
self
,
features_method
):
def
create_content_features
(
self
,
features_method
s
):
"""
Content Analyzer
"""
"""
Content Analyzer
"""
df_items
=
load_items
()
df_items
=
load_items
()
df_ratings
=
load_ratings
()
df_ratings
=
load_ratings
()
df_tag
=
df_tag
=
pd
.
read_csv
(
C
.
CONTENT_PATH
/
C
.
TAGS_FILENAME
)
df_tag
=
pd
.
read_csv
(
C
.
CONTENT_PATH
/
C
.
TAGS_FILENAME
)
df_genome_score
=
pd
.
read_csv
(
"
data/hackathon/content/genome-scores.csv
"
)
df_genome_score
=
pd
.
read_csv
(
"
data/hackathon/content/genome-scores.csv
"
)
df_genome_tag
=
pd
.
read_csv
(
"
data/hackathon/content/genome-tags.csv
"
)
df_genome_tag
=
pd
.
read_csv
(
"
data/hackathon/content/genome-tags.csv
"
)
if
features_method
is
None
:
df_features
=
pd
.
DataFrame
(
index
=
df_items
.
index
)
df_features
=
None
elif
features_method
==
"
relevance
"
:
df_features
=
df_genome_score
.
groupby
(
'
movieId
'
)[
"
relevance
"
].
transform
(
'
mean
'
).
to_frame
(
'
avg_relevance
'
)
elif
features_method
==
"
title_length
"
:
# a naive method that creates only 1 feature based on title length
df_features
=
df_items
[
C
.
LABEL_COL
].
apply
(
lambda
x
:
len
(
x
)).
to_frame
(
'
n_character_title
'
)
elif
features_method
==
"
movie_year
"
:
df_features
=
df_items
[
'
movie_year
'
]
=
df_items
[
'
title
'
].
str
.
extract
(
r
'
\((\d{4})\)
'
,
expand
=
False
).
to_frame
(
'
movie_year
'
)
elif
features_method
==
"
genres
"
:
genres_list
=
df_items
[
'
genres
'
].
str
.
split
(
'
|
'
).
explode
().
unique
()
for
genre
in
genres_list
:
df_features
=
df_items
[
'
genres
'
].
str
.
contains
(
genre
).
astype
(
int
).
to_frame
(
'
genres
'
)
elif
features_method
==
"
combination
"
:
df_length
=
df_items
[
C
.
LABEL_COL
].
apply
(
lambda
x
:
len
(
x
)).
to_frame
(
'
n_character_title
'
)
df_movie
=
df_items
[
'
title
'
].
str
.
extract
(
r
'
\((\d{4})\)
'
,
expand
=
False
).
to_frame
(
'
movie_year
'
)
genres_list
=
df_items
[
'
genres
'
].
str
.
split
(
'
|
'
).
explode
().
unique
()
for
genre
in
genres_list
:
df_genre
=
df_items
[
'
genres
'
].
str
.
contains
(
genre
).
astype
(
int
).
to_frame
(
'
genres
'
)
df_features
=
pd
.
concat
([
df_genre
,
df_length
,
df_movie
],
axis
=
1
)
elif
features_method
==
"
rating
"
:
for
method
in
features_methods
:
df_features
=
df_ratings
.
groupby
(
'
movieId
'
)[
'
rating
'
].
transform
(
'
mean
'
).
to_frame
(
'
avg_rating
'
)
if
method
==
"
title_length
"
:
df_title_length
=
df_items
[
C
.
LABEL_COL
].
apply
(
lambda
x
:
len
(
x
)).
to_frame
(
'
title_length
'
)
df_features
=
pd
.
concat
([
df_features
,
df_title_length
],
axis
=
1
)
elif
method
==
"
movie_year
"
:
df_movie_year
=
df_items
[
'
title
'
].
str
.
extract
(
r
'
\((\d{4})\)
'
,
expand
=
False
).
to_frame
(
'
movie_year
'
)
df_features
=
pd
.
concat
([
df_features
,
df_movie_year
.
astype
(
float
).
fillna
(
0
)],
axis
=
1
)
elif
method
==
"
genre
"
:
tfidf_vectorizer
=
TfidfVectorizer
(
tokenizer
=
lambda
x
:
x
.
split
(
'
|
'
),
token_pattern
=
None
)
tfidf_matrix
=
tfidf_vectorizer
.
fit_transform
(
df_items
[
'
genres
'
])
df_tfidf_genres
=
pd
.
DataFrame
(
tfidf_matrix
.
toarray
(),
index
=
df_items
.
index
,
columns
=
tfidf_vectorizer
.
get_feature_names_out
())
df_features
=
pd
.
concat
([
df_features
,
df_tfidf_genres
],
axis
=
1
)
elif
method
==
"
avg_rating
"
:
df_avg_rating
=
df_ratings
.
groupby
(
'
movieId
'
)[
'
rating
'
].
mean
().
to_frame
(
'
avg_rating
'
)
df_features
=
df_features
.
join
(
df_avg_rating
,
on
=
'
movieId
'
)
elif
features_method
==
"
tags
"
:
else
:
df_features
=
df_tag
[
'
tag
'
].
apply
(
lambda
x
:
len
(
x
.
split
(
'
,
'
))
if
isinstance
(
x
,
str
)
else
0
).
to_frame
(
'
tags
'
)
raise
NotImplementedError
(
f
'
Feature method
{
method
}
not yet implemented
'
)
elif
features_method
==
"
tags_length
"
:
# Handle missing values in df_features
df_features
.
fillna
(
0
,
inplace
=
True
)
df_features
=
df_tag
[
'
tag
'
].
apply
(
lambda
x
:
sum
(
len
(
tag
)
for
tag
in
x
.
split
(
'
,
'
))
if
isinstance
(
x
,
str
)
else
0
).
to_frame
(
'
n_character_tags
'
)
else
:
# (implement other feature creations here)
raise
NotImplementedError
(
f
'
Feature method
{
features_method
}
not yet implemented
'
)
return
df_features
return
df_features
def
fit
(
self
,
trainset
):
def
fit
(
self
,
trainset
):
"""
Profile Learner
"""
"""
Profile Learner
"""
AlgoBase
.
fit
(
self
,
trainset
)
AlgoBase
.
fit
(
self
,
trainset
)
# Preallocate user profiles
# Preallocate user profiles
self
.
user_profile
=
{
u
:
None
for
u
in
trainset
.
all_users
()}
self
.
user_profile
=
{
u
:
None
for
u
in
trainset
.
all_users
()}
self
.
user_profile_explain
=
{}
self
.
user_profile_explain
=
{
u
:
{}
for
u
in
trainset
.
all_users
()}
epsilon
=
1e-10
# Small value to prevent division by zero
for
u
in
self
.
user_profile_explain
:
print
(
u
)
user_ratings
=
np
.
array
([
rating
for
_
,
rating
in
trainset
.
ur
[
u
]])
feature_values
=
self
.
content_features
.
values
fv
=
feature_values
.
astype
(
int
)
weighted_features
=
fv
/
np
.
linalg
.
norm
(
fv
)
feature_importance
=
weighted_features
/
np
.
sum
(
user_ratings
)
self
.
user_profile_explain
[
u
]
=
dict
(
zip
(
self
.
content_features
.
columns
,
feature_importance
))
for
u
in
trainset
.
all_users
():
raw_user_id
=
trainset
.
to_raw_uid
(
u
)
self
.
user_profile_explain
[
raw_user_id
]
=
{}
user_ratings
=
np
.
array
([
rating
for
(
_
,
rating
)
in
trainset
.
ur
[
u
]])
item_ids
=
[
iid
for
(
iid
,
_
)
in
trainset
.
ur
[
u
]]
raw_item_ids
=
[
trainset
.
to_raw_iid
(
iid
)
for
iid
in
item_ids
]
feature_values
=
self
.
content_features
.
loc
[
raw_item_ids
].
values
norms
=
np
.
linalg
.
norm
(
feature_values
,
axis
=
0
)
+
epsilon
weighted_features
=
feature_values
/
norms
feature_importance
=
weighted_features
.
T
@
user_ratings
feature_importance
/=
np
.
sum
(
user_ratings
)
self
.
user_profile_explain
[
raw_user_id
]
=
dict
(
zip
(
self
.
content_features
.
columns
,
feature_importance
))
if
self
.
regressor_method
==
'
random_score
'
:
if
self
.
regressor_method
==
'
random_score
'
:
for
u
in
self
.
user_profile
:
self
.
user_profile
[
u
]
=
rd
.
uniform
(
0.5
,
5
)
elif
self
.
regressor_method
==
'
random_sample
'
:
for
u
in
self
.
user_profile
:
self
.
user_profile
[
u
]
=
[
rating
for
_
,
rating
in
self
.
trainset
.
ur
[
u
]]
elif
self
.
regressor_method
==
'
linear_regression
'
:
for
u
in
self
.
user_profile
:
for
u
in
self
.
user_profile
:
self
.
user_profile
[
u
]
=
rd
.
uniform
(
0.5
,
5
)
user_ratings
=
[
rating
for
_
,
rating
in
trainset
.
ur
[
u
]]
elif
self
.
regressor_method
==
'
random_sample
'
:
item_ids
=
[
iid
for
iid
,
_
in
trainset
.
ur
[
u
]]
df_user
=
pd
.
DataFrame
({
'
item_id
'
:
item_ids
,
'
user_ratings
'
:
user_ratings
})
df_user
[
"
item_id
"
]
=
df_user
[
"
item_id
"
].
map
(
trainset
.
to_raw_iid
)
df_user
=
df_user
.
merge
(
self
.
content_features
,
left_on
=
"
item_id
"
,
right_index
=
True
,
how
=
'
left
'
)
if
'
n_character_title
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
n_character_title
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_relevance
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
avg_relevance
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
movie_year
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
movie_year
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
genres
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
genres
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
combination
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
combination
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_rating
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
avg_rating
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
tags
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
n_character_tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
n_character_tags
'
].
values
.
reshape
(
-
1
,
1
)
else
:
# Si aucune caractéristique appropriée n'est disponible
continue
# Ou gère le cas d'erreur/exception ici
y
=
df_user
[
'
user_ratings
'
].
values
linear_regressor
=
LinearRegression
(
fit_intercept
=
False
)
linear_regressor
.
fit
(
X
,
y
)
# Store the computed user profile
self
.
user_profile
[
u
]
=
linear_regressor
elif
self
.
regressor_method
==
'
svr_regression
'
:
for
u
in
self
.
user_profile
:
for
u
in
self
.
user_profile
:
self
.
user_profile
[
u
]
=
[
rating
for
(
_
,
rating
)
in
trainset
.
ur
[
u
]]
user_ratings
=
[
rating
for
_
,
rating
in
trainset
.
ur
[
u
]]
else
:
item_ids
=
[
iid
for
iid
,
_
in
trainset
.
ur
[
u
]]
regressor_models
=
{
'
linear_regression
'
:
LinearRegression
(
fit_intercept
=
False
),
df_user
=
pd
.
DataFrame
({
'
item_id
'
:
item_ids
,
'
user_ratings
'
:
user_ratings
})
'
svr_regression
'
:
SVR
(
kernel
=
'
rbf
'
,
C
=
10
,
epsilon
=
0.2
),
'
gradient_boosting
'
:
GradientBoostingRegressor
(
n_estimators
=
100
,
learning_rate
=
0.1
,
max_depth
=
3
),
df_user
[
"
item_id
"
]
=
df_user
[
"
item_id
"
].
map
(
trainset
.
to_raw_iid
)
'
random_forest
'
:
RandomForestRegressor
(
n_estimators
=
100
),
'
lasso_regression
'
:
Lasso
(
alpha
=
0.1
),
df_user
=
df_user
.
merge
(
self
.
content_features
,
left_on
=
"
item_id
"
,
right_index
=
True
,
how
=
'
left
'
)
'
ridge_regression
'
:
Ridge
(
alpha
=
1.0
),
'
elastic_net
'
:
ElasticNet
(
alpha
=
1.0
,
l1_ratio
=
0.5
),
if
'
n_character_title
'
in
df_user
.
columns
:
'
knn_regression
'
:
KNeighborsRegressor
(
n_neighbors
=
1
),
# Si 'n_character_title' est disponible comme caractéristique
'
decision_tree
'
:
DecisionTreeRegressor
(
max_depth
=
5
),
X
=
df_user
[
'
n_character_title
'
].
values
.
reshape
(
-
1
,
1
)
'
adaboost
'
:
AdaBoostRegressor
(
n_estimators
=
50
),
'
xgboost
'
:
XGBRegressor
(
n_estimators
=
100
,
learning_rate
=
0.1
,
max_depth
=
3
),
'
lightgbm
'
:
LGBMRegressor
(
n_estimators
=
100
,
learning_rate
=
0.1
,
max_depth
=
3
)
}
elif
'
avg_relevance
'
in
df_user
.
columns
:
if
self
.
regressor_method
not
in
regressor_models
:
# Si 'n_character_title' est disponible comme caractéristique
raise
NotImplementedError
(
f
'
Regressor method
{
self
.
regressor_method
}
not yet implemented
'
)
X
=
df_user
[
'
avg_relevance
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
movie_year
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
movie_year
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
genres
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
genres
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
combination
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
combination
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_rating
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
avg_rating
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
tags
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
n_character_tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
n_character_tags
'
].
values
.
reshape
(
-
1
,
1
)
else
:
# Si aucune caractéristique appropriée n'est disponible
continue
# Ou gère le cas d'erreur/exception ici
y
=
df_user
[
'
user_ratings
'
].
values
svr_regressor
=
SVR
(
kernel
=
'
rbf
'
,
C
=
10
,
epsilon
=
0.2
)
svr_regressor
.
fit
(
X
,
y
)
self
.
user_profile
[
u
]
=
svr_regressor
elif
self
.
regressor_method
==
'
gradient_boosting
'
:
for
u
in
self
.
user_profile
:
for
u
in
self
.
user_profile
:
user_ratings
=
[
rating
for
(
_
,
rating
)
in
trainset
.
ur
[
u
]]
item_ids
=
[
iid
for
(
iid
,
_
)
in
trainset
.
ur
[
u
]]
raw_item_ids
=
[
trainset
.
to_raw_iid
(
iid
)
for
iid
in
item_ids
]
user_ratings
=
[
rating
for
_
,
rating
in
trainset
.
ur
[
u
]]
df_user
=
pd
.
DataFrame
({
'
item_id
'
:
raw_item_ids
,
'
user_ratings
'
:
user_ratings
})
item_ids
=
[
iid
for
iid
,
_
in
trainset
.
ur
[
u
]]
df_user
=
df_user
.
merge
(
self
.
content_features
,
left_on
=
"
item_id
"
,
right_index
=
True
,
how
=
'
left
'
)
df_user
=
pd
.
DataFrame
({
'
item_id
'
:
item_ids
,
'
user_ratings
'
:
user_ratings
})
df_user
[
"
item_id
"
]
=
df_user
[
"
item_id
"
].
map
(
trainset
.
to_raw_iid
)
df_user
=
df_user
.
merge
(
self
.
content_features
,
left_on
=
"
item_id
"
,
right_index
=
True
,
how
=
'
left
'
)
if
'
n_character_title
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
n_character_title
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_relevance
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
avg_relevance
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
movie_year
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
movie_year
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
genres
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
genres
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
combination
'
in
df_user
.
columns
:
X
=
df_user
.
drop
(
columns
=
[
'
item_id
'
,
'
user_ratings
'
])
# Si 'n_character_title' est disponible comme caractéristique
y
=
df_user
[
'
user_ratings
'
]
X
=
df_user
[
'
combination
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_rating
'
in
df_user
.
columns
:
regressor
=
regressor_models
[
self
.
regressor_method
]
# Si 'n_character_title' est disponible comme caractéristique
regressor
.
fit
(
X
,
y
)
X
=
df_user
[
'
avg_rating
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
tags
'
in
df_user
.
columns
:
self
.
user_profile
[
u
]
=
regressor
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
tags
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
n_character_tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
n_character_tags
'
].
values
.
reshape
(
-
1
,
1
)
else
:
# Si aucune caractéristique appropriée n'est disponible
continue
# Ou gère le cas d'erreur/exception ici
y
=
df_user
[
'
user_ratings
'
].
values
gb_regressor
=
GradientBoostingRegressor
(
n_estimators
=
100
,
learning_rate
=
0.1
,
max_depth
=
3
)
gb_regressor
.
fit
(
X
,
y
)
self
.
user_profile
[
u
]
=
gb_regressor
elif
self
.
regressor_method
==
'
random_forest
'
:
for
u
in
self
.
user_profile
:
user_ratings
=
[
rating
for
_
,
rating
in
trainset
.
ur
[
u
]]
item_ids
=
[
iid
for
iid
,
_
in
trainset
.
ur
[
u
]]
df_user
=
pd
.
DataFrame
({
'
item_id
'
:
item_ids
,
'
user_ratings
'
:
user_ratings
})
df_user
[
"
item_id
"
]
=
df_user
[
"
item_id
"
].
map
(
trainset
.
to_raw_iid
)
df_user
=
df_user
.
merge
(
self
.
content_features
,
left_on
=
"
item_id
"
,
right_index
=
True
,
how
=
'
left
'
)
if
'
n_character_title
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
n_character_title
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_relevance
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
avg_relevance
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
movie_year
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
movie_year
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
genres
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
genres
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
combination
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
combination
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
avg_rating
'
in
df_user
.
columns
:
# Si 'n_character_title' est disponible comme caractéristique
X
=
df_user
[
'
avg_rating
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
tags
'
].
values
.
reshape
(
-
1
,
1
)
elif
'
n_character_tags
'
in
df_user
.
columns
:
# Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
X
=
df_user
[
'
n_character_tags
'
].
values
.
reshape
(
-
1
,
1
)
else
:
# Si aucune caractéristique appropriée n'est disponible
continue
# Ou gère le cas d'erreur/exception ici
y
=
df_user
[
'
user_ratings
'
].
values
rf_regressor
=
RandomForestRegressor
(
n_estimators
=
100
)
rf_regressor
.
fit
(
X
,
y
)
self
.
user_profile
[
u
]
=
rf_regressor
else
:
pass
# (implement here the regressor fitting)
def
estimate
(
self
,
u
,
i
):
def
estimate
(
self
,
u
,
i
):
"""
Scoring component used for item filtering
"""
"""
Scoring component used for item filtering
"""
# First, handle cases for unknown users and items
if
not
(
self
.
trainset
.
knows_user
(
u
)
and
self
.
trainset
.
knows_item
(
i
)):
if
not
(
self
.
trainset
.
knows_user
(
u
)
and
self
.
trainset
.
knows_item
(
i
)):
raise
PredictionImpossible
(
'
User and/or item is unkown.
'
)
raise
PredictionImpossible
(
'
User and/or item is unknown.
'
)
if
self
.
regressor_method
==
'
random_score
'
:
if
self
.
regressor_method
==
'
random_score
'
:
rd
.
seed
()
return
rd
.
uniform
(
0.5
,
5
)
score
=
rd
.
uniform
(
0.5
,
5
)
elif
self
.
regressor_method
==
'
random_sample
'
:
elif
self
.
regressor_method
==
'
random_sample
'
:
rd
.
seed
()
return
rd
.
choice
(
self
.
user_profile
[
u
])
score
=
rd
.
choice
(
self
.
user_profile
[
u
])
elif
self
.
regressor_method
==
'
linear_regression
'
:
raw_item_id
=
self
.
trainset
.
to_raw_iid
(
i
)
item_features
=
self
.
content_features
.
loc
[
raw_item_id
:
raw_item_id
,
:].
values
linear_regressor
=
self
.
user_profile
[
u
]
score
=
linear_regressor
.
predict
(
item_features
)[
0
]
elif
self
.
regressor_method
==
'
svr_regression
'
:
else
:
raw_item_id
=
self
.
trainset
.
to_raw_iid
(
i
)
raw_item_id
=
self
.
trainset
.
to_raw_iid
(
i
)
item_features
=
self
.
content_features
.
loc
[
raw_item_id
,
:].
values
.
reshape
(
1
,
-
1
)
regressor
=
self
.
user_profile
[
u
]
item_features_df
=
pd
.
DataFrame
(
item_features
,
columns
=
self
.
content_features
.
columns
)
return
regressor
.
predict
(
item_features_df
)[
0
]
item_features
=
self
.
content_features
.
loc
[
raw_item_id
:
raw_item_id
,
:].
values
def
explain
(
self
,
u
):
if
u
in
self
.
user_profile_explain
:
svr_regressor
=
self
.
user_profile
[
u
]
return
self
.
user_profile_explain
[
u
]
score
=
svr_regressor
.
predict
(
item_features
)[
0
]
else
:
return
None
elif
self
.
regressor_method
==
'
gradient_boosting
'
:
raw_item_id
=
self
.
trainset
.
to_raw_iid
(
i
)
item_features
=
self
.
content_features
.
loc
[
raw_item_id
:
raw_item_id
,
:].
values
gradient_boosting
=
self
.
user_profile
[
u
]
score
=
gradient_boosting
.
predict
(
item_features
)[
0
]
elif
self
.
regressor_method
==
'
random_forest
'
:
raw_item_id
=
self
.
trainset
.
to_raw_iid
(
i
)
item_features
=
self
.
content_features
.
loc
[
raw_item_id
:
raw_item_id
,
:].
values
randomforest
=
self
.
user_profile
[
u
]
score
=
randomforest
.
predict
(
item_features
)[
0
]
else
:
score
=
None
# (implement here the regressor prediction)
# Example usage: fit a ridge-regression content-based model on the full
# ratings trainset (reconstructed from the added side of the diff).
feature_set = ["title_length", "movie_year", "genre", "avg_rating"]
cb = ContentBased(feature_set, "ridge_regression")

surprise_data = load_ratings(surprise_format=True)
trainset = surprise_data.build_full_trainset()
# Anti-test set = all (user, item) pairs absent from the trainset.
testset = trainset.build_anti_testset()
cb.fit(trainset)
def explain(self, u):
    """Return the stored explanation (feature weights) for user ``u``.

    Parameters
    ----------
    u : user id used as a key into ``self.user_profile_explain``
        (presumably an inner/raw user id — confirm against the caller).

    Returns
    -------
    The explanation object built during ``fit`` for this user, or
    ``None`` when no profile exists for ``u``.
    """
    # dict.get() has exactly the original "None when absent" semantics
    # of the `if u in d: return d[u] else: return None` pattern, with a
    # single dictionary lookup instead of two.
    return self.user_profile_explain.get(u)
cb
=
ContentBased
(
"
title_length
"
,
"
random_sample
"
)
#Example explanations for users:
sp_ratings
=
load_ratings
(
surprise_format
=
True
)
print
(
cb
.
explain
(
11
))
train_set
=
sp_ratings
.
build_full_trainset
()
print
(
cb
.
fit
(
train_set
))
print
(
cb
.
explain
(
0
))
print
(
cb
.
explain
(
13
))
print
(
cb
.
explain
(
1
))
print
(
cb
.
explain
(
1
7
))
print
(
cb
.
explain
(
2
))
print
(
cb
.
explain
(
2
3
))
print
(
cb
.
explain
(
3
))
print
(
cb
.
explain
(
27
))
print
(
cb
.
explain
(
4
))
print
(
cb
.
explain
(
73
))
```
```
%% Output
%% Output
0
0
1
1
2
2
3
3
4
4
5
5
None
None
{'n_character_title': array([0.03019692])}
{'n_character_title': array([0.03019692])}
{'n_character_title': array([0.04098154])}
{'n_character_title': array([0.04098154])}
{'n_character_title': array([0.02942264])}
{'n_character_title': array([0.02942264])}
{'n_character_title': array([0.08196307])}
{'n_character_title': array([0.08196307])}
{'n_character_title': array([0.02798739])}
{'n_character_title': array([0.02798739])}
%% Cell type:code id:baab88b7 tags:
```
python
from pprint import pprint

# Vectorize each movie's genre string into a TF-IDF matrix.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])

# One column per genre token learned by the vectorizer.
genre_names = tfidf_vectorizer.get_feature_names_out()
df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=genre_names)

print("Matrice TF-IDF des genres :")
display(df_tfidf)
```
%% Output
Matrice TF-IDF des genres :
%% Cell type:markdown id:ffd75b7e tags:
%% Cell type:markdown id:ffd75b7e tags:
The following script test the ContentBased class
The following script test the ContentBased class
%% Cell type:code id:69d12f7d tags:
%% Cell type:code id:69d12f7d tags:
```
python
```
python
def test_contentbased_class(feature_method, regressor_method):
    """
    Test the ContentBased class.
    Tries to make a prediction on the first (user,item ) tuple of the anti_test_set
    """
    ratings = load_ratings(surprise_format=True)
    full_trainset = ratings.build_full_trainset()

    algo = ContentBased(feature_method, regressor_method)
    algo.fit(full_trainset)

    # Predict the rating of the first (user, item) pair the model never saw.
    first_unseen = full_trainset.build_anti_testset()[0]
    result = algo.predict(first_unseen[0], first_unseen[1])
    print(result)
# Smoke-test the ridge-regression model on the combined feature set.
features = ["title_length", "movie_year", "genre", "avg_rating"]
test_contentbased_class(features, "ridge_regression")
# print("title_length :")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "random_score")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "random_sample")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "linear_regression")
# test_contentbased_class(feature_method= "title_length", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "title_length", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "title_length", regressor_method= "random_forest")
# print("\n")
# print("movie_year : ")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_score")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_forest")
# print("\n")
# print("relevance : ")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_score")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "relevance", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "relevance", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "relevance", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_forest")
# print("\n")
# print("genres : ")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_score")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "genres", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "genres", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "genres", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_forest")
# print("\n")
# print("rating : ")
# test_contentbased_class(feature_method= "rating", regressor_method="random_score")
# test_contentbased_class(feature_method= "rating", regressor_method="random_sample")
# # test_contentbased_class(feature_method= "rating", regressor_method="linear_regression")
# #test_contentbased_class(feature_method="rating", regressor_method="svr_regression")
# #test_contentbased_class(feature_method="rating", regressor_method="gradient_boosting")
# #test_contentbased_class(feature_method="rating", regressor_method="random_forest")
# print("\n")
# print("tags : ")
# test_contentbased_class(feature_method="tags", regressor_method="random_score")
# test_contentbased_class(feature_method="tags", regressor_method="random_sample")
# #test_contentbased_class(feature_method="tags", regressor_method="linear_regression")
# # test_contentbased_class(feature_method="tags", regressor_method="svr_regression")
# # test_contentbased_class(feature_method="tags", regressor_method="gradient_boosting")
# # test_contentbased_class(feature_method="tags", regressor_method="random_forest")
# print("\n")
# print("tags_length : ")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_score")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_sample")
# test_contentbased_class(feature_method="tags_length", regressor_method="linear_regression")
# test_contentbased_class(feature_method="tags_length", regressor_method="svr_regression")
# test_contentbased_class(feature_method="tags_length", regressor_method="gradient_boosting")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_forest")
# print("\n")
# print("combination : ")
# test_contentbased_class(feature_method="combination", regressor_method="random_score")
# test_contentbased_class(feature_method="combination", regressor_method="random_sample")
# test_contentbased_class(feature_method="combination", regressor_method="linear_regression")
# test_contentbased_class(feature_method="combination", regressor_method="svr_regression")
# test_contentbased_class(feature_method="combination", regressor_method="gradient_boosting")
# test_contentbased_class(feature_method="combination", regressor_method="random_forest")
```
```
...
...
Ce diff est replié.
Cliquez pour l'agrandir.
Aperçu
0%
Chargement en cours
Veuillez réessayer
ou
joindre un nouveau fichier
.
Annuler
You are about to add
0
people
to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Enregistrer le commentaire
Annuler
Veuillez vous
inscrire
ou vous
se connecter
pour commenter