diff --git a/constants.py b/constants.py
index 8c11f22c0178095ff190a9f182258ff415d61f4f..e6125570df096d8edd74e9f50c99041779d73b03 100644
--- a/constants.py
+++ b/constants.py
@@ -16,6 +16,10 @@ class Constant:
     LABEL_COL = 'title' # Column name for item labels
     GENRES_COL = 'genres'  # Column name for item genres
 
+    TAGS_FILENAME = "tags.csv"  # Filename of the tags file
+    TAG = 'tag'  # Column name for tags
+    
+
     # Evidence
     EVIDENCE_PATH = DATA_PATH / 'evidence' # Path to evidence data
     # - ratings
diff --git a/content_based.ipynb b/content_based.ipynb
index f62bfd8653c5ac072f750fa1eb0f5bafb4f25338..979171c4bc9883f8e428d75a6e30521bb4a32ee6 100644
--- a/content_based.ipynb
+++ b/content_based.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 41,
    "id": "277473a3",
    "metadata": {},
    "outputs": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 42,
    "id": "e8378976",
    "metadata": {},
    "outputs": [
@@ -84,24 +84,24 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>23</td>\n",
+       "      <th>4993</th>\n",
+       "      <td>57</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>23</td>\n",
+       "      <th>5952</th>\n",
+       "      <td>45</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>34</th>\n",
-       "      <td>11</td>\n",
+       "      <th>527</th>\n",
+       "      <td>23</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>59</th>\n",
-       "      <td>44</td>\n",
+       "      <th>2028</th>\n",
+       "      <td>26</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>64</th>\n",
-       "      <td>20</td>\n",
+       "      <th>4308</th>\n",
+       "      <td>19</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -110,11 +110,25 @@
       "text/plain": [
        "         n_character_title\n",
        "movieId                   \n",
-       "3                       23\n",
-       "15                      23\n",
-       "34                      11\n",
-       "59                      44\n",
-       "64                      20"
+       "4993                    57\n",
+       "5952                    45\n",
+       "527                     23\n",
+       "2028                    26\n",
+       "4308                    19"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0         long\n",
+       "1       boring\n",
+       "2         long\n",
+       "3      romance\n",
+       "4    stupidity\n",
+       "Name: tag, dtype: object"
       ]
      },
      "metadata": {},
@@ -129,6 +143,10 @@
     "df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
     "display(df_features.head())\n",
     "\n",
+    "df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
+    "df_features = df_tag[C.TAG]\n",
+    "display(df_features.head())\n",
+    "\n",
     "# (explore here other features)\n"
    ]
   },
@@ -143,7 +161,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 43,
    "id": "16b0a602",
    "metadata": {},
    "outputs": [],
@@ -161,6 +179,28 @@
     "            df_features = None\n",
     "        elif features_method == \"title_length\": # a naive method that creates only 1 feature based on title length\n",
     "            df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
+    "\n",
+    "        elif features_method == \"movie_year\":\n",
+    "            df_features = pd.to_numeric(df_items[C.LABEL_COL].str.extract(r'\\((\\d{4})\\)', expand=False)).to_frame('movie_year')  # year from the '(YYYY)' in the title; NaN if absent\n",
+    "\n",
+    "        elif features_method == \"genres\":\n",
+    "            # One 0/1 indicator column per genre; the genres column is '|'-separated (a per-genre loop would keep only the last genre).\n",
+    "            df_features = df_items[C.GENRES_COL].str.get_dummies(sep='|')\n",
+    "\n",
+    "        elif features_method == \"rating\":\n",
+    "            # One average per movie, indexed by movieId (transform('mean') would instead yield one row per rating).\n",
+    "            df_features = df_ratings.groupby('movieId')['rating'].mean().to_frame('avg_rating')\n",
+    "\n",
+    "        elif features_method == \"tags\":\n",
+    "            df_features = df_tag[C.TAG].apply(lambda x: len(x.split(','))).to_frame('n_tags')  # NOTE(review): indexed by tags.csv row, not movieId - confirm before joining on items\n",
+    "\n",
+    "        elif features_method == \"tags_length\":\n",
+    "            df_features = df_tag[C.TAG].apply(lambda x: sum(len(tag) for tag in x.split(','))).to_frame('tags_length')  # NOTE(review): indexed by tags.csv row, not movieId - confirm\n",
+    "\n",
+    "        elif features_method == \"timestamp\":\n",
+    "            angle = 2 * np.pi * df_ratings['timestamp'] / 86400  # timestamp mapped onto a cycle of 86400 units; NOTE(review): one row per rating, not per movie - confirm\n",
+    "            df_features = pd.DataFrame({'timestamp_sin': np.sin(angle), 'timestamp_cos': np.cos(angle)})  # keep both components instead of overwriting sin with cos\n",
+    "\n",
     "        else: # (implement other feature creations here)\n",
     "            raise NotImplementedError(f'Feature method {features_method} not yet implemented')\n",
     "        return df_features\n",
@@ -249,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 44,
    "id": "69d12f7d",
    "metadata": {},
    "outputs": [
@@ -257,8 +297,40 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "user: 15         item: 942        r_ui = None   est = 3.79   {'was_impossible': False}\n",
-      "user: 15         item: 942        r_ui = None   est = 4.00   {'was_impossible': False}\n"
+      "user: 11         item: 1214       r_ui = None   est = 0.86   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 1.00   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 4.42   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 3.00   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 4.53   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 3.00   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 0.72   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 4.00   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 3.33   {'was_impossible': False}\n",
+      "user: 11         item: 1214       r_ui = None   est = 3.00   {'was_impossible': False}\n"
+     ]
+    },
+    {
+     "ename": "KeyError",
+     "evalue": "'timestamp'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/indexes/base.py:3791\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3790\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3791\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3792\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+      "File \u001b[0;32mindex.pyx:152\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mindex.pyx:181\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'timestamp'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[44], line 33\u001b[0m\n\u001b[1;32m     30\u001b[0m test_contentbased_class(feature_method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtags_length\u001b[39m\u001b[38;5;124m\"\u001b[39m, regressor_method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrandom_score\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     31\u001b[0m test_contentbased_class(feature_method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtags_length\u001b[39m\u001b[38;5;124m\"\u001b[39m, regressor_method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrandom_sample\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 33\u001b[0m \u001b[43mtest_contentbased_class\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfeature_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtimestamp\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregressor_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrandom_score\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     34\u001b[0m test_contentbased_class(feature_method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m, regressor_method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrandom_sample\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "Cell \u001b[0;32mIn[44], line 7\u001b[0m, in \u001b[0;36mtest_contentbased_class\u001b[0;34m(feature_method, regressor_method)\u001b[0m\n\u001b[1;32m      5\u001b[0m sp_ratings \u001b[38;5;241m=\u001b[39m load_ratings(surprise_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m      6\u001b[0m train_set \u001b[38;5;241m=\u001b[39m sp_ratings\u001b[38;5;241m.\u001b[39mbuild_full_trainset()\n\u001b[0;32m----> 7\u001b[0m content_algo \u001b[38;5;241m=\u001b[39m \u001b[43mContentBased\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfeature_method\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregressor_method\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      8\u001b[0m content_algo\u001b[38;5;241m.\u001b[39mfit(train_set)\n\u001b[1;32m      9\u001b[0m anti_test_set_first \u001b[38;5;241m=\u001b[39m train_set\u001b[38;5;241m.\u001b[39mbuild_anti_testset()[\u001b[38;5;241m0\u001b[39m]\n",
+      "Cell \u001b[0;32mIn[43], line 5\u001b[0m, in \u001b[0;36mContentBased.__init__\u001b[0;34m(self, features_method, regressor_method)\u001b[0m\n\u001b[1;32m      3\u001b[0m AlgoBase\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mregressor_method \u001b[38;5;241m=\u001b[39m regressor_method\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontent_features \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_content_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfeatures_method\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[43], line 33\u001b[0m, in \u001b[0;36mContentBased.create_content_features\u001b[0;34m(self, features_method)\u001b[0m\n\u001b[1;32m     30\u001b[0m      df_features \u001b[38;5;241m=\u001b[39m df_tag[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtag\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28msum\u001b[39m(\u001b[38;5;28mlen\u001b[39m(tag) \u001b[38;5;28;01mfor\u001b[39;00m tag \u001b[38;5;129;01min\u001b[39;00m x\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m'\u001b[39m)))\n\u001b[1;32m     32\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m features_method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m\"\u001b[39m :\n\u001b[0;32m---> 33\u001b[0m     df_features \u001b[38;5;241m=\u001b[39m  df_items[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp_sin\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39msin(\u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39mpi \u001b[38;5;241m*\u001b[39m \u001b[43mdf_items\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtimestamp\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m86400\u001b[39m)\n\u001b[1;32m     34\u001b[0m     df_features \u001b[38;5;241m=\u001b[39m  df_items[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp_cos\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mcos(\u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39mpi \u001b[38;5;241m*\u001b[39m df_items[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m86400\u001b[39m)\n\u001b[1;32m     36\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# (implement other feature creations here)\u001b[39;00m\n",
+      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/frame.py:3893\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3891\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m   3892\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 3893\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3894\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m   3895\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
+      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/indexes/base.py:3798\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3793\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m   3794\u001b[0m         \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m   3795\u001b[0m         \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m   3796\u001b[0m     ):\n\u001b[1;32m   3797\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3798\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m   3799\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m   3800\u001b[0m     \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m   3801\u001b[0m     \u001b[38;5;66;03m#  InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m   3802\u001b[0m     \u001b[38;5;66;03m#  the TypeError.\u001b[39;00m\n\u001b[1;32m   3803\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'timestamp'"
      ]
     }
    ],
@@ -278,7 +350,25 @@
     "# (call here the test functions with different regressor methods)\n",
     "\n",
     "test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_score\")\n",
-    "test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")"
+    "test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")\n",
+    "\n",
+    "test_contentbased_class(feature_method=\"movie_year\", regressor_method=\"random_score\")\n",
+    "test_contentbased_class(feature_method=\"movie_year\", regressor_method=\"random_sample\")\n",
+    "\n",
+    "test_contentbased_class(feature_method=\"genres\", regressor_method=\"random_score\")\n",
+    "test_contentbased_class(feature_method=\"genres\", regressor_method=\"random_sample\")\n",
+    "\n",
+    "test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_score\")\n",
+    "test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_sample\")\n",
+    "\n",
+    "test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_score\")\n",
+    "test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_sample\")\n",
+    "\n",
+    "test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_score\")\n",
+    "test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_sample\")\n",
+    "\n",
+    "test_contentbased_class(feature_method=\"timestamp\", regressor_method=\"random_score\")\n",
+    "test_contentbased_class(feature_method=\"timestamp\", regressor_method=\"random_sample\")"
    ]
   }
  ],