From e1451ea1d4dbba9789a3e51c4bd0feb40a406b65 Mon Sep 17 00:00:00 2001
From: Adrienucl <adrien.payen@student.uclouvain.be>
Date: Thu, 2 May 2024 11:13:30 +0200
Subject: [PATCH] content based commit
---
analytics_small.ipynb | 303 +++++++++++++++++++++++++++++++++++++++---
content_based.ipynb | 44 +++---
2 files changed, 303 insertions(+), 44 deletions(-)
diff --git a/analytics_small.ipynb b/analytics_small.ipynb
index b41000c2..b6f7494f 100644
--- a/analytics_small.ipynb
+++ b/analytics_small.ipynb
@@ -6,15 +6,274 @@
"metadata": {},
"outputs": [
{
- "ename": "ImportError",
- "evalue": "cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[1], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msparse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m csr_matrix\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Constants and functions\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Constant \u001b[38;5;28;01mas\u001b[39;00m C\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_ratings\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_items\n",
- "\u001b[0;31mImportError\u001b[0m: cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Display The Movies : \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>title</th>\n",
+ " <th>genres</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>movieId</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>Grumpier Old Men (1995)</td>\n",
+ " <td>Comedy|Romance</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>15</th>\n",
+ " <td>Cutthroat Island (1995)</td>\n",
+ " <td>Action|Adventure|Romance</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>34</th>\n",
+ " <td>Babe (1995)</td>\n",
+ " <td>Children|Drama</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>59</th>\n",
+ " <td>Confessional, The (Confessionnal, Le) (1995)</td>\n",
+ " <td>Drama|Mystery</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>64</th>\n",
+ " <td>Two if by Sea (1996)</td>\n",
+ " <td>Comedy|Romance</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>148652</th>\n",
+ " <td>The Ridiculous 6 (2015)</td>\n",
+ " <td>Comedy|Western</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>151307</th>\n",
+ " <td>The Lovers and the Despot</td>\n",
+ " <td>(no genres listed)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>152173</th>\n",
+ " <td>Michael Jackson's Thriller (1983)</td>\n",
+ " <td>Horror</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>160440</th>\n",
+ " <td>The Maid's Room (2014)</td>\n",
+ " <td>Thriller</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>160656</th>\n",
+ " <td>Tallulah (2016)</td>\n",
+ " <td>Drama</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>912 rows × 2 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " title \\\n",
+ "movieId \n",
+ "3 Grumpier Old Men (1995) \n",
+ "15 Cutthroat Island (1995) \n",
+ "34 Babe (1995) \n",
+ "59 Confessional, The (Confessionnal, Le) (1995) \n",
+ "64 Two if by Sea (1996) \n",
+ "... ... \n",
+ "148652 The Ridiculous 6 (2015) \n",
+ "151307 The Lovers and the Despot \n",
+ "152173 Michael Jackson's Thriller (1983) \n",
+ "160440 The Maid's Room (2014) \n",
+ "160656 Tallulah (2016) \n",
+ "\n",
+ " genres \n",
+ "movieId \n",
+ "3 Comedy|Romance \n",
+ "15 Action|Adventure|Romance \n",
+ "34 Children|Drama \n",
+ "59 Drama|Mystery \n",
+ "64 Comedy|Romance \n",
+ "... ... \n",
+ "148652 Comedy|Western \n",
+ "151307 (no genres listed) \n",
+ "152173 Horror \n",
+ "160440 Thriller \n",
+ "160656 Drama \n",
+ "\n",
+ "[912 rows x 2 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Display The Ratings : \n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>userId</th>\n",
+ " <th>movieId</th>\n",
+ " <th>rating</th>\n",
+ " <th>timestamp</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>15</td>\n",
+ " <td>34</td>\n",
+ " <td>3.0</td>\n",
+ " <td>997938310</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>15</td>\n",
+ " <td>95</td>\n",
+ " <td>1.5</td>\n",
+ " <td>1093028331</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>15</td>\n",
+ " <td>101</td>\n",
+ " <td>4.0</td>\n",
+ " <td>1134522072</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>15</td>\n",
+ " <td>123</td>\n",
+ " <td>4.0</td>\n",
+ " <td>997938358</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>15</td>\n",
+ " <td>125</td>\n",
+ " <td>3.5</td>\n",
+ " <td>1245362506</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5291</th>\n",
+ " <td>665</td>\n",
+ " <td>3908</td>\n",
+ " <td>1.0</td>\n",
+ " <td>1046967201</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5292</th>\n",
+ " <td>665</td>\n",
+ " <td>4052</td>\n",
+ " <td>4.0</td>\n",
+ " <td>992838277</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5293</th>\n",
+ " <td>665</td>\n",
+ " <td>4351</td>\n",
+ " <td>4.0</td>\n",
+ " <td>992837743</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5294</th>\n",
+ " <td>665</td>\n",
+ " <td>4643</td>\n",
+ " <td>4.0</td>\n",
+ " <td>997239207</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5295</th>\n",
+ " <td>665</td>\n",
+ " <td>5502</td>\n",
+ " <td>4.0</td>\n",
+ " <td>1046967596</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>5296 rows × 4 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " userId movieId rating timestamp\n",
+ "0 15 34 3.0 997938310\n",
+ "1 15 95 1.5 1093028331\n",
+ "2 15 101 4.0 1134522072\n",
+ "3 15 123 4.0 997938358\n",
+ "4 15 125 3.5 1245362506\n",
+ "... ... ... ... ...\n",
+ "5291 665 3908 1.0 1046967201\n",
+ "5292 665 4052 4.0 992838277\n",
+ "5293 665 4351 4.0 992837743\n",
+ "5294 665 4643 4.0 997239207\n",
+ "5295 665 5502 4.0 1046967596\n",
+ "\n",
+ "[5296 rows x 4 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -52,7 +311,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -71,7 +330,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -96,7 +355,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -148,7 +407,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -167,7 +426,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -186,7 +445,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -206,7 +465,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -229,7 +488,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -252,7 +511,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -286,7 +545,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -316,7 +575,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -349,7 +608,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -366,7 +625,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -429,7 +688,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
diff --git a/content_based.ipynb b/content_based.ipynb
index df2d2bef..f62bfd86 100644
--- a/content_based.ipynb
+++ b/content_based.ipynb
@@ -10,20 +10,16 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 9,
"id": "277473a3",
"metadata": {},
"outputs": [
{
- "ename": "ImportError",
- "evalue": "cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[1], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msurprise\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AlgoBase\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msurprise\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction_algorithms\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpredictions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PredictionImpossible\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_ratings\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_items\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Constant \u001b[38;5;28;01mas\u001b[39;00m C\n",
- "File \u001b[0;32m~/vscodeworkspace/recomsys/loaders.py:7\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Local imports\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Constant \u001b[38;5;28;01mas\u001b[39;00m C\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msurprise\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Reader, Dataset\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_ratings\u001b[39m(surprise_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n",
- "\u001b[0;31mImportError\u001b[0m: cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n",
+ " %reload_ext autoreload\n"
]
}
],
@@ -54,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"id": "e8378976",
"metadata": {},
"outputs": [
@@ -147,7 +143,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "16b0a602",
"metadata": {},
"outputs": [],
@@ -184,14 +180,15 @@
" elif self.regressor_method == 'random_sample':\n",
" for u in self.user_profile:\n",
" self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]\n",
- " else:\n",
+ " elif self.regressor_method == 'linear_regression' :\n",
" for u in self.user_profile:\n",
"\n",
- " user_ratings = [(trainset.to_raw_iid(iid), rating) for (iid, rating) in trainset.ur[u]]\n",
+ " user_ratings = [rating for _, rating in trainset.ur[u]]\n",
+ " item_ids = [iid for iid, _ in trainset.ur[u]]\n",
"\n",
- " df_user = pd.DataFrame(user_ratings, columns = [\"item_id\", \"user_ratings\"])\n",
+ " df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
"\n",
- " df_user[\"item_id\"] = df_user['item_id'].map(trainset.to_raw_idd)\n",
+ " df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
"\n",
" df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
"\n",
@@ -205,6 +202,8 @@
" \n",
" # Store the computed user profile\n",
" self.user_profile[u] = linear_regressor\n",
+ " else : \n",
+ " pass\n",
"\n",
" # (implement here the regressor fitting) \n",
" \n",
@@ -223,7 +222,7 @@
" rd.seed()\n",
" score = rd.choice(self.user_profile[u])\n",
" \n",
- " else:\n",
+ " elif self.regressor_method == 'linear_regression':\n",
"\n",
" raw_item_id = self.trainset.to_raw_iid(i)\n",
"\n",
@@ -232,11 +231,12 @@
" linear_regressor = self.user_profile[u]\n",
"\n",
" score= linear_regressor.predict(item_features)[0]\n",
- "\n",
+ " else : \n",
+ " score = None\n",
"\n",
" # (implement here the regressor prediction)\n",
"\n",
- " return score\n"
+ " return score"
]
},
{
@@ -249,7 +249,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"id": "69d12f7d",
"metadata": {},
"outputs": [
@@ -257,8 +257,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "user: 15 item: 942 r_ui = None est = 3.59 {'was_impossible': False}\n",
- "user: 15 item: 942 r_ui = None est = 3.00 {'was_impossible': False}\n"
+ "user: 15 item: 942 r_ui = None est = 3.79 {'was_impossible': False}\n",
+ "user: 15 item: 942 r_ui = None est = 4.00 {'was_impossible': False}\n"
]
}
],
--
GitLab