diff --git a/analytics_small.ipynb b/analytics_small.ipynb index b41000c264e2cfe1eb455e18413f15bd6fb7d464..b6f7494f9dcd736b78efecb0128a9476936d4754 100644 --- a/analytics_small.ipynb +++ b/analytics_small.ipynb @@ -6,15 +6,274 @@ "metadata": {}, "outputs": [ { - "ename": "ImportError", - "evalue": "cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msparse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m csr_matrix\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Constants and functions\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Constant \u001b[38;5;28;01mas\u001b[39;00m C\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_ratings\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_items\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)" + "name": "stdout", + "output_type": "stream", + "text": [ + "Display The Movies : \n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>title</th>\n", + " <th>genres</th>\n", + " </tr>\n", + " <tr>\n", + " <th>movieId</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Grumpier Old Men (1995)</td>\n", + " <td>Comedy|Romance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>Cutthroat Island (1995)</td>\n", + " <td>Action|Adventure|Romance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>Babe (1995)</td>\n", + " <td>Children|Drama</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>Confessional, The (Confessionnal, Le) (1995)</td>\n", + " <td>Drama|Mystery</td>\n", + " </tr>\n", + " <tr>\n", + " <th>64</th>\n", + " <td>Two if by Sea (1996)</td>\n", + " <td>Comedy|Romance</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>148652</th>\n", + " <td>The Ridiculous 6 (2015)</td>\n", + " <td>Comedy|Western</td>\n", + " </tr>\n", + " <tr>\n", + " <th>151307</th>\n", + " <td>The Lovers and the Despot</td>\n", + " <td>(no genres listed)</td>\n", + " </tr>\n", + " <tr>\n", + " <th>152173</th>\n", + " <td>Michael Jackson's Thriller (1983)</td>\n", + " <td>Horror</td>\n", + " </tr>\n", + " <tr>\n", + " <th>160440</th>\n", + " <td>The Maid's Room (2014)</td>\n", + " <td>Thriller</td>\n", + " </tr>\n", + " <tr>\n", + " <th>160656</th>\n", + " <td>Tallulah (2016)</td>\n", + " <td>Drama</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>912 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " title \\\n", + "movieId \n", + "3 Grumpier Old Men (1995) \n", + "15 Cutthroat Island (1995) \n", + "34 Babe (1995) \n", + "59 Confessional, The (Confessionnal, Le) (1995) \n", + "64 Two if by Sea (1996) \n", + "... ... \n", + "148652 The Ridiculous 6 (2015) \n", + "151307 The Lovers and the Despot \n", + "152173 Michael Jackson's Thriller (1983) \n", + "160440 The Maid's Room (2014) \n", + "160656 Tallulah (2016) \n", + "\n", + " genres \n", + "movieId \n", + "3 Comedy|Romance \n", + "15 Action|Adventure|Romance \n", + "34 Children|Drama \n", + "59 Drama|Mystery \n", + "64 Comedy|Romance \n", + "... ... \n", + "148652 Comedy|Western \n", + "151307 (no genres listed) \n", + "152173 Horror \n", + "160440 Thriller \n", + "160656 Drama \n", + "\n", + "[912 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Display The Ratings : \n" ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>userId</th>\n", + " <th>movieId</th>\n", + " <th>rating</th>\n", + " <th>timestamp</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>15</td>\n", + " <td>34</td>\n", + " <td>3.0</td>\n", + " <td>997938310</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>15</td>\n", + " <td>95</td>\n", + " <td>1.5</td>\n", + " <td>1093028331</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>15</td>\n", + " <td>101</td>\n", + " <td>4.0</td>\n", + " <td>1134522072</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>15</td>\n", + " <td>123</td>\n", + " <td>4.0</td>\n", + " <td>997938358</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>15</td>\n", + " <td>125</td>\n", + " <td>3.5</td>\n", + " <td>1245362506</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5291</th>\n", + " <td>665</td>\n", + " <td>3908</td>\n", + " <td>1.0</td>\n", + " <td>1046967201</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5292</th>\n", + " <td>665</td>\n", + " <td>4052</td>\n", + " <td>4.0</td>\n", + " <td>992838277</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5293</th>\n", + " <td>665</td>\n", + " <td>4351</td>\n", + " <td>4.0</td>\n", + " <td>992837743</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5294</th>\n", + " <td>665</td>\n", + " <td>4643</td>\n", + " <td>4.0</td>\n", + " <td>997239207</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5295</th>\n", + " <td>665</td>\n", + " <td>5502</td>\n", + " <td>4.0</td>\n", + " <td>1046967596</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5296 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " userId movieId rating timestamp\n", + "0 15 34 3.0 997938310\n", + "1 15 95 1.5 1093028331\n", + "2 15 101 4.0 1134522072\n", + "3 15 123 4.0 997938358\n", + "4 15 125 3.5 1245362506\n", + "... ... ... ... ...\n", + "5291 665 3908 1.0 1046967201\n", + "5292 665 4052 4.0 992838277\n", + "5293 665 4351 4.0 992837743\n", + "5294 665 4643 4.0 997239207\n", + "5295 665 5502 4.0 1046967596\n", + "\n", + "[5296 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -52,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -71,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -96,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -148,7 +407,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -167,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -186,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -206,7 +465,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -229,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -252,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -286,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -316,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -349,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -366,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -429,7 +688,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [ { diff --git a/content_based.ipynb b/content_based.ipynb index df2d2bef0162939075e729f8cebb02c356571c8f..f62bfd8653c5ac072f750fa1eb0f5bafb4f25338 100644 --- a/content_based.ipynb +++ b/content_based.ipynb @@ -10,20 +10,16 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "id": "277473a3", "metadata": {}, "outputs": [ { - "ename": "ImportError", - "evalue": "cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msurprise\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AlgoBase\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msurprise\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprediction_algorithms\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpredictions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PredictionImpossible\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_ratings\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mloaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_items\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Constant \u001b[38;5;28;01mas\u001b[39;00m C\n", - "File \u001b[0;32m~/vscodeworkspace/recomsys/loaders.py:7\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Local imports\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Constant \u001b[38;5;28;01mas\u001b[39;00m C\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msurprise\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Reader, Dataset\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_ratings\u001b[39m(surprise_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'Constant' from 'constants' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/constants.py)" + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" ] } ], @@ -54,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "e8378976", "metadata": {}, "outputs": [ @@ -147,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "16b0a602", "metadata": {}, "outputs": [], @@ -184,14 +180,15 @@ " elif self.regressor_method == 'random_sample':\n", " for u in self.user_profile:\n", " self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]\n", - " else:\n", + " elif self.regressor_method == 'linear_regression' :\n", " for u in self.user_profile:\n", "\n", - " user_ratings = [(trainset.to_raw_iid(iid), rating) for (iid, rating) in trainset.ur[u]]\n", + " user_ratings = [rating for _, rating in trainset.ur[u]]\n", + " item_ids = [iid for iid, _ in trainset.ur[u]]\n", "\n", - " df_user = pd.DataFrame(user_ratings, columns = [\"item_id\", \"user_ratings\"])\n", + " df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n", "\n", - " df_user[\"item_id\"] = df_user['item_id'].map(trainset.to_raw_idd)\n", + " df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n", "\n", " df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n", "\n", @@ -205,6 +202,8 @@ " \n", " # Store the computed user profile\n", " self.user_profile[u] = linear_regressor\n", + " else : \n", + " pass\n", "\n", " # (implement here the regressor fitting) \n", " \n", @@ -223,7 +222,7 @@ " rd.seed()\n", " score = rd.choice(self.user_profile[u])\n", " \n", - " else:\n", + " elif self.regressor_method == 'linear_regression':\n", "\n", " raw_item_id = self.trainset.to_raw_iid(i)\n", "\n", @@ -232,11 +231,12 @@ " linear_regressor = self.user_profile[u]\n", "\n", " score= linear_regressor.predict(item_features)[0]\n", - "\n", + " else : \n", + " score = None\n", "\n", " # (implement here the regressor prediction)\n", "\n", - " return score\n" + " return score" ] }, { @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "69d12f7d", "metadata": {}, "outputs": [ @@ -257,8 +257,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "user: 15 item: 942 r_ui = None est = 3.59 {'was_impossible': False}\n", - "user: 15 item: 942 r_ui = None est = 3.00 {'was_impossible': False}\n" + "user: 15 item: 942 r_ui = None est = 3.79 {'was_impossible': False}\n", + "user: 15 item: 942 r_ui = None est = 4.00 {'was_impossible': False}\n" ] } ],