From 46bea813a55364e375815c30fa216dfd74dfea8d Mon Sep 17 00:00:00 2001 From: Adrien <adrien.payen@student.uclouvain.be> Date: Fri, 10 May 2024 21:17:31 +0200 Subject: [PATCH] update UserBased --- user_based.ipynb | 57 ++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/user_based.ipynb b/user_based.ipynb index 7a8d85b2..124aa3d8 100644 --- a/user_based.ipynb +++ b/user_based.ipynb @@ -11,28 +11,35 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 91, "id": "00d1b249", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "# reloads modules automatically before entering the execution of code\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", - "# standard library imports\n", - "# -- add new imports here --\n", - "\n", - "# third parties imports\n", + "# Standard library imports\n", "import numpy as np \n", "import pandas as pd\n", - "# -- add new imports here --\n", "\n", - "# local imports\n", + "# Others imports\n", + "from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible, KNNBasic\n", + "import heapq\n", + "\n", + "# Local imports\n", "from constants import Constant as C\n", - "from loaders import load_ratings\n", - "from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible,KNNBasic\n", - "import heapq" + "from loaders import load_ratings" ] }, { @@ -46,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 92, "id": "aafd1712", "metadata": {}, "outputs": [], @@ -70,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 93, "id": "ce078b43", "metadata": {}, "outputs": [ @@ -103,12 +110,13 @@ "uid = 11 # raw user id (as in the ratings file). They are **strings**!\n", "iid = 364 \n", "\n", - "pred = knn_model.predict(uid, iid, verbose=True)" + "pred = knn_model.predict(uid, iid)\n", + "print(pred)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 94, "id": "ffe89c56", "metadata": {}, "outputs": [ @@ -245,7 +253,7 @@ "id": "c8890e11", "metadata": {}, "source": [ - "he change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.\n", + "The change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.\n", "\n", "Initially, with min_k = 1, predictions are generated even if only a single similar user (neighbor) has rated a particular item. This approach can lead to predictions that might not accurately represent the item's true rating, especially if the rating from the sole available neighbor is an outlier or not representative of the broader user preferences.\n", "\n", @@ -262,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 95, "id": "cc806424", "metadata": {}, "outputs": [ @@ -443,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 96, "id": "d03ed9eb", "metadata": {}, "outputs": [ @@ -476,6 +484,7 @@ " self.k = k\n", " self.min_k = min_k\n", " self.sim_options = sim_options\n", + " \n", "\n", " def fit(self, trainset):\n", " \"\"\"\n", @@ -508,10 +517,10 @@ "\n", " # Step 1: Create the peer group of user u for item i\n", " peer_group = []\n", - " for j, rating in enumerate(self.trainset.ir[i]):\n", + " for neighbor_inner_id, rating in enumerate(self.trainset.ir[i]):\n", " if rating is not None:\n", - " similarity = self.sim[u, j] # Similarity between user u and user j for item i\n", - " peer_group.append((j, similarity, rating))\n", + " similarity = self.sim[u, neighbor_inner_id] # Similarity between user u and user j for item i\n", + " peer_group.append((neighbor_inner_id, similarity, rating))\n", "\n", " # Step 2: Pick up the top neighbors efficiently\n", " k_neighbors = heapq.nlargest(self.min_k, peer_group, key=lambda x: x[1]) # Top k neighbors based on similarity\n", @@ -608,7 +617,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 97, "id": "7a9147ea", "metadata": {}, "outputs": [ @@ -719,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 98, "id": "be53ae27", "metadata": {}, "outputs": [ @@ -775,7 +784,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 99, "id": "c20d8e19", "metadata": {}, "outputs": [ -- GitLab