update UserBased

46bea813 · Adrien Payen · 4c4184c2 · 46bea813
--- a/user_based.ipynb
+++ b/user_based.ipynb
@@ -11,28 +11,35 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 91,
   "id": "00d1b249",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
   "source": [
    "# reloads modules automatically before entering the execution of code\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
-    "# standard library imports\n",
+    "# Standard library imports\n",
-    "# -- add new imports here --\n",
-    "\n",
-    "# third parties imports\n",
    "import numpy as np \n",
    "import pandas as pd\n",
-    "# -- add new imports here --\n",
    "\n",
-    "# local imports\n",
+    "# Others imports\n",
+    "from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible, KNNBasic\n",
+    "import heapq\n",
+    "\n",
+    "# Local imports\n",
    "from constants import Constant as C\n",
-    "from loaders import load_ratings\n",
+    "from loaders import load_ratings"
-    "from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible,KNNBasic\n",
-    "import heapq"
   ]
  },
  {
@@ -46,7 +53,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 92,
   "id": "aafd1712",
   "metadata": {},
   "outputs": [],
@@ -70,7 +77,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 93,
   "id": "ce078b43",
   "metadata": {},
   "outputs": [
@@ -103,12 +110,13 @@
    "uid = 11  # raw user id (as in the ratings file). They are **strings**!\n",
    "iid = 364 \n",
    "\n",
-    "pred = knn_model.predict(uid, iid, verbose=True)"
+    "pred = knn_model.predict(uid, iid)\n",
+    "print(pred)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 94,
   "id": "ffe89c56",
   "metadata": {},
   "outputs": [
@@ -245,7 +253,7 @@
   "id": "c8890e11",
   "metadata": {},
   "source": [
-    "he change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.\n",
+    "The change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.\n",
    "\n",
    "Initially, with min_k = 1, predictions are generated even if only a single similar user (neighbor) has rated a particular item. This approach can lead to predictions that might not accurately represent the item's true rating, especially if the rating from the sole available neighbor is an outlier or not representative of the broader user preferences.\n",
    "\n",
@@ -262,7 +270,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 95,
   "id": "cc806424",
   "metadata": {},
   "outputs": [
@@ -443,7 +451,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 96,
   "id": "d03ed9eb",
   "metadata": {},
   "outputs": [
@@ -476,6 +484,7 @@
    "        self.k = k\n",
    "        self.min_k = min_k\n",
    "        self.sim_options = sim_options\n",
+    "        \n",
    "\n",
    "    def fit(self, trainset):\n",
    "        \"\"\"\n",
@@ -508,10 +517,10 @@
    "\n",
    "        # Step 1: Create the peer group of user u for item i\n",
    "        peer_group = []\n",
-    "        for j, rating in enumerate(self.trainset.ir[i]):\n",
+    "        for neighbor_inner_id, rating in enumerate(self.trainset.ir[i]):\n",
    "            if rating is not None:\n",
-    "                similarity = self.sim[u, j]  # Similarity between user u and user j for item i\n",
+    "                similarity = self.sim[u, neighbor_inner_id]  # Similarity between user u and user j for item i\n",
-    "                peer_group.append((j, similarity, rating))\n",
+    "                peer_group.append((neighbor_inner_id, similarity, rating))\n",
    "\n",
    "        # Step 2: Pick up the top neighbors efficiently\n",
    "        k_neighbors = heapq.nlargest(self.min_k, peer_group, key=lambda x: x[1])  # Top k neighbors based on similarity\n",
@@ -608,7 +617,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 97,
   "id": "7a9147ea",
   "metadata": {},
   "outputs": [
@@ -719,7 +728,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 98,
   "id": "be53ae27",
   "metadata": {},
   "outputs": [
@@ -775,7 +784,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 99,
   "id": "c20d8e19",
   "metadata": {},
   "outputs": [

 %% Cell type:markdown id:f4a8f664 tags:
 # Custom User-based Model
 This notebook aims to create a UserBased class that inherits from the AlgoBase class (surprise package) and that can be customized with various similarity metrics, peer groups and score aggregation functions.
 %% Cell type:code id:00d1b249 tags:
 ``` python
 # reloads modules automatically before entering the execution of code
 %load_ext autoreload
 %autoreload 2
-# standard library imports
+# Standard library imports
-# -- add new imports here --
-# third parties imports
 import numpy as np
 import pandas as pd
-# -- add new imports here --
-# local imports
+# Others imports
+from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible, KNNBasic
+import heapq
+# Local imports
 from constants import Constant as C
 from loaders import load_ratings
-from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible,KNNBasic
-import heapq
 ```
+%% Output
+    The autoreload extension is already loaded. To reload it, use:
+      %reload_ext autoreload
 %% Cell type:markdown id:22716aa3 tags:
 # 1. Loading Data
 Prepare a dataset in order to help implementing a user-based recommender system
 %% Cell type:code id:aafd1712 tags:
 ``` python
 # Create Surprise Dataset from the pandas DataFrame and Reader
 # (done by the project loader; `surprise_format=True` presumably selects the
 # Surprise Dataset output rather than a DataFrame - confirm in loaders.py).
 surprise_data = load_ratings(surprise_format=True)
 # Train on every available rating: no hold-out split is made here.
 trainset = surprise_data.build_full_trainset()
 # The anti-testset is made of the (user, item) pairs that have no rating in
 # the trainset, i.e. the ratings the models are asked to predict.
 testset = trainset.build_anti_testset()
 ```
 %% Cell type:markdown id:94adf3a6 tags:
 # 2. Explore Surprise's user-based algorithm
 Displays user-based predictions and similarity matrix on the test dataset using the KNNWithMeans class
 %% Cell type:code id:ce078b43 tags:
 ``` python
 # User-based prediction for user 11 and item 364
 sim_options = {
    'name': 'msd',  # Mean Squared Difference similarity
    'user_based': True,  # User-based collaborative filtering
    'min_support': 3  # Minimum number of common ratings required
 }
 # Build an algorithm, and train it.
 knn_model = KNNWithMeans(sim_options=sim_options, k=3, min_k=2)
 knn_model.fit(trainset)
 # NOTE: the predictions returned by test() are not kept here.
 knn_model.test(testset)
 uid = 11  # raw user id (as in the ratings file); passed as an int here
 iid = 364  # raw item id
-pred = knn_model.predict(uid, iid, verbose=True)
+pred = knn_model.predict(uid, iid)
+print(pred)
 ```
 %% Output
    Computing the msd similarity matrix...
    Done computing similarity matrix.
    user: 11         item: 364        r_ui = None   est = 2.49   {'actual_k': 2, 'was_impossible': False}
 %% Cell type:code id:ffe89c56 tags:
 ``` python
 # Playing with KNN
 # Create an instance of KNNWithMeans with the specified options
 def predict_ratings(trainset, testset, min_k_values):
    """
    Train one KNNWithMeans model per `min_k` value and print its first predictions.
    Args:
        trainset (Trainset): Data each model is fitted on.
        testset (list): (user, item, rating) entries to predict.
        min_k_values (iterable of int): `min_k` settings to try, in order.
    Note:
        Reads the notebook-level `sim_options` dictionary; `k` is fixed to 3
        and only the first 30 predictions of each run are printed.
    """
    displayed = 30
    for neighbours_required in min_k_values:
        model = KNNWithMeans(sim_options=sim_options, k=3, min_k=neighbours_required)
        model.fit(trainset)
        # Predict the whole testset, then keep only the head for display
        head = model.test(testset)[:displayed]
        print(f"Predictions with min_k = {neighbours_required}:")
        for p in head:
            print(f"User: {p.uid}, Item: {p.iid}, Rating: {p.est}")
 # Assuming trainset and testset are already defined
 predict_ratings(trainset, testset, min_k_values=[1, 2, 3])
 ```
 %% Output
    Computing the msd similarity matrix...
    Done computing similarity matrix.
    Predictions with min_k = 1:
    User: 11, Item: 1214, Rating: 3.6041666666666665
    User: 11, Item: 364, Rating: 2.49203431372549
    User: 11, Item: 4308, Rating: 1.6041666666666667
    User: 11, Item: 527, Rating: 3.898897058823529
    User: 13, Item: 1997, Rating: 2.8
    User: 13, Item: 4993, Rating: 3.2375
    User: 13, Item: 2700, Rating: 2.8
    User: 13, Item: 1721, Rating: 1.2374999999999998
    User: 13, Item: 527, Rating: 3.2375
    User: 17, Item: 2028, Rating: 3.8125
    User: 17, Item: 4993, Rating: 4.128289473684211
    User: 17, Item: 1214, Rating: 3.6875
    User: 17, Item: 4308, Rating: 1.6875
    User: 19, Item: 1997, Rating: 3.5
    User: 19, Item: 2028, Rating: 3.5
    User: 19, Item: 4993, Rating: 3.5
    User: 19, Item: 5952, Rating: 3.5
    User: 19, Item: 2700, Rating: 3.5
    User: 19, Item: 1721, Rating: 3.5
    User: 19, Item: 1214, Rating: 3.5
    User: 19, Item: 364, Rating: 3.5
    User: 23, Item: 1997, Rating: 2.782649253731343
    User: 23, Item: 2700, Rating: 2.349813432835821
    User: 27, Item: 1997, Rating: 4.666666666666667
    User: 27, Item: 2028, Rating: 5.0
    User: 27, Item: 5952, Rating: 5.0
    User: 27, Item: 2700, Rating: 4.666666666666667
    User: 27, Item: 1721, Rating: 3.104166666666667
    User: 27, Item: 364, Rating: 4.604166666666667
    User: 27, Item: 4308, Rating: 3.104166666666667
    Computing the msd similarity matrix...
    Done computing similarity matrix.
    Predictions with min_k = 2:
    User: 11, Item: 1214, Rating: 3.1666666666666665
    User: 11, Item: 364, Rating: 2.49203431372549
    User: 11, Item: 4308, Rating: 3.1666666666666665
    User: 11, Item: 527, Rating: 3.898897058823529
    User: 13, Item: 1997, Rating: 2.8
    User: 13, Item: 4993, Rating: 2.8
    User: 13, Item: 2700, Rating: 2.8
    User: 13, Item: 1721, Rating: 2.8
    User: 13, Item: 527, Rating: 2.8
    User: 17, Item: 2028, Rating: 3.8125
    User: 17, Item: 4993, Rating: 4.128289473684211
    User: 17, Item: 1214, Rating: 3.25
    User: 17, Item: 4308, Rating: 3.25
    User: 19, Item: 1997, Rating: 3.5
    User: 19, Item: 2028, Rating: 3.5
    User: 19, Item: 4993, Rating: 3.5
    User: 19, Item: 5952, Rating: 3.5
    User: 19, Item: 2700, Rating: 3.5
    User: 19, Item: 1721, Rating: 3.5
    User: 19, Item: 1214, Rating: 3.5
    User: 19, Item: 364, Rating: 3.5
    User: 23, Item: 1997, Rating: 2.782649253731343
    User: 23, Item: 2700, Rating: 2.349813432835821
    User: 27, Item: 1997, Rating: 4.666666666666667
    User: 27, Item: 2028, Rating: 4.666666666666667
    User: 27, Item: 5952, Rating: 4.666666666666667
    User: 27, Item: 2700, Rating: 4.666666666666667
    User: 27, Item: 1721, Rating: 4.666666666666667
    User: 27, Item: 364, Rating: 4.666666666666667
    User: 27, Item: 4308, Rating: 4.666666666666667
    Computing the msd similarity matrix...
    Done computing similarity matrix.
    Predictions with min_k = 3:
    User: 11, Item: 1214, Rating: 3.1666666666666665
    User: 11, Item: 364, Rating: 3.1666666666666665
    User: 11, Item: 4308, Rating: 3.1666666666666665
    User: 11, Item: 527, Rating: 3.1666666666666665
    User: 13, Item: 1997, Rating: 2.8
    User: 13, Item: 4993, Rating: 2.8
    User: 13, Item: 2700, Rating: 2.8
    User: 13, Item: 1721, Rating: 2.8
    User: 13, Item: 527, Rating: 2.8
    User: 17, Item: 2028, Rating: 3.25
    User: 17, Item: 4993, Rating: 3.25
    User: 17, Item: 1214, Rating: 3.25
    User: 17, Item: 4308, Rating: 3.25
    User: 19, Item: 1997, Rating: 3.5
    User: 19, Item: 2028, Rating: 3.5
    User: 19, Item: 4993, Rating: 3.5
    User: 19, Item: 5952, Rating: 3.5
    User: 19, Item: 2700, Rating: 3.5
    User: 19, Item: 1721, Rating: 3.5
    User: 19, Item: 1214, Rating: 3.5
    User: 19, Item: 364, Rating: 3.5
    User: 23, Item: 1997, Rating: 2.5625
    User: 23, Item: 2700, Rating: 2.5625
    User: 27, Item: 1997, Rating: 4.666666666666667
    User: 27, Item: 2028, Rating: 4.666666666666667
    User: 27, Item: 5952, Rating: 4.666666666666667
    User: 27, Item: 2700, Rating: 4.666666666666667
    User: 27, Item: 1721, Rating: 4.666666666666667
    User: 27, Item: 364, Rating: 4.666666666666667
    User: 27, Item: 4308, Rating: 4.666666666666667
 %% Cell type:markdown id:c8890e11 tags:
-he change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.
+The change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.
 Initially, with min_k = 1, predictions are generated even if only a single similar user (neighbor) has rated a particular item. This approach can lead to predictions that might not accurately represent the item's true rating, especially if the rating from the sole available neighbor is an outlier or not representative of the broader user preferences.
 For example, consider User 11's ratings for items like 1214 and 364. Under min_k = 1, the predictions were 3.604 and 2.492, respectively. However, when min_k is increased to 3, both predictions become 3.166: the first one decreases while the second one increases. The value 3.166 is User 11's mean rating: when fewer than min_k usable neighbors are found, KNNWithMeans ignores the neighbors and falls back to the user's mean. This indicates that the initial predictions were based on only one or two ratings from similar users, which can lead to more volatile or less reliable predictions.
 Similarly, for User 23's ratings on items 1997 and 2700, transitioning from min_k = 1 to min_k = 3 moves the predictions from 2.782 and 2.349 to 2.5625 for both items (a decrease for the first item and an increase for the second). This change suggests that the initial predictions might have been based on limited or potentially biased data, prompting a more conservative reassessment under min_k = 3.
 The rationale behind this change lies in the nature of the min_k parameter. Increasing min_k to 3 requires a more robust set of similar users (at least 3) to have rated an item before their ratings are used in the prediction; when this requirement is not met, the prediction defaults to the user's mean rating. This adjustment aims to provide more stable and reliable predictions by relying on a broader consensus among users with similar preferences.
 By enforcing a higher min_k, the system adopts a more cautious approach to estimating ratings, particularly for items with sparse or potentially biased rating data. This approach helps mitigate the impact of outliers or sparse data in the recommendation system, leading to more consistent and credible predictions overall.
 In summary, adjusting the min_k parameter from 1 to 3 signifies a shift towards more conservative and reliable estimates of item ratings within the recommendation system. This adjustment aims to enhance the accuracy and robustness of the system's predictions by requiring a broader consensus among similar users before making predictions, thereby improving the overall quality and reliability of recommendations provided to users.
 %% Cell type:code id:cc806424 tags:
 ``` python
 def analyse_min_support(knn_model, testset):
    """
    Show how `min_support` influences the number of neighbors actually used.
    For min_support = 1, 2 and 3 the model is refitted, the first 30 entries
    of `testset` are predicted and the `actual_k` of each prediction is printed.
    Args:
        knn_model: Already fitted Surprise KNN model. It is modified in place:
            `min_k` is set to 2, `sim_options['min_support']` ends at 3 and the
            model is refitted on its own trainset.
        testset (list): Testset entries containing (user, item, rating).
    Returns:
        The similarity matrix (`knn_model.sim`) of the last fit (min_support = 3).
    """
    # Reset min_k to 2
    knn_model.min_k = 2
    # Modify min_support from 1 to 3 and observe actual_k
    for min_support in range(1, 4):
        knn_model.sim_options['min_support'] = min_support
        # min_support is only read while the similarity matrix is computed, so
        # the model has to be fitted again for the new value to have any effect.
        knn_model.fit(knn_model.trainset)
        predictions_min_support = knn_model.test(testset[:30])  # Take the first 30 predictions for display
        print(f"\nPredictions with min_support = {min_support}:")
        for prediction in predictions_min_support:
            actual_k = prediction.details['actual_k']
            print(f"User: {prediction.uid}, Item: {prediction.iid}, Actual_k: {actual_k}")
    # Visualize the similarity matrix
    similarity_matrix = knn_model.sim  # Similarity matrix of the last fit
    print("\nSimilarity Matrix:")
    return similarity_matrix
 # Call the function and print the analysis
 result = analyse_min_support(knn_model, testset)
 print(result)
 ```
 %% Output
    Predictions with min_support = 1:
    User: 11, Item: 1214, Actual_k: 1
    User: 11, Item: 364, Actual_k: 2
    User: 11, Item: 4308, Actual_k: 1
    User: 11, Item: 527, Actual_k: 2
    User: 13, Item: 1997, Actual_k: 0
    User: 13, Item: 4993, Actual_k: 1
    User: 13, Item: 2700, Actual_k: 0
    User: 13, Item: 1721, Actual_k: 1
    User: 13, Item: 527, Actual_k: 1
    User: 17, Item: 2028, Actual_k: 2
    User: 17, Item: 4993, Actual_k: 2
    User: 17, Item: 1214, Actual_k: 1
    User: 17, Item: 4308, Actual_k: 1
    User: 19, Item: 1997, Actual_k: 0
    User: 19, Item: 2028, Actual_k: 0
    User: 19, Item: 4993, Actual_k: 0
    User: 19, Item: 5952, Actual_k: 0
    User: 19, Item: 2700, Actual_k: 0
    User: 19, Item: 1721, Actual_k: 0
    User: 19, Item: 1214, Actual_k: 0
    User: 19, Item: 364, Actual_k: 0
    User: 23, Item: 1997, Actual_k: 2
    User: 23, Item: 2700, Actual_k: 2
    User: 27, Item: 1997, Actual_k: 0
    User: 27, Item: 2028, Actual_k: 1
    User: 27, Item: 5952, Actual_k: 1
    User: 27, Item: 2700, Actual_k: 0
    User: 27, Item: 1721, Actual_k: 1
    User: 27, Item: 364, Actual_k: 1
    User: 27, Item: 4308, Actual_k: 1
    Predictions with min_support = 2:
    User: 11, Item: 1214, Actual_k: 1
    User: 11, Item: 364, Actual_k: 2
    User: 11, Item: 4308, Actual_k: 1
    User: 11, Item: 527, Actual_k: 2
    User: 13, Item: 1997, Actual_k: 0
    User: 13, Item: 4993, Actual_k: 1
    User: 13, Item: 2700, Actual_k: 0
    User: 13, Item: 1721, Actual_k: 1
    User: 13, Item: 527, Actual_k: 1
    User: 17, Item: 2028, Actual_k: 2
    User: 17, Item: 4993, Actual_k: 2
    User: 17, Item: 1214, Actual_k: 1
    User: 17, Item: 4308, Actual_k: 1
    User: 19, Item: 1997, Actual_k: 0
    User: 19, Item: 2028, Actual_k: 0
    User: 19, Item: 4993, Actual_k: 0
    User: 19, Item: 5952, Actual_k: 0
    User: 19, Item: 2700, Actual_k: 0
    User: 19, Item: 1721, Actual_k: 0
    User: 19, Item: 1214, Actual_k: 0
    User: 19, Item: 364, Actual_k: 0
    User: 23, Item: 1997, Actual_k: 2
    User: 23, Item: 2700, Actual_k: 2
    User: 27, Item: 1997, Actual_k: 0
    User: 27, Item: 2028, Actual_k: 1
    User: 27, Item: 5952, Actual_k: 1
    User: 27, Item: 2700, Actual_k: 0
    User: 27, Item: 1721, Actual_k: 1
    User: 27, Item: 364, Actual_k: 1
    User: 27, Item: 4308, Actual_k: 1
    Predictions with min_support = 3:
    User: 11, Item: 1214, Actual_k: 1
    User: 11, Item: 364, Actual_k: 2
    User: 11, Item: 4308, Actual_k: 1
    User: 11, Item: 527, Actual_k: 2
    User: 13, Item: 1997, Actual_k: 0
    User: 13, Item: 4993, Actual_k: 1
    User: 13, Item: 2700, Actual_k: 0
    User: 13, Item: 1721, Actual_k: 1
    User: 13, Item: 527, Actual_k: 1
    User: 17, Item: 2028, Actual_k: 2
    User: 17, Item: 4993, Actual_k: 2
    User: 17, Item: 1214, Actual_k: 1
    User: 17, Item: 4308, Actual_k: 1
    User: 19, Item: 1997, Actual_k: 0
    User: 19, Item: 2028, Actual_k: 0
    User: 19, Item: 4993, Actual_k: 0
    User: 19, Item: 5952, Actual_k: 0
    User: 19, Item: 2700, Actual_k: 0
    User: 19, Item: 1721, Actual_k: 0
    User: 19, Item: 1214, Actual_k: 0
    User: 19, Item: 364, Actual_k: 0
    User: 23, Item: 1997, Actual_k: 2
    User: 23, Item: 2700, Actual_k: 2
    User: 27, Item: 1997, Actual_k: 0
    User: 27, Item: 2028, Actual_k: 1
    User: 27, Item: 5952, Actual_k: 1
    User: 27, Item: 2700, Actual_k: 0
    User: 27, Item: 1721, Actual_k: 1
    User: 27, Item: 364, Actual_k: 1
    User: 27, Item: 4308, Actual_k: 1
    Similarity Matrix:
    [[1.         0.         0.24615385 0.         0.43243243 0.        ]
     [0.         1.         0.         0.         0.17094017 0.        ]
     [0.24615385 0.         1.         0.         0.53333333 0.        ]
     [0.         0.         0.         1.         0.         0.        ]
     [0.43243243 0.17094017 0.53333333 0.         1.         0.25      ]
     [0.         0.         0.         0.         0.25       1.        ]]
 %% Cell type:markdown id:9fcc525d tags:
 Predictions with min_support = 1:
 The actual_k values vary across different predictions. For instance, for User 11 and Item 1214, actual_k is 1, indicating that only one neighbor was used to estimate this prediction. Conversely, for predictions like User 11 with Item 364, actual_k is 2, indicating that two neighbors were considered in the estimation.
 Predictions with min_support = 2 and min_support = 3:
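 Increasing the min_support threshold to 2 or 3 does not alter the actual_k values at all compared to the predictions with min_support = 1: the three listings above are identical. Note that min_support is only taken into account when the similarity matrix is computed during fit(), and the model was not fitted again after the parameter was changed, so the same similarity matrix, and therefore the same neighbors, were used for all three runs.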
 Understanding actual_k:
 actual_k represents the real number of neighbors (similar users) that were taken into account to estimate the rating of a specific item for a given user. A higher actual_k indicates that more neighbors were involved in the prediction, potentially leading to more robust and reliable estimations of ratings.
 Regarding the similarity matrix (knn_model.sim):
 ########################################## similarity matrix ##########################################
 The similarity matrix provides an overview of the similarities between users. Each element in the matrix represents the similarity score between two users, where higher values indicate greater similarity. For example, a similarity coefficient of 1 on the main diagonal indicates maximum similarity of a user with themselves.
 This similarity matrix is crucial in the recommendation process to identify users who are most similar to a given user, enabling the system to weight ratings effectively and produce personalized and relevant predictions.
 In summary, by adjusting parameters like min_support, we control how predictions are computed using data from similar neighbors, while the similarity matrix offers insights into user similarities that are fundamental for the effective functioning of collaborative filtering-based recommendation systems.
 %% Cell type:markdown id:2dd01f5b tags:
 # 3. Implement and explore a customizable user-based algorithm
 Create a self-made user-based algorithm allowing to customize the similarity metric, peer group calculation and aggregation function
 %% Cell type:code id:d03ed9eb tags:
 ``` python
 class UserBased(AlgoBase):
    """
    User-based collaborative filtering algorithm built on Surprise's AlgoBase.
    A prediction starts from the target user's mean rating and adds the
    similarity-weighted average of the neighbors' mean-centred ratings.
    Similarities are Jaccard indexes over the sets of items rated by two users.
    """

    def __init__(self, k=3, min_k=1, sim_options=None, **kwargs):
        """
        Initialize the UserBased collaborative filtering algorithm.
        Args:
            k (int): Maximum number of neighbors to consider (default: 3).
            min_k (int): Minimum number of neighbors required to use the
                neighbors' ratings in a prediction (default: 1).
            sim_options (dict): Options for similarity computation. Only
                'min_support' is read by this class (default: empty dict).
            **kwargs: Additional keyword arguments forwarded to AlgoBase.
        """
        # One dict per instance: a shared `{}` default would be stored on every
        # instance and any later modification would leak between them.
        if sim_options is None:
            sim_options = {}
        AlgoBase.__init__(self, sim_options=sim_options, **kwargs)
        self.k = k
        self.min_k = min_k
        self.sim_options = sim_options

    def fit(self, trainset):
        """
        Fit the UserBased collaborative filtering model on the training set.
        Args:
            trainset (Trainset): Training dataset containing user-item ratings.
        Returns:
            UserBased: The fitted instance itself.
        """
        AlgoBase.fit(self, trainset)
        # Order matters: the similarity matrix is derived from the ratings matrix.
        self.compute_rating_matrix()
        self.compute_similarity_matrix()
        self.compute_mean_ratings()
        return self

    def estimate(self, u, i):
        """
        Predict the rating for user `u` on item `i`.
        Args:
            u (int): Inner user id.
            i (int): Inner item id.
        Returns:
            float: Predicted rating for user `u` on item `i`. This is the mean
            rating of `u` when fewer than `min_k` neighbors with a positive
            similarity are available.
        Raises:
            PredictionImpossible: If the user or the item is unknown.
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unknown.')
        estimate = self.mean_ratings[u]

        # Step 1: Create the peer group of user u for item i.
        # trainset.ir[i] holds (user_inner_id, rating) pairs, so the neighbor id
        # comes from the pair itself and not from its position in the list.
        peer_group = [
            (neighbor_inner_id, self.sim[u, neighbor_inner_id], rating)
            for neighbor_inner_id, rating in self.trainset.ir[i]
        ]

        # Step 2: Pick up the top k neighbors based on similarity
        k_neighbors = heapq.nlargest(self.k, peer_group, key=lambda x: x[1])

        # Step 3: Compute the weighted average of the mean-centred ratings
        weighted_sum = 0
        total_similarity = 0
        actual_k = 0
        for neighbor_inner_id, similarity, rating in k_neighbors:
            if similarity > 0:  # users without any similarity are not neighbors
                deviation = rating - self.mean_ratings[neighbor_inner_id]
                weighted_sum += similarity * deviation
                total_similarity += similarity
                actual_k += 1
        if actual_k >= self.min_k and total_similarity != 0:
            estimate += weighted_sum / total_similarity
        return estimate

    def compute_rating_matrix(self):
        """
        Compute the ratings matrix from the training set.
        The result is stored in `self.ratings_matrix`: one row per inner user
        id, one column per inner item id, NaN where there is no rating.
        """
        n_users = self.trainset.n_users
        n_items = self.trainset.n_items
        ratings_matrix = np.full((n_users, n_items), np.nan)
        for user_inner_id in range(n_users):
            for item_inner_id, rating in self.trainset.ur[user_inner_id]:
                ratings_matrix[user_inner_id, item_inner_id] = rating
        self.ratings_matrix = ratings_matrix

    def compute_similarity_matrix(self):
        """
        Compute the similarity matrix based on user ratings.
        The similarity of two users is the Jaccard index of their rated items
        (items rated by both / items rated by at least one of them). Pairs with
        fewer than `sim_options['min_support']` common items (default: 1) get a
        similarity of 0. The diagonal is 1. The result is stored in `self.sim`.
        """
        min_support = self.sim_options.get('min_support', 1)
        rated = (~np.isnan(self.ratings_matrix)).astype(int)
        # intersection[a, b] = number of items rated by both a and b
        intersection = rated @ rated.T
        n_rated = rated.sum(axis=1)
        # |A or B| = |A| + |B| - |A and B|
        union = n_rated[:, None] + n_rated[None, :] - intersection
        comparable = (intersection >= min_support) & (union > 0)
        similarity_matrix = np.zeros(intersection.shape)
        np.divide(intersection, union, out=similarity_matrix, where=comparable)
        np.fill_diagonal(similarity_matrix, 1.0)
        self.sim = similarity_matrix

    def compute_mean_ratings(self):
        """
        Compute the mean ratings for each user.
        The result is stored in `self.mean_ratings`, a list indexed by inner
        user id. A user without any rating gets a mean of 0.
        """
        mean_ratings = []
        for user_inner_id in range(self.trainset.n_users):
            ratings = [rating for _, rating in self.trainset.ur[user_inner_id]]
            mean_ratings.append(np.mean(ratings) if ratings else 0)
        self.mean_ratings = mean_ratings
 # Create an instance of UserBased collaborative filtering
 # NOTE(review): `trainset` is not a named parameter of UserBased.__init__; it
 # ends up in **kwargs and is forwarded to AlgoBase.__init__. The training data
 # is actually provided by the fit() call below. k, min_k and sim_options keep
 # their default values.
 user_based_instance = UserBased(trainset=trainset)
 # Fit the model to calculate rating, similarity, and mean rating matrices
 user_based_instance.fit(trainset)
 # Display the ratings matrix (rows: inner user ids, columns: inner item ids,
 # nan: no rating)
 print(user_based_instance.ratings_matrix)
 ```
 %% Output
    [[1.5 4.  5.  4.5 3.  1.  nan nan nan nan]
     [nan 2.  nan 2.  nan nan 1.  5.  4.  nan]
     [5.  nan nan 4.5 3.  1.  nan 1.5 nan 4.5]
     [nan nan nan nan nan nan nan nan 2.  5. ]
     [nan 3.  3.  4.  nan 1.  3.  2.5 1.  3. ]
     [nan nan 5.  nan nan nan 4.  nan nan 5. ]]
 %% Cell type:markdown id:dfdc9cfe tags:
 # 4. Compare KNNWithMeans with UserBased
 Try to replicate KNNWithMeans with your self-made UserBased and check that outcomes are identical
 %% Cell type:code id:7a9147ea tags:
 ``` python
 def compare_predictions(knn_model, user_based_model, testset, num_samples=30):
    """
    Compare predictions between two different collaborative filtering models (KNNWithMeans and UserBased).
    Both models are first switched to k = 5 and min_k = 5 (their attributes are
    modified in place). Each of the first `num_samples` testset entries is then
    predicted by both models, printed side by side, and the mean squared
    difference between the two series of predictions is printed at the end.
    Args:
        knn_model (KNNWithMeans): Trained KNNWithMeans collaborative filtering model.
        user_based_model (UserBased): Trained UserBased collaborative filtering model.
        testset (list): List of testset entries containing (user, item, rating).
        num_samples (int): Number of testset entries to sample for comparison (default: 30).
            Fewer entries are compared when the testset is shorter.
    Returns:
        None. The comparison is printed.
    """
    # Get a subset of the testset for comparison
    test_subset = testset[:num_samples]

    # Use the same neighborhood settings for both models
    for model in (knn_model, user_based_model):
        model.min_k = 5
        model.k = 5

    # Predict each entry with both models at once so that the two predictions
    # of a pair can never get out of step with each other.
    paired_predictions = []
    for uid, iid, _ in test_subset:
        knn_pred_rating = knn_model.predict(uid, iid).est
        try:
            ub_pred_rating = user_based_model.predict(uid, iid).est
        except PredictionImpossible:
            # Prediction not possible (user or item unknown): skip the whole pair
            continue
        paired_predictions.append((uid, iid, knn_pred_rating, ub_pred_rating))

    # Compare predictions
    print("Comparing predictions for the first {} entries in the testset:".format(len(test_subset)))
    if not paired_predictions:
        # Nothing to average: avoid a division by zero
        print("No predictions available for comparison.")
        return

    msd_sum = 0  # Sum of squared differences for MSD
    for uid, iid, knn_pred_rating, ub_pred_rating in paired_predictions:
        # Print results side by side with formatted predictions
        print(f"User: {uid}, Item: {iid} - KNNWithMeans Prediction: {knn_pred_rating:.2f}, UserBased Prediction: {ub_pred_rating:.2f}")
        msd_sum += (knn_pred_rating - ub_pred_rating) ** 2

    # Calculate Mean Squared Difference (MSD) over the pairs actually compared
    msd = msd_sum / len(paired_predictions)
    print(f"\nMean Squared Difference (MSD) between KNNWithMeans and UserBased predictions: {msd:.4f}")
 # Assuming knn_model and user_based_instance are already trained
 knn_model.fit(trainset)
 knn_model.test(testset)
 compare_predictions(knn_model, user_based_instance, testset)
 ```
 %% Output
    Computing the msd similarity matrix...
    Done computing similarity matrix.
    Comparing predictions for the first 30 entries in the testset:
    User: 11, Item: 1214 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
    User: 11, Item: 364 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
    User: 11, Item: 4308 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
    User: 11, Item: 527 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
    User: 13, Item: 1997 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
    User: 13, Item: 4993 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
    User: 13, Item: 2700 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
    User: 13, Item: 1721 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
    User: 13, Item: 527 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
    User: 17, Item: 2028 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
    User: 17, Item: 4993 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
    User: 17, Item: 1214 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
    User: 17, Item: 4308 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
    User: 19, Item: 1997 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 2028 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 4993 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 5952 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 2700 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 1721 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 1214 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 19, Item: 364 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
    User: 23, Item: 1997 - KNNWithMeans Prediction: 2.56, UserBased Prediction: 2.56
    User: 23, Item: 2700 - KNNWithMeans Prediction: 2.56, UserBased Prediction: 2.56
    User: 27, Item: 1997 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    User: 27, Item: 2028 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    User: 27, Item: 5952 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    User: 27, Item: 2700 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    User: 27, Item: 1721 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    User: 27, Item: 364 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    User: 27, Item: 4308 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
    Mean Squared Difference (MSD) between KNNWithMeans and UserBased predictions: 0.0000
 %% Cell type:code id:be53ae27 tags:
 ``` python
 # Step 1 - gather predictions from both algorithms in a single pass over the testset.
 # Every stored entry is a 5-tuple: (uid, iid, true rating, estimated rating, details).
 user_based_predictions, knn_predictions = [], []
 for uid, iid, true_r in testset:
    # UserBased estimate, stored with an empty details dict
    user_based_pred = user_based_instance.predict(uid, iid)
    user_based_predictions.append((uid, iid, true_r, user_based_pred.est, {}))
    # KNNWithMeans estimate, stored together with the details of its prediction
    knn_pred = knn_model.predict(uid, iid)
    knn_predictions.append((uid, iid, true_r, knn_pred.est, knn_pred.details))
 # Step 2 - error metrics (verbose=False so nothing is printed until the report below)
 user_based_mae = accuracy.mae(user_based_predictions, verbose=False)
 knn_mae = accuracy.mae(knn_predictions, verbose=False)
 user_based_rmse = accuracy.rmse(user_based_predictions, verbose=False)
 knn_rmse = accuracy.rmse(knn_predictions, verbose=False)
 # Step 3 - report the four metrics, UserBased first and KNNWithMeans second
 for label, score in (
    ("UserBased MAE:", user_based_mae),
    ("UserBased RMSE:", user_based_rmse),
    ("KNNWithMeans MAE:", knn_mae),
    ("KNNWithMeans RMSE:", knn_rmse),
 ):
    print(label, score)
 ```
 %% Output
    UserBased MAE: 0.5691666666666667
    UserBased RMSE: 0.7916118402067746
    KNNWithMeans MAE: 0.5691666666666667
    KNNWithMeans RMSE: 0.7916118402067746
 %% Cell type:markdown id:cced76d9 tags:
 # 5. Compare MSD and Jaccard
 Compare predictions made with MSD similarity and Jaccard similarity.
 %% Cell type:code id:c20d8e19 tags:
 ``` python
 # Baseline model: KNNBasic using mean squared difference (MSD) similarity
 sim_options_msd = {'name': 'msd'}
 user_based_msd = KNNBasic(sim_options=sim_options_msd)
 user_based_msd.fit(trainset)
 # NOTE: this second model uses COSINE similarity, not Jaccard. Surprise's built-in
 # similarity names are 'cosine', 'msd', 'pearson' and 'pearson_baseline', so a real Jaccard
 # comparison needs a custom similarity. The *_jaccard variable names are kept only so
 # that any other cell referring to them keeps working.
 sim_options_jaccard = {'name': 'cosine'}
 user_based_jaccard = KNNBasic(sim_options=sim_options_jaccard)
 user_based_jaccard.fit(trainset)
 # Make predictions with each model on the test set
 predictions_msd = user_based_msd.test(testset)
 predictions_jaccard = user_based_jaccard.test(testset)
 # Calculate the RMSE of the two models; verbose=False avoids printing each value twice,
 # since they are reported with explicit labels just below
 rmse_msd = accuracy.rmse(predictions_msd, verbose=False)
 rmse_jaccard = accuracy.rmse(predictions_jaccard, verbose=False)
 print("RMSE with MSD similarity:", rmse_msd)
 # Label reflects the similarity actually configured above
 print("RMSE with cosine similarity:", rmse_jaccard)
 ```
 %% Output
    Computing the msd similarity matrix...
    Done computing similarity matrix.
    Computing the cosine similarity matrix...
    Done computing similarity matrix.
    RMSE: 1.0829
    RMSE: 0.9589
    RMSE with MSD similarity: 1.0829450651603574
    RMSE with Jaccard similarity: 0.9588566070964019