Skip to content
Extraits de code Groupes Projets
Valider 46bea813 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update UserBased

parent 4c4184c2
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
%% Cell type:markdown id:f4a8f664 tags: %% Cell type:markdown id:f4a8f664 tags:
# Custom User-based Model # Custom User-based Model
This notebook creates a UserBased class that inherits from the AlgoBase class (surprise package) and that can be customized with various similarity metrics, peer groups and score aggregation functions.
%% Cell type:code id:00d1b249 tags: %% Cell type:code id:00d1b249 tags:
``` python ``` python
# reloads modules automatically before entering the execution of code # reloads modules automatically before entering the execution of code
%load_ext autoreload %load_ext autoreload
%autoreload 2 %autoreload 2
# standard library imports # Standard library imports
# -- add new imports here --
# third parties imports
import numpy as np import numpy as np
import pandas as pd import pandas as pd
# -- add new imports here --
# local imports # Others imports
from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible, KNNBasic
import heapq
# Local imports
from constants import Constant as C from constants import Constant as C
from loaders import load_ratings from loaders import load_ratings
from surprise import KNNWithMeans, accuracy, AlgoBase, PredictionImpossible,KNNBasic
import heapq
``` ```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%% Cell type:markdown id:22716aa3 tags: %% Cell type:markdown id:22716aa3 tags:
# 1. Loading Data # 1. Loading Data
Prepare a dataset in order to help implementing a user-based recommender system Prepare a dataset in order to help implementing a user-based recommender system
%% Cell type:code id:aafd1712 tags: %% Cell type:code id:aafd1712 tags:
``` python ``` python
# Load the ratings in Surprise format, then derive the two sets used below:
# a trainset built from the full dataset and the anti-testset built from it.
surprise_data = load_ratings(surprise_format=True)
trainset = surprise_data.build_full_trainset()
testset = trainset.build_anti_testset()
``` ```
%% Cell type:markdown id:94adf3a6 tags: %% Cell type:markdown id:94adf3a6 tags:
# 2. Explore Surprise's user-based algorithm # 2. Explore Surprise's user-based algorithm
Displays user-based predictions and similarity matrix on the test dataset using the KNNWithMeans class Displays user-based predictions and similarity matrix on the test dataset using the KNNWithMeans class
%% Cell type:code id:ce078b43 tags: %% Cell type:code id:ce078b43 tags:
``` python ``` python
# User-based prediction for user 11 and item 364
sim_options = {
    'name': 'msd',        # Mean Squared Difference similarity
    'user_based': True,   # user-based collaborative filtering
    'min_support': 3,     # minimum number of common ratings required
}

# Build the algorithm, train it, and run it over the anti-testset.
knn_model = KNNWithMeans(sim_options=sim_options, k=3, min_k=2)
knn_model.fit(trainset)
knn_model.test(testset)

# Raw user and item ids, passed here as integers.
uid = 11
iid = 364

pred = knn_model.predict(uid, iid)
print(pred)
``` ```
%% Output %% Output
Computing the msd similarity matrix... Computing the msd similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
user: 11 item: 364 r_ui = None est = 2.49 {'actual_k': 2, 'was_impossible': False} user: 11 item: 364 r_ui = None est = 2.49 {'actual_k': 2, 'was_impossible': False}
%% Cell type:code id:ffe89c56 tags: %% Cell type:code id:ffe89c56 tags:
``` python ``` python
# Playing with KNN # Playing with KNN
# Create an instance of KNNWithMeans with the specified options # Create an instance of KNNWithMeans with the specified options
def predict_ratings(trainset, testset, min_k_values):
    """Train one KNNWithMeans model per ``min_k`` value and print a sample.

    Args:
        trainset: Training set handed to ``fit``.
        testset: Test set handed to ``test``.
        min_k_values: Iterable of ``min_k`` values; one model is built for
            each of them.

    The models use the module-level ``sim_options`` and a fixed ``k=3``.
    For every ``min_k`` the first 30 predictions are printed; nothing is
    returned.
    """
    for min_k in min_k_values:
        model = KNNWithMeans(sim_options=sim_options, k=3, min_k=min_k)
        model.fit(trainset)

        # Predict everything, but only keep the first 30 for display.
        displayed = model.test(testset)[:30]

        print(f"Predictions with min_k = {min_k}:")
        for prediction in displayed:
            print(f"User: {prediction.uid}, Item: {prediction.iid}, Rating: {prediction.est}")
# Assuming trainset and testset are already defined # Assuming trainset and testset are already defined
predict_ratings(trainset, testset, min_k_values=[1, 2, 3]) predict_ratings(trainset, testset, min_k_values=[1, 2, 3])
``` ```
%% Output %% Output
Computing the msd similarity matrix... Computing the msd similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
Predictions with min_k = 1: Predictions with min_k = 1:
User: 11, Item: 1214, Rating: 3.6041666666666665 User: 11, Item: 1214, Rating: 3.6041666666666665
User: 11, Item: 364, Rating: 2.49203431372549 User: 11, Item: 364, Rating: 2.49203431372549
User: 11, Item: 4308, Rating: 1.6041666666666667 User: 11, Item: 4308, Rating: 1.6041666666666667
User: 11, Item: 527, Rating: 3.898897058823529 User: 11, Item: 527, Rating: 3.898897058823529
User: 13, Item: 1997, Rating: 2.8 User: 13, Item: 1997, Rating: 2.8
User: 13, Item: 4993, Rating: 3.2375 User: 13, Item: 4993, Rating: 3.2375
User: 13, Item: 2700, Rating: 2.8 User: 13, Item: 2700, Rating: 2.8
User: 13, Item: 1721, Rating: 1.2374999999999998 User: 13, Item: 1721, Rating: 1.2374999999999998
User: 13, Item: 527, Rating: 3.2375 User: 13, Item: 527, Rating: 3.2375
User: 17, Item: 2028, Rating: 3.8125 User: 17, Item: 2028, Rating: 3.8125
User: 17, Item: 4993, Rating: 4.128289473684211 User: 17, Item: 4993, Rating: 4.128289473684211
User: 17, Item: 1214, Rating: 3.6875 User: 17, Item: 1214, Rating: 3.6875
User: 17, Item: 4308, Rating: 1.6875 User: 17, Item: 4308, Rating: 1.6875
User: 19, Item: 1997, Rating: 3.5 User: 19, Item: 1997, Rating: 3.5
User: 19, Item: 2028, Rating: 3.5 User: 19, Item: 2028, Rating: 3.5
User: 19, Item: 4993, Rating: 3.5 User: 19, Item: 4993, Rating: 3.5
User: 19, Item: 5952, Rating: 3.5 User: 19, Item: 5952, Rating: 3.5
User: 19, Item: 2700, Rating: 3.5 User: 19, Item: 2700, Rating: 3.5
User: 19, Item: 1721, Rating: 3.5 User: 19, Item: 1721, Rating: 3.5
User: 19, Item: 1214, Rating: 3.5 User: 19, Item: 1214, Rating: 3.5
User: 19, Item: 364, Rating: 3.5 User: 19, Item: 364, Rating: 3.5
User: 23, Item: 1997, Rating: 2.782649253731343 User: 23, Item: 1997, Rating: 2.782649253731343
User: 23, Item: 2700, Rating: 2.349813432835821 User: 23, Item: 2700, Rating: 2.349813432835821
User: 27, Item: 1997, Rating: 4.666666666666667 User: 27, Item: 1997, Rating: 4.666666666666667
User: 27, Item: 2028, Rating: 5.0 User: 27, Item: 2028, Rating: 5.0
User: 27, Item: 5952, Rating: 5.0 User: 27, Item: 5952, Rating: 5.0
User: 27, Item: 2700, Rating: 4.666666666666667 User: 27, Item: 2700, Rating: 4.666666666666667
User: 27, Item: 1721, Rating: 3.104166666666667 User: 27, Item: 1721, Rating: 3.104166666666667
User: 27, Item: 364, Rating: 4.604166666666667 User: 27, Item: 364, Rating: 4.604166666666667
User: 27, Item: 4308, Rating: 3.104166666666667 User: 27, Item: 4308, Rating: 3.104166666666667
Computing the msd similarity matrix... Computing the msd similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
Predictions with min_k = 2: Predictions with min_k = 2:
User: 11, Item: 1214, Rating: 3.1666666666666665 User: 11, Item: 1214, Rating: 3.1666666666666665
User: 11, Item: 364, Rating: 2.49203431372549 User: 11, Item: 364, Rating: 2.49203431372549
User: 11, Item: 4308, Rating: 3.1666666666666665 User: 11, Item: 4308, Rating: 3.1666666666666665
User: 11, Item: 527, Rating: 3.898897058823529 User: 11, Item: 527, Rating: 3.898897058823529
User: 13, Item: 1997, Rating: 2.8 User: 13, Item: 1997, Rating: 2.8
User: 13, Item: 4993, Rating: 2.8 User: 13, Item: 4993, Rating: 2.8
User: 13, Item: 2700, Rating: 2.8 User: 13, Item: 2700, Rating: 2.8
User: 13, Item: 1721, Rating: 2.8 User: 13, Item: 1721, Rating: 2.8
User: 13, Item: 527, Rating: 2.8 User: 13, Item: 527, Rating: 2.8
User: 17, Item: 2028, Rating: 3.8125 User: 17, Item: 2028, Rating: 3.8125
User: 17, Item: 4993, Rating: 4.128289473684211 User: 17, Item: 4993, Rating: 4.128289473684211
User: 17, Item: 1214, Rating: 3.25 User: 17, Item: 1214, Rating: 3.25
User: 17, Item: 4308, Rating: 3.25 User: 17, Item: 4308, Rating: 3.25
User: 19, Item: 1997, Rating: 3.5 User: 19, Item: 1997, Rating: 3.5
User: 19, Item: 2028, Rating: 3.5 User: 19, Item: 2028, Rating: 3.5
User: 19, Item: 4993, Rating: 3.5 User: 19, Item: 4993, Rating: 3.5
User: 19, Item: 5952, Rating: 3.5 User: 19, Item: 5952, Rating: 3.5
User: 19, Item: 2700, Rating: 3.5 User: 19, Item: 2700, Rating: 3.5
User: 19, Item: 1721, Rating: 3.5 User: 19, Item: 1721, Rating: 3.5
User: 19, Item: 1214, Rating: 3.5 User: 19, Item: 1214, Rating: 3.5
User: 19, Item: 364, Rating: 3.5 User: 19, Item: 364, Rating: 3.5
User: 23, Item: 1997, Rating: 2.782649253731343 User: 23, Item: 1997, Rating: 2.782649253731343
User: 23, Item: 2700, Rating: 2.349813432835821 User: 23, Item: 2700, Rating: 2.349813432835821
User: 27, Item: 1997, Rating: 4.666666666666667 User: 27, Item: 1997, Rating: 4.666666666666667
User: 27, Item: 2028, Rating: 4.666666666666667 User: 27, Item: 2028, Rating: 4.666666666666667
User: 27, Item: 5952, Rating: 4.666666666666667 User: 27, Item: 5952, Rating: 4.666666666666667
User: 27, Item: 2700, Rating: 4.666666666666667 User: 27, Item: 2700, Rating: 4.666666666666667
User: 27, Item: 1721, Rating: 4.666666666666667 User: 27, Item: 1721, Rating: 4.666666666666667
User: 27, Item: 364, Rating: 4.666666666666667 User: 27, Item: 364, Rating: 4.666666666666667
User: 27, Item: 4308, Rating: 4.666666666666667 User: 27, Item: 4308, Rating: 4.666666666666667
Computing the msd similarity matrix... Computing the msd similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
Predictions with min_k = 3: Predictions with min_k = 3:
User: 11, Item: 1214, Rating: 3.1666666666666665 User: 11, Item: 1214, Rating: 3.1666666666666665
User: 11, Item: 364, Rating: 3.1666666666666665 User: 11, Item: 364, Rating: 3.1666666666666665
User: 11, Item: 4308, Rating: 3.1666666666666665 User: 11, Item: 4308, Rating: 3.1666666666666665
User: 11, Item: 527, Rating: 3.1666666666666665 User: 11, Item: 527, Rating: 3.1666666666666665
User: 13, Item: 1997, Rating: 2.8 User: 13, Item: 1997, Rating: 2.8
User: 13, Item: 4993, Rating: 2.8 User: 13, Item: 4993, Rating: 2.8
User: 13, Item: 2700, Rating: 2.8 User: 13, Item: 2700, Rating: 2.8
User: 13, Item: 1721, Rating: 2.8 User: 13, Item: 1721, Rating: 2.8
User: 13, Item: 527, Rating: 2.8 User: 13, Item: 527, Rating: 2.8
User: 17, Item: 2028, Rating: 3.25 User: 17, Item: 2028, Rating: 3.25
User: 17, Item: 4993, Rating: 3.25 User: 17, Item: 4993, Rating: 3.25
User: 17, Item: 1214, Rating: 3.25 User: 17, Item: 1214, Rating: 3.25
User: 17, Item: 4308, Rating: 3.25 User: 17, Item: 4308, Rating: 3.25
User: 19, Item: 1997, Rating: 3.5 User: 19, Item: 1997, Rating: 3.5
User: 19, Item: 2028, Rating: 3.5 User: 19, Item: 2028, Rating: 3.5
User: 19, Item: 4993, Rating: 3.5 User: 19, Item: 4993, Rating: 3.5
User: 19, Item: 5952, Rating: 3.5 User: 19, Item: 5952, Rating: 3.5
User: 19, Item: 2700, Rating: 3.5 User: 19, Item: 2700, Rating: 3.5
User: 19, Item: 1721, Rating: 3.5 User: 19, Item: 1721, Rating: 3.5
User: 19, Item: 1214, Rating: 3.5 User: 19, Item: 1214, Rating: 3.5
User: 19, Item: 364, Rating: 3.5 User: 19, Item: 364, Rating: 3.5
User: 23, Item: 1997, Rating: 2.5625 User: 23, Item: 1997, Rating: 2.5625
User: 23, Item: 2700, Rating: 2.5625 User: 23, Item: 2700, Rating: 2.5625
User: 27, Item: 1997, Rating: 4.666666666666667 User: 27, Item: 1997, Rating: 4.666666666666667
User: 27, Item: 2028, Rating: 4.666666666666667 User: 27, Item: 2028, Rating: 4.666666666666667
User: 27, Item: 5952, Rating: 4.666666666666667 User: 27, Item: 5952, Rating: 4.666666666666667
User: 27, Item: 2700, Rating: 4.666666666666667 User: 27, Item: 2700, Rating: 4.666666666666667
User: 27, Item: 1721, Rating: 4.666666666666667 User: 27, Item: 1721, Rating: 4.666666666666667
User: 27, Item: 364, Rating: 4.666666666666667 User: 27, Item: 364, Rating: 4.666666666666667
User: 27, Item: 4308, Rating: 4.666666666666667 User: 27, Item: 4308, Rating: 4.666666666666667
%% Cell type:markdown id:c8890e11 tags: %% Cell type:markdown id:c8890e11 tags:
The change in the min_k parameter from 1 to 3 in the predictions has a significant impact on how estimated ratings are computed and subsequently affects the performance of the recommendation system. Let's delve into this transition and its implications.
Initially, with min_k = 1, predictions are generated even if only a single similar user (neighbor) has rated a particular item. This approach can lead to predictions that might not accurately represent the item's true rating, especially if the rating from the sole available neighbor is an outlier or not representative of the broader user preferences. Initially, with min_k = 1, predictions are generated even if only a single similar user (neighbor) has rated a particular item. This approach can lead to predictions that might not accurately represent the item's true rating, especially if the rating from the sole available neighbor is an outlier or not representative of the broader user preferences.
For example, consider User 11's predictions for items 1214 and 364. Under min_k = 1, the predictions were 3.604 and 2.492, respectively. When min_k is increased to 3, both become 3.167: the first one moves down and the second one moves up. The two predictions end up equal because fewer than 3 neighbors are available for these items, so KNNWithMeans ignores the neighbors and falls back to the user's mean rating. This indicates that the initial estimates were driven by only one or two similar users, which can lead to more volatile or less reliable predictions.
Similarly, for User 23's predictions on items 1997 and 2700, transitioning from min_k = 1 to min_k = 3 changes 2.782 and 2.349 to 2.5625 for both items (the first decreases, the second increases). Here again the two estimates collapse to the same fallback value because too few neighbors are available, which suggests that the initial estimates were based on limited or potentially biased data and are replaced by a more conservative value under min_k = 3.
The rationale behind this change lies in the nature of the min_k parameter. With min_k = 3, at least 3 similar users must have rated an item for their ratings to be aggregated; when fewer are available a prediction is still returned, but it falls back to the user's mean rating. This adjustment aims to provide more stable and reliable predictions by relying on a broader consensus among users with similar preferences.
By enforcing a higher min_k, the system adopts a more cautious approach to estimating ratings, particularly for items with sparse or potentially biased rating data. This approach helps mitigate the impact of outliers or sparse data in the recommendation system, leading to more consistent and credible predictions overall. By enforcing a higher min_k, the system adopts a more cautious approach to estimating ratings, particularly for items with sparse or potentially biased rating data. This approach helps mitigate the impact of outliers or sparse data in the recommendation system, leading to more consistent and credible predictions overall.
In summary, adjusting the min_k parameter from 1 to 3 signifies a shift towards more conservative and reliable estimates of item ratings within the recommendation system. This adjustment aims to enhance the accuracy and robustness of the system's predictions by requiring a broader consensus among similar users before making predictions, thereby improving the overall quality and reliability of recommendations provided to users. In summary, adjusting the min_k parameter from 1 to 3 signifies a shift towards more conservative and reliable estimates of item ratings within the recommendation system. This adjustment aims to enhance the accuracy and robustness of the system's predictions by requiring a broader consensus among similar users before making predictions, thereby improving the overall quality and reliability of recommendations provided to users.
%% Cell type:code id:cc806424 tags: %% Cell type:code id:cc806424 tags:
``` python ``` python
def analyse_min_support(knn_model, testset):
    """Print ``actual_k`` for several ``min_support`` values.

    Args:
        knn_model: An already fitted KNN model. It is modified in place:
            ``min_k`` is set to 2, ``sim_options['min_support']`` ends at 3,
            and the model is refitted on its own ``trainset`` for each value.
        testset: Sequence of test tuples; only the first 30 are predicted.

    Returns:
        The similarity matrix (``knn_model.sim``) of the last fit, i.e. the
        one computed with ``min_support = 3``.
    """
    # Reset min_k to 2
    knn_model.min_k = 2

    # Only the first 30 entries are used for display.
    sample = testset[:30]

    # Modify min_support from 1 to 3 and observe actual_k
    for min_support in range(1, 4):
        knn_model.sim_options['min_support'] = min_support
        # min_support is only taken into account while the similarity matrix
        # is computed, so the model has to be refitted for the new value to
        # have any effect on the predictions.
        knn_model.fit(knn_model.trainset)
        predictions_min_support = knn_model.test(sample)
        print(f"\nPredictions with min_support = {min_support}:")
        for prediction in predictions_min_support:
            actual_k = prediction.details['actual_k']
            print(f"User: {prediction.uid}, Item: {prediction.iid}, Actual_k: {actual_k}")

    # Visualize the similarity matrix
    similarity_matrix = knn_model.sim
    print("\nSimilarity Matrix:")
    return similarity_matrix
# Call the function and print the analysis # Call the function and print the analysis
result = analyse_min_support(knn_model, testset) result = analyse_min_support(knn_model, testset)
print(result) print(result)
``` ```
%% Output %% Output
Predictions with min_support = 1: Predictions with min_support = 1:
User: 11, Item: 1214, Actual_k: 1 User: 11, Item: 1214, Actual_k: 1
User: 11, Item: 364, Actual_k: 2 User: 11, Item: 364, Actual_k: 2
User: 11, Item: 4308, Actual_k: 1 User: 11, Item: 4308, Actual_k: 1
User: 11, Item: 527, Actual_k: 2 User: 11, Item: 527, Actual_k: 2
User: 13, Item: 1997, Actual_k: 0 User: 13, Item: 1997, Actual_k: 0
User: 13, Item: 4993, Actual_k: 1 User: 13, Item: 4993, Actual_k: 1
User: 13, Item: 2700, Actual_k: 0 User: 13, Item: 2700, Actual_k: 0
User: 13, Item: 1721, Actual_k: 1 User: 13, Item: 1721, Actual_k: 1
User: 13, Item: 527, Actual_k: 1 User: 13, Item: 527, Actual_k: 1
User: 17, Item: 2028, Actual_k: 2 User: 17, Item: 2028, Actual_k: 2
User: 17, Item: 4993, Actual_k: 2 User: 17, Item: 4993, Actual_k: 2
User: 17, Item: 1214, Actual_k: 1 User: 17, Item: 1214, Actual_k: 1
User: 17, Item: 4308, Actual_k: 1 User: 17, Item: 4308, Actual_k: 1
User: 19, Item: 1997, Actual_k: 0 User: 19, Item: 1997, Actual_k: 0
User: 19, Item: 2028, Actual_k: 0 User: 19, Item: 2028, Actual_k: 0
User: 19, Item: 4993, Actual_k: 0 User: 19, Item: 4993, Actual_k: 0
User: 19, Item: 5952, Actual_k: 0 User: 19, Item: 5952, Actual_k: 0
User: 19, Item: 2700, Actual_k: 0 User: 19, Item: 2700, Actual_k: 0
User: 19, Item: 1721, Actual_k: 0 User: 19, Item: 1721, Actual_k: 0
User: 19, Item: 1214, Actual_k: 0 User: 19, Item: 1214, Actual_k: 0
User: 19, Item: 364, Actual_k: 0 User: 19, Item: 364, Actual_k: 0
User: 23, Item: 1997, Actual_k: 2 User: 23, Item: 1997, Actual_k: 2
User: 23, Item: 2700, Actual_k: 2 User: 23, Item: 2700, Actual_k: 2
User: 27, Item: 1997, Actual_k: 0 User: 27, Item: 1997, Actual_k: 0
User: 27, Item: 2028, Actual_k: 1 User: 27, Item: 2028, Actual_k: 1
User: 27, Item: 5952, Actual_k: 1 User: 27, Item: 5952, Actual_k: 1
User: 27, Item: 2700, Actual_k: 0 User: 27, Item: 2700, Actual_k: 0
User: 27, Item: 1721, Actual_k: 1 User: 27, Item: 1721, Actual_k: 1
User: 27, Item: 364, Actual_k: 1 User: 27, Item: 364, Actual_k: 1
User: 27, Item: 4308, Actual_k: 1 User: 27, Item: 4308, Actual_k: 1
Predictions with min_support = 2: Predictions with min_support = 2:
User: 11, Item: 1214, Actual_k: 1 User: 11, Item: 1214, Actual_k: 1
User: 11, Item: 364, Actual_k: 2 User: 11, Item: 364, Actual_k: 2
User: 11, Item: 4308, Actual_k: 1 User: 11, Item: 4308, Actual_k: 1
User: 11, Item: 527, Actual_k: 2 User: 11, Item: 527, Actual_k: 2
User: 13, Item: 1997, Actual_k: 0 User: 13, Item: 1997, Actual_k: 0
User: 13, Item: 4993, Actual_k: 1 User: 13, Item: 4993, Actual_k: 1
User: 13, Item: 2700, Actual_k: 0 User: 13, Item: 2700, Actual_k: 0
User: 13, Item: 1721, Actual_k: 1 User: 13, Item: 1721, Actual_k: 1
User: 13, Item: 527, Actual_k: 1 User: 13, Item: 527, Actual_k: 1
User: 17, Item: 2028, Actual_k: 2 User: 17, Item: 2028, Actual_k: 2
User: 17, Item: 4993, Actual_k: 2 User: 17, Item: 4993, Actual_k: 2
User: 17, Item: 1214, Actual_k: 1 User: 17, Item: 1214, Actual_k: 1
User: 17, Item: 4308, Actual_k: 1 User: 17, Item: 4308, Actual_k: 1
User: 19, Item: 1997, Actual_k: 0 User: 19, Item: 1997, Actual_k: 0
User: 19, Item: 2028, Actual_k: 0 User: 19, Item: 2028, Actual_k: 0
User: 19, Item: 4993, Actual_k: 0 User: 19, Item: 4993, Actual_k: 0
User: 19, Item: 5952, Actual_k: 0 User: 19, Item: 5952, Actual_k: 0
User: 19, Item: 2700, Actual_k: 0 User: 19, Item: 2700, Actual_k: 0
User: 19, Item: 1721, Actual_k: 0 User: 19, Item: 1721, Actual_k: 0
User: 19, Item: 1214, Actual_k: 0 User: 19, Item: 1214, Actual_k: 0
User: 19, Item: 364, Actual_k: 0 User: 19, Item: 364, Actual_k: 0
User: 23, Item: 1997, Actual_k: 2 User: 23, Item: 1997, Actual_k: 2
User: 23, Item: 2700, Actual_k: 2 User: 23, Item: 2700, Actual_k: 2
User: 27, Item: 1997, Actual_k: 0 User: 27, Item: 1997, Actual_k: 0
User: 27, Item: 2028, Actual_k: 1 User: 27, Item: 2028, Actual_k: 1
User: 27, Item: 5952, Actual_k: 1 User: 27, Item: 5952, Actual_k: 1
User: 27, Item: 2700, Actual_k: 0 User: 27, Item: 2700, Actual_k: 0
User: 27, Item: 1721, Actual_k: 1 User: 27, Item: 1721, Actual_k: 1
User: 27, Item: 364, Actual_k: 1 User: 27, Item: 364, Actual_k: 1
User: 27, Item: 4308, Actual_k: 1 User: 27, Item: 4308, Actual_k: 1
Predictions with min_support = 3: Predictions with min_support = 3:
User: 11, Item: 1214, Actual_k: 1 User: 11, Item: 1214, Actual_k: 1
User: 11, Item: 364, Actual_k: 2 User: 11, Item: 364, Actual_k: 2
User: 11, Item: 4308, Actual_k: 1 User: 11, Item: 4308, Actual_k: 1
User: 11, Item: 527, Actual_k: 2 User: 11, Item: 527, Actual_k: 2
User: 13, Item: 1997, Actual_k: 0 User: 13, Item: 1997, Actual_k: 0
User: 13, Item: 4993, Actual_k: 1 User: 13, Item: 4993, Actual_k: 1
User: 13, Item: 2700, Actual_k: 0 User: 13, Item: 2700, Actual_k: 0
User: 13, Item: 1721, Actual_k: 1 User: 13, Item: 1721, Actual_k: 1
User: 13, Item: 527, Actual_k: 1 User: 13, Item: 527, Actual_k: 1
User: 17, Item: 2028, Actual_k: 2 User: 17, Item: 2028, Actual_k: 2
User: 17, Item: 4993, Actual_k: 2 User: 17, Item: 4993, Actual_k: 2
User: 17, Item: 1214, Actual_k: 1 User: 17, Item: 1214, Actual_k: 1
User: 17, Item: 4308, Actual_k: 1 User: 17, Item: 4308, Actual_k: 1
User: 19, Item: 1997, Actual_k: 0 User: 19, Item: 1997, Actual_k: 0
User: 19, Item: 2028, Actual_k: 0 User: 19, Item: 2028, Actual_k: 0
User: 19, Item: 4993, Actual_k: 0 User: 19, Item: 4993, Actual_k: 0
User: 19, Item: 5952, Actual_k: 0 User: 19, Item: 5952, Actual_k: 0
User: 19, Item: 2700, Actual_k: 0 User: 19, Item: 2700, Actual_k: 0
User: 19, Item: 1721, Actual_k: 0 User: 19, Item: 1721, Actual_k: 0
User: 19, Item: 1214, Actual_k: 0 User: 19, Item: 1214, Actual_k: 0
User: 19, Item: 364, Actual_k: 0 User: 19, Item: 364, Actual_k: 0
User: 23, Item: 1997, Actual_k: 2 User: 23, Item: 1997, Actual_k: 2
User: 23, Item: 2700, Actual_k: 2 User: 23, Item: 2700, Actual_k: 2
User: 27, Item: 1997, Actual_k: 0 User: 27, Item: 1997, Actual_k: 0
User: 27, Item: 2028, Actual_k: 1 User: 27, Item: 2028, Actual_k: 1
User: 27, Item: 5952, Actual_k: 1 User: 27, Item: 5952, Actual_k: 1
User: 27, Item: 2700, Actual_k: 0 User: 27, Item: 2700, Actual_k: 0
User: 27, Item: 1721, Actual_k: 1 User: 27, Item: 1721, Actual_k: 1
User: 27, Item: 364, Actual_k: 1 User: 27, Item: 364, Actual_k: 1
User: 27, Item: 4308, Actual_k: 1 User: 27, Item: 4308, Actual_k: 1
Similarity Matrix: Similarity Matrix:
[[1. 0. 0.24615385 0. 0.43243243 0. ] [[1. 0. 0.24615385 0. 0.43243243 0. ]
[0. 1. 0. 0. 0.17094017 0. ] [0. 1. 0. 0. 0.17094017 0. ]
[0.24615385 0. 1. 0. 0.53333333 0. ] [0.24615385 0. 1. 0. 0.53333333 0. ]
[0. 0. 0. 1. 0. 0. ] [0. 0. 0. 1. 0. 0. ]
[0.43243243 0.17094017 0.53333333 0. 1. 0.25 ] [0.43243243 0.17094017 0.53333333 0. 1. 0.25 ]
[0. 0. 0. 0. 0.25 1. ]] [0. 0. 0. 0. 0.25 1. ]]
%% Cell type:markdown id:9fcc525d tags: %% Cell type:markdown id:9fcc525d tags:
Predictions with min_support = 1: Predictions with min_support = 1:
The actual_k values vary across different predictions. For instance, for User 11 and Item 1214, actual_k is 1, indicating that only one neighbor was used to estimate this prediction. Conversely, for predictions like User 11 with Item 364, actual_k is 2, indicating that two neighbors were considered in the estimation. The actual_k values vary across different predictions. For instance, for User 11 and Item 1214, actual_k is 1, indicating that only one neighbor was used to estimate this prediction. Conversely, for predictions like User 11 with Item 364, actual_k is 2, indicating that two neighbors were considered in the estimation.
Predictions with min_support = 2 and min_support = 3: Predictions with min_support = 2 and min_support = 3:
In the output above, increasing the min_support threshold to 2 or 3 does not change the actual_k values at all compared to min_support = 1. Note, however, that min_support is only used when the similarity matrix is computed during fit, and the model is not refitted after sim_options is modified: the three runs therefore all use the similarity matrix computed with the original min_support = 3. The identical values consequently do not show that min_support has no influence on actual_k; the model must be refitted for each value to observe its effect.
Understanding actual_k: Understanding actual_k:
actual_k represents the real number of neighbors (similar users) that were taken into account to estimate the rating of a specific item for a given user. A higher actual_k indicates that more neighbors were involved in the prediction, potentially leading to more robust and reliable estimations of ratings. actual_k represents the real number of neighbors (similar users) that were taken into account to estimate the rating of a specific item for a given user. A higher actual_k indicates that more neighbors were involved in the prediction, potentially leading to more robust and reliable estimations of ratings.
Regarding the similarity matrix (algo.sim): Regarding the similarity matrix (algo.sim):
########################################## similarity matrix ########################################## ########################################## similarity matrix ##########################################
The similarity matrix provides an overview of the similarities between users. Each element in the matrix represents the similarity score between two users, where higher values indicate greater similarity. For example, a similarity coefficient of 1 on the main diagonal indicates maximum similarity of a user with themselves. The similarity matrix provides an overview of the similarities between users. Each element in the matrix represents the similarity score between two users, where higher values indicate greater similarity. For example, a similarity coefficient of 1 on the main diagonal indicates maximum similarity of a user with themselves.
This similarity matrix is crucial in the recommendation process to identify users who are most similar to a given user, enabling the system to weight ratings effectively and produce personalized and relevant predictions. This similarity matrix is crucial in the recommendation process to identify users who are most similar to a given user, enabling the system to weight ratings effectively and produce personalized and relevant predictions.
In summary, by adjusting parameters like min_support, we control how predictions are computed using data from similar neighbors, while the similarity matrix offers insights into user similarities that are fundamental for the effective functioning of collaborative filtering-based recommendation systems. In summary, by adjusting parameters like min_support, we control how predictions are computed using data from similar neighbors, while the similarity matrix offers insights into user similarities that are fundamental for the effective functioning of collaborative filtering-based recommendation systems.
%% Cell type:markdown id:2dd01f5b tags: %% Cell type:markdown id:2dd01f5b tags:
# 3. Implement and explore a customizable user-based algorithm # 3. Implement and explore a customizable user-based algorithm
Create a self-made user-based algorithm allowing to customize the similarity metric, peer group calculation and aggregation function Create a self-made user-based algorithm allowing to customize the similarity metric, peer group calculation and aggregation function
%% Cell type:code id:d03ed9eb tags: %% Cell type:code id:d03ed9eb tags:
``` python ``` python
class UserBased(AlgoBase):
    """User-based k-NN collaborative filtering with mean-centred ratings.

    Users are compared with the Jaccard index of the sets of items they
    rated. A prediction is the target user's mean rating plus the
    similarity-weighted average of the neighbours' deviations from their
    own mean ratings.
    """

    def __init__(self, k=3, min_k=1, sim_options=None, **kwargs):
        """
        Initialize the UserBased collaborative filtering algorithm.

        Args:
            k (int): Maximum number of neighbors to aggregate (default: 3).
            min_k (int): Minimum number of neighbors required to adjust the
                user's mean rating (default: 1).
            sim_options (dict): Options for similarity computation. The only
                key read by this class is ``'min_support'`` (default: None,
                treated as an empty dict).
            **kwargs: Additional keyword arguments forwarded to AlgoBase.
        """
        # Work on a private copy so that neither a shared default nor the
        # caller's dict can be modified through this instance.
        sim_options = {} if sim_options is None else dict(sim_options)
        AlgoBase.__init__(self, sim_options=sim_options, **kwargs)
        self.k = k
        self.min_k = min_k
        self.sim_options = sim_options

    def fit(self, trainset):
        """
        Fit the UserBased collaborative filtering model on the training set.

        Args:
            trainset (Trainset): Training dataset containing user-item ratings.

        Returns:
            UserBased: The fitted model, so calls can be chained.
        """
        AlgoBase.fit(self, trainset)
        self.compute_rating_matrix()
        self.compute_similarity_matrix()
        self.compute_mean_ratings()
        return self

    def estimate(self, u, i):
        """
        Predict the rating of user `u` for item `i`.

        Args:
            u (int): Inner id of the user.
            i (int): Inner id of the item.

        Returns:
            float: The user's mean rating, adjusted by the neighbours'
            mean-centred ratings when at least `min_k` neighbours with a
            positive similarity rated the item.

        Raises:
            PredictionImpossible: If the user or the item is not part of
                the training set.
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unknown.')

        estimate = self.mean_ratings[u]

        # Step 1: peer group = every user who rated item i.
        # trainset.ir[i] is a list of (user_inner_id, rating) pairs.
        peer_group = [
            (neighbor, self.sim[u, neighbor], rating)
            for neighbor, rating in self.trainset.ir[i]
        ]

        # Step 2: keep the k most similar neighbours.
        k_neighbors = heapq.nlargest(self.k, peer_group, key=lambda peer: peer[1])

        # Step 3: similarity-weighted average of mean-centred ratings.
        weighted_sum = 0.0
        total_similarity = 0.0
        actual_k = 0
        for neighbor, similarity, rating in k_neighbors:
            if similarity > 0:
                weighted_sum += similarity * (rating - self.mean_ratings[neighbor])
                total_similarity += similarity
                actual_k += 1

        # With too few useful neighbours, fall back to the user's mean.
        if actual_k >= self.min_k and total_similarity > 0:
            estimate += weighted_sum / total_similarity
        return estimate

    def compute_rating_matrix(self):
        """
        Build the dense (n_users x n_items) rating matrix of the training set.

        Missing ratings are stored as NaN. The result is saved in
        `self.ratings_matrix`.
        """
        trainset = self.trainset
        ratings_matrix = np.full((trainset.n_users, trainset.n_items), np.nan)
        for user, user_ratings in trainset.ur.items():
            for item, rating in user_ratings:
                ratings_matrix[user, item] = rating
        self.ratings_matrix = ratings_matrix

    def compute_similarity_matrix(self):
        """
        Compute the user-user Jaccard similarity matrix.

        The similarity of two users is the number of items both rated
        divided by the number of items at least one of them rated. Pairs
        with fewer co-rated items than the support threshold get 0; the
        diagonal is 1. The threshold is ``sim_options['min_support']`` and
        falls back to `min_k`, the value this class used before the option
        was read. The result is saved in `self.sim`.
        """
        min_support = self.sim_options.get('min_support', self.min_k)

        # rated[u, i] is 1.0 when user u rated item i, else 0.0.
        rated = (~np.isnan(self.ratings_matrix)).astype(float)
        counts = rated.sum(axis=1)
        intersection = rated @ rated.T
        union = counts[:, None] + counts[None, :] - intersection

        similarity_matrix = np.zeros_like(intersection)
        # union > 0 guards the division for pairs of users without ratings.
        valid = (intersection >= min_support) & (union > 0)
        similarity_matrix[valid] = intersection[valid] / union[valid]
        np.fill_diagonal(similarity_matrix, 1.0)
        self.sim = similarity_matrix

    def compute_mean_ratings(self):
        """
        Compute the mean rating of each user.

        The result is saved in `self.mean_ratings`, a list indexed by user
        inner id; a user without ratings gets 0.
        """
        user_ratings = self.trainset.ur
        mean_ratings = []
        for user in range(self.trainset.n_users):
            ratings = [rating for _, rating in user_ratings[user]]
            mean_ratings.append(np.mean(ratings) if ratings else 0)
        self.mean_ratings = mean_ratings
# Create an instance of UserBased collaborative filtering.
# The training data is supplied to fit(), not to the constructor.
user_based_instance = UserBased()

# Fit the model to calculate rating, similarity, and mean rating matrices
user_based_instance.fit(trainset)

# Display the ratings matrix (NaN marks a missing rating)
print(user_based_instance.ratings_matrix)
``` ```
%% Output %% Output
[[1.5 4. 5. 4.5 3. 1. nan nan nan nan] [[1.5 4. 5. 4.5 3. 1. nan nan nan nan]
[nan 2. nan 2. nan nan 1. 5. 4. nan] [nan 2. nan 2. nan nan 1. 5. 4. nan]
[5. nan nan 4.5 3. 1. nan 1.5 nan 4.5] [5. nan nan 4.5 3. 1. nan 1.5 nan 4.5]
[nan nan nan nan nan nan nan nan 2. 5. ] [nan nan nan nan nan nan nan nan 2. 5. ]
[nan 3. 3. 4. nan 1. 3. 2.5 1. 3. ] [nan 3. 3. 4. nan 1. 3. 2.5 1. 3. ]
[nan nan 5. nan nan nan 4. nan nan 5. ]] [nan nan 5. nan nan nan 4. nan nan 5. ]]
%% Cell type:markdown id:dfdc9cfe tags: %% Cell type:markdown id:dfdc9cfe tags:
# 4. Compare KNNWithMeans with UserBased # 4. Compare KNNWithMeans with UserBased
Try to replicate KNNWithMeans with your self-made UserBased and check that outcomes are identical Try to replicate KNNWithMeans with your self-made UserBased and check that outcomes are identical
%% Cell type:code id:7a9147ea tags: %% Cell type:code id:7a9147ea tags:
``` python ``` python
def compare_predictions(knn_model, user_based_model, testset, num_samples=30):
    """
    Compare the predictions of two collaborative filtering models side by side.

    Both models are asked to predict the first `num_samples` entries of
    `testset`; each pair of estimates is printed, followed by the mean
    squared difference between them. Nothing is returned.

    Note: the function sets `k` and `min_k` to 5 on BOTH models so that
    they use the same neighbourhood settings; the change persists after
    the call.

    Args:
        knn_model (KNNWithMeans): Trained KNNWithMeans collaborative filtering model.
        user_based_model (UserBased): Trained UserBased collaborative filtering model.
        testset (list): List of testset entries containing (user, item, rating).
        num_samples (int): Maximum number of testset entries to compare (default: 30).
    """
    # A testset shorter than num_samples simply yields fewer comparisons.
    test_subset = testset[:num_samples]

    for model in (knn_model, user_based_model):
        model.min_k = 5
        model.k = 5

    # Predict with both models entry by entry so the pairs always line up.
    compared = []
    for uid, iid, _ in test_subset:
        try:
            knn_pred_rating = knn_model.predict(uid, iid).est
            ub_pred_rating = user_based_model.predict(uid, iid).est
        except PredictionImpossible:
            # Skip the entry for both models rather than misaligning them.
            continue
        compared.append((uid, iid, knn_pred_rating, ub_pred_rating))

    print("Comparing predictions for the first {} entries in the testset:".format(len(test_subset)))
    if not compared:
        print("No predictions could be compared.")
        return

    msd_sum = 0  # Sum of squared differences between the two estimates
    for uid, iid, knn_pred_rating, ub_pred_rating in compared:
        print(f"User: {uid}, Item: {iid} - KNNWithMeans Prediction: {knn_pred_rating:.2f}, UserBased Prediction: {ub_pred_rating:.2f}")
        msd_sum += (knn_pred_rating - ub_pred_rating) ** 2

    # Average over the pairs actually compared, not the requested count.
    msd = msd_sum / len(compared)
    print(f"\nMean Squared Difference (MSD) between KNNWithMeans and UserBased predictions: {msd:.4f}")
# (Re)fit the KNNWithMeans model on the training set; user_based_instance
# is expected to be fitted already.
knn_model.fit(trainset)

# Print both models' predictions side by side
compare_predictions(knn_model, user_based_instance, testset)
``` ```
%% Output %% Output
Computing the msd similarity matrix... Computing the msd similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
Comparing predictions for the first 30 entries in the testset: Comparing predictions for the first 30 entries in the testset:
User: 11, Item: 1214 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17 User: 11, Item: 1214 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
User: 11, Item: 364 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17 User: 11, Item: 364 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
User: 11, Item: 4308 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17 User: 11, Item: 4308 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
User: 11, Item: 527 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17 User: 11, Item: 527 - KNNWithMeans Prediction: 3.17, UserBased Prediction: 3.17
User: 13, Item: 1997 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80 User: 13, Item: 1997 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
User: 13, Item: 4993 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80 User: 13, Item: 4993 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
User: 13, Item: 2700 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80 User: 13, Item: 2700 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
User: 13, Item: 1721 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80 User: 13, Item: 1721 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
User: 13, Item: 527 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80 User: 13, Item: 527 - KNNWithMeans Prediction: 2.80, UserBased Prediction: 2.80
User: 17, Item: 2028 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25 User: 17, Item: 2028 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
User: 17, Item: 4993 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25 User: 17, Item: 4993 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
User: 17, Item: 1214 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25 User: 17, Item: 1214 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
User: 17, Item: 4308 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25 User: 17, Item: 4308 - KNNWithMeans Prediction: 3.25, UserBased Prediction: 3.25
User: 19, Item: 1997 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 1997 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 2028 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 2028 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 4993 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 4993 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 5952 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 5952 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 2700 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 2700 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 1721 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 1721 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 1214 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 1214 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 19, Item: 364 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50 User: 19, Item: 364 - KNNWithMeans Prediction: 3.50, UserBased Prediction: 3.50
User: 23, Item: 1997 - KNNWithMeans Prediction: 2.56, UserBased Prediction: 2.56 User: 23, Item: 1997 - KNNWithMeans Prediction: 2.56, UserBased Prediction: 2.56
User: 23, Item: 2700 - KNNWithMeans Prediction: 2.56, UserBased Prediction: 2.56 User: 23, Item: 2700 - KNNWithMeans Prediction: 2.56, UserBased Prediction: 2.56
User: 27, Item: 1997 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 1997 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
User: 27, Item: 2028 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 2028 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
User: 27, Item: 5952 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 5952 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
User: 27, Item: 2700 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 2700 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
User: 27, Item: 1721 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 1721 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
User: 27, Item: 364 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 364 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
User: 27, Item: 4308 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67 User: 27, Item: 4308 - KNNWithMeans Prediction: 4.67, UserBased Prediction: 4.67
Mean Squared Difference (MSD) between KNNWithMeans and UserBased predictions: 0.0000 Mean Squared Difference (MSD) between KNNWithMeans and UserBased predictions: 0.0000
%% Cell type:code id:be53ae27 tags: %% Cell type:code id:be53ae27 tags:
``` python ``` python
# 1. Obtain predictions as (uid, iid, true rating, estimate, details) tuples
def _collect_predictions(model, keep_details):
    """Predict every testset entry with `model` and return the result rows."""
    rows = []
    for uid, iid, true_r in testset:
        prediction = model.predict(uid, iid)
        details = prediction.details if keep_details else {}
        rows.append((uid, iid, true_r, prediction.est, details))
    return rows


# UserBased rows carry an empty details dict; KNNWithMeans rows keep theirs
user_based_predictions = _collect_predictions(user_based_instance, keep_details=False)
knn_predictions = _collect_predictions(knn_model, keep_details=True)

# 2. Compute MAE and RMSE for each algorithm
user_based_mae = accuracy.mae(user_based_predictions, verbose=False)
user_based_rmse = accuracy.rmse(user_based_predictions, verbose=False)
knn_mae = accuracy.mae(knn_predictions, verbose=False)
knn_rmse = accuracy.rmse(knn_predictions, verbose=False)

# 3. Report the results
for label, value in (
    ("UserBased MAE:", user_based_mae),
    ("UserBased RMSE:", user_based_rmse),
    ("KNNWithMeans MAE:", knn_mae),
    ("KNNWithMeans RMSE:", knn_rmse),
):
    print(label, value)
``` ```
%% Output %% Output
UserBased MAE: 0.5691666666666667 UserBased MAE: 0.5691666666666667
UserBased RMSE: 0.7916118402067746 UserBased RMSE: 0.7916118402067746
KNNWithMeans MAE: 0.5691666666666667 KNNWithMeans MAE: 0.5691666666666667
KNNWithMeans RMSE: 0.7916118402067746 KNNWithMeans RMSE: 0.7916118402067746
%% Cell type:markdown id:cced76d9 tags: %% Cell type:markdown id:cced76d9 tags:
# 5. Compare MSD and Jaccard
Compare predictions made with MSD similarity and Jaccard similarity
%% Cell type:code id:c20d8e19 tags: %% Cell type:code id:c20d8e19 tags:
``` python ``` python
# Initialize the model with MSD similarity
sim_options_msd = {'name': 'msd'}
user_based_msd = KNNBasic(sim_options=sim_options_msd)
user_based_msd.fit(trainset)

# NOTE: Surprise has no built-in Jaccard similarity, so this second model
# really uses cosine similarity. The *_jaccard variable names are kept so
# that code referring to them keeps working, but the report below names the
# measure that is actually used.
sim_options_jaccard = {'name': 'cosine'}
user_based_jaccard = KNNBasic(sim_options=sim_options_jaccard)
user_based_jaccard.fit(trainset)

# Make predictions with each model on the test set
predictions_msd = user_based_msd.test(testset)
predictions_jaccard = user_based_jaccard.test(testset)

# Calculate the performance of the two models; verbose=False avoids printing
# each RMSE twice (once by accuracy.rmse and once below)
rmse_msd = accuracy.rmse(predictions_msd, verbose=False)
rmse_jaccard = accuracy.rmse(predictions_jaccard, verbose=False)
print("RMSE with MSD similarity:", rmse_msd)
print("RMSE with cosine similarity:", rmse_jaccard)
``` ```
%% Output %% Output
Computing the msd similarity matrix... Computing the msd similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
Computing the cosine similarity matrix... Computing the cosine similarity matrix...
Done computing similarity matrix. Done computing similarity matrix.
RMSE: 1.0829 RMSE: 1.0829
RMSE: 0.9589 RMSE: 0.9589
RMSE with MSD similarity: 1.0829450651603574 RMSE with MSD similarity: 1.0829450651603574
RMSE with Jaccard similarity: 0.9588566070964019 RMSE with Jaccard similarity: 0.9588566070964019
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter