Skip to content
Extraits de code Groupes Projets
Valider 34917164 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

all files analytics

parent 4d848f51
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
# local imports
from models import *
class EvalConfig:
    """Configuration of the evaluation run: models to benchmark plus
    the settings for each evaluation protocol (split / leave-one-out / full)."""

    # One entry per model to evaluate: (model_name, model_class, model_params_dict).
    models = [
        ("baseline_1", ModelBaseline1, {}),
    ]

    # Metrics computed under each protocol.
    split_metrics = ["mae"]
    loo_metrics = []
    full_metrics = []

    # Train/test split protocol: fraction of data held out (0 to 1) -- to configure.
    test_size = None

    # Leave-one-out protocol: number of recommendations per user (> 1) -- to configure.
    top_n_value = None
# third parties imports
from pathlib import Path
class Constant:
    """Dataset layout: directory paths, file names and column names."""

    # Root of the dataset -- fill here the dataset size to use.
    DATA_PATH = Path("data") / "small"

    # Content (item metadata).
    CONTENT_PATH = DATA_PATH / "content"
    # - item
    ITEMS_FILENAME = "movies.csv"
    ITEM_ID_COL = "movieId"
    LABEL_COL = "title"
    GENRES_COL = "genres"

    # Evidence (user feedback).
    EVIDENCE_PATH = DATA_PATH / "evidence"
    # - ratings
    RATINGS_FILENAME = "ratings.csv"
    USER_ID_COL = "userId"
    RATING_COL = "rating"
    TIMESTAMP_COL = "timestamp"
    # Canonical column order for a user-item-rating frame.
    USER_ITEM_RATINGS = [USER_ID_COL, ITEM_ID_COL, RATING_COL]

    # Rating scale -- fill in here the ratings scale as a tuple (min_value, max_value).
    RATINGS_SCALE = None
# standard library imports
from collections import defaultdict
# third parties imports
import numpy as np
import random as rd
from surprise import AlgoBase
from surprise import KNNWithMeans
from surprise import SVD
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
    and modified by cvandekerckh for random tie breaking

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendations to output for each user.
            Defaults to 10. (Fix: the docstring always promised this default,
            but the parameter previously had none.)

    Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """
    # Fixed seed so the random tie-breaking below is reproducible across calls.
    rd.seed(0)

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Shuffle before the (stable) sort so that items with equal estimated
    # ratings end up in random order, then keep the n highest-rated items.
    for uid, user_ratings in top_n.items():
        rd.shuffle(user_ratings)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n
# First algorithm
# First algorithm
class ModelBaseline1(AlgoBase):
    """Trivial baseline: predicts the constant rating 2 for every (user, item) pair."""

    def __init__(self):
        super().__init__()

    def estimate(self, u, i):
        # Constant prediction, independent of user and item.
        return 2
# Second algorithm
# Second algorithm
class ModelBaseline2(AlgoBase):
    """Baseline predicting a uniformly random rating within the trainset's rating scale."""

    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        """Fit the model: seed the RNG once so predictions are reproducible.

        Returns:
            self, for consistency with ModelBaseline3 and surprise's
            AlgoBase.fit convention (fix: previously returned None,
            breaking model.fit(trainset).test(...) chaining).
        """
        AlgoBase.fit(self, trainset)
        rd.seed(0)
        return self

    def estimate(self, u, i):
        # Uniform draw over [min_rating, max_rating].
        return rd.uniform(self.trainset.rating_scale[0], self.trainset.rating_scale[1])
# Third algorithm
# Third algorithm
class ModelBaseline3(AlgoBase):
    """Baseline predicting the global mean rating of the training set."""

    def __init__(self):
        super().__init__()

    def fit(self, trainset):
        super().fit(trainset)
        # Global mean over every (user, item, rating) triple in the trainset.
        all_ratings = [rating for (_, _, rating) in self.trainset.all_ratings()]
        self.the_mean = np.mean(all_ratings)
        return self

    def estimate(self, u, i):
        # Same prediction for every pair: the global mean.
        return self.the_mean
# Fourth Model
# Fourth Model
class ModelBaseline4(SVD):
    """Matrix-factorization baseline: surprise's SVD algorithm.

    Args:
        n_factors (int): number of latent factors. Defaults to 100, the
            value previously hard-coded, so existing callers are unaffected.
    """

    def __init__(self, n_factors=100):
        SVD.__init__(self, n_factors=n_factors)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter