Skip to content
Extraits de code Groupes Projets
Valider 34917164 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

all files analytics

parent 4d848f51
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
# local imports
from models import *
class EvalConfig:
    """Configuration of the evaluation run: models to benchmark plus
    the settings for each evaluation protocol (split / leave-one-out / full)."""

    # One entry per model to evaluate: (model_name, model_class, model_params_dict).
    models = [
        ("baseline_1", ModelBaseline1, {}),
    ]

    # Metrics computed under each protocol.
    split_metrics = ["mae"]
    loo_metrics = []
    full_metrics = []

    # Train/test split protocol: fraction of data held out (0 to 1) -- to configure.
    test_size = None

    # Leave-one-out protocol: number of recommendations per user (> 1) -- to configure.
    top_n_value = None
# third parties imports
from pathlib import Path
class Constant:
    """Dataset layout: directory paths, file names and column names."""

    # Root of the dataset -- fill here the dataset size to use.
    DATA_PATH = Path("data") / "small"

    # Content (item metadata).
    CONTENT_PATH = DATA_PATH / "content"
    # - item
    ITEMS_FILENAME = "movies.csv"
    ITEM_ID_COL = "movieId"
    LABEL_COL = "title"
    GENRES_COL = "genres"

    # Evidence (user feedback).
    EVIDENCE_PATH = DATA_PATH / "evidence"
    # - ratings
    RATINGS_FILENAME = "ratings.csv"
    USER_ID_COL = "userId"
    RATING_COL = "rating"
    TIMESTAMP_COL = "timestamp"
    # Canonical column order for a user-item-rating frame.
    USER_ITEM_RATINGS = [USER_ID_COL, ITEM_ID_COL, RATING_COL]

    # Rating scale -- fill in here the ratings scale as a tuple (min_value, max_value).
    RATINGS_SCALE = None
# standard library imports
from collections import defaultdict
# third parties imports
import numpy as np
import random as rd
from surprise import AlgoBase
from surprise import KNNWithMeans
from surprise import SVD
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
    and modified by cvandekerckh for random tie breaking

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendations to output for each user.
            Defaults to 10. (Fix: the docstring always promised this default,
            but the parameter previously had none.)

    Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """
    # Fixed seed so the random tie-breaking below is reproducible across calls.
    rd.seed(0)

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Shuffle before the (stable) sort so that items with equal estimated
    # ratings end up in random order, then keep the n highest-rated items.
    for uid, user_ratings in top_n.items():
        rd.shuffle(user_ratings)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n
# First algorithm
# First algorithm
class ModelBaseline1(AlgoBase):
    """Trivial baseline: predicts the constant rating 2 for every (user, item) pair."""

    def __init__(self):
        super().__init__()

    def estimate(self, u, i):
        # Constant prediction, independent of user and item.
        return 2
# Second algorithm
# Second algorithm
class ModelBaseline2(AlgoBase):
    """Baseline predicting a uniformly random rating within the trainset's rating scale."""

    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        """Fit the model: seed the RNG once so predictions are reproducible.

        Returns:
            self, for consistency with ModelBaseline3 and surprise's
            AlgoBase.fit convention (fix: previously returned None,
            breaking model.fit(trainset).test(...) chaining).
        """
        AlgoBase.fit(self, trainset)
        rd.seed(0)
        return self

    def estimate(self, u, i):
        # Uniform draw over [min_rating, max_rating].
        return rd.uniform(self.trainset.rating_scale[0], self.trainset.rating_scale[1])
# Third algorithm
# Third algorithm
class ModelBaseline3(AlgoBase):
    """Baseline predicting the global mean rating of the training set."""

    def __init__(self):
        super().__init__()

    def fit(self, trainset):
        super().fit(trainset)
        # Global mean over every (user, item, rating) triple in the trainset.
        all_ratings = [rating for (_, _, rating) in self.trainset.all_ratings()]
        self.the_mean = np.mean(all_ratings)
        return self

    def estimate(self, u, i):
        # Same prediction for every pair: the global mean.
        return self.the_mean
# Fourth Model
# Fourth Model
class ModelBaseline4(SVD):
    """Matrix-factorization baseline: surprise's SVD algorithm.

    Args:
        n_factors (int): number of latent factors. Defaults to 100, the
            value previously hard-coded, so existing callers are unaffected.
    """

    def __init__(self, n_factors=100):
        SVD.__init__(self, n_factors=n_factors)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter