Commit d24ee4ed by Corentin Vande Kerckhove

initial commit
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# MacOS
.DS_Store
# Images and data
*.pdf
images
data
*.zip
posters
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
pandas = "*"
ipykernel = "*"
jupyter = "*"
matplotlib = "*"
scipy = "*"
seaborn = "*"
scikit-surprise = "*"
python-dotenv = "*"
scikit-learn = "*"
streamlit = "*"
[dev-packages]
[requires]
python_version = "3.9"
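# Note (assumption, not part of the original Pipfile): the environment above can be
# installed with `pipenv install` and the notebooks started with `pipenv run jupyter notebook`.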
# Group XX - Movie Recommender System
Welcome to the README file :)
Write here a few introduction words for your project.
If you want inspiration on how to write an awesome README, check out this GitHub repo: https://github.com/navendu-pottekkat/awesome-readme. But don't spend too much time on it. This is not the topic of this course.
If you need help with the Markdown syntax, you might find some help here: https://www.markdownguide.org/basic-syntax/.
Good luck with your project.
May the force be with you.
%% Cell type:markdown id: tags:
# Analytics Module
The Analytics module provides descriptive statistics on content data, evidence data, and model evaluations.
%% Cell type:code id: tags:
``` python
# reload modules automatically before executing code
%load_ext autoreload
%autoreload 2
# third-party imports
import numpy as np
import pandas as pd
# -- add new imports here --
# local imports
from constants import Constant as C
from loaders import load_ratings
from loaders import load_items
```
%% Cell type:markdown id: tags:
# 1 - Content analytics
Explore and perform descriptive statistics on content data
%% Cell type:code id: tags:
``` python
# -- load the items and display the DataFrame
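# Sketch (assumption): load_items() from loaders.py returns the movies
# DataFrame indexed by movieId
df_items = load_items()
display(df_items)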
```
%% Cell type:code id: tags:
``` python
# -- display relevant information that can be extracted from the dataset
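# Sketch: a few descriptive statistics on the items
# (assumes df_items was loaded in the previous cell and that genres are
# pipe-separated, as in the MovieLens movies.csv)
print(f"Number of items: {df_items.shape[0]}")
print(f"Columns: {list(df_items.columns)}")
print("Number of movies per genre:")
print(df_items[C.GENRES_COL].str.split('|').explode().value_counts())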
```
%% Cell type:markdown id: tags:
# 2 - Evidence analytics
Explore and perform descriptive statistics on evidence data
%% Cell type:code id: tags:
``` python
# -- load the ratings and display the DataFrame
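# Sketch (assumption): load_ratings() from loaders.py returns the raw ratings DataFrame
df_ratings = load_ratings()
display(df_ratings)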
```
%% Cell type:code id: tags:
``` python
# -- display relevant information that can be extracted from the dataset
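# Sketch: basic descriptive statistics on the ratings
# (assumes df_ratings was loaded in the previous cell)
print(f"Number of ratings: {df_ratings.shape[0]}")
print(f"Number of users: {df_ratings[C.USER_ID_COL].nunique()}")
print(f"Number of rated items: {df_ratings[C.ITEM_ID_COL].nunique()}")
print(df_ratings[C.RATING_COL].describe())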
```
# local imports
from models import *
class EvalConfig:

    models = [
        ("baseline_1", ModelBaseline1, {}),  # model_name, model class, model parameters (dict)
    ]

    split_metrics = ["mae"]
    loo_metrics = []
    full_metrics = []

    # Split parameters
    test_size = None  # -- configure the test_size (from 0 to 1) --

    # Loo parameters
    top_n_value = None  # -- configure the number of recommendations (> 1) --
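    # Example (assumption, for illustration only): the other baselines defined in
    # models.py could be registered in the same way, e.g.
    #   ("baseline_2", ModelBaseline2, {}),
    #   ("baseline_3", ModelBaseline3, {}),
    #   ("baseline_4", ModelBaseline4, {}),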
# third-party imports
from pathlib import Path


class Constant:

    DATA_PATH = Path('data/test')  # -- fill in here the dataset size to use

    # Content
    CONTENT_PATH = DATA_PATH / 'content'
    # - item
    ITEMS_FILENAME = 'movies.csv'
    ITEM_ID_COL = 'movieId'
    LABEL_COL = 'title'
    GENRES_COL = 'genres'

    # Evidence
    EVIDENCE_PATH = DATA_PATH / 'evidence'
    # - ratings
    RATINGS_FILENAME = 'ratings.csv'
    USER_ID_COL = 'userId'
    RATING_COL = 'rating'
    TIMESTAMP_COL = 'timestamp'

    USER_ITEM_RATINGS = [USER_ID_COL, ITEM_ID_COL, RATING_COL]

    # Rating scale
    RATINGS_SCALE = None  # -- fill in here the ratings scale as a tuple (min_value, max_value)
# third-party imports
import pandas as pd
from surprise import Dataset, Reader  # assumption: used for the surprise format sketch below

# local imports
from constants import Constant as C


def load_ratings(surprise_format=False):
    df_ratings = pd.read_csv(C.EVIDENCE_PATH / C.RATINGS_FILENAME)
    if surprise_format:
        # Sketch (assumption): build a surprise Dataset from the ratings DataFrame;
        # requires C.RATINGS_SCALE to be filled in constants.py
        reader = Reader(rating_scale=C.RATINGS_SCALE)
        return Dataset.load_from_df(df_ratings[C.USER_ITEM_RATINGS], reader)
    else:
        return df_ratings


def load_items():
    df_items = pd.read_csv(C.CONTENT_PATH / C.ITEMS_FILENAME)
    df_items = df_items.set_index(C.ITEM_ID_COL)
    return df_items


def export_evaluation_report(df):
    """Export the report to the evaluation folder.

    The name of the report is versioned using today's date.
    """
    pass
# standard library imports
from collections import defaultdict

# third-party imports
import numpy as np
import random as rd
from surprise import AlgoBase
from surprise import KNNWithMeans
from surprise import SVD


def get_top_n(predictions, n):
    """Return the top-N recommendation for each user from a set of predictions.

    Source: inspired by https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
    and modified by cvandekerckh for random tie breaking

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendations to output for each user.

    Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """
    rd.seed(0)

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the n highest ones.
    for uid, user_ratings in top_n.items():
        rd.shuffle(user_ratings)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n


# First algorithm
class ModelBaseline1(AlgoBase):
    def __init__(self):
        AlgoBase.__init__(self)

    def estimate(self, u, i):
        return 2


# Second algorithm
class ModelBaseline2(AlgoBase):
    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        rd.seed(0)

    def estimate(self, u, i):
        return rd.uniform(self.trainset.rating_scale[0], self.trainset.rating_scale[1])


# Third algorithm
class ModelBaseline3(AlgoBase):
    def __init__(self):
        AlgoBase.__init__(self)

    def fit(self, trainset):
        AlgoBase.fit(self, trainset)
        self.the_mean = np.mean([r for (_, _, r) in self.trainset.all_ratings()])
        return self

    def estimate(self, u, i):
        return self.the_mean


# Fourth Model
class ModelBaseline4(SVD):
    def __init__(self):
        SVD.__init__(self, n_factors=100)
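

# --------------------------------------------------------------------------
# Minimal usage sketch (assumption, not part of the original template): shows
# how a baseline model and get_top_n could be exercised, assuming
# load_ratings(surprise_format=True) returns a surprise Dataset and
# RATINGS_SCALE has been filled in constants.py.
if __name__ == "__main__":
    from loaders import load_ratings

    data = load_ratings(surprise_format=True)
    trainset = data.build_full_trainset()

    model = ModelBaseline3()
    model.fit(trainset)

    # predict on user/item pairs absent from the trainset and keep the top 10 per user
    predictions = model.test(trainset.build_anti_testset())
    top_n = get_top_n(predictions, n=10)
    print(list(top_n.items())[:1])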
import os
import shutil
import zipfile as zf
import errno


def unzip_and_remove(zip_filename):
    if not os.path.exists(zip_filename):
        raise FileNotFoundError(
            errno.ENOENT,
            os.strerror(errno.ENOENT),
            zip_filename,
        )
    with zf.ZipFile(zip_filename, 'r') as files:
        files.extractall('.')
    os.remove(zip_filename)
    # remove macOS metadata, if present, for archives created on a Mac
    shutil.rmtree('__MACOSX', ignore_errors=True)


if __name__ == "__main__":
    unzip_and_remove("data.zip")