Skip to content
Extraits de code Groupes Projets
Valider 1eb8178c rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

first update

parent 34917164
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Pipeline #47663 annulé
......@@ -5,6 +5,7 @@
# third parties imports
import numpy as np
import pandas as pd
import re
# -- add new imports here --
# local imports
......@@ -13,9 +14,61 @@ from loaders import load_ratings
from loaders import load_items
# -- load the items and display the Dataframe
# Run both loaders; their return values are not used by the statistics below.
load_items()
load_ratings()

# ---- Movies ----------------------------------------------------------------
df_movies = pd.read_csv("data/small/content/movies.csv")
n_movies = df_movies['title'].nunique()
print("\n")
print(f"Number of movies: {n_movies}")

# The release year is taken from a four-digit group in parentheses at the end
# of the title. Requiring digits and anchoring at the end prevents an earlier
# parenthesised four-character token from being captured instead of the year.
df_movies['annee'] = df_movies['title'].str.extract(r'\((\d{4})\)\s*$', expand=False)
# Titles without a trailing year become NaN and are ignored by min()/max().
df_movies['annee'] = pd.to_numeric(df_movies['annee'], errors='coerce')

# Find the minimum and maximum of the year range
min_range = int(df_movies['annee'].min())
max_range = int(df_movies['annee'].max())

# Display the minimum and maximum of the year range
print("\n")
print(f"Minimum range: {min_range}")
print(f"Maximum range: {max_range}")
print("\n")

# ---- Genres ----------------------------------------------------------------
# One row per (movie, genre) pair so that unique() sees individual genres.
df_movies['genres'] = df_movies['genres'].str.split('|')
df_movies = df_movies.explode('genres')

# Display all unique genres; missing values are dropped because sorted()
# cannot order NaN against strings.
unique_genres = sorted(df_movies['genres'].dropna().unique())
print("List of all genres:")
for genre in unique_genres:
    print(genre, "|", end=" ")
print("\n", "A")

# ---- Ratings ---------------------------------------------------------------
# The ratings file is read once and reused for both counts.
df_ratings = pd.read_csv("data/small/evidence/ratings.csv")
n_ratings = df_ratings['rating'].count()
print(f"Number of ratings: {n_ratings}")
print("\n", "B")

n_users = df_ratings['userId'].nunique()
print(f"Number of users: {n_users}")
print("\n", "C")
# -- display relevant informations that can be extracted from the dataset
# -- load the items and display the Dataframe
# -- display relevant informations that can be extracted from the dataset
\ No newline at end of file
# -- display relevant informations that can be extracted from the dataset
......@@ -12,7 +12,7 @@ class EvalConfig:
full_metrics = []
# Split parameters
test_size = None # -- configure the test_size (from 0 to 1) --
test_size = 0.3 #None # -- configure the test_size (from 0 to 1) --
# Loo parameters
top_n_value = None # -- configure the numer of recommendations (> 1) --
top_n_value = 10 #None # -- configure the numer of recommendations (> 1) --
......@@ -24,4 +24,5 @@ class Constant:
USER_ITEM_RATINGS = [USER_ID_COL, ITEM_ID_COL, RATING_COL]
# Rating scale
RATINGS_SCALE = None # -- fill in here the ratings scale as a tuple (min_value, max_value)
RATINGS_SCALE = (0.0, 5.0) # -- fill in here the ratings scale as a tuple (min_value, max_value)
# previously None
# third parties imports
import pandas as pd
import os
from pprint import pprint as pp
# import display
# local imports
from constants import Constant as C
......@@ -14,14 +17,33 @@ def load_ratings(surprise_format=False):
def load_items():
    """Load the items table and index it by item id.

    Reads the CSV located at ``C.CONTENT_PATH / C.ITEMS_FILENAME`` and uses
    the ``C.ITEM_ID_COL`` column (the movie id) as the index.

    Returns:
        pandas.DataFrame: the items table indexed by ``C.ITEM_ID_COL``.
    """
    # Contents of the movies CSV; read once, then re-indexed by movie id.
    df_items = pd.read_csv(C.CONTENT_PATH / C.ITEMS_FILENAME)
    return df_items.set_index(C.ITEM_ID_COL)
def export_evaluation_report(df=None):
    """Export the report to the evaluation folder.

    The name of the report is versioned using today's date.

    Args:
        df: DataFrame to export. When omitted, the ratings and the items are
            loaded with ``load_ratings()`` / ``load_items()``, merged on
            ``'movieId'``, and that merged table is exported.

    Returns:
        The DataFrame that was written to disk.
    """
    if df is None:
        # NOTE(review): 'movieId' is presumably the value of C.ITEM_ID_COL;
        # confirm before replacing the literal with the constant.
        df = pd.merge(load_ratings(), load_items(), on='movieId')

    report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv"
    export_dir = os.path.join("data", "small", "evaluations")
    # to_csv does not create missing parent directories, so make sure the
    # target folder exists before writing.
    os.makedirs(export_dir, exist_ok=True)
    export_path = os.path.join(export_dir, report_name)
    df.to_csv(export_path, index=False)
    print("The data has been exported to the evaluation report")
    return df
# Print each table under its caption, in the same order as before:
# spacer line(s), caption, then the pretty-printed DataFrame.
for _spacer, _caption, _loader in (
    ("\n", "Display the ratings", load_ratings),
    ("\n\n", "Display the movie data", load_items),
):
    print(_spacer)
    print(_caption)
    pp(_loader())
# A notebook-style display(load_items()) is left out on purpose: it is not
# practical when running a .py file in VS Code.

# Write today's evaluation report to disk.
export_evaluation_report()
\ No newline at end of file
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
pandas = "*"
ipykernel = "*"
jupyter = "*"
matplotlib = "*"
scipy = "*"
seaborn = "*"
scikit-surprise = "*"
python-dotenv = "*"
scikit-learn = "*"
streamlit = "*"
black = "*"
[dev-packages]
[requires]
python_version = "3.9"
Ce diff est replié.
Aucun aperçu pour ce type de fichier
Aucun aperçu pour ce type de fichier
Aucun aperçu pour ce type de fichier
Aucun aperçu pour ce type de fichier
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter