first update

1eb8178c · Adrien Payen · 34917164 · 1eb8178c · 1eb8178c · 1eb8178c
--- a/Analytics_UI/analytics.py
+++ b/Analytics_UI/analytics.py
@@ -5,6 +5,7 @@
 # third parties imports
 import numpy as np 
 import pandas as pd
+import re
 # -- add new imports here --
 # local imports
@@ -13,9 +14,61 @@ from loaders import load_ratings
 from loaders import load_items
 # -- load the items and display the Dataframe
+load_items()
+load_ratings()
+df_movies = pd.read_csv("data/small/content/movies.csv")
+n_movies = df_movies['title'].nunique()
+print("\n") 
+print(f"Number of movies: {n_movies}")
+df_movies['annee'] = df_movies['title'].str.extract(r'\((.{4})\)')
+df_movies['annee'] = pd.to_numeric(df_movies['annee'], errors='coerce')
+# Find the minimum and maximum of the extracted year ('annee') column
+min_range = int(df_movies['annee'].min())
+max_range = int(df_movies['annee'].max())
+# Display the minimum and maximum year
+print("\n") 
+print(f"Minimum range: {min_range}")
+print(f"Maximum range: {max_range}")
+print("\n") 
+df_movies['genres'] = df_movies['genres'].str.split('|')
+df_movies = df_movies.explode('genres')
+# Display all unique genres (sorted)
+unique_genres = sorted(df_movies['genres'].unique())
+print("List of all genres:")
+for genre in unique_genres:
+    print(genre, "|", end = " ")
+print("\n","A") 
+df_ratings = pd.read_csv("data/small/evidence/ratings.csv")
+n_ratings = df_ratings['rating'].count()
+print(f"Number of ratings: {n_ratings}")
+print("\n","B") 
+df_ratings = pd.read_csv("data/small/evidence/ratings.csv")
+n_users = df_ratings['userId'].nunique()
+print(f"Number of users: {n_users}")
+print("\n","C") 
 # -- display relevant informations that can be extracted from the dataset
 # -- load the items and display the Dataframe
-# -- display relevant informations that can be extracted from the dataset
\ No newline at end of file
+# -- display relevant informations that can be extracted from the dataset
--- a/Analytics_UI/configs.py
+++ b/Analytics_UI/configs.py
@@ -12,7 +12,7 @@ class EvalConfig:
    full_metrics = []
    # Split parameters
-    test_size = None  # -- configure the test_size (from 0 to 1) --
+    test_size = 0.3 #None  # -- configure the test_size (from 0 to 1) --
    # Loo parameters
-    top_n_value = None  # -- configure the numer of recommendations (> 1) --
+    top_n_value =  10 #None  # -- configure the number of recommendations (> 1) --
--- a/Analytics_UI/constants.py
+++ b/Analytics_UI/constants.py
@@ -24,4 +24,5 @@ class Constant:
    USER_ITEM_RATINGS = [USER_ID_COL, ITEM_ID_COL, RATING_COL]
    # Rating scale
-    RATINGS_SCALE = None  # -- fill in here the ratings scale as a tuple (min_value, max_value)
+    RATINGS_SCALE = (0.0, 5.0) # -- fill in here the ratings scale as a tuple (min_value, max_value)
+    # previously this was None
--- a/Analytics_UI/loaders.py
+++ b/Analytics_UI/loaders.py
 # third parties imports
 import pandas as pd
+import os
+from pprint import pprint as pp
+# import display
 # local imports
 from constants import Constant as C
@@ -14,14 +17,33 @@ def load_ratings(surprise_format=False):
 def load_items():
-    df_items = pd.read_csv(C.CONTENT_PATH / C.ITEMS_FILENAME)
+    df_items = pd.read_csv(C.CONTENT_PATH / C.ITEMS_FILENAME) # contents of the movie CSV file
-    df_items = df_items.set_index(C.ITEM_ID_COL)
+    df_items = df_items.set_index(C.ITEM_ID_COL) # movie id
    return df_items
-def export_evaluation_report(df):
+def export_evaluation_report():
-    """ Export the report to the evaluation folder.
+    df_ratings = load_ratings()
+    df_items = load_items()
+    df_merge = pd.merge(df_ratings,df_items, on = 'movieId')
-    The name of the report is versioned using today's date
+    report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv"
-    """
+    export_path = os.path.join("data", "small", "evaluations", report_name)
-    pass
+    df_merge.to_csv(export_path, index=False)
+    print("The data has been exported to the evaluation report")
+    return df_merge
+# Display
+print("\n")
+print("Display the ratings")
+pp(load_ratings())
+print("\n\n")
+print("Display the movie data")
+pp(load_items())
+# display(load_items())# not practical in a .py file in VS Code
+# Call the function to export the evaluation report to CSV
+export_evaluation_report()
\ No newline at end of file
--- a/Pipfile
+++ b/Pipfile
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+[packages]
+pandas = "*"
+ipykernel = "*"
+jupyter = "*"
+matplotlib = "*"
+scipy = "*"
+seaborn = "*"
+scikit-surprise = "*"
+python-dotenv = "*"
+scikit-learn = "*"
+streamlit = "*"
+black = "*"
+[dev-packages]
+[requires]
+python_version = "3.9"
--- a/Pipfile.lock
+++ b/Pipfile.lock
--- a/data/.DS_Store
+++ b/data/.DS_Store
--- a/data/small/.DS_Store
+++ b/data/small/.DS_Store
--- a/data/small/content/.DS_Store
+++ b/data/small/content/.DS_Store
--- a/data/small/evidence/.DS_Store
+++ b/data/small/evidence/.DS_Store