Newer
Older
# Automatically reload modules before executing code
# %load_ext autoreload
# %autoreload 2
# third parties imports
import numpy as np
import pandas as pd
# -- add new imports here --
# local imports
from constants import Constant as C
from loaders import load_ratings
from loaders import load_items
# -- load the items and display the Dataframe
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Invoke the project loaders; their return values are not kept here
load_items()
load_ratings()
# Read the movie catalogue directly from its CSV file
df_movies = pd.read_csv("data/small/content/movies.csv")
# Count the distinct titles (missing titles are excluded from the count)
n_movies = df_movies["title"].nunique(dropna=True)
print("\n")
print(f"Number of movies: {n_movies}")
# Extract the release year from the title: the first group of exactly four
# digits enclosed in parentheses, e.g. "Some Title (1995)" -> "1995".
# Matching digits only (rather than any four characters) prevents a
# four-letter parenthesised alternate title from being captured instead of
# the year and then coerced to NaN. expand=False returns a Series.
df_movies['annee'] = df_movies['title'].str.extract(r'\((\d{4})\)', expand=False)
# Titles without a recognisable year become NaN
df_movies['annee'] = pd.to_numeric(df_movies['annee'], errors='coerce')
# Find the minimum and maximum of the range (min/max skip missing values)
min_range = int(df_movies['annee'].min())
max_range = int(df_movies['annee'].max())
# Display the minimum and maximum of the range
print("\n")
print(f"Minimum range: {min_range}")
print(f"Maximum range: {max_range}")
print("\n")
# Split the pipe-separated genre string into a list, then give each genre its
# own row. NOTE: from here on df_movies holds one row per (movie, genre) pair.
df_movies['genres'] = df_movies['genres'].str.split('|')
df_movies = df_movies.explode('genres')
# Display all unique genres; missing values are dropped first because sorting
# a mix of strings and NaN raises a TypeError
unique_genres = sorted(df_movies['genres'].dropna().unique())
print("List of all genres:")
for genre in unique_genres:
    # Keep every genre on the same output line, separated by " | "
    print(genre, "|", end = " ")
print("\n","A")
# Load the ratings file once and reuse the DataFrame for both statistics
# (it was previously read from disk a second time with no change in between)
df_ratings = pd.read_csv("data/small/evidence/ratings.csv")
# Number of non-null entries in the rating column
n_ratings = df_ratings['rating'].count()
print(f"Number of ratings: {n_ratings}")
print("\n","B")
# Number of distinct userId values
n_users = df_ratings['userId'].nunique()
print(f"Number of users: {n_users}")
print("\n","C")
# -- display relevant information that can be extracted from the dataset
# -- display relevant information that can be extracted from the dataset