Skip to content
Extraits de code Groupes Projets
analytics.py 1,74 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    # Reload modules automatically before executing code
    # %load_ext autoreload
    # %autoreload 2
    
    # third parties imports
    import numpy as np 
    import pandas as pd
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import re
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # -- add new imports here --
    
    # local imports
    from constants import Constant as C
    from loaders import load_ratings
    from loaders import load_items
    
    # -- load the items and display the Dataframe
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Call the project loaders. Their return values are discarded here.
    # NOTE(review): presumably they return the same data that is re-read from
    # CSV below -- confirm against loaders.py before relying on either source.
    load_items()
    load_ratings()

    # Read the movie catalogue directly from its CSV file.
    df_movies = pd.read_csv("data/small/content/movies.csv")

    # Number of distinct, non-missing titles.
    n_movies = len(df_movies["title"].dropna().unique())

    print("\n")
    print(f"Number of movies: {n_movies}")
    
    # Extract the release year from the title: the first group of exactly four
    # digits enclosed in parentheses. Requiring digits (instead of any four
    # characters) keeps a non-year group such as "(Five)" from hiding a later
    # year, and keeps strings like "1e10" from being parsed as a number.
    year_text = df_movies['title'].str.extract(r'\((\d{4})\)', expand=False)

    # Titles without a parenthesised year end up as NaN in the 'annee' column.
    df_movies['annee'] = pd.to_numeric(year_text, errors='coerce')

    # Find the minimum and maximum year. When no title carries a year,
    # int(NaN) would raise, so fall back to None in that case.
    known_years = df_movies['annee'].dropna()
    if known_years.empty:
        min_range = max_range = None
    else:
        min_range = int(known_years.min())
        max_range = int(known_years.max())

    # Display the minimum and maximum year.
    print("\n")
    print(f"Minimum range: {min_range}")
    print(f"Maximum range: {max_range}")
    
    
    print("\n")
    # Turn the pipe-separated genre string into a list, then give every genre
    # its own row. Note that df_movies is rebound to the exploded frame, so a
    # movie with several genres now occupies several rows.
    df_movies['genres'] = df_movies['genres'].str.split('|')
    df_movies = df_movies.explode('genres')

    # Display all unique genres in alphabetical order. Missing values are
    # dropped first: a NaN (float) mixed with strings makes sorted() raise.
    unique_genres = sorted(df_movies['genres'].dropna().unique())
    print("List of all genres:")
    for genre in unique_genres:
        # Keep everything on one line; each genre is followed by " | ".
        print(genre, "|", end=" ")
    
     
    # Section A: total number of ratings.
    print("\n","A")
    # Read the ratings file once; the same frame serves sections A and B.
    df_ratings = pd.read_csv("data/small/evidence/ratings.csv")
    # count() only counts non-missing entries of the 'rating' column.
    n_ratings = df_ratings['rating'].count()
    print(f"Number of ratings: {n_ratings}")


    # Section B: number of distinct users who rated at least one item.
    print("\n","B")
    n_users = df_ratings['userId'].nunique()
    print(f"Number of users: {n_users}")


    # Section C: only the marker is printed; no analysis follows it here.
    print("\n","C")
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    # -- display relevant information that can be extracted from the dataset
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # -- load the items and display the Dataframe
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    # -- display relevant information that can be extracted from the dataset