Skip to content
Extraits de code Groupes Projets
algorithme_netflix.py 24,5 ko
Newer Older
  • Learn to ignore specific revisions
  • # afficher le premier document
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    import pandas as pd
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import tabulate
    import os
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import csv
    
    
    #display(data_2)
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    """file_path_1 = "/content/drive/MyDrive/Coding_project_2023/netflix_titles-2.csv"""
    # Location of the titles catalog on the local machine.
    file_path_1 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv"

    # Catalog of titles, loaded once at import time and shared by every menu
    # function below.
    data_1 = pd.read_csv(file_path_1)

    # Alternative (Drive-mounted) location of the ratings, kept for reference:
    # file_path_2 = "/content/drive/MyDrive/Coding_project_2023/ratings.csv"
    file_path_2 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv"

    # Ratings table: a 'show_id' column followed by rating columns.
    data_2 = pd.read_csv(file_path_2)
    
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Show the catalog
    def catalog(data_1):
        """Print the first 100 rows of the catalog, then offer to save it.

        The preview is limited to 100 rows, but the complete ``data_1`` is
        what gets handed to ``save_to_csv``.
        """
        preview = data_1.head(100)
        print(preview)
        # save_to_csv itself asks whether the data should be written to disk.
        save_to_csv(data_1)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def movies(data_1):
        """Print the title of every movie and offer to save the titles.

        Rows are selected where the 'type' column equals 'Movie'.
        """
        films = data_1[data_1['type'] == 'Movie']
        movie_titles = films['title'].tolist()
        print(movie_titles)

        # Pass a one-column DataFrame rather than the list: save_to_csv calls
        # .to_csv() on its argument, which a plain list does not provide.
        save_to_csv(films[['title']])
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def series(data_1):
        """Print the title of every TV show and offer to save the titles.

        Rows are selected where the 'type' column equals 'TV Show'.
        """
        shows = data_1[data_1['type'] == 'TV Show']
        series_titles = shows['title'].tolist()
        print(series_titles)

        # Pass a one-column DataFrame rather than the list: save_to_csv calls
        # .to_csv() on its argument, which a plain list does not provide.
        save_to_csv(shows[['title']])
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def by_year(data_1):
        """Show movies and/or series ordered by release year.

        The user first picks the media type, then the sort direction. The
        sorted table is printed and offered for saving. An unrecognised
        direction prints an error and returns without output.
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            return  # Defensive: nothing to sort if the filter yields nothing.

        sort_type = input("Do you want to sort the years in ascending or descending order? (ascending/descending)")
        # Accept "Ascending", " descending " and similar variants.
        direction = sort_type.strip().lower()
        if direction not in ("ascending", "descending"):
            print("Invalid choice. The dataset could not be sorted!")
            return

        sorted_data = filtered_data.sort_values(by='release_year', ascending=(direction == "ascending"))
        print(sorted_data)
        save_to_csv(sorted_data)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def by_country(data_1):
        """Show movies and/or series whose 'country' field mentions a country.

        Prints the sorted list of countries found in the filtered data, asks
        for a country name and prints (and offers to save) every row whose
        'country' value contains that name, ignoring case.
        """
        filtered_data = filter_media_type(data_1)

        # A 'country' cell may list several countries separated by ', '.
        country_names = {
            name
            for names in filtered_data['country'].dropna().str.split(', ')
            for name in names
            if name != ''
        }
        print("List of all available countries:")
        print(sorted(country_names))

        country_input = input("Enter the name of the country to display movies and/or series: ").capitalize()
        # regex=False: the user's text is a literal substring, so characters
        # such as '(' or '+' cannot raise a regex error or change the match.
        matches = filtered_data['country'].str.lower().str.contains(country_input.lower(), regex=False, na=False)
        country_data = filtered_data[matches]

        if country_data.empty:
            print(f"No movies or series found for the country {country_input}.")
            return

        print(country_data)
        save_to_csv(country_data)
     # Be careful, you need to ask each time if they want to save the list to a .csv
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def genre(data_1):
        """Show movies and/or series whose 'listed_in' field mentions a genre.

        The list of genres shown to the user is built from the whole catalog
        (``data_1``), while the search runs on the media-type-filtered rows.
        """
        filtered_data = filter_media_type(data_1)

        # A 'listed_in' cell may hold several genres separated by ', '.
        genre_names = {
            name
            for names in data_1['listed_in'].dropna().str.split(', ')
            for name in names
            if name != ''
        }
        print("List of all possible genres:")
        print(sorted(genre_names))

        type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize()
        # regex=False: treat the user's text as a literal substring.
        matches = filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), regex=False, na=False)
        type_data = filtered_data[matches]

        if type_data.empty:
            print(f"No movies or series found for the type {type_input}.")
            return

        print(type_data)
        save_to_csv(type_data)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def duration(data_1):
        """Show movies and/or series of a genre, ordered by duration.

        The 'duration' column is text, so it is ordered by its leading number
        rather than alphabetically (which would put "100 min" before
        "90 min"). Units are not converted: when movies and series are shown
        together, only the numeric part is compared.
        """
        filtered_data = filter_media_type(data_1)

        genre_names = {
            name
            for names in data_1['listed_in'].dropna().str.split(', ')
            for name in names
            if name != ''
        }
        print("List of all possible genres:")
        print(sorted(genre_names))

        type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize()
        # regex=False: treat the user's text as a literal substring.
        matches = filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), regex=False, na=False)
        type_data = filtered_data[matches]

        if type_data.empty:
            print(f"No movies or series found for the type {type_input}.")
            return

        # Sort on the first run of digits in each value; values without a
        # number become NaN and are placed last by sort_values.
        type_data_sorted = type_data.sort_values(
            by='duration',
            ascending=True,
            key=lambda col: pd.to_numeric(col.str.extract(r'(\d+)', expand=False), errors='coerce'),
        )
        print(type_data_sorted)
        save_to_csv(type_data_sorted)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def director(data_1):
        """Show movies and/or series by a given director, oldest first.

        Prints the sorted list of directors found in the whole catalog, asks
        for a name and prints (and offers to save) the matching rows ordered
        by release year.
        """
        filtered_data = filter_media_type(data_1)

        # A 'director' cell may list several people separated by ', '.
        director_names = {
            name
            for names in data_1['director'].dropna().str.split(', ')
            for name in names
            if name != ''
        }
        print("List of all possible directors: ")
        # The original printed the header above but never the list itself.
        print(sorted(director_names))

        director_input = input("Enter the name of the director to display movies and/or series: ")
        # regex=False: treat the user's text as a literal substring.
        matches = filtered_data['director'].str.lower().str.contains(director_input.lower(), regex=False, na=False)
        director_data = filtered_data[matches]

        if director_data.empty:
            print(f"No person found with the name {director_input}.")
            return

        # Print and save the sorted frame; the original sorted the rows and
        # then displayed the unsorted ones.
        director_data_sorted = director_data.sort_values(by='release_year', ascending=True)
        print(director_data_sorted)
        save_to_csv(director_data_sorted)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def actor(data_1):
        """Show movies and/or series featuring a given actor, oldest first.

        Prints the sorted list of actors found in the whole catalog, asks for
        a name and prints (and offers to save) the matching rows ordered by
        release year.
        """
        filtered_data = filter_media_type(data_1)

        # A set keeps the de-duplication linear; 'cast' cells list several
        # people separated by ', '.
        actor_names = {
            name
            for names in data_1['cast'].dropna().str.split(', ')
            for name in names
            if name != ''
        }
        print("List of all possible actors: ")
        # The original printed the header above but never the list itself.
        print(sorted(actor_names))

        actor_input = input("Enter the name of the actor to display movies and/or series: ")
        # regex=False: treat the user's text as a literal substring.
        matches = filtered_data['cast'].str.lower().str.contains(actor_input.lower(), regex=False, na=False)
        actor_data = filtered_data[matches]

        if actor_data.empty:
            print(f"No actor found with the name {actor_input}.")
            return

        actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True)
        print(actor_data_sorted)
        save_to_csv(actor_data_sorted)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def specific_genre_director(data_1):
        """Count and show the titles a director made in a given genre.

        Both the director and the genre are matched as case-insensitive
        substrings of the 'director' and 'listed_in' columns.
        """
        filtered_data = filter_media_type(data_1)

        # dropna() first: a NaN among the unique values makes str.join raise
        # TypeError.
        unique_directors = filtered_data['director'].dropna().unique()
        print("List of all available directors:")
        print(', '.join(unique_directors))

        director_input = input("Enter the name of the director to display movies and/or series: ")

        unique_types = filtered_data['listed_in'].dropna().unique()
        print("\nList of all available types:")
        print(', '.join(unique_types))

        type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize()

        # regex=False: treat both inputs as literal substrings.
        by_director = filtered_data['director'].str.lower().str.contains(director_input.lower(), regex=False, na=False)
        by_type = filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), regex=False, na=False)
        director_type_data = filtered_data[by_director & by_type]

        if director_type_data.empty:
            print(f"No movies or series found for the director {director_input} and type {type_input}.")
            return

        count = len(director_type_data)
        print(f"The director {director_input} has directed {count} movie(s) or series of type {type_input}.")
        print(director_type_data)
        save_to_csv(director_type_data)
    
    
    def specific_genre_actor(data_1):
        """Count and show the titles an actor appeared in for a given genre.

        Both the actor and the genre are matched as case-insensitive
        substrings of the 'cast' and 'listed_in' columns.
        """
        filtered_data = filter_media_type(data_1)

        # dropna() first: a NaN among the unique values makes str.join raise
        # TypeError.
        unique_actors = filtered_data['cast'].dropna().unique()
        print("List of all available actors:")
        print(', '.join(unique_actors))

        actor_input = input("Enter the name of the actor to display movies and/or series: ")

        unique_types = filtered_data['listed_in'].dropna().unique()
        print("\nList of all available types:")
        print(', '.join(unique_types))

        type_input = input("Enter the type (romantic, action, drama, etc.): ").capitalize()

        # regex=False: treat both inputs as literal substrings.
        by_actor = filtered_data['cast'].str.lower().str.contains(actor_input.lower(), regex=False, na=False)
        by_type = filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), regex=False, na=False)
        actor_type_data = filtered_data[by_actor & by_type]

        if actor_type_data.empty:
            print(f"No movies or series found for the actor {actor_input} and type {type_input}.")
            return

        count = len(actor_type_data)
        print(f"The actor {actor_input} has acted in {count} movie(s) or series of type {type_input}.")
        print(actor_type_data)
        save_to_csv(actor_type_data)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # rating
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Computed at module level (not inside a function) so the rating
    # functions can sort on the 'appreciation (%)' column of data_2.
    # Per-show score: mean of every column except 'show_id', times 100.
    notes = data_2.drop(columns='show_id')
    mean_type = notes.mean(axis=1).mul(100)
    data_2['appreciation (%)'] = mean_type
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def most_rated(data_1, data_2):
        """Print movies and/or series ordered from best to worst rated.

        The media-type-filtered catalog is joined with ``data_2`` on
        'show_id' and sorted on the 'appreciation (%)' column, highest first.
        The full sorted table is offered for saving.
        """
        filtered_data = filter_media_type(data_1)

        link_between = pd.merge(filtered_data, data_2, on='show_id')
        link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)

        print("Films et séries les mieux notés :")
        print(link_between_sorted[['show_id', 'title', 'type', 'appreciation (%)']])
        save_to_csv(link_between_sorted)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    def most_rated_year(data_1, data_2):
        """Print the best-rated movies and/or series released in a given year.

        Shows the release years present in the catalog, asks for one, then
        joins that year's (media-type-filtered) titles with ``data_2`` on
        'show_id' and sorts them on 'appreciation (%)', highest first.
        A non-numeric year prints an error and returns.
        """
        available_years = data_1['release_year'].unique()
        print("Available years: ", available_years)

        year = input("Enter a release year: ")
        try:
            year = int(year)
        except ValueError:
            print("Please enter a valid year.")
            return

        # Restrict to the chosen year before asking for the media type.
        released_that_year = data_1[data_1['release_year'] == year]
        filtered_data = filter_media_type(released_that_year)

        link_between = pd.merge(filtered_data, data_2, on='show_id')
        link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)

        print(f"Top-rated shows for the year {year}:")
        print(link_between_sorted[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
        save_to_csv(link_between_sorted)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def most_rated_recent(data_1, data_2):
        """Print the 20 most recent, best-rated titles.

        The catalog is joined with ``data_2`` on 'show_id' and ordered by
        release year (newest first), then by 'appreciation (%)' (highest
        first) within a year. Only the first 20 rows are printed and offered
        for saving.
        """
        merged_data = pd.merge(data_1, data_2, on='show_id')

        # Release year is the primary key, appreciation breaks ties.
        sorted_data = merged_data.sort_values(
            by=['release_year', 'appreciation (%)'],
            ascending=[False, False],
        )

        top_20_data = sorted_data.head(20)
        print("Top 20 most rated and recent shows:")
        print(top_20_data[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
        save_to_csv(top_20_data)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Parental rating codes
    
    def parental_code(data_1):
        """Print the distinct values of the 'rating' column and offer to save them.

        Codes are listed in order of first appearance; missing values are
        skipped.
        """
        # dict.fromkeys de-duplicates while preserving first-seen order.
        code_list = list(dict.fromkeys(
            code
            for codes in data_1['rating'].dropna().str.split(', ')
            for code in codes
            if code != ''
        ))
        print("Here are the parental codes: ")
        print(code_list)

        # save_to_csv calls .to_csv() on its argument, so wrap the list in a
        # one-column DataFrame instead of passing the list itself.
        save_to_csv(pd.DataFrame({'rating': code_list}))
    
    
    
    
      #code_parental = input("Entrez le code de contrôle parental : PG-13, TV-MA")
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def directors_nationality(data_1):
        """List directors with the countries of their titles, most prolific first.

        For every director named in the 'director' column, collects the
        countries found in the 'country' column of their titles and counts
        the titles. The result is printed and offered for saving.
        Rows without a director are skipped; a missing country adds nothing
        to the director's set of countries.
        """
        if 'director' not in data_1.columns:
            print("The dataset does not contain a 'director' column.")
            return

        directors_nationality_dict = {}
        for _, row in data_1.iterrows():
            # Missing directors are NaN (a float), which has no .split().
            if pd.isna(row['director']):
                continue

            country_cell = row['country']
            # Both columns hold ', '-separated lists.
            countries = set() if pd.isna(country_cell) else {c for c in country_cell.split(', ') if c}

            for name in row['director'].split(', '):
                entry = directors_nationality_dict.setdefault(name, {'nationalities': set(), 'count': 0})
                entry['nationalities'].update(countries)
                entry['count'] += 1

        sorted_directors = sorted(
            directors_nationality_dict.items(),
            key=lambda item: item[1]['count'],
            reverse=True,
        )

        print("Directors and their nationalities, sorted by the number of movies and series produced:")
        for name, info in sorted_directors:
            # sorted() makes the order of the countries deterministic.
            print(f"{name}: {', '.join(sorted(info['nationalities']))} - {info['count']} movies/series")

        # save_to_csv needs an object with .to_csv(); build a table from the
        # ranking instead of passing the list of tuples.
        summary = pd.DataFrame(
            [
                {
                    'director': name,
                    'nationalities': ', '.join(sorted(info['nationalities'])),
                    'count': info['count'],
                }
                for name, info in sorted_directors
            ],
            columns=['director', 'nationalities', 'count'],
        )
        save_to_csv(summary)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    
    
    
    
    # Lets the user restrict the data to movies, TV shows, or both
    def filter_media_type(data):
        """Ask which media type to keep and return the matching rows.

        The answer is compared case-insensitively. 'both' and any
        unrecognised answer return ``data`` unchanged (the latter after a
        warning); 'movie' or 'tv show' return the rows whose lower-cased
        'type' equals the answer.
        """
        media_type = input("What type of media do you want to display? (Movie/TV Show/Both): ").lower()

        if media_type == 'both':
            return data

        if media_type in ('movie', 'tv show'):
            wanted = data['type'].str.lower() == media_type
            return data[wanted]

        # Unknown answer: fall back to the unfiltered data.
        print("Invalid choice. Displaying all types of media.")
        return data
    
    
    
    
    # Basic statistics about the catalog
    def basic_statistics(data_1):
        """Print how many movies and series exist and a per-country tally.

        Requires the 'type' and 'country' columns; prints a message and
        returns if either is missing. Nothing is returned or saved.
        """
        if not {'type', 'country'}.issubset(data_1.columns):
            print("The dataset does not contain the necessary columns.")
            return

        movies_count = int((data_1['type'] == 'Movie').sum())
        series_count = int((data_1['type'] == 'TV Show').sum())

        print(f"Number of movies in the catalog: {movies_count}")
        print(f"Number of series in the catalog: {series_count}")

        if movies_count > series_count:
            print("There are more movies than series in the catalog.")
        elif movies_count < series_count:
            print("There are more series than movies in the catalog.")
        else:
            print("The catalog has an equal number of movies and series.")

        # A 'country' cell may list several countries separated by ', ';
        # explode() gives each of them its own row before counting.
        country_counts = data_1['country'].str.split(', ').explode().value_counts()
        print("\nCountries that produced movies/series, sorted from most to least productive:")
        print(country_counts)
    
    
    
     # Reminder: always ask whether the user wants to save the list to a .csv file
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def _open_with_default_app(path):
        """Open *path* with the operating system's default application."""
        import subprocess
        import sys

        try:
            if sys.platform.startswith('win'):
                os.startfile(path)
            elif sys.platform == 'darwin':
                subprocess.run(['open', path], check=False)
            else:
                subprocess.run(['xdg-open', path], check=False)
        except OSError as exc:
            # Best effort only: the file is already saved at this point.
            print(f"Could not open {path}: {exc}")


    def save_to_csv(data, default_filename='output.csv'):
        """Interactively offer to write *data* to a CSV file.

        Args:
            data: A DataFrame (anything with ``to_csv``) or a plain
                list-like, which is wrapped in a DataFrame before writing.
            default_filename: File used when the user just presses Enter.

        The user is asked whether to save, for a file name, whether to
        overwrite an existing file (otherwise a new name is requested), and
        finally whether to open the saved file.
        """
        save_choice = input("Do you want to save the data to a CSV file? (YES/NO): ").upper()
        if save_choice != 'YES':
            print("Data not saved.")
            return

        # Several callers pass plain lists, which have no to_csv() method.
        if not hasattr(data, 'to_csv'):
            data = pd.DataFrame(data)

        file_name = input("Enter the file name (DO NOT include .csv extension, or press Enter for the default): ").strip()
        # Decide on the default BEFORE appending the extension; appending
        # first turned an empty answer into ".csv" and hid the default.
        file_name = f"{file_name}.csv" if file_name else default_filename

        if os.path.exists(file_name):
            overwrite_choice = input(f"The file '{file_name}' already exists. Do you want to overwrite it? (YES/NO): ").upper()
            if overwrite_choice != 'YES':
                new_filename = input("Enter a new file name (DO NOT include .csv extension): ")
                # From here on the new name is the one written AND opened.
                file_name = new_filename + ".csv"

        data.to_csv(file_name, index=False)
        print(f"Data saved to {file_name}")

        open_choice = input("Do you want to open the saved file? (YES/NO): ").upper()
        if open_choice == 'YES':
            # The name comes from user input, so it must not be handed to a
            # shell as os.system(file_name) did.
            _open_with_default_app(file_name)
    
    
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Start of the recommendation algorithm
    
    # Load the CSV file
    
    def read_movie_series_info(file_path):
        """Read the titles CSV into ``{show_id: [title, [category, ...]]}``.

        The first row is skipped as a header. Every remaining row must have
        exactly 12 columns, in the order show_id, type, title, director,
        cast, country, date_added, release_year, rating, duration, listed_in,
        description; otherwise the unpacking raises ValueError. The
        'listed_in' value is split on ', ' into a list of categories.
        """
        shows = {}

        # newline='' lets the csv module handle line endings itself, which
        # matters for quoted fields containing line breaks.
        with open(file_path, 'r', encoding='utf-8', newline='') as info_file:
            info_reader = csv.reader(info_file)
            next(info_reader)  # Skip header row
            for row in info_reader:
                # Only show_id, title and listed_in are used.
                show_id, _, title, _, _, _, _, _, _, _, listed_in, _ = row
                shows[show_id] = [title, listed_in.split(', ')]
        return shows
    
    
    def read_user_ratings(file_path):
        """Read the ratings CSV into ``{show_id: {user_id: rating}}``.

        The header row supplies the user ids (every column after the first,
        converted to int). Each following row holds a show id followed by one
        integer rating per user. Non-integer cells raise ValueError.
        """
        ratings = {}

        # newline='' is the documented way to open files for the csv module.
        with open(file_path, 'r', encoding='utf-8', newline='') as ratings_file:
            ratings_reader = csv.reader(ratings_file)

            header = next(ratings_reader)
            user_ids = [int(cell) for cell in header[1:]]

            for row in ratings_reader:
                show_id, *cells = row
                ratings[show_id] = dict(zip(user_ids, (int(cell) for cell in cells)))

        return ratings
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    def create_category_matrix(catalog, categories):
        """Build a labelled category co-occurrence matrix.

        Args:
            catalog: Mapping ``show_id -> [title, [category, ...]]``.
            categories: Ordered list of category names.

        Returns:
            A list of lists of size ``len(categories) + 1`` squared. Row 0
            and column 0 hold the category names (cell ``[0][0]`` stays 0);
            cell ``[i + 1][j + 1]`` counts the shows listing both
            ``categories[i]`` and ``categories[j]``, so the diagonal counts
            the shows in each category.
        """
        size = len(categories) + 1
        category_matrix = [[0] * size for _ in range(size)]

        # Labels go in the first row and the first column.
        for position, name in enumerate(categories, start=1):
            category_matrix[0][position] = name
            category_matrix[position][0] = name

        for show_info in catalog.values():
            show_categories = show_info[1]
            # Matrix positions of the categories this show belongs to.
            present = [
                position
                for position, name in enumerate(categories, start=1)
                if name in show_categories
            ]
            for row in present:
                for col in present:
                    category_matrix[row][col] += 1

        return category_matrix
    
    def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=0.5):
        """Suggest up to five unrated shows that share categories with rated ones.

        Args:
            user_id: User identifier; converted with ``int()``.
            catalog: Mapping ``show_id -> [title, [category, ...]]``.
            user_ratings: Mapping ``show_id -> {user_id: rating}``. A rating
                of 0 means "not rated yet"; a rating above 0 means the user
                has rated the show.
            category_matrix: Labelled co-occurrence matrix whose first row
                holds the category names (as built by
                ``create_category_matrix``).
            threshold: Minimum similarity (exclusive) for a suggestion.

        Returns:
            At most five ``(show_id, {'title': ..., 'similarity': ...})``
            tuples, highest similarity first.

        Only shows with an explicit 0 rating for the user are candidates. A
        candidate needs at least one category in common with the shows the
        user rated; its similarity is, summed over its categories, the
        smallest co-occurrence count with any of those common categories.
        """
        user_id = int(user_id)

        # Take the label -> position mapping from the matrix header instead
        # of rebuilding it from a set, whose order is not tied to the matrix.
        category_index = {
            label: position
            for position, label in enumerate(category_matrix[0])
            if position > 0
        }

        # Categories of every show the user has already rated.
        rated_categories = set()
        for show_id, per_user in user_ratings.items():
            if show_id in catalog and per_user.get(user_id, 0) > 0:
                rated_categories.update(catalog[show_id][1])

        suggestions = {}
        for show_id, show_info in catalog.items():
            # Candidates are shows with an explicit "not rated" (0) entry.
            if user_ratings.get(show_id, {}).get(user_id) != 0:
                continue

            # Ignore categories the matrix does not know about.
            known = [name for name in show_info[1] if name in category_index]
            # The original compared the show's categories with themselves,
            # so the user's history never influenced the result.
            common_categories = [name for name in known if name in rated_categories]
            if not common_categories:
                continue

            similarity = sum(
                min(
                    category_matrix[category_index[name]][category_index[shared]]
                    for shared in common_categories
                )
                for name in known
            )

            if similarity > threshold:
                suggestions[show_id] = {'title': show_info[0], 'similarity': similarity}

        ranked = sorted(suggestions.items(), key=lambda item: item[1]['similarity'], reverse=True)
        return ranked[:5]
    
    def recommandation_algorithm():
        """Ask for a user id, list what that user has rated and print suggestions.

        Reads the catalog and the ratings from the module-level
        ``file_path_1`` and ``file_path_2``, builds the category matrix and
        prints the titles returned by ``recommend_movies``. An invalid user
        id prints an error and returns to the caller.
        """
        user_id = input("quel est ton user ? ")

        try:
            user_id = int(user_id)
        except ValueError:
            print("Veuillez entrer un identifiant d'utilisateur valide.")
            # Return to the menu; exit() here terminated the whole program.
            return

        # Same files as the ones loaded at module level.
        catalog = read_movie_series_info(file_path_1)
        ratings = read_user_ratings(file_path_2)

        categories = list({category for movie_info in catalog.values() for category in movie_info[1]})
        category_matrix = create_category_matrix(catalog, categories)

        print("Films déjà vus par l'utilisateur:")
        for show_id, user_rating in ratings.items():
            # A rating above 0 marks a show the user has rated; skip rated
            # shows that are missing from the catalog instead of crashing.
            if user_rating.get(user_id, 0) > 0 and show_id in catalog:
                print(f"- {catalog[show_id][0]}")

        recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5)

        print("\nTop 5 recommandations:")
        for _, info in recommended_movies:
            print(f"Title: {info['title']}, Similarity: {info['similarity']}")
    
    
    # Menu creation
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def action():
        """Show the main menu, read one command and run the matching feature.

        Returns:
            False when the user asks to stop (``STOP``, in any case, or
            ``...``); None after any other command, valid or not.
        """
        menu_lines = (
            "Here are the different options available:",
            "1. View the entire catalog",
            "2. View all movies in the catalog",
            "3. View all series",
            "4. View all series, movies or both by year",
            "5. View all series, movies or both by country",
            "6. View all series, movies or both by type",
            "7. View all series, movies or both by type sorted by duration",
            "8. View series, movies or both directed by a specific director and sorted by year",
            "9. View series, movies or both featuring a specific actor and sorted by year",
            "10. View how many series, movies or both and series directed by a director in a specific genre",
            "11. View how many series, movies or both an actor has played in",
            "12. Display the highest-rated series, movies or both",
            "13. Display the highest-rated series, movies or both for a specific year",
            "14. Display recent highest-rated series, movies or both",
            "15. Display movies and series based on parental control code",
            "16. Display the nationalities of directors and sort the list based on the number of movies and series directed",
            "17. Display basic statistics",
            "18. Get Personalized Recommendations",
            "... Enter STOP to stop",
        )
        for line in menu_lines:
            print(line)
        command = input("Enter the number of what you want to do: ").strip()

        # The menu tells the user to enter STOP, but only "..." used to work.
        if command == "..." or command.upper() == "STOP":
            return False

        # Functions are looked up when called, so this table may reference
        # features defined anywhere in the module.
        handlers = {
            "1": lambda: catalog(data_1),
            "2": lambda: movies(data_1),
            "3": lambda: series(data_1),
            "4": lambda: by_year(data_1),
            "5": lambda: by_country(data_1),
            "6": lambda: genre(data_1),
            "7": lambda: duration(data_1),
            "8": lambda: director(data_1),
            "9": lambda: actor(data_1),
            "10": lambda: specific_genre_director(data_1),
            "11": lambda: specific_genre_actor(data_1),
            "12": lambda: most_rated(data_1, data_2),
            "13": lambda: most_rated_year(data_1, data_2),
            "14": lambda: most_rated_recent(data_1, data_2),
            "15": lambda: parental_code(data_1),
            "16": lambda: directors_nationality(data_1),
            "17": lambda: basic_statistics(data_1),
            "18": lambda: recommandation_algorithm(),
        }

        handler = handlers.get(command)
        if handler is None:
            print("Invalid choice. Please enter a number between 1 and 18, or STOP.")
            return None

        handler()
        return None
    
    
    
     # Reminder: always ask whether the user wants to save the list to a .csv file
    
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Responses returned by action(), collected until the user stops.
    menu = []

    # Keep showing the menu until action() reports a stop request (False).
    while True:
        response = action()
        if response is False:
            break

        if response == True:
            # A truthy "reset" response clears what was collected so far.
            menu = []
        else:
            menu.append(response)