Skip to content
Extraits de code Groupes Projets
algorithme_netflix.py 34,7 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    # ALL the imports
    
    import pandas as pd
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import tabulate
    import os
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import csv
    
    Adrien Payen's avatar
    Adrien Payen a validé
    from fuzzywuzzy import process
    
    
    #display(data_2)
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    """file_path_1 = "/content/drive/MyDrive/Coding_project_2023/netflix_titles-2.csv"""
    # Absolute, machine-specific location of the Netflix titles catalogue (CSV).
    # NOTE(review): this path only exists on the author's machine - confirm how
    # other environments are expected to provide the file.
    file_path_1 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv"
    
    # Catalogue DataFrame, loaded once at import time and passed to the
    # functions below as `data_1`.
    data_1 = pd.read_csv(file_path_1)
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    """file_path_2 = "/content/drive/MyDrive/Coding_project_2023/ratings.csv"""
    # Absolute, machine-specific location of the ratings file (CSV).
    # NOTE(review): this path only exists on the author's machine - confirm how
    # other environments are expected to provide the file.
    file_path_2 ="/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv"
    
    # Ratings DataFrame, loaded once at import time; it is joined to the
    # catalogue on 'show_id' by the rating functions further down.
    data_2 = pd.read_csv(file_path_2)
    
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Show the catalog
    def catalog(data_1):
        """Print a preview of the catalogue and hand it to the CSV export step.

        Args:
            data_1: catalogue DataFrame.

        Only the first 100 rows are printed, but the complete DataFrame is
        passed to ``save_to_csv`` (defined elsewhere in this file; per the
        author's note it is expected to ask the user whether to export).
        """
        preview = data_1.head(100)
        print(preview)

        # Export works on the full catalogue, not just the printed preview.
        save_to_csv(data_1)
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def movies(data_1):
        """Print the titles of every movie in the catalogue and offer to save them.

        Args:
            data_1: catalogue DataFrame with at least 'type' and 'title' columns.

        Returns:
            None. The one-column table ('Movie Titles') is printed and passed to
            ``save_to_csv`` (defined elsewhere in this file).
        """
        is_movie = data_1['type'] == 'Movie'
        titles = data_1.loc[is_movie, 'title'].tolist()
        movie_df = pd.DataFrame({'Movie Titles': titles})

        # TODO: display only the first 100 titles.
        print(movie_df)
        save_to_csv(movie_df)
        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def series(data_1):
        """Print the titles of every TV show in the catalogue and offer to save them.

        Args:
            data_1: catalogue DataFrame with at least 'type' and 'title' columns.

        Returns:
            None. The one-column table is printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        # Local name differs from the function name so the function is not
        # shadowed inside its own body.
        tv_shows = data_1[data_1['type'] == 'TV Show']
        series_titles = tv_shows['title'].tolist()

        # The column used to be labelled 'Movie Titles' (copy-paste from
        # movies()), which mislabelled both the printed table and the CSV header.
        series_df = pd.DataFrame({'Series Titles': series_titles})

        print(series_df)
        save_to_csv(series_df)
        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def by_year(data_1):
        """Print the selected media sorted by release year and offer to save them.

        Args:
            data_1: catalogue DataFrame with a 'release_year' column.

        Returns:
            None. Nothing is printed when ``filter_media_type`` (defined elsewhere
            in this file) returns None.
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            return

        # Menu answer -> value for sort_values(ascending=...).
        ascending_by_choice = {"1": True, "2": False}

        # Keep asking until one of the two menu entries is typed.
        while True:
            print("1. Ascending")
            print("2. Descending")
            sort_type = input("Do you want to sort the years in ascending or descending order? enter the number : ")
            if sort_type in ascending_by_choice:
                break
            print("Invalid choice. Please enter 1 for ascending or 2 for descending.")

        sorted_data = filtered_data.sort_values(
            by='release_year', ascending=ascending_by_choice[sort_type]
        )

        print(sorted_data)
        save_to_csv(sorted_data)
        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def by_country(data_1):
        """Print the selected media produced in a country chosen by the user.

        The country list is built from the 'country' column of the filtered
        selection (comma-separated values, whitespace stripped). The user's
        answer is fuzzy-matched against that list and accepted once the best
        score is at least 80.

        Args:
            data_1: catalogue DataFrame with a 'country' column.

        Returns:
            None. Matching rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        # A set de-duplicates in constant time per name.
        country_set = set()
        for countries in filtered_data['country'].dropna().str.split(','):
            country_set.update(name.strip() for name in countries)
        country_set.discard('')
        country_list = sorted(country_set)

        print("List of all available countries:")
        print(country_list)

        if not country_list:
            # extractOne() returns None for an empty choice list.
            print("No country is available for the selected media.")
            return

        while True:
            country_input = input("Enter the name of the country to display movies and/or series: ")

            # Use FuzzyWuzzy to find the closest match
            best = process.extractOne(country_input, country_list)
            if best[1] >= 80:  # Adjust the similarity threshold as needed
                country_input = best[0]
                break
            print(f"Invalid country name. The closest match is: {best[0]}")

        # The accepted value always comes from country_list, so no further
        # membership check is needed.
        print(f"You selected: {country_input}")

        # regex=False: the name is searched literally, not as a pattern.
        in_country = filtered_data['country'].str.lower().str.contains(
            country_input.lower(), regex=False, na=False
        )
        country_data = filtered_data[in_country]

        if not country_data.empty:
            print(country_data)
            save_to_csv(country_data)
        else:
            print(f"No movies or series found for the country {country_input}.")

        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
     # Be careful, you need to ask each time if they want to save the list to a .csv
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def genre(data_1):
        """Print the selected media that belong to a genre chosen by the user.

        The genre list is built from the 'listed_in' column of the whole
        catalogue, while the search runs on the selection returned by
        ``filter_media_type``. The user's answer is fuzzy-matched against the
        list and accepted once the best score is at least 80.

        Args:
            data_1: catalogue DataFrame with a 'listed_in' column.

        Returns:
            None. Matching rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        # Set comprehension: de-duplicates without an O(n) list scan per name
        # and avoids a loop variable that shadows this function's name.
        genre_list = sorted({
            name
            for names in data_1['listed_in'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("List of all possible genres:")
        print(genre_list)

        if not genre_list:
            # extractOne() returns None for an empty choice list.
            print("No genre is available in the catalogue.")
            return

        while True:
            type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ")

            # Use FuzzyWuzzy to find the closest match
            best = process.extractOne(type_input, genre_list)
            if best[1] >= 80:  # Adjust the similarity threshold as needed
                type_input = best[0]
                break
            print(f"Invalid genre. The closest match is: {best[0]}")

        # The accepted value always comes from genre_list.
        print(f"You selected: {type_input}")

        # regex=False: characters such as '&' or '-' are matched literally.
        has_genre = filtered_data['listed_in'].str.lower().str.contains(
            type_input.lower(), regex=False, na=False
        )
        type_data = filtered_data[has_genre]

        if not type_data.empty:
            print(type_data)
            save_to_csv(type_data)
        else:
            print(f"No movies or series found for the type {type_input}.")

        return
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def duration(data_1):
        """Print the selected media of one genre, sorted by duration.

        The user picks a genre (fuzzy-matched, accepted at a score of 80 or more)
        and a sort direction. Rows are grouped by 'type' (always ascending) and,
        inside each type, ordered by the first number found in 'duration'.

        Args:
            data_1: catalogue DataFrame with 'listed_in', 'type' and 'duration'
                columns.

        Returns:
            None. Sorted rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        genre_list = sorted({
            name
            for names in data_1['listed_in'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("List of all possible genres:")
        print(genre_list)

        if not genre_list:
            # extractOne() returns None for an empty choice list.
            print("No genre is available in the catalogue.")
            return

        while True:
            type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ")

            # Use FuzzyWuzzy to find the closest match
            best = process.extractOne(type_input, genre_list)
            if best[1] >= 80:  # Adjust the similarity threshold as needed
                type_input = best[0]
                break
            print(f"Invalid genre. The closest match is: {best[0]}")

        # The accepted value always comes from genre_list.
        print(f"You selected: {type_input}")

        print("What type of sorting do you want? ")
        print("1. Ascending")
        print("2. Descending")

        while True:
            sort_order = input("Enter the sort type number (1/2): ")
            if sort_order in ('1', '2'):
                break
            print("Invalid sort order. Please enter 1 for ascending or 2 for descending.")

        # regex=False: the genre is searched literally, not as a pattern.
        has_genre = filtered_data['listed_in'].str.lower().str.contains(
            type_input.lower(), regex=False, na=False
        )
        type_data = filtered_data[has_genre]

        if type_data.empty:
            print(f"No movies or series found for the type {type_input}.")
            return

        print(f"\nDisplaying data for {type_input} sorted in {'ascending' if sort_order == '1' else 'descending'} order:")

        # Sort on a temporary numeric key instead of overwriting 'duration':
        # - expand=False yields a Series (the default returns a DataFrame);
        # - to_numeric(errors='coerce') tolerates missing durations, which
        #   sort_values then places last, where astype(int) would raise;
        # - the original text ("90 min", "2 Seasons") is kept as-is.
        duration_value = pd.to_numeric(
            type_data['duration'].str.extract(r'(\d+)', expand=False),
            errors='coerce',
        )
        type_data_sorted = (
            type_data.assign(_duration_value=duration_value)
            .sort_values(by=['type', '_duration_value'], ascending=[True, sort_order == '1'])
            .drop(columns='_duration_value')
        )

        print(type_data_sorted)
        save_to_csv(type_data_sorted)
        return
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def director(data_1):
        """Print the selected media directed by a person chosen by the user.

        The director list is built from the 'director' column of the whole
        catalogue (values split on ', '), while the search runs on the selection
        returned by ``filter_media_type``. The answer is fuzzy-matched and
        accepted once the best score is at least 80. Results are sorted by
        release year, oldest first.

        Args:
            data_1: catalogue DataFrame with 'director' and 'release_year' columns.

        Returns:
            None. Matching rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        # Set comprehension: constant-time de-duplication instead of scanning a
        # growing list for every name.
        director_list = sorted({
            name
            for names in data_1['director'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("List of all possible directors: ")
        print(director_list)

        if not director_list:
            # extractOne() returns None for an empty choice list.
            print("No director is available in the catalogue.")
            return

        while True:
            director_input = input("Enter the name of the director to display movies and/or series: ")

            # Use FuzzyWuzzy to find the closest match
            best = process.extractOne(director_input, director_list)
            if best[1] >= 80:  # Adjust the similarity threshold as needed
                director_input = best[0]
                break
            print(f"Invalid director name. The closest match is: {best[0]}")

        # The accepted value always comes from director_list.
        print(f"You selected: {director_input}")

        # regex=False: dots or parentheses in a name are matched literally.
        directed_by = filtered_data['director'].str.lower().str.contains(
            director_input.lower(), regex=False, na=False
        )
        director_data = filtered_data[directed_by]

        if not director_data.empty:
            print(f"\nDisplaying data for movies and/or series directed by {director_input} sorted by release year in ascending order:")
            director_data_sorted = director_data.sort_values(by='release_year', ascending=True)
            print(director_data_sorted)
            save_to_csv(director_data_sorted)
        else:
            print(f"No person found with the name {director_input}.")

        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def actor(data_1):
        """Print the selected media featuring an actor chosen by the user.

        The actor list is built from the 'cast' column of the whole catalogue
        (values split on ', '), while the search runs on the selection returned
        by ``filter_media_type``. The answer is fuzzy-matched and accepted once
        the best score is at least 80. Results are sorted by release year,
        oldest first.

        Args:
            data_1: catalogue DataFrame with 'cast' and 'release_year' columns.

        Returns:
            None. Matching rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        # Set comprehension: the previous "not in list" check was quadratic in
        # the number of distinct actors.
        actor_list = sorted({
            name
            for names in data_1['cast'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("List of all possible actors: ")
        print(actor_list)

        if not actor_list:
            # extractOne() returns None for an empty choice list.
            print("No actor is available in the catalogue.")
            return

        while True:
            actor_input = input("Enter the name of the actor to display movies and/or series: ")

            # Use FuzzyWuzzy to find the closest match
            best = process.extractOne(actor_input, actor_list)
            if best[1] >= 80:  # Adjust the similarity threshold as needed
                actor_input = best[0]
                break
            print(f"Invalid actor name. The closest match is: {best[0]}")

        # The accepted value always comes from actor_list.
        print(f"You selected: {actor_input}")

        # regex=False: dots or parentheses in a name are matched literally.
        features_actor = filtered_data['cast'].str.lower().str.contains(
            actor_input.lower(), regex=False, na=False
        )
        actor_data = filtered_data[features_actor]

        if not actor_data.empty:
            print(f"\nDisplaying data for movies and/or series featuring {actor_input} sorted by release year in ascending order:")
            actor_data_sorted = actor_data.sort_values(by='release_year', ascending=True)
            print(actor_data_sorted)
            save_to_csv(actor_data_sorted)
        else:
            print(f"No actor found with the name {actor_input}.")

        return
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def specific_genre_director(data_1):
        """Count and print the selected media of one genre made by one director.

        Both the director and the genre are chosen from lists built from the
        filtered selection and fuzzy-matched (accepted at a score of 80 or more).

        Args:
            data_1: catalogue DataFrame with 'director' and 'listed_in' columns.

        Returns:
            None. Matching rows are printed with their count and passed to
            ``save_to_csv`` (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        # Missing directors are dropped (they used to show up as the string
        # 'nan') and co-directed entries are split on ', ' as in director(), so
        # each person is listed once under their own name.
        unique_directors = sorted({
            name
            for names in filtered_data['director'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("List of all available directors:")
        print(', '.join(unique_directors))

        if not unique_directors:
            # extractOne() returns None for an empty choice list.
            print("No director is available for the selected media.")
            return

        while True:
            director_input = input("Enter the name of the director to display movies and/or series: ")

            # Use FuzzyWuzzy to find the closest match
            director_match = process.extractOne(director_input, unique_directors)
            if director_match[1] >= 80:  # Adjust the similarity threshold as needed
                director_input = director_match[0]
                break
            print(f"Invalid director name. The closest match is: {director_match[0]}")

        # The accepted value always comes from unique_directors.
        print(f"You selected: {director_input}")

        # dropna() keeps sorted() from comparing NaN (a float) with strings.
        unique_types = sorted({
            name
            for names in filtered_data['listed_in'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("\nList of all available types:")
        print(', '.join(unique_types))

        if not unique_types:
            print("No type is available for the selected media.")
            return

        while True:
            # No capitalize(): the input is only used as the fuzzy-match query,
            # and the accepted value is the list entry, not the typed text.
            type_input = input("Enter the type (romantic, action, drama, etc.): ")

            # Use FuzzyWuzzy to find the closest match
            type_match = process.extractOne(type_input, unique_types)
            if type_match[1] >= 80:  # Adjust the similarity threshold as needed
                type_input = type_match[0]
                break
            print(f"Invalid type. The closest match is: {type_match[0]}")

        # The accepted value always comes from unique_types.
        print(f"You selected: {type_input}")

        # regex=False: both values are searched literally, not as patterns.
        directed_by = filtered_data['director'].str.lower().str.contains(
            director_input.lower(), regex=False, na=False
        )
        has_type = filtered_data['listed_in'].str.lower().str.contains(
            type_input.lower(), regex=False, na=False
        )
        director_type_data = filtered_data[directed_by & has_type]

        if not director_type_data.empty:
            count = len(director_type_data)
            print(f"The director {director_input} has directed {count} movie(s) or series of type {type_input}.")
            print(director_type_data)
            save_to_csv(director_type_data)
        else:
            print(f"No movies or series found for the director {director_input} and type {type_input}.")

        return
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    def specific_genre_actor(data_1):
        """Count and print the selected media of one genre featuring one actor.

        Both the actor and the genre are chosen from lists built from the
        filtered selection and fuzzy-matched (accepted at a score of 80 or more).

        Args:
            data_1: catalogue DataFrame with 'cast' and 'listed_in' columns.

        Returns:
            None. Matching rows are printed with their count and passed to
            ``save_to_csv`` (defined elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None.
            return

        # Rows without a cast are dropped; they used to add the string 'nan' to
        # the list of selectable actors.
        unique_actors = sorted({
            name
            for names in filtered_data['cast'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("List of all available actors:")
        print(', '.join(unique_actors))

        if not unique_actors:
            # extractOne() returns None for an empty choice list.
            print("No actor is available for the selected media.")
            return

        # Input actor name with fuzzy matching
        while True:
            actor_input = input("Enter the name of the actor to display movies and/or series: ")
            actor_match = process.extractOne(actor_input, unique_actors)
            if actor_match[1] >= 80:
                actor_input = actor_match[0]
                break
            print(f"Invalid actor name. The closest match is: {actor_match[0]}")

        # The accepted value always comes from unique_actors.
        print(f"You selected: {actor_input}")

        # dropna() keeps sorted() from comparing NaN (a float) with strings.
        unique_types = sorted({
            name
            for names in filtered_data['listed_in'].dropna().str.split(', ')
            for name in names
            if name
        })

        print("\nList of all available types:")
        print(', '.join(unique_types))

        if not unique_types:
            print("No type is available for the selected media.")
            return

        # Input type with fuzzy matching
        while True:
            type_input = input("Enter the type (romantic, action, drama, etc.): ")
            type_match = process.extractOne(type_input, unique_types)
            if type_match[1] >= 80:
                type_input = type_match[0]
                break
            print(f"Invalid type. The closest match is: {type_match[0]}")

        # The accepted value always comes from unique_types.
        print(f"You selected: {type_input}")

        # Filter on actor and type; regex=False searches both values literally.
        features_actor = filtered_data['cast'].str.lower().str.contains(
            actor_input.lower(), regex=False, na=False
        )
        has_type = filtered_data['listed_in'].str.lower().str.contains(
            type_input.lower(), regex=False, na=False
        )
        actor_type_data = filtered_data[features_actor & has_type]

        if not actor_type_data.empty:
            count = len(actor_type_data)
            print(f"The actor {actor_input} has acted in {count} movie(s) or series of type {type_input}.")
            print(actor_type_data)
            save_to_csv(actor_type_data)
        else:
            print(f"No movies or series found for the actor {actor_input} and type {type_input}.")

        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # rating
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Computed at module level (not inside a function) so that every rating
    # function sees the extra column on data_2.
    # Row-wise mean of all columns except 'show_id', scaled by 100.
    # NOTE(review): the "%" label presumably assumes scores between 0 and 1 -
    # confirm against ratings.csv.
    notes = data_2.drop(columns='show_id')
    mean_type = notes.mean(axis=1).mul(100)
    data_2['appreciation (%)'] = mean_type
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def most_rated(data_1, data_2):
        """Print the selected media ordered from best to worst appreciation.

        Args:
            data_1: catalogue DataFrame with a 'show_id' column.
            data_2: ratings DataFrame with 'show_id' and 'appreciation (%)'
                columns.

        Returns:
            None. The merged, sorted table is printed (id, title, type and
            appreciation only) and passed in full to ``save_to_csv`` (defined
            elsewhere in this file).
        """
        filtered_data = filter_media_type(data_1)
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None; merging
            # None would raise.
            return

        # pd.merge defaults to an inner join: titles without a ratings row
        # are left out.
        link_between = pd.merge(filtered_data, data_2, on='show_id')
        link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)

        print("Films et séries les mieux notés :")
        print(link_between_sorted[['show_id', 'title', 'type', 'appreciation (%)']])
        save_to_csv(link_between_sorted)
        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    def most_rated_year(data_1, data_2):
        """Print the best-appreciated selected media released in a chosen year.

        The user is asked for a release year until the answer is an integer
        present in the catalogue; the media-type filter is applied afterwards to
        the rows of that year.

        Args:
            data_1: catalogue DataFrame with 'show_id' and 'release_year' columns.
            data_2: ratings DataFrame with 'show_id' and 'appreciation (%)'
                columns.

        Returns:
            None. The merged, sorted table is printed and passed to
            ``save_to_csv`` (defined elsewhere in this file).
        """
        # Display all available unique release years
        available_years = sorted(data_1['release_year'].unique())
        print("Available years: ", available_years)

        # Input year with validation
        while True:
            year_input = input("Enter a release year: ")
            try:
                year = int(year_input)
            except ValueError:
                print("Please enter a valid year.")
                continue

            if year in available_years:
                break
            print("Please enter a valid year from the available options.")

        # Filter the data based on the release year
        filtered_data = filter_media_type(data_1[data_1['release_year'] == year])
        if filtered_data is None:
            # by_year() shows that filter_media_type may return None; merging
            # None would raise.
            return

        # Inner join on 'show_id', then best appreciation first.
        link_between = pd.merge(filtered_data, data_2, on='show_id')
        link_between_sorted = link_between.sort_values(by='appreciation (%)', ascending=False)

        print(f"Top-rated shows for the year {year}:")
        print(link_between_sorted[['show_id', 'title', 'type', 'release_year', 'appreciation (%)']])
        save_to_csv(link_between_sorted)
        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def most_rated_recent(data_1, data_2):
        """Print the 20 most recent titles, best appreciation first within a year.

        Args:
            data_1: catalogue DataFrame with 'show_id' and 'release_year' columns.
            data_2: ratings DataFrame with 'show_id' and 'appreciation (%)'
                columns.

        Returns:
            None. The top 20 rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        # Join catalogue and ratings on the shared 'show_id' key.
        merged = data_1.merge(data_2, on='show_id')

        # Newest release year first; ties are broken by highest appreciation.
        ranked = merged.sort_values(
            by=['release_year', 'appreciation (%)'],
            ascending=False,
        )

        top_20_data = ranked.head(20)
        shown_columns = ['show_id', 'title', 'type', 'release_year', 'appreciation (%)']

        print("Top 20 most rated and recent shows:")
        print(top_20_data[shown_columns])
        save_to_csv(top_20_data)
        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Example usage
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def parental_code(data_1):
        """Print the titles that carry a parental code chosen by the user.

        The user is asked again until the answer is one of the known codes.

        Args:
            data_1: catalogue DataFrame with a 'rating' column holding the
                parental code.

        Returns:
            None. Matching rows are printed and passed to ``save_to_csv``
            (defined elsewhere in this file).
        """
        valid_codes = {
            'PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7',
            'R', 'TV-G', 'G', 'NC-17', 'NR', 'TV-Y7-FV', 'UR',
        }

        # Filter out entries that are not valid parental codes
        filtered_data = data_1[data_1['rating'].isin(valid_codes)]

        print("Valid parental codes:")
        # Sorted so the menu is printed in the same order on every run
        # (set iteration order is not stable for strings).
        print(', '.join(sorted(valid_codes)))

        while True:
            # Every valid code is upper-case, so "pg-13" or " R " is accepted too.
            selected_code = input("Enter a parental code to display movies and/or series: ").strip().upper()

            if selected_code not in valid_codes:
                print("Invalid parental code entered. Please enter a valid code.")
                continue

            # Exact comparison: a substring search made 'R' also return 'NR'
            # and 'UR', and 'G' return 'PG', 'PG-13', 'TV-G' and 'TV-PG'.
            result_data = filtered_data[filtered_data['rating'] == selected_code]
            if not result_data.empty:
                print(result_data)
                save_to_csv(result_data)
            else:
                print(f"No movies or series found for the parental code {selected_code}.")
            break

        return
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def directors_nationality(data_1):
        """List directors with the countries of their titles, most prolific first.

        For every row that has a director, each name in the ``director`` column
        is credited with one title and with the countries found in the row's
        ``country`` column. The 25 most prolific directors are printed, the full
        table is offered for export through ``save_to_csv`` and then returned.

        Args:
            data_1: Catalog DataFrame with ``director`` and ``country`` columns.

        Returns:
            DataFrame with the columns ``director``, ``nationalities`` (a
            comma-separated string) and ``number of movies or series``, sorted
            by decreasing number of titles.
        """
        count_key = 'number of movies or series'
        directors_nationality_dict = {}

        for _, row in data_1.iterrows():
            if pd.isna(row['director']):
                continue

            # Countries of this title; empty fragments (e.g. from a leading or
            # trailing comma) and 'nan' placeholders are ignored.
            row_nationalities = set()
            if pd.notna(row['country']):
                for fragment in str(row['country']).split(','):
                    country = fragment.strip()
                    if country and country.lower() != 'nan':
                        row_nationalities.add(country)

            for raw_name in str(row['director']).split(', '):
                director = raw_name.strip()
                if not director:
                    continue
                entry = directors_nationality_dict.setdefault(
                    director, {'nationalities': set(), count_key: 0}
                )
                entry['nationalities'].update(row_nationalities)
                entry[count_key] += 1

        # Sort the directors by the number of movies and series produced
        sorted_directors = sorted(
            directors_nationality_dict.items(),
            key=lambda item: item[1][count_key],
            reverse=True,
        )

        # Nationalities are sorted so the printed and exported text is stable
        table_rows = [
            [director, ', '.join(sorted(info['nationalities'])), info[count_key]]
            for director, info in sorted_directors
        ]
        columns = ['director', 'nationalities', 'number of movies or series']
        directors_df = pd.DataFrame(table_rows, columns=columns)

        # Display the list of directors and their nationalities
        print("Directors and their nationalities, sorted by the number of movies and series produced:")
        for rank, (director_name, nationalities_str, title_count) in enumerate(table_rows[:25], start=1):
            print(f"{rank}. {director_name}: {nationalities_str} - {title_count} movies/series")

        # Save to CSV using the DataFrame
        save_to_csv(directors_df)

        return directors_df
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    # Let the user restrict the data to movies, TV shows, or both
    def filter_media_type(data):
        """Ask which kind of media to keep and return the matching rows.

        The menu is shown again until the user enters ``1``, ``2`` or ``3``.

        Args:
            data: DataFrame with a ``type`` column.

        Returns:
            The rows whose ``type`` is "movie" (choice 1) or "tv show"
            (choice 2), compared case-insensitively, or ``data`` itself
            (choice 3).
        """
        wanted_type_by_choice = {'1': 'movie', '2': 'tv show'}

        while True:
            print("Select the type of media:")
            print("1. Movie")
            print("2. TV Show")
            print("3. Both")

            media_choice = input("Enter the corresponding number : ").strip()

            if media_choice == '3':
                return data
            if media_choice in wanted_type_by_choice:
                return data[data['type'].str.lower() == wanted_type_by_choice[media_choice]]

            print("Invalid choice. Please enter a valid number.")
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    
    
    # Example usage
    def basic_statistics(data_1):
        """Print movie/series counts and a per-country production ranking.

        Args:
            data_1: Catalog DataFrame; must contain ``type`` and ``country``
                columns, otherwise a message is printed and nothing else is done.

        Returns:
            None.
        """
        # Check if the 'type' and 'country' columns exist in the dataset
        if 'type' not in data_1.columns or 'country' not in data_1.columns:
            print("The dataset does not contain the necessary columns.")
            return

        # Count the number of movies and series
        movies_count = int((data_1['type'] == 'Movie').sum())
        series_count = int((data_1['type'] == 'TV Show').sum())

        print(f"Number of movies in the catalog: {movies_count}")
        print(f"Number of series in the catalog: {series_count}")

        # Compare the number of movies and series
        if movies_count > series_count:
            print("There are more movies than series in the catalog.")
        elif movies_count < series_count:
            print("There are more series than movies in the catalog.")
        else:
            print("The catalog has an equal number of movies and series.")

        # One entry per country of each title. Splitting on ',' and stripping
        # (rather than splitting on ', ') keeps 'France,' and 'A,B' from being
        # counted as separate, malformed countries; empty fragments are dropped.
        countries = (
            data_1['country']
            .dropna()
            .astype(str)
            .str.split(',')
            .explode()
            .str.strip()
        )
        country_counts = countries[countries.ne('')].value_counts()
        print("\nCountries that produced movies/series, sorted from most to least productive:")
        print(country_counts)

        return
    
    
    
    # NOTE: every listing must ask the user whether to save the result to a .csv file
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def _csv_file_name(raw_name, default_filename):
        """Turn what the user typed into a .csv file name, or use the default."""
        name = raw_name.strip()
        if not name:
            return default_filename
        if name.lower().endswith('.csv'):
            return name
        return name + ".csv"


    def save_to_csv(data, default_filename='output.csv'):
        """Interactively offer to write ``data`` to a CSV file.

        The user is asked whether to save. On YES a file name is requested
        (an empty answer selects ``default_filename``); if that file already
        exists the user either confirms the overwrite or supplies another
        name, which is checked again. The function returns as soon as the
        data has been written or the user has declined.

        Args:
            data: Object exposing ``to_csv(path, index=False)`` (a DataFrame).
            default_filename: File name used when the user just presses Enter.

        Returns:
            None.
        """
        # Step 1: does the user want a file at all?
        while True:
            save_choice = input("Do you want to save the data to a CSV file? (YES/NO): ").strip().upper()
            if save_choice == 'NO':
                print("Data not saved.")
                return
            if save_choice == 'YES':
                break
            print("Invalid choice. Please enter either 'YES' or 'NO.'")

        # Step 2: pick the file name. The emptiness check happens before the
        # extension is appended, otherwise the default could never be selected.
        file_name = _csv_file_name(
            input("Enter the file name (DO NOT include .csv extension, or press Enter for the default): "),
            default_filename,
        )

        # Step 3: never clobber an existing file without an explicit YES
        while os.path.exists(file_name):
            overwrite_choice = input(
                f"The file '{file_name}' already exists. Do you want to overwrite it? (YES/NO): "
            ).strip().upper()
            if overwrite_choice == 'YES':
                break
            if overwrite_choice == 'NO':
                file_name = _csv_file_name(
                    input("Enter a new file name (DO NOT include .csv extension): "),
                    default_filename,
                )
            else:
                print("Invalid choice. Please enter either 'YES' or 'NO'.")

        data.to_csv(file_name, index=False)
        print(f"Data saved to {file_name}")
    
    # Start of the recommendation algorithm
    
    # Load the CSV file
    
    def read_movie_series_info(file_path):
        """Load titles and categories from the catalog CSV.

        The first row is treated as a header and skipped. Columns are read by
        position: column 0 is the show id, column 2 the title and column 10 the
        comma-separated category list. Blank or too-short rows are skipped.

        Args:
            file_path: Path of the UTF-8 encoded catalog CSV file.

        Returns:
            dict mapping show id -> ``[title, [category, ...]]``.
        """
        show_id_col, title_col, categories_col = 0, 2, 10
        catalog = {}

        # newline='' is what the csv module expects for files it parses itself
        with open(file_path, 'r', encoding='utf-8', newline='') as info_file:
            info_reader = csv.reader(info_file)
            next(info_reader, None)  # Skip header row
            for row in info_reader:
                if len(row) <= categories_col:
                    # Blank line or truncated record: nothing usable in it
                    continue
                categories_of_show = [
                    part.strip() for part in row[categories_col].split(',') if part.strip()
                ]
                catalog[row[show_id_col]] = [row[title_col], categories_of_show]
        return catalog
    
    
    def read_user_ratings(file_path):
        """Load the ratings matrix from a CSV file.

        The header row holds the user ids from its second column onwards; each
        following row holds a show id followed by one integer rating per user.
        Blank rows are skipped.

        Args:
            file_path: Path of the UTF-8 encoded ratings CSV file.

        Returns:
            dict mapping show id -> ``{user_id (int): rating (int)}``.

        Raises:
            ValueError: If a user id or a rating is not an integer.
        """
        ratings = {}

        # newline='' is what the csv module expects for files it parses itself
        with open(file_path, 'r', encoding='utf-8', newline='') as ratings_file:
            ratings_reader = csv.reader(ratings_file)

            header = next(ratings_reader, [])  # Header row: label, then user ids
            user_ids = [int(cell) for cell in header[1:]]

            for row in ratings_reader:
                if not row:
                    continue  # blank line
                show_id, *raw_ratings = row
                ratings[show_id] = dict(zip(user_ids, map(int, raw_ratings)))

        return ratings
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def create_category_matrix(catalog, categories, output_file_path):
        """Build the category co-occurrence matrix and optionally export it.

        Cell ``[i][j]`` counts the shows of ``catalog`` whose category list
        contains both ``categories[i]`` and ``categories[j]``; the diagonal
        therefore counts the shows of each category.

        Args:
            catalog: dict mapping show id -> ``[title, [category, ...]]``.
            categories: List of category names; fixes the row/column order.
            output_file_path: Destination of the Excel export (category names
                as first row and first column). Pass ``None`` or an empty
                string to skip the export.

        Returns:
            The matrix as a list of lists of ints, without the category names.
        """
        size = len(categories)
        category_matrix = [[0] * size for _ in range(size)]

        for show_info in catalog.values():
            show_categories = set(show_info[1])
            # Indices of the known categories this show belongs to
            present = [i for i, name in enumerate(categories) if name in show_categories]
            for i in present:
                matrix_row = category_matrix[i]
                for j in present:
                    matrix_row[j] += 1

        if output_file_path:
            # Prepend the category name to each row so the sheet is self-describing
            labelled_rows = [[name] + row for name, row in zip(categories, category_matrix)]
            df = pd.DataFrame(labelled_rows, columns=[''] + list(categories))
            df.to_excel(output_file_path, index=False)

        return category_matrix
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    def recommend_movies(user_id, catalog, user_ratings, category_matrix, threshold=0.9999):
        """Suggest up to five shows the user has not rated yet.

        Candidates are the shows whose rating by ``user_id`` is 0. Each one is
        scored against the categories of the shows the user did rate (rating
        above 0): for every category of the candidate, the smallest
        co-occurrence count with the shared categories is taken from
        ``category_matrix`` and these minima are summed. Candidates sharing no
        category with the user's rated shows are ignored. When the user has not
        rated anything, every category is treated as shared.

        Args:
            user_id: User identifier (int, or a string convertible to int).
            catalog: dict mapping show id -> ``[title, [category, ...]]``.
            user_ratings: dict mapping show id -> ``{user_id: rating}``.
            category_matrix: Co-occurrence matrix indexed in the order of
                ``list(set(...))`` over the catalog's categories.
            threshold: A candidate is kept only if its score is strictly
                greater than this value.

        Returns:
            List of at most five ``(show_id, {'title': ..., 'similarity': ...})``
            tuples, best score first.
        """
        global categories  # the module-level category list is refreshed here

        # Must be built exactly like the list used to create category_matrix so
        # that the indices computed below address the right rows and columns.
        categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1]))

        user_id = int(user_id)
        category_index = {category: i for i, category in enumerate(categories)}

        # Categories of the shows this user actually rated
        user_categories = {
            category
            for rated_id, per_user in user_ratings.items()
            if rated_id in catalog and per_user.get(user_id, 0) > 0
            for category in catalog[rated_id][1]
        }
        if not user_categories:
            # No rating history: fall back to considering every category
            user_categories = set(categories)

        suggestions = {}
        for show_id, show_info in catalog.items():
            # Only shows present in the ratings with an explicit 0 are candidates
            if user_ratings.get(show_id, {}).get(user_id) != 0:
                continue

            show_categories = show_info[1]
            common_categories = [c for c in show_categories if c in user_categories]
            if not common_categories:
                continue

            common_columns = [category_index[c] for c in common_categories]
            similarity = sum(
                min(category_matrix[category_index[category]][col] for col in common_columns)
                for category in show_categories
            )

            # Only recommend shows whose score exceeds the requested threshold
            if similarity > threshold:
                suggestions[show_id] = {'title': show_info[0], 'similarity': similarity}

        # Highest similarity first
        sorted_suggestions = sorted(suggestions.items(), key=lambda x: x[1]['similarity'], reverse=True)

        return sorted_suggestions[:5]
    
    def recommandation_algorithm() :
        """Run the interactive recommendation flow for one user.

        Asks for a user id between 1 and 100 (re-prompting until valid), loads
        the catalog and ratings CSV files, builds the category co-occurrence
        matrix (also exported to ``matrice_categories.xlsx``), prints the titles
        the user rated above 0 and then the recommendations returned by
        ``recommend_movies`` with a threshold of 0.5.

        Returns:
            None.
        """
        # Replace file_path_1 and file_path_2 with the actual file paths
        file_path_1 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/netflix_titles-2.csv"
        file_path_2 = "/Users/adrien/vscodeworkspace/coding-project/projet_en_groupe/data_cp_2023/ratings.csv"

        while True:
            raw_user_id = input("Quel est ton user ? ")

            try:
                user_id = int(raw_user_id)
            except ValueError as e:
                print(f"Veuillez entrer un identifiant d'utilisateur valide. Erreur: {e}")
                continue

            # Check if user_id is between 1 and 100
            if 1 <= user_id <= 100:
                break
            print("L'identifiant de l'utilisateur doit être compris entre 1 et 100.")

        # Read data from CSV files
        catalog = read_movie_series_info(file_path_1)
        ratings = read_user_ratings(file_path_2)

        # Create category matrix. recommend_movies rebuilds this list with the
        # very same expression to index the matrix, so keep the two in sync.
        categories = list(set(category for _, movie_info in catalog.items() for category in movie_info[1]))
        output_file_path = "matrice_categories.xlsx"
        category_matrix = create_category_matrix(catalog, categories, output_file_path)

        # Display movies already viewed by the user
        print("Films déjà vus par l'utilisateur:")
        for show_id, user_rating in ratings.items():
            # A rated show missing from the catalog has no title to display
            if user_rating.get(user_id, 0) > 0 and show_id in catalog:
                print(f"- {catalog[show_id][0]}")

        # Recommend movies
        recommended_movies = recommend_movies(user_id, catalog, ratings, category_matrix, threshold=0.5)

        # Display top 5 recommendations
        print("\nTop 5 recommandations:")
        for _, info in recommended_movies:
            print(f"Title: {info['title']}, Similarity: {info['similarity']}")
    
    
    # Menu creation
    
    Adrien Payen's avatar
    Adrien Payen a validé
    def action() :
        """Show the main menu, read one command and run the matching feature.

        Returns:
            ``False`` when the user enters STOP (the caller uses it to leave its
            loop); ``None`` after running a feature or after an unknown command.
        """
        menu_lines = (
            "Here are the different options available:",
            "1.  View the entire catalog",
            "2.  View all movies in the catalog",
            "3.  View all series",
            "4.  View all series, movies or both by year",
            "5.  View all series, movies or both by country",
            "6.  View all series, movies or both by type",
            "7.  View all series, movies or both by type sorted by duration",
            "8.  View series, movies or both directed by a specific director and sorted by year",
            "9.  View series, movies or both featuring a specific actor and sorted by year",
            "10. View how many series, movies or both and series directed by a director in a specific genre",
            "11. View how many series, movies or both an actor has played in",
            "12. Display the highest-rated series, movies or both",
            "13. Display the highest-rated series, movies or both for a specific year",
            "14. Display recent highest-rated series, movies or both",
            "15. Display movies and series based on parental control code",
            "16. Display the nationalities of directors and sort the list based on the number of movies and series directed",
            "17. Display basic statistics",
            "18. Get Personalized Recommendations",
            "STOP to stop",
        )
        for line in menu_lines:
            print(line)

        command = input("Enter the number of what you want to do: ").strip()

        if command.upper() == "STOP":
            return False

        # Lambdas keep every lookup lazy: only the selected feature (and the
        # data it needs) is resolved, exactly like a branch of an if/elif chain.
        handlers = {
            "1": lambda: catalog(data_1),
            "2": lambda: movies(data_1),
            "3": lambda: series(data_1),
            "4": lambda: by_year(data_1),
            "5": lambda: by_country(data_1),
            "6": lambda: genre(data_1),
            "7": lambda: duration(data_1),
            "8": lambda: director(data_1),
            "9": lambda: actor(data_1),
            "10": lambda: specific_genre_director(data_1),
            "11": lambda: specific_genre_actor(data_1),
            "12": lambda: most_rated(data_1, data_2),
            "13": lambda: most_rated_year(data_1, data_2),
            "14": lambda: most_rated_recent(data_1, data_2),
            "15": lambda: parental_code(data_1),
            "16": lambda: directors_nationality(data_1),
            "17": lambda: basic_statistics(data_1),
            "18": lambda: recommandation_algorithm(),
        }

        handler = handlers.get(command)
        if handler is None:
            print("Invalid choice. Please enter a number between 1 and 18, or STOP.")
            return None

        handler()
        return None
    
    
    
    # NOTE: every listing must ask the user whether to save the result to a .csv file
    
    
    
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Values returned by action() other than True/False, in call order
    menu = []


    # Entry point: keep showing the menu until action() returns False (STOP).
    # Guarded so that importing this module does not start the interactive loop.
    if __name__ == "__main__":
        while True:
            response = action()
            if response is False:
                break
            if response is True:
                menu = []
            else:
                menu.append(response)