Skip to content
Extraits de code Groupes Projets
Valider db25e628 rédigé par Adrien Payen
Parcourir les fichiers

last update

parent 76a0ab2a
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
Fichier ajouté
Fichier ajouté
Aucun aperçu pour ce type de fichier
......@@ -31,10 +31,12 @@ def movies(data_1): # enregistrement
films = data_1[data_1['type'] == 'Movie'] # Filter the data to include only movies
movie_titles = films['title'].tolist() # Extract movie titles
movie_df = pd.DataFrame({'Movie Titles': movie_titles})
# afficher que les 100 premiers
print(movie_titles) # Display movie titles
save_to_csv(movie_titles)
print(movie_df) # Display movie titles
save_to_csv(movie_df)
return # Be careful, you need to ask each time if they want to save the list to a .csv
......@@ -44,9 +46,11 @@ def series(data_1):
series = data_1[data_1['type'] == 'TV Show'] # Filter the data to include only series
series_titles = series['title'].tolist() # Extract series titles
print(series_titles) # Display series titles
series_df = pd.DataFrame({'Movie Titles': series_titles})
print(series_df) # Display series titles
save_to_csv(series_titles)
save_to_csv(series_df)
return # Be careful, you need to ask each time if they want to save the list to a .csv
......@@ -77,10 +81,11 @@ def by_country(data_1):
filtered_data = filter_media_type(data_1)
country_list = []
for countries in filtered_data['country'].dropna().str.split(', '):
for countries in filtered_data['country'].dropna().str.split(','):
for country in countries:
if country not in country_list and country != '':
country_list.append(country)
cleaned_country = country.strip() # Remove leading and trailing spaces
if cleaned_country and cleaned_country not in country_list:
country_list.append(cleaned_country)
print("List of all available countries:")
country_list.sort()
......@@ -94,9 +99,9 @@ def by_country(data_1):
save_to_csv(country_data)
else:
print(f"No movies or series found for the country {country_input}.")
return
# Be careful, you need to ask each time if they want to save the list to a .csv
......@@ -139,9 +144,19 @@ def duration(data_1):
type_input = input("Enter the type (romantic, action, drama, etc.) to display movies and/or series: ").capitalize()
type_data = filtered_data[filtered_data['listed_in'].str.lower().str.contains(type_input.lower(), case=False, na=False)]
if not type_data.empty:
print("Quel type de tri voulez-vous ? ")
print("1. Croissant")
print("2. Décroissant")
sort_order = str(input("Entrez le numéro du type de tri : "))
type_data_sorted = type_data.sort_values(by='duration', ascending=True) # see if we do in ascending or descending
if not type_data.empty:
if sort_order.lower() == '1':
type_data_sorted = type_data.sort_values(by='duration', ascending=True)
elif sort_order.lower() == '2':
type_data_sorted = type_data.sort_values(by='duration', ascending=False)
else:
print("Invalid sort order. Defaulting to ascending order.")
type_data_sorted = type_data.sort_values(by='duration', ascending=True)
print(type_data_sorted)
save_to_csv(type_data_sorted)
......@@ -149,6 +164,7 @@ def duration(data_1):
print(f"No movies or series found for the type {type_input}.")
def director(data_1):
filtered_data = filter_media_type(data_1)
......@@ -202,6 +218,9 @@ def specific_genre_director(data_1):
filtered_data = filter_media_type(data_1)
unique_directors = filtered_data['director'].unique()
# Convert elements to strings to handle potential float values
unique_directors = [str(director) for director in unique_directors]
print("List of all available directors:")
print(', '.join(unique_directors))
......@@ -233,6 +252,9 @@ def specific_genre_actor(data_1):
filtered_data = filter_media_type(data_1)
unique_actors = filtered_data['cast'].unique()
# Convert elements to strings to handle potential float values
unique_actors = [str(actor) for actor in unique_actors]
print("List of all available actors:")
print(', '.join(unique_actors))
......@@ -260,6 +282,7 @@ def specific_genre_actor(data_1):
return
# rating
# these variables need to be stored at module level, not inside a local function
......@@ -283,7 +306,7 @@ def most_rated(data_1, data_2) :
def most_rated_year(data_1, data_2):
# Display all available unique release years
available_years = data_1['release_year'].unique()
available_years = sorted(data_1['release_year'].unique())
print("Available years: ", available_years) # tri des dates
# Ask the user to enter a release year
......@@ -328,52 +351,74 @@ def most_rated_recent(data_1, data_2):
# Example usage
def parental_code(data_1): # s5542 bug concernant les virgules
def parental_code(data_1):
valid_codes = set(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R', 'TV-G', 'G', 'NC-17', 'NR', 'TV-Y7-FV', 'UR'])
code_list = []
for codes in data_1['rating'].dropna().str.split(', '):
for code in codes:
if code not in code_list and code != '':
code_list.append(code)
print("Here are the parental codes: ")
print(code_list)
save_to_csv(code_list)
return
# Filter out entries that are not valid parental codes
filtered_data = data_1[data_1['rating'].isin(valid_codes)]
print("Valid parental codes:")
print(', '.join(valid_codes))
# Ask the user to enter a parental code
selected_code = input("Enter a parental code to display movies and/or series: ")
# Filter the data based on the selected parental code
if selected_code in valid_codes:
result_data = filtered_data[filtered_data['rating'].str.contains(selected_code, case=False, na=False)]
if not result_data.empty:
print(result_data)
save_to_csv(result_data)
else:
print(f"No movies or series found for the parental code {selected_code}.")
else:
print("Invalid parental code entered.")
return
#code_parental = input("Entrez le code de contrôle parental : PG-13, TV-MA")
def directors_nationality(data_1):
# Check if the 'director' column exists in the dataset
if 'director' not in data_1.columns:
print("The dataset does not contain a 'director' column.")
# Check if the 'director' and 'country' columns exist in the dataset
if 'director' not in data_1.columns or 'country' not in data_1.columns:
print("The dataset does not contain 'director' or 'country' columns.")
return
# Extract unique directors and their respective nationalities
directors_nationality_dict = {}
for index, row in data_1.iterrows():
directors = row['director'].split(', ')
directors = str(row['director']).split(', ') if pd.notna(row['director']) else []
nationality = row['country']
for director in directors:
if director in directors_nationality_dict:
directors_nationality_dict[director]['nationalities'].add(nationality)
directors_nationality_dict[director]['count'] += 1
# Use a set to store unique nationalities for each director
directors_nationality_dict[director]['nationalities'].add(str(nationality))
directors_nationality_dict[director]['number of movies or series'] += 1
else:
directors_nationality_dict[director] = {'nationalities': {nationality}, 'count': 1}
directors_nationality_dict[director] = {'nationalities': set(), 'number of movies or series': 1}
directors_nationality_dict[director]['nationalities'].add(str(nationality))
# Remove duplicates from the nationalities list for each director
for director_info in directors_nationality_dict.values():
director_info['nationalities'] = list(set(director_info['nationalities']))
# Sort the directors by the number of movies and series produced
sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['count'], reverse=True)
sorted_directors = sorted(directors_nationality_dict.items(), key=lambda x: x[1]['number of movies or series'], reverse=True)
columns = ['director', 'nationalities', 'number of movies or series']
directors_df = pd.DataFrame([[director, ', '.join(info['nationalities']), info['number of movies or series']] for director, info in sorted_directors], columns=columns)
# Display the list of directors and their nationalities
print("Directors and their nationalities, sorted by the number of movies and series produced:")
for director, info in sorted_directors:
print(f"{director}: {', '.join(info['nationalities'])} - {info['count']} movies/series")
save_to_csv(sorted_directors)
return
director_name = str(director) if pd.notna(director) else 'Unknown'
nationalities_str = ', '.join(str(n) for n in info['nationalities'])
print(f"{director_name}: {nationalities_str} - {info['number of movies or series']} movies/series")
# Save to CSV using the DataFrame
save_to_csv(directors_df) # Commenting this out since the 'save_to_csv' function is not provided
return directors_df
......@@ -457,26 +502,16 @@ def save_to_csv(data, default_filename='output.csv'):
new_filename = new_filename + ".csv"
data.to_csv(new_filename, index=False)
print(f"Data saved to {new_filename}")
# Ask if the user wants to open the file
open_choice = input("Do you want to open the saved file? (YES/NO): ").upper()
if open_choice == 'YES':
os.system(file_name)
else:
# Save to a new file
data.to_csv(file_name, index=False)
print(f"Data saved to {file_name}")
# Ask if the user wants to open the file
open_choice = input("Do you want to open the saved file? (YES/NO): ").upper()
if open_choice == 'YES':
os.system(file_name)
else:
print("Data not saved.")
# début de l'algorithme de recommandation
# Load the CSV file
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter