diff --git a/.DS_Store b/.DS_Store index 037352eb3c96f3e092083529fd07e9ce531b0c8c..9216513466f8ff22ac018f9a3316854345c7df53 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/Data_Base.xlsx b/Data_Base.xlsx index 3677033dede2ed7e49e7e0637ebd4ebae7d378ca..73faea6556dd7a214ee3b831aca8b026044880f3 100644 Binary files a/Data_Base.xlsx and b/Data_Base.xlsx differ diff --git a/projet_personnel/algorithme_student_generate.py b/projet_personnel/algorithme_student_generate.py index d20cc8a1e802c647e8a537716c44ee1b6cead33c..4fc8ab83d50e183aff61b7faefb30a71f835369f 100644 --- a/projet_personnel/algorithme_student_generate.py +++ b/projet_personnel/algorithme_student_generate.py @@ -2,7 +2,7 @@ import pandas as pd import random - +import numpy as np firstname_of_students = ['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson', 'White', 'Harris', 'Martin', 'Thompson', 'Garcia', 'Martinez', 'Robinson', @@ -85,12 +85,15 @@ all_course_MA1 = ("Advanced English 1", "Español avanzado 1", "Data analytics", all_course_MA2 = ("Advanced English 2", "Español avanzado 1", "Responsabilité sociétale de l'entreprise","Integrated Information Systems", "Mémoire", "Séminaire d'accompagnement du mémoire") +# Définir les proportions souhaitées pour chaque année académique +proportions = [0.4, 0.25, 0.15, 0.1, 0.1] - +# Liste des années académiques disponibles +school_years = ['BAC1', 'BAC2', 'BAC3', 'MA1', 'MA2'] # Générer des combinaisons aléatoires de noms et prénoms pour plus de 1000 personnes data_generated = [] -number_of_students = 100 +number_of_students = 1000 for each in range(number_of_students): # générer des données @@ -101,7 +104,7 @@ for each in range(number_of_students): # générer des données first_name = random.choice(firstname_of_students) # année de cours - academic_year = random.choice(school_years) + academic_year = np.random.choice(school_years, p=proportions) # curriculum if academic_year in ['BAC1', 'BAC2', 'BAC3'] : @@ -192,11 +195,6 @@ for each in range(number_of_students): # générer des données data_generated.append({"Firstname": last_name, "Lastname": first_name, "Academic Year" : academic_year, "Curriculum" : curriculum, "Place of Birth" : city_of_birth , "Telephone": phone, "Address": adress_of_student, "Gender" : gender_of_student, "Email" : email_formated, "Campus" : campus, "Date of Birth" : complete_date_of_birth, "Matricule" : matricule, **grades}) -# génération de données qui ne peuvent pas se ressembler - - - - # Créer un DataFrame pandas df = pd.DataFrame(data_generated) diff --git a/~$adrien.xlsx b/~$adrien.xlsx deleted file mode 100644 index d10daf560a7f8247050905285b3e618c244966d6..0000000000000000000000000000000000000000 Binary files a/~$adrien.xlsx and /dev/null differ