Skip to content
GitLab
Explorer
Connexion
S'inscrire
Navigation principale
Rechercher ou aller à…
Projet
M
MLP1
Gestion
Activité
Membres
Labels
Programmation
Tickets
Tableaux des tickets
Jalons
Wiki
Code
Requêtes de fusion
Dépôt
Branches
Validations
Étiquettes
Graphe du dépôt
Comparer les révisions
Extraits de code
Compilation
Pipelines
Jobs
Planifications de pipeline
Artéfacts
Déploiement
Releases
Registre de paquets
Registre de conteneur
Registre de modèles
Opération
Environnements
Modules Terraform
Surveillance
Incidents
Analyse
Données d'analyse des chaînes de valeur
Analyse des contributeurs
Données d'analyse CI/CD
Données d'analyse du dépôt
Expériences du modèle
Aide
Aide
Support
Documentation de GitLab
Comparer les forfaits GitLab
Forum de la communauté
Contribuer à GitLab
Donner votre avis
Conditions générales et politique de confidentialité
Raccourcis clavier
?
Extraits de code
Groupes
Projets
Afficher davantage de fils d'Ariane
machine_learning
MLP1
Validations
883d5811
Valider
883d5811
rédigé
1 year ago
par
Adrien Payen
Parcourir les fichiers
Options
Téléchargements
Correctifs
Plain Diff
merge solution
parent
81121bf8
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Modifications
3
Masquer les modifications d'espaces
En ligne
Côte à côte
Affichage de
3 fichiers modifiés
plot.py
+0
-48
0 ajout, 48 suppressions
plot.py
simulate.py
+173
-77
173 ajouts, 77 suppressions
simulate.py
validation.py
+61
-23
61 ajouts, 23 suppressions
validation.py
avec
234 ajouts
et
148 suppressions
plot.py
supprimé
100644 → 0
+
0
−
48
Voir le fichier @
81121bf8
import
matplotlib.pyplot
as
plt
from
simulate
import
Validation
as
Val
from
tmc
import
TransitionMatrixCalculator
as
tmc
from
markovDecision
import
MarkovDecisionSolver
as
mD
import
random
as
rd
import
numpy
as
np
def plot_results(layouts, circle, n_iterations=100):
    """Plot the average number of turns per strategy across several board layouts.

    For each layout, simulates the MDP-optimal policy plus four baseline
    policies (always-safe, always-normal, always-risky, uniform-random dice)
    and plots one curve per strategy.

    Args:
        layouts: iterable of board layouts (one per x-axis point).
        circle: whether the board wraps around (passed to Validation).
        n_iterations: number of simulated games per (layout, strategy) pair.
    """
    results_markov = []
    results_safe = []
    results_normal = []
    results_risky = []
    results_random = []

    for layout in layouts:
        # BUG FIX: the original did `expec, policy = mD(layout, circle)` (unpacking
        # an instance instead of calling .solve()) and then called
        # `Val.simulate_game(policy, layout, circle, n_iterations)` — an unbound
        # call with the wrong signature (simulate_game takes (self, strategy,
        # n_iterations)). Validation builds the transition matrices and solves
        # the MDP itself, so create one instance per layout and reuse it.
        validator = Val(layout, circle)

        # Optimal (MDP) policy computed inside Validation.
        results_markov.append(
            validator.simulate_game(validator.optimal_strategy, n_iterations))
        # Constant-dice baselines (1 = safe, 2 = normal, 3 = risky).
        results_safe.append(validator.simulate_game([1] * 15, n_iterations))
        results_normal.append(validator.simulate_game([2] * 15, n_iterations))
        results_risky.append(validator.simulate_game([3] * 15, n_iterations))
        # Uniform-random dice choice per square.
        results_random.append(
            validator.simulate_game(np.random.randint(1, 4, size=15),
                                    n_iterations))

    # Plot the results: one curve per strategy, layouts on the x-axis.
    plt.figure(figsize=(12, 8))
    plt.plot(range(len(layouts)), results_markov, label='Markov')
    plt.plot(range(len(layouts)), results_safe, label='Safe')
    plt.plot(range(len(layouts)), results_normal, label='Normal')
    plt.plot(range(len(layouts)), results_risky, label='Risky')
    plt.plot(range(len(layouts)), results_random, label='Random')
    plt.xticks(range(len(layouts)), range(len(layouts)))
    plt.xlabel('Layout number', fontsize=13)
    plt.ylabel('Average number of turns', fontsize=13)
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    plt.show()
Ce diff est replié.
Cliquez pour l'agrandir.
simulate.py
+
173
−
77
Voir le fichier @
883d5811
from
tmc
import
TransitionMatrixCalculator
as
tmc
from
markovDecision
import
MarkovDecisionSolver
as
mD
import
random
as
rd
import
random
import
numpy
as
np
class Validation:
    """Monte-Carlo validation of dice strategies for the Snakes-and-Ladders-style game.

    On construction, computes the three per-dice transition matrices via
    TransitionMatrixCalculator, solves the MDP with MarkovDecisionSolver, and
    precomputes five strategies (optimal, safe, normal, risky, random), each a
    length-15 sequence of dice choices in {1, 2, 3}.
    """

    def __init__(self, layout, circle=False):
        # layout: per-square trap codes; circle: whether the board wraps.
        self.layout = layout
        self.circle = circle
        # Compute transition matrices using TransitionMatrixCalculator
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Solve Markov Decision Problem
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()
        # Define all the strategies
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * 15
        self.normal_strategy = [2] * 15
        self.risky_strategy = [3] * 15
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)]

    def simulate_game(self, strategy, n_iterations=10000):
        """Simulate n_iterations games under `strategy`; return the mean turn count.

        `strategy[state]` gives the dice choice (1=safe, 2=normal, 3=risky) for
        each square; the next state is sampled from that dice's transition row.
        """
        # Compute transition matrices for each dice
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []

        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # initial state
            while state < len(self.layout) - 1:  # until goal state is reached
                action = strategy[state]  # get action according to strategy
                transition_matrix = transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout), p=transition_matrix[state])
                # Landing on a type-3 square (prison trap — presumably; verify
                # against the game rules) costs extra turns depending on the dice:
                # normal dice triggers the trap with probability 1/2 (so +1 or +2
                # turns with equal probability), risky dice always triggers (+2).
                if self.layout[state] == 3 and action == 2:
                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)

        return np.mean(number_turns)

    def simulate_state(self, strategy, n_iterations=10000):
        """Simulate games recording the per-step turn cost; return the mean over iterations.

        NOTE(review): each iteration appends a list whose length equals that
        game's number of steps, which varies per game — np.mean(..., axis=0)
        over a ragged list may fail or behave unexpectedly; confirm intent.
        """
        # Compute transition matrices for each dice
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []

        for _ in range(n_iterations):
            turns_per_state = []
            state = 0
            while state < len(self.layout) - 1:
                total_turns = 0
                action = strategy[state]
                transition_matrix = transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout), p=transition_matrix[state])
                # Same trap-cost accounting as simulate_game, but per step.
                if self.layout[state] == 3 and action == 2:
                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
                turns_per_state.append(total_turns)
            number_turns.append(turns_per_state)

        return np.mean(number_turns, axis=0)
import
matplotlib.pyplot
as
plt
from
tmc
import
TransitionMatrixCalculator
as
tmc
from
markovDecision
import
MarkovDecisionSolver
# Board size and simulation budget shared by all functions below.
nSquares = 15     # number of squares on the board
nSimul = 10000    # number of simulated games per empirical estimate
def playOneTurn(diceChoice, curPos, layout, circle, prison):
    """Play a single turn and return (newPos, prison).

    Args:
        diceChoice: 1 (safe), 2 (normal) or 3 (risky); also the max step size.
        curPos: current square index (0-based).
        layout: per-square trap codes (0=none, 1=restart, 2=back 3, 3=prison,
                4=random trap).
        circle: whether the board wraps around instead of capping at the goal.
        prison: True if the player is skipping this turn in prison.

    Returns:
        Tuple (new position, whether the player is now in prison).
    """
    # Already at the goal square: nothing to do.
    if curPos == nSquares - 1:
        return nSquares - 1, False
    # In prison: lose this turn, get released.
    if prison:
        return curPos, False

    # Dice result is uniform over 0..diceChoice.
    listDiceResults = [i for i in range(diceChoice + 1)]
    result = random.choice(listDiceResults)

    if curPos == 2 and result != 0:
        # Fork after square 2: choose slow or fast lane at random; the fast
        # lane is offset by +7 squares (presumably the board's second branch).
        slowLane = random.choice([0, 1])
        if slowLane:
            newPos = curPos + result
        else:
            newPos = curPos + result + 7
    elif ((curPos == 9 and result != 0)
          or (curPos in [7, 8, 9] and curPos + result >= 10)):
        # Leaving the slow lane: +4 skips the fast-lane squares to rejoin the
        # main track (assumed board geometry — verify against the layout spec).
        newPos = curPos + result + 4
    else:
        newPos = curPos + result

    # Overshooting the last square.
    if newPos > nSquares - 1:
        if circle:
            newPos -= nSquares  # wrap around the board
        else:
            # NOTE(review): returns prison=True on a winning overshoot; harmless
            # because callers stop looping at the goal, but looks unintended.
            return nSquares - 1, True

    newSquare = layout[newPos]
    if diceChoice == 1:
        # Safe dice: traps never trigger.
        return newPos, False
    elif diceChoice == 2:
        # Normal dice: trap triggers with probability 1/2.
        newSquare = random.choice([0, newSquare])

    if newSquare == 0:
        return newPos, False
        # nothing happens
    elif newSquare == 1:
        return 0, False
        # back to square one
    elif newSquare == 2:
        if newPos - 3 < 0:
            return 0, False
            # back to square one
        return newPos - 3, False
        # back 3 squares
    elif newSquare == 3:
        return newPos, True
        # prison
    elif newSquare == 4:
        # Random trap: behaves as one of the three other traps, equiprobably.
        newSquare = random.choice([1, 2, 3])
        if newSquare == 1:
            return 0, False
            # back to square one
        elif newSquare == 2:
            if newPos - 3 < 0:
                return 0, False
                # back to square one
            return newPos - 3, False
            # back 3 squares
        elif newSquare == 3:
            return newPos, True
            # prison
def playOneGame(layout, circle, policy, start=0):
    """Play one full game from `start` under `policy`; return the turn count.

    Args:
        layout: per-square trap codes.
        circle: whether the board wraps (game ends only on exact landing).
        policy: per-square dice choice, indexed by current position.
        start: starting square (default 0).

    Returns:
        Number of turns taken to reach the final square.
    """
    nTurns = 0
    curPos = start
    prison = False

    if circle:
        # Circular board: must land exactly on the last square.
        while curPos != nSquares - 1:
            newPos, prison = playOneTurn(policy[curPos], curPos, layout,
                                         circle, prison)
            # BUG FIX: the original assigned the wrapped position
            # (curPos = nSquares - newPos) and then unconditionally overwrote
            # it with curPos = newPos, making the wrap branch dead code.
            # (playOneTurn already wraps when circle=True, so this branch is
            # defensive.)
            if newPos > nSquares - 1:
                curPos = nSquares - newPos
            else:
                curPos = newPos
            nTurns += 1
    else:
        # Linear board: overshooting the last square also ends the game.
        while curPos < nSquares - 1:
            newPos, prison = playOneTurn(policy[curPos], curPos, layout,
                                         circle, prison)
            curPos = newPos
            nTurns += 1

    return nTurns
def empiric_cost_of_square(layout, circle, policy):
    """Estimate by simulation the expected number of turns from every square.

    Runs nSimul independent games started on each of the nSquares squares and
    averages the turn counts.

    Returns:
        numpy array of length nSquares with the empirical mean cost per square.
    """
    costs = np.zeros(nSquares)
    for square in range(nSquares):
        # Average over nSimul games started on this square.
        turns = sum(playOneGame(layout, circle, policy, start=square)
                    for _ in range(nSimul))
        costs[square] = turns / nSimul
    return costs
def empirical_results(layout, circle, policy):
    """Return the average number of turns over nSimul games from square 0."""
    total = sum(playOneGame(layout, circle, policy) for _ in range(nSimul))
    return total / nSimul
def comparison_theorical_empirical(layout, circle):
    """Plot the MDP's theoretical per-square cost against the simulated cost.

    Solves the MDP for `layout`, simulates the resulting optimal policy from
    every square, and draws both curves on a shared axis.
    """
    mdp = MarkovDecisionSolver(layout, circle)
    theoretical, best_policy = mdp.solve()
    empirical = empiric_cost_of_square(layout, circle, best_policy.astype(int))

    # One x-axis point per square, both curves on the same figure.
    xs = np.arange(len(theoretical))
    plt.plot(xs, theoretical, label="Theoretical cost")
    plt.plot(xs, empirical, label="Empirical cost")
    plt.xticks(np.arange(0, len(theoretical), step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()
def comparison_of_policies_total(layout, circle):
    """Bar-plot the average game length for the optimal and baseline policies."""
    mdp = MarkovDecisionSolver(layout, circle)
    _, best_policy = mdp.solve()

    # Optimal policy plus four baselines: constant dice 1/2/3 and random.
    policies = [
        best_policy.astype(int),
        np.ones(nSquares, dtype=int),
        np.ones(nSquares, dtype=int) * 2,
        np.ones(nSquares, dtype=int) * 3,
        np.random.randint(1, 4, size=nSquares),
    ]
    names = ["optimal", "safe", "normal", "risky", "random"]
    avgnTurns = [empirical_results(layout, circle, p) for p in policies]

    # Creating the bar plot
    plt.bar(names, avgnTurns)
    # Adding labels and title
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns by policy")
    # Displaying the plot
    plt.show()
def comparison_of_policies_squares(layout, circle):
    """Plot the empirical per-square cost of the optimal and baseline policies."""
    mdp = MarkovDecisionSolver(layout, circle)
    _, best_policy = mdp.solve()

    # Optimal policy plus four baselines: constant dice 1/2/3 and random.
    policies = [
        best_policy.astype(int),
        np.ones(nSquares, dtype=int),
        np.ones(nSquares, dtype=int) * 2,
        np.ones(nSquares, dtype=int) * 3,
        np.random.randint(1, 4, size=nSquares),
    ]
    avgnTurns = [empiric_cost_of_square(layout, circle, p) for p in policies]

    # Generating x-axis values (squares)
    xs = np.arange(len(avgnTurns[0]))

    # Plotting both arrays on the same plot
    labels = ["Optimal", "Safe", "Normal", "Risky", "Random"]
    for costs, name in zip(avgnTurns, labels):
        plt.plot(xs, costs, label=name)

    plt.xticks(np.arange(0, len(avgnTurns[0]), step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Expected cost for different policies")
    plt.show()
def make_plots():
    """Run the theoretical-vs-empirical comparison on a sample 15-square layout."""
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    circle = False
    comparison_theorical_empirical(layout, circle)
    # comparison_of_policies_total(layout, circle)
    # comparison_of_policies_squares(layout, circle)


if __name__ == "__main__":
    # Guard the entry point: the original called make_plots() unconditionally,
    # which ran 10000-game simulations and opened plots on mere import.
    make_plots()
Ce diff est replié.
Cliquez pour l'agrandir.
validation.py
+
61
−
23
Voir le fichier @
883d5811
...
...
@@ -25,43 +25,69 @@ class validation:
self
.
random_strategy
=
[
rd
.
choice
([
0
,
1
,
2
,
3
])
for
_
in
range
(
15
)]
def
simulate_game
(
self
,
strategy
,
n_iterations
=
10000
):
# Compute transition matrices for each dice
transition_matrices
=
[
self
.
safe_dice
,
self
.
normal_dice
,
self
.
risky_dice
]
number_turns
=
[]
for
_
in
range
(
n_iterations
):
total_turns
=
0
state
=
0
# initial state
while
state
<
len
(
self
.
layout
)
-
1
:
# until goal state is reached
action
=
strategy
[
state
]
# get action according to strategy
transition_matrix
=
transition_matrices
[
int
(
action
-
1
)]
state
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
transition_matrix
[
state
])
if
self
.
layout
[
state
]
==
3
and
action
==
2
:
k
=
0
# état initial
while
k
<
len
(
self
.
layout
)
-
1
:
action
=
strategy
[
k
]
# action selon la stratégie
# Convertir action en entier pour accéder à l'indice correct dans transition_matrices
action_index
=
int
(
action
)
-
1
transition_matrix
=
transition_matrices
[
action_index
]
#print(f"Current state (k): {k}, Action chosen: {action}")
#print(f"Transition matrix: {transition_matrix}")
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs
=
transition_matrix
[
k
]
flattened_probs
/=
np
.
sum
(
flattened_probs
)
# Normalisation des probabilités
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
flattened_probs
)
# Mise à jour du nombre de tours en fonction de l'état actuel
if
self
.
layout
[
k
]
==
3
and
action
==
2
:
total_turns
+=
1
if
np
.
random
.
uniform
(
0
,
1
)
<
0.5
else
2
elif
self
.
layout
[
state
]
==
3
and
action
==
3
:
elif
self
.
layout
[
k
]
==
3
and
action
==
3
:
total_turns
+=
2
else
:
total_turns
+=
1
number_turns
.
append
(
total_turns
)
return
np
.
mean
(
number_turns
)
def
play_optimal_strategy
(
self
):
return
turns
    def play_optimal_strategy(self, n_iterations=10000):
        # Simulate n_iterations games following the MDP-optimal policy and
        # return the average number of turns (delegates to simulate_game).
        return self.simulate_game(self.optimal_policy, n_iterations)
def
play_dice_strategy
(
self
):
return
turns
def
play_dice_strategy
(
self
,
dice_choice
,
n_iterations
=
10000
):
if
dice_choice
==
'
SafeDice
'
:
strategy
=
self
.
safe_strategy
elif
dice_choice
==
'
NormalDice
'
:
strategy
=
self
.
normal_strategy
elif
dice_choice
==
'
RiskyDice
'
:
strategy
=
self
.
risky_strategy
else
:
raise
ValueError
(
"
Invalid dice choice
"
)
def
play_random_strategy
(
self
):
return
turns
return
self
.
simulate_game
(
strategy
,
n_iterations
)
    def play_random_strategy(self, n_iterations=10000):
        # Simulate n_iterations games using the per-square random dice choices
        # drawn once in __init__, returning the average number of turns.
        return self.simulate_game(self.random_strategy, n_iterations)
def
compare_strategies
(
self
,
num_games
=
1000
):
optimal_cost
=
self
.
simulate_game
(
s
trategy
=
'
Optimal
'
,
num_game
s
=
num_games
)
dice1_cost
=
self
.
simulate_game
(
s
trategy
=
'
SafeDice
'
,
num_game
s
=
num_games
)
dice2_cost
=
self
.
simulate_game
(
s
trategy
=
'
NormalDice
'
,
num_game
s
=
num_games
)
dice3_cost
=
self
.
simulate_game
(
s
trategy
=
'
RiskyDice
'
,
num_game
s
=
num_games
)
random_cost
=
self
.
simulate_game
(
s
trategy
=
'
Random
'
,
num_game
s
=
num_games
)
optimal_cost
=
self
.
simulate_game
(
s
elf
.
optimal_strategy
,
n_iteration
s
=
num_games
)
dice1_cost
=
self
.
simulate_game
(
s
elf
.
safe_strategy
,
n_iteration
s
=
num_games
)
dice2_cost
=
self
.
simulate_game
(
s
elf
.
normal_strategy
,
n_iteration
s
=
num_games
)
dice3_cost
=
self
.
simulate_game
(
s
elf
.
risky_strategy
,
n_iteration
s
=
num_games
)
random_cost
=
self
.
simulate_game
(
s
elf
.
random_strategy
,
n_iteration
s
=
num_games
)
return
{
'
Optimal
'
:
optimal_cost
,
...
...
@@ -75,11 +101,11 @@ class validation:
# Utilisation d'exemple
layout
=
[
0
,
0
,
3
,
0
,
0
,
0
,
2
,
0
,
0
,
0
,
3
,
0
,
0
,
1
,
0
]
layout
=
[
0
,
0
,
3
,
0
,
2
,
0
,
2
,
0
,
2
,
0
,
3
,
0
,
0
,
1
,
0
]
validation
=
validation
(
layout
,
circle
=
False
)
circle
=
False
# Example circle value
"""
# Create an instance of validation
validator = validation(layout, circle)
...
...
@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
results = validation.compare_strategies(num_games=10000)
print(
"
Coûts moyens :
"
)
for strategy, cost in results.items():
print
(
f
"
{
strategy
}
:
{
cost
}
"
)
print(f
"
{strategy}: {cost}
"
)
"""
# Demo: evaluate and print the average game length for several strategies,
# using the `validation` instance constructed above.
optimal_cost = validation.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)

dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)

random_cost = validation.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)

strategy_comparison = validation.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
Ce diff est replié.
Cliquez pour l'agrandir.
Aperçu
0%
Chargement en cours
Veuillez réessayer
ou
joindre un nouveau fichier
.
Annuler
You are about to add
0
people
to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Enregistrer le commentaire
Annuler
Veuillez vous
inscrire
ou vous
se connecter
pour commenter