Skip to content
GitLab
Explorer
Connexion
S'inscrire
Navigation principale
Rechercher ou aller à…
Projet
M
MLP1
Gestion
Activité
Membres
Labels
Programmation
Tickets
Tableaux des tickets
Jalons
Wiki
Code
Requêtes de fusion
Dépôt
Branches
Validations
Étiquettes
Graphe du dépôt
Comparer les révisions
Extraits de code
Compilation
Pipelines
Jobs
Planifications de pipeline
Artéfacts
Déploiement
Releases
Registre de paquets
Registre de conteneur
Registre de modèles
Opération
Environnements
Modules Terraform
Surveillance
Incidents
Analyse
Données d'analyse des chaînes de valeur
Analyse des contributeurs
Données d'analyse CI/CD
Données d'analyse du dépôt
Expériences du modèle
Aide
Aide
Support
Documentation de GitLab
Comparer les forfaits GitLab
Forum de la communauté
Contribuer à GitLab
Donner votre avis
Conditions générales et politique de confidentialité
Raccourcis clavier
?
Extraits de code
Groupes
Projets
Afficher davantage de fils d'Ariane
machine_learning
MLP1
Validations
883d5811
Valider
883d5811
rédigé
1 year ago
par
Adrien Payen
Parcourir les fichiers
Options
Téléchargements
Correctifs
Plain Diff
merge solution
parent
81121bf8
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Modifications
3
Masquer les modifications d'espaces
En ligne
Côte à côte
Affichage de
3 fichiers modifiés
plot.py
+0
-48
0 ajout, 48 suppressions
plot.py
simulate.py
+173
-77
173 ajouts, 77 suppressions
simulate.py
validation.py
+61
-23
61 ajouts, 23 suppressions
validation.py
avec
234 ajouts
et
148 suppressions
plot.py
supprimé
100644 → 0
+
0
−
48
Voir le fichier @
81121bf8
import
matplotlib.pyplot
as
plt
from
simulate
import
Validation
as
Val
from
tmc
import
TransitionMatrixCalculator
as
tmc
from
markovDecision
import
MarkovDecisionSolver
as
mD
import
random
as
rd
import
numpy
as
np
def plot_results(layouts, circle, n_iterations=100):
    """Plot the average number of turns per layout for five dice strategies.

    For every layout the solver is invoked to obtain a policy, then that
    policy and four fixed baselines (die 1 everywhere, die 2 everywhere,
    die 3 everywhere, and a random die per square) are passed to
    ``Val.simulate_game``. One curve per strategy is drawn against the
    layout index.

    Args:
        layouts: Collection of layouts; it is iterated and its length is
            used for the x axis.
        circle: Forwarded unchanged to the solver and the simulator.
        n_iterations: Forwarded unchanged to ``Val.simulate_game``.
    """
    # NOTE(review): ``mD`` and ``Val.simulate_game`` are called with exactly
    # the same arguments as before; confirm these call shapes against the
    # current definitions of the solver and the Validation class.
    curves = {"Markov": [], "Safe": [], "Normal": [], "Risky": [], "Random": []}

    for board in layouts:
        # Policy produced by the solver for this layout.
        _expec, policy = mD(board, circle)
        curves["Markov"].append(
            Val.simulate_game(policy, board, circle, n_iterations)
        )

        # Constant strategies: the same die on each of the 15 squares.
        for caption, die in (("Safe", 1), ("Normal", 2), ("Risky", 3)):
            curves[caption].append(
                Val.simulate_game([die] * 15, board, circle, n_iterations)
            )

        # The random strategy is drawn only after the fixed ones have been
        # simulated, so the random generator is consumed in the same order.
        random_policy = np.random.randint(1, 4, size=15)
        curves["Random"].append(
            Val.simulate_game(random_policy, board, circle, n_iterations)
        )

    # Plot the results
    positions = range(len(layouts))
    plt.figure(figsize=(12, 8))
    for caption, values in curves.items():
        plt.plot(positions, values, label=caption)
    plt.xticks(positions, positions)
    plt.xlabel('Layout number', fontsize=13)
    plt.ylabel('Average number of turns', fontsize=13)
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    plt.show()
Ce diff est replié.
Cliquez pour l'agrandir.
simulate.py
+
173
−
77
Voir le fichier @
883d5811
from
tmc
import
TransitionMatrixCalculator
as
tmc
import
random
from
markovDecision
import
MarkovDecisionSolver
as
mD
import
random
as
rd
import
numpy
as
np
import
numpy
as
np
import
matplotlib.pyplot
as
plt
class
Validation
:
from
tmc
import
TransitionMatrixCalculator
as
tmc
def
__init__
(
self
,
layout
,
circle
=
False
):
from
markovDecision
import
MarkovDecisionSolver
self
.
layout
=
layout
self
.
circle
=
circle
nSquares
=
15
nSimul
=
10000
# Compute transition matrices using TransitionMatrixCalculator
self
.
tmc_instance
=
tmc
()
def
playOneTurn
(
diceChoice
,
curPos
,
layout
,
circle
,
prison
):
self
.
safe_dice
=
self
.
tmc_instance
.
_compute_safe_matrix
()
if
curPos
==
nSquares
-
1
:
self
.
normal_dice
=
self
.
tmc_instance
.
_compute_normal_matrix
(
layout
,
circle
)
return
nSquares
-
1
,
False
self
.
risky_dice
=
self
.
tmc_instance
.
_compute_risky_matrix
(
layout
,
circle
)
if
prison
:
# Solve Markov Decision Problem
return
curPos
,
False
solver
=
mD
(
self
.
layout
,
self
.
circle
)
self
.
expec
,
self
.
optimal_policy
=
solver
.
solve
()
listDiceResults
=
[
i
for
i
in
range
(
diceChoice
+
1
)]
result
=
random
.
choice
(
listDiceResults
)
# Define all the strategies
self
.
optimal_strategy
=
self
.
optimal_policy
if
curPos
==
2
and
result
!=
0
:
self
.
safe_strategy
=
[
1
]
*
15
slowLane
=
random
.
choice
([
0
,
1
])
self
.
normal_strategy
=
[
2
]
*
15
if
slowLane
:
self
.
risky_strategy
=
[
3
]
*
15
newPos
=
curPos
+
result
self
.
random_strategy
=
[
rd
.
choice
([
1
,
2
,
3
])
for
_
in
range
(
15
)]
else
:
newPos
=
curPos
+
result
+
7
def
simulate_game
(
self
,
strategy
,
n_iterations
=
10000
):
elif
((
curPos
==
9
and
result
!=
0
)
or
(
curPos
in
[
7
,
8
,
9
]
and
curPos
+
result
>=
10
)):
# Compute transition matrices for each dice
newPos
=
curPos
+
result
+
4
transition_matrices
=
[
self
.
safe_dice
,
self
.
normal_dice
,
self
.
risky_dice
]
else
:
number_turns
=
[]
newPos
=
curPos
+
result
for
_
in
range
(
n_iterations
):
if
newPos
>
nSquares
-
1
:
total_turns
=
0
if
circle
:
state
=
0
# initial state
newPos
-=
nSquares
while
state
<
len
(
self
.
layout
)
-
1
:
# until goal state is reached
else
:
action
=
strategy
[
state
]
# get action according to strategy
return
nSquares
-
1
,
True
transition_matrix
=
transition_matrices
[
int
(
action
)
-
1
]
state
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
transition_matrix
[
state
])
newSquare
=
layout
[
newPos
]
if
self
.
layout
[
state
]
==
3
and
action
==
2
:
if
diceChoice
==
1
:
total_turns
+=
np
.
random
.
choice
([
1
,
2
],
p
=
[
0.5
,
0.5
])
return
newPos
,
False
elif
self
.
layout
[
state
]
==
3
and
action
==
3
:
elif
diceChoice
==
2
:
total_turns
+=
2
newSquare
=
random
.
choice
([
0
,
newSquare
])
else
:
total_turns
+=
1
if
newSquare
==
0
:
return
newPos
,
False
# nothing happens
number_turns
.
append
(
total_turns
)
elif
newSquare
==
1
:
return
0
,
False
# back to square one
return
np
.
mean
(
number_turns
)
elif
newSquare
==
2
:
if
newPos
-
3
<
0
:
def
simulate_state
(
self
,
strategy
,
n_iterations
=
10000
):
return
0
,
False
# back to square one
# Compute transition matrices for each dice
return
newPos
-
3
,
False
# back 3 squares
transition_matrices
=
[
self
.
safe_dice
,
self
.
normal_dice
,
self
.
risky_dice
]
elif
newSquare
==
3
:
number_turns
=
[]
return
newPos
,
True
# prison
elif
newSquare
==
4
:
for
_
in
range
(
n_iterations
):
newSquare
=
random
.
choice
([
1
,
2
,
3
])
turns_per_state
=
[]
if
newSquare
==
1
:
state
=
0
return
0
,
False
# back to square one
elif
newSquare
==
2
:
while
state
<
len
(
self
.
layout
)
-
1
:
if
newPos
-
3
<
0
:
total_turns
=
0
return
0
,
False
# back to square one
action
=
strategy
[
state
]
return
newPos
-
3
,
False
# back 3 squares
transition_matrix
=
transition_matrices
[
int
(
action
)
-
1
]
elif
newSquare
==
3
:
state
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
transition_matrix
[
state
])
return
newPos
,
True
# prison
if
self
.
layout
[
state
]
==
3
and
action
==
2
:
def
playOneGame
(
layout
,
circle
,
policy
,
start
=
0
):
total_turns
+=
np
.
random
.
choice
([
1
,
2
],
p
=
[
0.5
,
0.5
])
nTurns
=
0
elif
self
.
layout
[
state
]
==
3
and
action
==
3
:
curPos
=
start
total_turns
+=
2
prison
=
False
else
:
total_turns
+=
1
if
circle
:
while
curPos
!=
nSquares
-
1
:
turns_per_state
.
append
(
total_turns
)
newPos
,
prison
=
playOneTurn
(
policy
[
curPos
],
curPos
,
layout
,
circle
,
prison
)
if
newPos
>
nSquares
-
1
:
number_turns
.
append
(
turns_per_state
)
curPos
=
nSquares
-
newPos
curPos
=
newPos
return
np
.
mean
(
number_turns
,
axis
=
0
)
nTurns
+=
1
else
:
while
curPos
<
nSquares
-
1
:
newPos
,
prison
=
playOneTurn
(
policy
[
curPos
],
curPos
,
layout
,
circle
,
prison
)
curPos
=
newPos
nTurns
+=
1
return
nTurns
def empiric_cost_of_square(layout, circle, policy):
    """Estimate by simulation the mean number of turns from each start square.

    For every start square in ``range(nSquares)``, ``nSimul`` games are
    played with ``playOneGame`` and the returned turn counts are averaged.

    Args:
        layout: Forwarded to ``playOneGame``.
        circle: Forwarded to ``playOneGame``.
        policy: Forwarded to ``playOneGame``.

    Returns:
        A NumPy array of length ``nSquares`` holding one average per start
        square.
    """
    averages = np.zeros(nSquares)
    for origin in range(nSquares):
        turns_played = sum(
            playOneGame(layout, circle, policy, start=origin)
            for _ in range(nSimul)
        )
        averages[origin] = turns_played / nSimul
    return averages
def empirical_results(layout, circle, policy):
    """Return the mean turn count over ``nSimul`` games played with ``policy``.

    Each game is played with ``playOneGame`` using its default start square.
    """
    turn_counts = [playOneGame(layout, circle, policy) for _ in range(nSimul)]
    return sum(turn_counts) / nSimul
def comparison_theorical_empirical(layout, circle):
    """Plot the solver's expected cost next to the simulated cost per square.

    The solver supplies both the theoretical cost vector and the policy; the
    policy (cast to int) is then simulated from every start square with
    ``empiric_cost_of_square`` and both curves are drawn on one figure.
    """
    expec, optimal_policy = MarkovDecisionSolver(layout, circle).solve()
    actual = empiric_cost_of_square(layout, circle, optimal_policy.astype(int))

    # Both curves share the same x axis: one point per entry of ``expec``.
    n_points = len(expec)
    squares = np.arange(n_points)
    for values, caption in ((expec, "Theoretical cost"), (actual, "Empirical cost")):
        plt.plot(squares, values, label=caption)

    plt.xticks(np.arange(0, n_points, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()
def comparison_of_policies_total(layout, circle):
    """Draw a bar chart of the simulated average game length per policy.

    Five policies are compared: the solver's policy (cast to int), die 1 on
    every square, die 2 on every square, die 3 on every square, and a policy
    drawn at random with ``np.random.randint(1, 4)``. Each one is evaluated
    with ``empirical_results``.
    """
    _, optimal_policy = MarkovDecisionSolver(layout, circle).solve()

    policies = [
        optimal_policy.astype(int),
        np.full(nSquares, 1, dtype=int),
        np.full(nSquares, 2, dtype=int),
        np.full(nSquares, 3, dtype=int),
        np.random.randint(1, 4, size=nSquares),
    ]

    avg_turns = []
    for candidate in policies:
        avg_turns.append(empirical_results(layout, circle, candidate))

    names = ["optimal", "safe", "normal", "risky", "random"]

    # One bar per policy, in the same order as ``policies``.
    plt.bar(names, avg_turns)
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns by policy")
    plt.show()
def comparison_of_policies_squares(layout, circle):
    """Plot the simulated cost from every start square for five policies.

    The compared policies are the solver's policy (cast to int), die 1 on
    every square, die 2 on every square, die 3 on every square, and a policy
    drawn at random with ``np.random.randint(1, 4)``. Each one is evaluated
    with ``empiric_cost_of_square`` and drawn as its own curve.
    """
    _, optimal_policy = MarkovDecisionSolver(layout, circle).solve()

    policies = [
        optimal_policy.astype(int),
        np.full(nSquares, 1, dtype=int),
        np.full(nSquares, 2, dtype=int),
        np.full(nSquares, 3, dtype=int),
        np.random.randint(1, 4, size=nSquares),
    ]

    costs = []
    for candidate in policies:
        costs.append(empiric_cost_of_square(layout, circle, candidate))

    # x axis: one point per entry of the first cost vector.
    n_points = len(costs[0])
    squares = np.arange(n_points)

    captions = ("Optimal", "Safe", "Normal", "Risky", "Random")
    for caption, values in zip(captions, costs):
        plt.plot(squares, values, label=caption)

    plt.xticks(np.arange(0, n_points, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Expected cost for different policies")
    plt.show()
def make_plots():
    """Run the theoretical-versus-empirical comparison on a sample layout."""
    sample_layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    use_circle = False
    comparison_theorical_empirical(sample_layout, use_circle)
    # The two other comparisons are currently disabled:
    # comparison_of_policies_total(layout, circle)
    # comparison_of_policies_squares(layout, circle)
make_plots
()
Ce diff est replié.
Cliquez pour l'agrandir.
validation.py
+
61
−
23
Voir le fichier @
883d5811
...
@@ -25,43 +25,69 @@ class validation:
...
@@ -25,43 +25,69 @@ class validation:
self
.
random_strategy
=
[
rd
.
choice
([
0
,
1
,
2
,
3
])
for
_
in
range
(
15
)]
self
.
random_strategy
=
[
rd
.
choice
([
0
,
1
,
2
,
3
])
for
_
in
range
(
15
)]
def
simulate_game
(
self
,
strategy
,
n_iterations
=
10000
):
def
simulate_game
(
self
,
strategy
,
n_iterations
=
10000
):
# Compute transition matrices for each dice
transition_matrices
=
[
self
.
safe_dice
,
self
.
normal_dice
,
self
.
risky_dice
]
transition_matrices
=
[
self
.
safe_dice
,
self
.
normal_dice
,
self
.
risky_dice
]
number_turns
=
[]
number_turns
=
[]
for
_
in
range
(
n_iterations
):
for
_
in
range
(
n_iterations
):
total_turns
=
0
total_turns
=
0
state
=
0
# initial state
k
=
0
# état initial
while
state
<
len
(
self
.
layout
)
-
1
:
# until goal state is reached
action
=
strategy
[
state
]
# get action according to strategy
while
k
<
len
(
self
.
layout
)
-
1
:
transition_matrix
=
transition_matrices
[
int
(
action
-
1
)]
action
=
strategy
[
k
]
# action selon la stratégie
state
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
transition_matrix
[
state
])
if
self
.
layout
[
state
]
==
3
and
action
==
2
:
# Convertir action en entier pour accéder à l'indice correct dans transition_matrices
action_index
=
int
(
action
)
-
1
transition_matrix
=
transition_matrices
[
action_index
]
#print(f"Current state (k): {k}, Action chosen: {action}")
#print(f"Transition matrix: {transition_matrix}")
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs
=
transition_matrix
[
k
]
flattened_probs
/=
np
.
sum
(
flattened_probs
)
# Normalisation des probabilités
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
flattened_probs
)
# Mise à jour du nombre de tours en fonction de l'état actuel
if
self
.
layout
[
k
]
==
3
and
action
==
2
:
total_turns
+=
1
if
np
.
random
.
uniform
(
0
,
1
)
<
0.5
else
2
total_turns
+=
1
if
np
.
random
.
uniform
(
0
,
1
)
<
0.5
else
2
elif
self
.
layout
[
state
]
==
3
and
action
==
3
:
elif
self
.
layout
[
k
]
==
3
and
action
==
3
:
total_turns
+=
2
total_turns
+=
2
else
:
else
:
total_turns
+=
1
total_turns
+=
1
number_turns
.
append
(
total_turns
)
number_turns
.
append
(
total_turns
)
return
np
.
mean
(
number_turns
)
return
np
.
mean
(
number_turns
)
def
play_optimal_strategy
(
self
):
def
play_optimal_strategy
(
self
,
n_iterations
=
10000
):
return
turns
return
self
.
simulate_game
(
self
.
optimal_policy
,
n_iterations
)
def
play_dice_strategy
(
self
):
def
play_dice_strategy
(
self
,
dice_choice
,
n_iterations
=
10000
):
return
turns
if
dice_choice
==
'
SafeDice
'
:
strategy
=
self
.
safe_strategy
elif
dice_choice
==
'
NormalDice
'
:
strategy
=
self
.
normal_strategy
elif
dice_choice
==
'
RiskyDice
'
:
strategy
=
self
.
risky_strategy
else
:
raise
ValueError
(
"
Invalid dice choice
"
)
def
play_random_strategy
(
self
):
return
self
.
simulate_game
(
strategy
,
n_iterations
)
return
turns
def
play_random_strategy
(
self
,
n_iterations
=
10000
):
return
self
.
simulate_game
(
self
.
random_strategy
,
n_iterations
)
def
compare_strategies
(
self
,
num_games
=
1000
):
def
compare_strategies
(
self
,
num_games
=
1000
):
optimal_cost
=
self
.
simulate_game
(
s
trategy
=
'
Optimal
'
,
num_game
s
=
num_games
)
optimal_cost
=
self
.
simulate_game
(
s
elf
.
optimal_strategy
,
n_iteration
s
=
num_games
)
dice1_cost
=
self
.
simulate_game
(
s
trategy
=
'
SafeDice
'
,
num_game
s
=
num_games
)
dice1_cost
=
self
.
simulate_game
(
s
elf
.
safe_strategy
,
n_iteration
s
=
num_games
)
dice2_cost
=
self
.
simulate_game
(
s
trategy
=
'
NormalDice
'
,
num_game
s
=
num_games
)
dice2_cost
=
self
.
simulate_game
(
s
elf
.
normal_strategy
,
n_iteration
s
=
num_games
)
dice3_cost
=
self
.
simulate_game
(
s
trategy
=
'
RiskyDice
'
,
num_game
s
=
num_games
)
dice3_cost
=
self
.
simulate_game
(
s
elf
.
risky_strategy
,
n_iteration
s
=
num_games
)
random_cost
=
self
.
simulate_game
(
s
trategy
=
'
Random
'
,
num_game
s
=
num_games
)
random_cost
=
self
.
simulate_game
(
s
elf
.
random_strategy
,
n_iteration
s
=
num_games
)
return
{
return
{
'
Optimal
'
:
optimal_cost
,
'
Optimal
'
:
optimal_cost
,
...
@@ -75,11 +101,11 @@ class validation:
...
@@ -75,11 +101,11 @@ class validation:
# Utilisation d'exemple
# Utilisation d'exemple
layout
=
[
0
,
0
,
3
,
0
,
0
,
0
,
2
,
0
,
0
,
0
,
3
,
0
,
0
,
1
,
0
]
layout
=
[
0
,
0
,
3
,
0
,
2
,
0
,
2
,
0
,
2
,
0
,
3
,
0
,
0
,
1
,
0
]
validation
=
validation
(
layout
,
circle
=
False
)
validation
=
validation
(
layout
,
circle
=
False
)
circle
=
False
# Example circle value
circle
=
False
# Example circle value
"""
# Create an instance of validation
# Create an instance of validation
validator = validation(layout, circle)
validator = validation(layout, circle)
...
@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
...
@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
results = validation.compare_strategies(num_games=10000)
results = validation.compare_strategies(num_games=10000)
print(
"
Coûts moyens :
"
)
print(
"
Coûts moyens :
"
)
for strategy, cost in results.items():
for strategy, cost in results.items():
print
(
f
"
{
strategy
}
:
{
cost
}
"
)
print(f
"
{strategy}: {cost}
"
)
"""
optimal_cost
=
validation
.
play_optimal_strategy
(
n_iterations
=
10000
)
print
(
"
Optimal Strategy Cost:
"
,
optimal_cost
)
dice2_cost
=
validation
.
play_dice_strategy
(
'
NormalDice
'
,
n_iterations
=
10000
)
print
(
"
Normal Dice Strategy Cost:
"
,
dice2_cost
)
random_cost
=
validation
.
play_random_strategy
(
n_iterations
=
10000
)
print
(
"
Random Strategy Cost:
"
,
random_cost
)
strategy_comparison
=
validation
.
compare_strategies
(
num_games
=
10000
)
print
(
"
Strategy Comparison Results:
"
,
strategy_comparison
)
Ce diff est replié.
Cliquez pour l'agrandir.
Aperçu
0%
Chargement en cours
Veuillez réessayer
ou
joindre un nouveau fichier
.
Annuler
You are about to add
0
people
to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Enregistrer le commentaire
Annuler
Veuillez vous inscrire ou vous connecter pour commenter