Skip to content
GitLab
Explorer
Connexion
S'inscrire
Navigation principale
Rechercher ou aller à…
Projet
M
MLP1
Gestion
Activité
Membres
Labels
Programmation
Tickets
Tableaux des tickets
Jalons
Wiki
Code
Requêtes de fusion
Dépôt
Branches
Validations
Étiquettes
Graphe du dépôt
Comparer les révisions
Extraits de code
Compilation
Pipelines
Jobs
Planifications de pipeline
Artéfacts
Déploiement
Releases
Registre de paquets
Registre de conteneur
Registre de modèles
Opération
Environnements
Modules Terraform
Surveillance
Incidents
Analyse
Données d'analyse des chaînes de valeur
Analyse des contributeurs
Données d'analyse CI/CD
Données d'analyse du dépôt
Expériences du modèle
Aide
Aide
Support
Documentation de GitLab
Comparer les forfaits GitLab
Forum de la communauté
Contribuer à GitLab
Donner votre avis
Conditions générales et politique de confidentialité
Raccourcis clavier
?
Extraits de code
Groupes
Projets
Afficher davantage de fils d'Ariane
machine_learning
MLP1
Validations
749713a6
Valider
749713a6
rédigé
1 year ago
par
Adrien Payen
Parcourir les fichiers
Options
Téléchargements
Correctifs
Plain Diff
comments code .py
parent
169c62f3
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Modifications
5
Masquer les modifications d'espaces
En ligne
Côte à côte
Affichage de
5 fichiers modifiés
markovDecision.py
+30
-30
30 ajouts, 30 suppressions
markovDecision.py
plot.py
+52
-24
52 ajouts, 24 suppressions
plot.py
strategy_comparison.png
+0
-0
0 ajout, 0 suppression
strategy_comparison.png
tmc.py
+15
-26
15 ajouts, 26 suppressions
tmc.py
validation.py
+68
-126
68 ajouts, 126 suppressions
validation.py
avec
165 ajouts
et
206 suppressions
markovDecision.py
+
30
−
30
Voir le fichier @
749713a6
import
numpy
as
np
from
tmc
import
TransitionMatrixCalculator
as
tmc
class
MarkovDecision
Solver
:
class
MarkovDecision
Process
:
def
__init__
(
self
,
layout
:
list
,
circle
:
bool
):
# Initialize the Markov Decision Process solver with layout and game mode (circle or not)
self
.
Numberk
=
15
self
.
tmc_instance
=
tmc
()
# Compute transition matrices for safe, normal, and risky scenarios
self
.
safe_dice
=
self
.
tmc_instance
.
_compute_safe_matrix
()
self
.
normal_dice
,
_
=
self
.
tmc_instance
.
_compute_normal_matrix
(
layout
,
circle
)
# Make sure to capture only the normal_dice component
self
.
risky_dice
,
_
=
self
.
tmc_instance
.
_compute_risky_matrix
(
layout
,
circle
)
# Make sure to capture only the risky_dice component
self
.
normal_dice
,
_
=
self
.
tmc_instance
.
_compute_normal_matrix
(
layout
,
circle
)
self
.
risky_dice
,
_
=
self
.
tmc_instance
.
_compute_risky_matrix
(
layout
,
circle
)
# Identify jail states in the layout
self
.
jail
=
[
i
for
i
,
x
in
enumerate
(
layout
)
if
x
==
3
]
# Initialize value and dice decision arrays
self
.
ValueI
=
np
.
zeros
(
self
.
Numberk
)
self
.
Dice
ForStates
=
np
.
zeros
(
self
.
Numberk
-
1
)
self
.
Dice
=
np
.
zeros
(
self
.
Numberk
-
1
)
def
_compute_vi_safe
(
self
,
k
):
def
_compute_vi_safe
(
self
,
k
:
int
):
# Compute the expected value using safe dice transition matrix for state k
return
np
.
dot
(
self
.
safe_dice
[
k
],
self
.
ValueI
)
+
np
.
sum
(
self
.
normal_dice
[
k
][
self
.
jail
])
def
_compute_vi_normal
(
self
,
k
):
def
_compute_vi_normal
(
self
,
k
:
int
):
# Compute the expected value using normal dice transition matrix for state k
vi_normal
=
np
.
dot
(
self
.
normal_dice
[
k
],
self
.
ValueI
)
+
np
.
sum
(
self
.
normal_dice
[
k
][
self
.
jail
])
return
vi_normal
def
_compute_vi_risky
(
self
,
k
):
def
_compute_vi_risky
(
self
,
k
:
int
):
# Compute the expected value using risky dice transition matrix for state k
vi_risky
=
np
.
dot
(
self
.
risky_dice
[
k
],
self
.
ValueI
)
+
np
.
sum
(
self
.
risky_dice
[
k
][
self
.
jail
])
return
vi_risky
def
solve
(
self
):
# Iteratively solve the Markov Decision Process until convergence
i
=
0
while
True
:
ValueINew
=
np
.
zeros
(
self
.
Numberk
)
i
+=
1
for
k
in
range
(
self
.
Numberk
-
1
):
# Compute expected values for safe, normal, and risky decisions at state k
vi_safe
=
self
.
_compute_vi_safe
(
k
)
vi_normal
=
self
.
_compute_vi_normal
(
k
)
vi_risky
=
self
.
_compute_vi_risky
(
k
)
#
Comput
e the minimum value among
vi_
safe,
vi_
normal, and
vi_
risky
#
Determin
e the minimum value among safe, normal, and risky
decisions
min_value
=
min
(
vi_safe
,
vi_normal
,
vi_risky
)
#
Find which index
(safe, normal
, or
risky) correspond
s
to the minimum value
#
Record the dice decision
(safe
=1
, normal
=2,
risky
=3
) correspond
ing
to the minimum value
if
min_value
==
vi_safe
:
ValueINew
[
k
]
=
1
+
vi_safe
self
.
Dice
ForStates
[
k
]
=
1
self
.
Dice
[
k
]
=
1
elif
min_value
==
vi_normal
:
ValueINew
[
k
]
=
1
+
vi_normal
self
.
Dice
ForStates
[
k
]
=
2
self
.
Dice
[
k
]
=
2
else
:
ValueINew
[
k
]
=
1
+
vi_risky
self
.
DiceForStates
[
k
]
=
3
self
.
Dice
[
k
]
=
3
# Check for convergence
if
np
.
allclose
(
ValueINew
,
self
.
ValueI
):
self
.
ValueI
=
ValueINew
break
self
.
ValueI
=
ValueINew
# Return the expected values and dice decisions for each state
Expec
=
self
.
ValueI
[:
-
1
]
return
[
Expec
,
self
.
Dice
ForStates
]
return
[
Expec
,
self
.
Dice
]
def
markovDecision
(
layout
:
list
,
circle
:
bool
):
solver
=
MarkovDecisionSolver
(
layout
,
circle
)
return
solver
.
solve
()
"""
# Exemple d
'
utilisation de la fonction markovDecision avec les paramètres layout et circle
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
# Résolution du problème avec différents modes de jeu
result_false = markovDecision(layout, circle=False)
print(
"
\n
Win as soon as land on or overstep the final square
"
)
print(result_false)
result_true = markovDecision(layout, circle=True)
print(
"
\n
Stopping on the square to win
"
)
print(result_true)
"""
\ No newline at end of file
# Solve the Markov Decision Problem for the given layout and game mode
solver
=
MarkovDecisionProcess
(
layout
,
circle
)
return
solver
.
solve
()
\ No newline at end of file
Ce diff est replié.
Cliquez pour l'agrandir.
plot.py
+
52
−
24
Voir le fichier @
749713a6
...
...
@@ -2,29 +2,24 @@ import matplotlib.pyplot as plt
from
validation
import
Validation
as
Val
import
numpy
as
np
# Example layout and circle settings
layout
=
[
0
,
0
,
3
,
0
,
2
,
0
,
2
,
0
,
0
,
0
,
3
,
0
,
0
,
1
,
0
]
circle
=
False
# Create an instance of validation
validation_instance
=
Val
(
layout
,
circle
)
# Plotting function for strategy comparison
def
plot_strategy_comparison
(
num_games
=
10000
):
def
plot_strategy_comparison
(
num_games
:
int
):
"""
Plot a bar chart comparing average costs of different strategies over specified number of games.
"""
# Compare strategies and get their costs
strategy_costs
=
validation_instance
.
compare_strategies
(
num_games
=
num_games
)
#
Bar plot for strategy comparison
#
Plotting the bar chart
plt
.
figure
(
figsize
=
(
10
,
6
))
plt
.
bar
(
strategy_costs
.
keys
(),
strategy_costs
.
values
(),
color
=
[
'
blue
'
,
'
green
'
,
'
orange
'
,
'
red
'
,
'
purple
'
])
plt
.
xlabel
(
'
Strategies
'
)
plt
.
ylabel
(
'
Average Cost
'
)
plt
.
title
(
'
Comparison of Strategies
'
)
plt
.
savefig
(
'
strategy_comparison.png
'
)
# Save the plot
plt
.
show
()
# Plotting function for state-based average turns for all strategies on the same plot
def
plot_state_based_turns
(
save
=
True
):
def
plot_state_based_turns
():
"""
Plot the average number of turns per state for different strategies.
"""
strategies
=
[
validation_instance
.
optimal_policy
,
validation_instance
.
safe_strategy
,
validation_instance
.
normal_strategy
,
...
...
@@ -33,8 +28,9 @@ def plot_state_based_turns(save=True):
strategy_names
=
[
'
Optimal
'
,
'
SafeDice
'
,
'
NormalDice
'
,
'
RiskyDice
'
,
'
Random
'
]
plt
.
figure
(
figsize
=
(
12
,
6
))
# Simulate and plot average turns for each strategy
for
strategy
,
name
in
zip
(
strategies
,
strategy_names
):
mean_turns
=
validation_instance
.
simulate_state
(
strategy
,
layout
,
circle
)
mean_turns
=
validation_instance
.
simulate_state
(
strategy
,
layout
,
circle
,
num_games
)
plt
.
plot
(
range
(
len
(
mean_turns
)),
mean_turns
,
marker
=
'
o
'
,
linestyle
=
'
-
'
,
label
=
name
)
plt
.
xlabel
(
'
State
'
)
...
...
@@ -42,13 +38,35 @@ def plot_state_based_turns(save=True):
plt
.
title
(
'
Average Turns per State for Different Strategies
'
)
plt
.
grid
(
True
)
plt
.
legend
()
plt
.
show
()
def
plot_state_based_comparison
(
num_games_list
):
"""
Plot a comparison between optimal turns and empirical turns per state for different num_games.
"""
plt
.
figure
(
figsize
=
(
12
,
6
))
# Create a single figure for all plots
optimal_turns
=
None
# Initialize optimal_turns to None
for
num_games
in
num_games_list
:
_
,
empirical_turns
=
validation_instance
.
compare_state_based_turns
(
num_games
=
num_games
)
# Plotting empirical turns per state for the current num_games
plt
.
plot
(
range
(
len
(
empirical_turns
)),
empirical_turns
,
marker
=
'
x
'
,
linestyle
=
'
-
'
,
label
=
f
'
Empirical (num_games=
{
num_games
}
)
'
)
#if save:
#plt.savefig('state_based_turns_all_strategies.png') # Save the plot
if
optimal_turns
is
None
:
# Only fetch optimal_turns once (for the first num_games)
optimal_turns
,
_
=
validation_instance
.
compare_state_based_turns
(
num_games
=
num_games
)
plt
.
plot
(
range
(
len
(
optimal_turns
)),
optimal_turns
,
marker
=
'
o
'
,
linestyle
=
'
-
'
,
label
=
f
'
ValueIteration
'
)
plt
.
xlabel
(
'
State
'
)
plt
.
ylabel
(
'
Average Turns
'
)
plt
.
title
(
'
Average Turns per State - ValueIteration vs. Empirical
'
)
plt
.
grid
(
True
)
plt
.
legend
()
plt
.
show
()
def
plot_state_based_comparison
(
validation_instance
,
num_games
=
10000
):
def
plot_state_based_comparison_once
(
num_games
:
int
):
optimal_turns
,
empirical_turns
=
validation_instance
.
compare_state_based_turns
(
num_games
=
num_games
)
# Plotting the state-based average turns comparison
...
...
@@ -70,13 +88,23 @@ def plot_state_based_comparison(validation_instance, num_games=10000):
# Main function to generate and save plots
if
__name__
==
'
__main__
'
:
# Example of strategy comparison plot
plot_strategy_comparison
(
num_games
=
10000
)
# Example of state-based average turns plot for all strategies on the same plot
plot_state_based_turns
(
save
=
True
)
##### Paramètres #####
# Define the layout of the game board
layout
=
[
0
,
0
,
3
,
0
,
0
,
0
,
2
,
0
,
0
,
0
,
3
,
0
,
0
,
1
,
0
]
# Indicates whether the board is circular or linear
circle
=
False
# Number of games to simulate
num_games
=
10000
# Initialize Validation instance with the specified layout and circle type
validation_instance
=
Val
(
layout
,
circle
)
##### Launch Plots #####
plot_state_based_comparison
(
validation_instance
,
num_games
=
10000
)
\ No newline at end of file
# Run the defined plotting functions with specified parameters
plot_strategy_comparison
(
num_games
)
plot_state_based_turns
()
plot_state_based_comparison
(
num_games_list
=
[
10
,
100
,
1000
])
plot_state_based_comparison_once
(
num_games
)
\ No newline at end of file
Ce diff est replié.
Cliquez pour l'agrandir.
strategy_comparison.png
+
0
−
0
Voir le fichier remplacé @
169c62f3
Voir le fichier @
749713a6
21,7 ko
|
W:
|
H:
21,7 ko
|
W:
|
H:
2-up
Swipe
Onion skin
Ce diff est replié.
Cliquez pour l'agrandir.
tmc.py
+
15
−
26
Voir le fichier @
749713a6
...
...
@@ -2,12 +2,14 @@ import numpy as np
class
TransitionMatrixCalculator
:
def
__init__
(
self
):
# Initialize the size of the transition matrices
self
.
size
=
15
self
.
matrix_safe
=
np
.
zeros
((
self
.
size
,
self
.
size
))
self
.
matrix_normal
=
np
.
zeros
((
self
.
size
,
self
.
size
))
self
.
matrix_risky
=
np
.
zeros
((
self
.
size
,
self
.
size
))
def
compute_transition_matrix
(
self
,
layout
,
circle
=
False
):
def
compute_transition_matrix
(
self
,
layout
:
list
,
circle
:
bool
):
# Compute transition matrices for safe, normal, and risky scenarios
self
.
matrix_safe
=
self
.
_compute_safe_matrix
()
self
.
matrix_normal
,
_
=
self
.
_compute_normal_matrix
(
layout
,
circle
)
self
.
matrix_risky
,
_
=
self
.
_compute_risky_matrix
(
layout
,
circle
)
...
...
@@ -16,11 +18,12 @@ class TransitionMatrixCalculator:
def
_compute_safe_matrix
(
self
):
# Compute transition matrix for safe scenario
p
=
np
.
zeros
((
self
.
size
,
self
.
size
))
for
k
in
range
(
self
.
size
-
1
):
if
k
==
2
:
p
[
k
,
k
+
1
]
=
1
/
4
# slow lane
p
[
k
,
k
+
8
]
=
1
/
4
# fast lane
p
[
k
,
k
+
1
]
=
1
/
4
p
[
k
,
k
+
8
]
=
1
/
4
elif
k
==
9
:
p
[
k
,
k
+
5
]
=
1
/
2
else
:
...
...
@@ -29,14 +32,15 @@ class TransitionMatrixCalculator:
p
[
self
.
size
-
1
,
self
.
size
-
1
]
=
1
return
p
def
_compute_normal_matrix
(
self
,
layout
,
circle
=
False
):
def
_compute_normal_matrix
(
self
,
layout
:
list
,
circle
:
bool
):
# Compute transition matrix for normal scenario
p
=
np
.
zeros
((
self
.
size
,
self
.
size
))
jail
=
np
.
zeros
((
self
.
size
,
self
.
size
))
for
k
in
range
(
self
.
size
-
1
):
if
k
==
2
:
p
[
k
,
k
+
1
:
k
+
3
]
=
1
/
6
# slow lane # slow lane
p
[
k
,
k
+
8
:
k
+
10
]
=
1
/
6
# fast lane # fast lane
p
[
k
,
k
+
1
:
k
+
3
]
=
1
/
6
p
[
k
,
k
+
8
:
k
+
10
]
=
1
/
6
elif
k
==
8
:
p
[
k
,
k
+
1
]
=
1
/
3
p
[
k
,
k
+
6
]
=
1
/
3
...
...
@@ -73,14 +77,15 @@ class TransitionMatrixCalculator:
p
[
self
.
size
-
1
,
self
.
size
-
1
]
=
1
return
p
,
jail
def
_compute_risky_matrix
(
self
,
layout
,
circle
=
False
):
def
_compute_risky_matrix
(
self
,
layout
:
list
,
circle
:
bool
):
# Compute transition matrix for risky scenario
p
=
np
.
zeros
((
self
.
size
,
self
.
size
))
jail
=
np
.
zeros
((
self
.
size
,
self
.
size
))
for
k
in
range
(
self
.
size
-
1
):
if
k
==
2
:
p
[
k
,
k
+
1
:
k
+
4
]
=
1
/
8
# slow lane
p
[
k
,
k
+
8
:
k
+
11
]
=
1
/
8
# fast lane
p
[
k
,
k
+
1
:
k
+
4
]
=
1
/
8
p
[
k
,
k
+
8
:
k
+
11
]
=
1
/
8
elif
k
==
7
:
p
[
k
,
k
+
1
:
k
+
3
]
=
1
/
4
p
[
k
,
k
+
7
]
=
1
/
4
...
...
@@ -131,20 +136,4 @@ class TransitionMatrixCalculator:
jail
[
k
,
j
]
=
p
[
k
,
j
]
p
[
self
.
size
-
1
,
self
.
size
-
1
]
=
1
return
p
,
jail
"""
def display_matrices(self):
print(
"
Safe Matrix:
"
)
print(self.matrix_safe)
print(
"
\n
Normal Matrix:
"
)
print(self.matrix_normal)
print(
"
\n
Risky Matrix:
"
)
print(self.matrix_risky)
# Example Usage:
layout_example = [0]*15
calculator = TransitionMatrixCalculator()
calculator.compute_transition_matrix(layout_example, circle=True)
calculator.display_matrices()
"""
\ No newline at end of file
return
p
,
jail
\ No newline at end of file
Ce diff est replié.
Cliquez pour l'agrandir.
validation.py
+
68
−
126
Voir le fichier @
749713a6
import
random
as
rd
import
numpy
as
np
from
tmc
import
TransitionMatrixCalculator
as
tmc
from
markovDecision
import
MarkovDecisionSolver
as
mD
from
markovDecision
import
MarkovDecisionProcess
as
mD
# Class for performing validation and simulation
class
Validation
:
def
__init__
(
self
,
layout
,
circle
=
False
):
def
__init__
(
self
,
layout
:
list
,
circle
:
bool
):
# Initialize with layout and circle configuration
self
.
layout
=
layout
self
.
circle
=
circle
# Initialize TransitionMatrixCalculator instance for transition matrix computation
self
.
tmc_instance
=
tmc
()
# Compute transition matrices for safe, normal, and risky dice
self
.
safe_dice
=
self
.
tmc_instance
.
_compute_safe_matrix
()
self
.
normal_dice
,
_
=
self
.
tmc_instance
.
_compute_normal_matrix
(
layout
,
circle
)
self
.
risky_dice
,
_
=
self
.
tmc_instance
.
_compute_risky_matrix
(
layout
,
circle
)
# Use MarkovDecisionSolver to find optimal policy and expected costs
solver
=
mD
(
self
.
layout
,
self
.
circle
)
self
.
expec
,
self
.
optimal_policy
=
solver
.
solve
()
# Predefined strategies for different dice types
self
.
safe_strategy
=
[
1
]
*
len
(
layout
)
self
.
normal_strategy
=
[
2
]
*
len
(
layout
)
self
.
risky_strategy
=
[
3
]
*
len
(
layout
)
self
.
random_strategy
=
[
rd
.
choice
([
0
,
1
,
2
,
3
])
for
_
in
range
(
len
(
layout
))]
# Dictionary to store costs by dice type
self
.
costs_by_dice_type
=
{
'
SafeDice
'
:
[
0
]
*
len
(
layout
),
'
NormalDice
'
:
[
0
]
*
len
(
layout
),
'
RiskyDice
'
:
[
0
]
*
len
(
layout
)
}
for
i
,
die_type
in
enumerate
(
self
.
layout
):
# Assign costs based on dice type to the respective lists in the dictionary
for
i
,
die_type
in
enumerate
(
self
.
layout
)
:
self
.
costs_by_dice_type
[
'
SafeDice
'
][
i
]
=
1
if
die_type
==
3
else
0
self
.
costs_by_dice_type
[
'
NormalDice
'
][
i
]
=
2
if
die_type
==
3
else
0
self
.
costs_by_dice_type
[
'
RiskyDice
'
][
i
]
=
3
if
die_type
==
3
else
0
def
simulate_game
(
self
,
strategy
,
n_iterations
=
10000
):
def
simulate_game
(
self
,
strategy
:
list
,
n_iterations
:
int
):
"""
Simulate the game using a given strategy over multiple iterations.
"""
transition_matrices
=
[
self
.
safe_dice
,
self
.
normal_dice
,
self
.
risky_dice
]
number
_turns
=
[]
total
_turns
=
np
.
zeros
(
n_iterations
)
for
_
in
range
(
n_iterations
):
total_turns
=
0
k
=
0
# initial state
for
i
in
range
(
n_iterations
):
k
=
0
turns
=
0
while
k
<
len
(
self
.
layout
)
-
1
:
action
=
strategy
[
k
]
...
...
@@ -50,32 +61,34 @@ class Validation:
k
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
flattened_probs
)
if
self
.
layout
[
k
]
==
3
and
action
==
2
:
total_turns
+=
1
if
np
.
random
.
uniform
(
0
,
1
)
<
0.5
else
2
elif
self
.
layout
[
k
]
==
3
and
action
==
3
:
total_turns
+=
2
if
self
.
layout
[
k
]
==
3
:
if
action
==
2
:
turns
+=
np
.
random
.
choice
([
1
,
2
],
p
=
[
0.5
,
0.5
])
elif
action
==
3
:
turns
+=
2
else
:
total_turns
+=
1
turns
+=
1
total_turns
[
i
]
=
turns
number_turns
.
append
(
total_turns
)
return
np
.
mean
(
total_turns
)
return
np
.
mean
(
number_turns
)
def
simulate_state
(
self
,
strategy
,
layout
,
circle
,
n_iterations
=
10000
):
def
simulate_state
(
self
,
strategy
:
list
,
layout
:
list
,
circle
:
bool
,
n_iterations
:
int
):
"""
Simulate game states using a given strategy.
"""
safe_dice
=
self
.
tmc_instance
.
_compute_safe_matrix
()
normal_dice
=
self
.
tmc_instance
.
_compute_normal_matrix
(
layout
,
circle
)[
0
]
risky_dice
=
self
.
tmc_instance
.
_compute_risky_matrix
(
layout
,
circle
)[
0
]
transition_matrices
=
[
safe_dice
,
normal_dice
,
risky_dice
]
number_turns
=
[]
number_mean
=
[]
total_turns
=
[]
for
_
in
range
(
n_iterations
):
number_t
ur
n
s
=
[]
state_turns
=
np
.
zeros
(
len
(
layout
)
-
1
)
# Utiliser un tableau numpy pour stocker les to
urs
par état
for
state
in
range
(
len
(
layout
)
-
1
):
total_turns
=
0
k
=
state
turns
=
0
while
k
<
len
(
layout
)
-
1
:
action
=
strategy
[
k
]
...
...
@@ -87,25 +100,27 @@ class Validation:
k
=
np
.
random
.
choice
(
len
(
layout
),
p
=
flattened_probs
)
if
layout
[
k
]
==
3
and
action
==
2
:
total_turns
+=
1
if
np
.
random
.
uniform
(
0
,
1
)
<
0.5
else
2
elif
layout
[
k
]
==
3
and
action
==
3
:
total_turns
+=
2
if
layout
[
k
]
==
3
:
if
action
==
2
:
turns
+=
np
.
random
.
choice
([
1
,
2
],
p
=
[
0.5
,
0.5
])
# Utiliser numpy pour la randomisation
elif
action
==
3
:
turns
+=
2
else
:
total_
turns
+=
1
turns
+=
1
number_turns
.
append
(
total_
turns
)
state_turns
[
state
]
=
turns
number_mean
.
append
(
number
_turns
)
total_turns
.
append
(
state
_turns
)
# calculate the average number of turns for each state
mean_turns
=
np
.
mean
(
number_mean
,
axis
=
0
)
mean_turns
=
np
.
mean
(
total_turns
,
axis
=
0
)
return
mean_turns
def
play_optimal_policy
(
self
,
n_iterations
=
10000
):
def
play_optimal_policy
(
self
,
n_iterations
:
int
):
"""
Play using the optimal policy for a number of iterations.
"""
return
self
.
simulate_game
(
self
.
optimal_policy
,
n_iterations
)
def
play_dice_strategy
(
self
,
dice_choice
,
n_iterations
=
10000
):
def
play_dice_strategy
(
self
,
dice_choice
,
n_iterations
:
int
):
"""
Play using a specific dice strategy for a number of iterations.
"""
strategy
=
{
'
SafeDice
'
:
self
.
safe_strategy
,
'
NormalDice
'
:
self
.
normal_strategy
,
...
...
@@ -117,33 +132,13 @@ class Validation:
return
self
.
simulate_game
(
strategy
,
n_iterations
)
def
play_random_strategy
(
self
,
n_iterations
=
10000
):
def
play_random_strategy
(
self
,
n_iterations
:
int
):
"""
Play using a random strategy for a number of iterations.
"""
return
self
.
simulate_game
(
self
.
random_strategy
,
n_iterations
)
def
play_empirical_strategy
(
self
):
k
=
0
total_turns
=
0
while
k
<
len
(
self
.
layout
)
-
1
:
action
=
self
.
optimal_policy
[
k
]
action_index
=
int
(
action
)
-
1
transition_matrix
=
self
.
normal_dice
flattened_probs
=
transition_matrix
[
k
]
flattened_probs
/=
np
.
sum
(
flattened_probs
)
k
=
np
.
random
.
choice
(
len
(
self
.
layout
),
p
=
flattened_probs
)
if
self
.
layout
[
k
]
==
3
and
action
==
2
:
total_turns
+=
1
if
np
.
random
.
uniform
(
0
,
1
)
<
0.5
else
2
elif
self
.
layout
[
k
]
==
3
and
action
==
3
:
total_turns
+=
2
else
:
total_turns
+=
1
return
total_turns
def
compare_empirical_vs_value_iteration
(
self
,
num_games
=
10000
):
def
compare_empirical_vs_value_iteration
(
self
,
num_games
:
int
):
"""
Compare expected value iteration turns with empirical turns.
"""
value_iteration_turns
=
self
.
expec
empirical_turns
=
self
.
simulate_state
(
self
.
optimal_policy
,
self
.
layout
,
self
.
circle
,
n_iterations
=
num_games
)
...
...
@@ -153,14 +148,29 @@ class Validation:
}
return
mean_turns_by_state
def
empirical_cost_of_square
(
self
,
strategy
:
list
,
n_iterations
:
int
):
"""
Calculate the empirical cost of a square for a given strategy.
"""
total_square_costs
=
[]
for
_
in
range
(
n_iterations
):
game_cost
=
self
.
simulate_game
(
strategy
,
1
)
square_cost
=
game_cost
**
2
total_square_costs
.
append
(
square_cost
)
empirical_cost
=
np
.
mean
(
total_square_costs
)
return
empirical_cost
def
compare_state_based_turns
(
self
,
num_games
=
10000
):
def
compare_state_based_turns
(
self
,
num_games
:
int
):
# Compare the expected turns from value iteration with empirical state-based turns
value_iteration
=
self
.
expec
empirical_turns
=
self
.
simulate_state
(
self
.
optimal_policy
,
self
.
layout
,
self
.
circle
,
n_iterations
=
num_games
)
return
value_iteration
,
empirical_turns
def
compare_strategies
(
self
,
num_games
=
10000
):
def
compare_strategies
(
self
,
num_games
:
int
):
# Compare the costs of different strategies over a number of games
optimal_cost
=
self
.
simulate_game
(
self
.
optimal_policy
,
n_iterations
=
num_games
)
dice1_cost
=
self
.
simulate_game
(
self
.
safe_strategy
,
n_iterations
=
num_games
)
dice2_cost
=
self
.
simulate_game
(
self
.
normal_strategy
,
n_iterations
=
num_games
)
...
...
@@ -174,71 +184,3 @@ class Validation:
'
RiskyDice
'
:
dice3_cost
,
'
Random
'
:
random_cost
}
"""
# Exemple d
'
utilisation
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = Validation(layout, circle)
# Comparaison entre la stratégie empirique et la value iteration
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
# Affichage des moyennes de tours pour chaque état
num_states = len(layout)
for state in range(num_states - 1):
print(f
"
État {state}:
"
)
print(f
"
ValueIteration - Tours moyens : {turns_by_state[
'
ValueIteration
'
][state]:.2f}
"
)
print(f
"
Empirical - Tours moyens : {turns_by_state[
'
Empirical
'
][state]:.2f}
"
)
# Exécution de la stratégie empirique une fois
empirical_strategy_result = validation_instance.play_empirical_strategy()
print(
"
Coût de la stratégie empirique sur un tour :
"
, empirical_strategy_result)
# Comparaison entre la stratégie empirique et la value iteration sur plusieurs jeux
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
print(
"
Coût moyen de la stratégie de value iteration :
"
, comparison_result[
'
ValueIteration
'
])
print(
"
Coût moyen de la stratégie empirique :
"
, comparison_result[
'
Empirical
'
])
# Coûts des différentes stratégies
optimal_cost = validation_instance.play_optimal_policy(n_iterations=1000000)
print(
"
Optimal Strategy Cost:
"
, optimal_cost)
dice1_cost = validation_instance.play_dice_strategy(
'
SafeDice
'
, n_iterations=1000000)
print(
"
Safe Dice Strategy Cost:
"
, dice1_cost)
dice2_cost = validation_instance.play_dice_strategy(
'
NormalDice
'
, n_iterations=1000000)
print(
"
Normal Dice Strategy Cost:
"
, dice2_cost)
dice3_cost = validation_instance.play_dice_strategy(
'
RiskyDice
'
, n_iterations=1000000)
print(
"
Risky Dice Strategy Cost:
"
, dice3_cost)
random_cost = validation_instance.play_random_strategy(n_iterations=1000000)
print(
"
Random Strategy Cost:
"
, random_cost)
# Comparaison entre les stratégies
strategy_comparison = validation_instance.compare_strategies(num_games=1000000)
print(
"
Strategy Comparison Results:
"
, strategy_comparison)
# Calcul des tours moyens pour différentes stratégies
optimal_policy = validation_instance.optimal_policy
mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=1000000)
print(
"
Mean Turns for Optimal Strategy:
"
, mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=1000000)
print(
"
Mean Turns for Safe Dice Strategy:
"
, mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=1000000)
print(
"
Mean Turns for Normal Dice Strategy:
"
, mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=1000000)
print(
"
Mean Turns for Risky Dice Strategy:
"
, mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=1000000)
print(
"
Mean Turns for Random Dice Strategy:
"
, mean_turns_random_dice)
"""
\ No newline at end of file
Ce diff est replié.
Cliquez pour l'agrandir.
Aperçu
0%
Chargement en cours
Veuillez réessayer
ou
joindre un nouveau fichier
.
Annuler
You are about to add
0
people
to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Enregistrer le commentaire
Annuler
Veuillez vous
inscrire
ou vous
se connecter
pour commenter