Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from ancien.tmc import TransitionMatrixCalculator as tmc
from ancien.markovDecision import MarkovDecisionSolver as mD
class validation:
def __init__(self, layout, circle=False):
# import from other .PY
self.layout = layout
self.circle = circle
self.tmc_instance = tmc()
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve()
# Define all the strategy
self.optimal_strategy = self.optimal_policy
self.safe_strategy = [1]*len(layout)
self.normal_strategy = [2]*len(layout)
self.risky_strategy = [3]*len(layout)
self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
# Définir les coûts par case et par type de dé
self.costs_by_dice_type = {
'SafeDice': [0] * len(self.layout),
'NormalDice': [0] * len(self.layout),
'RiskyDice': [0] * len(self.layout)
}
# Remplir les coûts pour chaque case en fonction du type de dé
for i in range(len(self.layout)):
if self.layout[i] == 3:
self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr
self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal
self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué
def simulate_game(self, strategy, n_iterations=10000):
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
number_turns = []
for _ in range(n_iterations):
total_turns = 0
k = 0 # état initial
while k < len(self.layout) - 1:
action = strategy[k] # action selon la stratégie
# Convertir action en entier pour accéder à l'indice correct dans transition_matrices
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3:
total_turns += 2
else:
total_turns += 1
number_turns.append(total_turns)
return np.mean(number_turns)
def simulate_state(self, strategy, layout, circle, n_iterations=10000):
# Compute transition matrices for each dice
tmc_instance = tmc()
P_safe = tmc_instance._compute_safe_matrix()
P_normal = tmc_instance._compute_normal_matrix(layout, circle)
P_risky = tmc_instance._compute_risky_matrix(layout, circle)
transition_matrices = [P_safe, P_normal, P_risky]
number_turns = []
number_mean = []
for _ in range(n_iterations):
number_turns = []
for state in range(len(layout) - 1):
total_turns = 0
k = state # starting state
while k < len(layout) - 1:
action = strategy[k] # action based on strategy
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs)
k = np.random.choice(len(layout), p=flattened_probs)
if layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif layout[k] == 3 and action == 3:
total_turns += 2
else:
total_turns += 1
number_turns.append(total_turns)
number_mean.append(number_turns)
# calculate the average number of turns for each state
mean_turns = np.mean(number_mean, axis=0)
return mean_turns
def play_optimal_strategy(self, n_iterations=10000):
return self.simulate_game(self.optimal_strategy, n_iterations)
def play_dice_strategy(self, dice_choice, n_iterations=10000):
if dice_choice == 'SafeDice':
strategy = self.safe_strategy
elif dice_choice == 'NormalDice':
strategy = self.normal_strategy
elif dice_choice == 'RiskyDice':
strategy = self.risky_strategy
else:
raise ValueError("Invalid dice choice")
return self.simulate_game(strategy, n_iterations)
def play_random_strategy(self, n_iterations=10000):
return self.simulate_game(self.random_strategy, n_iterations)
def play_empirical_strategy(self):
k = 0 # état initial
total_turns = 0
while k < len(self.layout) - 1:
action = self.optimal_strategy[k] # Utiliser la stratégie empirique pour la simulation
action_index = int(action) - 1
transition_matrix = self.normal_dice # Utiliser le dé normal pour la stratégie empirique
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3:
total_turns += 2
else:
total_turns += 1
return total_turns
def compare_empirical_vs_value_iteration(self, num_games=1000):
value_iteration_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle)
empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
# Calculer la moyenne des tours pour chaque état
mean_turns_by_state = {
'ValueIteration': value_iteration_turns.tolist(),
'Empirical': empirical_turns.tolist()
}
return mean_turns_by_state
def compare_state_based_turns(self, num_games=1000):
optimal_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
return optimal_turns, empirical_turns
def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
return {
'Optimal': optimal_cost,
'SafeDice': dice1_cost,
'NormalDice': dice2_cost,
'RiskyDice': dice3_cost,
'Random': random_cost
}
# Utilisation d'exemple
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = validation(layout, circle)
# Comparer la stratégie empirique avec la stratégie de value iteration
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
# Imprimer les moyennes des tours pour chaque état
num_states = len(layout)
for state in range(num_states - 1):
print(f"État {state}:")
print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
# Exécuter la stratégie empirique une fois
empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
# Comparer la stratégie empirique avec la stratégie de value iteration sur plusieurs jeux
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)
random_cost = validation_instance.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)
strategy_comparison = validation_instance.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
optimal_strategy = validation_instance.optimal_strategy
mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)