# Monte-Carlo validation of dice strategies against the policy returned by
# MarkovDecisionSolver.
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class validation:
    """Compare dice strategies by Monte-Carlo simulation.

    The instance builds one transition matrix per die (safe, normal, risky)
    with ``TransitionMatrixCalculator``, asks ``MarkovDecisionSolver`` for a
    policy, and then estimates by simulation how many turns each strategy
    needs to reach the last square of ``layout``.

    A strategy is a sequence indexed by square whose entries select the die
    to throw there: 1 = safe, 2 = normal, 3 = risky.
    """

    def __init__(self, layout, circle=False):
        """Build the transition matrices, solve the MDP and set up strategies.

        Args:
            layout: Sequence of square codes, one per square. This class only
                gives special treatment to code 3 (see ``_play_from``).
            circle: Forwarded unchanged to the matrix calculator and solver.
        """
        self.layout = layout
        self.circle = circle

        # Transition matrices, one per die.
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)

        # Expected costs and policy as returned by the solver.
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Strategies to compare.
        n_squares = len(layout)
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * n_squares
        self.normal_strategy = [2] * n_squares
        self.risky_strategy = [3] * n_squares
        # Only 1, 2 and 3 are valid dice. An action of 0 would be turned into
        # index -1 and silently select the risky matrix, so it is not drawn.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]

        # Per-square cost for each die type; non-zero only on squares coded 3.
        self.costs_by_dice_type = {
            'SafeDice': [0] * n_squares,
            'NormalDice': [0] * n_squares,
            'RiskyDice': [0] * n_squares,
        }
        for i, square in enumerate(self.layout):
            if square == 3:
                self.costs_by_dice_type['SafeDice'][i] = 1
                self.costs_by_dice_type['NormalDice'][i] = 2
                self.costs_by_dice_type['RiskyDice'][i] = 3

    @staticmethod
    def _play_from(start, strategy, layout, transition_matrices):
        """Simulate one game from ``start`` and return the number of turns used.

        Args:
            start: Index of the starting square.
            strategy: Sequence giving the die (1, 2 or 3) to throw per square.
            layout: Sequence of square codes.
            transition_matrices: ``[safe, normal, risky]`` matrices; row ``k``
                holds the weights of moving from square ``k`` to every square.

        Returns:
            Total turns spent until a square with index >= ``len(layout) - 1``
            is reached.
        """
        n_squares = len(layout)
        last = n_squares - 1
        turns = 0
        k = start
        while k < last:
            action = strategy[k]
            row = np.asarray(transition_matrices[int(action) - 1][k], dtype=float)
            # Normalise into a new array: dividing in place would rewrite the
            # caller's transition matrix through the row view.
            probs = row / row.sum()
            k = np.random.choice(n_squares, p=probs)
            # Landing on a square coded 3 costs an extra turn half of the time
            # with the normal die and always with the risky die.
            if layout[k] == 3 and action == 2:
                turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif layout[k] == 3 and action == 3:
                turns += 2
            else:
                turns += 1
        return turns

    def simulate_game(self, strategy, n_iterations=10000):
        """Return the mean number of turns to finish when starting on square 0.

        Args:
            strategy: Sequence giving the die (1, 2 or 3) to throw per square.
            n_iterations: Number of simulated games.

        Returns:
            Mean turn count over the simulated games.
        """
        matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        turn_counts = [
            self._play_from(0, strategy, self.layout, matrices)
            for _ in range(n_iterations)
        ]
        return np.mean(turn_counts)

    def simulate_state(self, strategy, layout, circle, n_iterations=10000):
        """Return the mean number of turns to finish from every non-final square.

        The transition matrices are recomputed from ``layout`` and ``circle``
        rather than taken from the instance.

        Args:
            strategy: Sequence giving the die (1, 2 or 3) to throw per square.
            layout: Sequence of square codes.
            circle: Forwarded unchanged to the matrix calculator.
            n_iterations: Number of simulated games per starting square.

        Returns:
            Array of length ``len(layout) - 1``; entry ``s`` is the mean turn
            count of the games started on square ``s``.
        """
        tmc_instance = tmc()
        P_safe = tmc_instance._compute_safe_matrix()
        P_normal = tmc_instance._compute_normal_matrix(layout, circle)
        P_risky = tmc_instance._compute_risky_matrix(layout, circle)
        matrices = [P_safe, P_normal, P_risky]

        start_states = range(len(layout) - 1)
        # One row per iteration, one column per starting square.
        turns_per_run = [
            [self._play_from(s, strategy, layout, matrices) for s in start_states]
            for _ in range(n_iterations)
        ]
        return np.mean(turns_per_run, axis=0)

    def play_optimal_strategy(self, n_iterations=10000):
        """Simulate the solver's policy from square 0 and return the mean turns."""
        return self.simulate_game(self.optimal_strategy, n_iterations)

    def play_dice_strategy(self, dice_choice, n_iterations=10000):
        """Simulate always throwing one die and return the mean turns.

        Args:
            dice_choice: ``'SafeDice'``, ``'NormalDice'`` or ``'RiskyDice'``.
            n_iterations: Number of simulated games.

        Raises:
            ValueError: If ``dice_choice`` is not one of the three names.
        """
        if dice_choice == 'SafeDice':
            strategy = self.safe_strategy
        elif dice_choice == 'NormalDice':
            strategy = self.normal_strategy
        elif dice_choice == 'RiskyDice':
            strategy = self.risky_strategy
        else:
            raise ValueError("Invalid dice choice")
        return self.simulate_game(strategy, n_iterations)

    def play_random_strategy(self, n_iterations=10000):
        """Simulate the random strategy drawn in ``__init__``; return the mean turns."""
        return self.simulate_game(self.random_strategy, n_iterations)

    def compare_strategies(self, num_games=1000):
        """Simulate every strategy and return their mean turn counts.

        Args:
            num_games: Number of simulated games per strategy.

        Returns:
            Dict with keys ``'Optimal'``, ``'SafeDice'``, ``'NormalDice'``,
            ``'RiskyDice'`` and ``'Random'`` mapping to the mean turn count.
        """
        optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
        dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
        dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
        dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
        random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
        return {
            'Optimal': optimal_cost,
            'SafeDice': dice1_cost,
            'NormalDice': dice2_cost,
            'RiskyDice': dice3_cost,
            'Random': random_cost,
        }
# Example usage: build a validator for one board and print simulation results.
# NOTE: these statements run at import time and draw from the global RNGs.
# Board with 15 squares; code 3 is the only value the validation class treats
# specially (possible extra turn with the normal or risky die).
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = validation(layout, circle)
# Mean number of turns from square 0 for three individual strategies.
# These three results are not used again in the statements below.
optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000)
random_cost = validation_instance.play_random_strategy(n_iterations=10000)
# Mean number of turns from square 0 for all five strategies at once.
strategy_comparison = validation_instance.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
# Mean number of turns from every non-final square, one strategy at a time.
optimal_strategy = validation_instance.optimal_strategy
mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)