Skip to content
Extraits de code Groupes Projets
Valider 169c62f3 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update plot_results

parent c20e9247
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
100k_false/Figure_1.png

23 ko

100k_false/Figure_2.png

105 ko

100k_false/Figure_3.png

49,5 ko

10k_false/Figure_1.png

21,7 ko

10k_false/Figure_2.png

104 ko

10k_false/Figure_3.png

50,7 ko

1M_false/Figure_1.png

21,7 ko

1M_false/Figure_2.png

102 ko

1M_false/Figure_3.png

50,1 ko

import numpy as np
from tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
    """Value-iteration solver for the Snakes-and-Ladders-style MDP.

    Builds the transition matrices for the three dice via
    TransitionMatrixCalculator and iterates the Bellman update until the
    expected-cost vector converges (or an iteration cap is hit).
    """

    def __init__(self, layout: list, circle: bool):
        self.nSquares = 15        # number of board squares
        self.precision = 1e-9     # convergence tolerance on the L1 change
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        # Per-die transition matrices; normal/risky also return the extra
        # "jail" cost terms (expected extra turns spent in prison squares).
        self.matrix_safe = self.tmc_instance._compute_safe_matrix()
        self.matrix_normal, self.jail_n = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.matrix_risky, self.jail_r = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Optimal die per square (1 = safe, 2 = normal, 3 = risky).
        self.Dice = np.zeros(self.nSquares, dtype=int)

    def solve(self):
        """Run value iteration.

        Returns:
            (values, dice): expected remaining cost per square, and the
            optimal die number (1/2/3) per square.
        """
        ValueI = np.zeros(self.nSquares)
        # Hand-tuned initial guess (speeds up convergence on this board).
        ValueINew = np.array([8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0])
        for _ in range(1000):  # cap the sweeps to avoid an infinite loop
            np.copyto(ValueI, ValueINew)
            # Bellman update: 1 turn + best expected future cost; the jail
            # terms add the expected extra turns for prison squares.
            for k in range(self.nSquares - 1):
                ValueINew[k] = 1 + min(
                    np.dot(self.matrix_safe[k], ValueI),
                    np.dot(self.matrix_normal[k], ValueI) + np.sum(self.jail_n[k]),
                    np.dot(self.matrix_risky[k], ValueI) + np.sum(self.jail_r[k])
                )
            # The final square is absorbing: no "+1" turn is charged there.
            ValueINew[self.nSquares - 1] = min(
                np.dot(self.matrix_safe[self.nSquares - 1], ValueI),
                np.dot(self.matrix_normal[self.nSquares - 1], ValueI),
                np.dot(self.matrix_risky[self.nSquares - 1], ValueI)
            )
            # Converged when the value vector stops moving.
            if np.sum(np.abs(ValueINew - ValueI)) < self.precision:
                break
        # Extract the greedy policy ONCE, after convergence.  The original
        # recomputed it on every sweep although only the last result is
        # observable; the final Dice values are identical.
        for k in range(self.nSquares):
            self.Dice[k] = np.argmin([
                np.dot(self.matrix_safe[k], ValueINew),
                np.dot(self.matrix_normal[k], ValueINew) + np.sum(self.jail_n[k]),
                np.dot(self.matrix_risky[k], ValueINew) + np.sum(self.jail_r[k]),
            ]) + 1
        # Final expected costs and optimal dice choices.
        return ValueINew, self.Dice
def markovDecision(layout: list, circle: bool):
    """Convenience wrapper: build a solver for *layout*/*circle* and run it."""
    return MarkovDecisionSolver(layout, circle).solve()
# Example usage of markovDecision with a sample board layout.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the board under both end-of-game rules and report the results.
for banner, circular in (
    ("\nStopping on the square to win", False),
    ("\nWin as soon as land on or overstep the final square", True),
):
    expected_costs, dice_choices = markovDecision(layout, circle=circular)
    print(banner)
    print("Expected costs for each square:")
    print(expected_costs)
    print("Dice choices for each square:")
    print(dice_choices)
import numpy as np
from ancien.tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
    """Value-iteration solver ("ancien" variant).

    Uses the matrix-only TransitionMatrixCalculator API and models prison
    squares through the list of jail indices rather than separate cost
    vectors.  Dice are numbered 1 (safe), 2 (normal), 3 (risky).
    """

    def __init__(self, layout: list, circle: bool):
        self.Numberk = 15
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Indices of prison squares (layout code 3).
        self.jail = [square for square, kind in enumerate(layout) if kind == 3]
        self.ValueI = np.zeros(self.Numberk)
        self.DiceForStates = np.zeros(self.Numberk - 1)

    def _compute_vi_safe(self, k):
        """Expected future cost when throwing the safe die from square k."""
        return self.safe_dice[k] @ self.ValueI

    def _compute_vi_normal(self, k):
        """Expected future cost for the normal die, plus half the mass that
        lands in jail (expected extra turn)."""
        jail_mass = np.sum(self.normal_dice[k][self.jail])
        return self.normal_dice[k] @ self.ValueI + 0.5 * jail_mass

    def _compute_vi_risky(self, k):
        """Expected future cost for the risky die, plus the full jail mass."""
        jail_mass = np.sum(self.risky_dice[k][self.jail])
        return self.risky_dice[k] @ self.ValueI + jail_mass

    def solve(self):
        """Iterate the Bellman update to convergence.

        Returns [expected costs for squares 0..13, optimal die per square].
        """
        while True:
            updated = np.zeros(self.Numberk)
            for state in range(self.Numberk - 1):
                costs = (
                    self._compute_vi_safe(state),
                    self._compute_vi_normal(state),
                    self._compute_vi_risky(state),
                )
                best = min(costs)
                updated[state] = 1 + best
                # First minimal cost wins the tie, mapping to die 1/2/3 —
                # same tie-break order as comparing safe, then normal.
                self.DiceForStates[state] = costs.index(best) + 1
            converged = np.allclose(updated, self.ValueI)
            self.ValueI = updated
            if converged:
                break
        # Drop the absorbing final square from the reported expectations.
        return [self.ValueI[:-1], self.DiceForStates]
def markovDecision(layout: list, circle: bool):
    """Solve the board via value iteration.

    Returns [expected costs per square, optimal die per square]."""
    solver = MarkovDecisionSolver(layout=layout, circle=circle)
    return solver.solve()
# Example usage of markovDecision on a sample board.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# NOTE(review): these two banners are swapped relative to the sibling
# demo script for the non-"ancien" solver — confirm which mapping of
# circle=False/True to end-of-game rule is intended.
for banner, circular in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    outcome = markovDecision(layout, circle=circular)
    print(banner)
    print(outcome)
import matplotlib.pyplot as plt
from ancien.validation import validation
import numpy as np
# Example layout and circle settings shared by every plotting helper below.
# Square codes: 0 = plain, 1 = restart trap, 2 = move-back trap, 3 = prison
# — assumed from the sibling tmc/validation code; TODO confirm.
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
# Create an instance of validation used by all the plot functions.
validation_instance = validation(layout, circle)
# Plotting function for strategy comparison
def plot_strategy_comparison(num_games=1000):
    """Bar-chart the average cost of each strategy over *num_games* games,
    save it to 'strategy_comparison.png', and display it."""
    costs = validation_instance.compare_strategies(num_games=num_games)
    labels = list(costs.keys())
    averages = list(costs.values())

    plt.figure(figsize=(10, 6))
    plt.bar(labels, averages, color=['blue', 'green', 'orange', 'red', 'purple'])
    plt.xlabel('Strategies')
    plt.ylabel('Average Cost')
    plt.title('Comparison of Strategies')
    plt.savefig('strategy_comparison.png')
    plt.show()
# Plotting function for state-based average turns for all strategies on the same plot
def plot_state_based_turns(save=True):
    """Plot the average number of turns per starting state for every
    strategy on a single figure.

    Args:
        save: when True, write the figure to
            'state_based_turns_all_strategies.png' before showing it.
            (The flag was previously accepted but silently ignored —
            the savefig call was commented out.)
    """
    strategies = [validation_instance.optimal_strategy,
                  validation_instance.safe_strategy,
                  validation_instance.normal_strategy,
                  validation_instance.risky_strategy,
                  validation_instance.random_strategy]
    strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random']

    plt.figure(figsize=(12, 6))
    for strategy, name in zip(strategies, strategy_names):
        mean_turns = validation_instance.simulate_state(strategy, layout, circle)
        plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name)

    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.title('Average Turns per State for Different Strategies')
    plt.grid(True)
    plt.legend()
    if save:
        plt.savefig('state_based_turns_all_strategies.png')
    plt.show()
def plot_state_based_comparison(validation_instance, num_games=1000):
    """Overlay the value-iteration and empirical average-turns-per-state
    curves produced by *validation_instance*."""
    optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)

    plt.figure(figsize=(12, 6))
    for series, mark, label in (
        (optimal_turns, 'o', 'ValueIteration'),
        (empirical_turns, 'x', 'Empirical'),
    ):
        plt.plot(range(len(series)), series, marker=mark, linestyle='-', label=label)

    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.title('Average Turns per State - ValueIteration vs. Empirical')
    plt.grid(True)
    plt.legend()
    plt.show()
# Generate (and for some plots save) all figures when run as a script.
if __name__ == '__main__':
    # Bar chart comparing the average cost of each strategy.
    plot_strategy_comparison(num_games=1000)
    # Average turns per starting state, all strategies on one figure.
    plot_state_based_turns(save=True)
    # Value-iteration vs empirical average turns per state.
    plot_state_based_comparison(validation_instance, num_games=1000)
\ No newline at end of file
import numpy as np
import random as rd
class TransitionMatrixCalculator:
    """Build the 15x15 transition matrices of the board game for the three
    dice ("safe", "normal", "risky").

    Squares are indexed 0..14.  Square 2 forks into a fast lane (squares
    10..13); squares 7/8/9 can step onto the goal square 14.  `layout[i]`
    encodes the trap on square i (0 = none, 1 = back-to-start, 2 = move
    back three squares, 3 = prison) — assumed from how the branches below
    treat it; TODO confirm against the game statement.

    NOTE(review): this file was recovered from a scrape that lost the
    original indentation; the nesting below is the most plausible reading,
    but a few branches (flagged inline) appear to lose or double-count
    probability mass — verify against the original source.
    """

    def __init__(self):
        # Transition matrices for the "safe", "normal" and "risky" dice.
        self.matrix_safe = np.zeros((15, 15))
        self.matrix_normal = np.zeros((15, 15))
        self.matrix_risky = np.zeros((15, 15))
        # Probability to go from state k to k': step-size distributions
        # (safe die steps 0-1, normal 0-2, risky 0-3, uniformly).
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])

    def compute_transition_matrix(self, layout, circle=False):
        """Reset and rebuild all three matrices for *layout*.

        Returns the tuple (matrix_safe, matrix_normal, matrix_risky).
        """
        self.matrix_safe.fill(0)
        self.matrix_normal.fill(0)
        self.matrix_risky.fill(0)
        self._compute_safe_matrix()
        self._compute_normal_matrix(layout, circle)
        self._compute_risky_matrix(layout, circle)
        return self.matrix_safe, self.matrix_normal, self.matrix_risky

    def _compute_safe_matrix(self):
        """Fill and return the safe-die matrix (traps never trigger)."""
        for k in range(15):
            for s, p in enumerate(self.safe_dice):
                if k == 9 and s == 1:
                    # Last slow-lane square: a step of 1 reaches the goal.
                    k_prime = 14
                    self.matrix_safe[k,k_prime] += p
                elif k == 2 and s > 0:
                    # Fork: the moving mass splits between the fast lane
                    # (square 10) and the slow lane (square 3).
                    p /= 2
                    k_prime = 10
                    self.matrix_safe[k,k_prime] += p
                    k_prime = 3
                    self.matrix_safe[k,k_prime] += p
                else:
                    k_prime = k + s
                    k_prime = min(14, k_prime)  # clamp at the goal square
                    self.matrix_safe[k,k_prime] += p
        return self.matrix_safe

    def _compute_normal_matrix(self, layout, circle):
        """Fill and return the normal-die matrix (traps trigger with
        probability 1/2)."""
        for k in range(15):
            for s, p in enumerate(self.normal_dice):
                if k == 8 and s == 2:
                    # Step of 2 from square 8 reaches the goal directly.
                    k_prime = 14
                    self.matrix_normal[k,k_prime] += p
                    continue
                elif k == 9 and s in [1, 2]:
                    if not circle or s == 1:
                        # Reach (or overstep, non-circular) the goal.
                        k_prime = 14
                        self.matrix_normal[k,k_prime] += p
                    elif circle and s == 2:
                        # Circular board: overstepping wraps to square 0.
                        k_prime = 0
                        self.matrix_normal[k,k_prime] += p
                    continue
                # handle the fast lane
                if k == 2 and s > 0:
                    # Fork: half the mass to the fast lane, half to the slow
                    # lane; each destination's trap may trigger (prob 1/2).
                    p /= 2
                    k_prime = 10 + (s - 1)  # rebalance the step before with s > 0
                    if layout[k_prime] in [0, 3]:  # normal or prison square
                        self.matrix_normal[k,k_prime] += p
                    elif layout[k_prime] == 1:  # type 1 trap: back to start
                        self.matrix_normal[k,k_prime] += p / 2
                        k_prime = 0
                        self.matrix_normal[k,k_prime] += p / 2
                    elif layout[k_prime] == 2:  # type 2 trap: back 3 squares
                        self.matrix_normal[k,k_prime] += p / 2
                        # Fast-lane squares map back onto the main track.
                        if k_prime == 10:
                            k_prime = 0
                        elif k_prime == 11:
                            k_prime = 1
                        elif k_prime == 12:
                            k_prime = 2
                        else:
                            k_prime = max(0, k_prime - 3)
                        self.matrix_normal[k,k_prime] += p / 2
                    k_prime = 3 + (s - 1)  # rebalance the step before with s > 0
                    if layout[k_prime] in [0, 3]:  # normal or prison square
                        self.matrix_normal[k,k_prime] += p
                    elif layout[k_prime] == 1:  # type 1 trap
                        self.matrix_normal[k,k_prime] += p / 2
                        k_prime = 0
                        self.matrix_normal[k,k_prime] += p / 2
                    elif layout[k_prime] == 2:  # type 2 trap
                        self.matrix_normal[k,k_prime] += p / 2
                        k_prime = max(0, k_prime - 3)
                        self.matrix_normal[k,k_prime] += p / 2
                    continue
                k_prime = k + s
                k_prime = k_prime % 15 if circle else min(14, k_prime)  # modulo wrap or clamp
                if layout[k_prime] in [1, 2]:
                    # NOTE(review): as reconstructed, only the triggered half
                    # of p is assigned here; the non-triggered half never
                    # lands on k_prime, so trap rows sum to less than 1.
                    # The simulators re-normalize rows, which masks this.
                    p /= 2
                    if layout[k_prime] == 1:
                        k_prime = 0
                        self.matrix_normal[k,k_prime] += p
                        continue
                    elif layout[k_prime] == 2:
                        if k_prime == 10:
                            k_prime = 0
                        elif k_prime == 11:
                            k_prime = 1
                        elif k_prime == 12:
                            k_prime = 2
                        else:
                            k_prime = max(0, k_prime - 3)
                        self.matrix_normal[k,k_prime] += p
                        continue
                self.matrix_normal[k,k_prime] += p
        return self.matrix_normal

    def _compute_risky_matrix(self, layout, circle):
        """Fill and return the risky-die matrix (traps always trigger)."""
        for k in range(15):
            for s, p in enumerate(self.risky_dice):
                if k == 7 and s == 3:
                    # Step of 3 from square 7 reaches the goal directly.
                    k_prime = 14
                    self.matrix_risky[k,k_prime] += p
                    continue
                elif k == 8 and s in [2, 3]:
                    if not circle or s == 2:
                        k_prime = 14
                        self.matrix_risky[k,k_prime] += p
                    elif circle:
                        # Circular board: overstepping wraps to square 0.
                        k_prime = 0
                        self.matrix_risky[k,k_prime] += p
                    continue
                elif k == 9 and s in [1, 2, 3]:
                    if not circle or s == 1:
                        k_prime = 14
                        self.matrix_risky[k,k_prime] += p
                    elif circle and s == 2:
                        k_prime = 0
                        self.matrix_risky[k,k_prime] += p
                    elif circle and s == 3:
                        # Wrap past the goal onto square 1, whose trap (if
                        # any) triggers.
                        k_prime = 1
                        if layout[k_prime] != 0:
                            if layout[k_prime] == 1:
                                k_prime = 0
                                self.matrix_risky[k,k_prime] += p
                            elif layout[k_prime] == 2:
                                k_prime = max(0, k_prime - 3)
                                self.matrix_risky[k,k_prime] += p
                        # NOTE(review): as reconstructed, when square 1
                        # carries a type 1/2 trap, p is added twice (above
                        # and here) — the original indentation is uncertain;
                        # verify against the original source.
                        self.matrix_risky[k,k_prime] += p
                        continue
                    continue
                if k == 2 and s > 0:
                    # Fork: half the mass to the fast lane, half slow lane.
                    p /= 2
                    k_prime = 10 + (s - 1)
                    if layout[k_prime] == 1:
                        k_prime = 0
                        self.matrix_risky[k,k_prime] += p
                    elif layout[k_prime] == 2:
                        if k_prime == 10:
                            k_prime = 0
                        elif k_prime == 11:
                            k_prime = 1
                        elif k_prime == 12:
                            k_prime = 2
                        else:
                            k_prime = max(0, k_prime - 3)
                        self.matrix_risky[k,k_prime] += p
                    else:
                        self.matrix_risky[k,k_prime] += p
                    # NOTE(review): unlike the fast-lane branch above, the
                    # slow-lane destination ignores traps here.
                    k_prime = 3 + (s - 1)
                    self.matrix_risky[k,k_prime] += p
                    continue
                k_prime = k + s
                k_prime = k_prime % 15 if circle else min(14, k_prime)
                if layout[k_prime] in [1, 2]:
                    # Risky die: the trap triggers with probability 1.
                    if layout[k_prime] == 1:
                        k_prime = 0
                        self.matrix_risky[k,k_prime] += p
                        continue
                    elif layout[k_prime] == 2:
                        if k_prime == 10:
                            k_prime = 0
                        elif k_prime == 11:
                            k_prime = 1
                        elif k_prime == 12:
                            k_prime = 2
                        else:
                            k_prime = max(0, k_prime - 3)
                        self.matrix_risky[k,k_prime] += p
                        continue
                self.matrix_risky[k,k_prime] += p
        return self.matrix_risky

    def print_matrix_with_layout(self, title, matrix):
        """Pretty-print *matrix* row by row (3 decimals, pipe-separated),
        preceded by *title*."""
        print(f"{title}:")
        for i in range(matrix.shape[0]):
            row_str = " | ".join(f"{matrix[i, j]:.3f}" for j in range(matrix.shape[1]))
            print(row_str)
        print()
# Example usage: build the three transition matrices for a trap-free board
# with the circular rule enabled, and print them.
example_layout = [0] * 15
calculator = TransitionMatrixCalculator()
print(calculator.compute_transition_matrix(example_layout, circle=True))
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from ancien.tmc import TransitionMatrixCalculator as tmc
from ancien.markovDecision import MarkovDecisionSolver as mD
class validation:
    """Monte-Carlo validation of the value-iteration policy.

    Simulates games on *layout* using the transition matrices from
    TransitionMatrixCalculator and compares the optimal policy returned by
    MarkovDecisionSolver against fixed and random strategies.
    Dice are numbered 1 (safe), 2 (normal), 3 (risky).
    """

    def __init__(self, layout, circle=False):
        self.layout = layout
        self.circle = circle
        # One 15x15 transition matrix per die.
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Optimal policy computed by value iteration.
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()
        # Strategies: one die number per square.
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * len(layout)
        self.normal_strategy = [2] * len(layout)
        self.risky_strategy = [3] * len(layout)
        # FIX: previously drew from [0, 1, 2, 3]; 0 is not a valid die and,
        # via action_index = 0 - 1 = -1, silently selected the risky matrix
        # while being costed as a non-risky die.  Draw valid dice only.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(len(layout))]
        # Extra per-square cost per die type (prison squares only).
        self.costs_by_dice_type = {
            'SafeDice': [0] * len(self.layout),
            'NormalDice': [0] * len(self.layout),
            'RiskyDice': [0] * len(self.layout)
        }
        for i in range(len(self.layout)):
            if self.layout[i] == 3:
                self.costs_by_dice_type['SafeDice'][i] = 1    # default cost, safe die
                self.costs_by_dice_type['NormalDice'][i] = 2  # default cost, normal die
                self.costs_by_dice_type['RiskyDice'][i] = 3   # default cost, risky die

    def _simulate_from(self, start, strategy, transition_matrices, layout):
        """Play one game from square *start* to the final square; return the
        number of turns used.

        FIX: the transition row is normalized on a COPY.  The previous
        in-place `/=` mutated the shared matrices (row indexing returns a
        view) on every call.
        """
        total_turns = 0
        k = start
        while k < len(layout) - 1:
            action = strategy[k]  # die chosen on square k (1/2/3)
            transition_matrix = transition_matrices[int(action) - 1]
            row = transition_matrix[k]
            # Normalize: some rows do not sum exactly to 1 because of lost
            # probability mass in the matrix construction.
            probs = row / np.sum(row)
            k = np.random.choice(len(layout), p=probs)
            # Prison squares: the normal die escapes with prob 1/2 (1 or 2
            # turns); the risky die always costs 2 turns.
            if layout[k] == 3 and action == 2:
                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif layout[k] == 3 and action == 3:
                total_turns += 2
            else:
                total_turns += 1
        return total_turns

    def simulate_game(self, strategy, n_iterations=10000):
        """Average number of turns to finish from square 0 under *strategy*."""
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = [
            self._simulate_from(0, strategy, transition_matrices, self.layout)
            for _ in range(n_iterations)
        ]
        return np.mean(number_turns)

    def simulate_state(self, strategy, layout, circle, n_iterations=10000):
        """Average number of turns to finish from EACH starting square.

        Returns an array of length len(layout) - 1.
        """
        # Fresh transition matrices for the requested layout/rule.
        tmc_instance = tmc()
        transition_matrices = [
            tmc_instance._compute_safe_matrix(),
            tmc_instance._compute_normal_matrix(layout, circle),
            tmc_instance._compute_risky_matrix(layout, circle),
        ]
        number_mean = []
        for _ in range(n_iterations):
            number_turns = [
                self._simulate_from(state, strategy, transition_matrices, layout)
                for state in range(len(layout) - 1)
            ]
            number_mean.append(number_turns)
        # Average number of turns for each starting state.
        return np.mean(number_mean, axis=0)

    def play_optimal_strategy(self, n_iterations=10000):
        """Average cost of the value-iteration policy."""
        return self.simulate_game(self.optimal_strategy, n_iterations)

    def play_dice_strategy(self, dice_choice, n_iterations=10000):
        """Average cost of always throwing one die type.

        Raises:
            ValueError: if *dice_choice* is not a known die name.
        """
        strategies = {
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
        }
        if dice_choice not in strategies:
            raise ValueError("Invalid dice choice")
        return self.simulate_game(strategies[dice_choice], n_iterations)

    def play_random_strategy(self, n_iterations=10000):
        """Average cost of the fixed random die-per-square strategy."""
        return self.simulate_game(self.random_strategy, n_iterations)

    def play_empirical_strategy(self):
        """Single playthrough from square 0; returns the turns used.

        NOTE(review): the turn COST uses the optimal action, but every MOVE
        uses the normal die's matrix — looks intentional for an "empirical"
        baseline, but confirm.
        """
        k = 0
        total_turns = 0
        while k < len(self.layout) - 1:
            action = self.optimal_strategy[k]
            row = self.normal_dice[k]
            probs = row / np.sum(row)  # FIX: normalize a copy, not in place
            k = np.random.choice(len(self.layout), p=probs)
            if self.layout[k] == 3 and action == 2:
                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif self.layout[k] == 3 and action == 3:
                total_turns += 2
            else:
                total_turns += 1
        return total_turns

    def compare_empirical_vs_value_iteration(self, num_games=1000):
        """Per-state mean turns for both labelled series.

        NOTE(review): both series run the SAME simulation (optimal policy);
        only n_iterations differs.  Kept as-is, but this likely does not
        measure what the labels suggest.
        """
        value_iteration_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle)
        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
        return {
            'ValueIteration': value_iteration_turns.tolist(),
            'Empirical': empirical_turns.tolist()
        }

    def compare_state_based_turns(self, num_games=1000):
        """Two independent per-state estimates under the optimal policy.

        NOTE(review): both calls are identical — same caveat as
        compare_empirical_vs_value_iteration.
        """
        optimal_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
        return optimal_turns, empirical_turns

    def compare_strategies(self, num_games=1000):
        """Average cost of every strategy, as a dict keyed by name."""
        return {
            'Optimal': self.simulate_game(self.optimal_strategy, n_iterations=num_games),
            'SafeDice': self.simulate_game(self.safe_strategy, n_iterations=num_games),
            'NormalDice': self.simulate_game(self.normal_strategy, n_iterations=num_games),
            'RiskyDice': self.simulate_game(self.risky_strategy, n_iterations=num_games),
            'Random': self.simulate_game(self.random_strategy, n_iterations=num_games)
        }
# Example usage: validate the solver on a sample board.
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = validation(layout, circle)

# Per-state comparison: value iteration vs empirical estimate.
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
num_states = len(layout)
for state in range(num_states - 1):
    print(f"État {state}:")
    print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
    print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")

# One playthrough of the empirical strategy.
empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)

# Re-run the per-state comparison and report the raw series.
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])

# Average cost of individual strategies.
print("Optimal Strategy Cost:", validation_instance.play_optimal_strategy(n_iterations=10000))
print("Normal Dice Strategy Cost:", validation_instance.play_dice_strategy('NormalDice', n_iterations=10000))
print("Random Strategy Cost:", validation_instance.play_random_strategy(n_iterations=10000))
print("Strategy Comparison Results:", validation_instance.compare_strategies(num_games=10000))

# Per-state mean turns for every strategy (same call order as before).
for label, chosen_strategy in (
    ("Optimal", validation_instance.optimal_strategy),
    ("Safe Dice", validation_instance.safe_strategy),
    ("Normal Dice", validation_instance.normal_strategy),
    ("Risky Dice", validation_instance.risky_strategy),
    ("Random Dice", validation_instance.random_strategy),
):
    mean_turns = validation_instance.simulate_state(chosen_strategy, layout, circle, n_iterations=10000)
    print(f"Mean Turns for {label} Strategy:", mean_turns)
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from validation import validation from validation import Validation as Val
import numpy as np import numpy as np
# Example layout and circle settings # Example layout and circle settings
...@@ -7,11 +7,11 @@ layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] ...@@ -7,11 +7,11 @@ layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False circle = False
# Create an instance of validation # Create an instance of validation
validation_instance = validation(layout, circle) validation_instance = Val(layout, circle)
# Plotting function for strategy comparison # Plotting function for strategy comparison
def plot_strategy_comparison(num_games=1000): def plot_strategy_comparison(num_games=10000):
strategy_costs = validation_instance.compare_strategies(num_games=num_games) strategy_costs = validation_instance.compare_strategies(num_games=num_games)
# Bar plot for strategy comparison # Bar plot for strategy comparison
...@@ -48,7 +48,7 @@ def plot_state_based_turns(save=True): ...@@ -48,7 +48,7 @@ def plot_state_based_turns(save=True):
plt.show() plt.show()
def plot_state_based_comparison(validation_instance, num_games=100000): def plot_state_based_comparison(validation_instance, num_games=10000):
optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games) optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
# Plotting the state-based average turns comparison # Plotting the state-based average turns comparison
...@@ -74,9 +74,9 @@ def plot_state_based_comparison(validation_instance, num_games=100000): ...@@ -74,9 +74,9 @@ def plot_state_based_comparison(validation_instance, num_games=100000):
# Main function to generate and save plots # Main function to generate and save plots
if __name__ == '__main__': if __name__ == '__main__':
# Example of strategy comparison plot # Example of strategy comparison plot
plot_strategy_comparison(num_games=1000) plot_strategy_comparison(num_games=10000)
# Example of state-based average turns plot for all strategies on the same plot # Example of state-based average turns plot for all strategies on the same plot
plot_state_based_turns(save=True) plot_state_based_turns(save=True)
plot_state_based_comparison(validation_instance, num_games=1000) plot_state_based_comparison(validation_instance, num_games=10000)
\ No newline at end of file \ No newline at end of file
strategy_comparison.png

21,7 ko | W: | H:

strategy_comparison.png

21,7 ko | W: | H:

strategy_comparison.png
strategy_comparison.png
strategy_comparison.png
strategy_comparison.png
  • 2-up
  • Swipe
  • Onion skin
import random as rd import random as rd
import numpy as np import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD from markovDecision import MarkovDecisionSolver as mD
class validation:
def __init__(self, layout, circle=False):
# import from other .PY class Validation:
def __init__(self, layout, circle=False):
self.layout = layout self.layout = layout
self.circle = circle self.circle = circle
self.tmc_instance = tmc() self.tmc_instance = tmc()
self.safe_dice = self.tmc_instance._compute_safe_matrix() self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) # Make sure to capture only the normal_dice component self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle) self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
solver = mD(self.layout, self.circle) solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve() self.expec, self.optimal_policy = solver.solve()
# Define all the strategy self.safe_strategy = [1] * len(layout)
self.safe_strategy = [1]*len(layout) self.normal_strategy = [2] * len(layout)
self.normal_strategy = [2]*len(layout) self.risky_strategy = [3] * len(layout)
self.risky_strategy = [3]*len(layout) self.random_strategy = [rd.choice([0, 1, 2, 3]) for _ in range(len(layout))]
self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
# Définir les coûts par case et par type de dé
self.costs_by_dice_type = { self.costs_by_dice_type = {
'SafeDice': [0] * len(self.layout), 'SafeDice': [0] * len(layout),
'NormalDice': [0] * len(self.layout), 'NormalDice': [0] * len(layout),
'RiskyDice': [0] * len(self.layout) 'RiskyDice': [0] * len(layout)
} }
# Remplir les coûts pour chaque case en fonction du type de dé for i, die_type in enumerate(self.layout):
for i in range(len(self.layout)): self.costs_by_dice_type['SafeDice'][i] = 1 if die_type == 3 else 0
if self.layout[i] == 3: self.costs_by_dice_type['NormalDice'][i] = 2 if die_type == 3 else 0
self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr self.costs_by_dice_type['RiskyDice'][i] = 3 if die_type == 3 else 0
self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal
self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué
def simulate_game(self, strategy, n_iterations=10000): def simulate_game(self, strategy, n_iterations=10000):
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
...@@ -44,23 +38,18 @@ class validation: ...@@ -44,23 +38,18 @@ class validation:
for _ in range(n_iterations): for _ in range(n_iterations):
total_turns = 0 total_turns = 0
k = 0 # état initial k = 0 # initial state
while k < len(self.layout) - 1: while k < len(self.layout) - 1:
action = strategy[k] # action selon la stratégie action = strategy[k]
# Convertir action en entier pour accéder à l'indice correct dans transition_matrices
action_index = int(action) - 1 action_index = int(action) - 1
transition_matrix = transition_matrices[action_index] transition_matrix = transition_matrices[action_index]
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k] flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités flattened_probs /= np.sum(flattened_probs)
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs) k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2: if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3: elif self.layout[k] == 3 and action == 3:
...@@ -73,9 +62,8 @@ class validation: ...@@ -73,9 +62,8 @@ class validation:
return np.mean(number_turns) return np.mean(number_turns)
def simulate_state(self, strategy, layout, circle, n_iterations=10000): def simulate_state(self, strategy, layout, circle, n_iterations=10000):
# Compute transition matrices for each dice
safe_dice = self.tmc_instance._compute_safe_matrix() safe_dice = self.tmc_instance._compute_safe_matrix()
normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0] # Make sure to capture only the normal_dice component normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0]
risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0] risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0]
transition_matrices = [safe_dice, normal_dice, risky_dice] transition_matrices = [safe_dice, normal_dice, risky_dice]
...@@ -87,14 +75,16 @@ class validation: ...@@ -87,14 +75,16 @@ class validation:
for state in range(len(layout) - 1): for state in range(len(layout) - 1):
total_turns = 0 total_turns = 0
k = state # starting state k = state
while k < len(layout) - 1: while k < len(layout) - 1:
action = strategy[k] # action based on strategy action = strategy[k]
action_index = int(action) - 1 action_index = int(action) - 1
transition_matrix = transition_matrices[action_index] transition_matrix = transition_matrices[action_index]
flattened_probs = transition_matrix[k] flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) flattened_probs /= np.sum(flattened_probs)
k = np.random.choice(len(layout), p=flattened_probs) k = np.random.choice(len(layout), p=flattened_probs)
if layout[k] == 3 and action == 2: if layout[k] == 3 and action == 2:
...@@ -110,47 +100,40 @@ class validation: ...@@ -110,47 +100,40 @@ class validation:
# calculate the average number of turns for each state # calculate the average number of turns for each state
mean_turns = np.mean(number_mean, axis=0) mean_turns = np.mean(number_mean, axis=0)
return mean_turns return mean_turns
def play_optimal_policy(self, n_iterations=10000): def play_optimal_policy(self, n_iterations=10000):
return self.simulate_game(self.optimal_policy, n_iterations) return self.simulate_game(self.optimal_policy, n_iterations)
def play_dice_strategy(self, dice_choice, n_iterations=10000): def play_dice_strategy(self, dice_choice, n_iterations=10000):
if dice_choice == 'SafeDice': strategy = {
strategy = self.safe_strategy 'SafeDice': self.safe_strategy,
elif dice_choice == 'NormalDice': 'NormalDice': self.normal_strategy,
strategy = self.normal_strategy 'RiskyDice': self.risky_strategy
elif dice_choice == 'RiskyDice': }.get(dice_choice, None)
strategy = self.risky_strategy
else: if strategy is None:
raise ValueError("Invalid dice choice") raise ValueError("Invalid dice choice")
return self.simulate_game(strategy, n_iterations) return self.simulate_game(strategy, n_iterations)
def play_random_strategy(self, n_iterations=10000): def play_random_strategy(self, n_iterations=10000):
return self.simulate_game(self.random_strategy, n_iterations) return self.simulate_game(self.random_strategy, n_iterations)
def play_empirical_strategy(self): def play_empirical_strategy(self):
k = 0 # état initial k = 0
total_turns = 0 total_turns = 0
while k < len(self.layout) - 1: while k < len(self.layout) - 1:
action = self.optimal_policy[k] # Utiliser la stratégie empirique pour la simulation action = self.optimal_policy[k]
action_index = int(action) - 1 action_index = int(action) - 1
transition_matrix = self.normal_dice # Utiliser le dé normal pour la stratégie empirique transition_matrix = self.normal_dice
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k] flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités flattened_probs /= np.sum(flattened_probs)
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs) k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2: if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3: elif self.layout[k] == 3 and action == 3:
...@@ -160,30 +143,24 @@ class validation: ...@@ -160,30 +143,24 @@ class validation:
return total_turns return total_turns
def compare_empirical_vs_value_iteration(self, num_games=10000):
def compare_empirical_vs_value_iteration(self, num_games=1000): value_iteration_turns = self.expec
value_iteration_turns = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
# Calculate the mean turns for each state
mean_turns_by_state = {
'ValueIteration': value_iteration_turns.tolist(),
'Empirical': empirical_turns.tolist()
}
return mean_turns_by_state
def compare_state_based_turns(self, num_games=100000):
value_iteration = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
return value_iteration, empirical_turns mean_turns_by_state = {
'ValueIteration': value_iteration_turns.tolist(),
'Empirical': empirical_turns.tolist()
}
return mean_turns_by_state
def compare_state_based_turns(self, num_games=10000):
value_iteration = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
return value_iteration, empirical_turns
def compare_strategies(self, num_games=100000): def compare_strategies(self, num_games=10000):
optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games) optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games)
dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
...@@ -197,72 +174,71 @@ class validation: ...@@ -197,72 +174,71 @@ class validation:
'RiskyDice': dice3_cost, 'RiskyDice': dice3_cost,
'Random': random_cost 'Random': random_cost
} }
""" """
# Utilisation d'exemple # Exemple d'utilisation
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False circle = False
validation_instance = validation(layout, circle) validation_instance = Validation(layout, circle)
# Comparaison entre la stratégie empirique et la value iteration
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
# Comparer la stratégie empirique avec la stratégie de value iteration # Affichage des moyennes de tours pour chaque état
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
# Imprimer les moyennes des tours pour chaque état
num_states = len(layout) num_states = len(layout)
for state in range(num_states - 1): for state in range(num_states - 1):
print(f"État {state}:") print(f"État {state}:")
print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}") print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}") print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
# Exécution de la stratégie empirique une fois
# Exécuter la stratégie empirique une fois
empirical_strategy_result = validation_instance.play_empirical_strategy() empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result) print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
# Comparer la stratégie empirique avec la stratégie de value iteration sur plusieurs jeux # Comparaison entre la stratégie empirique et la value iteration sur plusieurs jeux
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration']) print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical']) print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
optimal_cost = validation_instance.play_optimal_policy(n_iterations=10000) # Coûts des différentes stratégies
optimal_cost = validation_instance.play_optimal_policy(n_iterations=1000000)
print("Optimal Strategy Cost:", optimal_cost) print("Optimal Strategy Cost:", optimal_cost)
dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=10000) dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=1000000)
print("Safe Dice Strategy Cost:", dice1_cost) print("Safe Dice Strategy Cost:", dice1_cost)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000) dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=1000000)
print("Normal Dice Strategy Cost:", dice2_cost) print("Normal Dice Strategy Cost:", dice2_cost)
dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=10000) dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=1000000)
print("Risky Dice Strategy Cost:", dice3_cost) print("Risky Dice Strategy Cost:", dice3_cost)
random_cost = validation_instance.play_random_strategy(n_iterations=10000) random_cost = validation_instance.play_random_strategy(n_iterations=1000000)
print("Random Strategy Cost:", random_cost) print("Random Strategy Cost:", random_cost)
strategy_comparison = validation_instance.compare_strategies(num_games=10000) # Comparaison entre les stratégies
strategy_comparison = validation_instance.compare_strategies(num_games=1000000)
print("Strategy Comparison Results:", strategy_comparison) print("Strategy Comparison Results:", strategy_comparison)
# Calcul des tours moyens pour différentes stratégies
optimal_policy = validation_instance.optimal_policy optimal_policy = validation_instance.optimal_policy
mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=10000) mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=1000000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal) print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000) mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice) print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000) mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice) print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000) mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice) print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000) mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice) print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
""" """
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter