Skip to content
Extraits de code Groupes Projets
Valider 169c62f3 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update plot_results

parent c20e9247
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
100k_false/Figure_1.png

23 ko

100k_false/Figure_2.png

105 ko

100k_false/Figure_3.png

49,5 ko

10k_false/Figure_1.png

21,7 ko

10k_false/Figure_2.png

104 ko

10k_false/Figure_3.png

50,7 ko

1M_false/Figure_1.png

21,7 ko

1M_false/Figure_2.png

102 ko

1M_false/Figure_3.png

50,1 ko

import numpy as np
from tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
    """Value-iteration solver for the 15-square dice game.

    Three transition matrices (safe, normal and risky die) are obtained from
    the transition-matrix calculator.  For the normal and risky dice the
    calculator also returns a second array (``jail_n`` / ``jail_r``); the sum
    of its row ``k`` is added to the expected cost of playing that die from
    square ``k``.
    NOTE(review): these arrays presumably hold the extra prison cost per
    transition -- confirm against the calculator implementation.
    """

    def __init__(self, layout: list, circle: bool):
        """Build the transition matrices for ``layout`` and ``circle``.

        Args:
            layout: per-square trap codes, forwarded to the calculator.
            circle: game-mode flag, forwarded to the calculator.
        """
        self.nSquares = 15
        self.precision = 1e-9
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        self.matrix_safe = self.tmc_instance._compute_safe_matrix()
        self.matrix_normal, self.jail_n = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.matrix_risky, self.jail_r = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Optimal die per square (1 = safe, 2 = normal, 3 = risky).
        self.Dice = np.zeros(self.nSquares, dtype=int)

    def solve(self):
        """Run value iteration and extract the greedy policy.

        At most 1000 sweeps are performed; iteration stops earlier once the
        summed absolute change between two sweeps drops below
        ``self.precision``.

        Returns:
            tuple: ``(values, dice)`` where ``values`` is the array of
            expected costs for every square and ``dice`` is ``self.Dice``,
            the index (1, 2 or 3) of the cheapest die for every square.
        """
        last = self.nSquares - 1

        # The extra per-square costs do not depend on the value estimate, so
        # they are computed once instead of on every sweep.
        jail_normal = [np.sum(self.jail_n[k]) for k in range(self.nSquares)]
        jail_risky = [np.sum(self.jail_r[k]) for k in range(self.nSquares)]

        ValueI = np.zeros(self.nSquares)
        # Hard-coded initial guess; its length ties the solver to 15 squares.
        ValueINew = np.array([8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0])

        # Bounded number of sweeps to avoid an endless loop.
        for _ in range(1000):
            np.copyto(ValueI, ValueINew)

            # Every non-final square costs one turn plus the cheapest
            # expected continuation.
            for k in range(last):
                ValueINew[k] = 1 + min(
                    np.dot(self.matrix_safe[k], ValueI),
                    np.dot(self.matrix_normal[k], ValueI) + jail_normal[k],
                    np.dot(self.matrix_risky[k], ValueI) + jail_risky[k],
                )

            # The final square adds neither a turn nor an extra cost.
            ValueINew[last] = min(
                np.dot(self.matrix_safe[last], ValueI),
                np.dot(self.matrix_normal[last], ValueI),
                np.dot(self.matrix_risky[last], ValueI),
            )

            if np.sum(np.abs(ValueINew - ValueI)) < self.precision:
                break

        # The policy only depends on the final values, so it is extracted
        # once after the loop (argmin index + 1 gives the die number).
        for k in range(self.nSquares):
            self.Dice[k] = np.argmin([
                np.dot(self.matrix_safe[k], ValueINew),
                np.dot(self.matrix_normal[k], ValueINew) + jail_normal[k],
                np.dot(self.matrix_risky[k], ValueINew) + jail_risky[k],
            ]) + 1

        return ValueINew, self.Dice
def markovDecision(layout: list, circle: bool):
    """Solve the game for ``layout`` and ``circle``.

    Convenience wrapper: instantiates ``MarkovDecisionSolver`` and returns
    whatever its ``solve()`` method returns.
    """
    return MarkovDecisionSolver(layout, circle).solve()
# Example usage of markovDecision with the layout and circle parameters
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the problem for both game modes.
# circle=False: the headings below follow the convention used elsewhere in
# the project (overstepping the final square still wins).
result_false = markovDecision(layout, circle=False)
print("\nWin as soon as land on or overstep the final square")
print("Expected costs for each square:")
print(result_false[0])
print("Dice choices for each square:")
print(result_false[1])

# circle=True: the player has to stop exactly on the final square.
result_true = markovDecision(layout, circle=True)
print("\nStopping on the square to win")
print("Expected costs for each square:")
print(result_true[0])
print("Dice choices for each square:")
print(result_true[1])
import numpy as np
from ancien.tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
    """Value iteration over the three dice using calculator matrices.

    Prison squares (layout code 3) add an expected extra cost: half of the
    probability of landing on them for the normal die, the full probability
    for the risky die.
    """

    def __init__(self, layout : list, circle : bool):
        """Build the three transition matrices and reset the value estimate."""
        calculator = tmc()
        self.Numberk = 15
        self.tmc_instance = calculator
        self.safe_dice = calculator._compute_safe_matrix()
        self.normal_dice = calculator._compute_normal_matrix(layout, circle)
        self.risky_dice = calculator._compute_risky_matrix(layout, circle)
        # Indices of the prison squares.
        self.jail = [square for square, kind in enumerate(layout) if kind == 3]
        self.ValueI = np.zeros(self.Numberk)
        self.DiceForStates = np.zeros(self.Numberk - 1)

    def _compute_vi_safe(self, k):
        """Expected continuation cost from square ``k`` with the safe die."""
        return np.dot(self.safe_dice[k], self.ValueI)

    def _compute_vi_normal(self, k):
        """Expected continuation cost with the normal die (half prison cost)."""
        row = self.normal_dice[k]
        return np.dot(row, self.ValueI) + 0.5 * np.sum(row[self.jail])

    def _compute_vi_risky(self, k):
        """Expected continuation cost with the risky die (full prison cost)."""
        row = self.risky_dice[k]
        return np.dot(row, self.ValueI) + np.sum(row[self.jail])

    def solve(self):
        """Iterate until two successive value vectors are ``np.allclose``.

        Returns:
            list: ``[Expec, Dice]`` -- the expected cost of every square
            except the last one, and the chosen die (1, 2 or 3, stored as
            floats) for each of those squares.
        """
        n_states = self.Numberk
        while True:
            updated = np.zeros(n_states)
            for k in range(n_states - 1):
                expected = (
                    self._compute_vi_safe(k),
                    self._compute_vi_normal(k),
                    self._compute_vi_risky(k),
                )
                updated[k] = 1 + min(expected)
                # First die (safe, then normal) whose cost matches the
                # minimum; the risky die is the fallback.
                self.DiceForStates[k] = next(
                    (die for die, cost in enumerate(expected[:2], start=1)
                     if updated[k] == 1 + cost),
                    3,
                )
            converged = np.allclose(updated, self.ValueI)
            self.ValueI = updated
            if converged:
                break
        return [self.ValueI[:-1], self.DiceForStates]
def markovDecision(layout : list, circle : bool):
    """Return ``MarkovDecisionSolver(layout, circle).solve()``."""
    return MarkovDecisionSolver(layout, circle).solve()
# Example usage of markovDecision with the layout and circle parameters
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the problem once per game mode, then report both results
result_false = markovDecision(layout, circle=False)
result_true = markovDecision(layout, circle=True)

print("\nWin as soon as land on or overstep the final square")
print(result_false)
print("\nStopping on the square to win")
print(result_true)
import matplotlib.pyplot as plt
from ancien.validation import validation
import numpy as np
# Board description and game-mode flag shared by every plot below
circle = False
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Validation helper used by all plotting functions
validation_instance = validation(layout, circle)
# Plotting function for strategy comparison
def plot_strategy_comparison(num_games=1000):
    """Draw, save and show a bar chart of the average cost per strategy.

    Args:
        num_games: number of simulated games forwarded to
            ``validation_instance.compare_strategies``.

    The figure is written to ``strategy_comparison.png`` before being shown.
    """
    strategy_costs = validation_instance.compare_strategies(num_games=num_games)
    labels = strategy_costs.keys()
    heights = strategy_costs.values()
    bar_colors = ['blue', 'green', 'orange', 'red', 'purple']

    # One bar per strategy
    plt.figure(figsize=(10, 6))
    plt.bar(labels, heights, color=bar_colors)
    plt.title('Comparison of Strategies')
    plt.xlabel('Strategies')
    plt.ylabel('Average Cost')
    plt.savefig('strategy_comparison.png')
    plt.show()
# Plotting function for state-based average turns for all strategies on the same plot
def plot_state_based_turns(save=True):
    """Plot the simulated average number of turns per state for each strategy.

    All five strategies are drawn on one figure, which is then shown.

    Args:
        save: currently ignored -- the figure is never written to disk.
    """
    named_strategies = {
        'Optimal': validation_instance.optimal_strategy,
        'SafeDice': validation_instance.safe_strategy,
        'NormalDice': validation_instance.normal_strategy,
        'RiskyDice': validation_instance.risky_strategy,
        'Random': validation_instance.random_strategy,
    }

    plt.figure(figsize=(12, 6))
    for name, strategy in named_strategies.items():
        mean_turns = validation_instance.simulate_state(strategy, layout, circle)
        states = range(len(mean_turns))
        plt.plot(states, mean_turns, marker='o', linestyle='-', label=name)

    plt.title('Average Turns per State for Different Strategies')
    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.grid(True)
    plt.legend()
    plt.show()
def plot_state_based_comparison(validation_instance, num_games=1000):
    """Plot the two per-state series returned by ``compare_state_based_turns``.

    Args:
        validation_instance: object providing ``compare_state_based_turns``.
        num_games: number of simulated games forwarded to that method.
    """
    series = validation_instance.compare_state_based_turns(num_games=num_games)
    optimal_turns, empirical_turns = series

    # (data, marker, legend label) for each curve, drawn in this order
    curves = (
        (optimal_turns, 'o', 'ValueIteration'),
        (empirical_turns, 'x', 'Empirical'),
    )

    plt.figure(figsize=(12, 6))
    for turns, marker, label in curves:
        plt.plot(range(len(turns)), turns, marker=marker, linestyle='-', label=label)

    plt.title('Average Turns per State - ValueIteration vs. Empirical')
    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.grid(True)
    plt.legend()
    plt.show()
# Main function to generate and save plots
# Script entry point: draws the three figures one after the other.
if __name__ == '__main__':
# Bar chart comparing the average cost of every strategy
plot_strategy_comparison(num_games=1000)
# Average turns per state for all strategies on the same figure
# (the save flag is not acted upon by plot_state_based_turns)
plot_state_based_turns(save=True)
# Per-state comparison of the two series from compare_state_based_turns
plot_state_based_comparison(validation_instance, num_games=1000)
\ No newline at end of file
import numpy as np
import random as rd
class TransitionMatrixCalculator:
    """Build the 15x15 transition matrices of the safe, normal and risky dice.

    Board topology as encoded below: from square 2 a move of one or more
    steps goes with equal probability to the branch starting at square 3 or
    the branch starting at square 10; square 9 is followed by square 14, the
    final square.

    Layout codes: 1 sends the player back to square 0, 2 moves the player
    three squares back, every other code leaves the player on the square.
    The safe die ignores traps, the normal die triggers them with
    probability 1/2 and the risky die always triggers them.
    """

    def __init__(self):
        # Transition matrices for the "safe", "normal" and "risky" dice
        self.matrix_safe = np.zeros((15, 15))
        self.matrix_normal = np.zeros((15, 15))
        self.matrix_risky = np.zeros((15, 15))
        # Probability of each face (face i moves i squares)
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])

    def compute_transition_matrix(self, layout, circle=False):
        """Recompute and return ``(matrix_safe, matrix_normal, matrix_risky)``."""
        self._compute_safe_matrix()
        self._compute_normal_matrix(layout, circle)
        self._compute_risky_matrix(layout, circle)
        return self.matrix_safe, self.matrix_normal, self.matrix_risky

    @staticmethod
    def _trap_destination(square, layout):
        """Return where a triggered trap on ``square`` sends the player."""
        kind = layout[square]
        if kind == 1:
            return 0
        if kind == 2:
            # Squares 10-12 step back through square 2 onto squares 0-2.
            if 10 <= square <= 12:
                return square - 10
            return max(0, square - 3)
        return square

    def _add_landing(self, matrix, k, square, p, layout, trigger):
        """Add probability ``p`` of moving from ``k`` to ``square``.

        If ``square`` holds a trap that moves the player, the share
        ``trigger`` of ``p`` goes to the trap destination and the rest stays
        on ``square``; the total added to row ``k`` is always ``p``.
        """
        destination = self._trap_destination(square, layout)
        if destination == square:
            matrix[k, square] += p
        else:
            matrix[k, square] += p * (1 - trigger)
            matrix[k, destination] += p * trigger

    def _compute_safe_matrix(self):
        """Fill and return the safe-die matrix (traps are ignored)."""
        # Reset in place so repeated calls do not accumulate and existing
        # references to the matrix stay valid.
        self.matrix_safe.fill(0)
        for k in range(15):
            for s, p in enumerate(self.safe_dice):
                if k == 9 and s == 1:
                    self.matrix_safe[k, 14] += p
                elif k == 2 and s > 0:
                    # Junction: half to each branch
                    self.matrix_safe[k, 10] += p / 2
                    self.matrix_safe[k, 3] += p / 2
                else:
                    self.matrix_safe[k, min(14, k + s)] += p
        return self.matrix_safe

    def _compute_normal_matrix(self, layout, circle):
        """Fill and return the normal-die matrix (traps trigger half the time).

        Args:
            layout: per-square trap codes.
            circle: if true, moves past square 14 wrap around to square 0;
                otherwise they are clamped to square 14.
        """
        matrix = self.matrix_normal
        matrix.fill(0)
        for k in range(15):
            for s, p in enumerate(self.normal_dice):
                if k == 8 and s == 2:
                    matrix[k, 14] += p
                elif k == 9 and s in (1, 2):
                    if circle and s == 2:
                        matrix[k, 0] += p
                    else:
                        matrix[k, 14] += p
                elif k == 2 and s > 0:
                    # Junction: half of the probability to each branch
                    for branch_start in (10, 3):
                        self._add_landing(matrix, k, branch_start + (s - 1),
                                          p / 2, layout, 0.5)
                else:
                    target = (k + s) % 15 if circle else min(14, k + s)
                    self._add_landing(matrix, k, target, p, layout, 0.5)
        return matrix

    def _compute_risky_matrix(self, layout, circle):
        """Fill and return the risky-die matrix (traps always trigger).

        Args:
            layout: per-square trap codes.
            circle: if true, moves past square 14 wrap around to square 0;
                otherwise they are clamped to square 14.
        """
        matrix = self.matrix_risky
        matrix.fill(0)
        for k in range(15):
            for s, p in enumerate(self.risky_dice):
                if k == 7 and s == 3:
                    matrix[k, 14] += p
                elif k == 8 and s in (2, 3):
                    if circle and s == 3:
                        matrix[k, 0] += p
                    else:
                        matrix[k, 14] += p
                elif k == 9 and s in (1, 2, 3):
                    if not circle or s == 1:
                        matrix[k, 14] += p
                    elif s == 2:
                        matrix[k, 0] += p
                    else:
                        # Wraps around onto square 1, where a trap may apply
                        self._add_landing(matrix, k, 1, p, layout, 1.0)
                elif k == 2 and s > 0:
                    # Junction: half of the probability to each branch
                    for branch_start in (10, 3):
                        self._add_landing(matrix, k, branch_start + (s - 1),
                                          p / 2, layout, 1.0)
                else:
                    target = (k + s) % 15 if circle else min(14, k + s)
                    self._add_landing(matrix, k, target, p, layout, 1.0)
        return matrix

    def print_matrix_with_layout(self, title, matrix):
        """Print ``title`` followed by ``matrix``, one row per line."""
        print(f"{title}:")
        for i in range(matrix.shape[0]):
            row_str = " | ".join(f"{matrix[i, j]:.3f}" for j in range(matrix.shape[1]))
            print(row_str)
        print()
# Example usage: print the three matrices of a trap-free board in circle mode
layout_example = [0]*15
calculator = TransitionMatrixCalculator()
example_matrices = calculator.compute_transition_matrix(layout_example, circle=True)
print(example_matrices)
#tmc = TransitionMatrixCalculator()
#tmc.tst_transition_matrix()
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from ancien.tmc import TransitionMatrixCalculator as tmc
from ancien.markovDecision import MarkovDecisionSolver as mD
class validation:
    """Monte-Carlo validation of dice strategies against value iteration.

    A strategy is a sequence giving, for each square, the die to throw
    (1 = safe, 2 = normal, 3 = risky).
    """

    def __init__(self, layout, circle=False):
        """Build the transition matrices, solve the MDP and define strategies.

        Args:
            layout: per-square trap codes (3 marks a prison square).
            circle: game-mode flag forwarded to the calculator and solver.
        """
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Strategies under comparison
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1]*len(layout)
        self.normal_strategy = [2]*len(layout)
        self.risky_strategy = [3]*len(layout)
        # Only 1, 2 and 3 are valid dice; 0 would index the matrix list
        # from the end.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(len(layout))]

        # Per-square cost table by die type (non-zero on prison squares only)
        self.costs_by_dice_type = {
            'SafeDice': [0] * len(self.layout),
            'NormalDice': [0] * len(self.layout),
            'RiskyDice': [0] * len(self.layout)
        }
        for i, square_type in enumerate(self.layout):
            if square_type == 3:
                self.costs_by_dice_type['SafeDice'][i] = 1
                self.costs_by_dice_type['NormalDice'][i] = 2
                self.costs_by_dice_type['RiskyDice'][i] = 3

    @staticmethod
    def _step(k, action, transition_matrix, layout):
        """Sample one move from square ``k``; return ``(next_square, turns)``.

        Landing on a prison square costs an extra turn with probability 1/2
        for the normal die and always for the risky die.
        """
        row = transition_matrix[k]
        # Normalise into a new array: the transition matrix is left untouched.
        probs = row / np.sum(row)
        next_k = np.random.choice(len(layout), p=probs)
        if layout[next_k] == 3 and action == 2:
            turns = 1 if np.random.uniform(0, 1) < 0.5 else 2
        elif layout[next_k] == 3 and action == 3:
            turns = 2
        else:
            turns = 1
        return next_k, turns

    def _play_game(self, start, strategy, transition_matrices, layout):
        """Play from ``start`` until the last square; return the turn count."""
        k = start
        total_turns = 0
        while k < len(layout) - 1:
            action = strategy[k]
            transition_matrix = transition_matrices[int(action) - 1]
            k, turns = self._step(k, action, transition_matrix, layout)
            total_turns += turns
        return total_turns

    def simulate_game(self, strategy, n_iterations=10000):
        """Return the mean number of turns from square 0 over ``n_iterations`` games."""
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = [
            self._play_game(0, strategy, transition_matrices, self.layout)
            for _ in range(n_iterations)
        ]
        return np.mean(number_turns)

    def simulate_state(self, strategy, layout, circle, n_iterations=10000):
        """Return the mean number of turns from every non-final square.

        Fresh transition matrices are computed for ``layout`` and ``circle``;
        the result has one entry per starting square.
        """
        tmc_instance = tmc()
        P_safe = tmc_instance._compute_safe_matrix()
        P_normal = tmc_instance._compute_normal_matrix(layout, circle)
        P_risky = tmc_instance._compute_risky_matrix(layout, circle)
        transition_matrices = [P_safe, P_normal, P_risky]
        number_mean = [
            [self._play_game(state, strategy, transition_matrices, layout)
             for state in range(len(layout) - 1)]
            for _ in range(n_iterations)
        ]
        # Average over the games, keeping one value per starting square
        return np.mean(number_mean, axis=0)

    def play_optimal_strategy(self, n_iterations=10000):
        """Mean turns from square 0 when following the solver's policy."""
        return self.simulate_game(self.optimal_strategy, n_iterations)

    def play_dice_strategy(self, dice_choice, n_iterations=10000):
        """Mean turns from square 0 when always throwing ``dice_choice``.

        Raises:
            ValueError: if ``dice_choice`` is not 'SafeDice', 'NormalDice'
                or 'RiskyDice'.
        """
        if dice_choice == 'SafeDice':
            strategy = self.safe_strategy
        elif dice_choice == 'NormalDice':
            strategy = self.normal_strategy
        elif dice_choice == 'RiskyDice':
            strategy = self.risky_strategy
        else:
            raise ValueError("Invalid dice choice")
        return self.simulate_game(strategy, n_iterations)

    def play_random_strategy(self, n_iterations=10000):
        """Mean turns from square 0 when following the random strategy."""
        return self.simulate_game(self.random_strategy, n_iterations)

    def play_empirical_strategy(self):
        """Play one game from square 0 and return its number of turns.

        Moves are always sampled from the normal-die matrix; the policy's
        action is only used for the prison penalty.
        NOTE(review): confirm that ignoring the action for the transition is
        intended.
        """
        k = 0
        total_turns = 0
        while k < len(self.layout) - 1:
            action = self.optimal_strategy[k]
            k, turns = self._step(k, action, self.normal_dice, self.layout)
            total_turns += turns
        return total_turns

    def compare_empirical_vs_value_iteration(self, num_games=1000):
        """Return per-state turns from value iteration and from simulation.

        Returns:
            dict: ``{'ValueIteration': [...], 'Empirical': [...]}`` where the
            first list is the solver's expected cost and the second the
            simulated mean over ``num_games`` games.
        """
        value_iteration_turns = np.asarray(self.expec)
        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
        return {
            'ValueIteration': value_iteration_turns.tolist(),
            'Empirical': empirical_turns.tolist()
        }

    def compare_state_based_turns(self, num_games=1000):
        """Return ``(value_iteration_costs, simulated_mean_turns)`` per state."""
        optimal_turns = self.expec
        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
        return optimal_turns, empirical_turns

    def compare_strategies(self, num_games=1000):
        """Return the simulated mean cost from square 0 of every strategy."""
        optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
        dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
        dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
        dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
        random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
        return {
            'Optimal': optimal_cost,
            'SafeDice': dice1_cost,
            'NormalDice': dice2_cost,
            'RiskyDice': dice3_cost,
            'Random': random_cost
        }
# Example usage
# NOTE: everything below runs at import time and consumes the shared random
# stream sequentially, so the order of the calls matters for reproducibility.
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = validation(layout, circle)
# Compare the empirical strategy with the value-iteration strategy
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
# Print the mean number of turns for each state
num_states = len(layout)
for state in range(num_states - 1):
print(f"État {state}:")
print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
# Run the empirical strategy once
empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
# Compare the empirical strategy with value iteration over several games
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
# Mean cost from the first square for individual strategies
optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)
random_cost = validation_instance.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)
# All strategies side by side
strategy_comparison = validation_instance.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
# Mean number of turns per starting state, one strategy at a time
optimal_strategy = validation_instance.optimal_strategy
mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
import matplotlib.pyplot as plt
from validation import validation
from validation import Validation as Val
import numpy as np
# Example layout and circle settings
......@@ -7,11 +7,11 @@ layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
# Create an instance of validation
validation_instance = validation(layout, circle)
validation_instance = Val(layout, circle)
# Plotting function for strategy comparison
def plot_strategy_comparison(num_games=1000):
def plot_strategy_comparison(num_games=10000):
strategy_costs = validation_instance.compare_strategies(num_games=num_games)
# Bar plot for strategy comparison
......@@ -48,7 +48,7 @@ def plot_state_based_turns(save=True):
plt.show()
def plot_state_based_comparison(validation_instance, num_games=100000):
def plot_state_based_comparison(validation_instance, num_games=10000):
optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
# Plotting the state-based average turns comparison
......@@ -74,9 +74,9 @@ def plot_state_based_comparison(validation_instance, num_games=100000):
# Main function to generate and save plots
if __name__ == '__main__':
# Example of strategy comparison plot
plot_strategy_comparison(num_games=1000)
plot_strategy_comparison(num_games=10000)
# Example of state-based average turns plot for all strategies on the same plot
plot_state_based_turns(save=True)
plot_state_based_comparison(validation_instance, num_games=1000)
\ No newline at end of file
plot_state_based_comparison(validation_instance, num_games=10000)
\ No newline at end of file
strategy_comparison.png

21,7 ko | W: | H:

strategy_comparison.png

21,7 ko | W: | H:

strategy_comparison.png
strategy_comparison.png
strategy_comparison.png
strategy_comparison.png
  • 2-up
  • Swipe
  • Onion skin
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class validation:
def __init__(self, layout, circle=False):
# import from other .PY
class Validation:
def __init__(self, layout, circle=False):
self.layout = layout
self.circle = circle
self.tmc_instance = tmc()
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) # Make sure to capture only the normal_dice component
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve()
# Define all the strategy
self.safe_strategy = [1]*len(layout)
self.normal_strategy = [2]*len(layout)
self.risky_strategy = [3]*len(layout)
self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
self.safe_strategy = [1] * len(layout)
self.normal_strategy = [2] * len(layout)
self.risky_strategy = [3] * len(layout)
self.random_strategy = [rd.choice([0, 1, 2, 3]) for _ in range(len(layout))]
# Définir les coûts par case et par type de dé
self.costs_by_dice_type = {
'SafeDice': [0] * len(self.layout),
'NormalDice': [0] * len(self.layout),
'RiskyDice': [0] * len(self.layout)
'SafeDice': [0] * len(layout),
'NormalDice': [0] * len(layout),
'RiskyDice': [0] * len(layout)
}
# Remplir les coûts pour chaque case en fonction du type de dé
for i in range(len(self.layout)):
if self.layout[i] == 3:
self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr
self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal
self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué
for i, die_type in enumerate(self.layout):
self.costs_by_dice_type['SafeDice'][i] = 1 if die_type == 3 else 0
self.costs_by_dice_type['NormalDice'][i] = 2 if die_type == 3 else 0
self.costs_by_dice_type['RiskyDice'][i] = 3 if die_type == 3 else 0
def simulate_game(self, strategy, n_iterations=10000):
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
......@@ -44,23 +38,18 @@ class validation:
for _ in range(n_iterations):
total_turns = 0
k = 0 # état initial
k = 0 # initial state
while k < len(self.layout) - 1:
action = strategy[k] # action selon la stratégie
# Convertir action en entier pour accéder à l'indice correct dans transition_matrices
action = strategy[k]
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités
flattened_probs /= np.sum(flattened_probs)
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3:
......@@ -73,9 +62,8 @@ class validation:
return np.mean(number_turns)
def simulate_state(self, strategy, layout, circle, n_iterations=10000):
# Compute transition matrices for each dice
safe_dice = self.tmc_instance._compute_safe_matrix()
normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0] # Make sure to capture only the normal_dice component
normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0]
risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0]
transition_matrices = [safe_dice, normal_dice, risky_dice]
......@@ -87,14 +75,16 @@ class validation:
for state in range(len(layout) - 1):
total_turns = 0
k = state # starting state
k = state
while k < len(layout) - 1:
action = strategy[k] # action based on strategy
action = strategy[k]
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs)
k = np.random.choice(len(layout), p=flattened_probs)
if layout[k] == 3 and action == 2:
......@@ -110,47 +100,40 @@ class validation:
# calculate the average number of turns for each state
mean_turns = np.mean(number_mean, axis=0)
return mean_turns
def play_optimal_policy(self, n_iterations=10000):
return self.simulate_game(self.optimal_policy, n_iterations)
def play_dice_strategy(self, dice_choice, n_iterations=10000):
if dice_choice == 'SafeDice':
strategy = self.safe_strategy
elif dice_choice == 'NormalDice':
strategy = self.normal_strategy
elif dice_choice == 'RiskyDice':
strategy = self.risky_strategy
else:
strategy = {
'SafeDice': self.safe_strategy,
'NormalDice': self.normal_strategy,
'RiskyDice': self.risky_strategy
}.get(dice_choice, None)
if strategy is None:
raise ValueError("Invalid dice choice")
return self.simulate_game(strategy, n_iterations)
def play_random_strategy(self, n_iterations=10000):
return self.simulate_game(self.random_strategy, n_iterations)
def play_empirical_strategy(self):
k = 0 # état initial
k = 0
total_turns = 0
while k < len(self.layout) - 1:
action = self.optimal_policy[k] # Utiliser la stratégie empirique pour la simulation
action = self.optimal_policy[k]
action_index = int(action) - 1
transition_matrix = self.normal_dice # Utiliser le dé normal pour la stratégie empirique
transition_matrix = self.normal_dice
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités
flattened_probs /= np.sum(flattened_probs)
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3:
......@@ -160,30 +143,24 @@ class validation:
return total_turns
def compare_empirical_vs_value_iteration(self, num_games=1000):
value_iteration_turns = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
# Calculate the mean turns for each state
mean_turns_by_state = {
'ValueIteration': value_iteration_turns.tolist(),
'Empirical': empirical_turns.tolist()
}
return mean_turns_by_state
def compare_state_based_turns(self, num_games=100000):
value_iteration = self.expec
def compare_empirical_vs_value_iteration(self, num_games=10000):
value_iteration_turns = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
return value_iteration, empirical_turns
mean_turns_by_state = {
'ValueIteration': value_iteration_turns.tolist(),
'Empirical': empirical_turns.tolist()
}
return mean_turns_by_state
def compare_state_based_turns(self, num_games=10000):
value_iteration = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
return value_iteration, empirical_turns
def compare_strategies(self, num_games=100000):
def compare_strategies(self, num_games=10000):
optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games)
dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
......@@ -197,72 +174,71 @@ class validation:
'RiskyDice': dice3_cost,
'Random': random_cost
}
"""
# Utilisation d'exemple
# Exemple d'utilisation
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = validation(layout, circle)
validation_instance = Validation(layout, circle)
# Comparaison entre la stratégie empirique et la value iteration
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
# Comparer la stratégie empirique avec la stratégie de value iteration
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
# Imprimer les moyennes des tours pour chaque état
# Affichage des moyennes de tours pour chaque état
num_states = len(layout)
for state in range(num_states - 1):
print(f"État {state}:")
print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
# Exécuter la stratégie empirique une fois
# Exécution de la stratégie empirique une fois
empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
# Comparer la stratégie empirique avec la stratégie de value iteration sur plusieurs jeux
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
# Comparaison entre la stratégie empirique et la value iteration sur plusieurs jeux
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
optimal_cost = validation_instance.play_optimal_policy(n_iterations=10000)
# Coûts des différentes stratégies
optimal_cost = validation_instance.play_optimal_policy(n_iterations=1000000)
print("Optimal Strategy Cost:", optimal_cost)
dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=10000)
dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=1000000)
print("Safe Dice Strategy Cost:", dice1_cost)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=1000000)
print("Normal Dice Strategy Cost:", dice2_cost)
dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=10000)
dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=1000000)
print("Risky Dice Strategy Cost:", dice3_cost)
random_cost = validation_instance.play_random_strategy(n_iterations=10000)
random_cost = validation_instance.play_random_strategy(n_iterations=1000000)
print("Random Strategy Cost:", random_cost)
strategy_comparison = validation_instance.compare_strategies(num_games=10000)
# Comparaison entre les stratégies
strategy_comparison = validation_instance.compare_strategies(num_games=1000000)
print("Strategy Comparison Results:", strategy_comparison)
# Calcul des tours moyens pour différentes stratégies
optimal_policy = validation_instance.optimal_policy
mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=10000)
mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=1000000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000)
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000)
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000)
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
"""
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter