Skip to content
Extraits de code Groupes Projets
Valider 40d8a99e rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update plot + validation

parent 9f373841
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
......@@ -60,6 +60,7 @@ def markovDecision(layout : list, circle : bool):
# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
"""
# Résolution du problème avec différents modes de jeu
result_false = markovDecision(layout, circle=False)
print("\nWin as soon as land on or overstep the final square")
......@@ -68,3 +69,4 @@ print(result_false)
result_true = markovDecision(layout, circle=True)
print("\nStopping on the square to win")
print(result_true)
"""
\ No newline at end of file
import matplotlib.pyplot as plt
from validation import validation
import numpy as np
# Example board (15 entries) and rule flag shared by every plot in this script.
# Both values are handed unchanged to `validation`; the meaning of the cell
# codes is defined outside this file.
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
# Module-level validation object. It is built at import time (outside the
# __main__ guard) and read as a global by plot_strategy_comparison and
# plot_state_based_turns.
validation_instance = validation(layout, circle)
# Plotting function for strategy comparison
def plot_strategy_comparison(num_games=1000):
    """Draw, save and display a bar chart with one bar per strategy.

    The data comes from the module-level ``validation_instance`` through
    ``compare_strategies``; the returned mapping's ``keys()`` label the bars
    and its ``values()`` give their heights.  The figure is written to
    ``strategy_comparison.png`` (relative path) and then shown.

    Args:
        num_games: Forwarded to ``compare_strategies`` as ``num_games``.
    """
    costs_by_strategy = validation_instance.compare_strategies(num_games=num_games)
    bar_labels = costs_by_strategy.keys()
    bar_heights = costs_by_strategy.values()
    bar_colors = ['blue', 'green', 'orange', 'red', 'purple']

    plt.figure(figsize=(10, 6))
    plt.bar(bar_labels, bar_heights, color=bar_colors)
    plt.xlabel('Strategies')
    plt.ylabel('Average Cost')
    plt.title('Comparison of Strategies')

    # The file is written before the figure is displayed.
    plt.savefig('strategy_comparison.png')
    plt.show()
# Plotting function for state-based average turns for all strategies on the same plot
def plot_state_based_turns(save=True):
    """Plot the per-state curves of five strategies on a single figure.

    For every strategy attribute of the module-level ``validation_instance``
    the curve is the result of
    ``validation_instance.simulate_state(strategy, layout, circle)``, drawn
    against the index of each entry.  The figure is only displayed.

    Args:
        save: Accepted for compatibility but currently has no effect, because
            the ``savefig`` call that consulted it is disabled.
    """
    labelled_strategies = (
        ('Optimal', validation_instance.optimal_strategy),
        ('SafeDice', validation_instance.safe_strategy),
        ('NormalDice', validation_instance.normal_strategy),
        ('RiskyDice', validation_instance.risky_strategy),
        ('Random', validation_instance.random_strategy),
    )

    plt.figure(figsize=(12, 6))
    for label, strategy in labelled_strategies:
        turns_per_state = validation_instance.simulate_state(strategy, layout, circle)
        state_indices = range(len(turns_per_state))
        plt.plot(state_indices, turns_per_state, marker='o', linestyle='-', label=label)

    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.title('Average Turns per State for Different Strategies')
    plt.grid(True)
    plt.legend()
    # NOTE(review): writing 'state_based_turns_all_strategies.png' when `save`
    # is true was commented out in the original; decide whether to restore it
    # or drop the `save` parameter.
    plt.show()
def plot_state_based_comparison(validation_instance, num_games=1000):
    """Display the two series returned by ``compare_state_based_turns``.

    Args:
        validation_instance: Object exposing
            ``compare_state_based_turns(num_games=...)``, which must return a
            pair of sized sequences.
        num_games: Forwarded to ``compare_state_based_turns``.

    The first series is drawn with 'o' markers under the label
    'ValueIteration', the second with 'x' markers under the label 'Empirical'.
    """
    optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(
        num_games=num_games
    )
    # (values, marker, legend label) for each curve, in drawing order.
    curves = (
        (optimal_turns, 'o', 'ValueIteration'),
        (empirical_turns, 'x', 'Empirical'),
    )

    plt.figure(figsize=(12, 6))
    for turns, marker, label in curves:
        plt.plot(range(len(turns)), turns, marker=marker, linestyle='-', label=label)

    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.title('Average Turns per State - ValueIteration vs. Empirical')
    plt.grid(True)
    plt.legend()
    plt.show()
# Main function to generate and save plots
if __name__ == '__main__':
# Bar chart comparing the strategies; this call also writes
# 'strategy_comparison.png'.
plot_strategy_comparison(num_games=1000)
# One figure with the per-state curve of every strategy. `save=True` is
# passed, but the function's savefig call is currently commented out.
plot_state_based_turns(save=True)
# Two-curve figure built from validation_instance.compare_state_based_turns.
plot_state_based_comparison(validation_instance, num_games=1000)
\ No newline at end of file
strategy_comparison.png

23 ko

......@@ -134,6 +134,53 @@ class validation:
def play_random_strategy(self, n_iterations=10000):
    """Run ``simulate_game`` with this object's ``random_strategy``.

    Args:
        n_iterations: Passed positionally as the second argument of
            ``simulate_game``.

    Returns:
        Whatever ``simulate_game`` returns.
    """
    outcome = self.simulate_game(self.random_strategy, n_iterations)
    return outcome
def play_empirical_strategy(self):
    """Simulate one game starting on square 0 and return the turns spent.

    At each step the next square is sampled from row ``k`` of
    ``self.normal_dice`` (normalised so it sums to 1).  The turn counter then
    advances according to the code of the square landed on and the action
    that ``self.optimal_strategy`` prescribes for the square just left:

    * landing on a square coded 3 with action 2: 1 or 2 turns, each with
      probability 0.5;
    * landing on a square coded 3 with action 3: 2 turns;
    * anything else: 1 turn.

    The loop stops once ``k`` reaches the last index of ``self.layout``.

    NOTE(review): every move is drawn from ``self.normal_dice`` whatever the
    prescribed action is; only the turn accounting depends on the action.
    Confirm this is intended.

    Returns:
        int: total number of turns accumulated during the game.
    """
    n_squares = len(self.layout)
    k = 0  # current square, starting from the first one
    total_turns = 0
    while k < n_squares - 1:
        action = self.optimal_strategy[k]

        # Normalise a *copy* of the row.  An in-place ``/=`` on the row would
        # write through to self.normal_dice and silently alter the shared
        # transition matrix (and fails outright on integer matrices).
        row = np.asarray(self.normal_dice[k], dtype=float)
        probabilities = row / row.sum()

        # Sample the next square from the normalised row.
        k = np.random.choice(n_squares, p=probabilities)

        # Charge the turns for this move based on where we landed.
        landed_on = self.layout[k]
        if landed_on == 3 and action == 2:
            total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
        elif landed_on == 3 and action == 3:
            total_turns += 2
        else:
            total_turns += 1
    return total_turns
def compare_empirical_vs_value_iteration(self, num_games=1000):
    """Return two ``simulate_state`` results as plain lists, keyed by label.

    Both runs use ``self.optimal_strategy``, ``self.layout`` and
    ``self.circle``.  The 'ValueIteration' entry comes from a call that does
    not pass ``n_iterations``; the 'Empirical' entry comes from a call with
    ``n_iterations=num_games``.

    NOTE(review): both entries are outputs of ``simulate_state``; neither
    reads a value-iteration expectation directly.  Confirm this is intended.

    Args:
        num_games: Iteration count used for the 'Empirical' run only.

    Returns:
        dict: ``{'ValueIteration': [...], 'Empirical': [...]}``, each value
        being the ``.tolist()`` of the corresponding result.
    """
    default_run = self.simulate_state(self.optimal_strategy, self.layout, self.circle)
    sized_run = self.simulate_state(
        self.optimal_strategy, self.layout, self.circle, n_iterations=num_games
    )
    return dict(ValueIteration=default_run.tolist(), Empirical=sized_run.tolist())
def compare_state_based_turns(self, num_games=1000):
    """Run ``simulate_state`` twice with identical arguments.

    Each run uses ``self.optimal_strategy``, ``self.layout``, ``self.circle``
    and ``n_iterations=num_games``.

    NOTE(review): the two calls are identical, so the returned pair holds two
    runs of the same simulation — confirm this is the intended comparison.

    Args:
        num_games: Iteration count forwarded to both runs.

    Returns:
        tuple: ``(first_run, second_run)`` in call order.
    """
    def simulate_once():
        return self.simulate_state(
            self.optimal_strategy, self.layout, self.circle, n_iterations=num_games
        )

    return simulate_once(), simulate_once()
def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
......@@ -157,6 +204,26 @@ circle = False
# NOTE(review): these statements appear to sit at module top level, so they
# would also run whenever this module is imported — confirm.
validation_instance = validation(layout, circle)
# Get the two per-state series ('ValueIteration' and 'Empirical') for the
# optimal strategy.
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
# Print both values for every state except the last one.
num_states = len(layout)
for state in range(num_states - 1):
print(f"État {state}:")
print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
# Play a single simulated game and print the number of turns it took.
empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
# Run the same comparison a second time (a fresh simulation) and print both
# per-state lists in full.
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
"""
optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)
......@@ -189,3 +256,5 @@ print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
"""
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter