diff --git a/.DS_Store b/.DS_Store
index 895bd9eae285819cae90199894867d4ed1ce3958..55d82a11e81b11f3bbb9102c10fed3bcce6672f9 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/markovDecision.py b/markovDecision.py
index 6bd17bcf2acda2cb848ff7bb36a9e63761568caf..25c5df10a8e77dafcd1fff2f03375bceda329a18 100644
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -60,6 +60,7 @@ def markovDecision(layout : list, circle : bool):
 # Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
 layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

+"""
 # Résolution du problème avec différents modes de jeu
 result_false = markovDecision(layout, circle=False)
 print("\nWin as soon as land on or overstep the final square")
@@ -68,3 +69,4 @@ print(result_false)
 result_true = markovDecision(layout, circle=True)
 print("\nStopping on the square to win")
 print(result_true)
+"""
\ No newline at end of file
diff --git a/plot.py b/plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5483806fb07f4cb14a364e205b4476c8f6366f5
--- /dev/null
+++ b/plot.py
@@ -0,0 +1,97 @@
+import matplotlib.pyplot as plt
+from validation import validation
+import numpy as np
+
+# Example layout and circle settings
+layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+circle = False
+
+# Create an instance of validation
+validation_instance = validation(layout, circle)
+
+
+# Plotting function for strategy comparison
+def plot_strategy_comparison(num_games=1000):
+    """Draw, save and show a bar chart of the cost of each strategy.
+
+    The costs come from ``validation_instance.compare_strategies``, called
+    with ``num_games``; the figure is written to ``strategy_comparison.png``
+    before being displayed.
+    """
+    strategy_costs = validation_instance.compare_strategies(num_games=num_games)
+
+    # Bar plot for strategy comparison
+    plt.figure(figsize=(10, 6))
+    plt.bar(strategy_costs.keys(), strategy_costs.values(), color=['blue', 'green', 'orange', 'red', 'purple'])
+    plt.xlabel('Strategies')
+    plt.ylabel('Average Cost')
+    plt.title('Comparison of Strategies')
+    plt.savefig('strategy_comparison.png')  # Save the plot
+    plt.show()
+
+# Plotting function for state-based average turns for all strategies on the same plot
+def plot_state_based_turns(save=True):
+    """Plot the ``simulate_state`` result of five strategies on one figure.
+
+    NOTE(review): ``save`` is currently unused because the ``savefig`` call
+    below is commented out, so the figure is only displayed.
+    """
+    strategies = [validation_instance.optimal_strategy,
+                  validation_instance.safe_strategy,
+                  validation_instance.normal_strategy,
+                  validation_instance.risky_strategy,
+                  validation_instance.random_strategy]
+    strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random']
+
+    plt.figure(figsize=(12, 6))
+    for strategy, name in zip(strategies, strategy_names):
+        mean_turns = validation_instance.simulate_state(strategy, layout, circle)
+        plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name)
+
+    plt.xlabel('State')
+    plt.ylabel('Average Turns')
+    plt.title('Average Turns per State for Different Strategies')
+    plt.grid(True)
+    plt.legend()
+
+    #if save:
+        #plt.savefig('state_based_turns_all_strategies.png')  # Save the plot
+
+    plt.show()
+
+def plot_state_based_comparison(validation_instance, num_games=1000):
+    """Plot the two series returned by ``compare_state_based_turns``.
+
+    The first series is labelled 'ValueIteration', the second 'Empirical'.
+    """
+    optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
+
+    # Plotting the state-based average turns comparison
+    plt.figure(figsize=(12, 6))
+
+    # Plot optimal strategy turns
+    plt.plot(range(len(optimal_turns)), optimal_turns, marker='o', linestyle='-', label='ValueIteration')
+
+    # Plot empirical strategy turns
+    plt.plot(range(len(empirical_turns)), empirical_turns, marker='x', linestyle='-', label='Empirical')
+
+    plt.xlabel('State')
+    plt.ylabel('Average Turns')
+    plt.title('Average Turns per State - ValueIteration vs. Empirical')
+    plt.grid(True)
+    plt.legend()
+
+    plt.show()
+
+
+
+
+# Main function to generate and save plots
+if __name__ == '__main__':
+    # Example of strategy comparison plot
+    plot_strategy_comparison(num_games=1000)
+
+    # Example of state-based average turns plot for all strategies on the same plot
+    plot_state_based_turns(save=True)
+
+    plot_state_based_comparison(validation_instance, num_games=1000)
\ No newline at end of file
diff --git a/strategy_comparison.png b/strategy_comparison.png
new file mode 100644
index 0000000000000000000000000000000000000000..aefb1f990b1c89957981a9281815083011fbc9d2
Binary files /dev/null and b/strategy_comparison.png differ
diff --git a/validation.py b/validation.py
index 85cd23151a66fa8102ab9d9f890017f3bd94dd02..ee6b922d35f05ee81565a00d93ccb85a8e768308 100644
--- a/validation.py
+++ b/validation.py
@@ -134,6 +134,69 @@ class validation:
     def play_random_strategy(self, n_iterations=10000):
         return self.simulate_game(self.random_strategy, n_iterations)

+
+    def play_empirical_strategy(self):
+        """Simulate one game and return the number of turns it took.
+
+        The action for each square is read from ``self.optimal_strategy``,
+        but the move itself is always sampled from ``self.normal_dice``.
+        """
+        k = 0  # initial state
+        total_turns = 0
+
+        while k < len(self.layout) - 1:
+            action = self.optimal_strategy[k]  # action chosen for the current square
+            transition_matrix = self.normal_dice  # always move with the normal die
+
+            # Normalise a copy of row k: dividing in place would write through
+            # the row view and modify self.normal_dice itself.
+            row = np.asarray(transition_matrix[k], dtype=float)
+            flattened_probs = row / row.sum()
+
+            # Sample the next state from the normalised row
+            k = np.random.choice(len(self.layout), p=flattened_probs)
+
+            # Update the turn count based on the square just reached
+            if self.layout[k] == 3 and action == 2:
+                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+            elif self.layout[k] == 3 and action == 3:
+                total_turns += 2
+            else:
+                total_turns += 1
+
+        return total_turns
+
+
+    def compare_empirical_vs_value_iteration(self, num_games=1000):
+        """Return two ``simulate_state`` runs of the optimal strategy as lists.
+
+        NOTE(review): both entries come from ``simulate_state``; the
+        'ValueIteration' one only omits ``n_iterations``. Confirm whether it
+        should hold the cost computed by value iteration instead.
+        """
+        value_iteration_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle)
+        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
+
+        # Collect the per-state results under one key per series
+        mean_turns_by_state = {
+            'ValueIteration': value_iteration_turns.tolist(),
+            'Empirical': empirical_turns.tolist()
+        }
+
+        return mean_turns_by_state
+
+    def compare_state_based_turns(self, num_games=1000):
+        """Return two ``simulate_state`` runs of the optimal strategy.
+
+        NOTE(review): the two calls below are identical (same strategy, same
+        arguments) -- confirm that this is intended.
+        """
+        optimal_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
+        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
+
+        return optimal_turns, empirical_turns
+
+

     def compare_strategies(self, num_games=1000):
         optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
@@ -157,6 +220,26 @@ circle = False

 validation_instance = validation(layout, circle)

+# Compare the empirical strategy with the value-iteration strategy
+turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
+
+# Print the mean number of turns for each state
+num_states = len(layout)
+for state in range(num_states - 1):
+    print(f"État {state}:")
+    print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
+    print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
+
+# Run the empirical strategy once
+empirical_strategy_result = validation_instance.play_empirical_strategy()
+print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
+
+# Compare the empirical strategy with the value-iteration strategy over several games
+comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
+print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
+print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
+"""
+
 optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
 print("Optimal Strategy Cost:", optimal_cost)

@@ -189,3 +272,5 @@ print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
 random_dice_strategy = validation_instance.random_strategy
 mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
 print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
+"""
+