diff --git a/markovDecision.py b/markovDecision.py
index 5b6e73db540ebea4b0bee80730d5436fb738d299..c87b43332bb524e9fe5e038c9fb74081fe94facd 100644
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -1,79 +1,79 @@
 import numpy as np
 from tmc import TransitionMatrixCalculator as tmc
 
-class MarkovDecisionSolver:
+class MarkovDecisionProcess :
     def __init__(self, layout: list, circle: bool):
+        # Initialize the Markov Decision Process solver with layout and game mode (circle or not)
         self.Numberk = 15
         self.tmc_instance = tmc()
+
+        # Compute transition matrices for safe, normal, and risky scenarios
         self.safe_dice = self.tmc_instance._compute_safe_matrix()
-        self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)  # Make sure to capture only the normal_dice component
-        self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)  # Make sure to capture only the risky_dice component
+        self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
+
+        # Identify jail states in the layout
         self.jail = [i for i, x in enumerate(layout) if x == 3]
+
+        # Initialize value and dice decision arrays
         self.ValueI = np.zeros(self.Numberk)
-        self.DiceForStates = np.zeros(self.Numberk - 1)
+        self.Dice = np.zeros(self.Numberk - 1)
 
-    def _compute_vi_safe(self, k):
+    def _compute_vi_safe(self, k : int ):
+        # Compute the expected value using safe dice transition matrix for state k
        return np.dot(self.safe_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
 
-    def _compute_vi_normal(self, k):
+    def _compute_vi_normal(self, k : int ):
+        # Compute the expected value using normal dice transition matrix for state k
         vi_normal = np.dot(self.normal_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
         return vi_normal
 
-    def _compute_vi_risky(self, k):
+    def _compute_vi_risky(self, k : int ):
+        # Compute the expected value using risky dice transition matrix for state k
         vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])
         return vi_risky
 
     def solve(self):
+        # Iteratively solve the Markov Decision Process until convergence
         i = 0
         while True:
             ValueINew = np.zeros(self.Numberk)
             i += 1
 
             for k in range(self.Numberk - 1):
+                # Compute expected values for safe, normal, and risky decisions at state k
                 vi_safe = self._compute_vi_safe(k)
                 vi_normal = self._compute_vi_normal(k)
                 vi_risky = self._compute_vi_risky(k)
 
-                # Compute the minimum value among vi_safe, vi_normal, and vi_risky
+                # Determine the minimum value among safe, normal, and risky decisions
                 min_value = min(vi_safe, vi_normal, vi_risky)
 
-                # Find which index (safe, normal, or risky) corresponds to the minimum value
+                # Record the dice decision (safe=1, normal=2, risky=3) corresponding to the minimum value
                 if min_value == vi_safe:
                     ValueINew[k] = 1 + vi_safe
-                    self.DiceForStates[k] = 1
+                    self.Dice[k] = 1
                 elif min_value == vi_normal:
                     ValueINew[k] = 1 + vi_normal
-                    self.DiceForStates[k] = 2
+                    self.Dice[k] = 2
                 else:
                     ValueINew[k] = 1 + vi_risky
-                    self.DiceForStates[k] = 3
-
+                    self.Dice[k] = 3
 
+            # Check for convergence
             if np.allclose(ValueINew, self.ValueI):
                 self.ValueI = ValueINew
                 break
 
             self.ValueI = ValueINew
 
+        # Return the expected values and dice decisions for each state
         Expec = self.ValueI[:-1]
-        return [Expec, self.DiceForStates]
+        return [Expec, self.Dice]
 
 
 def markovDecision(layout : list, circle : bool):
-    solver = MarkovDecisionSolver(layout, circle)
-    return solver.solve()
-
-"""
-# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-
-
-# Résolution du problème avec différents modes de jeu
-result_false = markovDecision(layout, circle=False)
-print("\nWin as soon as land on or overstep the final square")
-print(result_false)
-
-result_true = markovDecision(layout, circle=True)
-print("\nStopping on the square to win")
-print(result_true)"""
\ No newline at end of file
+    # Solve the Markov Decision Problem for the given layout and game mode
+    solver = MarkovDecisionProcess(layout, circle)
+    return solver.solve()
\ No newline at end of file
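Note: this change drops the commented-out usage example at the bottom of markovDecision.py. For reference, a minimal sketch of how the renamed solver can still be driven, adapted from that removed block (the variable names expec and dice are illustrative; the function returns the expected cost per square and the optimal dice choice per square):

    from markovDecision import markovDecision

    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

    # Win as soon as the final square is reached or overstepped
    expec, dice = markovDecision(layout, circle=False)
    print(expec, dice)

    # Win only by stopping exactly on the final square (circular board)
    expec_circle, dice_circle = markovDecision(layout, circle=True)
    print(expec_circle, dice_circle)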
diff --git a/plot.py b/plot.py
index d84149546fac9a9fac0dc55711825368bae7b652..dd9d81a94f56c6565128e3e98c01ef73fe16aeaa 100644
--- a/plot.py
+++ b/plot.py
@@ -2,29 +2,24 @@ import matplotlib.pyplot as plt
 from validation import Validation as Val
 import numpy as np
 
 
-# Example layout and circle settings
-layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-circle = False
-# Create an instance of validation
-validation_instance = Val(layout, circle)
-
-
-# Plotting function for strategy comparison
-def plot_strategy_comparison(num_games=10000):
+def plot_strategy_comparison(num_games : int):
+    """Plot a bar chart comparing average costs of different strategies over a specified number of games."""
+
+    # Compare strategies and get their costs
     strategy_costs = validation_instance.compare_strategies(num_games=num_games)
 
-    # Bar plot for strategy comparison
+    # Plotting the bar chart
     plt.figure(figsize=(10, 6))
     plt.bar(strategy_costs.keys(), strategy_costs.values(), color=['blue', 'green', 'orange', 'red', 'purple'])
     plt.xlabel('Strategies')
     plt.ylabel('Average Cost')
     plt.title('Comparison of Strategies')
-    plt.savefig('strategy_comparison.png')  # Save the plot
     plt.show()
 
-# Plotting function for state-based average turns for all strategies on the same plot
-def plot_state_based_turns(save=True):
+
+def plot_state_based_turns():
+    """Plot the average number of turns per state for different strategies."""
     strategies = [validation_instance.optimal_policy,
                   validation_instance.safe_strategy,
                   validation_instance.normal_strategy,
@@ -33,8 +28,9 @@ def plot_state_based_turns(save=True):
     strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random']
 
     plt.figure(figsize=(12, 6))
+    # Simulate and plot average turns for each strategy
     for strategy, name in zip(strategies, strategy_names):
-        mean_turns = validation_instance.simulate_state(strategy, layout, circle)
+        mean_turns = validation_instance.simulate_state(strategy, layout, circle, num_games)
         plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name)
 
     plt.xlabel('State')
@@ -42,13 +38,35 @@
     plt.title('Average Turns per State for Different Strategies')
     plt.grid(True)
     plt.legend()
+    plt.show()
+
+
+def plot_state_based_comparison(num_games_list):
+    """Plot a comparison between optimal turns and empirical turns per state for different num_games."""
+    plt.figure(figsize=(12, 6))  # Create a single figure for all plots
+
+    optimal_turns = None  # Initialize optimal_turns to None
+
+    for num_games in num_games_list:
+        _, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
+
+        # Plotting empirical turns per state for the current num_games
+        plt.plot(range(len(empirical_turns)), empirical_turns, marker='x', linestyle='-', label=f'Empirical (num_games={num_games})')
 
-    #if save:
-        #plt.savefig('state_based_turns_all_strategies.png')  # Save the plot
+        if optimal_turns is None:
+            # Only fetch optimal_turns once (for the first num_games)
+            optimal_turns, _ = validation_instance.compare_state_based_turns(num_games=num_games)
+            plt.plot(range(len(optimal_turns)), optimal_turns, marker='o', linestyle='-', label=f'ValueIteration')
 
+    plt.xlabel('State')
+    plt.ylabel('Average Turns')
+    plt.title('Average Turns per State - ValueIteration vs. Empirical')
+    plt.grid(True)
+    plt.legend()
     plt.show()
 
-def plot_state_based_comparison(validation_instance, num_games=10000):
+
+def plot_state_based_comparison_once(num_games : int):
     optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
 
     # Plotting the state-based average turns comparison
@@ -70,13 +88,23 @@ def plot_state_based_comparison(validation_instance, num_games=10000):
 
 
 
-
-# Main function to generate and save plots
 if __name__ == '__main__':
-    # Example of strategy comparison plot
-    plot_strategy_comparison(num_games=10000)
 
-    # Example of state-based average turns plot for all strategies on the same plot
-    plot_state_based_turns(save=True)
+    ##### Parameters #####
+
+    # Define the layout of the game board
+    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+    # Indicates whether the board is circular or linear
+    circle = False
+    # Number of games to simulate
+    num_games = 10000
+    # Initialize Validation instance with the specified layout and circle type
+    validation_instance = Val(layout, circle)
+
+    ##### Launch Plots #####
 
-    plot_state_based_comparison(validation_instance, num_games=10000)
\ No newline at end of file
+    # Run the defined plotting functions with specified parameters
+    plot_strategy_comparison(num_games)
+    plot_state_based_turns()
+    plot_state_based_comparison(num_games_list = [10, 100, 1000])
+    plot_state_based_comparison_once(num_games)
\ No newline at end of file
diff --git a/strategy_comparison.png b/strategy_comparison.png
index bcabdaa67b4519b11c1a7cfcc24ba2a35e5327d1..cf7d61d9ba5784ff527eee96c0de4ca903563ab4 100644
Binary files a/strategy_comparison.png and b/strategy_comparison.png differ
diff --git a/tmc.py b/tmc.py
index cb3b32ba32a97c12a4ca16bab29bb09d94babedc..73b496ee05a5f81bb58c708d8078229bb09173fc 100644
--- a/tmc.py
+++ b/tmc.py
@@ -2,12 +2,14 @@ import numpy as np
 
 class TransitionMatrixCalculator:
     def __init__(self):
+        # Initialize the size of the transition matrices
         self.size = 15
         self.matrix_safe = np.zeros((self.size , self.size ))
         self.matrix_normal = np.zeros((self.size , self.size ))
         self.matrix_risky = np.zeros((self.size , self.size ))
 
-    def compute_transition_matrix(self, layout, circle=False):
+    def compute_transition_matrix(self, layout : list , circle : bool):
+        # Compute transition matrices for safe, normal, and risky scenarios
         self.matrix_safe = self._compute_safe_matrix()
         self.matrix_normal, _ = self._compute_normal_matrix(layout, circle)
         self.matrix_risky, _ = self._compute_risky_matrix(layout, circle)
@@ -16,11 +18,12 @@ class TransitionMatrixCalculator:
 
     def _compute_safe_matrix(self):
+        # Compute transition matrix for safe scenario
         p = np.zeros((self.size ,self.size ))
 
         for k in range(self.size - 1):
             if k == 2:
-                p[k,k+1] = 1/4 # slow lane
-                p[k,k+8] = 1/4 # fast lane
+                p[k,k+1] = 1/4
+                p[k,k+8] = 1/4
             elif k == 9:
                 p[k,k+5] = 1/2
             else:
@@ -29,14 +32,15 @@ class TransitionMatrixCalculator:
         p[self.size -1,self.size -1] = 1
         return p
 
-    def _compute_normal_matrix(self, layout, circle=False):
+    def _compute_normal_matrix(self, layout : list , circle : bool):
+        # Compute transition matrix for normal scenario
         p = np.zeros((self.size ,self.size ))
         jail = np.zeros((self.size ,self.size ))
 
         for k in range(self.size - 1):
             if k == 2:
-                p[k,k+1:k+3] = 1/6 # slow lane # slow lane
-                p[k,k+8:k+10] = 1/6 # fast lane # fast lane
+                p[k,k+1:k+3] = 1/6
+                p[k,k+8:k+10] = 1/6
             elif k == 8:
                 p[k,k+1] = 1/3
                 p[k,k+6] = 1/3
@@ -73,14 +77,15 @@ class TransitionMatrixCalculator:
         p[self.size -1,self.size -1] = 1
         return p, jail
 
-    def _compute_risky_matrix(self, layout, circle=False):
+    def _compute_risky_matrix(self, layout : list , circle : bool):
+        # Compute transition matrix for risky scenario
         p = np.zeros((self.size ,self.size ))
         jail = np.zeros((self.size ,self.size ))
 
         for k in range(self.size -1):
             if k == 2:
-                p[k,k+1:k+4] = 1/8 # slow lane
-                p[k,k+8:k+11] = 1/8 # fast lane
+                p[k,k+1:k+4] = 1/8
+                p[k,k+8:k+11] = 1/8
             elif k == 7:
                 p[k,k+1:k+3] = 1/4
                 p[k,k+7] = 1/4
@@ -131,20 +136,4 @@ class TransitionMatrixCalculator:
                     jail[k,j] = p[k,j]
 
         p[self.size -1,self.size-1] = 1
-        return p, jail
-
-"""
-    def display_matrices(self):
-        print("Safe Matrix:")
-        print(self.matrix_safe)
-        print("\nNormal Matrix:")
-        print(self.matrix_normal)
-        print("\nRisky Matrix:")
-        print(self.matrix_risky)
-
-# Example Usage:
-layout_example = [0]*15
-calculator = TransitionMatrixCalculator()
-calculator.compute_transition_matrix(layout_example, circle=True)
-calculator.display_matrices()
-"""
\ No newline at end of file
+        return p, jail
\ No newline at end of file
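Note: tmc.py likewise loses its commented-out example together with the display_matrices() helper. A short sketch of driving the calculator directly, based on the removed example (plain prints of the matrix attributes stand in for the deleted helper):

    from tmc import TransitionMatrixCalculator

    layout_example = [0] * 15
    calculator = TransitionMatrixCalculator()
    calculator.compute_transition_matrix(layout_example, circle=True)

    # The computed matrices are kept on the instance
    print(calculator.matrix_safe)
    print(calculator.matrix_normal)
    print(calculator.matrix_risky)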
diff --git a/validation.py b/validation.py
index 02e656b03b8a06c2bc8a1f9fb14f3b0c7a958904..7e72b5e1340e6042451f7018ac1624e56f972c68 100644
--- a/validation.py
+++ b/validation.py
@@ -1,44 +1,55 @@
 import random as rd
 import numpy as np
 from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import MarkovDecisionSolver as mD
-
+from markovDecision import MarkovDecisionProcess as mD
 
+# Class for performing validation and simulation
 class Validation:
-    def __init__(self, layout, circle=False):
+    def __init__(self, layout : list, circle : bool):
+        # Initialize with layout and circle configuration
         self.layout = layout
         self.circle = circle
+
+        # Initialize TransitionMatrixCalculator instance for transition matrix computation
         self.tmc_instance = tmc()
+
+        # Compute transition matrices for safe, normal, and risky dice
         self.safe_dice = self.tmc_instance._compute_safe_matrix()
         self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
         self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
 
+        # Use MarkovDecisionSolver to find optimal policy and expected costs
         solver = mD(self.layout, self.circle)
         self.expec, self.optimal_policy = solver.solve()
 
+        # Predefined strategies for different dice types
         self.safe_strategy = [1] * len(layout)
         self.normal_strategy = [2] * len(layout)
         self.risky_strategy = [3] * len(layout)
         self.random_strategy = [rd.choice([0, 1, 2, 3]) for _ in range(len(layout))]
 
+        # Dictionary to store costs by dice type
         self.costs_by_dice_type = {
             'SafeDice': [0] * len(layout),
             'NormalDice': [0] * len(layout),
             'RiskyDice': [0] * len(layout)
         }
 
-        for i, die_type in enumerate(self.layout):
+        # Assign costs based on dice type to the respective lists in the dictionary
+        for i, die_type in enumerate(self.layout) :
            self.costs_by_dice_type['SafeDice'][i] = 1 if die_type == 3 else 0
            self.costs_by_dice_type['NormalDice'][i] = 2 if die_type == 3 else 0
            self.costs_by_dice_type['RiskyDice'][i] = 3 if die_type == 3 else 0
 
-    def simulate_game(self, strategy, n_iterations=10000):
+
+    def simulate_game(self, strategy: list, n_iterations: int):
+        """Simulate the game using a given strategy over multiple iterations."""
         transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
-        number_turns = []
+        total_turns = np.zeros(n_iterations)
 
-        for _ in range(n_iterations):
-            total_turns = 0
-            k = 0  # initial state
+        for i in range(n_iterations):
+            k = 0
+            turns = 0
 
             while k < len(self.layout) - 1:
                 action = strategy[k]
@@ -50,32 +61,34 @@ class Validation:
                 k = np.random.choice(len(self.layout), p=flattened_probs)
 
-                if self.layout[k] == 3 and action == 2:
-                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
-                elif self.layout[k] == 3 and action == 3:
-                    total_turns += 2
+                if self.layout[k] == 3:
+                    if action == 2:
+                        turns += np.random.choice([1, 2], p=[0.5, 0.5])
+                    elif action == 3:
+                        turns += 2
                 else:
-                    total_turns += 1
+                    turns += 1
+
+            total_turns[i] = turns
 
-            number_turns.append(total_turns)
+        return np.mean(total_turns)
 
-        return np.mean(number_turns)
 
 
-    def simulate_state(self, strategy, layout, circle, n_iterations=10000):
+    def simulate_state(self, strategy: list, layout: list, circle: bool, n_iterations: int):
+        """Simulate game states using a given strategy."""
         safe_dice = self.tmc_instance._compute_safe_matrix()
         normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0]
         risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0]
 
         transition_matrices = [safe_dice, normal_dice, risky_dice]
-        number_turns = []
-        number_mean = []
+        total_turns = []
 
         for _ in range(n_iterations):
-            number_turns = []
+            state_turns = np.zeros(len(layout) - 1)  # Use a numpy array to store the turns per state
 
             for state in range(len(layout) - 1):
-                total_turns = 0
                 k = state
+                turns = 0
 
                 while k < len(layout) - 1:
                     action = strategy[k]
@@ -87,25 +100,27 @@
                     k = np.random.choice(len(layout), p=flattened_probs)
 
-                    if layout[k] == 3 and action == 2:
-                        total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
-                    elif layout[k] == 3 and action == 3:
-                        total_turns += 2
+                    if layout[k] == 3:
+                        if action == 2:
+                            turns += np.random.choice([1, 2], p=[0.5, 0.5])  # Use numpy for the randomization
+                        elif action == 3:
+                            turns += 2
                     else:
-                        total_turns += 1
+                        turns += 1
 
-                number_turns.append(total_turns)
+                state_turns[state] = turns
 
-            number_mean.append(number_turns)
+            total_turns.append(state_turns)
 
-        # calculate the average number of turns for each state
-        mean_turns = np.mean(number_mean, axis=0)
+        mean_turns = np.mean(total_turns, axis=0)
 
         return mean_turns
 
-    def play_optimal_policy(self, n_iterations=10000):
+    def play_optimal_policy(self, n_iterations : int):
+        """Play using the optimal policy for a number of iterations."""
         return self.simulate_game(self.optimal_policy, n_iterations)
 
-    def play_dice_strategy(self, dice_choice, n_iterations=10000):
+    def play_dice_strategy(self, dice_choice, n_iterations : int):
+        """Play using a specific dice strategy for a number of iterations."""
         strategy = {
             'SafeDice': self.safe_strategy,
             'NormalDice': self.normal_strategy,
@@ -117,33 +132,13 @@
 
         return self.simulate_game(strategy, n_iterations)
 
-    def play_random_strategy(self, n_iterations=10000):
+    def play_random_strategy(self, n_iterations : int ):
+        """Play using a random strategy for a number of iterations."""
         return self.simulate_game(self.random_strategy, n_iterations)
 
 
-    def play_empirical_strategy(self):
-        k = 0
-        total_turns = 0
-
-        while k < len(self.layout) - 1:
-            action = self.optimal_policy[k]
-            action_index = int(action) - 1
-            transition_matrix = self.normal_dice
-
-            flattened_probs = transition_matrix[k]
-            flattened_probs /= np.sum(flattened_probs)
-            k = np.random.choice(len(self.layout), p=flattened_probs)
-
-            if self.layout[k] == 3 and action == 2:
-                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
-            elif self.layout[k] == 3 and action == 3:
-                total_turns += 2
-            else:
-                total_turns += 1
-
-        return total_turns
-
-    def compare_empirical_vs_value_iteration(self, num_games=10000):
+    def compare_empirical_vs_value_iteration(self, num_games : int):
+        """Compare expected value iteration turns with empirical turns."""
         value_iteration_turns = self.expec
         empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
@@ -153,14 +148,29 @@
         }
 
         return mean_turns_by_state
+
+    def empirical_cost_of_square(self, strategy: list, n_iterations: int):
+        """Calculate the empirical cost of a square for a given strategy."""
+        total_square_costs = []
+
+        for _ in range(n_iterations):
+            game_cost = self.simulate_game(strategy, 1)
+            square_cost = game_cost ** 2
+            total_square_costs.append(square_cost)
+
+        empirical_cost = np.mean(total_square_costs)
+        return empirical_cost
 
-    def compare_state_based_turns(self, num_games=10000):
+
+    def compare_state_based_turns(self, num_games : int ):
+        # Compare the expected turns from value iteration with empirical state-based turns
         value_iteration = self.expec
         empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
 
         return value_iteration, empirical_turns
 
-    def compare_strategies(self, num_games=10000):
+    def compare_strategies(self, num_games : int):
+        # Compare the costs of different strategies over a number of games
         optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games)
         dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
         dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
@@ -174,71 +184,3 @@
             'RiskyDice': dice3_cost,
             'Random': random_cost
         }
-
-"""
-# Exemple d'utilisation
-layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-circle = False
-validation_instance = Validation(layout, circle)
-
-# Comparaison entre la stratégie empirique et la value iteration
-turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
-
-# Affichage des moyennes de tours pour chaque état
-num_states = len(layout)
-for state in range(num_states - 1):
-    print(f"État {state}:")
-    print(f"  ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
-    print(f"  Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
-
-# Exécution de la stratégie empirique une fois
-empirical_strategy_result = validation_instance.play_empirical_strategy()
-print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
-
-# Comparaison entre la stratégie empirique et la value iteration sur plusieurs jeux
-comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
-print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
-print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
-
-# Coûts des différentes stratégies
-optimal_cost = validation_instance.play_optimal_policy(n_iterations=1000000)
-print("Optimal Strategy Cost:", optimal_cost)
-
-dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=1000000)
-print("Safe Dice Strategy Cost:", dice1_cost)
-
-dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=1000000)
-print("Normal Dice Strategy Cost:", dice2_cost)
-
-dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=1000000)
-print("Risky Dice Strategy Cost:", dice3_cost)
-
-random_cost = validation_instance.play_random_strategy(n_iterations=1000000)
-print("Random Strategy Cost:", random_cost)
-
-# Comparaison entre les stratégies
-strategy_comparison = validation_instance.compare_strategies(num_games=1000000)
-print("Strategy Comparison Results:", strategy_comparison)
-
-# Calcul des tours moyens pour différentes stratégies
-optimal_policy = validation_instance.optimal_policy
-mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=1000000)
-print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
-
-safe_dice_strategy = validation_instance.safe_strategy
-mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=1000000)
-print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
-
-normal_dice_strategy = validation_instance.normal_strategy
-mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=1000000)
-print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
-
-risky_dice_strategy = validation_instance.risky_strategy
-mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=1000000)
-print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
-
-random_dice_strategy = validation_instance.random_strategy
-mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=1000000)
-print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
-
-"""
\ No newline at end of file
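Note: the long commented-out driver removed from the end of validation.py relied on play_empirical_strategy(), which this change deletes. A condensed sketch using only the methods that remain, with the layout taken from that removed block (the loop and print formatting are illustrative):

    from validation import Validation

    layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    validation_instance = Validation(layout, circle=False)

    # Average cost of each strategy over 10000 simulated games
    print("Strategy comparison:", validation_instance.compare_strategies(num_games=10000))

    # Expected turns per square: value iteration versus empirical simulation
    turns = validation_instance.compare_empirical_vs_value_iteration(num_games=10000)
    for state, (vi, emp) in enumerate(zip(turns['ValueIteration'], turns['Empirical'])):
        print(f"State {state}: ValueIteration {vi:.2f} / Empirical {emp:.2f}")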