diff --git a/100k_false/Figure_1.png b/100k_false/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..31946fbc5b2b23f1e2593c0f375dbadaba451293 Binary files /dev/null and b/100k_false/Figure_1.png differ diff --git a/100k_false/Figure_2.png b/100k_false/Figure_2.png new file mode 100644 index 0000000000000000000000000000000000000000..0283979c5bcbb4307a04becca47af171d1b82354 Binary files /dev/null and b/100k_false/Figure_2.png differ diff --git a/100k_false/Figure_3.png b/100k_false/Figure_3.png new file mode 100644 index 0000000000000000000000000000000000000000..45512984bee3f6cfef0249501251ffcc2c8a62c9 Binary files /dev/null and b/100k_false/Figure_3.png differ diff --git a/10k_false/Figure_1.png b/10k_false/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bcabdaa67b4519b11c1a7cfcc24ba2a35e5327d1 Binary files /dev/null and b/10k_false/Figure_1.png differ diff --git a/10k_false/Figure_2.png b/10k_false/Figure_2.png new file mode 100644 index 0000000000000000000000000000000000000000..61902c78045f28a7a9257f9c3951ee1acb754d36 Binary files /dev/null and b/10k_false/Figure_2.png differ diff --git a/10k_false/Figure_3.png b/10k_false/Figure_3.png new file mode 100644 index 0000000000000000000000000000000000000000..bbd6865943b059bd63d5349b6d286dbf1f0fdf03 Binary files /dev/null and b/10k_false/Figure_3.png differ diff --git a/1M_false/Figure_1.png b/1M_false/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..937a30ff16c5d7473d37dc41b4f72039b6c6b0fa Binary files /dev/null and b/1M_false/Figure_1.png differ diff --git a/1M_false/Figure_2.png b/1M_false/Figure_2.png new file mode 100644 index 0000000000000000000000000000000000000000..8a5df879c9d42a2e448dad8ee8eca0689094f4ee Binary files /dev/null and b/1M_false/Figure_2.png differ diff --git a/1M_false/Figure_3.png b/1M_false/Figure_3.png new file mode 100644 index 
import numpy as np
from tmc import TransitionMatrixCalculator as tmc


class MarkovDecisionSolver:
    """Value-iteration solver for the Snakes-and-Ladders dice game.

    Computes, for every square of a 15-square board, the expected number of
    turns needed to reach the final square and the optimal die to throw
    (1 = safe, 2 = normal, 3 = risky).
    """

    def __init__(self, layout: list, circle: bool):
        # Number of squares/states on the board.
        self.Numberk = 15
        self.tmc_instance = tmc()
        # Transition matrices per die.  The normal/risky helpers also return a
        # dedicated "jail" matrix which is discarded here; the vi helpers below
        # approximate the jail penalty from the transition rows instead.
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Indices of the prison squares (trap type 3) in the layout.
        self.jail = [i for i, x in enumerate(layout) if x == 3]
        # Expected cost-to-go per state, refined by value iteration.
        self.ValueI = np.zeros(self.Numberk)
        # Optimal die recorded for each non-final state.
        self.DiceForStates = np.zeros(self.Numberk - 1)

    def _compute_vi_safe(self, k):
        # FIX: the original added the NORMAL die's jail penalty
        # (np.sum(self.normal_dice[k][self.jail])) to the safe die's value.
        # The safe die has no jail component (tmc's safe matrix is returned
        # without a jail matrix), so only the expected cost-to-go remains.
        return np.dot(self.safe_dice[k], self.ValueI)

    def _compute_vi_normal(self, k):
        # Expected cost-to-go plus one extra turn weighted by the probability
        # of reaching a prison square with the normal die.
        # NOTE(review): this uses the post-trap transition row rather than the
        # dedicated jail matrix returned by tmc — confirm against the model.
        return np.dot(self.normal_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])

    def _compute_vi_risky(self, k):
        # Same as _compute_vi_normal but for the risky die.
        return np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])

    def solve(self):
        """Run value iteration until the values stop changing.

        Returns:
            [Expec, DiceForStates]: expected number of turns from each
            non-final state, and the optimal die (1/2/3) for each of
            those states.
        """
        while True:
            ValueINew = np.zeros(self.Numberk)

            for k in range(self.Numberk - 1):
                vi_safe = self._compute_vi_safe(k)
                vi_normal = self._compute_vi_normal(k)
                vi_risky = self._compute_vi_risky(k)

                # Every throw costs one turn, hence the leading 1; choose the
                # die minimising the expected total cost.
                min_value = min(vi_safe, vi_normal, vi_risky)
                if min_value == vi_safe:
                    ValueINew[k] = 1 + vi_safe
                    self.DiceForStates[k] = 1
                elif min_value == vi_normal:
                    ValueINew[k] = 1 + vi_normal
                    self.DiceForStates[k] = 2
                else:
                    ValueINew[k] = 1 + vi_risky
                    self.DiceForStates[k] = 3

            converged = np.allclose(ValueINew, self.ValueI)
            self.ValueI = ValueINew
            if converged:
                break

        # Drop the absorbing final state (cost 0) from the reported values.
        Expec = self.ValueI[:-1]
        return [Expec, self.DiceForStates]


def markovDecision(layout: list, circle: bool):
    """Solve the board `layout`; `circle` selects the exact-landing rule."""
    solver = MarkovDecisionSolver(layout, circle)
    return solver.solve()


if __name__ == "__main__":
    # Example board: squares 2 and 10 are prisons, 6 a penalty, 13 a restart.
    example_layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    print("\nWin as soon as land on or overstep the final square")
    print(markovDecision(example_layout, circle=False))
    print("\nStopping on the square to win")
    print(markovDecision(example_layout, circle=True))


# --- plot.py ---------------------------------------------------------------

import matplotlib.pyplot as plt
from validation import Validation as Val

# Example layout and circle settings
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False

# Shared Validation instance used by all plotting helpers below.
validation_instance = Val(layout, circle)


def plot_strategy_comparison(num_games=10000):
    """Bar-plot the average cost of each strategy over `num_games` games."""
    strategy_costs = validation_instance.compare_strategies(num_games=num_games)

    plt.figure(figsize=(10, 6))
    plt.bar(strategy_costs.keys(), strategy_costs.values(),
            color=['blue', 'green', 'orange', 'red', 'purple'])
    plt.xlabel('Strategies')
    plt.ylabel('Average Cost')
    plt.title('Comparison of Strategies')
    plt.savefig('strategy_comparison.png')  # Save the plot
    plt.show()
def plot_state_based_turns(save=True):
    """Plot average turns per starting state for every strategy on one figure.

    Args:
        save: when True, also write the figure to disk before showing it.
    """
    strategies = [validation_instance.optimal_policy,
                  validation_instance.safe_strategy,
                  validation_instance.normal_strategy,
                  validation_instance.risky_strategy,
                  validation_instance.random_strategy]
    strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random']

    plt.figure(figsize=(12, 6))
    for strategy, name in zip(strategies, strategy_names):
        mean_turns = validation_instance.simulate_state(strategy, layout, circle)
        plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name)

    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.title('Average Turns per State for Different Strategies')
    plt.grid(True)
    plt.legend()

    # FIX: the `save` parameter was ignored (the savefig call was commented
    # out); honour it so the function matches its signature.
    if save:
        plt.savefig('state_based_turns_all_strategies.png')

    plt.show()


def plot_state_based_comparison(validation_instance, num_games=10000):
    """Compare value-iteration predictions with empirical per-state averages."""
    optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)

    plt.figure(figsize=(12, 6))
    # Theoretical (value-iteration) turns vs. simulated (empirical) turns.
    plt.plot(range(len(optimal_turns)), optimal_turns, marker='o', linestyle='-', label='ValueIteration')
    plt.plot(range(len(empirical_turns)), empirical_turns, marker='x', linestyle='-', label='Empirical')
    plt.xlabel('State')
    plt.ylabel('Average Turns')
    plt.title('Average Turns per State - ValueIteration vs. Empirical')
    plt.grid(True)
    plt.legend()
    plt.show()


# Main entry point to generate and save all plots.
if __name__ == '__main__':
    plot_strategy_comparison(num_games=10000)
    plot_state_based_turns(save=True)
    plot_state_based_comparison(validation_instance, num_games=10000)


# --- tmc.py ----------------------------------------------------------------

import numpy as np


class TransitionMatrixCalculator:
    """Builds 15x15 transition matrices for the safe, normal and risky dice.

    Squares are numbered 0..14; square 2 forks into a slow lane and a fast
    lane, and square 14 is the absorbing final square.
    """

    def __init__(self):
        self.size = 15  # number of board squares/states
        self.matrix_safe = np.zeros((self.size, self.size))
        self.matrix_normal = np.zeros((self.size, self.size))
        self.matrix_risky = np.zeros((self.size, self.size))

    def compute_transition_matrix(self, layout, circle=False):
        """Compute, cache and return the three transition matrices for `layout`."""
        self.matrix_safe = self._compute_safe_matrix()
        self.matrix_normal, _ = self._compute_normal_matrix(layout, circle)
        self.matrix_risky, _ = self._compute_risky_matrix(layout, circle)
        return self.matrix_safe, self.matrix_normal, self.matrix_risky

    def _compute_safe_matrix(self):
        """Safe die: advance 0 or 1 square (1/2 each); traps are never relevant here."""
        p = np.zeros((self.size, self.size))
        for k in range(self.size - 1):
            if k == 2:
                # Fork square: split the forward move between the two lanes.
                p[k, k + 1] = 1 / 4  # slow lane
                p[k, k + 8] = 1 / 4  # fast lane
            elif k == 9:
                # End of the slow lane jumps to the final stretch.
                p[k, k + 5] = 1 / 2
            else:
                p[k, k + 1] = 1 / 2
            p[k, k] = 1 - np.sum(p[k])  # remaining mass: stay in place
        p[self.size - 1, self.size - 1] = 1  # final square is absorbing
        return p

    def _compute_normal_matrix(self, layout, circle=False):
        """Normal die: advance 0, 1 or 2 squares (1/3 each); traps fire with prob 1/2.

        Returns:
            (p, jail): transition matrix and a matrix holding, per (from, to),
            the probability mass that incurs the prison penalty.
        """
        p = np.zeros((self.size, self.size))
        jail = np.zeros((self.size, self.size))

        for k in range(self.size - 1):
            if k == 2:
                p[k, k + 1:k + 3] = 1 / 6   # slow lane
                p[k, k + 8:k + 10] = 1 / 6  # fast lane
            elif k == 8:
                p[k, k + 1] = 1 / 3
                p[k, k + 6] = 1 / 3
            elif k == 9:
                if circle:
                    p[k, k + 5] = 1 / 3
                    p[k, 0] = 1 / 3  # overshoot wraps back to start
                else:
                    p[k, k + 5] = 2 / 3  # overshoot still finishes
            elif k == 13:
                if circle:
                    p[k, k + 1] = 1 / 3
                    p[k, 0] = 1 / 3
                else:
                    p[k, k + 1] = 2 / 3
            else:
                p[k, k + 1] = 1 / 3
                p[k, k + 2] = 1 / 3
            p[k, k] = 1 - np.sum(p[k])  # remaining mass: stay in place

        # Redistribute probability mass according to the traps (fire prob 1/2).
        for k in range(self.size - 1):
            for j in range(self.size - 1):
                trap = layout[j]
                if trap == 1:
                    # Restart trap: half the mass goes back to square 0.
                    if j != 0:
                        p[k, 0] += p[k, j] / 2
                        p[k, j] /= 2
                elif trap == 2:
                    # Penalty trap: half the mass moves three squares back.
                    p[k, j - 3 if j - 3 >= 0 else 0] += p[k, j] / 2
                    p[k, j] /= 2
                elif trap == 3:
                    # Prison trap: record the mass that loses an extra turn.
                    # NOTE(review): p[k, j] itself is left untouched here —
                    # confirm this matches the intended trap model.
                    jail[k, j] = p[k, j] / 2

        p[self.size - 1, self.size - 1] = 1
        return p, jail

    def _compute_risky_matrix(self, layout, circle=False):
        """Risky die: advance 0..3 squares (1/4 each); traps always fire.

        Returns:
            (p, jail): transition matrix and per-(from, to) prison mass.
        """
        p = np.zeros((self.size, self.size))
        jail = np.zeros((self.size, self.size))

        for k in range(self.size - 1):
            if k == 2:
                p[k, k + 1:k + 4] = 1 / 8   # slow lane
                p[k, k + 8:k + 11] = 1 / 8  # fast lane
            elif k == 7:
                p[k, k + 1:k + 3] = 1 / 4
                p[k, k + 7] = 1 / 4
            elif k == 8:
                if circle:
                    p[k, k + 1] = 1 / 4
                    p[k, k + 6] = 1 / 4
                    p[k, 0] = 1 / 4  # overshoot wraps back to start
                else:
                    p[k, k + 1] = 1 / 4
                    p[k, k + 6] = 1 / 2
            elif k == 9:
                if circle:
                    p[k, k + 5] = 1 / 4
                    p[k, 0] = 1 / 4
                    p[k, 1] = 1 / 4
                else:
                    p[k, k + 5] = 3 / 4
            elif k == 12:
                if circle:
                    p[k, k + 1:k + 3] = 1 / 4
                    p[k, 0] = 1 / 4
                else:
                    p[k, k + 1] = 1 / 4
                    p[k, k + 2] = 1 / 2
            elif k == 13:
                if circle:
                    p[k, k + 1] = 1 / 4
                    p[k, 0] = 1 / 4
                    p[k, 1] = 1 / 4
                else:
                    p[k, self.size - 1] = 3 / 4
            else:
                p[k, k + 1:k + 4] = 1 / 4
            p[k, k] = 1 - np.sum(p[k])  # remaining mass: stay in place

        # Traps always fire with the risky die: move the full mass.
        for k in range(self.size - 1):
            for j in range(self.size - 1):
                trap = layout[j]
                if trap == 1:
                    if j != 0:
                        p[k, 0] += p[k, j]
                        p[k, j] = 0
                elif trap == 2:
                    p[k, j - 3 if j - 3 >= 0 else 0] += p[k, j]
                    p[k, j] = 0
                elif trap == 3:
                    jail[k, j] = p[k, j]

        p[self.size - 1, self.size - 1] = 1
        return p, jail
import random as rd

import numpy as np

from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD


class Validation:
    """Empirical validation of the value-iteration policy by Monte-Carlo simulation."""

    def __init__(self, layout, circle=False):
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        # Transition matrices for each die (dedicated jail matrices discarded).
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)

        # Optimal expected costs and policy from value iteration.
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Constant single-die strategies plus a uniformly random one.
        self.safe_strategy = [1] * len(layout)
        self.normal_strategy = [2] * len(layout)
        self.risky_strategy = [3] * len(layout)
        # FIX: the original drew from [0, 1, 2, 3]; action 0 indexed
        # transition_matrices[-1] (the risky die) while being costed as safe.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(len(layout))]

        # Extra-turn cost a prison square (trap type 3) would incur per die.
        self.costs_by_dice_type = {
            'SafeDice': [0] * len(layout),
            'NormalDice': [0] * len(layout),
            'RiskyDice': [0] * len(layout),
        }
        for i, square in enumerate(self.layout):
            self.costs_by_dice_type['SafeDice'][i] = 1 if square == 3 else 0
            self.costs_by_dice_type['NormalDice'][i] = 2 if square == 3 else 0
            self.costs_by_dice_type['RiskyDice'][i] = 3 if square == 3 else 0

    def simulate_game(self, strategy, n_iterations=10000):
        """Average number of turns to finish a game when following `strategy`."""
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []

        for _ in range(n_iterations):
            total_turns = 0
            k = 0  # start square

            while k < len(self.layout) - 1:
                action = strategy[k]
                transition_matrix = transition_matrices[int(action) - 1]

                # FIX: normalise into a NEW array; the original used `/=` on a
                # row view, permanently mutating the shared transition matrix.
                row = transition_matrix[k]
                probs = row / np.sum(row)

                k = np.random.choice(len(self.layout), p=probs)

                # Prison squares cost extra turns depending on the die thrown.
                if self.layout[k] == 3 and action == 2:
                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif self.layout[k] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1

            number_turns.append(total_turns)

        return np.mean(number_turns)

    def simulate_state(self, strategy, layout, circle, n_iterations=10000):
        """Average number of turns to finish from every starting state."""
        safe_dice = self.tmc_instance._compute_safe_matrix()
        normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0]
        risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0]
        transition_matrices = [safe_dice, normal_dice, risky_dice]
        number_mean = []

        for _ in range(n_iterations):
            turns_per_state = []

            for state in range(len(layout) - 1):
                total_turns = 0
                k = state

                while k < len(layout) - 1:
                    action = strategy[k]
                    transition_matrix = transition_matrices[int(action) - 1]

                    # Normalised copy — never mutate the shared matrices.
                    row = transition_matrix[k]
                    probs = row / np.sum(row)
                    k = np.random.choice(len(layout), p=probs)

                    if layout[k] == 3 and action == 2:
                        total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                    elif layout[k] == 3 and action == 3:
                        total_turns += 2
                    else:
                        total_turns += 1

                turns_per_state.append(total_turns)

            number_mean.append(turns_per_state)

        # Average over iterations, one mean per starting state.
        return np.mean(number_mean, axis=0)

    def play_optimal_policy(self, n_iterations=10000):
        """Average cost of following the value-iteration policy."""
        return self.simulate_game(self.optimal_policy, n_iterations)

    def play_dice_strategy(self, dice_choice, n_iterations=10000):
        """Average cost of always throwing one die.

        Args:
            dice_choice: 'SafeDice', 'NormalDice' or 'RiskyDice'.

        Raises:
            ValueError: if `dice_choice` is not one of the three names.
        """
        strategy = {
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
        }.get(dice_choice)

        if strategy is None:
            raise ValueError("Invalid dice choice")

        return self.simulate_game(strategy, n_iterations)

    def play_random_strategy(self, n_iterations=10000):
        """Average cost of throwing a uniformly random die each turn."""
        return self.simulate_game(self.random_strategy, n_iterations)

    def play_empirical_strategy(self):
        """Play ONE game with the optimal policy; return the number of turns."""
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        k = 0
        total_turns = 0

        while k < len(self.layout) - 1:
            action = self.optimal_policy[k]
            # FIX: the original computed the action index but then always used
            # the normal-die matrix; use the matrix matching the chosen die.
            transition_matrix = transition_matrices[int(action) - 1]

            row = transition_matrix[k]
            probs = row / np.sum(row)
            k = np.random.choice(len(self.layout), p=probs)

            if self.layout[k] == 3 and action == 2:
                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif self.layout[k] == 3 and action == 3:
                total_turns += 2
            else:
                total_turns += 1

        return total_turns

    def compare_empirical_vs_value_iteration(self, num_games=10000):
        """Per-state mean turns: value iteration vs. simulation of its policy."""
        value_iteration_turns = self.expec
        empirical_turns = self.simulate_state(self.optimal_policy, self.layout,
                                              self.circle, n_iterations=num_games)
        return {
            'ValueIteration': value_iteration_turns.tolist(),
            'Empirical': empirical_turns.tolist(),
        }

    def compare_state_based_turns(self, num_games=10000):
        """Return (value-iteration turns, empirical turns) per state."""
        empirical_turns = self.simulate_state(self.optimal_policy, self.layout,
                                              self.circle, n_iterations=num_games)
        return self.expec, empirical_turns

    def compare_strategies(self, num_games=10000):
        """Average cost of each strategy over `num_games` simulated games."""
        return {
            'Optimal': self.simulate_game(self.optimal_policy, n_iterations=num_games),
            'SafeDice': self.simulate_game(self.safe_strategy, n_iterations=num_games),
            'NormalDice': self.simulate_game(self.normal_strategy, n_iterations=num_games),
            'RiskyDice': self.simulate_game(self.risky_strategy, n_iterations=num_games),
            'Random': self.simulate_game(self.random_strategy, n_iterations=num_games),
        }