diff --git a/.DS_Store b/.DS_Store index 55d82a11e81b11f3bbb9102c10fed3bcce6672f9..6d15e9587b9e9d630aec3bbcdf9497b36b9e9b42 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/markov.py.py b/ancien/markoVVV.py similarity index 98% rename from markov.py.py rename to ancien/markoVVV.py index 286d24310db443276ac5784480fbc5317983a1b9..8e9292188e820228902c3b55253edbe590055f82 100644 --- a/markov.py.py +++ b/ancien/markoVVV.py @@ -1,5 +1,5 @@ import numpy as np -from tmc import TransitionMatrixCalculator as tmc +from ancien.tmc import TransitionMatrixCalculator as tmc class MarkovDecisionSolver: def __init__(self, layout: list, circle: bool): diff --git a/ancien/mdppp.py b/ancien/mdppp.py new file mode 100644 index 0000000000000000000000000000000000000000..5c88f50f03c56e86b968d65cf4854ece1423cf7c --- /dev/null +++ b/ancien/mdppp.py @@ -0,0 +1,71 @@ +import numpy as np +from ancien.tmc import TransitionMatrixCalculator as tmc + +class MarkovDecisionSolver: + def __init__(self, layout : list, circle : bool): + self.Numberk = 15 + self.tmc_instance = tmc() + self.safe_dice = self.tmc_instance._compute_safe_matrix() + self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) + self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + self.jail = [i for i, x in enumerate(layout) if x == 3] + self.ValueI = np.zeros(self.Numberk) + self.DiceForStates = np.zeros(self.Numberk - 1) + + def _compute_vi_safe(self, k): + return np.dot(self.safe_dice[k], self.ValueI) + + def _compute_vi_normal(self, k): + vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail]) + return vi_normal + + def _compute_vi_risky(self, k): + vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail]) + return vi_risky + + def solve(self): + i = 0 + while True: + ValueINew = np.zeros(self.Numberk) + i += 1 + + for k in range(self.Numberk - 1): + vi_safe = self._compute_vi_safe(k) + vi_normal = self._compute_vi_normal(k) + vi_risky = self._compute_vi_risky(k) + + ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky) + + if ValueINew[k] == 1 + vi_safe: + self.DiceForStates[k] = 1 + elif ValueINew[k] == 1 + vi_normal: + self.DiceForStates[k] = 2 + else: + self.DiceForStates[k] = 3 + + if np.allclose(ValueINew, self.ValueI): + self.ValueI = ValueINew + break + + self.ValueI = ValueINew + + Expec = self.ValueI[:-1] + return [Expec, self.DiceForStates] + +def markovDecision(layout : list, circle : bool): + solver = MarkovDecisionSolver(layout, circle) + return solver.solve() + + +# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle +layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] + + +# Résolution du problème avec différents modes de jeu +result_false = markovDecision(layout, circle=False) +print("\nWin as soon as land on or overstep the final square") +print(result_false) + +result_true = markovDecision(layout, circle=True) +print("\nStopping on the square to win") +print(result_true) diff --git a/ancien/plotinggg.py b/ancien/plotinggg.py new file mode 100644 index 0000000000000000000000000000000000000000..d1eb1e05f7fa1bcc3d90851825f67cc1f812997e --- /dev/null +++ b/ancien/plotinggg.py @@ -0,0 +1,82 @@ +import matplotlib.pyplot as plt +from ancien.validation import validation +import numpy as np + +# Example layout and circle settings +layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +circle = False + +# Create an instance of validation +validation_instance = 
validation(layout, circle) + + +# Plotting function for strategy comparison +def plot_strategy_comparison(num_games=1000): + strategy_costs = validation_instance.compare_strategies(num_games=num_games) + + # Bar plot for strategy comparison + plt.figure(figsize=(10, 6)) + plt.bar(strategy_costs.keys(), strategy_costs.values(), color=['blue', 'green', 'orange', 'red', 'purple']) + plt.xlabel('Strategies') + plt.ylabel('Average Cost') + plt.title('Comparison of Strategies') + plt.savefig('strategy_comparison.png') # Save the plot + plt.show() + +# Plotting function for state-based average turns for all strategies on the same plot +def plot_state_based_turns(save=True): + strategies = [validation_instance.optimal_strategy, + validation_instance.safe_strategy, + validation_instance.normal_strategy, + validation_instance.risky_strategy, + validation_instance.random_strategy] + strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random'] + + plt.figure(figsize=(12, 6)) + for strategy, name in zip(strategies, strategy_names): + mean_turns = validation_instance.simulate_state(strategy, layout, circle) + plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name) + + plt.xlabel('State') + plt.ylabel('Average Turns') + plt.title('Average Turns per State for Different Strategies') + plt.grid(True) + plt.legend() + + #if save: + #plt.savefig('state_based_turns_all_strategies.png') # Save the plot + + plt.show() + +def plot_state_based_comparison(validation_instance, num_games=1000): + optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games) + + # Plotting the state-based average turns comparison + plt.figure(figsize=(12, 6)) + + # Plot optimal strategy turns + plt.plot(range(len(optimal_turns)), optimal_turns, marker='o', linestyle='-', label='ValueIteration') + + # Plot empirical strategy turns + plt.plot(range(len(empirical_turns)), empirical_turns, marker='x', linestyle='-', label='Empirical') + + plt.xlabel('State') + plt.ylabel('Average Turns') + plt.title('Average Turns per State - ValueIteration vs. 
Empirical') + plt.grid(True) + plt.legend() + + plt.show() + + + + +# Main function to generate and save plots +if __name__ == '__main__': + # Example of strategy comparison plot + plot_strategy_comparison(num_games=1000) + + # Example of state-based average turns plot for all strategies on the same plot + plot_state_based_turns(save=True) + + plot_state_based_comparison(validation_instance, num_games=1000) \ No newline at end of file diff --git a/ancien/tmcccc.py b/ancien/tmcccc.py new file mode 100644 index 0000000000000000000000000000000000000000..388cc13756e4a26351ad0785fc55a89793bfb6e3 --- /dev/null +++ b/ancien/tmcccc.py @@ -0,0 +1,197 @@ +import numpy as np +import random as rd + +class TransitionMatrixCalculator: + def __init__(self): + # Initialisation des matrices de transition pour les dés "safe", "normal" et "risky" + self.matrix_safe = np.zeros((15, 15)) + self.matrix_normal = np.zeros((15, 15)) + self.matrix_risky = np.zeros((15, 15)) + # Probability to go from state k to k' + self.safe_dice = np.array([1/2, 1/2]) + self.normal_dice = np.array([1/3, 1/3, 1/3]) + self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) + + def compute_transition_matrix(self, layout, circle=False): + self.matrix_safe.fill(0) + self.matrix_normal.fill(0) + self.matrix_risky.fill(0) + + self._compute_safe_matrix() + self._compute_normal_matrix(layout, circle) + self._compute_risky_matrix(layout, circle) + + return self.matrix_safe, self.matrix_normal, self.matrix_risky + + + def _compute_safe_matrix(self): + for k in range(15): + for s, p in enumerate(self.safe_dice): + if k == 9 and s == 1: + k_prime = 14 + self.matrix_safe[k,k_prime] += p + elif k == 2 and s > 0: + p /= 2 + k_prime = 10 + self.matrix_safe[k,k_prime] += p + k_prime = 3 + self.matrix_safe[k,k_prime] += p + else: + k_prime = k + s + k_prime = min(14, k_prime) + self.matrix_safe[k,k_prime] += p + + return self.matrix_safe + + def _compute_normal_matrix(self, layout, circle): + for k in range(15): + for s, p in enumerate(self.normal_dice): + if k == 8 and s == 2: + k_prime = 14 + self.matrix_normal[k,k_prime] += p + continue + elif k == 9 and s in [1, 2]: + if not circle or s == 1: + k_prime = 14 + self.matrix_normal[k,k_prime] += p + elif circle and s == 2: + k_prime = 0 + self.matrix_normal[k,k_prime] += p + continue + + # handle the fast lane + if k == 2 and s > 0: + p /= 2 + k_prime = 10 + (s - 1) # rebalance the step before with s > 0 + if layout[k_prime] in [0, 3]: # normal or prison square + self.matrix_normal[k,k_prime] += p + elif layout[k_prime] == 1: # handle type 1 trap + self.matrix_normal[k,k_prime] += p / 2 + k_prime = 0 + self.matrix_normal[k,k_prime] += p / 2 + elif layout[k_prime] == 2: # handle type 2 trap + self.matrix_normal[k,k_prime] += p / 2 + if k_prime == 10: + k_prime = 0 + elif k_prime == 11: + k_prime = 1 + elif k_prime == 12: + k_prime = 2 + else: + k_prime = max(0, k_prime - 3) + self.matrix_normal[k,k_prime] += p / 2 + k_prime = 3 + (s - 1) # rebalance the step before with s > 0 + if layout[k_prime] in [0, 3]: # normal or prison square + self.matrix_normal[k,k_prime] += p + elif layout[k_prime] == 1: # handle type 1 trap + self.matrix_normal[k,k_prime] += p / 2 + k_prime = 0 + self.matrix_normal[k,k_prime] += p / 2 + elif layout[k_prime] == 2: # handle type 2 trap + self.matrix_normal[k,k_prime] += p / 2 + k_prime = max(0, k_prime - 3) + self.matrix_normal[k,k_prime] += p / 2 + continue + + k_prime = k + s + k_prime = k_prime % 15 if circle else min(14, k_prime) # modulo + if layout[k_prime] in [1, 2]: + p /= 
2 + if layout[k_prime] == 1: + k_prime = 0 + self.matrix_normal[k,k_prime] += p + continue + elif layout[k_prime] == 2: + if k_prime == 10: + k_prime = 0 + elif k_prime == 11: + k_prime = 1 + elif k_prime == 12: + k_prime = 2 + else: + k_prime = max(0, k_prime - 3) + self.matrix_normal[k,k_prime] += p + continue + self.matrix_normal[k,k_prime] += p + return self.matrix_normal + + def _compute_risky_matrix(self, layout, circle): + for k in range(15): + for s, p in enumerate(self.risky_dice): + if k == 7 and s == 3: + k_prime = 14 + self.matrix_risky[k,k_prime] += p + continue + elif k == 8 and s in [2, 3]: + if not circle or s == 2: + k_prime = 14 + self.matrix_risky[k,k_prime] += p + elif circle: + k_prime = 0 + self.matrix_risky[k,k_prime] += p + continue + elif k == 9 and s in [1, 2, 3]: + if not circle or s == 1: + k_prime = 14 + self.matrix_risky[k,k_prime] += p + elif circle and s == 2: + k_prime = 0 + self.matrix_risky[k,k_prime] += p + elif circle and s == 3: + k_prime = 1 + if layout[k_prime] != 0: + if layout[k_prime] == 1: + k_prime = 0 + self.matrix_risky[k,k_prime] += p + elif layout[k_prime] == 2: + k_prime = max(0, k_prime - 3) + self.matrix_risky[k,k_prime] += p + self.matrix_risky[k,k_prime] += p + continue + continue + + if k == 2 and s > 0: + p /= 2 + k_prime = 10 + (s - 1) + if layout[k_prime] == 1: + k_prime = 0 + self.matrix_risky[k,k_prime] += p + elif layout[k_prime] == 2: + if k_prime == 10: + k_prime = 0 + elif k_prime == 11: + k_prime = 1 + elif k_prime == 12: + k_prime = 2 + else: + k_prime = max(0, k_prime - 3) + self.matrix_risky[k,k_prime] += p + else: + self.matrix_risky[k,k_prime] += p + k_prime = 3 + (s - 1) + self.matrix_risky[k,k_prime] += p + continue + + k_prime = k + s + k_prime = k_prime % 15 if circle else min(14, k_prime) + if layout[k_prime] in [1, 2]: + if layout[k_prime] == 1: + k_prime = 0 + self.matrix_risky[k,k_prime] += p + continue + elif layout[k_prime] == 2: + if k_prime == 10: + k_prime = 0 + elif k_prime == 11: + k_prime = 1 + elif k_prime == 12: + k_prime = 2 + else: + k_prime = max(0, k_prime - 3) + self.matrix_risky[k,k_prime] += p + continue + self.matrix_risky[k,k_prime] += p + return self.matrix_risky + +#tmc = TransitionMatrixCalculator() +#tmc.tst_transition_matrix() diff --git a/ancien/validationnnnn.py b/ancien/validationnnnn.py new file mode 100644 index 0000000000000000000000000000000000000000..86f6ff7dcb173d0e454286f0753ac9778621a214 --- /dev/null +++ b/ancien/validationnnnn.py @@ -0,0 +1,258 @@ +import random as rd +import numpy as np +import matplotlib.pyplot as plt +from ancien.tmc import TransitionMatrixCalculator as tmc +from ancien.markovDecision import MarkovDecisionSolver as mD + +class validation: + def __init__(self, layout, circle=False): + + # import from other .PY + self.layout = layout + self.circle = circle + self.tmc_instance = tmc() + self.safe_dice = self.tmc_instance._compute_safe_matrix() + self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) + self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + solver = mD(self.layout, self.circle) + self.expec, self.optimal_policy = solver.solve() + + # Define all the strategy + self.optimal_strategy = self.optimal_policy + self.safe_strategy = [1]*len(layout) + self.normal_strategy = [2]*len(layout) + self.risky_strategy = [3]*len(layout) + self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] + + # Définir les coûts par case et par type de dé + self.costs_by_dice_type = { + 'SafeDice': [0] * len(self.layout), + 
'NormalDice': [0] * len(self.layout), + 'RiskyDice': [0] * len(self.layout) + } + + # Remplir les coûts pour chaque case en fonction du type de dé + for i in range(len(self.layout)): + if self.layout[i] == 3: + self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr + self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal + self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué + + + def simulate_game(self, strategy, n_iterations=10000): + transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] + number_turns = [] + + for _ in range(n_iterations): + total_turns = 0 + k = 0 # état initial + + while k < len(self.layout) - 1: + action = strategy[k] # action selon la stratégie + + # Convertir action en entier pour accéder à l'indice correct dans transition_matrices + action_index = int(action) - 1 + transition_matrix = transition_matrices[action_index] + + # Aplatir la matrice de transition en une distribution de probabilité 1D + flattened_probs = transition_matrix[k] + flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités + + # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie + k = np.random.choice(len(self.layout), p=flattened_probs) + + # Mise à jour du nombre de tours en fonction de l'état actuel + if self.layout[k] == 3 and action == 2: + total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 + elif self.layout[k] == 3 and action == 3: + total_turns += 2 + else: + total_turns += 1 + + number_turns.append(total_turns) + + return np.mean(number_turns) + + def simulate_state(self, strategy, layout, circle, n_iterations=10000): + # Compute transition matrices for each dice + tmc_instance = tmc() + P_safe = tmc_instance._compute_safe_matrix() + P_normal = tmc_instance._compute_normal_matrix(layout, circle) + P_risky = tmc_instance._compute_risky_matrix(layout, circle) + + transition_matrices = [P_safe, P_normal, P_risky] + number_turns = [] + number_mean = [] + + for _ in range(n_iterations): + number_turns = [] + + for state in range(len(layout) - 1): + total_turns = 0 + k = state # starting state + + while k < len(layout) - 1: + action = strategy[k] # action based on strategy + action_index = int(action) - 1 + transition_matrix = transition_matrices[action_index] + flattened_probs = transition_matrix[k] + flattened_probs /= np.sum(flattened_probs) + k = np.random.choice(len(layout), p=flattened_probs) + + if layout[k] == 3 and action == 2: + total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 + elif layout[k] == 3 and action == 3: + total_turns += 2 + else: + total_turns += 1 + + number_turns.append(total_turns) + + number_mean.append(number_turns) + + # calculate the average number of turns for each state + mean_turns = np.mean(number_mean, axis=0) + + return mean_turns + + + def play_optimal_strategy(self, n_iterations=10000): + return self.simulate_game(self.optimal_strategy, n_iterations) + + + def play_dice_strategy(self, dice_choice, n_iterations=10000): + if dice_choice == 'SafeDice': + strategy = self.safe_strategy + elif dice_choice == 'NormalDice': + strategy = self.normal_strategy + elif dice_choice == 'RiskyDice': + strategy = self.risky_strategy + else: + raise ValueError("Invalid dice choice") + + return self.simulate_game(strategy, n_iterations) + + def play_random_strategy(self, n_iterations=10000): + return self.simulate_game(self.random_strategy, n_iterations) + + def play_empirical_strategy(self): + k = 0 # état initial + 
total_turns = 0
+
+        while k < len(self.layout) - 1:
+            action = self.optimal_strategy[k]  # use the empirical strategy for the simulation
+            action_index = int(action) - 1
+            transition_matrix = self.normal_dice  # use the normal die for the empirical strategy
+
+            # Flatten the transition matrix into a 1D probability distribution
+            flattened_probs = transition_matrix[k]
+            flattened_probs /= np.sum(flattened_probs)  # normalize the probabilities
+
+            # Update the state (k) according to the flattened probability distribution
+            k = np.random.choice(len(self.layout), p=flattened_probs)
+
+            # Update the number of turns according to the current state
+            if self.layout[k] == 3 and action == 2:
+                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+            elif self.layout[k] == 3 and action == 3:
+                total_turns += 2
+            else:
+                total_turns += 1
+
+        return total_turns
+
+
+    def compare_empirical_vs_value_iteration(self, num_games=1000):
+        value_iteration_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle)
+        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
+
+        # Compute the average number of turns for each state
+        mean_turns_by_state = {
+            'ValueIteration': value_iteration_turns.tolist(),
+            'Empirical': empirical_turns.tolist()
+        }
+
+        return mean_turns_by_state
+
+    def compare_state_based_turns(self, num_games=1000):
+        optimal_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
+        empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games)
+
+        return optimal_turns, empirical_turns
+
+
+    def compare_strategies(self, num_games=1000):
+        optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
+        dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
+        dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
+        dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
+        random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
+
+        return {
+            'Optimal': optimal_cost,
+            'SafeDice': dice1_cost,
+            'NormalDice': dice2_cost,
+            'RiskyDice': dice3_cost,
+            'Random': random_cost
+        }
+
+
+# Example usage
+layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+circle = False
+validation_instance = validation(layout, circle)
+
+
+# Compare the empirical strategy with the value iteration strategy
+turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
+
+# Print the average number of turns for each state
+num_states = len(layout)
+for state in range(num_states - 1):
+    print(f"State {state}:")
+    print(f"  ValueIteration - average turns: {turns_by_state['ValueIteration'][state]:.2f}")
+    print(f"  Empirical - average turns: {turns_by_state['Empirical'][state]:.2f}")
+
+# Run the empirical strategy once
+empirical_strategy_result = validation_instance.play_empirical_strategy()
+print("Cost of the empirical strategy over one game:", empirical_strategy_result)
+
+# Compare the empirical strategy with the value iteration strategy over several games
+comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000)
+print("Average cost of the value iteration strategy:", comparison_result['ValueIteration'])
+print("Average cost of the empirical strategy:", comparison_result['Empirical'])
+
+
+optimal_cost = 
validation_instance.play_optimal_strategy(n_iterations=10000) +print("Optimal Strategy Cost:", optimal_cost) + +dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000) +print("Normal Dice Strategy Cost:", dice2_cost) + +random_cost = validation_instance.play_random_strategy(n_iterations=10000) +print("Random Strategy Cost:", random_cost) + +strategy_comparison = validation_instance.compare_strategies(num_games=10000) +print("Strategy Comparison Results:", strategy_comparison) + + +optimal_strategy = validation_instance.optimal_strategy +mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Optimal Strategy:", mean_turns_optimal) + +safe_dice_strategy = validation_instance.safe_strategy +mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice) + +normal_dice_strategy = validation_instance.normal_strategy +mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice) + +risky_dice_strategy = validation_instance.risky_strategy +mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice) + +random_dice_strategy = validation_instance.random_strategy +mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice) diff --git a/markovDecision.py b/markovDecision.py index e8e68be0cf9a32e0fdacd134d21008f43fb19295..276043e74996f0f1b5e9474a859d9ce0c0bf98fe 100644 --- a/markovDecision.py +++ b/markovDecision.py @@ -1,26 +1,28 @@ import numpy as np -from tmc import TransitionMatrixCalculator as tmc +from tmc_2 import TransitionMatrixCalculator as tmc class MarkovDecisionSolver: - def __init__(self, layout : list, circle : bool): + def __init__(self, layout: list, circle: bool): self.Numberk = 15 self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + self.safe_dice = self.tmc_instance.proba_security_dice() + self.normal_dice, _ = self.tmc_instance.proba_normal_dice(layout, circle) # Make sure to capture only the normal_dice component + self.risky_dice, _ = self.tmc_instance.proba_risky_dice(layout, circle) # Make sure to capture only the risky_dice component self.jail = [i for i, x in enumerate(layout) if x == 3] self.ValueI = np.zeros(self.Numberk) self.DiceForStates = np.zeros(self.Numberk - 1) def _compute_vi_safe(self, k): - return np.dot(self.safe_dice[k], self.ValueI) + return np.sum(self.safe_dice[k] * self.ValueI) + np.sum(self.normal_dice[k][self.jail]) + def _compute_vi_normal(self, k): - vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail]) + vi_normal = np.sum(self.normal_dice[k] * self.ValueI) + np.sum(self.normal_dice[k][self.jail]) return vi_normal + def _compute_vi_risky(self, k): - vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail]) + vi_risky = np.sum(self.risky_dice[k] * self.ValueI) + 
np.sum(self.risky_dice[k][self.jail]) return vi_risky def solve(self): @@ -56,6 +58,7 @@ def markovDecision(layout : list, circle : bool): solver = MarkovDecisionSolver(layout, circle) return solver.solve() +""" # Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] @@ -68,4 +71,4 @@ print(result_false) result_true = markovDecision(layout, circle=True) print("\nStopping on the square to win") -print(result_true) +print(result_true)""" \ No newline at end of file diff --git a/plot.py b/plot.py index c5483806fb07f4cb14a364e205b4476c8f6366f5..46b023a9f315b02706356c9ae9f1f0b62705f9fa 100644 --- a/plot.py +++ b/plot.py @@ -1,5 +1,5 @@ import matplotlib.pyplot as plt -from validation import validation +from valid import validation import numpy as np # Example layout and circle settings @@ -25,7 +25,7 @@ def plot_strategy_comparison(num_games=1000): # Plotting function for state-based average turns for all strategies on the same plot def plot_state_based_turns(save=True): - strategies = [validation_instance.optimal_strategy, + strategies = [validation_instance.optimal_policy, validation_instance.safe_strategy, validation_instance.normal_strategy, validation_instance.risky_strategy, diff --git a/strategy_comparison.png b/strategy_comparison.png index aefb1f990b1c89957981a9281815083011fbc9d2..d8b19f2ae884d963e9a1a6f2a97b07cd2f744384 100644 Binary files a/strategy_comparison.png and b/strategy_comparison.png differ diff --git a/tmc.py b/tmc.py index 388cc13756e4a26351ad0785fc55a89793bfb6e3..04b03925db4e3697d560b113611fc74d82717283 100644 --- a/tmc.py +++ b/tmc.py @@ -1,197 +1,157 @@ import numpy as np -import random as rd class TransitionMatrixCalculator: def __init__(self): - # Initialisation des matrices de transition pour les dés "safe", "normal" et "risky" - self.matrix_safe = np.zeros((15, 15)) - self.matrix_normal = np.zeros((15, 15)) - self.matrix_risky = np.zeros((15, 15)) - # Probability to go from state k to k' - self.safe_dice = np.array([1/2, 1/2]) - self.normal_dice = np.array([1/3, 1/3, 1/3]) - self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) - - def compute_transition_matrix(self, layout, circle=False): - self.matrix_safe.fill(0) - self.matrix_normal.fill(0) - self.matrix_risky.fill(0) + self.nSquares = 15 + self.matrix_safe = np.zeros((self.nSquares, self.nSquares)) + self.matrix_normal = np.zeros((self.nSquares, self.nSquares)) + self.matrix_risky = np.zeros((self.nSquares, self.nSquares)) - self._compute_safe_matrix() - self._compute_normal_matrix(layout, circle) - self._compute_risky_matrix(layout, circle) + def proba_security_dice(self): + proba = np.zeros((self.nSquares, self.nSquares)) - return self.matrix_safe, self.matrix_normal, self.matrix_risky + for i in range(self.nSquares - 1): + proba[i][i] = 0.5 + if i == 2: + proba[i][i + 1] = 0.25 # slow lane + proba[i][i + 8] = 0.25 # fast lane + elif i == 9: + proba[i][i + 5] = 0.5 + else: + proba[i][i + 1] = 0.5 + + proba[self.nSquares - 1][self.nSquares - 1] = 1 + return proba + def proba_normal_dice(self, layout, circle=False): + proba = np.zeros((self.nSquares, self.nSquares)) + proba_prison = np.zeros((self.nSquares, self.nSquares)) - def _compute_safe_matrix(self): - for k in range(15): - for s, p in enumerate(self.safe_dice): - if k == 9 and s == 1: - k_prime = 14 - self.matrix_safe[k,k_prime] += p - elif k == 2 and s > 0: - p /= 2 - k_prime = 10 - self.matrix_safe[k,k_prime] += p - k_prime = 3 - self.matrix_safe[k,k_prime] += p + for 
i in range(self.nSquares - 1): + proba[i][i] = 1 / 3 + if i == 2: + proba[i][i + 1] = 1 / 6 # slow lane + proba[i][i + 2] = 1 / 6 # slow lane + proba[i][i + 8] = 1 / 6 # fast lane + proba[i][i + 9] = 1 / 6 # fast lane + elif i == 8: + proba[i][i + 1] = 1 / 3 + proba[i][i + 6] = 1 / 3 + elif i == 9: + if circle: + proba[i][i + 5] = 1 / 3 + proba[i][0] = 1 / 3 else: - k_prime = k + s - k_prime = min(14, k_prime) - self.matrix_safe[k,k_prime] += p - - return self.matrix_safe + proba[i][i + 5] = 2 / 3 + elif i == 13: + if circle: + proba[i][i + 1] = 1 / 3 + proba[i][0] = 1 / 3 + else: + proba[i][i + 1] = 2 / 3 + else: + proba[i][i + 1] = 1 / 3 + proba[i][i + 2] = 1 / 3 - def _compute_normal_matrix(self, layout, circle): - for k in range(15): - for s, p in enumerate(self.normal_dice): - if k == 8 and s == 2: - k_prime = 14 - self.matrix_normal[k,k_prime] += p - continue - elif k == 9 and s in [1, 2]: - if not circle or s == 1: - k_prime = 14 - self.matrix_normal[k,k_prime] += p - elif circle and s == 2: - k_prime = 0 - self.matrix_normal[k,k_prime] += p - continue + for i in range(self.nSquares - 1): + for j in range(self.nSquares - 1): + case_value = layout[j] + if case_value == 1: + if j != 0: + proba[i][0] += proba[i][j] / 2 + proba[i][j] /= 2 + elif case_value == 2: + proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] / 2 + proba[i][j] /= 2 + elif case_value == 3: + proba_prison[i][j] = proba[i][j] / 2 + elif case_value == 4: + proba[i][j] /= 2 + if j != 0: + proba[i][0] += proba[i][j] / 6 + proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] / 6 + proba_prison[i][j] = proba[i][j] / 6 - # handle the fast lane - if k == 2 and s > 0: - p /= 2 - k_prime = 10 + (s - 1) # rebalance the step before with s > 0 - if layout[k_prime] in [0, 3]: # normal or prison square - self.matrix_normal[k,k_prime] += p - elif layout[k_prime] == 1: # handle type 1 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 0 - self.matrix_normal[k,k_prime] += p / 2 - elif layout[k_prime] == 2: # handle type 2 trap - self.matrix_normal[k,k_prime] += p / 2 - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 3 + (s - 1) # rebalance the step before with s > 0 - if layout[k_prime] in [0, 3]: # normal or prison square - self.matrix_normal[k,k_prime] += p - elif layout[k_prime] == 1: # handle type 1 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 0 - self.matrix_normal[k,k_prime] += p / 2 - elif layout[k_prime] == 2: # handle type 2 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p / 2 - continue + proba[self.nSquares - 1][self.nSquares - 1] = 1 + return proba, proba_prison - k_prime = k + s - k_prime = k_prime % 15 if circle else min(14, k_prime) # modulo - if layout[k_prime] in [1, 2]: - p /= 2 - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_normal[k,k_prime] += p - continue - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p - continue - self.matrix_normal[k,k_prime] += p - return self.matrix_normal + def proba_risky_dice(self, layout, circle=False): + proba = np.zeros((self.nSquares, self.nSquares)) + proba_prison = np.zeros((self.nSquares, self.nSquares)) - def _compute_risky_matrix(self, layout, circle): - for k in range(15): - for s, 
p in enumerate(self.risky_dice): - if k == 7 and s == 3: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - continue - elif k == 8 and s in [2, 3]: - if not circle or s == 2: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - elif circle: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - continue - elif k == 9 and s in [1, 2, 3]: - if not circle or s == 1: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - elif circle and s == 2: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif circle and s == 3: - k_prime = 1 - if layout[k_prime] != 0: - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif layout[k_prime] == 2: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - self.matrix_risky[k,k_prime] += p - continue - continue + for i in range(self.nSquares - 1): + proba[i][i] = 1 / 4 + if i == 2: + proba[i][i + 1] = 1 / 8 # slow lane + proba[i][i + 2] = 1 / 8 # slow lane + proba[i][i + 3] = 1 / 8 # slow lane + proba[i][i + 8] = 1 / 8 # fast lane + proba[i][i + 9] = 1 / 8 # fast lane + proba[i][i + 10] = 1 / 8 # fast lane + elif i == 7: + proba[i][i + 1] = 1 / 4 + proba[i][i + 2] = 1 / 4 + proba[i][i + 7] = 1 / 4 + elif i == 8: + if circle: + proba[i][i + 1] = 1 / 4 + proba[i][i + 6] = 1 / 4 + proba[i][0] = 1 / 4 + else: + proba[i][i + 1] = 1 / 4 + proba[i][i + 6] = 1 / 2 + elif i == 9: + if circle: + proba[i][i + 5] = 1 / 4 + proba[i][0] = 1 / 4 + proba[i][1] = 1 / 4 + else: + proba[i][i + 5] = 3 / 4 + elif i == 12: + if circle: + proba[i][i + 1] = 1 / 4 + proba[i][i + 2] = 1 / 4 + proba[i][0] = 1 / 4 + else: + proba[i][i + 1] = 1 / 4 + proba[i][i + 2] = 1 / 2 + elif i == 13: + if circle: + proba[i][i + 1] = 1 / 4 + proba[i][0] = 1 / 4 + proba[i][1] = 1 / 4 + else: + proba[i][self.nSquares - 1] = 3 / 4 + else: + proba[i][i + 1] = 1 / 4 + proba[i][i + 2] = 1 / 4 + proba[i][i + 3] = 1 / 4 - if k == 2 and s > 0: - p /= 2 - k_prime = 10 + (s - 1) - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - else: - self.matrix_risky[k,k_prime] += p - k_prime = 3 + (s - 1) - self.matrix_risky[k,k_prime] += p - continue + for i in range(self.nSquares - 1): + for j in range(self.nSquares - 1): + case_value = layout[j] + if case_value == 1: + if j != 0: + proba[i][0] += proba[i][j] + proba[i][j] = 0 + elif case_value == 2: + proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] + proba[i][j] = 0 + elif case_value == 3: + proba_prison[i][j] = proba[i][j] + elif case_value == 4: + if j != 0: + proba[i][0] += proba[i][j] / 3 + proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] / 3 + proba_prison[i][j] = proba[i][j] / 3 + proba[i][j] /= 3 - k_prime = k + s - k_prime = k_prime % 15 if circle else min(14, k_prime) - if layout[k_prime] in [1, 2]: - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - continue - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - continue - self.matrix_risky[k,k_prime] += p - return self.matrix_risky + proba[self.nSquares - 1][self.nSquares - 1] = 1 + return proba, proba_prison -#tmc = TransitionMatrixCalculator() -#tmc.tst_transition_matrix() + def compute_transition_matrix(self, layout, circle=False): + 
self.matrix_safe = self.proba_security_dice() + self.matrix_normal, _ = self.proba_normal_dice(layout, circle) + self.matrix_risky, _ = self.proba_risky_dice(layout, circle) + + return self.matrix_safe, self.matrix_normal, self.matrix_risky diff --git a/validation.py b/validation.py index ee6b922d35f05ee81565a00d93ccb85a8e768308..16174fc88da73ec2ce138aca2728423c980cbe89 100644 --- a/validation.py +++ b/validation.py @@ -1,8 +1,8 @@ import random as rd import numpy as np import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD +from tmc_2 import TransitionMatrixCalculator as tmc +from mdp import MarkovDecisionSolver as mD class validation: def __init__(self, layout, circle=False): @@ -11,14 +11,13 @@ class validation: self.layout = layout self.circle = circle self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + self.safe_dice = self.tmc_instance.proba_security_dice() + self.normal_dice, _ = self.tmc_instance.proba_normal_dice(layout, circle) # Make sure to capture only the normal_dice component + self.risky_dice, _ = self.tmc_instance.proba_risky_dice(layout, circle) # Make sure to capture only the risky_dice component solver = mD(self.layout, self.circle) self.expec, self.optimal_policy = solver.solve() # Define all the strategy - self.optimal_strategy = self.optimal_policy self.safe_strategy = [1]*len(layout) self.normal_strategy = [2]*len(layout) self.risky_strategy = [3]*len(layout) @@ -75,12 +74,11 @@ class validation: def simulate_state(self, strategy, layout, circle, n_iterations=10000): # Compute transition matrices for each dice - tmc_instance = tmc() - P_safe = tmc_instance._compute_safe_matrix() - P_normal = tmc_instance._compute_normal_matrix(layout, circle) - P_risky = tmc_instance._compute_risky_matrix(layout, circle) + safe_dice = self.tmc_instance.proba_security_dice() + normal_dice = self.tmc_instance.proba_normal_dice(layout, circle)[0] # Get only the normal dice transition matrix + risky_dice = self.tmc_instance.proba_risky_dice(layout, circle)[0] # Get only the risky dice transition matrix - transition_matrices = [P_safe, P_normal, P_risky] + transition_matrices = [safe_dice, normal_dice, risky_dice] number_turns = [] number_mean = [] @@ -116,8 +114,9 @@ class validation: return mean_turns - def play_optimal_strategy(self, n_iterations=10000): - return self.simulate_game(self.optimal_strategy, n_iterations) + + def play_optimal_policy(self, n_iterations=10000): + return self.simulate_game(self.optimal_policy, n_iterations) def play_dice_strategy(self, dice_choice, n_iterations=10000): @@ -140,7 +139,7 @@ class validation: total_turns = 0 while k < len(self.layout) - 1: - action = self.optimal_strategy[k] # Utiliser la stratégie empirique pour la simulation + action = self.optimal_policy[k] # Utiliser la stratégie empirique pour la simulation action_index = int(action) - 1 transition_matrix = self.normal_dice # Utiliser le dé normal pour la stratégie empirique @@ -163,27 +162,29 @@ class validation: def compare_empirical_vs_value_iteration(self, num_games=1000): - value_iteration_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle) - empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games) + value_iteration_turns = 
self.optimal_policy + empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) + + # Calculate the mean turns for each state + mean_turns_by_state = { + 'ValueIteration': value_iteration_turns.tolist(), + 'Empirical': empirical_turns.tolist() + } + + return mean_turns_by_state - # Calculer la moyenne des tours pour chaque état - mean_turns_by_state = { - 'ValueIteration': value_iteration_turns.tolist(), - 'Empirical': empirical_turns.tolist() - } - return mean_turns_by_state def compare_state_based_turns(self, num_games=1000): - optimal_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games) - empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games) + value_iteration = self.expec + empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) - return optimal_turns, empirical_turns + return value_iteration, empirical_turns def compare_strategies(self, num_games=1000): - optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games) + optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games) dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games) @@ -197,7 +198,7 @@ class validation: 'Random': random_cost } - +""" # Utilisation d'exemple layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] circle = False @@ -208,12 +209,14 @@ validation_instance = validation(layout, circle) turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) # Imprimer les moyennes des tours pour chaque état + num_states = len(layout) for state in range(num_states - 1): print(f"État {state}:") print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}") print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}") + # Exécuter la stratégie empirique une fois empirical_strategy_result = validation_instance.play_empirical_strategy() print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result) @@ -222,14 +225,19 @@ print("Coût de la stratégie empirique sur un tour :", empirical_strategy_resul comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration']) print("Coût moyen de la stratégie empirique :", comparison_result['Empirical']) -""" -optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000) +optimal_cost = validation_instance.play_optimal_policy(n_iterations=10000) print("Optimal Strategy Cost:", optimal_cost) +dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=10000) +print("Safe Dice Strategy Cost:", dice1_cost) + dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000) print("Normal Dice Strategy Cost:", dice2_cost) +dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=10000) +print("Risky Dice Strategy Cost:", dice3_cost) + random_cost = validation_instance.play_random_strategy(n_iterations=10000) print("Random Strategy Cost:", random_cost) @@ -237,8 +245,8 @@ strategy_comparison = validation_instance.compare_strategies(num_games=10000) print("Strategy Comparison Results:", 
strategy_comparison)
 
 
-optimal_strategy = validation_instance.optimal_strategy
-mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000)
+optimal_policy = validation_instance.optimal_policy
+mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=10000)
 print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
 
 safe_dice_strategy = validation_instance.safe_strategy
@@ -256,5 +264,6 @@ print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
 random_dice_strategy = validation_instance.random_strategy
 mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
 print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
-"""
+
+"""
\ No newline at end of file
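
A minimal usage sketch of the refactored API introduced above (proba_security_dice, proba_normal_dice, proba_risky_dice in tmc.py, and markovDecision in markovDecision.py). The import paths are an assumption: the sketch imports the files shown in this diff as tmc and markovDecision, whereas markovDecision.py, validation.py and plot.py reference tmc_2, mdp and valid, which are not part of this diff; adjust the imports to whatever modules the repository actually provides.

    import numpy as np
    from tmc import TransitionMatrixCalculator   # assumed path; the diff itself imports tmc_2
    from markovDecision import markovDecision    # assumed path; the diff itself imports mdp

    # 0 = ordinary square, 1 = restart trap, 2 = go back 3 squares, 3 = prison (extra turn)
    layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

    calc = TransitionMatrixCalculator()
    # Each row of a transition matrix is a probability distribution over the 15 squares.
    # The normal/risky helpers also return a separate "prison" probability matrix.
    p_safe = calc.proba_security_dice()
    p_normal, p_normal_prison = calc.proba_normal_dice(layout, circle=False)
    p_risky, p_risky_prison = calc.proba_risky_dice(layout, circle=False)
    assert np.allclose(p_safe.sum(axis=1), 1.0)

    # Value iteration: expected number of turns from each square, and the die to throw there.
    expec, dice_for_states = markovDecision(layout, circle=False)
    print("Expected turns per square:", expec)
    print("Optimal die per square (1=safe, 2=normal, 3=risky):", dice_for_states)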