diff --git a/.DS_Store b/.DS_Store index 6d15e9587b9e9d630aec3bbcdf9497b36b9e9b42..5e91970b662b0257e02ae4324a8e0d354a38064b 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/100k_false/Figure_1.png b/100k_false/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..31946fbc5b2b23f1e2593c0f375dbadaba451293 Binary files /dev/null and b/100k_false/Figure_1.png differ diff --git a/100k_false/Figure_2.png b/100k_false/Figure_2.png new file mode 100644 index 0000000000000000000000000000000000000000..0283979c5bcbb4307a04becca47af171d1b82354 Binary files /dev/null and b/100k_false/Figure_2.png differ diff --git a/100k_false/Figure_3.png b/100k_false/Figure_3.png new file mode 100644 index 0000000000000000000000000000000000000000..45512984bee3f6cfef0249501251ffcc2c8a62c9 Binary files /dev/null and b/100k_false/Figure_3.png differ diff --git a/10k_false/Figure_1.png b/10k_false/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bcabdaa67b4519b11c1a7cfcc24ba2a35e5327d1 Binary files /dev/null and b/10k_false/Figure_1.png differ diff --git a/10k_false/Figure_2.png b/10k_false/Figure_2.png new file mode 100644 index 0000000000000000000000000000000000000000..61902c78045f28a7a9257f9c3951ee1acb754d36 Binary files /dev/null and b/10k_false/Figure_2.png differ diff --git a/10k_false/Figure_3.png b/10k_false/Figure_3.png new file mode 100644 index 0000000000000000000000000000000000000000..bbd6865943b059bd63d5349b6d286dbf1f0fdf03 Binary files /dev/null and b/10k_false/Figure_3.png differ diff --git a/1M_false/Figure_1.png b/1M_false/Figure_1.png new file mode 100644 index 0000000000000000000000000000000000000000..937a30ff16c5d7473d37dc41b4f72039b6c6b0fa Binary files /dev/null and b/1M_false/Figure_1.png differ diff --git a/1M_false/Figure_2.png b/1M_false/Figure_2.png new file mode 100644 index 0000000000000000000000000000000000000000..8a5df879c9d42a2e448dad8ee8eca0689094f4ee Binary files /dev/null and b/1M_false/Figure_2.png differ diff --git a/1M_false/Figure_3.png b/1M_false/Figure_3.png new file mode 100644 index 0000000000000000000000000000000000000000..e9893c99e7f5e9f88474a291e6b8752cece44b49 Binary files /dev/null and b/1M_false/Figure_3.png differ diff --git a/ancien/markoVVV.py b/ancien/markoVVV.py deleted file mode 100644 index 5cd533da36c502534d7221ab7df2d8555759fc02..0000000000000000000000000000000000000000 --- a/ancien/markoVVV.py +++ /dev/null @@ -1,78 +0,0 @@ -import numpy as np -from tmc import TransitionMatrixCalculator as tmc - -class MarkovDecisionSolver: - def __init__(self, layout: list, circle: bool): - self.nSquares = 15 - self.precision = 1e-9 - self.layout = layout - self.circle = circle - self.tmc_instance = tmc() - self.matrix_safe = self.tmc_instance._compute_safe_matrix() - self.matrix_normal, self.jail_n = self.tmc_instance._compute_normal_matrix(layout, circle) - self.matrix_risky, self.jail_r = self.tmc_instance._compute_risky_matrix(layout, circle) - self.Dice = np.zeros(self.nSquares, dtype=int) - - def solve(self): - ValueI = np.zeros(self.nSquares) - ValueINew = np.array([8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0]) - - i = 0 - while i < 1000: # Limiter le nombre d'itérations pour éviter une boucle infinie - i += 1 - - # Copiez la valeur actuelle dans ValueI - np.copyto(ValueI, ValueINew) - - # Mettez à jour les valeurs de ValueINew pour chaque état - for k in range(self.nSquares - 1): - ValueINew[k] = 1 + min( - np.dot(self.matrix_safe[k], ValueI), - np.dot(self.matrix_normal[k], ValueI) + np.sum(self.jail_n[k]), - np.dot(self.matrix_risky[k], ValueI) + np.sum(self.jail_r[k]) - ) - - ValueINew[self.nSquares - 1] = min( - np.dot(self.matrix_safe[self.nSquares - 1], ValueI), - np.dot(self.matrix_normal[self.nSquares - 1], ValueI), - np.dot(self.matrix_risky[self.nSquares - 1], ValueI) - ) - - # Calculer les actions optimales (indice de l'action + 1) - for k in range(self.nSquares): - self.Dice[k] = np.argmin([ - np.dot(self.matrix_safe[k], ValueINew), - np.dot(self.matrix_normal[k], ValueINew) + np.sum(self.jail_n[k]), - np.dot(self.matrix_risky[k], ValueINew) + np.sum(self.jail_r[k]), - ]) + 1 - - # Vérifiez la convergence en utilisant une petite tolérance - if np.sum(np.abs(ValueINew - ValueI)) < self.precision: - break - - # Retourne les valeurs finales de ValueINew et les actions optimales (Dice) - return ValueINew, self.Dice - - -def markovDecision(layout: list, circle: bool): - solver = MarkovDecisionSolver(layout, circle) - return solver.solve() - - -# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - -# Résolution du problème avec différents modes de jeu -result_false = markovDecision(layout, circle=False) -print("\nStopping on the square to win") -print("Expected costs for each square:") -print(result_false[0]) -print("Dice choices for each square:") -print(result_false[1]) - -result_true = markovDecision(layout, circle=True) -print("\nWin as soon as land on or overstep the final square") -print("Expected costs for each square:") -print(result_true[0]) -print("Dice choices for each square:") -print(result_true[1]) diff --git a/ancien/mdppp.py b/ancien/mdppp.py deleted file mode 100644 index 5c88f50f03c56e86b968d65cf4854ece1423cf7c..0000000000000000000000000000000000000000 --- a/ancien/mdppp.py +++ /dev/null @@ -1,71 +0,0 @@ -import numpy as np -from ancien.tmc import TransitionMatrixCalculator as tmc - -class MarkovDecisionSolver: - def __init__(self, layout : list, circle : bool): - self.Numberk = 15 - self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) - self.jail = [i for i, x in enumerate(layout) if x == 3] - self.ValueI = np.zeros(self.Numberk) - self.DiceForStates = np.zeros(self.Numberk - 1) - - def _compute_vi_safe(self, k): - return np.dot(self.safe_dice[k], self.ValueI) - - def _compute_vi_normal(self, k): - vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail]) - return vi_normal - - def _compute_vi_risky(self, k): - vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail]) - return vi_risky - - def solve(self): - i = 0 - while True: - ValueINew = np.zeros(self.Numberk) - i += 1 - - for k in range(self.Numberk - 1): - vi_safe = self._compute_vi_safe(k) - vi_normal = self._compute_vi_normal(k) - vi_risky = self._compute_vi_risky(k) - - ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky) - - if ValueINew[k] == 1 + vi_safe: - self.DiceForStates[k] = 1 - elif ValueINew[k] == 1 + vi_normal: - self.DiceForStates[k] = 2 - else: - self.DiceForStates[k] = 3 - - if np.allclose(ValueINew, self.ValueI): - self.ValueI = ValueINew - break - - self.ValueI = ValueINew - - Expec = self.ValueI[:-1] - return [Expec, self.DiceForStates] - -def markovDecision(layout : list, circle : bool): - solver = MarkovDecisionSolver(layout, circle) - return solver.solve() - - -# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - - -# Résolution du problème avec différents modes de jeu -result_false = markovDecision(layout, circle=False) -print("\nWin as soon as land on or overstep the final square") -print(result_false) - -result_true = markovDecision(layout, circle=True) -print("\nStopping on the square to win") -print(result_true) diff --git a/ancien/plotinggg.py b/ancien/plotinggg.py deleted file mode 100644 index d1eb1e05f7fa1bcc3d90851825f67cc1f812997e..0000000000000000000000000000000000000000 --- a/ancien/plotinggg.py +++ /dev/null @@ -1,82 +0,0 @@ -import matplotlib.pyplot as plt -from ancien.validation import validation -import numpy as np - -# Example layout and circle settings -layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -circle = False - -# Create an instance of validation -validation_instance = validation(layout, circle) - - -# Plotting function for strategy comparison -def plot_strategy_comparison(num_games=1000): - strategy_costs = validation_instance.compare_strategies(num_games=num_games) - - # Bar plot for strategy comparison - plt.figure(figsize=(10, 6)) - plt.bar(strategy_costs.keys(), strategy_costs.values(), color=['blue', 'green', 'orange', 'red', 'purple']) - plt.xlabel('Strategies') - plt.ylabel('Average Cost') - plt.title('Comparison of Strategies') - plt.savefig('strategy_comparison.png') # Save the plot - plt.show() - -# Plotting function for state-based average turns for all strategies on the same plot -def plot_state_based_turns(save=True): - strategies = [validation_instance.optimal_strategy, - validation_instance.safe_strategy, - validation_instance.normal_strategy, - validation_instance.risky_strategy, - validation_instance.random_strategy] - strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random'] - - plt.figure(figsize=(12, 6)) - for strategy, name in zip(strategies, strategy_names): - mean_turns = validation_instance.simulate_state(strategy, layout, circle) - plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name) - - plt.xlabel('State') - plt.ylabel('Average Turns') - plt.title('Average Turns per State for Different Strategies') - plt.grid(True) - plt.legend() - - #if save: - #plt.savefig('state_based_turns_all_strategies.png') # Save the plot - - plt.show() - -def plot_state_based_comparison(validation_instance, num_games=1000): - optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games) - - # Plotting the state-based average turns comparison - plt.figure(figsize=(12, 6)) - - # Plot optimal strategy turns - plt.plot(range(len(optimal_turns)), optimal_turns, marker='o', linestyle='-', label='ValueIteration') - - # Plot empirical strategy turns - plt.plot(range(len(empirical_turns)), empirical_turns, marker='x', linestyle='-', label='Empirical') - - plt.xlabel('State') - plt.ylabel('Average Turns') - plt.title('Average Turns per State - ValueIteration vs. Empirical') - plt.grid(True) - plt.legend() - - plt.show() - - - - -# Main function to generate and save plots -if __name__ == '__main__': - # Example of strategy comparison plot - plot_strategy_comparison(num_games=1000) - - # Example of state-based average turns plot for all strategies on the same plot - plot_state_based_turns(save=True) - - plot_state_based_comparison(validation_instance, num_games=1000) \ No newline at end of file diff --git a/ancien/tmcccc.py b/ancien/tmcccc.py deleted file mode 100644 index 29086c4fa271070a1e1b4dfcfef97147ccfd0430..0000000000000000000000000000000000000000 --- a/ancien/tmcccc.py +++ /dev/null @@ -1,209 +0,0 @@ -import numpy as np -import random as rd - -class TransitionMatrixCalculator: - def __init__(self): - # Initialisation des matrices de transition pour les dés "safe", "normal" et "risky" - self.matrix_safe = np.zeros((15, 15)) - self.matrix_normal = np.zeros((15, 15)) - self.matrix_risky = np.zeros((15, 15)) - # Probability to go from state k to k' - self.safe_dice = np.array([1/2, 1/2]) - self.normal_dice = np.array([1/3, 1/3, 1/3]) - self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) - - def compute_transition_matrix(self, layout, circle=False): - self.matrix_safe.fill(0) - self.matrix_normal.fill(0) - self.matrix_risky.fill(0) - - self._compute_safe_matrix() - self._compute_normal_matrix(layout, circle) - self._compute_risky_matrix(layout, circle) - - return self.matrix_safe, self.matrix_normal, self.matrix_risky - - - def _compute_safe_matrix(self): - for k in range(15): - for s, p in enumerate(self.safe_dice): - if k == 9 and s == 1: - k_prime = 14 - self.matrix_safe[k,k_prime] += p - elif k == 2 and s > 0: - p /= 2 - k_prime = 10 - self.matrix_safe[k,k_prime] += p - k_prime = 3 - self.matrix_safe[k,k_prime] += p - else: - k_prime = k + s - k_prime = min(14, k_prime) - self.matrix_safe[k,k_prime] += p - - return self.matrix_safe - - def _compute_normal_matrix(self, layout, circle): - for k in range(15): - for s, p in enumerate(self.normal_dice): - if k == 8 and s == 2: - k_prime = 14 - self.matrix_normal[k,k_prime] += p - continue - elif k == 9 and s in [1, 2]: - if not circle or s == 1: - k_prime = 14 - self.matrix_normal[k,k_prime] += p - elif circle and s == 2: - k_prime = 0 - self.matrix_normal[k,k_prime] += p - continue - - # handle the fast lane - if k == 2 and s > 0: - p /= 2 - k_prime = 10 + (s - 1) # rebalance the step before with s > 0 - if layout[k_prime] in [0, 3]: # normal or prison square - self.matrix_normal[k,k_prime] += p - elif layout[k_prime] == 1: # handle type 1 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 0 - self.matrix_normal[k,k_prime] += p / 2 - elif layout[k_prime] == 2: # handle type 2 trap - self.matrix_normal[k,k_prime] += p / 2 - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 3 + (s - 1) # rebalance the step before with s > 0 - if layout[k_prime] in [0, 3]: # normal or prison square - self.matrix_normal[k,k_prime] += p - elif layout[k_prime] == 1: # handle type 1 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 0 - self.matrix_normal[k,k_prime] += p / 2 - elif layout[k_prime] == 2: # handle type 2 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p / 2 - continue - - k_prime = k + s - k_prime = k_prime % 15 if circle else min(14, k_prime) # modulo - if layout[k_prime] in [1, 2]: - p /= 2 - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_normal[k,k_prime] += p - continue - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p - continue - self.matrix_normal[k,k_prime] += p - return self.matrix_normal - - def _compute_risky_matrix(self, layout, circle): - for k in range(15): - for s, p in enumerate(self.risky_dice): - if k == 7 and s == 3: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - continue - elif k == 8 and s in [2, 3]: - if not circle or s == 2: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - elif circle: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - continue - elif k == 9 and s in [1, 2, 3]: - if not circle or s == 1: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - elif circle and s == 2: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif circle and s == 3: - k_prime = 1 - if layout[k_prime] != 0: - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif layout[k_prime] == 2: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - self.matrix_risky[k,k_prime] += p - continue - continue - - if k == 2 and s > 0: - p /= 2 - k_prime = 10 + (s - 1) - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - else: - self.matrix_risky[k,k_prime] += p - k_prime = 3 + (s - 1) - self.matrix_risky[k,k_prime] += p - continue - - k_prime = k + s - k_prime = k_prime % 15 if circle else min(14, k_prime) - if layout[k_prime] in [1, 2]: - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - continue - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - continue - self.matrix_risky[k,k_prime] += p - return self.matrix_risky - - def print_matrix_with_layout(self, title, matrix): - print(f"{title}:") - for i in range(matrix.shape[0]): - row_str = " | ".join(f"{matrix[i, j]:.3f}" for j in range(matrix.shape[1])) - print(row_str) - print() - -# Example Usage: -layout_example = [0]*15 -calculator = TransitionMatrixCalculator() -print(calculator.compute_transition_matrix(layout_example, circle=True)) - -#tmc = TransitionMatrixCalculator() -#tmc.tst_transition_matrix() diff --git a/ancien/validationnnnn.py b/ancien/validationnnnn.py deleted file mode 100644 index 86f6ff7dcb173d0e454286f0753ac9778621a214..0000000000000000000000000000000000000000 --- a/ancien/validationnnnn.py +++ /dev/null @@ -1,258 +0,0 @@ -import random as rd -import numpy as np -import matplotlib.pyplot as plt -from ancien.tmc import TransitionMatrixCalculator as tmc -from ancien.markovDecision import MarkovDecisionSolver as mD - -class validation: - def __init__(self, layout, circle=False): - - # import from other .PY - self.layout = layout - self.circle = circle - self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) - solver = mD(self.layout, self.circle) - self.expec, self.optimal_policy = solver.solve() - - # Define all the strategy - self.optimal_strategy = self.optimal_policy - self.safe_strategy = [1]*len(layout) - self.normal_strategy = [2]*len(layout) - self.risky_strategy = [3]*len(layout) - self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] - - # Définir les coûts par case et par type de dé - self.costs_by_dice_type = { - 'SafeDice': [0] * len(self.layout), - 'NormalDice': [0] * len(self.layout), - 'RiskyDice': [0] * len(self.layout) - } - - # Remplir les coûts pour chaque case en fonction du type de dé - for i in range(len(self.layout)): - if self.layout[i] == 3: - self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr - self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal - self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué - - - def simulate_game(self, strategy, n_iterations=10000): - transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] - number_turns = [] - - for _ in range(n_iterations): - total_turns = 0 - k = 0 # état initial - - while k < len(self.layout) - 1: - action = strategy[k] # action selon la stratégie - - # Convertir action en entier pour accéder à l'indice correct dans transition_matrices - action_index = int(action) - 1 - transition_matrix = transition_matrices[action_index] - - # Aplatir la matrice de transition en une distribution de probabilité 1D - flattened_probs = transition_matrix[k] - flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités - - # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie - k = np.random.choice(len(self.layout), p=flattened_probs) - - # Mise à jour du nombre de tours en fonction de l'état actuel - if self.layout[k] == 3 and action == 2: - total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 - elif self.layout[k] == 3 and action == 3: - total_turns += 2 - else: - total_turns += 1 - - number_turns.append(total_turns) - - return np.mean(number_turns) - - def simulate_state(self, strategy, layout, circle, n_iterations=10000): - # Compute transition matrices for each dice - tmc_instance = tmc() - P_safe = tmc_instance._compute_safe_matrix() - P_normal = tmc_instance._compute_normal_matrix(layout, circle) - P_risky = tmc_instance._compute_risky_matrix(layout, circle) - - transition_matrices = [P_safe, P_normal, P_risky] - number_turns = [] - number_mean = [] - - for _ in range(n_iterations): - number_turns = [] - - for state in range(len(layout) - 1): - total_turns = 0 - k = state # starting state - - while k < len(layout) - 1: - action = strategy[k] # action based on strategy - action_index = int(action) - 1 - transition_matrix = transition_matrices[action_index] - flattened_probs = transition_matrix[k] - flattened_probs /= np.sum(flattened_probs) - k = np.random.choice(len(layout), p=flattened_probs) - - if layout[k] == 3 and action == 2: - total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 - elif layout[k] == 3 and action == 3: - total_turns += 2 - else: - total_turns += 1 - - number_turns.append(total_turns) - - number_mean.append(number_turns) - - # calculate the average number of turns for each state - mean_turns = np.mean(number_mean, axis=0) - - return mean_turns - - - def play_optimal_strategy(self, n_iterations=10000): - return self.simulate_game(self.optimal_strategy, n_iterations) - - - def play_dice_strategy(self, dice_choice, n_iterations=10000): - if dice_choice == 'SafeDice': - strategy = self.safe_strategy - elif dice_choice == 'NormalDice': - strategy = self.normal_strategy - elif dice_choice == 'RiskyDice': - strategy = self.risky_strategy - else: - raise ValueError("Invalid dice choice") - - return self.simulate_game(strategy, n_iterations) - - def play_random_strategy(self, n_iterations=10000): - return self.simulate_game(self.random_strategy, n_iterations) - - def play_empirical_strategy(self): - k = 0 # état initial - total_turns = 0 - - while k < len(self.layout) - 1: - action = self.optimal_strategy[k] # Utiliser la stratégie empirique pour la simulation - action_index = int(action) - 1 - transition_matrix = self.normal_dice # Utiliser le dé normal pour la stratégie empirique - - # Aplatir la matrice de transition en une distribution de probabilité 1D - flattened_probs = transition_matrix[k] - flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités - - # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie - k = np.random.choice(len(self.layout), p=flattened_probs) - - # Mise à jour du nombre de tours en fonction de l'état actuel - if self.layout[k] == 3 and action == 2: - total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 - elif self.layout[k] == 3 and action == 3: - total_turns += 2 - else: - total_turns += 1 - - return total_turns - - - def compare_empirical_vs_value_iteration(self, num_games=1000): - value_iteration_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle) - empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games) - - # Calculer la moyenne des tours pour chaque état - mean_turns_by_state = { - 'ValueIteration': value_iteration_turns.tolist(), - 'Empirical': empirical_turns.tolist() - } - - return mean_turns_by_state - - def compare_state_based_turns(self, num_games=1000): - optimal_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games) - empirical_turns = self.simulate_state(self.optimal_strategy, self.layout, self.circle, n_iterations=num_games) - - return optimal_turns, empirical_turns - - - - def compare_strategies(self, num_games=1000): - optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games) - dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) - dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) - dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games) - random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games) - - return { - 'Optimal': optimal_cost, - 'SafeDice': dice1_cost, - 'NormalDice': dice2_cost, - 'RiskyDice': dice3_cost, - 'Random': random_cost - } - - -# Utilisation d'exemple -layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -circle = False -validation_instance = validation(layout, circle) - - -# Comparer la stratégie empirique avec la stratégie de value iteration -turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) - -# Imprimer les moyennes des tours pour chaque état -num_states = len(layout) -for state in range(num_states - 1): - print(f"État {state}:") - print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}") - print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}") - -# Exécuter la stratégie empirique une fois -empirical_strategy_result = validation_instance.play_empirical_strategy() -print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result) - -# Comparer la stratégie empirique avec la stratégie de value iteration sur plusieurs jeux -comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) -print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration']) -print("Coût moyen de la stratégie empirique :", comparison_result['Empirical']) - - -optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000) -print("Optimal Strategy Cost:", optimal_cost) - -dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000) -print("Normal Dice Strategy Cost:", dice2_cost) - -random_cost = validation_instance.play_random_strategy(n_iterations=10000) -print("Random Strategy Cost:", random_cost) - -strategy_comparison = validation_instance.compare_strategies(num_games=10000) -print("Strategy Comparison Results:", strategy_comparison) - - -optimal_strategy = validation_instance.optimal_strategy -mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000) -print("Mean Turns for Optimal Strategy:", mean_turns_optimal) - -safe_dice_strategy = validation_instance.safe_strategy -mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000) -print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice) - -normal_dice_strategy = validation_instance.normal_strategy -mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000) -print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice) - -risky_dice_strategy = validation_instance.risky_strategy -mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000) -print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice) - -random_dice_strategy = validation_instance.random_strategy -mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000) -print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice) diff --git a/plot.py b/plot.py index cf39b576bc99b711873c63d3e3dda18396fc4d88..d84149546fac9a9fac0dc55711825368bae7b652 100644 --- a/plot.py +++ b/plot.py @@ -1,5 +1,5 @@ import matplotlib.pyplot as plt -from validation import validation +from validation import Validation as Val import numpy as np # Example layout and circle settings @@ -7,11 +7,11 @@ layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] circle = False # Create an instance of validation -validation_instance = validation(layout, circle) +validation_instance = Val(layout, circle) # Plotting function for strategy comparison -def plot_strategy_comparison(num_games=1000): +def plot_strategy_comparison(num_games=10000): strategy_costs = validation_instance.compare_strategies(num_games=num_games) # Bar plot for strategy comparison @@ -48,7 +48,7 @@ def plot_state_based_turns(save=True): plt.show() -def plot_state_based_comparison(validation_instance, num_games=100000): +def plot_state_based_comparison(validation_instance, num_games=10000): optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games) # Plotting the state-based average turns comparison @@ -74,9 +74,9 @@ def plot_state_based_comparison(validation_instance, num_games=100000): # Main function to generate and save plots if __name__ == '__main__': # Example of strategy comparison plot - plot_strategy_comparison(num_games=1000) + plot_strategy_comparison(num_games=10000) # Example of state-based average turns plot for all strategies on the same plot plot_state_based_turns(save=True) - plot_state_based_comparison(validation_instance, num_games=1000) \ No newline at end of file + plot_state_based_comparison(validation_instance, num_games=10000) \ No newline at end of file diff --git a/strategy_comparison.png b/strategy_comparison.png index 089723dd698d20af173f314912b2e90a01d89143..bcabdaa67b4519b11c1a7cfcc24ba2a35e5327d1 100644 Binary files a/strategy_comparison.png and b/strategy_comparison.png differ diff --git a/validation.py b/validation.py index c86c66327041c52901ea052ef827118f32af3a18..02e656b03b8a06c2bc8a1f9fb14f3b0c7a958904 100644 --- a/validation.py +++ b/validation.py @@ -1,42 +1,36 @@ import random as rd import numpy as np -import matplotlib.pyplot as plt from tmc import TransitionMatrixCalculator as tmc from markovDecision import MarkovDecisionSolver as mD -class validation: - def __init__(self, layout, circle=False): - # import from other .PY +class Validation: + def __init__(self, layout, circle=False): self.layout = layout self.circle = circle self.tmc_instance = tmc() self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) # Make sure to capture only the normal_dice component + self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle) + solver = mD(self.layout, self.circle) self.expec, self.optimal_policy = solver.solve() - # Define all the strategy - self.safe_strategy = [1]*len(layout) - self.normal_strategy = [2]*len(layout) - self.risky_strategy = [3]*len(layout) - self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] + self.safe_strategy = [1] * len(layout) + self.normal_strategy = [2] * len(layout) + self.risky_strategy = [3] * len(layout) + self.random_strategy = [rd.choice([0, 1, 2, 3]) for _ in range(len(layout))] - # Définir les coûts par case et par type de dé self.costs_by_dice_type = { - 'SafeDice': [0] * len(self.layout), - 'NormalDice': [0] * len(self.layout), - 'RiskyDice': [0] * len(self.layout) + 'SafeDice': [0] * len(layout), + 'NormalDice': [0] * len(layout), + 'RiskyDice': [0] * len(layout) } - - # Remplir les coûts pour chaque case en fonction du type de dé - for i in range(len(self.layout)): - if self.layout[i] == 3: - self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr - self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal - self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué - + + for i, die_type in enumerate(self.layout): + self.costs_by_dice_type['SafeDice'][i] = 1 if die_type == 3 else 0 + self.costs_by_dice_type['NormalDice'][i] = 2 if die_type == 3 else 0 + self.costs_by_dice_type['RiskyDice'][i] = 3 if die_type == 3 else 0 def simulate_game(self, strategy, n_iterations=10000): transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] @@ -44,23 +38,18 @@ class validation: for _ in range(n_iterations): total_turns = 0 - k = 0 # état initial + k = 0 # initial state while k < len(self.layout) - 1: - action = strategy[k] # action selon la stratégie - - # Convertir action en entier pour accéder à l'indice correct dans transition_matrices + action = strategy[k] action_index = int(action) - 1 transition_matrix = transition_matrices[action_index] - # Aplatir la matrice de transition en une distribution de probabilité 1D flattened_probs = transition_matrix[k] - flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités + flattened_probs /= np.sum(flattened_probs) - # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie k = np.random.choice(len(self.layout), p=flattened_probs) - # Mise à jour du nombre de tours en fonction de l'état actuel if self.layout[k] == 3 and action == 2: total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 elif self.layout[k] == 3 and action == 3: @@ -73,9 +62,8 @@ class validation: return np.mean(number_turns) def simulate_state(self, strategy, layout, circle, n_iterations=10000): - # Compute transition matrices for each dice safe_dice = self.tmc_instance._compute_safe_matrix() - normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0] # Make sure to capture only the normal_dice component + normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0] risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0] transition_matrices = [safe_dice, normal_dice, risky_dice] @@ -87,14 +75,16 @@ class validation: for state in range(len(layout) - 1): total_turns = 0 - k = state # starting state + k = state while k < len(layout) - 1: - action = strategy[k] # action based on strategy + action = strategy[k] action_index = int(action) - 1 transition_matrix = transition_matrices[action_index] + flattened_probs = transition_matrix[k] flattened_probs /= np.sum(flattened_probs) + k = np.random.choice(len(layout), p=flattened_probs) if layout[k] == 3 and action == 2: @@ -110,47 +100,40 @@ class validation: # calculate the average number of turns for each state mean_turns = np.mean(number_mean, axis=0) - return mean_turns - - def play_optimal_policy(self, n_iterations=10000): return self.simulate_game(self.optimal_policy, n_iterations) - def play_dice_strategy(self, dice_choice, n_iterations=10000): - if dice_choice == 'SafeDice': - strategy = self.safe_strategy - elif dice_choice == 'NormalDice': - strategy = self.normal_strategy - elif dice_choice == 'RiskyDice': - strategy = self.risky_strategy - else: + strategy = { + 'SafeDice': self.safe_strategy, + 'NormalDice': self.normal_strategy, + 'RiskyDice': self.risky_strategy + }.get(dice_choice, None) + + if strategy is None: raise ValueError("Invalid dice choice") return self.simulate_game(strategy, n_iterations) def play_random_strategy(self, n_iterations=10000): return self.simulate_game(self.random_strategy, n_iterations) - + def play_empirical_strategy(self): - k = 0 # état initial + k = 0 total_turns = 0 while k < len(self.layout) - 1: - action = self.optimal_policy[k] # Utiliser la stratégie empirique pour la simulation + action = self.optimal_policy[k] action_index = int(action) - 1 - transition_matrix = self.normal_dice # Utiliser le dé normal pour la stratégie empirique + transition_matrix = self.normal_dice - # Aplatir la matrice de transition en une distribution de probabilité 1D flattened_probs = transition_matrix[k] - flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités + flattened_probs /= np.sum(flattened_probs) - # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie k = np.random.choice(len(self.layout), p=flattened_probs) - # Mise à jour du nombre de tours en fonction de l'état actuel if self.layout[k] == 3 and action == 2: total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 elif self.layout[k] == 3 and action == 3: @@ -160,30 +143,24 @@ class validation: return total_turns - - def compare_empirical_vs_value_iteration(self, num_games=1000): - value_iteration_turns = self.expec - empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) - - # Calculate the mean turns for each state - mean_turns_by_state = { - 'ValueIteration': value_iteration_turns.tolist(), - 'Empirical': empirical_turns.tolist() - } - - return mean_turns_by_state - - - - def compare_state_based_turns(self, num_games=100000): - value_iteration = self.expec + def compare_empirical_vs_value_iteration(self, num_games=10000): + value_iteration_turns = self.expec empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) - return value_iteration, empirical_turns + mean_turns_by_state = { + 'ValueIteration': value_iteration_turns.tolist(), + 'Empirical': empirical_turns.tolist() + } + + return mean_turns_by_state + def compare_state_based_turns(self, num_games=10000): + value_iteration = self.expec + empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) + return value_iteration, empirical_turns - def compare_strategies(self, num_games=100000): + def compare_strategies(self, num_games=10000): optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games) dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) @@ -197,72 +174,71 @@ class validation: 'RiskyDice': dice3_cost, 'Random': random_cost } - + """ -# Utilisation d'exemple +# Exemple d'utilisation layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] circle = False -validation_instance = validation(layout, circle) +validation_instance = Validation(layout, circle) +# Comparaison entre la stratégie empirique et la value iteration +turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000) -# Comparer la stratégie empirique avec la stratégie de value iteration -turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) - -# Imprimer les moyennes des tours pour chaque état - +# Affichage des moyennes de tours pour chaque état num_states = len(layout) for state in range(num_states - 1): print(f"État {state}:") print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}") print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}") - -# Exécuter la stratégie empirique une fois +# Exécution de la stratégie empirique une fois empirical_strategy_result = validation_instance.play_empirical_strategy() print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result) -# Comparer la stratégie empirique avec la stratégie de value iteration sur plusieurs jeux -comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000) +# Comparaison entre la stratégie empirique et la value iteration sur plusieurs jeux +comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000) print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration']) print("Coût moyen de la stratégie empirique :", comparison_result['Empirical']) -optimal_cost = validation_instance.play_optimal_policy(n_iterations=10000) +# Coûts des différentes stratégies +optimal_cost = validation_instance.play_optimal_policy(n_iterations=1000000) print("Optimal Strategy Cost:", optimal_cost) -dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=10000) +dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=1000000) print("Safe Dice Strategy Cost:", dice1_cost) -dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000) +dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=1000000) print("Normal Dice Strategy Cost:", dice2_cost) -dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=10000) +dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=1000000) print("Risky Dice Strategy Cost:", dice3_cost) -random_cost = validation_instance.play_random_strategy(n_iterations=10000) +random_cost = validation_instance.play_random_strategy(n_iterations=1000000) print("Random Strategy Cost:", random_cost) -strategy_comparison = validation_instance.compare_strategies(num_games=10000) +# Comparaison entre les stratégies +strategy_comparison = validation_instance.compare_strategies(num_games=1000000) print("Strategy Comparison Results:", strategy_comparison) - +# Calcul des tours moyens pour différentes stratégies optimal_policy = validation_instance.optimal_policy -mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=10000) +mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=1000000) print("Mean Turns for Optimal Strategy:", mean_turns_optimal) safe_dice_strategy = validation_instance.safe_strategy -mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000) +mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=1000000) print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice) normal_dice_strategy = validation_instance.normal_strategy -mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000) +mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=1000000) print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice) risky_dice_strategy = validation_instance.risky_strategy -mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000) +mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=1000000) print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice) random_dice_strategy = validation_instance.random_strategy -mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000) +mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=1000000) print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice) """ \ No newline at end of file