diff --git a/ancien/markoVVV.py b/ancien/markoVVV.py index 8e9292188e820228902c3b55253edbe590055f82..5cd533da36c502534d7221ab7df2d8555759fc02 100644 --- a/ancien/markoVVV.py +++ b/ancien/markoVVV.py @@ -1,18 +1,20 @@ import numpy as np -from ancien.tmc import TransitionMatrixCalculator as tmc +from tmc import TransitionMatrixCalculator as tmc class MarkovDecisionSolver: def __init__(self, layout: list, circle: bool): - self.Numberk = 15 + self.nSquares = 15 + self.precision = 1e-9 + self.layout = layout + self.circle = circle self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) - self.jail = [i for i, x in enumerate(layout) if x == 3] - self.Dice = np.zeros(self.Numberk) + self.matrix_safe = self.tmc_instance._compute_safe_matrix() + self.matrix_normal, self.jail_n = self.tmc_instance._compute_normal_matrix(layout, circle) + self.matrix_risky, self.jail_r = self.tmc_instance._compute_risky_matrix(layout, circle) + self.Dice = np.zeros(self.nSquares, dtype=int) def solve(self): - ValueI = np.zeros(self.Numberk) + ValueI = np.zeros(self.nSquares) ValueINew = np.array([8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0]) i = 0 @@ -23,36 +25,36 @@ class MarkovDecisionSolver: np.copyto(ValueI, ValueINew) # Mettez à jour les valeurs de ValueINew pour chaque état - for k in range(self.Numberk - 1): + for k in range(self.nSquares - 1): ValueINew[k] = 1 + min( - np.dot(self.safe_dice[k], ValueI), - np.dot(self.normal_dice[k], ValueI) + np.sum(self.normal_dice[k][self.jail]), - np.dot(self.risky_dice[k], ValueI) + np.sum(self.risky_dice[k][self.jail]) + np.dot(self.matrix_safe[k], ValueI), + np.dot(self.matrix_normal[k], ValueI) + np.sum(self.jail_n[k]), + np.dot(self.matrix_risky[k], ValueI) + np.sum(self.jail_r[k]) ) - ValueINew[self.Numberk - 1] = min( - np.dot(self.safe_dice[self.Numberk - 1], ValueI), - np.dot(self.normal_dice[self.Numberk - 1], ValueI), - np.dot(self.risky_dice[self.Numberk - 1], ValueI) + ValueINew[self.nSquares - 1] = min( + np.dot(self.matrix_safe[self.nSquares - 1], ValueI), + np.dot(self.matrix_normal[self.nSquares - 1], ValueI), + np.dot(self.matrix_risky[self.nSquares - 1], ValueI) ) # Calculer les actions optimales (indice de l'action + 1) - for k in range(self.Numberk): + for k in range(self.nSquares): self.Dice[k] = np.argmin([ - np.dot(self.safe_dice[k], ValueI), - np.dot(self.normal_dice[k], ValueI) + np.sum(self.normal_dice[k][self.jail]), - np.dot(self.risky_dice[k], ValueI) + np.sum(self.risky_dice[k][self.jail]), + np.dot(self.matrix_safe[k], ValueINew), + np.dot(self.matrix_normal[k], ValueINew) + np.sum(self.jail_n[k]), + np.dot(self.matrix_risky[k], ValueINew) + np.sum(self.jail_r[k]), ]) + 1 # Vérifiez la convergence en utilisant une petite tolérance - if np.sum(np.abs(ValueINew - ValueI)) < 1e-9: + if np.sum(np.abs(ValueINew - ValueI)) < self.precision: break # Retourne les valeurs finales de ValueINew et les actions optimales (Dice) return ValueINew, self.Dice -def markovDecision(layout : list, circle : bool): +def markovDecision(layout: list, circle: bool): solver = MarkovDecisionSolver(layout, circle) return solver.solve() @@ -60,12 +62,17 @@ def markovDecision(layout : list, circle : bool): # Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - # Résolution du problème avec différents modes de jeu result_false = markovDecision(layout, circle=False) -print("\nWin as soon as land on or overstep the final square") -print(result_false) +print("\nStopping on the square to win") +print("Expected costs for each square:") +print(result_false[0]) +print("Dice choices for each square:") +print(result_false[1]) result_true = markovDecision(layout, circle=True) -print("\nStopping on the square to win") -print(result_true) +print("\nWin as soon as land on or overstep the final square") +print("Expected costs for each square:") +print(result_true[0]) +print("Dice choices for each square:") +print(result_true[1]) diff --git a/ancien/tmcccc.py b/ancien/tmcccc.py index 388cc13756e4a26351ad0785fc55a89793bfb6e3..29086c4fa271070a1e1b4dfcfef97147ccfd0430 100644 --- a/ancien/tmcccc.py +++ b/ancien/tmcccc.py @@ -192,6 +192,18 @@ class TransitionMatrixCalculator: continue self.matrix_risky[k,k_prime] += p return self.matrix_risky + + def print_matrix_with_layout(self, title, matrix): + print(f"{title}:") + for i in range(matrix.shape[0]): + row_str = " | ".join(f"{matrix[i, j]:.3f}" for j in range(matrix.shape[1])) + print(row_str) + print() + +# Example Usage: +layout_example = [0]*15 +calculator = TransitionMatrixCalculator() +print(calculator.compute_transition_matrix(layout_example, circle=True)) #tmc = TransitionMatrixCalculator() #tmc.tst_transition_matrix() diff --git a/markovDecision.py b/markovDecision.py index 276043e74996f0f1b5e9474a859d9ce0c0bf98fe..5b6e73db540ebea4b0bee80730d5436fb738d299 100644 --- a/markovDecision.py +++ b/markovDecision.py @@ -1,28 +1,28 @@ import numpy as np -from tmc_2 import TransitionMatrixCalculator as tmc +from tmc import TransitionMatrixCalculator as tmc class MarkovDecisionSolver: def __init__(self, layout: list, circle: bool): self.Numberk = 15 self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance.proba_security_dice() - self.normal_dice, _ = self.tmc_instance.proba_normal_dice(layout, circle) # Make sure to capture only the normal_dice component - self.risky_dice, _ = self.tmc_instance.proba_risky_dice(layout, circle) # Make sure to capture only the risky_dice component + self.safe_dice = self.tmc_instance._compute_safe_matrix() + self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) # Make sure to capture only the normal_dice component + self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle) # Make sure to capture only the risky_dice component self.jail = [i for i, x in enumerate(layout) if x == 3] self.ValueI = np.zeros(self.Numberk) self.DiceForStates = np.zeros(self.Numberk - 1) def _compute_vi_safe(self, k): - return np.sum(self.safe_dice[k] * self.ValueI) + np.sum(self.normal_dice[k][self.jail]) + return np.dot(self.safe_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail]) def _compute_vi_normal(self, k): - vi_normal = np.sum(self.normal_dice[k] * self.ValueI) + np.sum(self.normal_dice[k][self.jail]) + vi_normal = np.dot(self.normal_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail]) return vi_normal def _compute_vi_risky(self, k): - vi_risky = np.sum(self.risky_dice[k] * self.ValueI) + np.sum(self.risky_dice[k][self.jail]) + vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail]) return vi_risky def solve(self): @@ -36,15 +36,21 @@ class MarkovDecisionSolver: vi_normal = self._compute_vi_normal(k) vi_risky = self._compute_vi_risky(k) - ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky) + # Compute the minimum value among vi_safe, vi_normal, and vi_risky + min_value = min(vi_safe, vi_normal, vi_risky) - if ValueINew[k] == 1 + vi_safe: + # Find which index (safe, normal, or risky) corresponds to the minimum value + if min_value == vi_safe: + ValueINew[k] = 1 + vi_safe self.DiceForStates[k] = 1 - elif ValueINew[k] == 1 + vi_normal: + elif min_value == vi_normal: + ValueINew[k] = 1 + vi_normal self.DiceForStates[k] = 2 else: + ValueINew[k] = 1 + vi_risky self.DiceForStates[k] = 3 + if np.allclose(ValueINew, self.ValueI): self.ValueI = ValueINew break @@ -59,7 +65,6 @@ def markovDecision(layout : list, circle : bool): return solver.solve() """ - # Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] diff --git a/plot.py b/plot.py index 46b023a9f315b02706356c9ae9f1f0b62705f9fa..cf39b576bc99b711873c63d3e3dda18396fc4d88 100644 --- a/plot.py +++ b/plot.py @@ -1,5 +1,5 @@ import matplotlib.pyplot as plt -from valid import validation +from validation import validation import numpy as np # Example layout and circle settings @@ -48,7 +48,7 @@ def plot_state_based_turns(save=True): plt.show() -def plot_state_based_comparison(validation_instance, num_games=1000): +def plot_state_based_comparison(validation_instance, num_games=100000): optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games) # Plotting the state-based average turns comparison diff --git a/strategy_comparison.png b/strategy_comparison.png index d8b19f2ae884d963e9a1a6f2a97b07cd2f744384..089723dd698d20af173f314912b2e90a01d89143 100644 Binary files a/strategy_comparison.png and b/strategy_comparison.png differ diff --git a/tmc.py b/tmc.py index 04b03925db4e3697d560b113611fc74d82717283..cb3b32ba32a97c12a4ca16bab29bb09d94babedc 100644 --- a/tmc.py +++ b/tmc.py @@ -2,156 +2,149 @@ import numpy as np class TransitionMatrixCalculator: def __init__(self): - self.nSquares = 15 - self.matrix_safe = np.zeros((self.nSquares, self.nSquares)) - self.matrix_normal = np.zeros((self.nSquares, self.nSquares)) - self.matrix_risky = np.zeros((self.nSquares, self.nSquares)) + self.size = 15 + self.matrix_safe = np.zeros((self.size , self.size )) + self.matrix_normal = np.zeros((self.size , self.size )) + self.matrix_risky = np.zeros((self.size , self.size )) - def proba_security_dice(self): - proba = np.zeros((self.nSquares, self.nSquares)) + def compute_transition_matrix(self, layout, circle=False): + self.matrix_safe = self._compute_safe_matrix() + self.matrix_normal, _ = self._compute_normal_matrix(layout, circle) + self.matrix_risky, _ = self._compute_risky_matrix(layout, circle) - for i in range(self.nSquares - 1): - proba[i][i] = 0.5 - if i == 2: - proba[i][i + 1] = 0.25 # slow lane - proba[i][i + 8] = 0.25 # fast lane - elif i == 9: - proba[i][i + 5] = 0.5 - else: - proba[i][i + 1] = 0.5 + return self.matrix_safe, self.matrix_normal, self.matrix_risky - proba[self.nSquares - 1][self.nSquares - 1] = 1 - return proba - def proba_normal_dice(self, layout, circle=False): - proba = np.zeros((self.nSquares, self.nSquares)) - proba_prison = np.zeros((self.nSquares, self.nSquares)) + def _compute_safe_matrix(self): + p = np.zeros((self.size ,self.size )) + for k in range(self.size - 1): + if k == 2: + p[k,k+1] = 1/4 # slow lane + p[k,k+8] = 1/4 # fast lane + elif k == 9: + p[k,k+5] = 1/2 + else: + p[k,k+1] = 1/2 + p[k, k] = 1 - np.sum(p[k]) + p[self.size -1,self.size -1] = 1 + return p + + def _compute_normal_matrix(self, layout, circle=False): + p = np.zeros((self.size ,self.size )) + jail = np.zeros((self.size ,self.size )) - for i in range(self.nSquares - 1): - proba[i][i] = 1 / 3 - if i == 2: - proba[i][i + 1] = 1 / 6 # slow lane - proba[i][i + 2] = 1 / 6 # slow lane - proba[i][i + 8] = 1 / 6 # fast lane - proba[i][i + 9] = 1 / 6 # fast lane - elif i == 8: - proba[i][i + 1] = 1 / 3 - proba[i][i + 6] = 1 / 3 - elif i == 9: + for k in range(self.size - 1): + if k == 2: + p[k,k+1:k+3] = 1/6 # slow lane # slow lane + p[k,k+8:k+10] = 1/6 # fast lane # fast lane + elif k == 8: + p[k,k+1] = 1/3 + p[k,k+6] = 1/3 + elif k == 9: if circle: - proba[i][i + 5] = 1 / 3 - proba[i][0] = 1 / 3 + p[k,k+5] = 1/3 + p[k,0] = 1/3 else: - proba[i][i + 5] = 2 / 3 - elif i == 13: + p[k,k+5] = 2/3 + elif k == 13: if circle: - proba[i][i + 1] = 1 / 3 - proba[i][0] = 1 / 3 + p[k,k+1] = 1/3 + p[k,0] = 1/3 else: - proba[i][i + 1] = 2 / 3 + p[k,k+1] = 2/3 else: - proba[i][i + 1] = 1 / 3 - proba[i][i + 2] = 1 / 3 + p[k,k+1] = 1/3 + p[k,k+2] = 1/3 + p[k, k] = 1 - np.sum(p[k]) - for i in range(self.nSquares - 1): - for j in range(self.nSquares - 1): - case_value = layout[j] - if case_value == 1: + for k in range(self.size - 1): + for j in range(self.size - 1): + s = layout[j] + if s == 1: if j != 0: - proba[i][0] += proba[i][j] / 2 - proba[i][j] /= 2 - elif case_value == 2: - proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] / 2 - proba[i][j] /= 2 - elif case_value == 3: - proba_prison[i][j] = proba[i][j] / 2 - elif case_value == 4: - proba[i][j] /= 2 - if j != 0: - proba[i][0] += proba[i][j] / 6 - proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] / 6 - proba_prison[i][j] = proba[i][j] / 6 + p[k,0] += p[k,j]/2 + p[k,j] /= 2 + elif s == 2: + p[k,j-3 if j-3 >= 0 else 0] += p[k,j]/2 + p[k,j] /= 2 + elif s == 3: + jail[k,j] = p[k,j]/2 - proba[self.nSquares - 1][self.nSquares - 1] = 1 - return proba, proba_prison + p[self.size -1,self.size -1] = 1 + return p, jail - def proba_risky_dice(self, layout, circle=False): - proba = np.zeros((self.nSquares, self.nSquares)) - proba_prison = np.zeros((self.nSquares, self.nSquares)) + def _compute_risky_matrix(self, layout, circle=False): + p = np.zeros((self.size ,self.size )) + jail = np.zeros((self.size ,self.size )) - for i in range(self.nSquares - 1): - proba[i][i] = 1 / 4 - if i == 2: - proba[i][i + 1] = 1 / 8 # slow lane - proba[i][i + 2] = 1 / 8 # slow lane - proba[i][i + 3] = 1 / 8 # slow lane - proba[i][i + 8] = 1 / 8 # fast lane - proba[i][i + 9] = 1 / 8 # fast lane - proba[i][i + 10] = 1 / 8 # fast lane - elif i == 7: - proba[i][i + 1] = 1 / 4 - proba[i][i + 2] = 1 / 4 - proba[i][i + 7] = 1 / 4 - elif i == 8: + for k in range(self.size -1): + if k == 2: + p[k,k+1:k+4] = 1/8 # slow lane + p[k,k+8:k+11] = 1/8 # fast lane + elif k == 7: + p[k,k+1:k+3] = 1/4 + p[k,k+7] = 1/4 + elif k == 8: if circle: - proba[i][i + 1] = 1 / 4 - proba[i][i + 6] = 1 / 4 - proba[i][0] = 1 / 4 + p[k,k+1] = 1/4 + p[k,k+6] = 1/4 + p[k,0] = 1/4 else: - proba[i][i + 1] = 1 / 4 - proba[i][i + 6] = 1 / 2 - elif i == 9: + p[k,k+1] = 1/4 + p[k,k+6] = 1/2 + elif k == 9: if circle: - proba[i][i + 5] = 1 / 4 - proba[i][0] = 1 / 4 - proba[i][1] = 1 / 4 + p[k,k + 5] = 1/4 + p[k,0] = 1/4 + p[k,1] = 1/4 else: - proba[i][i + 5] = 3 / 4 - elif i == 12: + p[k,k+5] = 3/4 + elif k == 12: if circle: - proba[i][i + 1] = 1 / 4 - proba[i][i + 2] = 1 / 4 - proba[i][0] = 1 / 4 + p[k,k+1:k+3] = 1/4 + p[k,0] = 1/4 else: - proba[i][i + 1] = 1 / 4 - proba[i][i + 2] = 1 / 2 - elif i == 13: + p[k,k+1] = 1/4 + p[k,k+2] = 1/2 + elif k == 13: if circle: - proba[i][i + 1] = 1 / 4 - proba[i][0] = 1 / 4 - proba[i][1] = 1 / 4 + p[k,k+1] = 1/4 + p[k,0] = 1/4 + p[k,1] = 1/4 else: - proba[i][self.nSquares - 1] = 3 / 4 + p[k,self.size -1] = 3/4 else: - proba[i][i + 1] = 1 / 4 - proba[i][i + 2] = 1 / 4 - proba[i][i + 3] = 1 / 4 + p[k,k+1:k+4] = 1/4 + p[k, k] = 1 - np.sum(p[k]) - for i in range(self.nSquares - 1): - for j in range(self.nSquares - 1): - case_value = layout[j] - if case_value == 1: - if j != 0: - proba[i][0] += proba[i][j] - proba[i][j] = 0 - elif case_value == 2: - proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] - proba[i][j] = 0 - elif case_value == 3: - proba_prison[i][j] = proba[i][j] - elif case_value == 4: + for k in range(self.size - 1): + for j in range(self.size - 1): + s = layout[j] + if s == 1: if j != 0: - proba[i][0] += proba[i][j] / 3 - proba[i][j - 3 if j - 3 >= 0 else 0] += proba[i][j] / 3 - proba_prison[i][j] = proba[i][j] / 3 - proba[i][j] /= 3 + p[k,0] += p[k,j] + p[k,j] = 0 + elif s == 2: + p[k,j-3 if j-3 >= 0 else 0] += p[k,j] + p[k,j] = 0 + elif s == 3: + jail[k,j] = p[k,j] - proba[self.nSquares - 1][self.nSquares - 1] = 1 - return proba, proba_prison + p[self.size -1,self.size-1] = 1 + return p, jail - def compute_transition_matrix(self, layout, circle=False): - self.matrix_safe = self.proba_security_dice() - self.matrix_normal, _ = self.proba_normal_dice(layout, circle) - self.matrix_risky, _ = self.proba_risky_dice(layout, circle) +""" + def display_matrices(self): + print("Safe Matrix:") + print(self.matrix_safe) + print("\nNormal Matrix:") + print(self.matrix_normal) + print("\nRisky Matrix:") + print(self.matrix_risky) - return self.matrix_safe, self.matrix_normal, self.matrix_risky +# Example Usage: +layout_example = [0]*15 +calculator = TransitionMatrixCalculator() +calculator.compute_transition_matrix(layout_example, circle=True) +calculator.display_matrices() +""" \ No newline at end of file diff --git a/validation.py b/validation.py index 16174fc88da73ec2ce138aca2728423c980cbe89..c86c66327041c52901ea052ef827118f32af3a18 100644 --- a/validation.py +++ b/validation.py @@ -1,8 +1,8 @@ import random as rd import numpy as np import matplotlib.pyplot as plt -from tmc_2 import TransitionMatrixCalculator as tmc -from mdp import MarkovDecisionSolver as mD +from tmc import TransitionMatrixCalculator as tmc +from markovDecision import MarkovDecisionSolver as mD class validation: def __init__(self, layout, circle=False): @@ -11,9 +11,9 @@ class validation: self.layout = layout self.circle = circle self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance.proba_security_dice() - self.normal_dice, _ = self.tmc_instance.proba_normal_dice(layout, circle) # Make sure to capture only the normal_dice component - self.risky_dice, _ = self.tmc_instance.proba_risky_dice(layout, circle) # Make sure to capture only the risky_dice component + self.safe_dice = self.tmc_instance._compute_safe_matrix() + self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) # Make sure to capture only the normal_dice component + self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle) solver = mD(self.layout, self.circle) self.expec, self.optimal_policy = solver.solve() @@ -74,9 +74,9 @@ class validation: def simulate_state(self, strategy, layout, circle, n_iterations=10000): # Compute transition matrices for each dice - safe_dice = self.tmc_instance.proba_security_dice() - normal_dice = self.tmc_instance.proba_normal_dice(layout, circle)[0] # Get only the normal dice transition matrix - risky_dice = self.tmc_instance.proba_risky_dice(layout, circle)[0] # Get only the risky dice transition matrix + safe_dice = self.tmc_instance._compute_safe_matrix() + normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0] # Make sure to capture only the normal_dice component + risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0] transition_matrices = [safe_dice, normal_dice, risky_dice] number_turns = [] @@ -162,7 +162,7 @@ class validation: def compare_empirical_vs_value_iteration(self, num_games=1000): - value_iteration_turns = self.optimal_policy + value_iteration_turns = self.expec empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) # Calculate the mean turns for each state @@ -175,7 +175,7 @@ class validation: - def compare_state_based_turns(self, num_games=1000): + def compare_state_based_turns(self, num_games=100000): value_iteration = self.expec empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games) @@ -183,7 +183,7 @@ class validation: - def compare_strategies(self, num_games=1000): + def compare_strategies(self, num_games=100000): optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games) dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) @@ -265,5 +265,4 @@ random_dice_strategy = validation_instance.random_strategy mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000) print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice) - """ \ No newline at end of file