diff --git a/markov.py.py b/markov.py.py
new file mode 100644
index 0000000000000000000000000000000000000000..286d24310db443276ac5784480fbc5317983a1b9
--- /dev/null
+++ b/markov.py.py
@@ -0,0 +1,71 @@
+import numpy as np
+from tmc import TransitionMatrixCalculator as tmc
+
+class MarkovDecisionSolver:
+    def __init__(self, layout: list, circle: bool):
+        self.Numberk = 15
+        self.tmc_instance = tmc()
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+        self.jail = [i for i, x in enumerate(layout) if x == 3]
+        self.Dice = np.zeros(self.Numberk)
+
+    def solve(self):
+        ValueI = np.zeros(self.Numberk)
+        ValueINew = np.array([8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0])
+
+        i = 0
+        while i < 1000: # Limiter le nombre d'itérations pour éviter une boucle infinie
+            i += 1
+
+            # Copiez la valeur actuelle dans ValueI
+            np.copyto(ValueI, ValueINew)
+
+            # Mettez à jour les valeurs de ValueINew pour chaque état
+            for k in range(self.Numberk - 1):
+                ValueINew[k] = 1 + min(
+                    np.dot(self.safe_dice[k], ValueI),
+                    np.dot(self.normal_dice[k], ValueI) + np.sum(self.normal_dice[k][self.jail]),
+                    np.dot(self.risky_dice[k], ValueI) + np.sum(self.risky_dice[k][self.jail])
+                )
+
+            ValueINew[self.Numberk - 1] = min(
+                np.dot(self.safe_dice[self.Numberk - 1], ValueI),
+                np.dot(self.normal_dice[self.Numberk - 1], ValueI),
+                np.dot(self.risky_dice[self.Numberk - 1], ValueI)
+            )
+
+            # Calculer les actions optimales (indice de l'action + 1)
+            for k in range(self.Numberk):
+                self.Dice[k] = np.argmin([
+                    np.dot(self.safe_dice[k], ValueI),
+                    np.dot(self.normal_dice[k], ValueI) + np.sum(self.normal_dice[k][self.jail]),
+                    np.dot(self.risky_dice[k], ValueI) + np.sum(self.risky_dice[k][self.jail]),
+                ]) + 1
+
+            # Vérifiez la convergence en utilisant une petite tolérance
+            if np.sum(np.abs(ValueINew - ValueI)) < 1e-9:
+                break
+
+        # Retourne les valeurs finales de ValueINew et les actions optimales (Dice)
+        return ValueINew, self.Dice
+
+
+def markovDecision(layout : list, circle : bool):
+    solver = MarkovDecisionSolver(layout, circle)
+    return solver.solve()
+
+
+# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+
+
+# Résolution du problème avec différents modes de jeu
+result_false = markovDecision(layout, circle=False)
+print("\nWin as soon as land on or overstep the final square")
+print(result_false)
+
+result_true = markovDecision(layout, circle=True)
+print("\nStopping on the square to win")
+print(result_true)
diff --git a/markovDecision.py b/markovDecision.py
index 25c5df10a8e77dafcd1fff2f03375bceda329a18..e8e68be0cf9a32e0fdacd134d21008f43fb19295 100644
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -60,7 +60,7 @@ def markovDecision(layout : list, circle : bool):
 # Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
 layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
 
-"""
+
 # Résolution du problème avec différents modes de jeu
 result_false = markovDecision(layout, circle=False)
 print("\nWin as soon as land on or overstep the final square")
@@ -69,4 +69,3 @@ print(result_false)
 result_true = markovDecision(layout, circle=True)
 print("\nStopping on the square to win")
 print(result_true)
-"""
\ No newline at end of file
diff --git a/tmc.py b/tmc.py
index 085fe462113a869528fec8eada34114390d0bb13..388cc13756e4a26351ad0785fc55a89793bfb6e3 100644
--- a/tmc.py
+++ b/tmc.py
@@ -17,7 +17,7 @@ class TransitionMatrixCalculator:
         self.matrix_normal.fill(0)
         self.matrix_risky.fill(0)
 
-        self._compute_safe_matrix(layout, circle)
+        self._compute_safe_matrix()
         self._compute_normal_matrix(layout, circle)
         self._compute_risky_matrix(layout, circle)
 
@@ -192,36 +192,6 @@ class TransitionMatrixCalculator:
                         continue
                     self.matrix_risky[k,k_prime] += p
         return self.matrix_risky
-
-
-    def generate_arrays(self,n):
-        # Initialize an empty list to store all the arrays
-        arrays = []
-
-        for _ in range(n):
-            # Initialize a zero array of size 15
-            array = np.zeros(15, dtype=int)
-
-            # Generate 3 random indices between 1 and 13 (exclusive)
-            indices = rd.sample(range(1, 14), 3)
-
-            # Assign the values 1, 2 and 3 to the randomly generated indices
-            array[indices] = 1, 2, 3
-
-            # Append the generated array to the list
-            arrays.append(array)
-
-        return arrays
-
-    # create a function that test the transition matrix for different layout each time with one trap of each sort
-    def tst_transition_matrix(self):
-        # create a list of 100 different layouts
-        layouts = self.generate_arrays(100)
-        for array in layouts:
-            print(array)
-            self.compute_transition_matrix(array, False)
-            self.compute_transition_matrix(array, True)
-
 
 #tmc = TransitionMatrixCalculator()
 #tmc.tst_transition_matrix()