update markov.py

2cccc7b8 · Adrien Payen · 40d8a99e · 2cccc7b8 · 2cccc7b8 · 2cccc7b8
--- a/markov.py.py
+++ b/markov.py.py
+import numpy as np
+from tmc import TransitionMatrixCalculator as tmc
+
+class MarkovDecisionSolver:
+    """Value-iteration solver that picks one of three dice for each of 15 states.
+
+    The three transition matrices (safe, normal, risky) are obtained from a
+    ``TransitionMatrixCalculator``. ``solve`` iterates a cost-to-go vector
+    where every move from a non-final state costs 1 and, for the normal and
+    risky dice, the probability of landing on a jail state (layout code 3)
+    is added as an extra cost.
+    NOTE(review): the jail term presumably models one lost turn -- confirm
+    against the game rules.
+    """
+
+    def __init__(self, layout: list, circle: bool):
+        """Build the transition matrices and locate the jail states.
+
+        Args:
+            layout: Per-state codes; indices whose code equals 3 are stored
+                in ``self.jail``.
+            circle: Forwarded unchanged to the normal and risky matrix
+                builders.
+        """
+        self.Numberk = 15
+        self.tmc_instance = tmc()
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+        self.jail = [i for i, x in enumerate(layout) if x == 3]
+        self.Dice = np.zeros(self.Numberk)
+
+    def _expected_costs(self, k, values):
+        """Return the [safe, normal, risky] expected costs for state ``k``.
+
+        The safe die carries no jail term; the normal and risky dice add the
+        summed probability of landing on any jail state.
+        """
+        return [
+            np.dot(self.safe_dice[k], values),
+            np.dot(self.normal_dice[k], values) + np.sum(self.normal_dice[k][self.jail]),
+            np.dot(self.risky_dice[k], values) + np.sum(self.risky_dice[k][self.jail]),
+        ]
+
+    def solve(self):
+        """Run value iteration and return ``(values, dice)``.
+
+        Returns:
+            A pair ``(values, dice)``: ``values`` is the final cost vector and
+            ``dice`` is ``self.Dice``, holding for each state 1 (safe),
+            2 (normal) or 3 (risky) -- the index of the cheapest die plus one.
+        """
+        previous = np.zeros(self.Numberk)
+        current = np.array([8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0])
+        last = self.Numberk - 1
+
+        # Cap the number of sweeps so the loop always terminates.
+        for _ in range(1000):
+            # Every update in this sweep reads the previous sweep's values.
+            np.copyto(previous, current)
+
+            # Non-final states: one move plus the cheapest expected cost.
+            for k in range(last):
+                current[k] = 1 + min(self._expected_costs(k, previous))
+
+            # Final state: no move cost and no jail term.
+            current[last] = min(
+                np.dot(self.safe_dice[last], previous),
+                np.dot(self.normal_dice[last], previous),
+                np.dot(self.risky_dice[last], previous),
+            )
+
+            # Stop once the total change over a sweep is negligible.
+            if np.sum(np.abs(current - previous)) < 1e-9:
+                break
+
+        # The policy only needs computing once: ``previous`` still holds the
+        # values the last sweep was evaluated against, so this matches what a
+        # per-sweep recomputation would have left in ``self.Dice``.
+        for k in range(self.Numberk):
+            self.Dice[k] = np.argmin(self._expected_costs(k, previous)) + 1
+
+        return current, self.Dice
+
+
+def markovDecision(layout : list, circle : bool):
+    """Solve the game for ``layout`` and ``circle``.
+
+    Builds a ``MarkovDecisionSolver`` from the two arguments and returns the
+    result of its ``solve()`` call: the final value vector and the per-state
+    dice array.
+    """
+    return MarkovDecisionSolver(layout, circle).solve()
+
+
+# Example usage of markovDecision with the layout and circle parameters.
+# Note: these statements sit at module top level, so they also run on import.
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+
+
+# Solve the problem under both game modes
+result_false = markovDecision(layout, circle=False)
+print("\nWin as soon as land on or overstep the final square")
+print(result_false)
+
+result_true = markovDecision(layout, circle=True)
+print("\nStopping on the square to win")
+print(result_true)
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -60,7 +60,7 @@ def markovDecision(layout : list, circle : bool):
 # Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
 layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

-"""
+
 # Résolution du problème avec différents modes de jeu
 result_false = markovDecision(layout, circle=False)
 print("\nWin as soon as land on or overstep the final square")
@@ -69,4 +69,3 @@ print(result_false)
 result_true = markovDecision(layout, circle=True)
 print("\nStopping on the square to win")
 print(result_true)
-"""
\ No newline at end of file
--- a/tmc.py
+++ b/tmc.py
@@ -17,7 +17,7 @@ class TransitionMatrixCalculator:
        self.matrix_normal.fill(0)
        self.matrix_risky.fill(0)

-        self._compute_safe_matrix(layout, circle)
+        self._compute_safe_matrix()
        self._compute_normal_matrix(layout, circle)
        self._compute_risky_matrix(layout, circle)

@@ -192,36 +192,6 @@ class TransitionMatrixCalculator:
                        continue
                self.matrix_risky[k,k_prime] += p
        return self.matrix_risky
-    
-
-    def generate_arrays(self,n):
-        # Initialize an empty list to store all the arrays
-        arrays = []
-
-        for _ in range(n):
-            # Initialize a zero array of size 15
-            array = np.zeros(15, dtype=int)
-
-            # Generate 3 random indices between 1 and 13 (exclusive)
-            indices = rd.sample(range(1, 14), 3)
-
-            # Assign the values 1, 2 and 3 to the randomly generated indices
-            array[indices] = 1, 2, 3
-
-            # Append the generated array to the list
-            arrays.append(array)
-
-        return arrays
-    
-    # create a function that test the transition matrix for different layout each time with one trap of each sort
-    def tst_transition_matrix(self):
-        # create a list of 100 different layouts
-        layouts = self.generate_arrays(100)
-        for array in layouts:
-            print(array)
-            self.compute_transition_matrix(array, False)
-            self.compute_transition_matrix(array, True)
-

 #tmc = TransitionMatrixCalculator()
 #tmc.tst_transition_matrix()