update files

04f5b4f1 · Adrien Payen · dc96f8ad · 04f5b4f1 · 04f5b4f1 · 04f5b4f1
--- a/markovDecision.py
+++ b/markovDecision.py
 import numpy as np
 from tmc import TransitionMatrixCalculator as tmc

+class MarkovDecisionSolver:  # Value-iteration solver for the board described by layout/circle
+    def __init__(self, layout : list, circle : bool):  # layout: per-square codes (code 3 is collected as a jail square below); circle: forwarded unchanged to the tmc matrix builders
+        self.Numberk = 15  # number of states; fixed at 15, so layout is presumably expected to hold 15 entries -- TODO confirm
+        self.tmc_instance = tmc()
+        self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)  # matrix for the safe die; row k is combined with ValueI in _compute_vi_safe
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)  # matrix for the normal die; row k is used in _compute_vi_normal
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)  # matrix for the risky die; row k is used in _compute_vi_risky
+        self.jail = [i for i, x in enumerate(layout) if x == 3]  # indices of the squares whose layout code is 3
+        self.ValueI = np.zeros(self.Numberk)  # current value estimate per state, starts at zero
+        self.DiceForStates = np.zeros(self.Numberk - 1)  # die chosen per non-final state; solve() fills it with 1, 2 or 3

-# testing our TransitionMatrix function based on random layout
-# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-def markovDecision(layout : list, circle : bool) :
-
-    Numberk = 15 # Number of states k on the board
-    tmc_instance = tmc()
-    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
-    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
-    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
-
-    # Initialisation of the variables before the iteration
-    ValueI = np.zeros(Numberk)  # Algorithm of Value iteration
-    jail = [i for i, x in enumerate(layout) if x == 3] # For all the jailsquare on the board
-    DiceForStates = np.zeros(Numberk - 1) # Set the each states as O
-    i = 0 # set the iteration of Value
-
-    while True :
-        ValueINew = np.zeros(Numberk)
-        i += 1 # iter + 1
-
-        for k in range(Numberk - 1) :
-            vi_safe = np.sum(safe_dice[k] * ValueI)
-            vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail])
-            vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap
-            ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky)
-        
-            if ValueINew[k] == 1 + vi_safe :
-                DiceForStates[k] = 1
-            elif ValueINew[k] == 1 + vi_normal :
-                DiceForStates[k] = 2
-            else :
-                DiceForStates[k] = 3
-
-        if np.allclose(ValueINew, ValueI) :
-            ValueI = ValueINew
-            break
-
-        ValueI = ValueINew
-
-    Expec = ValueI[:-1]
-    return [Expec, DiceForStates]
+    def _compute_vi_safe(self, k):  # continuation value from state k with the safe die: dot product of its matrix row and the current ValueI
+        return np.dot(self.safe_dice[k], self.ValueI)

+    def _compute_vi_normal(self, k):  # continuation value from state k with the normal die, including a jail surcharge
+        vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail])  # second term: half of row k's mass on jail squares; presumably a 50% trap chance costing one extra turn -- TODO confirm
+        return vi_normal
+
+    def _compute_vi_risky(self, k):  # continuation value from state k with the risky die, including a jail surcharge
+        vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])  # second term: all of row k's mass on jail squares (the replaced function's comment says the trap triggers 100% of the time)
+        return vi_risky
+
+    def solve(self):  # Iterate the value update until it stabilises; returns [values for all states but the last, die chosen per state]
+        i = 0  # sweep counter; incremented below but never read in this method
+        while True:
+            ValueINew = np.zeros(self.Numberk)  # the last entry is never written, so the final state keeps value 0
+            i += 1
+
+            for k in range(self.Numberk - 1):  # every state except the last one
+                vi_safe = self._compute_vi_safe(k)
+                vi_normal = self._compute_vi_normal(k)
+                vi_risky = self._compute_vi_risky(k)
+
+                ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky)  # one turn plus the cheapest continuation
+
+                if ValueINew[k] == 1 + vi_safe:  # ties are resolved in the order safe, normal, risky
+                    self.DiceForStates[k] = 1  # 1 = safe die
+                elif ValueINew[k] == 1 + vi_normal:
+                    self.DiceForStates[k] = 2  # 2 = normal die
+                else:
+                    self.DiceForStates[k] = 3  # 3 = risky die
+
+            if np.allclose(ValueINew, self.ValueI):  # stop once two successive sweeps agree within np.allclose's default tolerances
+                self.ValueI = ValueINew
+                break
+
+            self.ValueI = ValueINew
+
+        Expec = self.ValueI[:-1]  # drop the last state's entry
+        return [Expec, self.DiceForStates]  # DiceForStates is the instance's own array, not a copy
+
+def markovDecision(layout : list, circle : bool):  # function-style entry point: builds a MarkovDecisionSolver and returns what its solve() returns
+    solver = MarkovDecisionSolver(layout, circle)
+    return solver.solve()
+
+
+# Example usage of the markovDecision function with the layout and circle parameters
 layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+
+# Solve the problem under the different game modes
+result_false = markovDecision(layout, circle=False)  # module-level code: this runs whenever the module is imported
 print("\nWin as soon as land on or overstep the final square")
-print(markovDecision(layout, False))
+print(result_false)
+
+result_true = markovDecision(layout, circle=True)
 print("\nStopping on the square to win")
-print(markovDecision(layout, True))
+print(result_true)
--- a/plot.py
+++ b/plot.py
@@ -2,7 +2,7 @@ import numpy as np
 import random as rd
 import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import markovDecision as mD
+from test_files.markovDecision_testing import markovDecision as mD
 from validation import Validation

 def plot_results(validation_instance):

--- a/test_files/markovDecision_testing.py
+++ b/test_files/markovDecision_testing.py
+import numpy as np
+from tmc import TransitionMatrixCalculator as tmc
+
+
+# testing our TransitionMatrix function based on random layout
+# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+def markovDecision(layout : list, circle : bool) :  # Value iteration over the board; returns [values for all states but the last, die chosen per state]
+
+    Numberk = 15 # Number of states k on the board (hard-coded)
+    tmc_instance = tmc()
+    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
+    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
+    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
+
+    # Initialise the variables used by the iteration
+    ValueI = np.zeros(Numberk)  # Current value estimate per state for the value-iteration algorithm
+    jail = [i for i, x in enumerate(layout) if x == 3] # Indices of all the jail squares (layout code 3) on the board
+    DiceForStates = np.zeros(Numberk - 1) # Die chosen for each non-final state, initialised to 0
+    i = 0 # Sweep counter; incremented below but never read
+
+    while True :
+        ValueINew = np.zeros(Numberk)  # the last entry is never written, so the final state keeps value 0
+        i += 1 # one more sweep
+
+        for k in range(Numberk - 1) :
+            vi_safe = np.sum(safe_dice[k] * ValueI)
+            vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail])  # jail term weighted by 0.5; presumably a 50% trap chance -- TODO confirm
+            vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap
+            ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky)  # one turn plus the cheapest continuation
+        
+            if ValueINew[k] == 1 + vi_safe :  # ties are resolved in the order safe, normal, risky
+                DiceForStates[k] = 1  # 1 = safe die
+            elif ValueINew[k] == 1 + vi_normal :
+                DiceForStates[k] = 2  # 2 = normal die
+            else :
+                DiceForStates[k] = 3  # 3 = risky die
+
+        if np.allclose(ValueINew, ValueI) :  # stop once two successive sweeps agree within np.allclose's default tolerances
+            ValueI = ValueINew
+            break
+
+        ValueI = ValueINew
+
+    Expec = ValueI[:-1]  # drop the last state's entry
+    return [Expec, DiceForStates]
+
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]  # sample board; module-level code, so the prints below also run whenever this module is imported
+print("\nWin as soon as land on or overstep the final square")
+print(markovDecision(layout, False))  # circle=False
+print("\nStopping on the square to win")
+print(markovDecision(layout, True))  # circle=True
--- a/validation_ex.py
+++ b/validation_ex.py
@@ -2,7 +2,7 @@ import numpy as np
 from tmc import TransitionMatrixCalculator
 import random
 import matplotlib.pyplot as plt
-from markovDecision import markovDecision
+from markovDecision import MarkovDecisionSolver as mD

 class Validation:
    def __init__(self, layout, circle=False):

--- a/validation_test.py
+++ b/validation_test.py
@@ -2,7 +2,7 @@ import random as rd
 import numpy as np
 import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import markovDecision as mD
+from markovDecision import MarkovDecisionSolver as mD

 class EmpiricalComparision :
    def __init__(self) :