update markovDecision

4fdf8e94 · Adrien Payen · e66a9a26 · 4fdf8e94 · 4fdf8e94 · 4fdf8e94
--- a/__pycache__/tmc.cpython-312.pyc
+++ b/__pycache__/tmc.cpython-312.pyc
--- a/markovDecion.py
+++ b/markovDecion.py
 import numpy as np
-from tmc import *
+from tmc import TransitionMatrixCalculator as tmc
-from tmc import TransitionMatrixCalculator
 # testing our TransitionMatrix function based on random layout
 # [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
 def markovDecision(layout, circle) :
-    layout = 15
+    """Compute the expected cost and best die per square by value iteration.
+
+    Args:
+        layout: sequence of square types for the board; squares equal to 3
+            are treated as jail squares.
+        circle: forwarded unchanged to the TransitionMatrixCalculator
+            matrix builders.
+
+    Returns:
+        [Expec, DiceForStates]: Expec holds the converged value of every
+        state except the last one; DiceForStates holds the die chosen for
+        each of those states (1 = safe, 2 = normal, 3 = risky).
+    """
+    Numberk = 15 # Number of states k on the board
+    tmc_instance = tmc()
+    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
+    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
+    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
-layout = np.array([0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0])
+    # Initialisation of the variables before the iteration
+    ValueI = np.zeros(Numberk)  # Current value estimate; the last state is never updated and stays at 0
+    jail = [i for i, x in enumerate(layout) if x == 3] # Indices of all the jail squares on the board
+    DiceForStates = np.zeros(Numberk - 1) # Chosen die per state, initialised to 0
+    while True :
+        ValueINew = np.zeros(Numberk)
+        for k in range(Numberk - 1) :
+            vi_safe = np.sum(safe_dice[k] * ValueI)
+            vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail])
+            # 100% chance of triggering the trap: the jail penalty must be
+            # weighted by the risky die's own transition probabilities.
+            vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail])
+            costs = (vi_safe, vi_normal, vi_risky)
+            # argmin returns the first minimum, so ties resolve safe > normal > risky
+            best = int(np.argmin(costs))
+            # Store per state; assigning to the whole name would replace the array by a scalar
+            ValueINew[k] = 1 + costs[best]
+            DiceForStates[k] = best + 1
+        converged = np.allclose(ValueINew, ValueI)
+        ValueI = ValueINew
+        if converged :
+            break
+    Expec = ValueI[:-1]
+    return [Expec, DiceForStates]
+layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]  # sample 15-square board used by the two demo runs below
 print(markovDecision(layout, False))  # demo run with circle=False
 print(markovDecision(layout, True))  # demo run with circle=True
--- a/tmc.py
+++ b/tmc.py
@@ -9,9 +9,12 @@ class TransitionMatrixCalculator:
        self.matrix_normal = np.zeros((15, 15))
        self.matrix_risky = np.zeros((15, 15))
        # Probability to go from state k to k'
-        self.safe_dice = np.array([1/2,1/2])
+        safe_dice = np.array([1/2, 1/2])
-        self.normal_dice = np.array([1/3,1/3,1/3])
+        normal_dice = np.array([1/3, 1/3, 1/3])
-        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])
+        risky_dice = np.array([1/4, 1/4, 1/4, 1/4])
+        self.safe_dice = safe_dice 
+        self.normal_dice = normal_dice
+        self.risky_dice = risky_dice
    def compute_transition_matrix(self, layout, circle=False):
        self.matrix_safe.fill(0)
@@ -41,7 +44,7 @@ class TransitionMatrixCalculator:
                    k_prime = k + s
                    k_prime = min(14, k_prime)
                    self.matrix_safe[k,k_prime] += p
        return self.matrix_safe
    def _compute_normal_matrix(self, layout, circle):