first step of validation.py

7707bd45 · Adrien Payen · 306411a2 · 7707bd45 · 7707bd45 · 7707bd45
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -4,7 +4,7 @@ from tmc import TransitionMatrixCalculator as tmc

 # testing our TransitionMatrix function based on random layout
 # [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-def markovDecision(layout, circle) :
+def markovDecision(layout : list, circle : bool) :

    Numberk = 15 # Number of states k on the board
    tmc_instance = tmc()

--- a/test_files/md_test.py
+++ b/test_files/md_test.py
+import numpy as np
+from tmc import TransitionMatrixCalculator as tmc
+
+def markov_decision(layout: list, circle: bool):
+    """Run value iteration to pick the cheapest die for every square.
+
+    Args:
+        layout: Board description, one entry per square. Entries equal to 3
+            are treated as jail squares by this function.
+        circle: Passed through unchanged to the transition-matrix builders.
+
+    Returns:
+        A pair ``(cost, dice)`` covering every square except the last one:
+        the converged cost estimate from each square and the die chosen
+        there (1 = safe, 2 = normal, 3 = risky).
+    """
+    Numberk = 15  # Number of squares on the board
+    tmc_instance = tmc()
+    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
+    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
+    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
+
+    jail = [i for i, x in enumerate(layout) if x == 3]
+
+    # Each die is paired with the expected number of extra turns paid when it
+    # lands on a jail square: nothing for the safe die, half a turn for the
+    # normal die, a full turn for the risky die.  This mirrors the turn
+    # accounting of the empirical simulation in validation.py.
+    # The weight is passed explicitly: comparing a matrix *row* with the
+    # whole matrix using ``is`` is always False, so the 0.5 weight of the
+    # normal die was never applied.
+    dice = ((safe_dice, 0.0), (normal_dice, 0.5), (risky_dice, 1.0))
+
+    def compute_value(v, dice_row, jail_weight):
+        # Expected cost of the next square plus the expected jail penalty.
+        return np.sum(dice_row * v) + jail_weight * np.sum(dice_row[jail])
+
+    value = np.zeros(Numberk)
+    dice_for_states = np.zeros(Numberk - 1)
+
+    while True:
+        # The last square keeps a cost of 0: it is never updated below.
+        new_value = np.zeros(Numberk)
+
+        for k in range(Numberk - 1):
+            costs = [compute_value(value, matrix[k], weight)
+                     for matrix, weight in dice]
+            # argmin returns the first minimum, so ties favour the safe die,
+            # then the normal die.
+            best = int(np.argmin(costs))
+            new_value[k] = 1 + costs[best]
+            dice_for_states[k] = best + 1
+
+        converged = np.allclose(new_value, value)
+        value = new_value
+        if converged:
+            break
+
+    return value[:-1], dice_for_states
+
+# Demo run: solve the same layout under both end-of-board rules.
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+for banner, circle_flag in (
+    ("\nWin as soon as land on or overstep the final square", False),
+    ("\nStopping on the square to win", True),
+):
+    print(banner)
+    print(markov_decision(layout, circle_flag))
--- a/test_files/tmc_test.py
+++ b/test_files/tmc_test.py
+import numpy as np
+import random as rd
+
+class TransitionMatrixCalculator:
+    """Build one transition matrix per die ("safe", "normal", "risky").
+
+    Entry ``[k, k']`` of a matrix accumulates the probability of moving from
+    square ``k`` to square ``k'`` in a single throw of that die.
+    """
+
+    _MATRIX_TYPES = ('safe', 'normal', 'risky')
+
+    def __init__(self):
+        # Step probabilities for the "safe", "normal" and "risky" dice:
+        # index s holds the probability of advancing s squares.
+        self.safe_dice = np.array([1/2, 1/2])
+        self.normal_dice = np.array([1/3, 1/3, 1/3])
+        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])
+
+    def compute_transition_matrix(self, layout: list, circle: bool):
+        """Return the ``(safe, normal, risky)`` matrices for ``layout``.
+
+        Args:
+            layout: Board description, one entry per square.
+            circle: If True, a move past the last square wraps around to the
+                start; otherwise it is clamped to the last square.
+        """
+        size = len(layout)
+        matrix_safe = self._compute_matrix(layout, self.safe_dice, size, circle, 'safe')
+        matrix_normal = self._compute_matrix(layout, self.normal_dice, size, circle, 'normal')
+        matrix_risky = self._compute_matrix(layout, self.risky_dice, size, circle, 'risky')
+        return matrix_safe, matrix_normal, matrix_risky
+
+    def _compute_matrix(self, layout: list, dice_probs: list, size: int, circle: bool, matrix_type: str):
+        """Build the ``size`` x ``size`` transition matrix of one die.
+
+        Args:
+            layout: Board description; only consulted by the hard-coded
+                overrides below (values 1 and 2 redirect the landing square).
+            dice_probs: ``dice_probs[s]`` is the probability of advancing
+                ``s`` squares.
+            size: Number of squares.
+            circle: Wrap around (True) or clamp at the last square (False).
+            matrix_type: One of 'safe', 'normal' or 'risky'; selects which
+                hard-coded overrides apply.
+
+        Raises:
+            ValueError: If ``matrix_type`` is not a known die name.
+        """
+        if matrix_type not in self._MATRIX_TYPES:
+            raise ValueError(f"unknown matrix_type: {matrix_type!r}")
+
+        transition_matrix = np.zeros((size, size))
+
+        for k in range(size):
+            for s, p in enumerate(dice_probs):
+                k_prime = (k + s) % size if circle else min(size - 1, k + s)
+
+                # Hard-coded overrides for squares 2, 7, 8 and 9.
+                # NOTE(review): these look like they model a board whose path
+                # splits after square 2 and rejoins at the last square - TODO
+                # confirm.  No override exists for the 'normal' die.
+                if k == 9 and s == 1 and matrix_type == 'safe':
+                    k_prime = size - 1
+                elif k == 2 and s > 0 and matrix_type == 'safe':
+                    p /= 2
+                    k_prime = 10 + s - 1
+                    if layout[k_prime] == 1:
+                        k_prime = 0
+                    elif layout[k_prime] == 2:
+                        k_prime = max(0, k_prime - 3)
+                elif k == 7 and s == 3 and matrix_type == 'risky':
+                    k_prime = size - 1
+                elif k == 8 and s in [2, 3] and matrix_type == 'risky':
+                    # s == 2 lands exactly on the last square; s == 3
+                    # oversteps it, which only wraps to the start when
+                    # circle is True (same rule as the square-9 branch).
+                    if not circle or s == 2:
+                        k_prime = size - 1
+                    else:
+                        k_prime = 0
+                elif k == 9 and s in [1, 2, 3] and matrix_type == 'risky':
+                    if not circle or s == 1:
+                        k_prime = size - 1
+                    elif circle and s == 2:
+                        k_prime = 0
+                    elif circle and s == 3:
+                        k_prime = 1
+                        if layout[k_prime] in [1, 2]:
+                            k_prime = max(0, k_prime - 3) if layout[k_prime] == 2 else 0
+
+                # p already is the probability of this step; multiplying it
+                # by the die's own probability again would square it and the
+                # rows would no longer sum to 1.
+                transition_matrix[k, k_prime] += p
+
+        return transition_matrix
+
+    def generate_arrays(self, n):
+        """Return ``n`` random 15-square layouts.
+
+        Each layout is all zeros except for the values 1, 2 and 3, placed on
+        three distinct squares drawn from indices 1 to 13.
+        """
+        arrays = []
+        for _ in range(n):
+            array = np.zeros(15, dtype=int)
+            indices = rd.sample(range(1, 14), 3)
+            array[indices] = 1, 2, 3
+            arrays.append(array)
+        return arrays
+
+    def tst_transition_matrix(self):
+        """Smoke test: build the matrices for 1000 random layouts.
+
+        Each layout is printed and run with both values of ``circle``; the
+        results are discarded, so this only checks that nothing raises.
+        """
+        layouts = self.generate_arrays(1000)
+        for array in layouts:
+            print(array)
+            self.compute_transition_matrix(array, False)
+            self.compute_transition_matrix(array, True)
+
+#tmc = TransitionMatrixCalculator()
+#tmc.tst_transition_matrix()
--- a/validation.py
+++ b/validation.py
@@ -2,4 +2,56 @@ import random as rd
 import numpy as np
 import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import markovDecion as mD
+from markovDecision import markovDecision as mD
+
+class EmpiricalComparision :
+    """Empirical (simulation-based) evaluation of dice strategies."""
+
+    def __init__(self) :
+        return
+
+    @staticmethod
+    def simulation(strategy, layout : list, circle, nIter : int) :
+        """Play ``nIter`` games and return the mean number of turns.
+
+        Args:
+            strategy: Indexable by square; ``strategy[k]`` is the die thrown
+                on square ``k`` (1 = safe, 2 = normal, 3 = risky).
+            layout: Board description, one entry per square; entries equal
+                to 3 cost extra turns with the normal and risky dice.
+            circle: Passed through unchanged to the transition-matrix
+                builders.
+            nIter: Number of games to simulate.
+
+        Returns:
+            The average number of turns needed to reach the last square.
+        """
+        tmc_instance = tmc()
+        safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
+        normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
+        risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
+        # Ordered so that die number d is found at index d - 1.
+        matrices_transition = [safe_dice, normal_dice, risky_dice]
+        nSquares = len(layout)
+        nTurns = []
+        for _ in range(nIter) :
+            turns = 0
+            k = 0
+            while k < nSquares - 1 :
+                action = strategy[k]
+                transitionMatrix = matrices_transition[int(action - 1)]
+                # Draw the next square from row k of the chosen matrix.
+                k = np.random.choice(nSquares, p = transitionMatrix[k])
+                if layout[k] == 3 and action == 2 :
+                    # Normal die: the extra turn is paid half of the time.
+                    turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+                elif layout[k] == 3 and action == 3 :
+                    # Risky die: the extra turn is always paid.
+                    turns += 2
+                else :
+                    turns += 1
+            nTurns.append(turns)
+
+        return np.mean(nTurns)
+
+    @staticmethod
+    def plot(layouts : list, circle, nIter : int) :
+        """Unfinished: solves each layout but produces no plot yet.
+
+        Currently only calls the Markov decision solver for every layout and
+        returns None; the result lists below are never filled.
+        """
+        # TODO: fill these with simulated results and plot them.
+        Markov = []
+        Safe = []
+        Normal = []
+        Risky = []
+        Random = []
+        for layout in layouts :
+            expec, policy = mD(layout, circle)
+            # Simulate the game
+
+        return
+
+
+
+
+
+
+# Driver: run the comparison on one fixed layout under both values of the
+# circle flag, with 1,000,000 as the third argument.
+# NOTE(review): `results` is not defined or imported in the visible part of
+# this file, so these calls would raise NameError as written - confirm which
+# function was intended.
+layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
+results(layout, False, 1000000)
+results(layout, True, 1000000)
\ No newline at end of file