From c682c832f3ee9924b998c15c3c23f5a5784b54ab Mon Sep 17 00:00:00 2001
From: Adrienucl <adrien.payen@student.uclouvain.be>
Date: Thu, 2 May 2024 23:40:48 +0200
Subject: [PATCH] update files

---
 test_files/markovDecision_testing.py |  51 ------------
 test_files/md_test.py                |  43 ----------
 test_files/plot.py                   |  45 -----------
 test_files/tmc_test.py               |  80 ------------------
 validation.py                        | 116 ++++++++++++++++++++-------
 5 files changed, 88 insertions(+), 247 deletions(-)
 delete mode 100644 test_files/markovDecision_testing.py
 delete mode 100644 test_files/md_test.py
 delete mode 100644 test_files/plot.py
 delete mode 100644 test_files/tmc_test.py

diff --git a/test_files/markovDecision_testing.py b/test_files/markovDecision_testing.py
deleted file mode 100644
index 39e9e26..0000000
--- a/test_files/markovDecision_testing.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import numpy as np
-from tmc import TransitionMatrixCalculator as tmc
-
-
-# testing our TransitionMatrix function based on random layout
-# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-def markovDecision(layout : list, circle : bool) :
-
-    Numberk = 15 # Number of states k on the board
-    tmc_instance = tmc()
-    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
-    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
-    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
-
-    # Initialisation of the variables before the iteration
-    ValueI = np.zeros(Numberk)  # Algorithm of Value iteration
-    jail = [i for i, x in enumerate(layout) if x == 3] # For all the jailsquare on the board
-    DiceForStates = np.zeros(Numberk - 1) # Set the each states as O
-    i = 0 # set the iteration of Value
-
-    while True :
-        ValueINew = np.zeros(Numberk)
-        i += 1 # iter + 1
-
-        for k in range(Numberk - 1) :
-            vi_safe = np.sum(safe_dice[k] * ValueI)
-            vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail])
-            vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap
-            ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky)
-        
-            if ValueINew[k] == 1 + vi_safe :
-                DiceForStates[k] = 1
-            elif ValueINew[k] == 1 + vi_normal :
-                DiceForStates[k] = 2
-            else :
-                DiceForStates[k] = 3
-
-        if np.allclose(ValueINew, ValueI) :
-            ValueI = ValueINew
-            break
-
-        ValueI = ValueINew
-
-    Expec = ValueI[:-1]
-    return [Expec, DiceForStates]
-
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-print("\nWin as soon as land on or overstep the final square")
-print(markovDecision(layout, False))
-print("\nStopping on the square to win")
-print(markovDecision(layout, True))
diff --git a/test_files/md_test.py b/test_files/md_test.py
deleted file mode 100644
index 722766b..0000000
--- a/test_files/md_test.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import numpy as np
-from tmc import TransitionMatrixCalculator as tmc
-
-def markov_decision(layout: list, circle: bool):
-    Numberk = 15
-    tmc_instance = tmc()
-    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
-    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
-    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
-    
-    jail = [i for i, x in enumerate(layout) if x == 3]
-    
-    def compute_value(v, dice_matrix):
-        return np.sum(dice_matrix * v) + (0.5 if dice_matrix is normal_dice else 1) * np.sum(dice_matrix[jail])
-
-
-    value = np.zeros(Numberk)
-    dice_for_states = np.zeros(Numberk - 1)
-
-    while True:
-        new_value = np.zeros(Numberk)
-
-        for k in range(Numberk - 1):
-            vi_safe = compute_value(value, safe_dice[k])
-            vi_normal = compute_value(value, normal_dice[k])
-            vi_risky = compute_value(value, risky_dice[k])
-
-            new_value[k] = 1 + min(vi_safe, vi_normal, vi_risky)
-            dice_for_states[k] = 1 if new_value[k] == 1 + vi_safe else (2 if new_value[k] == 1 + vi_normal else 3)
-
-        if np.allclose(new_value, value):
-            value = new_value
-            break
-
-        value = new_value
-
-    return value[:-1], dice_for_states
-
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-print("\nWin as soon as land on or overstep the final square")
-print(markov_decision(layout, False))
-print("\nStopping on the square to win")
-print(markov_decision(layout, True))
diff --git a/test_files/plot.py b/test_files/plot.py
deleted file mode 100644
index 9de7974..0000000
--- a/test_files/plot.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import numpy as np
-import random as rd
-import matplotlib.pyplot as plt
-from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import MarkovDecisionSolver as mD
-from validation import Validation
-
-def make_plots():
-    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-    circle = False
-    validation = Validation(layout, circle)
-    expec, optimal_policy = mD(layout, circle).solve()
-
-    # Plot 1: Theoretical vs Empirical Cost
-    expected_costs = np.zeros(len(expec))
-    for start_square in range(len(expec)):
-        total_turns = 0
-        for _ in range(10000):
-            total_turns += validation.play_one_game(start_square)
-        expected_costs[start_square] = total_turns / 10000
-
-    squares = np.arange(len(expec))
-    plt.plot(squares, expec, label="Theoretical cost")
-    plt.plot(squares, expected_costs, label="Empirical cost")
-    plt.xticks(np.arange(0, len(expec), step=1))
-    plt.grid(True)
-    plt.xlabel("Square")
-    plt.ylabel("Cost")
-    plt.legend()
-    plt.title("Comparison between the expected cost and the actual cost")
-    plt.show()
-
-    # Plot 2: Expected number of turns for different policies
-    policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2, np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))]
-    avgn_turns = [Validation(layout, circle).empirical_results() for policy in policies]
-    names = ["optimal", "safe", "normal", "risky", "random"]
-    plt.bar(names, avgn_turns)
-    plt.xlabel("Policy")
-    plt.ylabel("Cost")
-    plt.title("Expected number of turns for different policies")
-    plt.show()
-
-# Call make_plots function
-if __name__ == "__main__":
-    make_plots()
diff --git a/test_files/tmc_test.py b/test_files/tmc_test.py
deleted file mode 100644
index 461afbb..0000000
--- a/test_files/tmc_test.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import numpy as np
-import random as rd
-
-class TransitionMatrixCalculator:
-    def __init__(self):
-        # Probabilités de transition pour les dés "safe", "normal" et "risky"
-        self.safe_dice = np.array([1/2, 1/2])
-        self.normal_dice = np.array([1/3, 1/3, 1/3])
-        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])
-    
-    def compute_transition_matrix(self, layout: list, circle: bool):
-        size = len(layout)
-        matrix_safe = self._compute_matrix(layout, self.safe_dice, size, circle, 'safe')
-        matrix_normal = self._compute_matrix(layout, self.normal_dice, size, circle, 'normal')
-        matrix_risky = self._compute_matrix(layout, self.risky_dice, size, circle, 'risky')
-        return matrix_safe, matrix_normal, matrix_risky
-
-    def _compute_matrix(self, layout: list, dice_probs: list, size: int, circle: bool, matrix_type: str):
-        transition_matrix = np.zeros((size, size))
-        dice_type = None
-
-        if matrix_type == 'safe':
-            dice_type = self.safe_dice
-        elif matrix_type == 'normal':
-            dice_type = self.normal_dice
-        elif matrix_type == 'risky':
-            dice_type = self.risky_dice
-
-        for k in range(size):
-            for s, p in enumerate(dice_probs):
-                k_prime = (k + s) % size if circle else min(size - 1, k + s)
-
-                if k == 9 and s == 1 and matrix_type == 'safe':
-                    k_prime = size - 1
-                elif k == 2 and s > 0 and matrix_type == 'safe':
-                    p /= 2
-                    k_prime = 10 + s - 1
-                    if layout[k_prime] == 1:
-                        k_prime = 0
-                    elif layout[k_prime] == 2:
-                        k_prime = max(0, k_prime - 3)
-                elif k == 7 and s == 3 and matrix_type == 'risky':
-                    k_prime = size - 1
-                elif k == 8 and s in [2, 3] and matrix_type == 'risky':
-                    if circle or s == 2:
-                        k_prime = size - 1
-                    else:
-                        k_prime = 0
-                elif k == 9 and s in [1, 2, 3] and matrix_type == 'risky':
-                    if not circle or s == 1:
-                        k_prime = size - 1
-                    elif circle and s == 2:
-                        k_prime = 0
-                    elif circle and s == 3:
-                        k_prime = 1
-                        if layout[k_prime] in [1, 2]:
-                            k_prime = max(0, k_prime - 3) if layout[k_prime] == 2 else 0
-
-                transition_matrix[k, k_prime] += p * dice_type[s]
-
-        return transition_matrix
-
-    def generate_arrays(self,n):
-        arrays = []
-        for _ in range(n):
-            array = np.zeros(15, dtype=int)
-            indices = rd.sample(range(1, 14), 3)
-            array[indices] = 1, 2, 3
-            arrays.append(array)
-        return arrays
-    
-    def tst_transition_matrix(self):
-        layouts = self.generate_arrays(1000)
-        for array in layouts:
-            print(array)
-            self.compute_transition_matrix(array, False)
-            self.compute_transition_matrix(array, True)
-
-#tmc = TransitionMatrixCalculator()
-#tmc.tst_transition_matrix()
diff --git a/validation.py b/validation.py
index 8f94f24..85cd231 100644
--- a/validation.py
+++ b/validation.py
@@ -19,11 +19,25 @@ class validation:
 
         # Define all the strategy
         self.optimal_strategy = self.optimal_policy
-        self.safe_strategy = [1]*15
-        self.normal_strategy = [2]*15
-        self.risky_strategy = [3]*15
+        self.safe_strategy = [1]*len(layout)
+        self.normal_strategy = [2]*len(layout)
+        self.risky_strategy = [3]*len(layout)
         self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
 
+        # Per-square cost tables, one list per die type, initialised to zero for every square
+        self.costs_by_dice_type = {
+            'SafeDice': [0] * len(self.layout),
+            'NormalDice': [0] * len(self.layout),
+            'RiskyDice': [0] * len(self.layout)
+        }
+
+        # Fill in the per-die costs; only squares whose layout value is 3 get a non-zero entry
+        for i in range(len(self.layout)):
+            if self.layout[i] == 3:
+                self.costs_by_dice_type['SafeDice'][i] = 1  # Default cost for the safe die
+                self.costs_by_dice_type['NormalDice'][i] = 2  # Default cost for the normal die
+                self.costs_by_dice_type['RiskyDice'][i] = 3  # Default cost for the risky die
+
 
     def simulate_game(self, strategy, n_iterations=10000):
         transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
@@ -40,9 +54,6 @@ class validation:
                 action_index = int(action) - 1
                 transition_matrix = transition_matrices[action_index]
 
-                #print(f"Current state (k): {k}, Action chosen: {action}")
-                #print(f"Transition matrix: {transition_matrix}")
-
                 # Aplatir la matrice de transition en une distribution de probabilité 1D
                 flattened_probs = transition_matrix[k]
                 flattened_probs /= np.sum(flattened_probs)  # Normalisation des probabilités
@@ -62,9 +73,51 @@ class validation:
 
         return np.mean(number_turns)
 
+    def simulate_state(self, strategy, layout, circle, n_iterations=10000):
+        """Estimate by simulation the mean number of turns to finish from each state.
+
+        strategy[k] is the die played on square k (1 safe, 2 normal, 3 risky);
+        returns an array with one mean per non-final square of layout.
+        """
+        tmc_instance = tmc()
+        # NOTE(review): called without (layout, circle), unlike the two calls below -- confirm.
+        P_safe = tmc_instance._compute_safe_matrix()
+        P_normal = tmc_instance._compute_normal_matrix(layout, circle)
+        P_risky = tmc_instance._compute_risky_matrix(layout, circle)
+
+        transition_matrices = [P_safe, P_normal, P_risky]
+        last = len(layout) - 1
+        number_mean = []
+
+        for _ in range(n_iterations):
+            number_turns = []
+            for state in range(last):
+                total_turns = 0
+                k = state  # starting square
+                while k < last:
+                    action = strategy[k]
+                    # NOTE(review): action 0 would index -1, i.e. the risky matrix -- confirm.
+                    row = transition_matrices[int(action) - 1][k]
+                    # Normalise a copy so the transition matrices are never modified.
+                    k = np.random.choice(len(layout), p=row / np.sum(row))
+
+                    # Landing on a type-3 square costs an extra turn: always with the
+                    # risky die, with probability 0.5 with the normal die.
+                    if layout[k] == 3 and action == 2:
+                        total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+                    elif layout[k] == 3 and action == 3:
+                        total_turns += 2
+                    else:
+                        total_turns += 1
+                number_turns.append(total_turns)
+            number_mean.append(number_turns)
+
+        # Average over the simulated games, one value per starting square.
+        return np.mean(number_mean, axis=0)
+
 
     def play_optimal_strategy(self, n_iterations=10000):
-        return self.simulate_game(self.optimal_policy, n_iterations)
+        return self.simulate_game(self.optimal_strategy, n_iterations)
 
 
     def play_dice_strategy(self, dice_choice, n_iterations=10000):
@@ -98,34 +151,41 @@ class validation:
         }
     
 
-    
-
 # Utilisation d'exemple
-layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
-validation = validation(layout, circle=False)
-
-circle = False  # Example circle value
-"""
-# Create an instance of validation
-validator = validation(layout, circle)
-
-# Use the methods
-validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
-
+layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+circle = False
+validation_instance = validation(layout, circle)
 
-results = validation.compare_strategies(num_games=10000)
-print("Coûts moyens :")
-for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")"""
 
-optimal_cost = validation.play_optimal_strategy(n_iterations=10000)
+optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
 print("Optimal Strategy Cost:", optimal_cost)
 
-dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000)
+dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000)
 print("Normal Dice Strategy Cost:", dice2_cost)
 
-random_cost = validation.play_random_strategy(n_iterations=10000)
+random_cost = validation_instance.play_random_strategy(n_iterations=10000)
 print("Random Strategy Cost:", random_cost)
 
-strategy_comparison = validation.compare_strategies(num_games=10000)
+strategy_comparison = validation_instance.compare_strategies(num_games=10000)
 print("Strategy Comparison Results:", strategy_comparison)
+
+# Per-square mean number of turns, estimated with simulate_state for each strategy
+optimal_strategy = validation_instance.optimal_strategy
+mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000)
+print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
+
+safe_dice_strategy = validation_instance.safe_strategy
+mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000)
+print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
+
+normal_dice_strategy = validation_instance.normal_strategy
+mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000)
+print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
+
+risky_dice_strategy = validation_instance.risky_strategy
+mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000)
+print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
+# NOTE(review): random_strategy draws from [0, 1, 2, 3]; action 0 selects index -1 (the risky matrix) in simulate_state -- confirm intended.
+random_dice_strategy = validation_instance.random_strategy
+mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
+print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
-- 
GitLab