From f78dbd5f5f743ae2a93882236f0287966a6cf97c Mon Sep 17 00:00:00 2001
From: Adrienucl <adrien.payen@student.uclouvain.be>
Date: Thu, 2 May 2024 21:18:44 +0200
Subject: [PATCH] update validation.py

---
 .DS_Store                | Bin 6148 -> 6148 bytes
 markovDecision.py        |   2 +-
 test_files/validation.py | 173 ---------------------------------------
 tmc.py                   |   2 +-
 validation.py            | 131 +++++++++++++++++++++++++++++
 5 files changed, 133 insertions(+), 175 deletions(-)
 delete mode 100644 test_files/validation.py
 create mode 100644 validation.py

diff --git a/.DS_Store b/.DS_Store
index 837b005cf49930c5ead4ac78ca4b5abed2e970f5..895bd9eae285819cae90199894867d4ed1ce3958 100644
GIT binary patch
delta 36
scmZoMXfc@J&&ahgU^g=(*JK`+?#)M8?lEq@%cjLRu_0qKJI7ys0OGI=t^fc4

delta 79
zcmZoMXfc@J&&abeU^g=(&tx8!ZccWF0)`xhe1?+A0c;{njIx_MSl%*<@-mb$Bm%`V
b8B&0B36Rcb$b+cp-kio}$+(%F<1aq|5PK76

diff --git a/markovDecision.py b/markovDecision.py
index 1383600..6bd17bc 100644
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -5,7 +5,7 @@ class MarkovDecisionSolver:
     def __init__(self, layout : list, circle : bool):
         self.Numberk = 15
         self.tmc_instance = tmc()
-        self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
         self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
         self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
         self.jail = [i for i, x in enumerate(layout) if x == 3]
diff --git a/test_files/validation.py b/test_files/validation.py
deleted file mode 100644
index de8dd96..0000000
--- a/test_files/validation.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import random
-import numpy as np
-import matplotlib.pyplot as plt
-from tmc import TransitionMatrixCalculator
-from markovDecision import MarkovDecisionSolver as mD
-
-class Validation:
-    def __init__(self, layout, circle=False):
-        self.layout = layout
-        self.circle = circle
-        self.tmc_instance = TransitionMatrixCalculator()
-
-        # Compute optimal value iteration results
-        solver = mD(self.layout, self.circle)
-        self.optimal_values, self.optimal_dice = solver.solve()
-
-    def simulate_game(self, strategy='optimal', num_games=1000):
-        total_turns = 0
-
-        for _ in range(num_games):
-            if strategy == 'Optimal':
-                turns = self.play_optimal_strategy()
-            elif strategy == 'SafeDice':
-                turns = self.play_dice_strategy(1)
-            elif strategy == 'NormalDice':
-                turns = self.play_dice_strategy(2)
-            elif strategy == 'RiskyDice':
-                turns = self.play_dice_strategy(3)
-            elif strategy == 'Random':
-                turns = self.play_random_strategy()
-
-            total_turns += turns
-
-        average_turns = total_turns / num_games
-        return average_turns
-
-    def play_optimal_strategy(self):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            optimal_action = int(self.optimal_dice[current_state])  # Get the optimal action for the current state
-            current_state += optimal_action  # Move to the next state based on the optimal action
-            turns += 1
-
-        return turns
-
-    def play_dice_strategy(self, dice):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            # Always use the specified dice type (1, 2, or 3)
-            current_state += dice
-            turns += 1
-
-        return turns
-
-    def play_random_strategy(self):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            # Choose a random dice roll between 1 and 3
-            dice_roll = np.random.randint(1, 4)
-            current_state += dice_roll
-            turns += 1
-
-        return turns
-
-    def compare_strategies(self, num_games=1000):
-        optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games)
-        dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games)
-        dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games)
-        dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games)
-        random_cost = self.simulate_game(strategy='Random', num_games=num_games)
-
-        return {
-            'Optimal': optimal_cost,
-            'SafeDice': dice1_cost,
-            'NormalDice': dice2_cost,
-            'RiskyDice': dice3_cost,
-            'Random': random_cost
-        }
-
-    def play_one_turn(self, dice_choice, cur_pos, prison):
-        if cur_pos == len(self.layout) - 1:
-            return len(self.layout) - 1, False
-    
-        if prison:
-            return cur_pos, False
-    
-        # Convert dice_choice to integer to avoid TypeError
-        dice_choice = int(dice_choice)
-    
-        list_dice_results = [i for i in range(dice_choice + 1)]
-        result = random.choice(list_dice_results)
-    
-        if cur_pos == 2 and result != 0:
-            slow_lane = random.choice([0, 1])
-            if slow_lane:
-                new_pos = cur_pos + result
-            else:
-                new_pos = cur_pos + result + 7
-    
-        elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))):
-            new_pos = cur_pos + result + 4
-    
-        else:
-            new_pos = cur_pos + result
-    
-        if new_pos > len(self.layout) - 1:
-            if self.circle:
-                new_pos -= len(self.layout)
-            else:
-                return len(self.layout) - 1, True
-    
-        new_square = self.layout[new_pos]
-    
-        if dice_choice == 1:
-            return new_pos, False
-    
-        elif dice_choice == 2:
-            new_square = random.choice([0, new_square])
-    
-        if new_square == 0:
-            return new_pos, False  # nothing happens
-        elif new_square == 1:
-            return 0, False  # back to square one
-        elif new_square == 2:
-            if new_pos - 3 < 0:
-                return 0, False  # back to square one
-            return new_pos - 3, False  # back 3 squares
-        elif new_square == 3:
-            return new_pos, True  # prison
-    
-
-    def play_one_game(self, start=0):
-        n_turns = 0
-        cur_pos = start
-        prison = False
-
-        if self.circle:
-            while cur_pos != len(self.layout) - 1:
-                new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
-                if new_pos > len(self.layout) - 1:
-                    cur_pos = len(self.layout) - new_pos
-                cur_pos = new_pos
-                n_turns += 1
-        else:
-            while cur_pos < len(self.layout) - 1:
-                new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
-                cur_pos = new_pos
-                n_turns += 1
-
-        return n_turns
-
-    def empirical_results(self):
-        total_turns_played = 0
-        for _ in range(10000):
-            n_turns = self.play_one_game()
-            total_turns_played += n_turns
-
-        return total_turns_played / 10000
-    
-
-# Utilisation d'exemple
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-validation = Validation(layout, circle=False)
-results = validation.compare_strategies(num_games=10000)
-print("Coûts moyens :")
-for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")
diff --git a/tmc.py b/tmc.py
index 6f78a3d..5941ed4 100644
--- a/tmc.py
+++ b/tmc.py
@@ -24,7 +24,7 @@ class TransitionMatrixCalculator:
         return self.matrix_safe, self.matrix_normal, self.matrix_risky
 
 
-    def _compute_safe_matrix(self, layout, circle):
+    def _compute_safe_matrix(self):
         for k in range(0,15):
             for s, p in enumerate(self.safe_dice):
                 if k == 9 and s == 1:
diff --git a/validation.py b/validation.py
new file mode 100644
index 0000000..8f94f24
--- /dev/null
+++ b/validation.py
@@ -0,0 +1,131 @@
+import random as rd
+import numpy as np
+import matplotlib.pyplot as plt
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import MarkovDecisionSolver as mD
+
+class validation:
+    """Monte-Carlo validator comparing dice strategies on a board layout."""
+
+    def __init__(self, layout, circle=False):
+        """Build the transition matrices and the strategies for *layout*.
+
+        layout: sequence of square codes; a 3 costs extra turns when simulated.
+        circle: forwarded unchanged to the matrix builders and the solver.
+        """
+        self.layout = layout
+        self.circle = circle
+        self.tmc_instance = tmc()
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+        solver = mD(self.layout, self.circle)
+        self.expec, self.optimal_policy = solver.solve()
+        # One action per square: 1 = safe, 2 = normal, 3 = risky dice.
+        n_squares = len(layout)
+        self.optimal_strategy = self.optimal_policy
+        self.safe_strategy = [1] * n_squares
+        self.normal_strategy = [2] * n_squares
+        self.risky_strategy = [3] * n_squares
+        # 0 is not a dice: it would index the last (risky) matrix through -1.
+        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]
+
+    def simulate_game(self, strategy, n_iterations=10000):
+        """Return the mean number of turns over *n_iterations* simulated games.
+
+        strategy: per-square actions (1, 2 or 3), indexed by the current square.
+        Raises ValueError on an unknown action or an all-zero transition row.
+        """
+        transition_matrices = (self.safe_dice, self.normal_dice, self.risky_dice)
+        last_square = len(self.layout) - 1
+        number_turns = []
+        for _ in range(n_iterations):
+            total_turns = 0
+            k = 0  # every game starts on the first square
+            while k < last_square:
+                action = int(strategy[k])
+                if action not in (1, 2, 3):
+                    raise ValueError(f"Invalid action {action!r} for square {k}")
+                # Normalise a copy so the shared matrices are never mutated.
+                probs = np.array(transition_matrices[action - 1][k], dtype=float)
+                total = probs.sum()
+                if total <= 0:
+                    raise ValueError(f"No transition defined from square {k}")
+                k = np.random.choice(len(self.layout), p=probs / total)
+                # Landing on a 3 (prison) costs an extra turn: always with the
+                # risky dice, half of the time with the normal dice.
+                if self.layout[k] == 3 and action == 2:
+                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+                elif self.layout[k] == 3 and action == 3:
+                    total_turns += 2
+                else:
+                    total_turns += 1
+            number_turns.append(total_turns)
+
+        return np.mean(number_turns)
+
+    def play_optimal_strategy(self, n_iterations=10000):
+        """Return the mean number of turns when following the solver's policy."""
+        return self.simulate_game(self.optimal_policy, n_iterations)
+
+    def play_dice_strategy(self, dice_choice, n_iterations=10000):
+        """Return the mean turns for 'SafeDice', 'NormalDice' or 'RiskyDice'.
+
+        Raises ValueError for any other dice name.
+        """
+        if dice_choice == 'SafeDice':
+            strategy = self.safe_strategy
+        elif dice_choice == 'NormalDice':
+            strategy = self.normal_strategy
+        elif dice_choice == 'RiskyDice':
+            strategy = self.risky_strategy
+        else:
+            raise ValueError("Invalid dice choice")
+        return self.simulate_game(strategy, n_iterations)
+
+    def play_random_strategy(self, n_iterations=10000):
+        """Return the mean number of turns for the fixed random strategy."""
+        return self.simulate_game(self.random_strategy, n_iterations)
+
+    def compare_strategies(self, num_games=1000):
+        """Return a dict mapping each strategy name to its mean number of turns."""
+        return {
+            'Optimal': self.simulate_game(self.optimal_strategy, num_games),
+            'SafeDice': self.simulate_game(self.safe_strategy, num_games),
+            'NormalDice': self.simulate_game(self.normal_strategy, num_games),
+            'RiskyDice': self.simulate_game(self.risky_strategy, num_games),
+            'Random': self.simulate_game(self.random_strategy, num_games),
+        }
+    
+
+    
+
+# Example usage: estimate by simulation the average number of turns each
+# strategy needs to reach the last square of the board below.
+
+# A 3 marks a prison square; the other codes are interpreted by tmc.py.
+layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
+circle = False  # Example circle value
+n_games = 10000  # games simulated per strategy
+
+# The instance gets its own name: rebinding ``validation`` to the object
+# would hide the class and make any further instantiation fail.
+validator = validation(layout, circle=circle)
+
+# Policy returned by the Markov decision solver.
+optimal_cost = validator.play_optimal_strategy(n_iterations=n_games)
+print("Optimal Strategy Cost:", optimal_cost)
+
+# Always throw the normal dice.
+dice2_cost = validator.play_dice_strategy('NormalDice', n_iterations=n_games)
+print("Normal Dice Strategy Cost:", dice2_cost)
+
+# Per-square dice drawn at random once, when the validator is built, and
+# then reused for every simulated game.
+random_cost = validator.play_random_strategy(n_iterations=n_games)
+print("Random Strategy Cost:", random_cost)
+
+# All five strategies side by side; the result is a dict keyed by
+# 'Optimal', 'SafeDice', 'NormalDice', 'RiskyDice' and 'Random'.
+strategy_comparison = validator.compare_strategies(num_games=n_games)
+print("Strategy Comparison Results:", strategy_comparison)
-- 
GitLab