update validation.py

f78dbd5f · Adrien Payen · adf46a6b · f78dbd5f · f78dbd5f · adf46a6b
--- a/.DS_Store
+++ b/.DS_Store
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -5,7 +5,7 @@ class MarkovDecisionSolver:
    def __init__(self, layout : list, circle : bool):
        self.Numberk = 15
        self.tmc_instance = tmc()
-        self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        self.jail = [i for i, x in enumerate(layout) if x == 3]

--- a/test_files/validation.py
+++ b/test_files/validation.py
-import random
-import numpy as np
-import matplotlib.pyplot as plt
-from tmc import TransitionMatrixCalculator
-from markovDecision import MarkovDecisionSolver as mD
-
-class Validation:
-    def __init__(self, layout, circle=False):
-        self.layout = layout
-        self.circle = circle
-        self.tmc_instance = TransitionMatrixCalculator()
-
-        # Compute optimal value iteration results
-        solver = mD(self.layout, self.circle)
-        self.optimal_values, self.optimal_dice = solver.solve()
-
-    def simulate_game(self, strategy='optimal', num_games=1000):
-        total_turns = 0
-
-        for _ in range(num_games):
-            if strategy == 'Optimal':
-                turns = self.play_optimal_strategy()
-            elif strategy == 'SafeDice':
-                turns = self.play_dice_strategy(1)
-            elif strategy == 'NormalDice':
-                turns = self.play_dice_strategy(2)
-            elif strategy == 'RiskyDice':
-                turns = self.play_dice_strategy(3)
-            elif strategy == 'Random':
-                turns = self.play_random_strategy()
-
-            total_turns += turns
-
-        average_turns = total_turns / num_games
-        return average_turns
-
-    def play_optimal_strategy(self):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            optimal_action = int(self.optimal_dice[current_state])  # Get the optimal action for the current state
-            current_state += optimal_action  # Move to the next state based on the optimal action
-            turns += 1
-
-        return turns
-
-    def play_dice_strategy(self, dice):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            # Always use the specified dice type (1, 2, or 3)
-            current_state += dice
-            turns += 1
-
-        return turns
-
-    def play_random_strategy(self):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            # Choose a random dice roll between 1 and 3
-            dice_roll = np.random.randint(1, 4)
-            current_state += dice_roll
-            turns += 1
-
-        return turns
-
-    def compare_strategies(self, num_games=1000):
-        optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games)
-        dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games)
-        dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games)
-        dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games)
-        random_cost = self.simulate_game(strategy='Random', num_games=num_games)
-
-        return {
-            'Optimal': optimal_cost,
-            'SafeDice': dice1_cost,
-            'NormalDice': dice2_cost,
-            'RiskyDice': dice3_cost,
-            'Random': random_cost
-        }
-
-    def play_one_turn(self, dice_choice, cur_pos, prison):
-        if cur_pos == len(self.layout) - 1:
-            return len(self.layout) - 1, False
-    
-        if prison:
-            return cur_pos, False
-    
-        # Convert dice_choice to integer to avoid TypeError
-        dice_choice = int(dice_choice)
-    
-        list_dice_results = [i for i in range(dice_choice + 1)]
-        result = random.choice(list_dice_results)
-    
-        if cur_pos == 2 and result != 0:
-            slow_lane = random.choice([0, 1])
-            if slow_lane:
-                new_pos = cur_pos + result
-            else:
-                new_pos = cur_pos + result + 7
-    
-        elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))):
-            new_pos = cur_pos + result + 4
-    
-        else:
-            new_pos = cur_pos + result
-    
-        if new_pos > len(self.layout) - 1:
-            if self.circle:
-                new_pos -= len(self.layout)
-            else:
-                return len(self.layout) - 1, True
-    
-        new_square = self.layout[new_pos]
-    
-        if dice_choice == 1:
-            return new_pos, False
-    
-        elif dice_choice == 2:
-            new_square = random.choice([0, new_square])
-    
-        if new_square == 0:
-            return new_pos, False  # nothing happens
-        elif new_square == 1:
-            return 0, False  # back to square one
-        elif new_square == 2:
-            if new_pos - 3 < 0:
-                return 0, False  # back to square one
-            return new_pos - 3, False  # back 3 squares
-        elif new_square == 3:
-            return new_pos, True  # prison
-    
-
-    def play_one_game(self, start=0):
-        n_turns = 0
-        cur_pos = start
-        prison = False
-
-        if self.circle:
-            while cur_pos != len(self.layout) - 1:
-                new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
-                if new_pos > len(self.layout) - 1:
-                    cur_pos = len(self.layout) - new_pos
-                cur_pos = new_pos
-                n_turns += 1
-        else:
-            while cur_pos < len(self.layout) - 1:
-                new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
-                cur_pos = new_pos
-                n_turns += 1
-
-        return n_turns
-
-    def empirical_results(self):
-        total_turns_played = 0
-        for _ in range(10000):
-            n_turns = self.play_one_game()
-            total_turns_played += n_turns
-
-        return total_turns_played / 10000
-    
-
-# Utilisation d'exemple
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-validation = Validation(layout, circle=False)
-results = validation.compare_strategies(num_games=10000)
-print("Coûts moyens :")
-for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")
--- a/tmc.py
+++ b/tmc.py
@@ -24,7 +24,7 @@ class TransitionMatrixCalculator:
        return self.matrix_safe, self.matrix_normal, self.matrix_risky


-    def _compute_safe_matrix(self, layout, circle):
+    def _compute_safe_matrix(self):
        for k in range(0,15):
            for s, p in enumerate(self.safe_dice):
                if k == 9 and s == 1:

--- a/validation.py
+++ b/validation.py
+import random as rd
+import numpy as np
+import matplotlib.pyplot as plt
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import MarkovDecisionSolver as mD
+
+class validation:
+    """Estimate by simulation the mean number of turns of each dice strategy.
+
+    A strategy is a sequence giving, for every square of ``layout``, the die
+    thrown there: 1 selects the safe matrix, 2 the normal matrix and 3 the
+    risky matrix.  Games are sampled from the transition matrices built by
+    ``TransitionMatrixCalculator`` and compared with the policy returned by
+    ``MarkovDecisionSolver``.
+    """
+
+    def __init__(self, layout, circle=False):
+        """Build the transition matrices, solve the MDP and define the strategies.
+
+        Args:
+            layout: Sequence of square codes describing the board.
+            circle: Flag forwarded unchanged to the matrix calculator and to
+                the solver.
+        """
+        self.layout = layout
+        self.circle = circle
+
+        # One transition matrix per die, computed by the shared calculator.
+        self.tmc_instance = tmc()
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+
+        # solve() returns a pair; the second element is used below as the
+        # die to throw on each square.
+        solver = mD(self.layout, self.circle)
+        self.expec, self.optimal_policy = solver.solve()
+
+        # One entry per square instead of a hard-coded 15.
+        n_squares = len(layout)
+        self.optimal_strategy = self.optimal_policy
+        self.safe_strategy = [1] * n_squares
+        self.normal_strategy = [2] * n_squares
+        self.risky_strategy = [3] * n_squares
+        # Only dice 1..3 exist.  A 0 would become index -1 in simulate_game
+        # and silently select the risky matrix, so it is not a valid draw.
+        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]
+
+    def simulate_game(self, strategy, n_iterations=10000):
+        """Return the mean number of turns over ``n_iterations`` simulated games.
+
+        Args:
+            strategy: Indexable by square; ``strategy[k]`` is the die (1, 2
+                or 3) thrown on square ``k``.
+            n_iterations: Number of games to simulate.
+
+        Returns:
+            ``np.mean`` of the per-game turn counts.
+
+        Raises:
+            ValueError: If the strategy names a die other than 1, 2 or 3, or
+                if the transition row of the current square sums to zero.
+        """
+        # Float arrays so a row can be normalised into a new array without
+        # modifying the matrices stored on the instance.
+        transition_matrices = [
+            np.asarray(matrix, dtype=float)
+            for matrix in (self.safe_dice, self.normal_dice, self.risky_dice)
+        ]
+        n_squares = len(self.layout)
+        last_square = n_squares - 1
+        number_turns = []
+
+        for _ in range(n_iterations):
+            total_turns = 0
+            k = 0  # every game starts on the first square
+
+            while k < last_square:
+                # Die chosen by the strategy for the current square.
+                action = int(strategy[k])
+                if not 1 <= action <= len(transition_matrices):
+                    raise ValueError(
+                        f"Invalid die {strategy[k]!r} for square {k}: expected 1, 2 or 3"
+                    )
+
+                row = transition_matrices[action - 1][k]
+                row_sum = row.sum()
+                if row_sum <= 0:
+                    raise ValueError(
+                        f"Transition row for square {k} and die {action} sums to zero"
+                    )
+                # Plain division creates a new array; an in-place `/=` on the
+                # row view would rewrite the shared transition matrix.
+                probabilities = row / row_sum
+
+                # Sample the next square from the normalised row.
+                k = np.random.choice(n_squares, p=probabilities)
+
+                # Landing on a square coded 3 can cost an extra turn: always
+                # with die 3, with probability 0.5 with die 2, never with die 1.
+                if self.layout[k] == 3 and action == 2:
+                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+                elif self.layout[k] == 3 and action == 3:
+                    total_turns += 2
+                else:
+                    total_turns += 1
+
+            number_turns.append(total_turns)
+
+        return np.mean(number_turns)
+
+    def play_optimal_strategy(self, n_iterations=10000):
+        """Simulate the policy returned by the solver and return its mean cost."""
+        return self.simulate_game(self.optimal_policy, n_iterations)
+
+    def play_dice_strategy(self, dice_choice, n_iterations=10000):
+        """Simulate a single-die strategy and return its mean cost.
+
+        Args:
+            dice_choice: ``'SafeDice'``, ``'NormalDice'`` or ``'RiskyDice'``.
+            n_iterations: Number of games to simulate.
+
+        Raises:
+            ValueError: If ``dice_choice`` is not one of the three names.
+        """
+        strategies = {
+            'SafeDice': self.safe_strategy,
+            'NormalDice': self.normal_strategy,
+            'RiskyDice': self.risky_strategy,
+        }
+        if dice_choice not in strategies:
+            raise ValueError("Invalid dice choice")
+
+        return self.simulate_game(strategies[dice_choice], n_iterations)
+
+    def play_random_strategy(self, n_iterations=10000):
+        """Simulate the random strategy drawn at construction time."""
+        return self.simulate_game(self.random_strategy, n_iterations)
+
+    def compare_strategies(self, num_games=1000):
+        """Return the mean cost of every strategy, keyed by strategy name.
+
+        Args:
+            num_games: Number of games simulated per strategy.
+        """
+        optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
+        dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
+        dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
+        dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
+        random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
+
+        return {
+            'Optimal': optimal_cost,
+            'SafeDice': dice1_cost,
+            'NormalDice': dice2_cost,
+            'RiskyDice': dice3_cost,
+            'Random': random_cost
+        }
+    
+
+    
+
+# Example usage
+layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
+circle = False  # Example circle value
+
+# Bind the instance to its own name: reusing `validation` would replace the
+# class with the instance and make the class unreachable afterwards.
+validator = validation(layout, circle=circle)
+
+optimal_cost = validator.play_optimal_strategy(n_iterations=10000)
+print("Optimal Strategy Cost:", optimal_cost)
+
+dice2_cost = validator.play_dice_strategy('NormalDice', n_iterations=10000)
+print("Normal Dice Strategy Cost:", dice2_cost)
+
+random_cost = validator.play_random_strategy(n_iterations=10000)
+print("Random Strategy Cost:", random_cost)
+
+strategy_comparison = validator.compare_strategies(num_games=10000)
+print("Strategy Comparison Results:", strategy_comparison)