update

02b9ef6f · Adrien Payen · b775d77f · 02b9ef6f · 02b9ef6f · 02b9ef6f
--- a/markovDecision.py
+++ b/markovDecision.py
@@ -5,7 +5,7 @@ class MarkovDecisionSolver:
    def __init__(self, layout : list, circle : bool):
        self.Numberk = 15
        self.tmc_instance = tmc()
-        self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        self.jail = [i for i, x in enumerate(layout) if x == 3]

--- a/test_files/Validation_2.py
+++ b/test_files/Validation_2.py
+import numpy as np
+import random as rd
+import matplotlib.pyplot as plt
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import MarkovDecisionSolver as mD
+
+class Validation:
+    """Compare the value-iteration result with (placeholder) strategy simulations.
+
+    NOTE(review): every "strategy" currently reuses ``simulate_games``; the
+    per-dice and random simulations are still to be written (see the
+    ``Replace with ...`` markers in ``_placeholder_strategies``).
+    """
+
+    # Labels of the sub-optimal strategies shown next to the optimal one.
+    _SUBOPTIMAL_LABELS = (
+        "Dice 1 Only",
+        "Dice 2 Only",
+        "Dice 3 Only",
+        "Mixed Random Strategy",
+        "Purely Random Choice",
+    )
+
+    def __init__(self):
+        self.tmc_instance = tmc()
+
+    def simulate_games(self, layout, circle, num_games):
+        """Return a list with one cost figure per game.
+
+        Args:
+            layout: board layout handed to ``MarkovDecisionSolver``.
+            circle: circular-board flag handed to ``MarkovDecisionSolver``.
+            num_games: number of entries to produce.
+
+        Returns:
+            A list of ``num_games`` values, each the last element of the
+            first item returned by ``solve()``.
+        """
+        results = []
+
+        for _ in range(num_games):
+            # The solver object itself is not subscriptable: solve() is what
+            # returns the (costs, policy) tuple indexed below.
+            result = mD(layout, circle).solve()
+            # Assuming result is a tuple (costs, path) and you want the last element of 'costs'
+            results.append(result[0][-1])  # Append the number of turns to reach the goal
+
+        return results
+
+    def _placeholder_strategies(self, layout, circle, num_games):
+        """Build the label -> results mapping for the sub-optimal strategies."""
+        # Replace each entry with the matching simulation (dice 1/2/3 only,
+        # mixed random, purely random) once those are implemented.
+        return {
+            label: self.simulate_games(layout, circle, num_games)
+            for label in self._SUBOPTIMAL_LABELS
+        }
+
+    def compare_strategies(self, layout, circle, num_games):
+        """Simulate every strategy and plot the average cost of each."""
+        optimal_results = self.simulate_games(layout, circle, num_games)
+        suboptimal_strategies = self._placeholder_strategies(layout, circle, num_games)
+
+        self.plot_results(optimal_results, suboptimal_strategies)
+
+    def plot_results(self, optimal_results, suboptimal_results):
+        """Draw a bar chart of average costs (optimal in blue, others in orange).
+
+        Args:
+            optimal_results: sequence of costs for the optimal strategy.
+            suboptimal_results: mapping of strategy label to sequence of costs.
+        """
+        strategies = ["Optimal Strategy"] + list(suboptimal_results.keys())
+        avg_costs = [np.mean(optimal_results)] + [np.mean(suboptimal_results[strategy]) for strategy in suboptimal_results]
+
+        plt.figure(figsize=(10, 6))
+        plt.bar(strategies, avg_costs, color=['blue'] + ['orange'] * len(suboptimal_results))
+        plt.xlabel("Strategies")
+        plt.ylabel("Average Cost")
+        plt.title("Comparison of Strategy Performance")
+        plt.show()
+
+    def run_validation(self, layout, circle, num_games):
+        """Solve the layout, simulate all strategies, then plot and print the costs."""
+        solver = mD(layout, circle)
+        theoretical_cost, optimal_dice_strategy = solver.solve()
+
+        optimal_results = self.simulate_games(layout, circle, num_games)
+        optimal_average_cost = np.mean(optimal_results)
+
+        suboptimal_strategies = self._placeholder_strategies(layout, circle, num_games)
+
+        self.plot_results(optimal_results, suboptimal_strategies)
+
+        print("Theoretical Expected Cost (Value Iteration):", theoretical_cost)
+        print("Empirical Average Cost (Optimal Strategy):", optimal_average_cost)
+
+        for strategy, results in suboptimal_strategies.items():
+            avg_cost = np.mean(results)
+            print(f"Empirical Average Cost ({strategy}):", avg_cost)
+
+# Example usage of the Validation class
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+circle = True
+num_games = 1000
+
+validation = Validation()
+validation.run_validation(layout, circle, num_games)
--- a/plotting.py
+++ b/plotting.py
--- a/test_files/validation.py
+++ b/test_files/validation.py
-import random
-import numpy as np
-import matplotlib.pyplot as plt
-from tmc import TransitionMatrixCalculator
-from markovDecision import MarkovDecisionSolver as mD
-
-class Validation:
-    def __init__(self, layout, circle=False):
-        self.layout = layout
-        self.circle = circle
-        self.tmc_instance = TransitionMatrixCalculator()
-
-        # Compute optimal value iteration results
-        solver = mD(self.layout, self.circle)
-        self.optimal_values, self.optimal_dice = solver.solve()
-
-    def simulate_game(self, strategy='optimal', num_games=1000):
-        total_turns = 0
-
-        for _ in range(num_games):
-            if strategy == 'Optimal':
-                turns = self.play_optimal_strategy()
-            elif strategy == 'SafeDice':
-                turns = self.play_dice_strategy(1)
-            elif strategy == 'NormalDice':
-                turns = self.play_dice_strategy(2)
-            elif strategy == 'RiskyDice':
-                turns = self.play_dice_strategy(3)
-            elif strategy == 'Random':
-                turns = self.play_random_strategy()
-
-            total_turns += turns
-
-        average_turns = total_turns / num_games
-        return average_turns
-
-    def play_optimal_strategy(self):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            optimal_action = int(self.optimal_dice[current_state])  # Get the optimal action for the current state
-            current_state += optimal_action  # Move to the next state based on the optimal action
-            turns += 1
-
-        return turns
-
-    def play_dice_strategy(self, dice):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            # Always use the specified dice type (1, 2, or 3)
-            current_state += dice
-            turns += 1
-
-        return turns
-
-    def play_random_strategy(self):
-        current_state = 0  # Start from the initial state
-        turns = 0
-
-        while current_state < len(self.layout) - 1:
-            # Choose a random dice roll between 1 and 3
-            dice_roll = np.random.randint(1, 4)
-            current_state += dice_roll
-            turns += 1
-
-        return turns
-
-    def compare_strategies(self, num_games=1000):
-        optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games)
-        dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games)
-        dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games)
-        dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games)
-        random_cost = self.simulate_game(strategy='Random', num_games=num_games)
-
-        return {
-            'Optimal': optimal_cost,
-            'SafeDice': dice1_cost,
-            'NormalDice': dice2_cost,
-            'RiskyDice': dice3_cost,
-            'Random': random_cost
-        }
-
-    def play_one_turn(self, dice_choice, cur_pos, prison):
-        if cur_pos == len(self.layout) - 1:
-            return len(self.layout) - 1, False
-    
-        if prison:
-            return cur_pos, False
-    
-        # Convert dice_choice to integer to avoid TypeError
-        dice_choice = int(dice_choice)
-    
-        list_dice_results = [i for i in range(dice_choice + 1)]
-        result = random.choice(list_dice_results)
-    
-        if cur_pos == 2 and result != 0:
-            slow_lane = random.choice([0, 1])
-            if slow_lane:
-                new_pos = cur_pos + result
-            else:
-                new_pos = cur_pos + result + 7
-    
-        elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))):
-            new_pos = cur_pos + result + 4
-    
-        else:
-            new_pos = cur_pos + result
-    
-        if new_pos > len(self.layout) - 1:
-            if self.circle:
-                new_pos -= len(self.layout)
-            else:
-                return len(self.layout) - 1, True
-    
-        new_square = self.layout[new_pos]
-    
-        if dice_choice == 1:
-            return new_pos, False
-    
-        elif dice_choice == 2:
-            new_square = random.choice([0, new_square])
-    
-        if new_square == 0:
-            return new_pos, False  # nothing happens
-        elif new_square == 1:
-            return 0, False  # back to square one
-        elif new_square == 2:
-            if new_pos - 3 < 0:
-                return 0, False  # back to square one
-            return new_pos - 3, False  # back 3 squares
-        elif new_square == 3:
-            return new_pos, True  # prison
-    
-
-    def play_one_game(self, start=0):
-        n_turns = 0
-        cur_pos = start
-        prison = False
-
-        if self.circle:
-            while cur_pos != len(self.layout) - 1:
-                new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
-                if new_pos > len(self.layout) - 1:
-                    cur_pos = len(self.layout) - new_pos
-                cur_pos = new_pos
-                n_turns += 1
-        else:
-            while cur_pos < len(self.layout) - 1:
-                new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
-                cur_pos = new_pos
-                n_turns += 1
-
-        return n_turns
-
-    def empirical_results(self):
-        total_turns_played = 0
-        for _ in range(10000):
-            n_turns = self.play_one_game()
-            total_turns_played += n_turns
-
-        return total_turns_played / 10000
-    
-
-# Utilisation d'exemple
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-validation = Validation(layout, circle=False)
-results = validation.compare_strategies(num_games=10000)
-print("Coûts moyens :")
-for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")
--- a/tmc.py
+++ b/tmc.py
@@ -7,6 +7,7 @@ class TransitionMatrixCalculator:
        self.matrix_safe = np.zeros((15, 15))
        self.matrix_normal = np.zeros((15, 15))
        self.matrix_risky = np.zeros((15, 15))
+        
        # Probability to go from state k to k'
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
@@ -17,14 +18,14 @@ class TransitionMatrixCalculator:
        self.matrix_normal.fill(0)
        self.matrix_risky.fill(0)

-        self._compute_safe_matrix(layout, circle)
+        self._compute_safe_matrix()
        self._compute_normal_matrix(layout, circle)
        self._compute_risky_matrix(layout, circle)

        return self.matrix_safe, self.matrix_normal, self.matrix_risky


-    def _compute_safe_matrix(self, layout, circle):
+    def _compute_safe_matrix(self):
        for k in range(0,15):
            for s, p in enumerate(self.safe_dice):
                if k == 9 and s == 1:
@@ -193,7 +194,7 @@ class TransitionMatrixCalculator:
                self.matrix_risky[k,k_prime] += p
        return self.matrix_risky
    
-
+    """
    def generate_arrays(self,n):
        # Initialize an empty list to store all the arrays
        arrays = []
@@ -223,5 +224,16 @@ class TransitionMatrixCalculator:
            self.compute_transition_matrix(array, True)


-#tmc = TransitionMatrixCalculator()
-#tmc.tst_transition_matrix()
+
+
+    def tst_transition_matrix(self):
+        # build one all-zero 15-square layout and print its matrices for circle=False and circle=True
+        layout = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0, 0]
+        
+        print(self.compute_transition_matrix(layout, False))
+        print(self.compute_transition_matrix(layout, True))
+        
+
+tmc = TransitionMatrixCalculator()
+tmc.tst_transition_matrix()
+"""
\ No newline at end of file
--- a/validation.py
+++ b/validation.py
+import random as rd
+import numpy as np
+import matplotlib.pyplot as plt
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import MarkovDecisionSolver as mD
+
+class validation:
+    """Estimate by simulation the average number of turns of several dice strategies.
+
+    A strategy is a sequence giving, for each state, the dice to throw:
+    1 (safe), 2 (normal) or 3 (risky).
+    """
+
+    def __init__(self, layout, circle=False):
+        """Compute the transition matrices, solve the layout and define the strategies.
+
+        Args:
+            layout: board layout, forwarded to the matrix calculator and the solver.
+            circle: circular-board flag, forwarded likewise.
+        """
+
+        # import from other .PY
+        self.layout = layout
+        self.circle = circle
+        self.tmc_instance = tmc()
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+        solver = mD(self.layout, self.circle)
+        self.expec, self.optimal_policy = solver.solve()
+
+        # Define all the strategy
+        self.optimal_strategy = self.optimal_policy
+        self.safe_strategy = [1]*15
+        self.normal_strategy = [2]*15
+        self.risky_strategy = [3]*15
+        # Dice ids are 1..3 only: an id of 0 would index transition_matrices[-1]
+        # in simulate_game and silently play the risky dice.
+        self.random_strategy = [rd.choice([1,2,3]) for _ in range(15)]
+
+    def simulate_game(self, strategy, n_iterations=10000):
+        """Return the mean number of turns over ``n_iterations`` simulated games.
+
+        Args:
+            strategy: sequence indexed by state holding the dice id (1, 2 or 3).
+            n_iterations: number of games to simulate.
+
+        Returns:
+            The mean of the per-game turn counts.
+        """
+        # Transition matrix of each dice, indexed by dice id - 1
+        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
+        number_turns = []
+        for _ in range(n_iterations):
+            total_turns = 0
+            state = 0  # initial state
+            while state < len(self.layout) - 1:  # until goal state is reached
+                action = strategy[state]  # get action according to strategy
+                transition_matrix = transition_matrices[int(action - 1)]
+                state = np.random.choice(len(self.layout), p=transition_matrix[state])
+                if self.layout[state] == 3 and action == 2:
+                    # Square type 3 reached with dice 2: the extra turn is
+                    # charged half of the time.
+                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+                elif self.layout[state] == 3 and action == 3:
+                    # Square type 3 reached with dice 3: always one extra turn.
+                    total_turns += 2
+                else:
+                    total_turns += 1
+            number_turns.append(total_turns)
+        return np.mean(number_turns)
+
+    def play_optimal_strategy(self):
+        """Return the number of turns of one simulated game under the optimal policy."""
+        return self.simulate_game(self.optimal_strategy, n_iterations=1)
+
+    def play_dice_strategy(self, dice=1):
+        """Return the number of turns of one simulated game always throwing ``dice``.
+
+        Args:
+            dice: dice id to use in every state (1, 2 or 3); defaults to 1.
+        """
+        return self.simulate_game([dice]*15, n_iterations=1)
+
+    def play_random_strategy(self):
+        """Return the number of turns of one simulated game under the random strategy."""
+        return self.simulate_game(self.random_strategy, n_iterations=1)
+
+    def compare_strategies(self, num_games=1000):
+        """Return the mean number of turns of every strategy, keyed by strategy name.
+
+        Args:
+            num_games: number of games simulated per strategy.
+        """
+        # simulate_game expects the per-state dice list (not a label) and
+        # takes the game count as ``n_iterations``.
+        strategies = {
+            'Optimal': self.optimal_strategy,
+            'SafeDice': self.safe_strategy,
+            'NormalDice': self.normal_strategy,
+            'RiskyDice': self.risky_strategy,
+            'Random': self.random_strategy
+        }
+
+        return {
+            name: self.simulate_game(policy, n_iterations=num_games)
+            for name, policy in strategies.items()
+        }
+    
+
+    
+
+# Example usage
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+
+circle = False  # Example circle value
+
+# Create an instance of validation (kept under its own name so the class
+# name is not rebound to an instance and stays callable)
+validator = validation(layout, circle)
+
+# Use the methods
+validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
+
+
+results = validator.compare_strategies(num_games=10000)
+print("Coûts moyens :")
+for strategy, cost in results.items():
+    print(f"{strategy}: {cost}")