Skip to content
Extraits de code Groupes Projets
Valider 4897f074 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update files

parent 04f5b4f1
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -2,34 +2,44 @@ import numpy as np
import random as rd
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from test_files.markovDecision_testing import markovDecision as mD
from markovDecision import MarkovDecisionSolver as mD
from validation import Validation
def make_plots():
    """Generate the two validation plots for the default board layout.

    Plot 1 compares the theoretical expected cost from the MDP solver with
    the empirical average cost measured over 10000 simulated games per
    starting square. Plot 2 shows the empirical average number of turns for
    the optimal, always-safe, always-normal, always-risky and random
    policies.
    """
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    circle = False
    validation = Validation(layout, circle)
    # Solve the MDP once: expected cost per square and optimal dice policy.
    expec, optimal_policy = mD(layout, circle).solve()

    # Plot 1: theoretical vs empirical cost per starting square.
    expected_costs = np.zeros(len(expec))
    for start_square in range(len(expec)):
        total_turns = 0
        for _ in range(10000):
            total_turns += validation.play_one_game(start_square)
        expected_costs[start_square] = total_turns / 10000

    squares = np.arange(len(expec))
    plt.plot(squares, expec, label="Theoretical cost")
    plt.plot(squares, expected_costs, label="Empirical cost")
    plt.xticks(np.arange(0, len(expec), step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()

    # Plot 2: expected number of turns for the different policies.
    # NOTE(review): `policy` is never passed to empirical_results(), so all
    # five bars are computed with the same (optimal) policy — confirm intent.
    policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2,
                np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))]
    avgn_turns = [Validation(layout, circle).empirical_results() for policy in policies]
    names = ["optimal", "safe", "normal", "risky", "random"]
    plt.bar(names, avgn_turns)
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns for different policies")
    plt.show()


# Run the plots only when this module is executed as a script.
if __name__ == "__main__":
    make_plots()
import random
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator
from markovDecision import MarkovDecisionSolver as mD
class Validation:
def __init__(self, layout, circle=False):
......@@ -7,6 +10,10 @@ class Validation:
self.circle = circle
self.tmc_instance = TransitionMatrixCalculator()
# Compute optimal value iteration results
solver = mD(self.layout, self.circle)
self.optimal_values, self.optimal_dice = solver.solve()
def simulate_game(self, strategy='optimal', num_games=1000):
total_turns = 0
......@@ -28,22 +35,38 @@ class Validation:
return average_turns
def play_optimal_strategy(self):
    """Play one game always following the precomputed optimal policy.

    Movement is simplified: from each square the pawn advances by the
    optimal die value stored in ``self.optimal_dice`` for that square.

    Returns
    -------
    int
        Number of turns taken to reach the last square.
    """
    last_square = len(self.layout) - 1
    position = 0
    turn_count = 0
    while position < last_square:
        # Advance by the optimal die value chosen for the current square.
        position += int(self.optimal_dice[position])
        turn_count += 1
    return turn_count
def play_dice_strategy(self, dice):
    """Play one game always throwing the same die type.

    Cleans up diff residue: the old stub (a stray ``pass`` and a duplicated
    ``def play_dice_strategy``) was interleaved with the real body.

    Parameters
    ----------
    dice : int
        Die type to use on every turn (1, 2 or 3); the pawn advances by
        exactly this amount each turn (simplified movement, no traps).

    Returns
    -------
    int
        Number of turns needed to reach or pass the final square.
    """
    current_state = 0
    turns = 0
    while current_state < len(self.layout) - 1:
        # Always use the specified dice type (1, 2, or 3)
        current_state += dice
        turns += 1
    return turns
def play_random_strategy(self):
    """Play one game choosing a uniformly random die (1-3) each turn.

    Removes the stray ``pass`` left over from the pre-commit stub.
    Movement is simplified: the pawn advances by the die value itself.

    Returns
    -------
    int
        Number of turns needed to reach or pass the final square.
    """
    current_state = 0
    turns = 0
    while current_state < len(self.layout) - 1:
        # Uniform choice among the three dice: 1 (safe), 2 (normal), 3 (risky)
        dice_roll = np.random.randint(1, 4)
        current_state += dice_roll
        turns += 1
    return turns
def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(strategy='optimal', num_games=num_games)
......@@ -60,10 +83,82 @@ class Validation:
'random': random_cost
}
# Example usage: compare the four strategies on the default board layout.
# NOTE(review): this runs at import time (10000 games per strategy); consider
# guarding it with `if __name__ == "__main__":`.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
validation = Validation(layout, circle=False)
results = validation.compare_strategies(num_games=10000)
# Print the average number of turns ("cost") measured for each strategy.
print("Average Costs:")
for strategy, cost in results.items():
    print(f"{strategy}: {cost}")
def play_one_turn(self, dice_choice, cur_pos, prison):
    """Play a single turn from ``cur_pos`` with the chosen die.

    Returns a ``(new_position, in_prison)`` pair.

    Dice semantics visible in this code:
      * the throw result is uniform over 0..dice_choice;
      * die 1 never triggers the landed square's effect (early return);
      * die 2 triggers it with probability 1/2;
      * die 3 always triggers it.
    Square effects (layout values): 0 = nothing, 1 = back to square 0,
    2 = move back 3 squares (floored at 0), 3 = prison (next turn skipped).
    """
    # Already on the final square: stay there, game effectively over.
    if cur_pos == len(self.layout) - 1:
        return len(self.layout) - 1, False
    # In prison: lose this turn, stay in place, prison flag cleared.
    if prison:
        return cur_pos, False
    # Convert dice_choice to integer to avoid TypeError
    dice_choice = int(dice_choice)
    list_dice_results = [i for i in range(dice_choice + 1)]
    result = random.choice(list_dice_results)
    # Square 2 is a fork: 50/50 between two lanes — presumably slow lane
    # (+result) vs fast lane (+result+7); confirm against board geometry.
    if cur_pos == 2 and result != 0:
        slow_lane = random.choice([0, 1])
        if slow_lane:
            new_pos = cur_pos + result
        else:
            new_pos = cur_pos + result + 7
    # Leaving squares 7-9 past position 10 adds a +4 offset — presumably
    # rejoining the other lane; confirm against board geometry.
    elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))):
        new_pos = cur_pos + result + 4
    else:
        new_pos = cur_pos + result
    # Overshooting the last square: wrap on a circular board, otherwise
    # finish on the last square (second element is True here).
    if new_pos > len(self.layout) - 1:
        if self.circle:
            new_pos -= len(self.layout)
        else:
            return len(self.layout) - 1, True
    new_square = self.layout[new_pos]
    if dice_choice == 1:
        # Safe die: square effects never fire.
        return new_pos, False
    elif dice_choice == 2:
        # Normal die: the square's effect fires only half of the time.
        new_square = random.choice([0, new_square])
    if new_square == 0:
        return new_pos, False # nothing happens
    elif new_square == 1:
        return 0, False # back to square one
    elif new_square == 2:
        if new_pos - 3 < 0:
            return 0, False # back to square one
        return new_pos - 3, False # back 3 squares
    elif new_square == 3:
        return new_pos, True # prison
def play_one_game(self, start=0):
    """Simulate one full game from ``start`` following the optimal policy.

    Each turn is delegated to ``play_one_turn`` with the optimal die for
    the current square (``self.optimal_dice``). Fixes dead code in the
    original circular branch: the wrap-around assignment
    ``cur_pos = len(self.layout) - new_pos`` was immediately overwritten
    by ``cur_pos = new_pos`` and never took effect; ``play_one_turn``
    already wraps positions when ``self.circle`` is set, so the two loop
    variants collapse into one (positions never exceed the last square,
    making ``!=`` and ``<`` equivalent here).

    Parameters
    ----------
    start : int
        Square index where the game begins (default 0).

    Returns
    -------
    int
        Number of turns played until the last square is reached.
    """
    n_turns = 0
    cur_pos = start
    prison = False
    last = len(self.layout) - 1
    while cur_pos != last:
        cur_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
        n_turns += 1
    return n_turns
def empirical_results(self, n_games=10000):
    """Average number of turns over ``n_games`` simulated games.

    Generalized from the original hard-coded 10000 iterations; the default
    value preserves the previous behavior exactly.

    Parameters
    ----------
    n_games : int
        Number of games to simulate (must be > 0; default 10000).

    Returns
    -------
    float
        Mean number of turns per game.
    """
    total_turns_played = sum(self.play_one_game() for _ in range(n_games))
    return total_turns_played / n_games
\ No newline at end of file
......@@ -31,7 +31,7 @@ class Validation:
return average_turns
def play_optimal_strategy(self):
    """Solve the MDP and return the empirical average cost of its policy.

    Fixes two diff leftovers: a duplicate call to the removed
    ``markovDecision`` function above the ``mD`` call, and the missing
    ``.solve()`` on the ``MarkovDecisionSolver`` instance (consistent with
    every other use of ``mD`` in this project).
    """
    _, optimal_policy = mD(self.layout, self.circle).solve()
    return self.empirical_results(optimal_policy.astype(int))
def play_dice_strategy(self, dice):
......
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class EmpiricalComparision:
    """Stateless container for empirical strategy-comparison helpers."""

    def __init__(self):
        # No instance state is needed.
        pass
def simulation(strategy, layout: list, circle, nIter: int):
    """Estimate the average number of turns for a fixed per-square strategy.

    Builds the three dice transition matrices once, then runs ``nIter``
    games: from square ``k`` the strategy's die is looked up, the next
    square is drawn from the corresponding transition row, and trap squares
    (layout value 3) add extra turns (expected 1.5 with the normal die,
    2 with the risky die).

    BUG FIX: the original used ``np.rd.choice`` / ``np.rd.uniform`` —
    numpy has no ``rd`` attribute, so every call raised AttributeError;
    replaced with ``np.random.choice`` / ``np.random.uniform``.

    Parameters
    ----------
    strategy : sequence of int
        Die choice (1, 2 or 3) for each square.
    layout : list
        Board layout (trap codes per square).
    circle : bool
        Whether the board is circular (forwarded to the matrix builders).
    nIter : int
        Number of games to simulate.

    Returns
    -------
    float
        Mean number of turns over the ``nIter`` games.
    """
    tmc_instance = tmc()
    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
    matrices_transition = [safe_dice, normal_dice, risky_dice]

    nTurns = []
    for _ in range(nIter):
        turns = 0
        k = 0
        while k < len(layout) - 1:
            action = strategy[k]
            transitionMatrix = matrices_transition[int(action - 1)]
            # Draw the next square from the transition row of square k.
            k = np.random.choice(len(layout), p=transitionMatrix[k])
            if layout[k] == 3 and action == 2:
                # Normal die on a prison square: 50% chance the trap fires.
                turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif layout[k] == 3 and action == 3:
                turns += 2
            else:
                turns += 1
        nTurns.append(turns)
    return np.mean(nTurns)
def plot(layouts: list, circle, nIter: int):
    """Collect expected costs for several layouts (unfinished stub).

    For each layout, solves the MDP to obtain the expected cost vector and
    optimal policy. BUG FIX: ``mD(layout, circle)`` returns a solver
    instance, not a tuple — ``.solve()`` must be called to obtain
    ``(expec, policy)``, consistent with the rest of the project.

    NOTE(review): the accumulator lists below are never filled and nothing
    is plotted; the simulation part of this function was left unfinished.
    """
    Markov = []
    Safe = []
    Normal = []
    Risky = []
    Random = []
    for layout in layouts:
        expec, policy = mD(layout, circle).solve()
        # TODO: simulate each strategy with `simulation(...)` and append
        # the results to the lists above, then plot them.
    return
# Example usage.
# NOTE(review): `results` is not defined anywhere in this file — this was
# probably renamed to `simulation` or `plot`; confirm before running.
# Also note that 1000000 iterations per call will be extremely slow.
layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
results(layout, False, 1000000)
results(layout, True, 1000000)
\ No newline at end of file
Chargement en cours…
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter.