update plots

81121bf8 · Adrien Payen · 02b9ef6f · 81121bf8 · 81121bf8 · 81121bf8
--- a/.DS_Store
+++ b/.DS_Store
--- a/plot.py
+++ b/plot.py
+import matplotlib.pyplot as plt
+from simulate import Validation as Val
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import MarkovDecisionSolver as mD
+import random as rd
+import numpy as np
+def plot_results(layouts, circle, n_iterations=100):
+    results_markov = []
+    results_safe = []
+    results_normal = []
+    results_risky = []
+    results_random = []
+    for layout in layouts:
+        # Compute optimal policy
+        expec, policy = mD(layout, circle)
+        # Simulate game
+        result_markov = Val.simulate_game(policy, layout, circle, n_iterations)
+        results_markov.append(result_markov)
+        result_safe = Val.simulate_game([1]*15, layout, circle, n_iterations)
+        results_safe.append(result_safe)
+        result_normal = Val.simulate_game([2]*15, layout, circle, n_iterations)
+        results_normal.append(result_normal)
+        result_risky = Val.simulate_game([3]*15, layout, circle, n_iterations)
+        results_risky.append(result_risky)
+        result_random = Val.simulate_game(np.random.randint(1, 4, size=15), layout, circle, n_iterations)
+        results_random.append(result_random)
+    # Plot the results
+    plt.figure(figsize=(12, 8))
+    plt.plot(range(len(layouts)), results_markov, label='Markov')
+    plt.plot(range(len(layouts)), results_safe, label='Safe')
+    plt.plot(range(len(layouts)), results_normal, label='Normal')
+    plt.plot(range(len(layouts)), results_risky, label='Risky')
+    plt.plot(range(len(layouts)), results_random, label='Random')
+    plt.xticks(range(len(layouts)), range(len(layouts)))
+    plt.xlabel('Layout number', fontsize=13)
+    plt.ylabel('Average number of turns', fontsize=13)
+    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
+    plt.show()
--- a/simulate.py
+++ b/simulate.py
-import random as rd
-import numpy as np
 from tmc import TransitionMatrixCalculator as tmc
 from markovDecision import MarkovDecisionSolver as mD
+import random as rd
+import numpy as np
+class Validation:
-class Simulate:
+    def __init__(self, layout, circle=False):
-    def __init__(self, layout, circle):
        self.layout = layout
        self.circle = circle
+        # Compute transition matrices using TransitionMatrixCalculator
        self.tmc_instance = tmc()
-        self.safe_dice, self.normal_dice, self.risky_dice = self.tmc_instance.compute_transition_matrix(layout, circle)
+        self.safe_dice = self.tmc_instance._compute_safe_matrix()
-        self.transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
+        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+        # Solve Markov Decision Problem
+        solver = mD(self.layout, self.circle)
+        self.expec, self.optimal_policy = solver.solve()
+        # Define all the strategies
+        self.optimal_strategy = self.optimal_policy
+        self.safe_strategy = [1] * 15
+        self.normal_strategy = [2] * 15
+        self.risky_strategy = [3] * 15
+        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)]
    def simulate_game(self, strategy, n_iterations=10000):
+        # Compute transition matrices for each dice
+        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # initial state
            while state < len(self.layout) - 1:  # until goal state is reached
                action = strategy[state]  # get action according to strategy
-                transition_matrix = self.transition_matrices[int(action) - 1]
+                transition_matrix = transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout), p=transition_matrix[state])
                if self.layout[state] == 3 and action == 2:
-                    total_turns += rd.choice([1, 2], p=[0.5, 0.5])
+                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
@@ -35,27 +50,29 @@ class Simulate:
        return np.mean(number_turns)
    def simulate_state(self, strategy, n_iterations=10000):
-        number_mean = []
+        # Compute transition matrices for each dice
+        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
+        number_turns = []
        for _ in range(n_iterations):
-            number_turns = []
+            turns_per_state = []
+            state = 0
-            for state in range(len(self.layout) - 1):
+            while state < len(self.layout) - 1:
                total_turns = 0
+                action = strategy[state]
+                transition_matrix = transition_matrices[int(action) - 1]
+                state = np.random.choice(len(self.layout), p=transition_matrix[state])
-                while state < len(self.layout) - 1:
+                if self.layout[state] == 3 and action == 2:
-                    print("Current state:", state)
+                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
-                    print("Transition matrix:", transition_matrix[state])
+                elif self.layout[state] == 3 and action == 3:
-                    state = np.random.choice(len(self.layout), p=transition_matrix[state])
+                    total_turns += 2
+                else:
-                    if self.layout[state] == 3 and action == 2:
+                    total_turns += 1
-                        total_turns += rd.choice([1, 2], p=[0.5, 0.5])
-                    elif self.layout[state] == 3 and action == 3:
-                        total_turns += 2
-                    else:
-                        total_turns += 1
-                number_turns.append(total_turns)
+                turns_per_state.append(total_turns)
-            number_mean.append(number_turns)
+            number_turns.append(turns_per_state)
-        return np.mean(number_mean, axis=0)
+        return np.mean(number_turns, axis=0)