merge solution

883d5811 · Adrien Payen · 81121bf8 · 81121bf8 · 883d5811 · 883d5811
--- a/plot.py
+++ b/plot.py
-import matplotlib.pyplot as plt
-from simulate import Validation as Val
-from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import MarkovDecisionSolver as mD
-import random as rd
-import numpy as np
-def plot_results(layouts, circle, n_iterations=100):
-    results_markov = []
-    results_safe = []
-    results_normal = []
-    results_risky = []
-    results_random = []
-    for layout in layouts:
-        # Compute optimal policy
-        expec, policy = mD(layout, circle)
-        # Simulate game
-        result_markov = Val.simulate_game(policy, layout, circle, n_iterations)
-        results_markov.append(result_markov)
-        result_safe = Val.simulate_game([1]*15, layout, circle, n_iterations)
-        results_safe.append(result_safe)
-        result_normal = Val.simulate_game([2]*15, layout, circle, n_iterations)
-        results_normal.append(result_normal)
-        result_risky = Val.simulate_game([3]*15, layout, circle, n_iterations)
-        results_risky.append(result_risky)
-        result_random = Val.simulate_game(np.random.randint(1, 4, size=15), layout, circle, n_iterations)
-        results_random.append(result_random)
-    # Plot the results
-    plt.figure(figsize=(12, 8))
-    plt.plot(range(len(layouts)), results_markov, label='Markov')
-    plt.plot(range(len(layouts)), results_safe, label='Safe')
-    plt.plot(range(len(layouts)), results_normal, label='Normal')
-    plt.plot(range(len(layouts)), results_risky, label='Risky')
-    plt.plot(range(len(layouts)), results_random, label='Random')
-    plt.xticks(range(len(layouts)), range(len(layouts)))
-    plt.xlabel('Layout number', fontsize=13)
-    plt.ylabel('Average number of turns', fontsize=13)
-    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
-    plt.show()
--- a/simulate.py
+++ b/simulate.py
-from tmc import TransitionMatrixCalculator as tmc
+import random
-from markovDecision import MarkovDecisionSolver as mD
-import random as rd
 import numpy as np
+import matplotlib.pyplot as plt
-class Validation:
+from tmc import TransitionMatrixCalculator as tmc
-    def __init__(self, layout, circle=False):
+from markovDecision import MarkovDecisionSolver
-        self.layout = layout
-        self.circle = circle
+# Board size: positions are indexed 0 .. nSquares - 1 and the last index ends the game.
+nSquares = 15
+# Number of simulated games averaged for each empirical estimate.
+nSimul = 10000
-        # Compute transition matrices using TransitionMatrixCalculator
-        self.tmc_instance = tmc()
+def playOneTurn(diceChoice, curPos, layout, circle, prison):
+    """Simulate one turn and return the pair ``(newPos, prison)``.
+    diceChoice -- die thrown; the roll is drawn uniformly from 0..diceChoice.
+        Die 1 never triggers the landing square, die 2 triggers it half of
+        the time, any other value always triggers it.
+    curPos -- index of the square the player stands on.
+    layout -- square types by index (0: nothing, 1: back to index 0,
+        2: three squares back, 3: prison, 4: random pick among 1, 2, 3).
+    circle -- if true an overshoot wraps around by nSquares, otherwise the
+        player is put on the last square.
+    prison -- if true the turn is skipped: no roll, same position.
+    The returned flag is True when the square reached is a triggered prison,
+    and also when the last square is overshot with ``circle`` off.  A square
+    type outside 0..4 matches no branch, so None is returned.
+    """
+    lastSquare = nSquares - 1
-        self.safe_dice = self.tmc_instance._compute_safe_matrix()
+    if curPos == lastSquare:
-        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
+        return lastSquare, False
-        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
+    if prison:
-        # Solve Markov Decision Problem
+        return curPos, False
-        solver = mD(self.layout, self.circle)
-        self.expec, self.optimal_policy = solver.solve()
+    result = random.choice(range(diceChoice + 1))
-        # Define all the strategies
-        self.optimal_strategy = self.optimal_policy
+    if curPos == 2 and result != 0:
-        self.safe_strategy = [1] * 15
+        # A fair draw decides whether the move lands 7 indices further.
+        shortHop = random.choice([0, 1])
-        self.normal_strategy = [2] * 15
+        newPos = curPos + result if shortHop else curPos + result + 7
-        self.risky_strategy = [3] * 15
-        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)]
+    elif curPos in (7, 8, 9) and curPos + result >= 10:
+        # Leaving indices 7..9 past index 9 skips the next four indices.
+        newPos = curPos + result + 4
-    def simulate_game(self, strategy, n_iterations=10000):
+    else:
-        # Compute transition matrices for each dice
+        newPos = curPos + result
-        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
+    if newPos > lastSquare:
-        number_turns = []
+        if not circle:
-        for _ in range(n_iterations):
+            return lastSquare, True
-            total_turns = 0
+        newPos -= nSquares
-            state = 0  # initial state
+    squareType = layout[newPos]
-            while state < len(self.layout) - 1:  # until goal state is reached
+    if diceChoice == 1:
-                action = strategy[state]  # get action according to strategy
+        return newPos, False
-                transition_matrix = transition_matrices[int(action) - 1]
-                state = np.random.choice(len(self.layout), p=transition_matrix[state])
+    if diceChoice == 2:
-                if self.layout[state] == 3 and action == 2:
+        # Half of the time the square counts as an ordinary one.
+        squareType = random.choice([0, squareType])
-                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
+    if squareType == 4:
-                elif self.layout[state] == 3 and action == 3:
+        # Replace type 4 by one of the three other effects.
+        squareType = random.choice([1, 2, 3])
-                    total_turns += 2
+    if squareType == 0:
-                else:
-                    total_turns += 1
+        return newPos, False  # nothing happens
-            number_turns.append(total_turns)
+    if squareType == 1:
-        return np.mean(number_turns)
+        return 0, False  # back to square one
-    def simulate_state(self, strategy, n_iterations=10000):
+    if squareType == 2:
-        # Compute transition matrices for each dice
+        return max(newPos - 3, 0), False  # back 3 squares, floored at index 0
-        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
+    if squareType == 3:
-        number_turns = []
-        for _ in range(n_iterations):
-            turns_per_state = []
-            state = 0
-            while state < len(self.layout) - 1:
-                total_turns = 0
-                action = strategy[state]
-                transition_matrix = transition_matrices[int(action) - 1]
-                state = np.random.choice(len(self.layout), p=transition_matrix[state])
+        return newPos, True  # prison
-                if self.layout[state] == 3 and action == 2:
+def playOneGame(layout, circle, policy, start=0):
+    """Play one game from ``start`` and return the number of turns it took.
+    layout -- square types, passed through to playOneTurn.
+    circle -- passed through to playOneTurn; it also selects the stop test:
+        exact equality with the last square when on, "at or past" when off.
+    policy -- die to throw on each square, indexed by position.
+    start -- index of the starting square.
+    Every call to playOneTurn counts as one turn, including a turn skipped
+    in prison.
+    """
-                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
+    lastSquare = nSquares - 1
-                elif self.layout[state] == 3 and action == 3:
+    nTurns = 0
-                    total_turns += 2
+    curPos = start
-                else:
-                    total_turns += 1
+    prison = False
-                turns_per_state.append(total_turns)
+    # playOneTurn already folds an overshoot back onto the board, so the
+    # position it returns is used as is (the former extra wrap in the circle
+    # branch was overwritten on the very next line and had no effect).
-            number_turns.append(turns_per_state)
+    while (curPos != lastSquare) if circle else (curPos < lastSquare):
-        return np.mean(number_turns, axis=0)
+        curPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison)
+        nTurns += 1
+    return nTurns
+def empiric_cost_of_square(layout, circle, policy):
+    """Estimate, for every starting square, the mean number of turns to finish.
+    Plays nSimul games from each of the nSquares squares under ``policy`` and
+    returns the averages as a float array of length nSquares.
+    """
+    def meanTurnsFrom(square):
+        turns = sum(playOneGame(layout, circle, policy, start=square) for _ in range(nSimul))
+        return turns / nSimul
+    return np.array([meanTurnsFrom(square) for square in range(nSquares)], dtype=float)
+def empirical_results(layout, circle, policy):
+    """Return the mean number of turns over nSimul games started on square 0."""
+    totalTurns = sum(playOneGame(layout, circle, policy) for _ in range(nSimul))
+    return totalTurns / nSimul
+def comparison_theorical_empirical(layout, circle):
+    """Plot the solver's per-square cost next to the simulated one.
+    Solves the board with MarkovDecisionSolver, simulates the returned policy
+    (cast to int) from every square, and shows both curves in one figure.
+    """
+    expec, optimal_policy = MarkovDecisionSolver(layout, circle).solve()
+    actual = empiric_cost_of_square(layout, circle, optimal_policy.astype(int))
+    # One x value per square; the same values are reused as tick positions.
+    squares = np.arange(len(expec))
+    for costs, legend in ((expec, "Theoretical cost"), (actual, "Empirical cost")):
+        plt.plot(squares, costs, label=legend)
+    plt.xticks(squares)
+    plt.grid(True)
+    plt.xlabel("Square")
+    plt.ylabel("Cost")
+    plt.legend()
+    plt.title("Comparison between the expected cost and the actual cost")
+    plt.show()
+def comparison_of_policies_total(layout, circle):
+    """Bar-plot the simulated mean game length of five policies.
+    The policies are the solver's policy, the three constant policies
+    (always die 1, 2 or 3) and one policy drawn at random per square.
+    """
+    _, optimal_policy = MarkovDecisionSolver(layout, circle).solve()
+    fixedDice = [np.ones(nSquares, dtype=int) * die for die in (1, 2, 3)]
+    policies = [optimal_policy.astype(int), *fixedDice, np.random.randint(1, 4, size=nSquares)]
+    names = ["optimal", "safe", "normal", "risky", "random"]
+    avgnTurns = [empirical_results(layout, circle, policy) for policy in policies]
+    # One bar per policy, in the same order as `names`.
+    plt.bar(names, avgnTurns)
+    plt.xlabel("Policy")
+    plt.ylabel("Cost")
+    plt.title("Expected number of turns by policy")
+    plt.show()
+def comparison_of_policies_squares(layout, circle):
+    """Plot, square by square, the simulated cost of five policies.
+    The policies are the solver's policy, the three constant policies
+    (always die 1, 2 or 3) and one policy drawn at random per square.
+    """
+    _, optimal_policy = MarkovDecisionSolver(layout, circle).solve()
+    fixedDice = [np.ones(nSquares, dtype=int) * die for die in (1, 2, 3)]
+    policies = [optimal_policy.astype(int), *fixedDice, np.random.randint(1, 4, size=nSquares)]
+    avgnTurns = [empiric_cost_of_square(layout, circle, policy) for policy in policies]
+    # One x value per square; the same values are reused as tick positions.
+    squares = np.arange(len(avgnTurns[0]))
+    legends = ("Optimal", "Safe", "Normal", "Risky", "Random")
+    for costs, legend in zip(avgnTurns, legends):
+        plt.plot(squares, costs, label=legend)
+    plt.xticks(squares)
+    plt.grid(True)
+    plt.xlabel("Square")
+    plt.ylabel("Cost")
+    plt.legend()
+    plt.title("Expected cost for different policies")
+    plt.show()
+def make_plots():
+    """Show the theoretical-vs-empirical comparison on a fixed example board."""
+    board = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+    wraps = False
+    comparison_theorical_empirical(board, wraps)
+    # Other available figures, currently disabled:
+    # comparison_of_policies_total(board, wraps)
+    # comparison_of_policies_squares(board, wraps)
+# Executed at module level, i.e. whenever this file is run or imported.
+make_plots()
--- a/validation.py
+++ b/validation.py
@@ -25,43 +25,69 @@ class validation:
        self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
    def simulate_game(self, strategy, n_iterations=10000):
+        """Return the mean number of turns over n_iterations simulated games.
+        strategy -- die (1, 2 or 3) to throw in each state, indexed by state.
+        Every game starts in state 0 and runs until the last index of
+        self.layout is reached; each next state is drawn from row k of the
+        chosen die's transition matrix.
+        """
-        # Compute transition matrices for each dice
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
-            state = 0  # initial state
+            k = 0  # initial state
-            while state < len(self.layout) - 1:  # until goal state is reached
-                action = strategy[state]  # get action according to strategy
+            while k < len(self.layout) - 1:
-                transition_matrix = transition_matrices[int(action - 1)]
+                action = strategy[k]  # die prescribed by the strategy
-                state = np.random.choice(len(self.layout), p=transition_matrix[state])
-                if self.layout[state] == 3 and action == 2:
+                # Cast to int so the die number can index transition_matrices
+                action_index = int(action) - 1
+                transition_matrix = transition_matrices[action_index]
+                # Normalise row k into a new array: an in-place `/=` on the row
+                # would write back into the matrix stored on self.
+                row = np.asarray(transition_matrix[k], dtype=float)
+                flattened_probs = row / np.sum(row)
+                # Draw the next state from that distribution
+                k = np.random.choice(len(self.layout), p=flattened_probs)
+                # Count the turn; reaching a type-3 square with die 2 or 3 can cost two
+                if self.layout[k] == 3 and action == 2:
                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
-                elif self.layout[state] == 3 and action == 3:
+                elif self.layout[k] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)
        return np.mean(number_turns)
-    def play_optimal_strategy(self):
+    def play_optimal_strategy(self, n_iterations=10000):
+        """Return the simulated mean number of turns under self.optimal_policy."""
-        return turns
+        return self.simulate_game(strategy=self.optimal_policy, n_iterations=n_iterations)
-    def play_dice_strategy(self):
+    def play_dice_strategy(self, dice_choice, n_iterations=10000):
+        """Simulate the constant strategy named by ``dice_choice``.
+        dice_choice -- 'SafeDice', 'NormalDice' or 'RiskyDice'.
+        Returns what simulate_game returns for that strategy; any other
+        name raises ValueError.
+        """
-        return turns
+        if dice_choice == 'SafeDice':
+            return self.simulate_game(self.safe_strategy, n_iterations)
+        if dice_choice == 'NormalDice':
+            return self.simulate_game(self.normal_strategy, n_iterations)
+        if dice_choice == 'RiskyDice':
+            return self.simulate_game(self.risky_strategy, n_iterations)
+        raise ValueError("Invalid dice choice")
-    def play_random_strategy(self):
-        return turns
+    def play_random_strategy(self, n_iterations=10000):
+        """Return the simulated mean number of turns under self.random_strategy."""
+        return self.simulate_game(strategy=self.random_strategy, n_iterations=n_iterations)
    def compare_strategies(self, num_games=1000):
-        optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games)
+        optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
-        dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games)
+        dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
-        dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games)
+        dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
-        dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games)
+        dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
-        random_cost = self.simulate_game(strategy='Random', num_games=num_games)
+        random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
        return {
            'Optimal': optimal_cost,
@@ -75,11 +101,11 @@ class validation:
 # Utilisation d'exemple
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
 validation = validation(layout, circle=False)
 circle = False  # Example circle value
+"""
 # Create an instance of validation
 validator = validation(layout, circle)
@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
 results = validation.compare_strategies(num_games=10000)
 print("Coûts moyens :")
 for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")
+    print(f"{strategy}: {cost}")"""
+# `validation` below is the instance bound at module level earlier in this
+# script (the assignment reuses the class name), not the class itself.
+# Each call simulates 10000 games and prints the resulting mean.
+optimal_cost = validation.play_optimal_strategy(n_iterations=10000)
+print("Optimal Strategy Cost:", optimal_cost)
+dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000)
+print("Normal Dice Strategy Cost:", dice2_cost)
+random_cost = validation.play_random_strategy(n_iterations=10000)
+print("Random Strategy Cost:", random_cost)
+strategy_comparison = validation.compare_strategies(num_games=10000)
+print("Strategy Comparison Results:", strategy_comparison)