commit ML

399700c8 · Adrien Payen · 7707bd45 · 399700c8 · 399700c8 · 399700c8
--- a/plot.py
+++ b/plot.py
+import numpy as np
+import random as rd
+import matplotlib.pyplot as plt
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import markovDecision as mD
+from validation import Validation
+
+def plot_results(validation_instance):
+    """Plot the average number of turns per layout for five dice strategies.
+
+    ``validation_instance.simulate_game`` is called once per strategy: the
+    ``'markov'`` keyword, the constant policies 1, 2 and 3 over 15 squares,
+    and one randomly drawn 15-square policy.  One curve per strategy is then
+    drawn against the indices of ``validation_instance.layouts``.
+
+    NOTE(review): this relies on a ``layouts`` attribute and on
+    ``simulate_game`` accepting ``'markov'`` or a 15-element policy and
+    returning one value per layout -- confirm against the ``Validation``
+    class that is actually imported.
+    """
+    n_squares = 15
+
+    # Simulations run in this fixed order; the random policy is drawn last.
+    curves = [
+        ('Markov', validation_instance.simulate_game('markov')),
+        ('SafeDice', validation_instance.simulate_game([1] * n_squares)),
+        ('NormalDice', validation_instance.simulate_game([2] * n_squares)),
+        ('RiskyDice', validation_instance.simulate_game([3] * n_squares)),
+        ('Random', validation_instance.simulate_game(
+            np.random.randint(1, 4, size=n_squares))),
+    ]
+
+    plt.figure(figsize=(12, 8))
+    layout_indices = range(len(validation_instance.layouts))
+    for curve_label, averages in curves:
+        plt.plot(layout_indices, averages, label=curve_label)
+
+    # One tick per layout, labelled with the layout index itself.
+    plt.xticks(layout_indices, layout_indices)
+    plt.xlabel('Layout Number', fontsize=13)
+    plt.ylabel('Average Number of Turns', fontsize=13)
+    # Anchor the legend just outside the top-right corner of the axes.
+    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
+    plt.show()
+
+# Example usage: build a Validation over a list of layouts and plot the
+# strategy comparison. These statements sit at module level, so they run
+# whenever this file is executed or imported.
+layouts = [
+    # One board of 15 squares, given as one integer code per square.
+    [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0],
+    # Add more layouts as needed
+]
+
+# NOTE(review): the Validation constructor added to validation.py in this
+# same commit is declared as __init__(self, layout, circle=False) and takes
+# no n_iterations argument -- confirm which Validation API this call targets.
+validation_instance = Validation(layouts, circle=False, n_iterations=10000)
+plot_results(validation_instance)
\ No newline at end of file
--- a/tmc.py
+++ b/tmc.py
@@ -217,7 +217,7 @@ class TransitionMatrixCalculator:
    # create a function that test the transition matrix for different layout each time with one trap of each sort
    def tst_transition_matrix(self):
        # create a list of 100 different layouts
-        layouts = self.generate_arrays(1000)
+        layouts = self.generate_arrays(100)
        for array in layouts:
            print(array)
            self.compute_transition_matrix(array, False)

--- a/validation.py
+++ b/validation.py
-import random as rd
 import numpy as np
-import matplotlib.pyplot as plt
-from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import markovDecision as mD
-
-class EmpiricalComparision :
-    def __init__(self) : 
-        return
-        
-
-    def simulation(strategy, layout : list, circle, nIter : int) :
-        tmc_instance = tmc()
-        safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
-        normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
-        risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
-        matrices_transition = [safe_dice, normal_dice, risky_dice]
-        nTurns = []
-        turns = 0
-        for _ in range(nIter) : 
-            turns = 0
-            k = 0
-            while k < len(layout)-1 :
-                action = strategy[k]
-                transitionMatrix  = matrices_transition[int(action -1)]
-                k = np.rd.choice(len(layout), p = transitionMatrix[k])
-                if layout[k] == 3 and action == 2 : 
-                    turns +=1 if np.rd.uniform(0,1) < 0.5 else 2
-                elif layout[k] == 3 and action == 3 :
-                    turns += 2
-                else :
-                    turns += 1
-            nTurns.append(turns)
-        
-        return np.mean(nTurns)
-
-    
-    def plot(layouts : list, circle, nIter : int) :
-        Markov = []
-        Safe = []
-        Normal = []
-        Risky = []
-        Random = []
-        for layout in layouts :
-            expec, policy = mD(layout, circle)
-            # Simulate the game
-
-        return
-
-
-
-
-
-
-layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
-results(layout, False, 1000000)
-results(layout, True, 1000000)
\ No newline at end of file
+from tmc import TransitionMatrixCalculator
+
+class Validation:
+    """Empirically compare dice strategies on one board layout.
+
+    Every strategy is played ``num_games`` times and the mean number of turns
+    is reported.  The three ``play_*`` hooks are placeholders: until they are
+    implemented they raise ``NotImplementedError`` instead of returning
+    ``None`` (which used to surface as an opaque ``TypeError`` when summed).
+    """
+
+    # Strategy keywords accepted by simulate_game, in reporting order.
+    STRATEGIES = ('optimal', 'dice1', 'dice2', 'dice3', 'random')
+
+    def __init__(self, layout, circle=False):
+        """Store the board and create the transition-matrix helper.
+
+        Args:
+            layout: the board description; stored unchanged.
+            circle: stored unchanged (presumably selects the circular-board
+                rule -- TODO confirm against TransitionMatrixCalculator).
+        """
+        self.layout = layout
+        self.circle = circle
+        self.tmc_instance = TransitionMatrixCalculator()
+
+    def simulate_game(self, strategy='optimal', num_games=1000):
+        """Return the average number of turns of ``strategy``.
+
+        Args:
+            strategy: one of 'optimal', 'dice1', 'dice2', 'dice3', 'random'.
+            num_games: number of games to average over; must be positive.
+
+        Returns:
+            The sum of the per-game turn counts divided by ``num_games``.
+
+        Raises:
+            ValueError: if ``strategy`` is not a known keyword or
+                ``num_games`` is not positive.
+        """
+        players = {
+            'optimal': self.play_optimal_strategy,
+            'dice1': lambda: self.play_dice_strategy(1),
+            'dice2': lambda: self.play_dice_strategy(2),
+            'dice3': lambda: self.play_dice_strategy(3),
+            'random': self.play_random_strategy,
+        }
+        # The isinstance guard keeps unhashable inputs (lists, arrays) away
+        # from the dict lookup and turns them into a clear error instead.
+        if not isinstance(strategy, str) or strategy not in players:
+            raise ValueError(f"unknown strategy: {strategy!r}")
+        if num_games <= 0:
+            raise ValueError("num_games must be a positive integer")
+
+        # Resolve the strategy once instead of re-testing it for every game.
+        play = players[strategy]
+        total_turns = 0
+        for _ in range(num_games):
+            total_turns += play()
+
+        return total_turns / num_games
+
+    def play_optimal_strategy(self):
+        """Play one game with the optimal policy (not implemented yet)."""
+        # TODO: implement the optimal strategy using value iteration results.
+        # TODO: use TransitionMatrixCalculator to compute transitions and
+        #       make decisions (compute the optimal strategy for one turn).
+        raise NotImplementedError("play_optimal_strategy is not implemented")
+
+    def play_dice_strategy(self, dice):
+        """Play one game always using die ``dice`` (1, 2 or 3); not implemented yet."""
+        # TODO: implement a strategy where only one type of dice is used.
+        raise NotImplementedError("play_dice_strategy is not implemented")
+
+    def play_random_strategy(self):
+        """Play one game with a purely random policy (not implemented yet)."""
+        # TODO: implement a purely random strategy.
+        raise NotImplementedError("play_random_strategy is not implemented")
+
+    def compare_strategies(self, num_games=1000):
+        """Return ``{strategy: average turns}`` for every known strategy.
+
+        Strategies are simulated in the order optimal, dice1, dice2, dice3,
+        random, each over ``num_games`` games.
+        """
+        return {
+            name: self.simulate_game(strategy=name, num_games=num_games)
+            for name in self.STRATEGIES
+        }
+
+# Example usage. Guarded so that it only runs when this file is executed as a
+# script: other modules import Validation from here, and an unguarded example
+# would start a 10000-game comparison on every import.
+if __name__ == "__main__":
+    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+    validation = Validation(layout, circle=False)
+    results = validation.compare_strategies(num_games=10000)
+    print("Average Costs:")
+    for strategy, cost in results.items():
+        print(f"{strategy}: {cost}")
--- a/validation_ex.py
+++ b/validation_ex.py
+import numpy as np
+from tmc import TransitionMatrixCalculator
+import random
+import matplotlib.pyplot as plt
+from markovDecision import markovDecision
+
+class Validation:
+    """Empirical (Monte-Carlo) comparison of dice strategies on one board.
+
+    Games are simulated turn by turn with the ``random`` module.  The square
+    indices used for the lane junction (2, 7-9, 10) are hard-coded in
+    ``playOneTurn``, so the simulation assumes the 15-square board used in
+    the example at the bottom of this file.
+    """
+
+    # Strategy keywords accepted by simulate_game, in reporting order.
+    STRATEGIES = ('optimal', 'dice1', 'dice2', 'dice3', 'random')
+
+    def __init__(self, layout, circle=False):
+        """Store the board and create the transition-matrix helper.
+
+        Args:
+            layout: sequence of integer square codes (0-4 are understood by
+                ``playOneTurn``).
+            circle: if True, overshooting the last square wraps around to the
+                start instead of ending the game.
+        """
+        self.layout = layout
+        self.circle = circle
+        self.tmc_instance = TransitionMatrixCalculator()
+
+    def simulate_game(self, strategy='optimal', num_games=1000):
+        """Return the mean cost of ``strategy`` over ``num_games`` evaluations.
+
+        Each evaluation calls one of the ``play_*`` methods, which itself
+        returns an average over the games run by ``empirical_results``.
+
+        Args:
+            strategy: one of 'optimal', 'dice1', 'dice2', 'dice3', 'random'.
+            num_games: number of evaluations to average; must be positive.
+
+        Raises:
+            ValueError: if ``strategy`` is not a known keyword or
+                ``num_games`` is not positive.
+        """
+        players = {
+            'optimal': self.play_optimal_strategy,
+            'dice1': lambda: self.play_dice_strategy(1),
+            'dice2': lambda: self.play_dice_strategy(2),
+            'dice3': lambda: self.play_dice_strategy(3),
+            'random': self.play_random_strategy,
+        }
+        # The isinstance guard keeps unhashable inputs (lists, arrays) away
+        # from the dict lookup and turns them into a clear error instead.
+        if not isinstance(strategy, str) or strategy not in players:
+            raise ValueError(f"unknown strategy: {strategy!r}")
+        if num_games <= 0:
+            raise ValueError("num_games must be a positive integer")
+
+        # Resolve the strategy once instead of re-testing it for every game.
+        play = players[strategy]
+        total_turns = 0
+        for _ in range(num_games):
+            total_turns += play()
+
+        return total_turns / num_games
+
+    def play_optimal_strategy(self):
+        """Average turns when following the policy returned by markovDecision."""
+        _, optimal_policy = markovDecision(self.layout, self.circle)
+        return self.empirical_results(optimal_policy.astype(int))
+
+    def play_dice_strategy(self, dice):
+        """Average turns when the same die ``dice`` is used on every square."""
+        policy = np.ones(len(self.layout), dtype=int) * dice
+        return self.empirical_results(policy)
+
+    def play_random_strategy(self):
+        """Average turns for a policy drawn uniformly from {1, 2, 3} per square."""
+        policy = np.zeros(len(self.layout), dtype=int)
+        # The last square keeps 0: no die is ever thrown from the goal.
+        for i in range(len(policy) - 1):
+            policy[i] = random.choice([1, 2, 3])
+        return self.empirical_results(policy)
+
+    def empirical_results(self, policy, nSimul=10000):
+        """Return the mean number of turns of ``policy`` over ``nSimul`` games.
+
+        Args:
+            policy: die choice (1, 2 or 3) indexed by square.
+            nSimul: number of simulated games; defaults to 10000.
+        """
+        avgnTurnsPlayed = 0
+        for _ in range(nSimul):
+            avgnTurnsPlayed += self.playOneGame(policy)
+
+        return avgnTurnsPlayed / nSimul
+
+    def playOneGame(self, policy):
+        """Simulate one game from square 0 and return the number of turns played."""
+        nSquares = len(self.layout)
+        nTurns = 0
+        curPos = 0
+        jail = False
+
+        while curPos < nSquares - 1:
+            # Hand the jail flag back in so that a jailed turn is actually
+            # skipped (and still counted as a turn).
+            curPos, jail = self.playOneTurn(policy[curPos], curPos, jail)
+            nTurns += 1
+
+        return nTurns
+
+    def playOneTurn(self, diceChoice, curPos, jail=False):
+        """Play a single turn and return ``(newPos, jail)``.
+
+        Args:
+            diceChoice: 1, 2 or 3; the die rolls uniformly in 0..diceChoice.
+                Die 1 ignores the landing square, die 2 applies it with
+                probability 1/2, die 3 always applies it.
+            curPos: index of the square the player stands on.
+            jail: True when the player must skip this turn.  It used to be
+                read without being defined, which raised NameError.
+
+        Returns:
+            The new square index and whether the next turn must be skipped.
+
+        Raises:
+            ValueError: if the landing square holds a code outside 0-4.
+        """
+        nSquares = len(self.layout)
+        lastSquare = nSquares - 1
+
+        if curPos == lastSquare:
+            return lastSquare, False
+
+        # A jailed player stays put; the flag is cleared for the next turn.
+        if jail:
+            return curPos, False
+
+        result = random.choice(list(range(diceChoice + 1)))
+
+        if curPos == 2 and result != 0:
+            # Junction on square 2: a fair coin decides between continuing on
+            # the next squares and jumping 7 squares further.
+            slowLane = random.choice([0, 1])
+            newPos = curPos + result if slowLane else curPos + result + 7
+        elif (curPos == 9 and result != 0) or (curPos in [7, 8, 9] and curPos + result >= 10):
+            # Moving past square 9 skips the next four squares.
+            newPos = curPos + result + 4
+        else:
+            newPos = curPos + result
+
+        if newPos > lastSquare:
+            if self.circle:
+                newPos -= nSquares
+            else:
+                return lastSquare, True
+
+        newSquare = self.layout[newPos]
+
+        if diceChoice == 1:
+            return newPos, False
+        if diceChoice == 2:
+            # Die 2 triggers the square only half of the time.
+            newSquare = random.choice([0, newSquare])
+
+        return self._apply_square(newSquare, newPos)
+
+    def _apply_square(self, square, pos):
+        """Return ``(newPos, jail)`` after applying square code ``square`` at ``pos``."""
+        if square == 4:
+            # Code 4 draws one of the effects 1, 2 or 3 uniformly.
+            square = random.choice([1, 2, 3])
+
+        if square == 0:
+            return pos, False
+        if square == 1:
+            # Back to the first square.
+            return 0, False
+        if square == 2:
+            # Three squares back, never before the first square.
+            return max(pos - 3, 0), False
+        if square == 3:
+            # Stay here and skip the next turn.
+            return pos, True
+        raise ValueError(f"unknown square code: {square!r}")
+
+    def compare_strategies(self, num_games=1000):
+        """Return ``{strategy: average cost}`` for every known strategy.
+
+        Strategies are simulated in the order optimal, dice1, dice2, dice3,
+        random, each over ``num_games`` evaluations.
+        """
+        return {
+            name: self.simulate_game(strategy=name, num_games=num_games)
+            for name in self.STRATEGIES
+        }
+
+# Example usage: compare the five strategies on one 15-square layout and
+# print the averages. These statements sit at module level, so they run
+# whenever this file is executed or imported.
+# Cost note: compare_strategies makes five simulate_game calls of num_games
+# iterations each, and every play_* call in turn runs empirical_results,
+# which simulates 10000 games.
+layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+validation = Validation(layout, circle=False)
+results = validation.compare_strategies(num_games=10000)
+print("Average Costs:")
+# One line per strategy keyword, in the order returned by compare_strategies.
+for strategy, cost in results.items():
+    print(f"{strategy}: {cost}")
--- a/validation_test.py
+++ b/validation_test.py
+import random as rd
+import numpy as np
+import matplotlib.pyplot as plt
+from tmc import TransitionMatrixCalculator as tmc
+from markovDecision import markovDecision as mD
+
+class EmpiricalComparision:
+    """Helpers that compare dice strategies by simulation.
+
+    Both helpers take no instance state, so they are static methods and can
+    be called on the class or on an instance.
+    """
+
+    def __init__(self):
+        """Nothing to initialise."""
+        return
+
+    @staticmethod
+    def simulation(strategy, layout: list, circle, nIter: int):
+        """Return the mean number of turns of ``strategy`` over ``nIter`` games.
+
+        Each move is sampled from the transition matrix of the chosen die,
+        starting on square 0 and stopping on the last square.
+
+        Args:
+            strategy: die choice per square (1 = safe, 2 = normal, 3 = risky).
+            layout: list of square codes; code 3 costs an extra turn.
+            circle: forwarded to the transition-matrix builders.
+            nIter: number of simulated games.
+        """
+        tmc_instance = tmc()
+        # Index 0/1/2 holds the matrix of die 1/2/3.
+        matrices_transition = [
+            tmc_instance._compute_safe_matrix(layout, circle),
+            tmc_instance._compute_normal_matrix(layout, circle),
+            tmc_instance._compute_risky_matrix(layout, circle),
+        ]
+        last_square = len(layout) - 1
+        nTurns = []
+        for _ in range(nIter):
+            turns = 0
+            k = 0
+            while k < last_square:
+                action = strategy[k]
+                transitionMatrix = matrices_transition[int(action - 1)]
+                # numpy's sampler lives in np.random; np.rd does not exist.
+                k = np.random.choice(len(layout), p=transitionMatrix[k])
+                if layout[k] == 3 and action == 2:
+                    # Normal die: the code-3 square costs the extra turn
+                    # only half of the time.
+                    turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
+                elif layout[k] == 3 and action == 3:
+                    # Risky die: the code-3 square always costs an extra turn.
+                    turns += 2
+                else:
+                    turns += 1
+            nTurns.append(turns)
+
+        return np.mean(nTurns)
+
+    @staticmethod
+    def plot(layouts: list, circle, nIter: int):
+        """Unfinished: computes the Markov policy of each layout, returns None.
+
+        The per-strategy result lists are placeholders for the simulation
+        step that has not been written yet.
+        """
+        Markov = []
+        Safe = []
+        Normal = []
+        Risky = []
+        Random = []
+        for layout in layouts:
+            expec, policy = mD(layout, circle)
+            # TODO: simulate the game for each strategy and fill the lists.
+
+        return
+
+
+
+
+
+
+# Example board: 15 integer square codes.
+layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
+# NOTE(review): no function named `results` is defined or imported in the
+# part of validation_test.py shown here, so these two calls look like they
+# will raise NameError -- confirm the intended entry point.
+results(layout, False, 1000000)
+results(layout, True, 1000000)
\ No newline at end of file