update validation and plot files

ee62edf2 · Adrien Payen · 4897f074 · 4897f074
--- a/validation_ex.py
+++ b/validation_ex.py
-import numpy as np
-from tmc import TransitionMatrixCalculator
-import random
-import matplotlib.pyplot as plt
-from markovDecision import MarkovDecisionSolver as mD
-
-class Validation:
-    """Empirical (Monte-Carlo) validation of dice strategies on a board layout.
-
-    ``layout`` is a sequence of square types, indexed by board position:
-    0 = ordinary square, 1 = restart (back to square 0), 2 = penalty
-    (move back 3 squares), 3 = prison (skip the next turn), 4 = gamble
-    (one of the effects 1, 2 or 3 drawn uniformly at random).
-    ``circle`` selects what happens when a move overshoots the last square:
-    wrap around the board if true, otherwise stop on the last square.
-    """
-
-    def __init__(self, layout, circle=False):
-        """Store the board description and build a transition-matrix helper."""
-        self.layout = layout
-        self.circle = circle
-        self.tmc_instance = TransitionMatrixCalculator()
-
-    def simulate_game(self, strategy='optimal', num_games=1000):
-        """Return the mean result of ``num_games`` runs of ``strategy``.
-
-        ``strategy`` is one of 'optimal', 'dice1', 'dice2', 'dice3' or
-        'random'. Each run is itself an average over many simulated games
-        (see ``empirical_results``).
-
-        Raises:
-            ValueError: if ``strategy`` is not one of the names above.
-        """
-        runners = {
-            'optimal': self.play_optimal_strategy,
-            'dice1': lambda: self.play_dice_strategy(1),
-            'dice2': lambda: self.play_dice_strategy(2),
-            'dice3': lambda: self.play_dice_strategy(3),
-            'random': self.play_random_strategy,
-        }
-        if strategy not in runners:
-            # Previously an unknown name surfaced as an UnboundLocalError.
-            raise ValueError(f"unknown strategy: {strategy!r}")
-
-        run = runners[strategy]
-        total_turns = 0
-        for _ in range(num_games):
-            total_turns += run()
-
-        return total_turns / num_games
-
-    def play_optimal_strategy(self):
-        """Return the average number of turns under the solver's policy."""
-        # NOTE(review): assumes mD(layout, circle) yields a pair whose second
-        # element is an array-like policy (it must support .astype) -- confirm
-        # against the markovDecision module.
-        _, optimal_policy = mD(self.layout, self.circle)
-        return self.empirical_results(optimal_policy.astype(int))
-
-    def play_dice_strategy(self, dice):
-        """Return the average number of turns when always throwing ``dice``."""
-        policy = np.ones(len(self.layout), dtype=int) * dice
-        return self.empirical_results(policy)
-
-    def play_random_strategy(self):
-        """Return the average number of turns under a randomly drawn policy."""
-        policy = np.zeros(len(self.layout), dtype=int)
-        # The last square ends the game, so its entry is never used; leave it 0.
-        for i in range(len(policy) - 1):
-            policy[i] = random.choice([1, 2, 3])
-        return self.empirical_results(policy)
-
-    def empirical_results(self, policy):
-        """Return the mean number of turns over 10000 simulated games."""
-        nSimul = 10000
-        totalTurns = sum(self.playOneGame(policy) for _ in range(nSimul))
-        return totalTurns / nSimul
-
-    def playOneGame(self, policy):
-        """Play one game from square 0 and return the number of turns taken.
-
-        ``policy[i]`` is the die (1, 2 or 3) thrown when standing on square i.
-        """
-        nSquares = len(self.layout)
-        nTurns = 0
-        curPos = 0
-        jail = False
-
-        while curPos < nSquares - 1:
-            # Feed the prison flag back in so a jailed player loses a turn.
-            curPos, jail = self.playOneTurn(policy[curPos], curPos, jail)
-            nTurns += 1
-
-        return nTurns
-
-    def playOneTurn(self, diceChoice, curPos, jail=False):
-        """Play a single turn and return ``(new_position, jailed_next_turn)``.
-
-        ``diceChoice`` is the die thrown: the move is drawn uniformly from
-        0..diceChoice. Die 1 never triggers the landing square, die 2
-        triggers it half of the time, die 3 always triggers it.
-        ``jail`` tells whether the player is currently in prison, in which
-        case the turn is spent without moving.
-
-        Raises:
-            ValueError: if the landing square has an unknown type.
-        """
-        nSquares = len(self.layout)
-
-        if curPos == nSquares - 1:
-            return nSquares - 1, False
-
-        if jail:
-            # The jailed turn is consumed; the player is free afterwards.
-            return curPos, False
-
-        result = random.choice(range(diceChoice + 1))
-
-        if curPos == 2 and result != 0:
-            # Junction on square 2: either stay on the current lane or jump
-            # 7 squares ahead onto the other lane, each with probability 1/2.
-            slowLane = random.choice([0, 1])
-            if slowLane:
-                newPos = curPos + result
-            else:
-                newPos = curPos + result + 7
-        elif ((curPos == 9 and result != 0) or ((curPos in [7, 8, 9]) and (curPos + result >= 10))):
-            # Moving past square 9 from squares 7-9 skips 4 squares ahead.
-            newPos = curPos + result + 4
-        else:
-            newPos = curPos + result
-
-        if newPos > nSquares - 1:
-            if self.circle:
-                newPos -= nSquares
-            else:
-                return nSquares - 1, True
-
-        newSquare = self.layout[newPos]
-
-        if diceChoice == 1:
-            return newPos, False
-        elif diceChoice == 2:
-            newSquare = random.choice([0, newSquare])
-
-        if newSquare == 0:
-            return newPos, False
-        if newSquare == 4:
-            # Gamble square: draw one of the three concrete trap effects.
-            newSquare = random.choice([1, 2, 3])
-        return self._apply_trap(newPos, newSquare)
-
-    def _apply_trap(self, newPos, trap):
-        """Apply trap type 1, 2 or 3 at ``newPos``; return ``(position, jailed)``."""
-        if trap == 1:
-            return 0, False
-        if trap == 2:
-            return max(newPos - 3, 0), False
-        if trap == 3:
-            return newPos, True
-        # Previously an unknown type fell through and returned None.
-        raise ValueError(f"unknown square type {trap!r} at position {newPos}")
-
-    def compare_strategies(self, num_games=1000):
-        """Return a dict mapping each strategy name to its simulated average."""
-        names = ('optimal', 'dice1', 'dice2', 'dice3', 'random')
-        return {
-            name: self.simulate_game(strategy=name, num_games=num_games)
-            for name in names
-        }
-
-# Example usage: estimate the average number of turns for every strategy on a
-# 15-square, non-circular board and print one line per strategy.
-# NOTE: this runs at import time, and each of the num_games runs is itself an
-# average over the nSimul games played by empirical_results, so it is slow.
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-validation = Validation(layout=layout, circle=False)
-results = validation.compare_strategies(10000)
-print("Average Costs:")
-for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")