Skip to content
Extraits de code Groupes Projets
validation_ex.py 4,69 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    import numpy as np
    from tmc import TransitionMatrixCalculator
    import random
    import matplotlib.pyplot as plt
    
    Adrien Payen's avatar
    Adrien Payen a validé
    from markovDecision import MarkovDecisionSolver as mD
    
    Adrien Payen's avatar
    Adrien Payen a validé
    
    class Validation:
        """Estimate, by simulation, the average number of turns needed to finish a board.

        The board is described by ``layout``, a sequence of integer square codes.
        As implemented in ``playOneTurn`` the codes mean:

        * 0 -- no effect
        * 1 -- the player is sent back to square 0
        * 2 -- the player moves back 3 squares (never below square 0)
        * 3 -- the player loses the next turn
        * 4 -- one of the effects 1, 2 or 3 is drawn uniformly at random

        A policy is an indexable sequence giving, for each square, the dice to
        throw there: dice ``d`` yields a uniform result in ``0..d``; dice 1 never
        triggers a square's effect, dice 2 triggers it half of the time, any
        other dice always triggers it.

        NOTE(review): the lane-switching rules in ``playOneTurn`` use hard-coded
        square indices (2, 7-10), so they presumably assume the 15-square board
        used in the example at the bottom of the file -- confirm before using
        other board sizes.
        """

        def __init__(self, layout, circle=False):
            """Store the board description and build the transition-matrix helper.

            Args:
                layout: sequence of square codes (see the class docstring).
                circle: if True, overshooting the last square wraps around to the
                    beginning of the board; if False, overshooting ends the game.
            """
            self.layout = layout
            self.circle = circle
            # Not used by the methods of this class; kept as a public attribute.
            self.tmc_instance = TransitionMatrixCalculator()

        def simulate_game(self, strategy='optimal', num_games=1000):
            """Return the mean of ``num_games`` empirical estimates for ``strategy``.

            Args:
                strategy: one of 'optimal', 'dice1', 'dice2', 'dice3', 'random'.
                num_games: number of estimates to average; must be at least 1.

            Returns:
                The average number of turns over all estimates.

            Raises:
                ValueError: if ``strategy`` is unknown or ``num_games`` < 1.
            """
            players = {
                'optimal': self.play_optimal_strategy,
                'dice1': lambda: self.play_dice_strategy(1),
                'dice2': lambda: self.play_dice_strategy(2),
                'dice3': lambda: self.play_dice_strategy(3),
                'random': self.play_random_strategy,
            }
            # Fail early: previously an unknown name left ``turns`` unbound and
            # zero games ended in a ZeroDivisionError.
            if strategy not in players:
                raise ValueError(
                    f"unknown strategy {strategy!r}; expected one of {sorted(players)}"
                )
            if num_games < 1:
                raise ValueError("num_games must be at least 1")

            play = players[strategy]
            total_turns = 0
            for _ in range(num_games):
                total_turns += play()

            return total_turns / num_games

        def play_optimal_strategy(self):
            """Return the empirical average number of turns under the solver's policy."""
            # NOTE(review): ``markovDecision`` is not imported in this file (only
            # ``MarkovDecisionSolver as mD`` is), so this call looks like it will
            # raise NameError -- confirm the intended entry point of the solver.
            _, optimal_policy = markovDecision(self.layout, self.circle)
            return self.empirical_results(optimal_policy.astype(int))

        def play_dice_strategy(self, dice):
            """Return the empirical average number of turns when always throwing ``dice``."""
            policy = np.ones(len(self.layout), dtype=int) * dice
            return self.empirical_results(policy)

        def play_random_strategy(self):
            """Return the empirical average for a policy drawn uniformly at random.

            Every square except the last gets a dice in {1, 2, 3}; the last entry
            stays 0 because no dice is thrown once the final square is reached.
            """
            policy = np.zeros(len(self.layout), dtype=int)
            for i in range(len(policy) - 1):
                policy[i] = random.choice([1, 2, 3])
            return self.empirical_results(policy)

        def empirical_results(self, policy, nSimul=10000):
            """Return the mean number of turns over ``nSimul`` games played with ``policy``.

            Args:
                policy: indexable sequence giving the dice to throw on each square.
                nSimul: number of games to simulate; must be at least 1.

            Raises:
                ValueError: if ``nSimul`` < 1.
            """
            if nSimul < 1:
                raise ValueError("nSimul must be at least 1")

            total_turns = sum(self.playOneGame(policy) for _ in range(nSimul))
            return total_turns / nSimul

        def playOneGame(self, policy):
            """Play one game from square 0 and return the number of turns it took."""
            nSquares = len(self.layout)
            nTurns = 0
            curPos = 0
            jail = False

            while curPos < nSquares - 1:
                # Hand the jail flag back to the next turn so that a "lose a
                # turn" square really costs a turn.
                curPos, jail = self.playOneTurn(policy[curPos], curPos, jail)
                nTurns += 1

            return nTurns

        def playOneTurn(self, diceChoice, curPos, jail=False):
            """Play a single turn.

            Args:
                diceChoice: dice thrown this turn (see the class docstring).
                curPos: index of the square the player currently stands on.
                jail: True if the player must skip this turn.

            Returns:
                A ``(new_position, jail)`` tuple, where ``jail`` tells whether the
                player must skip the following turn.

            Raises:
                ValueError: if the square landed on holds an unknown code.
            """
            nSquares = len(self.layout)
            lastSquare = nSquares - 1

            if curPos == lastSquare:
                return lastSquare, False

            if jail:
                # The turn is consumed without moving; the player is free again.
                return curPos, False

            result = random.choice(range(diceChoice + 1))

            if curPos == 2 and result != 0:
                # Junction: each of the two lanes is taken with probability 1/2.
                slowLane = random.choice([0, 1])
                newPos = curPos + result if slowLane else curPos + result + 7
            elif (curPos == 9 and result != 0) or (
                curPos in [7, 8, 9] and curPos + result >= 10
            ):
                # Moving past square 9 skips over squares 10-13.
                newPos = curPos + result + 4
            else:
                newPos = curPos + result

            if newPos > lastSquare:
                if self.circle:
                    newPos -= nSquares
                else:
                    # The game is over; the jail flag is irrelevant from here on.
                    return lastSquare, True

            newSquare = self.layout[newPos]

            if diceChoice == 1:
                # Dice 1 never triggers the square's effect.
                return newPos, False
            if diceChoice == 2:
                # Dice 2 triggers the square's effect with probability 1/2.
                newSquare = random.choice([0, newSquare])

            return self._apply_square(newSquare, newPos)

        def _apply_square(self, square, pos):
            """Return ``(position, jail)`` after applying square code ``square`` at ``pos``."""
            if square == 4:
                # Draw one of the three concrete effects uniformly at random.
                square = random.choice([1, 2, 3])

            if square == 0:
                return pos, False
            if square == 1:
                return 0, False
            if square == 2:
                return max(pos - 3, 0), False
            if square == 3:
                return pos, True

            # Previously the method fell through and returned None here.
            raise ValueError(f"unknown square code {square!r} at position {pos}")

        def compare_strategies(self, num_games=1000):
            """Return a dict mapping each strategy name to its average number of turns."""
            names = ('optimal', 'dice1', 'dice2', 'dice3', 'random')
            return {
                name: self.simulate_game(strategy=name, num_games=num_games)
                for name in names
            }
    
    # Example usage: compare every strategy on a 15-square, non-circular board
    # and print the average number of turns obtained for each one.
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    validation = Validation(layout, circle=False)
    results = validation.compare_strategies(num_games=10000)
    print("Average Costs:")
    for strategy, cost in results.items():
        print(strategy, cost, sep=": ")