# validation_ex.py

import numpy as np
from tmc import TransitionMatrixCalculator
import random
import matplotlib.pyplot as plt
from markovDecision import MarkovDecisionSolver as mD

class Validation:
    """Monte Carlo estimate of the number of turns needed to finish the game.

    A policy is a sequence giving, for every square, which dice to throw
    there (1, 2 or 3).  The class plays many simulated games under a policy
    and averages the number of turns, so that different strategies can be
    compared empirically.

    Square codes handled by ``playOneTurn``: 0 = no effect, 1 = back to
    square 0, 2 = move back 3 squares, 3 = skip the next turn, 4 = one of
    the effects 1-3 drawn uniformly at random.
    """

    def __init__(self, layout, circle=False):
        """Store the board and the game mode.

        Args:
            layout: Sequence of square codes indexed by position.  The
                movement rules hard-code squares 2, 7, 8, 9 and 10, so the
                board is assumed to have the 15-square shape used in the
                example at the bottom of this file.
            circle: If True, overshooting the last square wraps around to
                the start of the board; if False, overshooting ends the game.
        """
        self.layout = layout
        self.circle = circle
        # Not read by any method of this class; kept because other code may
        # access the attribute.
        self.tmc_instance = TransitionMatrixCalculator()

    def simulate_game(self, strategy='optimal', num_games=1000):
        """Return the average number of turns over ``num_games`` evaluations.

        Each evaluation is itself an average produced by
        ``empirical_results``.

        Args:
            strategy: One of 'optimal', 'dice1', 'dice2', 'dice3', 'random'.
            num_games: Number of evaluations to average; must be >= 1.

        Raises:
            ValueError: If ``strategy`` is unknown or ``num_games`` < 1.
        """
        fixed_dice = {'dice1': 1, 'dice2': 2, 'dice3': 3}

        if strategy == 'optimal':
            # The optimal policy depends only on the board, so it is solved
            # once here instead of once per simulated game.
            optimal_policy = self._optimal_policy()

            def play_once():
                return self.empirical_results(optimal_policy)
        elif strategy in fixed_dice:
            dice = fixed_dice[strategy]

            def play_once():
                return self.play_dice_strategy(dice)
        elif strategy == 'random':
            play_once = self.play_random_strategy
        else:
            raise ValueError(f"unknown strategy: {strategy!r}")

        if num_games < 1:
            raise ValueError("num_games must be at least 1")

        total_turns = sum(play_once() for _ in range(num_games))
        return total_turns / num_games

    def _optimal_policy(self):
        """Return the solver's dice choice per square as an integer array."""
        # NOTE(review): `markovDecision` is not bound by the imports at the
        # top of this file (only `MarkovDecisionSolver as mD` is).  Confirm
        # the solver's entry point and import or call it accordingly.
        _, optimal_policy = markovDecision(self.layout, self.circle)
        return optimal_policy.astype(int)

    def play_optimal_strategy(self):
        """Average number of turns when following the solver's policy."""
        return self.empirical_results(self._optimal_policy())

    def play_dice_strategy(self, dice):
        """Average number of turns when always throwing the same ``dice``."""
        policy = np.full(len(self.layout), dice, dtype=int)
        return self.empirical_results(policy)

    def play_random_strategy(self):
        """Average number of turns under one randomly drawn policy.

        A dice is drawn uniformly from {1, 2, 3} for every square except the
        last one, whose entry stays 0 (no dice is thrown once it is reached).
        """
        n_squares = len(self.layout)
        policy = np.zeros(n_squares, dtype=int)
        policy[:-1] = [random.choice([1, 2, 3]) for _ in range(n_squares - 1)]
        return self.empirical_results(policy)

    def empirical_results(self, policy, nSimul=10000):
        """Return the mean number of turns over ``nSimul`` simulated games.

        Args:
            policy: Dice choice (1, 2 or 3) for every square.
            nSimul: Number of games to simulate; must be >= 1.

        Raises:
            ValueError: If ``nSimul`` < 1.
        """
        if nSimul < 1:
            raise ValueError("nSimul must be at least 1")
        total_turns = sum(self.playOneGame(policy) for _ in range(nSimul))
        return total_turns / nSimul

    def playOneGame(self, policy):
        """Play one game from square 0 and return the number of turns used.

        A turn skipped because of a prison square still counts as a turn.
        """
        last_square = len(self.layout) - 1
        nTurns = 0
        curPos = 0
        jail = False

        while curPos < last_square:
            # The jail flag produced by one turn must be fed into the next
            # one so that the player actually skips it.
            curPos, jail = self.playOneTurn(policy[curPos], curPos, jail)
            nTurns += 1

        return nTurns

    def playOneTurn(self, diceChoice, curPos, jail=False):
        """Play a single turn.

        Args:
            diceChoice: Dice thrown this turn.  The throw is uniform on
                0..diceChoice.  Dice 1 never triggers the landing square,
                dice 2 triggers it with probability 1/2, any other value
                always triggers it.
            curPos: Current square.
            jail: True if the player must skip this turn.

        Returns:
            ``(new_position, jail)`` where ``jail`` tells whether the NEXT
            turn has to be skipped.

        Raises:
            ValueError: If the triggered square holds an unknown code.
        """
        last_square = len(self.layout) - 1

        if curPos == last_square:
            return last_square, False

        if jail:
            # Serving the penalty: stay put, and the penalty is cleared.
            return curPos, False

        result = random.choice(range(diceChoice + 1))

        if curPos == 2 and result != 0:
            # Junction at square 2: with equal probability either continue
            # on squares 3.. or jump ahead by 7 onto squares 10...
            if random.choice([0, 1]):
                newPos = curPos + result
            else:
                newPos = curPos + result + 7
        elif curPos in (7, 8, 9) and curPos + result >= 10:
            # Moving past square 9 skips over squares 10-13.
            newPos = curPos + result + 4
        else:
            newPos = curPos + result

        if newPos > last_square:
            if not self.circle:
                # Overshooting wins; the game is over so nothing is skipped.
                return last_square, False
            newPos -= last_square + 1

        newSquare = self.layout[newPos]

        if diceChoice == 1:
            return newPos, False
        if diceChoice == 2:
            newSquare = random.choice([0, newSquare])

        return self._apply_square(newSquare, newPos)

    def _apply_square(self, square, pos):
        """Apply the effect of a triggered ``square`` code at position ``pos``."""
        if square == 4:
            # Gamble square: behaves like one of the three traps at random.
            square = random.choice([1, 2, 3])

        if square == 0:
            return pos, False
        if square == 1:
            return 0, False
        if square == 2:
            # NOTE(review): plain index arithmetic, so backing up from
            # squares 10-12 lands on 7-9; confirm this matches the rules.
            return max(pos - 3, 0), False
        if square == 3:
            return pos, True
        raise ValueError(f"unknown square code {square!r} at position {pos}")

    def compare_strategies(self, num_games=1000):
        """Return ``{strategy name: average number of turns}`` for all strategies."""
        strategies = ('optimal', 'dice1', 'dice2', 'dice3', 'random')
        return {
            name: self.simulate_game(strategy=name, num_games=num_games)
            for name in strategies
        }

# Example usage.  Guarded so that importing this module (e.g. to reuse
# Validation) does not start the simulation; it runs only when the file is
# executed as a script.
if __name__ == "__main__":
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    validation = Validation(layout, circle=False)
    # Each of the num_games evaluations is itself an average over the games
    # simulated by empirical_results, so this is a long-running computation.
    results = validation.compare_strategies(num_games=10000)
    print("Average Costs:")
    for strategy, cost in results.items():
        print(f"{strategy}: {cost}")