# Reconstructed from the deletion diff of validation_ex.py: the removed lines
# with their diff markers stripped and the original line structure restored.
import random

import matplotlib.pyplot as plt
import numpy as np

from markovDecision import MarkovDecisionSolver as mD
from tmc import TransitionMatrixCalculator


class Validation:
    """Estimate, by simulation, how many turns different dice policies need.

    A policy is an integer array with one dice choice per square of
    ``layout``.  Each entry of ``layout`` is a square code, interpreted by
    ``playOneTurn`` as follows:

    * ``0`` - nothing happens
    * ``1`` - the player is sent back to square 0
    * ``2`` - the player moves back 3 squares (never below square 0)
    * ``3`` - the player stays put and loses the next turn
    * ``4`` - one of the effects 1, 2 or 3 is drawn uniformly at random
    """

    def __init__(self, layout, circle=False):
        """Store the board description.

        Args:
            layout: Sequence of square codes (see the class docstring).
            circle: When true, overshooting the last square wraps around to
                the start of the board; otherwise overshooting ends the game.
        """
        self.layout = layout
        self.circle = circle
        self.tmc_instance = TransitionMatrixCalculator()

    def simulate_game(self, strategy='optimal', num_games=1000):
        """Return the mean of ``num_games`` empirical turn-count estimates.

        Args:
            strategy: One of ``'optimal'``, ``'dice1'``, ``'dice2'``,
                ``'dice3'`` or ``'random'``.
            num_games: Number of calls to ``empirical_results`` to average.

        Raises:
            ValueError: If ``strategy`` is not one of the names above.
        """
        policy_builders = {
            'optimal': self._optimal_policy,
            'dice1': lambda: self._dice_policy(1),
            'dice2': lambda: self._dice_policy(2),
            'dice3': lambda: self._dice_policy(3),
            'random': self._random_policy,
        }
        try:
            build_policy = policy_builders[strategy]
        except KeyError:
            raise ValueError(f"unknown strategy: {strategy!r}") from None

        # Only the random strategy needs a fresh policy per iteration.  The
        # others are built once instead of once per game; for 'optimal' this
        # assumes the solver returns the same policy for the same board.
        fixed_policy = None if strategy == 'random' else build_policy()

        total_turns = 0
        for _ in range(num_games):
            policy = build_policy() if fixed_policy is None else fixed_policy
            total_turns += self.empirical_results(policy)

        return total_turns / num_games

    def _optimal_policy(self):
        """Return the solver's policy for this board as an integer array."""
        _, optimal_policy = mD(self.layout, self.circle)
        return optimal_policy.astype(int)

    def _dice_policy(self, dice):
        """Return a policy that picks ``dice`` on every square."""
        return np.ones(len(self.layout), dtype=int) * dice

    def _random_policy(self):
        """Return a policy with a random dice (1-3) on every square but the last."""
        policy = np.zeros(len(self.layout), dtype=int)
        for i in range(len(policy) - 1):
            policy[i] = random.choice([1, 2, 3])
        return policy

    def play_optimal_strategy(self):
        """Return the empirical mean number of turns under the solver's policy."""
        return self.empirical_results(self._optimal_policy())

    def play_dice_strategy(self, dice):
        """Return the empirical mean number of turns when always using ``dice``."""
        return self.empirical_results(self._dice_policy(dice))

    def play_random_strategy(self):
        """Return the empirical mean number of turns under one random policy."""
        return self.empirical_results(self._random_policy())

    def empirical_results(self, policy, nSimul=10000):
        """Return the mean number of turns over ``nSimul`` simulated games.

        Args:
            policy: Dice choice per square, indexed by board position.
            nSimul: Number of games to simulate (defaults to 10000).
        """
        total_turns = sum(self.playOneGame(policy) for _ in range(nSimul))
        return total_turns / nSimul

    def playOneGame(self, policy):
        """Play one game from square 0 and return the number of turns taken.

        A turn that is skipped because of a "lose next turn" square still
        counts as a turn.
        """
        goal = len(self.layout) - 1
        nTurns = 0
        curPos = 0
        jail = False

        while curPos < goal:
            # Pass the flag back in so a turn lost on a code-3 square is
            # actually skipped on the following iteration.
            curPos, jail = self.playOneTurn(policy[curPos], curPos, jail)
            nTurns += 1

        return nTurns

    def playOneTurn(self, diceChoice, curPos, jail=False):
        """Simulate a single turn.

        Args:
            diceChoice: Dice used this turn; the roll is uniform on
                ``0..diceChoice``.  Dice 1 never triggers the landing
                square's effect, dice 2 triggers it with probability 1/2,
                and any other value always triggers it.
            curPos: Current board index.
            jail: Whether the player must skip this turn.

        Returns:
            ``(newPos, jail)``: the position after the turn and whether the
            next turn is lost.

        Raises:
            ValueError: If the landing square holds a code outside 0-4.
        """
        nSquares = len(self.layout)
        goal = nSquares - 1

        if curPos == goal:
            return goal, False

        if jail:
            # The skipped turn consumes the penalty.
            return curPos, False

        result = random.choice(range(diceChoice + 1))

        if curPos == 2 and result != 0:
            # Square 2 is a fork: with probability 1/2 the move continues
            # normally, otherwise it jumps 7 squares further ahead.
            slowLane = random.choice([0, 1])
            newPos = curPos + result if slowLane else curPos + result + 7
        elif curPos in (7, 8, 9) and curPos + result >= 10:
            # Moving past index 9 from squares 7-9 skips 4 indices ahead.
            newPos = curPos + result + 4
        else:
            newPos = curPos + result

        if newPos > goal:
            if self.circle:
                newPos -= nSquares
            else:
                # Overshooting ends the game on a non-circular board; the
                # flag is never consulted by playOneGame once the goal is
                # reached.
                return goal, True

        newSquare = self.layout[newPos]

        if diceChoice == 1:
            return newPos, False
        if diceChoice == 2:
            newSquare = random.choice([0, newSquare])

        if newSquare == 4:
            newSquare = random.choice([1, 2, 3])

        if newSquare == 0:
            return newPos, False
        if newSquare == 1:
            return 0, False
        if newSquare == 2:
            # NOTE(review): this is plain index arithmetic, so from indices
            # 10-12 (presumably the lane reached via the fork) the player
            # lands on indices 7-9 - confirm this matches the board rules.
            return max(newPos - 3, 0), False
        if newSquare == 3:
            return newPos, True

        raise ValueError(f"unknown square code {newSquare!r} at position {newPos}")

    def compare_strategies(self, num_games=1000):
        """Return ``{strategy name: average turns}`` for all five strategies."""
        strategies = ('optimal', 'dice1', 'dice2', 'dice3', 'random')
        return {
            name: self.simulate_game(strategy=name, num_games=num_games)
            for name in strategies
        }


# Example usage
if __name__ == "__main__":
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    validation = Validation(layout, circle=False)
    # Each of the num_games iterations itself averages 10000 simulated games
    # per strategy, so this run is long.
    results = validation.compare_strategies(num_games=10000)
    print("Average Costs:")
    for strategy, cost in results.items():
        print(f"{strategy}: {cost}")