# validation.py

# All the imports
import random as rd
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionProcess as mD

# Class for performing validation and simulation
class Validation:
    """Monte-Carlo validation of dice strategies against the MDP solution.

    On construction the class computes one transition matrix per dice
    (safe, normal, risky) for the given board, solves the Markov decision
    process for that board, and prepares a few fixed strategies. The
    ``play_*`` / ``compare_*`` methods then simulate games and report the
    average number of turns needed to reach the last square.

    A *strategy* is a sequence indexed by square; entry ``k`` is the dice to
    throw on square ``k``: ``1`` (safe), ``2`` (normal) or ``3`` (risky).
    """

    # Dice identifiers accepted in a strategy, in transition-matrix order.
    _VALID_DICE = (1, 2, 3)
    # Layout code of the squares that may cost an extra turn when landed on.
    # NOTE(review): presumably the "wait one turn" trap - confirm with the
    # transition-matrix calculator.
    _EXTRA_TURN_SQUARE = 3

    def __init__(self, layout: list, circle: bool):
        """Build transition matrices, solve the MDP and prepare strategies.

        Args:
            layout: Per-square type codes of the board.
            circle: Board-mode flag, forwarded unchanged to the
                transition-matrix calculator and to the MDP solver.
        """
        self.layout = layout
        self.circle = circle

        # Calculator used for every transition matrix of this instance.
        self.tmc_instance = tmc()

        # One transition matrix per dice. The normal/risky computations
        # return a pair; only the first element (the matrix) is kept.
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)

        # Expected costs and optimal dice per square from the MDP solver.
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Fixed strategies: always the same dice, or one random dice per square.
        n_squares = len(layout)
        self.safe_strategy = [1] * n_squares
        self.normal_strategy = [2] * n_squares
        self.risky_strategy = [3] * n_squares
        # Only valid dice ids are drawn: a 0 would index the matrix list with
        # -1 and silently behave like a mis-costed risky dice.
        self.random_strategy = [rd.choice(self._VALID_DICE) for _ in range(n_squares)]

        # Per-dice cost table: the dice number on type-3 squares, 0 elsewhere.
        # Not read by any method of this class; kept as a public attribute.
        self.costs_by_dice_type = {
            name: [cost if square == self._EXTRA_TURN_SQUARE else 0 for square in self.layout]
            for name, cost in (('SafeDice', 1), ('NormalDice', 2), ('RiskyDice', 3))
        }

    @classmethod
    def _play_game(cls, strategy, layout, transition_matrices, start=0):
        """Play one game from square ``start`` and return the turns it took.

        Args:
            strategy: Dice to throw per square (values 1, 2 or 3).
            layout: Per-square type codes of the board.
            transition_matrices: ``[safe, normal, risky]`` matrices; row ``k``
                holds the landing probabilities when throwing from square ``k``.
            start: Square the game starts on.

        Returns:
            Number of turns used to reach the last square.

        Raises:
            ValueError: If the strategy asks for a dice other than 1, 2 or 3.
        """
        n_squares = len(layout)
        goal = n_squares - 1
        position = start
        turns = 0

        while position < goal:
            action = strategy[position]
            die = int(action)  # policies may hold numpy/float values
            if die not in cls._VALID_DICE:
                raise ValueError(
                    "Invalid dice {!r} on square {}; expected 1, 2 or 3".format(action, position)
                )

            # Normalise a *copy* of the row (guards against float drift)
            # so the stored transition matrix is never modified.
            row = np.asarray(transition_matrices[die - 1][position], dtype=float)
            probs = row / row.sum()

            position = np.random.choice(n_squares, p=probs)

            if layout[position] == cls._EXTRA_TURN_SQUARE:
                if die == 1:
                    # Safe dice: the throw itself still costs one turn.
                    turns += 1
                elif die == 2:
                    # Normal dice: one or two turns with equal probability.
                    turns += np.random.choice([1, 2], p=[0.5, 0.5])
                else:
                    # Risky dice: always two turns.
                    turns += 2
            else:
                turns += 1

        return turns

    def simulate_rounds(self, strategy: list, n_iterations: int) -> float:
        """Return the mean number of turns over games started on square 0.

        Args:
            strategy: Dice to throw per square (values 1, 2 or 3).
            n_iterations: Number of games to simulate.

        Returns:
            Mean turn count over the simulated games.

        Raises:
            ValueError: If the strategy asks for a dice other than 1, 2 or 3.
        """
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        total_turns = np.zeros(n_iterations)

        for game in range(n_iterations):
            total_turns[game] = self._play_game(strategy, self.layout, transition_matrices)

        return np.mean(total_turns)

    def state_simulation(self, strategy: list, layout: list, circle: bool, n_iterations: int) -> np.ndarray:
        """Return the mean number of turns to finish from every start square.

        The transition matrices are recomputed for the ``layout`` / ``circle``
        given here, which may differ from the ones stored on the instance.

        Args:
            strategy: Dice to throw per square (values 1, 2 or 3).
            layout: Per-square type codes of the board.
            circle: Board-mode flag forwarded to the matrix calculator.
            n_iterations: Number of games simulated per start square.

        Returns:
            Array of length ``len(layout) - 1``; entry ``s`` is the mean turn
            count of the games started on square ``s``.

        Raises:
            ValueError: If the strategy asks for a dice other than 1, 2 or 3.
        """
        transition_matrices = [
            self.tmc_instance._compute_safe_matrix(),
            self.tmc_instance._compute_normal_matrix(layout, circle)[0],
            self.tmc_instance._compute_risky_matrix(layout, circle)[0],
        ]
        n_start_squares = len(layout) - 1  # the last square is already the goal
        total_turns = []

        for _ in range(n_iterations):
            state_turns = np.zeros(n_start_squares)
            for state in range(n_start_squares):
                state_turns[state] = self._play_game(
                    strategy, layout, transition_matrices, start=state
                )
            total_turns.append(state_turns)

        return np.mean(total_turns, axis=0)

    def play_optimal_policy(self, n_iterations: int):
        """Return the mean turn count when following the solver's policy."""
        return self.simulate_rounds(self.optimal_policy, n_iterations)

    def play_dice_strategy(self, dice_choice, n_iterations: int):
        """Return the mean turn count when always throwing one dice.

        Args:
            dice_choice: ``'SafeDice'``, ``'NormalDice'`` or ``'RiskyDice'``.
            n_iterations: Number of games to simulate.

        Raises:
            ValueError: If ``dice_choice`` is not one of the three names.
        """
        strategies = {
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
        }
        strategy = strategies.get(dice_choice)

        if strategy is None:
            raise ValueError("Invalid dice choice")

        return self.simulate_rounds(strategy, n_iterations)

    def play_random_strategy(self, n_iterations: int):
        """Return the mean turn count for the random strategy drawn at init."""
        return self.simulate_rounds(self.random_strategy, n_iterations)

    def compare_empirical_vs_value_iteration(self, num_games: int):
        """Return solver costs and simulated turns per state as plain lists.

        Returns:
            Dict with keys ``'ValueIteration'`` (the solver's expected costs)
            and ``'Empirical'`` (simulated mean turns under the optimal policy).
        """
        value_iteration_turns, empirical_turns = self.compare_state_based_turns(num_games)

        return {
            'ValueIteration': value_iteration_turns.tolist(),
            'Empirical': empirical_turns.tolist(),
        }

    def compare_state_based_turns(self, num_games: int):
        """Return ``(solver expected costs, simulated mean turns per state)``.

        The simulation follows the optimal policy on the instance's own board.
        """
        empirical_turns = self.state_simulation(
            self.optimal_policy, self.layout, self.circle, n_iterations=num_games
        )
        return self.expec, empirical_turns

    def compare_strategies(self, num_games: int):
        """Return the mean turn count of every built-in strategy.

        Returns:
            Dict keyed by ``'Optimal'``, ``'SafeDice'``, ``'NormalDice'``,
            ``'RiskyDice'`` and ``'Random'``.
        """
        strategies = {
            'Optimal': self.optimal_policy,
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
            'Random': self.random_strategy,
        }
        return {
            name: self.simulate_rounds(strategy, n_iterations=num_games)
            for name, strategy in strategies.items()
        }