Skip to content
Extraits de code Groupes Projets
validation.py 6,96 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    # All the imports
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import random as rd
    import numpy as np
    
    from tmc import TransitionMatrixCalculator as tmc
    
    Adrien Payen's avatar
    Adrien Payen a validé
    from markovDecision import MarkovDecisionProcess as mD
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Class for performing validation and simulation
    
    Adrien Payen's avatar
    Adrien Payen a validé
    class Validation:
        """Empirically validate dice strategies against the solver's expectations.

        The class holds one transition matrix per die (safe, normal, risky),
        the expected costs and policy returned by the decision-process solver,
        a few fixed strategies, and Monte-Carlo routines that estimate the
        number of turns a strategy needs to reach the last square.

        Conventions used by the simulation code in this class:
        - actions are 1-based dice identifiers: 1 = safe, 2 = normal, 3 = risky;
        - a game is over once the current square index reaches
          ``len(layout) - 1``;
        - squares whose layout value is 3 may cost an extra turn, depending on
          the die that was thrown (see ``_play_game``).
        """

        def __init__(self, layout: list, circle: bool):
            """Build the transition matrices, solve the MDP and prepare strategies.

            Args:
                layout: Square types of the board, one entry per square.
                circle: Board mode flag, forwarded unchanged to the transition
                    matrix calculator and to the solver.
            """
            self.layout = layout
            self.circle = circle

            # Transition matrices, one per die.
            self.tmc_instance = tmc()
            self.safe_dice = self.tmc_instance._compute_safe_matrix()
            self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
            self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)

            # Expected costs and optimal policy computed by the solver.
            solver = mD(self.layout, self.circle)
            self.expec, self.optimal_policy = solver.solve()

            # Predefined strategies: one action per square.
            n_squares = len(layout)
            self.safe_strategy = [1] * n_squares
            self.normal_strategy = [2] * n_squares
            self.risky_strategy = [3] * n_squares
            # Only valid dice (1..3) are drawn. An action of 0 would be turned
            # into index -1 by the simulation and silently pick the risky matrix.
            self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]

            # Per-square cost table by dice type: non-zero only on type-3 squares.
            self.costs_by_dice_type = {
                'SafeDice': [1 if square == 3 else 0 for square in layout],
                'NormalDice': [2 if square == 3 else 0 for square in layout],
                'RiskyDice': [3 if square == 3 else 0 for square in layout],
            }

        @staticmethod
        def _play_game(start, strategy, layout, transition_matrices):
            """Play one game from ``start`` and return the number of turns used.

            Args:
                start: Index of the square the game starts on.
                strategy: Sequence giving the action (1, 2 or 3) for each square.
                layout: Square types of the board.
                transition_matrices: ``[safe, normal, risky]`` matrices; row ``k``
                    holds the (possibly unnormalised) weights of the next square
                    when throwing from square ``k``.

            Returns:
                The number of turns spent before reaching the last square.
            """
            n_squares = len(layout)
            last_square = n_squares - 1
            k = start
            turns = 0

            while k < last_square:
                action = strategy[k]
                row = np.asarray(transition_matrices[int(action) - 1][k], dtype=float)
                # Normalise into a new array: dividing the row in place would
                # modify the stored transition matrix through the view.
                probs = row / row.sum()
                k = np.random.choice(n_squares, p=probs)

                # Every throw costs one turn, whatever die was used.
                turns += 1
                if layout[k] == 3:
                    if action == 2:
                        # Normal die: one extra turn with probability 1/2.
                        turns += np.random.choice([0, 1])
                    elif action == 3:
                        # Risky die: always one extra turn.
                        turns += 1

            return turns

        def simulate_rounds(self, strategy: list, n_iterations: int):
            """Estimate the mean number of turns to finish, starting on square 0.

            Args:
                strategy: Action (1, 2 or 3) to take on each square.
                n_iterations: Number of games to simulate.

            Returns:
                The mean number of turns over the simulated games.
            """
            transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
            total_turns = np.zeros(n_iterations)

            for i in range(n_iterations):
                total_turns[i] = self._play_game(0, strategy, self.layout, transition_matrices)

            return np.mean(total_turns)

        def state_simulation(self, strategy: list, layout: list, circle: bool, n_iterations: int):
            """Estimate the mean number of turns to finish from every non-final square.

            The transition matrices are recomputed from ``layout`` and ``circle``
            rather than taken from the instance.

            Args:
                strategy: Action (1, 2 or 3) to take on each square.
                layout: Square types of the board.
                circle: Board mode flag forwarded to the matrix calculator.
                n_iterations: Number of games simulated per starting square.

            Returns:
                Array of length ``len(layout) - 1`` with the mean number of turns
                for each starting square.
            """
            transition_matrices = [
                self.tmc_instance._compute_safe_matrix(),
                self.tmc_instance._compute_normal_matrix(layout, circle)[0],
                self.tmc_instance._compute_risky_matrix(layout, circle)[0],
            ]

            n_states = len(layout) - 1
            # One row per iteration, one column per starting square.
            total_turns = np.zeros((n_iterations, n_states))

            for iteration in range(n_iterations):
                for state in range(n_states):
                    total_turns[iteration, state] = self._play_game(
                        state, strategy, layout, transition_matrices
                    )

            return np.mean(total_turns, axis=0)

        def play_optimal_policy(self, n_iterations: int):
            """Simulate ``n_iterations`` games with the solver's policy; return the mean turns."""
            return self.simulate_rounds(self.optimal_policy, n_iterations)

        def play_dice_strategy(self, dice_choice, n_iterations: int):
            """Simulate games always throwing the same die.

            Args:
                dice_choice: One of ``'SafeDice'``, ``'NormalDice'``, ``'RiskyDice'``.
                n_iterations: Number of games to simulate.

            Returns:
                The mean number of turns over the simulated games.

            Raises:
                ValueError: If ``dice_choice`` is not one of the known names.
            """
            strategy = {
                'SafeDice': self.safe_strategy,
                'NormalDice': self.normal_strategy,
                'RiskyDice': self.risky_strategy,
            }.get(dice_choice)

            if strategy is None:
                raise ValueError("Invalid dice choice")

            return self.simulate_rounds(strategy, n_iterations)

        def play_random_strategy(self, n_iterations: int):
            """Simulate games with the random strategy drawn at construction time."""
            return self.simulate_rounds(self.random_strategy, n_iterations)

        def compare_empirical_vs_value_iteration(self, num_games: int):
            """Return the solver's expected turns and the simulated turns as lists.

            Args:
                num_games: Number of games simulated per starting square.

            Returns:
                Dict with keys ``'ValueIteration'`` and ``'Empirical'``, each
                mapping to a list of per-square values.
            """
            empirical_turns = self.state_simulation(
                self.optimal_policy, self.layout, self.circle, n_iterations=num_games
            )

            return {
                'ValueIteration': self.expec.tolist(),
                'Empirical': empirical_turns.tolist(),
            }

        def compare_state_based_turns(self, num_games: int):
            """Return ``(expected, empirical)`` per-square turns for the solver's policy.

            Args:
                num_games: Number of games simulated per starting square.
            """
            empirical_turns = self.state_simulation(
                self.optimal_policy, self.layout, self.circle, n_iterations=num_games
            )
            return self.expec, empirical_turns

        def compare_strategies(self, num_games: int):
            """Simulate every available strategy and return its mean number of turns.

            Args:
                num_games: Number of games simulated per strategy.

            Returns:
                Dict mapping ``'Optimal'``, ``'SafeDice'``, ``'NormalDice'``,
                ``'RiskyDice'`` and ``'Random'`` to the mean number of turns.
            """
            strategies = {
                'Optimal': self.optimal_policy,
                'SafeDice': self.safe_strategy,
                'NormalDice': self.normal_strategy,
                'RiskyDice': self.risky_strategy,
                'Random': self.random_strategy,
            }
            return {
                name: self.simulate_rounds(strategy, n_iterations=num_games)
                for name, strategy in strategies.items()
            }