Skip to content
Extraits de code Groupes Projets
validation.py 9,78 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    import random as rd
    import numpy as np
    
    from tmc import TransitionMatrixCalculator as tmc
    from markovDecision import MarkovDecisionSolver as mD
    
    Adrien Payen's avatar
    Adrien Payen a validé
    class Validation:
        """Monte-Carlo check of the value-iteration solution for one board.

        The constructor builds one transition matrix per die (safe, normal,
        risky), solves the board with the Markov decision solver and prepares a
        few fixed strategies.  The ``simulate_*`` / ``play_*`` methods then play
        random games with those matrices and report how many turns they take, so
        the empirical averages can be compared with the solver's expectation.

        A strategy is a sequence indexed by square; each entry is the die to
        throw on that square: 1 (safe), 2 (normal) or 3 (risky).
        """

        def __init__(self, layout, circle=False):
            """Prepare matrices, the solved policy and the reference strategies.

            Args:
                layout: Sequence with one code per square.  Code 3 is the only
                    one this class treats specially (extra turns, see
                    ``_turn_cost``).
                circle: Forwarded unchanged to the matrix calculator and the
                    solver.
            """
            self.layout = layout
            self.circle = circle
            self.tmc_instance = tmc()

            self.safe_dice = self.tmc_instance._compute_safe_matrix()
            # The second value returned for the normal/risky dice is not used here.
            self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
            self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)

            solver = mD(self.layout, self.circle)
            self.expec, self.optimal_policy = solver.solve()

            n_squares = len(layout)
            self.safe_strategy = [1] * n_squares
            self.normal_strategy = [2] * n_squares
            self.risky_strategy = [3] * n_squares
            # Only 1, 2 and 3 are real dice.  A 0 would be turned into index -1
            # by the simulation and silently select the risky matrix.
            self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]

            # Per-square cost table: the die number on squares coded 3, else 0.
            self.costs_by_dice_type = {
                'SafeDice': [1 if square == 3 else 0 for square in self.layout],
                'NormalDice': [2 if square == 3 else 0 for square in self.layout],
                'RiskyDice': [3 if square == 3 else 0 for square in self.layout],
            }

        def _transition_matrices(self):
            """Return the stored matrices ordered by die number (1, 2, 3)."""
            return [self.safe_dice, self.normal_dice, self.risky_dice]

        @staticmethod
        def _turn_cost(square, action):
            """Return the number of turns charged for landing on ``square``.

            Landing on a square coded 3 costs 1 or 2 turns (fair coin flip) with
            die 2 and always 2 turns with die 3; every other case costs 1 turn.
            """
            if square == 3:
                if action == 2:
                    return 1 if np.random.uniform(0, 1) < 0.5 else 2
                if action == 3:
                    return 2
            return 1

        def _play_game(self, strategy, transition_matrices, layout, start=0):
            """Play one game from ``start`` and return the turns it took.

            Raises:
                IndexError: If the strategy asks for a die other than 1, 2 or 3.
            """
            n_squares = len(layout)
            position = start
            turns = 0

            while position < n_squares - 1:
                action = strategy[position]
                die_index = int(action) - 1
                if not 0 <= die_index < len(transition_matrices):
                    # Without this check a die of 0 or below would wrap around
                    # to another matrix instead of failing.
                    raise IndexError(
                        f"strategy[{position}] = {action!r} is not a valid die "
                        "(expected 1, 2 or 3)"
                    )

                row = np.asarray(transition_matrices[die_index][position], dtype=float)
                # Normalise a copy: dividing the row in place would rewrite the
                # shared transition matrix.
                probabilities = row / row.sum()

                position = np.random.choice(n_squares, p=probabilities)
                turns += self._turn_cost(layout[position], action)

            return turns

        def simulate_game(self, strategy, n_iterations=10000):
            """Return the mean number of turns over games started on square 0.

            Args:
                strategy: Die (1, 2 or 3) to throw on each square.
                n_iterations: Number of games to play.
            """
            matrices = self._transition_matrices()
            number_turns = [
                self._play_game(strategy, matrices, self.layout)
                for _ in range(n_iterations)
            ]
            return np.mean(number_turns)

        def simulate_state(self, strategy, layout, circle, n_iterations=10000):
            """Return the mean number of turns to finish from every start square.

            The matrices are recomputed from ``layout`` and ``circle``, so this
            can be used on a board other than the one given to the constructor.

            Args:
                strategy: Die (1, 2 or 3) to throw on each square.
                layout: Sequence with one code per square.
                circle: Forwarded unchanged to the matrix calculator.
                n_iterations: Number of games played from each start square.

            Returns:
                NumPy array with one mean per start square, final square
                excluded.
            """
            matrices = [
                self.tmc_instance._compute_safe_matrix(),
                self.tmc_instance._compute_normal_matrix(layout, circle)[0],
                self.tmc_instance._compute_risky_matrix(layout, circle)[0],
            ]

            n_starts = len(layout) - 1
            # Running sums instead of storing one list per iteration.
            total_turns = np.zeros(n_starts)

            for _ in range(n_iterations):
                for state in range(n_starts):
                    total_turns[state] += self._play_game(
                        strategy, matrices, layout, start=state
                    )

            return total_turns / n_iterations

        def play_optimal_policy(self, n_iterations=10000):
            """Return the mean number of turns when following the solved policy."""
            return self.simulate_game(self.optimal_policy, n_iterations)

        def play_dice_strategy(self, dice_choice, n_iterations=10000):
            """Return the mean number of turns when always throwing one die.

            Args:
                dice_choice: 'SafeDice', 'NormalDice' or 'RiskyDice'.
                n_iterations: Number of games to play.

            Raises:
                ValueError: If ``dice_choice`` is not one of the three names.
            """
            strategies = {
                'SafeDice': self.safe_strategy,
                'NormalDice': self.normal_strategy,
                'RiskyDice': self.risky_strategy,
            }
            try:
                strategy = strategies[dice_choice]
            except KeyError:
                raise ValueError("Invalid dice choice") from None

            return self.simulate_game(strategy, n_iterations)

        def play_random_strategy(self, n_iterations=10000):
            """Return the mean number of turns for the random strategy."""
            return self.simulate_game(self.random_strategy, n_iterations)

        def play_empirical_strategy(self):
            """Play one game with the solved policy and return its turn count.

            Each move uses the matrix of the die the policy selects for the
            current square.
            """
            return self._play_game(
                self.optimal_policy, self._transition_matrices(), self.layout
            )

        def compare_empirical_vs_value_iteration(self, num_games=10000):
            """Return solver expectation and simulated means as plain lists.

            Returns:
                Dict with keys 'ValueIteration' and 'Empirical'.
            """
            empirical_turns = self.simulate_state(
                self.optimal_policy, self.layout, self.circle, n_iterations=num_games
            )
            return {
                'ValueIteration': self.expec.tolist(),
                'Empirical': empirical_turns.tolist(),
            }

        def compare_state_based_turns(self, num_games=10000):
            """Return ``(solver expectation, simulated means)`` without conversion."""
            empirical_turns = self.simulate_state(
                self.optimal_policy, self.layout, self.circle, n_iterations=num_games
            )
            return self.expec, empirical_turns

        def compare_strategies(self, num_games=10000):
            """Return the mean number of turns from square 0 for every strategy.

            Returns:
                Dict with keys 'Optimal', 'SafeDice', 'NormalDice', 'RiskyDice'
                and 'Random'.
            """
            return {
                'Optimal': self.simulate_game(self.optimal_policy, n_iterations=num_games),
                'SafeDice': self.simulate_game(self.safe_strategy, n_iterations=num_games),
                'NormalDice': self.simulate_game(self.normal_strategy, n_iterations=num_games),
                'RiskyDice': self.simulate_game(self.risky_strategy, n_iterations=num_games),
                'Random': self.simulate_game(self.random_strategy, n_iterations=num_games),
            }
    
    Adrien Payen's avatar
    Adrien Payen a validé
    # Example usage.  Guarded so that importing this module to reuse the
    # Validation class does not launch the (very long) simulations below.
    if __name__ == "__main__":
        layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
        circle = False
        n_games = 1000000  # games simulated for every estimate below

        validation_instance = Validation(layout, circle)

        # Compare the simulated optimal policy with the value-iteration expectation.
        turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=n_games)

        # Print the mean number of turns for every state except the final one.
        num_states = len(layout)
        for state in range(num_states - 1):
            print(f"État {state}:")
            print(f"   ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
            print(f"   Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")

        # Play the optimal policy once.
        empirical_strategy_result = validation_instance.play_empirical_strategy()
        print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)

        # Same comparison as above, printed as whole lists.  The result computed
        # above is reused rather than re-running an identical simulation.
        comparison_result = turns_by_state
        print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
        print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])

        # Mean cost of each strategy when starting from the first square.
        optimal_cost = validation_instance.play_optimal_policy(n_iterations=n_games)
        print("Optimal Strategy Cost:", optimal_cost)

        dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=n_games)
        print("Safe Dice Strategy Cost:", dice1_cost)

        dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=n_games)
        print("Normal Dice Strategy Cost:", dice2_cost)

        dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=n_games)
        print("Risky Dice Strategy Cost:", dice3_cost)

        random_cost = validation_instance.play_random_strategy(n_iterations=n_games)
        print("Random Strategy Cost:", random_cost)

        # All strategies side by side.
        strategy_comparison = validation_instance.compare_strategies(num_games=n_games)
        print("Strategy Comparison Results:", strategy_comparison)

        # Mean number of turns from every start state, for each strategy.
        optimal_policy = validation_instance.optimal_policy
        mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=n_games)
        print("Mean Turns for Optimal Strategy:", mean_turns_optimal)

        safe_dice_strategy = validation_instance.safe_strategy
        mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=n_games)
        print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)

        normal_dice_strategy = validation_instance.normal_strategy
        mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=n_games)
        print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)

        risky_dice_strategy = validation_instance.risky_strategy
        mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=n_games)
        print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)

        random_dice_strategy = validation_instance.random_strategy
        mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=n_games)
        print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)