Skip to content
Extraits de code Groupes Projets
validation.py 4,85 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    import random as rd
    import numpy as np
    import matplotlib.pyplot as plt
    from tmc import TransitionMatrixCalculator as tmc
    from markovDecision import MarkovDecisionSolver as mD
    
    class validation:
        """Estimate the expected number of turns of dice strategies by simulation.

        The instance stores one transition matrix per dice (safe, normal, risky),
        the policy returned by the Markov decision solver, and a set of baseline
        strategies (always-safe, always-normal, always-risky, random) to compare
        against it.

        A strategy is a sequence indexed by board state whose entries are dice
        identifiers: 1 (safe), 2 (normal) or 3 (risky).
        """

        # Dice identifiers used in strategies.
        SAFE_DICE = 1
        NORMAL_DICE = 2
        RISKY_DICE = 3

        # Layout code of the squares that can cost an additional turn when the
        # player lands on them (see ``simulate_game``).
        _EXTRA_TURN_SQUARE = 3

        def __init__(self, layout, circle=False):
            """Build the transition matrices, solve the MDP and set up strategies.

            Args:
                layout: Sequence of square type codes, one per board state. The
                    last index is treated as the final state by the simulation.
                circle: Forwarded unchanged to the transition matrix calculator
                    and to the Markov decision solver.
            """
            self.layout = layout
            self.circle = circle

            # Transition matrices, one per dice, from the project calculator.
            self.tmc_instance = tmc()
            self.safe_dice = self.tmc_instance._compute_safe_matrix()
            self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
            self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)

            # Expected costs and policy computed by the solver.
            solver = mD(self.layout, self.circle)
            self.expec, self.optimal_policy = solver.solve()

            # Baseline strategies are sized from the layout rather than from a
            # hard-coded board length.
            n_squares = len(self.layout)
            dice_ids = (self.SAFE_DICE, self.NORMAL_DICE, self.RISKY_DICE)
            self.optimal_strategy = self.optimal_policy
            self.safe_strategy = [self.SAFE_DICE] * n_squares
            self.normal_strategy = [self.NORMAL_DICE] * n_squares
            self.risky_strategy = [self.RISKY_DICE] * n_squares
            # Only real dice identifiers are drawn: 0 is not a dice and would
            # wrap around to the last transition matrix through index -1.
            self.random_strategy = [rd.choice(dice_ids) for _ in range(n_squares)]

        def simulate_game(self, strategy, n_iterations=10000):
            """Return the mean number of turns needed to reach the final state.

            Each game starts in state 0 and ends when the state reaches the last
            index of ``self.layout``. At every step the dice given by
            ``strategy[state]`` selects a transition matrix, and the next state is
            sampled from the (normalised) row of the current state.

            A move costs one turn, except when the player lands on a square whose
            layout code is 3: with the normal dice the move costs 1 or 2 turns
            with equal probability, and with the risky dice it costs 2 turns.

            Args:
                strategy: Sequence of dice identifiers (1, 2 or 3). It must cover
                    every non-final state; extra entries are ignored.
                n_iterations: Number of games to simulate (at least 1).

            Returns:
                The average number of turns over all simulated games.

            Raises:
                ValueError: If ``n_iterations`` is smaller than 1, if the
                    strategy is too short, or if it contains an unknown dice for
                    a non-final state.
            """
            if n_iterations < 1:
                raise ValueError("n_iterations must be at least 1")

            n_states = len(self.layout)
            final_state = n_states - 1
            if len(strategy) < final_state:
                raise ValueError(
                    f"strategy must define a dice for each of the {final_state} "
                    f"non-final states, got {len(strategy)} entries"
                )

            matrices = {
                self.SAFE_DICE: self.safe_dice,
                self.NORMAL_DICE: self.normal_dice,
                self.RISKY_DICE: self.risky_dice,
            }

            # Convert once (the solver may return floats) and reject unknown dice
            # up front instead of silently indexing the wrong matrix.
            actions = [int(strategy[state]) for state in range(final_state)]
            unknown = sorted({action for action in actions if action not in matrices})
            if unknown:
                raise ValueError(f"Invalid dice in strategy: {unknown}")

            turns_per_game = []
            for _ in range(n_iterations):
                turns = 0
                state = 0  # initial state

                while state < final_state:
                    action = actions[state]

                    # Normalise into a new array: dividing the row in place
                    # would overwrite the stored transition matrix.
                    row = np.asarray(matrices[action][state], dtype=float)
                    probabilities = row / row.sum()

                    # Sample the next state from the row distribution.
                    state = np.random.choice(n_states, p=probabilities)

                    # Cost of the move, depending on the landing square.
                    on_extra_turn_square = self.layout[state] == self._EXTRA_TURN_SQUARE
                    if on_extra_turn_square and action == self.NORMAL_DICE:
                        turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                    elif on_extra_turn_square and action == self.RISKY_DICE:
                        turns += 2
                    else:
                        turns += 1

                turns_per_game.append(turns)

            return np.mean(turns_per_game)

        def play_optimal_strategy(self, n_iterations=10000):
            """Simulate ``n_iterations`` games with the solver's policy."""
            return self.simulate_game(self.optimal_policy, n_iterations)

        def play_dice_strategy(self, dice_choice, n_iterations=10000):
            """Simulate games that always throw the same dice.

            Args:
                dice_choice: One of ``'SafeDice'``, ``'NormalDice'`` or
                    ``'RiskyDice'``.
                n_iterations: Number of games to simulate.

            Returns:
                The average number of turns over the simulated games.

            Raises:
                ValueError: If ``dice_choice`` is not one of the names above.
            """
            if dice_choice == 'SafeDice':
                strategy = self.safe_strategy
            elif dice_choice == 'NormalDice':
                strategy = self.normal_strategy
            elif dice_choice == 'RiskyDice':
                strategy = self.risky_strategy
            else:
                raise ValueError("Invalid dice choice")

            return self.simulate_game(strategy, n_iterations)

        def play_random_strategy(self, n_iterations=10000):
            """Simulate ``n_iterations`` games with the stored random strategy."""
            return self.simulate_game(self.random_strategy, n_iterations)

        def compare_strategies(self, num_games=1000):
            """Simulate every stored strategy and return the mean cost of each.

            Args:
                num_games: Number of games simulated per strategy.

            Returns:
                A dict mapping ``'Optimal'``, ``'SafeDice'``, ``'NormalDice'``,
                ``'RiskyDice'`` and ``'Random'`` to the average number of turns.
            """
            strategies = {
                'Optimal': self.optimal_strategy,
                'SafeDice': self.safe_strategy,
                'NormalDice': self.normal_strategy,
                'RiskyDice': self.risky_strategy,
                'Random': self.random_strategy,
            }
            return {
                label: self.simulate_game(strategy, n_iterations=num_games)
                for label, strategy in strategies.items()
            }
        
    
        
    
    # Example usage
    layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
    circle = False  # Example circle value

    # The instance gets its own name: rebinding ``validation`` to the instance
    # would make the class unreachable for any later instantiation.
    validator = validation(layout, circle=circle)

    # Average number of turns when following the solver's policy.
    optimal_cost = validator.play_optimal_strategy(n_iterations=10000)
    print("Optimal Strategy Cost:", optimal_cost)

    # Average number of turns when always throwing the normal dice.
    dice2_cost = validator.play_dice_strategy('NormalDice', n_iterations=10000)
    print("Normal Dice Strategy Cost:", dice2_cost)

    # Average number of turns with the randomly drawn strategy.
    random_cost = validator.play_random_strategy(n_iterations=10000)
    print("Random Strategy Cost:", random_cost)

    # Side-by-side comparison of every stored strategy.
    strategy_comparison = validator.compare_strategies(num_games=10000)
    print("Strategy Comparison Results:", strategy_comparison)