# validation.py

import random
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator
from markovDecision import MarkovDecisionSolver as mD

class Validation:
    """Empirically evaluate dice strategies on a board by simulation.

    The board is described by ``layout``, a sequence of trap codes indexed by
    square (0 = no trap, 1 = back to the first square, 2 = back three squares,
    3 = prison).  The last square of ``layout`` is the goal.

    NOTE(review): the lane-junction indices used in ``play_one_turn`` are
    hard-coded, so the simulation presumably assumes a 15-square board like
    the one in the example at the bottom of this file - confirm.
    """

    def __init__(self, layout, circle=False):
        """Store the board and compute the solver's per-square dice choice.

        Args:
            layout: sequence of trap codes, one per square.
            circle: when true, overshooting the goal wraps around to the
                start of the board instead of ending the game.
        """
        self.layout = layout
        self.circle = circle
        # Not used by any method visible in this class; kept for callers.
        self.tmc_instance = TransitionMatrixCalculator()

        # The solver returns a pair: expected values and the dice to throw
        # on each square (indexed by board position in play_one_game).
        solver = mD(self.layout, self.circle)
        self.optimal_values, self.optimal_dice = solver.solve()

    def simulate_game(self, strategy='optimal', num_games=1000):
        """Return the average number of turns over ``num_games`` games.

        Args:
            strategy: one of 'Optimal', 'SafeDice', 'NormalDice',
                'RiskyDice' or 'Random' (matched case-insensitively, so the
                lowercase default is valid).
            num_games: number of games to simulate; must be positive.

        Raises:
            ValueError: if ``strategy`` is unknown or ``num_games`` <= 0.
        """
        if num_games <= 0:
            raise ValueError("num_games must be a positive integer")

        players = {
            'optimal': self.play_optimal_strategy,
            'safedice': lambda: self.play_dice_strategy(1),
            'normaldice': lambda: self.play_dice_strategy(2),
            'riskydice': lambda: self.play_dice_strategy(3),
            'random': self.play_random_strategy,
        }
        play = players.get(str(strategy).lower())
        if play is None:
            raise ValueError(f"Unknown strategy: {strategy!r}")

        total_turns = sum(play() for _ in range(num_games))
        return total_turns / num_games

    def _play_game(self, choose_dice, start=0):
        """Play one game, asking ``choose_dice(position)`` for the dice each turn."""
        goal = len(self.layout) - 1
        cur_pos = start
        prison = False
        n_turns = 0

        # play_one_turn never returns a position beyond the goal (it either
        # wraps around or clamps), so one loop serves both board modes.
        while cur_pos < goal:
            cur_pos, prison = self.play_one_turn(choose_dice(cur_pos), cur_pos, prison)
            n_turns += 1

        return n_turns

    def play_optimal_strategy(self):
        """Play one game with the solver's dice choice; return the turn count."""
        return self.play_one_game()

    def play_dice_strategy(self, dice):
        """Play one game always throwing the same dice type (1, 2 or 3).

        ``dice`` is a dice type handed to ``play_one_turn``, not a number of
        squares to advance.
        """
        return self._play_game(lambda _pos: dice)

    def play_random_strategy(self):
        """Play one game picking a dice type uniformly in {1, 2, 3} each turn."""
        return self._play_game(lambda _pos: np.random.randint(1, 4))

    def compare_strategies(self, num_games=1000):
        """Return a dict mapping each strategy name to its average turn count."""
        names = ('Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random')
        return {
            name: self.simulate_game(strategy=name, num_games=num_games)
            for name in names
        }

    def play_one_turn(self, dice_choice, cur_pos, prison):
        """Simulate a single turn and return ``(new_position, in_prison)``.

        Args:
            dice_choice: dice type; the roll is uniform in 0..dice_choice.
                Dice 1 ignores traps, dice 2 triggers a trap half of the
                time, any other dice always triggers it.
            cur_pos: current square index.
            prison: whether the pawn must skip this turn.

        Raises:
            ValueError: if the pawn triggers a trap code other than 0-3.
        """
        goal = len(self.layout) - 1

        if cur_pos == goal:
            return goal, False

        if prison:
            # The turn is spent getting out of prison: no move.
            return cur_pos, False

        # The solver may hand back floats; the roll range needs an int.
        dice_choice = int(dice_choice)
        result = random.randrange(dice_choice + 1)

        if cur_pos == 2 and result != 0:
            # Leaving square 2: either continue on the next squares or jump
            # 7 squares further, each with probability 1/2.
            takes_next_squares = random.choice([0, 1])
            new_pos = cur_pos + result
            if not takes_next_squares:
                new_pos += 7
        elif cur_pos in (7, 8, 9) and cur_pos + result >= 10:
            # Moving past square 9 skips the next four squares.
            new_pos = cur_pos + result + 4
        else:
            new_pos = cur_pos + result

        if new_pos > goal:
            if not self.circle:
                # Overshooting ends the game on the goal; the pawn is not
                # in prison there.
                return goal, False
            new_pos -= len(self.layout)

        if dice_choice == 1:
            return new_pos, False

        new_square = self.layout[new_pos]
        if dice_choice == 2:
            # The trap is neutralised with probability 1/2.
            new_square = random.choice([0, new_square])

        if new_square == 0:
            return new_pos, False  # nothing happens
        if new_square == 1:
            return 0, False  # back to square one
        if new_square == 2:
            return max(new_pos - 3, 0), False  # back 3 squares
        if new_square == 3:
            return new_pos, True  # prison

        # Previously fell through and returned None, which broke callers
        # unpacking the result.
        raise ValueError(f"Unsupported trap code {new_square!r} on square {new_pos}")

    def play_one_game(self, start=0):
        """Play one game from ``start`` using ``self.optimal_dice``.

        Returns:
            The number of turns needed to reach the last square.
        """
        return self._play_game(lambda pos: self.optimal_dice[pos], start)

    def empirical_results(self, num_games=10000):
        """Return the average turn count of ``num_games`` optimal-policy games."""
        total_turns_played = sum(self.play_one_game() for _ in range(num_games))
        return total_turns_played / num_games
    

# Example usage: build a 15-square board, simulate every strategy on it and
# print the average number of turns each one needs.
layout = [
    0, 0, 3, 0, 0,
    0, 2, 0, 0, 0,
    3, 0, 0, 1, 0,
]
validation = Validation(layout, circle=False)
results = validation.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy in results:
    cost = results[strategy]
    print(f"{strategy}: {cost}")