Newer
Older
from tmc import TransitionMatrixCalculator as tmc
def __init__(self, layout : list, circle : bool):
# Build the per-die transition matrices, solve for the optimal policy and
# prepare the fixed strategies and per-square cost tables for a board.
#   layout: per-square values; squares whose value is 3 get special
#           treatment in the cost tables below.
#   circle: forwarded unchanged to the matrix computations and the solver.
# NOTE(review): the statements that assign self.layout, self.circle and
# self.tmc_instance are not visible in this view although they are read
# below -- confirm they exist before these lines.
# Initialize with layout and circle configuration
# Initialize TransitionMatrixCalculator instance for transition matrix computation
# Compute transition matrices for safe, normal, and risky dice
# (the second value returned for the normal and risky dice is discarded).
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
# Use MarkovDecisionSolver to find optimal policy and expected costs
# NOTE(review): `mD` is not imported in the visible part of the file.
solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve()
# Constant strategies: the same action code (1, 2 or 3) on every square.
self.safe_strategy = [1] * len(layout)
self.normal_strategy = [2] * len(layout)
self.risky_strategy = [3] * len(layout)
# One randomly drawn action code per square, fixed at construction time.
# NOTE(review): the draw includes 0, while the constant strategies only use
# 1..3; presumably `rd` is the stdlib `random` module -- confirm both.
self.random_strategy = [rd.choice([0, 1, 2, 3]) for _ in range(len(layout))]
# NOTE(review): the opening of this dict literal (presumably
# `self.costs_by_dice_type = {`) and its closing brace are not visible here.
'SafeDice': [0] * len(layout),
'NormalDice': [0] * len(layout),
'RiskyDice': [0] * len(layout)
# For every square whose layout value is 3, record a cost of 1 / 2 / 3 in the
# 'SafeDice' / 'NormalDice' / 'RiskyDice' list; every other square gets 0.
for i, die_type in enumerate(self.layout) :
self.costs_by_dice_type['SafeDice'][i] = 1 if die_type == 3 else 0
self.costs_by_dice_type['NormalDice'][i] = 2 if die_type == 3 else 0
self.costs_by_dice_type['RiskyDice'][i] = 3 if die_type == 3 else 0
def simulate_game(self, strategy: list, n_iterations: int):
"""Simulate the game using a given strategy over multiple iterations."""
# NOTE(review): the loop headers, the initialisation of `k`, `turns` and
# `action`, and the return statement are not visible in this view; the
# comments below describe only the statements that are shown.
# Matrices are stored in the order safe, normal, risky (list indices 0, 1, 2).
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
# Action codes are shifted down by one to index the list above. An action
# code of 0 would yield index -1, i.e. the last (risky) matrix.
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
# Row k of the chosen matrix is used as the probability vector over squares.
flattened_probs = transition_matrix[k]
# Draw the next square index from that distribution.
k = np.random.choice(len(self.layout), p=flattened_probs)
# Squares whose layout value is 3 add extra turns depending on the action:
# action 2 adds 1 or 2 with equal probability, action 3 always adds 2.
# No branch for any other action is visible here.
if self.layout[k] == 3:
if action == 2:
turns += np.random.choice([1, 2], p=[0.5, 0.5])
elif action == 3:
turns += 2
def simulate_state(self, strategy: list, layout: list, circle: bool, n_iterations: int):
"""Simulate game states using a given strategy."""
# NOTE(review): the loop headers, the initialisation of `k`, `turns` and
# `action`, and the return statement are not visible in this view; the
# comments below describe only the statements that are shown.
# The matrices are recomputed from the `layout` and `circle` arguments rather
# than taken from the ones stored on the instance.
safe_dice = self.tmc_instance._compute_safe_matrix()
normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0]
risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0]
# Matrices are stored in the order safe, normal, risky (list indices 0, 1, 2).
transition_matrices = [safe_dice, normal_dice, risky_dice]
state_turns = np.zeros(len(layout) - 1) # Use a numpy array to store the turns per state (one slot per square except the last)
# Action codes are shifted down by one to index the list above.
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
# Row k of the chosen matrix is used as the probability vector over squares.
flattened_probs = transition_matrix[k]
# Renormalise the row so it sums to 1.
# NOTE(review): if the matrix is a NumPy array, this in-place division also
# rewrites the row inside the matrix itself -- confirm that is acceptable.
flattened_probs /= np.sum(flattened_probs)
# Squares whose layout value is 3 add extra turns depending on the action:
# action 2 adds 1 or 2 with equal probability, action 3 always adds 2.
if layout[k] == 3:
if action == 2:
turns += np.random.choice([1, 2], p=[0.5, 0.5]) # Use numpy for the randomization
elif action == 3:
turns += 2
def play_optimal_policy(self, n_iterations: int):
    """Run ``simulate_game`` with the stored optimal policy.

    Args:
        n_iterations: Number of iterations forwarded to ``simulate_game``.

    Returns:
        Whatever ``simulate_game`` returns for ``self.optimal_policy``.
    """
    policy = self.optimal_policy
    outcome = self.simulate_game(policy, n_iterations)
    return outcome
def play_dice_strategy(self, dice_choice, n_iterations: int):
    """Run ``simulate_game`` with the fixed strategy named by ``dice_choice``.

    Args:
        dice_choice: One of 'SafeDice', 'NormalDice' or 'RiskyDice'.
        n_iterations: Number of iterations forwarded to ``simulate_game``.

    Returns:
        Whatever ``simulate_game`` returns for the selected strategy.

    Raises:
        ValueError: If ``dice_choice`` is not one of the names above.
    """
    strategies_by_name = {
        'SafeDice': self.safe_strategy,
        'NormalDice': self.normal_strategy,
        'RiskyDice': self.risky_strategy,
    }
    chosen = strategies_by_name.get(dice_choice)
    if chosen is not None:
        return self.simulate_game(chosen, n_iterations)
    raise ValueError("Invalid dice choice")
def play_random_strategy(self, n_iterations: int):
    """Run ``simulate_game`` with the stored random strategy.

    Args:
        n_iterations: Number of iterations forwarded to ``simulate_game``.

    Returns:
        Whatever ``simulate_game`` returns for ``self.random_strategy``.
    """
    drawn_strategy = self.random_strategy
    outcome = self.simulate_game(drawn_strategy, n_iterations)
    return outcome
def compare_empirical_vs_value_iteration(self, num_games: int):
    """Compare expected value iteration turns with empirical turns.

    The theoretical side is ``self.expec`` (the expected values stored on
    the instance); the empirical side comes from ``simulate_state`` run
    with the optimal policy on the instance's own layout and circle flag.

    Args:
        num_games: Number of iterations passed to ``simulate_state``.

    Returns:
        A dict with keys 'ValueIteration' and 'Empirical', each mapping to
        a plain Python list of per-state values.
    """
    # Bind the theoretical values explicitly; the name was previously read
    # without ever being assigned, which raised NameError.
    value_iteration_turns = self.expec
    empirical_turns = self.simulate_state(
        self.optimal_policy, self.layout, self.circle, n_iterations=num_games
    )
    # np.asarray makes .tolist() work whether the values arrive as arrays
    # or as plain sequences.
    return {
        'ValueIteration': np.asarray(value_iteration_turns).tolist(),
        'Empirical': np.asarray(empirical_turns).tolist(),
    }
def empirical_cost_of_square(self, strategy: list, n_iterations: int):
    """Return the mean of the squared single-game results for ``strategy``.

    ``simulate_game(strategy, 1)`` is called ``n_iterations`` times; each
    result is squared and the squares are averaged with ``np.mean``.

    Args:
        strategy: Strategy forwarded to ``simulate_game``.
        n_iterations: Number of single-game simulations to run.

    Returns:
        The ``np.mean`` of the squared results.
    """
    squared_costs = [
        self.simulate_game(strategy, 1) ** 2 for _ in range(n_iterations)
    ]
    return np.mean(squared_costs)
def compare_state_based_turns(self, num_games : int ):
# Compare the expected turns from value iteration with empirical state-based turns
#   num_games: number of iterations passed to simulate_state.
# Theoretical side: the expected values stored on the instance.
value_iteration = self.expec
# Empirical side: simulate_state run with the optimal policy on the
# instance's own layout and circle flag.
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
# NOTE(review): no return statement or comparison of the two values is
# visible in this view -- confirm what this method is meant to return.
def compare_strategies(self, num_games: int):
    """Run ``simulate_game`` once per stored strategy and collect the results.

    The strategies are simulated in the order optimal, safe, normal, risky,
    random, each with ``n_iterations=num_games``.

    Args:
        num_games: Number of iterations used for every strategy.

    Returns:
        A dict keyed by 'Optimal', 'SafeDice', 'NormalDice', 'RiskyDice'
        and 'Random', holding the corresponding ``simulate_game`` result.
    """
    contenders = (
        ('Optimal', self.optimal_policy),
        ('SafeDice', self.safe_strategy),
        ('NormalDice', self.normal_strategy),
        ('RiskyDice', self.risky_strategy),
        ('Random', self.random_strategy),
    )
    return {
        label: self.simulate_game(policy, n_iterations=num_games)
        for label, policy in contenders
    }