# NOTE(review): the two lines below ("Newer" / "Older") look like stray
# merge-/diff-tool artifact text, not code; commented out so the file parses.
# Newer
# Older
import random as rd

import numpy as np

from markovDecision import MarkovDecisionSolver as mD
from tmc import TransitionMatrixCalculator as tmc
# NOTE(review): the statements below read and write `self`, so they appear to
# be the tail of an __init__ method whose `def` line (and enclosing class
# header) is missing from this chunk; `layout` and `circle` are presumably its
# parameters — confirm against the original file.
# Compute transition matrices using TransitionMatrixCalculator
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
# Solve Markov Decision Problem
solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve()
# Define all the strategies
# Fixed strategies always pick the same die (1=safe, 2=normal, 3=risky);
# the hard-coded 15 presumably matches the board size — TODO confirm it
# equals len(self.layout).
self.optimal_strategy = self.optimal_policy
self.safe_strategy = [1] * 15
self.normal_strategy = [2] * 15
self.risky_strategy = [3] * 15
self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)]
def simulate_game(self, strategy, n_iterations=10000):
    """Simulate full games under *strategy* and return the mean turn count.

    Args:
        strategy: per-square die choice (1=safe, 2=normal, 3=risky),
            indexed by board state.
        n_iterations: number of games to simulate.

    Returns:
        Mean number of turns (numpy float) needed to reach the final square.
    """
    # One transition matrix per die; index = die number - 1.
    transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
    number_turns = []  # was never initialized in the original (NameError)
    for _ in range(n_iterations):
        total_turns = 0
        state = 0  # initial state
        while state < len(self.layout) - 1:  # until goal state is reached
            action = strategy[state]  # get action according to strategy
            # Original referenced an undefined `transition_matrix`;
            # select it from the chosen die here.
            transition_matrix = transition_matrices[int(action) - 1]
            state = np.random.choice(len(self.layout), p=transition_matrix[state])
            if self.layout[state] == 3 and action == 2:
                # Trap square with the normal die: 50% chance of one extra
                # turn (the original `if` body was empty — filled in to match
                # the identical branch in simulate_state).
                total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
            elif self.layout[state] == 3 and action == 3:
                # Trap square with the risky die always costs an extra turn.
                total_turns += 2
            else:
                total_turns += 1
        number_turns.append(total_turns)
    return np.mean(number_turns)
def simulate_state(self, strategy, n_iterations=10000):
# NOTE(review): this function appears truncated/mangled — `state` and
# `total_turns` are used before being defined, `n_iterations` and
# `number_turns` are never used, there is no simulation loop and no return
# statement, and all indentation has been flattened. The body may continue
# past the end of this chunk; recover it from the original file before use.
# Compute transition matrices for each dice
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
number_turns = []
# `state` is undefined at this point — presumably set by an enclosing loop.
action = strategy[state]
# Pick the matrix for the chosen die (1=safe, 2=normal, 3=risky).
transition_matrix = transition_matrices[int(action) - 1]
state = np.random.choice(len(self.layout), p=transition_matrix[state])
if self.layout[state] == 3 and action == 2:
# Trap square with normal die: 50% chance of one extra turn.
total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
elif self.layout[state] == 3 and action == 3:
# Trap square with risky die: always one extra turn.
total_turns += 2
else:
total_turns += 1