Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import random as rd
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class Simulate:
    """Monte-Carlo simulator for the board game described by ``layout``.

    Uses the per-die transition matrices produced by
    TransitionMatrixCalculator to estimate, by repeated simulation, the
    expected number of turns needed to reach the final square under a
    given dice-choice strategy.
    """

    def __init__(self, layout, circle):
        # layout: sequence of square types; value 3 marks a penalty
        # ("prison") square — TODO confirm semantics against tmc.
        # circle: board-wrapping flag, passed through to tmc unchanged.
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        # One transition matrix per die: index 0 = safe, 1 = normal, 2 = risky.
        self.safe_dice, self.normal_dice, self.risky_dice = (
            self.tmc_instance.compute_transition_matrix(layout, circle)
        )
        self.transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]

    def simulate_game(self, strategy, n_iterations=10000):
        """Return the empirical mean number of turns to finish one game.

        strategy -- array mapping each square index to the die to throw
                    (1 = safe, 2 = normal, 3 = risky)
        n_iterations -- number of simulated games to average over
        """
        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # every game starts on the first square
            while state < len(self.layout) - 1:  # until the goal square
                action = strategy[state]  # die prescribed for this square
                transition_matrix = self.transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout), p=transition_matrix[state])
                # Landing on a penalty square (type 3) costs extra turns:
                # with the normal die the penalty applies half of the time,
                # with the risky die it always applies.
                if self.layout[state] == 3 and action == 2:
                    # BUGFIX: random.choice takes no `p` keyword — the
                    # original raised TypeError here. A plain choice of
                    # 1 or 2 is exactly the intended 0.5/0.5 draw.
                    total_turns += rd.choice([1, 2])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)
        return np.mean(number_turns)

    def simulate_state(self, strategy, n_iterations=10000):
        """Return the empirical mean number of turns from every start square.

        Simulates ``n_iterations`` rounds; in each round a game is played
        from every non-goal square. Returns a 1-D array of length
        ``len(layout) - 1`` with the per-square averages.

        strategy -- array mapping each square index to the die to throw
        n_iterations -- number of rounds to average over
        """
        number_mean = []
        for _ in range(n_iterations):
            number_turns = []
            for start in range(len(self.layout) - 1):
                # BUGFIX: the original reused the for-loop variable as the
                # running position; keep the start square separate.
                state = start
                total_turns = 0
                while state < len(self.layout) - 1:
                    # BUGFIX: `action` and `transition_matrix` were never
                    # assigned in the original (NameError on first use);
                    # derive them from the strategy as simulate_game does.
                    action = strategy[state]
                    transition_matrix = self.transition_matrices[int(action) - 1]
                    state = np.random.choice(len(self.layout), p=transition_matrix[state])
                    if self.layout[state] == 3 and action == 2:
                        # Same random.choice fix as in simulate_game.
                        total_turns += rd.choice([1, 2])
                    elif self.layout[state] == 3 and action == 3:
                        total_turns += 2
                    else:
                        total_turns += 1
                number_turns.append(total_turns)
            number_mean.append(number_turns)
        # Column-wise mean: one expected-turn estimate per starting square.
        return np.mean(number_mean, axis=0)