Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
def markov_decision(layout: list, circle: bool):
    """Run value iteration over the three dice and pick the cheapest per state.

    The safe, normal and risky transition matrices are obtained from a
    ``TransitionMatrixCalculator`` for the given ``layout`` and ``circle``
    flag.  Starting from an all-zero value vector, every non-final state is
    repeatedly updated to ``1 + min(cost of each die)`` until two successive
    value vectors agree according to ``np.allclose``.

    Args:
        layout: Board description; entries equal to 3 are treated as jail
            squares and add an extra cost when they can be reached.
        circle: Forwarded unchanged to the transition-matrix calculator.

    Returns:
        A tuple ``(values, dice)`` where ``values`` holds the converged value
        of the first 14 states and ``dice`` holds, for the same states, the
        chosen die encoded as 1 (safe), 2 (normal) or 3 (risky).
    """
    n_states = 15

    calculator = tmc()
    safe_dice = calculator._compute_safe_matrix(layout, circle)
    normal_dice = calculator._compute_normal_matrix(layout, circle)
    risky_dice = calculator._compute_risky_matrix(layout, circle)

    # Indices of the squares marked 3 in the layout.
    jail = [i for i, x in enumerate(layout) if x == 3]

    # Each die is paired with the weight applied to its jail probability mass.
    # The weight is attached to the die explicitly: compute_value only ever
    # receives a single row, so an identity test such as
    # ``row is normal_dice`` can never be true and the 0.5 weight of the
    # normal die would silently never be applied.
    # NOTE(review): the safe die keeps the weight of 1 it had originally; if
    # the safe die is not supposed to be penalised on jail squares this
    # should become 0 -- confirm against the intended rules.
    # Order matters: it defines both the 1/2/3 encoding and the tie-break.
    dice = (
        (safe_dice, 1.0),
        (normal_dice, 0.5),
        (risky_dice, 1.0),
    )

    def compute_value(v, dice_row, jail_weight):
        """Return the expected value of one transition row plus jail cost."""
        return np.sum(dice_row * v) + jail_weight * np.sum(dice_row[jail])

    value = np.zeros(n_states)
    dice_for_states = np.zeros(n_states - 1)

    while True:
        # The last state is never updated, so its value stays at 0.
        new_value = np.zeros(n_states)
        for k in range(n_states - 1):
            costs = [
                compute_value(value, matrix[k], weight)
                for matrix, weight in dice
            ]
            # First minimum wins, i.e. safe before normal before risky.
            best = costs.index(min(costs))
            new_value[k] = 1 + costs[best]
            dice_for_states[k] = best + 1

        converged = np.allclose(new_value, value)
        value = new_value
        if converged:
            break

    return value[:-1], dice_for_states
# Example board passed to markov_decision; entries equal to 3 are the ones
# markov_decision collects as jail squares.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board twice: first with circle=False, then with circle=True,
# printing the heading before each result.
for banner, circle_mode in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(banner)
    print(markov_decision(layout, circle_mode))