Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
# testing our TransitionMatrix function based on random layout
# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout: list, circle: bool):
    """Run value iteration on the board and choose a die for each square.

    Three transition matrices (safe, normal, risky) are requested from a
    ``tmc`` instance for the given ``layout`` and ``circle`` flag. Every
    sweep recomputes, for squares 0..13, one plus the smallest of the three
    per-die costs. Sweeps repeat until ``np.allclose`` reports that two
    consecutive value vectors agree.

    Args:
        layout: Board description. Entries equal to 3 are treated here as
            jail squares. Presumably one entry per square (15 expected,
            matching the hard-coded state count) -- confirm against ``tmc``.
        circle: Forwarded unchanged to the ``tmc`` matrix builders.

    Returns:
        A two-item list ``[Expec, DiceForStates]``: the converged values of
        every square except the last one, and the die chosen per square
        stored as 1.0 (safe), 2.0 (normal) or 3.0 (risky).
    """
    n_squares = 15  # number of states on the board

    calculator = tmc()
    safe_matrix = calculator._compute_safe_matrix(layout, circle)
    normal_matrix = calculator._compute_normal_matrix(layout, circle)
    risky_matrix = calculator._compute_risky_matrix(layout, circle)

    # Indices of every square marked 3 (jail) in the layout.
    jail_squares = [idx for idx, kind in enumerate(layout) if kind == 3]

    values = np.zeros(n_squares)         # current value estimate per square
    best_dice = np.zeros(n_squares - 1)  # die chosen per non-final square
    sweeps = 0  # sweep counter; incremented each pass but not read in this function

    converged = False
    while not converged:
        sweeps += 1
        # The last entry is never written, so the final square keeps value 0.
        updated = np.zeros(n_squares)

        for square in range(n_squares - 1):
            # Safe die: no jail term is added.
            cost_safe = np.sum(safe_matrix[square] * values)
            # Normal die: jail landing probability is weighted by 0.5
            # (presumably the chance of triggering the trap -- confirm).
            cost_normal = (
                np.sum(normal_matrix[square] * values)
                + 0.5 * np.sum(normal_matrix[square][jail_squares])
            )
            # Risky die: jail landing probability counts in full
            # (the trap always triggers).
            cost_risky = (
                np.sum(risky_matrix[square] * values)
                + np.sum(risky_matrix[square][jail_squares])
            )

            updated[square] = 1 + min(cost_safe, cost_normal, cost_risky)

            # Ties resolve in the order safe, normal, risky.
            if updated[square] == 1 + cost_safe:
                die = 1
            elif updated[square] == 1 + cost_normal:
                die = 2
            else:
                die = 3
            best_dice[square] = die

        # Compare against the previous sweep before adopting the new values.
        converged = np.allclose(updated, values)
        values = updated

    return [values[:-1], best_dice]
# Example board passed to markovDecision; entries equal to 3 are the ones
# markovDecision treats as jail squares.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# First run: circle disabled.
print("\nWin as soon as land on or overstep the final square")
print(markovDecision(layout, circle=False))

# Second run: circle enabled.
print("\nStopping on the square to win")
print(markovDecision(layout, circle=True))