Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
from ancien.tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
    """Value-iteration solver that picks one of three dice for every square.

    The transition matrices for the "safe", "normal" and "risky" dice are
    obtained from ``TransitionMatrixCalculator``. ``solve`` iterates the
    expected-cost update until the value vector stops changing.

    Attributes:
        Numberk: number of states (fixed to 15).
        safe_dice, normal_dice, risky_dice: per-die transition matrices,
            indexed as ``matrix[state]`` to get that state's row.
        jail: indices of ``layout`` whose value is 3.
        ValueI: current expected-cost estimate for every state.
        DiceForStates: chosen die (1 = safe, 2 = normal, 3 = risky) for
            every state except the last one.
    """

    def __init__(self, layout: list, circle: bool):
        """Build the transition matrices and zero-initialise the solver state.

        Args:
            layout: per-square codes; squares whose code is 3 are recorded
                in ``self.jail``.
            circle: forwarded unchanged to the normal/risky matrix builders.
        """
        self.Numberk = 15
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        self.jail = [i for i, x in enumerate(layout) if x == 3]
        self.ValueI = np.zeros(self.Numberk)
        self.DiceForStates = np.zeros(self.Numberk - 1)

    def _compute_vi_safe(self, k):
        """Return the expected next-state value from state ``k`` with the safe die."""
        return np.dot(self.safe_dice[k], self.ValueI)

    def _compute_vi_normal(self, k):
        """Return the expected next-state value from ``k`` with the normal die.

        Half of the probability mass that lands on jail squares is added as
        extra cost.
        NOTE(review): presumably because a trap triggers half of the time
        with this die -- confirm against the game rules.
        """
        row = self.normal_dice[k]
        return np.dot(row, self.ValueI) + 0.5 * np.sum(row[self.jail])

    def _compute_vi_risky(self, k):
        """Return the expected next-state value from ``k`` with the risky die.

        The full probability mass that lands on jail squares is added as
        extra cost.
        """
        row = self.risky_dice[k]
        return np.dot(row, self.ValueI) + np.sum(row[self.jail])

    def solve(self):
        """Run value iteration until two successive value vectors are close.

        Every sweep reads only the previous sweep's values (``self.ValueI``)
        and writes into a fresh vector; the last state is never updated and
        therefore keeps a value of 0.

        Returns:
            A two-element list ``[expected_costs, dice]`` where
            ``expected_costs`` is ``self.ValueI`` without its last entry and
            ``dice`` is ``self.DiceForStates``.
        """
        while True:
            new_values = np.zeros(self.Numberk)
            for k in range(self.Numberk - 1):
                # One turn of cost plus the expected remaining cost per die,
                # ordered safe, normal, risky.
                costs = 1 + np.array([
                    self._compute_vi_safe(k),
                    self._compute_vi_normal(k),
                    self._compute_vi_risky(k),
                ])
                # argmin returns the first minimum, so ties resolve in the
                # order safe -> normal -> risky.
                best = int(np.argmin(costs))
                new_values[k] = costs[best]
                self.DiceForStates[k] = best + 1

            converged = np.allclose(new_values, self.ValueI)
            self.ValueI = new_values
            if converged:
                break

        return [self.ValueI[:-1], self.DiceForStates]
def markovDecision(layout: list, circle: bool):
    """Solve the game for ``layout`` and return the solver's result.

    Convenience wrapper: builds a ``MarkovDecisionSolver`` from the two
    arguments and returns whatever its ``solve`` method returns.
    """
    return MarkovDecisionSolver(layout, circle).solve()
# Example usage of markovDecision with the `layout` and `circle` parameters.
# NOTE(review): these statements run on import; consider moving them under an
# `if __name__ == "__main__":` guard if this module is meant to be imported.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the problem for both game modes, printing a label and then the result.
result_false = markovDecision(layout, circle=False)
print("\nWin as soon as land on or overstep the final square", result_false, sep="\n")

result_true = markovDecision(layout, circle=True)
print("\nStopping on the square to win", result_true, sep="\n")