Newer
Older
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver
# Number of squares on the board; index nSquares - 1 is the final square.
nSquares = 15
# Number of simulated games averaged for each empirical estimate.
nSimul = 10000
def _applySquareEffect(squareType, pos):
    """Return ``(position, prison)`` after triggering a square of *squareType* at *pos*.

    Square types: 0 = nothing happens, 1 = back to square one,
    2 = back three squares (never below square 0), 3 = prison.

    Raises:
        ValueError: If *squareType* is not one of 0, 1, 2, 3.
    """
    if squareType == 0:
        return pos, False
    if squareType == 1:
        return 0, False
    if squareType == 2:
        return max(pos - 3, 0), False
    if squareType == 3:
        return pos, True
    raise ValueError(f"unknown square type: {squareType!r}")


def playOneTurn(diceChoice, curPos, layout, circle, prison):
    """Simulate a single turn and return the resulting ``(position, prison)`` pair.

    Args:
        diceChoice: Die used this turn; the roll is drawn uniformly from
            ``0..diceChoice``. Die 1 never triggers the square landed on,
            die 2 triggers it with probability 1/2, any other die always
            triggers it.
        curPos: Current square index (0-based).
        layout: Sequence giving the type of each square: 0 = nothing,
            1 = back to square one, 2 = back three squares, 3 = prison,
            4 = one of the effects 1, 2, 3 drawn uniformly at random.
        circle: If true, overshooting the last square wraps around to the
            beginning of the board; otherwise overshooting stops on the
            last square.
        prison: If true, the player does not move this turn and the flag
            is cleared.

    Returns:
        tuple: ``(newPos, prison)`` where ``prison`` tells whether the next
        turn must be skipped.

    Raises:
        ValueError: If the triggered square has a type outside 0..4
            (previously the function silently returned ``None``).
    """
    lastSquare = nSquares - 1

    # Already finished: stay on the final square.
    if curPos == lastSquare:
        return lastSquare, False
    # A turn spent in prison: no move, and the player is free afterwards.
    if prison:
        return curPos, False

    result = random.choice(range(diceChoice + 1))

    if curPos == 2 and result != 0:
        # Leaving square 2: a fair coin decides between moving on normally
        # (the "slow lane") and jumping 7 squares further ahead.
        slowLane = random.choice([0, 1])
        newPos = curPos + result if slowLane else curPos + result + 7
    elif curPos in (7, 8, 9) and curPos + result >= 10:
        # Moving past square 9 from squares 7-9 skips 4 indices.
        # (The roll is never negative, so this also covers every non-zero
        # roll from square 9.)
        newPos = curPos + result + 4
    else:
        newPos = curPos + result

    if newPos > lastSquare:
        if not circle:
            # Overshooting ends the game on the last square; the player is
            # not sent to prison.
            return lastSquare, False
        newPos -= nSquares

    newSquare = layout[newPos]
    if diceChoice == 1:
        return newPos, False
    if diceChoice == 2:
        # Die 2 only triggers the square half of the time.
        newSquare = random.choice([0, newSquare])
    if newSquare == 4:
        # Type 4 resolves to one of the three other effects at random.
        newSquare = random.choice([1, 2, 3])
    return _applySquareEffect(newSquare, newPos)
def playOneGame(layout, circle, policy, start=0):
    """Play one game from *start* and return the number of turns it took.

    Args:
        layout: Square types of the board, forwarded to ``playOneTurn``.
        circle: Whether overshooting the last square wraps around,
            forwarded to ``playOneTurn``.
        policy: Indexable giving the die to throw on each square.
        start: Square index the game starts from.

    Returns:
        int: Number of turns played (turns skipped in prison included)
        until the last square is reached; 0 if *start* is already the
        last square.
    """
    nTurns = 0
    curPos = start
    prison = False
    # playOneTurn already applies the wrap-around / stop-at-end rule, so the
    # same loop serves both board variants.
    while curPos < nSquares - 1:
        curPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison)
        nTurns += 1
    return nTurns
def empiric_cost_of_square(layout, circle, policy):
    """Estimate by simulation the mean number of turns needed from each square.

    For every starting square, ``nSimul`` games are played with *policy*
    and the turn counts are averaged.

    Returns:
        numpy.ndarray: Array of length ``nSquares`` whose entry ``i`` is the
        average number of turns of the games started on square ``i``.
    """
    averages = np.zeros(nSquares)
    for square in range(nSquares):
        turnsPlayed = sum(
            playOneGame(layout, circle, policy, start=square)
            for _ in range(nSimul)
        )
        averages[square] = turnsPlayed / nSimul
    return averages
def empirical_results(layout, circle, policy):
    """Return the average number of turns over ``nSimul`` games played with *policy*.

    Every game starts from the default starting square of ``playOneGame``.
    """
    totalTurns = sum(
        playOneGame(layout, circle, policy) for _ in range(nSimul)
    )
    return totalTurns / nSimul
def comparison_theorical_empirical(layout, circle):
    """Plot the solver's per-square cost against the simulated per-square cost.

    The policy returned by ``MarkovDecisionSolver.solve`` is cast to integers
    and evaluated with ``empiric_cost_of_square``; both curves are drawn on
    one figure, which is then shown.
    """
    theoretical, bestPolicy = MarkovDecisionSolver(layout, circle).solve()
    simulated = empiric_cost_of_square(layout, circle, bestPolicy.astype(int))

    nPoints = len(theoretical)
    xs = np.arange(nPoints)
    # One curve per cost estimate, sharing the same x axis.
    curves = (
        (theoretical, "Theoretical cost"),
        (simulated, "Empirical cost"),
    )
    for series, label in curves:
        plt.plot(xs, series, label=label)

    plt.xticks(np.arange(0, nPoints, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()
def comparison_of_policies_total(layout, circle):
    """Show a bar chart of the simulated average game length of five policies.

    The policies compared are the solver's policy, the three constant
    policies (always die 1, 2 or 3) and one policy drawn uniformly at
    random from dice 1-3 for every square.
    """
    _, bestPolicy = MarkovDecisionSolver(layout, circle).solve()

    # Built in display order; dicts keep insertion order.
    policyByName = {"optimal": bestPolicy.astype(int)}
    for die, name in enumerate(("safe", "normal", "risky"), start=1):
        policyByName[name] = np.ones(nSquares, dtype=int) * die
    policyByName["random"] = np.random.randint(1, 4, size=nSquares)

    meanTurns = [
        empirical_results(layout, circle, candidate)
        for candidate in policyByName.values()
    ]

    plt.bar(list(policyByName), meanTurns)
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns by policy")
    plt.show()
def comparison_of_policies_squares(layout, circle):
    """Plot the simulated per-square cost of five policies on one figure.

    The policies compared are the solver's policy, the three constant
    policies (always die 1, 2 or 3) and one policy drawn uniformly at
    random from dice 1-3 for every square.
    """
    _, bestPolicy = MarkovDecisionSolver(layout, circle).solve()

    candidates = [bestPolicy.astype(int)]
    candidates.extend(np.ones(nSquares, dtype=int) * die for die in (1, 2, 3))
    candidates.append(np.random.randint(1, 4, size=nSquares))

    costCurves = [
        empiric_cost_of_square(layout, circle, candidate)
        for candidate in candidates
    ]

    nPoints = len(costCurves[0])
    xs = np.arange(nPoints)
    # Labels are listed in the same order as the candidate policies.
    labels = ("Optimal", "Safe", "Normal", "Risky", "Random")
    for label, curve in zip(labels, costCurves):
        plt.plot(xs, curve, label=label)

    plt.xticks(np.arange(0, nPoints, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Expected cost for different policies")
    plt.show()
def make_plots():
    """Draw the theoretical-vs-empirical comparison for a fixed example board."""
    board = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    wrapAround = False
    comparison_theorical_empirical(board, wrapAround)
    # Other figures, currently disabled:
    # comparison_of_policies_total(board, wrapAround)
    # comparison_of_policies_squares(board, wrapAround)
# Runs the simulations and shows the plot whenever this module is executed,
# including when it is imported.
# NOTE(review): consider an `if __name__ == "__main__":` guard - confirm no
# importer relies on this side effect first.
make_plots()