Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class validation:
    """Estimate by simulation the average number of turns of several dice strategies.

    A strategy is a sequence giving, for every square of the board, the dice
    to throw from that square: 1 (safe), 2 (normal) or 3 (risky).  The
    transition matrices and the optimal policy are obtained from the
    project's ``TransitionMatrixCalculator`` and ``MarkovDecisionSolver``.
    """

    def __init__(self, layout, circle=False):
        """Build the transition matrices and the strategies to compare.

        Args:
            layout: Sequence of square types, one entry per square; the last
                square is treated as the goal by ``simulate_game``.
            circle: Forwarded unchanged to the matrix calculator and solver.
        """
        self.layout = layout
        self.circle = circle

        # One transition matrix per dice, computed by the project helper.
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)

        # Expected costs and optimal dice per square from the MDP solver.
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Define all the strategies (one dice choice per square).
        n_squares = len(layout)
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * n_squares
        self.normal_strategy = [2] * n_squares
        self.risky_strategy = [3] * n_squares
        # Only dice 1-3 exist: a 0 would index transition_matrices[-1] (the
        # risky matrix) in simulate_game while skipping its trap penalty.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]

    def simulate_game(self, strategy, n_iterations=10000):
        """Play ``n_iterations`` games with ``strategy`` and average the turn count.

        Args:
            strategy: Sequence indexed by square giving the dice (1, 2 or 3)
                to throw from that square.
            n_iterations: Number of independent games to simulate (>= 1).

        Returns:
            Mean number of turns needed to reach the last square.

        Raises:
            ValueError: If ``n_iterations`` is smaller than 1.
        """
        if n_iterations < 1:
            raise ValueError("n_iterations must be at least 1")

        # Index 0 -> dice 1 (safe), 1 -> dice 2 (normal), 2 -> dice 3 (risky).
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        n_squares = len(self.layout)
        goal = n_squares - 1

        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # every game starts on the first square
            while state < goal:
                action = strategy[state]
                transition_matrix = transition_matrices[int(action - 1)]
                state = np.random.choice(n_squares, p=transition_matrix[state])
                # Landing on a type-3 square (presumably the prison trap) costs
                # one extra turn: always with the risky dice, and with
                # probability 0.5 with the normal dice.
                if self.layout[state] == 3 and action == 2:
                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)
        return np.mean(number_turns)

    def _named_strategies(self):
        """Return the strategies to compare, keyed by their display name."""
        return {
            'Optimal': self.optimal_strategy,
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
            'Random': self.random_strategy,
        }

    def play_optimal_strategy(self, n_iterations=10000):
        """Return the mean number of turns obtained with the optimal strategy."""
        return self.simulate_game(self.optimal_strategy, n_iterations=n_iterations)

    def play_dice_strategy(self, n_iterations=10000):
        """Return the mean number of turns of each single-dice strategy.

        Returns:
            Dict with keys ``'SafeDice'``, ``'NormalDice'`` and ``'RiskyDice'``.
        """
        strategies = self._named_strategies()
        return {
            name: self.simulate_game(strategies[name], n_iterations=n_iterations)
            for name in ('SafeDice', 'NormalDice', 'RiskyDice')
        }

    def play_random_strategy(self, n_iterations=10000):
        """Return the mean number of turns obtained with the random strategy."""
        return self.simulate_game(self.random_strategy, n_iterations=n_iterations)

    def compare_strategies(self, num_games=1000):
        """Simulate every strategy and return its mean number of turns.

        Args:
            num_games: Number of games simulated per strategy.

        Returns:
            Dict mapping ``'Optimal'``, ``'SafeDice'``, ``'NormalDice'``,
            ``'RiskyDice'`` and ``'Random'`` to the simulated mean turn count.
        """
        return {
            name: self.simulate_game(strategy, n_iterations=num_games)
            for name, strategy in self._named_strategies().items()
        }
# Example usage
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False  # Example circle value
# Create an instance of validation. It is bound to its own name so the
# `validation` class itself is not shadowed by the instance.
validator = validation(layout, circle)
# Use the methods
validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
results = validator.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy, cost in results.items():
    print(f"{strategy}: {cost}")