# NOTE: the stray "Newer" / "Older" labels that stood here were diff-viewer residue, not program text.
import numpy as np

from tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
    """Value-iteration solver choosing between three dice on a 15-square board.

    The three actions are, in order, the "safe", "normal" and "risky" dice.
    Each has a transition matrix obtained from ``TransitionMatrixCalculator``;
    the normal and risky dice additionally come with a "jail" array whose
    per-square total is added to the cost of playing that die.

    Attributes set by ``__init__``:
        nSquares: number of squares on the board (15).
        precision: convergence tolerance on the summed absolute change.
        layout, circle: the constructor arguments, stored unchanged.
        matrix_safe, matrix_normal, matrix_risky: per-die transition matrices.
        jail_n, jail_r: jail arrays for the normal and risky dice.
        Dice: chosen die per square (1 = safe, 2 = normal, 3 = risky).
    """

    # Upper bound on sweeps so a non-converging model cannot loop forever.
    MAX_ITERATIONS = 1000

    # Cost charged for each turn played from a non-final square.
    # NOTE(review): the assignment lines of the update were missing from the
    # file; the per-turn cost of 1 is reconstructed -- confirm against the
    # intended model.
    STEP_COST = 1.0

    # Starting guess for the expected cost of each of the 15 squares.
    INITIAL_COSTS = (8.5, 7.5, 6.5, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 0)

    def __init__(self, layout: list, circle: bool):
        """Store the board description and build the per-die matrices.

        Args:
            layout: board description forwarded to the matrix calculator.
            circle: game-mode flag forwarded to the matrix calculator.
        """
        self.nSquares = 15
        self.precision = 1e-9
        self.layout = layout
        self.circle = circle
        # NOTE(review): the line creating the calculator was missing; a
        # no-argument constructor is assumed -- confirm against tmc.
        self.tmc_instance = tmc()
        self.matrix_safe = self.tmc_instance._compute_safe_matrix()
        self.matrix_normal, self.jail_n = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.matrix_risky, self.jail_r = self.tmc_instance._compute_risky_matrix(layout, circle)
        self.Dice = np.zeros(self.nSquares, dtype=int)

    def _jail_totals(self, jail):
        """Return, for each square, the sum of that square's jail entries."""
        # reshape lets this accept either one value per square or one row per square.
        return np.asarray(jail, dtype=float).reshape(self.nSquares, -1).sum(axis=1)

    def solve(self):
        """Run value iteration and return ``(expected_costs, dice)``.

        Returns:
            A pair of arrays of length ``nSquares``: the expected cost of each
            square, and the die chosen on each square (1 = safe, 2 = normal,
            3 = risky). The final square is not assigned a die and keeps 0.
            The dice array is also stored on ``self.Dice``.
        """
        n = self.nSquares
        goal = n - 1

        transitions = [
            np.asarray(matrix, dtype=float)
            for matrix in (self.matrix_safe, self.matrix_normal, self.matrix_risky)
        ]
        # One row per die; the safe die carries no jail cost.
        penalties = np.vstack((
            np.zeros(n),
            self._jail_totals(self.jail_n),
            self._jail_totals(self.jail_r),
        ))

        values = np.array(self.INITIAL_COSTS, dtype=float)
        if values.size != n:
            # The hard-coded guess only fits a 15-square board.
            values = np.zeros(n)

        for _ in range(self.MAX_ITERATIONS):
            previous = values
            # expected[a, k]: expected next cost when playing die a on square k.
            expected = np.vstack([matrix @ previous for matrix in transitions])
            values = self.STEP_COST + (expected + penalties).min(axis=0)
            # The final square is updated without step cost or jail cost.
            values[goal] = expected[:, goal].min()
            if np.abs(values - previous).sum() < self.precision:
                break

        # The best die depends only on the final values, so compute it once.
        final_costs = np.vstack([matrix @ values for matrix in transitions]) + penalties
        dice = np.zeros(n, dtype=int)
        # argmin returns the first minimum, so ties go to the lower-numbered die.
        # NOTE(review): the loop bounds were missing; the final square is
        # assumed to be excluded, as it is in the value update.
        dice[:goal] = np.argmin(final_costs[:, :goal], axis=0) + 1
        self.Dice = dice
        return values, dice
def markovDecision(layout: list, circle: bool):
    """Solve the game for one board and game mode.

    Builds a ``MarkovDecisionSolver`` from the two arguments and hands back
    whatever its ``solve()`` method returns.

    Args:
        layout: board description passed to the solver.
        circle: game-mode flag passed to the solver.
    """
    return MarkovDecisionSolver(layout, circle).solve()
# Example use of markovDecision with a sample layout, solved once per game mode.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve for both values of the circle flag.
result_false = markovDecision(layout, circle=False)
# This call was missing, so the second report failed with a NameError.
result_true = markovDecision(layout, circle=True)

# NOTE(review): confirm that each heading below matches the circle value of
# the result printed under it.
print("\nStopping on the square to win")
print("Expected costs for each square:")
print(result_false[0])
print("Dice choices for each square:")
print(result_false[1])

print("\nWin as soon as land on or overstep the final square")
print("Expected costs for each square:")
print(result_true[0])
print("Dice choices for each square:")
print(result_true[1])