# NOTE(review): the tokens "Newer" / "Older" that preceded this file appear to be
# version-control / diff-viewer artifacts accidentally pasted in; they are not valid Python.
from tmc import TransitionMatrixCalculator as tmc
# Initialize the Markov Decision Process solver with layout and game mode (circle or not)
# NOTE(review): fragment of an __init__ body — the enclosing `def` is outside this view;
# `self` is the solver instance and `layout`/`circle` are presumably constructor arguments.
# Compute transition matrices for safe, normal, and risky dice via the TransitionMatrixCalculator.
self.safe_dice = self.tmc_instance._compute_safe_matrix()
# The normal/risky builders return a (matrix, extra) tuple; the second element is discarded here.
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
# Identify jail states in the layout
def _compute_vi_safe(self, k : int ):
# Compute the expected value using safe dice transition matrix for state k
return np.dot(self.safe_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
def _compute_vi_normal(self, k : int ):
# Compute the expected value using normal dice transition matrix for state k
vi_normal = np.dot(self.normal_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
def _compute_vi_risky(self, k : int ):
# Compute the expected value using risky dice transition matrix for state k
vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])
# Iteratively solve the Markov Decision Process until convergence (value iteration).
# NOTE(review): this is the interior of a solve() method whose `def` is outside this
# view; indentation was reconstructed because the original chunk had none.
i = 0  # iteration counter (diagnostic only; not used for termination)
while True:
    ValueINew = np.zeros(self.Numberk)
    i += 1
    # Last state is the goal: its value stays 0, so iterate only up to Numberk - 1.
    for k in range(self.Numberk - 1):
        # Expected cost-to-go for each of the three dice choices at state k
        vi_safe = self._compute_vi_safe(k)
        vi_normal = self._compute_vi_normal(k)
        vi_risky = self._compute_vi_risky(k)
        # Bellman update: one turn (cost 1) plus the cheapest expected continuation
        min_value = min(vi_safe, vi_normal, vi_risky)
        if min_value == vi_safe:
            ValueINew[k] = 1 + vi_safe
        elif min_value == vi_normal:
            ValueINew[k] = 1 + vi_normal
        else:
            # Fix: the risky branch was missing, so ValueINew[k] stayed 0 whenever
            # the risky dice was optimal, corrupting the value function.
            ValueINew[k] = 1 + vi_risky
        # NOTE(review): the surrounding comments mention recording the dice decision
        # (safe=1, normal=2, risky=3) but no decision array is written here — confirm
        # whether a self.DiceI-style assignment was lost from this chunk.
    # Converged once the value function stops changing between sweeps.
    if np.allclose(ValueINew, self.ValueI):
        self.ValueI = ValueINew
        break
    self.ValueI = ValueINew
# Return the expected values and dice decisions for each state
# NOTE(review): fragment — the enclosing function's `def` lies outside this view;
# `layout` and `circle` are presumably its parameters.
# Solve the Markov Decision Problem for the given layout and game mode
solver = MarkovDecisionProcess(layout, circle)
return solver.solve()