# md_test.py

import numpy as np
from tmc import TransitionMatrixCalculator as tmc

def markov_decision(layout: list, circle: bool):
    """Run value iteration over the board and pick the best die per square.

    Three transition matrices (safe, normal, risky) are obtained from
    ``TransitionMatrixCalculator`` for the given ``layout`` and ``circle``
    flag.  Starting from an all-zero estimate, each sweep sets, for every
    state ``k`` except the last one::

        value[k] = 1 + min over dice of
                   sum_j P[k, j] * value[j] + weight * sum_{j in jail} P[k, j]

    where ``jail`` is the set of indices whose layout entry equals 3.  The
    last state is never updated and therefore keeps the value 0.  Sweeps
    repeat until two successive estimates satisfy ``np.allclose``.

    Args:
        layout: Per-square codes; entries equal to 3 are treated as jail
            squares here.  Presumably 15 entries, matching the hard-coded
            state count -- confirm against ``tmc``.
        circle: Forwarded unchanged to the transition-matrix builders.

    Returns:
        A tuple ``(values, dice)``: ``values`` is the converged estimate for
        every state but the last, and ``dice`` is a float array of the same
        length holding 1 (safe), 2 (normal) or 3 (risky) for each state.
        Ties go to the lower-numbered die.
    """
    n_states = 15
    calculator = tmc()

    # Each matrix is paired with its jail weight explicitly.  Deciding the
    # weight with an identity test against the full normal matrix never
    # matches, because only single rows are ever evaluated, so the 0.5
    # weight for the normal die would silently never be applied.
    # NOTE(review): the safe die keeps weight 1.0, as the original
    # expression gave it; confirm against the game rules whether the safe
    # die should incur a jail cost at all.
    dice = (
        (calculator._compute_safe_matrix(layout, circle), 1.0),
        (calculator._compute_normal_matrix(layout, circle), 0.5),
        (calculator._compute_risky_matrix(layout, circle), 1.0),
    )

    jail = [i for i, x in enumerate(layout) if x == 3]

    # The jail term does not depend on the current value estimate, so it is
    # computed once per (die, state) instead of on every sweep.
    jail_cost = [
        [weight * np.sum(matrix[k][jail]) for k in range(n_states - 1)]
        for matrix, weight in dice
    ]

    value = np.zeros(n_states)
    dice_for_states = np.zeros(n_states - 1)

    while True:
        new_value = np.zeros(n_states)

        for k in range(n_states - 1):
            costs = [
                np.sum(matrix[k] * value) + penalty[k]
                for (matrix, _), penalty in zip(dice, jail_cost)
            ]
            # argmin returns the first minimum, so ties resolve in the
            # order safe, normal, risky.
            best = int(np.argmin(costs))
            new_value[k] = 1 + costs[best]
            dice_for_states[k] = best + 1

        converged = np.allclose(new_value, value)
        value = new_value
        if converged:
            break

    return value[:-1], dice_for_states

# Demo board passed to markov_decision; entries equal to 3 are the ones the
# solver treats as jail squares.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board once per `circle` setting, printing the banner before
# each solve so the output order is banner, result, banner, result.
for _banner, _circle in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(_banner)
    print(markov_decision(layout, _circle))