# markovDecision_testing.py

import numpy as np
from tmc import TransitionMatrixCalculator as tmc


# Testing our TransitionMatrixCalculator-based value iteration on a sample layout:
# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout: list, circle: bool):
    """Run value iteration on the board and return expected costs and dice.

    For every non-final state the three dice (safe, normal, risky) are
    compared through their transition matrices, and the one with the lowest
    expected cost is kept.

    Parameters
    ----------
    layout : list
        One entry per square of the board. Squares whose value is 3 are
        treated here as jail squares that add an extra cost; every other
        value is only interpreted by the transition-matrix calculator.
    circle : bool
        Forwarded unchanged to the transition-matrix calculator.
        NOTE(review): presumably True means the final square must be hit
        exactly and False means overshooting also wins -- confirm in tmc.

    Returns
    -------
    list
        ``[Expec, DiceForStates]`` where ``Expec`` holds the converged value
        of each of the first 14 states and ``DiceForStates`` holds, as
        floats, the chosen die per state (1 = safe, 2 = normal, 3 = risky).
    """
    Numberk = 15  # Number of states k on the board

    tmc_instance = tmc()
    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)

    # Indices of every jail square (layout value 3) on the board.
    jail = [i for i, x in enumerate(layout) if x == 3]

    # The final state is terminal: its value stays 0 and it needs no die.
    playable = range(Numberk - 1)

    # Extra cost for landing on a jail square. It does not depend on the
    # current value estimates, so it is computed once instead of every sweep.
    # The risky die counts the full probability mass (trap always triggers);
    # the normal die counts half of it (presumably a 50% trigger chance).
    normal_penalty = [0.5 * np.sum(normal_dice[k][jail]) for k in playable]
    risky_penalty = [np.sum(risky_dice[k][jail]) for k in playable]

    ValueI = np.zeros(Numberk)  # current value estimates
    DiceForStates = np.zeros(Numberk - 1)  # best die per non-final state

    while True:
        ValueINew = np.zeros(Numberk)

        for k in playable:
            # Expected cost-to-go for each die, in the order safe/normal/risky.
            costs = (
                np.sum(safe_dice[k] * ValueI),
                np.sum(normal_dice[k] * ValueI) + normal_penalty[k],
                np.sum(risky_dice[k] * ValueI) + risky_penalty[k],
            )
            # argmin returns the first minimum, so ties resolve to the safe
            # die first, then normal, then risky -- without relying on float
            # equality between recomputed values.
            best = int(np.argmin(costs))
            ValueINew[k] = 1 + costs[best]  # one turn spent plus cost-to-go
            DiceForStates[k] = best + 1

        converged = np.allclose(ValueINew, ValueI)
        ValueI = ValueINew
        if converged:
            break

    Expec = ValueI[:-1]  # drop the terminal state, whose value is always 0
    return [Expec, DiceForStates]

# Sample board used for the demo run below.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board once per value of the `circle` flag, printing a banner
# before each result.
for _banner, _circle in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(_banner)
    print(markovDecision(layout, _circle))