Skip to content
Extraits de code Groupes Projets
markovDecision_testing.py 1,89 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    import numpy as np
    from tmc import TransitionMatrixCalculator as tmc
    
    
    # testing our TransitionMatrix function based on random layout
    # [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    def markovDecision(layout: list, circle: bool) -> list:
        """Run value iteration over the board and pick the cheapest die per square.

        Three transition matrices (safe, normal, risky) are obtained from the
        ``tmc`` calculator for the given ``layout`` and ``circle`` flag.  Each
        sweep charges a cost of 1 per state plus the expected value of the next
        state; squares whose layout value is 3 (the "jail" squares) add an extra
        penalty weighted by the probability of landing on them: half of it for
        the normal die and all of it for the risky die.

        Args:
            layout: Board description, one integer per square.  Only the value
                3 is interpreted here; the whole list is forwarded to the
                matrix builders.
            circle: Forwarded unchanged to the matrix builders (the script that
                calls this function describes ``True`` as "stopping on the
                square to win").

        Returns:
            ``[Expec, DiceForStates]`` where ``Expec`` holds the converged value
            of every state except the last one, and ``DiceForStates`` holds, as
            floats, the selected die for those same states
            (1 = safe, 2 = normal, 3 = risky).
        """
        # Board size is fixed at 15 states; the last state is never updated and
        # therefore keeps a value of 0.
        # NOTE(review): presumably the tmc matrices are 15x15 as well - confirm.
        n_states = 15

        calculator = tmc()
        safe_dice = calculator._compute_safe_matrix(layout, circle)
        normal_dice = calculator._compute_normal_matrix(layout, circle)
        risky_dice = calculator._compute_risky_matrix(layout, circle)

        # Indices of the squares that carry the extra one-turn penalty.
        jail = [idx for idx, square in enumerate(layout) if square == 3]

        values = np.zeros(n_states)
        dice_for_states = np.zeros(n_states - 1)

        while True:
            new_values = np.zeros(n_states)

            for k in range(n_states - 1):
                # Expected cost of each die, computed from the previous sweep.
                costs = (
                    np.sum(safe_dice[k] * values),
                    np.sum(normal_dice[k] * values)
                    + 0.5 * np.sum(normal_dice[k][jail]),
                    np.sum(risky_dice[k] * values)
                    + np.sum(risky_dice[k][jail]),
                )

                # Select on the raw costs rather than re-comparing "1 + cost"
                # with ==, which can merge distinct costs after rounding.
                # argmin returns the first minimum, so exact ties still favour
                # the safe die, then the normal die.
                best = int(np.argmin(costs))
                new_values[k] = 1 + costs[best]
                dice_for_states[k] = best + 1

            converged = np.allclose(new_values, values)
            values = new_values
            if converged:
                break

        return [values[:-1], dice_for_states]
    
    # Demo run on a sample board, once for each value of the ``circle`` flag.
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    for _banner, _circle in (
        ("\nWin as soon as land on or overstep the final square", False),
        ("\nStopping on the square to win", True),
    ):
        print(_banner)
        print(markovDecision(layout, _circle))