Skip to content
Extraits de code Groupes Projets
mdppp.py 2,45 ko
Newer Older
  • Learn to ignore specific revisions
  • import numpy as np
    from ancien.tmc import TransitionMatrixCalculator as tmc
    
    class MarkovDecisionSolver:
        """Value-iteration solver choosing the best die on every square of a board.

        For every non-final square the solver estimates the expected cost
        (1 per turn, plus a jail penalty) of reaching the last square, and
        records which die achieves it. Dice are encoded as 1 = safe,
        2 = normal, 3 = risky.

        Attributes:
            Numberk: number of squares handled by the solver (hard-coded to 15).
            safe_dice, normal_dice, risky_dice: transition matrices supplied by
                ``TransitionMatrixCalculator``; row ``k`` is dotted with the
                value vector, so it is read as the distribution of the next
                square when the die is thrown from square ``k``.
            jail: indices of the squares whose layout code is 3.
            ValueI: current value estimate for every square (last entry stays 0).
            DiceForStates: best die found so far for every non-final square.
        """

        def __init__(self, layout: list, circle: bool):
            """Build the three transition matrices and zero-initialise the state.

            Args:
                layout: per-square trap codes; squares equal to 3 are treated as
                    jail squares. NOTE(review): presumably must hold exactly
                    ``Numberk`` (15) entries -- confirm against the matrix
                    calculator.
                circle: forwarded unchanged to the normal/risky matrix builders.
            """
            self.Numberk = 15
            self.tmc_instance = tmc()
            # The safe matrix is built without layout/circle, unlike the other two.
            self.safe_dice = self.tmc_instance._compute_safe_matrix()
            self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
            self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
            self.jail = [i for i, x in enumerate(layout) if x == 3]
            self.ValueI = np.zeros(self.Numberk)
            self.DiceForStates = np.zeros(self.Numberk - 1)

        def _compute_vi_safe(self, k):
            """Return the expected next-state value when the safe die is thrown from ``k``."""
            return np.dot(self.safe_dice[k], self.ValueI)

        def _compute_vi_normal(self, k):
            """Return the expected next-state value for the normal die from ``k``.

            Adds half of the probability mass landing on jail squares as an
            extra cost (presumably because the normal die only triggers traps
            half of the time -- confirm against the game rules).
            """
            jail_mass = np.sum(self.normal_dice[k][self.jail])
            return np.dot(self.normal_dice[k], self.ValueI) + 0.5 * jail_mass

        def _compute_vi_risky(self, k):
            """Return the expected next-state value for the risky die from ``k``.

            Adds the full probability mass landing on jail squares as an extra
            cost.
            """
            jail_mass = np.sum(self.risky_dice[k][self.jail])
            return np.dot(self.risky_dice[k], self.ValueI) + jail_mass

        def solve(self):
            """Run value iteration until two successive estimates are close.

            Every sweep recomputes all non-final squares from the previous
            estimate (the new vector is only swapped in at the end of the
            sweep). Convergence is tested with ``np.allclose`` and its default
            tolerances.

            Returns:
                A two-element list ``[Expec, Dice]`` where ``Expec`` holds the
                expected cost for every square but the last and ``Dice`` holds
                the selected die (1, 2 or 3, stored as floats) for those squares.
            """
            while True:
                ValueINew = np.zeros(self.Numberk)

                for k in range(self.Numberk - 1):
                    # Order matters: argmin returns the first minimum, so ties
                    # resolve to safe, then normal, then risky.
                    costs = (
                        self._compute_vi_safe(k),
                        self._compute_vi_normal(k),
                        self._compute_vi_risky(k),
                    )
                    best = int(np.argmin(costs))
                    ValueINew[k] = 1 + costs[best]
                    self.DiceForStates[k] = best + 1

                # Compare against the previous estimate before replacing it.
                converged = np.allclose(ValueINew, self.ValueI)
                self.ValueI = ValueINew
                if converged:
                    break

            return [self.ValueI[:-1], self.DiceForStates]
    
    def markovDecision(layout: list, circle: bool):
        """Solve the board described by ``layout`` and return the solver's result.

        Builds a ``MarkovDecisionSolver`` from ``layout`` and ``circle`` and
        returns whatever its ``solve()`` method returns.
        """
        return MarkovDecisionSolver(layout, circle).solve()
    
    
    # Example use of markovDecision with the layout and circle parameters.
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]


    # Solve the same board under both game modes (circle off, then circle on).
    result_false = markovDecision(layout, circle=False)
    print("\nWin as soon as land on or overstep the final square", result_false, sep="\n")

    result_true = markovDecision(layout, circle=True)
    print("\nStopping on the square to win", result_true, sep="\n")