# markovDecision.py
import numpy as np

from tmc import TransitionMatrixCalculator as tmc
    
class MarkovDecisionProcess:

    def __init__(self, layout: list, circle: bool):
        # Initialize the Markov Decision Process solver with the board layout
        # and game mode (circle or not)
        self.Numberk = 15  # number of squares on the board
        self.tmc_instance = tmc()
    
        # Compute transition matrices for the safe, normal, and risky dice
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
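
        # Assumed shape, inferred from the indexing in the _compute_vi_* helpers
        # below: each *_dice matrix is Numberk x Numberk, with row k holding the
        # probabilities of moving from square k to every square in one throw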
    
        # Identify the jail squares in the layout (trap code 3)
        self.jail = [i for i, x in enumerate(layout) if x == 3]
    
        # Initialize the value and dice-decision arrays
        self.ValueI = np.zeros(self.Numberk)
        self.Dice = np.zeros(self.Numberk - 1)  # no decision on the final square
    
    def _compute_vi_safe(self, k: int):
        # Expected cost-to-go when throwing the safe die from square k;
        # the safe die never triggers traps, so no jail penalty is added
        return np.dot(self.safe_dice[k], self.ValueI)
    
    def _compute_vi_normal(self, k: int):
        # Expected cost-to-go when throwing the normal die from square k, plus
        # the expected extra turn spent when landing on a jail square
        vi_normal = np.dot(self.normal_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
    
        return vi_normal
    
    
    def _compute_vi_risky(self, k: int):
        # Expected cost-to-go when throwing the risky die from square k, plus
        # the expected extra turn spent when landing on a jail square
        vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])
    
        return vi_risky
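
    # The three helpers above supply the per-die terms of the Bellman
    # recurrence that solve() iterates, in cost form (each throw costs one
    # turn):
    #
    #   V(k) = 1 + min_d [ sum_j P_d(k, j) * V(j) + sum_{j in jail} P_d(k, j) ]
    #
    # where P_d is the transition matrix of die d; the jail sum charges the
    # expected extra turn spent frozen in jail (omitted for the safe die,
    # which cannot trigger traps)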
    
    def solve(self):
        # Value iteration: update the value of every square until the values
        # stop changing
        while True:
            ValueINew = np.zeros(self.Numberk)
    
            for k in range(self.Numberk - 1):
    
                # Compute the expected values of the safe, normal, and risky
                # dice at square k
                vi_safe = self._compute_vi_safe(k)
                vi_normal = self._compute_vi_normal(k)
                vi_risky = self._compute_vi_risky(k)

                # Keep the die with the smallest expected cost
                min_value = min(vi_safe, vi_normal, vi_risky)
    
                # Record the optimal die (safe=1, normal=2, risky=3) and the new
                # value: one turn for the throw plus the expected cost-to-go
                if min_value == vi_safe:
                    ValueINew[k] = 1 + vi_safe
                    self.Dice[k] = 1
    
                elif min_value == vi_normal:
                    ValueINew[k] = 1 + vi_normal
                    self.Dice[k] = 2
    
                else:
                    ValueINew[k] = 1 + vi_risky
                    self.Dice[k] = 3
    
            # Check for convergence
            if np.allclose(ValueINew, self.ValueI):
                self.ValueI = ValueINew
                break

            self.ValueI = ValueINew
    
    
        # Return the expected number of turns and the optimal die for every
        # square except the final one
        Expec = self.ValueI[:-1]
        return [Expec, self.Dice]
    
    
def markovDecision(layout: list, circle: bool):
    
    # Solve the Markov Decision Problem for the given layout and game mode
    solver = MarkovDecisionProcess(layout, circle)
    return solver.solve()
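

# Example usage, as a minimal sketch: a hypothetical 15-square layout in which
# squares 4 and 9 carry trap code 3 (jail, the only code this module checks
# directly); the meaning of the other codes is defined by
# TransitionMatrixCalculator
if __name__ == "__main__":
    layout = [0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0]
    expec, dice = markovDecision(layout, circle=False)
    print("Expected turns per square:", expec)
    print("Optimal die per square (1=safe, 2=normal, 3=risky):", dice)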