From 04f5b4f18b1e9ea7b8b7855e6ad9cce62f3fd680 Mon Sep 17 00:00:00 2001 From: Adrienucl <adrien.payen@student.uclouvain.be> Date: Sat, 27 Apr 2024 11:37:23 +0200 Subject: [PATCH] update files --- markovDecision.py | 105 ++++++++++++++++----------- plot.py | 2 +- test_files/markovDecision_testing.py | 51 +++++++++++++ validation_ex.py | 2 +- validation_test.py | 2 +- 5 files changed, 116 insertions(+), 46 deletions(-) create mode 100644 test_files/markovDecision_testing.py diff --git a/markovDecision.py b/markovDecision.py index 39e9e26..1383600 100644 --- a/markovDecision.py +++ b/markovDecision.py @@ -1,51 +1,70 @@ import numpy as np from tmc import TransitionMatrixCalculator as tmc +class MarkovDecisionSolver: + def __init__(self, layout : list, circle : bool): + self.Numberk = 15 + self.tmc_instance = tmc() + self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle) + self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) + self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + self.jail = [i for i, x in enumerate(layout) if x == 3] + self.ValueI = np.zeros(self.Numberk) + self.DiceForStates = np.zeros(self.Numberk - 1) -# testing our TransitionMatrix function based on random layout -# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -def markovDecision(layout : list, circle : bool) : - - Numberk = 15 # Number of states k on the board - tmc_instance = tmc() - safe_dice = tmc_instance._compute_safe_matrix(layout, circle) - normal_dice = tmc_instance._compute_normal_matrix(layout, circle) - risky_dice = tmc_instance._compute_risky_matrix(layout, circle) - - # Initialisation of the variables before the iteration - ValueI = np.zeros(Numberk) # Algorithm of Value iteration - jail = [i for i, x in enumerate(layout) if x == 3] # For all the jailsquare on the board - DiceForStates = np.zeros(Numberk - 1) # Set the each states as O - i = 0 # set the iteration of Value - - while True : - ValueINew = np.zeros(Numberk) - i += 1 # iter + 1 - - for k in range(Numberk - 1) : - vi_safe = np.sum(safe_dice[k] * ValueI) - vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail]) - vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap - ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky) - - if ValueINew[k] == 1 + vi_safe : - DiceForStates[k] = 1 - elif ValueINew[k] == 1 + vi_normal : - DiceForStates[k] = 2 - else : - DiceForStates[k] = 3 - - if np.allclose(ValueINew, ValueI) : - ValueI = ValueINew - break - - ValueI = ValueINew - - Expec = ValueI[:-1] - return [Expec, DiceForStates] + def _compute_vi_safe(self, k): + return np.dot(self.safe_dice[k], self.ValueI) + def _compute_vi_normal(self, k): + vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail]) + return vi_normal + + def _compute_vi_risky(self, k): + vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail]) + return vi_risky + + def solve(self): + i = 0 + while True: + ValueINew = np.zeros(self.Numberk) + i += 1 + + for k in range(self.Numberk - 1): + vi_safe = self._compute_vi_safe(k) + vi_normal = self._compute_vi_normal(k) + vi_risky = self._compute_vi_risky(k) + + ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky) + + if ValueINew[k] == 1 + vi_safe: + self.DiceForStates[k] = 1 + elif ValueINew[k] == 1 + vi_normal: + self.DiceForStates[k] = 2 + else: + self.DiceForStates[k] = 3 + + if np.allclose(ValueINew, self.ValueI): + self.ValueI = ValueINew + break + + self.ValueI = ValueINew + + Expec = self.ValueI[:-1] + return [Expec, self.DiceForStates] + +def markovDecision(layout : list, circle : bool): + solver = MarkovDecisionSolver(layout, circle) + return solver.solve() + + +# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] + +# Résolution du problème avec différents modes de jeu +result_false = markovDecision(layout, circle=False) print("\nWin as soon as land on or overstep the final square") -print(markovDecision(layout, False)) +print(result_false) + +result_true = markovDecision(layout, circle=True) print("\nStopping on the square to win") -print(markovDecision(layout, True)) +print(result_true) diff --git a/plot.py b/plot.py index c08062b..68b62cd 100644 --- a/plot.py +++ b/plot.py @@ -2,7 +2,7 @@ import numpy as np import random as rd import matplotlib.pyplot as plt from tmc import TransitionMatrixCalculator as tmc -from markovDecision import markovDecision as mD +from test_files.markovDecision_testing import markovDecision as mD from validation import Validation def plot_results(validation_instance): diff --git a/test_files/markovDecision_testing.py b/test_files/markovDecision_testing.py new file mode 100644 index 0000000..39e9e26 --- /dev/null +++ b/test_files/markovDecision_testing.py @@ -0,0 +1,51 @@ +import numpy as np +from tmc import TransitionMatrixCalculator as tmc + + +# testing our TransitionMatrix function based on random layout +# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +def markovDecision(layout : list, circle : bool) : + + Numberk = 15 # Number of states k on the board + tmc_instance = tmc() + safe_dice = tmc_instance._compute_safe_matrix(layout, circle) + normal_dice = tmc_instance._compute_normal_matrix(layout, circle) + risky_dice = tmc_instance._compute_risky_matrix(layout, circle) + + # Initialisation of the variables before the iteration + ValueI = np.zeros(Numberk) # Algorithm of Value iteration + jail = [i for i, x in enumerate(layout) if x == 3] # For all the jailsquare on the board + DiceForStates = np.zeros(Numberk - 1) # Set the each states as O + i = 0 # set the iteration of Value + + while True : + ValueINew = np.zeros(Numberk) + i += 1 # iter + 1 + + for k in range(Numberk - 1) : + vi_safe = np.sum(safe_dice[k] * ValueI) + vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail]) + vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap + ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky) + + if ValueINew[k] == 1 + vi_safe : + DiceForStates[k] = 1 + elif ValueINew[k] == 1 + vi_normal : + DiceForStates[k] = 2 + else : + DiceForStates[k] = 3 + + if np.allclose(ValueINew, ValueI) : + ValueI = ValueINew + break + + ValueI = ValueINew + + Expec = ValueI[:-1] + return [Expec, DiceForStates] + +layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +print("\nWin as soon as land on or overstep the final square") +print(markovDecision(layout, False)) +print("\nStopping on the square to win") +print(markovDecision(layout, True)) diff --git a/validation_ex.py b/validation_ex.py index 4ea062a..151431c 100644 --- a/validation_ex.py +++ b/validation_ex.py @@ -2,7 +2,7 @@ import numpy as np from tmc import TransitionMatrixCalculator import random import matplotlib.pyplot as plt -from markovDecision import markovDecision +from markovDecision import MarkovDecisionSolver as mD class Validation: def __init__(self, layout, circle=False): diff --git a/validation_test.py b/validation_test.py index 0c8d7d3..aaba7d2 100644 --- a/validation_test.py +++ b/validation_test.py @@ -2,7 +2,7 @@ import random as rd import numpy as np import matplotlib.pyplot as plt from tmc import TransitionMatrixCalculator as tmc -from markovDecision import markovDecision as mD +from markovDecision import MarkovDecisionSolver as mD class EmpiricalComparision : def __init__(self) : -- GitLab