Commit 04f5b4f1 authored by Adrien Payen

update files

parent dc96f8ad
import numpy as np
from tmc import TransitionMatrixCalculator as tmc


class MarkovDecisionSolver:
    def __init__(self, layout: list, circle: bool):
        self.Numberk = 15  # number of states k on the board
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        self.jail = [i for i, x in enumerate(layout) if x == 3]  # indices of the jail squares
        self.ValueI = np.zeros(self.Numberk)
        self.DiceForStates = np.zeros(self.Numberk - 1)

    def _compute_vi_safe(self, k):
        # Expected cost with the safe die: traps are never triggered.
        return np.dot(self.safe_dice[k], self.ValueI)
    def _compute_vi_normal(self, k):
        # Expected cost with the normal die: a trap on a jail square triggers with probability 0.5.
        vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail])
        return vi_normal

    def _compute_vi_risky(self, k):
        # Expected cost with the risky die: 100% chance of triggering the trap.
        vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])
        return vi_risky

    def solve(self):
        i = 0  # value-iteration counter
        while True:
            ValueINew = np.zeros(self.Numberk)
            i += 1
            for k in range(self.Numberk - 1):
                vi_safe = self._compute_vi_safe(k)
                vi_normal = self._compute_vi_normal(k)
                vi_risky = self._compute_vi_risky(k)
                # Bellman update: one turn plus the cheapest expected continuation.
                ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky)
                if ValueINew[k] == 1 + vi_safe:
                    self.DiceForStates[k] = 1
                elif ValueINew[k] == 1 + vi_normal:
                    self.DiceForStates[k] = 2
                else:
                    self.DiceForStates[k] = 3
            if np.allclose(ValueINew, self.ValueI):
                self.ValueI = ValueINew
                break
            self.ValueI = ValueINew
        Expec = self.ValueI[:-1]  # drop the final (terminal) square
        return [Expec, self.DiceForStates]


def markovDecision(layout: list, circle: bool):
    solver = MarkovDecisionSolver(layout, circle)
    return solver.solve()
# Example use of markovDecision with the layout and circle parameters
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the problem for the two game modes
result_false = markovDecision(layout, circle=False)
print("\nWin as soon as land on or overstep the final square")
print(result_false)

result_true = markovDecision(layout, circle=True)
print("\nStopping on the square to win")
print(result_true)
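The class can also be driven directly, without the markovDecision wrapper. A minimal sketch reusing the layout above; the die codes follow the assignments in solve() (1 = safe, 2 = normal, 3 = risky):

# Sketch: calling MarkovDecisionSolver directly instead of the wrapper.
solver = MarkovDecisionSolver(layout, circle=False)
expec, dice = solver.solve()
print(expec)  # expected number of turns from each square (final square excluded)
print(dice)   # optimal die per square: 1 = safe, 2 = normal, 3 = risky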
@@ -2,7 +2,7 @@ import numpy as np
 import random as rd
 import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import markovDecision as mD
+from test_files.markovDecision_testing import markovDecision as mD
 from validation import Validation
 
 def plot_results(validation_instance):
...
import numpy as np
from tmc import TransitionMatrixCalculator as tmc

# testing our TransitionMatrix function based on a random layout
# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout: list, circle: bool):
    Numberk = 15  # number of states k on the board
    tmc_instance = tmc()
    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)

    # Initialisation of the variables before the iteration
    ValueI = np.zeros(Numberk)  # value-iteration estimates
    jail = [i for i, x in enumerate(layout) if x == 3]  # indices of the jail squares on the board
    DiceForStates = np.zeros(Numberk - 1)  # dice choice for each state, initialised to 0
    i = 0  # value-iteration counter

    while True:
        ValueINew = np.zeros(Numberk)
        i += 1
        for k in range(Numberk - 1):
            vi_safe = np.sum(safe_dice[k] * ValueI)
            vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail])
            vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail])  # 100% chance of triggering the trap
            ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky)
            if ValueINew[k] == 1 + vi_safe:
                DiceForStates[k] = 1
            elif ValueINew[k] == 1 + vi_normal:
                DiceForStates[k] = 2
            else:
                DiceForStates[k] = 3
        if np.allclose(ValueINew, ValueI):
            ValueI = ValueINew
            break
        ValueI = ValueINew

    Expec = ValueI[:-1]
    return [Expec, DiceForStates]


layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

print("\nWin as soon as land on or overstep the final square")
print(markovDecision(layout, False))

print("\nStopping on the square to win")
print(markovDecision(layout, True))
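Since this commit keeps both implementations side by side (the class-based solver in markovDecision and this function kept under test_files, judging from the import in the plotting hunk above), a quick equivalence check can be run against both. A minimal sketch; the module paths are assumptions inferred from the imports shown in this commit:

# Sketch of a consistency check between the two implementations in this commit.
# Module paths are inferred from the imports above and may differ in the repository.
import numpy as np
from markovDecision import markovDecision as new_markovDecision
from test_files.markovDecision_testing import markovDecision as old_markovDecision

layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
for circle in (False, True):
    expec_new, dice_new = new_markovDecision(layout, circle)
    expec_old, dice_old = old_markovDecision(layout, circle)
    assert np.allclose(expec_new, expec_old)
    assert np.array_equal(dice_new, dice_old)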
@@ -2,7 +2,7 @@ import numpy as np
 from tmc import TransitionMatrixCalculator
 import random
 import matplotlib.pyplot as plt
-from markovDecision import markovDecision
+from markovDecision import MarkovDecisionSolver as mD
 
 class Validation:
     def __init__(self, layout, circle=False):
...
@@ -2,7 +2,7 @@ import random as rd
 import numpy as np
 import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import markovDecision as mD
+from markovDecision import MarkovDecisionSolver as mD
 
 class EmpiricalComparision :
     def __init__(self) :
...
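In both of these hunks the import now binds the MarkovDecisionSolver class instead of the old markovDecision function, so the call sites inside Validation and EmpiricalComparision presumably switch to constructing the class and calling solve(). A minimal sketch of the assumed pattern; the surrounding class bodies are not shown in this diff:

# Assumed call-site update after the import change (not visible in this diff).
from markovDecision import MarkovDecisionSolver as mD

layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
expec, dice = mD(layout, circle=False).solve()  # was: mD(layout, False) with the old function import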