Skip to content
Extraits de code Groupes Projets
Valider 4fdf8e94 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update markovDecision

parent e66a9a26
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
import numpy as np import numpy as np
from tmc import * from tmc import TransitionMatrixCalculator as tmc
from tmc import TransitionMatrixCalculator
# Testing our transition-matrix functions on a sample (randomly chosen) layout:
# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout, circle):
    """Find the cost-minimising die for every square by value iteration.

    Parameters
    ----------
    layout : sequence of int
        One entry per square of the board. The only value inspected here
        is 3, which marks a jail square; the full layout is forwarded to
        the transition-matrix builders.
    circle : bool
        Forwarded unchanged to the transition-matrix builders.

    Returns
    -------
    list
        ``[Expec, Dice]``: ``Expec`` is the expected cost (number of turns)
        of every state except the last one, and ``Dice`` the die chosen in
        each of those states (1 = safe, 2 = normal, 3 = risky).
    """
    n_states = 15  # number of states k on the board (matrices are 15 x 15)

    calculator = tmc()
    safe_dice = calculator._compute_safe_matrix(layout, circle)
    normal_dice = calculator._compute_normal_matrix(layout, circle)
    risky_dice = calculator._compute_risky_matrix(layout, circle)

    # Indices of all jail squares on the board.
    jail = [i for i, x in enumerate(layout) if x == 3]

    ValueI = np.zeros(n_states)             # current value estimate
    DiceForStates = np.zeros(n_states - 1)  # best die per non-final state

    while True:
        # The last state keeps a value of 0: it is never updated below.
        ValueINew = np.zeros(n_states)

        for k in range(n_states - 1):
            # Expected future cost of each die from state k. Landing on a
            # jail square adds one extra turn, weighted by the chance that
            # the die triggers the trap (normal: 50 %, risky: 100 %).
            # NOTE(review): presumably the matrices do not already include
            # this extra turn -- confirm against TransitionMatrixCalculator.
            vi_safe = np.sum(safe_dice[k] * ValueI)
            vi_normal = (np.sum(normal_dice[k] * ValueI)
                         + 0.5 * np.sum(normal_dice[k][jail]))
            vi_risky = (np.sum(risky_dice[k] * ValueI)
                        + np.sum(risky_dice[k][jail]))

            # argmin returns the first minimum, so ties resolve in the
            # order safe, normal, risky.
            costs = (vi_safe, vi_normal, vi_risky)
            best = int(np.argmin(costs))
            ValueINew[k] = 1 + costs[best]
            DiceForStates[k] = best + 1

        converged = np.allclose(ValueINew, ValueI)
        ValueI = ValueINew
        if converged:
            break

    Expec = ValueI[:-1]
    return [Expec, DiceForStates]
if __name__ == "__main__":
    # Demo run on a sample board, once with circle=False and once with
    # circle=True; guarded so that importing this module prints nothing.
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    print(markovDecision(layout, False))
    print(markovDecision(layout, True))
...@@ -9,9 +9,12 @@ class TransitionMatrixCalculator: ...@@ -9,9 +9,12 @@ class TransitionMatrixCalculator:
self.matrix_normal = np.zeros((15, 15)) self.matrix_normal = np.zeros((15, 15))
self.matrix_risky = np.zeros((15, 15)) self.matrix_risky = np.zeros((15, 15))
# Probability to go from state k to k' # Probability to go from state k to k'
self.safe_dice = np.array([1/2,1/2]) safe_dice = np.array([1/2, 1/2])
self.normal_dice = np.array([1/3,1/3,1/3]) normal_dice = np.array([1/3, 1/3, 1/3])
self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) risky_dice = np.array([1/4, 1/4, 1/4, 1/4])
self.safe_dice = safe_dice
self.normal_dice = normal_dice
self.risky_dice = risky_dice
def compute_transition_matrix(self, layout, circle=False): def compute_transition_matrix(self, layout, circle=False):
self.matrix_safe.fill(0) self.matrix_safe.fill(0)
...@@ -41,7 +44,7 @@ class TransitionMatrixCalculator: ...@@ -41,7 +44,7 @@ class TransitionMatrixCalculator:
k_prime = k + s k_prime = k + s
k_prime = min(14, k_prime) k_prime = min(14, k_prime)
self.matrix_safe[k,k_prime] += p self.matrix_safe[k,k_prime] += p
return self.matrix_safe return self.matrix_safe
def _compute_normal_matrix(self, layout, circle): def _compute_normal_matrix(self, layout, circle):
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter