Skip to content
Extraits de code Groupes Projets
Valider 11500a5b rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

changing the files

parent 2998aec3
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Fichier ajouté
Fichier ajouté
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
# Run value iteration on the transition matrices built by TransitionMatrixCalculator.
# Example layout: [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout, circle):
    """Run value iteration to find the best die for every non-goal square.

    Args:
        layout: Sequence of 15 square codes; code 3 marks a prison square.
        circle: Forwarded unchanged to the transition-matrix calculator.

    Returns:
        A two-element list ``[Expec, DiceForStates]``. ``Expec`` holds the
        converged expected cost for squares 0-13 and ``DiceForStates`` the
        selected die for the same squares (1 = safe, 2 = normal, 3 = risky),
        stored as floats.
    """
    n_states = 15  # Number of squares on the board; the last one is the goal.

    calculator = tmc()
    safe_dice = calculator._compute_safe_matrix(layout, circle)
    normal_dice = calculator._compute_normal_matrix(layout, circle)
    risky_dice = calculator._compute_risky_matrix(layout, circle)

    # Expected extra turn lost in prison, per starting square and per die.
    # The safe die never triggers a trap, the normal die triggers it half of
    # the time and the risky die always does. The risky penalty must be read
    # from the risky matrix (it used to be read from the normal one).
    jail = [square for square, kind in enumerate(layout) if kind == 3]
    normal_jail_cost = 0.5 * normal_dice[:, jail].sum(axis=1)
    risky_jail_cost = risky_dice[:, jail].sum(axis=1)

    values = np.zeros(n_states)
    while True:
        # One row per die, one column per non-goal square.
        candidates = np.vstack((
            safe_dice @ values,
            normal_dice @ values + normal_jail_cost,
            risky_dice @ values + risky_jail_cost,
        ))[:, :n_states - 1]

        updated = np.zeros(n_states)  # The goal keeps a cost of zero.
        updated[:-1] = 1 + candidates.min(axis=0)

        # argmin returns the first minimum, so ties favour safe, then normal.
        best_dice = candidates.argmin(axis=0) + 1.0

        converged = np.allclose(updated, values)
        values = updated
        if converged:
            break

    return [values[:-1], best_dice]
# Demo board: prisons on squares 2 and 10, a type-2 trap on square 6 and a
# type-1 trap on square 13.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board under both end-of-game rules.
for banner, circle in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(banner)
    print(markovDecision(layout, circle))
import numpy as np
from tmc_test import TransitionMatrixCalculator as tmc
def markovDecision(layout, circle):
    """Run value iteration to find the best die for every non-goal square.

    Args:
        layout: Sequence of 15 square codes; code 3 marks a prison square.
        circle: Forwarded unchanged to the transition-matrix calculator.

    Returns:
        A two-element list ``[Expec, DiceForStates]``. ``Expec`` holds the
        converged expected cost for squares 0-13 and ``DiceForStates`` the
        selected die for the same squares (1 = safe, 2 = normal, 3 = risky),
        stored as floats.
    """
    n_states = 15  # Number of squares on the board; the last one is the goal.

    calculator = tmc()
    safe_matrix, normal_matrix, risky_matrix = (
        calculator.compute_transition_matrix(layout, circle)
    )

    # Expected extra turn lost in prison, per starting square and per die.
    # The safe die never triggers a trap, the normal die triggers it half of
    # the time and the risky die always does. The risky penalty must be read
    # from the risky matrix (it used to be read from the normal one).
    jail = [square for square, kind in enumerate(layout) if kind == 3]
    normal_jail_cost = 0.5 * normal_matrix[:, jail].sum(axis=1)
    risky_jail_cost = risky_matrix[:, jail].sum(axis=1)

    values = np.zeros(n_states)
    while True:
        # One row per die, one column per non-goal square.
        candidates = np.vstack((
            safe_matrix @ values,
            normal_matrix @ values + normal_jail_cost,
            risky_matrix @ values + risky_jail_cost,
        ))[:, :n_states - 1]

        updated = np.zeros(n_states)  # The goal keeps a cost of zero.
        updated[:-1] = 1 + candidates.min(axis=0)

        # argmin returns the first minimum, so ties favour safe, then normal.
        best_dice = candidates.argmin(axis=0) + 1.0

        converged = np.allclose(updated, values)
        values = updated
        if converged:
            break

    return [values[:-1], best_dice]
# Demo board: prisons on squares 2 and 10, a type-2 trap on square 6 and a
# type-1 trap on square 13.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board under both end-of-game rules.
for banner, circle in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(banner)
    print(markovDecision(layout, circle))
import numpy as np
import random as rd
class TransitionMatrixCalculator:
    """Build the 15x15 transition matrices of the board for the three dice.

    Board indices used by this class: squares 0-9 form the slow lane,
    squares 10-13 the fast lane and square 14 is the goal. From square 2 a
    move of at least one step enters either lane with equal probability.

    Layout codes: 0 is a plain square, 1 sends the player back to square 0,
    2 moves the player three squares back (fast-lane squares 10-12 map to
    0-2), 3 is a prison square, which causes no displacement in the matrix.

    A trap is triggered with probability 0 by the safe die, 1/2 by the
    normal die and 1 by the risky die. Every row of every matrix sums to 1.
    """

    def __init__(self):
        # Transition matrices for the "safe", "normal" and "risky" dice.
        self.matrix_safe = np.zeros((15, 15))
        self.matrix_normal = np.zeros((15, 15))
        self.matrix_risky = np.zeros((15, 15))
        # Probability of each face; face i moves the player i squares.
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])

    def compute_transition_matrix(self, layout, circle=False):
        """Recompute and return the (safe, normal, risky) matrices.

        Args:
            layout: Sequence of 15 square codes (see the class docstring).
            circle: When true, overshooting the goal wraps around to the
                start of the board; otherwise overshooting ends on the goal.

        Returns:
            The tuple ``(matrix_safe, matrix_normal, matrix_risky)``. These
            are the instance's own arrays and are overwritten by later calls.
        """
        self._compute_safe_matrix(layout, circle)
        self._compute_normal_matrix(layout, circle)
        self._compute_risky_matrix(layout, circle)
        return self.matrix_safe, self.matrix_normal, self.matrix_risky

    def _compute_safe_matrix(self, layout, circle):
        """Fill and return the safe-die matrix (traps never trigger)."""
        return self._fill_matrix(
            self.matrix_safe, self.safe_dice, layout, circle, 0.0)

    def _compute_normal_matrix(self, layout, circle):
        """Fill and return the normal-die matrix (traps trigger half the time)."""
        return self._fill_matrix(
            self.matrix_normal, self.normal_dice, layout, circle, 0.5)

    def _compute_risky_matrix(self, layout, circle):
        """Fill and return the risky-die matrix (traps always trigger)."""
        return self._fill_matrix(
            self.matrix_risky, self.risky_dice, layout, circle, 1.0)

    def _fill_matrix(self, matrix, dice, layout, circle, trigger):
        """Rebuild *matrix* in place for one die and return it."""
        # Start from zero so that repeated calls do not accumulate.
        matrix.fill(0)
        for k in range(15):
            for steps, p in enumerate(dice):
                for square, share in self._destinations(k, steps, circle):
                    self._land(matrix, k, square, p * share, layout, trigger)
        return matrix

    @staticmethod
    def _destinations(k, steps, circle):
        """Return ``[(square, weight), ...]`` reached from *k* before traps."""
        # Standing still, or already on the goal, which is absorbing.
        if steps == 0 or k == 14:
            return [(k, 1.0)]
        # Junction: the slow lane continues on 3, the fast lane starts on 10.
        if k == 2:
            return [(2 + steps, 0.5), (9 + steps, 0.5)]
        position = k + steps
        if 3 <= k <= 9:
            if position <= 9:
                return [(position, 1.0)]
            # Square 9 is directly followed by the goal, not by square 10.
            position += 4
        if position > 14:
            position = position % 15 if circle else 14
        return [(position, 1.0)]

    @staticmethod
    def _land(matrix, k, square, p, layout, trigger):
        """Add probability *p* of landing on *square*, applying its trap."""
        # Reaching the goal ends the game, so no trap is evaluated there.
        trap = 0 if square == 14 else layout[square]
        if trap == 1:
            target = 0
        elif trap == 2:
            # Fast-lane squares 10-12 fall back to 0-2.
            target = square - 10 if 10 <= square <= 12 else max(0, square - 3)
        else:
            target = square
        if target == square or trigger == 0:
            matrix[k, square] += p
            return
        # Split between the triggered and the untriggered outcome.
        matrix[k, target] += p * trigger
        matrix[k, square] += p * (1.0 - trigger)

    def generate_arrays(self, n):
        """Return *n* random layouts holding one trap of each type 1, 2, 3."""
        arrays = []
        for _ in range(n):
            array = np.zeros(15, dtype=int)
            # Three distinct squares between 1 and 13 inclusive, so the first
            # and the last square never hold a trap.
            indices = rd.sample(range(1, 14), 3)
            array[indices] = 1, 2, 3
            arrays.append(array)
        return arrays

    def tst_transition_matrix(self):
        """Print 1000 random layouts and build their matrices in both modes."""
        layouts = self.generate_arrays(1000)
        for array in layouts:
            print(array)
            self.compute_transition_matrix(array, False)
            self.compute_transition_matrix(array, True)
#tmc = TransitionMatrixCalculator()
#tmc.tst_transition_matrix()
import numpy as np
import random as rd
class TransitionMatrixCalculator:
    """Build the 15x15 transition matrices of the board for the three dice.

    Board indices used by this class: squares 0-9 form the slow lane,
    squares 10-13 the fast lane and square 14 is the goal. From square 2 a
    move of at least one step enters either lane with equal probability.

    Layout codes: 0 is a plain square, 1 sends the player back to square 0,
    2 moves the player three squares back (fast-lane squares 10-12 map to
    0-2), 3 is a prison square, which causes no displacement in the matrix.
    """

    def __init__(self):
        # Initialize transition matrices for "safe", "normal", and "risky" dice
        self.matrix_safe = np.zeros((15, 15))
        self.matrix_normal = np.zeros((15, 15))
        self.matrix_risky = np.zeros((15, 15))
        # Probability of each face; face i moves the player i squares.
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])

    def compute_transition_matrix(self, layout, circle=False):
        """Recompute and return the (safe, normal, risky) matrices.

        Traps are triggered with probability 0 by the safe die, 1/2 by the
        normal die and 1 by the risky die.

        Args:
            layout: Sequence of 15 square codes (see the class docstring).
            circle: When true, overshooting the goal wraps around to the
                start of the board; otherwise overshooting ends on the goal.

        Returns:
            The tuple ``(matrix_safe, matrix_normal, matrix_risky)``. These
            are the instance's own arrays and are overwritten by later calls.
        """
        jobs = (
            (self.matrix_safe, self.safe_dice, 0.0),
            (self.matrix_normal, self.normal_dice, 0.5),
            (self.matrix_risky, self.risky_dice, 1.0),
        )
        for matrix, dice, trap_probability in jobs:
            matrix.fill(0)
            self._compute_matrix(matrix, layout, dice, circle, trap_probability)
        return self.matrix_safe, self.matrix_normal, self.matrix_risky

    def _compute_matrix(self, matrix, layout, dice, circle, trap_probability=0.0):
        """Accumulate the transition probabilities of one die into *matrix*.

        Args:
            matrix: 15x15 array updated in place; it is not reset here.
            layout: Sequence of 15 square codes.
            dice: Face probabilities; face i moves the player i squares.
            circle: Whether overshooting the goal wraps around the board.
            trap_probability: Chance that a trap of type 1 or 2 is triggered
                on landing. The default of 0 ignores traps.

        Returns:
            The same *matrix* object.
        """
        for k in range(15):
            for steps, p in enumerate(dice):
                for square, share in self._reachable(k, steps, circle):
                    mass = p * share
                    target = self._after_trap(square, layout)
                    if target == square or trap_probability == 0:
                        matrix[k, square] += mass
                    else:
                        # Split between triggered and untriggered outcome.
                        matrix[k, target] += mass * trap_probability
                        matrix[k, square] += mass * (1.0 - trap_probability)
        return matrix

    @staticmethod
    def _reachable(k, steps, circle):
        """Return ``[(square, weight), ...]`` reached from *k* before traps."""
        # Standing still, or already on the goal, which is absorbing.
        if steps == 0 or k == 14:
            return [(k, 1.0)]
        # Junction: the slow lane continues on 3, the fast lane starts on 10.
        if k == 2:
            return [(2 + steps, 0.5), (9 + steps, 0.5)]
        position = k + steps
        if 3 <= k <= 9:
            if position <= 9:
                return [(position, 1.0)]
            # Square 9 is directly followed by the goal, not by square 10.
            position += 4
        if position > 14:
            position = position % 15 if circle else 14
        return [(position, 1.0)]

    @staticmethod
    def _after_trap(square, layout):
        """Return the square a triggered trap on *square* sends the player to."""
        # Reaching the goal ends the game, so no trap is evaluated there.
        if square == 14:
            return square
        trap = layout[square]
        if trap == 1:
            return 0
        if trap == 2:
            # Fast-lane squares 10-12 fall back to 0-2.
            return square - 10 if 10 <= square <= 12 else max(0, square - 3)
        return square

    def generate_arrays(self, n):
        """Return *n* random layouts holding one trap of each type 1, 2, 3."""
        arrays = []
        for _ in range(n):
            array = np.zeros(15, dtype=int)
            # Three distinct squares between 1 and 13 inclusive.
            indices = rd.sample(range(1, 14), 3)
            array[indices] = 1, 2, 3
            arrays.append(array)
        return arrays

    def test_transition_matrix(self):
        """Print 1000 random layouts and build their matrices in both modes."""
        layouts = self.generate_arrays(1000)
        for array in layouts:
            print(array)
            self.compute_transition_matrix(array, False)
            self.compute_transition_matrix(array, True)
# Initialize the calculator and test the transition matrix
#tmc = TransitionMatrixCalculator()
#tmc.test_transition_matrix()
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter