Skip to content
Extraits de code Groupes Projets
Valider 7707bd45 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

first step of validation.py

parent 306411a2
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -4,7 +4,7 @@ from tmc import TransitionMatrixCalculator as tmc
# testing our TransitionMatrix function based on random layout
# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout, circle) :
def markovDecision(layout : list, circle : bool) :
Numberk = 15 # Number of states k on the board
tmc_instance = tmc()
......
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
def markov_decision(layout: list, circle: bool):
    """Run value iteration to find the cheapest die to throw on every square.

    The cost of a state is ``1`` (the current turn) plus the expected cost of
    the successor states, plus an expected extra turn whenever the throw ends
    on a jail square (``layout[i] == 3``) and the jail is triggered.

    Args:
        layout: Square types of the board; the last square is the goal.
        circle: Forwarded unchanged to the transition-matrix calculator.

    Returns:
        A tuple ``(expected_cost, dice)`` where ``expected_cost`` holds the
        converged cost of every non-final square and ``dice`` holds the chosen
        die for each of them (1 = safe, 2 = normal, 3 = risky).
    """
    num_states = len(layout)
    tmc_instance = tmc()

    # (transition matrix, probability that a jail square costs an extra turn),
    # listed in the order safe / normal / risky so that index + 1 is the die id.
    # The weights mirror the simulation's turn accounting: the safe die never
    # pays the jail penalty, the normal die pays it half of the time and the
    # risky die always pays it.  The previous code selected the weight with
    # ``row is normal_dice``, which is never true for a row slice, so every
    # die was charged the full penalty.
    # NOTE(review): the `_compute_*_matrix` helpers come from the imported
    # `tmc` module — confirm they exist with this signature.
    dice = (
        (tmc_instance._compute_safe_matrix(layout, circle), 0.0),
        (tmc_instance._compute_normal_matrix(layout, circle), 0.5),
        (tmc_instance._compute_risky_matrix(layout, circle), 1.0),
    )
    jail = [i for i, x in enumerate(layout) if x == 3]

    def compute_value(v, dice_row, jail_weight):
        """Expected continuation cost of one throw from a single state."""
        return np.sum(dice_row * v) + jail_weight * np.sum(dice_row[jail])

    value = np.zeros(num_states)
    dice_for_states = np.zeros(num_states - 1)
    while True:
        # The final square keeps a cost of zero: it is never updated.
        new_value = np.zeros(num_states)
        for k in range(num_states - 1):
            costs = [
                compute_value(value, matrix[k], jail_weight)
                for matrix, jail_weight in dice
            ]
            # argmin returns the first minimum, so ties still favour the
            # safe die, then the normal die.
            best = int(np.argmin(costs))
            new_value[k] = 1 + costs[best]
            dice_for_states[k] = best + 1
        converged = np.allclose(new_value, value)
        value = new_value
        if converged:
            break
    return value[:-1], dice_for_states
# Demo: solve the same sample board under both end-of-game rules and print
# the (expected cost, dice) result for each.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
for banner, circle in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(banner)
    print(markov_decision(layout, circle))
import numpy as np
import random as rd
class TransitionMatrixCalculator:
    """Build one transition matrix per die for a board given as ``layout``.

    NOTE(review): the special cases in ``_destinations`` use hard-coded square
    indices (2, 7, 8, 9, 10), which presumably describe a 15-square board with
    a junction on square 2 — confirm before using other board sizes.
    """

    def __init__(self):
        # Transition probabilities for the "safe", "normal" and "risky" dice;
        # the index of each entry is the number of steps of that face.
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])

    def compute_transition_matrix(self, layout: list, circle: bool):
        """Return the ``(safe, normal, risky)`` transition matrices.

        Args:
            layout: Square types of the board (1 and 2 relocate the player).
            circle: When true, overshooting the last square wraps around to
                the start; otherwise the move is clamped to the last square.
        """
        size = len(layout)
        matrix_safe = self._compute_matrix(layout, self.safe_dice, size, circle, 'safe')
        matrix_normal = self._compute_matrix(layout, self.normal_dice, size, circle, 'normal')
        matrix_risky = self._compute_matrix(layout, self.risky_dice, size, circle, 'risky')
        return matrix_safe, matrix_normal, matrix_risky

    def _compute_matrix(self, layout: list, dice_probs: list, size: int, circle: bool, matrix_type: str):
        """Build the ``size x size`` transition matrix of a single die.

        Every face probability is distributed over the squares it can lead
        to, so each row of the result sums to one.  (The previous version
        multiplied the face probability by itself and dropped half of the
        mass at the junction, producing rows that did not sum to one.)

        Raises:
            ValueError: If ``matrix_type`` is not 'safe', 'normal' or 'risky'.
        """
        if matrix_type not in ('safe', 'normal', 'risky'):
            raise ValueError(f"unknown matrix type: {matrix_type!r}")

        transition_matrix = np.zeros((size, size))
        for k in range(size):
            for s, p in enumerate(dice_probs):
                for k_prime, prob in self._destinations(k, s, p, size, circle, matrix_type):
                    transition_matrix[k, self._apply_trap(layout, k_prime)] += prob
        return transition_matrix

    @staticmethod
    def _destinations(k: int, s: int, p: float, size: int, circle: bool, matrix_type: str):
        """List the ``(square, probability)`` pairs reached by face ``s`` from ``k``."""
        last = size - 1
        # Default move: wrap around the board when `circle`, else clamp.
        default = (k + s) % size if circle else min(last, k + s)

        if matrix_type == 'safe':
            if k == 9 and s == 1:
                return [(last, p)]
            if k == 2 and s > 0:
                # Junction: the face probability is split evenly between the
                # default next square and the branch starting on square 10.
                return [(default, p / 2), (10 + s - 1, p / 2)]
        elif matrix_type == 'risky':
            if k == 7 and s == 3:
                return [(last, p)]
            if k == 8 and s in (2, 3):
                # s == 3 oversteps the last square by one: wrap to the start
                # only when `circle`, consistent with the default move rule.
                return [(last if (not circle or s == 2) else 0, p)]
            if k == 9 and s in (1, 2, 3):
                if not circle or s == 1:
                    return [(last, p)]
                return [(0 if s == 2 else 1, p)]
        # NOTE(review): the normal die has no special cases here, so it never
        # takes the junction or the square-9 shortcut — confirm this is intended.
        return [(default, p)]

    @staticmethod
    def _apply_trap(layout: list, k_prime: int) -> int:
        """Relocate a landing square according to the trap found on it."""
        trap = layout[k_prime]
        if trap == 1:
            return 0
        if trap == 2:
            return max(0, k_prime - 3)
        return k_prime

    def generate_arrays(self, n):
        """Return ``n`` random 15-square layouts with one trap of types 1, 2, 3.

        The traps are placed on three distinct squares drawn from indices
        1 to 13, so the first and last squares always stay empty.
        """
        arrays = []
        for _ in range(n):
            array = np.zeros(15, dtype=int)
            indices = rd.sample(range(1, 14), 3)
            array[indices] = 1, 2, 3
            arrays.append(array)
        return arrays

    def tst_transition_matrix(self):
        """Smoke test: build the matrices of 1000 random layouts, both rules."""
        layouts = self.generate_arrays(1000)
        for array in layouts:
            print(array)
            self.compute_transition_matrix(array, False)
            self.compute_transition_matrix(array, True)
#tmc = TransitionMatrixCalculator()
#tmc.tst_transition_matrix()
......@@ -2,4 +2,56 @@ import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import markovDecion as mD
from markovDecision import markovDecision as mD
class EmpiricalComparision:
    """Empirical (simulation-based) evaluation of dice strategies."""

    def __init__(self):
        return

    @staticmethod
    def simulation(strategy, layout: list, circle, nIter: int):
        """Play ``nIter`` games with ``strategy`` and return the mean turn count.

        Args:
            strategy: Indexable by square; gives the die to throw there
                (1 = safe, 2 = normal, 3 = risky).
            layout: Square types of the board; value 3 marks a jail square.
            circle: Forwarded unchanged to the transition-matrix calculator.
            nIter: Number of games to simulate.

        Returns:
            The average number of turns needed to reach the last square.
        """
        tmc_instance = tmc()
        # Indexed by ``die - 1``.
        # NOTE(review): the `_compute_*_matrix` helpers come from the imported
        # `tmc` module — confirm they exist with this signature.
        matrices_transition = [
            tmc_instance._compute_safe_matrix(layout, circle),
            tmc_instance._compute_normal_matrix(layout, circle),
            tmc_instance._compute_risky_matrix(layout, circle),
        ]
        n_squares = len(layout)
        nTurns = []
        for _ in range(nIter):
            turns = 0
            k = 0
            while k < n_squares - 1:
                action = strategy[k]
                transitionMatrix = matrices_transition[int(action - 1)]
                # `np.rd` does not exist; the sampler lives in `np.random`.
                k = np.random.choice(n_squares, p=transitionMatrix[k])
                if layout[k] == 3 and action == 2:
                    # Normal die: the jail costs an extra turn half of the time.
                    turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif layout[k] == 3 and action == 3:
                    # Risky die: the jail always costs an extra turn.
                    turns += 2
                else:
                    turns += 1
            nTurns.append(turns)
        return np.mean(nTurns)

    @staticmethod
    def plot(layouts: list, circle, nIter: int):
        """Solve every layout with the Markov decision procedure.

        Unfinished: the simulation and plotting steps are not implemented
        yet, so the computed results are discarded and ``None`` is returned.
        """
        for layout in layouts:
            expec, policy = mD(layout, circle)
            # TODO: simulate the game with `policy` and collect the results.
        return
# Driver: evaluate the sample board under both end-of-game rules.
# NOTE(review): `results` is not defined in the visible part of this file —
# confirm where it comes from before running.
layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
for circle in (False, True):
    results(layout, circle, 1000000)
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter