Skip to content
Extraits de code Groupes Projets
Valider c682c832 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update files

parent f78dbd5f
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
# testing our TransitionMatrix function based on random layout
# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
def markovDecision(layout: list, circle: bool):
    """Run value iteration and pick the cheapest die for every square.

    Parameters
    ----------
    layout : list
        Square types of the board. Entries equal to 3 are treated as jail
        squares and add an extra expected cost (see below).
    circle : bool
        Forwarded unchanged to the transition-matrix calculator.

    Returns
    -------
    list
        ``[Expec, DiceForStates]`` where ``Expec`` is the converged value of
        every state except the last one and ``DiceForStates`` holds the
        selected die per state (1 = safe, 2 = normal, 3 = risky).
    """
    Numberk = 15  # Number of states k on the board

    tmc_instance = tmc()
    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)

    # Indices of all the jail squares on the board.
    jail = [i for i, x in enumerate(layout) if x == 3]

    ValueI = np.zeros(Numberk)             # current value estimate
    DiceForStates = np.zeros(Numberk - 1)  # chosen die per non-final state

    while True:
        ValueINew = np.zeros(Numberk)
        for k in range(Numberk - 1):
            # Expected continuation cost per die, in the order safe, normal,
            # risky. The jail term is weighted 0.5 for the normal die and 1
            # for the risky die (100% chance of triggering the trap).
            costs = (
                np.sum(safe_dice[k] * ValueI),
                np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail]),
                np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]),
            )
            # argmin returns the first minimum, so ties still favour the
            # safe die, then the normal die.
            best = int(np.argmin(costs))
            ValueINew[k] = 1 + costs[best]
            DiceForStates[k] = best + 1

        converged = np.allclose(ValueINew, ValueI)
        ValueI = ValueINew
        if converged:
            break

    Expec = ValueI[:-1]
    return [Expec, DiceForStates]
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board under both values of `circle` and print each result
# under its banner.
for banner, circle_rule in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(banner)
    print(markovDecision(layout, circle_rule))
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
def markov_decision(layout: list, circle: bool):
    """Run value iteration and pick the cheapest die for every square.

    Parameters
    ----------
    layout : list
        Square types of the board. Entries equal to 3 are treated as jail
        squares and add an extra expected cost for the normal and risky dice.
    circle : bool
        Forwarded unchanged to the transition-matrix calculator.

    Returns
    -------
    tuple
        ``(value[:-1], dice_for_states)``: the converged value of every state
        except the last one, and the selected die per state
        (1 = safe, 2 = normal, 3 = risky).
    """
    Numberk = 15  # number of states on the board

    tmc_instance = tmc()
    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
    jail = [i for i, x in enumerate(layout) if x == 3]

    def compute_value(v, dice_row, jail_weight):
        # The weight is passed explicitly: testing `dice_row is normal_dice`
        # can never succeed because the caller hands in a single row, not the
        # whole matrix, which made every die pay the full jail cost.
        return np.sum(dice_row * v) + jail_weight * np.sum(dice_row[jail])

    value = np.zeros(Numberk)
    dice_for_states = np.zeros(Numberk - 1)

    while True:
        new_value = np.zeros(Numberk)
        for k in range(Numberk - 1):
            # Jail weights: none for the safe die, 0.5 for the normal die,
            # 1 for the risky die.
            costs = (
                compute_value(value, safe_dice[k], 0.0),
                compute_value(value, normal_dice[k], 0.5),
                compute_value(value, risky_dice[k], 1.0),
            )
            # First minimum wins, so ties favour safe, then normal.
            best = int(np.argmin(costs))
            new_value[k] = 1 + costs[best]
            dice_for_states[k] = best + 1

        converged = np.allclose(new_value, value)
        value = new_value
        if converged:
            break

    return value[:-1], dice_for_states
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]

# Solve the same board under both values of `circle` and print each result
# under its banner.
for banner, circle_rule in (
    ("\nWin as soon as land on or overstep the final square", False),
    ("\nStopping on the square to win", True),
):
    print(banner)
    print(markov_decision(layout, circle_rule))
import numpy as np
import random as rd
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
from validation import Validation
def make_plots():
    """Draw two figures for a fixed example board.

    Figure 1 compares, for each start square, the cost returned by the solver
    with the average of 10000 simulated games. Figure 2 is a bar chart with
    one bar per named policy.
    """
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    circle = False
    validation = Validation(layout, circle)
    expec, optimal_policy = mD(layout, circle).solve()
    n_squares = len(expec)

    # Plot 1: Theoretical vs Empirical Cost
    expected_costs = np.zeros(n_squares)
    for start_square in range(n_squares):
        turns_sum = sum(validation.play_one_game(start_square) for _ in range(10000))
        expected_costs[start_square] = turns_sum / 10000

    squares = np.arange(n_squares)
    plt.plot(squares, expec, label="Theoretical cost")
    plt.plot(squares, expected_costs, label="Empirical cost")
    plt.xticks(np.arange(0, n_squares, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()

    # Plot 2: Expected number of turns for different policies
    policies = [
        optimal_policy,
        np.ones(n_squares),
        np.ones(n_squares) * 2,
        np.ones(n_squares) * 3,
        np.random.randint(1, 4, n_squares),
    ]
    # NOTE(review): the policy itself is never passed on, so every bar comes
    # from the same `empirical_results()` call on a fresh Validation object.
    # Confirm whether each policy was meant to be evaluated separately.
    avgn_turns = [Validation(layout, circle).empirical_results() for _ in policies]
    names = ["optimal", "safe", "normal", "risky", "random"]
    plt.bar(names, avgn_turns)
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns for different policies")
    plt.show()
# Call make_plots only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    make_plots()
import numpy as np
import random as rd
class TransitionMatrixCalculator:
    """Build one transition matrix per die type for a board layout.

    Each die is a uniform distribution over the number of squares advanced:
    index ``s`` of a probability array is the probability of moving ``s``
    squares forward.
    """

    def __init__(self):
        # Transition probabilities for the "safe", "normal" and "risky" dice.
        self.safe_dice = np.array([1/2, 1/2])
        self.normal_dice = np.array([1/3, 1/3, 1/3])
        self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4])

    def compute_transition_matrix(self, layout: list, circle: bool):
        """Return the ``(safe, normal, risky)`` transition matrices.

        Each matrix is ``len(layout) x len(layout)``; entry ``[k, k2]`` is the
        probability mass moved from square ``k`` to square ``k2``.
        """
        size = len(layout)
        matrix_safe = self._compute_matrix(layout, self.safe_dice, size, circle, 'safe')
        matrix_normal = self._compute_matrix(layout, self.normal_dice, size, circle, 'normal')
        matrix_risky = self._compute_matrix(layout, self.risky_dice, size, circle, 'risky')
        return matrix_safe, matrix_normal, matrix_risky

    def _compute_matrix(self, layout: list, dice_probs: list, size: int, circle: bool, matrix_type: str):
        """Fill the transition matrix of one die.

        ``dice_probs[s]`` is the probability of advancing ``s`` squares and is
        added once per outcome, so every row sums to 1 except where noted
        below. ``matrix_type`` ('safe', 'normal' or 'risky') selects the
        die-specific special cases.

        Raises:
            ValueError: if ``matrix_type`` is not one of the three known dice.
        """
        if matrix_type not in ('safe', 'normal', 'risky'):
            raise ValueError(f"unknown matrix_type: {matrix_type!r}")

        transition_matrix = np.zeros((size, size))

        for k in range(size):
            for s, p in enumerate(dice_probs):
                # Default move: wrap around when `circle`, otherwise clamp to
                # the last square.
                k_prime = (k + s) % size if circle else min(size - 1, k + s)

                # The square indices below are hard-coded.
                # NOTE(review): they presumably encode where the board's two
                # lanes split and rejoin - confirm against the game rules.
                if k == 9 and s == 1 and matrix_type == 'safe':
                    k_prime = size - 1
                elif k == 2 and s > 0 and matrix_type == 'safe':
                    # NOTE(review): only half of the mass is placed (on square
                    # 10 + s - 1); the other half is not assigned anywhere, so
                    # this row of the safe matrix sums to 0.75.
                    p = p / 2
                    k_prime = 10 + s - 1
                    if layout[k_prime] == 1:
                        k_prime = 0
                    elif layout[k_prime] == 2:
                        k_prime = max(0, k_prime - 3)
                elif k == 7 and s == 3 and matrix_type == 'risky':
                    k_prime = size - 1
                elif k == 8 and s in [2, 3] and matrix_type == 'risky':
                    # s == 2 lands on the last square. s == 3 oversteps it:
                    # back to square 0 on a circular board, otherwise it stays
                    # on the last square (same rule as the k == 9 branch).
                    if not circle or s == 2:
                        k_prime = size - 1
                    else:
                        k_prime = 0
                elif k == 9 and s in [1, 2, 3] and matrix_type == 'risky':
                    if not circle or s == 1:
                        k_prime = size - 1
                    elif circle and s == 2:
                        k_prime = 0
                    elif circle and s == 3:
                        k_prime = 1

                # Layout value 1 sends the move to square 0; value 2 moves it
                # three squares back (never below 0).
                if layout[k_prime] in [1, 2]:
                    k_prime = max(0, k_prime - 3) if layout[k_prime] == 2 else 0

                # Add the outcome probability exactly once (it used to be
                # multiplied by itself, which shrank every row sum).
                transition_matrix[k, k_prime] += p

        return transition_matrix

    def generate_arrays(self, n):
        """Return ``n`` random 15-square layouts.

        Each layout is all zeros except for three distinct squares, drawn from
        indices 1..13, that receive the values 1, 2 and 3.
        """
        arrays = []
        for _ in range(n):
            array = np.zeros(15, dtype=int)
            indices = rd.sample(range(1, 14), 3)
            array[indices] = 1, 2, 3
            arrays.append(array)
        return arrays

    def tst_transition_matrix(self):
        """Smoke test: print 1000 random layouts and build their matrices for both `circle` values."""
        layouts = self.generate_arrays(1000)
        for array in layouts:
            print(array)
            self.compute_transition_matrix(array, False)
            self.compute_transition_matrix(array, True)
#tmc = TransitionMatrixCalculator()
#tmc.tst_transition_matrix()
......@@ -19,11 +19,25 @@ class validation:
# Define all the strategy
self.optimal_strategy = self.optimal_policy
self.safe_strategy = [1]*15
self.normal_strategy = [2]*15
self.risky_strategy = [3]*15
self.safe_strategy = [1]*len(layout)
self.normal_strategy = [2]*len(layout)
self.risky_strategy = [3]*len(layout)
self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
# Définir les coûts par case et par type de dé
self.costs_by_dice_type = {
'SafeDice': [0] * len(self.layout),
'NormalDice': [0] * len(self.layout),
'RiskyDice': [0] * len(self.layout)
}
# Remplir les coûts pour chaque case en fonction du type de dé
for i in range(len(self.layout)):
if self.layout[i] == 3:
self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr
self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal
self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué
def simulate_game(self, strategy, n_iterations=10000):
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
......@@ -40,9 +54,6 @@ class validation:
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
#print(f"Current state (k): {k}, Action chosen: {action}")
#print(f"Transition matrix: {transition_matrix}")
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités
......@@ -62,9 +73,51 @@ class validation:
return np.mean(number_turns)
def simulate_state(self, strategy, layout, circle, n_iterations=10000):
    """Estimate by simulation the mean number of turns from every start square.

    For each of ``n_iterations`` rounds, one game is played from every
    non-final square, following ``strategy`` (``strategy[k]`` is the die used
    on square ``k``: 1 = safe, 2 = normal, 3 = risky).

    Returns:
        Array with one mean turn count per non-final start square.
    """
    # Compute transition matrices for each dice
    tmc_instance = tmc()
    # NOTE(review): this call passes no arguments while the normal/risky calls
    # receive (layout, circle) - confirm against the calculator's signature.
    P_safe = tmc_instance._compute_safe_matrix()
    P_normal = tmc_instance._compute_normal_matrix(layout, circle)
    P_risky = tmc_instance._compute_risky_matrix(layout, circle)
    transition_matrices = [P_safe, P_normal, P_risky]

    n_squares = len(layout)
    final_state = n_squares - 1
    # One row per round, one column per start square.
    turns = np.zeros((n_iterations, final_state), dtype=int)

    for game in range(n_iterations):
        for state in range(final_state):
            total_turns = 0
            k = state  # starting state
            while k < final_state:
                action = strategy[k]  # action based on strategy
                row = transition_matrices[int(action) - 1][k]
                # Normalise into a new array so the transition matrix itself
                # is never modified.
                probs = row / np.sum(row)
                k = np.random.choice(n_squares, p=probs)

                if layout[k] == 3 and action == 2:
                    # Jail reached with the normal die: 1 or 2 turns, each
                    # with probability 0.5.
                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif layout[k] == 3 and action == 3:
                    # Jail reached with the risky die: always 2 turns.
                    total_turns += 2
                else:
                    total_turns += 1
            turns[game, state] = total_turns

    # calculate the average number of turns for each state
    return turns.mean(axis=0)
def play_optimal_strategy(self, n_iterations=10000):
    """Simulate ``n_iterations`` games with the optimal strategy.

    Returns whatever ``simulate_game`` returns for ``self.optimal_strategy``.
    """
    # A second, unreachable `return` (diff residue) used to follow the first;
    # only one call remains.
    return self.simulate_game(self.optimal_strategy, n_iterations)
def play_dice_strategy(self, dice_choice, n_iterations=10000):
......@@ -98,34 +151,41 @@ class validation:
}
# Example usage
layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
circle = False  # Example circle value

# The instance must be created here: its only other assignment sits inside the
# disabled (string-literal) example below and is never executed.
validation_instance = validation(layout, circle)

"""
# Create an instance of validation
validator = validation(layout, circle)
# Use the methods
validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = validation(layout, circle)
results = validation.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy, cost in results.items():
print(f"{strategy}: {cost}")"""

# Whole-game costs for a few strategies (each call made once).
optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)

dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)

random_cost = validation_instance.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)

strategy_comparison = validation_instance.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)

# Mean number of turns per start square for each strategy.
optimal_strategy = validation_instance.optimal_strategy
mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal)

safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)

normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)

risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)

random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter