Skip to content
Extraits de code Groupes Projets
Valider f78dbd5f rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update validation.py

parent adf46a6b
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Aucun aperçu pour ce type de fichier
......@@ -5,7 +5,7 @@ class MarkovDecisionSolver:
def __init__(self, layout : list, circle : bool):
self.Numberk = 15
self.tmc_instance = tmc()
self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
self.jail = [i for i, x in enumerate(layout) if x == 3]
......
import random
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator
from markovDecision import MarkovDecisionSolver as mD
class Validation:
    """Compare dice strategies on a board by counting the turns needed to finish.

    The per-square optimal dice comes from ``MarkovDecisionSolver.solve()``.
    ``simulate_game`` / ``compare_strategies`` rely on the simple
    ``play_*_strategy`` walkers, while ``play_one_game`` /
    ``empirical_results`` roll dice through ``play_one_turn``.
    """

    # Lower-cased strategy name -> fixed dice number given to play_dice_strategy.
    _DICE_BY_STRATEGY = {'safedice': 1, 'normaldice': 2, 'riskydice': 3}

    def __init__(self, layout, circle=False):
        """Store the board and solve for the optimal dice per square.

        Args:
            layout: sequence of square types; this class reads 0 (plain),
                1 (back to start), 2 (back three squares) and 3 (prison).
            circle: when True, overshooting the last square wraps around to
                the start of the board instead of ending the game.
        """
        self.layout = layout
        self.circle = circle
        self.tmc_instance = TransitionMatrixCalculator()
        # Compute optimal value iteration results
        solver = mD(self.layout, self.circle)
        self.optimal_values, self.optimal_dice = solver.solve()

    def simulate_game(self, strategy='optimal', num_games=1000):
        """Return the average number of turns over ``num_games`` games.

        Args:
            strategy: one of 'Optimal', 'SafeDice', 'NormalDice', 'RiskyDice'
                or 'Random'. Names are matched case-insensitively so that the
                default value 'optimal' selects the optimal walker.
            num_games: number of games to average over.

        Raises:
            ValueError: if ``strategy`` is not a known strategy name.
        """
        key = str(strategy).lower()
        if key == 'optimal':
            play = self.play_optimal_strategy
        elif key == 'random':
            play = self.play_random_strategy
        elif key in self._DICE_BY_STRATEGY:
            dice = self._DICE_BY_STRATEGY[key]
            play = lambda: self.play_dice_strategy(dice)
        else:
            # Previously an unknown name crashed on an unbound local variable.
            raise ValueError(f"Unknown strategy: {strategy!r}")

        total_turns = sum(play() for _ in range(num_games))
        return total_turns / num_games

    def play_optimal_strategy(self):
        """Walk from square 0 to the end, advancing by the optimal dice number.

        NOTE: the dice number stored in ``optimal_dice`` is used directly as
        the step size here; no dice roll or trap is simulated (see
        ``play_one_game`` for the dice-rolling variant).
        """
        current_state = 0  # Start from the initial state
        turns = 0
        while current_state < len(self.layout) - 1:
            # Optimal action recorded for the current square
            optimal_action = int(self.optimal_dice[current_state])
            current_state += optimal_action
            turns += 1
        return turns

    def play_dice_strategy(self, dice):
        """Walk from square 0 to the end, advancing by ``dice`` every turn."""
        current_state = 0  # Start from the initial state
        turns = 0
        while current_state < len(self.layout) - 1:
            # Always use the specified dice type (1, 2, or 3)
            current_state += dice
            turns += 1
        return turns

    def play_random_strategy(self):
        """Walk from square 0 to the end, advancing by a random 1-3 each turn."""
        current_state = 0  # Start from the initial state
        turns = 0
        while current_state < len(self.layout) - 1:
            # np.random.randint excludes the upper bound: values are 1, 2 or 3
            dice_roll = np.random.randint(1, 4)
            current_state += dice_roll
            turns += 1
        return turns

    def compare_strategies(self, num_games=1000):
        """Return a dict mapping each strategy name to its average turn count."""
        names = ('Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random')
        return {
            name: self.simulate_game(strategy=name, num_games=num_games)
            for name in names
        }

    def play_one_turn(self, dice_choice, cur_pos, prison):
        """Play a single turn and return ``(new_position, prison_flag)``.

        Args:
            dice_choice: dice number; the roll is drawn uniformly from
                ``0..dice_choice``. Dice 1 never triggers a square's effect,
                dice 2 triggers it half of the time, any other dice always.
            cur_pos: current square index.
            prison: when True the turn is skipped and the flag is cleared.

        Returns:
            The new square index and whether the next turn must be skipped.
        """
        last = len(self.layout) - 1
        if cur_pos == last:
            return last, False
        if prison:
            # The skipped turn consumes the prison flag.
            return cur_pos, False

        # Convert dice_choice to integer to avoid TypeError
        dice_choice = int(dice_choice)
        result = random.choice(range(dice_choice + 1))

        if cur_pos == 2 and result != 0:
            # Leaving square 2: one of two lanes is picked at random; the
            # second lane is entered by skipping 7 extra squares.
            slow_lane = random.choice([0, 1])
            new_pos = cur_pos + result if slow_lane else cur_pos + result + 7
        elif (cur_pos == 9 and result != 0) or (
            cur_pos in [7, 8, 9] and cur_pos + result >= 10
        ):
            # Moving past square 9 from squares 7-9 skips 4 extra squares.
            new_pos = cur_pos + result + 4
        else:
            new_pos = cur_pos + result

        if new_pos > last:
            if not self.circle:
                # Overshooting ends the game on the last square; there is no
                # turn left to skip, so the prison flag is False (it used to
                # be returned as True).
                return last, False
            new_pos -= len(self.layout)

        new_square = self.layout[new_pos]
        if dice_choice == 1:
            return new_pos, False
        if dice_choice == 2:
            # The square's effect is triggered with probability 1/2.
            new_square = random.choice([0, new_square])

        if new_square == 1:
            return 0, False  # back to square one
        if new_square == 2:
            return max(new_pos - 3, 0), False  # back 3 squares, floored at 0
        if new_square == 3:
            return new_pos, True  # prison
        # Plain square (0); unknown square types are treated the same way
        # instead of falling through to an implicit None.
        return new_pos, False

    def play_one_game(self, start=0):
        """Play one game with the optimal dice and return the number of turns.

        ``play_one_turn`` already handles wrapping on circular boards and never
        returns a position past the last square, so one loop serves both the
        circular and non-circular cases.
        """
        last = len(self.layout) - 1
        n_turns = 0
        cur_pos = start
        prison = False
        while cur_pos < last:
            cur_pos, prison = self.play_one_turn(
                self.optimal_dice[cur_pos], cur_pos, prison
            )
            n_turns += 1
        return n_turns

    def empirical_results(self, n_games=10000):
        """Return the average turn count of ``play_one_game`` over ``n_games``."""
        total_turns_played = sum(self.play_one_game() for _ in range(n_games))
        return total_turns_played / n_games
# Example usage: print the average number of turns of every strategy on a
# sample 15-square board.
layout = [
    0, 0, 3, 0, 0,
    0, 2, 0, 0, 0,
    3, 0, 0, 1, 0,
]
validation = Validation(layout, circle=False)
results = validation.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy in results:
    cost = results[strategy]
    print(f"{strategy}: {cost}")
......@@ -24,7 +24,7 @@ class TransitionMatrixCalculator:
return self.matrix_safe, self.matrix_normal, self.matrix_risky
def _compute_safe_matrix(self, layout, circle):
def _compute_safe_matrix(self):
for k in range(0,15):
for s, p in enumerate(self.safe_dice):
if k == 9 and s == 1:
......
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class validation:
    """Estimate the average cost (in turns) of dice strategies by simulation.

    Games are sampled from the transition matrices produced by
    ``TransitionMatrixCalculator``; a strategy is a sequence giving the dice
    number (1 = safe, 2 = normal, 3 = risky) to throw on each square.
    """

    def __init__(self, layout, circle=False):
        """Build the transition matrices, solve the MDP and define strategies.

        Args:
            layout: sequence of square types (3 marks a prison square for the
                turn accounting done in ``simulate_game``).
            circle: forwarded to the matrix calculator and the solver.
        """
        # Objects imported from the other project modules
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Define all the strategies, one dice choice per square of the board.
        n_squares = len(layout)
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * n_squares
        self.normal_strategy = [2] * n_squares
        self.risky_strategy = [3] * n_squares
        # Drawn once at construction time. Only valid dice numbers are used:
        # a 0 would become index -1 in simulate_game and silently select the
        # risky matrix.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_squares)]

    def simulate_game(self, strategy, n_iterations=10000):
        """Return the mean number of turns over ``n_iterations`` sampled games.

        Args:
            strategy: sequence indexed by square giving the dice number
                (1, 2 or 3) to use on that square.
            n_iterations: number of games to simulate.
        """
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        last_square = len(self.layout) - 1
        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
            k = 0  # initial state
            while k < last_square:
                action = strategy[k]  # action prescribed by the strategy
                # Dice numbers start at 1 while the matrix list is 0-indexed.
                # NOTE: an action of 0 would give index -1 (the last matrix).
                transition_matrix = transition_matrices[int(action) - 1]
                # Normalise a copy of the row: dividing the row in place would
                # modify the transition matrix stored on the instance.
                row = np.asarray(transition_matrix[k], dtype=float)
                probabilities = row / np.sum(row)
                # Sample the next state from the row's distribution
                k = np.random.choice(len(self.layout), p=probabilities)
                # Landing on a prison square (3) costs an extra turn: always
                # with dice 3, half of the time with dice 2.
                if self.layout[k] == 3 and action == 2:
                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif self.layout[k] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)
        return np.mean(number_turns)

    def play_optimal_strategy(self, n_iterations=10000):
        """Return the mean number of turns when following the optimal policy."""
        return self.simulate_game(self.optimal_policy, n_iterations)

    def play_dice_strategy(self, dice_choice, n_iterations=10000):
        """Return the mean number of turns when always using one dice.

        Args:
            dice_choice: 'SafeDice', 'NormalDice' or 'RiskyDice'.
            n_iterations: number of games to simulate.

        Raises:
            ValueError: if ``dice_choice`` is not one of the three names.
        """
        if dice_choice == 'SafeDice':
            strategy = self.safe_strategy
        elif dice_choice == 'NormalDice':
            strategy = self.normal_strategy
        elif dice_choice == 'RiskyDice':
            strategy = self.risky_strategy
        else:
            raise ValueError("Invalid dice choice")
        return self.simulate_game(strategy, n_iterations)

    def play_random_strategy(self, n_iterations=10000):
        """Return the mean number of turns for the fixed random strategy."""
        return self.simulate_game(self.random_strategy, n_iterations)

    def compare_strategies(self, num_games=1000):
        """Return a dict mapping each strategy name to its mean turn count."""
        strategies = {
            'Optimal': self.optimal_strategy,
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
            'Random': self.random_strategy,
        }
        return {
            name: self.simulate_game(strategy, n_iterations=num_games)
            for name, strategy in strategies.items()
        }
# Example usage: estimate and print the cost (average number of turns) of the
# strategies on a sample 15-square board.
layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
circle = False  # Example circle value

# The instance gets its own name so the `validation` class is not shadowed
# and can still be instantiated afterwards.
validator = validation(layout, circle=circle)

optimal_cost = validator.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)
dice2_cost = validator.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)
random_cost = validator.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)
strategy_comparison = validator.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter