Skip to content
Extraits de code Groupes Projets
Valider 02b9ef6f rédigé par Adrien Payen
Parcourir les fichiers

update

parent b775d77f
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -5,7 +5,7 @@ class MarkovDecisionSolver:
def __init__(self, layout : list, circle : bool):
self.Numberk = 15
self.tmc_instance = tmc()
self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle)
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
self.jail = [i for i, x in enumerate(layout) if x == 3]
......
import numpy as np
import random as rd
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class Validation:
    """Compare the solver's theoretical cost with per-strategy game results.

    Every strategy currently goes through ``simulate_games``; the dedicated
    simulations for the sub-optimal strategies are still to be written.
    """

    # Labels of the baseline strategies, in the order they are plotted.
    _SUBOPTIMAL_LABELS = (
        "Dice 1 Only",
        "Dice 2 Only",
        "Dice 3 Only",
        "Mixed Random Strategy",
        "Purely Random Choice",
    )

    def __init__(self):
        self.tmc_instance = tmc()

    def simulate_games(self, layout, circle, num_games):
        """Return a list holding one cost value per game.

        Args:
            layout: Board layout forwarded to ``MarkovDecisionSolver``.
            circle: Flag forwarded to ``MarkovDecisionSolver``.
            num_games: Number of entries to produce.

        Returns:
            A list of ``num_games`` values, each being the last element of
            the cost vector returned by ``solve()``.
        """
        # The solver has to be solved before its result can be indexed:
        # ``solve()`` yields a (costs, dice) pair, as used in run_validation.
        # Indexing the solver object itself, as was done before, cannot work
        # unless the solver defines ``__getitem__``.
        # NOTE(review): solve() is assumed to be deterministic, so it is run
        # once instead of once per game - confirm.
        costs, _ = mD(layout, circle).solve()
        return [costs[-1]] * num_games

    def _simulate_suboptimal(self, layout, circle, num_games):
        """Build the ``{label: results}`` mapping of the baseline strategies."""
        # TODO: each label still reuses simulate_games; replace every entry
        # with the simulation of the strategy it is named after.
        return {
            label: self.simulate_games(layout, circle, num_games)
            for label in self._SUBOPTIMAL_LABELS
        }

    def compare_strategies(self, layout, circle, num_games):
        """Simulate every strategy and plot the average costs."""
        optimal_results = self.simulate_games(layout, circle, num_games)
        suboptimal_strategies = self._simulate_suboptimal(
            layout, circle, num_games
        )
        self.plot_results(optimal_results, suboptimal_strategies)

    def plot_results(self, optimal_results, suboptimal_results):
        """Draw a bar chart of the mean cost of each strategy.

        Args:
            optimal_results: Sequence of costs for the optimal strategy.
            suboptimal_results: Mapping from strategy label to its sequence
                of costs.
        """
        strategies = ["Optimal Strategy"] + list(suboptimal_results)
        avg_costs = [np.mean(optimal_results)] + [
            np.mean(costs) for costs in suboptimal_results.values()
        ]
        plt.figure(figsize=(10, 6))
        # The optimal strategy is highlighted in blue, baselines in orange.
        plt.bar(
            strategies,
            avg_costs,
            color=['blue'] + ['orange'] * len(suboptimal_results),
        )
        plt.xlabel("Strategies")
        plt.ylabel("Average Cost")
        plt.title("Comparison of Strategy Performance")
        plt.show()

    def run_validation(self, layout, circle, num_games):
        """Solve the game, simulate all strategies, then plot and print costs."""
        solver = mD(layout, circle)
        theoretical_cost, optimal_dice_strategy = solver.solve()
        optimal_results = self.simulate_games(layout, circle, num_games)
        optimal_average_cost = np.mean(optimal_results)
        suboptimal_strategies = self._simulate_suboptimal(
            layout, circle, num_games
        )
        self.plot_results(optimal_results, suboptimal_strategies)
        print("Theoretical Expected Cost (Value Iteration):", theoretical_cost)
        print("Empirical Average Cost (Optimal Strategy):", optimal_average_cost)
        for strategy, results in suboptimal_strategies.items():
            avg_cost = np.mean(results)
            print(f"Empirical Average Cost ({strategy}):", avg_cost)
# Example usage of the Validation class
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = True
num_games = 1000
validation = Validation()
validation.run_validation(
    layout=layout,
    circle=circle,
    num_games=num_games,
)
Fichier déplacé
import random
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator
from markovDecision import MarkovDecisionSolver as mD
class Validation:
    """Estimate by simulation the average number of turns of several strategies.

    The instance solves the game once with ``MarkovDecisionSolver`` and keeps
    the resulting dice choices in ``optimal_dice``. Games are then played turn
    by turn with ``play_one_turn``.

    NOTE(review): the lane positions used in ``play_one_turn`` (2, 7-9, 10) are
    hard-coded, so the class presumably expects a 15-square layout - confirm.
    """

    def __init__(self, layout, circle=False):
        self.layout = layout
        self.circle = circle
        self.tmc_instance = TransitionMatrixCalculator()
        # Compute optimal value iteration results
        solver = mD(self.layout, self.circle)
        self.optimal_values, self.optimal_dice = solver.solve()

    def _strategy_player(self, strategy):
        """Return the zero-argument callable that plays one game of *strategy*."""
        players = {
            'optimal': self.play_optimal_strategy,
            'safedice': lambda: self.play_dice_strategy(1),
            'normaldice': lambda: self.play_dice_strategy(2),
            'riskydice': lambda: self.play_dice_strategy(3),
            'random': self.play_random_strategy,
        }
        try:
            # Names are matched case-insensitively so that the documented
            # default ('optimal') and the capitalised names both work.
            return players[str(strategy).lower()]
        except KeyError:
            raise ValueError(f"Unknown strategy: {strategy!r}") from None

    def simulate_game(self, strategy='optimal', num_games=1000):
        """Return the average number of turns over *num_games* games.

        Args:
            strategy: One of 'Optimal', 'SafeDice', 'NormalDice', 'RiskyDice'
                or 'Random' (case-insensitive).
            num_games: Number of games to play; must be positive.

        Raises:
            ValueError: If the strategy is unknown or num_games is not positive.
        """
        if num_games <= 0:
            raise ValueError("num_games must be a positive integer")
        play = self._strategy_player(strategy)
        total_turns = sum(play() for _ in range(num_games))
        return total_turns / num_games

    def _play_game(self, choose_dice, start=0):
        """Play one game, asking ``choose_dice(position)`` for the die each turn."""
        goal = len(self.layout) - 1
        cur_pos = start
        prison = False
        n_turns = 0
        # play_one_turn never returns a position beyond the goal (it wraps or
        # clamps), so a single loop serves both the circular and linear boards.
        while cur_pos < goal:
            cur_pos, prison = self.play_one_turn(
                choose_dice(cur_pos), cur_pos, prison
            )
            n_turns += 1
        return n_turns

    def play_optimal_strategy(self):
        """Play one game with the solver's dice choices; return the turn count."""
        return self.play_one_game()

    def play_dice_strategy(self, dice):
        """Play one game always throwing die *dice* (1, 2 or 3).

        The die is actually rolled through ``play_one_turn``; the die's
        identifier is not itself used as the number of squares to move.
        """
        return self._play_game(lambda _pos: dice)

    def play_random_strategy(self):
        """Play one game picking a die uniformly among 1, 2 and 3 each turn."""
        return self._play_game(lambda _pos: random.choice((1, 2, 3)))

    def compare_strategies(self, num_games=1000):
        """Return ``{strategy name: average turns}`` for the five strategies."""
        names = ('Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random')
        return {
            name: self.simulate_game(strategy=name, num_games=num_games)
            for name in names
        }

    def play_one_turn(self, dice_choice, cur_pos, prison):
        """Play a single turn and return ``(new_position, in_prison)``.

        Args:
            dice_choice: Die to throw: 1 rolls 0-1 and ignores traps, 2 rolls
                0-2 and triggers a trap half of the time, 3 rolls 0-3 and
                always triggers traps.
            cur_pos: Current square index.
            prison: True when this turn must be skipped.

        Raises:
            ValueError: If dice_choice is not 1, 2 or 3.
        """
        goal = len(self.layout) - 1
        if cur_pos == goal:
            return goal, False
        if prison:
            # The turn is spent waiting; the flag is cleared for the next one.
            return cur_pos, False
        # Convert dice_choice to integer to avoid TypeError
        dice_choice = int(dice_choice)
        if dice_choice not in (1, 2, 3):
            raise ValueError(f"dice_choice must be 1, 2 or 3, got {dice_choice}")
        result = random.randint(0, dice_choice)

        if cur_pos == 2 and result != 0:
            # Junction: either lane is taken with probability 1/2; the second
            # lane starts 7 squares further in the layout.
            slow_lane = random.choice([0, 1])
            new_pos = cur_pos + result if slow_lane else cur_pos + result + 7
        elif 7 <= cur_pos <= 9 and cur_pos + result >= 10:
            # Leaving the end of the first lane skips the 4 squares of the
            # other lane.
            new_pos = cur_pos + result + 4
        else:
            new_pos = cur_pos + result

        if new_pos > goal:
            if not self.circle:
                # Overshooting simply wins; the player is not in prison.
                return goal, False
            new_pos -= len(self.layout)

        new_square = self.layout[new_pos]
        if dice_choice == 1:
            return new_pos, False
        if dice_choice == 2:
            new_square = random.choice([0, new_square])

        if new_square == 1:
            return 0, False  # back to square one
        if new_square == 2:
            return max(new_pos - 3, 0), False  # back 3 squares, floor at 0
        if new_square == 3:
            return new_pos, True  # prison
        # Plain square, or a trap type this simulator does not model:
        # nothing happens (avoids implicitly returning None).
        return new_pos, False

    def play_one_game(self, start=0):
        """Play one game from *start* with the solver's dice; return the turns."""
        return self._play_game(lambda pos: self.optimal_dice[pos], start)

    def empirical_results(self, num_games=10000):
        """Return the average turn count of *num_games* optimal-strategy games."""
        if num_games <= 0:
            raise ValueError("num_games must be a positive integer")
        total_turns_played = sum(self.play_one_game() for _ in range(num_games))
        return total_turns_played / num_games
# Example usage
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
validation = Validation(layout=layout, circle=False)
results = validation.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy, cost in results.items():
    print(f"{strategy}: {cost}")
......@@ -7,6 +7,7 @@ class TransitionMatrixCalculator:
self.matrix_safe = np.zeros((15, 15))
self.matrix_normal = np.zeros((15, 15))
self.matrix_risky = np.zeros((15, 15))
# Probability to go from state k to k'
self.safe_dice = np.array([1/2, 1/2])
self.normal_dice = np.array([1/3, 1/3, 1/3])
......@@ -17,14 +18,14 @@ class TransitionMatrixCalculator:
self.matrix_normal.fill(0)
self.matrix_risky.fill(0)
self._compute_safe_matrix(layout, circle)
self._compute_safe_matrix()
self._compute_normal_matrix(layout, circle)
self._compute_risky_matrix(layout, circle)
return self.matrix_safe, self.matrix_normal, self.matrix_risky
def _compute_safe_matrix(self, layout, circle):
def _compute_safe_matrix(self):
for k in range(0,15):
for s, p in enumerate(self.safe_dice):
if k == 9 and s == 1:
......@@ -193,7 +194,7 @@ class TransitionMatrixCalculator:
self.matrix_risky[k,k_prime] += p
return self.matrix_risky
"""
def generate_arrays(self,n):
# Initialize an empty list to store all the arrays
arrays = []
......@@ -223,5 +224,16 @@ class TransitionMatrixCalculator:
self.compute_transition_matrix(array, True)
#tmc = TransitionMatrixCalculator()
#tmc.tst_transition_matrix()
def tst_transition_matrix(self):
# create a list of 100 different layouts
layout = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0, 0]
print(self.compute_transition_matrix(layout, False))
print(self.compute_transition_matrix(layout, True))
tmc = TransitionMatrixCalculator()
tmc.tst_transition_matrix()
"""
\ No newline at end of file
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class validation:
    """Simulate games from the dice transition matrices and compare strategies.

    A strategy is a sequence giving, for every state, the die to throw
    (1 = safe, 2 = normal, 3 = risky).
    """

    def __init__(self, layout, circle=False):
        # Transition matrices and optimal policy come from the other modules.
        self.layout = layout
        self.circle = circle
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Define all the strategies, one die per state of the layout.
        n_states = len(layout)
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * n_states
        self.normal_strategy = [2] * n_states
        self.risky_strategy = [3] * n_states
        # Only real dice may be drawn: die 0 does not exist and used to be
        # silently mapped to the risky matrix through index -1.
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(n_states)]

    def simulate_game(self, strategy, n_iterations=10000):
        """Return the mean number of turns over *n_iterations* simulated games.

        Args:
            strategy: Sequence indexed by state giving the die (1, 2 or 3).
            n_iterations: Number of games to simulate; must be positive.

        Raises:
            ValueError: If n_iterations is not positive or a die is not 1-3.
        """
        if n_iterations <= 0:
            raise ValueError("n_iterations must be a positive integer")
        transition_matrices = {
            1: self.safe_dice,
            2: self.normal_dice,
            3: self.risky_dice,
        }
        n_states = len(self.layout)
        goal = n_states - 1
        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # initial state
            while state < goal:  # until goal state is reached
                action = int(strategy[state])
                if action not in transition_matrices:
                    raise ValueError(f"Unknown die {action} for state {state}")
                row = transition_matrices[action][state]
                state = np.random.choice(n_states, p=row)
                if self.layout[state] == 3 and action == 2:
                    # Square of type 3 reached with the normal die: the
                    # extra turn is charged half of the time.
                    total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif self.layout[state] == 3 and action == 3:
                    # With the risky die the extra turn is always charged.
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)
        return np.mean(number_turns)

    def play_optimal_strategy(self):
        """Simulate a single game with the optimal strategy; return its turns."""
        return self.simulate_game(self.optimal_strategy, n_iterations=1)

    def play_dice_strategy(self, dice=1):
        """Simulate a single game always throwing *dice*; return its turns."""
        return self.simulate_game([dice] * len(self.layout), n_iterations=1)

    def play_random_strategy(self):
        """Simulate a single game with the random strategy; return its turns."""
        return self.simulate_game(self.random_strategy, n_iterations=1)

    def compare_strategies(self, num_games=1000):
        """Return ``{strategy name: mean turns}`` over *num_games* games each."""
        strategies = {
            'Optimal': self.optimal_strategy,
            'SafeDice': self.safe_strategy,
            'NormalDice': self.normal_strategy,
            'RiskyDice': self.risky_strategy,
            'Random': self.random_strategy,
        }
        # simulate_game expects the per-state dice sequence and the
        # ``n_iterations`` keyword, not a strategy name and ``num_games``.
        return {
            name: self.simulate_game(policy, n_iterations=num_games)
            for name, policy in strategies.items()
        }
# Example usage
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False  # Example circle value
# Create a single instance under its own name: rebinding ``validation`` to an
# instance would shadow the class and make the next instantiation fail.
validator = validation(layout, circle)
# Use the methods
validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
results = validator.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy, cost in results.items():
    print(f"{strategy}: {cost}")
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter