Skip to content
Extraits de code Groupes Projets
Valider 749713a6 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

comments code .py

parent 169c62f3
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
class MarkovDecisionSolver:
class MarkovDecisionProcess :
def __init__(self, layout: list, circle: bool):
# Initialize the Markov Decision Process solver with layout and game mode (circle or not)
self.Numberk = 15
self.tmc_instance = tmc()
# Compute transition matrices for safe, normal, and risky scenarios
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle) # Make sure to capture only the normal_dice component
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle) # Make sure to capture only the risky_dice component
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
# Identify jail states in the layout
self.jail = [i for i, x in enumerate(layout) if x == 3]
# Initialize value and dice decision arrays
self.ValueI = np.zeros(self.Numberk)
self.DiceForStates = np.zeros(self.Numberk - 1)
self.Dice = np.zeros(self.Numberk - 1)
def _compute_vi_safe(self, k):
def _compute_vi_safe(self, k : int ):
# Compute the expected value using safe dice transition matrix for state k
return np.dot(self.safe_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
def _compute_vi_normal(self, k):
def _compute_vi_normal(self, k : int ):
# Compute the expected value using normal dice transition matrix for state k
vi_normal = np.dot(self.normal_dice[k], self.ValueI) + np.sum(self.normal_dice[k][self.jail])
return vi_normal
def _compute_vi_risky(self, k):
def _compute_vi_risky(self, k : int ):
# Compute the expected value using risky dice transition matrix for state k
vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail])
return vi_risky
def solve(self):
# Iteratively solve the Markov Decision Process until convergence
i = 0
while True:
ValueINew = np.zeros(self.Numberk)
i += 1
for k in range(self.Numberk - 1):
# Compute expected values for safe, normal, and risky decisions at state k
vi_safe = self._compute_vi_safe(k)
vi_normal = self._compute_vi_normal(k)
vi_risky = self._compute_vi_risky(k)
# Compute the minimum value among vi_safe, vi_normal, and vi_risky
# Determine the minimum value among safe, normal, and risky decisions
min_value = min(vi_safe, vi_normal, vi_risky)
# Find which index (safe, normal, or risky) corresponds to the minimum value
# Record the dice decision (safe=1, normal=2, risky=3) corresponding to the minimum value
if min_value == vi_safe:
ValueINew[k] = 1 + vi_safe
self.DiceForStates[k] = 1
self.Dice[k] = 1
elif min_value == vi_normal:
ValueINew[k] = 1 + vi_normal
self.DiceForStates[k] = 2
self.Dice[k] = 2
else:
ValueINew[k] = 1 + vi_risky
self.DiceForStates[k] = 3
self.Dice[k] = 3
# Check for convergence
if np.allclose(ValueINew, self.ValueI):
self.ValueI = ValueINew
break
self.ValueI = ValueINew
# Return the expected values and dice decisions for each state
Expec = self.ValueI[:-1]
return [Expec, self.DiceForStates]
return [Expec, self.Dice]
def markovDecision(layout : list, circle : bool):
solver = MarkovDecisionSolver(layout, circle)
return solver.solve()
"""
# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
# Résolution du problème avec différents modes de jeu
result_false = markovDecision(layout, circle=False)
print("\nWin as soon as land on or overstep the final square")
print(result_false)
result_true = markovDecision(layout, circle=True)
print("\nStopping on the square to win")
print(result_true)"""
\ No newline at end of file
# Solve the Markov Decision Problem for the given layout and game mode
solver = MarkovDecisionProcess(layout, circle)
return solver.solve()
\ No newline at end of file
......@@ -2,29 +2,24 @@ import matplotlib.pyplot as plt
from validation import Validation as Val
import numpy as np
# Example layout and circle settings
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
# Create an instance of validation
validation_instance = Val(layout, circle)
# Plotting function for strategy comparison
def plot_strategy_comparison(num_games=10000):
def plot_strategy_comparison(num_games : int):
"""Plot a bar chart comparing average costs of different strategies over specified number of games."""
# Compare strategies and get their costs
strategy_costs = validation_instance.compare_strategies(num_games=num_games)
# Bar plot for strategy comparison
# Plotting the bar chart
plt.figure(figsize=(10, 6))
plt.bar(strategy_costs.keys(), strategy_costs.values(), color=['blue', 'green', 'orange', 'red', 'purple'])
plt.xlabel('Strategies')
plt.ylabel('Average Cost')
plt.title('Comparison of Strategies')
plt.savefig('strategy_comparison.png') # Save the plot
plt.show()
# Plotting function for state-based average turns for all strategies on the same plot
def plot_state_based_turns(save=True):
def plot_state_based_turns():
"""Plot the average number of turns per state for different strategies."""
strategies = [validation_instance.optimal_policy,
validation_instance.safe_strategy,
validation_instance.normal_strategy,
......@@ -33,8 +28,9 @@ def plot_state_based_turns(save=True):
strategy_names = ['Optimal', 'SafeDice', 'NormalDice', 'RiskyDice', 'Random']
plt.figure(figsize=(12, 6))
# Simulate and plot average turns for each strategy
for strategy, name in zip(strategies, strategy_names):
mean_turns = validation_instance.simulate_state(strategy, layout, circle)
mean_turns = validation_instance.simulate_state(strategy, layout, circle, num_games)
plt.plot(range(len(mean_turns)), mean_turns, marker='o', linestyle='-', label=name)
plt.xlabel('State')
......@@ -42,13 +38,35 @@ def plot_state_based_turns(save=True):
plt.title('Average Turns per State for Different Strategies')
plt.grid(True)
plt.legend()
plt.show()
def plot_state_based_comparison(num_games_list):
"""Plot a comparison between optimal turns and empirical turns per state for different num_games."""
plt.figure(figsize=(12, 6)) # Create a single figure for all plots
optimal_turns = None # Initialize optimal_turns to None
for num_games in num_games_list:
_, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
# Plotting empirical turns per state for the current num_games
plt.plot(range(len(empirical_turns)), empirical_turns, marker='x', linestyle='-', label=f'Empirical (num_games={num_games})')
#if save:
#plt.savefig('state_based_turns_all_strategies.png') # Save the plot
if optimal_turns is None:
# Only fetch optimal_turns once (for the first num_games)
optimal_turns, _ = validation_instance.compare_state_based_turns(num_games=num_games)
plt.plot(range(len(optimal_turns)), optimal_turns, marker='o', linestyle='-', label=f'ValueIteration')
plt.xlabel('State')
plt.ylabel('Average Turns')
plt.title('Average Turns per State - ValueIteration vs. Empirical')
plt.grid(True)
plt.legend()
plt.show()
def plot_state_based_comparison(validation_instance, num_games=10000):
def plot_state_based_comparison_once(num_games : int):
optimal_turns, empirical_turns = validation_instance.compare_state_based_turns(num_games=num_games)
# Plotting the state-based average turns comparison
......@@ -70,13 +88,23 @@ def plot_state_based_comparison(validation_instance, num_games=10000):
# Main function to generate and save plots
if __name__ == '__main__':
# Example of strategy comparison plot
plot_strategy_comparison(num_games=10000)
# Example of state-based average turns plot for all strategies on the same plot
plot_state_based_turns(save=True)
##### Paramètres #####
# Define the layout of the game board
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
# Indicates whether the board is circular or linear
circle = False
# Number of games to simulate
num_games = 10000
# Initialize Validation instance with the specified layout and circle type
validation_instance = Val(layout, circle)
##### Launch Plots #####
plot_state_based_comparison(validation_instance, num_games=10000)
\ No newline at end of file
# Run the defined plotting functions with specified parameters
plot_strategy_comparison(num_games)
plot_state_based_turns()
plot_state_based_comparison(num_games_list = [10, 100, 1000])
plot_state_based_comparison_once(num_games)
\ No newline at end of file
strategy_comparison.png

21,7 ko | W: | H:

strategy_comparison.png

21,7 ko | W: | H:

strategy_comparison.png
strategy_comparison.png
strategy_comparison.png
strategy_comparison.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -2,12 +2,14 @@ import numpy as np
class TransitionMatrixCalculator:
def __init__(self):
# Initialize the size of the transition matrices
self.size = 15
self.matrix_safe = np.zeros((self.size , self.size ))
self.matrix_normal = np.zeros((self.size , self.size ))
self.matrix_risky = np.zeros((self.size , self.size ))
def compute_transition_matrix(self, layout, circle=False):
def compute_transition_matrix(self, layout : list , circle : bool):
# Compute transition matrices for safe, normal, and risky scenarios
self.matrix_safe = self._compute_safe_matrix()
self.matrix_normal, _ = self._compute_normal_matrix(layout, circle)
self.matrix_risky, _ = self._compute_risky_matrix(layout, circle)
......@@ -16,11 +18,12 @@ class TransitionMatrixCalculator:
def _compute_safe_matrix(self):
# Compute transition matrix for safe scenario
p = np.zeros((self.size ,self.size ))
for k in range(self.size - 1):
if k == 2:
p[k,k+1] = 1/4 # slow lane
p[k,k+8] = 1/4 # fast lane
p[k,k+1] = 1/4
p[k,k+8] = 1/4
elif k == 9:
p[k,k+5] = 1/2
else:
......@@ -29,14 +32,15 @@ class TransitionMatrixCalculator:
p[self.size -1,self.size -1] = 1
return p
def _compute_normal_matrix(self, layout, circle=False):
def _compute_normal_matrix(self, layout : list , circle : bool):
# Compute transition matrix for normal scenario
p = np.zeros((self.size ,self.size ))
jail = np.zeros((self.size ,self.size ))
for k in range(self.size - 1):
if k == 2:
p[k,k+1:k+3] = 1/6 # slow lane # slow lane
p[k,k+8:k+10] = 1/6 # fast lane # fast lane
p[k,k+1:k+3] = 1/6
p[k,k+8:k+10] = 1/6
elif k == 8:
p[k,k+1] = 1/3
p[k,k+6] = 1/3
......@@ -73,14 +77,15 @@ class TransitionMatrixCalculator:
p[self.size -1,self.size -1] = 1
return p, jail
def _compute_risky_matrix(self, layout, circle=False):
def _compute_risky_matrix(self, layout : list , circle : bool):
# Compute transition matrix for risky scenario
p = np.zeros((self.size ,self.size ))
jail = np.zeros((self.size ,self.size ))
for k in range(self.size -1):
if k == 2:
p[k,k+1:k+4] = 1/8 # slow lane
p[k,k+8:k+11] = 1/8 # fast lane
p[k,k+1:k+4] = 1/8
p[k,k+8:k+11] = 1/8
elif k == 7:
p[k,k+1:k+3] = 1/4
p[k,k+7] = 1/4
......@@ -131,20 +136,4 @@ class TransitionMatrixCalculator:
jail[k,j] = p[k,j]
p[self.size -1,self.size-1] = 1
return p, jail
"""
def display_matrices(self):
print("Safe Matrix:")
print(self.matrix_safe)
print("\nNormal Matrix:")
print(self.matrix_normal)
print("\nRisky Matrix:")
print(self.matrix_risky)
# Example Usage:
layout_example = [0]*15
calculator = TransitionMatrixCalculator()
calculator.compute_transition_matrix(layout_example, circle=True)
calculator.display_matrices()
"""
\ No newline at end of file
return p, jail
\ No newline at end of file
import random as rd
import numpy as np
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
from markovDecision import MarkovDecisionProcess as mD
# Class for performing validation and simulation
class Validation:
def __init__(self, layout, circle=False):
def __init__(self, layout : list, circle : bool):
# Initialize with layout and circle configuration
self.layout = layout
self.circle = circle
# Initialize TransitionMatrixCalculator instance for transition matrix computation
self.tmc_instance = tmc()
# Compute transition matrices for safe, normal, and risky dice
self.safe_dice = self.tmc_instance._compute_safe_matrix()
self.normal_dice, _ = self.tmc_instance._compute_normal_matrix(layout, circle)
self.risky_dice, _ = self.tmc_instance._compute_risky_matrix(layout, circle)
# Use MarkovDecisionSolver to find optimal policy and expected costs
solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve()
# Predefined strategies for different dice types
self.safe_strategy = [1] * len(layout)
self.normal_strategy = [2] * len(layout)
self.risky_strategy = [3] * len(layout)
self.random_strategy = [rd.choice([0, 1, 2, 3]) for _ in range(len(layout))]
# Dictionary to store costs by dice type
self.costs_by_dice_type = {
'SafeDice': [0] * len(layout),
'NormalDice': [0] * len(layout),
'RiskyDice': [0] * len(layout)
}
for i, die_type in enumerate(self.layout):
# Assign costs based on dice type to the respective lists in the dictionary
for i, die_type in enumerate(self.layout) :
self.costs_by_dice_type['SafeDice'][i] = 1 if die_type == 3 else 0
self.costs_by_dice_type['NormalDice'][i] = 2 if die_type == 3 else 0
self.costs_by_dice_type['RiskyDice'][i] = 3 if die_type == 3 else 0
def simulate_game(self, strategy, n_iterations=10000):
def simulate_game(self, strategy: list, n_iterations: int):
"""Simulate the game using a given strategy over multiple iterations."""
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
number_turns = []
total_turns = np.zeros(n_iterations)
for _ in range(n_iterations):
total_turns = 0
k = 0 # initial state
for i in range(n_iterations):
k = 0
turns = 0
while k < len(self.layout) - 1:
action = strategy[k]
......@@ -50,32 +61,34 @@ class Validation:
k = np.random.choice(len(self.layout), p=flattened_probs)
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3:
total_turns += 2
if self.layout[k] == 3:
if action == 2:
turns += np.random.choice([1, 2], p=[0.5, 0.5])
elif action == 3:
turns += 2
else:
total_turns += 1
turns += 1
total_turns[i] = turns
number_turns.append(total_turns)
return np.mean(total_turns)
return np.mean(number_turns)
def simulate_state(self, strategy, layout, circle, n_iterations=10000):
def simulate_state(self, strategy: list, layout: list, circle: bool, n_iterations: int):
"""Simulate game states using a given strategy."""
safe_dice = self.tmc_instance._compute_safe_matrix()
normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)[0]
risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)[0]
transition_matrices = [safe_dice, normal_dice, risky_dice]
number_turns = []
number_mean = []
total_turns = []
for _ in range(n_iterations):
number_turns = []
state_turns = np.zeros(len(layout) - 1) # Utiliser un tableau numpy pour stocker les tours par état
for state in range(len(layout) - 1):
total_turns = 0
k = state
turns = 0
while k < len(layout) - 1:
action = strategy[k]
......@@ -87,25 +100,27 @@ class Validation:
k = np.random.choice(len(layout), p=flattened_probs)
if layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif layout[k] == 3 and action == 3:
total_turns += 2
if layout[k] == 3:
if action == 2:
turns += np.random.choice([1, 2], p=[0.5, 0.5]) # Utiliser numpy pour la randomisation
elif action == 3:
turns += 2
else:
total_turns += 1
turns += 1
number_turns.append(total_turns)
state_turns[state] = turns
number_mean.append(number_turns)
total_turns.append(state_turns)
# calculate the average number of turns for each state
mean_turns = np.mean(number_mean, axis=0)
mean_turns = np.mean(total_turns, axis=0)
return mean_turns
def play_optimal_policy(self, n_iterations=10000):
def play_optimal_policy(self, n_iterations : int):
"""Play using the optimal policy for a number of iterations."""
return self.simulate_game(self.optimal_policy, n_iterations)
def play_dice_strategy(self, dice_choice, n_iterations=10000):
def play_dice_strategy(self, dice_choice, n_iterations : int):
"""Play using a specific dice strategy for a number of iterations."""
strategy = {
'SafeDice': self.safe_strategy,
'NormalDice': self.normal_strategy,
......@@ -117,33 +132,13 @@ class Validation:
return self.simulate_game(strategy, n_iterations)
def play_random_strategy(self, n_iterations=10000):
def play_random_strategy(self, n_iterations : int ):
"""Play using a random strategy for a number of iterations."""
return self.simulate_game(self.random_strategy, n_iterations)
def play_empirical_strategy(self):
k = 0
total_turns = 0
while k < len(self.layout) - 1:
action = self.optimal_policy[k]
action_index = int(action) - 1
transition_matrix = self.normal_dice
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs)
k = np.random.choice(len(self.layout), p=flattened_probs)
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[k] == 3 and action == 3:
total_turns += 2
else:
total_turns += 1
return total_turns
def compare_empirical_vs_value_iteration(self, num_games=10000):
def compare_empirical_vs_value_iteration(self, num_games : int):
"""Compare expected value iteration turns with empirical turns."""
value_iteration_turns = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
......@@ -153,14 +148,29 @@ class Validation:
}
return mean_turns_by_state
def empirical_cost_of_square(self, strategy: list, n_iterations: int):
"""Calculate the empirical cost of a square for a given strategy."""
total_square_costs = []
for _ in range(n_iterations):
game_cost = self.simulate_game(strategy, 1)
square_cost = game_cost ** 2
total_square_costs.append(square_cost)
empirical_cost = np.mean(total_square_costs)
return empirical_cost
def compare_state_based_turns(self, num_games=10000):
def compare_state_based_turns(self, num_games : int ):
# Compare the expected turns from value iteration with empirical state-based turns
value_iteration = self.expec
empirical_turns = self.simulate_state(self.optimal_policy, self.layout, self.circle, n_iterations=num_games)
return value_iteration, empirical_turns
def compare_strategies(self, num_games=10000):
def compare_strategies(self, num_games : int):
# Compare the costs of different strategies over a number of games
optimal_cost = self.simulate_game(self.optimal_policy, n_iterations=num_games)
dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
......@@ -174,71 +184,3 @@ class Validation:
'RiskyDice': dice3_cost,
'Random': random_cost
}
"""
# Exemple d'utilisation
layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
circle = False
validation_instance = Validation(layout, circle)
# Comparaison entre la stratégie empirique et la value iteration
turns_by_state = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
# Affichage des moyennes de tours pour chaque état
num_states = len(layout)
for state in range(num_states - 1):
print(f"État {state}:")
print(f" ValueIteration - Tours moyens : {turns_by_state['ValueIteration'][state]:.2f}")
print(f" Empirical - Tours moyens : {turns_by_state['Empirical'][state]:.2f}")
# Exécution de la stratégie empirique une fois
empirical_strategy_result = validation_instance.play_empirical_strategy()
print("Coût de la stratégie empirique sur un tour :", empirical_strategy_result)
# Comparaison entre la stratégie empirique et la value iteration sur plusieurs jeux
comparison_result = validation_instance.compare_empirical_vs_value_iteration(num_games=1000000)
print("Coût moyen de la stratégie de value iteration :", comparison_result['ValueIteration'])
print("Coût moyen de la stratégie empirique :", comparison_result['Empirical'])
# Coûts des différentes stratégies
optimal_cost = validation_instance.play_optimal_policy(n_iterations=1000000)
print("Optimal Strategy Cost:", optimal_cost)
dice1_cost = validation_instance.play_dice_strategy('SafeDice', n_iterations=1000000)
print("Safe Dice Strategy Cost:", dice1_cost)
dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=1000000)
print("Normal Dice Strategy Cost:", dice2_cost)
dice3_cost = validation_instance.play_dice_strategy('RiskyDice', n_iterations=1000000)
print("Risky Dice Strategy Cost:", dice3_cost)
random_cost = validation_instance.play_random_strategy(n_iterations=1000000)
print("Random Strategy Cost:", random_cost)
# Comparaison entre les stratégies
strategy_comparison = validation_instance.compare_strategies(num_games=1000000)
print("Strategy Comparison Results:", strategy_comparison)
# Calcul des tours moyens pour différentes stratégies
optimal_policy = validation_instance.optimal_policy
mean_turns_optimal = validation_instance.simulate_state(optimal_policy, layout, circle, n_iterations=1000000)
print("Mean Turns for Optimal Strategy:", mean_turns_optimal)
safe_dice_strategy = validation_instance.safe_strategy
mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice)
normal_dice_strategy = validation_instance.normal_strategy
mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice)
risky_dice_strategy = validation_instance.risky_strategy
mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice)
random_dice_strategy = validation_instance.random_strategy
mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=1000000)
print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice)
"""
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter