Skip to content
Extraits de code Groupes Projets
Valider 883d5811 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

merge solution

parent 81121bf8
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
import matplotlib.pyplot as plt
from simulate import Validation as Val
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
import random as rd
import numpy as np
def plot_results(layouts, circle, n_iterations=100):
    """Plot the average number of turns per layout for five strategies.

    For every layout the optimal (Markov) policy is computed, then the game
    is simulated with that policy, with each constant-dice policy (safe=1,
    normal=2, risky=3) and with one randomly drawn policy. The averages are
    drawn as one line per strategy, indexed by layout number.

    Args:
        layouts: Sequence of board layouts (each a sequence of square types).
        circle: Whether the board is played in circular mode.
        n_iterations: Number of simulated games per (layout, strategy) pair.

    NOTE(review): assumes ``simulate.Validation`` exposes the instance API
    ``Validation(layout, circle).simulate_game(strategy, n_iterations)`` and
    that ``MarkovDecisionSolver(layout, circle).solve()`` returns
    ``(expectation, policy)`` -- confirm against simulate.py / markovDecision.py.
    """
    labels = ("Markov", "Safe", "Normal", "Risky", "Random")
    results = {label: [] for label in labels}

    for layout in layouts:
        n_squares = len(layout)

        # The solver must be instantiated and solved; the class object itself
        # cannot be unpacked into (expectation, policy).
        _, optimal_policy = mD(layout, circle).solve()

        # simulate_game is an instance method: bind it to a validator built
        # for this layout instead of calling it on the class.
        validator = Val(layout, circle)

        policies = {
            "Markov": optimal_policy,
            "Safe": [1] * n_squares,
            "Normal": [2] * n_squares,
            "Risky": [3] * n_squares,
            "Random": np.random.randint(1, 4, size=n_squares),
        }
        for label, policy in policies.items():
            results[label].append(validator.simulate_game(policy, n_iterations))

    # Plot the results: one line per strategy, one x position per layout.
    x_positions = range(len(layouts))
    plt.figure(figsize=(12, 8))
    for label in labels:
        plt.plot(x_positions, results[label], label=label)
    plt.xticks(x_positions, x_positions)
    plt.xlabel('Layout number', fontsize=13)
    plt.ylabel('Average number of turns', fontsize=13)
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    plt.show()
from tmc import TransitionMatrixCalculator as tmc import random
from markovDecision import MarkovDecisionSolver as mD
import random as rd
import numpy as np import numpy as np
import matplotlib.pyplot as plt
class Validation: from tmc import TransitionMatrixCalculator as tmc
def __init__(self, layout, circle=False): from markovDecision import MarkovDecisionSolver
self.layout = layout
self.circle = circle nSquares = 15
nSimul = 10000
# Compute transition matrices using TransitionMatrixCalculator
self.tmc_instance = tmc() def playOneTurn(diceChoice, curPos, layout, circle, prison):
self.safe_dice = self.tmc_instance._compute_safe_matrix() if curPos == nSquares - 1:
self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) return nSquares - 1, False
self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
if prison:
# Solve Markov Decision Problem return curPos, False
solver = mD(self.layout, self.circle)
self.expec, self.optimal_policy = solver.solve() listDiceResults = [i for i in range(diceChoice + 1)]
result = random.choice(listDiceResults)
# Define all the strategies
self.optimal_strategy = self.optimal_policy if curPos == 2 and result != 0:
self.safe_strategy = [1] * 15 slowLane = random.choice([0, 1])
self.normal_strategy = [2] * 15 if slowLane:
self.risky_strategy = [3] * 15 newPos = curPos + result
self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)] else:
newPos = curPos + result + 7
def simulate_game(self, strategy, n_iterations=10000): elif ((curPos == 9 and result != 0) or (curPos in [7, 8, 9] and curPos + result >= 10)):
# Compute transition matrices for each dice newPos = curPos + result + 4
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] else:
number_turns = [] newPos = curPos + result
for _ in range(n_iterations): if newPos > nSquares - 1:
total_turns = 0 if circle:
state = 0 # initial state newPos -= nSquares
while state < len(self.layout) - 1: # until goal state is reached else:
action = strategy[state] # get action according to strategy return nSquares - 1, True
transition_matrix = transition_matrices[int(action) - 1]
state = np.random.choice(len(self.layout), p=transition_matrix[state]) newSquare = layout[newPos]
if self.layout[state] == 3 and action == 2: if diceChoice == 1:
total_turns += np.random.choice([1, 2], p=[0.5, 0.5]) return newPos, False
elif self.layout[state] == 3 and action == 3: elif diceChoice == 2:
total_turns += 2 newSquare = random.choice([0, newSquare])
else:
total_turns += 1 if newSquare == 0:
return newPos, False # nothing happens
number_turns.append(total_turns) elif newSquare == 1:
return 0, False # back to square one
return np.mean(number_turns) elif newSquare == 2:
if newPos - 3 < 0:
def simulate_state(self, strategy, n_iterations=10000): return 0, False # back to square one
# Compute transition matrices for each dice return newPos - 3, False # back 3 squares
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] elif newSquare == 3:
number_turns = [] return newPos, True # prison
elif newSquare == 4:
for _ in range(n_iterations): newSquare = random.choice([1, 2, 3])
turns_per_state = [] if newSquare == 1:
state = 0 return 0, False # back to square one
elif newSquare == 2:
while state < len(self.layout) - 1: if newPos - 3 < 0:
total_turns = 0 return 0, False # back to square one
action = strategy[state] return newPos - 3, False # back 3 squares
transition_matrix = transition_matrices[int(action) - 1] elif newSquare == 3:
state = np.random.choice(len(self.layout), p=transition_matrix[state]) return newPos, True # prison
if self.layout[state] == 3 and action == 2: def playOneGame(layout, circle, policy, start=0):
total_turns += np.random.choice([1, 2], p=[0.5, 0.5]) nTurns = 0
elif self.layout[state] == 3 and action == 3: curPos = start
total_turns += 2 prison = False
else:
total_turns += 1 if circle:
while curPos != nSquares - 1:
turns_per_state.append(total_turns) newPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison)
if newPos > nSquares - 1:
number_turns.append(turns_per_state) curPos = nSquares - newPos
curPos = newPos
return np.mean(number_turns, axis=0) nTurns += 1
else:
while curPos < nSquares - 1:
newPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison)
curPos = newPos
nTurns += 1
return nTurns
def empiric_cost_of_square(layout, circle, policy):
    """Estimate by simulation the mean number of turns needed from each square.

    For every start square in ``range(nSquares)``, ``nSimul`` games are played
    with ``playOneGame`` and the turn counts it returns are averaged.

    Returns:
        NumPy array of length ``nSquares``; entry ``i`` is the average number
        of turns for games started on square ``i``.
    """
    per_square_average = np.zeros(nSquares)
    for origin in range(nSquares):
        turns_played = sum(
            playOneGame(layout, circle, policy, start=origin)
            for _ in range(nSimul)
        )
        per_square_average[origin] = turns_played / nSimul
    return per_square_average
def empirical_results(layout, circle, policy):
    """Return the average turn count of ``nSimul`` games played with ``policy``.

    Each game is run through ``playOneGame`` with its default start square.
    """
    turn_counts = (playOneGame(layout, circle, policy) for _ in range(nSimul))
    return sum(turn_counts) / nSimul
def comparison_theorical_empirical(layout, circle):
    """Plot the solver's expected cost per square against the simulated cost.

    The optimal policy and its expected costs come from
    ``MarkovDecisionSolver.solve``; the empirical costs are obtained by
    running ``empiric_cost_of_square`` with that policy cast to integers.
    Both curves are drawn on one figure and shown.
    """
    expec, optimal_policy = MarkovDecisionSolver(layout, circle).solve()
    actual = empiric_cost_of_square(layout, circle, optimal_policy.astype(int))

    # One x position per square, shared by both curves.
    n_points = len(expec)
    squares = np.arange(n_points)
    curves = ((expec, "Theoretical cost"), (actual, "Empirical cost"))
    for values, legend_label in curves:
        plt.plot(squares, values, label=legend_label)

    plt.xticks(np.arange(0, n_points, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()
def comparison_of_policies_total(layout, circle):
    """Show a bar chart of the simulated average game length for five policies.

    The policies compared are the solver's optimal policy, the three
    constant-dice policies (1, 2 and 3 on every square) and one random policy
    drawn with ``np.random.randint(1, 4)``. Each average comes from
    ``empirical_results``.
    """
    _, optimal_policy = MarkovDecisionSolver(layout, circle).solve()

    # Constant policies: the same dice value on every square.
    constant_policies = [np.ones(nSquares, dtype=int) * die for die in (1, 2, 3)]
    random_policy = np.random.randint(1, 4, size=nSquares)
    policies = [optimal_policy.astype(int), *constant_policies, random_policy]

    names = ["optimal", "safe", "normal", "risky", "random"]
    avgnTurns = []
    for candidate in policies:
        avgnTurns.append(empirical_results(layout, circle, candidate))

    # Bar plot with its labels and title.
    plt.bar(names, avgnTurns)
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns by policy")
    plt.show()
def comparison_of_policies_squares(layout, circle):
    """Plot the simulated per-square cost of five policies on one figure.

    The policies compared are the solver's optimal policy, the three
    constant-dice policies (1, 2 and 3 on every square) and one random policy
    drawn with ``np.random.randint(1, 4)``. Each curve comes from
    ``empiric_cost_of_square``.
    """
    _, optimal_policy = MarkovDecisionSolver(layout, circle).solve()

    # Constant policies: the same dice value on every square.
    constant_policies = [np.ones(nSquares, dtype=int) * die for die in (1, 2, 3)]
    random_policy = np.random.randint(1, 4, size=nSquares)
    policies = [optimal_policy.astype(int), *constant_policies, random_policy]

    avgnTurns = []
    for candidate in policies:
        avgnTurns.append(empiric_cost_of_square(layout, circle, candidate))

    # x-axis values: one position per square.
    n_points = len(avgnTurns[0])
    squares = np.arange(n_points)

    curve_labels = ("Optimal", "Safe", "Normal", "Risky", "Random")
    for costs, curve_label in zip(avgnTurns, curve_labels):
        plt.plot(squares, costs, label=curve_label)

    plt.xticks(np.arange(0, n_points, step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Expected cost for different policies")
    plt.show()
def make_plots():
    """Run the theoretical-vs-empirical comparison on one fixed, non-circular layout."""
    board, is_circular = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0], False
    comparison_theorical_empirical(board, is_circular)
    # Other comparisons, currently disabled:
    # comparison_of_policies_total(board, is_circular)
    # comparison_of_policies_squares(board, is_circular)
# Module-level entry point: executed whenever this file is run or imported.
make_plots()
...@@ -25,43 +25,69 @@ class validation: ...@@ -25,43 +25,69 @@ class validation:
self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
def simulate_game(self, strategy, n_iterations=10000): def simulate_game(self, strategy, n_iterations=10000):
# Compute transition matrices for each dice
transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
number_turns = [] number_turns = []
for _ in range(n_iterations): for _ in range(n_iterations):
total_turns = 0 total_turns = 0
state = 0 # initial state k = 0 # état initial
while state < len(self.layout) - 1: # until goal state is reached
action = strategy[state] # get action according to strategy while k < len(self.layout) - 1:
transition_matrix = transition_matrices[int(action - 1)] action = strategy[k] # action selon la stratégie
state = np.random.choice(len(self.layout), p=transition_matrix[state])
if self.layout[state] == 3 and action == 2: # Convertir action en entier pour accéder à l'indice correct dans transition_matrices
action_index = int(action) - 1
transition_matrix = transition_matrices[action_index]
#print(f"Current state (k): {k}, Action chosen: {action}")
#print(f"Transition matrix: {transition_matrix}")
# Aplatir la matrice de transition en une distribution de probabilité 1D
flattened_probs = transition_matrix[k]
flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités
# Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie
k = np.random.choice(len(self.layout), p=flattened_probs)
# Mise à jour du nombre de tours en fonction de l'état actuel
if self.layout[k] == 3 and action == 2:
total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
elif self.layout[state] == 3 and action == 3: elif self.layout[k] == 3 and action == 3:
total_turns += 2 total_turns += 2
else: else:
total_turns += 1 total_turns += 1
number_turns.append(total_turns) number_turns.append(total_turns)
return np.mean(number_turns) return np.mean(number_turns)
def play_optimal_strategy(self): def play_optimal_strategy(self, n_iterations=10000):
return turns return self.simulate_game(self.optimal_policy, n_iterations)
def play_dice_strategy(self): def play_dice_strategy(self, dice_choice, n_iterations=10000):
return turns if dice_choice == 'SafeDice':
strategy = self.safe_strategy
elif dice_choice == 'NormalDice':
strategy = self.normal_strategy
elif dice_choice == 'RiskyDice':
strategy = self.risky_strategy
else:
raise ValueError("Invalid dice choice")
def play_random_strategy(self): return self.simulate_game(strategy, n_iterations)
return turns
def play_random_strategy(self, n_iterations=10000):
return self.simulate_game(self.random_strategy, n_iterations)
def compare_strategies(self, num_games=1000): def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games) optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games) dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games) dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games) dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
random_cost = self.simulate_game(strategy='Random', num_games=num_games) random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
return { return {
'Optimal': optimal_cost, 'Optimal': optimal_cost,
...@@ -75,11 +101,11 @@ class validation: ...@@ -75,11 +101,11 @@ class validation:
# Utilisation d'exemple # Utilisation d'exemple
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
validation = validation(layout, circle=False) validation = validation(layout, circle=False)
circle = False # Example circle value circle = False # Example circle value
"""
# Create an instance of validation # Create an instance of validation
validator = validation(layout, circle) validator = validation(layout, circle)
...@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000) ...@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
results = validation.compare_strategies(num_games=10000) results = validation.compare_strategies(num_games=10000)
print("Coûts moyens :") print("Coûts moyens :")
for strategy, cost in results.items(): for strategy, cost in results.items():
print(f"{strategy}: {cost}") print(f"{strategy}: {cost}")"""
optimal_cost = validation.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)
dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)
random_cost = validation.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)
strategy_comparison = validation.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter