Skip to content
Extraits de code Groupes Projets
Valider 883d5811 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

merge solution

parent 81121bf8
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
import matplotlib.pyplot as plt
from simulate import Validation as Val
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
import random as rd
import numpy as np
def plot_results(layouts, circle, n_iterations=100):
    """Simulate each layout under five strategies and plot average game length.

    Args:
        layouts: iterable of board layouts (each a list of 15 square codes).
        circle: whether the board wraps around (circular variant).
        n_iterations: number of simulated games per (layout, strategy) pair.

    Side effects: opens a matplotlib window with one curve per strategy.
    """
    results_markov = []
    results_safe = []
    results_normal = []
    results_risky = []
    results_random = []
    for layout in layouts:
        # The validator's constructor computes the transition matrices and the
        # MDP-optimal policy for this layout; the original code called
        # mD(layout, circle) as if the constructor returned (expec, policy)
        # and called the instance method simulate_game unbound.
        validator = Val(layout, circle)
        results_markov.append(validator.simulate_game(validator.optimal_strategy, n_iterations))
        results_safe.append(validator.simulate_game([1] * 15, n_iterations))
        results_normal.append(validator.simulate_game([2] * 15, n_iterations))
        results_risky.append(validator.simulate_game([3] * 15, n_iterations))
        # Random strategy: one dice (1..3) drawn independently per square.
        results_random.append(validator.simulate_game(np.random.randint(1, 4, size=15), n_iterations))
    # Plot the results
    plt.figure(figsize=(12, 8))
    plt.plot(range(len(layouts)), results_markov, label='Markov')
    plt.plot(range(len(layouts)), results_safe, label='Safe')
    plt.plot(range(len(layouts)), results_normal, label='Normal')
    plt.plot(range(len(layouts)), results_risky, label='Risky')
    plt.plot(range(len(layouts)), results_random, label='Random')
    plt.xticks(range(len(layouts)), range(len(layouts)))
    plt.xlabel('Layout number', fontsize=13)
    plt.ylabel('Average number of turns', fontsize=13)
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    plt.show()
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
import random as rd
import random
import numpy as np
class Validation:
    """Monte-Carlo validation of dice strategies for the Snakes-and-Ladders MDP.

    Builds the three per-dice transition matrices via TransitionMatrixCalculator,
    solves the MDP for the optimal policy, and simulates games under fixed
    strategies to estimate the average number of turns.
    """

    def __init__(self, layout, circle=False):
        # layout: list of square codes (3 marks a prison square, per simulate_game).
        self.layout = layout
        # circle: passed through to the matrix computations (board wraps if True).
        self.circle = circle
        # Compute transition matrices using TransitionMatrixCalculator
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)
        # Solve Markov Decision Problem
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()
        # Define all the strategies
        # (15-square board is hard-coded here; 1=safe, 2=normal, 3=risky dice)
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * 15
        self.normal_strategy = [2] * 15
        self.risky_strategy = [3] * 15
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)]

    def simulate_game(self, strategy, n_iterations=10000):
        """Return the mean number of turns to reach the goal under `strategy`.

        strategy: per-square dice choice (1..3), indexed by current square.
        """
        # Compute transition matrices for each dice
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []
        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # initial state
            while state < len(self.layout) - 1:  # until goal state is reached
                action = strategy[state]  # get action according to strategy
                transition_matrix = transition_matrices[int(action) - 1]
                # Sample the next square from the current row of the matrix.
                state = np.random.choice(len(self.layout), p=transition_matrix[state])
                # Prison square (code 3): normal dice pays 1 or 2 turns (50/50),
                # risky dice always pays 2 turns; otherwise one turn per move.
                if self.layout[state] == 3 and action == 2:
                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
            number_turns.append(total_turns)
        return np.mean(number_turns)

    def simulate_state(self, strategy, n_iterations=10000):
        """Return per-step turn costs averaged over `n_iterations` games.

        NOTE(review): each game produces a list whose length is that game's
        number of moves, so `number_turns` is ragged; np.mean(..., axis=0)
        on ragged input is unreliable (error/warning on modern numpy) —
        confirm the intended aggregation.
        """
        # Compute transition matrices for each dice
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []
        for _ in range(n_iterations):
            turns_per_state = []
            state = 0
            while state < len(self.layout) - 1:
                total_turns = 0
                action = strategy[state]
                transition_matrix = transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout), p=transition_matrix[state])
                # Same prison-cost rule as simulate_game, recorded per move.
                if self.layout[state] == 3 and action == 2:
                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1
                turns_per_state.append(total_turns)
            number_turns.append(turns_per_state)
        return np.mean(number_turns, axis=0)
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver
nSquares = 15  # number of squares on the board (goal is index nSquares - 1)
nSimul = 10000  # number of simulated games per empirical estimate
def playOneTurn(diceChoice, curPos, layout, circle, prison):
    """Play one turn and return ``(new_position, in_prison)``.

    diceChoice: 1 = safe, 2 = normal, 3 = risky (max step equals the choice).
    prison: when True the player skips this turn.
    """
    goal = nSquares - 1
    if curPos == goal:
        return goal, False
    if prison:
        # Imprisoned players lose the turn and stay put.
        return curPos, False

    # Dice "d" rolls uniformly in {0, ..., d}.
    roll = random.choice([step for step in range(diceChoice + 1)])

    # Board topology: square 2 forks into a slow lane (+7 offset to reach the
    # fast lane) and squares 7-9 rejoin the main track with a +4 offset.
    if curPos == 2 and roll != 0:
        takeSlow = random.choice([0, 1])
        newPos = curPos + roll if takeSlow else curPos + roll + 7
    elif (curPos == 9 and roll != 0) or (curPos in [7, 8, 9] and curPos + roll >= 10):
        newPos = curPos + roll + 4
    else:
        newPos = curPos + roll

    if newPos > goal:
        if not circle:
            # Overshooting ends the game on the goal square.
            return goal, True
        newPos -= nSquares

    newSquare = layout[newPos]
    if diceChoice == 1:
        # Safe dice never triggers traps.
        return newPos, False
    if diceChoice == 2:
        # Normal dice triggers the trap only half of the time.
        newSquare = random.choice([0, newSquare])

    if newSquare == 0:
        return newPos, False  # nothing happens
    elif newSquare == 1:
        return 0, False  # back to square one
    elif newSquare == 2:
        # Back 3 squares, clamped at square one.
        return (0, False) if newPos - 3 < 0 else (newPos - 3, False)
    elif newSquare == 3:
        return newPos, True  # prison
    elif newSquare == 4:
        # Mystery square: behave as a random trap among 1, 2, 3.
        drawn = random.choice([1, 2, 3])
        if drawn == 1:
            return 0, False  # back to square one
        elif drawn == 2:
            return (0, False) if newPos - 3 < 0 else (newPos - 3, False)
        elif drawn == 3:
            return newPos, True  # prison
def playOneGame(layout, circle, policy, start=0):
    """Play one full game from `start` and return the number of turns taken.

    policy: per-square dice choice (1..3) indexed by current position.
    """
    nTurns = 0
    curPos = start
    prison = False
    # playOneTurn already wraps positions past the goal when circle=True and
    # caps them at the goal otherwise, so one loop covers both board types.
    # (The original circle branch computed `curPos = nSquares - newPos` and
    # then immediately overwrote it with `curPos = newPos` — dead, and wrong
    # arithmetic besides; it has been removed.)
    while curPos != nSquares - 1:
        curPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison)
        nTurns += 1
    return nTurns
def empiric_cost_of_square(layout, circle, policy):
    """Estimate by simulation the expected number of turns from each square.

    Returns a numpy array of length nSquares with the average over nSimul games.
    """
    expected_costs = np.zeros(nSquares)
    for start_square in range(nSquares):
        turns = [playOneGame(layout, circle, policy, start=start_square)
                 for _ in range(nSimul)]
        expected_costs[start_square] = sum(turns) / nSimul
    return expected_costs
def empirical_results(layout, circle, policy):
    """Average number of turns over nSimul games started from square 0."""
    total = sum(playOneGame(layout, circle, policy) for _ in range(nSimul))
    return total / nSimul
def comparison_theorical_empirical(layout, circle):
    """Plot theoretical (MDP) vs simulated expected cost for every square."""
    solver = MarkovDecisionSolver(layout, circle)
    expec, optimal_policy = solver.solve()
    actual = empiric_cost_of_square(layout, circle, optimal_policy.astype(int))
    # Both curves share the same x axis: one point per square.
    squares = np.arange(len(expec))
    for costs, label in ((expec, "Theoretical cost"), (actual, "Empirical cost")):
        plt.plot(squares, costs, label=label)
    plt.xticks(np.arange(0, len(expec), step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()
def comparison_of_policies_total(layout, circle):
    """Bar-plot the simulated average game length for five policies."""
    solver = MarkovDecisionSolver(layout, circle)
    _, optimal_policy = solver.solve()
    # Optimal, then the three constant-dice policies, then a random one.
    policies = [
        optimal_policy.astype(int),
        np.ones(nSquares, dtype=int),
        np.ones(nSquares, dtype=int) * 2,
        np.ones(nSquares, dtype=int) * 3,
        np.random.randint(1, 4, size=nSquares),
    ]
    avgnTurns = [empirical_results(layout, circle, policy) for policy in policies]
    names = ["optimal", "safe", "normal", "risky", "random"]
    # Creating the bar plot
    plt.bar(names, avgnTurns)
    # Adding labels and title
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns by policy")
    # Displaying the plot
    plt.show()
def comparison_of_policies_squares(layout, circle):
    """Plot per-square simulated expected cost for five policies."""
    solver = MarkovDecisionSolver(layout, circle)
    _, optimal_policy = solver.solve()
    policies = [
        optimal_policy.astype(int),
        np.ones(nSquares, dtype=int),
        np.ones(nSquares, dtype=int) * 2,
        np.ones(nSquares, dtype=int) * 3,
        np.random.randint(1, 4, size=nSquares),
    ]
    avgnTurns = [empiric_cost_of_square(layout, circle, policy) for policy in policies]
    squares = np.arange(len(avgnTurns[0]))
    # One curve per policy, in the same order as `policies`.
    for costs, label in zip(avgnTurns, ["Optimal", "Safe", "Normal", "Risky", "Random"]):
        plt.plot(squares, costs, label=label)
    plt.xticks(np.arange(0, len(avgnTurns[0]), step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Expected cost for different policies")
    plt.show()
def make_plots():
    """Run the validation plots on the example layout (non-circular board)."""
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    circle = False
    comparison_theorical_empirical(layout, circle)
    # Other available comparisons, disabled by default:
    # comparison_of_policies_total(layout, circle)
    # comparison_of_policies_squares(layout, circle)
# Guard the entry point so importing this module does not launch the plots.
if __name__ == "__main__":
    make_plots()
......@@ -25,43 +25,69 @@ class validation:
self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)]
def simulate_game(self, strategy, n_iterations=10000):
    """Return the mean number of turns to reach the goal under `strategy`.

    strategy: per-square dice choice (1..3), indexed by the current square.
    The diff residue here interleaved the old `state`-based loop with the
    new `k`-based one; this is the reconstructed post-merge version. The
    row renormalisation is done out-of-place so the stored transition
    matrices are not mutated (the original used `/=` on a matrix row view).
    """
    # One transition matrix per dice: safe, normal, risky.
    transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
    number_turns = []
    for _ in range(n_iterations):
        total_turns = 0
        k = 0  # current square (initial state)
        while k < len(self.layout) - 1:  # until the goal square is reached
            action = strategy[k]  # dice chosen for this square
            # Convert the action to the matrix index (1..3 -> 0..2).
            action_index = int(action) - 1
            transition_matrix = transition_matrices[action_index]
            # Renormalise the row so rounding drift cannot break np.random.choice.
            probs = transition_matrix[k]
            probs = probs / np.sum(probs)
            k = np.random.choice(len(self.layout), p=probs)
            # Prison square (code 3): normal dice pays 1 or 2 turns (50/50),
            # risky dice always pays 2; otherwise each move costs one turn.
            if self.layout[k] == 3 and action == 2:
                total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif self.layout[k] == 3 and action == 3:
                total_turns += 2
            else:
                total_turns += 1
        number_turns.append(total_turns)
    return np.mean(number_turns)
def play_optimal_strategy(self, n_iterations=10000):
    """Simulate games following the MDP-optimal policy; return the mean cost.

    The pre-merge stub (`return turns` with `turns` undefined) left by the
    diff has been dropped in favour of the working definition.
    """
    return self.simulate_game(self.optimal_policy, n_iterations)
def play_dice_strategy(self, dice_choice, n_iterations=10000):
    """Simulate games with one fixed dice for every square.

    dice_choice: 'SafeDice', 'NormalDice' or 'RiskyDice'.
    Raises ValueError for any other value. (The interleaved pre-merge stubs
    returning the undefined name `turns` have been removed.)
    """
    if dice_choice == 'SafeDice':
        strategy = self.safe_strategy
    elif dice_choice == 'NormalDice':
        strategy = self.normal_strategy
    elif dice_choice == 'RiskyDice':
        strategy = self.risky_strategy
    else:
        raise ValueError("Invalid dice choice")
    return self.simulate_game(strategy, n_iterations)

def play_random_strategy(self, n_iterations=10000):
    """Simulate games under the instance's random per-square strategy."""
    return self.simulate_game(self.random_strategy, n_iterations)
def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games)
dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games)
dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games)
dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games)
random_cost = self.simulate_game(strategy='Random', num_games=num_games)
optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games)
dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games)
dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games)
dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games)
random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games)
return {
'Optimal': optimal_cost,
......@@ -75,11 +101,11 @@ class validation:
# Utilisation d'exemple
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0]
validation = validation(layout, circle=False)
circle = False # Example circle value
"""
# Create an instance of validation
validator = validation(layout, circle)
......@@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000)
results = validation.compare_strategies(num_games=10000)
print("Coûts moyens :")
for strategy, cost in results.items():
print(f"{strategy}: {cost}")
print(f"{strategy}: {cost}")"""
optimal_cost = validation.play_optimal_strategy(n_iterations=10000)
print("Optimal Strategy Cost:", optimal_cost)
dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000)
print("Normal Dice Strategy Cost:", dice2_cost)
random_cost = validation.play_random_strategy(n_iterations=10000)
print("Random Strategy Cost:", random_cost)
strategy_comparison = validation.compare_strategies(num_games=10000)
print("Strategy Comparison Results:", strategy_comparison)
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter