Skip to content
Extraits de code Groupes Projets
Valider 4897f074 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update files

parent 04f5b4f1
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
...@@ -2,34 +2,44 @@ import numpy as np ...@@ -2,34 +2,44 @@ import numpy as np
import random as rd import random as rd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc from tmc import TransitionMatrixCalculator as tmc
from test_files.markovDecision_testing import markovDecision as mD from markovDecision import MarkovDecisionSolver as mD
from validation import Validation from validation import Validation
def plot_results(validation_instance): def make_plots():
results_markov = validation_instance.simulate_game('markov') layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
results_safe = validation_instance.simulate_game([1]*15) circle = False
results_normal = validation_instance.simulate_game([2]*15) validation = Validation(layout, circle)
results_risky = validation_instance.simulate_game([3]*15) expec, optimal_policy = mD(layout, circle).solve()
results_random = validation_instance.simulate_game(np.random.randint(1, 4, size=15))
plt.figure(figsize=(12, 8)) # Plot 1: Theoretical vs Empirical Cost
plt.plot(range(len(validation_instance.layouts)), results_markov, label='Markov') expected_costs = np.zeros(len(expec))
plt.plot(range(len(validation_instance.layouts)), results_safe, label='SafeDice') for start_square in range(len(expec)):
plt.plot(range(len(validation_instance.layouts)), results_normal, label='NormalDice') total_turns = 0
plt.plot(range(len(validation_instance.layouts)), results_risky, label='RiskyDice') for _ in range(10000):
plt.plot(range(len(validation_instance.layouts)), results_random, label='Random') total_turns += validation.play_one_game(start_square)
expected_costs[start_square] = total_turns / 10000
plt.xticks(range(len(validation_instance.layouts)), range(len(validation_instance.layouts))) squares = np.arange(len(expec))
plt.xlabel('Layout Number', fontsize=13) plt.plot(squares, expec, label="Theoretical cost")
plt.ylabel('Average Number of Turns', fontsize=13) plt.plot(squares, expected_costs, label="Empirical cost")
plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1) plt.xticks(np.arange(0, len(expec), step=1))
plt.grid(True)
plt.xlabel("Square")
plt.ylabel("Cost")
plt.legend()
plt.title("Comparison between the expected cost and the actual cost")
plt.show() plt.show()
# Example usage # Plot 2: Expected number of turns for different policies
layouts = [ policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2, np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))]
[0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0], avgn_turns = [Validation(layout, circle).empirical_results() for policy in policies]
# Add more layouts as needed names = ["optimal", "safe", "normal", "risky", "random"]
] plt.bar(names, avgn_turns)
plt.xlabel("Policy")
plt.ylabel("Cost")
plt.title("Expected number of turns for different policies")
plt.show()
validation_instance = Validation(layouts, circle=False, n_iterations=10000) # Call make_plots function
plot_results(validation_instance) if __name__ == "__main__":
\ No newline at end of file make_plots()
import random
import numpy as np import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator from tmc import TransitionMatrixCalculator
from markovDecision import MarkovDecisionSolver as mD
class Validation: class Validation:
def __init__(self, layout, circle=False): def __init__(self, layout, circle=False):
...@@ -7,6 +10,10 @@ class Validation: ...@@ -7,6 +10,10 @@ class Validation:
self.circle = circle self.circle = circle
self.tmc_instance = TransitionMatrixCalculator() self.tmc_instance = TransitionMatrixCalculator()
# Compute optimal value iteration results
solver = mD(self.layout, self.circle)
self.optimal_values, self.optimal_dice = solver.solve()
def simulate_game(self, strategy='optimal', num_games=1000): def simulate_game(self, strategy='optimal', num_games=1000):
total_turns = 0 total_turns = 0
...@@ -28,22 +35,38 @@ class Validation: ...@@ -28,22 +35,38 @@ class Validation:
return average_turns return average_turns
def play_optimal_strategy(self): def play_optimal_strategy(self):
# Implement the optimal strategy using value iteration results current_state = 0 # Start from the initial state
# Use TransitionMatrixCalculator to compute transitions and make decisions turns = 0
# calculer la stratégie optimale pour ou un tour while current_state < len(self.layout) - 1:
optimal_action = int(self.optimal_dice[current_state]) # Get the optimal action for the current state
current_state += optimal_action # Move to the next state based on the optimal action
turns += 1
return turns
def play_dice_strategy(self, dice):
current_state = 0 # Start from the initial state
turns = 0
pass while current_state < len(self.layout) - 1:
# Always use the specified dice type (1, 2, or 3)
current_state += dice
turns += 1
def play_dice_strategy(self, dice): return turns
# Implement a strategy where only one type of dice is used (1, 2, or 3)
pass
def play_random_strategy(self): def play_random_strategy(self):
# Implement a purely random strategy current_state = 0 # Start from the initial state
pass turns = 0
while current_state < len(self.layout) - 1:
# Choose a random dice roll between 1 and 3
dice_roll = np.random.randint(1, 4)
current_state += dice_roll
turns += 1
return turns
def compare_strategies(self, num_games=1000): def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(strategy='optimal', num_games=num_games) optimal_cost = self.simulate_game(strategy='optimal', num_games=num_games)
...@@ -60,10 +83,82 @@ class Validation: ...@@ -60,10 +83,82 @@ class Validation:
'random': random_cost 'random': random_cost
} }
def play_one_turn(self, dice_choice, cur_pos, prison):
    """Simulate one turn and return ``(new_position, in_prison)``.

    Args:
        dice_choice: Die used this turn. It is cast to ``int`` and the
            number of steps is drawn uniformly from ``0..dice_choice``.
            Die 1 never triggers the landing square, die 2 triggers it
            with probability 1/2, any other die always triggers it.
        cur_pos: Index of the square the player currently stands on.
        prison: Whether the player is currently held in prison.

    Returns:
        A ``(position, prison)`` tuple describing the state after the turn.

    The lane geometry (junction on square 2, lanes merging after square 9)
    is hard-coded with literal indices; presumably it matches the
    15-square layouts used with this class -- confirm before using other
    board sizes.
    """
    last_square = len(self.layout) - 1

    # The goal square is absorbing.
    if cur_pos == last_square:
        return last_square, False

    # A prisoner spends this turn in place and is released afterwards.
    if prison:
        return cur_pos, False

    # Policies may hold numpy floats; cast so the value can bound a draw.
    dice_choice = int(dice_choice)
    result = random.randrange(dice_choice + 1)

    if cur_pos == 2 and result != 0:
        # Junction: each lane is taken with probability 1/2, the second
        # lane sitting 7 indices further along the layout.
        new_pos = cur_pos + result
        if not random.choice([0, 1]):
            new_pos += 7
    elif cur_pos in (7, 8, 9) and cur_pos + result >= 10:
        # Leaving the first lane: skip over the indices of the second one.
        # (This also covers "on square 9 with a non-zero roll".)
        new_pos = cur_pos + result + 4
    else:
        new_pos = cur_pos + result

    if new_pos > last_square:
        if not self.circle:
            # Overshooting simply ends the game on the goal square.
            return last_square, False
        new_pos -= len(self.layout)

    square = self.layout[new_pos]
    if dice_choice == 1:
        return new_pos, False
    if dice_choice == 2:
        # Half of the time the square's effect is ignored.
        square = random.choice([0, square])

    if square == 1:
        return 0, False  # back to the first square
    if square == 2:
        return max(new_pos - 3, 0), False  # three squares back, floored at 0
    if square == 3:
        return new_pos, True  # prison
    # Code 0 and any unrecognised code: nothing happens. Always returning a
    # tuple keeps the caller's unpacking from failing on ``None``.
    return new_pos, False
def play_one_game(self, start=0, policy=None):
    """Play one game to the goal square and return the number of turns.

    Args:
        start: Index of the square the game starts from.
        policy: Optional sequence giving the die to use on each square.
            Defaults to ``self.optimal_dice``.

    Returns:
        The number of turns taken (0 when ``start`` is already the goal).

    The same loop serves circular and non-circular boards:
    ``play_one_turn`` already wraps or clamps the position, so no extra
    position handling is needed here.
    """
    dice_per_square = self.optimal_dice if policy is None else policy
    last_square = len(self.layout) - 1
    cur_pos = start
    prison = False
    n_turns = 0
    while cur_pos < last_square:
        cur_pos, prison = self.play_one_turn(dice_per_square[cur_pos], cur_pos, prison)
        n_turns += 1
    return n_turns


def empirical_results(self, policy=None, n_games=10000):
    """Return the average number of turns over ``n_games`` simulated games.

    Args:
        policy: Optional sequence giving the die to use on each square;
            forwarded to ``play_one_game`` (which defaults to
            ``self.optimal_dice``).
        n_games: Number of games to simulate.

    Raises:
        ValueError: If ``n_games`` is not positive.
    """
    if n_games <= 0:
        raise ValueError("n_games must be a positive integer")
    total_turns_played = sum(self.play_one_game(policy=policy) for _ in range(n_games))
    return total_turns_played / n_games
\ No newline at end of file
...@@ -31,7 +31,7 @@ class Validation: ...@@ -31,7 +31,7 @@ class Validation:
return average_turns return average_turns
def play_optimal_strategy(self): def play_optimal_strategy(self):
_, optimal_policy = markovDecision(self.layout, self.circle) _, optimal_policy = mD(self.layout, self.circle)
return self.empirical_results(optimal_policy.astype(int)) return self.empirical_results(optimal_policy.astype(int))
def play_dice_strategy(self, dice): def play_dice_strategy(self, dice):
......
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class EmpiricalComparision:
    """Monte-Carlo comparison of dice strategies on one or more layouts.

    The class keeps no state; ``simulation`` and ``plot`` are static
    helpers grouped under one name.
    """

    def __init__(self):
        """Nothing to initialise."""
        return

    @staticmethod
    def simulation(strategy, layout: list, circle, nIter: int):
        """Estimate the mean number of turns needed to finish a game.

        Args:
            strategy: Sequence indexed by square giving the die to throw
                there (1, 2 or 3).
            layout: Square codes of the board.
            circle: Forwarded to the transition-matrix calculator.
            nIter: Number of games to simulate.

        Returns:
            ``np.mean`` of the per-game turn counts.
        """
        tmc_instance = tmc()
        # One transition matrix per die, addressed by ``action - 1``.
        matrices_transition = [
            tmc_instance._compute_safe_matrix(layout, circle),
            tmc_instance._compute_normal_matrix(layout, circle),
            tmc_instance._compute_risky_matrix(layout, circle),
        ]
        n_squares = len(layout)
        goal = n_squares - 1

        nTurns = []
        for _ in range(nIter):
            turns = 0
            k = 0
            while k < goal:
                action = strategy[k]
                transitionMatrix = matrices_transition[int(action - 1)]
                # Draw the next square from row ``k`` of the chosen matrix.
                k = np.random.choice(n_squares, p=transitionMatrix[k])
                if layout[k] == 3 and action == 2:
                    # Die 2 triggers the code-3 square half of the time,
                    # which costs one extra turn.
                    turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
                elif layout[k] == 3 and action == 3:
                    # Die 3 always triggers it.
                    turns += 2
                else:
                    turns += 1
            nTurns.append(turns)
        return np.mean(nTurns)

    @staticmethod
    def plot(layouts: list, circle, nIter: int):
        """Simulate five strategies on every layout and plot the averages.

        Args:
            layouts: List of layouts (each a list of square codes).
            circle: Forwarded to the solver and to ``simulation``.
            nIter: Number of games simulated per strategy and layout.
        """
        Markov = []
        Safe = []
        Normal = []
        Risky = []
        Random = []
        simulate = EmpiricalComparision.simulation

        for layout in layouts:
            # The solver is a class: the policy comes from ``solve()``.
            _, policy = mD(layout, circle).solve()
            n_squares = len(layout)

            Markov.append(simulate(policy, layout, circle, nIter))
            Safe.append(simulate([1] * n_squares, layout, circle, nIter))
            Normal.append(simulate([2] * n_squares, layout, circle, nIter))
            Risky.append(simulate([3] * n_squares, layout, circle, nIter))
            random_policy = np.random.randint(1, 4, size=n_squares)
            Random.append(simulate(random_policy, layout, circle, nIter))

        layout_ids = range(len(layouts))
        plt.figure(figsize=(12, 8))
        plt.plot(layout_ids, Markov, label='Markov')
        plt.plot(layout_ids, Safe, label='SafeDice')
        plt.plot(layout_ids, Normal, label='NormalDice')
        plt.plot(layout_ids, Risky, label='RiskyDice')
        plt.plot(layout_ids, Random, label='Random')
        plt.xticks(layout_ids, layout_ids)
        plt.xlabel('Layout Number', fontsize=13)
        plt.ylabel('Average Number of Turns', fontsize=13)
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
        plt.show()
        return
# Example run on a single 15-square layout, once with False and once with True
# as the second argument (presumably the `circle` flag -- confirm).
# NOTE(review): `results` is not defined anywhere in the visible part of this
# file, so these calls look like they raise NameError -- confirm which entry
# point was intended.
layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
results(layout, False, 1000000)
results(layout, True, 1000000)
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter