Skip to content
Extraits de code Groupes Projets
Valider 4897f074 rédigé par Adrien Payen's avatar Adrien Payen
Parcourir les fichiers

update files

parent 04f5b4f1
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -2,34 +2,44 @@ import numpy as np
import random as rd
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from test_files.markovDecision_testing import markovDecision as mD
from markovDecision import MarkovDecisionSolver as mD
from validation import Validation
def make_plots():
    """Generate the two validation plots for the default board layout.

    Plot 1 compares the theoretical expected cost from the MDP solver with
    the empirical average cost measured over 10000 simulated games per
    starting square. Plot 2 shows the empirical average number of turns for
    the optimal, always-safe, always-normal, always-risky and random
    policies.
    """
    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
    circle = False
    validation = Validation(layout, circle)
    # Solve the MDP once: expected cost per square and optimal dice policy.
    expec, optimal_policy = mD(layout, circle).solve()

    # Plot 1: theoretical vs empirical cost per starting square.
    expected_costs = np.zeros(len(expec))
    for start_square in range(len(expec)):
        total_turns = 0
        for _ in range(10000):
            total_turns += validation.play_one_game(start_square)
        expected_costs[start_square] = total_turns / 10000

    squares = np.arange(len(expec))
    plt.plot(squares, expec, label="Theoretical cost")
    plt.plot(squares, expected_costs, label="Empirical cost")
    plt.xticks(np.arange(0, len(expec), step=1))
    plt.grid(True)
    plt.xlabel("Square")
    plt.ylabel("Cost")
    plt.legend()
    plt.title("Comparison between the expected cost and the actual cost")
    plt.show()

    # Plot 2: expected number of turns for the different policies.
    # NOTE(review): `policy` is never passed to empirical_results(), so all
    # five bars are computed with the same (optimal) policy — confirm intent.
    policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2,
                np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))]
    avgn_turns = [Validation(layout, circle).empirical_results() for policy in policies]
    names = ["optimal", "safe", "normal", "risky", "random"]
    plt.bar(names, avgn_turns)
    plt.xlabel("Policy")
    plt.ylabel("Cost")
    plt.title("Expected number of turns for different policies")
    plt.show()


# Run the plots only when this module is executed as a script.
if __name__ == "__main__":
    make_plots()
import random
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator
from markovDecision import MarkovDecisionSolver as mD
class Validation:
def __init__(self, layout, circle=False):
......@@ -7,6 +10,10 @@ class Validation:
self.circle = circle
self.tmc_instance = TransitionMatrixCalculator()
# Compute optimal value iteration results
solver = mD(self.layout, self.circle)
self.optimal_values, self.optimal_dice = solver.solve()
def simulate_game(self, strategy='optimal', num_games=1000):
total_turns = 0
......@@ -28,22 +35,38 @@ class Validation:
return average_turns
def play_optimal_strategy(self):
    """Play one game always following the precomputed optimal policy.

    Movement is simplified: from each square the pawn advances by the
    optimal die value stored in ``self.optimal_dice`` for that square.

    Returns
    -------
    int
        Number of turns taken to reach the last square.
    """
    last_square = len(self.layout) - 1
    position = 0
    turn_count = 0
    while position < last_square:
        # Advance by the optimal die value chosen for the current square.
        position += int(self.optimal_dice[position])
        turn_count += 1
    return turn_count
def play_dice_strategy(self, dice):
    """Play one game always throwing the same die type.

    Cleans up diff residue: the old stub (a stray ``pass`` and a duplicated
    ``def play_dice_strategy``) was interleaved with the real body.

    Parameters
    ----------
    dice : int
        Die type to use on every turn (1, 2 or 3); the pawn advances by
        exactly this amount each turn (simplified movement, no traps).

    Returns
    -------
    int
        Number of turns needed to reach or pass the final square.
    """
    current_state = 0
    turns = 0
    while current_state < len(self.layout) - 1:
        # Always use the specified dice type (1, 2, or 3)
        current_state += dice
        turns += 1
    return turns
def play_random_strategy(self):
    """Play one game choosing a uniformly random die (1-3) each turn.

    Removes the stray ``pass`` left over from the pre-commit stub.
    Movement is simplified: the pawn advances by the die value itself.

    Returns
    -------
    int
        Number of turns needed to reach or pass the final square.
    """
    current_state = 0
    turns = 0
    while current_state < len(self.layout) - 1:
        # Uniform choice among the three dice: 1 (safe), 2 (normal), 3 (risky)
        dice_roll = np.random.randint(1, 4)
        current_state += dice_roll
        turns += 1
    return turns
def compare_strategies(self, num_games=1000):
optimal_cost = self.simulate_game(strategy='optimal', num_games=num_games)
......@@ -60,10 +83,82 @@ class Validation:
'random': random_cost
}
# Example usage: compare the four strategies on the default board layout.
# NOTE(review): this runs at import time (10000 games per strategy); consider
# guarding it with `if __name__ == "__main__":`.
layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
validation = Validation(layout, circle=False)
results = validation.compare_strategies(num_games=10000)
# Print the average number of turns ("cost") measured for each strategy.
print("Average Costs:")
for strategy, cost in results.items():
    print(f"{strategy}: {cost}")
def play_one_turn(self, dice_choice, cur_pos, prison):
    """Play a single turn from ``cur_pos`` with the chosen die.

    Returns a ``(new_position, in_prison)`` pair.

    Dice semantics visible in this code:
      * the throw result is uniform over 0..dice_choice;
      * die 1 never triggers the landed square's effect (early return);
      * die 2 triggers it with probability 1/2;
      * die 3 always triggers it.
    Square effects (layout values): 0 = nothing, 1 = back to square 0,
    2 = move back 3 squares (floored at 0), 3 = prison (next turn skipped).
    """
    # Already on the final square: stay there, game effectively over.
    if cur_pos == len(self.layout) - 1:
        return len(self.layout) - 1, False
    # In prison: lose this turn, stay in place, prison flag cleared.
    if prison:
        return cur_pos, False
    # Convert dice_choice to integer to avoid TypeError
    dice_choice = int(dice_choice)
    list_dice_results = [i for i in range(dice_choice + 1)]
    result = random.choice(list_dice_results)
    # Square 2 is a fork: 50/50 between two lanes — presumably slow lane
    # (+result) vs fast lane (+result+7); confirm against board geometry.
    if cur_pos == 2 and result != 0:
        slow_lane = random.choice([0, 1])
        if slow_lane:
            new_pos = cur_pos + result
        else:
            new_pos = cur_pos + result + 7
    # Leaving squares 7-9 past position 10 adds a +4 offset — presumably
    # rejoining the other lane; confirm against board geometry.
    elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))):
        new_pos = cur_pos + result + 4
    else:
        new_pos = cur_pos + result
    # Overshooting the last square: wrap on a circular board, otherwise
    # finish on the last square (second element is True here).
    if new_pos > len(self.layout) - 1:
        if self.circle:
            new_pos -= len(self.layout)
        else:
            return len(self.layout) - 1, True
    new_square = self.layout[new_pos]
    if dice_choice == 1:
        # Safe die: square effects never fire.
        return new_pos, False
    elif dice_choice == 2:
        # Normal die: the square's effect fires only half of the time.
        new_square = random.choice([0, new_square])
    if new_square == 0:
        return new_pos, False # nothing happens
    elif new_square == 1:
        return 0, False # back to square one
    elif new_square == 2:
        if new_pos - 3 < 0:
            return 0, False # back to square one
        return new_pos - 3, False # back 3 squares
    elif new_square == 3:
        return new_pos, True # prison
def play_one_game(self, start=0):
    """Simulate one full game from ``start`` following the optimal policy.

    Each turn is delegated to ``play_one_turn`` with the optimal die for
    the current square (``self.optimal_dice``). Fixes dead code in the
    original circular branch: the wrap-around assignment
    ``cur_pos = len(self.layout) - new_pos`` was immediately overwritten
    by ``cur_pos = new_pos`` and never took effect; ``play_one_turn``
    already wraps positions when ``self.circle`` is set, so the two loop
    variants collapse into one (positions never exceed the last square,
    making ``!=`` and ``<`` equivalent here).

    Parameters
    ----------
    start : int
        Square index where the game begins (default 0).

    Returns
    -------
    int
        Number of turns played until the last square is reached.
    """
    n_turns = 0
    cur_pos = start
    prison = False
    last = len(self.layout) - 1
    while cur_pos != last:
        cur_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison)
        n_turns += 1
    return n_turns
def empirical_results(self, n_games=10000):
    """Average number of turns over ``n_games`` simulated games.

    Generalized from the original hard-coded 10000 iterations; the default
    value preserves the previous behavior exactly.

    Parameters
    ----------
    n_games : int
        Number of games to simulate (must be > 0; default 10000).

    Returns
    -------
    float
        Mean number of turns per game.
    """
    total_turns_played = sum(self.play_one_game() for _ in range(n_games))
    return total_turns_played / n_games
\ No newline at end of file
......@@ -31,7 +31,7 @@ class Validation:
return average_turns
def play_optimal_strategy(self):
    """Solve the MDP and return the empirical average cost of its policy.

    Fixes two diff leftovers: a duplicate call to the removed
    ``markovDecision`` function above the ``mD`` call, and the missing
    ``.solve()`` on the ``MarkovDecisionSolver`` instance (consistent with
    every other use of ``mD`` in this project).
    """
    _, optimal_policy = mD(self.layout, self.circle).solve()
    return self.empirical_results(optimal_policy.astype(int))
def play_dice_strategy(self, dice):
......
import random as rd
import numpy as np
import matplotlib.pyplot as plt
from tmc import TransitionMatrixCalculator as tmc
from markovDecision import MarkovDecisionSolver as mD
class EmpiricalComparision:
    """Stateless container for empirical strategy-comparison helpers."""

    def __init__(self):
        # No instance state is needed.
        pass
def simulation(strategy, layout: list, circle, nIter: int):
    """Estimate the average number of turns for a fixed per-square strategy.

    Builds the three dice transition matrices once, then runs ``nIter``
    games: from square ``k`` the strategy's die is looked up, the next
    square is drawn from the corresponding transition row, and trap squares
    (layout value 3) add extra turns (expected 1.5 with the normal die,
    2 with the risky die).

    BUG FIX: the original used ``np.rd.choice`` / ``np.rd.uniform`` —
    numpy has no ``rd`` attribute, so every call raised AttributeError;
    replaced with ``np.random.choice`` / ``np.random.uniform``.

    Parameters
    ----------
    strategy : sequence of int
        Die choice (1, 2 or 3) for each square.
    layout : list
        Board layout (trap codes per square).
    circle : bool
        Whether the board is circular (forwarded to the matrix builders).
    nIter : int
        Number of games to simulate.

    Returns
    -------
    float
        Mean number of turns over the ``nIter`` games.
    """
    tmc_instance = tmc()
    safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
    normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
    risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
    matrices_transition = [safe_dice, normal_dice, risky_dice]

    nTurns = []
    for _ in range(nIter):
        turns = 0
        k = 0
        while k < len(layout) - 1:
            action = strategy[k]
            transitionMatrix = matrices_transition[int(action - 1)]
            # Draw the next square from the transition row of square k.
            k = np.random.choice(len(layout), p=transitionMatrix[k])
            if layout[k] == 3 and action == 2:
                # Normal die on a prison square: 50% chance the trap fires.
                turns += 1 if np.random.uniform(0, 1) < 0.5 else 2
            elif layout[k] == 3 and action == 3:
                turns += 2
            else:
                turns += 1
        nTurns.append(turns)
    return np.mean(nTurns)
def plot(layouts: list, circle, nIter: int):
    """Collect expected costs for several layouts (unfinished stub).

    For each layout, solves the MDP to obtain the expected cost vector and
    optimal policy. BUG FIX: ``mD(layout, circle)`` returns a solver
    instance, not a tuple — ``.solve()`` must be called to obtain
    ``(expec, policy)``, consistent with the rest of the project.

    NOTE(review): the accumulator lists below are never filled and nothing
    is plotted; the simulation part of this function was left unfinished.
    """
    Markov = []
    Safe = []
    Normal = []
    Risky = []
    Random = []
    for layout in layouts:
        expec, policy = mD(layout, circle).solve()
        # TODO: simulate each strategy with `simulation(...)` and append
        # the results to the lists above, then plot them.
    return
# Example usage.
# NOTE(review): `results` is not defined anywhere in this file — this was
# probably renamed to `simulation` or `plot`; confirm before running.
# Also note that 1000000 iterations per call will be extremely slow.
layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
results(layout, False, 1000000)
results(layout, True, 1000000)
\ No newline at end of file
Chargement en cours…
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter.