diff --git a/markovDecision.py b/markovDecision.py index 13836009a79de3a0d6a86dec67791becf15dbc25..6bd17bcf2acda2cb848ff7bb36a9e63761568caf 100644 --- a/markovDecision.py +++ b/markovDecision.py @@ -5,7 +5,7 @@ class MarkovDecisionSolver: def __init__(self, layout : list, circle : bool): self.Numberk = 15 self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle) + self.safe_dice = self.tmc_instance._compute_safe_matrix() self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) self.jail = [i for i, x in enumerate(layout) if x == 3] diff --git a/test_files/Validation_2.py b/test_files/Validation_2.py new file mode 100644 index 0000000000000000000000000000000000000000..1741debc2017d58f09f24ade194745e2326c59f4 --- /dev/null +++ b/test_files/Validation_2.py @@ -0,0 +1,75 @@ +import numpy as np +import random as rd +import matplotlib.pyplot as plt +from tmc import TransitionMatrixCalculator as tmc +from markovDecision import MarkovDecisionSolver as mD + +class Validation: + def __init__(self): + self.tmc_instance = tmc() + + def simulate_games(self, layout, circle, num_games): + results = [] + + for _ in range(num_games): + result = mD(layout, circle) + # Assuming result is a tuple (costs, path) and you want the last element of 'costs' + results.append(result[0][-1]) # Append the number of turns to reach the goal + + return results + + def compare_strategies(self, layout, circle, num_games): + optimal_results = self.simulate_games(layout, circle, num_games) + + suboptimal_strategies = { + "Dice 1 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 1 simulation + "Dice 2 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 2 simulation + "Dice 3 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 3 simulation + "Mixed Random Strategy": self.simulate_games(layout, circle, num_games), # Replace with mixed random strategy simulation + "Purely Random Choice": self.simulate_games(layout, circle, num_games) # Replace with purely random choice simulation + } + + self.plot_results(optimal_results, suboptimal_strategies) + + def plot_results(self, optimal_results, suboptimal_results): + strategies = ["Optimal Strategy"] + list(suboptimal_results.keys()) + avg_costs = [np.mean(optimal_results)] + [np.mean(suboptimal_results[strategy]) for strategy in suboptimal_results] + + plt.figure(figsize=(10, 6)) + plt.bar(strategies, avg_costs, color=['blue'] + ['orange'] * len(suboptimal_results)) + plt.xlabel("Strategies") + plt.ylabel("Average Cost") + plt.title("Comparison of Strategy Performance") + plt.show() + + def run_validation(self, layout, circle, num_games): + solver = mD(layout, circle) + theoretical_cost, optimal_dice_strategy = solver.solve() + + optimal_results = self.simulate_games(layout, circle, num_games) + optimal_average_cost = np.mean(optimal_results) + + suboptimal_strategies = { + "Dice 1 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 1 simulation + "Dice 2 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 2 simulation + "Dice 3 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 3 simulation + "Mixed Random Strategy": self.simulate_games(layout, circle, num_games), # Replace with mixed random strategy simulation + "Purely Random Choice": self.simulate_games(layout, circle, num_games) # Replace with purely random choice simulation + } + + self.plot_results(optimal_results, suboptimal_strategies) + + print("Theoretical Expected Cost (Value Iteration):", theoretical_cost) + print("Empirical Average Cost (Optimal Strategy):", optimal_average_cost) + + for strategy, results in suboptimal_strategies.items(): + avg_cost = np.mean(results) + print(f"Empirical Average Cost ({strategy}):", avg_cost) + +# Exemple d'utilisation de la classe Validation +layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +circle = True +num_games = 1000 + +validation = Validation() +validation.run_validation(layout, circle, num_games) diff --git a/plotting.py b/test_files/plotting.py similarity index 100% rename from plotting.py rename to test_files/plotting.py diff --git a/test_files/validation.py b/test_files/validation.py deleted file mode 100644 index de8dd96e7784cf8cf730721bed2b7af9c459ef53..0000000000000000000000000000000000000000 --- a/test_files/validation.py +++ /dev/null @@ -1,173 +0,0 @@ -import random -import numpy as np -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator -from markovDecision import MarkovDecisionSolver as mD - -class Validation: - def __init__(self, layout, circle=False): - self.layout = layout - self.circle = circle - self.tmc_instance = TransitionMatrixCalculator() - - # Compute optimal value iteration results - solver = mD(self.layout, self.circle) - self.optimal_values, self.optimal_dice = solver.solve() - - def simulate_game(self, strategy='optimal', num_games=1000): - total_turns = 0 - - for _ in range(num_games): - if strategy == 'Optimal': - turns = self.play_optimal_strategy() - elif strategy == 'SafeDice': - turns = self.play_dice_strategy(1) - elif strategy == 'NormalDice': - turns = self.play_dice_strategy(2) - elif strategy == 'RiskyDice': - turns = self.play_dice_strategy(3) - elif strategy == 'Random': - turns = self.play_random_strategy() - - total_turns += turns - - average_turns = total_turns / num_games - return average_turns - - def play_optimal_strategy(self): - current_state = 0 # Start from the initial state - turns = 0 - - while current_state < len(self.layout) - 1: - optimal_action = int(self.optimal_dice[current_state]) # Get the optimal action for the current state - current_state += optimal_action # Move to the next state based on the optimal action - turns += 1 - - return turns - - def play_dice_strategy(self, dice): - current_state = 0 # Start from the initial state - turns = 0 - - while current_state < len(self.layout) - 1: - # Always use the specified dice type (1, 2, or 3) - current_state += dice - turns += 1 - - return turns - - def play_random_strategy(self): - current_state = 0 # Start from the initial state - turns = 0 - - while current_state < len(self.layout) - 1: - # Choose a random dice roll between 1 and 3 - dice_roll = np.random.randint(1, 4) - current_state += dice_roll - turns += 1 - - return turns - - def compare_strategies(self, num_games=1000): - optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games) - dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games) - dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games) - dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games) - random_cost = self.simulate_game(strategy='Random', num_games=num_games) - - return { - 'Optimal': optimal_cost, - 'SafeDice': dice1_cost, - 'NormalDice': dice2_cost, - 'RiskyDice': dice3_cost, - 'Random': random_cost - } - - def play_one_turn(self, dice_choice, cur_pos, prison): - if cur_pos == len(self.layout) - 1: - return len(self.layout) - 1, False - - if prison: - return cur_pos, False - - # Convert dice_choice to integer to avoid TypeError - dice_choice = int(dice_choice) - - list_dice_results = [i for i in range(dice_choice + 1)] - result = random.choice(list_dice_results) - - if cur_pos == 2 and result != 0: - slow_lane = random.choice([0, 1]) - if slow_lane: - new_pos = cur_pos + result - else: - new_pos = cur_pos + result + 7 - - elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))): - new_pos = cur_pos + result + 4 - - else: - new_pos = cur_pos + result - - if new_pos > len(self.layout) - 1: - if self.circle: - new_pos -= len(self.layout) - else: - return len(self.layout) - 1, True - - new_square = self.layout[new_pos] - - if dice_choice == 1: - return new_pos, False - - elif dice_choice == 2: - new_square = random.choice([0, new_square]) - - if new_square == 0: - return new_pos, False # nothing happens - elif new_square == 1: - return 0, False # back to square one - elif new_square == 2: - if new_pos - 3 < 0: - return 0, False # back to square one - return new_pos - 3, False # back 3 squares - elif new_square == 3: - return new_pos, True # prison - - - def play_one_game(self, start=0): - n_turns = 0 - cur_pos = start - prison = False - - if self.circle: - while cur_pos != len(self.layout) - 1: - new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison) - if new_pos > len(self.layout) - 1: - cur_pos = len(self.layout) - new_pos - cur_pos = new_pos - n_turns += 1 - else: - while cur_pos < len(self.layout) - 1: - new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison) - cur_pos = new_pos - n_turns += 1 - - return n_turns - - def empirical_results(self): - total_turns_played = 0 - for _ in range(10000): - n_turns = self.play_one_game() - total_turns_played += n_turns - - return total_turns_played / 10000 - - -# Utilisation d'exemple -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -validation = Validation(layout, circle=False) -results = validation.compare_strategies(num_games=10000) -print("Coûts moyens :") -for strategy, cost in results.items(): - print(f"{strategy}: {cost}") diff --git a/tmc.py b/tmc.py index 6f78a3d7ef7bf358b44f1a52e10c2b80591118d3..1c8ef0823ae1f597e61cb25e64016252b8200d6c 100644 --- a/tmc.py +++ b/tmc.py @@ -7,6 +7,7 @@ class TransitionMatrixCalculator: self.matrix_safe = np.zeros((15, 15)) self.matrix_normal = np.zeros((15, 15)) self.matrix_risky = np.zeros((15, 15)) + # Probability to go from state k to k' self.safe_dice = np.array([1/2, 1/2]) self.normal_dice = np.array([1/3, 1/3, 1/3]) @@ -17,14 +18,14 @@ class TransitionMatrixCalculator: self.matrix_normal.fill(0) self.matrix_risky.fill(0) - self._compute_safe_matrix(layout, circle) + self._compute_safe_matrix() self._compute_normal_matrix(layout, circle) self._compute_risky_matrix(layout, circle) return self.matrix_safe, self.matrix_normal, self.matrix_risky - def _compute_safe_matrix(self, layout, circle): + def _compute_safe_matrix(self): for k in range(0,15): for s, p in enumerate(self.safe_dice): if k == 9 and s == 1: @@ -193,7 +194,7 @@ class TransitionMatrixCalculator: self.matrix_risky[k,k_prime] += p return self.matrix_risky - + """ def generate_arrays(self,n): # Initialize an empty list to store all the arrays arrays = [] @@ -223,5 +224,16 @@ class TransitionMatrixCalculator: self.compute_transition_matrix(array, True) -#tmc = TransitionMatrixCalculator() -#tmc.tst_transition_matrix() + + + def tst_transition_matrix(self): + # create a list of 100 different layouts + layout = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0, 0] + + print(self.compute_transition_matrix(layout, False)) + print(self.compute_transition_matrix(layout, True)) + + +tmc = TransitionMatrixCalculator() +tmc.tst_transition_matrix() +""" \ No newline at end of file diff --git a/validation.py b/validation.py new file mode 100644 index 0000000000000000000000000000000000000000..a4fb1ff70a5ca2f623003e8e3260d346c2992549 --- /dev/null +++ b/validation.py @@ -0,0 +1,93 @@ +import random as rd +import numpy as np +import matplotlib.pyplot as plt +from tmc import TransitionMatrixCalculator as tmc +from markovDecision import MarkovDecisionSolver as mD + +class validation: + def __init__(self, layout, circle=False): + + # import from other .PY + self.layout = layout + self.circle = circle + self.tmc_instance = tmc() + self.safe_dice = self.tmc_instance._compute_safe_matrix() + self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) + self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + solver = mD(self.layout, self.circle) + self.expec, self.optimal_policy = solver.solve() + + # Define all the strategy + self.optimal_strategy = self.optimal_policy + self.safe_strategy = [1]*15 + self.normal_strategy = [2]*15 + self.risky_strategy = [3]*15 + self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] + + + + def simulate_game(self, strategy, n_iterations=10000): + # Compute transition matrices for each dice + transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] + number_turns = [] + for _ in range(n_iterations): + total_turns = 0 + state = 0 # initial state + while state < len(self.layout) - 1: # until goal state is reached + action = strategy[state] # get action according to strategy + transition_matrix = transition_matrices[int(action - 1)] + state = np.random.choice(len(self.layout), p=transition_matrix[state]) + if self.layout[state] == 3 and action == 2: + total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 + elif self.layout[state] == 3 and action == 3: + total_turns += 2 + else: + total_turns += 1 + number_turns.append(total_turns) + return np.mean(number_turns) + + + def play_optimal_strategy(self): + return turns + + def play_dice_strategy(self): + return turns + + def play_random_strategy(self): + return turns + + def compare_strategies(self, num_games=1000): + optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games) + dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games) + dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games) + dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games) + random_cost = self.simulate_game(strategy='Random', num_games=num_games) + + return { + 'Optimal': optimal_cost, + 'SafeDice': dice1_cost, + 'NormalDice': dice2_cost, + 'RiskyDice': dice3_cost, + 'Random': random_cost + } + + + + +# Utilisation d'exemple +layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +validation = validation(layout, circle=False) + +circle = False # Example circle value + +# Create an instance of validation +validator = validation(layout, circle) + +# Use the methods +validator.simulate_game(validator.optimal_strategy, n_iterations=10000) + + +results = validation.compare_strategies(num_games=10000) +print("Coûts moyens :") +for strategy, cost in results.items(): + print(f"{strategy}: {cost}")