From c682c832f3ee9924b998c15c3c23f5a5784b54ab Mon Sep 17 00:00:00 2001 From: Adrienucl <adrien.payen@student.uclouvain.be> Date: Thu, 2 May 2024 23:40:48 +0200 Subject: [PATCH] update files --- test_files/markovDecision_testing.py | 51 ------------ test_files/md_test.py | 43 ---------- test_files/plot.py | 45 ----------- test_files/tmc_test.py | 80 ------------------ validation.py | 116 ++++++++++++++++++++------- 5 files changed, 88 insertions(+), 247 deletions(-) delete mode 100644 test_files/markovDecision_testing.py delete mode 100644 test_files/md_test.py delete mode 100644 test_files/plot.py delete mode 100644 test_files/tmc_test.py diff --git a/test_files/markovDecision_testing.py b/test_files/markovDecision_testing.py deleted file mode 100644 index 39e9e26..0000000 --- a/test_files/markovDecision_testing.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np -from tmc import TransitionMatrixCalculator as tmc - - -# testing our TransitionMatrix function based on random layout -# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -def markovDecision(layout : list, circle : bool) : - - Numberk = 15 # Number of states k on the board - tmc_instance = tmc() - safe_dice = tmc_instance._compute_safe_matrix(layout, circle) - normal_dice = tmc_instance._compute_normal_matrix(layout, circle) - risky_dice = tmc_instance._compute_risky_matrix(layout, circle) - - # Initialisation of the variables before the iteration - ValueI = np.zeros(Numberk) # Algorithm of Value iteration - jail = [i for i, x in enumerate(layout) if x == 3] # For all the jailsquare on the board - DiceForStates = np.zeros(Numberk - 1) # Set the each states as O - i = 0 # set the iteration of Value - - while True : - ValueINew = np.zeros(Numberk) - i += 1 # iter + 1 - - for k in range(Numberk - 1) : - vi_safe = np.sum(safe_dice[k] * ValueI) - vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail]) - vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap - ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky) - - if ValueINew[k] == 1 + vi_safe : - DiceForStates[k] = 1 - elif ValueINew[k] == 1 + vi_normal : - DiceForStates[k] = 2 - else : - DiceForStates[k] = 3 - - if np.allclose(ValueINew, ValueI) : - ValueI = ValueINew - break - - ValueI = ValueINew - - Expec = ValueI[:-1] - return [Expec, DiceForStates] - -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -print("\nWin as soon as land on or overstep the final square") -print(markovDecision(layout, False)) -print("\nStopping on the square to win") -print(markovDecision(layout, True)) diff --git a/test_files/md_test.py b/test_files/md_test.py deleted file mode 100644 index 722766b..0000000 --- a/test_files/md_test.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -from tmc import TransitionMatrixCalculator as tmc - -def markov_decision(layout: list, circle: bool): - Numberk = 15 - tmc_instance = tmc() - safe_dice = tmc_instance._compute_safe_matrix(layout, circle) - normal_dice = tmc_instance._compute_normal_matrix(layout, circle) - risky_dice = tmc_instance._compute_risky_matrix(layout, circle) - - jail = [i for i, x in enumerate(layout) if x == 3] - - def compute_value(v, dice_matrix): - return np.sum(dice_matrix * v) + (0.5 if dice_matrix is normal_dice else 1) * np.sum(dice_matrix[jail]) - - - value = np.zeros(Numberk) - dice_for_states = np.zeros(Numberk - 1) - - while True: - new_value = np.zeros(Numberk) - - for k in range(Numberk - 1): - vi_safe = compute_value(value, safe_dice[k]) - vi_normal = compute_value(value, normal_dice[k]) - vi_risky = compute_value(value, risky_dice[k]) - - new_value[k] = 1 + min(vi_safe, vi_normal, vi_risky) - dice_for_states[k] = 1 if new_value[k] == 1 + vi_safe else (2 if new_value[k] == 1 + vi_normal else 3) - - if np.allclose(new_value, value): - value = new_value - break - - value = new_value - - return value[:-1], dice_for_states - -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -print("\nWin as soon as land on or overstep the final square") -print(markov_decision(layout, False)) -print("\nStopping on the square to win") -print(markov_decision(layout, True)) diff --git a/test_files/plot.py b/test_files/plot.py deleted file mode 100644 index 9de7974..0000000 --- a/test_files/plot.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -import random as rd -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD -from validation import Validation - -def make_plots(): - layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - circle = False - validation = Validation(layout, circle) - expec, optimal_policy = mD(layout, circle).solve() - - # Plot 1: Theoretical vs Empirical Cost - expected_costs = np.zeros(len(expec)) - for start_square in range(len(expec)): - total_turns = 0 - for _ in range(10000): - total_turns += validation.play_one_game(start_square) - expected_costs[start_square] = total_turns / 10000 - - squares = np.arange(len(expec)) - plt.plot(squares, expec, label="Theoretical cost") - plt.plot(squares, expected_costs, label="Empirical cost") - plt.xticks(np.arange(0, len(expec), step=1)) - plt.grid(True) - plt.xlabel("Square") - plt.ylabel("Cost") - plt.legend() - plt.title("Comparison between the expected cost and the actual cost") - plt.show() - - # Plot 2: Expected number of turns for different policies - policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2, np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))] - avgn_turns = [Validation(layout, circle).empirical_results() for policy in policies] - names = ["optimal", "safe", "normal", "risky", "random"] - plt.bar(names, avgn_turns) - plt.xlabel("Policy") - plt.ylabel("Cost") - plt.title("Expected number of turns for different policies") - plt.show() - -# Call make_plots function -if __name__ == "__main__": - make_plots() diff --git a/test_files/tmc_test.py b/test_files/tmc_test.py deleted file mode 100644 index 461afbb..0000000 --- a/test_files/tmc_test.py +++ /dev/null @@ -1,80 +0,0 @@ -import numpy as np -import random as rd - -class TransitionMatrixCalculator: - def __init__(self): - # Probabilités de transition pour les dés "safe", "normal" et "risky" - self.safe_dice = np.array([1/2, 1/2]) - self.normal_dice = np.array([1/3, 1/3, 1/3]) - self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) - - def compute_transition_matrix(self, layout: list, circle: bool): - size = len(layout) - matrix_safe = self._compute_matrix(layout, self.safe_dice, size, circle, 'safe') - matrix_normal = self._compute_matrix(layout, self.normal_dice, size, circle, 'normal') - matrix_risky = self._compute_matrix(layout, self.risky_dice, size, circle, 'risky') - return matrix_safe, matrix_normal, matrix_risky - - def _compute_matrix(self, layout: list, dice_probs: list, size: int, circle: bool, matrix_type: str): - transition_matrix = np.zeros((size, size)) - dice_type = None - - if matrix_type == 'safe': - dice_type = self.safe_dice - elif matrix_type == 'normal': - dice_type = self.normal_dice - elif matrix_type == 'risky': - dice_type = self.risky_dice - - for k in range(size): - for s, p in enumerate(dice_probs): - k_prime = (k + s) % size if circle else min(size - 1, k + s) - - if k == 9 and s == 1 and matrix_type == 'safe': - k_prime = size - 1 - elif k == 2 and s > 0 and matrix_type == 'safe': - p /= 2 - k_prime = 10 + s - 1 - if layout[k_prime] == 1: - k_prime = 0 - elif layout[k_prime] == 2: - k_prime = max(0, k_prime - 3) - elif k == 7 and s == 3 and matrix_type == 'risky': - k_prime = size - 1 - elif k == 8 and s in [2, 3] and matrix_type == 'risky': - if circle or s == 2: - k_prime = size - 1 - else: - k_prime = 0 - elif k == 9 and s in [1, 2, 3] and matrix_type == 'risky': - if not circle or s == 1: - k_prime = size - 1 - elif circle and s == 2: - k_prime = 0 - elif circle and s == 3: - k_prime = 1 - if layout[k_prime] in [1, 2]: - k_prime = max(0, k_prime - 3) if layout[k_prime] == 2 else 0 - - transition_matrix[k, k_prime] += p * dice_type[s] - - return transition_matrix - - def generate_arrays(self,n): - arrays = [] - for _ in range(n): - array = np.zeros(15, dtype=int) - indices = rd.sample(range(1, 14), 3) - array[indices] = 1, 2, 3 - arrays.append(array) - return arrays - - def tst_transition_matrix(self): - layouts = self.generate_arrays(1000) - for array in layouts: - print(array) - self.compute_transition_matrix(array, False) - self.compute_transition_matrix(array, True) - -#tmc = TransitionMatrixCalculator() -#tmc.tst_transition_matrix() diff --git a/validation.py b/validation.py index 8f94f24..85cd231 100644 --- a/validation.py +++ b/validation.py @@ -19,11 +19,25 @@ class validation: # Define all the strategy self.optimal_strategy = self.optimal_policy - self.safe_strategy = [1]*15 - self.normal_strategy = [2]*15 - self.risky_strategy = [3]*15 + self.safe_strategy = [1]*len(layout) + self.normal_strategy = [2]*len(layout) + self.risky_strategy = [3]*len(layout) self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] + # Définir les coûts par case et par type de dé + self.costs_by_dice_type = { + 'SafeDice': [0] * len(self.layout), + 'NormalDice': [0] * len(self.layout), + 'RiskyDice': [0] * len(self.layout) + } + + # Remplir les coûts pour chaque case en fonction du type de dé + for i in range(len(self.layout)): + if self.layout[i] == 3: + self.costs_by_dice_type['SafeDice'][i] = 1 # Coût par défaut pour le dé sûr + self.costs_by_dice_type['NormalDice'][i] = 2 # Coût par défaut pour le dé normal + self.costs_by_dice_type['RiskyDice'][i] = 3 # Coût par défaut pour le dé risqué + def simulate_game(self, strategy, n_iterations=10000): transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] @@ -40,9 +54,6 @@ class validation: action_index = int(action) - 1 transition_matrix = transition_matrices[action_index] - #print(f"Current state (k): {k}, Action chosen: {action}") - #print(f"Transition matrix: {transition_matrix}") - # Aplatir la matrice de transition en une distribution de probabilité 1D flattened_probs = transition_matrix[k] flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités @@ -62,9 +73,51 @@ class validation: return np.mean(number_turns) + def simulate_state(self, strategy, layout, circle, n_iterations=10000): + # Compute transition matrices for each dice + tmc_instance = tmc() + P_safe = tmc_instance._compute_safe_matrix() + P_normal = tmc_instance._compute_normal_matrix(layout, circle) + P_risky = tmc_instance._compute_risky_matrix(layout, circle) + + transition_matrices = [P_safe, P_normal, P_risky] + number_turns = [] + number_mean = [] + + for _ in range(n_iterations): + number_turns = [] + + for state in range(len(layout) - 1): + total_turns = 0 + k = state # starting state + + while k < len(layout) - 1: + action = strategy[k] # action based on strategy + action_index = int(action) - 1 + transition_matrix = transition_matrices[action_index] + flattened_probs = transition_matrix[k] + flattened_probs /= np.sum(flattened_probs) + k = np.random.choice(len(layout), p=flattened_probs) + + if layout[k] == 3 and action == 2: + total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 + elif layout[k] == 3 and action == 3: + total_turns += 2 + else: + total_turns += 1 + + number_turns.append(total_turns) + + number_mean.append(number_turns) + + # calculate the average number of turns for each state + mean_turns = np.mean(number_mean, axis=0) + + return mean_turns + def play_optimal_strategy(self, n_iterations=10000): - return self.simulate_game(self.optimal_policy, n_iterations) + return self.simulate_game(self.optimal_strategy, n_iterations) def play_dice_strategy(self, dice_choice, n_iterations=10000): @@ -98,34 +151,41 @@ class validation: } - - # Utilisation d'exemple -layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0] -validation = validation(layout, circle=False) - -circle = False # Example circle value -""" -# Create an instance of validation -validator = validation(layout, circle) - -# Use the methods -validator.simulate_game(validator.optimal_strategy, n_iterations=10000) - +layout = [0, 0, 3, 0, 2, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +circle = False +validation_instance = validation(layout, circle) -results = validation.compare_strategies(num_games=10000) -print("Coûts moyens :") -for strategy, cost in results.items(): - print(f"{strategy}: {cost}")""" -optimal_cost = validation.play_optimal_strategy(n_iterations=10000) +optimal_cost = validation_instance.play_optimal_strategy(n_iterations=10000) print("Optimal Strategy Cost:", optimal_cost) -dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000) +dice2_cost = validation_instance.play_dice_strategy('NormalDice', n_iterations=10000) print("Normal Dice Strategy Cost:", dice2_cost) -random_cost = validation.play_random_strategy(n_iterations=10000) +random_cost = validation_instance.play_random_strategy(n_iterations=10000) print("Random Strategy Cost:", random_cost) -strategy_comparison = validation.compare_strategies(num_games=10000) +strategy_comparison = validation_instance.compare_strategies(num_games=10000) print("Strategy Comparison Results:", strategy_comparison) + + +optimal_strategy = validation_instance.optimal_strategy +mean_turns_optimal = validation_instance.simulate_state(optimal_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Optimal Strategy:", mean_turns_optimal) + +safe_dice_strategy = validation_instance.safe_strategy +mean_turns_safe_dice = validation_instance.simulate_state(safe_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Safe Dice Strategy:", mean_turns_safe_dice) + +normal_dice_strategy = validation_instance.normal_strategy +mean_turns_normal_dice = validation_instance.simulate_state(normal_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Normal Dice Strategy:", mean_turns_normal_dice) + +risky_dice_strategy = validation_instance.risky_strategy +mean_turns_risky_dice = validation_instance.simulate_state(risky_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Risky Dice Strategy:", mean_turns_risky_dice) + +random_dice_strategy = validation_instance.random_strategy +mean_turns_random_dice = validation_instance.simulate_state(random_dice_strategy, layout, circle, n_iterations=10000) +print("Mean Turns for Random Dice Strategy:", mean_turns_random_dice) -- GitLab