From f78dbd5f5f743ae2a93882236f0287966a6cf97c Mon Sep 17 00:00:00 2001 From: Adrienucl <adrien.payen@student.uclouvain.be> Date: Thu, 2 May 2024 21:18:44 +0200 Subject: [PATCH] update validation.py --- .DS_Store | Bin 6148 -> 6148 bytes markovDecision.py | 2 +- test_files/validation.py | 173 --------------------------------------- tmc.py | 2 +- validation.py | 131 +++++++++++++++++++++++++++++ 5 files changed, 133 insertions(+), 175 deletions(-) delete mode 100644 test_files/validation.py create mode 100644 validation.py diff --git a/.DS_Store b/.DS_Store index 837b005cf49930c5ead4ac78ca4b5abed2e970f5..895bd9eae285819cae90199894867d4ed1ce3958 100644 GIT binary patch delta 36 scmZoMXfc@J&&ahgU^g=(*JK`+?#)M8?lEq@%cjLRu_0qKJI7ys0OGI=t^fc4 delta 79 zcmZoMXfc@J&&abeU^g=(&tx8!ZccWF0)`xhe1?+A0c;{njIx_MSl%*<@-mb$Bm%`V b8B&0B36Rcb$b+cp-kio}$+(%F<1aq|5PK76 diff --git a/markovDecision.py b/markovDecision.py index 1383600..6bd17bc 100644 --- a/markovDecision.py +++ b/markovDecision.py @@ -5,7 +5,7 @@ class MarkovDecisionSolver: def __init__(self, layout : list, circle : bool): self.Numberk = 15 self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix(layout, circle) + self.safe_dice = self.tmc_instance._compute_safe_matrix() self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) self.jail = [i for i, x in enumerate(layout) if x == 3] diff --git a/test_files/validation.py b/test_files/validation.py deleted file mode 100644 index de8dd96..0000000 --- a/test_files/validation.py +++ /dev/null @@ -1,173 +0,0 @@ -import random -import numpy as np -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator -from markovDecision import MarkovDecisionSolver as mD - -class Validation: - def __init__(self, layout, circle=False): - self.layout = layout - self.circle = circle - self.tmc_instance = TransitionMatrixCalculator() - - # Compute optimal value iteration results - solver = mD(self.layout, self.circle) - self.optimal_values, self.optimal_dice = solver.solve() - - def simulate_game(self, strategy='optimal', num_games=1000): - total_turns = 0 - - for _ in range(num_games): - if strategy == 'Optimal': - turns = self.play_optimal_strategy() - elif strategy == 'SafeDice': - turns = self.play_dice_strategy(1) - elif strategy == 'NormalDice': - turns = self.play_dice_strategy(2) - elif strategy == 'RiskyDice': - turns = self.play_dice_strategy(3) - elif strategy == 'Random': - turns = self.play_random_strategy() - - total_turns += turns - - average_turns = total_turns / num_games - return average_turns - - def play_optimal_strategy(self): - current_state = 0 # Start from the initial state - turns = 0 - - while current_state < len(self.layout) - 1: - optimal_action = int(self.optimal_dice[current_state]) # Get the optimal action for the current state - current_state += optimal_action # Move to the next state based on the optimal action - turns += 1 - - return turns - - def play_dice_strategy(self, dice): - current_state = 0 # Start from the initial state - turns = 0 - - while current_state < len(self.layout) - 1: - # Always use the specified dice type (1, 2, or 3) - current_state += dice - turns += 1 - - return turns - - def play_random_strategy(self): - current_state = 0 # Start from the initial state - turns = 0 - - while current_state < len(self.layout) - 1: - # Choose a random dice roll between 1 and 3 - dice_roll = np.random.randint(1, 4) - current_state += dice_roll - turns += 1 - - return turns - - def compare_strategies(self, num_games=1000): - optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games) - dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games) - dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games) - dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games) - random_cost = self.simulate_game(strategy='Random', num_games=num_games) - - return { - 'Optimal': optimal_cost, - 'SafeDice': dice1_cost, - 'NormalDice': dice2_cost, - 'RiskyDice': dice3_cost, - 'Random': random_cost - } - - def play_one_turn(self, dice_choice, cur_pos, prison): - if cur_pos == len(self.layout) - 1: - return len(self.layout) - 1, False - - if prison: - return cur_pos, False - - # Convert dice_choice to integer to avoid TypeError - dice_choice = int(dice_choice) - - list_dice_results = [i for i in range(dice_choice + 1)] - result = random.choice(list_dice_results) - - if cur_pos == 2 and result != 0: - slow_lane = random.choice([0, 1]) - if slow_lane: - new_pos = cur_pos + result - else: - new_pos = cur_pos + result + 7 - - elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))): - new_pos = cur_pos + result + 4 - - else: - new_pos = cur_pos + result - - if new_pos > len(self.layout) - 1: - if self.circle: - new_pos -= len(self.layout) - else: - return len(self.layout) - 1, True - - new_square = self.layout[new_pos] - - if dice_choice == 1: - return new_pos, False - - elif dice_choice == 2: - new_square = random.choice([0, new_square]) - - if new_square == 0: - return new_pos, False # nothing happens - elif new_square == 1: - return 0, False # back to square one - elif new_square == 2: - if new_pos - 3 < 0: - return 0, False # back to square one - return new_pos - 3, False # back 3 squares - elif new_square == 3: - return new_pos, True # prison - - - def play_one_game(self, start=0): - n_turns = 0 - cur_pos = start - prison = False - - if self.circle: - while cur_pos != len(self.layout) - 1: - new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison) - if new_pos > len(self.layout) - 1: - cur_pos = len(self.layout) - new_pos - cur_pos = new_pos - n_turns += 1 - else: - while cur_pos < len(self.layout) - 1: - new_pos, prison = self.play_one_turn(self.optimal_dice[cur_pos], cur_pos, prison) - cur_pos = new_pos - n_turns += 1 - - return n_turns - - def empirical_results(self): - total_turns_played = 0 - for _ in range(10000): - n_turns = self.play_one_game() - total_turns_played += n_turns - - return total_turns_played / 10000 - - -# Utilisation d'exemple -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -validation = Validation(layout, circle=False) -results = validation.compare_strategies(num_games=10000) -print("Coûts moyens :") -for strategy, cost in results.items(): - print(f"{strategy}: {cost}") diff --git a/tmc.py b/tmc.py index 6f78a3d..5941ed4 100644 --- a/tmc.py +++ b/tmc.py @@ -24,7 +24,7 @@ class TransitionMatrixCalculator: return self.matrix_safe, self.matrix_normal, self.matrix_risky - def _compute_safe_matrix(self, layout, circle): + def _compute_safe_matrix(self): for k in range(0,15): for s, p in enumerate(self.safe_dice): if k == 9 and s == 1: diff --git a/validation.py b/validation.py new file mode 100644 index 0000000..8f94f24 --- /dev/null +++ b/validation.py @@ -0,0 +1,131 @@ +import random as rd +import numpy as np +import matplotlib.pyplot as plt +from tmc import TransitionMatrixCalculator as tmc +from markovDecision import MarkovDecisionSolver as mD + +class validation: + def __init__(self, layout, circle=False): + + # import from other .PY + self.layout = layout + self.circle = circle + self.tmc_instance = tmc() + self.safe_dice = self.tmc_instance._compute_safe_matrix() + self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) + self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) + solver = mD(self.layout, self.circle) + self.expec, self.optimal_policy = solver.solve() + + # Define all the strategy + self.optimal_strategy = self.optimal_policy + self.safe_strategy = [1]*15 + self.normal_strategy = [2]*15 + self.risky_strategy = [3]*15 + self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] + + + def simulate_game(self, strategy, n_iterations=10000): + transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] + number_turns = [] + + for _ in range(n_iterations): + total_turns = 0 + k = 0 # état initial + + while k < len(self.layout) - 1: + action = strategy[k] # action selon la stratégie + + # Convertir action en entier pour accéder à l'indice correct dans transition_matrices + action_index = int(action) - 1 + transition_matrix = transition_matrices[action_index] + + #print(f"Current state (k): {k}, Action chosen: {action}") + #print(f"Transition matrix: {transition_matrix}") + + # Aplatir la matrice de transition en une distribution de probabilité 1D + flattened_probs = transition_matrix[k] + flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités + + # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie + k = np.random.choice(len(self.layout), p=flattened_probs) + + # Mise à jour du nombre de tours en fonction de l'état actuel + if self.layout[k] == 3 and action == 2: + total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 + elif self.layout[k] == 3 and action == 3: + total_turns += 2 + else: + total_turns += 1 + + number_turns.append(total_turns) + + return np.mean(number_turns) + + + def play_optimal_strategy(self, n_iterations=10000): + return self.simulate_game(self.optimal_policy, n_iterations) + + + def play_dice_strategy(self, dice_choice, n_iterations=10000): + if dice_choice == 'SafeDice': + strategy = self.safe_strategy + elif dice_choice == 'NormalDice': + strategy = self.normal_strategy + elif dice_choice == 'RiskyDice': + strategy = self.risky_strategy + else: + raise ValueError("Invalid dice choice") + + return self.simulate_game(strategy, n_iterations) + + def play_random_strategy(self, n_iterations=10000): + return self.simulate_game(self.random_strategy, n_iterations) + + def compare_strategies(self, num_games=1000): + optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games) + dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) + dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) + dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games) + random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games) + + return { + 'Optimal': optimal_cost, + 'SafeDice': dice1_cost, + 'NormalDice': dice2_cost, + 'RiskyDice': dice3_cost, + 'Random': random_cost + } + + + + +# Utilisation d'exemple +layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0] +validation = validation(layout, circle=False) + +circle = False # Example circle value +""" +# Create an instance of validation +validator = validation(layout, circle) + +# Use the methods +validator.simulate_game(validator.optimal_strategy, n_iterations=10000) + + +results = validation.compare_strategies(num_games=10000) +print("Coûts moyens :") +for strategy, cost in results.items(): + print(f"{strategy}: {cost}")""" + +optimal_cost = validation.play_optimal_strategy(n_iterations=10000) +print("Optimal Strategy Cost:", optimal_cost) + +dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000) +print("Normal Dice Strategy Cost:", dice2_cost) + +random_cost = validation.play_random_strategy(n_iterations=10000) +print("Random Strategy Cost:", random_cost) + +strategy_comparison = validation.compare_strategies(num_games=10000) +print("Strategy Comparison Results:", strategy_comparison) -- GitLab