From 8b42291486bcb2873df134cf2ddb3e2393671277 Mon Sep 17 00:00:00 2001 From: Adrienucl <adrien.payen@student.uclouvain.be> Date: Fri, 3 May 2024 21:51:05 +0200 Subject: [PATCH] back to zero files --- markovDecision.py | 70 -------- simulate.py | 174 ------------------- test_files/Validation_2.py | 75 --------- test_files/markovDecision_testing.py | 51 ------ test_files/md_test.py | 43 ----- test_files/plot.py | 45 ----- test_files/plotting.py | 41 ----- test_files/tmc_test.py | 80 --------- tmc.py | 239 --------------------------- validation.py | 131 --------------- 10 files changed, 949 deletions(-) delete mode 100644 markovDecision.py delete mode 100644 simulate.py delete mode 100644 test_files/Validation_2.py delete mode 100644 test_files/markovDecision_testing.py delete mode 100644 test_files/md_test.py delete mode 100644 test_files/plot.py delete mode 100644 test_files/plotting.py delete mode 100644 test_files/tmc_test.py delete mode 100644 tmc.py delete mode 100644 validation.py diff --git a/markovDecision.py b/markovDecision.py deleted file mode 100644 index 6bd17bc..0000000 --- a/markovDecision.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -from tmc import TransitionMatrixCalculator as tmc - -class MarkovDecisionSolver: - def __init__(self, layout : list, circle : bool): - self.Numberk = 15 - self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) - self.jail = [i for i, x in enumerate(layout) if x == 3] - self.ValueI = np.zeros(self.Numberk) - self.DiceForStates = np.zeros(self.Numberk - 1) - - def _compute_vi_safe(self, k): - return np.dot(self.safe_dice[k], self.ValueI) - - def _compute_vi_normal(self, k): - vi_normal = np.dot(self.normal_dice[k], self.ValueI) + 0.5 * np.sum(self.normal_dice[k][self.jail]) - return vi_normal - - def _compute_vi_risky(self, k): - vi_risky = np.dot(self.risky_dice[k], self.ValueI) + np.sum(self.risky_dice[k][self.jail]) - return vi_risky - - def solve(self): - i = 0 - while True: - ValueINew = np.zeros(self.Numberk) - i += 1 - - for k in range(self.Numberk - 1): - vi_safe = self._compute_vi_safe(k) - vi_normal = self._compute_vi_normal(k) - vi_risky = self._compute_vi_risky(k) - - ValueINew[k] = 1 + min(vi_safe, vi_normal, vi_risky) - - if ValueINew[k] == 1 + vi_safe: - self.DiceForStates[k] = 1 - elif ValueINew[k] == 1 + vi_normal: - self.DiceForStates[k] = 2 - else: - self.DiceForStates[k] = 3 - - if np.allclose(ValueINew, self.ValueI): - self.ValueI = ValueINew - break - - self.ValueI = ValueINew - - Expec = self.ValueI[:-1] - return [Expec, self.DiceForStates] - -def markovDecision(layout : list, circle : bool): - solver = MarkovDecisionSolver(layout, circle) - return solver.solve() - - -# Exemple d'utilisation de la fonction markovDecision avec les paramètres layout et circle -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - -# Résolution du problème avec différents modes de jeu -result_false = markovDecision(layout, circle=False) -print("\nWin as soon as land on or overstep the final square") -print(result_false) - -result_true = markovDecision(layout, circle=True) -print("\nStopping on the square to win") -print(result_true) diff --git a/simulate.py b/simulate.py deleted file mode 100644 index 5f3cdcb..0000000 --- a/simulate.py +++ /dev/null @@ -1,174 +0,0 @@ -import random -import numpy as np -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver - -nSquares = 15 -nSimul = 10000 - -def playOneTurn(diceChoice, curPos, layout, circle, prison): - if curPos == nSquares - 1: - return nSquares - 1, False - - if prison: - return curPos, False - - listDiceResults = [i for i in range(diceChoice + 1)] - result = random.choice(listDiceResults) - - if curPos == 2 and result != 0: - slowLane = random.choice([0, 1]) - if slowLane: - newPos = curPos + result - else: - newPos = curPos + result + 7 - elif ((curPos == 9 and result != 0) or (curPos in [7, 8, 9] and curPos + result >= 10)): - newPos = curPos + result + 4 - else: - newPos = curPos + result - - if newPos > nSquares - 1: - if circle: - newPos -= nSquares - else: - return nSquares - 1, True - - newSquare = layout[newPos] - - if diceChoice == 1: - return newPos, False - elif diceChoice == 2: - newSquare = random.choice([0, newSquare]) - - if newSquare == 0: - return newPos, False # nothing happens - elif newSquare == 1: - return 0, False # back to square one - elif newSquare == 2: - if newPos - 3 < 0: - return 0, False # back to square one - return newPos - 3, False # back 3 squares - elif newSquare == 3: - return newPos, True # prison - elif newSquare == 4: - newSquare = random.choice([1, 2, 3]) - if newSquare == 1: - return 0, False # back to square one - elif newSquare == 2: - if newPos - 3 < 0: - return 0, False # back to square one - return newPos - 3, False # back 3 squares - elif newSquare == 3: - return newPos, True # prison - -def playOneGame(layout, circle, policy, start=0): - nTurns = 0 - curPos = start - prison = False - - if circle: - while curPos != nSquares - 1: - newPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison) - if newPos > nSquares - 1: - curPos = nSquares - newPos - curPos = newPos - nTurns += 1 - else: - while curPos < nSquares - 1: - newPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison) - curPos = newPos - nTurns += 1 - - return nTurns - -def empiric_cost_of_square(layout, circle, policy): - expected_costs = np.zeros(nSquares) - for start_square in range(nSquares): - total_turns = 0 - for _ in range(nSimul): - total_turns += playOneGame(layout, circle, policy, start=start_square) - expected_costs[start_square] = total_turns / nSimul - return expected_costs - -def empirical_results(layout, circle, policy): - avgnTurnsPlayed = 0 - for _ in range(nSimul): - nTurns = playOneGame(layout, circle, policy) - avgnTurnsPlayed += nTurns - return avgnTurnsPlayed / nSimul - -def comparison_theorical_empirical(layout, circle): - solver = MarkovDecisionSolver(layout, circle) - expec, optimal_policy = solver.solve() - actual = empiric_cost_of_square(layout, circle, optimal_policy.astype(int)) - - # Plotting both arrays on the same plot - squares = np.arange(len(expec)) - plt.plot(squares, expec, label="Theoretical cost") - plt.plot(squares, actual, label="Empirical cost") - - plt.xticks(np.arange(0, len(expec), step=1)) - plt.grid(True) - plt.xlabel("Square") - plt.ylabel("Cost") - plt.legend() - plt.title("Comparison between the expected cost and the actual cost") - plt.show() - -def comparison_of_policies_total(layout, circle): - solver = MarkovDecisionSolver(layout, circle) - _, optimal_policy = solver.solve() - policies = [optimal_policy.astype(int), np.ones(nSquares, dtype=int), - np.ones(nSquares, dtype=int) * 2, np.ones(nSquares, dtype=int) * 3, - np.random.randint(1, 4, size=nSquares)] - - avgnTurns = [empirical_results(layout, circle, policy) for policy in policies] - names = ["optimal", "safe", "normal", "risky", "random"] - - # Creating the bar plot - plt.bar(names, avgnTurns) - - # Adding labels and title - plt.xlabel("Policy") - plt.ylabel("Cost") - plt.title("Expected number of turns by policy") - - # Displaying the plot - plt.show() - -def comparison_of_policies_squares(layout, circle): - solver = MarkovDecisionSolver(layout, circle) - _, optimal_policy = solver.solve() - policies = [optimal_policy.astype(int), np.ones(nSquares, dtype=int), - np.ones(nSquares, dtype=int) * 2, np.ones(nSquares, dtype=int) * 3, - np.random.randint(1, 4, size=nSquares)] - - avgnTurns = [empiric_cost_of_square(layout, circle, policy) for policy in policies] - - # Generating x-axis values (squares) - squares = np.arange(len(avgnTurns[0])) - - # Plotting both arrays on the same plot - plt.plot(squares, avgnTurns[0], label="Optimal") - plt.plot(squares, avgnTurns[1], label="Safe") - plt.plot(squares, avgnTurns[2], label="Normal") - plt.plot(squares, avgnTurns[3], label="Risky") - plt.plot(squares, avgnTurns[4], label="Random") - - plt.xticks(np.arange(0, len(avgnTurns[0]), step=1)) - plt.grid(True) - plt.xlabel("Square") - plt.ylabel("Cost") - plt.legend() - plt.title("Expected cost for different policies") - plt.show() - -def make_plots(): - layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - circle = False - comparison_theorical_empirical(layout, circle) - # comparison_of_policies_total(layout, circle) - # comparison_of_policies_squares(layout, circle) - -make_plots() diff --git a/test_files/Validation_2.py b/test_files/Validation_2.py deleted file mode 100644 index 1741deb..0000000 --- a/test_files/Validation_2.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -import random as rd -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD - -class Validation: - def __init__(self): - self.tmc_instance = tmc() - - def simulate_games(self, layout, circle, num_games): - results = [] - - for _ in range(num_games): - result = mD(layout, circle) - # Assuming result is a tuple (costs, path) and you want the last element of 'costs' - results.append(result[0][-1]) # Append the number of turns to reach the goal - - return results - - def compare_strategies(self, layout, circle, num_games): - optimal_results = self.simulate_games(layout, circle, num_games) - - suboptimal_strategies = { - "Dice 1 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 1 simulation - "Dice 2 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 2 simulation - "Dice 3 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 3 simulation - "Mixed Random Strategy": self.simulate_games(layout, circle, num_games), # Replace with mixed random strategy simulation - "Purely Random Choice": self.simulate_games(layout, circle, num_games) # Replace with purely random choice simulation - } - - self.plot_results(optimal_results, suboptimal_strategies) - - def plot_results(self, optimal_results, suboptimal_results): - strategies = ["Optimal Strategy"] + list(suboptimal_results.keys()) - avg_costs = [np.mean(optimal_results)] + [np.mean(suboptimal_results[strategy]) for strategy in suboptimal_results] - - plt.figure(figsize=(10, 6)) - plt.bar(strategies, avg_costs, color=['blue'] + ['orange'] * len(suboptimal_results)) - plt.xlabel("Strategies") - plt.ylabel("Average Cost") - plt.title("Comparison of Strategy Performance") - plt.show() - - def run_validation(self, layout, circle, num_games): - solver = mD(layout, circle) - theoretical_cost, optimal_dice_strategy = solver.solve() - - optimal_results = self.simulate_games(layout, circle, num_games) - optimal_average_cost = np.mean(optimal_results) - - suboptimal_strategies = { - "Dice 1 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 1 simulation - "Dice 2 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 2 simulation - "Dice 3 Only": self.simulate_games(layout, circle, num_games), # Replace with Dice 3 simulation - "Mixed Random Strategy": self.simulate_games(layout, circle, num_games), # Replace with mixed random strategy simulation - "Purely Random Choice": self.simulate_games(layout, circle, num_games) # Replace with purely random choice simulation - } - - self.plot_results(optimal_results, suboptimal_strategies) - - print("Theoretical Expected Cost (Value Iteration):", theoretical_cost) - print("Empirical Average Cost (Optimal Strategy):", optimal_average_cost) - - for strategy, results in suboptimal_strategies.items(): - avg_cost = np.mean(results) - print(f"Empirical Average Cost ({strategy}):", avg_cost) - -# Exemple d'utilisation de la classe Validation -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -circle = True -num_games = 1000 - -validation = Validation() -validation.run_validation(layout, circle, num_games) diff --git a/test_files/markovDecision_testing.py b/test_files/markovDecision_testing.py deleted file mode 100644 index 39e9e26..0000000 --- a/test_files/markovDecision_testing.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np -from tmc import TransitionMatrixCalculator as tmc - - -# testing our TransitionMatrix function based on random layout -# [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -def markovDecision(layout : list, circle : bool) : - - Numberk = 15 # Number of states k on the board - tmc_instance = tmc() - safe_dice = tmc_instance._compute_safe_matrix(layout, circle) - normal_dice = tmc_instance._compute_normal_matrix(layout, circle) - risky_dice = tmc_instance._compute_risky_matrix(layout, circle) - - # Initialisation of the variables before the iteration - ValueI = np.zeros(Numberk) # Algorithm of Value iteration - jail = [i for i, x in enumerate(layout) if x == 3] # For all the jailsquare on the board - DiceForStates = np.zeros(Numberk - 1) # Set the each states as O - i = 0 # set the iteration of Value - - while True : - ValueINew = np.zeros(Numberk) - i += 1 # iter + 1 - - for k in range(Numberk - 1) : - vi_safe = np.sum(safe_dice[k] * ValueI) - vi_normal = np.sum(normal_dice[k] * ValueI) + 0.5 * np.sum(normal_dice[k][jail]) - vi_risky = np.sum(risky_dice[k] * ValueI) + np.sum(risky_dice[k][jail]) # 100% chance of triggering the trap - ValueINew[k] = 1 + min(vi_safe,vi_normal,vi_risky) - - if ValueINew[k] == 1 + vi_safe : - DiceForStates[k] = 1 - elif ValueINew[k] == 1 + vi_normal : - DiceForStates[k] = 2 - else : - DiceForStates[k] = 3 - - if np.allclose(ValueINew, ValueI) : - ValueI = ValueINew - break - - ValueI = ValueINew - - Expec = ValueI[:-1] - return [Expec, DiceForStates] - -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -print("\nWin as soon as land on or overstep the final square") -print(markovDecision(layout, False)) -print("\nStopping on the square to win") -print(markovDecision(layout, True)) diff --git a/test_files/md_test.py b/test_files/md_test.py deleted file mode 100644 index 722766b..0000000 --- a/test_files/md_test.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -from tmc import TransitionMatrixCalculator as tmc - -def markov_decision(layout: list, circle: bool): - Numberk = 15 - tmc_instance = tmc() - safe_dice = tmc_instance._compute_safe_matrix(layout, circle) - normal_dice = tmc_instance._compute_normal_matrix(layout, circle) - risky_dice = tmc_instance._compute_risky_matrix(layout, circle) - - jail = [i for i, x in enumerate(layout) if x == 3] - - def compute_value(v, dice_matrix): - return np.sum(dice_matrix * v) + (0.5 if dice_matrix is normal_dice else 1) * np.sum(dice_matrix[jail]) - - - value = np.zeros(Numberk) - dice_for_states = np.zeros(Numberk - 1) - - while True: - new_value = np.zeros(Numberk) - - for k in range(Numberk - 1): - vi_safe = compute_value(value, safe_dice[k]) - vi_normal = compute_value(value, normal_dice[k]) - vi_risky = compute_value(value, risky_dice[k]) - - new_value[k] = 1 + min(vi_safe, vi_normal, vi_risky) - dice_for_states[k] = 1 if new_value[k] == 1 + vi_safe else (2 if new_value[k] == 1 + vi_normal else 3) - - if np.allclose(new_value, value): - value = new_value - break - - value = new_value - - return value[:-1], dice_for_states - -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] -print("\nWin as soon as land on or overstep the final square") -print(markov_decision(layout, False)) -print("\nStopping on the square to win") -print(markov_decision(layout, True)) diff --git a/test_files/plot.py b/test_files/plot.py deleted file mode 100644 index 9de7974..0000000 --- a/test_files/plot.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -import random as rd -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD -from validation import Validation - -def make_plots(): - layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] - circle = False - validation = Validation(layout, circle) - expec, optimal_policy = mD(layout, circle).solve() - - # Plot 1: Theoretical vs Empirical Cost - expected_costs = np.zeros(len(expec)) - for start_square in range(len(expec)): - total_turns = 0 - for _ in range(10000): - total_turns += validation.play_one_game(start_square) - expected_costs[start_square] = total_turns / 10000 - - squares = np.arange(len(expec)) - plt.plot(squares, expec, label="Theoretical cost") - plt.plot(squares, expected_costs, label="Empirical cost") - plt.xticks(np.arange(0, len(expec), step=1)) - plt.grid(True) - plt.xlabel("Square") - plt.ylabel("Cost") - plt.legend() - plt.title("Comparison between the expected cost and the actual cost") - plt.show() - - # Plot 2: Expected number of turns for different policies - policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2, np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))] - avgn_turns = [Validation(layout, circle).empirical_results() for policy in policies] - names = ["optimal", "safe", "normal", "risky", "random"] - plt.bar(names, avgn_turns) - plt.xlabel("Policy") - plt.ylabel("Cost") - plt.title("Expected number of turns for different policies") - plt.show() - -# Call make_plots function -if __name__ == "__main__": - make_plots() diff --git a/test_files/plotting.py b/test_files/plotting.py deleted file mode 100644 index 5ce6476..0000000 --- a/test_files/plotting.py +++ /dev/null @@ -1,41 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -from simulate import Simulate as sim -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD - -def get_results(layouts, circle, n_iterations=100): - results_markov = [] - results_safe = [] - results_normal = [] - results_risky = [] - results_random = [] - - for layout in layouts: - # Compute optimal policy - expec, policy = mD(layout, circle).solve() - - # Simulate game using Simulate class - sim_instance = sim(layout, circle) - result_markov = sim_instance.simulate_game(policy, n_iterations) - results_markov.append(result_markov) - - # Simulate with fixed strategies using Simulate class - results_safe.append(sim_instance.simulate_game([1]*15, n_iterations)) - results_normal.append(sim_instance.simulate_game([2]*15, n_iterations)) - results_risky.append(sim_instance.simulate_game([3]*15, n_iterations)) - results_random.append(sim_instance.simulate_game(np.random.randint(1, 4, size=15), n_iterations)) - - return results_markov, results_safe, results_normal, results_risky, results_random - -# Utilisation de la fonction get_results pour obtenir les résultats -layouts = [[0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]] # Exemple de layouts à utiliser -circle = True # Exemple de valeur pour circle -results_markov, results_safe, results_normal, results_risky, results_random = get_results(layouts, circle, n_iterations=100) - -# Imprimer les résultats (vous pouvez les enregistrer dans un fichier si nécessaire) -print("Results Markov:", results_markov) -print("Results Safe:", results_safe) -print("Results Normal:", results_normal) -print("Results Risky:", results_risky) -print("Results Random:", results_random) diff --git a/test_files/tmc_test.py b/test_files/tmc_test.py deleted file mode 100644 index 461afbb..0000000 --- a/test_files/tmc_test.py +++ /dev/null @@ -1,80 +0,0 @@ -import numpy as np -import random as rd - -class TransitionMatrixCalculator: - def __init__(self): - # Probabilités de transition pour les dés "safe", "normal" et "risky" - self.safe_dice = np.array([1/2, 1/2]) - self.normal_dice = np.array([1/3, 1/3, 1/3]) - self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) - - def compute_transition_matrix(self, layout: list, circle: bool): - size = len(layout) - matrix_safe = self._compute_matrix(layout, self.safe_dice, size, circle, 'safe') - matrix_normal = self._compute_matrix(layout, self.normal_dice, size, circle, 'normal') - matrix_risky = self._compute_matrix(layout, self.risky_dice, size, circle, 'risky') - return matrix_safe, matrix_normal, matrix_risky - - def _compute_matrix(self, layout: list, dice_probs: list, size: int, circle: bool, matrix_type: str): - transition_matrix = np.zeros((size, size)) - dice_type = None - - if matrix_type == 'safe': - dice_type = self.safe_dice - elif matrix_type == 'normal': - dice_type = self.normal_dice - elif matrix_type == 'risky': - dice_type = self.risky_dice - - for k in range(size): - for s, p in enumerate(dice_probs): - k_prime = (k + s) % size if circle else min(size - 1, k + s) - - if k == 9 and s == 1 and matrix_type == 'safe': - k_prime = size - 1 - elif k == 2 and s > 0 and matrix_type == 'safe': - p /= 2 - k_prime = 10 + s - 1 - if layout[k_prime] == 1: - k_prime = 0 - elif layout[k_prime] == 2: - k_prime = max(0, k_prime - 3) - elif k == 7 and s == 3 and matrix_type == 'risky': - k_prime = size - 1 - elif k == 8 and s in [2, 3] and matrix_type == 'risky': - if circle or s == 2: - k_prime = size - 1 - else: - k_prime = 0 - elif k == 9 and s in [1, 2, 3] and matrix_type == 'risky': - if not circle or s == 1: - k_prime = size - 1 - elif circle and s == 2: - k_prime = 0 - elif circle and s == 3: - k_prime = 1 - if layout[k_prime] in [1, 2]: - k_prime = max(0, k_prime - 3) if layout[k_prime] == 2 else 0 - - transition_matrix[k, k_prime] += p * dice_type[s] - - return transition_matrix - - def generate_arrays(self,n): - arrays = [] - for _ in range(n): - array = np.zeros(15, dtype=int) - indices = rd.sample(range(1, 14), 3) - array[indices] = 1, 2, 3 - arrays.append(array) - return arrays - - def tst_transition_matrix(self): - layouts = self.generate_arrays(1000) - for array in layouts: - print(array) - self.compute_transition_matrix(array, False) - self.compute_transition_matrix(array, True) - -#tmc = TransitionMatrixCalculator() -#tmc.tst_transition_matrix() diff --git a/tmc.py b/tmc.py deleted file mode 100644 index 1c8ef08..0000000 --- a/tmc.py +++ /dev/null @@ -1,239 +0,0 @@ -import numpy as np -import random as rd - -class TransitionMatrixCalculator: - def __init__(self): - # Initialisation des matrices de transition pour les dés "safe", "normal" et "risky" - self.matrix_safe = np.zeros((15, 15)) - self.matrix_normal = np.zeros((15, 15)) - self.matrix_risky = np.zeros((15, 15)) - - # Probability to go from state k to k' - self.safe_dice = np.array([1/2, 1/2]) - self.normal_dice = np.array([1/3, 1/3, 1/3]) - self.risky_dice = np.array([1/4, 1/4, 1/4, 1/4]) - - def compute_transition_matrix(self, layout, circle=False): - self.matrix_safe.fill(0) - self.matrix_normal.fill(0) - self.matrix_risky.fill(0) - - self._compute_safe_matrix() - self._compute_normal_matrix(layout, circle) - self._compute_risky_matrix(layout, circle) - - return self.matrix_safe, self.matrix_normal, self.matrix_risky - - - def _compute_safe_matrix(self): - for k in range(0,15): - for s, p in enumerate(self.safe_dice): - if k == 9 and s == 1: - k_prime = 14 - self.matrix_safe[k,k_prime] += p - elif k == 2 and s > 0: - p /= 2 - k_prime = 10 - self.matrix_safe[k,k_prime] += p - k_prime = 3 - self.matrix_safe[k,k_prime] += p - else: - k_prime = k + s - k_prime = min(14, k_prime) - self.matrix_safe[k,k_prime] += p - - return self.matrix_safe - - def _compute_normal_matrix(self, layout, circle): - for k in range(0, 15): - for s, p in enumerate(self.normal_dice): - if k == 8 and s == 2: - k_prime = 14 - self.matrix_normal[k,k_prime] += p - continue - elif k == 9 and s in [1, 2]: - if not circle or s == 1: - k_prime = 14 - self.matrix_normal[k,k_prime] += p - elif circle and s == 2: - k_prime = 0 - self.matrix_normal[k,k_prime] += p - continue - - # handle the fast lane - if k == 2 and s > 0: - p /= 2 - k_prime = 10 + (s - 1) # rebalance the step before with s > 0 - if layout[k_prime] in [0, 3]: # normal or prison square - self.matrix_normal[k,k_prime] += p - elif layout[k_prime] == 1: # handle type 1 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 0 - self.matrix_normal[k,k_prime] += p / 2 - elif layout[k_prime] == 2: # handle type 2 trap - self.matrix_normal[k,k_prime] += p / 2 - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 3 + (s - 1) # rebalance the step before with s > 0 - if layout[k_prime] in [0, 3]: # normal or prison square - self.matrix_normal[k,k_prime] += p - elif layout[k_prime] == 1: # handle type 1 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = 0 - self.matrix_normal[k,k_prime] += p / 2 - elif layout[k_prime] == 2: # handle type 2 trap - self.matrix_normal[k,k_prime] += p / 2 - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p / 2 - continue - - k_prime = k + s - k_prime = k_prime % 15 if circle else min(14, k_prime) # modulo - if layout[k_prime] in [1, 2]: - p /= 2 - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_normal[k,k_prime] += p - continue - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_normal[k,k_prime] += p - continue - self.matrix_normal[k,k_prime] += p - return self.matrix_normal - - def _compute_risky_matrix(self, layout, circle): - for k in range(0, 15): - for s, p in enumerate(self.risky_dice): - if k == 7 and s == 3: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - continue - elif k == 8 and s in [2, 3]: - if not circle or s == 2: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - elif circle: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - continue - elif k == 9 and s in [1, 2, 3]: - if not circle or s == 1: - k_prime = 14 - self.matrix_risky[k,k_prime] += p - elif circle and s == 2: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif circle and s == 3: - k_prime = 1 - if layout[k_prime] != 0: - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif layout[k_prime] == 2: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - self.matrix_risky[k,k_prime] += p - continue - continue - - if k == 2 and s > 0: - p /= 2 - k_prime = 10 + (s - 1) - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - else: - self.matrix_risky[k,k_prime] += p - k_prime = 3 + (s - 1) - self.matrix_risky[k,k_prime] += p - continue - - k_prime = k + s - k_prime = k_prime % 15 if circle else min(14, k_prime) - if layout[k_prime] in [1, 2]: - if layout[k_prime] == 1: - k_prime = 0 - self.matrix_risky[k,k_prime] += p - continue - elif layout[k_prime] == 2: - if k_prime == 10: - k_prime = 0 - elif k_prime == 11: - k_prime = 1 - elif k_prime == 12: - k_prime = 2 - else: - k_prime = max(0, k_prime - 3) - self.matrix_risky[k,k_prime] += p - continue - self.matrix_risky[k,k_prime] += p - return self.matrix_risky - - """ - def generate_arrays(self,n): - # Initialize an empty list to store all the arrays - arrays = [] - - for _ in range(n): - # Initialize a zero array of size 15 - array = np.zeros(15, dtype=int) - - # Generate 3 random indices between 1 and 13 (exclusive) - indices = rd.sample(range(1, 14), 3) - - # Assign the values 1, 2 and 3 to the randomly generated indices - array[indices] = 1, 2, 3 - - # Append the generated array to the list - arrays.append(array) - - return arrays - - # create a function that test the transition matrix for different layout each time with one trap of each sort - def tst_transition_matrix(self): - # create a list of 100 different layouts - layouts = self.generate_arrays(100) - for array in layouts: - print(array) - self.compute_transition_matrix(array, False) - self.compute_transition_matrix(array, True) - - - - - def tst_transition_matrix(self): - # create a list of 100 different layouts - layout = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 , 0, 0, 0, 0] - - print(self.compute_transition_matrix(layout, False)) - print(self.compute_transition_matrix(layout, True)) - - -tmc = TransitionMatrixCalculator() -tmc.tst_transition_matrix() -""" \ No newline at end of file diff --git a/validation.py b/validation.py deleted file mode 100644 index 8f94f24..0000000 --- a/validation.py +++ /dev/null @@ -1,131 +0,0 @@ -import random as rd -import numpy as np -import matplotlib.pyplot as plt -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD - -class validation: - def __init__(self, layout, circle=False): - - # import from other .PY - self.layout = layout - self.circle = circle - self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) - solver = mD(self.layout, self.circle) - self.expec, self.optimal_policy = solver.solve() - - # Define all the strategy - self.optimal_strategy = self.optimal_policy - self.safe_strategy = [1]*15 - self.normal_strategy = [2]*15 - self.risky_strategy = [3]*15 - self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] - - - def simulate_game(self, strategy, n_iterations=10000): - transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] - number_turns = [] - - for _ in range(n_iterations): - total_turns = 0 - k = 0 # état initial - - while k < len(self.layout) - 1: - action = strategy[k] # action selon la stratégie - - # Convertir action en entier pour accéder à l'indice correct dans transition_matrices - action_index = int(action) - 1 - transition_matrix = transition_matrices[action_index] - - #print(f"Current state (k): {k}, Action chosen: {action}") - #print(f"Transition matrix: {transition_matrix}") - - # Aplatir la matrice de transition en une distribution de probabilité 1D - flattened_probs = transition_matrix[k] - flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités - - # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie - k = np.random.choice(len(self.layout), p=flattened_probs) - - # Mise à jour du nombre de tours en fonction de l'état actuel - if self.layout[k] == 3 and action == 2: - total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 - elif self.layout[k] == 3 and action == 3: - total_turns += 2 - else: - total_turns += 1 - - number_turns.append(total_turns) - - return np.mean(number_turns) - - - def play_optimal_strategy(self, n_iterations=10000): - return self.simulate_game(self.optimal_policy, n_iterations) - - - def play_dice_strategy(self, dice_choice, n_iterations=10000): - if dice_choice == 'SafeDice': - strategy = self.safe_strategy - elif dice_choice == 'NormalDice': - strategy = self.normal_strategy - elif dice_choice == 'RiskyDice': - strategy = self.risky_strategy - else: - raise ValueError("Invalid dice choice") - - return self.simulate_game(strategy, n_iterations) - - def play_random_strategy(self, n_iterations=10000): - return self.simulate_game(self.random_strategy, n_iterations) - - def compare_strategies(self, num_games=1000): - optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games) - dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) - dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) - dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games) - random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games) - - return { - 'Optimal': optimal_cost, - 'SafeDice': dice1_cost, - 'NormalDice': dice2_cost, - 'RiskyDice': dice3_cost, - 'Random': random_cost - } - - - - -# Utilisation d'exemple -layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0] -validation = validation(layout, circle=False) - -circle = False # Example circle value -""" -# Create an instance of validation -validator = validation(layout, circle) - -# Use the methods -validator.simulate_game(validator.optimal_strategy, n_iterations=10000) - - -results = validation.compare_strategies(num_games=10000) -print("Coûts moyens :") -for strategy, cost in results.items(): - print(f"{strategy}: {cost}")""" - -optimal_cost = validation.play_optimal_strategy(n_iterations=10000) -print("Optimal Strategy Cost:", optimal_cost) - -dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000) -print("Normal Dice Strategy Cost:", dice2_cost) - -random_cost = validation.play_random_strategy(n_iterations=10000) -print("Random Strategy Cost:", random_cost) - -strategy_comparison = validation.compare_strategies(num_games=10000) -print("Strategy Comparison Results:", strategy_comparison) -- GitLab