def plot_results(layouts, circle, n_iterations=100):
    """Plot the average number of turns per layout for several dice strategies.

    For each layout the Markov-optimal policy is computed and simulated, then
    compared against the fixed safe/normal/risky strategies and a random one.

    Parameters
    ----------
    layouts : iterable
        Board layouts; each is a sequence of 15 cells (assumes 15 squares —
        TODO confirm against tmc/markovDecision).
    circle : bool
        Circular-board rule flag, forwarded to the solver and simulator.
    n_iterations : int
        Number of simulated games averaged per strategy (default 100).
    """
    results_markov = []
    results_safe = []
    results_normal = []
    results_risky = []
    results_random = []

    for layout in layouts:
        # One Validation instance per layout: its __init__ computes the
        # transition matrices and solves the MDP, exposing optimal_policy.
        # (The original code called mD(layout, circle) and unpacked the
        # constructor result, and called simulate_game as a class method
        # with a wrong signature — both fixed here.)
        validator = Val(layout, circle)

        # Markov (optimal) strategy from the solved decision problem.
        results_markov.append(
            validator.simulate_game(validator.optimal_policy, n_iterations))

        # Fixed-die strategies: always safe (1), normal (2) or risky (3).
        results_safe.append(validator.simulate_game([1] * 15, n_iterations))
        results_normal.append(validator.simulate_game([2] * 15, n_iterations))
        results_risky.append(validator.simulate_game([3] * 15, n_iterations))

        # Uniformly random die choice per square (values 1..3).
        results_random.append(validator.simulate_game(
            np.random.randint(1, 4, size=15), n_iterations))

    # Plot average cost of each strategy against the layout index.
    plt.figure(figsize=(12, 8))
    xs = range(len(layouts))
    plt.plot(xs, results_markov, label='Markov')
    plt.plot(xs, results_safe, label='Safe')
    plt.plot(xs, results_normal, label='Normal')
    plt.plot(xs, results_risky, label='Risky')
    plt.plot(xs, results_random, label='Random')

    plt.xticks(xs, xs)
    plt.xlabel('Layout number', fontsize=13)
    plt.ylabel('Average number of turns', fontsize=13)
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    plt.show()
class Validation:
    """Monte-Carlo validation of dice strategies for the board game.

    On construction this computes the per-die transition matrices via the
    project's TransitionMatrixCalculator and solves the Markov decision
    problem, exposing the optimal policy alongside fixed reference
    strategies (safe/normal/risky/random).
    """

    def __init__(self, layout, circle=False):
        # Board layout: sequence of 15 cells; a cell value of 3 appears to
        # mark a trap square (see the turn-cost rules in simulate_game) —
        # TODO confirm against tmc.
        self.layout = layout
        self.circle = circle

        # Transition matrices, one per die type, from the project calculator.
        self.tmc_instance = tmc()
        self.safe_dice = self.tmc_instance._compute_safe_matrix()
        self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle)
        self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle)

        # Solve the Markov decision problem for the optimal policy.
        solver = mD(self.layout, self.circle)
        self.expec, self.optimal_policy = solver.solve()

        # Reference strategies: one die choice (1=safe, 2=normal, 3=risky)
        # per square.
        self.optimal_strategy = self.optimal_policy
        self.safe_strategy = [1] * 15
        self.normal_strategy = [2] * 15
        self.risky_strategy = [3] * 15
        self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)]

    def simulate_game(self, strategy, n_iterations=10000):
        """Average number of turns needed to reach the final square.

        Parameters
        ----------
        strategy : sequence of die choices (1..3), indexed by square.
        n_iterations : number of independent games to average over.

        Returns
        -------
        float : mean turn count across all simulated games.
        """
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []

        for _ in range(n_iterations):
            total_turns = 0
            state = 0  # initial square
            while state < len(self.layout) - 1:  # until goal square reached
                action = strategy[state]  # die chosen for this square
                transition_matrix = transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout),
                                         p=transition_matrix[state])

                # Turn cost: landing on a trap (layout value 3) with the
                # normal die costs 1 or 2 turns with equal probability;
                # with the risky die it always costs 2; otherwise 1.
                if self.layout[state] == 3 and action == 2:
                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1

            number_turns.append(total_turns)

        return np.mean(number_turns)

    def simulate_state(self, strategy, n_iterations=10000):
        """Per-step turn costs of one game, averaged across iterations.

        Each iteration records the cost of every step of a single game;
        the element-wise mean over iterations is returned.

        NOTE(review): games have random lengths, so the per-iteration lists
        can be ragged and ``np.mean(..., axis=0)`` will fail unless all
        games happen to take the same number of steps — confirm the
        intended semantics with the caller.
        """
        transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice]
        number_turns = []

        for _ in range(n_iterations):
            turns_per_state = []
            state = 0  # initial square
            while state < len(self.layout) - 1:
                total_turns = 0
                action = strategy[state]
                transition_matrix = transition_matrices[int(action) - 1]
                state = np.random.choice(len(self.layout),
                                         p=transition_matrix[state])

                # Same trap-cost rules as simulate_game.
                if self.layout[state] == 3 and action == 2:
                    total_turns += np.random.choice([1, 2], p=[0.5, 0.5])
                elif self.layout[state] == 3 and action == 3:
                    total_turns += 2
                else:
                    total_turns += 1

                turns_per_state.append(total_turns)

            number_turns.append(turns_per_state)

        return np.mean(number_turns, axis=0)