diff --git a/plot.py b/plot.py deleted file mode 100644 index 0501d755c1dcdef3c82d76bd64098cb6c48c77eb..0000000000000000000000000000000000000000 --- a/plot.py +++ /dev/null @@ -1,48 +0,0 @@ -import matplotlib.pyplot as plt -from simulate import Validation as Val -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD -import random as rd -import numpy as np - - -def plot_results(layouts, circle, n_iterations=100): - results_markov = [] - results_safe = [] - results_normal = [] - results_risky = [] - results_random = [] - - for layout in layouts: - # Compute optimal policy - expec, policy = mD(layout, circle) - - # Simulate game - result_markov = Val.simulate_game(policy, layout, circle, n_iterations) - results_markov.append(result_markov) - - result_safe = Val.simulate_game([1]*15, layout, circle, n_iterations) - results_safe.append(result_safe) - - result_normal = Val.simulate_game([2]*15, layout, circle, n_iterations) - results_normal.append(result_normal) - - result_risky = Val.simulate_game([3]*15, layout, circle, n_iterations) - results_risky.append(result_risky) - - result_random = Val.simulate_game(np.random.randint(1, 4, size=15), layout, circle, n_iterations) - results_random.append(result_random) - - # Plot the results - plt.figure(figsize=(12, 8)) - plt.plot(range(len(layouts)), results_markov, label='Markov') - plt.plot(range(len(layouts)), results_safe, label='Safe') - plt.plot(range(len(layouts)), results_normal, label='Normal') - plt.plot(range(len(layouts)), results_risky, label='Risky') - plt.plot(range(len(layouts)), results_random, label='Random') - - plt.xticks(range(len(layouts)), range(len(layouts))) - plt.xlabel('Layout number', fontsize=13) - plt.ylabel('Average number of turns', fontsize=13) - plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1) - plt.show() diff --git a/simulate.py b/simulate.py index 49d86502a4cce933f81439fd72c165c6f6a6f902..5f3cdcb99a9cf86b17a39a4e54142b180ed44a68 100644 --- a/simulate.py +++ b/simulate.py @@ -1,78 +1,174 @@ -from tmc import TransitionMatrixCalculator as tmc -from markovDecision import MarkovDecisionSolver as mD -import random as rd +import random import numpy as np - -class Validation: - def __init__(self, layout, circle=False): - self.layout = layout - self.circle = circle - - # Compute transition matrices using TransitionMatrixCalculator - self.tmc_instance = tmc() - self.safe_dice = self.tmc_instance._compute_safe_matrix() - self.normal_dice = self.tmc_instance._compute_normal_matrix(layout, circle) - self.risky_dice = self.tmc_instance._compute_risky_matrix(layout, circle) - - # Solve Markov Decision Problem - solver = mD(self.layout, self.circle) - self.expec, self.optimal_policy = solver.solve() - - # Define all the strategies - self.optimal_strategy = self.optimal_policy - self.safe_strategy = [1] * 15 - self.normal_strategy = [2] * 15 - self.risky_strategy = [3] * 15 - self.random_strategy = [rd.choice([1, 2, 3]) for _ in range(15)] - - def simulate_game(self, strategy, n_iterations=10000): - # Compute transition matrices for each dice - transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] - number_turns = [] - - for _ in range(n_iterations): - total_turns = 0 - state = 0 # initial state - while state < len(self.layout) - 1: # until goal state is reached - action = strategy[state] # get action according to strategy - transition_matrix = transition_matrices[int(action) - 1] - state = np.random.choice(len(self.layout), p=transition_matrix[state]) - - if self.layout[state] == 3 and action == 2: - total_turns += np.random.choice([1, 2], p=[0.5, 0.5]) - elif self.layout[state] == 3 and action == 3: - total_turns += 2 - else: - total_turns += 1 - - number_turns.append(total_turns) - - return np.mean(number_turns) - - def simulate_state(self, strategy, n_iterations=10000): - # Compute transition matrices for each dice - transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] - number_turns = [] - - for _ in range(n_iterations): - turns_per_state = [] - state = 0 - - while state < len(self.layout) - 1: - total_turns = 0 - action = strategy[state] - transition_matrix = transition_matrices[int(action) - 1] - state = np.random.choice(len(self.layout), p=transition_matrix[state]) - - if self.layout[state] == 3 and action == 2: - total_turns += np.random.choice([1, 2], p=[0.5, 0.5]) - elif self.layout[state] == 3 and action == 3: - total_turns += 2 - else: - total_turns += 1 - - turns_per_state.append(total_turns) - - number_turns.append(turns_per_state) - - return np.mean(number_turns, axis=0) +import matplotlib.pyplot as plt +from tmc import TransitionMatrixCalculator as tmc +from markovDecision import MarkovDecisionSolver + +nSquares = 15 +nSimul = 10000 + +def playOneTurn(diceChoice, curPos, layout, circle, prison): + if curPos == nSquares - 1: + return nSquares - 1, False + + if prison: + return curPos, False + + listDiceResults = [i for i in range(diceChoice + 1)] + result = random.choice(listDiceResults) + + if curPos == 2 and result != 0: + slowLane = random.choice([0, 1]) + if slowLane: + newPos = curPos + result + else: + newPos = curPos + result + 7 + elif ((curPos == 9 and result != 0) or (curPos in [7, 8, 9] and curPos + result >= 10)): + newPos = curPos + result + 4 + else: + newPos = curPos + result + + if newPos > nSquares - 1: + if circle: + newPos -= nSquares + else: + return nSquares - 1, True + + newSquare = layout[newPos] + + if diceChoice == 1: + return newPos, False + elif diceChoice == 2: + newSquare = random.choice([0, newSquare]) + + if newSquare == 0: + return newPos, False # nothing happens + elif newSquare == 1: + return 0, False # back to square one + elif newSquare == 2: + if newPos - 3 < 0: + return 0, False # back to square one + return newPos - 3, False # back 3 squares + elif newSquare == 3: + return newPos, True # prison + elif newSquare == 4: + newSquare = random.choice([1, 2, 3]) + if newSquare == 1: + return 0, False # back to square one + elif newSquare == 2: + if newPos - 3 < 0: + return 0, False # back to square one + return newPos - 3, False # back 3 squares + elif newSquare == 3: + return newPos, True # prison + +def playOneGame(layout, circle, policy, start=0): + nTurns = 0 + curPos = start + prison = False + + if circle: + while curPos != nSquares - 1: + newPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison) + if newPos > nSquares - 1: + curPos = nSquares - newPos + curPos = newPos + nTurns += 1 + else: + while curPos < nSquares - 1: + newPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison) + curPos = newPos + nTurns += 1 + + return nTurns + +def empiric_cost_of_square(layout, circle, policy): + expected_costs = np.zeros(nSquares) + for start_square in range(nSquares): + total_turns = 0 + for _ in range(nSimul): + total_turns += playOneGame(layout, circle, policy, start=start_square) + expected_costs[start_square] = total_turns / nSimul + return expected_costs + +def empirical_results(layout, circle, policy): + avgnTurnsPlayed = 0 + for _ in range(nSimul): + nTurns = playOneGame(layout, circle, policy) + avgnTurnsPlayed += nTurns + return avgnTurnsPlayed / nSimul + +def comparison_theorical_empirical(layout, circle): + solver = MarkovDecisionSolver(layout, circle) + expec, optimal_policy = solver.solve() + actual = empiric_cost_of_square(layout, circle, optimal_policy.astype(int)) + + # Plotting both arrays on the same plot + squares = np.arange(len(expec)) + plt.plot(squares, expec, label="Theoretical cost") + plt.plot(squares, actual, label="Empirical cost") + + plt.xticks(np.arange(0, len(expec), step=1)) + plt.grid(True) + plt.xlabel("Square") + plt.ylabel("Cost") + plt.legend() + plt.title("Comparison between the expected cost and the actual cost") + plt.show() + +def comparison_of_policies_total(layout, circle): + solver = MarkovDecisionSolver(layout, circle) + _, optimal_policy = solver.solve() + policies = [optimal_policy.astype(int), np.ones(nSquares, dtype=int), + np.ones(nSquares, dtype=int) * 2, np.ones(nSquares, dtype=int) * 3, + np.random.randint(1, 4, size=nSquares)] + + avgnTurns = [empirical_results(layout, circle, policy) for policy in policies] + names = ["optimal", "safe", "normal", "risky", "random"] + + # Creating the bar plot + plt.bar(names, avgnTurns) + + # Adding labels and title + plt.xlabel("Policy") + plt.ylabel("Cost") + plt.title("Expected number of turns by policy") + + # Displaying the plot + plt.show() + +def comparison_of_policies_squares(layout, circle): + solver = MarkovDecisionSolver(layout, circle) + _, optimal_policy = solver.solve() + policies = [optimal_policy.astype(int), np.ones(nSquares, dtype=int), + np.ones(nSquares, dtype=int) * 2, np.ones(nSquares, dtype=int) * 3, + np.random.randint(1, 4, size=nSquares)] + + avgnTurns = [empiric_cost_of_square(layout, circle, policy) for policy in policies] + + # Generating x-axis values (squares) + squares = np.arange(len(avgnTurns[0])) + + # Plotting both arrays on the same plot + plt.plot(squares, avgnTurns[0], label="Optimal") + plt.plot(squares, avgnTurns[1], label="Safe") + plt.plot(squares, avgnTurns[2], label="Normal") + plt.plot(squares, avgnTurns[3], label="Risky") + plt.plot(squares, avgnTurns[4], label="Random") + + plt.xticks(np.arange(0, len(avgnTurns[0]), step=1)) + plt.grid(True) + plt.xlabel("Square") + plt.ylabel("Cost") + plt.legend() + plt.title("Expected cost for different policies") + plt.show() + +def make_plots(): + layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] + circle = False + comparison_theorical_empirical(layout, circle) + # comparison_of_policies_total(layout, circle) + # comparison_of_policies_squares(layout, circle) + +make_plots() diff --git a/validation.py b/validation.py index a4fb1ff70a5ca2f623003e8e3260d346c2992549..8f94f24e812c49b7160602b05cbfe18a0b2b58d9 100644 --- a/validation.py +++ b/validation.py @@ -25,43 +25,69 @@ class validation: self.random_strategy = [rd.choice([0,1,2,3]) for _ in range(15)] - def simulate_game(self, strategy, n_iterations=10000): - # Compute transition matrices for each dice transition_matrices = [self.safe_dice, self.normal_dice, self.risky_dice] number_turns = [] + for _ in range(n_iterations): total_turns = 0 - state = 0 # initial state - while state < len(self.layout) - 1: # until goal state is reached - action = strategy[state] # get action according to strategy - transition_matrix = transition_matrices[int(action - 1)] - state = np.random.choice(len(self.layout), p=transition_matrix[state]) - if self.layout[state] == 3 and action == 2: + k = 0 # état initial + + while k < len(self.layout) - 1: + action = strategy[k] # action selon la stratégie + + # Convertir action en entier pour accéder à l'indice correct dans transition_matrices + action_index = int(action) - 1 + transition_matrix = transition_matrices[action_index] + + #print(f"Current state (k): {k}, Action chosen: {action}") + #print(f"Transition matrix: {transition_matrix}") + + # Aplatir la matrice de transition en une distribution de probabilité 1D + flattened_probs = transition_matrix[k] + flattened_probs /= np.sum(flattened_probs) # Normalisation des probabilités + + # Mise à jour de l'état (k) en fonction de la distribution de probabilité aplatie + k = np.random.choice(len(self.layout), p=flattened_probs) + + # Mise à jour du nombre de tours en fonction de l'état actuel + if self.layout[k] == 3 and action == 2: total_turns += 1 if np.random.uniform(0, 1) < 0.5 else 2 - elif self.layout[state] == 3 and action == 3: + elif self.layout[k] == 3 and action == 3: total_turns += 2 else: total_turns += 1 + number_turns.append(total_turns) + return np.mean(number_turns) - def play_optimal_strategy(self): - return turns + def play_optimal_strategy(self, n_iterations=10000): + return self.simulate_game(self.optimal_policy, n_iterations) + - def play_dice_strategy(self): - return turns + def play_dice_strategy(self, dice_choice, n_iterations=10000): + if dice_choice == 'SafeDice': + strategy = self.safe_strategy + elif dice_choice == 'NormalDice': + strategy = self.normal_strategy + elif dice_choice == 'RiskyDice': + strategy = self.risky_strategy + else: + raise ValueError("Invalid dice choice") - def play_random_strategy(self): - return turns + return self.simulate_game(strategy, n_iterations) + + def play_random_strategy(self, n_iterations=10000): + return self.simulate_game(self.random_strategy, n_iterations) def compare_strategies(self, num_games=1000): - optimal_cost = self.simulate_game(strategy='Optimal', num_games=num_games) - dice1_cost = self.simulate_game(strategy='SafeDice', num_games=num_games) - dice2_cost = self.simulate_game(strategy='NormalDice', num_games=num_games) - dice3_cost = self.simulate_game(strategy='RiskyDice', num_games=num_games) - random_cost = self.simulate_game(strategy='Random', num_games=num_games) + optimal_cost = self.simulate_game(self.optimal_strategy, n_iterations=num_games) + dice1_cost = self.simulate_game(self.safe_strategy, n_iterations=num_games) + dice2_cost = self.simulate_game(self.normal_strategy, n_iterations=num_games) + dice3_cost = self.simulate_game(self.risky_strategy, n_iterations=num_games) + random_cost = self.simulate_game(self.random_strategy, n_iterations=num_games) return { 'Optimal': optimal_cost, @@ -75,11 +101,11 @@ class validation: # Utilisation d'exemple -layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0] +layout = [0, 0, 3, 0, 2, 0, 2, 0, 2, 0, 3, 0, 0, 1, 0] validation = validation(layout, circle=False) circle = False # Example circle value - +""" # Create an instance of validation validator = validation(layout, circle) @@ -90,4 +116,16 @@ validator.simulate_game(validator.optimal_strategy, n_iterations=10000) results = validation.compare_strategies(num_games=10000) print("Coûts moyens :") for strategy, cost in results.items(): - print(f"{strategy}: {cost}") + print(f"{strategy}: {cost}")""" + +optimal_cost = validation.play_optimal_strategy(n_iterations=10000) +print("Optimal Strategy Cost:", optimal_cost) + +dice2_cost = validation.play_dice_strategy('NormalDice', n_iterations=10000) +print("Normal Dice Strategy Cost:", dice2_cost) + +random_cost = validation.play_random_strategy(n_iterations=10000) +print("Random Strategy Cost:", random_cost) + +strategy_comparison = validation.compare_strategies(num_games=10000) +print("Strategy Comparison Results:", strategy_comparison)