diff --git a/plot.py b/plot.py
index 68b62cd395394e978d81991d541c7c7e3b0b4c03..9de7974a4acedd9cb6f62a02dfcd33349ccaec44 100644
--- a/plot.py
+++ b/plot.py
@@ -2,34 +2,44 @@ import numpy as np
 import random as rd
 import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator as tmc
-from test_files.markovDecision_testing import markovDecision as mD
+from markovDecision import MarkovDecisionSolver as mD
 from validation import Validation
 
-def plot_results(validation_instance):
-    results_markov = validation_instance.simulate_game('markov')
-    results_safe = validation_instance.simulate_game([1]*15)
-    results_normal = validation_instance.simulate_game([2]*15)
-    results_risky = validation_instance.simulate_game([3]*15)
-    results_random = validation_instance.simulate_game(np.random.randint(1, 4, size=15))
+def make_plots():
+    layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
+    circle = False
+    validation = Validation(layout, circle)
+    expec, optimal_policy = mD(layout, circle).solve()
 
-    plt.figure(figsize=(12, 8))
-    plt.plot(range(len(validation_instance.layouts)), results_markov, label='Markov')
-    plt.plot(range(len(validation_instance.layouts)), results_safe, label='SafeDice')
-    plt.plot(range(len(validation_instance.layouts)), results_normal, label='NormalDice')
-    plt.plot(range(len(validation_instance.layouts)), results_risky, label='RiskyDice')
-    plt.plot(range(len(validation_instance.layouts)), results_random, label='Random')
+    # Plot 1: Theoretical vs Empirical Cost
+    expected_costs = np.zeros(len(expec))
+    for start_square in range(len(expec)):
+        total_turns = 0
+        for _ in range(10000):
+            total_turns += validation.play_one_game(start_square)
+        expected_costs[start_square] = total_turns / 10000
 
-    plt.xticks(range(len(validation_instance.layouts)), range(len(validation_instance.layouts)))
-    plt.xlabel('Layout Number', fontsize=13)
-    plt.ylabel('Average Number of Turns', fontsize=13)
-    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
+    squares = np.arange(len(expec))
+    plt.plot(squares, expec, label="Theoretical cost")
+    plt.plot(squares, expected_costs, label="Empirical cost")
+    plt.xticks(np.arange(0, len(expec), step=1))
+    plt.grid(True)
+    plt.xlabel("Square")
+    plt.ylabel("Cost")
+    plt.legend()
+    plt.title("Comparison between the expected cost and the actual cost")
     plt.show()
 
-# Example usage
-layouts = [
-    [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0],
-    # Add more layouts as needed
-]
+    # Plot 2: Expected number of turns for different policies
+    policies = [optimal_policy, np.ones(len(expec)), np.ones(len(expec)) * 2, np.ones(len(expec)) * 3, np.random.randint(1, 4, len(expec))]
+    avgn_turns = [validation.empirical_results(policy) for policy in policies]
+    names = ["optimal", "safe", "normal", "risky", "random"]
+    plt.bar(names, avgn_turns)
+    plt.xlabel("Policy")
+    plt.ylabel("Cost")
+    plt.title("Expected number of turns for different policies")
+    plt.show()
 
-validation_instance = Validation(layouts, circle=False, n_iterations=10000)
-plot_results(validation_instance)
\ No newline at end of file
+# Call make_plots function
+if __name__ == "__main__":
+    make_plots()
diff --git a/validation.py b/validation.py
index 71ebcab5ccc31d03c5003378397b6db1ed6786d5..a270da57b2de1e9f784a154f1649e3dfc9602158 100644
--- a/validation.py
+++ b/validation.py
@@ -1,5 +1,8 @@
+import random
 import numpy as np
+import matplotlib.pyplot as plt
 from tmc import TransitionMatrixCalculator
+from markovDecision import MarkovDecisionSolver as mD
 
 class Validation:
     def __init__(self, layout, circle=False):
@@ -7,6 +10,10 @@ class Validation:
         self.circle = circle
         self.tmc_instance = TransitionMatrixCalculator()
 
+        # Compute optimal value iteration results
+        solver = mD(self.layout, self.circle)
+        self.optimal_values, self.optimal_dice = solver.solve()
+
     def simulate_game(self, strategy='optimal', num_games=1000):
         total_turns = 0
 
@@ -28,22 +35,38 @@ class Validation:
         return average_turns
 
     def play_optimal_strategy(self):
-        # Implement the optimal strategy using value iteration results
-        # Use TransitionMatrixCalculator to compute transitions and make decisions
+        current_state = 0  # Start from the initial state
+        turns = 0
 
-        # calculer la stratégie optimale pour ou un tour
+        while current_state < len(self.layout) - 1:
+            optimal_action = int(self.optimal_dice[current_state])  # Get the optimal action for the current state
+            current_state += optimal_action  # Move to the next state based on the optimal action
+            turns += 1
 
+        return turns
 
+    def play_dice_strategy(self, dice):
+        current_state = 0  # Start from the initial state
+        turns = 0
 
-        pass
+        while current_state < len(self.layout) - 1:
+            # Always use the specified dice type (1, 2, or 3)
+            current_state += dice
+            turns += 1
 
-    def play_dice_strategy(self, dice):
-        # Implement a strategy where only one type of dice is used (1, 2, or 3)
-        pass
+        return turns
 
     def play_random_strategy(self):
-        # Implement a purely random strategy
-        pass
+        current_state = 0  # Start from the initial state
+        turns = 0
+
+        while current_state < len(self.layout) - 1:
+            # Choose a random dice roll between 1 and 3
+            dice_roll = np.random.randint(1, 4)
+            current_state += dice_roll
+            turns += 1
+
+        return turns
 
     def compare_strategies(self, num_games=1000):
         optimal_cost = self.simulate_game(strategy='optimal', num_games=num_games)
@@ -60,10 +83,83 @@
             'random': random_cost
         }
 
-# Example usage
-layout = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
-validation = Validation(layout, circle=False)
-results = validation.compare_strategies(num_games=10000)
-print("Average Costs:")
-for strategy, cost in results.items():
-    print(f"{strategy}: {cost}")
+    def play_one_turn(self, dice_choice, cur_pos, prison):
+        if cur_pos == len(self.layout) - 1:
+            return len(self.layout) - 1, False
+
+        if prison:
+            return cur_pos, False
+
+        # Convert dice_choice to integer to avoid TypeError
+        dice_choice = int(dice_choice)
+
+        list_dice_results = [i for i in range(dice_choice + 1)]
+        result = random.choice(list_dice_results)
+
+        if cur_pos == 2 and result != 0:
+            slow_lane = random.choice([0, 1])
+            if slow_lane:
+                new_pos = cur_pos + result
+            else:
+                new_pos = cur_pos + result + 7
+
+        elif ((cur_pos == 9 and result != 0) or ((cur_pos in [7, 8, 9]) and (cur_pos + result >= 10))):
+            new_pos = cur_pos + result + 4
+
+        else:
+            new_pos = cur_pos + result
+
+        if new_pos > len(self.layout) - 1:
+            if self.circle:
+                new_pos -= len(self.layout)
+            else:
+                return len(self.layout) - 1, True
+
+        new_square = self.layout[new_pos]
+
+        if dice_choice == 1:
+            return new_pos, False
+
+        elif dice_choice == 2:
+            new_square = random.choice([0, new_square])
+
+        if new_square == 0:
+            return new_pos, False  # nothing happens
+        elif new_square == 1:
+            return 0, False  # back to square one
+        elif new_square == 2:
+            if new_pos - 3 < 0:
+                return 0, False  # back to square one
+            return new_pos - 3, False  # back 3 squares
+        elif new_square == 3:
+            return new_pos, True  # prison
+
+
+    def play_one_game(self, start=0, policy=None):
+        dice = self.optimal_dice if policy is None else policy  # default to the optimal policy
+        n_turns = 0
+        cur_pos = start
+        prison = False
+
+        if self.circle:
+            while cur_pos != len(self.layout) - 1:
+                new_pos, prison = self.play_one_turn(dice[cur_pos], cur_pos, prison)
+                # play_one_turn already wraps positions when circle is True,
+                # so new_pos is always a valid board index here
+                cur_pos = new_pos
+                n_turns += 1
+        else:
+            while cur_pos < len(self.layout) - 1:
+                new_pos, prison = self.play_one_turn(dice[cur_pos], cur_pos, prison)
+                cur_pos = new_pos
+                n_turns += 1
+
+        return n_turns
+
+    def empirical_results(self, policy=None):
+        total_turns_played = 0
+        for _ in range(10000):
+            n_turns = self.play_one_game(policy=policy)
+            total_turns_played += n_turns
+
+        return total_turns_played / 10000
\ No newline at end of file
diff --git a/validation_ex.py b/validation_ex.py
index 151431c126a384e52658def16a7fbc00ce90810f..741137aca6b9b6d75a966a2891fd681dadb02e94 100644
--- a/validation_ex.py
+++ b/validation_ex.py
@@ -31,7 +31,7 @@ class Validation:
         return average_turns
 
     def play_optimal_strategy(self):
-        _, optimal_policy = markovDecision(self.layout, self.circle)
+        _, optimal_policy = mD(self.layout, self.circle).solve()
         return self.empirical_results(optimal_policy.astype(int))
 
     def play_dice_strategy(self, dice):
diff --git a/validation_test.py b/validation_test.py
deleted file mode 100644
index aaba7d24eefc4222e5691697ac1f888c96d31889..0000000000000000000000000000000000000000
--- a/validation_test.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import random as rd
-import numpy as np
-import matplotlib.pyplot as plt
-from tmc import TransitionMatrixCalculator as tmc
-from markovDecision import MarkovDecisionSolver as mD
-
-class EmpiricalComparision :
-    def __init__(self) :
-        return
-
-
-    def simulation(strategy, layout : list, circle, nIter : int) :
-        tmc_instance = tmc()
-        safe_dice = tmc_instance._compute_safe_matrix(layout, circle)
-        normal_dice = tmc_instance._compute_normal_matrix(layout, circle)
-        risky_dice = tmc_instance._compute_risky_matrix(layout, circle)
-        matrices_transition = [safe_dice, normal_dice, risky_dice]
-        nTurns = []
-        turns = 0
-        for _ in range(nIter) :
-            turns = 0
-            k = 0
-            while k < len(layout)-1 :
-                action = strategy[k]
-                transitionMatrix = matrices_transition[int(action -1)]
-                k = np.rd.choice(len(layout), p = transitionMatrix[k])
-                if layout[k] == 3 and action == 2 :
-                    turns +=1 if np.rd.uniform(0,1) < 0.5 else 2
-                elif layout[k] == 3 and action == 3 :
-                    turns += 2
-                else :
-                    turns += 1
-            nTurns.append(turns)
-
-        return np.mean(nTurns)
-
-
-    def plot(layouts : list, circle, nIter : int) :
-        Markov = []
-        Safe = []
-        Normal = []
-        Risky = []
-        Random = []
-        for layout in layouts :
-            expec, policy = mD(layout, circle)
-            # Simulate the game
-
-        return
-
-
-
-
-
-
-layout = [0,0,3,0,0,0,2,0,0,0,3,0,0,1,0]
-results(layout, False, 1000000)
-results(layout, True, 1000000)
\ No newline at end of file