Skip to content
Extraits de code Groupes Projets
simulate.py 5,59 ko
Newer Older
  • Learn to ignore specific revisions
  • Adrien Payen's avatar
    Adrien Payen a validé
    import random
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import numpy as np
    
    Adrien Payen's avatar
    Adrien Payen a validé
    import matplotlib.pyplot as plt
    from tmc import TransitionMatrixCalculator as tmc
    from markovDecision import MarkovDecisionSolver
    
    # Number of squares on the board; index nSquares - 1 is the square that ends a game.
    nSquares = 15
    # Number of games played for every empirical average computed in this module.
    nSimul = 10000
    
    def _applyTrap(trapType, newPos):
        """Resolve the effect of a triggered square.

        Square types handled:
            0 -- nothing happens
            1 -- back to square 0
            2 -- back 3 squares (never below square 0)
            3 -- prison
            4 -- one of the effects 1, 2 or 3, drawn uniformly at random

        Returns:
            (position, prison) after the effect has been applied.

        Raises:
            ValueError: if trapType is none of the values above.
        """
        if trapType == 4:
            trapType = random.choice([1, 2, 3])

        if trapType == 0:
            return newPos, False
        if trapType == 1:
            return 0, False
        if trapType == 2:
            return max(newPos - 3, 0), False
        if trapType == 3:
            return newPos, True
        raise ValueError("unknown square type: {!r}".format(trapType))


    def playOneTurn(diceChoice, curPos, layout, circle, prison):
        """Simulate a single turn and return the resulting state.

        Args:
            diceChoice: dice to throw; the throw is uniform over 0..diceChoice.
                With dice 1 the square landed on is never triggered, with
                dice 2 it is triggered half of the time, otherwise always.
            curPos: current square index.
            layout: sequence giving the type of every square (see _applyTrap).
            circle: if True, overshooting the last square wraps around to the
                start of the board; if False, overshooting ends on the last
                square.
            prison: True if the player must skip this turn.

        Returns:
            (newPos, prison): the square after the turn and whether the
            player has to skip the next turn.

        Raises:
            ValueError: if a triggered square has an unknown type.
        """
        goal = nSquares - 1

        if curPos == goal:
            return goal, False

        if prison:
            # The turn is spent in prison: no move, and the player is released.
            return curPos, False

        result = random.randint(0, diceChoice)

        if curPos == 2 and result != 0:
            # Junction on square 2: each of the two branches is taken with
            # probability 1/2; the second branch starts 7 squares further.
            slowLane = random.choice([0, 1])
            newPos = curPos + result if slowLane else curPos + result + 7
        elif (curPos == 9 and result != 0) or (curPos in [7, 8, 9] and curPos + result >= 10):
            # Leaving the branch that ends on square 9: squares 10-13 are
            # skipped. NOTE(review): presumably they belong to the other
            # branch of the junction -- confirm against the game rules.
            newPos = curPos + result + 4
        else:
            newPos = curPos + result

        if newPos > goal:
            if not circle:
                # Overshooting wins the game; the player is not in prison
                # (the original code wrongly reported prison=True here).
                return goal, False
            newPos -= nSquares

        newSquare = layout[newPos]

        if diceChoice == 1:
            return newPos, False
        if diceChoice == 2:
            newSquare = random.choice([0, newSquare])

        return _applyTrap(newSquare, newPos)
    
    def playOneGame(layout, circle, policy, start=0):
        """Play one full game and return the number of turns it took.

        Args:
            layout: sequence giving the type of every square.
            circle: passed through to playOneTurn (wrap around on overshoot).
            policy: indexable by square; policy[square] is the dice thrown there.
            start: square the game starts from.

        Returns:
            Number of turns played until the last square is reached. Turns
            skipped in prison are counted as well.
        """
        nTurns = 0
        curPos = start
        prison = False

        # playOneTurn already wraps (circle) or clamps (no circle) overshoots,
        # so it always returns a square in [0, nSquares - 1]; a single loop
        # therefore serves both modes and no extra wrap-around is needed here.
        while curPos < nSquares - 1:
            curPos, prison = playOneTurn(policy[curPos], curPos, layout, circle, prison)
            nTurns += 1

        return nTurns
    
    def empiric_cost_of_square(layout, circle, policy):
        """Estimate the mean number of turns to finish from every square.

        For each of the nSquares possible starting squares, nSimul games are
        played with the given policy and their turn counts are averaged.

        Returns:
            Float array of length nSquares; entry i is the average number of
            turns of the games started on square i.
        """
        def mean_turns_from(square):
            turns = sum(
                playOneGame(layout, circle, policy, start=square)
                for _ in range(nSimul)
            )
            return turns / nSimul

        return np.array(
            [mean_turns_from(square) for square in range(nSquares)],
            dtype=float,
        )
    
    def empirical_results(layout, circle, policy):
        """Return the average number of turns over nSimul games started on square 0."""
        totalTurns = sum(playOneGame(layout, circle, policy) for _ in range(nSimul))
        return totalTurns / nSimul
    
    def comparison_theorical_empirical(layout, circle):
        """Plot the solver's cost per square next to the simulated cost.

        The policy returned by MarkovDecisionSolver.solve() is played through
        empiric_cost_of_square, and both cost curves are drawn on one figure.
        """
        theoretical, best_policy = MarkovDecisionSolver(layout, circle).solve()
        simulated = empiric_cost_of_square(layout, circle, best_policy.astype(int))

        # One x value per square, shared by both curves and by the ticks.
        squares = np.arange(len(theoretical))
        curves = ((theoretical, "Theoretical cost"), (simulated, "Empirical cost"))
        for costs, caption in curves:
            plt.plot(squares, costs, label=caption)

        plt.xticks(squares)
        plt.grid(True)
        plt.xlabel("Square")
        plt.ylabel("Cost")
        plt.legend()
        plt.title("Comparison between the expected cost and the actual cost")
        plt.show()
    
    def comparison_of_policies_total(layout, circle):
        """Show a bar chart of the simulated cost from square 0 for five policies.

        The policies compared are the solver's policy, the three constant
        policies (always dice 1, 2 or 3) and one policy drawn at random.
        """
        _, best_policy = MarkovDecisionSolver(layout, circle).solve()

        # Insertion order fixes both the simulation order and the bar order.
        candidates = {
            "optimal": best_policy.astype(int),
            "safe": np.full(nSquares, 1, dtype=int),
            "normal": np.full(nSquares, 2, dtype=int),
            "risky": np.full(nSquares, 3, dtype=int),
            "random": np.random.randint(1, 4, size=nSquares),
        }
        costs = [
            empirical_results(layout, circle, candidate)
            for candidate in candidates.values()
        ]

        plt.bar(list(candidates), costs)
        plt.xlabel("Policy")
        plt.ylabel("Cost")
        plt.title("Expected number of turns by policy")
        plt.show()
    
    def comparison_of_policies_squares(layout, circle):
        """Plot, per starting square, the simulated cost of five policies.

        The policies compared are the solver's policy, the three constant
        policies (always dice 1, 2 or 3) and one policy drawn at random.
        """
        _, best_policy = MarkovDecisionSolver(layout, circle).solve()

        labelled_policies = [
            ("Optimal", best_policy.astype(int)),
            ("Safe", np.full(nSquares, 1, dtype=int)),
            ("Normal", np.full(nSquares, 2, dtype=int)),
            ("Risky", np.full(nSquares, 3, dtype=int)),
            ("Random", np.random.randint(1, 4, size=nSquares)),
        ]

        # Run every simulation before any drawing starts.
        curves = [
            (caption, empiric_cost_of_square(layout, circle, candidate))
            for caption, candidate in labelled_policies
        ]

        # One x value per square, shared by all curves and by the ticks.
        squares = np.arange(len(curves[0][1]))
        for caption, costs in curves:
            plt.plot(squares, costs, label=caption)

        plt.xticks(squares)
        plt.grid(True)
        plt.xlabel("Square")
        plt.ylabel("Cost")
        plt.legend()
        plt.title("Expected cost for different policies")
        plt.show()
    
    def make_plots():
        """Draw the theoretical-vs-empirical comparison for a fixed board.

        The board is used without wrap-around (circle disabled). The two
        policy comparisons are available but currently switched off.
        """
        board = [0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0]
        wrap_around = False

        comparison_theorical_empirical(board, wrap_around)
        # comparison_of_policies_total(board, wrap_around)
        # comparison_of_policies_squares(board, wrap_around)
    
    # Unguarded module-level call: the plots are produced whenever this file
    # is executed, and also whenever it is imported.
    make_plots()