The first thing you need to do is to download this file: mp08.zip. It has the following content:

- submitted.py: Your homework. Edit, and then submit to Gradescope.
- mp08_notebook.ipynb: This is a Jupyter notebook to help you debug. You can completely ignore it if you want, although you might find that it gives you useful instructions.
- grade.py: Once your homework seems to be working, you can test it by typing python grade.py, which will run the tests in tests/test_visible.py.
- tests/test_visible.py: This file contains about half of the unit tests that Gradescope will run in order to grade your homework. If you can get a perfect score on these tests, then you should also get a perfect score on the additional hidden tests that Gradescope uses.
- solution.json: This file contains the solutions for the visible test cases, in JSON format. If the instructions are confusing you, please look at this file, to see if it can help to clear up your confusion.
- requirements.txt: This tells you which python packages you need to have installed in order to run grade.py. You can install all of those packages by typing pip install -r requirements.txt or pip3 install -r requirements.txt.
This file (mp08_notebook.ipynb) will walk you through the whole MP, giving you instructions and debugging tips as you go.
Episodic Games: Gradient Ascent
It is possible to learn an optimal strategy for a two-player game using machine learning methods. Simple gradient ascent, however, doesn't work very well: running gradient ascent simultaneously on two different objectives can converge to an orbit, rather than to a stable point. The first part of the MP explores this outcome.
import submitted, importlib
import numpy as np
import matplotlib.pyplot as plt
importlib.reload(submitted)
help(submitted.episodic_game_gradient_ascent)
Help on function episodic_game_gradient_ascent in module submitted:

episodic_game_gradient_ascent(init, rewards, nsteps, learningrate)
    nsteps of a 2-player, 2-action episodic game, strategies adapted using gradient ascent.

    @param:
    init (2) - initial logits for the two players
    rewards (2,2,2) - player i receives rewards[i,a,b] if player 0 plays a and player 1 plays b
    nsteps (scalar) - number of steps of gradient descent to perform
    learningrate (scalar) - learning rate

    @return:
    logits (nsteps,2) - logits of two players in each iteration of gradient descent
    utilities (nsteps,2) - utilities[t,i] is utility to player i of logits[t,:]

    Initialize: logits[0,:] = init.
    Iterate: In iteration t, player 0's actions have probabilities sig2(logits[t,0]),
    and player 1's actions have probabilities sig2(logits[t,1]).
    The utility (expected reward) for player i is sig2(logits[t,0])@rewards[i,:,:]@sig2(logits[t,1]),
    and the next logits are logits[t+1,i] = logits[t,i] + learningrate * utility_partials(rewards, logits[t,:]).
As you can see, the intended behavior of this function is to perform nsteps steps of gradient ascent on a simple 2-player, 2-action game. Let's explore this.

First, this function depends on the function utility_partials. Let's look at that.
importlib.reload(submitted)
help(submitted.utility_partials)
Help on function utility_partials in module submitted:

utility_partials(R, x)
    Calculate vector of partial derivatives of utilities with respect to logits.
    If u[i] = sig2(x[0])@R[i,:,:]@sig2(x[1]), then partial[i] is the derivative of u[i] with respect to x[i].

    @param:
    R (2,2,2) - R[i,a,b] is reward to player i if player 0 plays a, player 1 plays b
    x (2) - player i plays move j with probability softmax([0,x[i]])[j]

    @return:
    partial (2) - partial[i] is the derivative of u[i] with respect to x[i].

    HINT: You may find the functions sig2 and dsig2 to be useful.
Following up on the hint:
importlib.reload(submitted)
help(submitted.sig2)
help(submitted.dsig2)
Help on function sig2 in module submitted:

sig2(x)
    Calculate the vector p = [1-sigmoid(x), sigmoid(x)] for scalar x

Help on function dsig2 in module submitted:

dsig2(p)
    Assume p=sig2(x). Calculate the vector v such that v[i] is the derivative of p[i] with respect to x.
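If it helps to see the math in code form, here is one way these two helpers could be written, based only on their docstrings (a minimal sketch, not necessarily identical to the reference implementation):

```python
import numpy as np

def sig2(x):
    """Return p = [1-sigmoid(x), sigmoid(x)] for a scalar logit x."""
    s = 1.0 / (1.0 + np.exp(-x))
    return np.array([1.0 - s, s])

def dsig2(p):
    """Assume p = sig2(x). Return v with v[i] = d p[i] / d x.
    Since d sigmoid(x)/dx = sigmoid(x)*(1-sigmoid(x)) = p[0]*p[1],
    the derivative of p = [1-sigmoid, sigmoid] is [-p[0]*p[1], p[0]*p[1]]."""
    return np.array([-p[0] * p[1], p[0] * p[1]])
```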
Let's start with a rather difficult game to optimize, in which player 0 wins more if their move is the opposite of player 1's move, whereas player 1 wins more if their move is the same as that of player 0:
rewards = np.array([[[1,2],[2,1]],[[2,1],[1,2]]])
print(rewards)
[[[1 2]
  [2 1]]

 [[2 1]
  [1 2]]]
Once you have written the function utility_partials, you can experiment with the partial derivatives of the utility with respect to the players' logits:
importlib.reload(submitted)
print('The partial derivatives at x=[0,0] are',submitted.utility_partials(rewards,[0,0]))
print('The partial derivatives at x=[-1,0] are',submitted.utility_partials(rewards,[-1,0]))
print('The partial derivatives at x=[0,-1] are',submitted.utility_partials(rewards,[0,-1]))
print('The partial derivatives at x=[1,0] are',submitted.utility_partials(rewards,[1,0]))
print('The partial derivatives at x=[0,1] are',submitted.utility_partials(rewards,[0,1]))
The partial derivatives at x=[0,0] are [0. 0.]
The partial derivatives at x=[-1,0] are [ 0. -0.11552929]
The partial derivatives at x=[0,-1] are [0.11552929 0. ]
The partial derivatives at x=[1,0] are [0. 0.11552929]
The partial derivatives at x=[0,1] are [-0.11552929 0. ]
The interpretations of these partial derivatives are something like this:
The zero logit vector (i.e., p=sigmoid(0)=0.5 probability of cooperating, for both players) has an all-zeros vector of partial derivatives. This means it is a Nash equilibrium: neither player wants to change strategy unless the other player changes. With this strategy, both players have a utility (expected reward) of [0.5,0.5]@rewards[i,:,:]@[0.5,0.5] = 1.5.
If the logits are [-1,0] (so player 0 cooperates with probability 1/(1+exp(1)), which is less than 0.5), then player 1 has a motivation to cooperate less often (du[1]/dx[1] < 0).
If the logits are [0,-1] (so player 1 cooperates with probability 1/(1+exp(1)), which is less than 0.5), then player 0 has a motivation to cooperate more often (du[0]/dx[0] > 0).
... and so on.
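For reference, here is one way utility_partials could compute these values, using the chain rule together with sig2 and dsig2 as the hint suggests (a sketch only; the reference solution may be organized differently):

```python
import numpy as np
from submitted import sig2, dsig2   # the helper functions described above

def utility_partials(R, x):
    """partial[i] = d u[i] / d x[i], where u[i] = sig2(x[0]) @ R[i,:,:] @ sig2(x[1]).
    Differentiating u[i] with respect to x[i] only touches player i's own probability vector."""
    p0, p1 = sig2(x[0]), sig2(x[1])      # each player's action probabilities
    d0, d1 = dsig2(p0), dsig2(p1)        # derivatives of those probabilities w.r.t. the logits
    partial0 = d0 @ R[0, :, :] @ p1      # d u[0] / d x[0]
    partial1 = p0 @ R[1, :, :] @ d1      # d u[1] / d x[1]
    return np.array([partial0, partial1])
```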
By the way, this notebook will sometimes refer to move 1 as "cooperating," and move 0 as "defecting." Those terms only have meaning for games like Prisoner's dilemma or Chicken, in which your opponent wins more points if you cooperate, and fewer points if you defect. For more symmetric games, like this one, those terms don't really have any meaning, but they sound kind of nice anyway, so sometimes we will use those words.
This game has just one Nash equilibrium: if both players cooperate with probability 50%, then they both win, on average, 1.5 points per game.
This Nash equilibrium is stable: if gradient ascent starts at this equilibrium, then it will stay there.
importlib.reload(submitted)
init = [0,0] # Initial logits [0,0] means that initial probabilities are [0.5, 0.5]
nsteps, learningrate = 1000, 0.1
logits, utilities = submitted.episodic_game_gradient_ascent(init, rewards, nsteps, learningrate)
fig, ax = plt.subplots(1, figsize=(4,4))
ax.scatter(1/(1+np.exp(-logits[:,0])), 1/(1+np.exp(-logits[:,1])), c=np.arange(nsteps))
ax.set_xlabel('Probability that player 0 cooperates')
ax.set_ylabel('Probability that player 1 cooperates')
Text(0, 0.5, 'Probability that player 1 cooperates')
However, if we start from any position other than the equilibrium, then gradient ascent fails to converge.
importlib.reload(submitted)
init = [-1,-1] # Initial logits [-1,-1] means that initial probabilities are [1/(1+e), 1/(1+e)]
nsteps, learningrate = 1000, 0.1
logits, utilities = submitted.episodic_game_gradient_ascent(init, rewards, nsteps, learningrate)
fig, ax = plt.subplots(1, figsize=(4,4))
ax.scatter(1/(1+np.exp(-logits[:,0])),1/(1+np.exp(-logits[:,1])), c=np.arange(nsteps))
ax.set_xlabel('Probability that player 0 cooperates')
ax.set_ylabel('Probability that player 1 cooperates')
Text(0, 0.5, 'Probability that player 1 cooperates')
As you can see, gradient ascent does not converge to the Nash equilibrium; instead, the players orbit around the Nash equilibrium:
- When player 1 is usually defecting, then player 0 increases their cooperation probability
- When player 0 is usually cooperating, then player 1 increases their cooperation probability
- When player 1 is usually cooperating, then player 0 decreases their cooperation probability
- When player 0 is usually defecting, then player 1 decreases their cooperation probability
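Putting the pieces together, the loop described in the docstring of episodic_game_gradient_ascent can be sketched roughly as follows (an illustrative sketch; details such as how the utility of the final logits is recorded may differ from the reference implementation):

```python
import numpy as np
from submitted import sig2, utility_partials

def episodic_game_gradient_ascent(init, rewards, nsteps, learningrate):
    """Simultaneous gradient ascent on both players' utilities, as described in the docstring above."""
    logits = np.zeros((nsteps, 2))
    utilities = np.zeros((nsteps, 2))
    logits[0, :] = init
    for t in range(nsteps):
        p0, p1 = sig2(logits[t, 0]), sig2(logits[t, 1])
        utilities[t, :] = [p0 @ rewards[0] @ p1, p0 @ rewards[1] @ p1]   # expected reward for each player
        if t + 1 < nsteps:
            logits[t + 1, :] = logits[t, :] + learningrate * utility_partials(rewards, logits[t, :])
    return logits, utilities
```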
Episodic Games: Corrected Ascent
The paper "The mechanics of n-player games" proposed a solution to the problem shown above, by borrowing an idea from orbital mechanics. They suggested that we should impose some friction, so the orbit will decay toward a stable equilibrium. They called this friction term the symplectic correction:
importlib.reload(submitted)
help(submitted.symplectic_correction)
Help on function symplectic_correction in module submitted:

symplectic_correction(partials, hessian)
    Calculate the symplectic correction matrix from Balduzzi et al., "The Mechanics of n-player Games," 2018.
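For orientation (this paraphrases the idea from Balduzzi et al.; the exact value that symplectic_correction is expected to return is specified by the assignment, not by this note): write $\mathbf{\xi}$ for the vector of partial derivatives (one per player) and $\mathbf{H}$ for the Hessian computed below, and split $\mathbf{H}$ into its symmetric part $\mathbf{S}=\frac{1}{2}(\mathbf{H}+\mathbf{H}^T)$ and anti-symmetric part $\mathbf{A}=\frac{1}{2}(\mathbf{H}-\mathbf{H}^T)$. The paper's symplectic gradient adjustment replaces the plain update direction $\mathbf{\xi}$ with

$\mathbf{\xi} + \lambda \mathbf{A}^T\mathbf{\xi},$

where the sign of $\lambda$ is chosen so that the extra term damps the rotational (orbiting) component of the dynamics rather than amplifying it.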
Apparently we need to calculate the Hessian:
importlib.reload(submitted)
help(submitted.utility_hessian)
Help on function utility_hessian in module submitted:

utility_hessian(R, x)
    Calculate matrix of partial second derivatives of utilities with respect to logits.
    Define u[i] = sig2(x[0])@R[i,:,:]@sig2(x[1]); then hessian[i,j] is the second derivative of u[i] with respect to x[i] and x[j].

    @param:
    R (2,2,2) - R[i,a,b] is reward to player i if player 0 plays a, player 1 plays b
    x (2) - player i plays move j with probability softmax([0,x[i]])[j]

    @return:
    hessian (2,2) - hessian[i,j] is the second derivative of u[i] with respect to x[i] and x[j].

    HINT: You may find the functions sig2, dsig2, and Hsig2 to be useful.
importlib.reload(submitted)
help(submitted.Hsig2)
Help on function Hsig2 in module submitted:

Hsig2(p)
    Assume p=sig2(x). Calculate the vector v such that v[i] is the second derivative of p[i] with respect to x.
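As with the first-order case, here is one possible way to compute these quantities, shown only as a sketch of the math (it mirrors the functions in submitted.py but may not match the reference implementation exactly):

```python
import numpy as np
from submitted import sig2, dsig2

def Hsig2(p):
    """Assume p = sig2(x). Return v with v[i] = d^2 p[i] / d x^2.
    d^2 sigmoid/dx^2 = sigmoid*(1-sigmoid)*(1-2*sigmoid) = p[0]*p[1]*(p[0]-p[1])."""
    second = p[0] * p[1] * (p[0] - p[1])
    return np.array([-second, second])

def utility_hessian(R, x):
    """hessian[i,j] = d/dx[j] of (d u[i]/d x[i]), with u[i] = sig2(x[0]) @ R[i,:,:] @ sig2(x[1])."""
    p0, p1 = sig2(x[0]), sig2(x[1])
    d0, d1 = dsig2(p0), dsig2(p1)
    h0, h1 = Hsig2(p0), Hsig2(p1)
    return np.array([
        [h0 @ R[0] @ p1, d0 @ R[0] @ d1],   # derivatives of d u[0]/d x[0] w.r.t. x[0] and x[1]
        [d0 @ R[1] @ d1, p0 @ R[1] @ h1],   # derivatives of d u[1]/d x[1] w.r.t. x[0] and x[1]
    ])
```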
Once you have written the function utility_hessian, you can test it.

Notice that, if sig2(logits[t,0]) and sig2(logits[t,1]) form any mixed equilibrium, then:
- Both entries on the main diagonal of the Hessian will be zero: neither player can change their own utility by changing the probabilities with which they choose actions.
- The off-diagonal elements of the Hessian might not be zero. In this case, player 0 would prefer that player 1 change their strategy (because then the two players would sometimes make different moves, which would benefit player 0), whereas player 1 would prefer that player 0 NOT change their strategy (because then the two players would sometimes make different moves, which harms player 1).
importlib.reload(submitted)
H = submitted.utility_hessian(rewards, [0,0])
print(H)
[[ 0.    -0.125]
 [ 0.125  0.   ]]
The symmetric part of the Hessian is $\mathbf{S}=0.5(\mathbf{H}+\mathbf{H}^T)$. Notice that $\mathbf{d}^T\mathbf{H}\mathbf{d}=\mathbf{d}^T\mathbf{S}\mathbf{d}$ for any vector $\mathbf{d}$, because the anti-symmetric part of $\mathbf{H}$ contributes nothing to the quadratic form. The symmetric part therefore determines the stability of the Nash equilibrium:
- Positive definite symmetric part: changing the Nash equilibrium by any small vector, $\mathbf{d}$, causes the sum of the utilities for all players to increase, i.e., $\mathbf{d}^T\mathbf{H}\mathbf{d}>0$. All of the eigenvalues of a positive-definite symmetric matrix are positive real numbers. Notice that, if the Hessian's symmetric part is positive definite, the Nash equilibrium is unstable: no player has an incentive to change from the Nash equilibrium, but as soon as one player changes, even slightly, then all of the other players can increase their utility by making the change bigger.
- Negative definite symmetric part: changing the Nash equilibrium by any small vector, $\mathbf{d}$, causes the sum of the utilities for all players to decrease, i.e., $\mathbf{d}^T\mathbf{H}\mathbf{d}< 0$. All of the eigenvalues of a negative-definite symmetric matrix are negative. Notice that a negative definite Hessian means that the Nash equilibrium is stable: If any player moves away from the equilibrium by even a small amount, the utilities of the players go down or stay the same, so all players want to move back toward the equilibrium.
- Zero-valued symmetric part: In the example we've been working with so far, the Hessian is anti-symmetric, so its symmetric part is identically zero. In this case, changing the Nash equilibrium by a small amount does not change the sum of the utilities at all ($\mathbf{d}^T\mathbf{H}\mathbf{d}=0$), so, although the players have no particular incentive to move back to the equilibrium, on average they don't mind moving back to it, so we can say that the game is marginally stable.
- Neither negative nor positive definite: Some eigenvalues of the symmetric part are positive, some are negative. If one player moves away from the equilibrium in a direction $\mathbf{d}$, the other players might want to move further away ($\mathbf{d}^T\mathbf{H}\mathbf{d}$ might be positive) or they might want to move back toward the equilibrium ($\mathbf{d}^T\mathbf{H}\mathbf{d}$ might be negative).
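The classification above is easy to automate. Here is a small illustrative helper (the function name and tolerance are ours, not part of the assignment) that labels an equilibrium from the eigenvalues of the symmetric part of its Hessian:

```python
import numpy as np

def classify_equilibrium(H, tol=1e-9):
    """Label a Nash equilibrium using the eigenvalues of the symmetric part of its Hessian."""
    S = 0.5 * (H + H.T)                  # symmetric part; its eigenvalues are real
    eigs = np.linalg.eigvalsh(S)
    if np.all(eigs > tol):
        return 'unstable (positive definite symmetric part)'
    if np.all(eigs < -tol):
        return 'stable (negative definite symmetric part)'
    if np.all(np.abs(eigs) <= tol):
        return 'marginally stable (zero symmetric part)'
    return 'neither stable nor unstable (indefinite symmetric part)'
```

Applied to the Hessians computed below, this helper would report "marginally stable" for the game above, "stable" for Prisoner's dilemma at logits [-1,-1], and "neither stable nor unstable" for Chicken at its mixed equilibrium.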
Notice that the game we've been using so far has a perfectly anti-symmetric Hessian, so its symmetric part is exactly zero:
S = 0.5*(H+H.T)
print(S)
print('The eigenvalues are',np.linalg.eig(S)[0])
[[0. 0.]
 [0. 0.]]
The eigenvalues are [0. 0.]
So this game is marginally stable.
In the Prisoner's dilemma, the Nash equilibrium is for both players to always defect. Each player would prefer that the other player cooperate, but that preference is wiped out: sigmoid(x)=0 corresponds to x=-np.inf, where the derivative of the sigmoid is zero, so a small change in either player's logit has no effect on the probabilities, and hence the entire Hessian is zero.
importlib.reload(submitted)
submitted.utility_hessian(np.array([[[-5,0],[-10,-1]],[[-5,-10],[0,-1]]]), [-np.inf,-np.inf])
array([[0., 0.],
       [0., 0.]])
We can test for stability by trying a logits vector that's not so extreme:
importlib.reload(submitted)
H = submitted.utility_hessian(np.array([[[-5,0],[-10,-1]],[[-5,-10],[0,-1]]]), [-1,-1])
print(H)
print('Its symmetric part is:')
print(0.5*(H+H.T))
print('and has eigenvalues of:',np.linalg.eig(0.5*(H+H.T))[0])
[[-0.35654709  0.15462501]
 [ 0.15462501 -0.35654709]]
Its symmetric part is:
[[-0.35654709  0.15462501]
 [ 0.15462501 -0.35654709]]
and has eigenvalues of: [-0.20192208 -0.5111721 ]
So we see that Prisoner's dilemma is a stable game. Both players will adjust their plays, over time, in the direction of the Nash equilibrium.
The game of Chicken has a mixed Nash equilibrium in which both players cooperate with probability p=0.9 (logit x=np.log(9)). This Nash equilibrium is neither stable nor unstable:
importlib.reload(submitted)
H = submitted.utility_hessian(np.array([[[-10,2],[-1,1]],[[-10,-1],[2,1]]]), np.ones(2)*np.log(9))
print(H)
print('Its symmetric part is:')
print(0.5*(H+H.T))
print('and has eigenvalues of',np.linalg.eig(0.5*(H+H.T))[0])
[[-6.9388939e-17 -8.1000000e-02]
 [-8.1000000e-02 -6.9388939e-17]]
Its symmetric part is:
[[-6.9388939e-17 -8.1000000e-02]
 [-8.1000000e-02 -6.9388939e-17]]
and has eigenvalues of [ 0.081 -0.081]
Since the game of Chicken is neither stable nor unstable, when one player moves away from the equilibrium, the other player might want to move back. If you think about it, you can see that the player who decides to defect (e.g., by refusing to swerve) prefers to move away from the mixed equilibrium toward a pure equilibrium, but the other player is unhappy with this choice, because they are thereby forced to cooperate (by chickening out). The outcome cannot be determined by the mathematics alone; it depends on the psychology of the players.
The symplectic correction is used in the function episodic_game_corrected_ascent:
importlib.reload(submitted)
init = [-1,-1] # Initial logits [-1,-1] means that initial probabilities are [1/(1+e), 1/(1+e)]
nsteps, learningrate = 5000, 0.1
logits, utilities = submitted.episodic_game_corrected_ascent(init, rewards, nsteps, learningrate)
fig, ax = plt.subplots(1, figsize=(4,4))
ax.scatter(1/(1+np.exp(-logits[:,0])),1/(1+np.exp(-logits[:,1])), c=np.arange(nsteps))
ax.set_xlabel('Probability that player 0 cooperates')
ax.set_ylabel('Probability that player 1 cooperates')
print('The logits have converged to:',logits[-1,:])
The logits have converged to: [ 0.00086998 -0.00093214]
As you can see, the symplectic correction has added a kind of friction to the orbit, causing it to decay toward the nearest stable Nash equilibrium, with logits nearly zero, and the probability of cooperation approximately 0.5 for both players.
Extra Credit: Sequential Games
For extra credit, you can try to propose a strategy that accumulates positive rewards in a series of sequential games of Prisoner's dilemma against the 16 possible pure-strategy opponents.

Rather than creating a function for this, all you need to do is create a matrix that specifies your strategy. The autograder will then play your strategy for 100 games against each of those 16 fixed-strategy opponents.
Your strategy is the variable submitted.sequential_strategy. sequential_strategy[a,b] is the probability that your player will perform action 1 on the next round of play if, during the previous round of play, the other player performed action a and your player performed action b (a short simulation sketch after the examples below shows how this matrix gets used).
Examples:
- If you want to always act uniformly at random, return [[0.5,0.5],[0.5,0.5]]
- If you want to always perform action 1, return [[1,1],[1,1]].
- If you want to return the other player's action (tit-for-tat), return [[0,0],[1,1]].
- If you want to repeat your own previous move, return [[0,1],[0,1]].
- If you want to repeat your last move with probability 0.8, and the other player's last move with probability 0.2, return [[0.0, 0.8],[0.2, 1.0]].
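To make the indexing concrete, here is a hedged sketch of how such a strategy might be simulated against a fixed opponent. The function name, the choice of initial moves, and the reward bookkeeping are our own illustrative assumptions; the autograder's actual protocol may differ (for example, in how the first round is initialized):

```python
import numpy as np

def simulate_match(my_strategy, opponent_strategy, rewards, nrounds=100, rng=None):
    """Play nrounds of the sequential game and return my average reward per round.
    strategy[a,b] = P(play action 1 next | opponent played a, I played b last round)."""
    my_strategy = np.asarray(my_strategy)
    opponent_strategy = np.asarray(opponent_strategy)
    rng = np.random.default_rng() if rng is None else rng
    my_move, their_move = 1, 1          # assumed initial moves (illustrative choice only)
    total = 0.0
    for _ in range(nrounds):
        my_next = int(rng.random() < my_strategy[their_move, my_move])
        their_next = int(rng.random() < opponent_strategy[my_move, their_move])
        my_move, their_move = my_next, their_next
        total += rewards[0, my_move, their_move]   # rewards[0,a,b]: my reward if I play a, they play b
    return total / nrounds
```

Here rewards would be a (2,2,2) tensor for the game, in the same R[i,a,b] convention used earlier in this notebook.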
You will be scored by testing your strategy in 100 sequential games against each of the sixteen pure-strategy opponents, i.e., [[0,0],[0,0]], [[0,0],[0,1]], ..., [[1,1],[1,1]]. If your average score, averaged across all sixteen opponents, is above 0.2, then you pass.
importlib.reload(submitted)
print(submitted.sequential_strategy)
[[0.5 0.5]
 [0.5 0.5]]
As you can see, the default strategy is to cooperate with 50% probability, always, regardless of what your player or your opponent did in the last round of game play.
The reward matrix is like Prisoner's dilemma, except that each player earns a positive score if the other player cooperates, and a negative score if the other player defects. The rewards matrix looks like this, where the number before $\Vert$ is the reward for player A, the number after $\Vert$ is the reward for player B:
$R=\begin{array}{c|c|c}&b=0&b=1\\\hline a=0&-1\Vert -1&2\Vert -2\\a=1&-2\Vert 2&1\Vert 1\end{array}$
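In the R[i,a,b] tensor convention used earlier in this notebook, that table would presumably correspond to the following array (shown only to connect the table to the code; we have not checked it against the autograder's internal definition):

```python
import numpy as np

# rewards[0,a,b]: reward to player A when A plays a and B plays b
# rewards[1,a,b]: reward to player B when A plays a and B plays b
rewards = np.array([[[-1,  2],
                     [-2,  1]],
                    [[-1, -2],
                     [ 2,  1]]])
```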
We can see how well this strategy does against these opponents by running grade.py:
!python grade.py
You played 1600 games, against all 16 possible fixed-strategy opponents
and you won an average of 0.101875 points per game
F....
======================================================================
FAIL: test_extra (test_extra.TestStep)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/jhasegaw/Dropbox/mark/teaching/ece448/ece448labs/spring24/mp08/src/tests/test_extra.py", line 39, in test_extra
    self.assertGreater(score/played,0.2,msg='That score is not enough to get extra credit!')
AssertionError: 0.101875 not greater than 0.2 : That score is not enough to get extra credit!
----------------------------------------------------------------------
Ran 5 tests in 0.017s

FAILED (failures=1)
In order to get the extra credit, you just need to change submitted.sequential_strategy to a sequential strategy that will induce your opponent to cooperate, on average, at least 20% more often than they defect. This is how grade.py will look if you succeed:
!python grade.py
You played 1600 games, against all 16 possible fixed-strategy opponents
and you won an average of 0.390625 points per game
Congratulations! That score is enough for extra credit!
.....
----------------------------------------------------------------------
Ran 5 tests in 0.014s

OK
Grade your homework
If you've reached this point, and all of the above sections work, then you're ready to try grading your homework! Before you submit it to Gradescope, try grading it on your own machine. This will run some visible test cases (which you can read in tests/test_visible.py), and compare the results to the solutions (which you can read in solution.json).
The exclamation point (!) tells Jupyter to run the following as a shell command. Obviously you don't need to run the code this way -- this usage is here just to remind you that you can also, if you wish, run this command in a terminal window.
The -j option tells grade.py to print out a complete JSON description, which sometimes has a little more information than the default printout.
!python grade.py -j
{ "tests": [ { "name": "test_extra (test_extra.TestStep)", "score": 10, "max_score": 10, "status": "passed", "output": "You played 1600 games, against all 16 possible fixed-strategy opponents\nand you won an average of 0.390625 points per game\nCongratulations! That score is enough for extra credit!\n" }, { "name": "test_corrected_ascent (test_hidden.TestStep)", "score": 25, "max_score": 25, "status": "passed" }, { "name": "test_gradient_ascent (test_hidden.TestStep)", "score": 25, "max_score": 25, "status": "passed" }, { "name": "test_corrected_ascent (test_visible.TestStep)", "score": 25, "max_score": 25, "status": "passed" }, { "name": "test_gradient_ascent (test_visible.TestStep)", "score": 25, "max_score": 25, "status": "passed" } ], "leaderboard": [], "visibility": "visible", "execution_time": "0.02", "score": 110 }
If your grade.py outputs look like the above, then go ahead and:
- Submit to MP08 on Gradescope to get credit for the main assignment
- Submit to MP08 Extra Credit on Gradescope to get the extra credit