The first thing you need to do is to download this file: template.zip. It has the following content:
- submitted.py: Your homework. Edit it, and then submit it to Gradescope.
- mp9.ipynb: This is a Jupyter notebook to help you debug. You can completely ignore it if you want, although you might find that it gives you useful instructions.
- grade.py: Once your homework seems to be working, you can test it by typing python grade.py, which will run the tests in tests/test_visible.py.
- tests/test_visible.py: This file contains about half of the unit tests that Gradescope will run in order to grade your homework. If you can get a perfect score on these tests, then you should also get a perfect score on the additional hidden tests that Gradescope uses.
- requirements.txt: This tells you which python packages you need to have installed in order to run grade.py. You can install all of those packages by typing pip install -r requirements.txt or pip3 install -r requirements.txt.
This file (mp9.ipynb) will walk you through the whole MP, giving you instructions and debugging tips as you go.
Nash Equilibrium
In the game of chicken, each player can gain points by defecting, but only if the other player cooperates. If both players defect, then they both lose a lot of points.
Suppose that the players are Alice and Bob, and they each have to choose between two possible moves: defect, or cooperate. Alice's reward depends on the actions of both players:
A's reward:

|  | B defects | B cooperates |
|---|---|---|
| A defects | -10 | 2 |
| A cooperates | -1 | 1 |

B's reward:

|  | B defects | B cooperates |
|---|---|---|
| A defects | -10 | -1 |
| A cooperates | 2 | 1 |
A Nash equilibrium is a pair of strategies such that each player, knowing the other player's strategy, has no reason to change their own strategy. The game of chicken has two pure-strategy equilibria.
- (C,D). If A cooperates, then B prefers to defect. If B defects, then A prefers to cooperate.
- (D,C). If A defects, then B prefers to cooperate. If B cooperates, then A prefers to defect.
It is less well known that the game of chicken also has a mixed-strategy equilibrium. A "mixed strategy" is a strategy in which the player chooses a move at random. A mixed strategy is never rational unless the expected reward for cooperating is exactly equal to the expected reward for defecting; in that case one gains nothing by always sticking to the same action, so choosing an action at random is a rational thing to do.
If A wants B to use a mixed strategy, then she needs to choose her own strategy so that B's average reward is independent of his action. For the reward matrices shown above, she can do so by choosing her own action at random: cooperating with probability $\frac{9}{10}$, and defecting with probability $\frac{1}{10}$. In this case, B's rewards are independent of his action, so it is rational for him to behave at random:
$$E[r_B|B=d] = P(A=d)r_B(d,d)+P(A=c)r_B(c,d)=\frac{1}{10}(-10)+\frac{9}{10}(2)=\frac{8}{10}$$

$$E[r_B|B=c] = P(A=d)r_B(d,c)+P(A=c)r_B(c,c)=\frac{1}{10}(-1)+\frac{9}{10}(1)=\frac{8}{10}$$

The rest of this MP will use equations like the ones above quite frequently, so it's useful to create some matrix notation. Let's define $\mathbf{p}_A=[P(A=d),P(A=c)]^T$ to be A's mixed strategy vector, and define $\mathbf{R}_B$ to be B's reward matrix. The equations above can be written as
$$\mathbf{p}_A^T\mathbf{R}_B =\left[\frac{1}{10},\frac{9}{10}\right] \left[\begin{array}{cc}-10&-1\\2&1\end{array}\right]=\left[\frac{8}{10},\frac{8}{10}\right]$$

Of course, it is only rational for A to use a mixed strategy if her reward is also independent of her action. If B wants A to use a mixed strategy, then he needs to choose his own strategy in order to make A's reward independent of her action. For the reward matrices shown above, this can be done if B also uses the mixed strategy vector $\mathbf{p}_B=\left[\frac{1}{10},\frac{9}{10}\right]^T$, thus:
$$\mathbf{R}_A\mathbf{p}_B = \left[\begin{array}{cc}-10&2\\-1&1\end{array}\right]\left[\begin{array}{c}\frac{1}{10}\\\frac{9}{10}\end{array}\right]= \left[\begin{array}{c}\frac{8}{10}\\\frac{8}{10}\end{array}\right]$$

import numpy as np
reward = np.array([[[-10,2],[-1,1]],[[-10,-1],[2,1]]])
print('The reward matrices are:')
print(reward)
pA = np.array([0.1,0.9])
print("\nA's mixed strategy vector is:")
print(pA)
print("\nThe expected rewards for B, as a function of his actions, are:")
print(pA@reward[1,:,:])
pB = np.array([0.1,0.9])
print("\nB's mixed strategy vector is:")
print(pB)
print("\nThe expected rewards for A, as a function of her actions, are:")
print(reward[0,:,:]@pB)
The reward matrices are:
[[[-10   2]
  [ -1   1]]

 [[-10  -1]
  [  2   1]]]

A's mixed strategy vector is:
[0.1 0.9]

The expected rewards for B, as a function of his actions, are:
[0.8 0.8]

B's mixed strategy vector is:
[0.1 0.9]

The expected rewards for A, as a function of her actions, are:
[0.8 0.8]
If both players use these mixed strategies, then their expected rewards are
$$E[r_A] = \mathbf{p}_A^T\mathbf{R}_A\mathbf{p}_B = \frac{8}{10}$$

$$E[r_B] = \mathbf{p}_A^T\mathbf{R}_B\mathbf{p}_B = \frac{8}{10}$$

print("A's expected reward is:")
print(pA@reward[0,:,:]@pB)
print("\n")
print("B's expected reward is:")
print(pA@reward[1,:,:]@pB)
A's expected reward is:
0.8

B's expected reward is:
0.8
Stability
Sadly, the chicken game's mixed equilibrium is unstable.
An equilibrium is stable if small deviations are corrected. For example, suppose that Alice modified her probabilities slightly, to $\mathbf{p}_A=[0.11,0.89]^T$. If the reward structure of the game encourages her to shift back again toward $\mathbf{p}_A=[0.1,0.9]^T$, then the equilibrium is stable. On the other hand, if the reward structure encourages her to shift even farther away, then the equilibrium is unstable.
Stability can be defined in terms of the gradients of the expected rewards with respect to the strategy logits. A "logit" is the log of the ratio of a probability to its complement:
$$z_A = \log\left(\frac{P(A=c)}{1-P(A=c)}\right),~~~P(A=c)=\frac{1}{1+e^{-z_A}}$$

$$z_B = \log\left(\frac{P(B=c)}{1-P(B=c)}\right),~~~P(B=c)=\frac{1}{1+e^{-z_B}}$$

A mixed Nash equilibrium can be defined as a pair of strategies at which the derivative of each player's expected reward with respect to the same player's strategy is zero:
$$\frac{\partial E[r_A]}{\partial z_A}=\frac{\partial \mathbf{p}_A^T}{\partial z_A}\mathbf{R}_A\mathbf{p}_B=0$$

$$\frac{\partial E[r_B]}{\partial z_B}=\mathbf{p}_A^T\mathbf{R}_B\frac{\partial \mathbf{p}_B}{\partial z_B}=0$$

In order to verify the mixed Nash equilibrium of the game of chicken, please write the homework function submitted.utility_gradients. Here is its docstring:
import submitted, importlib
help(submitted.utility_gradients)
Help on function utility_gradients in module submitted:

utility_gradients(logit, reward)
    Calculate partial derivatives of expected rewards with respect to logits.

    @param:
    logit - player i plays move 1 with probability 1/(1+exp(-logit[i]))
    reward - reward[i,a,b] is reward to player i if player 0 plays a, and player 1 plays b

    @return:
    gradients - gradients[i] = dE[reward[i,:,:]]/dlogit[i]
    utilities - utilities[i] = E[reward[i,:,:]]

    where the expectation is computed over the distribution of possible moves by both players.
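If you get stuck, the following is a minimal sketch of one way such a function could be written, using the chain rule through the sigmoid, $\partial p/\partial z = p(1-p)$. The name utility_gradients_sketch and its internal details are illustrative assumptions based only on the docstring above, not the required implementation:

```python
import numpy as np

def utility_gradients_sketch(logit, reward):
    """Sketch only: assumes move 1 = cooperate and move 0 = defect, as in the docstring above."""
    p = 1 / (1 + np.exp(-np.asarray(logit, dtype=float)))  # P(player i plays move 1)
    pA = np.array([1 - p[0], p[0]])   # player 0's distribution over moves [0, 1]
    pB = np.array([1 - p[1], p[1]])   # player 1's distribution over moves [0, 1]
    # Expected reward of each player under the joint strategy
    utilities = np.array([pA @ reward[0] @ pB, pA @ reward[1] @ pB])
    # d pA / d logit[0] = p0*(1-p0)*[-1, 1], and similarly for player 1
    dpA = p[0] * (1 - p[0]) * np.array([-1.0, 1.0])
    dpB = p[1] * (1 - p[1]) * np.array([-1.0, 1.0])
    gradients = np.array([dpA @ reward[0] @ pB, pA @ reward[1] @ dpB])
    return gradients, utilities
```

With the chicken rewards and logits of $\log 9$ for both players, this sketch returns gradients of zero and utilities of $0.8$, matching the calculations above.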
Once you have written submitted.utility_gradients, you can use it to verify the mixed equilibrium of the game of chicken. Since each player cooperates with probability $\frac{9}{10}$ at this equilibrium, the corresponding logits are $(z_A,z_B)=(\log(9),\log(9))$. In order for this to be a Nash equilibrium, the expected reward of each player must be unchanged by small changes in their own strategy, i.e., both gradients must be zero:
importlib.reload(submitted)
logit = np.log([9,9])
print("The logits of both players are:")
print(logit)
print("\n")
gradients, utilities = submitted.utility_gradients(logit, reward)
print("This is a Nash equilibrium, because the gradients of the expected rewards are both zero:")
print(gradients)
print("\n")
print("As we previously calculated, the expected rewards for each player are:")
print(utilities)
The logits of both players are:
[2.19722458 2.19722458]

This is a Nash equilibrium, because the gradients of the expected rewards are both zero:
[ 0.00000000e+00 -1.38777878e-17]

As we previously calculated, the expected rewards for each player are:
[0.8 0.8]
Unfortunately, this is an unstable equilibrium.
- If either player increases their logit, then the gradient of the other player becomes negative, meaning that the other player can improve their reward by decreasing their logit.
- If either player decreases their logit, then the gradient of the other player becomes positive, meaning that the other player can improve their reward by increasing their logit.
gradients, utilities = submitted.utility_gradients(logit+np.array([0,0.1]), reward)
print("If Bob increases his logit slightly, the gradients become:")
print(gradients)
print("\n")
gradients, utilities = submitted.utility_gradients(logit+np.array([0,-0.1]), reward)
print("If Bob decreases his logit slightly, the gradients become:")
print(gradients)
print("\n")
gradients, utilities = submitted.utility_gradients(logit+np.array([0.1,0]), reward)
print("If Alice increases her logit slightly, the gradients become:")
print(gradients)
print("\n")
gradients, utilities = submitted.utility_gradients(logit+np.array([-0.1,0]), reward)
print("If Alice decreases her logit slightly, the gradients become:")
print(gradients)
print("\n")
If Bob increases his logit slightly, the gradients become:
[-7.78222682e-03 -1.38777878e-17]

If Bob decreases his logit slightly, the gradients become:
[ 8.43018335e-03 -1.38777878e-17]

If Alice increases her logit slightly, the gradients become:
[ 0.         -0.00778223]

If Alice decreases her logit slightly, the gradients become:
[0.         0.00843018]
Adversarial Learning
What happens if both players follow their gradient, in an attempt to increase their rewards? Let's find out.
importlib.reload(submitted)
help(submitted.strategy_gradient_ascent)
Help on function strategy_gradient_ascent in module submitted:

strategy_gradient_ascent(logit, reward, nsteps, learningrate)
    nsteps of a 2-player, 2-action episodic game, strategies learned using simultaneous gradient ascent.

    @param:
    logit - initial logits for the two players
    reward - reward[i,a,b] is reward to player i if player 0 plays a, and player 1 plays b
    nsteps - number of steps of ascent to perform
    learningrate - learning rate

    @return:
    path - path[t,i] is the logit of the i'th player's strategy after t steps of simultaneous gradient ascent (path[0,:]==logit).
    utilities (nsteps,2) - utilities[t,i] is the expected reward to player i on step t, where expectation is over the distribution of moves given by logits[t,:]
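Here, too, is a minimal sketch of the kind of loop this docstring describes, assuming your utility_gradients is already working. The name strategy_gradient_ascent_sketch is illustrative, and details such as recording path[t] before or after the update should be checked against the docstring and the visible tests:

```python
import numpy as np
import submitted   # assumes submitted.utility_gradients is already implemented

def strategy_gradient_ascent_sketch(logit, reward, nsteps, learningrate):
    """Sketch only: both players take one gradient step per iteration, simultaneously."""
    logit = np.array(logit, dtype=float)
    path = np.zeros((nsteps, 2))
    utilities = np.zeros((nsteps, 2))
    for t in range(nsteps):
        path[t, :] = logit   # path[0,:] is the initial logit, matching the docstring
        gradients, utilities[t, :] = submitted.utility_gradients(logit, reward)
        logit = logit + learningrate * gradients   # each player ascends their own utility gradient
    return path, utilities
```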
Now let's start at the Nash equilibrium, and iterate for 1000 steps.
importlib.reload(submitted)
logit = np.array([np.log(9), np.log(9)])
path, utilities = submitted.strategy_gradient_ascent(logit, reward, 1000, 0.1)
import matplotlib.pyplot as plt
fig, ax = plt.subplots(3,1, figsize=(4,6),layout='tight')
ax[0].scatter(1/(1+np.exp(-path[:,0])), 1/(1+np.exp(-path[:,1])), c=np.arange(1000))
ax[0].set_xlabel('P(player 0 cooperates)')
ax[0].set_ylabel('P(player 1 cooperates)')
ax[0].set_title('Path taken by simultaneous gradient ascent')
ax[1].plot(utilities[:,0])
ax[1].set_title('Expected reward for player 0')
ax[2].plot(utilities[:,1])
ax[2].set_title('Expected reward for player 1')
ax[2].set_xlabel('Iteration number')
[Figure: the path taken by simultaneous gradient ascent through strategy space, and the expected reward of each player versus iteration number.]
As you can see, the Nash equilibrium really is an equilibrium -- if players start out with that strategy, then neither of them can improve their expected reward by making any small change! However, it's an unstable equilibrium. If one of the players starts just a little bit away from that equilibrium, then the strategies diverge.
starting_points = np.tile(logit, (2,2,1)) + np.array([[[0,0.1],[0,-0.1]],[[0.1,0],[-0.1,0]]])
fig, axs = plt.subplots(2,2, figsize=(6,6), layout='tight')
for i in range(2):
for j in range(2):
path, utilities = submitted.strategy_gradient_ascent(starting_points[i,j,:], reward, 1000, 0.1)
axs[i,j].scatter(1/(1+np.exp(-path[:,0])), 1/(1+np.exp(-path[:,1])), c=np.arange(1000))
axs[i,j].set_xlabel('Probability that player 0 cooperates')
axs[i,j].set_ylabel('Probability that player 1 cooperates')
axs[i,j].set_title("Path starting from (%.2f,%.2f)"%(starting_points[i,j,0],starting_points[i,j,1]))
axs[i,j].set_xlim(0,1)
axs[i,j].set_ylim(0,1)
- If the starting strategy has A cooperating slightly less than $\frac{9}{10}$ of the time, or B cooperating slightly more than $\frac{9}{10}$ of the time, then each player, gradually increasing their own expected reward, eventually converges to a strategy in which A always defects, and B always cooperates.
- If the starting strategy has A cooperating slightly more than $\frac{9}{10}$ of the time, or B cooperating slightly less than $\frac{9}{10}$ of the time, then each player, gradually increasing their own expected reward, eventually converges to a strategy in which A always cooperates, and B always defects.
In this sense, the mixed equilibrium of the game of chicken is unstable, and the two fixed-strategy equilibria are both stable.
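You can also check the claim about the pure-strategy equilibria numerically with utility_gradients: near the corner where A always defects and B always cooperates, A's gradient is negative and B's gradient is positive, so both players are pushed further toward that corner. The particular logits below are just an illustrative choice, not part of the assignment:

```python
# Near the pure-strategy equilibrium (A defects, B cooperates):
# a very negative logit[0] means A almost always defects; a very positive logit[1] means B almost always cooperates.
reward = np.array([[[-10,2],[-1,1]],[[-10,-1],[2,1]]])   # game of chicken
gradients, utilities = submitted.utility_gradients(np.array([-5.0, 5.0]), reward)
print(gradients)   # expect gradients[0] < 0 and gradients[1] > 0, pushing both players toward the corner
```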
No stable equilibrium: The Paparazzi game
Every two-player, two-move game has at least one Nash equilibrium, but some games have no stable Nash equilibrium.
For example, consider the "Paparazzi game," in which A wins more if her move is the opposite of B's move, whereas B wins more if his move is the same as A's:
reward = np.array([[[-1,1],[1,-1]],[[1,-1],[-1,1]]])
print(reward)
[[[-1  1]
  [ 1 -1]]

 [[ 1 -1]
  [-1  1]]]
This game has just one Nash equilibrium: if both players cooperate with probability 50%, then they both win, on average, 0 points per game.
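If you would like to sanity-check this claim with the function you wrote earlier, you can evaluate the utility gradients at the 50/50 strategies (logits of zero); both gradients and both expected rewards should come out to zero. This check is optional and not part of the graded cells:

```python
# Optional sanity check: at logit = [0, 0] both players mix 50/50,
# so each player's expected reward is independent of their own action.
gradients, utilities = submitted.utility_gradients(np.zeros(2), reward)
print(gradients)   # expected: approximately [0. 0.]
print(utilities)   # expected: [0. 0.] for this zero-sum game
```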
importlib.reload(submitted)
path, utilities = submitted.strategy_gradient_ascent([0,0], reward, 1000, 0.1)
fig, ax = plt.subplots(1, figsize=(4,4))
ax.scatter(1/(1+np.exp(-path[:,0])), 1/(1+np.exp(-path[:,1])), c=np.arange(1000))
ax.set_xlabel('Probability that player 0 cooperates')
ax.set_ylabel('Probability that player 1 cooperates')
ax.set_title('Simultaneous gradient ascent for the Paparazzi game, starting from equilibrium')
[Figure: simultaneous gradient ascent for the Paparazzi game, starting from the equilibrium.]
However, if we start from any position other than the equilibrium, then gradient ascent fails to converge.
importlib.reload(submitted)
path, utilities = submitted.strategy_gradient_ascent([1,1], reward, 1000, 0.1)
fig, ax = plt.subplots(1, figsize=(4,4))
ax.scatter(1/(1+np.exp(-path[:,0])),1/(1+np.exp(-path[:,1])), c=np.arange(1000))
ax.set_xlabel('Probability that player 0 cooperates')
ax.set_ylabel('Probability that player 1 cooperates')
[Figure: simultaneous gradient ascent for the Paparazzi game, starting away from the equilibrium.]
As you can see, gradient ascent does not converge to the Nash equilibrium; instead, the players orbit around the Nash equilibrium:
- When player 1 is usually defecting, then player 0 increases their cooperation probability
- When player 0 is usually cooperating, then player 1 increases their cooperation probability
- When player 1 is usually cooperating, then player 0 decreases their cooperation probability
- When player 0 is usually defecting, then player 1 decreases their cooperation probability
Mechanism Design
Mechanism design is the process of designing the rewards of a game in order to encourage the players to act in a particular way.
For example, suppose that we want Alice to choose between defecting and cooperating according to probability vector $\mathbf{p}_A$, and we want Bob to choose according to probability vector $\mathbf{p}_B$. Remember that a mixed strategy is rational for Alice only if her expected reward is independent of her action, i.e., the two elements of the vector $\mathbf{R}_A\mathbf{p}_B$ are the same, i.e., only if $[-1,1]\mathbf{R}_A\mathbf{p}_B=0$. Likewise, a mixed strategy is rational for Bob only if his expected reward is independent of his action, i.e., only if $\mathbf{p}_A^T\mathbf{R}_B\left[\begin{array}{c}-1\\1\end{array}\right]=0$.
Let's use the term "strategy mismatch" to mean the degree to which Alice's two actions give different rewards, $[-1,1]\mathbf{R}_A\mathbf{p}_B$. Likewise, Bob's strategy mismatch is $\mathbf{p}_A^T\mathbf{R}_B\left[\begin{array}{c}-1\\1\end{array}\right]$. We can create a game in which $(\mathbf{p}_A,\mathbf{p}_B)$ is a Nash equilibrium if we can set the mean-squared-strategy-mismatch to zero, i.e., set $\mathcal{L}=0$ where
$$\mathcal{L}=\frac{1}{2}\left([-1,1]\mathbf{R}_A\mathbf{p}_B\right)^2+\frac{1}{2}\left(\mathbf{p}_A^T\mathbf{R}_B\left[\begin{array}{c}-1\\1\end{array}\right]\right)^2$$

importlib.reload(submitted)
help(submitted.mechanism_gradient)
Help on function mechanism_gradient in module submitted:

mechanism_gradient(logit, reward)
    Calculate partial derivative of mechanism loss with respect to rewards.

    @param:
    logit - The goal is to make this pair of strategies a Nash equilibrium:
        player i plays move 1 with probability 1/(1+exp(-logit[i])), else move 0
    reward - reward[i,a,b] is reward to player i if player 0 plays a, and player 1 plays b

    @return:
    gradient - gradient[i,a,b] = derivative of loss w.r.t. reward[i,a,b]
    loss - half of the mean-squared strategy mismatch.
        Mean = average across both players.
        Strategy mismatch = the expected reward that the player earns by cooperating (move 1) minus the expected reward that they earn by defecting (move 0).
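If it helps, here is a minimal sketch of one way a function like this could be written, starting from the loss $\mathcal{L}$ defined above. The closed-form gradients used here ($\partial\mathcal{L}/\partial\mathbf{R}_A[a,b]$ equals A's mismatch times $[-1,1][a]\,\mathbf{p}_B[b]$, and similarly for B) follow from that definition, but the name mechanism_gradient_sketch and the exact return format (scalar versus length-1 array for the loss) are assumptions you should check against the docstring and the visible tests:

```python
import numpy as np

def mechanism_gradient_sketch(logit, reward):
    """Sketch only: gradient of the strategy-mismatch loss L defined in the text above."""
    p = 1 / (1 + np.exp(-np.asarray(logit, dtype=float)))
    pA = np.array([1 - p[0], p[0]])
    pB = np.array([1 - p[1], p[1]])
    d = np.array([-1.0, 1.0])            # (value of move 1) minus (value of move 0)
    mismatch_A = d @ reward[0] @ pB      # A's strategy mismatch
    mismatch_B = pA @ reward[1] @ d      # B's strategy mismatch
    loss = 0.5 * (mismatch_A**2 + mismatch_B**2)
    gradient = np.zeros(reward.shape)
    gradient[0] = mismatch_A * np.outer(d, pB)   # dL/dR_A[a,b] = mismatch_A * (-1,1)[a] * pB[b]
    gradient[1] = mismatch_B * np.outer(pA, d)   # dL/dR_B[a,b] = mismatch_B * pA[a] * (-1,1)[b]
    return gradient, loss
```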
For the paparazzi game, the vectors $\mathbf{p}_A=\left[\begin{array}{c}1/2\\1/2\end{array}\right],\mathbf{p}_B=\left[\begin{array}{c}1/2\\1/2\end{array}\right]$, corresponding to the logits $(z_A,z_B)=(0,0)$, are a Nash equilibrium, so if we evaluate the paparazzi game with these strategies, we should find that the loss is zero and the gradient is zero.
importlib.reload(submitted)
# paparazzi game
reward = np.array([[[-1,1],[1,-1]],[[1,-1],[-1,1]]]) # paparazzi
gradient, loss = submitted.mechanism_gradient(np.zeros(2), reward) # paparazzi game
print("At equilibrium, the gradient of mean-squared strategy mismatch w.r.t. reward matrices is:")
print(gradient)
print("\n")
print("... and the mean-squared strategy mismatch is:")
print(loss)
At equilibrium, the gradient of mean-squared strategy mismatch w.r.t. reward matrices is:
[[[-0. -0.]
  [ 0.  0.]]

 [[-0.  0.]
  [-0.  0.]]]

... and the mean-squared strategy mismatch is:
[0.]
On the other hand, suppose we want to modify the paparazzi game so that the strategies $\mathbf{p}_A=\left[\begin{array}{c}9/20\\11/20\end{array}\right],\mathbf{p}_B=\left[\begin{array}{c}11/20\\9/20\end{array}\right]$ are a Nash equilibrium. These strategies correspond to the logits $(z_A,z_B)=(\log(11/9),\log(9/11))$. If we start from the original reward matrix of the paparazzi game, we should find that there is a nonzero loss, and a nonzero gradient:
importlib.reload(submitted)
gradient, loss = submitted.mechanism_gradient(np.log([11/9,9/11]), reward)
print("If we want to adjust the paparazzi game to make [log(11/9),log(9/11)] an equilibrium, the gradient is:")
print(gradient)
print("\n")
print("... and the mean-squared strategy mismatch is:")
print(loss)
If we want to adjust the paparazzi game to make [log(11/9),log(9/11)] an equilibrium, the gradient is:
[[[-0.11 -0.09]
  [ 0.11  0.09]]

 [[-0.09  0.09]
  [-0.11  0.11]]]

... and the mean-squared strategy mismatch is:
[0.04]
Let's write a function that will start with an initial setting of the rewards, and adjust the rewards until the desired strategies become a Nash equilibrium.
importlib.reload(submitted)
help(submitted.mechanism_gradient_descent)
Help on function mechanism_gradient_descent in module submitted:

mechanism_gradient_descent(logit, reward, nsteps, learningrate)
    nsteps of gradient descent on the mean-squared strategy mismatch.

    @param:
    logit - The goal is to make this pair of strategies a Nash equilibrium:
        player i plays move 1 with probability 1/(1+exp(-logit[i])), else move 0.
    reward - Initial setting of the rewards. reward[i,a,b] is reward to player i if player 0 plays a, and player 1 plays b
    nsteps - number of steps of gradient descent to perform
    learningrate - learning rate

    @return:
    path - path[t,i,a,b] is the reward to player i of the moves (a,b) after t steps of gradient descent (path[0,:,:,:] = initial reward).
    loss - loss[t] is half of the mean-squared strategy mismatch at iteration [t].
        Mean = average across both players.
        Strategy mismatch = the expected reward that the player earns by cooperating (move 1) minus the expected reward that they earn by defecting (move 0).
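Finally, here is a minimal sketch of the descent loop itself, assuming mechanism_gradient is already working; as before, the sketch name and the bookkeeping details (e.g., that path[0] stores the initial reward) are assumptions to verify against the docstring and the visible tests:

```python
import numpy as np
import submitted   # assumes submitted.mechanism_gradient is already implemented

def mechanism_gradient_descent_sketch(logit, reward, nsteps, learningrate):
    """Sketch only: plain gradient descent on the strategy-mismatch loss."""
    reward = np.array(reward, dtype=float)
    path, loss = [], []
    for _ in range(nsteps):
        path.append(reward.copy())                 # path[0] is the initial reward matrix
        gradient, l = submitted.mechanism_gradient(logit, reward)
        loss.append(np.squeeze(l))                 # accept either a scalar or a length-1 array
        reward = reward - learningrate * gradient  # descend, since we are minimizing the mismatch
    return np.array(path), np.array(loss)
```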
importlib.reload(submitted)
path, loss = submitted.mechanism_gradient_descent(np.log([11/9,9/11]), reward, 1000, 0.1)
print("The following reward matrices have a Nash equilibrium where Alice cooperates w/prob 11/20, Bob w/prob 9/20:")
print(path[-1,:,:,:])
print("\n")
print("... with a mean-squared strategy mismatch of:",loss[-1],"\n")
print("Just to make sure, let's calculate the utility gradients:")
g, u = submitted.utility_gradients(np.log([11/9,9/11]), path[-1,:,:,:])
print("At the new equilibrium, the gradients are",g)
print("... and the utilities are",u)
The following reward matrices have a Nash equilibrium where Alice cooperates w/prob 11/20, Bob w/prob 9/20:
[[[-0.89108911  1.08910891]
  [ 0.89108911 -1.08910891]]

 [[ 1.08910891 -1.08910891]
  [-0.89108911  0.89108911]]]

... with a mean-squared strategy mismatch of: 9.98402083170343e-31

Just to make sure, let's calculate the utility gradients:
At the new equilibrium, the gradients are [2.49800181e-16 2.47302179e-16]
... and the utilities are [ 2.08166817e-17 -4.99600361e-17]
fig, axs = plt.subplots(2,2, figsize=(6,6), layout='tight')
for i in range(2):
for j in range(2):
r0, r1 = axs[i,j].plot(path[:,:,i,j])
axs[i,j].set_title("R(A=%d,B=%d)"%(i,j))
axs[1,0].set_xlabel('Iteration')
axs[1,1].set_xlabel('Iteration')
plt.legend((r0,r1),("A's reward","B's reward"))
[Figure: each reward-matrix entry R(A=a,B=b) versus iteration number, for both players.]
We see that gradient descent on the mean-squared strategy mismatch results in a new reward matrix that's pretty similar to the old one (at least in this case, where the target Nash equilibrium is pretty close to the original equilibrium). The rewards have been adjusted slightly, though, so that Alice gets slightly more reward if Bob chooses move 1 (which he now does less often), whereas Bob gets slightly more reward if Alice chooses move 0 (which she now does less often).
Just to be sure everything works, you might want to try those things with the game of chicken as well:
# game of chicken
reward = np.array([[[-10,2],[-1,1]],[[-10,-1],[2,1]]]) ### delete this to use paparazzi game
gradient, loss = submitted.mechanism_gradient(np.log([9,9]), reward) # chicken game
print("At equilibrium, the gradient of mean-squared strategy mismatch w.r.t. reward matrices is:")
print(gradient)
print("\n")
print("... and the mean-squared strategy mismatch is:")
print(loss)
print("\n")
gradient, loss = submitted.mechanism_gradient(np.log([8,8]), reward)
print("If we want to adjust the chicken game to make [log(8),log(8)] an equilibrium, the gradient is:")
print(gradient)
print("\n")
print("... and the mean-squared strategy mismatch is:")
print(loss)
print("\n")
path, loss = submitted.mechanism_gradient_descent(np.log([8,8]), reward, 1000, 0.1)
print("The following reward matrices have a Nash equilibrium where each player cooperates with probability 8/9:")
print(path[-1,:,:,:])
print("\n")
print("...with a mean-squared strategy mismatch of")
print(loss[-1])
print("\n")
print("Just to make sure, let's calculate the utility gradients:")
g, u = submitted.utility_gradients(np.log([8,8]), path[-1,:,:,:])
print("At the new equilibrium, the gradients are",g)
print("... and the utilities are",u)
fig, axs = plt.subplots(2,2, figsize=(6,6), layout='tight')
for i in range(2):
for j in range(2):
r0, r1 = axs[i,j].plot(path[:,:,i,j])
axs[i,j].set_title("R(A=%d,B=%d)"%(i,j))
axs[1,0].set_xlabel('Iteration')
axs[1,1].set_xlabel('Iteration')
plt.legend((r0,r1),("A's reward","B's reward"))
At equilibrium, the gradient of mean-squared strategy mismatch w.r.t. reward matrices is:
[[[ 1.11022302e-17  9.99200722e-17]
  [-1.11022302e-17 -9.99200722e-17]]

 [[ 1.11022302e-17 -1.11022302e-17]
  [ 9.99200722e-17 -9.99200722e-17]]]

... and the mean-squared strategy mismatch is:
[1.23259516e-32]

If we want to adjust the chicken game to make [log(8),log(8)] an equilibrium, the gradient is:
[[[-0.01234568 -0.09876543]
  [ 0.01234568  0.09876543]]

 [[-0.01234568  0.01234568]
  [-0.09876543  0.09876543]]]

... and the mean-squared strategy mismatch is:
[0.01234568]

The following reward matrices have a Nash equilibrium where each player cooperates with probability 8/9:
[[[-9.99230769  2.06153846]
  [-1.00769231  0.93846154]]

 [[-9.99230769 -1.00769231]
  [ 2.06153846  0.93846154]]]

...with a mean-squared strategy mismatch of
1.9721522630525295e-31

Just to make sure, let's calculate the utility gradients:
At the new equilibrium, the gradients are [2.77555756e-17 5.55111512e-17]
... and the utilities are [0.72222222 0.72222222]
[Figure: each reward-matrix entry R(A=a,B=b) versus iteration number, for both players, for the modified game of chicken.]
Again, for a target equilibrium close to the original equilibrium, we find that the reward matrix doesn't need to be changed very much.
Grading
You can test your code on your own machine, using the visible test cases, by running python grade.py.
When that works, submit your submitted.py to the autograder. The autograder will run the visible test cases, and also some hidden test cases, in order to generate your final score for this MP.