ECE448 Spring 2026¶

MP4: Lexical Embedding (word2vec)¶

All modern AI systems use real-valued vectors to represent words (or parts of words), in order to perform computations like next-word prediction, speech recognition, and natural language generation. The vector representation of a word is typically called its lexical embedding. Embeddings are learned from data, which raises the question: how are they learned?

One example of an effective, computationally efficient method for learning lexical embeddings is the skipgram noise contrastive estimation method proposed by Mikolov, Sutskever, Chen, Corrado, and Dean in 2013, and released as part of their patented word2vec codebase. In this MP, you will train lexical embeddings using the algorithm described in their paper, with some modifications to make it easier to implement in an MP. We will abbreviate the algorithm name as word2vec, as the authors did.

The code template is in the file mp04.zip, which you can download from this page. All of the methods you need to write are in the file submitted.py, and that is the only file you should upload to Gradescope.

In [ ]:
### Run this cell once at the start to auto-import your functions in submitted.py! 
### This allows you to update your functions automatically without needing to restart the notebook kernel. 
%load_ext autoreload
%autoreload 1
%aimport submitted
In [ ]:
### If your python version does not support autoreload, then run the following to manually reload your functions after updating.
import importlib
importlib.reload(submitted)

This file (mp04_notebook.ipynb) will walk you through the whole MP, giving you instructions and debugging tips as you go.

Table of Contents¶

  1. Initialize the Embeddings
  2. Gradients of the Skipgram NCE Loss
  3. Stochastic Gradient Descent
  4. Grade Your Homework

1. Initialize the Embeddings¶

word2vec is a gradient descent method, which means that it can't learn the embeddings from scratch; instead, it takes some initial embeddings, and improves them. In order to run word2vec, therefore, you have to initialize the embeddings. That is done in the method submitted.initialize.

Like most gradient descent methods, word2vec works badly if you initialize all of the parameters to the same value (e.g., zero), so we want to initialize the embeddings to something other than zero. Most machine learning algorithms use random initialization, but in order to help with debugging, we will use a special type of initialization, described in the docstring:

In [6]:
help(submitted.initialize)
Help on function initialize in module submitted:

initialize(data, dim)
    Initialize embeddings for all distinct words in the input data.
    Most of the dimensions will be zero-mean unit-variance Gaussian random variables.
    In order to make debugging easier, however, we will assign special geometric values
    to the first two dimensions of the embedding:

    (1) Find out how many distinct words there are.
    (2) Choose that many locations uniformly spaced on a unit circle in the first two dimensions.
    (3) Put the words into those spots in the same order that they occur in the data.

    Thus if data[0] and data[1] are different words, you should have

    embedding[data[0]] = np.array([np.cos(0), np.sin(0), random, random, random, ...])
    embedding[data[1]] = np.array([np.cos(2*np.pi/N), np.sin(2*np.pi/N), random, random, random, ...])

    ... and so on, where N is the number of distinct words, and each random element is
    a Gaussian random variable with mean=0 and standard deviation=1.

    @param:
    data (list) - list of words in the input text, split on whitespace
    dim (int) - dimension of the learned embeddings

    @return:
    embedding - dict mapping from words (strings) to numpy arrays of dimension=dim.

For grading purposes, you should be using numpy functions to randomly choose values throughout this MP.

For the submitted.initialize function, please do not use the np.concatenate function. A more efficient approach is to first build a full random array with np.random.randn, and then replace the first two elements with the required sinusoids.
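
If it helps to see the shape of the computation, here is a minimal sketch of one way to satisfy the docstring. The helper name initialize_sketch and the use of dict.fromkeys to recover first-occurrence order are illustrative assumptions, not the graded solution.

In [ ]:
import numpy as np

def initialize_sketch(data, dim):
    # Distinct words, in the order they first occur in the data.
    words = list(dict.fromkeys(data))
    N = len(words)
    embedding = {}
    for n, w in enumerate(words):
        v = np.random.randn(dim)                    # mean-0, std-1 Gaussian entries
        theta = 2 * np.pi * n / N
        v[0], v[1] = np.cos(theta), np.sin(theta)   # place word n on the unit circle
        embedding[w] = v
    return embedding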

We need some data. For now, let's use the following dummy data. We can start out with 4-dimensional embeddings. That's smaller than we would normally use, but it will avoid wasting computation while we debug.

In [13]:
data='''
# # #
a b c d a b c d a b c d a b c d
# # #
e f g h e f g h e f g h e f g h
# # #
'''.split()
embedding = submitted.initialize(data, 4)

If we plot just the first two dimensions of each embedding, they should lie on a circle. If we plot the first three dimensions, we should see the randomness of the third dimension. Please note that due to this randomness, the 3D-plot generated by your code may look different from the one shown below.

In [14]:
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure(figsize=(8,4))
ax0 = fig.add_subplot(1,2,1)
ax1 = fig.add_subplot(1,2,2,projection='3d')
ax1.set_zlim(-2,2)

# Plot the axes as dashed lines
theta = np.linspace(0,2*np.pi,1000)
ax0.plot([-2,2],[0,0],'k--',[0,0],[-2,2],'k--',np.cos(theta),np.sin(theta),'k--')
ax1.plot([-2,2],[0,0],[0,0],'k--')
ax1.plot([0,0],[-2,2],[0,0],'k--')
ax1.plot([0,0],[0,0],[-2,2],'k--')
ax1.plot(np.cos(theta),np.sin(theta),np.zeros(1000),'k--')

# Plot the vector value of each embedding
for w,v in embedding.items():
    ax0.plot(v[0],v[1], '.')
    ax0.text(v[0],v[1],w,None)
    ax1.plot(v[0],v[1],v[2], '.')
    ax1.text(v[0],v[1],v[2],w,None)
[Figure: left, 2D scatter of the first two embedding dimensions, with each word lying on the dashed unit circle; right, 3D scatter showing the random third dimension.]

If you've done this correctly, you should now have nine embeddings (one for each distinct word), and they should be uniformly spaced around a unit circle in the first two dimensions, starting at $\theta=0$ and ending at $\theta=\frac{16\pi}{9}$, with random offsets in the third and fourth dimensions. Note that, as mentioned before, this initialization is not really necessary for good performance of the algorithm; we're just using it to help with debugging.
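
If you want a quick, non-graphical sanity check on your initialization, something like the following should pass with the dummy data and dim=4 used above (these assertions are just a debugging aid, not part of the grading):

In [ ]:
import numpy as np

assert len(embedding) == 9                       # one embedding per distinct word
for w, v in embedding.items():
    assert v.shape == (4,)                       # dim=4 as requested
    assert np.isclose(np.hypot(v[0], v[1]), 1)   # first two dims lie on the unit circle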

2. Gradients of the Skipgram NCE Loss¶

The skipgram NCE loss for the $t^{\text{th}}$ word is:

$$\mathcal{L}_t=-\sum_{\begin{array}{c}c=-d:c\ne 0,\\0\le t+c<|\mathcal{D}|\end{array}}^d\left(\log\sigma(\mathbf{v}_{t}^T\mathbf{v}_{t+c})+\frac{1}{k}\sum_{i=1}^k\mathbb{E}_{w_i\sim P}\left[\log\left(1-\sigma(\mathbf{v}_t^T\mathbf{v}_i)\right)\right]\right)$$

Let's break that down.

  • $\mathbf{v}_t$ is the vector embedding of the $t^{\text{th}}$ word, $w_t$.
  • $\mathbf{v}_{t+c}$ is the embedding of one of its context words, $w_{t+c}$ ($|\mathcal{D}|$ is the length of the training dataset).
  • We estimate the similarity between $\mathbf{v}_t$ and $\mathbf{v}_{t+c}$ by their dot product, $\mathbf{v}_t^T\mathbf{v}_{t+c}$.
  • By passing the similarity score through a logistic sigmoid as $\sigma(\mathbf{v}_t^T\mathbf{v}_{t+c})$, we get a number between 0 and 1 that we can interpret as the probability that word $w_{t+c}$ occurs in the context of word $w_t$. This is called the "skipgram probability." Please use the sigma function provided in submitted.py to compute the sigmoid.
  • The term $\mathbb{E}_{w_i\sim P}\left[\log\left(1-\sigma(\mathbf{v}_t^T\mathbf{v}_i)\right)\right]$ is the expected value, over words $w_i$ drawn according to some probability distribution $P$, of the log probability that word $w_i$ does NOT occur in the context of $w_t$. The formula suggests that we want the true value of this expectation, but in practice, we compute a very crude estimate: we just sample one word randomly from the distribution $P$, and then we compute $\log\left(1-\sigma(\mathbf{v}_t^T\mathbf{v}_i)\right)$ for the word that we sampled.
  • The reasoning behind the loss: If $w_{t+c}$ is in the context of word $w_t$, we want to increase the neural net's estimate of the probability of this event. If $w_i$ is chosen completely at random, we consider it to be random noise, so we try to increase the probability that $w_i$ and $w_t$ are NOT in context. We treat these as independent events, so their probabilities are multiplied. Maximizing the probability of these events is the same as minimizing the negative log probability.
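
Putting those pieces together: if we treat the context and noise embeddings as constants, use $\sigma'(x)=\sigma(x)(1-\sigma(x))$, and apply the one-sample-per-expectation approximation described above, differentiating $\mathcal{L}_t$ with respect to $\mathbf{v}_t$ gives

$$\frac{\partial\mathcal{L}_t}{\partial\mathbf{v}_t}=\sum_{\begin{array}{c}c=-d:c\ne 0,\\0\le t+c<|\mathcal{D}|\end{array}}^d\left(\left(\sigma(\mathbf{v}_{t}^T\mathbf{v}_{t+c})-1\right)\mathbf{v}_{t+c}+\frac{1}{k}\sum_{i=1}^k\sigma(\mathbf{v}_t^T\mathbf{v}_i)\mathbf{v}_i\right)$$

where each $\mathbf{v}_i$ is now the embedding of one sampled noise word. This is the quantity that submitted.gradient needs to approximate; double-check the signs against your own derivation.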

The context window ($d$) and the noise sample size ($k$) are called hyperparameters. Hyperparameters are numbers that need to be chosen before you can begin gradient descent. We will specify them as parameters of the function called submitted.gradient:

In [16]:
help(submitted.gradient)
Help on function gradient in module submitted:

gradient(embedding, data, t, d, k)
    Calculate gradient of the skipgram NCE loss with respect to the embedding of data[t]

    @param:
    embedding - dict mapping from words (strings) to numpy arrays.
    data (list) - list of words in the input text, split on whitespace
    t (int) - data index of word with respect to which you want the gradient
    d (int) - choose context words from t-d through t+d, not including t
    k (int) - compare each context word to k words chosen uniformly at random from the data

    @return:
    g (numpy array) - loss gradients with respect to embedding of data[t]

Hint: use the numpy.random.choice function to choose each noise word uniformly from your data; k such words are compared against each context word.

Choose $d=4$ and a sufficiently large value of $k$, e.g., $k=15$.
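
As a rough sketch of how d, k, and the derivative above fit together, something like the following would compute that gradient. The locally defined sigma is a stand-in for the one provided in submitted.py, and the sampling details are assumptions, so treat this as a sketch rather than the graded implementation.

In [ ]:
import numpy as np

def sigma(x):
    return 1 / (1 + np.exp(-x))   # stand-in for the sigma provided in submitted.py

def gradient_sketch(embedding, data, t, d, k):
    vt = embedding[data[t]]
    g = np.zeros_like(vt)
    for c in range(-d, d + 1):
        if c == 0 or not (0 <= t + c < len(data)):
            continue
        vc = embedding[data[t + c]]
        g += (sigma(vt @ vc) - 1) * vc              # pull v_t toward the observed context word
        for _ in range(k):
            vi = embedding[np.random.choice(data)]  # noise word drawn uniformly from the data
            g += sigma(vt @ vi) * vi / k            # push v_t away from noise words
    return g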

In [17]:
g = [np.zeros(2)]*len(data)
for t in [3,4,5,6,22,23,24,25]:
    print('Computing gradient of',data[t])
    g[t] = submitted.gradient(embedding, data, t, 4, 15)
Computing gradient of a
Computing gradient of b
Computing gradient of c
Computing gradient of d
Computing gradient of e
Computing gradient of f
Computing gradient of g
Computing gradient of h

Gradient descent will move in the direction of the NEGATIVE gradient, so let's plot the negative gradients, multiplied by some learning rate. You should find that:

  • Each word's negative gradient points toward the average of the words that appear in its context, and away from the average of all words.
  • Moving in that direction will decrease the loss by making each word more similar to the words that appear in its context, and less similar to the average of all words.
In [18]:
fig = plt.figure(figsize=(4,4))
ax0 = fig.add_subplot(1,1,1)

# Plot the axes as dashed lines
theta = np.linspace(0,2*np.pi,1000)
ax0.plot([-2,2],[0,0],'k--',[0,0],[-2,2],'k--',np.cos(theta),np.sin(theta),'k--')

# Plot the vector value of each embedding
for w,v in embedding.items():
    ax0.plot(v[0],v[1], '.')
    ax0.text(v[0],v[1],w,None)
    
# Plot the negative gradients as arrows pointing away from their current embeddings
lr = 0.2
for t in [3,4,5,6,22,23,24,25]:
    v = embedding[data[t]]
    ax0.arrow(v[0],v[1],-lr*g[t][0],-lr*g[t][1],head_width=0.1)
[Figure: 2D scatter of the embeddings with an arrow at each selected word showing its negative gradient, scaled by the learning rate.]

3. Stochastic Gradient Descent¶

Stochastic gradient descent repeats the following steps:

  1. Choose a word uniformly at random from the data.
  2. Compute its gradient.
  3. Move the embedding in the direction of the negative gradient, scaled by the learning rate.

Using the functions defined earlier, your final task is to implement these steps in a function called submitted.sgd:

In [22]:
help(submitted.sgd)
Help on function sgd in module submitted:

sgd(embedding, data, learning_rate, num_iters, d, k)
    Perform num_iters steps of stochastic gradient descent.

    @param:
    embedding - dict mapping from words (strings) to numpy arrays.
    data (list) - list of words in the input text, split on whitespace
    learning_rate (scalar) - scale the negative gradient by this amount at each step
    num_iters (int) - the number of iterations to perform
    d (int) - context width hyperparameter for gradient computation
    k (int) - noise sample size hyperparameter for gradient computation

    @return:
    embedding - the updated embeddings

Similarly to the submitted.gradient function, you should use the numpy.random.choice function to uniformly choose a word from the data at the start of each iteration.
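
Here is a minimal sketch of that loop, calling the gradient function you already wrote. It is illustrative only: the exact order in which random numbers are drawn can affect reproducibility against the tests, so follow the docstring and the hint above rather than this sketch if they disagree.

In [ ]:
import numpy as np
import submitted

def sgd_sketch(embedding, data, learning_rate, num_iters, d, k):
    for _ in range(num_iters):
        t = np.random.choice(len(data))                   # pick a word position uniformly at random
        g = submitted.gradient(embedding, data, t, d, k)  # gradient w.r.t. that word's embedding
        embedding[data[t]] = embedding[data[t]] - learning_rate * g
    return embedding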

After running a few thousand iterations of SGD, you should find that the words {'a','b','c','d'} have all clustered together, as have the words {'e','f','g','h'}. The '#' symbol occurs equally often in both contexts, so it ends up in the middle of the vector space:

In [23]:
embedding = submitted.sgd(embedding, data, 0.01, 2000, 4, 15)
In [24]:
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure(figsize=(8,4))
ax0 = fig.add_subplot(1,2,1)
ax1 = fig.add_subplot(1,2,2,projection='3d')

# Plot the axes as dashed lines
theta = np.linspace(0,2*np.pi,1000)
ax0.plot([-2,2],[0,0],'k--',[0,0],[-2,2],'k--',np.cos(theta),np.sin(theta),'k--')
ax1.plot([-2,2],[0,0],[0,0],'k--')
ax1.plot([0,0],[-2,2],[0,0],'k--')
ax1.plot([0,0],[0,0],[-2,2],'k--')
ax1.plot(np.cos(theta),np.sin(theta),np.zeros(1000),'k--')

# Plot the vector value of each embedding
for w,v in embedding.items():
    ax0.plot(v[0],v[1], '.')
    ax0.text(v[0],v[1],w,None)
    ax1.plot(v[0],v[1],v[2], '.')
    ax1.text(v[0],v[1],v[2],w,None)
[Figure: 2D and 3D scatter plots of the embeddings after SGD, showing the {'a','b','c','d'} and {'e','f','g','h'} clusters, with '#' near the middle.]

4. Grade Your Homework¶

If you've reached this point, and all of the above sections work, then you're ready to try grading your homework!

You will be testing your functions on the Stanford Sentiment Treebank (SST) dataset, a popular choice for training NLP models. Various sentences have been grouped together into corpora, which then serve as input data to the word2vec algorithm. You can inspect these corpora in the provided sst_visible.txt file under the data folder.
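
If you want to poke at that data yourself before grading, something along these lines should work; the path data/sst_visible.txt and treating the whole file as a single whitespace-separated word list are assumptions, so adjust them to the actual file layout.

In [ ]:
import submitted

# Assumption: the visible SST corpora live in data/sst_visible.txt.
with open('data/sst_visible.txt') as f:
    sst_data = f.read().split()

sst_embedding = submitted.initialize(sst_data, 10)
sst_embedding = submitted.sgd(sst_embedding, sst_data, 0.01, 2000, 4, 15)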

Before you submit it to Gradescope, try grading it on your own machine. This will run some visible test cases (which you can read in tests/test_visible.py), and compare the results to the solutions (which you can read in solution.json).

The exclamation point (!) tells Jupyter to run the following line as a shell command. Obviously you don't need to run the code this way -- this usage is here just to remind you that you can also, if you wish, run this command in a terminal window.

In [29]:
!python grade.py
........
----------------------------------------------------------------------
Ran 8 tests in 5.050s

OK

Please make sure that you are only using the dependencies specified in requirements.txt. For any randomness, you should use numpy functions (np.random) to ensure stable testing.

Once the local tests are finished, you should try uploading submitted.py to Gradescope.

Gradescope will run the same visible tests that you just ran on your own machine, plus some additional hidden tests. It's possible that your code passes all the visible tests, but fails the hidden tests. If that happens, then it probably means that you hard-coded a number into your function definition, instead of using the input parameter that you were supposed to use. Debug by running your function with a variety of different input parameters, and see if you can get it to respond correctly in all cases.

Once your code works perfectly on Gradescope, with no errors, then you are done with the MP. Congratulations!

You might find it interesting to test your code with real data, and/or to experiment with one of the lexical embedding demos online, e.g., https://remykarem.github.io/word2vec-demo/.