Getting Started¶
What you need to do:
Download and extract the assignment files (mp03.zip)
Implement the required functions in submitted.py:
- baseline(test, train)
- viterbi(test, train)
- analyze_ambiguous_words(train_sentences, predicted_sentences, tag_sentences, k=5)
Test locally with python grade.py
Submit submitted.py to Gradescope
Files you'll work with:
- submitted.py - Your main work goes here
- utils.py - Helper functions for data loading and evaluation
- data/ - Training and test datasets (Brown corpus)
- tests/ - Test files (don't modify these)
- mp03_notebook.ipynb - This debugging notebook
The first thing you need to do is to download this file: mp03.zip. It has the following content:
- submitted.py: Your homework. Edit, and then submit to Gradescope.
- mp03_notebook.ipynb: This is a Jupyter notebook to help you debug. You can completely ignore it if you want, although you might find that it gives you useful instructions.
- grade.py: Once your homework seems to be working, you can test it by typing python grade.py, which will run the visible tests.
- tests/test_visible.py: This file contains visible unit tests for the main assignment parts (the baseline, viterbi, and analyze_ambiguous_words functions).
- data: This directory contains the training and test datasets.
- utils.py: This is an auxiliary program that you can use to read the data, evaluate the accuracy, etc.
This file will walk you through the whole MP, giving you instructions and debugging tips as you go.
For this MP, you will implement part of speech (POS) tagging using an HMM model. Make sure you understand the algorithm before you start writing code.
Reading the data¶
The dataset consists of thousands of sentences with ground-truth POS tags.
The provided load_dataset function will read in the data as a nested list, with the outer dimension representing each sentence and the inner dimension representing each tagged word. The following cells will help you go through the representation of the data.
The provided code converts all words to lowercase. It also adds a START and END tag for each sentence when it loads the sentence. These tags are just for standardization. They will not be considered in accuracy computation.
import utils
train_set = utils.load_dataset('data/brown-training.txt')
dev_set = utils.load_dataset('data/brown-test.txt')
print('training set has {} sentences'.format(len(train_set)))
print('dev set has {} sentences'.format(len(dev_set)))
print('The first sentence of training set has {} words'.format(len(train_set[0])))
print('The 10th word of the first sentence in the training set is "{}" with ground-truth tag "{}"'.format(train_set[0][9][0], train_set[0][9][1]))
training set has 35655 sentences
dev set has 9912 sentences
The first sentence of training set has 27 words
The 10th word of the first sentence in the training set is "investigation" with ground-truth tag "NOUN"
print('Here is a sample sentence from the training set:\n', train_set[0])
Here is a sample sentence from the training set:
[('START', 'START'), ('the', 'DET'), ('fulton', 'NOUN'), ('county', 'NOUN'), ('grand', 'ADJ'), ('jury', 'NOUN'), ('said', 'VERB'), ('friday', 'NOUN'), ('an', 'DET'), ('investigation', 'NOUN'), ('of', 'IN'), ("atlanta's", 'NOUN'), ('recent', 'ADJ'), ('primary', 'NOUN'), ('election', 'NOUN'), ('produced', 'VERB'), ('``', 'PUNCT'), ('no', 'DET'), ('evidence', 'NOUN'), ("''", 'PUNCT'), ('that', 'CONJ'), ('any', 'DET'), ('irregularities', 'NOUN'), ('took', 'VERB'), ('place', 'NOUN'), ('.', 'PERIOD'), ('END', 'END')]
Tagset
The following is the set of 16 part-of-speech tags used in the provided Brown corpus. Remember: you should not hardcode anything about this tagset, because we will test your code on two other datasets with different tagsets.
- ADJ adjective
- ADV adverb
- IN preposition
- PART particle (e.g. after verb, looks like a preposition)
- PRON pronoun
- NUM number
- CONJ conjunction
- UH filler, exclamation
- TO infinitive
- VERB verb
- MODAL modal verb
- DET determiner
- NOUN noun
- PERIOD end of sentence punctuation
- PUNCT other punctuation
- X miscellaneous hard-to-classify items
Taggers and Required Functions
You will need to implement three main functions in submitted.py:
- baseline(test, train) - Baseline tagger that assigns the most frequent tag for each word
- viterbi(test, train) - HMM Viterbi tagger using dynamic programming
- analyze_ambiguous_words(train_sentences, predicted_sentences, tag_sentences, k=5) - Analysis function for studying tagging errors on ambiguous words
For this MP, you may use numpy (though it's not needed). You may not use other non-standard modules (including nltk).
You should use the provided training data to train the parameters of your model and the test sets to test its accuracy.
In addition, your code will be tested on two hidden datasets that are not available to you, which have different numbers of tags and words from the ones provided here. So do NOT hardcode any of your important computations, such as initial probabilities, transition probabilities, emission probabilities, or the number or names of tags. We will inspect code for hardcoded computations/values and will penalize such implementations.
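For example, the tagset and vocabulary can always be derived from whatever training data you are given rather than written out by hand. A minimal illustration (using the train_set loaded earlier; the variable names are just for this notebook):
# Illustrative: derive the tagset and vocabulary from the training data
# instead of hardcoding them, so the same code works on any dataset.
tagset = sorted({tag for sentence in train_set for _, tag in sentence})
vocab = sorted({word for sentence in train_set for word, _ in sentence})
print(len(tagset), 'tags;', len(vocab), 'word types')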
Testing: Run python grade.py to test your implementations. The autograder will test:
- Main functions: baseline(), viterbi(), and analyze_ambiguous_words()
- Performance benchmarks and correctness on visible test cases
Baseline Tagger
The baseline tagger considers each word independently, ignoring previous words and tags. For each word w, it counts how many times w occurs with each tag in the training data. When processing the test data, it always gives w the tag that was seen most often. For unseen words, it should guess the tag that is seen most often in the training dataset.
For each seen word \(w\):
\(\mathrm{Tag}(w) = \arg\max_{t \in T} \mathrm{count}(w, t)\), the number of times tag \(t\) is matched to word \(w\) in training.
For each unseen word \(w'\):
\(\mathrm{Tag}(w') = \arg\max_{t \in T} \mathrm{count}(t)\), the number of times tag \(t\) appears in the training set.
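A minimal sketch of this counting logic using plain Python counters; the function and variable names here are illustrative, not the required helpers in submitted.py:
# Illustrative sketch of the baseline tagger (not the required interface).
from collections import Counter, defaultdict

def baseline_sketch(test, train):
    # Count (word, tag) occurrences and overall tag occurrences in training.
    word_tag_counts = defaultdict(Counter)
    tag_counts = Counter()
    for sentence in train:
        for word, tag in sentence:
            word_tag_counts[word][tag] += 1
            tag_counts[tag] += 1
    # Unseen words fall back to the overall most frequent tag.
    fallback_tag = tag_counts.most_common(1)[0][0]
    predictions = []
    for sentence in test:
        tagged = []
        for word in sentence:
            if word in word_tag_counts:
                tagged.append((word, word_tag_counts[word].most_common(1)[0][0]))
            else:
                tagged.append((word, fallback_tag))
        predictions.append(tagged)
    return predictions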
Important Note: In addition to implementing the baseline() function, you must also implement an analyze_ambiguous_words() function that analyzes tagging errors on ambiguous words.
import submitted
import importlib
importlib.reload(submitted)
print(submitted.__doc__)
Debugging helpers¶
This MP provides several helper functions inside submitted.py. The autograder may test these helpers directly.
Before you implement the helpers, the cells below will raise NotImplementedError — that’s expected. After you implement each helper, re-run these cells to sanity-check your intermediate outputs.
import submitted
help(submitted.compute_word_tag_counts)
help(submitted.get_most_frequent_tag)
help(submitted.predict_baseline_tag)
help(submitted.baseline)
help(submitted.collect_hmm_counts)
help(submitted.compute_transition_logprobs)
help(submitted.compute_emission_logprobs)
help(submitted.viterbi_decode)
# Tiny toy example to understand input/output formats
toy_train = [[('START','START'), ('dog','NN'), ('barks','VBZ'), ('END','END')]]
toy_test = [['START','dog','barks','END']]
try:
wtc, tc = submitted.compute_word_tag_counts(toy_train)
print('Top tags:', tc.most_common(5))
print("Tags for 'dog':", wtc.get('dog', {}))
tag_counts, trans_counts, emit_counts, tagset, vocab = submitted.collect_hmm_counts(toy_train)
print('tagset:', tagset)
print('vocab:', vocab)
trans_lp = submitted.compute_transition_logprobs(trans_counts, tagset)
emit_lp, unk_lp = submitted.compute_emission_logprobs(emit_counts, tag_counts, vocab)
print('Example transition logP(START->NN):', trans_lp.get(('START','NN')))
print("Example emission logP('dog'|NN):", emit_lp.get(('NN','dog')))
tags = submitted.viterbi_decode(toy_test[0], tagset, trans_lp, emit_lp, unk_lp)
print('Decoded tags:', tags)
except NotImplementedError:
print('Not implemented yet — implement helper functions in submitted.py, then re-run this cell.')
Top tags: [('START', 1), ('NN', 1), ('VBZ', 1), ('END', 1)]
Tags for 'dog': Counter({'NN': 1})
tagset: ['END', 'NN', 'START', 'VBZ']
vocab: ['END', 'START', 'barks', 'dog']
Example transition logP(START->NN): -2.9999250020973856e-05
Example emission logP('dog'|NN): -3.999880004137177e-05
Decoded tags: ['START', 'NN', 'VBZ', 'END']
helper: analyze_ambiguous_words (spec + example)¶
analyze_ambiguous_words(train_sentences, predicted_sentences, tag_sentences, k=5)
Goal¶
Return the top-k ambiguous words that your model mis-tags most often on the test set.
A word is ambiguous if it appears with more than one tag in the training set.
Inputs¶
- train_sentences: training data, a list of sentences, each a list of (word, tag) tuples.
- predicted_sentences: your model's predictions on the test set, a list of sentences, each a list of (word, predicted_tag) tuples.
- tag_sentences: the gold-labeled test set, a list of sentences, each a list of (word, gold_tag) tuples.
- k: the number of words to return.
Assume predicted_sentences[i][j][0] == tag_sentences[i][j][0] (same word alignment).
Output format¶
Return a list of length k (or shorter in one edge case; see below).
Each element is a dict with keys:
"word": the word string"train_gold_tag_counts": dict mappingtag -> countfor how many times this word appeared with each tag in TRAIN"test_pred_tag_counts": dict mappingtag -> countfor how many times your model predicted each tag on TEST"test_gold_tag_counts": dict mappingtag -> countfor how many times the gold label was each tag on TEST
Ranking / tie-breaking (deterministic)¶
Let mistakes[w] be the number of test positions where:
- the word is ambiguous in TRAIN, and
- pred_tag != gold_tag.
Sort candidate words by:
- descending mistakes[w]
- if tie: descending number of times the word appears on TEST (sum(test_gold_tag_counts[w].values()))
- if still tie: lexicographic order of the word (ascending)

Return the first k.
If fewer than k ambiguous words are mis-tagged¶
- First include all ambiguous words with mistakes[w] > 0, sorted as above.
- If you still have fewer than k, fill the remaining slots with ambiguous words that appear on TEST but have mistakes[w] == 0, ranked by test frequency (descending), then lexicographic.
- If there are still fewer than k (rare), return as many as exist.
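Taken together, these rules amount to one deterministic sort over the ambiguous words that appear on TEST. A minimal sketch of just the ordering, assuming the per-word mistake and test-frequency counts have already been computed (all names here are illustrative):
# Illustrative sketch of the ordering only; the counting is omitted.
# candidates  = ambiguous words (more than one training tag) that appear on TEST
# mistakes[w] = number of mis-tagged test positions for w
# test_counts[w] = number of times w appears on TEST
def rank_candidates(candidates, mistakes, test_counts, k):
    # Negating the counts gives descending order on mistakes and test frequency;
    # remaining ties break on the word itself in ascending (lexicographic) order.
    # Words with mistakes == 0 naturally sort after all mis-tagged words.
    ordered = sorted(candidates,
                     key=lambda w: (-mistakes.get(w, 0), -test_counts.get(w, 0), w))
    return ordered[:k]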
# Example usage (after you implement analyze_ambiguous_words)
import utils, submitted
train_set = utils.load_dataset('data/brown-training.txt')
test_set = utils.load_dataset('data/brown-test.txt')
# Run a tagger (baseline or viterbi) to get predictions
pred = submitted.baseline(utils.strip_tags(test_set), train_set)
# Get top-5 ambiguous mis-tagged words
try:
report = submitted.analyze_ambiguous_words(train_set, pred, test_set, k=5)
for item in report:
print(item["word"])
print(" train:", item["train_gold_tag_counts"])
print(" pred :", item["test_pred_tag_counts"])
print(" gold :", item["test_gold_tag_counts"])
except NotImplementedError:
print("Implement analyze_ambiguous_words in submitted.py first, then re-run.")
to
train: {'TO': 9236, 'IN': 6923, 'NOUN': 1, 'ADV': 1, 'X': 1}
pred : {'TO': 4773}
gold : {'IN': 1977, 'TO': 2796}
that
train: {'CONJ': 3960, 'PRON': 1136, 'DET': 1422, 'ADV': 31, 'X': 1}
pred : {'CONJ': 2044}
gold : {'DET': 421, 'PRON': 358, 'CONJ': 1255, 'ADV': 10}
her
train: {'DET': 1220, 'PRON': 632}
pred : {'DET': 826}
gold : {'DET': 491, 'PRON': 335}
as
train: {'CONJ': 3766, 'IN': 77, 'ADV': 704, 'X': 1}
pred : {'CONJ': 1334}
gold : {'CONJ': 1121, 'ADV': 197, 'IN': 16}
more
train: {'ADJ': 633, 'ADV': 786, 'X': 1}
pred : {'ADV': 402}
gold : {'ADJ': 179, 'ADV': 223}
help(submitted.baseline)
Help on function baseline in module submitted:

baseline(test, train)
import time
importlib.reload(submitted)
train_set = utils.load_dataset('data/brown-training.txt')
dev_set = utils.load_dataset('data/brown-test.txt')
start_time = time.time()
predicted = submitted.baseline(utils.strip_tags(dev_set), train_set)
time_spend = time.time() - start_time
accuracy, _, _ = utils.evaluate_accuracies(predicted, dev_set)
multi_tag_accuracy, unseen_words_accuracy = utils.specialword_accuracies(train_set, predicted, dev_set)
print("time spent: {0:.4f} sec".format(time_spend))
print("accuracy: {0:.4f}".format(accuracy))
print("multi-tag accuracy: {0:.4f}".format(multi_tag_accuracy))
print("unseen word accuracy: {0:.4f}".format(unseen_words_accuracy))
time spent: 0.4107 sec
accuracy: 0.9387
multi-tag accuracy: 0.9019
unseen word accuracy: 0.6782
Viterbi: HMM Tagger
The Viterbi tagger should implement the HMM trellis (Viterbi) decoding algorithm as seen in lecture or in Jurafsky and Martin. That is, the probability of each tag depends only on the previous tag, and the probability of each word depends only on the corresponding tag. This model will need to estimate three sets of probabilities:
- Initial probabilities (How often does each tag occur at the start of a sentence?)
- Transition probabilities (How often does tag \(t_b\) follow tag \(t_a\)?)
- Emission probabilities (How often does tag t yield word w?)
You can assume that all sentences will begin with a START token, whose tag is START. So your initial probabilities will have a very restricted form, whether you choose to handcode appropriate numbers or learn them from the data. The initial probabilities shown in the textbook/lecture examples will be handled by the transition probabilities out of the START tag.
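Concretely, with \(t_0 = \text{START}\), the HMM described above scores a tag sequence \(t_{1:n}\) for a word sequence \(w_{1:n}\) as
\[
P(t_{1:n}, w_{1:n}) = \prod_{i=1}^{n} P(t_i \mid t_{i-1}) \, P(w_i \mid t_i),
\]
and the Viterbi algorithm returns the tag sequence that maximizes this quantity (equivalently, the sum of the corresponding log-probabilities).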
It's helpful to think of your processing in five steps:
- Count occurrences of tags, tag pairs, tag/word pairs.
- Compute smoothed probabilities
- Take the log of each probability
- Construct the trellis. Notice that for each tag/time pair, you must store not only the probability of the best path but also a pointer to the previous tag/time pair in that path.
- Return the best path through the trellis.
You'll need to use smoothing to get good performance. Make sure that your code for computing transition and emission probabilities never returns zero. Use Laplace smoothing to handle zero-probability cases when calculating initial probabilities, transition probabilities, and emission probabilities.
For example, to smooth the emission probabilities, consider each tag individually. For a fixed tag T, you need to ensure that \(P_e(W|T)\) produces a non-zero number no matter what word W you give it. You can use Laplace smoothing to fill in a probability for "UNKNOWN" which will be the return value for all words W that were not seen in the training data. For this initial implementation of Viterbi, use the same Laplace smoothing constant \(\alpha\) for all tags.
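One common form of the Laplace-smoothed emission estimate, where \(V_T\) is the number of distinct words seen with tag \(T\) in training, is
\[
P_e(W \mid T) = \frac{\mathrm{count}(T, W) + \alpha}{\mathrm{count}(T) + \alpha\,(V_T + 1)},
\qquad
P_e(\mathrm{UNKNOWN} \mid T) = \frac{\alpha}{\mathrm{count}(T) + \alpha\,(V_T + 1)}.
\]
Treat this as a sketch of the idea; whatever variant you use, check that it never returns zero and that the probabilities for a fixed tag sum to one (counting UNKNOWN as one extra word type).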
Please follow this description when implementing your algorithm, and do not attempt improvements in this part, as they might cause your code to fail some of our test cases.
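Steps 4 and 5 are where most bugs hide. Below is a minimal sketch of the trellis construction and backtrace, assuming the smoothed log-probability tables have already been built, with trans_lp keyed by (prev_tag, tag), emit_lp keyed by (tag, word), and unk_lp mapping each tag to the log-probability of an unseen word. The names mirror the toy example above but are illustrative, not the required interface:
# Illustrative sketch of Viterbi decoding for one sentence (list of words
# starting with 'START' and ending with 'END').
import math

def viterbi_sketch(sentence, tagset, trans_lp, emit_lp, unk_lp):
    # Emission log-prob, falling back to the per-tag unseen-word probability.
    def emission(tag, word):
        return emit_lp.get((tag, word), unk_lp[tag])
    # Each trellis column maps tag -> (best log-prob so far, backpointer to previous tag).
    trellis = [{'START': (0.0, None)}]
    for word in sentence[1:]:
        column = {}
        for tag in tagset:
            best_prev, best_score = None, -math.inf
            for prev_tag, (prev_score, _) in trellis[-1].items():
                # Smoothing should make every transition available; -inf is a defensive fallback.
                score = prev_score + trans_lp.get((prev_tag, tag), -math.inf) + emission(tag, word)
                if score > best_score:
                    best_prev, best_score = prev_tag, score
            column[tag] = (best_score, best_prev)
        trellis.append(column)
    # Backtrace from the best tag in the final column.
    best_tag = max(trellis[-1], key=lambda t: trellis[-1][t][0])
    tags = [best_tag]
    for column in reversed(trellis[1:]):
        best_tag = column[best_tag][1]
        tags.append(best_tag)
    return list(reversed(tags))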
help(submitted.viterbi)
Help on function viterbi in module submitted:

viterbi(test, train)
import time
importlib.reload(submitted)
train_set = utils.load_dataset('data/brown-training.txt')
dev_set = utils.load_dataset('data/brown-test.txt')
start_time = time.time()
predicted = submitted.viterbi(utils.strip_tags(dev_set), train_set)
time_spend = time.time() - start_time
accuracy, _, _ = utils.evaluate_accuracies(predicted, dev_set)
multi_tag_accuracy, unseen_words_accuracy = utils.specialword_accuracies(train_set, predicted, dev_set)
print("time spent: {0:.4f} sec".format(time_spend))
print("accuracy: {0:.4f}".format(accuracy))
print("multi-tag accuracy: {0:.4f}".format(multi_tag_accuracy))
print("unseen word accuracy: {0:.4f}".format(unseen_words_accuracy))
time spent: 9.2806 sec
accuracy: 0.9380
multi-tag accuracy: 0.9381
unseen word accuracy: 0.2490
Grade your homework¶
If you've reached this point, and all of the above sections work, then you're ready to try grading your homework! Before you submit it to Gradescope, try grading it on your own machine. This will run some visible test cases.
!python3 grade.py
{
"tests": [
{
"name": "Visible test for the assignment helper:",
"score": 10,
"max_score": 10,
"status": "passed",
"visibility": "visible"
},
{
"name": "Sanity check: baseline count tables have expected types and non-empty content.",
"score": 5,
"max_score": 5,
"status": "passed",
"visibility": "visible"
},
{
"name": "Sanity check HMM helpers:",
"score": 5,
"max_score": 5,
"status": "passed",
"visibility": "visible"
}
],
"leaderboard": [],
"visibility": "visible",
"execution_time": "1.11",
"score": 20
}
Now you should try uploading submitted.py to Gradescope.
Gradescope will run the same visible tests that you just ran on your own machine, plus some additional hidden tests. It's possible that your code passes all the visible tests, but fails the hidden tests. Debug by running your function with a variety of different input parameters, and see if you can get it to respond correctly in all cases.
Once your code works perfectly on Gradescope, with no errors, then you are done with the MP. Congratulations!