Commit 5c682b55 authored by Shaoxuan Yin
74%

parent 65a87756
@@ -149,30 +149,31 @@ def make_vocabs(gold_data):
 import torch
 import torch.nn as nn
+import numpy as np
 
 
 class FixedWindowModel(nn.Module):
-    def __init__(self, embedding_specs, hidden_dim, output_dim):
+    def __init__(self, embedding_specs, hidden_dim, output_dim, pretrained_embeddings=None):
         super().__init__()
         # Create the embeddings based on the given specifications
         self.embeddings = nn.ModuleList()
-        for n, num_embeddings, embedding_dim in embedding_specs:
-            embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
-            nn.init.normal_(embedding.weight, std=1e-2)
-            for i in range(n):
+        for spec_idx, (n, num_embeddings, embedding_dim) in enumerate(embedding_specs):
+            if pretrained_embeddings is not None and spec_idx == 0:
+                # The word embeddings are the first spec in the list; start from
+                # the pretrained matrix and keep it trainable.
+                embedding = nn.Embedding.from_pretrained(pretrained_embeddings, padding_idx=0, freeze=False)
+            else:
+                embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
+                nn.init.normal_(embedding.weight, std=1e-4)
+            for _ in range(n):
                 self.embeddings.append(embedding)
         # Set up the FFN
         input_dim = sum(e.embedding_dim for e in self.embeddings)
-        self.pipe = nn.Sequential(
-            nn.Linear(input_dim, hidden_dim),
-            nn.ReLU(),
-            nn.Linear(hidden_dim, output_dim),
-        )
+        self.hidden = nn.Linear(input_dim, hidden_dim)
+        self.output = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, x):
-        embedded = [e(x[..., i]) for i, e in enumerate(self.embeddings)]
-        return self.pipe(torch.cat(embedded, -1))
+        embedded = torch.cat([e(x[..., i]) for i, e in enumerate(self.embeddings)], dim=-1)
+        hidden = F.relu(self.hidden(embedded))  # F = torch.nn.functional, imported later in the notebook
+        return self.output(hidden)
 
 
 # ## Part 4: Part-of-speech tagger
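As a quick sanity check on the updated model (not part of the commit), here is a minimal usage sketch. The vocabulary sizes, dimensions, and batch size are made-up values, and the 3+3 feature layout mirrors the embedding_specs built by FixedWindowParser further down:

import torch
import torch.nn.functional as F  # forward() expects F in the notebook namespace

# Hypothetical sizes: 10,000 word types, 20 tags, 4 parser actions.
specs = [(3, 10_000, 50), (3, 20, 10)]
model = FixedWindowModel(specs, hidden_dim=180, output_dim=4)

# Batch of 8 feature windows: 3 word indices followed by 3 tag indices each.
x = torch.zeros((8, 6), dtype=torch.long)
print(model(x).shape)  # expected: torch.Size([8, 4])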
@@ -541,10 +542,10 @@ class BeamState:
 class FixedWindowParser(ArcStandardParser):
-    def __init__(self, vocab_words, vocab_tags, word_dim=50, tag_dim=10, hidden_dim=180, beam_size=BEAM_SIZE):
+    def __init__(self, vocab_words, vocab_tags, word_dim=50, tag_dim=10, hidden_dim=180, beam_size=BEAM_SIZE, pretrained_embeddings=None):
         embedding_specs = [(3, len(vocab_words), word_dim), (3, len(vocab_tags), tag_dim)]
         num_actions = len(ArcStandardParser.MOVES) + 1
-        self.model = FixedWindowModel(embedding_specs, hidden_dim, num_actions)
+        self.model = FixedWindowModel(embedding_specs, hidden_dim, num_actions, pretrained_embeddings)
         self.w2i = vocab_words
         self.t2i = vocab_tags
         self.beam_size = beam_size
@@ -561,48 +562,6 @@ class FixedWindowParser(ArcStandardParser):
         x[5] = tags[stack[-2]] if len(stack) >= 2 else PAD_IDX
         return x
 
-    # def predict(self, words, tags):
-    #     words = [self.w2i.get(w, UNK_IDX) for w in words]
-    #     tags = [self.t2i.get(t, UNK_IDX) for t in tags]
-    #     initial_config = self.initial_config(len(words))
-    #     beam = [BeamState(initial_config, 0, [])]
-    #     final_states = []
-
-    #     while beam and len(final_states) < self.beam_size:
-    #         new_beam = []
-    #         for state in beam:
-    #             if self.is_final_config(state.config):
-    #                 final_states.append(state)
-    #                 continue
-
-    #             features = self.featurize(words, tags, state.config)
-    #             with torch.no_grad():
-    #                 logits = self.model(features.unsqueeze(0))
-    #                 log_probs = F.log_softmax(logits, dim=1).squeeze(0)
-
-    #             for move in self.valid_moves(state.config):
-    #                 new_config = self.next_config(state.config, move)
-    #                 new_score = state.score + log_probs[move].item()
-    #                 new_actions = state.actions + [move]
-    #                 new_beam.append(BeamState(new_config, new_score, new_actions))
-
-    #         # Normalize scores by sequence length
-    #         for state in new_beam:
-    #             state.score /= len(state.actions)
-
-    #         # Keep top-k states
-    #         beam = sorted(new_beam, key=lambda x: x.score, reverse=True)[:self.beam_size]
-
-    #     if final_states:
-    #         best_state = max(final_states, key=lambda x: x.score)
-    #         _, _, heads = best_state.config
-    #         return heads
-    #     else:
-    #         # If no final state is found, return the best partial parse
-    #         best_state = max(beam, key=lambda x: x.score)
-    #         _, _, heads = best_state.config
-    #         return heads
-
     def predict(self, words, tags):
         words = [self.w2i.get(w, UNK_IDX) for w in words]
         tags = [self.t2i.get(t, UNK_IDX) for t in tags]
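For reference (not part of the commit), the beam-search predict removed above scores each candidate action sequence by the sum of its per-move log-probabilities and then divides by the sequence length. A tiny worked sketch of that scoring rule, with made-up move probabilities:

import math

# Made-up probabilities of the moves chosen along one candidate sequence.
step_probs = [0.6, 0.5, 0.8]
score = sum(math.log(p) for p in step_probs)  # summed log-probabilities, as in BeamState.score
normalized = score / len(step_probs)          # "Normalize scores by sequence length"
print(round(score, 3), round(normalized, 3))  # -1.427 -0.476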
@@ -782,26 +741,32 @@ import torch.nn.functional as F
 import torch.optim as optim
 from tqdm import tqdm
 
-def train_parser(train_data, n_epochs=1, batch_size=100, lr=1e-3, beam_size=BEAM_SIZE, hidden_dim=256):
-    # Create the vocabularies
+def load_pretrained_embeddings(vocab_words, embed_file, embed_dim=100):
+    # Start from small random vectors; words not covered by the embedding file
+    # keep this random initialisation.
+    embeddings = np.random.uniform(-0.25, 0.25, (len(vocab_words), embed_dim))
+    embeddings[0] = 0  # Padding vector stays at zero
+    with open(embed_file, 'r', encoding='utf-8') as f:
+        for line in f:
+            word, *vector = line.split()
+            if word in vocab_words:
+                embeddings[vocab_words[word]] = np.array(vector, dtype=float)
+    return torch.FloatTensor(embeddings)
+
+
+def train_parser(train_data, n_epochs=2, batch_size=64, lr=5e-4, beam_size=4, hidden_dim=300, embed_file='glove.6B.100d.txt'):
     vocab_words, vocab_tags = make_vocabs(train_data)
-    # Instantiate the parser
-    parser = FixedWindowParser(vocab_words, vocab_tags, beam_size=beam_size, hidden_dim=hidden_dim)
-    # Instantiate the optimizer
+    pretrained_embeddings = load_pretrained_embeddings(vocab_words, embed_file)
+    parser = FixedWindowParser(vocab_words, vocab_tags, beam_size=beam_size, hidden_dim=hidden_dim, pretrained_embeddings=pretrained_embeddings)
     optimizer = optim.Adam(parser.model.parameters(), lr=lr)
-    # Training loop
     for epoch in range(n_epochs):
         running_loss = 0
         n_examples = 0
         parser.model.train()
         with tqdm(total=sum(2*len(s)-1 for s in train_data)) as pbar:
-            for bx, by in training_examples(vocab_words, vocab_tags, train_data, parser):
+            for bx, by in training_examples(vocab_words, vocab_tags, train_data, parser, batch_size):
                 optimizer.zero_grad()
-                output = parser.model.forward(bx)
-                loss = F.cross_entropy(output, by)  # Ensure correct handling of all classes, including error state
+                output = parser.model(bx)
+                loss = F.cross_entropy(output, by)
                 loss.backward()
                 optimizer.step()
                 running_loss += loss.item()
@@ -809,6 +774,8 @@ def train_parser(train_data, n_epochs=1, batch_size=100, lr=1e-3, beam_size=BEAM
                 pbar.set_postfix(loss=running_loss/n_examples)
                 pbar.update(len(bx))
+        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {running_loss/n_examples:.4f}")
+
     return parser
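For context (not part of the commit), load_pretrained_embeddings assumes a GloVe-style text file in which each line holds a word followed by its vector components, separated by spaces. A toy sketch of that parsing step, using a made-up 3-dimensional line:

import numpy as np

# Toy line in the "word v1 v2 ... vN" format expected of glove.6B.100d.txt
line = "the 0.418 0.24968 -0.41242"
word, *vector = line.split()
print(word)                           # the
print(np.array(vector, dtype=float))  # [ 0.418    0.24968 -0.41242]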
@@ -883,10 +850,20 @@ def evaluate(tagger, parser, gold_sentences):
 # The tagging accuracy and unlabelled attachment score on the development data should be around 88% and 65%, respectively.
 
 # %%
-for beam_size in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
+beam_sizes = [2, 3]
+results = []
+for beam_size in beam_sizes:
     parser = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=beam_size, hidden_dim=300)
     score = uas(parser, EN_DEV_DATA)
+    results.append((beam_size, score))
     print(f"Beam size {beam_size}: UAS = {score:.4f}")
-
-# Train the final model with the best beam size
-# PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=best_beam_size, hidden_dim=300)
-# print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")
\ No newline at end of file
+
+# Print summary of results
+print("\nSummary of results:")
+for beam_size, score in results:
+    print(f"Beam size {beam_size}: UAS = {score:.4f}")
+
+# Find best beam size
+best_beam_size, best_score = max(results, key=lambda x: x[1])
+print(f"\nBest performance: Beam size {best_beam_size} with UAS = {best_score:.4f}")
\ No newline at end of file
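A possible follow-up cell (not in the commit, mirroring the commented-out lines removed above): retrain once with the selected beam size and report the final score. train_parser, uas, EN_TRAIN_DATA, and EN_DEV_DATA are the notebook's own definitions; best_beam_size comes from the cell above.

# Hypothetical final run with the best beam size found above.
PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4,
                      beam_size=best_beam_size, hidden_dim=300)
print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")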