diff --git a/Aug12.py b/Aug12.py
index 7fee837b3e863fc4710dc1ac7c28c2b914cd5375..d84b0adeceb21b1b075b482d8f2c7545470eff32 100644
--- a/Aug12.py
+++ b/Aug12.py
@@ -149,30 +149,31 @@ def make_vocabs(gold_data):
 import torch
 import torch.nn as nn
 
-class FixedWindowModel(nn.Module):
+import numpy as np
 
-    def __init__(self, embedding_specs, hidden_dim, output_dim):
+class FixedWindowModel(nn.Module):
+    def __init__(self, embedding_specs, hidden_dim, output_dim, pretrained_embeddings=None):
         super().__init__()
-
-        # Create the embeddings based on the given specifications
+
         self.embeddings = nn.ModuleList()
         for n, num_embeddings, embedding_dim in embedding_specs:
-            embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
-            nn.init.normal_(embedding.weight, std=1e-2)
-            for i in range(n):
+            if pretrained_embeddings is not None and num_embeddings == pretrained_embeddings.size(0):  # use the pretrained vectors for the word embeddings (first spec)
+                embedding = nn.Embedding.from_pretrained(pretrained_embeddings, padding_idx=0, freeze=False)
+            else:
+                embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
+                nn.init.normal_(embedding.weight, std=1e-4)
+            for _ in range(n):
                 self.embeddings.append(embedding)
-
-        # Set up the FFN
+
         input_dim = sum(e.embedding_dim for e in self.embeddings)
-        self.pipe = nn.Sequential(
-            nn.Linear(input_dim, hidden_dim),
-            nn.ReLU(),
-            nn.Linear(hidden_dim, output_dim),
-        )
-
+        self.hidden = nn.Linear(input_dim, hidden_dim)
+        self.output = nn.Linear(hidden_dim, output_dim)
+
     def forward(self, x):
-        embedded = [e(x[..., i]) for i, e in enumerate(self.embeddings)]
-        return self.pipe(torch.cat(embedded, -1))
+        embedded = torch.cat([e(x[..., i]) for i, e in enumerate(self.embeddings)], dim=-1)
+        hidden = F.relu(self.hidden(embedded))
+        return self.output(hidden)
+
 
 
 # ## Part 4: Part-of-speech tagger
@@ -541,10 +542,10 @@ class BeamState:
 
 
 class FixedWindowParser(ArcStandardParser):
-    def __init__(self, vocab_words, vocab_tags, word_dim=50, tag_dim=10, hidden_dim=180, beam_size=BEAM_SIZE):
+    def __init__(self, vocab_words, vocab_tags, word_dim=50, tag_dim=10, hidden_dim=180, beam_size=BEAM_SIZE, pretrained_embeddings=None):
         embedding_specs = [(3, len(vocab_words), word_dim), (3, len(vocab_tags), tag_dim)]
         num_actions = len(ArcStandardParser.MOVES) + 1
-        self.model = FixedWindowModel(embedding_specs, hidden_dim, num_actions)
+        self.model = FixedWindowModel(embedding_specs, hidden_dim, num_actions, pretrained_embeddings)
         self.w2i = vocab_words
         self.t2i = vocab_tags
         self.beam_size = beam_size
@@ -561,48 +562,6 @@ class FixedWindowParser(ArcStandardParser):
         x[5] = tags[stack[-2]] if len(stack) >= 2 else PAD_IDX
         return x
 
-    # def predict(self, words, tags):
-    #     words = [self.w2i.get(w, UNK_IDX) for w in words]
-    #     tags = [self.t2i.get(t, UNK_IDX) for t in tags]
-    #     initial_config = self.initial_config(len(words))
-
-    #     beam = [BeamState(initial_config, 0, [])]
-    #     final_states = []
-
-    #     while beam and len(final_states) < self.beam_size:
-    #         new_beam = []
-    #         for state in beam:
-    #             if self.is_final_config(state.config):
-    #                 final_states.append(state)
-    #                 continue
-
-    #             features = self.featurize(words, tags, state.config)
-    #             with torch.no_grad():
-    #                 logits = self.model(features.unsqueeze(0))
-    #                 log_probs = F.log_softmax(logits, dim=1).squeeze(0)
-
-    #             for move in self.valid_moves(state.config):
-    #                 new_config = self.next_config(state.config, move)
-    #                 new_score = state.score + log_probs[move].item()
-    #                 new_actions = state.actions + [move]
-    #                 new_beam.append(BeamState(new_config, new_score, new_actions))
-
-    #         # Normalize scores by sequence length
-    #         for state in new_beam:
-    #             state.score /= len(state.actions)
-
-    #         # Keep top-k states
-    #         beam = sorted(new_beam, key=lambda x: x.score, reverse=True)[:self.beam_size]
-
-    #     if final_states:
-    #         best_state = max(final_states, key=lambda x: x.score)
-    #         _, _, heads = best_state.config
-    #         return heads
-    #     else:
-    #         # If no final state is found, return the best partial parse
-    #         best_state = max(beam, key=lambda x: x.score)
-    #         _, _, heads = best_state.config
-    #         return heads
     def predict(self, words, tags):
         words = [self.w2i.get(w, UNK_IDX) for w in words]
         tags = [self.t2i.get(t, UNK_IDX) for t in tags]
@@ -782,26 +741,32 @@
 import torch.nn.functional as F
 import torch.optim as optim
 from tqdm import tqdm
-
-def train_parser(train_data, n_epochs=1, batch_size=100, lr=1e-3, beam_size=BEAM_SIZE, hidden_dim=256):
-    # Create the vocabularies
+def load_pretrained_embeddings(vocab_words, embed_file, embed_dim=100):
+    embeddings = np.random.uniform(-0.25, 0.25, (len(vocab_words), embed_dim))
+    embeddings[0] = 0  # Padding
+    with open(embed_file, 'r', encoding='utf-8') as f:
+        for line in f:
+            word, *vector = line.split()
+            if word in vocab_words:
+                embeddings[vocab_words[word]] = np.array(vector, dtype=float)
+    return torch.FloatTensor(embeddings)
+
+def train_parser(train_data, n_epochs=2, batch_size=64, lr=5e-4, beam_size=4, hidden_dim=300, embed_file='glove.6B.100d.txt'):
     vocab_words, vocab_tags = make_vocabs(train_data)
-
-    # Instantiate the parser
-    parser = FixedWindowParser(vocab_words, vocab_tags, beam_size=beam_size, hidden_dim=hidden_dim)
-
-    # Instantiate the optimizer
+    pretrained_embeddings = load_pretrained_embeddings(vocab_words, embed_file)
+
+    parser = FixedWindowParser(vocab_words, vocab_tags, beam_size=beam_size, hidden_dim=hidden_dim, pretrained_embeddings=pretrained_embeddings)
     optimizer = optim.Adam(parser.model.parameters(), lr=lr)
 
-    # Training loop
     for epoch in range(n_epochs):
        running_loss = 0
        n_examples = 0
+        parser.model.train()
        with tqdm(total=sum(2*len(s)-1 for s in train_data)) as pbar:
-            for bx, by in training_examples(vocab_words, vocab_tags, train_data, parser):
+            for bx, by in training_examples(vocab_words, vocab_tags, train_data, parser, batch_size):
                optimizer.zero_grad()
-                output = parser.model.forward(bx)
-                loss = F.cross_entropy(output, by) # Ensure correct handling of all classes, including error state
+                output = parser.model(bx)
+                loss = F.cross_entropy(output, by)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
@@ -809,6 +774,8 @@ def train_parser(train_data, n_epochs=1, batch_size=100, lr=1e-3, beam_size=BEAM
                pbar.set_postfix(loss=running_loss/n_examples)
                pbar.update(len(bx))
 
+        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {running_loss/n_examples:.4f}")
+
     return parser
 
 
@@ -883,10 +850,20 @@ def evaluate(tagger, parser, gold_sentences):
 # The tagging accuracy and unlabelled attachment score on the development data should be around 88% and 65%, respectively.
 
 # %%
-for beam_size in [1, 2, 3, 4, 5 , 6, 7, 8, 9, 10]:
+beam_sizes = [2, 3]
+results = []
+
+for beam_size in beam_sizes:
     parser = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=beam_size, hidden_dim=300)
     score = uas(parser, EN_DEV_DATA)
+    results.append((beam_size, score))
+    print(f"Beam size {beam_size}: UAS = {score:.4f}")
+
+# Print summary of results
+print("\nSummary of results:")
+for beam_size, score in results:
     print(f"Beam size {beam_size}: UAS = {score:.4f}")
-# Train the final model with the best beam size
-# PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=best_beam_size, hidden_dim=300)
-# print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")
\ No newline at end of file
+
+# Find best beam size
+best_beam_size, best_score = max(results, key=lambda x: x[1])
+print(f"\nBest performance: Beam size {best_beam_size} with UAS = {best_score:.4f}")
\ No newline at end of file
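
The pretrained-embedding branch in FixedWindowModel is keyed on the vocabulary size matching the pretrained matrix, so only the word features should end up with GloVe vectors. The cell below is a minimal smoke test of that wiring, assuming the patched definitions and the script's imports (torch, torch.nn.functional as F) are already in scope, for example as an extra # %% cell in the same session; the toy vocabulary sizes and dimensions are made up for illustration.

# %%
# Smoke test: only the word embeddings should pick up the pretrained matrix.
toy_word_vocab, toy_tag_vocab, toy_dim = 7, 4, 5
fake_pretrained = torch.randn(toy_word_vocab, toy_dim)  # stands in for the GloVe rows
fake_pretrained[0] = 0  # zero padding row, mirroring load_pretrained_embeddings

specs = [(3, toy_word_vocab, toy_dim), (3, toy_tag_vocab, 2)]  # mirrors the parser's embedding_specs
model = FixedWindowModel(specs, hidden_dim=8, output_dim=3, pretrained_embeddings=fake_pretrained)

assert torch.equal(model.embeddings[0].weight, fake_pretrained)  # word features share the pretrained weights
assert model.embeddings[3].weight.shape == (toy_tag_vocab, 2)    # tag features keep their own small embedding

x = torch.zeros(2, 6, dtype=torch.long)  # batch of 2 feature windows, 6 feature columns
print(model(x).shape)  # expected: torch.Size([2, 3])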
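
load_pretrained_embeddings silently falls back to random vectors for any vocabulary word missing from the embedding file, so it can be worth measuring how large that fallback share is before training. A rough sketch, assuming the same whitespace-separated glove.6B.100d.txt format used above and a vocab_words dict as produced by make_vocabs:

# %%
def embedding_coverage(vocab_words, embed_file='glove.6B.100d.txt'):
    """Return the fraction of vocabulary entries that have a pretrained vector."""
    with open(embed_file, 'r', encoding='utf-8') as f:
        pretrained_words = {line.split(' ', 1)[0] for line in f}
    covered = sum(1 for w in vocab_words if w in pretrained_words)
    return covered / len(vocab_words)

# Example (uses the training data and make_vocabs from this script):
# coverage = embedding_coverage(make_vocabs(EN_TRAIN_DATA)[0])
# print(f"GloVe coverage of the training vocabulary: {coverage:.1%}")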
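
The commented-out lines that this patch deletes at the bottom of the file pointed at one further step: retrain a single final model with whichever beam size scored best and report its dev UAS. A possible follow-up cell, assuming train_parser, uas, EN_TRAIN_DATA, EN_DEV_DATA and the results list from the sweep above are in scope:

# %%
# Retrain once with the best beam size from the sweep (recomputed here so the cell is self-contained).
best_beam_size, _ = max(results, key=lambda x: x[1])
PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4,
                      beam_size=best_beam_size, hidden_dim=300)
print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")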