Commit 5c682b55 authored by Shaoxuan Yin
74%

parent 65a87756
@@ -149,30 +149,31 @@ def make_vocabs(gold_data):
 import torch
 import torch.nn as nn
+import numpy as np
 
 
 class FixedWindowModel(nn.Module):
-    def __init__(self, embedding_specs, hidden_dim, output_dim):
+    def __init__(self, embedding_specs, hidden_dim, output_dim, pretrained_embeddings=None):
         super().__init__()
         # Create the embeddings based on the given specifications
         self.embeddings = nn.ModuleList()
-        for n, num_embeddings, embedding_dim in embedding_specs:
-            embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
-            nn.init.normal_(embedding.weight, std=1e-2)
-            for i in range(n):
+        for spec_idx, (n, num_embeddings, embedding_dim) in enumerate(embedding_specs):
+            if pretrained_embeddings is not None and spec_idx == 0:
+                # The word embeddings are the first spec in the list; start from
+                # the pretrained matrix and keep it trainable.
+                embedding = nn.Embedding.from_pretrained(pretrained_embeddings, padding_idx=0, freeze=False)
+            else:
+                embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
+                nn.init.normal_(embedding.weight, std=1e-4)
+            for _ in range(n):
                 self.embeddings.append(embedding)
         # Set up the FFN
         input_dim = sum(e.embedding_dim for e in self.embeddings)
-        self.pipe = nn.Sequential(
-            nn.Linear(input_dim, hidden_dim),
-            nn.ReLU(),
-            nn.Linear(hidden_dim, output_dim),
-        )
+        self.hidden = nn.Linear(input_dim, hidden_dim)
+        self.output = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, x):
-        embedded = [e(x[..., i]) for i, e in enumerate(self.embeddings)]
-        return self.pipe(torch.cat(embedded, -1))
+        embedded = torch.cat([e(x[..., i]) for i, e in enumerate(self.embeddings)], dim=-1)
+        hidden = F.relu(self.hidden(embedded))  # F = torch.nn.functional, imported later in the notebook
+        return self.output(hidden)
 
 
 # ## Part 4: Part-of-speech tagger
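As a quick sanity check on the updated model (not part of the commit), here is a minimal usage sketch. The vocabulary sizes, dimensions, and batch size are made-up values, and the 3+3 feature layout mirrors the embedding_specs built by FixedWindowParser further down:

import torch
import torch.nn.functional as F  # forward() expects F in the notebook namespace

# Hypothetical sizes: 10,000 word types, 20 tags, 4 parser actions.
specs = [(3, 10_000, 50), (3, 20, 10)]
model = FixedWindowModel(specs, hidden_dim=180, output_dim=4)

# Batch of 8 feature windows: 3 word indices followed by 3 tag indices each.
x = torch.zeros((8, 6), dtype=torch.long)
print(model(x).shape)  # expected: torch.Size([8, 4])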
@@ -541,10 +542,10 @@ class BeamState:
 class FixedWindowParser(ArcStandardParser):
-    def __init__(self, vocab_words, vocab_tags, word_dim=50, tag_dim=10, hidden_dim=180, beam_size=BEAM_SIZE):
+    def __init__(self, vocab_words, vocab_tags, word_dim=50, tag_dim=10, hidden_dim=180, beam_size=BEAM_SIZE, pretrained_embeddings=None):
         embedding_specs = [(3, len(vocab_words), word_dim), (3, len(vocab_tags), tag_dim)]
         num_actions = len(ArcStandardParser.MOVES) + 1
-        self.model = FixedWindowModel(embedding_specs, hidden_dim, num_actions)
+        self.model = FixedWindowModel(embedding_specs, hidden_dim, num_actions, pretrained_embeddings)
         self.w2i = vocab_words
         self.t2i = vocab_tags
         self.beam_size = beam_size
@@ -561,48 +562,6 @@ class FixedWindowParser(ArcStandardParser):
         x[5] = tags[stack[-2]] if len(stack) >= 2 else PAD_IDX
         return x
 
-    # def predict(self, words, tags):
-    #     words = [self.w2i.get(w, UNK_IDX) for w in words]
-    #     tags = [self.t2i.get(t, UNK_IDX) for t in tags]
-    #     initial_config = self.initial_config(len(words))
-    #     beam = [BeamState(initial_config, 0, [])]
-    #     final_states = []
-
-    #     while beam and len(final_states) < self.beam_size:
-    #         new_beam = []
-    #         for state in beam:
-    #             if self.is_final_config(state.config):
-    #                 final_states.append(state)
-    #                 continue
-
-    #             features = self.featurize(words, tags, state.config)
-    #             with torch.no_grad():
-    #                 logits = self.model(features.unsqueeze(0))
-    #                 log_probs = F.log_softmax(logits, dim=1).squeeze(0)
-
-    #             for move in self.valid_moves(state.config):
-    #                 new_config = self.next_config(state.config, move)
-    #                 new_score = state.score + log_probs[move].item()
-    #                 new_actions = state.actions + [move]
-    #                 new_beam.append(BeamState(new_config, new_score, new_actions))
-
-    #         # Normalize scores by sequence length
-    #         for state in new_beam:
-    #             state.score /= len(state.actions)
-
-    #         # Keep top-k states
-    #         beam = sorted(new_beam, key=lambda x: x.score, reverse=True)[:self.beam_size]
-
-    #     if final_states:
-    #         best_state = max(final_states, key=lambda x: x.score)
-    #         _, _, heads = best_state.config
-    #         return heads
-    #     else:
-    #         # If no final state is found, return the best partial parse
-    #         best_state = max(beam, key=lambda x: x.score)
-    #         _, _, heads = best_state.config
-    #         return heads
-
     def predict(self, words, tags):
         words = [self.w2i.get(w, UNK_IDX) for w in words]
         tags = [self.t2i.get(t, UNK_IDX) for t in tags]
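For reference (not part of the commit), the beam-search predict removed above scores each candidate action sequence by the sum of its per-move log-probabilities and then divides by the sequence length. A tiny worked sketch of that scoring rule, with made-up move probabilities:

import math

# Made-up probabilities of the moves chosen along one candidate sequence.
step_probs = [0.6, 0.5, 0.8]
score = sum(math.log(p) for p in step_probs)  # summed log-probabilities, as in BeamState.score
normalized = score / len(step_probs)          # "Normalize scores by sequence length"
print(round(score, 3), round(normalized, 3))  # -1.427 -0.476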
@@ -782,26 +741,32 @@ import torch.nn.functional as F
 import torch.optim as optim
 from tqdm import tqdm
 
-def train_parser(train_data, n_epochs=1, batch_size=100, lr=1e-3, beam_size=BEAM_SIZE, hidden_dim=256):
-    # Create the vocabularies
+def load_pretrained_embeddings(vocab_words, embed_file, embed_dim=100):
+    # Start from small random vectors; words not covered by the embedding file
+    # keep this random initialisation.
+    embeddings = np.random.uniform(-0.25, 0.25, (len(vocab_words), embed_dim))
+    embeddings[0] = 0  # Padding vector stays at zero
+    with open(embed_file, 'r', encoding='utf-8') as f:
+        for line in f:
+            word, *vector = line.split()
+            if word in vocab_words:
+                embeddings[vocab_words[word]] = np.array(vector, dtype=float)
+    return torch.FloatTensor(embeddings)
+
+
+def train_parser(train_data, n_epochs=2, batch_size=64, lr=5e-4, beam_size=4, hidden_dim=300, embed_file='glove.6B.100d.txt'):
     vocab_words, vocab_tags = make_vocabs(train_data)
-    # Instantiate the parser
-    parser = FixedWindowParser(vocab_words, vocab_tags, beam_size=beam_size, hidden_dim=hidden_dim)
-    # Instantiate the optimizer
+    pretrained_embeddings = load_pretrained_embeddings(vocab_words, embed_file)
+    parser = FixedWindowParser(vocab_words, vocab_tags, beam_size=beam_size, hidden_dim=hidden_dim, pretrained_embeddings=pretrained_embeddings)
     optimizer = optim.Adam(parser.model.parameters(), lr=lr)
-    # Training loop
     for epoch in range(n_epochs):
         running_loss = 0
         n_examples = 0
         parser.model.train()
         with tqdm(total=sum(2*len(s)-1 for s in train_data)) as pbar:
-            for bx, by in training_examples(vocab_words, vocab_tags, train_data, parser):
+            for bx, by in training_examples(vocab_words, vocab_tags, train_data, parser, batch_size):
                 optimizer.zero_grad()
-                output = parser.model.forward(bx)
-                loss = F.cross_entropy(output, by)  # Ensure correct handling of all classes, including error state
+                output = parser.model(bx)
+                loss = F.cross_entropy(output, by)
                 loss.backward()
                 optimizer.step()
                 running_loss += loss.item()
@@ -809,6 +774,8 @@ def train_parser(train_data, n_epochs=1, batch_size=100, lr=1e-3, beam_size=BEAM
                 pbar.set_postfix(loss=running_loss/n_examples)
                 pbar.update(len(bx))
+        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {running_loss/n_examples:.4f}")
+
     return parser
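For context (not part of the commit), load_pretrained_embeddings assumes a GloVe-style text file in which each line holds a word followed by its vector components, separated by spaces. A toy sketch of that parsing step, using a made-up 3-dimensional line:

import numpy as np

# Toy line in the "word v1 v2 ... vN" format expected of glove.6B.100d.txt
line = "the 0.418 0.24968 -0.41242"
word, *vector = line.split()
print(word)                           # the
print(np.array(vector, dtype=float))  # [ 0.418    0.24968 -0.41242]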
@@ -883,10 +850,20 @@ def evaluate(tagger, parser, gold_sentences):
 # The tagging accuracy and unlabelled attachment score on the development data should be around 88% and 65%, respectively.
 
 # %%
-for beam_size in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
+beam_sizes = [2, 3]
+results = []
+for beam_size in beam_sizes:
     parser = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=beam_size, hidden_dim=300)
     score = uas(parser, EN_DEV_DATA)
+    results.append((beam_size, score))
     print(f"Beam size {beam_size}: UAS = {score:.4f}")
-
-# Train the final model with the best beam size
-# PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=best_beam_size, hidden_dim=300)
-# print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")
\ No newline at end of file
+
+# Print summary of results
+print("\nSummary of results:")
+for beam_size, score in results:
+    print(f"Beam size {beam_size}: UAS = {score:.4f}")
+
+# Find best beam size
+best_beam_size, best_score = max(results, key=lambda x: x[1])
+print(f"\nBest performance: Beam size {best_beam_size} with UAS = {best_score:.4f}")
\ No newline at end of file
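A possible follow-up cell (not in the commit, mirroring the commented-out lines removed above): retrain once with the selected beam size and report the final score. train_parser, uas, EN_TRAIN_DATA, and EN_DEV_DATA are the notebook's own definitions; best_beam_size comes from the cell above.

# Hypothetical final run with the best beam size found above.
PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4,
                      beam_size=best_beam_size, hidden_dim=300)
print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")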