diff --git a/Neural graph module/ngm.ipynb b/Neural graph module/ngm.ipynb
index 529b7d2ba3c0c0b0a457fc8c293954f3c241b12e..a1576638b3ea4097ce2d5ff2925261ea74af84c4 100644
--- a/Neural graph module/ngm.ipynb	
+++ b/Neural graph module/ngm.ipynb	
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -20,29 +20,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Downloading: 100%|██████████| 232k/232k [00:00<00:00, 636kB/s] \n",
-      "c:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\huggingface_hub\\file_download.py:123: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\maxbj\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
-      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
-      "  warnings.warn(message)\n",
-      "Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 28.9kB/s]\n",
-      "Downloading: 100%|██████████| 570/570 [00:00<00:00, 572kB/s]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "tokenizer = BertTokenizer.from_pretrained(\"bert-base-uncased\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -55,8 +42,10 @@
     "        self.linear = nn.Linear(768, 247)\n",
     "        self.softmax = nn.Softmax(dim=1)\n",
     "    \n",
-    "    def forward(self, tokenized_seq):\n",
-    "        x = self.bert.forward(tokenized_seq)\n",
+    "    def forward(self, tokenized_seq, tokenized_mask):\n",
+    "        x = self.bert.forward(tokenized_seq, attention_mask=tokenized_mask)\n",
+    "        x = x[0][:,0,:]\n",
+    "        \n",
     "        x = self.linear(x)\n",
     "\n",
     "        x = self.softmax(x)\n",
@@ -65,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,7 +92,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -173,13 +162,16 @@
     "\n",
     "    inputs_padded = np.array([i + [0]*(inputs_max_len-len(i)) for i in inputs])\n",
     "    correct_rels_padded = np.array([i + [0]*(correct_rels_max_len-len(i)) for i in correct_rels])\n",
+    "\n",
+    "    inputs_attention_mask = np.where(inputs_padded != 0, 1, 0)\n",
+    "    correct_rels_attention_mask = np.where(correct_rels_padded != 0, 1, 0)\n",
     "    print(\"Finished with batches\")\n",
-    "    return torch.IntTensor(inputs_padded), torch.IntTensor(correct_rels_padded)\n"
+    "    return torch.IntTensor(inputs_padded), torch.IntTensor(inputs_attention_mask), torch.IntTensor(correct_rels_padded), torch.IntTensor(correct_rels_attention_mask)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -196,28 +188,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
-     "ename": "MemoryError",
-     "evalue": "",
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Beginning making batch\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 408/408 [00:00<00:00, 688.03it/s]\n",
+      "100%|██████████| 408/408 [00:00<00:00, 2241.79it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Finished with batches\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  0%|          | 0/3 [03:03<?, ?it/s]\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "0D or 1D target tensor expected, multi-target not supported",
      "output_type": "error",
      "traceback": [
       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[1;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
-      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\transformers\\modeling_utils.py:399\u001b[0m, in \u001b[0;36mload_state_dict\u001b[1;34m(checkpoint_file)\u001b[0m\n\u001b[0;32m    398\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 399\u001b[0m     \u001b[39mreturn\u001b[39;00m torch\u001b[39m.\u001b[39;49mload(checkpoint_file, map_location\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mcpu\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m    400\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n",
-      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py:713\u001b[0m, in \u001b[0;36mload\u001b[1;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[0;32m    712\u001b[0m         \u001b[39mreturn\u001b[39;00m _load(opened_zipfile, map_location, pickle_module, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mpickle_load_args)\n\u001b[1;32m--> 713\u001b[0m \u001b[39mreturn\u001b[39;00m _legacy_load(opened_file, map_location, pickle_module, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mpickle_load_args)\n",
-      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py:930\u001b[0m, in \u001b[0;36m_legacy_load\u001b[1;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[0;32m    929\u001b[0m unpickler\u001b[39m.\u001b[39mpersistent_load \u001b[39m=\u001b[39m persistent_load\n\u001b[1;32m--> 930\u001b[0m result \u001b[39m=\u001b[39m unpickler\u001b[39m.\u001b[39;49mload()\n\u001b[0;32m    932\u001b[0m deserialized_storage_keys \u001b[39m=\u001b[39m pickle_module\u001b[39m.\u001b[39mload(f, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mpickle_load_args)\n",
-      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py:871\u001b[0m, in \u001b[0;36m_legacy_load.<locals>.persistent_load\u001b[1;34m(saved_id)\u001b[0m\n\u001b[0;32m    870\u001b[0m \u001b[39mif\u001b[39;00m root_key \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m deserialized_objects:\n\u001b[1;32m--> 871\u001b[0m     obj \u001b[39m=\u001b[39m cast(Storage, torch\u001b[39m.\u001b[39;49m_UntypedStorage(nbytes))\n\u001b[0;32m    872\u001b[0m     obj\u001b[39m.\u001b[39m_torch_load_uninitialized \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
-      "\u001b[1;31mRuntimeError\u001b[0m: [enforce fail at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\c10\\core\\impl\\alloc_cpu.cpp:81] data. DefaultCPUAllocator: not enough memory: you tried to allocate 93763584 bytes.",
-      "\nDuring handling of the above exception, another exception occurred:\n",
-      "\u001b[1;31mMemoryError\u001b[0m                               Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn [69], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m model \u001b[39m=\u001b[39m NgmOne()\n\u001b[0;32m      3\u001b[0m EPOCHS \u001b[39m=\u001b[39m \u001b[39m3\u001b[39m\n\u001b[0;32m      4\u001b[0m criterion \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mCrossEntropyLoss()\n",
-      "Cell \u001b[1;32mIn [64], line 5\u001b[0m, in \u001b[0;36mNgmOne.__init__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[39msuper\u001b[39m(NgmOne, \u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m()\n\u001b[0;32m      4\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtokenizer \u001b[39m=\u001b[39m BertTokenizer\u001b[39m.\u001b[39mfrom_pretrained(\u001b[39m\"\u001b[39m\u001b[39mbert-base-uncased\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 5\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbert \u001b[39m=\u001b[39m BertModel\u001b[39m.\u001b[39;49mfrom_pretrained(\u001b[39m\"\u001b[39;49m\u001b[39mbert-base-uncased\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m      6\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlinear \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mLinear(\u001b[39m768\u001b[39m, \u001b[39m247\u001b[39m)\n\u001b[0;32m      7\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39msoftmax \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mSoftmax(dim\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n",
-      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\transformers\\modeling_utils.py:2184\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m   2181\u001b[0m \u001b[39mif\u001b[39;00m from_pt:\n\u001b[0;32m   2182\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_sharded \u001b[39mand\u001b[39;00m state_dict \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m   2183\u001b[0m         \u001b[39m# Time to load the checkpoint\u001b[39;00m\n\u001b[1;32m-> 2184\u001b[0m         state_dict \u001b[39m=\u001b[39m load_state_dict(resolved_archive_file)\n\u001b[0;32m   2186\u001b[0m     \u001b[39m# set dtype to instantiate the model under:\u001b[39;00m\n\u001b[0;32m   2187\u001b[0m     \u001b[39m# 1. If torch_dtype is not None, we use that dtype\u001b[39;00m\n\u001b[0;32m   2188\u001b[0m     \u001b[39m# 2. If torch_dtype is \"auto\", we auto-detect dtype from the loaded state_dict, by checking its first\u001b[39;00m\n\u001b[0;32m   2189\u001b[0m     \u001b[39m#    weights entry that is of a floating type - we assume all floating dtype weights are of the same dtype\u001b[39;00m\n\u001b[0;32m   2190\u001b[0m     \u001b[39m# we also may have config.torch_dtype available, but we won't rely on it till v5\u001b[39;00m\n\u001b[0;32m   2191\u001b[0m     dtype_orig \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n",
-      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\transformers\\modeling_utils.py:403\u001b[0m, in \u001b[0;36mload_state_dict\u001b[1;34m(checkpoint_file)\u001b[0m\n\u001b[0;32m    401\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m    402\u001b[0m     \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(checkpoint_file) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m--> 403\u001b[0m         \u001b[39mif\u001b[39;00m f\u001b[39m.\u001b[39;49mread()\u001b[39m.\u001b[39mstartswith(\u001b[39m\"\u001b[39m\u001b[39mversion\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m    404\u001b[0m             \u001b[39mraise\u001b[39;00m \u001b[39mOSError\u001b[39;00m(\n\u001b[0;32m    405\u001b[0m                 \u001b[39m\"\u001b[39m\u001b[39mYou seem to have cloned a repository without having git-lfs installed. Please install \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    406\u001b[0m                 \u001b[39m\"\u001b[39m\u001b[39mgit-lfs and run `git lfs install` followed by `git lfs pull` in the folder \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    407\u001b[0m                 \u001b[39m\"\u001b[39m\u001b[39myou cloned.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    408\u001b[0m             )\n\u001b[0;32m    409\u001b[0m         \u001b[39melse\u001b[39;00m:\n",
-      "\u001b[1;31mMemoryError\u001b[0m: "
+      "Cell \u001b[1;32mIn [26], line 13\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[39m# Forward pass\u001b[39;00m\n\u001b[0;32m     12\u001b[0m output \u001b[39m=\u001b[39m model(train, train_mask)\n\u001b[1;32m---> 13\u001b[0m loss \u001b[39m=\u001b[39m criterion(output, corr_rels)\n\u001b[0;32m     15\u001b[0m \u001b[39mif\u001b[39;00m (epoch \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m) \u001b[39m%\u001b[39m \u001b[39m10\u001b[39m \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m     16\u001b[0m     \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mEpoch:\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m%04d\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (epoch \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m), \u001b[39m'\u001b[39m\u001b[39mcost =\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m{:.6f}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(loss))\n",
+      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\nn\\modules\\module.py:1130\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m   1126\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1127\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1128\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1129\u001b[0m         \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1130\u001b[0m     \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39m\u001b[39minput\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m   1131\u001b[0m \u001b[39m# Do not call functions when jit is used\u001b[39;00m\n\u001b[0;32m   1132\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[39m=\u001b[39m [], []\n",
+      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\nn\\modules\\loss.py:1164\u001b[0m, in \u001b[0;36mCrossEntropyLoss.forward\u001b[1;34m(self, input, target)\u001b[0m\n\u001b[0;32m   1163\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mforward\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39minput\u001b[39m: Tensor, target: Tensor) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tensor:\n\u001b[1;32m-> 1164\u001b[0m     \u001b[39mreturn\u001b[39;00m F\u001b[39m.\u001b[39;49mcross_entropy(\u001b[39minput\u001b[39;49m, target, weight\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mweight,\n\u001b[0;32m   1165\u001b[0m                            ignore_index\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mignore_index, reduction\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreduction,\n\u001b[0;32m   1166\u001b[0m                            label_smoothing\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlabel_smoothing)\n",
+      "File \u001b[1;32mc:\\Users\\maxbj\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\nn\\functional.py:3014\u001b[0m, in \u001b[0;36mcross_entropy\u001b[1;34m(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)\u001b[0m\n\u001b[0;32m   3012\u001b[0m \u001b[39mif\u001b[39;00m size_average \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mor\u001b[39;00m reduce \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m   3013\u001b[0m     reduction \u001b[39m=\u001b[39m _Reduction\u001b[39m.\u001b[39mlegacy_get_string(size_average, reduce)\n\u001b[1;32m-> 3014\u001b[0m \u001b[39mreturn\u001b[39;00m torch\u001b[39m.\u001b[39;49m_C\u001b[39m.\u001b[39;49m_nn\u001b[39m.\u001b[39;49mcross_entropy_loss(\u001b[39minput\u001b[39;49m, target, weight, _Reduction\u001b[39m.\u001b[39;49mget_enum(reduction), ignore_index, label_smoothing)\n",
+      "\u001b[1;31mRuntimeError\u001b[0m: 0D or 1D target tensor expected, multi-target not supported"
      ]
     }
    ],
@@ -228,12 +251,12 @@
     "criterion = nn.CrossEntropyLoss()\n",
     "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
     "\n",
-    "train, corr_rels = make_batch()\n",
+    "train,train_mask, corr_rels, correct_rels_mask = make_batch()\n",
     "for epoch in tqdm(range(EPOCHS)):\n",
     "    optimizer.zero_grad()\n",
     "\n",
     "    # Forward pass\n",
-    "    output = model(train)\n",
+    "    output = model(train, train_mask)\n",
     "    loss = criterion(output, corr_rels)\n",
     "\n",
     "    if (epoch + 1) % 10 == 0:\n",