Commit ec668f9b authored by Ludwig Forsberg

Merge branch 'main' of gitlab.liu.se:tdde19-2022-1/codebase

parents 164ffa75 ad92d313
Showing 7554 additions and 1477 deletions
String evaluation
Precision macro: 0.7997795749087769
Recall macro: 0.7902940991442298
F1 macro: 0.7930972243375974
Precision micro: 0.7881321203180067
Recall micro: 0.7784343884076742
F1 micro: 0.7832532377616681
Fully correct: 168
Query evaluation
Precision macro: 0.3578564718337943
Recall macro: 0.3604568928579589
F1 macro: 0.3538064587750891
Precision micro: 0.5226153669987389
Recall micro: 0.5906943635540006
F1 micro: 0.5545733466496597
Fully correct: 373
String evaluation
Precision macro: 0.7837609705826785
Recall macro: 0.7773669243953512
F1 macro: 0.7778599845931444
Precision micro: 0.7833761498301152
Recall micro: 0.765239213146604
F1 micro: 0.7742014742014742
Fully correct: 152
Query evaluation
Precision macro: 0.3293007083724157
Recall macro: 0.3320987435926118
F1 macro: 0.3252376722062768
Precision micro: 0.16137158132839782
Recall micro: 0.5483607989285475
F1 micro: 0.24936117249462159
Fully correct: 343
String evaluation
Precision macro: 0.7919314392182623
Recall macro: 0.7705734044881352
F1 macro: 0.7785799816305221
Precision micro: 0.7792661619103087
Recall micro: 0.7581963895410022
F1 micro: 0.7685869030034466
Fully correct: 153
Query evaluation
Precision macro: 0.3124380200374491
Recall macro: 0.3151890072254368
F1 macro: 0.30831872223612
Precision micro: 0.44101080834221007
Recall micro: 0.35187659419409484
F1 micro: 0.39143359000602723
Fully correct: 330
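For reference, the macro figures above are per-question precision, recall, and F1 averaged over all questions, while the micro figures are computed from the pooled TP/FP/FN counts. A minimal sketch of that distinction follows; the helper is illustrative only and is not the project's eval code.

``` python
# Illustrative sketch of macro vs. micro averaging over per-question
# (tp, fp, fn) counts; NOT the project's own evaluation implementation.
def macro_micro(counts):
    """counts: list of (tp, fp, fn) tuples, one tuple per question."""
    per_q = []
    for tp, fp, fn in counts:
        p = tp / (tp + fp) if tp + fp else 0.0
        r = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * p * r / (p + r) if p + r else 0.0
        per_q.append((p, r, f1))

    # Macro: average the per-question scores.
    n = len(per_q)
    precision_macro = sum(p for p, _, _ in per_q) / n
    recall_macro = sum(r for _, r, _ in per_q) / n
    f1_macro = sum(f for _, _, f in per_q) / n

    # Micro: pool the counts first, then compute the scores once.
    tp_sum = sum(tp for tp, _, _ in counts)
    fp_sum = sum(fp for _, fp, _ in counts)
    fn_sum = sum(fn for _, _, fn in counts)
    precision_micro = tp_sum / (tp_sum + fp_sum)
    recall_micro = tp_sum / (tp_sum + fn_sum)
    f1_micro = 2 * precision_micro * recall_micro / (precision_micro + recall_micro)

    return (precision_macro, recall_macro, f1_macro,
            precision_micro, recall_micro, f1_micro)
```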
%% Cell type:code id: tags:
``` python
from data import eval_query as eq
from data import pred_query_responses as pqr
# Specify the correct and predicted query responses
input_path = "data/lc-quad-requeried-linked-test.json"
# Only the last pred_path assignment is used; the earlier candidate is kept for reference
# pred_path = "data/predicted/lc-quad-requeried-linked-test-predicted-append-1.csv"
pred_path = "data/predicted/lc-quad-requeried-linked-test-predicted-append-1-large-10-epochs.csv"
dump_path = pred_path.replace("predicted", "pred_responses").replace(".csv", ".json")
```
%% Cell type:code id: tags:
``` python
# Run the predicted queries against the qald-10 server and write the responses to dump_path
dump_path = pred_path.replace("predicted", "pred_responses").replace(".csv", ".json")
pqr.build_responsefile(dump_path, input_path, pred_path)
```
%% Output
222it [00:34, 6.11it/s]
1161it [03:22, 5.74it/s]
Errors: 137
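pqr.build_responsefile executes each predicted SPARQL query against the server and dumps the responses to dump_path. A rough sketch of that step is shown below; the endpoint URL, the "query" CSV column, and the error handling are assumptions for illustration, not the project's implementation.

``` python
# Rough sketch, NOT the project's pqr.build_responsefile: run each predicted
# SPARQL query against an endpoint and dump the JSON responses to a file.
# Endpoint URL and the "query" CSV column name are assumptions.
import csv
import json
import requests

def run_predicted_queries(pred_csv, endpoint, out_path):
    responses, errors = [], 0
    with open(pred_csv, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            try:
                r = requests.get(endpoint,
                                 params={"query": row["query"], "format": "json"},
                                 timeout=30)
                r.raise_for_status()
                responses.append({"query": row["query"], "response": r.json()})
            except Exception:
                # Count queries the server rejected or that timed out.
                errors += 1
                responses.append({"query": row["query"], "response": None})
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(responses, f)
    print(f"Errors: {errors}")
```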
%% Cell type:code id: tags:
``` python
# Evaluate the expected query responses against the predicted query responses
print("Evaluation against server results")
precision_macro_query, recall_macro_query, f1_macro_query, precision_micro_query, recall_micro_query, f1_micro_query, fully_correct_query = eq.eval_query_response(input_path, dump_path)
```
%% Output
Evaluation against server results
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 72567.76it/s]
Precision macro: 0.3145370033483092
Recall macro: 0.3201454048853542
F1 macro: 0.3105830987501682
Precision micro: 0.25646965145318235
Recall micro: 0.3746827411167493
F1 micro: 0.30450570459926785
TP micro: 7086
FP micro: 20543
FN micro: 11826
Fully correct: 331
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
c:\Users\Albin\workspace\codebase\evaluate.ipynb Cell 3 in <cell line: 3>()
      1 # Evaluate the expected query responses against the predicted query responses
      2 print("Evaluation against server results")
----> 3 precision_macro_query, recall_macro_query, f1_macro_query, precision_micro_query, recall_micro_query, f1_micro_query, fully_correct_query = eq.eval_query_response(input_path, dump_path)
NameError: name 'dump_path' is not defined
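eq.eval_query_response scores each question by comparing the answers returned for the gold query with those returned for the predicted query; conceptually this is a per-question set comparison that yields the TP/FP/FN counts printed above. A sketch under that assumption follows; the flat {question_id: [answers]} format is hypothetical.

``` python
# Sketch of response-level scoring: compare the set of answers returned by the
# gold query with the set returned by the predicted query. The flat
# {"question_id": [answers]} input format used here is an assumption.
def score_responses(gold_answers, pred_answers):
    tp = fp = fn = 0
    fully_correct = 0
    for qid, gold in gold_answers.items():
        gold_set = set(gold)
        pred_set = set(pred_answers.get(qid, []))
        tp += len(gold_set & pred_set)   # answers present in both
        fp += len(pred_set - gold_set)   # predicted but not expected
        fn += len(gold_set - pred_set)   # expected but missed
        if gold_set == pred_set:
            fully_correct += 1
    return tp, fp, fn, fully_correct
```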
%% Cell type:code id: tags:
``` python
# Evaluate the expected query against the predicted query by string likeness
print("Evaluation of queries as strings")
precision_macro_string, recall_macro_string, f1_macro_string, precision_micro_string, recall_micro_string, f1_micro_string, fully_correct_string = eq.eval_query_json(input_path, dump_path)
```
%% Output
Evaluation of queries as strings
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 197733.66it/s]
Precision macro: 0.7909642157058177
Recall macro: 0.7592750870528654
F1 macro: 0.7718536879219824
Precision micro: 0.7782839787395596
Recall micro: 0.7467821581801991
F1 micro: 0.7622077170949351
TP micro: 9225
FP micro: 2628
FN micro: 3128
Fully correct: 151
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
c:\Users\Albin\workspace\codebase\evaluate.ipynb Cell 4 in <cell line: 3>()
      1 # Evaluate the expected query against the predicted query by string likeness
      2 print("Evaluation of queries as strings")
----> 3 precision_macro_string, recall_macro_string, f1_macro_string, precision_micro_string, recall_micro_string, f1_micro_string, fully_correct_string = eq.eval_query_json(input_path, dump_path)
NameError: name 'eq' is not defined
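eq.eval_query_json instead compares the queries themselves as strings. One simple way to obtain TP/FP/FN at the string level is token overlap between the gold and predicted SPARQL; the sketch below assumes whitespace tokenisation, which may differ from the project's actual comparison.

``` python
# Sketch of string-level scoring: treat the gold and predicted SPARQL queries
# as bags of whitespace-separated tokens and count overlaps. The exact
# tokenisation used by eq.eval_query_json is not shown here and may differ.
from collections import Counter

def score_query_strings(gold_query, pred_query):
    gold_tokens = Counter(gold_query.split())
    pred_tokens = Counter(pred_query.split())
    tp = sum((gold_tokens & pred_tokens).values())  # shared tokens
    fp = sum((pred_tokens - gold_tokens).values())  # extra predicted tokens
    fn = sum((gold_tokens - pred_tokens).values())  # missing gold tokens
    return tp, fp, fn
```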
%% Cell type:code id: tags:
``` python
# Save the results to a file
res_path = dump_path.replace("pred_responses", "eval").replace(".json", ".txt")
with open(res_path, "w") as f:
    f.write("String evaluation\n\n")
    f.write(f"Precision macro: {precision_macro_string}\n")
    f.write(f"Recall macro: {recall_macro_string}\n")
    f.write(f"F1 macro: {f1_macro_string}\n")
    f.write(f"Precision micro: {precision_micro_string}\n")
    f.write(f"Recall micro: {recall_micro_string}\n")
    f.write(f"F1 micro: {f1_micro_string}\n")
    f.write(f"Fully correct: {fully_correct_string}\n\n")
    f.write("Query evaluation\n\n")
    f.write(f"Precision macro: {precision_macro_query}\n")
    f.write(f"Recall macro: {recall_macro_query}\n")
    f.write(f"F1 macro: {f1_macro_query}\n")
    f.write(f"Precision micro: {precision_micro_query}\n")
    f.write(f"Recall micro: {recall_micro_query}\n")
    f.write(f"F1 micro: {f1_micro_query}\n")
    f.write(f"Fully correct: {fully_correct_query}\n\n")
```