Skip to content
Snippets Groups Projects
Commit ad92d313 authored by Albin Henriksson's avatar Albin Henriksson
Browse files

Add large-10-epochs and helper to make epochs part of predicted file's name

parent 706542df
Branches
No related tags found
No related merge requests found
This diff is collapsed.
String evaluation
Precision macro: 0.7837609705826785
Recall macro: 0.7773669243953512
F1 macro: 0.7778599845931444
Precision micro: 0.7833761498301152
Recall micro: 0.765239213146604
F1 micro: 0.7742014742014742
Fully correct: 152
Query evaluation
Precision macro: 0.3293007083724157
Recall macro: 0.3320987435926118
F1 macro: 0.3252376722062768
Precision micro: 0.16137158132839782
Recall micro: 0.5483607989285475
F1 micro: 0.24936117249462159
Fully correct: 343
Source diff could not be displayed: it is too large. Options to address this: view the blob.
%% Cell type:code id: tags:
``` python
# Evaluation helpers: query evaluation (eq) and predicted-response building (pqr).
from data import eval_query as eq
from data import pred_query_responses as pqr

# Paths: gold-standard test set (input_path), model predictions (pred_path),
# and the derived location where queried responses will be dumped (dump_path).
input_path = "data/lc-quad-requeried-linked-test.json"
# NOTE(review): the original assigned pred_path twice; the first value
# ("...-append-1-4-epochs.csv") was immediately overwritten and has been removed.
pred_path = "data/predicted/lc-quad-requeried-linked-test-predicted-append-1-large-10-epochs.csv"
# Dump path mirrors the prediction file name: predicted/ -> pred_responses/, .csv -> .json
dump_path = pred_path.replace("predicted", "pred_responses").replace(".csv", ".json")
```
%% Cell type:code id: tags:
``` python
# Run the predicted queries against the qald-10 server and write the responses
# to dump_path. dump_path is re-derived here so this cell can be run on its own
# after editing pred_path, without re-running the setup cell.
# NOTE(review): build_responsefile's argument order (dump, input, pred) is
# assumed from this call site — confirm against data/pred_query_responses.
dump_path = pred_path.replace("predicted", "pred_responses").replace(".csv", ".json")
pqr.build_responsefile(dump_path, input_path, pred_path)
```
%% Output
1161it [03:05, 6.27it/s]
1161it [03:22, 5.74it/s]
Errors: 38
Errors: 137
%% Cell type:code id: tags:
``` python
# Evaluate the expected (gold) query responses against the predicted responses
# recorded in dump_path. Unpacks macro/micro precision/recall/F1 plus the count
# of fully correct answers for later reporting.
# Fixed typo in the status message: "againts" -> "against".
print("Evaluation against server results")
precision_macro_query, recall_macro_query, f1_macro_query, precision_micro_query, recall_micro_query, f1_micro_query, fully_correct_query = eq.eval_query_response(input_path, dump_path)
```
%% Output
Evaluation againts server results
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 52773.69it/s]
Precision macro: 0.3578564718337943
Recall macro: 0.3604568928579589
F1 macro: 0.3538064587750891
Precision micro: 0.5226153669987389
Recall micro: 0.5906943635540006
F1 micro: 0.5545733466496597
TP micro: 18256
FP micro: 16676
FN micro: 12650
Fully correct: 373
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
c:\Users\Albin\workspace\codebase\evaluate.ipynb Cell 3 in <cell line: 3>()
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W2sZmlsZQ%3D%3D?line=0'>1</a> # Evaluate expected query response, against the predicted query response
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W2sZmlsZQ%3D%3D?line=1'>2</a> print("Evaluation againts server results")
----> <a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W2sZmlsZQ%3D%3D?line=2'>3</a> precision_macro_query, recall_macro_query, f1_macro_query, precision_micro_query, recall_micro_query, f1_micro_query, fully_correct_query = eq.eval_query_response(input_path, dump_path)
NameError: name 'dump_path' is not defined
%% Cell type:code id: tags:
``` python
# Evaluate the expected query against the predicted query by string likeness
# (token-level comparison of the SPARQL text, per the "String evaluation" report).
# Unpacks the same macro/micro metrics as the response-based evaluation above.
print("Evaluation of queries as strings")
precision_macro_string, recall_macro_string, f1_macro_string, precision_micro_string, recall_micro_string, f1_micro_string, fully_correct_string = eq.eval_query_json(input_path, dump_path)
```
%% Output
Evaluation of queries as strings
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 193298.94it/s]
Precision macro: 0.7997795749087769
Recall macro: 0.7902940991442298
F1 macro: 0.7930972243375974
Precision micro: 0.7881321203180067
Recall micro: 0.7784343884076742
F1 micro: 0.7832532377616681
TP micro: 9616
FP micro: 2585
FN micro: 2737
Fully correct: 168
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
c:\Users\Albin\workspace\codebase\evaluate.ipynb Cell 4 in <cell line: 3>()
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W3sZmlsZQ%3D%3D?line=0'>1</a> # Evaluate the expected query against the predicted query as string likness.
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W3sZmlsZQ%3D%3D?line=1'>2</a> print("Evaluation of queries as strings")
----> <a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W3sZmlsZQ%3D%3D?line=2'>3</a> precision_macro_string, recall_macro_string, f1_macro_string, precision_micro_string, recall_micro_string, f1_micro_string, fully_correct_string = eq.eval_query_json(input_path, dump_path)
NameError: name 'eq' is not defined
%% Cell type:code id: tags:
``` python
# Save both evaluation summaries to a text report alongside the response dump.
# Report path mirrors dump_path: pred_responses/ -> eval/, .json -> .txt
# NOTE(review): as rendered, the writes below the `with` statement had lost
# their indentation (invalid Python); restored here — output bytes unchanged.
res_path = dump_path.replace("pred_responses", "eval").replace(".json", ".txt")
with open(res_path, "w") as f:
    # String-likeness metrics (from eq.eval_query_json).
    f.write("String evaluation\n\n")
    f.write(f"Precision macro: {precision_macro_string}\n")
    f.write(f"Recall macro: {recall_macro_string}\n")
    f.write(f"F1 macro: {f1_macro_string}\n")
    f.write(f"Precision micro: {precision_micro_string}\n")
    f.write(f"Recall micro: {recall_micro_string}\n")
    f.write(f"F1 micro: {f1_micro_string}\n")
    f.write(f"Fully correct: {fully_correct_string}\n\n")
    # Server-response metrics (from eq.eval_query_response).
    f.write("Query evaluation\n\n")
    f.write(f"Precision macro: {precision_macro_query}\n")
    f.write(f"Recall macro: {recall_macro_query}\n")
    f.write(f"F1 macro: {f1_macro_query}\n")
    f.write(f"Precision micro: {precision_micro_query}\n")
    f.write(f"Recall micro: {recall_micro_query}\n")
    f.write(f"F1 micro: {f1_micro_query}\n")
    f.write(f"Fully correct: {fully_correct_query}\n\n")
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment