Skip to content
Snippets Groups Projects
Commit f147f318 authored by Albin Henriksson's avatar Albin Henriksson
Browse files

Add large 3 and 4 evaluations

parent 23a6821e
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
No preview for this file type
String evaluation
Precision macro: 0.8030609762521919
Recall macro: 0.7913620401992508
F1 macro: 0.7947304198110867
Precision micro: 0.7912972085385879
Recall micro: 0.7802153323079414
F1 micro: 0.7857171972445279
Fully correct: 162
Query evaluation
Precision macro: 0.34168246759219223
Recall macro: 0.34538766263173165
F1 macro: 0.33773159161379057
Precision micro: 0.2601919788161299
Recall micro: 0.34827395237216047
F1 micro: 0.29785756006408937
Fully correct: 358
String evaluation
Precision macro: 0.7982044656463279
Recall macro: 0.7850570068270347
F1 macro: 0.7890307366150056
Precision micro: 0.787020260253665
Recall micro: 0.7735772686796729
F1 micro: 0.7802408654827515
Fully correct: 152
Query evaluation
Precision macro: 0.3288445236328654
Recall macro: 0.33148790322402794
F1 macro: 0.3254230404437138
Precision micro: 0.4538291509556613
Recall micro: 0.6749951653451918
F1 micro: 0.5427460736574067
Fully correct: 345
Source diff could not be displayed: it is too large. Options to address this: view the blob.
%% Cell type:code id: tags:
``` python
from data import eval_query as eq
from data import pred_query_responses as pqr
# Specify the gold (correct) dataset and the predicted query responses to evaluate.
input_path = "data/lc-quad-requeried-linked-test.json"
pred_path = "data/predicted/lc-quad-requeried-linked-test-predicted-append-1-large-4-epochs.csv"
# Derive the JSON response-dump path from the predictions CSV path.
# NOTE(review): str.replace substitutes EVERY occurrence of "predicted", including
# the one inside the file name itself — confirm the downstream code expects that.
dump_path = pred_path.replace("predicted", "pred_responses").replace(".csv", ".json")
```
%% Cell type:code id: tags:
``` python
# Runs the predicted queries against the qald-10 server, then generates a file of responses, and returns path to it
# NOTE(review): network-bound side effect — requires the query endpoint to be reachable;
# per the cell output it iterates ~1161 queries and reports an error count.
# Reads gold data from input_path and predicted queries from pred_path (CSV),
# writing the collected responses to dump_path (presumably JSON, given the extension — verify).
pqr.build_responsefile(dump_path, input_path, pred_path)
```
%% Output
1161it [03:22, 5.74it/s]
1161it [03:03, 6.33it/s]
Errors: 137
Errors: 15
%% Cell type:code id: tags:
``` python
# Evaluate the expected query responses against the predicted query responses
# (compares actual server results, not query text).
# Requires `eq`, `input_path`, and `dump_path` from the earlier cells.
print("Evaluation against server results")  # fixed typo: "againts" -> "against"
precision_macro_query, recall_macro_query, f1_macro_query, precision_micro_query, recall_micro_query, f1_micro_query, fully_correct_query = eq.eval_query_response(input_path, dump_path)
```
%% Output
Evaluation againts server results
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 67736.64it/s]
Precision macro: 0.34168246759219223
Recall macro: 0.34538766263173165
F1 macro: 0.33773159161379057
Precision micro: 0.2601919788161299
Recall micro: 0.34827395237216047
F1 micro: 0.29785756006408937
TP micro: 9433
FP micro: 26821
FN micro: 17652
Fully correct: 358
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
c:\Users\Albin\workspace\codebase\evaluate.ipynb Cell 3 in <cell line: 3>()
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W2sZmlsZQ%3D%3D?line=0'>1</a> # Evaluate expected query response, against the predicted query response
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W2sZmlsZQ%3D%3D?line=1'>2</a> print("Evaluation againts server results")
----> <a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W2sZmlsZQ%3D%3D?line=2'>3</a> precision_macro_query, recall_macro_query, f1_macro_query, precision_micro_query, recall_micro_query, f1_micro_query, fully_correct_query = eq.eval_query_response(input_path, dump_path)
NameError: name 'dump_path' is not defined
%% Cell type:code id: tags:
``` python
# Evaluate the expected query against the predicted query as string likeness
# (compares the query text itself rather than server results).
# Requires `eq`, `input_path`, and `dump_path` from the earlier cells; the
# traceback in this cell's output shows it fails if those cells were not run first.
print("Evaluation of queries as strings")
precision_macro_string, recall_macro_string, f1_macro_string, precision_micro_string, recall_micro_string, f1_micro_string, fully_correct_string = eq.eval_query_json(input_path, dump_path)
```
%% Output
Evaluation of queries as strings
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 193337.31it/s]
Precision macro: 0.8030609762521919
Recall macro: 0.7913620401992508
F1 macro: 0.7947304198110867
Precision micro: 0.7912972085385879
Recall micro: 0.7802153323079414
F1 micro: 0.7857171972445279
TP micro: 9638
FP micro: 2542
FN micro: 2715
Fully correct: 162
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
c:\Users\Albin\workspace\codebase\evaluate.ipynb Cell 4 in <cell line: 3>()
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W3sZmlsZQ%3D%3D?line=0'>1</a> # Evaluate the expected query against the predicted query as string likness.
<a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W3sZmlsZQ%3D%3D?line=1'>2</a> print("Evaluation of queries as strings")
----> <a href='vscode-notebook-cell:/c%3A/Users/Albin/workspace/codebase/evaluate.ipynb#W3sZmlsZQ%3D%3D?line=2'>3</a> precision_macro_string, recall_macro_string, f1_macro_string, precision_micro_string, recall_micro_string, f1_micro_string, fully_correct_string = eq.eval_query_json(input_path, dump_path)
NameError: name 'eq' is not defined
%% Cell type:code id: tags:
``` python
# Persist both evaluation sections (string likeness and query response) to a
# text report next to the response dump. Requires the metric variables produced
# by the two evaluation cells above.
res_path = dump_path.replace("pred_responses", "eval").replace(".json", ".txt")

# Pair each section title with its seven metrics in write order.
metric_labels = (
    "Precision macro",
    "Recall macro",
    "F1 macro",
    "Precision micro",
    "Recall micro",
    "F1 micro",
)
sections = [
    ("String evaluation",
     (precision_macro_string, recall_macro_string, f1_macro_string,
      precision_micro_string, recall_micro_string, f1_micro_string),
     fully_correct_string),
    ("Query evaluation",
     (precision_macro_query, recall_macro_query, f1_macro_query,
      precision_micro_query, recall_micro_query, f1_micro_query),
     fully_correct_query),
]

with open(res_path, "w") as f:
    for title, metrics, fully_correct in sections:
        f.write(f"{title}\n\n")
        for label, value in zip(metric_labels, metrics):
            f.write(f"{label}: {value}\n")
        f.write(f"Fully correct: {fully_correct}\n\n")
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment