Skip to content
Snippets Groups Projects
Commit 0b46933b authored by Max Björkander's avatar Max Björkander
Browse files

ngm still not working :)

parent 6f218bbb
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
This diff is collapsed.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
%% Cell type:code id: tags:
``` python
from data import eval_query as eq
from data import pred_query_responses as pqr
# Input files: the reference test set and the predictions to be evaluated.
input_path = "data/lcquad-test.json"
# Alternative prediction files; uncomment one (and comment out the active
# line below) to evaluate a different run.
#pred_path = "data/predicted/lcquad-test-append-1-base-3-with-ngm-e2400.csv"
#pred_path = "data/predicted/lcquad-test-append-1-base-3.csv"
pred_path = "data/predicted/lcquad-test-append-1-base-3-with-ngm.csv"
# The response dump path is derived from the prediction path: "predicted"
# becomes "pred_responses" and the ".csv" extension becomes ".json".
dump_path = (
    pred_path
    .replace("predicted", "pred_responses")
    .replace(".csv", ".json")
)
```
%% Cell type:code id: tags:
``` python
# Run the predicted queries against the query server and generate a file of
# responses at dump_path.
# NOTE(review): the original comment names the "qald-10 server", but the input
# and prediction files here are LC-QuAD files - confirm which endpoint
# build_responsefile actually queries.
# NOTE(review): build_responsefile reportedly returns the path to the generated
# file; the return value is not used here because dump_path is already known.
pqr.build_responsefile(dump_path, input_path, pred_path)
```
%% Output
514it [01:25, 6.04it/s]
514it [01:28, 5.82it/s]
Errors: 50
Errors: 19
%% Cell type:code id: tags:
``` python
# Score the predicted query responses against the expected query responses.
# The seven returned metrics are kept in *_query variables for the final report.
print("Evaluation againts server results")
(
    precision_macro_query,
    recall_macro_query,
    f1_macro_query,
    precision_micro_query,
    recall_micro_query,
    f1_micro_query,
    fully_correct_query,
) = eq.eval_query_response(input_path, dump_path)
```
%% Output
Evaluation againts server results
Begin evaluation
100%|██████████| 514/514 [00:00<00:00, 36714.45it/s]
100%|██████████| 514/514 [00:00<00:00, 46612.45it/s]
Precision macro: 0.22203438214005614
Recall macro: 0.22226681473492532
F1 macro: 0.21788767449322025
Precision micro: 0.6842240493319454
Recall micro: 0.5006580184245065
F1 micro: 0.5782216914070555
TP micro: 2663
FP micro: 1229
FN micro: 2656
Fully correct: 94
Precision macro: 0.3921867816212494
Recall macro: 0.397039836784212
F1 macro: 0.38901669678955775
Precision micro: 0.7288204296716512
Recall micro: 0.2740435909160168
F1 micro: 0.398316349096299
TP micro: 3596
FP micro: 1338
FN micro: 9526
Fully correct: 179
%% Cell type:code id: tags:
``` python
# Score the predicted queries against the expected queries by string likeness.
# The seven returned metrics are kept in *_string variables for the final report.
print("Evaluation of queries as strings")
(
    precision_macro_string,
    recall_macro_string,
    f1_macro_string,
    precision_micro_string,
    recall_micro_string,
    f1_micro_string,
    fully_correct_string,
) = eq.eval_query_json(input_path, dump_path)
```
%% Output
Evaluation of queries as strings
Begin evaluation
100%|██████████| 1161/1161 [00:00<00:00, 193337.31it/s]
100%|██████████| 514/514 [00:00<00:00, 128463.37it/s]
Precision macro: 0.8030609762521919
Recall macro: 0.7913620401992508
F1 macro: 0.7947304198110867
Precision micro: 0.7912972085385879
Recall micro: 0.7802153323079414
F1 micro: 0.7857171972445279
TP micro: 9638
FP micro: 2542
FN micro: 2715
Fully correct: 162
Precision macro: 0.7649602235205356
Recall macro: 0.7759334213127996
F1 macro: 0.7685126947056656
Precision micro: 0.7640531198835728
Recall micro: 0.7674036177599123
F1 micro: 0.7657247037374658
TP micro: 4200
FP micro: 1297
FN micro: 1273
Fully correct: 15
%% Cell type:code id: tags:
``` python
# Save the results to a text file whose path is derived from dump_path:
# "pred_responses" becomes "eval" and the ".json" extension becomes ".txt".
res_path = dump_path.replace("pred_responses", "eval").replace(".json", ".txt")

# Metric labels, in the order the values are listed for each section below.
metric_labels = (
    "Precision macro",
    "Recall macro",
    "F1 macro",
    "Precision micro",
    "Recall micro",
    "F1 micro",
    "Fully correct",
)

# (section title, metric values) pairs; the string evaluation is written first.
report_sections = (
    (
        "String evaluation",
        (
            precision_macro_string,
            recall_macro_string,
            f1_macro_string,
            precision_micro_string,
            recall_micro_string,
            f1_micro_string,
            fully_correct_string,
        ),
    ),
    (
        "Query evaluation",
        (
            precision_macro_query,
            recall_macro_query,
            f1_macro_query,
            precision_micro_query,
            recall_micro_query,
            f1_micro_query,
            fully_correct_query,
        ),
    ),
)

# An explicit encoding keeps the report identical regardless of the platform's
# default locale encoding.
with open(res_path, "w", encoding="utf-8") as f:
    for section_title, metric_values in report_sections:
        f.write(f"{section_title}\n\n")
        for label, value in zip(metric_labels, metric_values):
            f.write(f"{label}: {value}\n")
        # A blank line terminates each section.
        f.write("\n")
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment