Skip to content
Snippets Groups Projects
Commit 55ac0d23 authored by Cyrille Berger's avatar Cyrille Berger
Browse files

update script to include the model in the dataset

parent 392503ec
Branches
No related tags found
No related merge requests found
credentials.json
\ No newline at end of file
Text-to-scQL Dataset
====================
This can be used to generate a dataset of sentences and corresponding queries.
The default is to use the following command to generate the dataset:
```bash
./generate.py
```
For each generated example, it will output three lines:
1) a sentence in English
2) a scQL query
3) a JSON representation of the expected outcome of the query
The script can also be used with the following arguments:
* `--validate` can be used to validate the queries (check for errors, for instance).
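* `--localhost` can be used to query an analyser server running locally (`http://localhost:8181`) instead of the default web server, whose address and login are read from `credentials.json`.
* `--no-model` can be used to generate training data without a model; only the sentence and the scQL query are then printed for each example.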
#!/usr/bin/python3 #!/usr/bin/python3
import itertools import itertools
import sys
import json
#
# ./generate.py [options]
# --validate indicates that you want to validate and show the model for the queries generated by the script
# --localhost indicates that you want to use a scql_analyser server located on this computer (default is to use terra8 webserver)
# --no-model indicates that you want to generate dataset without generating models
# Command-line switches: each one is enabled simply by being present in sys.argv.
validate = '--validate' in sys.argv
localhost = '--localhost' in sys.argv
nomodel = '--no-model' in sys.argv

# Endpoint and credentials used when posting queries to the analyser.
if localhost:
    WEBSERVER_URI = 'http://localhost:8181/api/get_model'
    WEBSERVER_AUTH = None
else:
    # The remote server's address and login come from credentials.json, which
    # must provide the keys 'uri', 'username' and 'password'.
    # The with-block closes the file as soon as it has been parsed.
    with open('credentials.json') as credentials_file:
        credentials = json.load(credentials_file)
    WEBSERVER_URI = credentials['uri']
    WEBSERVER_AUTH = (credentials['username'], credentials['password'])
class select_query: class select_query:
def __init__(self, variables, constraints, restrictions): def __init__(self, variables, constraints, restrictions):
...@@ -50,13 +73,23 @@ def add_query(queries, sentence_fragments, query_def): ...@@ -50,13 +73,23 @@ def add_query(queries, sentence_fragments, query_def):
for s in sentences: for s in sentences:
queries.append([s, query_def]) queries.append([s, query_def])
return queries return queries
def generate_scql(queries):
    """Print every sentence followed by its scQL query.

    queries is an iterable of (sentence, query) pairs, where query provides a
    to_scql() method. Two lines are written to standard output per pair: the
    sentence, then the result of query.to_scql(). Nothing is returned.
    """
    for sentence, query in queries:
        print(sentence)
        print(query.to_scql())
def generate_model_scql(queries):
    """Print every sentence, its scQL query and the model the analyser returns.

    queries is an iterable of (sentence, query) pairs, where query provides a
    to_scql() method. For each pair the scQL text is POSTed to WEBSERVER_URI
    (authenticating with WEBSERVER_AUTH) and three lines are written to
    standard output:

    1) the sentence,
    2) the scQL query,
    3) the ["model"]["models"] entry of the server's JSON reply, as JSON.

    Nothing is returned.
    """
    # Local import: requests is only required by the functions that contact
    # the server.
    import requests
    for sentence, query in queries:
        print(sentence)
        scql = query.to_scql()
        print(scql)
        reply = requests.post(WEBSERVER_URI, data={'query': scql}, auth=WEBSERVER_AUTH).json()
        # Serialise explicitly: print() on the decoded object would emit a
        # Python repr (single quotes, True/None), which is not valid JSON.
        print(json.dumps(reply["model"]["models"]))
def validate_scql(queries): def validate_scql(queries):
import requests import requests
from termcolor import colored from termcolor import colored
...@@ -64,7 +97,8 @@ def validate_scql(queries): ...@@ -64,7 +97,8 @@ def validate_scql(queries):
results = [] results = []
for s,q in queries: for s,q in queries:
payload = {'query': q.to_scql()} payload = {'query': q.to_scql()}
r = requests.post('http://localhost:8181/api/get_model', data=payload).json()
r = requests.post(WEBSERVER_URI, data=payload, auth=WEBSERVER_AUTH).json()
# print(r) # print(r)
if not r['model']['parse']: if not r['model']['parse']:
print(f"Failed to parse: '{colored(q.to_scql(), 'blue')}' with error '{colored(r['model']['message'], 'red')}'") print(f"Failed to parse: '{colored(q.to_scql(), 'blue')}' with error '{colored(r['model']['message'], 'red')}'")
...@@ -160,5 +194,10 @@ for bbd in human_sick_injured_statuses: ...@@ -160,5 +194,10 @@ for bbd in human_sick_injured_statuses:
queries = add_query(queries, [desire_perf, human_in_need_statuses], select_query([[mu("scql_types:salient_point"), "human"]], [["human.klass", "=", mu("ex:human")], ["human.status", "in", all_human_bad_statuses]], [])) queries = add_query(queries, [desire_perf, human_in_need_statuses], select_query([[mu("scql_types:salient_point"), "human"]], [["human.klass", "=", mu("ex:human")], ["human.status", "in", all_human_bad_statuses]], []))
# Choose the output mode from the command-line switches:
#   --validate : check the generated queries against the analyser
#   --no-model : print only the sentence and the scQL query
#   (default)  : print the sentence, the scQL query and the analyser's model
if validate:
    validate_scql(queries)
elif nomodel:
    generate_scql(queries)
else:
    generate_model_scql(queries)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment