diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..c7c52a9d66bd25f7f9a09dad6a4bc2931fb148d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+credentials.json
\ No newline at end of file
diff --git a/README.MD b/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..ee035e61919f66ce7983a1e19296ba72b83878cd
--- /dev/null
+++ b/README.MD
@@ -0,0 +1,22 @@
+Text-to-scQL Dataset
+====================
+
+This can be used to generate a dataset of sentences and corresponding queries.
+
+By default, run the following command to generate the dataset:
+
+```bash
+./generate.py
+```
+
+It outputs three lines per generated example:
+
+1) a sentence in English
+2) a scQL query
+3) a JSON representation of the expected outcome of the query
+
+The script can also be used with the following arguments:
+
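+* `--validate` validates the generated queries (for instance, to check them for errors).
+* `--localhost` uses an analyser server running locally instead of the one configured in `credentials.json`.
+* `--no-model` generates the training data without a model (only the sentence and the query are printed).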
diff --git a/generate.py b/generate.py
index 958aa3cf972ddaa660bfd42d39d3a233c931d8b7..5b86cade130bc6c875addafb2d174f1555005f68 100755
--- a/generate.py
+++ b/generate.py
@@ -1,6 +1,29 @@
 #!/usr/bin/python3
 
 import itertools
+import sys
+import json
+
+#
+# Usage: ./generate.py [options]
+#   --validate   validate the queries generated by the script and show their model
+#   --localhost  use a scql_analyser server running on this computer (default: the terra8 webserver, configured in credentials.json)
+#   --no-model   generate the dataset without generating the models
+
+validate  = '--validate' in sys.argv
+localhost = '--localhost' in sys.argv
+nomodel   = '--no-model' in sys.argv
+
+if localhost:
+  WEBSERVER_URI = 'http://localhost:8181/api/get_model'
+  WEBSERVER_AUTH = None
+else:
+  # Remote server: the endpoint and the (username, password) pair passed as `auth` come from credentials.json.
+  with open('credentials.json') as credentials_file:
+    credentials = json.load(credentials_file)
+  WEBSERVER_URI = credentials['uri']
+  WEBSERVER_AUTH = (credentials['username'], credentials['password'])
+
 
 class select_query:
   def __init__(self, variables, constraints, restrictions):
@@ -50,13 +73,23 @@ def add_query(queries, sentence_fragments, query_def):
   for s in sentences:
     queries.append([s, query_def])
   return queries
-  
+
 def generate_scql(queries):
   # Take a list of queries and generate the scQL query
   for s,q in queries:
     print(s)
     print(q.to_scql())
 
+def generate_model_scql(queries):
+  """Print, for each (sentence, query) pair, the sentence, its scQL text and the "models" entry of the server's reply."""
+  import requests  # local import: requests is only needed when the server is actually queried
+  for sentence, query in queries:
+    scql = query.to_scql()  # rendered once, so the printed query is exactly the one that is sent
+    print(sentence)
+    print(scql)
+    response = requests.post(WEBSERVER_URI, data={'query': scql}, auth=WEBSERVER_AUTH)
+    print(response.json()["model"]["models"])
+
 def validate_scql(queries):
   import requests
   from termcolor import colored
@@ -64,7 +97,8 @@ def validate_scql(queries):
   results = []
   for s,q in queries:
     payload = {'query': q.to_scql()}
-    r = requests.post('http://localhost:8181/api/get_model', data=payload).json()
+    
+    r = requests.post(WEBSERVER_URI, data=payload, auth=WEBSERVER_AUTH).json()
     # print(r)
     if not r['model']['parse']:
       print(f"Failed to parse: '{colored(q.to_scql(), 'blue')}' with error '{colored(r['model']['message'], 'red')}'")
@@ -160,5 +194,10 @@ for bbd in human_sick_injured_statuses:
 queries = add_query(queries, [desire_perf, human_in_need_statuses], select_query([[mu("scql_types:salient_point"), "human"]], [["human.klass", "=", mu("ex:human")], ["human.status",  "in", all_human_bad_statuses]], []))
 
 
-generate_scql(queries)
-# validate_scql(queries)
+# Select the output mode: --validate wins over --no-model; with neither flag the models are printed too.
+if not (validate or nomodel):
+  generate_model_scql(queries)
+elif validate:
+  validate_scql(queries)
+else:
+  generate_scql(queries)