Skip to content
Snippets Groups Projects
Commit 8e2574c6 authored by Ludwig Forsberg's avatar Ludwig Forsberg
Browse files

Added LC-Quad dataset

parent 826a63b5
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
This diff is collapsed.
import sys
import json
from queue import Queue
from threading import Thread
from tqdm import tqdm
import time
# Merge the test and train dataset files into one question -> SPARQL mapping
# and re-export the result as a single JSON document (combined.json).


def _collect_questions(path, mapping):
    """Read one dataset file and record its question/query pairs in *mapping*.

    The file must contain a JSON object with a "questions" list whose entries
    carry "corrected_question" and "sparql_query" keys. A question text that
    is already present in *mapping* has its query overwritten, which is how
    duplicates across files are collapsed.
    """
    with open(path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    # Wrapping the list lets tqdm take the total from len() and advance the
    # bar once per entry, replacing the manual update()/close() bookkeeping.
    for entry in tqdm(data['questions']):
        mapping[entry["corrected_question"]] = entry["sparql_query"]


def _to_export_document(mapping):
    """Build the output structure: one entry per question, ids counting from 1.

    Ids are emitted as strings and follow the insertion order of *mapping*.
    """
    return {
        "questions": [
            {
                "id": str(number),
                "question": [{
                    "language": "en",
                    "string": text
                }],
                "query": {
                    "sparql": sparql
                }
            }
            for number, (text, sparql) in enumerate(mapping.items(), start=1)
        ]
    }


combined = {}
# Test split first, then train: on a duplicate question the train query is
# the one that is kept.
for source_path in ("test-data.json", "train-data.json"):
    _collect_questions(source_path, combined)
print(len(combined))

out = _to_export_document(combined)
with open("combined.json", 'w', encoding='utf-8') as output:
    json.dump(out, output)
This diff is collapsed.
This diff is collapsed.
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment