"""Collect the relation URIs used in the linked QALD-9 training set.

Every relation URI is shortened to prefixed form (for example
``http://dbpedia.org/ontology/spouse`` becomes ``dbo:spouse``) and the
distinct results are written to a JSON file.
"""

import json

# Prefix label -> namespace URI. Labels include the trailing colon so that a
# shortened name is simply ``label + local_part``.
prefixes = {
    "res:": "http://dbpedia.org/resource/",
    "dbo:": "http://dbpedia.org/ontology/",
    "dbp:": "http://dbpedia.org/property/",
    "rdfs:": "http://www.w3.org/2000/01/rdf-schema#",
    "rdf:": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "yago:": "http://dbpedia.org/class/yago/",
    "wdt:": "http://www.wikidata.org/prop/direct/",
    "wd:": "http://www.wikidata.org/entity/",
    "p:": "http://www.wikidata.org/prop/",
    # NOTE: "ps:" is the Wikidata statement namespace, matching the other
    # Wikidata prefixes in this table (it previously pointed at the unrelated
    # PaySwarm vocabulary).
    "ps:": "http://www.wikidata.org/prop/statement/",
    "pq:": "http://www.wikidata.org/prop/qualifier/",
    "bd:": "http://www.bigdata.com/rdf#",
    "wikibase:": "http://wikiba.se/ontology#",
    "skos:": "http://www.w3.org/2004/02/skos/core#",
}


def uri_to_prefix(uri: str) -> str:
    """Return *uri* in prefixed form, or unchanged if no namespace matches.

    Several namespaces in ``prefixes`` are string prefixes of one another
    (``p:`` is a prefix of ``wdt:``, ``ps:`` and ``pq:``), so the longest
    matching namespace is used rather than the first one encountered.

    Args:
        uri: The full URI to shorten.

    Returns:
        ``label + local_part`` for the longest matching namespace, or *uri*
        itself when none of the known namespaces is a prefix of it.
    """
    best_label = None
    best_namespace = ""
    for label, namespace in prefixes.items():
        if uri.startswith(namespace) and len(namespace) > len(best_namespace):
            best_label, best_namespace = label, namespace
    if best_label is None:
        return uri
    return best_label + uri[len(best_namespace):]


def main(
    input_path: str = "./data/qald-9-train-linked.json",
    output_path: str = "./data/prefix-relations-qald-9-train-linked.json",
) -> None:
    """Extract the distinct prefixed relations and write them as a JSON list.

    Reads ``data["questions"]`` from *input_path*; for every question that has
    a ``"relations"`` entry, the ``"URI"`` of each relation is shortened with
    ``uri_to_prefix``. The distinct results are printed and dumped to
    *output_path*.

    Args:
        input_path: JSON file to read the questions from.
        output_path: JSON file the list of prefixed relations is written to.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If the input has no ``"questions"`` key or a relation has
            no ``"URI"`` key.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Questions without a "relations" entry contribute nothing.
    relations = {
        uri_to_prefix(relation["URI"])
        for question in data["questions"]
        if "relations" in question
        for relation in question["relations"]
    }

    print("Relations: ", relations)

    # Set iteration order varies between runs; sort so the generated file is
    # reproducible. The context manager guarantees the file is flushed and
    # closed.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(sorted(relations), f)


if __name__ == "__main__":
    main()
"__main__": + main() \ No newline at end of file diff --git a/data/prefix-relations-qald-9-train-linked.json b/data/prefix-relations-qald-9-train-linked.json new file mode 100644 index 0000000000000000000000000000000000000000..7845245949dcd4d6cd6fe5eef2a63a4ba0df1d82 --- /dev/null +++ b/data/prefix-relations-qald-9-train-linked.json @@ -0,0 +1,249 @@ +[ + "dbo:discovered", + "dbo:billed", + "dbo:chancellor", + "dbp:programmingLanguage", + "dbo:capital", + "dbp:grass", + "dbo:area", + "dbp:author", + "dbo:flag", + "dbp:developer", + "dbo:result", + "dbo:chain", + "dbp:elevation", + "dbp:name", + "dbo:discharge", + "dbo:areaTotal", + "dbo:region", + "dbo:species", + "dbo:division", + "dbo:artist", + "dbp:stadium", + "dbo:subsidiary", + "dbp:founded", + "dbo:sport", + "dbo:effectiveRadiatedPower", + "dbo:class", + "dbo:iso6393Code", + "dbo:missions", + "dbo:budget", + "dbo:ingredient", + "dbo:deathCause", + "dbp:designer", + "dbo:primeMinister", + "dbo:operator", + "dbo:spouse", + "dbo:numberOfVisitors", + "dbo:musicBy", + "dbo:battle", + "dbo:painter", + "dbo:material", + "dbo:channel", + "dbo:deathPlace", + "dbo:principal", + "dbo:language", + "dbo:raceHorse", + "dbo:mainspan", + "dbo:country", + "dbo:foundingYear", + "dbo:location", + "dbo:restingPlace", + "dbo:type", + "dbo:patron", + "dbo:composer", + "dbo:numberOfFilms", + "dbo:races", + "dbp:birthName", + "dbp:timezone", + "dbo:numberOfEmployees", + "dbo:influencedBy", + "dbo:doctoralAdvisor", + "dbp:programming", + "dbp:members", + "dbo:founder", + "dbo:river", + "dbo:spokenIn", + "dbo:place", + "dbo:author", + "dbp:popeElected", + "dbo:manufacturer", + "dbo:governor", + "dbo:programmingLanguage", + "dbp:firstAired", + "dbo:board", + "dbo:gross", + "dbo:totalLaunches", + "dbo:birthDate", + "dbp:location", + "dbo:colour", + "dbo:president", + "dbo:province", + "dbp:erected", + "dbo:film", + "dbo:date", + "dbo:wineProduced", + "dbo:iso6391Code", + "dbo:developer", + "dbo:animal", + "dbo:publisher", + 
"dbo:countryWithFirstSpaceflight", + "dbo:leader", + "dbo:careerPrizeMoney", + "dbo:state", + "dbp:founder", + "dbo:coach", + "dbp:musicals", + "dbo:university", + "dbo:countryWithFirstAstronaut", + "dbo:production", + "dbp:owner", + "dbo:crosses", + "dbp:country", + "dbo:mayor", + "dbo:party", + "dbo:service", + "dbo:influenced", + "dbo:largestCity", + "dbo:family", + "dbo:strength", + "dbo:operatedBy", + "dbo:militaryBranch", + "dbo:parkingInformation", + "dbo:wineRegion", + "dbo:leaderName", + "dbo:creator", + "dbo:award", + "dbp:area", + "dbp:children", + "dbo:highestPlace", + "dbo:musicComposer", + "dbo:starring", + "dbp:languages", + "dbo:musicalBand", + "dbp:date", + "dbo:iso6392Code", + "dbo:city", + "dbp:title", + "dbo:knownFor", + "dbo:company", + "dbo:building", + "dbo:icaoAirlineCode", + "dbo:recordLabel", + "dbo:elevation", + "dbo:federalState", + "dbo:designer", + "dbo:abstract", + "dbo:fat", + "dbp:industry", + "dbo:discipline", + "dbo:deathDate", + "dbp:directedby", + "dbo:height", + "dbo:seasonNumber", + "dbo:bandMember", + "dbo:hubAirport", + "dbo:network", + "dbo:populationTotal", + "dbo:picture", + "dbo:highestMountain", + "dbo:outflow", + "dbo:product", + "dbo:position", + "dbo:head", + "dbo:writer", + "dbo:meaning", + "dbo:parent", + "dbo:producer", + "dbo:league", + "dbo:rival", + "dbo:engineer", + "dbp:writer", + "dbo:established", + "dbp:weapons", + "dbp:gross", + "dbo:office", + "dbo:governmentType", + "dbo:fate", + "dbo:house", + "dbo:participant", + "dbo:border", + "dbo:almaMater", + "dbo:lake", + "dbo:successor", + "dbo:currency", + "dbo:presenter", + "dbo:background", + "dbo:manager", + "dbo:weight", + "dbo:numberOfIslands", + "dbo:anthem", + "dbo:runtime", + "dbo:numberOfSeasons", + "dbo:completionDate", + "dbo:birthYear", + "dbo:vicePresident", + "dbo:residence", + "dbo:timeZone", + "dbo:child", + "dbo:games", + "dbo:origin", + "dbo:headquarter", + "dbp:type", + "dbp:released", + "dbo:populationMetro", + "dbo:agency", + 
"dbo:portrayer", + "dbo:volume", + "dbp:instrument", + "dbo:album", + "dbo:trainer", + "dbo:birthPlace", + "dbo:incumbent", + "dbo:feastDay", + "dbo:title", + "dbo:numberBuilt", + "dbp:spouse", + "dbo:municipality", + "dbo:team", + "dbo:shipBeam", + "dbo:floorCount", + "dbp:flag", + "dbp:float", + "dbo:address", + "dbp:nickname", + "dbo:ethnicGroup", + "dbo:director", + "dbo:source", + "dbp:work", + "dbo:authority", + "dbo:profession", + "dbo:institution", + "dbo:editor", + "dbp:row", + "dbo:discoverer", + "dbo:birthName", + "dbo:televisionSeries", + "dbo:populationDensity", + "dbo:clubsRecordGoalscorer", + "dbo:person", + "dbo:officialLanguage", + "dbo:owner", + "dbo:totalPopulation", + "dbo:locatedInArea", + "dbo:champion", + "dbo:architect", + "dbp:label", + "dbo:alias", + "dbo:museum", + "dbo:government", + "dbp:books", + "dbp:known", + "dbo:episodeNumber", + "dbo:movement", + "dbo:seniority", + "dbo:guest", + "dbp:release", + "dbo:genre", + "dbo:show" +]