forked from knowsys/Course-Knowledge-Graphs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
11-metis-from-sparql.py
90 lines (71 loc) · 2.51 KB
/
11-metis-from-sparql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python3
import argparse
import requests
import itertools
from collections import defaultdict
from graphs import Graph, io
# Fetching all results in a single request would time out, so the
# results are retrieved page by page.
PAGE_SIZE = 10000
# URL that the SPARQL queries are sent to.
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
# Sent as the User-Agent header with every request to the endpoint.
USER_AGENT = "tud-kbs-example-subgraph-extractor/0.0.1 (https://github.com/knowsys/Course-Knowledge-Graphs/)"
# Query template, filled in via str.format(limit=..., offset=...); the
# doubled braces are literal SPARQL braces escaped for str.format.
# First, in a subquery, get the v/w pairs in a stable order and
# retrieve a page of those. Then find the labels for these results.
QUERY = """#TOOL:tud-kbs-example-subgraph-extractor
SELECT DISTINCT ?v ?vLabel ?w ?wLabel WHERE {{
{{ SELECT DISTINCT ?v ?w WHERE {{
?v wdt:P1344 ?tournament .
?tournament ^wdt:P2522 ?w .
}} ORDER BY ASC(?v) ASC(?w) LIMIT {limit} OFFSET {offset} }}
SERVICE wikibase:label {{
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
?v rdfs:label ?vLabel .
?w rdfs:label ?wLabel .
}}
}}
"""
def counter():
    """Create an independent source of consecutive integers.

    Returns a zero-argument callable. Each call to it hands out the next
    integer, beginning with 1. Separate calls to ``counter`` produce
    separate, unrelated sequences.
    """
    # The bound ``__next__`` of a private count iterator is exactly the
    # "give me the next number" callable that is needed here.
    return itertools.count(1).__next__
def paged(page):
    """Render the SPARQL query text for the given page number.

    Every page asks for ``PAGE_SIZE + 1`` rows, and consecutive pages
    are offset from each other by that same number of rows.
    """
    rows_per_page = PAGE_SIZE + 1
    rows_to_skip = rows_per_page * page
    return QUERY.format(limit=rows_per_page, offset=rows_to_skip)
if __name__ == "__main__":
    # Script entry point: page through the query results, collect the
    # vertices, labels and edges, then write the graph and an id/label
    # dictionary to the two paths given on the command line.
    parser = argparse.ArgumentParser(
        description=(
            "Extract tournament winner/participant subgraph from Wikidata"
        )
    )
    parser.add_argument("output", help="path to output graph")
    parser.add_argument("dict", help="path to output dictionary")
    args = parser.parse_args()

    # Each entity IRI is assigned the next integer id (starting at 1)
    # the first time it is looked up.
    ids = defaultdict(counter())
    labels = {}
    edges = set()

    done = False
    page = 0
    while not done:
        print("-!- getting page {}".format(page))
        response = requests.get(
            SPARQL_ENDPOINT,
            params={"query": paged(page), "format": "json"},
            headers={"user-agent": USER_AGENT},
            # Without a timeout, requests waits indefinitely on a
            # stalled connection.
            timeout=120,
        )
        response.raise_for_status()
        results = response.json()["results"]["bindings"]
        # Each request asks for PAGE_SIZE + 1 rows; a page that comes
        # back with fewer rows than that is the last one.
        done = len(results) <= PAGE_SIZE
        page += 1

        for binding in results:
            v = ids[binding["v"]["value"]]
            labels[v] = binding["vLabel"]["value"]
            w = ids[binding["w"]["value"]]
            labels[w] = binding["wLabel"]["value"]
            # Edges are stored as (w, v); the set drops duplicates.
            edges.add((w, v))

    graph = Graph(vertices=ids.values())
    for w, v in edges:
        graph.add_edge(w, v)

    with open(args.output, "w") as metis:
        io.write_metis_graph(metis, graph)

    # Labels may contain arbitrary Unicode, so do not rely on the
    # platform's default encoding for the dictionary file.
    with open(args.dict, "w", encoding="utf-8") as dictionary:
        for vertex_id, label in labels.items():
            print('{},"{}"'.format(vertex_id, label), file=dictionary)