-
Notifications
You must be signed in to change notification settings - Fork 2
/
02MAKG_paper_titels.py
48 lines (38 loc) · 1.6 KB
/
02MAKG_paper_titels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Queries the titles of publications from the MAKG via its public SPARQL endpoint
# and stores them alongside the author profiles.
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
# Load the author profiles and give every row an empty "paper_title" cell
# that the lookup loop further down fills in.
df = pd.read_csv("/DSKG_Author_Profiles.csv").assign(paper_title="")
def sparql_PaperTitle(paper_uri):
    """Look up the title(s) of one paper on the MAKG SPARQL endpoint.

    Sends a SELECT query for the ``dcterms:title`` values of ``paper_uri``
    to ``http://ma-graph.org/sparql`` and reads the JSON response.

    Args:
        paper_uri: URI of the paper. It is wrapped in angle brackets and
            inserted into the query text as-is (no escaping is applied).

    Returns:
        All returned title values concatenated without a separator, or an
        empty string when the query yields no bindings.
    """
    endpoint = SPARQLWrapper("http://ma-graph.org/sparql")
    # The pieces are joined without any extra separator, so the query text
    # is exactly the concatenation of these fragments.
    query_parts = (
        "\n",
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "PREFIX magc: <http://ma-graph.org/class/>",
        "PREFIX dcterms: <http://purl.org/dc/terms/>",
        "PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n",
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
        "SELECT distinct ?paperTitle WHERE {\n",
        f"<{paper_uri}>",
        " dcterms:title ?paperTitle",
        "}\n",
        " ",
    )
    endpoint.setQuery("".join(query_parts))
    endpoint.setReturnFormat(JSON)
    bindings = endpoint.query().convert()["results"]["bindings"]
    return "".join(binding["paperTitle"]["value"] for binding in bindings)
# For every author-profile row, resolve each paper URI listed in the "paper"
# column (separated by ", ") to its title and store the titles, joined by
# "; ", in the "paper_title" column. The enriched table is then written out.
paper_titles = []
for row_count, mag_uris in enumerate(df["paper"], start=1):
    # An empty CSV cell is read by pandas as NaN, which has no .split();
    # treat such a row as having no papers instead of crashing.
    uris = [] if pd.isna(mag_uris) else mag_uris.split(", ")
    titles = (sparql_PaperTitle(uri) for uri in uris)
    # Only titles longer than one character are kept (this drops the empty
    # string returned when the endpoint has no title for a URI). Joining
    # replaces the former "append '; ' and slice off the last two chars".
    paper_titles.append("; ".join(t for t in titles if len(t) > 1))
    print(row_count)  # progress: number of rows processed so far

# Assign the whole column in one step. The former chained form
# df["paper_title"][i] = ... triggers SettingWithCopyWarning and does not
# modify df at all when pandas Copy-on-Write is active.
df["paper_title"] = paper_titles
# NOTE: the output path has no ".csv" extension; kept as in the original.
df.to_csv("/DSKG_Author_Profiles_MAKGTitels")