-
Notifications
You must be signed in to change notification settings - Fork 0
/
rdf-generator.py
122 lines (100 loc) · 3.33 KB
/
rdf-generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# -*- coding: utf-8 -*-
#import pycurl
import json,csv,pickle
from rdflib import Graph, Literal, BNode, Namespace, RDF, URIRef, OWL
from rdflib.namespace import DC, FOAF, Namespace, NamespaceManager,XSD
from optparse import OptionParser
# Command-line options: -i/--input, -s/--scheme and -m/--mapping are mandatory;
# -o/--output and -g/--graph fall back to built-in defaults.
parser = OptionParser()
parser.add_option("-i","--input",help="input file to use for the raw data",action="store", type="string")
parser.add_option("-s","--scheme",help="input ontological scheme to use")
parser.add_option("-m","--mapping",help="input mapping to use")
parser.add_option("-o","--output",help="output file to use")
parser.add_option("-g","--graph",help="base graph file to use")
(options, args) = parser.parse_args()

# parser.error() reports the problem together with the usage text and exits.
if not options.input:
    parser.error('Input not given')
if not options.scheme:
    parser.error('Scheme not given')
if not options.mapping:
    parser.error('Mapping not given')

inputFile = options.input
schemeFile = options.scheme
mappingFile = options.mapping

# Optional paths: use the given value, otherwise the default.
outputFile = options.output or "output.ttl"
# The base graph comes from -g/--graph (not from the output option).
graphFile = options.graph or "input/baseGraph.ttl"

# The string literal below is never executed. It keeps the snippet that
# apparently produced the sample mapping pickles (mapping-flat.pkl and
# mapping-complex.pkl) as a reference for the expected mapping layout:
# ":varN" entries hold a path into the input data, ":cstN" entries hold
# constant text.
"""
mappingFlat={
":var1":"latitude",
":var2":"longitude",
":var3":"timestamp",
":var4":"vehicle_speed",
":cst1":"<http://automotive.eurecom.fr/simulator/vehicle>",
":cst2":"xsd:float",
":cst3":"<http://automotive.eurecom.fr/simulator/signal/speed>"}
mappingComplex={
":var1":"location.latitude",
":var2":"location.longitude",
":var3":"timestamp",
":var4":"data.vehicle_speed",
":cst1":"<http://automotive.eurecom.fr/simulator/vehicle>",
":cst2":"xsd:float",
":cst3":"<http://automotive.eurecom.fr/simulator/signal/speed>"}
with open('mapping-flat.pkl','w') as f:
pickle.dump(mappingFlat,f)
f.close()
with open('mapping-complex.pkl','w') as f:
pickle.dump(mappingComplex,f)
f.close()
"""
#Open mapping as a dictionary
# The mapping file is a pickle, so it is opened in binary mode: Python 3
# requires bytes for pickle.load, and binary mode avoids newline translation.
# The `with` block closes the file, so no explicit close() is needed.
# NOTE(security): unpickling can execute arbitrary code; only use mapping
# files that come from a trusted source.
with open(mappingFile, "rb") as f:
    mappingComplex = pickle.load(f)
#Find the value in a JSON file from a path
def getValue(data, path):
    """Return the value stored in *data* at the dotted *path*, as a string.

    Each segment of *path* (split on ".") selects one level of nesting.
    Mappings are indexed by the segment itself; lists and tuples (JSON
    arrays) are indexed by position when the segment is made of digits,
    e.g. "points.1.speed".

    Raises:
        KeyError: a segment is missing from a mapping.
        IndexError: a numeric segment is out of range for a list.
        TypeError: a non-numeric segment is applied to a list.
    """
    node = data
    for segment in path.split('.'):
        if isinstance(node, (list, tuple)) and segment.isdigit():
            # Sequences need an integer position, not a string key.
            node = node[int(segment)]
        else:
            node = node[segment]
    return str(node)
#get dict from JSON or CSV inputs
def getRawData(path=None):
    """Load the raw input data from a JSON or CSV file.

    Args:
        path: file to read. When omitted, the module-level ``inputFile``
            (the -i/--input option) is used.

    Returns:
        For ".json" files, the decoded JSON document. For ".csv" files, a
        dict built from the first two columns of every row (column 0 is the
        key, column 1 the value); rows with fewer than two columns, such as
        blank lines, are skipped.

    Raises:
        ValueError: the file extension is neither ".json" nor ".csv".
    """
    import os  # local import: os is not part of the file's import block

    if path is None:
        path = inputFile
    # splitext looks only at the final extension, so directory names or
    # file names containing extra dots ("./in/my.data.json") are handled.
    extension = os.path.splitext(path)[1].lower()
    if extension not in (".json", ".csv"):
        raise ValueError("Input file not supported: %s" % path)
    with open(path) as f:
        if extension == ".json":
            return json.load(f)
        return dict((row[0], row[1]) for row in csv.reader(f) if len(row) >= 2)
#Replace variable in the scheme with their value
def fillScheme(scheme, mapping, data=None):
    """Return *scheme* with every mapping placeholder substituted.

    Args:
        scheme: template text containing the placeholders.
        mapping: dict of placeholder -> replacement rule. Placeholders whose
            name contains "cst" are replaced by the mapped text itself;
            placeholders whose name contains "var" are replaced by the value
            found in *data* at the mapped dotted path.
        data: already-loaded input data. When omitted, it is read with
            getRawData().

    Placeholders matching neither rule are reported on stdout and left
    untouched.
    """
    if data is None:
        data = getRawData()
    # Longest placeholders first, so that ":var1" cannot overwrite the
    # beginning of ":var10"; ties are broken alphabetically for determinism.
    for variable in sorted(mapping, key=lambda name: (-len(name), name)):
        if "cst" in variable:
            scheme = scheme.replace(variable, mapping[variable])
        elif "var" in variable:
            scheme = scheme.replace(variable, getValue(data, mapping[variable]))
        else:
            print("Error in the mapping file")
    return scheme
#Open scheme and replace its variables with the mapped values
with open(schemeFile, "r") as schemeHandle:
    # Newlines are removed, so the template is handled as one single line.
    scheme = schemeHandle.read().replace('\n', '')
scheme = fillScheme(scheme, mappingComplex)

#Create a graph in RDFlib with the new triples
gScheme = Graph()
gScheme.parse(data=scheme, format='turtle')

#Open base graph and merge it with the new triples
# Honour -g/--graph; fall back to the bundled base graph when it is not given.
baseGraphPath = options.graph or "input/baseGraph.ttl"
g = Graph()
g.parse(baseGraphPath, format='turtle')
g = g + gScheme

#Save the output graph
g.serialize(destination=outputFile, format="turtle")