forked from mblwhoi/reconciliation_service_skeleton
-
Notifications
You must be signed in to change notification settings - Fork 9
/
example_composers_reconciliation_service.py
executable file
·100 lines (79 loc) · 2.86 KB
/
example_composers_reconciliation_service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
"""
An example reconciliation service API for Google Refine 2.0.
See http://code.google.com/p/google-refine/wiki/ReconciliationServiceApi.
"""
import csv
from fuzzywuzzy import fuzz
import re
import sys
import simplejson as json
from flask import Flask, request, jsonify
# The Flask application object that the route decorator and helpers in
# this module attach to.
app = Flask(__name__)

# Matching threshold: a record is offered as a candidate only when its
# fuzzy-match score is strictly greater than this value.
match_threshold = 70

# Basic service metadata. There are a number of other documented options
# but this is all we need for a simple service.
metadata = {
    "name": "Person Reconciliation Service",
    "defaultTypes": [{"id": "/people/person", "name": "Person"}],
}

# Read in person records from csv file.  The path is resolved against the
# current working directory.  All rows are loaded into memory inside a
# ``with`` block so the file handle is closed as soon as loading finishes
# rather than staying open for the life of the process.
with open('example_composers.csv', 'rb') as csv_file:
    reader = csv.DictReader(csv_file)
    records = list(reader)
def search(query):
    """
    Return the person records whose label fuzzily matches ``query``.

    Every entry of the module-level ``records`` list is scored against
    ``query`` with ``fuzz.token_set_ratio``; entries scoring strictly above
    ``match_threshold`` are returned, in file order, as dicts with the keys
    "id", "name", "score", "match" and "type".  "match" is True only when
    the record label equals ``query`` exactly.
    """
    # Initialize matches.
    matches = []

    # Search person records for matches.
    for r in records:
        score = fuzz.token_set_ratio(query, r['label'])
        if score > match_threshold:
            matches.append({
                "id": r['id'],
                "name": r['label'],
                "score": score,
                "match": query == r['label'],
                "type": [{"id": "/people/person", "name": "Person"}]
            })

    # Debug trace of the candidates.  Written with sys.stderr.write rather
    # than the Python 2-only ``print >> sys.stderr`` statement so the same
    # line works unchanged on Python 2 and Python 3 (where the old form
    # raises TypeError at runtime).
    sys.stderr.write("%s\n" % (matches,))

    return matches
# A JSONP callback must be a dotted path of JavaScript identifiers
# (e.g. "cb", "jQuery123_456", "app.handlers.onData").  Anything else is
# refused so request data can never inject script into the response.
_JSONP_CALLBACK_RE = re.compile(
    r'[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z')


def jsonpify(obj):
    """
    Like jsonify but wraps result in a JSONP callback if a 'callback'
    query param is supplied.

    Without a 'callback' param the result of ``jsonify(obj)`` is returned.
    With a valid callback name the response body is ``callback(<json>)``
    served as "text/javascript".  A callback that is not a dotted
    identifier path yields a 400 response, because the name is copied
    verbatim into executable JavaScript.
    """
    # Only the parameter lookup decides between JSON and JSONP; keeping it
    # outside any try block means a KeyError raised while serializing is
    # not mistaken for "no callback supplied".
    callback = request.args.get('callback')
    if callback is None:
        return jsonify(obj)

    if not _JSONP_CALLBACK_RE.match(callback):
        return app.make_response(("Invalid JSONP callback name.", 400))

    response = app.make_response("%s(%s)" % (callback, json.dumps(obj)))
    response.mimetype = "text/javascript"
    return response
@app.route("/reconcile", methods=['POST', 'GET'])
def reconcile():
    """
    Handle a reconciliation request.

    Three request shapes are recognized, checked in this order:

    * a 'query' parameter: a single search, answered with
      ``{"result": [...]}``;
    * a 'queries' parameter: a JSON object of (key, query) pairs, answered
      with an object of (key, ``{"result": [...]}``) pairs;
    * neither: the service metadata is returned.

    Every response goes through ``jsonpify``.
    """
    # The route accepts both GET and POST, so read parameters from
    # request.values (URL query string combined with form data) instead of
    # request.form alone, which would ignore queries sent in the URL.
    params = request.values

    # If a single 'query' is provided do a straightforward search.
    query = params.get('query')
    if query:
        # If the 'query' param starts with a "{" then it is a JSON object
        # with the search string as the 'query' member. Otherwise,
        # the 'query' param is the search string itself.
        if query.startswith("{"):
            query = json.loads(query)['query']
        results = search(query)
        return jsonpify({"result": results})

    # If a 'queries' parameter is supplied then it is a dictionary
    # of (key, query) pairs representing a batch of queries. We
    # should return a dictionary of (key, results) pairs.
    queries = params.get('queries')
    if queries:
        queries = json.loads(queries)
        results = {}
        for (key, query) in queries.items():
            results[key] = {"result": search(query['query'])}
        return jsonpify(results)

    # If neither a 'query' nor 'queries' parameter is supplied then
    # we should return the service metadata.
    return jsonpify(metadata)
if __name__ == '__main__':
    # Script entry point: start the Flask server with debug mode switched
    # on.  For a quick check of the matcher without HTTP, call
    # search("Doe, John") here instead.
    app.run(debug=True)