Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entity marking #101

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tuw_nlp/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,18 @@ def to_dot(self, marked_nodes=set(), edge_color=None):
).replace(
"-", "_"
)
elif "entity" in show_graph.nodes[node] and show_graph.nodes[node]["entity"] == 1:
node_line = '\t{0} [shape = circle, label = "{1}", style=filled, fillcolor=lightblue];'.format(
d_node, printname
).replace(
"-", "_"
)
elif "entity" in show_graph.nodes[node] and show_graph.nodes[node]["entity"] == 2:
node_line = '\t{0} [shape = circle, label = "{1}", style=filled, fillcolor=pink];'.format(
d_node, printname
).replace(
"-", "_"
)
else:
node_line = '\t{0} [shape = circle, label = "{1}"];'.format(
d_node, printname
Expand Down
55 changes: 44 additions & 11 deletions tuw_nlp/graph/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
dummy_tree = "dummy(dummy)"



class GraphFormulaPatternMatcher:

"""
Expand Down Expand Up @@ -61,11 +62,23 @@ def node_matcher(n1, n2, flags):
if n1["name"] is None or n2["name"] is None:
return True

name_check = False
if ( re.match(rf"\b({n2['name']})\b", n1["name"], flags)
or n2["name"] == n1["name"] ):
name_check = True

return (
True
if (
re.match(rf"\b({n2['name']})\b", n1["name"], flags)
or n2["name"] == n1["name"]
(
re.match(rf"\b({n2['name']})\b", n1["name"], flags)
or n2["name"] == n1["name"]
)
and
(
("entity" in n2 and "entity" in n1 and n1["entity"] == n2["entity"])
or ("entity" not in n2 and "entity" not in n2)
)
)
else False
)
Expand Down Expand Up @@ -152,8 +165,9 @@ def max_distance(self, graph, node_data, subgraphs):
if not possible:
return False
for n1, n2 in product(node1_matches, node2_matches):
n1_root = [n for n, d in n1.in_degree() if d == 0][0]
n2_root = [n for n, d in n2.in_degree() if d == 0][0]
# This is a temporary fix. Should be d==0 for both.
n1_root = [n for n, d in n1.in_degree() if d < 2][0]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this as a workaround for using the path() function with entities. In particular, if I want to look for a path to or from an arbitrary entity 1, the penman code looks like this: ( .* / u-1 :entity (e_1 / 1 )). This seems to create a single-node subgraph with one edge. The list returned by the original `d == 0` comprehension is therefore empty, and the reference to element 0 throws an exception. The same workaround is included in the path_between function.

n2_root = [n for n, d in n2.in_degree() if d < 2][0]
try:
if nx.shortest_path_length(graph, n1_root, n2_root) <= max_dist:
subgraphs.append(nx.compose(n1, n2))
Expand All @@ -172,8 +186,9 @@ def path_between(self, graph, nodes, subgraphs):
if not possible:
return False
for n1, n2 in product(node1_matches, node2_matches):
n1_root = [n for n, d in n1.in_degree() if d == 0][0]
n2_root = [n for n, d in n2.in_degree() if d == 0][0]
# This is a temporary fix. Should be d==0 for both.
n1_root = [n for n, d in n1.in_degree() if d < 2][0]
n2_root = [n for n, d in n2.in_degree() if d < 2][0]
try:
if nx.has_path(graph, n1_root, n2_root):
subgraphs.append(nx.compose(n1, n2))
Expand Down Expand Up @@ -280,6 +295,7 @@ def pn_to_graph(raw_dl, edge_attr="color"):
g = pn.decode(raw_dl)
G = nx.DiGraph()
node_to_id = {}
node_to_entity = {}
root_id = None

for i, trip in enumerate(g.instances()):
Expand All @@ -301,6 +317,9 @@ def pn_to_graph(raw_dl, edge_attr="color"):
G.add_node(i, name=name, token_id=ud_id)
elif indicator == "u":
G.add_node(i, name=name, token_id=None)
elif indicator == "entity":
#entities are not added as nodes but as node attributes
node_to_entity[node_id] = name
else:
raise ValueError("Unknown indicator")

Expand All @@ -319,16 +338,19 @@ def pn_to_graph(raw_dl, edge_attr="color"):
tgt_id = node_to_id[tgt]

if edge != "UNKNOWN":
G.add_edge(src_id, tgt_id)
if edge.isnumeric():
edge = int(edge)
G[src_id][tgt_id].update({edge_attr: edge})
if edge == "entity":
G.nodes[src_id]["entity"] = int(node_to_entity[tgt])
else:
G.add_edge(src_id, tgt_id)
if edge.isnumeric():
edge = int(edge)
G[src_id][tgt_id].update({edge_attr: edge})

return G, root_id


def graph_to_pn(graph):
nodes = {}
nodes, entity_nodes = {}, {}
pn_edges, pn_nodes = [], []

for u, v, e in graph.edges(data=True):
Expand All @@ -348,6 +370,17 @@ def graph_to_pn(graph):
nodes[node] = (pn_id, name)
pn_nodes.append((pn_id, ":instance", name))

for node in graph.nodes():
if "entity" in graph.nodes[node]:
entity_num = graph.nodes[node]["entity"]
if entity_num not in entity_nodes:
node_id = f"entity_{len(entity_nodes)}"
pn_id = node_id
nodes[node_id] = (pn_id, entity_num)
entity_nodes[entity_num] = pn_id
pn_nodes.append((pn_id, ":instance", entity_num))
pn_edges.append((nodes[node][0], f':entity', entity_nodes[entity_num]))

G = pn.Graph(pn_nodes + pn_edges)

try:
Expand Down