Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(parentheticals): Fix three bugs in parenthetical functionality #1919

Merged
merged 5 commits into from
Feb 18, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cl/citations/description_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from cl.search.models import OpinionCluster

# NOTE(review): this is a diff hunk, so each "... that" constant appears twice:
# the removed line followed by its replacement. Read as plain Python, the later
# assignment (the one using `.pattern`) is the one that takes effect.

# Matches any run of non-whitespace characters ending in "ing".
_GERUND = re.compile(r"(?:\S+ing)", re.IGNORECASE)
# Pre-fix form: interpolating the compiled pattern object into the f-string
# inserts the object's string representation, not the regex source.
_GERUND_THAT = re.compile(rf"{_GERUND} that", re.IGNORECASE)
# Fixed form: `.pattern` is the original regex source string, so this matches
# a gerund followed by " that".
_GERUND_THAT = re.compile(rf"{_GERUND.pattern} that", re.IGNORECASE)
# Matches one of a fixed set of "holding"-style verbs.
_HOLDING = re.compile(
r"(?:holding|deciding|ruling|recognizing|concluding)", re.IGNORECASE
)
# Pre-fix form: same object-interpolation problem as the first _GERUND_THAT.
_HOLDING_THAT = re.compile(rf"{_HOLDING} that", re.IGNORECASE)
# Fixed form: matches one of the "holding"-style verbs followed by " that".
_HOLDING_THAT = re.compile(rf"{_HOLDING.pattern} that", re.IGNORECASE)

# Observation of thousands of parentheticals seems to indicate that the
# most useful ones are in the neighborhood of 20 words long.
Expand Down
9 changes: 8 additions & 1 deletion cl/citations/filter_parentheticals.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re

_MODIFIABLE = r"(omissions?|quotations?|quotes?|headings?|(quotations? )?marks?|ellips.s|cites?|citations?|emphas.s|italics?|footnotes?|alterations?|punctuation|modifications?|brackets?|bracketed material|formatting)"
_MODIFIABLE = r"(omissions?|quotations?|quotes?|headings?|(quotations? )?marks|ellips.s|cites?|citations?|emphas.s|italics?|footnotes?|alterations?|punctuation|modifications?|brackets?|bracketed material|formatting)"
_MODIFABLE_TYPE = r"(internal|former|latter|first|second|third|fourth|fifth|last|some|further|certain|numbered|other|transcript)"
_FULL_MODIFIABLE = f"(({_MODIFABLE_TYPE} )?{_MODIFIABLE})"
_QUOTE_MODIFICATION = r"(added|provided|removed|adopted|(in )?(the )original|omitted|included|deleted|eliminated|altered|modified|supplied|ours|mine|changed|(in|by) \S+|by \S+ court)"
Expand All @@ -20,16 +20,19 @@
r".n banc", # en banc or in banc
# Scalia, J., dissenting; Roberts, C.J., concurring in the judgment, concurring in part, and dissenting in part
f"{_JUDGE_NAME}( {_FULL_OPINION_DESCRIPTOR})?([ ,]+(and )?{_FULL_OPINION_DESCRIPTOR})*",
f"{_JUDGE_NAME}.{{1,75}}",
# concurring in result
f"({_DOCUMENT_TYPES} )?{_FULL_OPINION_DESCRIPTOR}",
# opinion of Breyer, J.; opinion of Scalia and Alito, J.J.
f"{_DOCUMENT_TYPES} of {_JUDGE_NAME}",
# plurality opinion, supplemental order
f"{_OPINION_TYPES}( {_DOCUMENT_TYPES})?( {_OPINION_TYPE_MODIFICATION})?",
rf"({_DOCUMENT_TYPES} )?opinion.*",
r"dictum|dicta",
r"on rehearing|denying cert(iorari)?",
r"simplified|cleaned up|as amended",
r"same|similar|contra",
r"standard of review",
r"(and )?cases cited therein",
# No. 12-345
r"No. \d+.?\d+",
Expand All @@ -45,6 +48,8 @@
f"{_FULL_MODIFIABLE} and {_FULL_MODIFIABLE} {_QUOTE_MODIFICATION}",
f"{_FULL_MODIFIABLE} {_QUOTE_MODIFICATION}[;,] ?{_FULL_MODIFIABLE} {_QUOTE_MODIFICATION}",
f"({_MODIFABLE_TYPE} )?{_MODIFIABLE}, {_MODIFIABLE}, and {_MODIFIABLE} {_QUOTE_MODIFICATION}",
# Match any short parenthetical that looks like a modification (e.g. "citations and internal marks omitted, emphasis added")
rf"(?=.*{_MODIFIABLE}.*).{{1,75}}",
# citing Gonzales v. Raich, 123 U.S. 456 (2019). A tad over-inclusive but very helpful
f"{_REFERENTIAL} .*",
# 2nd Cir. 2019, Third Circuit 1993
Expand All @@ -55,6 +60,8 @@
r".{1,10} (Circuit|Cir.)",
# hereinafter, "Griffin II"
r"here(in)?after(,)? .+",
# Imbalanced parentheses (for when eyecite cuts off the parenthetical too soon) e.g. "holding Section 4(a"
r"^.{1,35}\([^\)]{1,35}$",
# Single-word parentheticals, e.g., 'TILA'
r"\S*",
]
Expand Down
13 changes: 12 additions & 1 deletion cl/citations/fixtures/opinions_matching_citations.json
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,17 @@
"model": "search.citation",
"pk": 6
},
{
"fields": {
"volume": 2,
"reporter": "S.Ct.",
"page": "2",
"type": 1,
"cluster": 4
},
"model": "search.citation",
"pk": 20
},
{
"fields": {
"volume": 2,
Expand Down Expand Up @@ -401,7 +412,7 @@
"date_modified": "2015-08-15T14:10:56.801Z",
"extracted_by_ocr": false,
"author": 2,
"plain_text": "my plain text secret word for queries",
"plain_text": "my plain text secret word for queries. Foo v. Bar, 1 U.S. 1, 4, 2 S.Ct. 2, 5 (2000) (holding something happened)",
"html": "",
"download_url": null,
"cluster": 8,
Expand Down
11 changes: 9 additions & 2 deletions cl/citations/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,19 @@ def find_citations_and_parentheticals_for_opinion_by_pks(

parentheticals = []
for _opinion, _citations in citation_resolutions.items():
# Currently, eyecite has a bug where parallel citations are
# detected individually. We avoid creating duplicate parentheticals
# because of that by keeping track of what we've seen so far.
parenthetical_texts = set()
Comment on lines +166 to +169
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a pretty good fix. I'd prefer, though, that we fix it upstream in eyecite and then fix it properly here, rather than hack around it. That'd make our code better, and would provide a nice enhancement to eyecite users too. See: freelawproject/eyecite#76

for _cit in _citations:
# If the citation has a descriptive parenthetical, clean
# it up and store it as a Parenthetical
if (
par_text := _cit.metadata.parenthetical
) and is_parenthetical_descriptive(par_text):
(par_text := _cit.metadata.parenthetical)
and par_text not in parenthetical_texts
and is_parenthetical_descriptive(par_text)
):
parenthetical_texts.add(par_text)
clean = clean_parenthetical_text(par_text)
parentheticals.append(
Parenthetical(
Expand Down
17 changes: 17 additions & 0 deletions cl/citations/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,18 @@ def test_opinionscited_creation(self) -> None:
num_parentheticals,
)

def test_no_duplicate_parentheticals_from_parallel_cites(self) -> None:
remove_citations_from_imported_fixtures()
citing = Opinion.objects.get(pk=11)
cited = Opinion.objects.get(pk=7)
find_citations_and_parentheticals_for_opinion_by_pks.delay([11])
self.assertEqual(
Parenthetical.objects.filter(
describing_opinion=citing, described_opinion=cited
).count(),
1,
)


class CitationFeedTest(IndexedSolrTestCase):
def _tree_has_content(self, content, expected_count):
Expand Down Expand Up @@ -807,6 +819,7 @@ def test_is_not_descriptive(self):
"internal citations and quotations omitted",
"citations and internal ellipses omitted",
"quotation marks omitted; ellipses ours",
"headings and internal quotations omitted, emphasis and citations altered",
"plurality opinion",
"opinion of Breyer, J.",
"opinion of Mister Justice Black",
Expand All @@ -826,9 +839,12 @@ def test_is_not_descriptive(self):
"Sotomayor, J., statement respecting denial of certiorari",
"Roberts, C.J., concurring in part and dissenting in part",
"Friendly, J., concurring in the judgment, concurring in part, and dissenting in part",
"Scalia, J., specially concurring in the judgment on this issue",
"en banc",
"per curiam",
"same",
"standard of review",
"opinion of O'Connor, J., respecting the granting of an injunction",
"no",
"n. 3",
"No. 12-345",
Expand All @@ -853,6 +869,7 @@ def test_is_descriptive(self):
"accountant who gave lay opinion testimony might have qualified as expert",
"where plaintif's complaint alleges facts which, if proven, would entitle plaintiff to relief under the Eighth Amendment, dismissal of complaint was inappropriate",
"ruling that there is nothing either legal or illegal, only thinking makes it so",
"testing that the mere presence of the word quotation doesn't get a parenthetical filtered out if it's long enough",
"First Amendment",
"mislabeled product",
'"Look on my Works, ye Mighty, and despair"',
Expand Down
4 changes: 3 additions & 1 deletion cl/opinion_page/templates/view_opinion_summaries.html
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ <h2>

<div id="all-summaries">
<h3>{{ summaries_count|intcomma }} judge-written summar{{ summaries_count|pluralize:"y,ies" }} of this opinion from other cases.</h3>
<p>We looked through our complete collection of opinions and identified the following parenthetical summaries that describe this case:</p>
{% if summaries_count > 0 %}
<p>We looked through our complete collection of opinions and identified the following parenthetical summaries that describe this case:</p>
{% endif %}
<hr>
<ul>
{% for summary in summaries %}
Expand Down