-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat: Implemented preliminary pipeline * LLM experiment on gpt-4o-mini * removed placeholder * Added Gemini support * Fixed crash on hitting gemini rate limit * Implicit review loading * Delete test.py
- Loading branch information
1 parent
c261acb
commit d63e37c
Showing
7 changed files
with
207 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,3 +43,4 @@ output/toy.2016SB5/5.deu_Latn/ | |
data/raw/google/ | ||
*.rar | ||
output/toy.2016SB5/5.arb_Arab/ | ||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
metric,5.pes_Arab.zho_Hans.deu_Latn.arb_Arab.fra_Latn.spa_Latn.rnd.0.0 | ||
P_1,0.0666666666666666 | ||
P_5,0.0133333333333333 | ||
P_10,0.0066666666666666 | ||
P_100,0.0006666666666666 | ||
recall_1,0.0666666666666666 | ||
recall_5,0.0666666666666666 | ||
recall_10,0.0666666666666666 | ||
recall_100,0.0666666666666666 | ||
ndcg_cut_1,0.0666666666666666 | ||
ndcg_cut_5,0.0666666666666666 | ||
ndcg_cut_10,0.0666666666666666 | ||
ndcg_cut_100,0.0666666666666666 | ||
map_cut_1,0.0666666666666666 | ||
map_cut_5,0.0666666666666666 | ||
map_cut_10,0.0666666666666666 | ||
map_cut_100,0.0666666666666666 | ||
success_1,0.0666666666666666 | ||
success_5,0.0666666666666666 | ||
success_10,0.0666666666666666 | ||
success_100,0.0666666666666666 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
"""Expected values for test cases of implicit/explicit review loading""" | ||
|
||
# Expected first review, last review, and number of reviews when loading
# only reviews with implicit aspects.
# NOTE(review): in these entries the first slot of each 'aos' tuple is
# [None], where the explicit fixtures carry token lists — presumably the
# marker for an implicit aspect; confirm against the loader.
SEMEVAL_IMPLICIT = {
    "first": {
        'id': '1004293:2',
        'text': 'they never brought us complimentary noodles, ignored repeated requests for sugar, and threw our dishes on the table.',
        'sentences': [
            ['they', 'never', 'brought', 'us', 'complimentary', 'noodles,', 'ignored', 'repeated', 'requests', 'for', 'sugar,', 'and', 'threw', 'our', 'dishes', 'on', 'the', 'table.']
        ],
        'aos': [[([None], [], '-1')]],
        'lang': 'eng_Latn',
        'orig': True
    },
    "last": {
        'id': '1058221:7',
        'text': 'the last time i walked by it looked pretty empty. hmmm.',
        'sentences': [
            ['the', 'last', 'time', 'i', 'walked', 'by', 'it', 'looked', 'pretty', 'empty.', 'hmmm.']
        ],
        'aos': [[([None], [], '-1')]],
        'lang': 'eng_Latn',
        'orig': True
    },
    "count": 10,
}
|
||
# Expected first review, last review, and number of reviews when loading
# only reviews with explicit aspects.
# NOTE(review): the first slot of each 'aos' tuple holds tokens that also
# occur in the sentence — presumably the aspect term; confirm against the
# loader.
SEMEVAL_EXPLICIT = {
    "first": {
        'id': '1004293:0',
        'text': 'judging from previous posts of test this used to be a good place but not any longer.',
        'sentences': [
            ['judging', 'from', 'previous', 'posts', 'of', 'test', 'this', 'used', 'to', 'be', 'a', 'good', 'place', 'but', 'not', 'any', 'longer.']
        ],
        'aos': [
            [(['posts', 'of', 'test'], [], '-1'),
             (['place'], [], '-1')]
        ],
        'lang': 'eng_Latn',
        'orig': True
    },
    "last": {
        'id': '1058221:4',
        'text': 'i happen to have a policy that goes along with a little bit of self-respect, which includes not letting a waiter intimidate me, i.e. make me feel bad asking for trivialities like water, or the check.',
        'sentences': [
            ['i', 'happen', 'to', 'have', 'a', 'policy', 'that', 'goes', 'along', 'with', 'a', 'little', 'bit', 'of', 'self-respect,', 'which', 'includes', 'not', 'letting', 'a', 'waiter', 'intimidate', 'me,', 'i.e.', 'make', 'me', 'feel', 'bad', 'asking', 'for', 'trivialities', 'like', 'water,', 'or', 'the', 'check.']
        ],
        'aos': [[(['waiter'], [], '-1')]],
        'lang': 'eng_Latn',
        'orig': True
    },
    "count": 18,
}
|
||
# Expected first review, last review, and number of reviews when loading
# reviews with implicit and explicit aspects together.
SEMEVAL_BOTH = {
    "first": {
        'id': '1004293:0',
        'text': 'judging from previous posts of test this used to be a good place but not any longer.',
        'sentences': [
            ['judging', 'from', 'previous', 'posts', 'of', 'test', 'this', 'used', 'to', 'be', 'a', 'good', 'place', 'but', 'not', 'any', 'longer.']
        ],
        'aos': [
            [(['posts', 'of', 'test'], [], '-1'),
             (['place'], [], '-1')]
        ],
        'lang': 'eng_Latn',
        'orig': True
    },
    "last": {
        'id': '1058221:7',
        'text': 'the last time i walked by it looked pretty empty. hmmm.',
        'sentences': [
            ['the', 'last', 'time', 'i', 'walked', 'by', 'it', 'looked', 'pretty', 'empty.', 'hmmm.']
        ],
        'aos': [[([None], [], '-1')]],
        'lang': 'eng_Latn',
        'orig': True
    },
    "count": 26,
}
|
||
# Expected number of reviews for the "null" case; there is no first/last
# review to compare, so only the count is recorded.
SEMEVAL_NULL = {
    "count": 0,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
"""Unit tests for implicit review loading""" | ||
import sys | ||
import os | ||
import pytest | ||
from ev_implicit_reviews import SEMEVAL_EXPLICIT, SEMEVAL_IMPLICIT, SEMEVAL_BOTH, SEMEVAL_NULL | ||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) | ||
|
||
from cmn.review import Review | ||
from cmn.semeval import SemEvalReview | ||
from cmn.twitter import TwitterReview # Currently, no twitter reviews have implicit aspects | ||
|
||
# Dataset file passed to SemEvalReview.load in the tests below. The path
# starts with "./", so it is resolved against the current working directory.
SEMEVAL_PATH = "./data/raw/semeval/toy.2016SB5/ABSA16_Restaurants_Train_SB1_v2.xml"
|
||
@pytest.mark.parametrize("path, expected", [
    (SEMEVAL_PATH, SEMEVAL_IMPLICIT),
])
def test_implicit(path, expected):
    """Test loading implicit aspect containing reviews.

    Loads ``path`` with ``explicit=False, implicit=True`` and compares the
    number of reviews and the first and last review against ``expected``
    (keys ``"count"``, ``"first"``, ``"last"``).
    """
    reviews: list[Review] = SemEvalReview.load(path, explicit=False, implicit=True)
    # Check the size first so an empty result is reported as a count
    # mismatch instead of an IndexError from the lookups below.
    assert len(reviews) == expected["count"]
    # Only element 0 of each review's to_dict() result is compared.
    assert reviews[0].to_dict()[0] == expected["first"]
    assert reviews[-1].to_dict()[0] == expected["last"]
|
||
@pytest.mark.parametrize("path, expected", [
    (SEMEVAL_PATH, SEMEVAL_EXPLICIT),
])
def test_explicit(path, expected):
    """Test loading explicit aspect containing reviews.

    Loads ``path`` with ``explicit=True, implicit=False`` and compares the
    number of reviews and the first and last review against ``expected``
    (keys ``"count"``, ``"first"``, ``"last"``).
    """
    reviews: list[Review] = SemEvalReview.load(path, explicit=True, implicit=False)
    # Check the size first so an empty result is reported as a count
    # mismatch instead of an IndexError from the lookups below.
    assert len(reviews) == expected["count"]
    # Only element 0 of each review's to_dict() result is compared.
    assert reviews[0].to_dict()[0] == expected["first"]
    assert reviews[-1].to_dict()[0] == expected["last"]
|
||
@pytest.mark.parametrize("path, expected", [
    (SEMEVAL_PATH, SEMEVAL_BOTH),
])
def test_implicit_and_explicit(path, expected):
    """Test loading both implicit and explicit reviews.

    Loads ``path`` with ``explicit=True, implicit=True`` and compares the
    number of reviews and the first and last review against ``expected``
    (keys ``"count"``, ``"first"``, ``"last"``).
    """
    reviews: list[Review] = SemEvalReview.load(path, explicit=True, implicit=True)
    # Check the size first so an empty result is reported as a count
    # mismatch instead of an IndexError from the lookups below.
    assert len(reviews) == expected["count"]
    # Only element 0 of each review's to_dict() result is compared.
    assert reviews[0].to_dict()[0] == expected["first"]
    assert reviews[-1].to_dict()[0] == expected["last"]
|
||
@pytest.mark.parametrize("path, expected", [
    (SEMEVAL_PATH, SEMEVAL_NULL),
])
def test_null(path, expected):
    """Test loading neither implicit nor explicit reviews.

    Loads ``path`` with ``explicit=False, implicit=False`` and checks only
    the number of reviews returned against ``expected["count"]``.
    """
    reviews: list[Review] = SemEvalReview.load(path, explicit=False, implicit=False)
    assert len(reviews) == expected["count"]