-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathsinhalese_vowel_letter_fixer.py
52 lines (45 loc) · 1.75 KB
/
sinhalese_vowel_letter_fixer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from preprocessing.sinhalese_characters import get_fixed_vowel, is_sinhalese_vowel, is_sinhalese_letter
class SinhaleseVowelLetterFixer:
    """
    Sinhalese Language Vowel Letter Fixer

    Scans text one character at a time and replaces consecutive vowel signs
    that follow a letter with the value returned by ``get_fixed_vowel``.
    """

    @staticmethod
    def get_fixed_text(text: str) -> str:
        """
        Fix wrong usage of vowels

        Each character is classified with ``is_sinhalese_letter`` and
        ``is_sinhalese_vowel``. A letter and the vowel sign collected after it
        are held back until the next letter, the next other character, or the
        end of the text, so that several vowel signs can be merged first.

        Problems are reported with ``print`` and processing continues:

        * a vowel sign with no pending letter before it is reported, but is
          still kept in the output;
        * a pair of vowel signs for which ``get_fixed_vowel`` raises
          ``KeyError`` is reported and the second sign is dropped, unless it
          merely repeats the pending sign, in which case it is dropped
          without a report.

        :param text: text to be fixed
        :return: fixed text with proper vowels
        """
        # Collect output pieces and join once at the end instead of growing a
        # string with += on every character.
        pieces = []
        last_letter = ""
        last_vowel = ""

        for letter in text:
            if is_sinhalese_letter(letter):
                # A new letter begins: emit the previous letter and its vowel.
                pieces.append(last_letter + last_vowel)
                last_letter = letter
                last_vowel = ""
            elif is_sinhalese_vowel(letter):
                if not last_letter:
                    print("Error : First letter can't be a vowel sign : " + letter)
                if not last_vowel:
                    # First vowel sign since the last flush; nothing to merge.
                    last_vowel = letter
                    continue
                try:
                    last_vowel = get_fixed_vowel(last_vowel + letter)
                except KeyError:
                    # A mistakenly duplicated vowel sign is skipped silently;
                    # any other unknown pair is reported. In both cases the
                    # pending vowel is kept and the new sign is discarded.
                    if last_vowel != letter:
                        print("Error : can't fix vowel combination " + last_vowel + " + " + letter)
            else:
                # Any other character ends the pending letter/vowel pair and
                # is copied through unchanged.
                pieces.append(last_letter + last_vowel + letter)
                last_letter = ""
                last_vowel = ""

        # Emit whatever is still pending at the end of the text.
        pieces.append(last_letter + last_vowel)
        return "".join(pieces)
# Test
# Runs only when this file is executed directly, so importing the module does
# not execute the check (and cannot fail on it).
if __name__ == "__main__":
    # wrong_text has the same vowel sign typed twice after the first letter;
    # correct_text is the output the fixer is expected to produce for it.
    wrong_text = "ද" + "ෙ" + "ෙ" + "ව" + "ය"
    correct_text = "ද" + "ෛ" + "ව" + "ය"
    corrected_text = SinhaleseVowelLetterFixer.get_fixed_text(wrong_text)
    assert correct_text == corrected_text, (
        "expected " + repr(correct_text) + ", got " + repr(corrected_text)
    )