Skip to content
This repository has been archived by the owner on Nov 30, 2023. It is now read-only.

Update database.py #175

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions chinese/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,24 @@ def _get_word_pinyin(self, word, type_, prefer_tw=False, no_variants=True):
accentuate(list(map(str.lower, s.split())), 'pinyin')
)


def _get_word_jyutping(self, word, type_):
if type_ == 'trad':
query = 'SELECT jyutping FROM cidian WHERE traditional=?'
#query = 'SELECT jyutping FROM cidian WHERE traditional=?'
query = 'SELECT kCantonese FROM hanzi WHERE cp=?'
#query2 = query.split(':')[0]
#query2 = query.split(" ")[0]
elif type_ == 'simp':
query = 'SELECT jyutping FROM cidian WHERE simplified=?'
#query = 'SELECT jyutping FROM cidian WHERE simplified=?'
query = 'SELECT kCantonese FROM hanzi WHERE cp=?'
#query2 = query.split(" ")[0]
self.c.execute(query, (word,))
res = self.c.fetchone()
if not res:
return None
return res[0]


def get_pinyin(self, word, type_, prefer_tw=False, word_len=4):
p = self._get_word_pinyin(word, type_, prefer_tw)
if p:
Expand Down Expand Up @@ -122,8 +129,47 @@ def get_pinyin(self, word, type_, prefer_tw=False, word_len=4):
word = word[1:]
return result


def get_cantonese(self, word, type_, word_len=4):
    """Return a Cantonese reading for *word*.

    The whole word is looked up first. If that fails, the text is consumed
    left to right: at each position the longest prefix (at most *word_len*
    characters, at least two) with a known reading is taken; otherwise the
    single character is looked up, and a character without any reading is
    copied into the result unchanged.

    Args:
        word: Text to transcribe.
        type_: Script variant, forwarded to ``_get_word_jyutping``.
        word_len: Longest prefix, in characters, tried at each position.

    Returns:
        The assembled reading string. For a single character with no
        known reading, whatever falsy value ``_get_char`` returned.
    """

    def first_reading(readings):
        # A lookup may hold several space-separated readings; keep only
        # the first. Falsy values (no result) pass through untouched.
        return readings.split(' ')[0] if readings else readings

    whole = self._get_word_jyutping(word, type_)
    if whole:
        return first_reading(whole)
    if len(word) == 1:
        return first_reading(self._get_char(word, 'canto'))

    result = ''
    last_was_reading = False
    while word:
        matched = False
        # Restart from the longest candidate at every position; a shared
        # counter would stay exhausted after the first miss.
        for size in range(min(word_len, len(word)), 1, -1):
            reading = self._get_word_jyutping(word[:size], type_)
            if reading:
                # NOTE(review): the full lookup value is appended here,
                # not just its first reading -- confirm this is intended.
                # add_with_space presumably joins with a separating space.
                result = add_with_space(result, reading)
                word = word[size:]
                last_was_reading = True
                matched = True
                break
        if matched:
            continue

        reading = first_reading(self._get_char(word[0], 'canto'))
        if reading:
            result = add_with_space(result, reading)
            last_was_reading = True
        else:
            # No reading: keep the raw character, set off from a
            # preceding reading by one space.
            if last_was_reading:
                result += ' '
            result += word[0]
            last_was_reading = False
        word = word[1:]
    return result


def get_traditional(self, word, word_len=4):
return self.get_word(word, word_len, type_='trad')
Expand Down