-
-
Notifications
You must be signed in to change notification settings - Fork 17
/
main.py
80 lines (62 loc) · 2.28 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import json
from janome.tokenizer import Tokenizer
from pykakasi import kakasi
import re
# Shared janome tokenizer; hiraganafy() uses it to obtain per-token readings.
t = Tokenizer()
# NOTE: this rebinds the imported name `kakasi` (the class) to an instance,
# so the class itself is no longer reachable under that name afterwards.
# NOTE(review): setMode()/getConverter()/do() look like pykakasi's legacy
# API — confirm it is still supported by the installed pykakasi version.
kakasi = kakasi()
# Converter for kanji ("J") -> hiragana ("H"); used by hiraganafy_v1().
kakasi.setMode("J","H")
conv_j2h = kakasi.getConverter()
# Converter for katakana ("K") -> hiragana ("H"); used by both hiraganafy
# variants.
# NOTE(review): setMode() is called on the same instance as above, so this
# converter presumably has the J->H mode enabled as well — confirm against
# the pykakasi documentation.
kakasi.setMode("K","H")
conv_k2h = kakasi.getConverter()
def hiraganafy_v1(keyword):
    """Return *keyword* converted to hiragana using only the kakasi converters.

    The keyword is upper-cased, passed through the kanji->hiragana converter
    and then through the katakana->hiragana converter.
    """
    return conv_k2h.do(conv_j2h.do(keyword.upper()))
def hiraganafy(keyword):
    """Return the hiragana reading of *keyword* based on janome token readings.

    The upper-cased keyword is tokenized; each token contributes its reading,
    or its surface form when the reading is reported as '*'. The concatenated
    result is then run through the katakana->hiragana converter.

    As a side effect, when the result differs from hiraganafy_v1(keyword) a
    table-style row with the keyword and both conversions is printed.
    """
    pieces = []
    for tok in t.tokenize(keyword.upper()):
        if tok.reading == '*':
            # No reading available for this token: keep the text as written.
            pieces.append(tok.surface)
        else:
            pieces.append(tok.reading)

    hiragana = conv_k2h.do(''.join(pieces))

    # Report disagreements with the kakasi-only conversion for comparison.
    legacy = hiraganafy_v1(keyword)
    if hiragana != legacy:
        print(f'| {keyword} | {hiragana} | {legacy} |')
    return hiragana
def add_word_to_dict(emoji, keyword, emoji_dict):
    """Append one tab-separated dictionary entry for *emoji* to *emoji_dict*.

    The entry has the form ':<keyword>\\t<emoji>\\t記号\\t'. Every 'ゔ' in the
    keyword is first rewritten as 'う' followed by a voiced sound mark
    (presumably because the target dictionary format does not accept 'ゔ' —
    TODO confirm).

    *emoji_dict* is mutated in place; nothing is returned.
    """
    emoji_dict.append(
        ':{}\t{}\t記号\t'.format(keyword.replace('ゔ', 'う゛'), emoji)
    )
class EmojiDict():
    """Build a line-oriented emoji dictionary from an emoji JSON file.

    Intended call order: ``get_emoji_json()`` -> ``create_emoji_dict()`` ->
    ``save_emoji_dict()``.

    Attributes:
        emoji_json_path: Path of the JSON file to read.
        emoji_dict_path: Path of the dictionary file to write.
        emoji_json: Parsed JSON content, or None until get_emoji_json() ran.
        emoji_dict: A list of entries before create_emoji_dict() has run;
            afterwards a single newline-joined string of sorted, unique
            entries.
    """

    def __init__(self, emoji_json_path: str, emoji_dict_path: str) -> None:
        self.emoji_json_path = emoji_json_path
        self.emoji_dict_path = emoji_dict_path
        self.emoji_json = None
        # Per-instance list: a class-level list would be shared (and mutated)
        # by every instance.
        self.emoji_dict = []

    def get_emoji_json(self) -> None:
        """Load the JSON file at ``emoji_json_path`` into ``emoji_json``."""
        # Explicit UTF-8 so emoji/Japanese text does not depend on the
        # platform's default encoding.
        with open(self.emoji_json_path, 'r', encoding='utf-8') as f:
            self.emoji_json = json.load(f)

    def save_emoji_dict(self) -> None:
        """Write ``emoji_dict`` plus a trailing newline to ``emoji_dict_path``."""
        content = self.emoji_dict
        if not isinstance(content, str):
            # create_emoji_dict() has not run yet: write the raw entries one
            # per line instead of the Python repr of the list.
            content = '\n'.join(content)
        with open(self.emoji_dict_path, 'w', encoding='utf-8') as f:
            f.write(content)
            f.write('\n')

    def create_emoji_dict(self) -> None:
        """Generate dictionary entries from ``emoji_json``.

        For every emoji, each keyword and then the short name is considered;
        values for which ``str.isalpha()`` is false are skipped. The rest are
        converted with hiraganafy() and formatted by add_word_to_dict().
        Entries are de-duplicated, sorted and stored in ``emoji_dict`` as one
        newline-joined string.

        get_emoji_json() must have been called first. Calling this method
        more than once is safe: entries already present are kept.
        """
        existing = self.emoji_dict
        if isinstance(existing, str):
            # A previous call already joined the entries; split them back.
            entries = [line for line in existing.split('\n') if line]
        else:
            entries = list(existing)

        for emoji, info in self.emoji_json.items():
            # Keywords first, then the short name, each under the same filter.
            candidates = [*info['keywords'], info['short_name']]
            for candidate in candidates:
                if not candidate.isalpha():
                    continue
                add_word_to_dict(emoji, hiraganafy(candidate), entries)

        self.emoji_dict = '\n'.join(sorted(set(entries)))
if __name__ == "__main__":
    # Script entry point: read the emoji JSON, build the dictionary entries
    # and write them out as a TSV file.
    EMOJI_JSON_PATH = '/root/emoji_ja.json'
    EMOJI_DICT_PATH = 'tsv/emoji.tsv'

    builder = EmojiDict(
        emoji_json_path=EMOJI_JSON_PATH,
        emoji_dict_path=EMOJI_DICT_PATH,
    )
    builder.get_emoji_json()     # load the source JSON
    builder.create_emoji_dict()  # convert keywords and format entries
    builder.save_emoji_dict()    # write the result to disk