-
Notifications
You must be signed in to change notification settings - Fork 10
/
generate_strokes_cangjie.py
79 lines (71 loc) · 2.24 KB
/
generate_strokes_cangjie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import sys
import math
import random
import string
import textwrap
import itertools
import unicodedata
import collections
# Build the per-character code table from the Cangjie 5 dictionary.
#
# ``strokes`` maps a character's code point to a 4-tuple of ints, where each
# letter of the dictionary code is mapped a=1 ... z=26 and 0 fills unused
# slots.  ``used_codes`` holds every tuple currently assigned to a character.
strokes = {}
used_codes = set()
# How many characters currently hold each code.  This lets a replaced code be
# released from ``used_codes`` without rescanning every value in ``strokes``.
code_owners = collections.Counter()
started = False


def _is_weak(code):
    """Return True if *code* has an empty slot (0) or a value of 26 or more."""
    return any(x >= 26 or x == 0 for x in code)


with open('wordlist/cangjie5.dict.yaml', 'r', encoding='utf-8') as f:
    for ln in f:
        ln = ln.rstrip()
        if not ln:
            continue
        if not started:
            # Everything up to and including the '...' line is skipped.
            started = ln == '...'
            continue

        row = ln.split()
        if len(row) < 2:
            # No code column on this line; there is nothing to encode.
            continue
        char, letters = row[0], row[1]
        if len(char) > 1 or char == '、':
            continue
        if letters[0] in 'zx' or letters.startswith('yyy'):
            continue

        orig_code = [ord(x) - ord('a') + 1 for x in letters]
        code = orig_code[:4]
        if len(orig_code) > 4:
            # Squeeze into four slots: first three letters plus the last one.
            code[3] = orig_code[-1]
            if tuple(code) in used_codes:
                # Already taken: fall back to first two plus last two letters.
                code[2:4] = orig_code[-2:]
        else:
            # Pad short codes with 0 up to four slots (no-op at length 4).
            code.extend([0] * (4 - len(code)))

        ch_id = ord(char)
        new_code = tuple(code)
        old_code = strokes.get(ch_id)
        if old_code is not None:
            # A character that already has a code is only overwritten when
            # the stored code is weak and the new one is not.
            if not _is_weak(old_code) or _is_weak(new_code):
                continue
            code_owners[old_code] -= 1
            if not code_owners[old_code]:
                # This character was the only owner of the old code.
                used_codes.discard(old_code)
        strokes[ch_id] = new_code
        code_owners[new_code] += 1
        used_codes.add(new_code)
# Fill in entries missing from ``strokes`` using the radical-stroke list.
# Each non-blank line is parsed as whitespace-separated integers: the first
# is used as the key into ``strokes`` and the remaining ones become its value.
# Keys already present in ``strokes`` are left unchanged.
with open('wordlist/original-radical-stroke.txt', 'r', encoding='utf-8') as f:
    for ln in f:
        row = tuple(map(int, ln.split()))
        if not row:
            # Blank line: there is no key to index, so skip it.
            continue
        strokes.setdefault(row[0], row[1:])
# Emit one line per entry, in ascending key order: the key followed by the
# values of its tuple, all separated by single spaces.
for code_point in sorted(strokes):
    fields = (code_point,) + strokes[code_point]
    print(*fields)
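# Disabled debugging aid: counts how many entries in ``strokes`` have a value
# that an earlier entry already has, then prints that count and the table size.
# seen = set()
# dup = 0
# for v in strokes.values():
#     if v in seen:
#         dup += 1
#     else:
#         seen.add(v)
# print(dup, len(strokes))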