Skip to content
update-simptrad-table.py 1.74 KiB
Newer Older
#!/usr/bin/env python
import sys
sys.path.append(".")

from ZhConversion import *
from valid_hanzi import *

def convert(s, d, n):
    """Convert *s* through mapping *d* using greedy longest-match lookup.

    At every position the longest window (at most *n* characters, and never
    past the end of *s*) is looked up first; the window shrinks by one
    character at a time until a lookup yields a truthy replacement.  A single
    character that has no entry in *d* is copied through unchanged.

    Args:
        s: the text to convert.
        d: mapping from source substring to its replacement.
        n: maximum window length to try; callers in this file pass n >= 1.

    Returns:
        The converted text as a unicode string.
    """
    total = len(s)
    pieces = []
    pos = 0
    while pos < total:
        longest = min(n, total - pos)
        for width in range(longest, 0, -1):
            chunk = s[pos:pos + width]
            # Only a one-character window falls back to itself; longer
            # windows without an entry yield None so the search continues.
            fallback = chunk if width == 1 else None
            piece = d.get(chunk, fallback)
            if piece:
                break
        pieces.append(piece)
        # Skip past exactly the characters the chosen window consumed.
        pos += width
    return u"".join(pieces)

def filter_more(records, n):
    """Drop records that the records with keys of length <= n already reproduce.

    Records whose key has at most *n* characters are always kept.  Every
    record is then re-converted with ``convert`` using only those short
    records as the dictionary; a record is additionally kept only when that
    conversion does not yield its value.

    Written without tuple-parameter lambdas and without relying on
    ``filter()`` returning a list, so it behaves the same on Python 2 and
    Python 3.

    Args:
        records: iterable of ``(key, value)`` pairs.
        n: key-length threshold, also passed to ``convert`` as the maximum
            window length.

    Returns:
        A list: the records not reproducible from the short ones, followed
        by the short records themselves.
    """
    # Materialise once: the input is scanned twice below.
    records = list(records)
    short = [(k, v) for (k, v) in records if len(k) <= n]
    short_map = dict(short)
    unresolved = [(k, v) for (k, v) in records
                  if convert(k, short_map, n) != v]
    return unresolved + short

def filter_func(args):
    """Decide whether one ``(k, v)`` record is kept.

    A record is accepted only when key and value have the same length, the
    key is at most six characters long, and every character of both strings
    is a member of ``valid_hanzi``.

    Args:
        args: a two-item sequence ``(k, v)``.

    Returns:
        True if the record passes all checks, False otherwise.
    """
    key, value = args

    # Lengths must match, and keys longer than six characters are dropped.
    same_length = len(key) == len(value)
    if not (same_length and len(key) <= 6):
        return False

    # Key first, then value: any character outside valid_hanzi rejects.
    for text in (key, value):
        for ch in text:
            if ch not in valid_hanzi:
                return False

    # NOTE: a stricter per-character check (each differing character of the
    # value had to be listed in S_2_T for the matching key character) was
    # present only as commented-out code and is not applied.
    return True

def get_records():
    """Build the pruned, sorted record list from ``zh2Hant``.

    The items of ``zh2Hant`` are screened with ``filter_func``, repeatedly
    pruned with ``filter_more`` for increasing key lengths, encoded to UTF-8
    and sorted.

    Uses comprehensions instead of tuple-parameter lambdas and list-returning
    ``map``/``filter``, so it behaves the same on Python 2 and Python 3.

    Returns:
        A ``(maxlen, records)`` tuple.  ``maxlen`` is the longest key length
        among the records accepted by ``filter_func`` (measured before
        pruning); ``records`` is a sorted list of ``(key, value)`` pairs,
        each encoded as a UTF-8 byte string.

    Raises:
        ValueError: if no record passes ``filter_func`` (``max`` of an
            empty sequence).
    """
    records = [pair for pair in zh2Hant.items() if filter_func(pair)]

    maxlen = max(len(k) for k, _ in records)
    # NOTE(review): the last pass uses i == maxlen - 2, so records of length
    # maxlen are never checked against a dictionary that includes keys of
    # length maxlen - 1.  Kept as-is because changing the bound would change
    # the generated table -- confirm whether this is intended.
    for i in range(1, maxlen - 1):
        records = filter_more(records, i)

    # Encode before sorting so the order is that of the UTF-8 byte strings.
    encoded = [(k.encode("utf8"), v.encode("utf8")) for k, v in records]
    encoded.sort()
    return maxlen, encoded

def _as_text(value):
    """Return *value* in a form that ``%s`` formats without a ``b'...'`` wrapper.

    On Python 3 a ``bytes`` value is decoded from UTF-8.  On Python 2, where
    ``bytes`` is ``str``, the value is returned untouched so the original
    byte-for-byte output is preserved.
    """
    if bytes is not str and isinstance(value, bytes):
        return value.decode("utf8")
    return value


def main():
    """Print the C source for the ``simp_to_trad`` table to standard output.

    Emits the array opener, one ``{ "key", "value" },`` row per record
    returned by ``get_records``, the closing brace, and finally the
    ``SIMP_TO_TRAD_MAX_LEN`` macro.

    Every ``print`` call takes exactly one parenthesised argument, which is
    valid both as a Python 2 print statement and as a Python 3 function call.
    """
    print("static const char *simp_to_trad[][2] = {")
    maxlen, records = get_records()
    for s, ts in records:
        print('    { "%s", "%s" },' % (_as_text(s), _as_text(ts)))
    print("};")
    print('#define SIMP_TO_TRAD_MAX_LEN (%d)' % maxlen)

# Generate the table only when run as a script, not when imported.
if __name__ == "__main__":
    main()