/* vim:set et ts=4 sts=4: * * libpyzy - The Chinese PinYin and Bopomofo conversion library. * * Copyright (c) 2008-2010 Peng Huang * Copyright (c) 2010 BYVoid * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of the * License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "PyZySimpTradConverter.h" #ifdef HAVE_OPENCC # include #else # include # include #endif #include "PyZyTypes.h" #include "PyZyString.h" namespace PyZy { #ifdef HAVE_OPENCC class opencc { static const int BUFFER_SIZE = 64; public: opencc (void) { m_od = opencc_open (OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); g_assert (m_od != NULL); } ~opencc (void) { opencc_close(m_od); } void convert (const gchar *in, String &out) { glong n_char; gunichar *in_ucs4 = g_utf8_to_ucs4_fast (in, -1, &n_char); ucs4_t *pinbuf = (ucs4_t *)in_ucs4; size_t inbuf_left = n_char; while (inbuf_left != 0) { ucs4_t *poutbuf = (ucs4_t *)m_buffer; size_t outbuf_left = BUFFER_SIZE; size_t retval = opencc_convert(m_od, &pinbuf, &inbuf_left, &poutbuf, &outbuf_left); if (retval == (size_t) -1) { /* append left chars in pinbuf */ g_warning ("opencc_convert return failed"); out << (gunichar *) pinbuf; break; } *poutbuf = L'\0'; out << m_buffer; } g_free (in_ucs4); } private: opencc_t m_od; gunichar m_buffer[BUFFER_SIZE + 1]; }; void SimpTradConverter::simpToTrad (const gchar *in, String &out) { static opencc opencc; opencc.convert (in, out); } #else static gint _xcmp (const gchar *p1, const gchar *p2, const gchar *str) { for (;;) { // both reach end if (p1 == p2 && *str == '\0') return 0; // p1 reaches end if (p1 == p2) return -1; // str reaches end if (*str == '\0') return 1; if (*p1 < *str) return -1; if (*p1 > *str) return 1; p1 ++; str ++; }; } static gint _cmp (gconstpointer p1, gconstpointer p2) { const gchar **pp = (const gchar **) p1; const gchar **s2 = (const gchar **) p2; return _xcmp (pp[0], pp[1], s2[0]); } #include "PyZySimpTradConverterTable.h" void SimpTradConverter::simpToTrad (const gchar *in, String &out) { const gchar *pend; const gchar *pp[2]; glong len; glong begin; if (!g_utf8_validate (in, -1 , NULL)) { g_warning ("\%s\" is not an utf8 string!", in); g_assert_not_reached (); } begin = 0; pend = in + std::strlen (in); len = g_utf8_strlen (in, -1); // length in charactoers pp[0] = in; while (pp[0] != pend) { glong slen = std::min (len - begin, (glong) SIMP_TO_TRAD_MAX_LEN); // the length of sub string in character pp[1] = g_utf8_offset_to_pointer (pp[0], slen); // the end of sub string for (;;) { const gchar **result; result = (const gchar **) std::bsearch (pp, simp_to_trad, G_N_ELEMENTS (simp_to_trad), sizeof (simp_to_trad[0]), _cmp); if (result != NULL) { // found item in table, // append the trad to out and adjust pointers out << result[1]; pp[0] = pp[1]; begin += slen; break; } if (slen == 1) { // if only one character left, // append origin character to out and adjust pointers out.append (pp[0], pp[1] - pp[0]); pp[0] = pp[1]; begin += 1; break; } // if more than on characters left, // adjust pp[1] to previous character pp[1] = g_utf8_prev_char (pp[1]); slen--; } } } #endif // HAVE_OPENCC }; // namespace PyZy