pax_global_header 0000666 0000000 0000000 00000000064 14467107307 0014523 g ustar 00root root 0000000 0000000 52 comment=26904b2c2658daef20f91ec245a8d29fb3e35aee
zero-pinyin-service-master/ 0000775 0000000 0000000 00000000000 14467107307 0016301 5 ustar 00root root 0000000 0000000 zero-pinyin-service-master/.gitignore 0000664 0000000 0000000 00000000034 14467107307 0020266 0 ustar 00root root 0000000 0000000 DEST/
*.deb
build/
release/
zero-pinyin-service-master/README.org 0000664 0000000 0000000 00000006377 14467107307 0017764 0 ustar 00root root 0000000 0000000 * COMMENT -*- mode: org -*-
#+Date: 2019-09-28
Time-stamp: <2020-01-14>
* zero-pinyin-service
zero-pinyin-service is a dbus service to provide pinyin input method service
for zero-el pinyin input method. This allows doing arbitrarily complex things
in C, while the elisp side code can be simple and efficient.
You can see the dbus service interface in
./com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface.xml
You can check zero-el document at https://blog.emacsos.com/zero-el.html
zero-pinyin-service requires a character/word table to get candidates for
a pinyin preedit string. It uses libpyzy's open-phrase db for
this. zero-pinyin-service also needs to know how to split pinyin and how to
convert a pinyin preedit string to Chinese character candidates; currently it
also uses libpyzy for this. So you can think of the current implementation as
an adapter that makes libpyzy work with the zero-el pinyin input method. This
implementation gets zero-el pinyin working with minimum effort. There are lots
of improvements that can be done here.
* how to build zero-pinyin-service
- Get [[https://mesonbuild.com][meson build tool]] and install the ninja backend.
Usually this should do:
#+BEGIN_SRC sh
sudo apt install meson ninja-build
#+END_SRC
You may prefer installing a newer version of meson via PyPI:
#+BEGIN_SRC sh
sudo apt install python3-pip
python3 -m pip install --user meson ninja
# add ~/.local/bin to PATH
#+END_SRC
- Install zero-pinyin-service dependencies
#+BEGIN_SRC sh
sudo apt install libglib2.0-dev libsqlite3-dev uuid-dev
#+END_SRC
- Build zero-pinyin-service
#+BEGIN_SRC sh
git clone https://gitlab.emacsos.com/sylecn/libpyzy.git
cd libpyzy/src/
# Side note: zero-pinyin-service must be in libpyzy src dir to build, because
# it uses the source code directly. I didn't use libpyzy as a dependency
# because I have done code review and patched the libpyzy code and those
# changes are not sent to upstream. libpyzy is pretty inactive these days.
git clone https://gitlab.emacsos.com/sylecn/zero-pinyin-service.git
cd zero-pinyin-service/
meson setup release/
cd release && ninja
#+END_SRC
You can install it on local system via
#+BEGIN_SRC sh
cd release
sudo ninja install
#+END_SRC
Or you can build a deb and install the deb instead.
- Create deb for zero-pinyin-service
Creating the deb requires the [[https://github.com/jordansissel/fpm][fpm]] tool.
#+BEGIN_SRC sh
fpm --version
#+END_SRC
At project root dir,
#+BEGIN_SRC sh
./create-deb.sh
#+END_SRC
** how to install fpm tool
Read official doc for how to install it. The key commands are
#+BEGIN_SRC sh
apt install ruby ruby-dev
gem install --user fpm
# add ruby gem bin dir to PATH
#+END_SRC
* 2019-08-31 ibus-pinyin user db interference notice.
zero-pinyin-service reuses ibus-pinyin's userdb at
~/.cache/ibus/pinyin/user-1.0.db
This is generally not a problem. But if ibus-pinyin (libpyzy) changes their
table schema in the future, zero-pinyin-service may require update.
zero-pinyin-service also stores user phrase in this db. So user phrases are
shared between zero-pinyin and ibus-pinyin.
If you don't use ibus-pinyin, this is not a problem for
you. zero-pinyin-service works fine when ibus-pinyin is not installed.
zero-pinyin-service-master/com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface.xml 0000664 0000000 0000000 00000005765 14467107307 0033203 0 ustar 00root root 0000000 0000000
zero-pinyin-service-master/com.emacsos.zero.ZeroPinyinService1.service 0000664 0000000 0000000 00000000150 14467107307 0026553 0 ustar 00root root 0000000 0000000 [D-BUS Service]
Name=com.emacsos.zero.ZeroPinyinService1
Exec=/home/sylecn/bin/sbin/zero-pinyin-service
zero-pinyin-service-master/create-deb.sh 0000775 0000000 0000000 00000002525 14467107307 0020637 0 ustar 00root root 0000000 0000000 #!/bin/sh
# Trace every command as it is executed.
set -x
# Abort the script as soon as a command fails.
set -e
# Print the usage text on stdout, then terminate the script with status 1.
print_help_and_exit() {
    printf '%s\n' \
        "Usage: ./create-deb.sh" \
        "create deb for debian. requires fpm tool."
    exit 1
}
# main()
# Show the usage text when invoked as `./create-deb.sh --help`.
if [ "$1" = "--help" ]; then
print_help_and_exit
fi
# Staging directory for the package tree; defaults to ./DEST and can be
# overridden through the environment.
DEST=${DEST:-DEST}
# Package version; unless overridden through the environment it is taken
# from the `version: '...'` line of meson.build.
VERSION=${VERSION:-`grep -E "^ *version: '.*'" meson.build|cut -d"'" -f2`}
# Configure the release build dir on first use, then (re)build it.
if [ ! -d release/ ]; then
meson setup --buildtype release release/
fi
ninja -C release/
# Lay out the files under $DEST the way they should appear on the target system.
mkdir -p \
"$DEST"/usr/share/dbus-1/interfaces/ \
"$DEST"/usr/share/dbus-1/services/ \
"$DEST"/usr/bin/
cp 'com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface.xml' "$DEST"/usr/share/dbus-1/interfaces/
cp 'com.emacsos.zero.ZeroPinyinService1.service' "$DEST"/usr/share/dbus-1/services/
# The service file in the repo points at the developer's local install path;
# rewrite Exec= in the staged copy to the packaged binary location.
sed -i -E 's:/home/sylecn/bin/sbin/zero-pinyin-service:/usr/bin/zero-pinyin-service:' "$DEST"/usr/share/dbus-1/services/com.emacsos.zero.ZeroPinyinService1.service
cp release/zero-pinyin-service "$DEST"/usr/bin/
# Build the deb from the staged tree with fpm.
# NOTE(review): the --maintainer value below ends in a space; an "<email>"
# part may have been lost -- confirm against the upstream script.
fpm -f -t deb -s dir -n zero-pinyin-service -v "$VERSION" \
-d libglib2.0-0 \
-d libsqlite3-0 \
-d libuuid1 \
-d libpyzy-1.0-0v5 \
--after-install deb-scripts/after-install.sh \
--description "provide pinyin input engine for zero-el pinyin" \
--vendor sylecn \
--maintainer "Yuanle Song " \
--deb-priority optional \
--url "https://gitlab.emacsos.com/sylecn/zero-pinyin-service" \
-C "$DEST" .
zero-pinyin-service-master/deb-scripts/ 0000775 0000000 0000000 00000000000 14467107307 0020520 5 ustar 00root root 0000000 0000000 zero-pinyin-service-master/deb-scripts/after-install.sh 0000664 0000000 0000000 00000000071 14467107307 0023617 0 ustar 00root root 0000000 0000000 #!/bin/sh
# Kill any process whose command line matches the installed binary path
# (presumably so the freshly installed binary is used the next time the
# service is started -- confirm). `|| true` keeps the script successful when
# pkill finds no matching process.
pkill -f /usr/bin/zero-pinyin-service || true
zero-pinyin-service-master/main.c 0000664 0000000 0000000 00000035165 14467107307 0017403 0 ustar 00root root 0000000 0000000 #include
#include
#include
#include "zero-pinyin-service.h"
#include "zero-pinyin-service-generated.h"
#include "../sqlite3_util.h"
#include
#include
/* Number of py_phrase_N tables that init_userdb() creates (N = 0 .. MAX_PHRASE_LEN - 1). */
static const int MAX_PHRASE_LEN = 16;
/* Filename passed to sqlite3_open() for the base connection, and used as the
 * fallback userdb name when the userdb file can't be created. */
static const char *SQLITE3_MEMORY_DB = ":memory:";
/* Application state handed to every GApplication and D-Bus callback in this file. */
typedef struct {
/* the GApplication instance; set in main(), used to quit the application */
GApplication *app;
/* id returned by g_bus_own_name(); 0 when no name is owned */
guint owner_id;
/* D-Bus interface skeleton created in on_bus_acquired() */
ZeroPinyinService *interface;
/* sqlite connection set up by config_db(); NULL when setup failed */
sqlite3 *db;
/* environment copy obtained from g_get_environ() in on_startup() */
gchar **env;
} AppData;
/**
 * Handler for the "handle-get-candidates-v2" signal of the interface skeleton.
 *
 * Rejects a NULL preedit string or a zero fetch size with a D-Bus error.
 * Otherwise asks get_candidates() to fill three array builders and replies
 * with a tuple of type (asauaa(ii)): candidates, matched lengths and the
 * pinyin indices of each candidate.
 *
 * Returns: always TRUE, i.e. the invocation has been handled.
 */
static gboolean
on_handle_get_candidates_v2(ZeroPinyinService *object,
			    GDBusMethodInvocation *invocation,
			    const gchar *preedit_str,
			    guint fetch_size,
			    guint fuzzy_flag,
			    AppData *appdata)
{
	const gboolean bad_param = (preedit_str == NULL) || (fetch_size == 0);

	if (bad_param) {
		g_dbus_method_invocation_return_dbus_error(invocation,
							   "org.gtk.GDBus.Failed",
							   "Bad param");
		return TRUE;
	}

	g_info("get_candidates for preedit_str=%s fetch_size=%u",
	       preedit_str, fetch_size);

	/* one builder per array of the reply tuple */
	GVariantBuilder *words = g_variant_builder_new(G_VARIANT_TYPE("as"));
	GVariantBuilder *lengths = g_variant_builder_new(G_VARIANT_TYPE("au"));
	GVariantBuilder *indices = g_variant_builder_new(G_VARIANT_TYPE("aa(ii)"));

	get_candidates(appdata->db, preedit_str, fetch_size, fuzzy_flag,
		       words, lengths, indices);

	/* the reply is a GVariant tuple of three dbus arrays */
	GVariant *reply = g_variant_new("(asauaa(ii))", words, lengths, indices);
	g_assert_nonnull(reply);
	g_dbus_method_invocation_return_value(invocation, reply);

	g_variant_builder_unref(words);
	g_variant_builder_unref(lengths);
	g_variant_builder_unref(indices);
	return TRUE;
}
/**
 * Handler for the "handle-get-candidates" signal.
 *
 * Delegates to on_handle_get_candidates_v2() with fuzzy_flag fixed to 0.
 */
static gboolean
on_handle_get_candidates(ZeroPinyinService *object,
			 GDBusMethodInvocation *invocation,
			 const gchar *preedit_str,
			 guint fetch_size,
			 AppData *appdata)
{
	const guint no_fuzzy_flag = 0;

	return on_handle_get_candidates_v2(object, invocation, preedit_str,
					   fetch_size, no_fuzzy_flag, appdata);
}
/**
 * Handler for the "handle-commit-candidate" signal.
 *
 * Passes the candidate and its pinyin indices to commit_candidate() and
 * completes the invocation with an empty reply.
 *
 * Returns: always TRUE, i.e. the invocation has been handled.
 */
static gboolean
on_handle_commit_candidate(ZeroPinyinService *object,
			   GDBusMethodInvocation *invocation,
			   const gchar *candidate,
			   GVariant *candidate_pinyin_indices,
			   AppData *appdata)
{
	sqlite3 *db = appdata->db;

	commit_candidate(db, candidate, candidate_pinyin_indices);

	/* the method has no out arguments */
	g_dbus_method_invocation_return_value(invocation, NULL);
	return TRUE;
}
/**
 * Handler for the "handle-delete-candidate" signal.
 *
 * Records the phrase in userdb.not_phrase and removes it from the
 * userdb.py_phrase_N table that matches its length (N = characters - 1).
 * A NULL, empty or single character candidate is a no-op. SQL failures are
 * logged as warnings; the invocation is always completed with an empty reply.
 *
 * Returns: always TRUE, i.e. the invocation has been handled.
 */
static gboolean
on_handle_delete_candidate(ZeroPinyinService *object,
			   GDBusMethodInvocation *invocation,
			   const char *candidate,
			   AppData *appdata)
{
	char *sql = NULL;
	glong len = 0;
	guint table_suffix = 0;

	if (! candidate) {
		goto done;
	}
	len = g_utf8_strlen(candidate, -1);
	if (len <= 0) {
		/* without this guard `len - 1` below would wrap around */
		g_debug("delete empty candidate is a no-op");
		goto done;
	}
	if (len == 1) {
		g_debug("delete single character %s is a no-op", candidate);
		goto done;
	}
	g_message("delete candidate %s", candidate);

	/* insert phrase to userdb.not_phrase table. */
	sql = sqlite3_mprintf("INSERT INTO userdb.not_phrase (phrase) VALUES (%Q);", candidate);
	if (! sqlite3_exec_simple(appdata->db, sql)) {
		g_warning("insert phrase to not_phrase table failed");
	}
	sqlite3_free(sql);

	/* only py_phrase_0 .. py_phrase_(MAX_PHRASE_LEN - 1) are created by
	 * init_userdb(), longer phrases have no table to delete from. */
	if (len > MAX_PHRASE_LEN) {
		g_debug("candidate has %ld characters, no py_phrase table to clean up", len);
		goto done;
	}

	/* delete phrase from userdb.py_phrase_x table. */
	table_suffix = (guint) (len - 1);
	sql = sqlite3_mprintf("DELETE FROM userdb.py_phrase_%u WHERE phrase = %Q;", table_suffix, candidate);
	if (! sqlite3_exec_simple(appdata->db, sql)) {
		g_warning("delete phrase from py_phrase_%u table failed", table_suffix);
	}
	sqlite3_free(sql);

done:
	g_dbus_method_invocation_return_value(invocation, NULL);
	return TRUE;
}
/**
 * Handler for the "handle-quit" signal.
 *
 * Asks the GApplication to quit, then completes the invocation with an
 * empty reply.
 *
 * Returns: always TRUE, i.e. the invocation has been handled.
 */
static gboolean
on_handle_quit(ZeroPinyinService *object,
	       GDBusMethodInvocation *invocation,
	       AppData *appdata)
{
	GApplication *application = appdata->app;

	g_application_quit(application);
	g_dbus_method_invocation_return_value(invocation, NULL);
	return TRUE;
}
/**
 * Bus-acquired callback passed to g_bus_own_name().
 *
 * Creates the interface skeleton, connects one handler per "handle-*"
 * signal and exports the skeleton at ZERO_PINYIN_OBJECT_PATH. When the
 * export fails the error is logged and the application is asked to quit.
 */
static void
on_bus_acquired(GDBusConnection *connection,
		const gchar *name,
		gpointer user_data)
{
	/* signal name -> handler, connected in this order */
	static const struct {
		const gchar *detailed_signal;
		GCallback callback;
	} method_handlers[] = {
		{"handle-get-candidates-v2", G_CALLBACK(on_handle_get_candidates_v2)},
		{"handle-get-candidates", G_CALLBACK(on_handle_get_candidates)},
		{"handle-commit-candidate", G_CALLBACK(on_handle_commit_candidate)},
		{"handle-delete-candidate", G_CALLBACK(on_handle_delete_candidate)},
		{"handle-quit", G_CALLBACK(on_handle_quit)},
	};
	AppData *appdata = (AppData *) user_data;
	GError *export_error = NULL;

	g_message("on_bus_acquired() name=%s", name);

	appdata->interface = zero_pinyin_service_skeleton_new();
	for (gsize i = 0; i < G_N_ELEMENTS(method_handlers); i++) {
		g_signal_connect(appdata->interface,
				 method_handlers[i].detailed_signal,
				 method_handlers[i].callback,
				 appdata);
	}

	g_dbus_interface_skeleton_export(
		G_DBUS_INTERFACE_SKELETON(appdata->interface),
		connection,
		ZERO_PINYIN_OBJECT_PATH,
		&export_error);
	if (export_error == NULL) {
		g_message("interface exported at %s", ZERO_PINYIN_OBJECT_PATH);
		return;
	}

	g_warning("export interface at %s failed: %s",
		  ZERO_PINYIN_OBJECT_PATH, export_error->message);
	g_error_free(export_error);
	g_application_quit(G_APPLICATION(appdata->app));
}
/**
 * Name-acquired callback passed to g_bus_own_name(); it only logs the name.
 */
static void
on_name_acquired(GDBusConnection *connection,
		 const gchar *name,
		 gpointer user_data)
{
	const gchar *acquired_name = name;

	g_message("on_name_acquired() name=%s", acquired_name);
}
/**
 * Name-lost callback passed to g_bus_own_name(): log it and quit.
 *
 * This is not expected when this is the only app that tries to take the
 * name, because GApplication already has a primary instance concept: a
 * non-primary instance just sends 'activate' to the primary instance and
 * exits, it never tries to own the name itself.
 */
static void
on_name_lost(GDBusConnection *connection,
	     const gchar *name,
	     gpointer user_data)
{
	AppData *appdata = user_data;
	GApplication *application = G_APPLICATION(appdata->app);

	g_message("on_name_lost() name=%s exiting", name);
	g_application_quit(application);
}
/**
 * Request ZERO_PINYIN_WELL_KNOWN_NAME on the session bus and store the
 * owner id in appdata->owner_id.
 *
 * The name is requested with both ALLOW_REPLACEMENT and REPLACE flags.
 */
static void
config_dbus_service(AppData *appdata)
{
	const GBusNameOwnerFlags owner_flags =
		G_BUS_NAME_OWNER_FLAGS_ALLOW_REPLACEMENT | G_BUS_NAME_OWNER_FLAGS_REPLACE;
	guint id;

	id = g_bus_own_name(G_BUS_TYPE_SESSION,
			    ZERO_PINYIN_WELL_KNOWN_NAME,
			    owner_flags,
			    on_bus_acquired,
			    on_name_acquired,
			    on_name_lost,
			    appdata,
			    NULL);
	appdata->owner_id = id;
	g_assert_cmpint(appdata->owner_id, >, 0);
}
/**
 * Signal source callback: quit the application gracefully.
 *
 * Installed for both SIGTERM and SIGINT by setup_sigint_sigterm_handler().
 * Returns G_SOURCE_REMOVE, so the source is removed after the first signal.
 */
static gboolean
on_sigterm_received(gpointer user_data)
{
	AppData *appdata = user_data;
	GApplication *application = appdata->app;

	g_application_quit(application);
	return G_SOURCE_REMOVE;
}
/**
 * Look up HOME in the environment copy stored in appdata->env.
 *
 * Returns: the value of HOME as returned by g_environ_getenv().
 */
static const gchar *
get_home_dir(AppData *appdata)
{
	return g_environ_getenv(appdata->env, "HOME");
}
/**
 * Check whether filename exists, using g_file_test() with G_FILE_TEST_EXISTS.
 *
 * Returns: TRUE if the file exists, FALSE otherwise.
 */
static gboolean
file_exists(const char *filename)
{
	const gboolean found = g_file_test(filename, G_FILE_TEST_EXISTS);

	return found;
}
/**
* Return the file path of the main db file. main db is the main word/phrase
* dababase in libpyzy db format. Without main db, char/phrase query will not
* work at all.
*
* if not main db found, return NULL.
*
* returned gchar* should be freed with g_free().
*/
static gchar *
get_maindb_file(const gchar *home_dir)
{
/* TODO make db path configurable */
const gchar *user_main_db = ".cache/ibus/pinyin/main.db";
const gchar *open_phrase_db = "/usr/share/pyzy/db/open-phrase.db";
const gchar *android_db = "/usr/share/pyzy/db/android.db";
gchar *home_dir_maindb = NULL;
home_dir_maindb = g_strconcat(home_dir, "/", user_main_db, NULL);
if (file_exists(home_dir_maindb)) {
return home_dir_maindb;
}
if (file_exists(open_phrase_db)) {
return g_strdup(open_phrase_db);
}
if (file_exists(android_db)) {
return g_strdup(android_db);
}
return NULL;
}
/**
* return userdb file path.
* "~/.cache/ibus/pinyin/user-1.0.db"
*
* returned gchar* should be freed with g_free().
*/
static gchar *
get_userdb_file(const gchar *home_dir)
{
/* TODO make db path configurable */
return g_strconcat(home_dir, "/.cache/ibus/pinyin/user-1.0.db", NULL);
}
/**
 * Return a usable userdb file: either the path returned by
 * `get_userdb_file()', or ':memory:' if that path can't be determined, or
 * the file doesn't exist and can't be created.
 *
 * When the file is missing, its parent dir is created (mode 0750) and the
 * file is created by opening it once with sqlite3.
 *
 * The returned gchar* should be freed with g_free().
 */
static gchar *
get_userdb_file_create(const gchar *home_dir)
{
	gchar *userdb_file = get_userdb_file(home_dir);
	gchar *parent_dir = NULL;
	sqlite3 *userdb = NULL;
	gint r;

	if (userdb_file == NULL) {
		g_warning("can't determine userdb file path, using %s",
			  SQLITE3_MEMORY_DB);
		return g_strdup(SQLITE3_MEMORY_DB);
	}
	if (file_exists(userdb_file)) {
		return userdb_file;
	}

	/* try create the file */
	parent_dir = g_path_get_dirname(userdb_file);
	r = g_mkdir_with_parents(parent_dir, 0750);
	if (r != 0) {
		/* the failure reason is in errno, r itself is just -1 */
		const int saved_errno = errno;
		g_warning("create dir %s failed: %d (%s)",
			  parent_dir, saved_errno, g_strerror(saved_errno));
		g_free(parent_dir);
		g_free(userdb_file);
		return g_strdup(SQLITE3_MEMORY_DB);
	}
	g_free(parent_dir);

	g_message("creating user db at %s", userdb_file);
	r = sqlite3_open(userdb_file, &userdb);
	/* the handle is closed whether or not the open succeeded */
	sqlite3_close(userdb);
	if (r != SQLITE_OK) {
		g_free(userdb_file);
		return g_strdup(SQLITE3_MEMORY_DB);
	}
	return userdb_file;
}
/**
 * initialize user db.
 * create tables, index and populate data into desc table.
 *
 * All statements are built into one script that is wrapped in a single
 * transaction and run with sqlite3_exec_simple(). Everything uses
 * IF NOT EXISTS / INSERT OR IGNORE, so running it against an already
 * initialized db keeps the existing data.
 *
 * @userdb: connection that has the user db attached.
 * @schema: schema name the user db is attached as.
 * @appdata: only appdata->env is read, for HOSTNAME and USER.
 *
 * Returns: true on success, false otherwise.
 */
static gboolean
init_userdb(sqlite3 *userdb, const char *schema, AppData *appdata)
{
	gboolean rb;
	GString *sql;
	/* 36 characters plus the terminating NUL written by uuid_unparse_lower() */
	gchar uuid_str[37];
	uuid_t uuid;
	gchar *snippet;
	const gchar *hostname;
	const gchar *username;

	/* original libpyzy user db schema */
	uuid_generate_random(uuid);
	uuid_unparse_lower(uuid, uuid_str);

	/* HOSTNAME and USER are not guaranteed to be in the environment;
	 * fall back to glib so the desc table doesn't end up with NULLs. */
	hostname = g_environ_getenv(appdata->env, "HOSTNAME");
	if (hostname == NULL) {
		hostname = g_get_host_name();
	}
	username = g_environ_getenv(appdata->env, "USER");
	if (username == NULL) {
		username = g_get_user_name();
	}

	sql = g_string_sized_new(200);
	snippet = sqlite3_mprintf(
		"BEGIN TRANSACTION;\n"
		"CREATE TABLE IF NOT EXISTS %s.desc (name PRIMARY KEY, value TEXT);\n"
		"INSERT OR IGNORE INTO %s.desc VALUES ('version', '1.2.0');\n"
		"INSERT OR IGNORE INTO %s.desc VALUES ('uuid', %Q);\n"
		"INSERT OR IGNORE INTO %s.desc VALUES ('hostname', %Q);\n"
		"INSERT OR IGNORE INTO %s.desc VALUES ('username', %Q);\n"
		"INSERT OR IGNORE INTO %s.desc VALUES ('create-time', datetime());\n"
		"INSERT OR IGNORE INTO %s.desc VALUES ('attach-time', datetime());\n",
		schema,
		schema,
		schema, uuid_str,
		schema, hostname,
		schema, username,
		schema,
		schema);
	g_string_append(sql, snippet);
	sqlite3_free(snippet);

	/* create phrase tables: py_phrase_i has one (s, y) column pair per
	 * syllable, i.e. i + 1 pairs */
	for (gint i = 0; i < MAX_PHRASE_LEN; i++) {
		g_string_append_printf(sql, "CREATE TABLE IF NOT EXISTS %s.py_phrase_%d (user_freq, phrase TEXT, freq INTEGER", schema, i);
		for (gint j = 0; j <= i; j++) {
			g_string_append_printf(sql, ", s%d INTEGER, y%d INTEGER", j, j);
		}
		g_string_append(sql, ");\n");
	}

	/* create index */
	g_string_append_printf(
		sql,
		"CREATE UNIQUE INDEX IF NOT EXISTS %s.index_0_0 ON py_phrase_0(s0,y0,phrase);\n"
		"CREATE UNIQUE INDEX IF NOT EXISTS %s.index_1_0 ON py_phrase_1(s0,y0,s1,y1,phrase);\n"
		"CREATE INDEX IF NOT EXISTS %s.index_1_1 ON py_phrase_1(s0,s1,y1);\n", schema, schema, schema);
	for (gint i = 2; i < MAX_PHRASE_LEN; i++) {
		g_string_append_printf(sql, "CREATE UNIQUE INDEX IF NOT EXISTS %s.index_%d_0 ON py_phrase_%d(s0,y0", schema, i, i);
		for (gint j = 1; j <= i; j++) {
			g_string_append_printf(sql, ",s%d,y%d", j, j);
		}
		g_string_append(sql, ",phrase);\n");
		g_string_append_printf(sql, "CREATE INDEX IF NOT EXISTS %s.index_%d_1 ON py_phrase_%d(s0,s1,s2,y2);\n", schema, i, i);
	}

	/* zero-pinyin-service addition */
	g_string_append_printf(sql, "CREATE TABLE IF NOT EXISTS %s.not_phrase (phrase TEXT UNIQUE);\n", schema);
	g_string_append(sql, "COMMIT;");

	rb = sqlite3_exec_simple(userdb, sql->str);
	g_string_free(sql, TRUE);
	if (! rb) {
		/* If the script failed after BEGIN the transaction is still
		 * open; roll it back so later statements on this connection
		 * don't run inside a transaction that never commits. */
		if (! sqlite3_get_autocommit(userdb)) {
			if (! sqlite3_exec_simple(userdb, "ROLLBACK;")) {
				g_warning("rollback of userdb init transaction failed");
			}
		}
		g_warning("init userdb failed, query will not work.");
		return FALSE;
	}
	return TRUE;
}
/**
 * init appdata->db
 *
 * Opens an in-memory sqlite connection, attaches the main phrase db as
 * "maindb" and the user db as "userdb", then runs init_userdb() on the
 * user db.
 *
 * On success appdata->db is the open connection. If the connection can't
 * be opened or an ATTACH fails, the connection is closed and appdata->db is
 * set to NULL.
 */
static void
config_db(AppData *appdata)
{
	gboolean rb = FALSE;
	sqlite3 *db = NULL;
	gchar *sql = NULL;
	gchar *maindb_file = NULL;
	gchar *userdb_file = NULL;
	const gchar *home_dir = get_home_dir(appdata);

	if (sqlite3_open(SQLITE3_MEMORY_DB, &db) != SQLITE_OK) {
		g_warning("sqlite3_open :memory: db failed, query will not work.");
		goto fail;
	}
	g_assert_nonnull(db);

	maindb_file = get_maindb_file(home_dir);
	if (maindb_file == NULL) {
		/* don't hand NULL to a %s conversion; say what is wrong instead */
		g_warning("no maindb file found, candidates query will not work.");
	} else {
		g_info("using maindb file: %s", maindb_file);
	}
	/* %Q renders a NULL pointer as the SQL keyword NULL; the ATTACH is
	 * still issued in that case, as it always was. */
	sql = sqlite3_mprintf("ATTACH %Q AS maindb", maindb_file);
	g_free(maindb_file);
	rb = sqlite3_exec_simple(db, sql);
	if (! rb) {
		g_warning("attach maindb failed, query will not work.");
		goto fail;
	}
	sqlite3_free(sql);
	sql = NULL;

	userdb_file = get_userdb_file_create(home_dir);
	sql = sqlite3_mprintf("ATTACH %Q AS userdb", userdb_file);
	g_free(userdb_file);
	rb = sqlite3_exec_simple(db, sql);
	if (! rb) {
		g_warning("attach userdb failed, query will not work.");
		goto fail;
	}
	sqlite3_free(sql);
	sql = NULL;

	if (! init_userdb(db, "userdb", appdata)) {
		/* keep the connection, as before, but make the state visible */
		g_warning("userdb is attached but its schema is not initialized.");
	}
	appdata->db = db;
	return;

fail:
	/* both calls accept NULL; a handle returned by a failed sqlite3_open()
	 * still has to be closed */
	sqlite3_free(sql);
	sqlite3_close(db);
	appdata->db = NULL;
}
/**
 * Allow graceful shutdown by SIGTERM and Ctrl-C (SIGINT).
 *
 * For each of the two signals a unix signal source is created, wired to
 * on_sigterm_received() and attached to the default main context.
 */
static void
setup_sigint_sigterm_handler(AppData *appdata)
{
	static const gint quit_signals[] = { SIGTERM, SIGINT };

	for (gsize i = 0; i < G_N_ELEMENTS(quit_signals); i++) {
		GSource *signal_source = g_unix_signal_source_new(quit_signals[i]);

		g_source_set_callback(signal_source, on_sigterm_received, appdata, NULL);
		g_source_attach(signal_source, NULL);
		/* drop our own reference once the source is attached */
		g_source_unref(signal_source);
	}
}
/**
 * "startup" handler of the GApplication.
 *
 * Takes a copy of the environment, sets up the database, the dbus service
 * and the signal handlers, then places a hold on the application.
 */
static void
on_startup(GApplication *app,
	   AppData *appdata)
{
	g_message("zero-pinyin-service startup()");

	/* config_db() reads HOME from this copy, so it has to come first */
	appdata->env = g_get_environ();

	config_db(appdata);
	config_dbus_service(appdata);
	setup_sigint_sigterm_handler(appdata);

	g_application_hold(app);
}
/**
 * "activate" handler of the GApplication; it only logs the activation.
 */
static void
on_activate(GApplication *app,
	    AppData *appdata)
{
	static const gchar activate_msg[] = "zero-pinyin-service activate()";

	g_message("%s", activate_msg);
}
/**
 * "shutdown" handler of the GApplication.
 *
 * Releases everything stored in appdata: the owned bus name, the interface
 * skeleton, the sqlite connection and the environment copy. Every field is
 * reset after release so that no dangling value is left behind.
 */
static void
on_shutdown(GApplication *app,
	    AppData *appdata)
{
	g_message("zero-pinyin-service shutdown()");
	if (appdata->owner_id > 0) {
		g_bus_unown_name(appdata->owner_id);
		appdata->owner_id = 0;
	}
	/* drops our reference to the skeleton created in on_bus_acquired();
	 * no-op when it was never created */
	g_clear_object(&appdata->interface);
	if (appdata->db != NULL) {
		sqlite3_close(appdata->db);
		appdata->db = NULL;
	}
	if (appdata->env != NULL) {
		g_strfreev(appdata->env);
		appdata->env = NULL;
	}
}
/**
 * Entry point of the zero-pinyin-service dbus service.
 *
 * It's a console app (GApplication) based on glib and gio: the application
 * object is created, the startup/activate/shutdown handlers are connected
 * and the application is run.
 *
 * Returns: the status returned by g_application_run().
 */
int
main(int argc, char *argv[])
{
	static AppData appdata = {0};

	setlocale(LC_ALL, "");

	GApplication *application = g_application_new(
		"com.emacsos.zero.ZeroPinyinServiceApp",
		G_APPLICATION_FLAGS_NONE);
	g_assert_nonnull(application);
	appdata.app = application;

	g_signal_connect(application, "startup", G_CALLBACK(on_startup), &appdata);
	g_signal_connect(application, "activate", G_CALLBACK(on_activate), &appdata);
	g_signal_connect(application, "shutdown", G_CALLBACK(on_shutdown), &appdata);

	const int exit_status = g_application_run(application, argc, argv);

	g_object_unref(application);
	return exit_status;
}
zero-pinyin-service-master/meson.build 0000664 0000000 0000000 00000004106 14467107307 0020444 0 ustar 00root root 0000000 0000000 # -*- mode: conf -*-
# Project definition. create-deb.sh reads the package version from the
# `version:` line below, so keep it on a line of its own.
# Warnings are treated as errors (werror=true).
project('zero-pinyin-service', ['c', 'cpp'],
version: '0.10.0',
license: 'GPL',
meson_version: '>=0.1.0',
default_options: [
'warning_level=2',
'werror=true',
'buildtype=release',
'strip=true',
'b_ndebug=if-release',
'c_std=c11'])
# to create a debug build
# meson setup --buildtype debugoptimized build/
# Warnings that are switched off for both languages.
add_project_arguments(
'-Wno-unused-parameter',
'-Wno-missing-field-initializers',
language: ['c', 'cpp'])
# Only the C++ sources get HAVE_LIBUUID.
add_project_arguments(
'-DHAVE_LIBUUID',
language: 'cpp')
# Release builds define G_DISABLE_ASSERT and G_DISABLE_CHECKS.
if get_option('buildtype').startswith('release')
add_project_arguments(
'-DG_DISABLE_ASSERT',
'-DG_DISABLE_CHECKS',
language: ['c', 'cpp'])
endif
# D-Bus interface description and service activation file.
install_data('com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface.xml',
install_dir: '/usr/share/dbus-1/interfaces/')
install_data('com.emacsos.zero.ZeroPinyinService1.service',
install_dir: '/usr/share/dbus-1/services/')
# Dependencies shared by the service and the test executables.
glib = dependency('glib-2.0')
gio = dependency('gio-unix-2.0')
uuid = dependency('uuid')
sqlite3 = dependency('sqlite3')
shared_dep = [glib, gio, uuid, sqlite3]
gen_inc = include_directories('.')
# Generate zero-pinyin-service-generated.{h,c} from the interface XML with
# gdbus-codegen.
gdbus_codegen = find_program('gdbus-codegen')
zero_pinyin_generated = custom_target('zero-pinyin-generated',
input: 'com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface.xml',
output: ['zero-pinyin-service-generated.h', 'zero-pinyin-service-generated.c'],
command: [gdbus_codegen, '--generate-c-code', 'zero-pinyin-service-generated', '@INPUT@'])
# Sources shared by the service and its tests; the '../' entries live in the
# parent directory.
lib = [
'../PinyinParser.cc',
'../sqlite3_util.c',
'parse-pinyin.cpp',
'zero-pinyin-service.c']
src = [
lib,
zero_pinyin_generated,
'main.c']
# NOTE: install_dir matches the Exec= path in the .service file; create-deb.sh
# rewrites that path to /usr/bin/zero-pinyin-service when packaging.
executable('zero-pinyin-service', src,
include_directories: gen_inc,
dependencies: shared_dep,
install: true, install_dir: '/home/sylecn/bin/sbin/')
# Unit tests.
test('parse-pinyin-test',
executable('parse-pinyin-test',
['../PinyinParser.cc', 'parse-pinyin.cpp', 'parse-pinyin-test.cpp'],
dependencies: shared_dep))
test('zero-pinyin-service-test',
executable('zero-pinyin-service-test',
[lib, 'zero-pinyin-service-test.c'],
dependencies: shared_dep))
zero-pinyin-service-master/operational 0000664 0000000 0000000 00000030272 14467107307 0020545 0 ustar 00root root 0000000 0000000 * COMMENT -*- mode: org -*-
#+Date: 2019-04-05
Time-stamp: <2023-08-15>
#+STARTUP: content
* notes :entry:
** 2020-02-02 documents
- Wrap best practices and tips
https://mesonbuild.com/Wrap-best-practices-and-tips.html
- meson build Reference manual
https://mesonbuild.com/Reference-manual.html
-
** 2019-08-31 ibus-pinyin userdb inference notice.
zero-pinyin-service reuses ibus-pinyin's userdb at
~/.cache/ibus/pinyin/user-1.0.db
This is generally not a problem. But if ibus-pinyin changes its table schema
in the future, zero-pinyin-service may require an update.
zero-pinyin-service also stores user phrases in this db. So user phrases are
shared between zero-pinyin and ibus-pinyin.
** 2019-04-05 zero-pinyin-service file structure :doc:
- zero-pinyin-service
- main.c
a console application based on glib and gio.
provides dbus service. see the zero-pinyin-service spec at
~/c/gtk-im-module/operational :id001:
- zero-pinyin-service.h
zero-pinyin-service.c
zero-pinyin-service-test.c
contains the zero-pinyin-service dbus method C based implementation.
- parse-pinyin.h
parse-pinyin.cpp
parse-pinyin-test.cpp
contains utility functions based on code from libpyzy. This should be
replaced eventually. I don't want to depend on libpyzy source code.
- utilities
- test-sql.sh
a shell script to experiment SQL commands
- data files
- meson.build
build script. used to create executable.
- com.emacsos.zero.ZeroPinyinService1.service
dbus service definition file.
Should be copied to dbus service file dir /usr/share/dbus-1/services/
When a dbus client tries to talk to the service, the dbus session bus will
start the service app automatically.
** 2019-08-31 how to format C code? do it before git commit.
see ~/c/gtk-im-module/, it uses myastyle-pre-commit-check in git pre-commit
~/bin/myastyle-pre-commit-check
** 2023-08-15 how to build from source :doc:
- dependencies
- RHEL
sudo dnf install -y gcc-c++ libuuid-devel sqlite-devel gtk3-devel python3-pip
- Debian
sudo apt install -y g++ uuid-dev libsqlite3-dev libgtk-3-dev python3-pip
- meson build tool
python3 -m pip install --user meson ninja
- do a debug/release build
get source code. This project requires libpyzy.
mkdir ~/fromsource/
git clone git@gitlab.emacsos.com:sylecn/libpyzy.git
cd libpyzy/src/
git clone git@gitlab.emacsos.com:sylecn/zero-pinyin-service.git
cd zero-pinyin-service/
This is project root dir.
in project root dir,
# for debug build, do not include "--buildtype release" param
meson setup --buildtype release build/
cd build && ninja
- to run the built binary
get phrases db, they are available in libpyzy in debian.
you can download the deb and unpack it
https://packages.debian.org/bookworm/libpyzy-1.0-0v5
put your main phrase database at any of these position:
/usr/share/pyzy/db/open-phrase.db
~/.cache/ibus/pinyin/main.db
put your user phrase database at:
~/.cache/ibus/pinyin/user-1.0.db
now it's ready to run zero-pinyin-service.
in project root dir,
./build/zero-pinyin-service
** 2020-02-16 how to create deb for distribution on apt repo?
- on dev node, test deb, make sure it works.
git push
- on debian 9 x86 and x64 build server,
git pull
./create-deb.sh
- upload deb to aptly node using scp.
- create aptly distribution, snapshot and publish snapshot.
on aptly node,
aptly repo add d [FILE|DIR]...
aptly snapshot list
version=0.1
aptly snapshot create d-$version from repo d
# initial snapshot publish
# aptly publish snapshot \
# -batch -gpg-key=apt@emacsos.com -passphrase-file=/etc/aptly_pass \
# d-$version
# future publishes, switch publish to new snapshot
aptly publish switch \
-batch -gpg-key=apt@emacsos.com -passphrase-file=/etc/aptly_pass \
debian d-$version
* later :entry:
* current :entry:
**
** 2023-08-15 try build zero-pinyin-service for RHEL9
- problems
- maindb is empty. I need libpyzy's db.
on rh901,
mkdir -p ~/.cache/ibus/pinyin/
on lat21,
scp ~/.cache/ibus/pinyin/main.db rh901:.cache/ibus/pinyin/
also copy my user db. now that I am here.
scp ~/.cache/ibus/pinyin/user-1.0.db rh901:.cache/ibus/pinyin/
yes. it only needs the phrases db.
libpyzy lib is not needed at runtime.
- I should really get an alternative panel using emacs facility.
it's easier to test when I don't have access to GUI env.
** 2019-08-31 honor XDG cache dir.
~/.cache/ibus
** 2019-04-17 make flags configurable at runtime.
- add dbus method to set flags.
- make the method work. use gobject property maybe.
- set default flags to my flags. reflect this in UI/config file.
* done :entry:
** 2020-02-16 aptly publish fails
- ERROR: prefix/distribution already used by another published repo
#+BEGIN_SRC sh
root@gcpsgp01:~/d# aptly publish snapshot \
> -batch -gpg-key=apt@emacsos.com -passphrase-file=/etc/aptly_pass \
> d-$version
ERROR: prefix/distribution already used by another published repo: ./debian [amd64, i386] publishes {main: [d-0.1]: Snapshot from local repo [d]}
#+END_SRC
try unpublish old snapshot
aptly publish drop debian?
aptly publish switch -batch -gpg-key=apt@emacsos.com -passphrase-file=/etc/aptly_pass debian d-$version
** 2019-10-22 handle a an o en etc differently. only match exactly the character.
no fuzzy matching or incomplete pinyin matching.
if shengmu is 0, always strict match it.
do not allow omit shengmu in zero.
ao men
澳门
e nuo
婀娜
- DONE use different FLAGS for them.
FLAGS doesn't affect pinyin parsing.
if flag support is to be added, just add it in appdata, as property.
then use the property when building SQL.
- DONE test in bogon VM.
- DONE git push
- build on debian 9 and release deb. later.
- problems
- result = parse_pinyin("an", 15, PINYIN_FUZZY_ALL);
this only return 1 result.
not an and ang.
why?
- maybe flag is not used when parsing pinyin string. only used when building
the SQL statement.
build_sql_for_n_pinyin()
build_where_clause(pylist, n);
yes. here. I should pass the flags around.
-
** 2020-02-02 how to use zero-pinyin-service dbus service using C client?
see example in ~/c/gtk-im-module/zero-pinyin.c
#include
- WONTFIX make zero-pinyin-generated a library.
- I choose to include the dbus xml in the project, and generated new header/C
files there. Still need to define a few const for object name etc though.
- problems
- only an so (libzero-pinyin-generated.so) is generated.
how to install header files?
search: meson library() how to install headers?
install_headers()
- search: meson install_headers from generated source file
Generated headers cannot be installed directly · Issue #705 · mesonbuild/meson
https://github.com/mesonbuild/meson/issues/705
foo.vapi files generated by Vala library() should be installed · Issue #891 · mesonbuild/meson
https://github.com/mesonbuild/meson/issues/891
use install_data to install header files myself.
#+BEGIN_SRC sh
install_data(meson.current_build_dir() + '/foo.vapi',
install_dir: 'share/vala/vapi')
install_data(meson.current_build_dir() + '/foo@sha/Foo-1.0.gir',
install_dir: 'share/gir-1.0')
#+END_SRC
** 2019-10-22 support fuzzy pinyin.
- 0 no fuzzy
- 1 (0b1)
z <-> zh
c <-> ch
s <-> sh
- 2 (0b10)
l <-> n
- I won't implement other flags at this time.
- default is no fuzzy.
- test case:
ru chi 如此
ci di 此地
e nuo 婀娜
e luo
ci yu 词语
song 宋
shong
- implementation
- WONTFIX add a property in appdata.
use a guint flag.
only support 0b0 and 0b10 for now.
Not necessary.
- DONE add dbus interface to allow change the property at run time.
property is added in dbus interface directly.
app just read the property there. there is no network round trip.
data is stored in the dbus service server side. not in dbus daemon.
- DONE add flags in zero-el.
this flag is NOT buffer local.
(setq zero-pinyin-fuzzy-flag 3)
it will set fuzzy mode when zero-pinyin-reset or zero-pinyin-init.
(zero-pinyin-service-set-fuzzy-flag 3)
it works.
- problems
- can I make this change backward compatible?
DONE what will happen when zero asks dbus to set FuzzyFlag property, if
running an old zero-pinyin-service?
dbus-set-property returns nil if the property doesn't exist.
I will show a warning.
- property generated code is okay.
- how to access that property in zero-pinyin-service.c?
guint zero_pinyin_service_get_fuzzy_flag (ZeroPinyinService *object);
this is not the right way.
because flag doesn't change much.
should query once at start up, then listen to change event and update
in-ram data. do not query dbus every time FuzzyFlag is needed.
WONTFIX add event handler to update appdata->fuzzy_flag when property is
changed. // generated code handle set event and update g_object data in
RAM automatically.
signal org.freedesktop.DBus.Properties.PropertiesChanged
see example in
https://developer.gnome.org/gio/2.26/GDBusProxy.html
- how to set the property in zero-el?
call some dbus built-in service.
(zero-pinyin-service-quit)
run zero-pinyin-service in console
(dbus-set-property :session "com.emacsos.zero.ZeroPinyinService1"
"/com/emacsos/zero/ZeroPinyinService1"
"com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface"
"FuzzyFlag" 3)
(dbus-set-property :session "com.emacsos.zero.ZeroPinyinService1"
"/com/emacsos/zero/ZeroPinyinService1"
"com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface"
"FuzzyFlag" 0)
I think generated code will handle set/get call.
maybe just always get the value from
zero_pinyin_service_get_fuzzy_flag(ZeroPinyinService *object)
https://developer.gnome.org/gio/2.60/gdbus-codegen.html
Server-side usage
just use g_object_get() in server side. it will not talk to dbus.
property change should be handled by generated code.
- set property from emacs and read property in zero-pinyin-service.c
works. but the fuzzy SQL logic is not working.
when set to 3, shong can't produce 宋.
enabled debug log.
I see the problem.
shong is not a valid py if fuzzy flag is not used during parsing.
use correct parsing flag from FuzzyFlag when parsing.
- now it works.
** 2019-08-31 choose maindb like my patched libpyzy.
- here is patched libpyzy maindb logic:
files.push_back (m_user_data_dir + "/main.db");
files.push_back (PKGDATADIR"/db/local.db");
files.push_back (PKGDATADIR"/db/open-phrase.db");
files.push_back (PKGDATADIR"/db/android.db");
return first_existing_file (files);
m_user_data_dir default is ~/.cache/ibus/pinyin/
PKGDATADIR default is /usr/share/pyzy/
in zero-pinyin-service, just use the first existing file:
~/.cache/ibus/pinyin/main.db
/usr/share/pyzy/db/open-phrase.db
/usr/share/pyzy/db/android.db
- should I reuse the ibus-pinyin userdb file?
~/.cache/ibus/pinyin/user-1.0.db
yes. ibus-pinyin is not going away.
DONE document this behavior in zero-el and zero-pinyin-service.
- init_userdb()
sqlite3_mprintf()
https://www.sqlite.org/c3ref/mprintf.html
additional non-standard formats (%q, %Q, %w, and %z).
| | in | out | used for |
|----+------+---------+----------------------------------------------------------|
| %q | ab'c | ab''c | SQL string literal |
| %Q | ab'c | 'ab''c' | SQL string literal |
| %w | ab"c | ab""c | SQL identifier name |
| %z | abc | abc | like %s, but sqlite3_free() is called on param after use |
* wontfix :entry:
** 2019-10-23 bug: type "zhey" doesn't show 这样 candidate. :invalid:
- 2020-02-16 can't reproduce this.
zero-pinyin-service-master/parse-pinyin-test.cpp 0000664 0000000 0000000 00000005502 14467107307 0022402 0 ustar 00root root 0000000 0000000 #include "parse-pinyin.h"
#include
#include
#include
static void
test_parse_pinyin()
{
GList *result = NULL;
Pinyin *thispy = NULL;
result = parse_pinyin("liyifeng", 15, PINYIN_FLAGS_NONE);
g_assert_cmpint(g_list_length(result), ==, 3);
thispy = (Pinyin *) g_list_nth_data(result, 0);
g_assert_cmpint(thispy->shengmu_i, ==, 10);
g_assert_cmpint(thispy->yunmu_i, ==, 34);
g_assert_cmpint(thispy->length, ==, 2);
thispy = (Pinyin *) g_list_nth_data(result, 1);
g_assert_cmpint(thispy->shengmu_i, ==, 21);
g_assert_cmpint(thispy->yunmu_i, ==, 34);
g_assert_cmpint(thispy->length, ==, 2);
thispy = (Pinyin *) g_list_nth_data(result, 2);
g_assert_cmpint(thispy->shengmu_i, ==, 5);
g_assert_cmpint(thispy->yunmu_i, ==, 32);
g_assert_cmpint(thispy->length, ==, 4);
g_list_free_full(result, g_free);
}
/**
 * Dump a GList of Pinyin, one entry per line, for debugging test output.
 */
static void
print_parse_result(GList *result)
{
	for (GList *node = result; node != NULL; node = node->next) {
		const Pinyin *py = (const Pinyin *) node->data;
		g_printf("shengmu_i=%i yunmu_i=%i length=%u\n",
			 py->shengmu_i, py->yunmu_i, py->length);
	}
}
static void
test_parse_pinyin_incomplete_pinyin()
{
GList *result = NULL;
Pinyin *thispy = NULL;
result = parse_pinyin("zhey", 15, PINYIN_INCOMPLETE_PINYIN);
print_parse_result(result);
g_assert_cmpint(g_list_length(result), ==, 2);
thispy = (Pinyin *) g_list_nth_data(result, 0);
g_assert_cmpint(thispy->shengmu_i, ==, 23);
g_assert_cmpint(thispy->yunmu_i, ==, 29);
g_assert_cmpint(thispy->length, ==, 3);
thispy = (Pinyin *) g_list_nth_data(result, 1);
g_assert_cmpint(thispy->shengmu_i, ==, 21);
g_assert_cmpint(thispy->yunmu_i, ==, 0);
g_assert_cmpint(thispy->length, ==, 1);
g_list_free_full(result, g_free);
}
/**
 * A pinyin that has no shengmu ("a", "an") must parse as a single Pinyin
 * whose shengmu is 0.
 */
static void
test_parse_pinyin_yunmu_only_pinyin()
{
	GList *result = parse_pinyin("a", 15, PINYIN_FLAGS_NONE);
	Pinyin *thispy = NULL;

	print_parse_result(result);
	g_assert_cmpint(g_list_length(result), ==, 1);
	thispy = (Pinyin *) result->data;
	g_assert_cmpint(thispy->shengmu_i, ==, 0);
	g_assert_cmpint(thispy->yunmu_i, ==, 24);
	g_assert_cmpint(thispy->length, ==, 1);
	g_list_free_full(result, g_free);

	/* enabling every fuzzy flag must still give one Pinyin for "an";
	 * the fuzzy flags mainly matter later, when building the SQL. */
	result = parse_pinyin("an", 15, PINYIN_FUZZY_ALL);
	print_parse_result(result);
	g_assert_cmpint(g_list_length(result), ==, 1);
	g_list_free_full(result, g_free);
}
int
main(int argc, char *argv[])
{
g_test_init(&argc, &argv, NULL);
g_test_add_func("/zero/test_parse_pinyin",
test_parse_pinyin);
g_test_add_func("/zero/test_parse_pinyin incomplete pinyin",
test_parse_pinyin_incomplete_pinyin);
g_test_add_func("/zero/test_parse_pinyin yunmu only pinyin",
test_parse_pinyin_yunmu_only_pinyin);
return g_test_run();
}
zero-pinyin-service-master/parse-pinyin.cpp 0000664 0000000 0000000 00000001224 14467107307 0021422 0 ustar 00root root 0000000 0000000 #include "parse-pinyin.h"
#include "../PinyinArray.h"
#include "../PinyinParser.h"
#include "../Const.h"
/**
 * Parse preedit_str into a list of Pinyin using the libpyzy parser.
 *
 * @preedit_str: the preedit string; NULL yields an empty (NULL) list.
 * @max_pinyin: parse at most this many pinyin.
 * @flags: libpyzy parser option flags, see ../Const.h
 *
 * Returns: a GList of newly allocated Pinyin, in parse order. The caller
 * owns the list and its elements: g_list_free_full(result, g_free).
 */
GList *
parse_pinyin(const char *preedit_str, const guint max_pinyin, const guint flags)
{
	GList *result = NULL;
	PyZy::PinyinArray pyar = {0};

	if (preedit_str == NULL) {
		return NULL;
	}
	PyZy::PinyinParser::parse(preedit_str, strlen(preedit_str), flags,
				  pyar, max_pinyin);
	/* prepend then reverse: g_list_append walks the whole list each time */
	for (guint i = 0; i < pyar.size(); ++i) {
		Pinyin *thispy = g_new(Pinyin, 1);
		thispy->shengmu_i = (int) pyar[i].pinyin->pinyin_id[0].sheng;
		thispy->yunmu_i = (int) pyar[i].pinyin->pinyin_id[0].yun;
		thispy->length = pyar[i].len;
		result = g_list_prepend(result, thispy);
	}
	return g_list_reverse(result);
}
zero-pinyin-service-master/parse-pinyin.h 0000664 0000000 0000000 00000001203 14467107307 0021064 0 ustar 00root root 0000000 0000000 #ifndef _PARSE_PINYIN_H_
#define _PARSE_PINYIN_H_
#include
#include "zero-pinyin-service.h"
#include "../Const.h"
G_BEGIN_DECLS
#define PINYIN_FLAGS_NONE 0
/**
 * parse preedit_str to groups of pinyin.
 *
 * @preedit_str: the preedit str
 * @max_pinyin: parse at most this many pinyin from preedit str.
 * @flags: support incomplete pinyin, fuzzy pinyin, correction, see ../Const.h
 *         use PINYIN_FLAGS_NONE when no flag is wanted.
 *
 * Returns: a list of Pinyin struct, in the order they were parsed. Each
 * Pinyin is heap allocated; the caller should free each Pinyin and the
 * GList after use, e.g. g_list_free_full(result, g_free).
 */
GList *parse_pinyin(const char *preedit_str,
const guint max_pinyin,
const guint flags);
G_END_DECLS
#endif /* _PARSE_PINYIN_H_ */
zero-pinyin-service-master/pinyin-id.h 0000664 0000000 0000000 00000003650 14467107307 0020356 0 ustar 00root root 0000000 0000000 #ifndef _PINYIN_ID_H_
#define _PINYIN_ID_H_
#ifdef __cplusplus
extern "C"
{
#endif
/* data from ../Types.h, copied here so it can be used in C code. */
/* These ids are the values stored in Pinyin.shengmu_i / Pinyin.yunmu_i and in
 * the sN / yN columns referenced by the SQL built in zero-pinyin-service.c. */
/* sentinel values. parse_pinyin() reports 0 for an absent shengmu or yunmu;
 * presumably that is PINYIN_ID_ZERO -- confirm against ../Types.h */
#define PINYIN_ID_VOID (-1)
#define PINYIN_ID_ZERO (0)
/* shengmu (initials): ids 1..23 */
#define PINYIN_ID_B (1)
#define PINYIN_ID_C (2)
#define PINYIN_ID_CH (3)
#define PINYIN_ID_D (4)
#define PINYIN_ID_F (5)
#define PINYIN_ID_G (6)
#define PINYIN_ID_H (7)
#define PINYIN_ID_J (8)
#define PINYIN_ID_K (9)
#define PINYIN_ID_L (10)
#define PINYIN_ID_M (11)
#define PINYIN_ID_N (12)
#define PINYIN_ID_P (13)
#define PINYIN_ID_Q (14)
#define PINYIN_ID_R (15)
#define PINYIN_ID_S (16)
#define PINYIN_ID_SH (17)
#define PINYIN_ID_T (18)
#define PINYIN_ID_W (19)
#define PINYIN_ID_X (20)
#define PINYIN_ID_Y (21)
#define PINYIN_ID_Z (22)
#define PINYIN_ID_ZH (23)
/* yunmu (finals): ids 24..56 */
#define PINYIN_ID_A (24)
#define PINYIN_ID_AI (25)
#define PINYIN_ID_AN (26)
#define PINYIN_ID_ANG (27)
#define PINYIN_ID_AO (28)
#define PINYIN_ID_E (29)
#define PINYIN_ID_EI (30)
#define PINYIN_ID_EN (31)
#define PINYIN_ID_ENG (32)
#define PINYIN_ID_ER (33)
#define PINYIN_ID_I (34)
#define PINYIN_ID_IA (35)
#define PINYIN_ID_IAN (36)
#define PINYIN_ID_IANG (37)
#define PINYIN_ID_IAO (38)
#define PINYIN_ID_IE (39)
#define PINYIN_ID_IN (40)
#define PINYIN_ID_ING (41)
#define PINYIN_ID_IONG (42)
#define PINYIN_ID_IU (43)
#define PINYIN_ID_O (44)
#define PINYIN_ID_ONG (45)
#define PINYIN_ID_OU (46)
#define PINYIN_ID_U (47)
#define PINYIN_ID_UA (48)
#define PINYIN_ID_UAI (49)
#define PINYIN_ID_UAN (50)
#define PINYIN_ID_UANG (51)
#define PINYIN_ID_UE (52)
/* VE is an alias of UE: both share id 52 */
#define PINYIN_ID_VE PINYIN_ID_UE
#define PINYIN_ID_UI (53)
#define PINYIN_ID_UN (54)
#define PINYIN_ID_UO (55)
#define PINYIN_ID_V (56)
/* NG has no id of its own; it maps to the VOID sentinel */
#define PINYIN_ID_NG PINYIN_ID_VOID
#ifdef __cplusplus
}
#endif
#endif /* _PINYIN_ID_H_ */
zero-pinyin-service-master/test-sql.sh 0000775 0000000 0000000 00000000356 14467107307 0020420 0 ustar 00root root 0000000 0000000 #!/bin/sh
# Open an interactive sqlite3 shell on an in-memory database with the current
# user's pinyin main db and user db attached as "maindb" and "userdb".
# The db paths are derived from $HOME instead of one developer's home dir.
sqlite3 -echo \
	-cmd '.headers on' \
	-cmd '.prompt "zerodb> " " ...> "' \
	-cmd "attach \"$HOME/.cache/ibus/pinyin/main.db\" as \"maindb\"; attach \"$HOME/.cache/ibus/pinyin/user-1.0.db\" as \"userdb\";" \
	':memory:'
zero-pinyin-service-master/zero-pinyin-service-test.c 0000664 0000000 0000000 00000001340 14467107307 0023341 0 ustar 00root root 0000000 0000000 #include
#include "zero-pinyin-service.h"
/**
 * Sanity check of GString: formatted append lands in ->str.
 */
static void
test_GString()
{
	GString *s = g_string_new(NULL);

	g_string_append_printf(s, "s0=%d ", 1);
	g_assert_cmpstr(s->str, ==, "s0=1 ");

	g_string_free(s, TRUE);
}
/**
 * build_s_y_fields(n) must list s0,y0 .. s(n-1),y(n-1), each preceded by
 * ", ", and end with a single space.
 */
static void
test_build_s_y_fields()
{
	gchar *result = build_s_y_fields(1);
	g_assert_cmpstr(result, ==, ", s0, y0 ");
	g_free(result);

	result = build_s_y_fields(2);
	g_assert_cmpstr(result, ==, ", s0, y0, s1, y1 ");
	g_free(result);
}
int
main(int argc, char *argv[])
{
setlocale(LC_ALL, "");
g_test_init(&argc, &argv, NULL);
g_test_add_func("/zero/test_GString", test_GString);
g_test_add_func("/zero/test_build_s_y_fields", test_build_s_y_fields);
return g_test_run();
}
zero-pinyin-service-master/zero-pinyin-service.c 0000664 0000000 0000000 00000037240 14467107307 0022374 0 ustar 00root root 0000000 0000000 #include "zero-pinyin-service.h"
#include "parse-pinyin.h"
#include "../sqlite3_util.h"
#include "pinyin-id.h"
void
get_candidates_test(const char *preedit_str,
const guint fetch_size,
GVariantBuilder *candidates_builder,
GVariantBuilder *matched_lengths_builder)
{
if (g_str_equal(preedit_str, "liyifeng")) {
const gchar *matches[] = {"李易峰", "利益", "礼仪", "离异", "里", "理", "力"};
guint matched_lengths[] = {8, 4, 4, 4, 2, 2, 2};
for (guint i = 0; i < G_N_ELEMENTS(matches); ++i) {
g_variant_builder_add(candidates_builder, "s", matches[i]);
g_variant_builder_add(matched_lengths_builder, "u", matched_lengths[i]);
}
} else if (g_str_equal(preedit_str, "feng")) {
const gchar *matches[] = {"风", "封", "疯", "丰", "凤"};
guint matched_lengths[] = {4, 4, 4, 4, 4, 4};
for (guint i = 0; i < G_N_ELEMENTS(matches); ++i) {
g_variant_builder_add(candidates_builder, "s", matches[i]);
g_variant_builder_add(matched_lengths_builder, "u", matched_lengths[i]);
}
} else if (g_str_equal(preedit_str, "yifeng")) {
const gchar *matches[] = {"一封", "遗风", "艺", "依", "一", "以"};
guint matched_lengths[] = {6, 6, 2, 2, 2, 2};
for (guint i = 0; i < G_N_ELEMENTS(matches); ++i) {
g_variant_builder_add(candidates_builder, "s", matches[i]);
g_variant_builder_add(matched_lengths_builder, "u", matched_lengths[i]);
}
}
}
/**
 * Return the other member of a fuzzy shengmu pair: z<->zh, c<->ch, s<->sh,
 * l<->n.
 *
 * @pinyin_id: must be one of the eight shengmu ids above. Any other id hits
 * g_assert_not_reached(); if that assertion is compiled out the id is
 * returned unchanged.
 */
gint
get_fuzzy_pair(gint pinyin_id)
{
	static const gint pairs[][2] = {
		{PINYIN_ID_Z, PINYIN_ID_ZH},
		{PINYIN_ID_C, PINYIN_ID_CH},
		{PINYIN_ID_S, PINYIN_ID_SH},
		{PINYIN_ID_L, PINYIN_ID_N},
	};

	for (guint k = 0; k < G_N_ELEMENTS(pairs); ++k) {
		if (pinyin_id == pairs[k][0]) {
			return pairs[k][1];
		}
		if (pinyin_id == pairs[k][1]) {
			return pairs[k][0];
		}
	}
	g_assert_not_reached();
	return pinyin_id;
}
/**
 * build where clause for build_sql_for_n_pinyin().
 *
 * For each of the first n Pinyin the shengmu is always matched (sN=id, or
 * "sN=id OR sN=pair" when the matching fuzzy flag is set); the yunmu is only
 * matched when it is non-zero, so incomplete pinyin match any yunmu.
 *
 * @pylist: the pinyin list. must hold at least n elements.
 * @fuzzy_flag: see dbus interface FuzzyFlag property.
 * @n: number of Pinyin to use in pylist.
 *
 * returns: where_clause, caller should g_free() result after use.
 */
static char *
build_where_clause(GList *pylist,
		   const guint fuzzy_flag,
		   const guint n)
{
	/* start with a tautology: allows appending "AND something" without
	 * checking whether it is the first term */
	GString *s = g_string_new("1=1 ");
	GList *iter = pylist;

	for (guint i = 0; i < n; ++i) {
		g_assert_nonnull(iter);
		const Pinyin *thispy = (const Pinyin *) iter->data;
		gboolean fuzzy = FALSE;

		/* decide whether this shengmu takes part in an enabled fuzzy pair */
		switch (thispy->shengmu_i) {
		case PINYIN_ID_Z:
		case PINYIN_ID_C:
		case PINYIN_ID_S:
		case PINYIN_ID_ZH:
		case PINYIN_ID_CH:
		case PINYIN_ID_SH:
			fuzzy = (fuzzy_flag & FUZZY_FLAG_ZCS_ZHCHSH) != 0;
			break;
		case PINYIN_ID_L:
		case PINYIN_ID_N:
			fuzzy = (fuzzy_flag & FUZZY_FLAG_L_N) != 0;
			break;
		default:
			break;
		}

		/* do not allow omit shengmu. always match on it */
		if (fuzzy) {
			g_string_append_printf(
				s,
				"AND (s%u=%d OR s%u=%d) ",
				i, thispy->shengmu_i,
				i, get_fuzzy_pair(thispy->shengmu_i));
		} else {
			g_string_append_printf(s, "AND s%u=%d ",
					       i, thispy->shengmu_i);
		}
		/* allow omit yunmu, if 0 don't match on it */
		if (thispy->yunmu_i) {
			g_string_append_printf(s, "AND y%u=%d ",
					       i, thispy->yunmu_i);
		}
		iter = iter->next;
	}
	/* g_string_free(s, FALSE) hands the character data over to the caller */
	return g_string_free(s, FALSE);
}
/**
 * return a string like ", s0, y0, s1, y1 "
 *
 * @n: how many sN, yN pairs to list. must be >= 1.
 *
 * The result starts with ", " so it can follow another column list, and ends
 * with one space so the next SQL term can be appended directly.
 *
 * caller should g_free() result after use.
 */
char *
build_s_y_fields(const guint n)
{
	g_assert_cmpint(n, >=, 1);

	GString *s = g_string_new(NULL);
	for (guint i = 0; i < n; ++i) {
		g_string_append_printf(s, ", s%u, y%u", i, i);
	}
	g_string_append(s, " ");
	/* g_string_free(s, FALSE) hands the character data over to the caller */
	return g_string_free(s, FALSE);
}
/**
 * build a SQL to query candidates for first n pinyin in pylist.
 * n can be from 1 to len(pylist).
 *
 * The query unions maindb.py_phrase_{n-1} (user_freq forced to 0) with
 * userdb.py_phrase_{n-1}, drops phrases listed in userdb.not_phrase, groups
 * by phrase and orders by user_freq then freq, both descending.
 * Selected columns are, in order: user_freq, phrase, freq, s0, y0, s1, y1...
 *
 * @pylist: the pinyin list.
 * @fuzzy_flag: see dbus interface FuzzyFlag property.
 * @n: number of Pinyin to use in pylist.
 * @limit: value for the LIMIT clause.
 *
 * caller should free result with g_free() after use.
 */
static char *
build_sql_for_n_pinyin(GList *pylist,
		       const guint fuzzy_flag,
		       const guint n,
		       const guint limit)
{
	gchar *s_y_fields = build_s_y_fields(n);
	gchar *where_clause = build_where_clause(pylist, fuzzy_flag, n);
	g_assert_nonnull(where_clause);
	g_debug("where_clause=%s", where_clause);

	/* always keep one space after current term.
	 * generated fragments are appended with g_string_append(), never used
	 * as a printf format string. */
	GString *sql = g_string_new("SELECT MAX(user_freq) AS user_freq, "
				    "phrase, MAX(freq) AS freq");
	g_string_append(sql, s_y_fields);
	g_string_append(sql, "FROM (");

	g_string_append(sql, "SELECT 0 AS user_freq, phrase, freq");
	g_string_append(sql, s_y_fields);
	g_string_append_printf(
		sql, "FROM maindb.py_phrase_%u WHERE ", n - 1);
	g_string_append(sql, where_clause);

	g_string_append(sql, "UNION ");

	g_string_append(sql, "SELECT user_freq, phrase, freq");
	g_string_append(sql, s_y_fields);
	g_string_append_printf(
		sql, "FROM userdb.py_phrase_%u WHERE ", n - 1);
	g_string_append(sql, where_clause);

	g_string_append(
		sql, ") "
		"WHERE phrase NOT IN (SELECT phrase FROM userdb.not_phrase) "
		"GROUP BY phrase "
		"ORDER BY user_freq DESC, freq DESC ");
	g_string_append_printf(sql, "LIMIT %u;", limit);

	g_free(s_y_fields);
	g_free(where_clause);
	/* g_string_free(sql, FALSE) hands the character data over to the caller */
	return g_string_free(sql, FALSE);
}
/**
 * For a candidate of length group_size, calculate the matched py length.
 *
 * This is part of get_candidates_for_n_pinyin().
 *
 * @preedit_str: the pinyin preedit str, can contain '. must not be NULL.
 * @pylist: the pinyin list. must hold at least group_size elements.
 * @group_size: how many Pinyin from pylist the candidate uses. must be >= 1.
 *
 * returns: how many characters of preedit_str the candidate covers,
 * counting the ' separators around each used Pinyin.
 */
static guint
get_matched_py_length(const char *preedit_str,
		      GList *pylist,
		      const guint group_size)
{
	guint matched_py_length = 0;
	GList *iter = pylist;

	g_assert_nonnull(preedit_str);
	g_assert_cmpint(group_size, >=, 1);
	/* For usual pinyin string, just add up the Pinyin length. But for
	 * pinyin that contains ', when a Pinyin in pylist is used, also take
	 * the ' before and after it. */
	for (guint i = 0; i < group_size; ++i) {
		/* fail loudly instead of dereferencing past the list end */
		g_assert_nonnull(iter);
		while (preedit_str[matched_py_length] == '\'') {
			matched_py_length++;
		}
		matched_py_length += ((const Pinyin *) iter->data)->length;
		while (preedit_str[matched_py_length] == '\'') {
			matched_py_length++;
		}
		iter = iter->next;
	}
	return matched_py_length;
}
/**
 * fetch candidates for a fixed word length.
 *
 * @db: sqlite3 db handler.
 * @preedit_str: the pinyin preedit str. can contain '. This is needed to
 *               calculate matched_py_length.
 * @pylist: the pinyin list.
 * @fuzzy_flag: see dbus interface FuzzyFlag property.
 * @group_size: the fixed word length. use this many pinyin from pinyin list.
 * @limit: fetch this many result is enough for user. more is not a problem
 *         though; at least DEFAULT_LIMIT rows are always requested.
 * @candidates: out param, the result list of Candidate in query order. Set to
 *              NULL when nothing was fetched. The caller owns the list, each
 *              Candidate, its str, its py_indices array and every Pinyin in
 *              that array.
 *
 * returns: how many candidates fetched. 0 if the query could not be prepared.
 */
static guint
get_candidates_for_n_pinyin(sqlite3 *db,
			    const char *preedit_str,
			    GList *pylist,
			    const guint fuzzy_flag,
			    const guint group_size,
			    const guint limit,
			    GList **candidates)
{
	const guint DEFAULT_LIMIT = 50;
	GList *result = NULL; /* GList of Candidate */
	guint candidates_count = 0;
	sqlite3_stmt *stmt = NULL;
	const char *unused = NULL;
	int r = 0;

	g_assert_nonnull(db);
	g_assert_nonnull(candidates);
	g_assert_cmpint(group_size, >=, 1);
	g_assert_cmpint(group_size, <=, g_list_length(pylist));
	*candidates = NULL;

	/* build SQL and run SQL query */
	char *sql = build_sql_for_n_pinyin(pylist, fuzzy_flag,
					   group_size, MAX(limit, DEFAULT_LIMIT));
	g_debug("build_sql_for_n_pinyin result SQL:\n\n%s\n", sql);
	guint matched_py_length = get_matched_py_length(preedit_str, pylist, group_size);

	r = sqlite3_prepare_v2(db, sql, -1, &stmt, &unused);
	if (r != SQLITE_OK) {
		/* stmt is unusable; report the real error instead of stepping it */
		g_warning("sqlite3_prepare_v2 error: %d (%s)",
			  r, sqlite3_errmsg(db));
		sqlite3_finalize(stmt);
		g_free(sql);
		return 0;
	}
	/* unused points into sql, so check it before sql is freed */
	g_assert_nonnull(unused);
	g_assert_cmpstr(unused, ==, "");
	if (strlen(unused)) {
		g_warning("part of sql is unused \"%s\" length=%zu",
			  unused, strlen(unused));
	}
	g_free(sql);

	while ((r = sqlite3_step(stmt)) == SQLITE_ROW) {
		/* sql SELECT should select these columns in order:
		 * user_freq, phrase, freq, s0, y0, s1, y1, ... */
		const char *phrase = (const char *) sqlite3_column_text(stmt, 1);

		/* validate before allocating so a rejected row leaks nothing */
		if (phrase == NULL || !g_utf8_validate(phrase, -1, NULL)) {
			g_warning("ignore non utf8 phrase: %s",
				  phrase ? phrase : "(null)");
			continue;
		}

		Candidate *c = g_new0(Candidate, 1);
		c->str = g_strdup(phrase);
		c->user_freq = sqlite3_column_int(stmt, 0);
		c->freq = sqlite3_column_int(stmt, 2);
		c->matched_py_length = matched_py_length;
		c->char_len = group_size;
		c->py_indices = g_new0(Pinyin *, group_size);
		for (guint i = 0; i < group_size; ++i) {
			c->py_indices[i] = g_new0(Pinyin, 1);
			c->py_indices[i]->shengmu_i = sqlite3_column_int(stmt, 3 + i * 2);
			c->py_indices[i]->yunmu_i = sqlite3_column_int(stmt, 4 + i * 2);
			/* we don't care about ->length field */
		}
		result = g_list_prepend(result, c);
		candidates_count++;
	}
	/* keep whatever rows were read before a BUSY or error result */
	if (r == SQLITE_BUSY) {
		g_warning("sqlite3_step got SQLITE_BUSY");
	} else if (r != SQLITE_DONE) {
		g_warning("sqlite3_step error: %d (%s)",
			  r, sqlite3_errmsg(db));
	}

	r = sqlite3_finalize(stmt);
	if (r != SQLITE_OK) {
		g_debug("sqlite3_finalize error: %d (%s)", r, sqlite3_errmsg(db));
	}
	/* rows were prepended; restore query order */
	*candidates = g_list_reverse(result);
	return candidates_count;
}
/**
 * Move one Candidate into the three result builders.
 *
 * Adds c->str ("s"), c->matched_py_length ("u") and one "a(ii)" array of
 * (shengmu_i, yunmu_i) pairs to the respective builders.
 *
 * This also frees c->str, c->py_indices and every Pinyin in it. The
 * Candidate struct itself is NOT freed and its pointers are left dangling,
 * so the caller must only free c afterwards.
 */
static void
add_candidate_to_builders(Candidate *c,
			  GVariantBuilder *candidates_builder,
			  GVariantBuilder *matched_lengths_builder,
			  GVariantBuilder *candidates_pinyin_indices)
{
	GVariantBuilder *pairs = g_variant_builder_new(G_VARIANT_TYPE("a(ii)"));

	g_variant_builder_add(candidates_builder, "s", c->str);
	g_variant_builder_add(matched_lengths_builder, "u", c->matched_py_length);

	for (guint k = 0; k < c->char_len; ++k) {
		Pinyin *py = c->py_indices[k];
		g_variant_builder_add(pairs, "(ii)", py->shengmu_i, py->yunmu_i);
		g_debug("adding (ii) %d %d", py->shengmu_i, py->yunmu_i);
		g_free(py);
	}

	g_debug("adding a(ii) to aa(ii)");
	g_variant_builder_add(candidates_pinyin_indices, "a(ii)", pairs);
	g_variant_builder_unref(pairs);

	g_free(c->str);
	g_free(c->py_indices);
}
/**
 * Translate a zero FuzzyFlag value into libpyzy parser flags.
 * zero keeps its own, smaller flag set because the libpyzy one is overly
 * complex.
 *
 * ue-to-ve correction and incomplete pinyin support are always enabled;
 * each zero fuzzy bit enables both directions of the matching libpyzy pairs.
 */
guint
to_pyzy_flag(const guint fuzzy_flag)
{
	guint flags = PINYIN_CORRECT_UE_TO_VE | PINYIN_INCOMPLETE_PINYIN;

	if (fuzzy_flag & FUZZY_FLAG_ZCS_ZHCHSH) {
		flags |= PINYIN_FUZZY_Z_ZH | PINYIN_FUZZY_ZH_Z;
		flags |= PINYIN_FUZZY_C_CH | PINYIN_FUZZY_CH_C;
		flags |= PINYIN_FUZZY_S_SH | PINYIN_FUZZY_SH_S;
	}
	if (fuzzy_flag & FUZZY_FLAG_L_N) {
		flags |= PINYIN_FUZZY_L_N | PINYIN_FUZZY_N_L;
	}
	return flags;
}
/**
 * fetch candidates for preedit_str.
 *
 * The preedit is parsed into at most 15 pinyin. Candidates are then queried
 * for the longest phrase first (all parsed pinyin), then for one pinyin
 * fewer, and so on, until at least fetch_size candidates were collected or
 * single-character phrases have been queried.
 *
 * Does nothing except log a warning when db is NULL.
 */
void
get_candidates(sqlite3 *db,
	       const char *preedit_str,
	       const guint fetch_size,
	       const guint fuzzy_flag,
	       GVariantBuilder *candidates_builder,
	       GVariantBuilder *matched_lengths_builder,
	       GVariantBuilder *candidates_pinyin_indices)
{
	if (db == NULL) {
		g_warning("No db connection, can't get candidates.");
		return;
	}

	g_debug("fuzzy_flag=%u", fuzzy_flag);
	GList *pylist = parse_pinyin(preedit_str, 15, to_pyzy_flag(fuzzy_flag));
	guint total = 0;

	for (guint group_size = g_list_length(pylist);
	     total < fetch_size && group_size > 0;
	     group_size--) {
		GList *batch = NULL;

		g_info("phrase length=%u", group_size);
		guint found = get_candidates_for_n_pinyin(
			db, preedit_str, pylist, fuzzy_flag, group_size,
			fetch_size - total, &batch);

		/* the builders take the data; this also frees each
		 * Candidate's members, the structs are freed just below */
		for (GList *node = batch; node != NULL; node = node->next) {
			add_candidate_to_builders(
				(Candidate *) node->data,
				candidates_builder,
				matched_lengths_builder,
				candidates_pinyin_indices);
		}
		g_list_free_full(batch, g_free);

		g_info("%u candidates found", found);
		total += found;
	}

	g_info("returning %u candidates", total);
	g_list_free_full(pylist, g_free);
}
/**
 * sub function for commit_candidate()
 *
 * Makes sure candidate has a row in userdb.py_phrase_{len-1} (INSERT OR
 * IGNORE with user_freq 0), then increments user_freq of the row that
 * matches both the phrase and every (sN, yN) pair.
 *
 * @db: sqlite3 db handler, userdb must be attached.
 * @candidate: the committed phrase.
 * @candidate_pinyin_indices: GVariant "a(ii)", one (shengmu, yunmu) pair per
 *                            character of candidate.
 * @len: utf8 length of candidate; must equal the number of pairs.
 */
static void
_update_userdb_py_phrase(sqlite3 *db,
			 const gchar *candidate,
			 GVariant *candidate_pinyin_indices,
			 guint len) /* utf8 length of candidate char */
{
	GString *sql = NULL;
	GVariantIter iter = {0};
	GVariant *child = NULL;
	gint x = 0;
	gint y = 0;
	guint count = 0;
	char *s = NULL;

	g_assert_nonnull(db);
	g_assert_nonnull(candidate);
	g_assert_nonnull(candidate_pinyin_indices);

	/* insert candidate maybe */
	sql = g_string_new(NULL);
	g_string_append_printf(sql, "INSERT OR IGNORE INTO userdb.py_phrase_%u (user_freq, phrase, freq", len - 1);
	gchar *s_y_fields = build_s_y_fields(len);
	g_string_append(sql, s_y_fields);
	g_free(s_y_fields);
	/* %Q quotes and escapes candidate as a SQL string literal */
	s = sqlite3_mprintf(") VALUES (0, %Q, 0", candidate);
	if (s == NULL) {
		g_warning("sqlite3_mprintf failed, won't update userdb");
		g_string_free(sql, TRUE);
		return;
	}
	g_string_append(sql, s);
	sqlite3_free(s);
	/* iter over GVariant "a(ii)" */
	g_variant_iter_init(&iter, candidate_pinyin_indices);
	count = 0;
	while ((child = g_variant_iter_next_value(&iter))) {
		g_variant_get(child, "(ii)", &x, &y);
		/* next_value returns a new reference; drop it once read */
		g_variant_unref(child);
		g_string_append_printf(sql, ", %d, %d", x, y);
		count++;
	}
	if (count != len) {
		g_warning("candidate length=%u, a(ii) length=%u, mismatch!",
			  len, count);
		g_string_free(sql, TRUE);
		g_assert_not_reached();
		return;
	}
	g_string_append(sql, ");");
	if (! sqlite3_exec_simple(db, sql->str)) {
		g_warning("INSERT candidate to userdb failed");
	} else if (sqlite3_changes(db) == 1) {
		g_debug("candidate %s inserted to userdb", candidate);
	}
	g_string_free(sql, TRUE);

	/* increment user_freq field for candidate */
	sql = g_string_new(NULL);
	g_string_append_printf(sql, "UPDATE userdb.py_phrase_%u "
			       "SET user_freq = user_freq + 1 ", len - 1);
	s = sqlite3_mprintf("WHERE phrase = %Q ", candidate);
	if (s == NULL) {
		g_warning("sqlite3_mprintf failed, won't update user_freq");
		g_string_free(sql, TRUE);
		return;
	}
	g_string_append(sql, s);
	sqlite3_free(s);
	g_variant_iter_init(&iter, candidate_pinyin_indices);
	count = 0;
	while ((child = g_variant_iter_next_value(&iter))) {
		g_variant_get(child, "(ii)", &x, &y);
		g_variant_unref(child);
		/* count is unsigned, so the column index uses %u */
		g_string_append_printf(sql, "AND s%u=%d AND y%u=%d ",
				       count, x, count, y);
		count++;
	}
	g_string_append(sql, ";");
	if (! sqlite3_exec_simple(db, sql->str)) {
		g_warning("UPDATE candidate user_freq failed");
	} else if (sqlite3_changes(db) == 1) {
		g_info("candidate %s user_freq incremented", candidate);
	} else {
		g_warning("UPDATE candidate user_freq failed, no match");
	}
	g_string_free(sql, TRUE);
}
/**
 * sub function for commit_candidate()
 *
 * Delete candidate from userdb.not_phrase, so a phrase that was excluded
 * before can show up again once the user commits it.
 */
static void
_update_userdb_not_phrase(sqlite3 *db,
			  const gchar *candidate)
{
	g_assert_nonnull(db);
	g_assert_nonnull(candidate);

	/* %Q quotes and escapes candidate as a SQL string literal */
	char *stmt_text = sqlite3_mprintf(
		"DELETE FROM userdb.not_phrase WHERE phrase = %Q;", candidate);
	const gboolean ok = sqlite3_exec_simple(db, stmt_text);

	if (! ok) {
		g_warning("DELETE candidate from not_phrase failed");
	} else if (sqlite3_changes(db) == 1) {
		g_debug("candidate %s removed from not_phrase", candidate);
	}
	sqlite3_free(stmt_text);
}
/**
 * commit candidate.
 *
 * Records the committed phrase in userdb: its user_freq is incremented in
 * the py_phrase table and it is removed from not_phrase.
 *
 * Logs and returns without touching the db when db, candidate or
 * candidate_pinyin_indices is NULL, or when candidate is a single character
 * (or empty).
 */
void
commit_candidate(sqlite3 *db,
		 const gchar *candidate,
		 GVariant *candidate_pinyin_indices)
{
	if (db == NULL) {
		g_warning("No db connection, can't commit candidates.");
		return;
	}
	if (candidate == NULL) {
		g_warning("candidate should not be NULL. won't commit candidate.");
		return;
	}
	if (candidate_pinyin_indices == NULL) {
		g_warning("candidate_pinyin_indices should not be NULL. won't commit candidate.");
		return;
	}

	/* number of characters, not bytes */
	const guint char_count = g_utf8_strlen(candidate, -1);
	if (char_count < 2) {
		g_info("commit single character %s is a no-op", candidate);
		return;
	}

	_update_userdb_py_phrase(db, candidate, candidate_pinyin_indices, char_count);
	_update_userdb_not_phrase(db, candidate);
}
zero-pinyin-service-master/zero-pinyin-service.h 0000664 0000000 0000000 00000004211 14467107307 0022371 0 ustar 00root root 0000000 0000000 #ifndef _ZERO_PINYIN_SERVICE_H_
#define _ZERO_PINYIN_SERVICE_H_
#include
#include
#include
G_BEGIN_DECLS
#define ZERO_PINYIN_WELL_KNOWN_NAME "com.emacsos.zero.ZeroPinyinService1"
#define ZERO_PINYIN_OBJECT_PATH "/com/emacsos/zero/ZeroPinyinService1"
#define ZERO_PINYIN_INTERFACE_NAME "com.emacsos.zero.ZeroPinyinService1.ZeroPinyinServiceInterface"
/* Bit flags for the fuzzy_flag parameter (dbus FuzzyFlag property); they are
 * tested with bitwise AND, so several can be combined with bitwise OR. */
static const guint FUZZY_FLAG_NONE = 0;
/* treat z/zh, c/ch and s/sh as interchangeable */
static const guint FUZZY_FLAG_ZCS_ZHCHSH = 1;
/* treat l/n as interchangeable */
static const guint FUZZY_FLAG_L_N = 2;
/* Note: next flag should be 4, always use a new bit for flag. */
/**
 * One parsed pinyin: a shengmu/yunmu id pair plus how much of the preedit
 * string it covers. The ids use the PINYIN_ID_* numbering from pinyin-id.h.
 */
typedef struct {
gint shengmu_i; /* shengmu (initial) id; 0 when the pinyin has none */
gint yunmu_i; /* yunmu (final) id; 0 when omitted (incomplete pinyin) */
guint length; /* length of this pinyin in the preedit string */
} Pinyin;
/**
 * One candidate phrase fetched from the db for a preedit string.
 * str, py_indices and each Pinyin in py_indices are heap allocated.
 */
typedef struct {
gchar *str; /* the candidate string */
guint freq; /* word frequency [0, 65535] */
guint user_freq; /* user frequency [0, 65535] */
guint matched_py_length; /* matched preedit_str length */
guint char_len; /* candidate Chinese character length */
Pinyin **py_indices; /* Pinyin for each character in this candidate; array of char_len pointers */
} Candidate;
/**
 * an implementation of get_candidates() with simple test data.
 *
 * No db is used. Only the preedit strings "liyifeng", "feng" and "yifeng"
 * produce candidates; any other string adds nothing to the builders.
 *
 * @preedit_str the preedit_str
 * @fetch_size not used by this implementation
 * @candidates_builder candidates will be added to this builder
 * @matched_lengths_builder matched preedit_str length will be added to this builder
 */
void get_candidates_test(const char *preedit_str,
const guint fetch_size,
GVariantBuilder *candidates_builder,
GVariantBuilder *matched_lengths_builder);
/**
 * fetch candidates for preedit_str.
 *
 * If db is NULL a warning is logged and nothing is added to the builders.
 *
 * @db sqlite3 db handler; the generated SQL reads from maindb and userdb
 * @preedit_str the preedit_str
 * @fetch_size try to fetch this many candidates if possible
 * @fuzzy_flag bitwise OR of FUZZY_FLAG_* values
 * @candidates_builder candidates will be added to this builder
 * @matched_lengths_builder matched preedit_str length will be added to this builder
 * @candidates_pinyin_indices for each candidate, an "a(ii)" array of
 *   (shengmu_i, yunmu_i) pairs, one pair per character, will be added to
 *   this builder
 */
void get_candidates(sqlite3 *db,
const char *preedit_str,
const guint fetch_size,
const guint fuzzy_flag,
GVariantBuilder *candidates_builder,
GVariantBuilder *matched_lengths_builder,
GVariantBuilder *candidates_pinyin_indices);
/**
 * commit candidate.
 *
 * This will save candidate in user db so it's available in the future.
 * It also update user_freq for given candidate.
 *
 * Nothing is written when any argument is NULL (a warning is logged) or when
 * candidate is at most one character long.
 *
 * @db sqlite3 db handler with userdb attached
 * @candidate the committed phrase
 * @candidate_pinyin_indices GVariant "a(ii)" with one (shengmu, yunmu) pair
 *   per character of candidate
 */
void commit_candidate(sqlite3 *db,
const gchar *candidate,
GVariant *candidate_pinyin_indices);
/* for test only */
/* returns a string like ", s0, y0, s1, y1 " listing n sN, yN pairs.
 * n must be >= 1. caller should g_free() the result after use. */
char *build_s_y_fields(const guint n);
G_END_DECLS
#endif /* _ZERO_PINYIN_SERVICE_H_ */