Skip to content
Database.cc 24.3 KiB
Newer Older
/* vim:set et ts=4 sts=4:
 *
 * libpyzy - The Chinese PinYin and Bopomofo conversion library.
 * Copyright (c) 2019 Yuanle Song <sylecn@gmail.com>
 * Copyright (c) 2008-2010 Peng Huang <shawn.p.huang@gmail.com>
 *
Hiroshi Sumita's avatar
Hiroshi Sumita committed
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
Hiroshi Sumita's avatar
Hiroshi Sumita committed
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
Hiroshi Sumita's avatar
Hiroshi Sumita committed
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
#include "Database.h"

#include <glib.h>
#include <glib/gstdio.h>
#include <sqlite3.h>

#include "Config.h"
#include "PinyinArray.h"
#include "Util.h"

namespace PyZy {
#define DB_CACHE_SIZE       "5000"
#define DB_INDEX_SIZE       (3)
// define columns
#define DB_COLUMN_USER_FREQ (0)
#define DB_COLUMN_PHRASE    (1)
#define DB_COLUMN_FREQ      (2)
#define DB_COLUMN_S0        (3)

#define DB_PREFETCH_LEN     (6)
// in seconds
#define DB_BACKUP_TIMEOUT   (60)

#define USER_DICTIONARY_FILE  "user-1.0.db"
#define SQLITE3_MEMORY_DB ":memory:"
std::unique_ptr<Database> Database::m_instance;

class Conditions : public std::vector<std::string> {
public:
    Conditions (void) : std::vector<std::string> (1) {}

    void double_ (void) {
        size_t i = size ();
        // To avoid a invalid referece caused by a memory reallocation, which
        // may occur on push_back, we call reserve here.
        reserve (i * 2);
        do {
            push_back (at (--i));
        } while (i > 0);
    }

    void triple (void) {
        size_t i = size ();
        // To avoid a invalid referece caused by a memory reallocation, which
        // may occur on push_back, we call reserve here.
        reserve (i * 3);
            const std::string & value = at (--i);
            push_back (value);
            push_back (value);
        } while (i > 0);
    }

    void appendVPrintf (size_t begin, size_t end, const char *fmt, va_list args) {
        char str[64];
        g_vsnprintf (str, sizeof(str), fmt, args);
        for (size_t i = begin; i < end; i++) {
    void appendPrintf (size_t begin, size_t end, const char *fmt, ...) {
        va_list args;
        va_start (args, fmt);
        appendVPrintf (begin, end, fmt, args);
        va_end (args);
    }
};

class SQLStmt {
public:
    SQLStmt (sqlite3 *db)
        : m_db (db), m_stmt (NULL) {
        g_assert (m_db != NULL);
    }

    ~SQLStmt () {
        if (m_stmt != NULL) {
            if (sqlite3_finalize (m_stmt) != SQLITE_OK) {
                g_warning ("delete prepared statement failed, "
			   "error code: %d (%s)",
			   sqlite3_errcode (m_db), sqlite3_errmsg (m_db));
    /**
     * return true on success, false otherwise.
     */
    bool prepare (const String &sql) {
	int r = sqlite3_prepare (
		m_db, sql.c_str (), sql.size (), &m_stmt, NULL);
        if (r != SQLITE_OK) {
            g_warning ("create prepare statement for sql %s failed: %d (%s)",
		       sql.c_str (), r, sqlite3_errmsg (m_db));
    /**
     * evaluate prepared statement.
     * 
     * Returns: true if another row is available.
     * false if no rows available or there is an error.
     */
        switch (sqlite3_step (m_stmt)) {
        case SQLITE_ROW:
        case SQLITE_DONE:
            g_warning ("sqlite step failed: %d (%s)",
		       sqlite3_errcode (m_db), sqlite3_errmsg (m_db));
    const char *columnText (int col) {
        return (const char *) sqlite3_column_text (m_stmt, col);
    int columnInt (int col) {
        return sqlite3_column_int (m_stmt, col);
    }

private:
    sqlite3 *m_db;
    sqlite3_stmt *m_stmt;
};

Query::Query (const PinyinArray    & pinyin,
              size_t                 pinyin_begin,
              size_t                 pinyin_len,
              unsigned int           option)
    : m_pinyin (pinyin),
      m_pinyin_begin (pinyin_begin),
      m_pinyin_len (pinyin_len),
      m_option (option)
{
	g_assert (m_pinyin.size () >= pinyin_begin + pinyin_len);
/**
 * query db to find phrases for pinyin.
 *
 * @phrases: put query result in this PhraseArray.
 * @count: query at most this many phrases.
 *
 * Returns: how many phrases fetched from db. 
 */
int
Query::fill (PhraseArray &phrases, int count)

    while (m_pinyin_len > 0) {
        if (G_LIKELY (m_stmt.get () == NULL)) {
            m_stmt = Database::instance ().query (
		    m_pinyin, m_pinyin_begin, m_pinyin_len, -1, m_option);
	    if (m_stmt == nullptr) {
		    return 0;
	    }
        }

        while (m_stmt->step ()) {
            Phrase phrase;

            g_strlcpy (phrase.phrase,
                       m_stmt->columnText (DB_COLUMN_PHRASE),
                       sizeof (phrase.phrase));
            phrase.freq = m_stmt->columnInt (DB_COLUMN_FREQ);
            phrase.user_freq = m_stmt->columnInt (DB_COLUMN_USER_FREQ);
            phrase.len = m_pinyin_len;

            for (size_t i = 0, column = DB_COLUMN_S0; i < m_pinyin_len; i++) {
                phrase.pinyin_id[i].sheng = m_stmt->columnInt (column++);
                phrase.pinyin_id[i].yun = m_stmt->columnInt (column++);
            }

            phrases.push_back (phrase);
            row ++;
            if (G_UNLIKELY (row == count)) {
                return row;
            }
        }

        m_stmt.reset ();
        m_pinyin_len --;
    }

    return row;
}

 * attach db to dest handler as schema_name.
 *
 * Returns: true on success, false otherwise.
 */
static bool
attach_db (sqlite3* dest, String db_file_to_attach, const gchar* schema_name)
	g_message ("attaching db %s as %s",
		   db_file_to_attach.c_str (), schema_name);
	char* sql = sqlite3_mprintf ("ATTACH DATABASE %Q AS %Q;",
				     db_file_to_attach.c_str (),
				     schema_name);
	gboolean result = sqlite3_exec_simple (dest, sql);
	sqlite3_free (sql);
	return result;
 * return TRUE if file exists
static gboolean
file_exists (const char* filename) {
	return g_file_test (filename, G_FILE_TEST_EXISTS);
 * return the first existing file in given file list.
 * return "" if none of the file exists.
static String
first_existing_file (const std::vector<String> &files)
	for (const String &fn : files) {
		if (file_exists (fn.c_str ())) {
			return fn;
		}
	}
	return "";
 * Returns: a main db file name if one exists.
 * an empty string otherwise.
 * This function will look for file at these path:
 *   m_user_data_dir + "/main.db"
 *   {PKGDATADIR}/db/local.db
 *   {PKGDATADIR}/db/open-phrase.db
 *   {PKGDATADIR}/db/android.db
 * In ibus-pinyin context those are:
 *   ~/.cache/ibus/pinyin/main.db
 *   /usr/share/pyzy/db/local.db
 *   /usr/share/pyzy/db/open-phrase.db
 *   /usr/share/pyzy/db/android.db
String
Database::getMainDBFile (void)
	std::vector<String> files;
	files.push_back (m_user_data_dir + "/main.db");
	files.push_back (PKGDATADIR"/db/local.db");
	files.push_back (PKGDATADIR"/db/open-phrase.db");
	files.push_back (PKGDATADIR"/db/android.db");
	return first_existing_file (files);
 * set pragma on "maindb" and "userdb" on given sqlite3 db handler.
 *
 * see document at
 * https://www.sqlite.org/pragma.html
 * 
 * Returns: true on success, false otherwise.
static bool
db_set_pragma (sqlite3* db)
{
	g_debug ("setting pragma on db");
	const char* sql =
		"PRAGMA maindb.temp_store=MEMORY;"
		"PRAGMA userdb.temp_store=MEMORY;";
	char* errmsg = NULL;
	int r = sqlite3_exec (db, sql, NULL, NULL, &errmsg);
	sqlite3_free (errmsg);
	return r == SQLITE_OK;
 * Create user db file and init the user db with tables and indices.
 * 
 * Returns: TRUE on success, FALSE otherwise.
gboolean
Database::createUserDBFile ()
	g_return_val_if_fail (! m_user_data_dir.empty (), FALSE);
	g_return_val_if_fail (! m_user_db_file.empty (), FALSE);

	g_debug ("ensure dir exists:%s", m_user_data_dir.c_str ());
	gint r = g_mkdir_with_parents (m_user_data_dir, 0750);
	if (r != 0) {
		g_warning ("create dir %s failed: %d (%s)",
			   m_user_data_dir.c_str (), r, g_strerror (r));
	sqlite3 *userdb = NULL;
	g_message ("creating user db at %s", m_user_db_file.c_str ());
	r = sqlite3_open (m_user_db_file, &userdb);
        if (r != SQLITE_OK) {
		sqlite3_close (userdb);
		return FALSE;
	bool result = initUserDB (userdb, "main");
	sqlite3_close (userdb);
	return result;
 * init m_db handler, make main db accessible at "maindb" schema,
 * make user db accessible at "userdb" schema.
 *
 * This also sets m_main_db_file, m_user_db_file to correct file name.
 *
 * Returns: true on success, false otherwise.
 */
bool
Database::initDB ()
	g_assert_null (m_db);
	if (m_db)
		return TRUE;

	sqlite3* db = NULL;
	int r = 0;
	r = sqlite3_open (SQLITE3_MEMORY_DB, &db);
	if (r) {
		g_warning ("sqlite3 open %s db failed", SQLITE3_MEMORY_DB);
		goto fail;
	m_main_db_file = getMainDBFile();
	if (m_main_db_file.empty()) {
		g_warning ("failed to find a main db file");
		goto fail;
	}
	g_message ("found main db file at %s", m_main_db_file.c_str ());
	r = attach_db (db, m_main_db_file, "maindb");
	if (!r) {
		g_warning ("attach main db file %s failed",
			   m_main_db_file.c_str ());
		goto fail;
	}
	m_user_db_file = getUserDBFile();
	g_assert (! m_user_db_file.empty());
	if (! file_exists (m_user_db_file.c_str ())) {
		r = createUserDBFile ();
		if (!r) {
			g_warning ("create user db file failed");
			// use a memory db for user db.
			// populate tables after attach.
			m_user_db_file = SQLITE3_MEMORY_DB;
		}
	} else {
		g_message ("found user db file at %s", m_user_db_file.c_str ());
	}
	r = attach_db (db, m_user_db_file, "userdb");
	if (!r) {
		g_warning ("attach user db file %s failed",
			   m_user_db_file.c_str ());
		goto fail;
	}
	if (m_user_db_file == SQLITE3_MEMORY_DB) {
		bool r = initUserDB (db, "userdb");
		if (!r) {
			g_warning ("init in RAM user db failed");
			g_assert_not_reached ();
			goto fail;
	}
	r = db_set_pragma (db);
	if (!r) {
		g_warning ("set pragma on db failed");
		// can continue
	}
	g_assert_nonnull (db);
	m_db = db;
	return true;
fail:
	sqlite3_close (db);
	g_warning ("initDB() failed, query won't work");
String
Database::getUserDBFile (void)
	return m_user_data_dir + G_DIR_SEPARATOR_S + USER_DICTIONARY_FILE;
}
Database::Database (const std::string &user_data_dir)
    : m_db (NULL)
    , m_user_data_dir (user_data_dir)
{
	initDB ();		// init m_db
}

Database::~Database (void)
{
	int r = sqlite3_close (m_db);
	if (r != SQLITE_OK) {
		g_warning ("close sqlite database failed: %d (%s)",
			   r, sqlite3_errmsg (m_db));
	}
	m_db = NULL;
 * initialize user db.
 * create tables, index and populate data into desc table.
 * Returns: true on success, false otherwise.
bool
Database::initUserDB (sqlite3 *userdb, const char* schema)
	String sql;
	sql = "BEGIN TRANSACTION;\n";
	// create desc table
	sql << "CREATE TABLE IF NOT EXISTS " << schema << ".desc (name PRIMARY KEY, value TEXT);\n";
	sql << "INSERT OR IGNORE INTO " << schema << ".desc VALUES ('version', '1.2.0');\n"
	    << "INSERT OR IGNORE INTO " << schema << ".desc VALUES ('uuid', '" << UUID () << "');\n"
	    << "INSERT OR IGNORE INTO " << schema << ".desc VALUES ('hostname', '" << Hostname () << "');\n"
	    << "INSERT OR IGNORE INTO " << schema << ".desc VALUES ('username', '" << Env ("USERNAME") << "');\n"
	    << "INSERT OR IGNORE INTO " << schema << ".desc VALUES ('create-time', datetime());\n"
	    << "INSERT OR IGNORE INTO " << schema << ".desc VALUES ('attach-time', datetime());\n";

	/* create phrase tables */
	for (size_t i = 0; i < MAX_PHRASE_LEN; i++) {
		sql.appendPrintf ("CREATE TABLE IF NOT EXISTS %s.py_phrase_%d (user_freq, phrase TEXT, freq INTEGER", schema, i);
		for (size_t j = 0; j <= i; j++)
			sql.appendPrintf (", s%d INTEGER, y%d INTEGER", j, j);
		sql << ");\n";

	/* create index */
	sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << schema << ".index_0_0 ON py_phrase_0(s0,y0,phrase);\n";
	sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << schema << ".index_1_0 ON py_phrase_1(s0,y0,s1,y1,phrase);\n";
	sql << "CREATE INDEX IF NOT EXISTS " << schema << ".index_1_1 ON py_phrase_1(s0,s1,y1);\n";
	for (size_t i = 2; i < MAX_PHRASE_LEN; i++) {
		sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << schema << ".index_" << i << "_0 ON py_phrase_" << i
		      << "(s0,y0";
		for (size_t j = 1; j <= i; j++)
			sql << ",s" << j << ",y" << j;
		sql << ",phrase);\n";
		sql << "CREATE INDEX IF NOT EXISTS " << schema << ".index_" << i << "_1 ON py_phrase_" << i << "(s0,s1,s2,y2);\n";
	}
	sql << "COMMIT;";

	return sqlite3_exec_simple (userdb, sql.c_str ());
/**
 * this method is called whenever user db is modified.
 */
void
Database::modified (void)
{
}

/**
 * ?
 * 
 * @option: fuzzy pinyin and auto correction flags.
 * @id: ?
 * @fid: ?
 */
static inline bool
pinyin_option_check_sheng (guint option, guint id, guint fid)
{
    switch ((id << 16) | fid) {
    case (PINYIN_ID_C << 16) | PINYIN_ID_CH:
        return (option & PINYIN_FUZZY_C_CH);
    case (PINYIN_ID_CH << 16) | PINYIN_ID_C:
        return (option & PINYIN_FUZZY_CH_C);
    case (PINYIN_ID_Z << 16) | PINYIN_ID_ZH:
        return (option & PINYIN_FUZZY_Z_ZH);
    case (PINYIN_ID_ZH << 16) | PINYIN_ID_Z:
        return (option & PINYIN_FUZZY_ZH_Z);
    case (PINYIN_ID_S << 16) | PINYIN_ID_SH:
        return (option & PINYIN_FUZZY_S_SH);
    case (PINYIN_ID_SH << 16) | PINYIN_ID_S:
        return (option & PINYIN_FUZZY_SH_S);
    case (PINYIN_ID_L << 16) | PINYIN_ID_N:
        return (option & PINYIN_FUZZY_L_N);
    case (PINYIN_ID_N << 16) | PINYIN_ID_L:
        return (option & PINYIN_FUZZY_N_L);
    case (PINYIN_ID_F << 16) | PINYIN_ID_H:
        return (option & PINYIN_FUZZY_F_H);
    case (PINYIN_ID_H << 16) | PINYIN_ID_F:
        return (option & PINYIN_FUZZY_H_F);
    case (PINYIN_ID_L << 16) | PINYIN_ID_R:
        return (option & PINYIN_FUZZY_L_R);
    case (PINYIN_ID_R << 16) | PINYIN_ID_L:
        return (option & PINYIN_FUZZY_R_L);
    case (PINYIN_ID_K << 16) | PINYIN_ID_G:
        return (option & PINYIN_FUZZY_K_G);
    case (PINYIN_ID_G << 16) | PINYIN_ID_K:
        return (option & PINYIN_FUZZY_G_K);
    default: return false;
/**
 * ?
 * 
 * @option: fuzzy pinyin and auto correction flags.
 * @id: ?
 * @fid: ?
 */
static inline bool
pinyin_option_check_yun (guint option, guint id, guint fid)
{
    switch ((id << 16) | fid) {
    case (PINYIN_ID_AN << 16) | PINYIN_ID_ANG:
        return (option & PINYIN_FUZZY_AN_ANG);
    case (PINYIN_ID_ANG << 16) | PINYIN_ID_AN:
        return (option & PINYIN_FUZZY_ANG_AN);
    case (PINYIN_ID_EN << 16) | PINYIN_ID_ENG:
        return (option & PINYIN_FUZZY_EN_ENG);
    case (PINYIN_ID_ENG << 16) | PINYIN_ID_EN:
        return (option & PINYIN_FUZZY_ENG_EN);
    case (PINYIN_ID_IN << 16) | PINYIN_ID_ING:
        return (option & PINYIN_FUZZY_IN_ING);
    case (PINYIN_ID_ING << 16) | PINYIN_ID_IN:
        return (option & PINYIN_FUZZY_ING_IN);
    case (PINYIN_ID_IAN << 16) | PINYIN_ID_IANG:
        return (option & PINYIN_FUZZY_IAN_IANG);
    case (PINYIN_ID_IANG << 16) | PINYIN_ID_IAN:
        return (option & PINYIN_FUZZY_IANG_IAN);
    case (PINYIN_ID_UAN << 16) | PINYIN_ID_UANG:
        return (option & PINYIN_FUZZY_UAN_UANG);
    case (PINYIN_ID_UANG << 16) | PINYIN_ID_UAN:
        return (option & PINYIN_FUZZY_UANG_UAN);
    default: return false;
/**
 * create a SQLStmt that is prepared to query candidates for given pinyin.
 *
 * @pinyin: the pinyin to query for
 * @pinyin_begin: ?
 * @pinyin_len: ?
 * @m: ?
 * @option: whether to enable fuzzy pinyin and auto correction when finding
 * matches.
 *
 * Returns: a shared_ptr<SQLStmt> on success. or a reset shared_ptr<SQLStmt>
 * on failure.
 */
SQLStmtPtr
Database::query (const PinyinArray &pinyin,
                 size_t             pinyin_begin,
                 size_t             pinyin_len,
                 int                m,
                 guint              option)
    if (! m_db) {
        g_warning ("Error: can't query db "
		   "because db init failed");
        return nullptr;
    }

    g_assert (pinyin_begin < pinyin.size ());
    g_assert (pinyin_len <= pinyin.size () - pinyin_begin);
    g_assert (pinyin_len <= MAX_PHRASE_LEN);

    /* prepare sql */
    Conditions conditions;

    for (size_t i = 0; i < pinyin_len; i++) {
        const Pinyin *p;
        p = pinyin[i + pinyin_begin];

        fs1 = pinyin_option_check_sheng (option, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng);
        fs2 = pinyin_option_check_sheng (option, p->pinyin_id[0].sheng, p->pinyin_id[2].sheng);

        if (G_LIKELY (i > 0))
            conditions.appendPrintf (0, conditions.size (), " AND ");

        if (G_UNLIKELY (fs1 || fs2)) {
            if (G_LIKELY (i < DB_INDEX_SIZE)) {
                if (fs1 && fs2 == 0) {
                    conditions.double_ ();
                    conditions.appendPrintf (0, conditions.size ()  >> 1,
                                               "s%d=%d", i, p->pinyin_id[0].sheng);
                    conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
                                               "s%d=%d", i, p->pinyin_id[1].sheng);
                }
                else if (fs1 == 0 && fs2) {
                    conditions.double_ ();
                    conditions.appendPrintf (0, conditions.size ()  >> 1,
                                               "s%d=%d", i, p->pinyin_id[0].sheng);
                    conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
                                               "s%d=%d", i, p->pinyin_id[2].sheng);
                }
                else {
                    size_t len = conditions.size ();
                    conditions.triple ();
                    conditions.appendPrintf (0, len,
                                               "s%d=%d", i, p->pinyin_id[0].sheng);
                    conditions.appendPrintf (len, len << 1,
                                               "s%d=%d", i, p->pinyin_id[1].sheng);
                    conditions.appendPrintf (len << 1, conditions.size (),
                                               "s%d=%d", i, p->pinyin_id[2].sheng);
                }
            }
            else {
                if (fs1 && fs2 == 0) {
                    conditions.appendPrintf (0, conditions.size (),
                                               "s%d IN (%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng);
                }
                else if (fs1 == 0 && fs2) {
                    conditions.appendPrintf (0, conditions.size (),
                                               "s%d IN (%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[2].sheng);
                }
                else {
                    conditions.appendPrintf (0, conditions.size (),
                                               "s%d IN (%d,%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng, p->pinyin_id[2].sheng);
                }
            }
        }
        else {
            conditions.appendPrintf (0, conditions.size (),
                                       "s%d=%d", i, p->pinyin_id[0].sheng);
        }

        if (p->pinyin_id[0].yun != PINYIN_ID_ZERO) {
            if (pinyin_option_check_yun (option, p->pinyin_id[0].yun, p->pinyin_id[1].yun)) {
                if (G_LIKELY (i < DB_INDEX_SIZE)) {
                    conditions.double_ ();
                    conditions.appendPrintf (0, conditions.size ()  >> 1,
                                               " AND y%d=%d", i, p->pinyin_id[0].yun);
                    conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
                                               " and y%d=%d", i, p->pinyin_id[1].yun);
                }
                else {
                    conditions.appendPrintf (0, conditions.size (),
                                               " AND y%d IN (%d,%d)", i, p->pinyin_id[0].yun, p->pinyin_id[1].yun);
                }
            }
            else {
                conditions.appendPrintf (0, conditions.size (),
                                           " AND y%d=%d", i, p->pinyin_id[0].yun);
            }
        }
    }

    String sql_condition;
    sql_condition.clear ();
    for (size_t i = 0; i < conditions.size (); i++) {
	    if (G_UNLIKELY (i == 0))
		    sql_condition << " (" << conditions[i] << ")\n";
	    else
		    sql_condition << " OR (" << conditions[i] << ")\n";
    int id = pinyin_len - 1;
    sql << "SELECT * FROM ("
                "SELECT 0 AS user_freq, * FROM maindb.py_phrase_" << id << " WHERE " << sql_condition << " UNION ALL "
                "SELECT * FROM userdb.py_phrase_" << id << " WHERE " << sql_condition << ") "
                    "GROUP BY phrase ORDER BY user_freq DESC, freq DESC ";
    if (m > 0)
        sql << "LIMIT " << m;

    g_debug ("sql=\n%s", sql.c_str ());

    /* query database */
    SQLStmtPtr stmt (new SQLStmt (m_db));

    if (!stmt->prepare (sql)) {
	    stmt.reset ();
void
Database::phraseWhereSql (const Phrase &p, String &sql)
	// escape and add single quote on both sides
	char* phrase = sqlite3_mprintf("%Q", p.phrase);
	sql << " WHERE";
	sql << " s0=" << p.pinyin_id[0].sheng
	    << " AND y0=" << p.pinyin_id[0].yun;
	for (size_t i = 1; i < p.len; i++) {
		sql << " AND s" << i << '=' << p.pinyin_id[i].sheng
		    << " AND y" << i << '=' << p.pinyin_id[i].yun;
	}
	sql << " AND phrase=" << phrase;
	sqlite3_free (phrase);
void
Database::phraseSql (const Phrase &p, String &sql)
	// escape and add single quote on both sides
	char* phrase = sqlite3_mprintf("%Q", p.phrase);
	sql << "INSERT OR IGNORE INTO userdb.py_phrase_" << p.len - 1
	    << " VALUES(" << 0                  /* user_freq */
	    << "," << phrase                    /* phrase */
	    << ','   << p.freq;                 /* freq */
	sqlite3_free (phrase);
	for (size_t i = 0; i < p.len; i++) {
		sql << ',' << p.pinyin_id[i].sheng << ',' << p.pinyin_id[i].yun;
	}
	sql << ");\n";

	sql << "UPDATE userdb.py_phrase_" << p.len - 1
	    << " SET user_freq=MIN(user_freq+1, 65535)";

	phraseWhereSql (p, sql);
	sql << ";\n";
/**
 * insert phrases to userdb and increment user_freq field for each phrase.
 */
Database::commit (const PhraseArray &phrases)
	if (! m_db) {
		g_warning ("Error: can't commit new phrase "
			   "because db init failed");
		return;
	}
	Phrase phrase = {""};
	String sql = "BEGIN TRANSACTION;\n";
	for (size_t i = 0; i < phrases.size (); i++) {
		phrase += phrases[i];
		phraseSql (phrases[i], sql);
	}
	if (phrases.size () > 1)
		phraseSql (phrase, sql);
	sql << "COMMIT;\n";

	gboolean r = sqlite3_exec_simple (m_db, sql);
	if (r) {
		modified ();
	} else {
		g_warning ("insert phrases to (or update freq for) "
			   "userdb failed");
	}
/**
 * remove phrase from userdb.
 */
void
Database::remove (const Phrase & phrase)
{
	if (! m_db) {
		g_warning ("Error: can't remove phrase "
			   "because db init failed");
		return;
	}
	String sql;
	sql << "BEGIN;\n"
		"DELETE FROM userdb.py_phrase_" << phrase.len - 1;
	phraseWhereSql (phrase, sql);
	sql << ";\n"
		"COMMIT;\n";
	gboolean r = sqlite3_exec_simple (m_db, sql);
	if (r) {
		modified ();
	} else {
		g_warning ("remove phrase %s from userdb failed", phrase.phrase);
	}
/**
 * create Database singleton instance at Database::m_instance.
 *
 * upstream should check the return value for error. if false is returned,
 * upstream app should not try to access the db.
 *
 * Returns: true on success, false otherwise.
 */
bool
Database::init (const std::string & user_data_dir)
{
	if (m_instance.get () == NULL) {
		m_instance.reset (new Database (user_data_dir));
	}
	if (! m_instance->m_db) {
		// try initDB() again if previous call in Database constructor
		// failed.
		return m_instance->initDB ();
	}
	return true;
}

void
Database::finalize (void)
{
    m_instance.reset (NULL);
}

};  // namespace PyZy