diff --git a/src/Database.cc b/src/Database.cc index 9a8930aa4d58d0b0fa24c1a8f8eed4f058d01f46..7e09b9cfebea0655e6175d3ff5aee79d2391fe00 100644 --- a/src/Database.cc +++ b/src/Database.cc @@ -2,6 +2,7 @@ * * libpyzy - The Chinese PinYin and Bopomofo conversion library. * + * Copyright (c) 2019 Yuanle Song * Copyright (c) 2008-2010 Peng Huang * * This library is free software; you can redistribute it and/or @@ -29,18 +30,17 @@ #include "PinyinArray.h" #include "Util.h" - namespace PyZy { - #define DB_CACHE_SIZE "5000" #define DB_INDEX_SIZE (3) -/* define columns */ +// define columns #define DB_COLUMN_USER_FREQ (0) #define DB_COLUMN_PHRASE (1) #define DB_COLUMN_FREQ (2) #define DB_COLUMN_S0 (3) #define DB_PREFETCH_LEN (6) +// in seconds #define DB_BACKUP_TIMEOUT (60) #define USER_DICTIONARY_FILE "user-1.0.db" @@ -100,24 +100,33 @@ public: ~SQLStmt () { if (m_stmt != NULL) { if (sqlite3_finalize (m_stmt) != SQLITE_OK) { - g_warning ("destroy sqlite stmt failed!"); + g_warning ("delete prepared statement failed, " + "error code: %d (%s)", + sqlite3_errcode (m_db), sqlite3_errmsg (m_db)); } } } + /** + * return true on success, false otherwise. + */ bool prepare (const String &sql) { - if (sqlite3_prepare (m_db, - sql.c_str (), - sql.size (), - &m_stmt, - NULL) != SQLITE_OK) { - g_warning ("parse sql failed!\n %s", sql.c_str ()); + int r = sqlite3_prepare ( + m_db, sql.c_str (), sql.size (), &m_stmt, NULL); + if (r != SQLITE_OK) { + g_warning ("create prepare statement for sql %s failed: %d (%s)", + sql.c_str (), r, sqlite3_errmsg (m_db)); return false; } - return true; } + /** + * evaluate prepared statement. + * + * Returns: true if another row is available. + * false if no rows available or there is an error. + */ bool step (void) { switch (sqlite3_step (m_stmt)) { case SQLITE_ROW: @@ -125,7 +134,8 @@ public: case SQLITE_DONE: return false; default: - g_warning ("sqlites step error!"); + g_warning ("sqlite step failed: %d (%s)", + sqlite3_errcode (m_db), sqlite3_errmsg (m_db)); return false; } } @@ -166,7 +176,8 @@ Query::fill (PhraseArray &phrases, int count) while (m_pinyin_len > 0) { if (G_LIKELY (m_stmt.get () == NULL)) { - m_stmt = Database::instance ().query (m_pinyin, m_pinyin_begin, m_pinyin_len, -1, m_option); + m_stmt = Database::instance ().query ( + m_pinyin, m_pinyin_begin, m_pinyin_len, -1, m_option); g_assert (m_stmt.get () != NULL); } @@ -205,38 +216,93 @@ Database::Database (const std::string &user_data_dir) , m_timer (g_timer_new ()) , m_user_data_dir (user_data_dir) { - open (); + m_user_db_file.clear (); + m_user_db_file << m_user_data_dir + << G_DIR_SEPARATOR_S + << USER_DICTIONARY_FILE; + bool r = open (); + if (! r) { + g_error ("open main db failed"); + } } Database::~Database (void) { - g_timer_destroy (m_timer); - if (m_timeout_id != 0) { - saveUserDB (); - g_source_remove (m_timeout_id); - } - if (m_db) { - if (sqlite3_close (m_db) != SQLITE_OK) { - g_warning ("close sqlite database failed!"); - } - } + g_timer_destroy (m_timer); + if (m_timeout_id != 0) { + bool r = saveUserDB (); + if (! r) { + g_warning ("save user db failed"); + } + gboolean r1 = g_source_remove (m_timeout_id); + if (! r1) { + g_warning ("remove timeout source failed, " + "source id is %d", m_timeout_id); + } + m_timeout_id = 0; + } + if (m_db) { + if (sqlite3_close (m_db) != SQLITE_OK) { + g_warning ("close sqlite database failed: %d (%s)", + sqlite3_errcode (m_db), sqlite3_errmsg (m_db)); + } + m_db = NULL; + } } -inline bool +bool Database::executeSQL (const char *sql, sqlite3 *db) { - if (db == NULL) - db = m_db; - + if (! db) + db = m_db; + if (! db) { + g_warning ("trying to execute sql %s on db handler NULL", sql); + g_assert_not_reached (); + return false; + } char *errmsg = NULL; if (sqlite3_exec (db, sql, NULL, NULL, &errmsg) != SQLITE_OK) { - g_warning ("%s: %s", errmsg, sql); + g_warning ("execute sql failed: sql=%s error=%s", sql, errmsg); sqlite3_free (errmsg); return false; } return true; } +/** + * set sqlite3 pragma on main db to improve performance. + * + * Returns: true on success, false otherwise. + */ +bool +Database::setPragmaOnMainDB (void) +{ + m_sql.clear (); + + // see https://www.sqlite.org/pragma.html#pragma_synchronous + m_sql << "PRAGMA synchronous=OFF;\n"; + + /* Set the cache size for better performance */ + m_sql << "PRAGMA cache_size=" DB_CACHE_SIZE ";\n"; + + /* Using memory for temp store */ + // m_sql << "PRAGMA temp_store=MEMORY;\n"; + + /* Set journal mode */ + // m_sql << "PRAGMA journal_mode=PERSIST;\n"; + + /* Using EXCLUSIVE locking mode on databases + * for better performance */ + m_sql << "PRAGMA locking_mode=EXCLUSIVE;\n"; + + return executeSQL (m_sql); +} + +/** + * try to open a main database. such as open-phrase.db. + * + * Returns: true on success, false otherwise. + */ bool Database::open (void) { @@ -253,61 +319,33 @@ Database::open (void) size_t i; for (i = 0; i < G_N_ELEMENTS (maindb); i++) { - if (!g_file_test(maindb[i], G_FILE_TEST_IS_REGULAR)) - continue; - if (sqlite3_open_v2 (maindb[i], &m_db, - SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) == SQLITE_OK) { - break; + g_debug ("trying to load main db at %s", maindb[i]); + if (sqlite3_open_v2 (maindb[i], &m_db, SQLITE_OPEN_READWRITE, NULL) == SQLITE_OK) { + g_message ("loading main db at %s", maindb[i]); + break; } } - if (i == G_N_ELEMENTS (maindb)) { - g_warning ("can not open main database"); - break; + g_warning ("Failed to load any known main database"); + break; } - m_sql.clear (); - - /* Set synchronous=OFF, write user database will become much faster. - * It will cause user database corrupted, if the operatering system - * crashes or computer loses power. - * */ - m_sql << "PRAGMA synchronous=OFF;\n"; - - /* Set the cache size for better performance */ - m_sql << "PRAGMA cache_size=" DB_CACHE_SIZE ";\n"; - - /* Using memory for temp store */ - // m_sql << "PRAGMA temp_store=MEMORY;\n"; + int r = 0; + r = setPragmaOnMainDB (); + if (! r) { + g_warning ("execute sqlite PRAGMA statements failed"); + break; + } - /* Set journal mode */ - // m_sql << "PRAGMA journal_mode=PERSIST;\n"; - - /* Using EXCLUSIVE locking mode on databases - * for better performance */ - m_sql << "PRAGMA locking_mode=EXCLUSIVE;\n"; - if (!executeSQL (m_sql)) - break; - - loadUserDB (); -#if 0 - /* Attach user database */ - - g_mkdir_with_parents (m_user_data_dir, 0750); - m_buffer.clear (); - m_buffer << m_user_data_dir << G_DIR_SEPARATOR_S << USER_DICTIONARY_FILE; - - retval = openUserDB (m_buffer); - if (!retval) { - g_warning ("Can not open user database %s", m_buffer.c_str ()); - if (!openUserDB (":memory:")) - goto _failed; - } -#endif + r = loadUserDB (); + if (! r) { + g_warning ("load user db failed"); + break; + } /* prefetch some tables */ // prefetch (); - + g_assert_nonnull (m_db); return true; } while (0); @@ -318,41 +356,31 @@ Database::open (void) return false; } +/** + * initialize user db. + * create tables, index and populate data into desc table. + * + * Returns: true on success, false otherwise. + */ bool -Database::loadUserDB (void) +Database::initUserDB (sqlite3 *userdb) { - sqlite3 *userdb = NULL; - do { - /* Attach user database */ - m_sql.printf ("ATTACH DATABASE \":memory:\" AS userdb;"); - if (!executeSQL (m_sql)) - break; - - g_mkdir_with_parents (m_user_data_dir, 0750); - m_buffer.clear (); - m_buffer << m_user_data_dir << G_DIR_SEPARATOR_S << USER_DICTIONARY_FILE; - - unsigned int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; - if (sqlite3_open_v2 (m_buffer, &userdb, flags, NULL) != SQLITE_OK && - sqlite3_open_v2 (":memory:", &userdb, flags, NULL) != SQLITE_OK) - break; - m_sql = "BEGIN TRANSACTION;\n"; /* create desc table*/ m_sql << "CREATE TABLE IF NOT EXISTS desc (name PRIMARY KEY, value TEXT);\n"; - m_sql << "INSERT OR IGNORE INTO desc VALUES " << "('version', '1.2.0');\n" - << "INSERT OR IGNORE INTO desc VALUES " << "('uuid', '" << UUID () << "');\n" - << "INSERT OR IGNORE INTO desc VALUES " << "('hostname', '" << Hostname () << "');\n" - << "INSERT OR IGNORE INTO desc VALUES " << "('username', '" << Env ("USERNAME") << "');\n" - << "INSERT OR IGNORE INTO desc VALUES " << "('create-time', datetime());\n" - << "INSERT OR IGNORE INTO desc VALUES " << "('attach-time', datetime());\n"; + m_sql << "INSERT OR IGNORE INTO desc VALUES ('version', '1.2.0');\n" + << "INSERT OR IGNORE INTO desc VALUES ('uuid', '" << UUID () << "');\n" + << "INSERT OR IGNORE INTO desc VALUES ('hostname', '" << Hostname () << "');\n" + << "INSERT OR IGNORE INTO desc VALUES ('username', '" << Env ("USERNAME") << "');\n" + << "INSERT OR IGNORE INTO desc VALUES ('create-time', datetime());\n" + << "INSERT OR IGNORE INTO desc VALUES ('attach-time', datetime());\n"; /* create phrase tables */ for (size_t i = 0; i < MAX_PHRASE_LEN; i++) { - m_sql.appendPrintf ("CREATE TABLE IF NOT EXISTS py_phrase_%d (user_freq, phrase TEXT, freq INTEGER ", i); - for (size_t j = 0; j <= i; j++) - m_sql.appendPrintf (",s%d INTEGER, y%d INTEGER", j, j); - m_sql << ");\n"; + m_sql.appendPrintf ("CREATE TABLE IF NOT EXISTS py_phrase_%d (user_freq, phrase TEXT, freq INTEGER ", i); + for (size_t j = 0; j <= i; j++) + m_sql.appendPrintf (",s%d INTEGER, y%d INTEGER", j, j); + m_sql << ");\n"; } /* create index */ @@ -360,118 +388,274 @@ Database::loadUserDB (void) m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "index_1_0 ON py_phrase_1(s0,y0,s1,y1,phrase);\n"; m_sql << "CREATE INDEX IF NOT EXISTS " << "index_1_1 ON py_phrase_1(s0,s1,y1);\n"; for (size_t i = 2; i < MAX_PHRASE_LEN; i++) { - m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "index_" << i << "_0 ON py_phrase_" << i - << "(s0,y0"; - for (size_t j = 1; j <= i; j++) - m_sql << ",s" << j << ",y" << j; - m_sql << ",phrase);\n"; - m_sql << "CREATE INDEX IF NOT EXISTS " << "index_" << i << "_1 ON py_phrase_" << i << "(s0,s1,s2,y2);\n"; + m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "index_" << i << "_0 ON py_phrase_" << i + << "(s0,y0"; + for (size_t j = 1; j <= i; j++) + m_sql << ",s" << j << ",y" << j; + m_sql << ",phrase);\n"; + m_sql << "CREATE INDEX IF NOT EXISTS " << "index_" << i << "_1 ON py_phrase_" << i << "(s0,s1,s2,y2);\n"; } m_sql << "COMMIT;"; - if (!executeSQL (m_sql, userdb)) - break; - - sqlite3_backup *backup = sqlite3_backup_init (m_db, "userdb", userdb, "main"); + return executeSQL (m_sql, userdb); +} +/** + * copy src_dbname to dest_dbname using sqlite3_backup_step(). + * + * dest and src should be opened sqlite3 db handler. + * dest_dbname and src_dbname are db (schema) names. + * + * Returns: true on success, false otherwise. + */ +bool +Database::copyDB (sqlite3 *dest, const char* dest_dbname, + sqlite3 *src, const char* src_dbname) +{ + bool copy_done = false; + sqlite3_backup *backup = sqlite3_backup_init ( + dest, dest_dbname, src, src_dbname); if (backup) { - sqlite3_backup_step (backup, -1); - sqlite3_backup_finish (backup); - } - - sqlite3_close (userdb); - return true; - } while (0); + int r = sqlite3_backup_step (backup, -1); + if (r == SQLITE_DONE) { + copy_done = true; + } else { + g_warning ("sqlite3_backup_step() failed: %d (%s)", + r, sqlite3_errmsg (dest)); + } + r = sqlite3_backup_finish (backup); + if (r != SQLITE_OK) { + g_warning ("sqlite3_backup_finish() failed: %d (%s)", + r, sqlite3_errmsg (dest)); + } + } else { + g_warning ("sqlite3_backup_init() failed: %d (%s)", + sqlite3_errcode (dest), sqlite3_errmsg (dest)); + } + return copy_done; +} - if (userdb) - sqlite3_close (userdb); - return false; +/** + * return TRUE if file exists + */ +static gboolean +file_exists (const char* filename) { + return g_file_test (filename, G_FILE_TEST_EXISTS); } + +/** + * this will load data from user db to an attached :memory: db on m_db. + * the attached db is called "userdb". + * + * if there is no local user db file (usually + * ~/.cache/ibus/pinyin/user-1.0.db), create an empty user db in :memory: and + * use that. + * + * Returns: true if the process finished successfully, false otherwise. + */ bool -Database::saveUserDB (void) +Database::loadUserDB (void) { - g_mkdir_with_parents (m_user_data_dir, 0750); - m_buffer.clear (); - m_buffer << m_user_data_dir << G_DIR_SEPARATOR_S << USER_DICTIONARY_FILE; - - String tmpfile = m_buffer + "-tmp"; sqlite3 *userdb = NULL; + int r = 0; do { - /* remove tmpfile if it exist */ - g_unlink (tmpfile); - - unsigned int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; - if (sqlite3_open_v2 (tmpfile, &userdb, flags, NULL) != SQLITE_OK) - break; - - sqlite3_backup *backup = sqlite3_backup_init (userdb, "main", m_db, "userdb"); - - if (backup == NULL) - break; - - sqlite3_backup_step (backup, -1); - sqlite3_backup_finish (backup); - sqlite3_close (userdb); - - g_rename (tmpfile, m_buffer); - + /* Attach user database */ + m_sql.printf ("ATTACH DATABASE \":memory:\" AS userdb;"); + if (!executeSQL (m_sql)) + break; + + r = g_mkdir_with_parents (m_user_data_dir, 0750); + if (r != 0) { + g_warning ("create dir %s failed: %d (%s)", + m_user_data_dir.c_str (), r, g_strerror (r)); + // not critical, libpyzy should still function without a user + // db file. + } + + g_message ("loading user db at %s", m_user_db_file.c_str ()); + // always open RW because we may need to add additional table or index. + r = sqlite3_open (m_user_db_file, &userdb); + if (r != SQLITE_OK) { + if (file_exists (m_user_db_file.c_str ())) { + g_warning ("open user db failed: %d (%s)", + r, sqlite3_errmsg (userdb)); + } + // use a :memory: db as userdb, only works for current + // session. + r = sqlite3_open (":memory:", &userdb); + if (r != SQLITE_OK) { + g_warning ("open :memory: as user db failed: %d (%s)", + r, sqlite3_errmsg (userdb)); + break; + } + } + g_assert_nonnull (userdb); + + r = initUserDB (userdb); + if (! r) { + break; + } + + r = copyDB (m_db, "userdb", userdb, "main"); + if (! r) { + g_warning ("copy user db to (attached :memory: userdb) failed"); + break; + } + + r = sqlite3_close (userdb); + if (r != SQLITE_OK) { + g_warning ("close userdb failed: %d (%s)", + r, sqlite3_errmsg (userdb)); + // this is a minor problem. + // I still want to return true. so no break here. + } return true; } while (0); - if (userdb != NULL) - sqlite3_close (userdb); - g_unlink (tmpfile); - + r = sqlite3_close (userdb); + if (r != SQLITE_OK) { + g_warning ("close userdb failed: %d (%s)", + r, sqlite3_errmsg (userdb)); + } return false; } +/** + * save :memory: based "userdb" in m_db back to user db file. + * + * Returns: true on success, false otherwise. + */ +bool +Database::saveUserDB (void) +{ + int r = 0; + r = g_mkdir_with_parents (m_user_data_dir, 0750); + if (r) { + g_warning ("create dir %s failed: %d (%s)", + m_user_data_dir.c_str (), r, g_strerror (r)); + return false; + } + String user_db_filename = ""; + user_db_filename << m_user_data_dir << G_DIR_SEPARATOR_S << USER_DICTIONARY_FILE; + String tmpfile = user_db_filename + "-tmp"; + sqlite3 *userdb = NULL; + bool save_ok = false; + do { + /* remove tmpfile if it exist */ + r = g_unlink (tmpfile); + if (r) { + if (file_exists (tmpfile)) { + g_warning ("delete tmp db %s failed: %d (%s)", + tmpfile.c_str (), r, g_strerror (r)); + // do not reuse existing -tmp db, can result + // in duplicate data when copyDB(). + return false; + } + } else { + g_debug ("old tmpfile %s removed", tmpfile.c_str ()); + } + g_message ("saving in RAM userdb to %s", tmpfile.c_str ()); + unsigned int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; + g_assert (! file_exists (tmpfile)); + r = sqlite3_open_v2 (tmpfile, &userdb, flags, NULL); + if (r != SQLITE_OK) { + g_warning ("open tmp db %s failed: %d (%s)", + tmpfile.c_str (), + r, sqlite3_errmsg (userdb)); + break; + } + r = copyDB (userdb, "main", m_db, "userdb"); + if (! r) { + g_warning ("save user db back to file failed"); + } else { + save_ok = true; + } + sqlite3_close (userdb); + + if (save_ok) { + r = g_rename (tmpfile, user_db_filename); + if (r) { + g_warning ("rename tmpfile to %s failed: " + "%d (%s)", user_db_filename.c_str (), + r, g_strerror (r)); + return false; + } + g_message ("tmp file renamed. user db %s updated.", + user_db_filename.c_str ()); + return true; + } + return false; + } while (0); + + sqlite3_close (userdb); + g_unlink (tmpfile); + return false; +} + void Database::prefetch (void) { - m_sql.clear (); - for (size_t i = 0; i < DB_PREFETCH_LEN; i++) - m_sql << "SELECT * FROM py_phrase_" << i << ";\n"; + m_sql.clear (); + for (size_t i = 0; i < DB_PREFETCH_LEN; i++) + m_sql << "SELECT * FROM py_phrase_" << i << ";\n"; - // g_debug ("prefetching ..."); - executeSQL (m_sql); - // g_debug ("done"); + g_debug ("prefetching ..."); + executeSQL (m_sql); + g_debug ("done"); } -// This function should be return gboolean because g_timeout_add_seconds requires it. +/** + * call saveUserDB() if timer has run for DB_BACKUP_TIMEOUT seconds or more. + * + * used as GSourceFunc() for g_timeout_add_seconds(). + */ gboolean -Database::timeoutCallback (void * data) +Database::cb_saveUserDB (gpointer user_data) { - Database *self = static_cast (data); - - /* Get elapsed time since last modification of database. */ - unsigned int elapsed = (unsigned int)g_timer_elapsed (self->m_timer, NULL); - - if (elapsed >= DB_BACKUP_TIMEOUT && - self->saveUserDB ()) { - self->m_timeout_id = 0; - return false; - } - - return true; + Database *self = static_cast (user_data); + double elapsed = g_timer_elapsed (self->m_timer, NULL); // in seconds + if (elapsed + 1 > DB_BACKUP_TIMEOUT) { + bool r = self->saveUserDB (); + if (! r) { + g_warning ("auto save user db failed"); + } + self->m_timeout_id = 0; + return G_SOURCE_REMOVE; + } + return G_SOURCE_CONTINUE; } +/** + * this method is called whenever user db is modified. + * + * we will schedule a user db save in DB_BACKUP_TIMEOUT seconds, if no new + * modification came in between. if there are new modifications, wait for + * DB_BACKUP_TIMEOUT after last modification. + */ void Database::modified (void) { - /* Restart the timer */ - g_timer_start (m_timer); - - if (m_timeout_id != 0) - return; - - m_timeout_id = g_timeout_add_seconds (DB_BACKUP_TIMEOUT, - Database::timeoutCallback, - static_cast (this)); + if (m_timeout_id) { + g_timer_start (m_timer); // reset timer. + } else { + static const guint CHECK_INTERVAL = DB_BACKUP_TIMEOUT; + m_timeout_id = g_timeout_add_seconds ( + CHECK_INTERVAL, + Database::cb_saveUserDB, + static_cast (this)); + } } -inline static bool -pinyin_option_check_sheng (unsigned int option, unsigned int id, unsigned int fid) +/** + * ? + * + * @option: fuzzy pinyin and auto correction flags. + * @id: ? + * @fid: ? + */ +static inline bool +pinyin_option_check_sheng (guint option, guint id, guint fid) { switch ((id << 16) | fid) { case (PINYIN_ID_C << 16) | PINYIN_ID_CH: @@ -506,8 +690,15 @@ pinyin_option_check_sheng (unsigned int option, unsigned int id, unsigned int fi } } -inline static bool -pinyin_option_check_yun (unsigned int option, unsigned int id, unsigned int fid) +/** + * ? + * + * @option: fuzzy pinyin and auto correction flags. + * @id: ? + * @fid: ? + */ +static inline bool +pinyin_option_check_yun (guint option, guint id, guint fid) { switch ((id << 16) | fid) { case (PINYIN_ID_AN << 16) | PINYIN_ID_ANG: @@ -534,12 +725,25 @@ pinyin_option_check_yun (unsigned int option, unsigned int id, unsigned int fid) } } +/** + * create a SQLStmt that is prepared to query candidates for given pinyin. + * + * @pinyin: the pinyin to query for + * @pinyin_begin: ? + * @pinyin_len: ? + * @m: ? + * @option: whether to enable fuzzy pinyin and auto correction when finding + * matches. + * + * Returns: a shared_ptr on success. or a reset shared_ptr + * on failure. + */ SQLStmtPtr Database::query (const PinyinArray &pinyin, size_t pinyin_begin, size_t pinyin_len, int m, - unsigned int option) + guint option) { g_assert (pinyin_begin < pinyin.size ()); g_assert (pinyin_len <= pinyin.size () - pinyin_begin); @@ -628,20 +832,20 @@ Database::query (const PinyinArray &pinyin, } } - - m_buffer.clear (); + String sql_condition; + sql_condition.clear (); for (size_t i = 0; i < conditions.size (); i++) { - if (G_UNLIKELY (i == 0)) - m_buffer << " (" << conditions[i] << ")\n"; - else - m_buffer << " OR (" << conditions[i] << ")\n"; + if (G_UNLIKELY (i == 0)) + sql_condition << " (" << conditions[i] << ")\n"; + else + sql_condition << " OR (" << conditions[i] << ")\n"; } m_sql.clear (); int id = pinyin_len - 1; m_sql << "SELECT * FROM (" - "SELECT 0 AS user_freq, * FROM main.py_phrase_" << id << " WHERE " << m_buffer << " UNION ALL " - "SELECT * FROM userdb.py_phrase_" << id << " WHERE " << m_buffer << ") " + "SELECT 0 AS user_freq, * FROM main.py_phrase_" << id << " WHERE " << sql_condition << " UNION ALL " + "SELECT * FROM userdb.py_phrase_" << id << " WHERE " << sql_condition << ") " "GROUP BY phrase ORDER BY user_freq DESC, freq DESC"; if (m > 0) m_sql << " LIMIT " << m; @@ -653,49 +857,55 @@ Database::query (const PinyinArray &pinyin, SQLStmtPtr stmt (new SQLStmt (m_db)); if (!stmt->prepare (m_sql)) { - stmt.reset (); + stmt.reset (); } return stmt; } -inline void -Database::phraseWhereSql (const Phrase & p, String & sql) +void +Database::phraseWhereSql (const Phrase &p, String &sql) { - sql << " WHERE"; - sql << " s0=" << p.pinyin_id[0].sheng - << " AND y0=" << p.pinyin_id[0].yun; - for (size_t i = 1; i < p.len; i++) { - sql << " AND s" << i << '=' << p.pinyin_id[i].sheng - << " AND y" << i << '=' << p.pinyin_id[i].yun; - } - sql << " AND phrase=\"" << p.phrase << "\""; - + // escape and add single quote on both sides + char* phrase = sqlite3_mprintf("%Q", p.phrase); + sql << " WHERE"; + sql << " s0=" << p.pinyin_id[0].sheng + << " AND y0=" << p.pinyin_id[0].yun; + for (size_t i = 1; i < p.len; i++) { + sql << " AND s" << i << '=' << p.pinyin_id[i].sheng + << " AND y" << i << '=' << p.pinyin_id[i].yun; + } + sql << " AND phrase=" << phrase; + sqlite3_free (phrase); } -inline void -Database::phraseSql (const Phrase & p, String & sql) +void +Database::phraseSql (const Phrase &p, String &sql) { - sql << "INSERT OR IGNORE INTO userdb.py_phrase_" << p.len - 1 - << " VALUES(" << 0 /* user_freq */ - << ",\"" << p.phrase << '"' /* phrase */ - << ',' << p.freq; /* freq */ - - for (size_t i = 0; i < p.len; i++) { - sql << ',' << p.pinyin_id[i].sheng << ',' << p.pinyin_id[i].yun; - } - - sql << ");\n"; - - sql << "UPDATE userdb.py_phrase_" << p.len - 1 - << " SET user_freq=user_freq+1"; - - phraseWhereSql (p, sql); - sql << ";\n"; + // escape and add single quote on both sides + char* phrase = sqlite3_mprintf("%Q", p.phrase); + sql << "INSERT OR IGNORE INTO userdb.py_phrase_" << p.len - 1 + << " VALUES(" << 0 /* user_freq */ + << "," << phrase /* phrase */ + << ',' << p.freq; /* freq */ + sqlite3_free (phrase); + for (size_t i = 0; i < p.len; i++) { + sql << ',' << p.pinyin_id[i].sheng << ',' << p.pinyin_id[i].yun; + } + sql << ");\n"; + + sql << "UPDATE userdb.py_phrase_" << p.len - 1 + << " SET user_freq=user_freq+1"; + + phraseWhereSql (p, sql); + sql << ";\n"; } +/** + * insert phrases to userdb and increment user_freq field for each phrase. + */ void -Database::commit (const PhraseArray &phrases) +Database::commit (const PhraseArray &phrases) { Phrase phrase = {""}; @@ -708,18 +918,25 @@ Database::commit (const PhraseArray &phrases) phraseSql (phrase, m_sql); m_sql << "COMMIT;\n"; - executeSQL (m_sql); - modified (); + bool r = executeSQL (m_sql); + if (r) { + modified (); + } else { + g_warning ("insert phrases to (or update freq for) userdb failed"); + } } +/** + * remove phrase from userdb. + */ void Database::remove (const Phrase & phrase) { m_sql = "BEGIN TRANSACTION;\n"; m_sql << "DELETE FROM userdb.py_phrase_" << phrase.len - 1; phraseWhereSql (phrase, m_sql); - m_sql << ";\n"; - m_sql << "COMMIT;\n"; + m_sql << ";\n" + << "COMMIT;\n"; executeSQL (m_sql); modified (); diff --git a/src/Database.h b/src/Database.h index a7ea0c8b761a780073cc72c987e16b4634459fee..ab7c8caf347716e357a11f2ef7e513af5a433722 100644 --- a/src/Database.h +++ b/src/Database.h @@ -2,6 +2,7 @@ * * libpyzy - The Chinese PinYin and Bopomofo conversion library. * + * Copyright (c) 2019 Yuanle Song * Copyright (c) 2008-2010 Peng Huang * * This library is free software; you can redistribute it and/or @@ -86,24 +87,28 @@ public: } private: + bool setPragmaOnMainDB (void); bool open (void); + bool initUserDB (sqlite3* userdb); + bool copyDB (sqlite3* dest, const char* dest_dbname, + sqlite3* src, const char* src_dbname); bool loadUserDB (void); bool saveUserDB (void); void prefetch (void); void phraseSql (const Phrase & p, String & sql); void phraseWhereSql (const Phrase & p, String & sql); - bool executeSQL (const char *sql, sqlite3 *db = NULL); + bool executeSQL (const char* sql, sqlite3* db = NULL); + static gboolean cb_saveUserDB (gpointer user_data); void modified (void); - static gboolean timeoutCallback (void * data); private: sqlite3 *m_db; /* sqlite3 database */ String m_sql; /* sql stmt */ - String m_buffer; /* temp buffer */ unsigned int m_timeout_id; GTimer *m_timer; String m_user_data_dir; + String m_user_db_file; /* user db file name with full path */ private: static std::unique_ptr m_instance; diff --git a/src/Database_test.cc b/src/Database_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..d5b1715be09331c2f37b1833bbb7c580c87ac097 --- /dev/null +++ b/src/Database_test.cc @@ -0,0 +1,41 @@ +#include +#include + +static void +test_string_escape () +{ + const char* phrase1 = "nihao"; + char* quoted = sqlite3_mprintf("%Q", phrase1); + g_assert_cmpstr (quoted, ==, "'nihao'"); + sqlite3_free (quoted); + + const char* phrase2 = "ni'hao"; + quoted = sqlite3_mprintf("%Q", phrase2); + g_assert_cmpstr (quoted, ==, "'ni''hao'"); + sqlite3_free (quoted); + + const char* phrase3 = "'nihao'"; + quoted = sqlite3_mprintf("%Q", phrase3); + g_assert_cmpstr (quoted, ==, "'''nihao'''"); + sqlite3_free (quoted); + + const char* phrase4 = "英国"; + quoted = sqlite3_mprintf("%Q", phrase4); + g_assert_cmpstr (quoted, ==, "'英国'"); + sqlite3_free (quoted); + + const char* phrase5 = "英';drop国"; + quoted = sqlite3_mprintf("%Q", phrase5); + g_assert_cmpstr (quoted, ==, "'英'';drop国'"); + sqlite3_free (quoted); +} + +int +main (int argc, char *argv[]) +{ + g_test_init (&argc, &argv, NULL); + g_test_add_func ("/Database/string-escape", + test_string_escape); + + return g_test_run (); +} diff --git a/src/PhraseEditor.cc b/src/PhraseEditor.cc index 2223d40652a24727f17d9c0009e3667e407ee091..a4746c4c0a47f8f19a5a02535f6caa1622de94c4 100644 --- a/src/PhraseEditor.cc +++ b/src/PhraseEditor.cc @@ -2,6 +2,7 @@ * * libpyzy - The Chinese PinYin and Bopomofo conversion library. * + * Copyright (c) 2019 Yuanle Song * Copyright (c) 2008-2010 Peng Huang * * This library is free software; you can redistribute it and/or @@ -149,6 +150,9 @@ PhraseEditor::updateTheFirstCandidate (void) end - begin, m_config.option); ret = query.fill (m_candidate_0_phrases, 1); + if (ret != 1) { + g_warning ("expect query.fill() result be 1, found %d", ret); + } g_assert (ret == 1); begin += m_candidate_0_phrases.back ().len; } diff --git a/src/meson.build b/src/meson.build index e1dd52f9eafb734c01c5e707c3103114796ed072..2dcacbb22dd88f30c033b758d9c1035dc587c151 100644 --- a/src/meson.build +++ b/src/meson.build @@ -10,15 +10,20 @@ project('pyzy', 'cpp', 'strip=true', 'b_ndebug=if-release']) -pkgdatadir = join_paths(get_option('datadir'), 'pyzy') +pkgdatadir = join_paths(get_option('prefix'), get_option('datadir'), 'pyzy') add_project_arguments( '-Wno-unused-parameter', '-Wno-missing-field-initializers', '-DHAVE_LIBUUID', '-std=c++0x', + '-DG_LOG_DOMAIN="libpyzy"', '-DPKGDATADIR="' + pkgdatadir + '"', language: 'cpp') +if get_option('buildtype').startswith('release') + add_project_arguments('-DG_DISABLE_ASSERT', language: 'cpp') +endif + glib = dependency('glib-2.0') sqlite3 = dependency('sqlite3') uuid = dependency('uuid') @@ -43,6 +48,9 @@ lib_src = [ shared_library('pyzy-1.0', lib_src, soversion: '0', - version: '0.100.0', + version: '0.100.1', dependencies: shared_dep, install: true) + +test('libpyzy-test', + executable('dbtest', 'Database_test.cc', dependencies: shared_dep))