diff options
author | Christian Mollekopf <chrigi_1@fastmail.fm> | 2016-10-13 18:38:35 +0200 |
---|---|---|
committer | Christian Mollekopf <chrigi_1@fastmail.fm> | 2018-02-11 23:03:17 +0100 |
commit | 6051c1247cde61bcc8e483eb4166e5a297c0ecc6 (patch) | |
tree | df3aba1ef4011f2640b17c8cf7a9b106933231ab /common/fulltextindex.cpp | |
parent | 8740a007515dcf1b315d69ab5c64fcfd40ec980c (diff) | |
download | sink-6051c1247cde61bcc8e483eb4166e5a297c0ecc6.tar.gz sink-6051c1247cde61bcc8e483eb4166e5a297c0ecc6.zip |
Xapian based fulltext indexing
This cuts into the sync performance by about 40%,
but gives us fast fulltext searching for all local content.
Diffstat (limited to 'common/fulltextindex.cpp')
-rw-r--r-- | common/fulltextindex.cpp | 149 |
1 files changed, 149 insertions, 0 deletions
diff --git a/common/fulltextindex.cpp b/common/fulltextindex.cpp new file mode 100644 index 0000000..33972b7 --- /dev/null +++ b/common/fulltextindex.cpp | |||
@@ -0,0 +1,149 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2018 Christian Mollekopf <mollekopf@kolabsys.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the | ||
16 | * Free Software Foundation, Inc., | ||
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | */ | ||
19 | //xapian.h needs to be included first to build | ||
20 | #include <xapian.h> | ||
21 | #include "fulltextindex.h" | ||
22 | |||
23 | #include <QFile> | ||
24 | #include <QDir> | ||
25 | |||
26 | #include "log.h" | ||
27 | #include "definitions.h" | ||
28 | |||
29 | FulltextIndex::FulltextIndex(const QByteArray &resourceInstanceIdentifier, Sink::Storage::DataStore::AccessMode accessMode) | ||
30 | : mName("fulltext"), | ||
31 | mDbPath{QFile::encodeName(Sink::resourceStorageLocation(resourceInstanceIdentifier) + '/' + "fulltext")} | ||
32 | { | ||
33 | try { | ||
34 | if (QDir{}.mkpath(mDbPath)) { | ||
35 | if (accessMode == Sink::Storage::DataStore::ReadWrite) { | ||
36 | mDb = new Xapian::WritableDatabase(mDbPath.toStdString(), Xapian::DB_CREATE_OR_OPEN); | ||
37 | } else { | ||
38 | mDb = new Xapian::Database(mDbPath.toStdString(), Xapian::DB_OPEN); | ||
39 | } | ||
40 | } else { | ||
41 | SinkError() << "Failed to open database" << mDbPath; | ||
42 | } | ||
43 | } catch (const Xapian::DatabaseError& e) { | ||
44 | SinkError() << "Failed to open database" << mDbPath << ":" << QString::fromStdString(e.get_msg()); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | FulltextIndex::~FulltextIndex() | ||
49 | { | ||
50 | delete mDb; | ||
51 | } | ||
52 | |||
53 | static std::string idTerm(const QByteArray &key) | ||
54 | { | ||
55 | return "Q" + key.toStdString(); | ||
56 | } | ||
57 | |||
58 | void FulltextIndex::add(const QByteArray &key, const QString &value) | ||
59 | { | ||
60 | add(key, {{{}, value}}); | ||
61 | } | ||
62 | |||
63 | void FulltextIndex::add(const QByteArray &key, const QList<QPair<QString, QString>> &values) | ||
64 | { | ||
65 | if (!mDb) { | ||
66 | return; | ||
67 | } | ||
68 | Xapian::TermGenerator generator; | ||
69 | Xapian::Document document; | ||
70 | generator.set_document(document); | ||
71 | |||
72 | for (const auto &entry : values) { | ||
73 | if (!entry.second.isEmpty()) { | ||
74 | generator.index_text(entry.second.toStdString()); | ||
75 | } | ||
76 | } | ||
77 | document.add_value(0, key.toStdString()); | ||
78 | |||
79 | const auto idterm = idTerm(key); | ||
80 | document.add_boolean_term(idterm); | ||
81 | |||
82 | writableDatabase()->replace_document(idterm, document); | ||
83 | } | ||
84 | |||
85 | void FulltextIndex::commitTransaction() | ||
86 | { | ||
87 | if (mHasTransactionOpen) { | ||
88 | Q_ASSERT(mDb); | ||
89 | writableDatabase()->commit_transaction(); | ||
90 | mHasTransactionOpen = false; | ||
91 | } | ||
92 | } | ||
93 | |||
94 | void FulltextIndex::abortTransaction() | ||
95 | { | ||
96 | if (mHasTransactionOpen) { | ||
97 | Q_ASSERT(mDb); | ||
98 | writableDatabase()->cancel_transaction(); | ||
99 | mHasTransactionOpen = false; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | Xapian::WritableDatabase* FulltextIndex::writableDatabase() | ||
104 | { | ||
105 | Q_ASSERT(dynamic_cast<Xapian::WritableDatabase*>(mDb)); | ||
106 | auto db = static_cast<Xapian::WritableDatabase*>(mDb); | ||
107 | if (!mHasTransactionOpen) { | ||
108 | db->begin_transaction(); | ||
109 | mHasTransactionOpen = true; | ||
110 | } | ||
111 | return db; | ||
112 | } | ||
113 | |||
114 | void FulltextIndex::remove(const QByteArray &key) | ||
115 | { | ||
116 | if (!mDb) { | ||
117 | return; | ||
118 | } | ||
119 | writableDatabase()->delete_document(idTerm(key)); | ||
120 | } | ||
121 | |||
122 | QVector<QByteArray> FulltextIndex::lookup(const QString &searchTerm) | ||
123 | { | ||
124 | if (!mDb) { | ||
125 | return {}; | ||
126 | } | ||
127 | QVector<QByteArray> results; | ||
128 | |||
129 | try { | ||
130 | Xapian::QueryParser parser; | ||
131 | auto query = parser.parse_query(searchTerm.toStdString(), Xapian::QueryParser::FLAG_WILDCARD|Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE); | ||
132 | Xapian::Enquire enquire(*mDb); | ||
133 | enquire.set_query(query); | ||
134 | |||
135 | auto limit = 1000; | ||
136 | Xapian::MSet mset = enquire.get_mset(0, limit); | ||
137 | Xapian::MSetIterator it = mset.begin(); | ||
138 | for (;it != mset.end(); it++) { | ||
139 | auto doc = it.get_document(); | ||
140 | const auto data = doc.get_value(0); | ||
141 | results << QByteArray{data.c_str(), int(data.length())}; | ||
142 | } | ||
143 | } | ||
144 | catch (const Xapian::Error &error) { | ||
145 | // Nothing to do, move along | ||
146 | } | ||
147 | return results; | ||
148 | } | ||
149 | |||