summaryrefslogtreecommitdiffstats
path: root/common
diff options
context:
space:
mode:
Diffstat (limited to 'common')
-rw-r--r--common/storage_lmdb.cpp340
1 files changed, 212 insertions, 128 deletions
diff --git a/common/storage_lmdb.cpp b/common/storage_lmdb.cpp
index 660326a..5fb1d0f 100644
--- a/common/storage_lmdb.cpp
+++ b/common/storage_lmdb.cpp
@@ -26,6 +26,8 @@
26#include <QDebug> 26#include <QDebug>
27#include <QDir> 27#include <QDir>
28#include <QReadWriteLock> 28#include <QReadWriteLock>
29#include <QMutex>
30#include <QMutexLocker>
29#include <QString> 31#include <QString>
30#include <QTime> 32#include <QTime>
31#include <valgrind.h> 33#include <valgrind.h>
@@ -99,6 +101,96 @@ static QList<QByteArray> getDatabaseNames(MDB_txn *transaction)
99 101
100} 102}
101 103
104/*
105 * To create a dbi we always need a write transaction,
106 * and we always need to commit the transaction ASAP
107 * We can only ever enter from one point per process.
108 */
109
110QMutex sCreateDbiLock;
111
112static bool createDbi(MDB_txn *transaction, const QByteArray &db, bool readOnly, bool allowDuplicates, MDB_dbi &dbi)
113{
114
115 unsigned int flags = 0;
116 if (allowDuplicates) {
117 flags |= MDB_DUPSORT;
118 }
119
120 MDB_dbi flagtableDbi;
121 if (const int rc = mdb_dbi_open(transaction, "__flagtable", readOnly ? 0 : MDB_CREATE, &flagtableDbi)) {
122 if (!readOnly) {
123 SinkWarning() << "Failed to to open flagdb: " << QByteArray(mdb_strerror(rc));
124 }
125 } else {
126 MDB_val key, value;
127 key.mv_data = const_cast<void*>(static_cast<const void*>(db.constData()));
128 key.mv_size = db.size();
129 if (const auto rc = mdb_get(transaction, flagtableDbi, &key, &value)) {
130 //We expect this to fail for new databases
131 if (rc != MDB_NOTFOUND) {
132 SinkWarning() << "Failed to read flags from flag db: " << QByteArray(mdb_strerror(rc));
133 }
134 } else {
135 //Found the flags
136 const auto ba = QByteArray::fromRawData((char *)value.mv_data, value.mv_size);
137 flags = ba.toInt();
138 }
139 }
140
141 if (const int rc = mdb_dbi_open(transaction, db.constData(), flags, &dbi)) {
142 //Create the db if it is not existing already
143 if (rc == MDB_NOTFOUND && !readOnly) {
144 //Sanity check db name
145 {
146 auto parts = db.split('.');
147 for (const auto &p : parts) {
148 auto containsSpecialCharacter = [] (const QByteArray &p) {
149 for (int i = 0; i < p.size(); i++) {
150 const auto c = p.at(i);
151 //Between 0 and z in the ascii table. Essentially ensures that the name is printable and doesn't contain special chars
152 if (c < 0x30 || c > 0x7A) {
153 return true;
154 }
155 }
156 return false;
157 };
158 if (p.isEmpty() || containsSpecialCharacter(p)) {
159 SinkError() << "Tried to create a db with an invalid name. Hex:" << db.toHex() << " ASCII:" << db;
160 Q_ASSERT(false);
161 throw std::runtime_error("Fatal error while creating db.");
162 }
163 }
164 }
165 if (const int rc = mdb_dbi_open(transaction, db.constData(), flags | MDB_CREATE, &dbi)) {
166 SinkWarning() << "Failed to create db " << QByteArray(mdb_strerror(rc));
167 return false;
168 }
169 //Record the db flags
170 MDB_val key, value;
171 key.mv_data = const_cast<void*>(static_cast<const void*>(db.constData()));
172 key.mv_size = db.size();
173 //Store the flags without the create option
174 const auto ba = QByteArray::number(flags);
175 value.mv_data = const_cast<void*>(static_cast<const void*>(db.constData()));
176 value.mv_size = db.size();
177 if (const int rc = mdb_put(transaction, flagtableDbi, &key, &value, MDB_NOOVERWRITE)) {
178 //We expect this to fail if we're only creating the dbi but not the db
179 if (rc != MDB_KEYEXIST) {
180 SinkWarning() << "Failed to write flags to flag db: " << QByteArray(mdb_strerror(rc));
181 }
182 }
183 } else {
184 //It's not an error if we only want to read
185 if (!readOnly) {
186 SinkWarning() << "Failed to open db " << QByteArray(mdb_strerror(rc));
187 return true;
188 }
189 return false;
190 }
191 }
192 return true;
193}
102 194
103class DataStore::NamedDatabase::Private 195class DataStore::NamedDatabase::Private
104{ 196{
@@ -119,112 +211,91 @@ public:
119 std::function<void(const DataStore::Error &error)> defaultErrorHandler; 211 std::function<void(const DataStore::Error &error)> defaultErrorHandler;
120 QString name; 212 QString name;
121 bool createdNewDbi = false; 213 bool createdNewDbi = false;
122 QString createdDbName; 214 QString createdNewDbiName;
123 215
124 bool openDatabase(bool readOnly, std::function<void(const DataStore::Error &error)> errorHandler) 216 bool dbiValidForTransaction(MDB_dbi dbi, MDB_txn *transaction)
125 { 217 {
126 unsigned int flags = 0; 218 //sDbis can contain dbi's that are not available to this transaction.
127 if (allowDuplicates) { 219 //We use mdb_dbi_flags to check if the dbi is valid for this transaction.
128 flags |= MDB_DUPSORT; 220 uint f;
221 if (mdb_dbi_flags(transaction, dbi, &f) == EINVAL) {
222 return false;
129 } 223 }
224 return true;
225 }
130 226
227 bool openDatabase(bool readOnly, std::function<void(const DataStore::Error &error)> errorHandler)
228 {
131 const auto dbiName = name + db; 229 const auto dbiName = name + db;
230 QReadLocker dbiLocker{&sDbisLock};
132 if (sDbis.contains(dbiName)) { 231 if (sDbis.contains(dbiName)) {
133 dbi = sDbis.value(dbiName); 232 dbi = sDbis.value(dbiName);
134 //sDbis can contain dbi's that are not available to this transaction. 233 Q_ASSERT(dbiValidForTransaction(dbi, transaction));
135 //We use mdb_dbi_flags to check if the dbi is valid for this transaction.
136 uint f;
137 if (mdb_dbi_flags(transaction, dbi, &f) == EINVAL) {
138 //In readonly mode we can just ignore this. In read-write we would have tried to concurrently create a db.
139 if (!readOnly) {
140 SinkWarning() << "Tried to create database in second transaction: " << dbiName;
141 }
142 dbi = 0;
143 transaction = 0;
144 return false;
145 }
146 } else { 234 } else {
147 MDB_dbi flagtableDbi; 235 /*
148 if (const int rc = mdb_dbi_open(transaction, "__flagtable", readOnly ? 0 : MDB_CREATE, &flagtableDbi)) { 236 * Dynamic creation of databases.
149 if (!readOnly) { 237 * If all databases were defined via the database layout we wouldn't ever end up in here.
150 SinkWarning() << "Failed to to open flagdb: " << QByteArray(mdb_strerror(rc)); 238 * However, we rely on this codepath for indexes, synchronization databases and in race-conditions
239 * where the database is not yet fully created when the client initializes it for reading.
240 *
241 * There are a few things to consider:
242 * * dbi's (DataBase Identifier) should be opened once (ideally), and then be persisted in the environment.
243 * * To open a dbi we need a transaction and must commit the transaction. From then on any open transaction will have access to the dbi.
244 * * Already running transactions will not have access to the dbi.
245 * * There *must* only ever be one active transaction opening dbi's (using mdb_dbi_open), and that transaction *must*
246 * commit or abort before any other transaction opens a dbi.
247 *
248 * We solve this the following way:
249 * * For read-only transactions we abort the transaction, open the dbi and persist it in the environment, and reopen the transaction (so the dbi is available). This may result in the db content changing unexpectedly and referenced memory becoming unavailable, but isn't a problem as long as we don't rely on memory remaining valid for the duration of the transaction (which is anyways not given since any operation would invalidate the memory region)..
250 * * For write transactions we open the dbi for future use, and then open it as well in the current transaction.
251 */
252 SinkTrace() << "Creating database dynamically: " << dbiName << readOnly;
253 //Only one transaction may ever create dbis at a time.
254 QMutexLocker createDbiLocker(&sCreateDbiLock);
255 //Double checked locking
256 if (sDbis.contains(dbiName)) {
257 dbi = sDbis.value(dbiName);
258 Q_ASSERT(dbiValidForTransaction(dbi, transaction));
259 return true;
260 }
261
262 //Create a transaction to open the dbi
263 MDB_txn *dbiTransaction;
264 if (readOnly) {
265 MDB_env *env = mdb_txn_env(transaction);
266 Q_ASSERT(env);
267 mdb_txn_reset(transaction);
268 if (const int rc = mdb_txn_begin(env, nullptr, MDB_RDONLY, &dbiTransaction)) {
269 SinkError() << "Failed to open transaction: " << QByteArray(mdb_strerror(rc)) << readOnly << transaction;
270 return false;
151 } 271 }
152 } else { 272 } else {
153 MDB_val key, value; 273 dbiTransaction = transaction;
154 key.mv_data = const_cast<void*>(static_cast<const void*>(db.constData()));
155 key.mv_size = db.size();
156 if (const auto rc = mdb_get(transaction, flagtableDbi, &key, &value)) {
157 //We expect this to fail for new databases
158 if (rc != MDB_NOTFOUND) {
159 SinkWarning() << "Failed to read flags from flag db: " << QByteArray(mdb_strerror(rc));
160 }
161 } else {
162 //Found the flags
163 const auto ba = QByteArray::fromRawData((char *)value.mv_data, value.mv_size);
164 flags = ba.toInt();
165 }
166 } 274 }
167 275 if (createDbi(dbiTransaction, db, readOnly, allowDuplicates, dbi)) {
168 Q_ASSERT(transaction); 276 if (readOnly) {
169 if (const int rc = mdb_dbi_open(transaction, db.constData(), flags, &dbi)) { 277 mdb_txn_commit(dbiTransaction);
170 //Create the db if it is not existing already 278 dbiLocker.unlock();
171 if (rc == MDB_NOTFOUND && !readOnly) { 279 QWriteLocker dbiWriteLocker(&sDbisLock);
172 //Sanity check db name 280 sDbis.insert(dbiName, dbi);
173 { 281 //We reopen the read-only transaction so the dbi becomes available in it.
174 auto parts = db.split('.'); 282 mdb_txn_renew(transaction);
175 for (const auto &p : parts) {
176 auto containsSpecialCharacter = [] (const QByteArray &p) {
177 for (int i = 0; i < p.size(); i++) {
178 const auto c = p.at(i);
179 //Between 0 and z in the ascii table. Essentially ensures that the name is printable and doesn't contain special chars
180 if (c < 0x30 || c > 0x7A) {
181 return true;
182 }
183 }
184 return false;
185 };
186 if (p.isEmpty() || containsSpecialCharacter(p)) {
187 SinkError() << "Tried to create a db with an invalid name. Hex:" << db.toHex() << " ASCII:" << db;
188 Q_ASSERT(false);
189 throw std::runtime_error("Fatal error while creating db.");
190 }
191 }
192 }
193 if (const int rc = mdb_dbi_open(transaction, db.constData(), flags | MDB_CREATE, &dbi)) {
194 SinkWarning() << "Failed to create db " << QByteArray(mdb_strerror(rc));
195 Error error(name.toLatin1(), ErrorCodes::GenericError, "Error while creating database: " + QByteArray(mdb_strerror(rc)));
196 errorHandler ? errorHandler(error) : defaultErrorHandler(error);
197 return false;
198 }
199 //Record the db flags
200 MDB_val key, value;
201 key.mv_data = const_cast<void*>(static_cast<const void*>(db.constData()));
202 key.mv_size = db.size();
203 //Store the flags without the create option
204 const auto ba = QByteArray::number(flags);
205 value.mv_data = const_cast<void*>(static_cast<const void*>(db.constData()));
206 value.mv_size = db.size();
207 if (const int rc = mdb_put(transaction, flagtableDbi, &key, &value, MDB_NOOVERWRITE)) {
208 //We expect this to fail if we're only creating the dbi but not the db
209 if (rc != MDB_KEYEXIST) {
210 SinkWarning() << "Failed to write flags to flag db: " << QByteArray(mdb_strerror(rc));
211 }
212 }
213 } else { 283 } else {
214 dbi = 0; 284 createdNewDbi = true;
215 transaction = 0; 285 createdNewDbiName = dbiName;
216 //It's not an error if we only want to read 286 }
217 if (!readOnly) { 287 //Ensure the dbi is valid for the parent transaction
218 SinkWarning() << "Failed to open db " << QByteArray(mdb_strerror(rc)); 288 Q_ASSERT(dbiValidForTransaction(dbi, transaction));
219 Error error(name.toLatin1(), ErrorCodes::GenericError, "Error while opening database: " + QByteArray(mdb_strerror(rc))); 289 } else {
220 errorHandler ? errorHandler(error) : defaultErrorHandler(error); 290 if (readOnly) {
221 } 291 mdb_txn_abort(dbiTransaction);
222 return false; 292 mdb_txn_renew(transaction);
223 } 293 }
294 SinkWarning() << "Failed to create the dbi: " << dbiName;
295 dbi = 0;
296 transaction = 0;
297 return false;
224 } 298 }
225
226 createdNewDbi = true;
227 createdDbName = dbiName;
228 } 299 }
229 return true; 300 return true;
230 } 301 }
@@ -538,8 +609,8 @@ bool DataStore::NamedDatabase::allowsDuplicates() const
538class DataStore::Transaction::Private 609class DataStore::Transaction::Private
539{ 610{
540public: 611public:
541 Private(bool _requestRead, const std::function<void(const DataStore::Error &error)> &_defaultErrorHandler, const QString &_name, MDB_env *_env, bool _noLock = false) 612 Private(bool _requestRead, const std::function<void(const DataStore::Error &error)> &_defaultErrorHandler, const QString &_name, MDB_env *_env)
542 : env(_env), transaction(nullptr), requestedRead(_requestRead), defaultErrorHandler(_defaultErrorHandler), name(_name), implicitCommit(false), error(false), modificationCounter(0), noLock(_noLock) 613 : env(_env), transaction(nullptr), requestedRead(_requestRead), defaultErrorHandler(_defaultErrorHandler), name(_name), implicitCommit(false), error(false)
543 { 614 {
544 } 615 }
545 ~Private() 616 ~Private()
@@ -553,9 +624,6 @@ public:
553 QString name; 624 QString name;
554 bool implicitCommit; 625 bool implicitCommit;
555 bool error; 626 bool error;
556 int modificationCounter;
557 bool noLock;
558
559 QMap<QString, MDB_dbi> createdDbs; 627 QMap<QString, MDB_dbi> createdDbs;
560 628
561 void startTransaction() 629 void startTransaction()
@@ -641,23 +709,22 @@ bool DataStore::Transaction::commit(const std::function<void(const DataStore::Er
641 //If transactions start failing we're in an unrecoverable situation (i.e. out of diskspace). So throw an exception that will terminate the application. 709 //If transactions start failing we're in an unrecoverable situation (i.e. out of diskspace). So throw an exception that will terminate the application.
642 throw std::runtime_error("Fatal error while committing transaction."); 710 throw std::runtime_error("Fatal error while committing transaction.");
643 } 711 }
644 d->transaction = nullptr;
645 712
646 //Add the created dbis to the shared environment 713 //Add the created dbis to the shared environment
647 if (!d->createdDbs.isEmpty()) { 714 if (!d->createdDbs.isEmpty()) {
648 if (!d->noLock) { 715 sDbisLock.lockForWrite();
649 sDbisLock.lockForWrite();
650 }
651 for (auto it = d->createdDbs.constBegin(); it != d->createdDbs.constEnd(); it++) { 716 for (auto it = d->createdDbs.constBegin(); it != d->createdDbs.constEnd(); it++) {
717 //This means we opened the dbi again in a read-only transaction while the write transaction was ongoing.
652 Q_ASSERT(!sDbis.contains(it.key())); 718 Q_ASSERT(!sDbis.contains(it.key()));
653 sDbis.insert(it.key(), it.value()); 719 if (!sDbis.contains(it.key())) {
720 sDbis.insert(it.key(), it.value());
721 }
654 } 722 }
655 d->createdDbs.clear(); 723 d->createdDbs.clear();
656 if (!d->noLock) { 724 sDbisLock.unlock();
657 sDbisLock.unlock();
658 }
659 } 725 }
660 726
727 d->transaction = nullptr;
661 return !rc; 728 return !rc;
662} 729}
663 730
@@ -667,10 +734,10 @@ void DataStore::Transaction::abort()
667 return; 734 return;
668 } 735 }
669 736
670 d->createdDbs.clear();
671 // Trace_area("storage." + d->name.toLatin1()) << "Aborting transaction" << mdb_txn_id(d->transaction) << d->transaction; 737 // Trace_area("storage." + d->name.toLatin1()) << "Aborting transaction" << mdb_txn_id(d->transaction) << d->transaction;
672 Q_ASSERT(sEnvironments.values().contains(d->env)); 738 Q_ASSERT(sEnvironments.values().contains(d->env));
673 mdb_txn_abort(d->transaction); 739 mdb_txn_abort(d->transaction);
740 d->createdDbs.clear();
674 d->transaction = nullptr; 741 d->transaction = nullptr;
675} 742}
676 743
@@ -707,22 +774,16 @@ DataStore::NamedDatabase DataStore::Transaction::openDatabase(const QByteArray &
707 // We don't now if anything changed 774 // We don't now if anything changed
708 d->implicitCommit = true; 775 d->implicitCommit = true;
709 auto p = new DataStore::NamedDatabase::Private(db, allowDuplicates, d->defaultErrorHandler, d->name, d->transaction); 776 auto p = new DataStore::NamedDatabase::Private(db, allowDuplicates, d->defaultErrorHandler, d->name, d->transaction);
710 if (!d->noLock) { 777 auto ret = p->openDatabase(d->requestedRead, errorHandler);
711 sDbisLock.lockForRead(); 778 if (!ret) {
712 }
713 if (!p->openDatabase(d->requestedRead, errorHandler)) {
714 if (!d->noLock) {
715 sDbisLock.unlock();
716 }
717 delete p; 779 delete p;
718 return DataStore::NamedDatabase(); 780 return DataStore::NamedDatabase();
719 } 781 }
720 if (!d->noLock) { 782
721 sDbisLock.unlock();
722 }
723 if (p->createdNewDbi) { 783 if (p->createdNewDbi) {
724 d->createdDbs.insert(p->createdDbName, p->dbi); 784 d->createdDbs.insert(p->createdNewDbiName, p->dbi);
725 } 785 }
786
726 auto database = DataStore::NamedDatabase(p); 787 auto database = DataStore::NamedDatabase(p);
727 if (!ensureCorrectDb(database, db, d->requestedRead)) { 788 if (!ensureCorrectDb(database, db, d->requestedRead)) {
728 SinkWarning() << "Failed to open the database correctly" << db; 789 SinkWarning() << "Failed to open the database correctly" << db;
@@ -863,28 +924,41 @@ public:
863 Q_ASSERT(env); 924 Q_ASSERT(env);
864 sEnvironments.insert(fullPath, env); 925 sEnvironments.insert(fullPath, env);
865 //Open all available dbi's 926 //Open all available dbi's
866 bool noLock = true; 927 MDB_txn *transaction;
867 auto t = Transaction(new Transaction::Private(readOnly, nullptr, name, env, noLock)); 928 if (const int rc = mdb_txn_begin(env, nullptr, readOnly ? MDB_RDONLY : 0, &transaction)) {
929 SinkWarning() << "Failed to to open transaction: " << QByteArray(mdb_strerror(rc)) << readOnly << transaction;
930 return;
931 }
868 if (!layout.tables.isEmpty()) { 932 if (!layout.tables.isEmpty()) {
869 933
870 //TODO upgrade db if the layout has changed: 934 //TODO upgrade db if the layout has changed:
871 //* read existing layout 935 //* read existing layout
872 //* if layout is not the same create new layout 936 //* if layout is not the same create new layout
873 //If the db is read only, abort if the db is not yet existing.
874 //If the db is not read-only but is not existing, ensure we have a layout and create all tables.
875 937
938 //Create dbis from the given layout.
876 for (auto it = layout.tables.constBegin(); it != layout.tables.constEnd(); it++) { 939 for (auto it = layout.tables.constBegin(); it != layout.tables.constEnd(); it++) {
877 bool allowDuplicates = it.value(); 940 const bool allowDuplicates = it.value();
878 t.openDatabase(it.key(), {}, allowDuplicates); 941 MDB_dbi dbi = 0;
942 const auto db = it.key();
943 const auto dbiName = name + db;
944 if (createDbi(transaction, db, readOnly, allowDuplicates, dbi)) {
945 sDbis.insert(dbiName, dbi);
946 }
879 } 947 }
880 } else { 948 } else {
881 for (const auto &db : t.getDatabaseNames()) { 949 //Open all available databases
882 //Get dbi to store for future use. 950 for (const auto &db : getDatabaseNames(transaction)) {
883 t.openDatabase(db); 951 MDB_dbi dbi = 0;
952 const auto dbiName = name + db;
953 //We're going to load the flags anyways.
954 bool allowDuplicates = false;
955 if (createDbi(transaction, db, readOnly, allowDuplicates, dbi)) {
956 sDbis.insert(dbiName, dbi);
957 }
884 } 958 }
885 } 959 }
886 //To persist the dbis (this is also necessary for read-only transactions) 960 //To persist the dbis (this is also necessary for read-only transactions)
887 t.commit(); 961 mdb_txn_commit(transaction);
888 } 962 }
889 } 963 }
890 } 964 }
@@ -990,8 +1064,18 @@ void DataStore::removeFromDisk() const
990 1064
991void DataStore::clearEnv() 1065void DataStore::clearEnv()
992{ 1066{
1067 SinkTrace() << "Clearing environment";
993 QWriteLocker locker(&sEnvironmentsLock); 1068 QWriteLocker locker(&sEnvironmentsLock);
994 for (auto env : sEnvironments) { 1069 QWriteLocker dbiLocker(&sDbisLock);
1070 for (const auto &envName : sEnvironments.keys()) {
1071 auto env = sEnvironments.value(envName);
1072 mdb_env_sync(env, true);
1073 for (const auto &k : sDbis.keys()) {
1074 if (k.startsWith(envName)) {
1075 auto dbi = sDbis.value(k);
1076 mdb_dbi_close(env, dbi);
1077 }
1078 }
995 mdb_env_close(env); 1079 mdb_env_close(env);
996 } 1080 }
997 sDbis.clear(); 1081 sDbis.clear();