summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Mollekopf <chrigi_1@fastmail.fm>2018-08-06 13:33:53 +0200
committerChristian Mollekopf <chrigi_1@fastmail.fm>2018-08-06 13:33:53 +0200
commitf2fac63e086b4bd8dfc3ed9c9f31241eb9df9b9c (patch)
treeeeaa0c8a560bf9c0b9814347353465f5dad36cd5
parentb4341b9ba3f0242ff95fb229a80b4143c1369c34 (diff)
downloadsink-f2fac63e086b4bd8dfc3ed9c9f31241eb9df9b9c.tar.gz
sink-f2fac63e086b4bd8dfc3ed9c9f31241eb9df9b9c.zip
Fixed threading issues with dynamic db creation.
This patch addresses two problems: * A potential deadlock. We had the following code inside a separately protected section: dbiLocker.unlock(); //Here we could loos the readlock QWriteLocker dbiWriteLocker(&sDbisLock); If we lost the lock in between the two lines, the second thread that was now holding a read-lock on sDbisLock could not enter the protected section, which was a requirement to release the read-lock, and we'd thus end up in a deadlock. This is solved using tryLock with intermediate releases of the read-lock, allowing the original thread to finish. * When failing to validate a dbi for the current transacation we simply returned an invalid db (which then in this particular case broke reading of revision uid's and type's), leading to queries not executing as they should. Both problems are unfortunately hard to reproduce, the adjusted test at leaset allowed me to reproduce the deadlock situation sometimes. To fix this cleanly we should probably just get rid of dynamic dbi allocation for good.
-rw-r--r--common/storage_lmdb.cpp170
-rw-r--r--tests/storagetest.cpp8
2 files changed, 109 insertions, 69 deletions
diff --git a/common/storage_lmdb.cpp b/common/storage_lmdb.cpp
index f254077..3fee2a9 100644
--- a/common/storage_lmdb.cpp
+++ b/common/storage_lmdb.cpp
@@ -219,88 +219,126 @@ public:
219 bool openDatabase(bool readOnly, std::function<void(const DataStore::Error &error)> errorHandler) 219 bool openDatabase(bool readOnly, std::function<void(const DataStore::Error &error)> errorHandler)
220 { 220 {
221 const auto dbiName = name + db; 221 const auto dbiName = name + db;
222 //Never access sDbis while anything is writing to it.
222 QReadLocker dbiLocker{&sDbisLock}; 223 QReadLocker dbiLocker{&sDbisLock};
223 if (sDbis.contains(dbiName)) { 224 if (sDbis.contains(dbiName)) {
224 dbi = sDbis.value(dbiName); 225 dbi = sDbis.value(dbiName);
225 //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created. 226 //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created.
226 if (!dbiValidForTransaction(dbi, transaction)) { 227 if (dbiValidForTransaction(dbi, transaction)) {
227 SinkTrace() << "Found dbi that is not available for the current transaction.";
228 return false;
229 }
230 } else {
231 /*
232 * Dynamic creation of databases.
233 * If all databases were defined via the database layout we wouldn't ever end up in here.
234 * However, we rely on this codepath for indexes, synchronization databases and in race-conditions
235 * where the database is not yet fully created when the client initializes it for reading.
236 *
237 * There are a few things to consider:
238 * * dbi's (DataBase Identifier) should be opened once (ideally), and then be persisted in the environment.
239 * * To open a dbi we need a transaction and must commit the transaction. From then on any open transaction will have access to the dbi.
240 * * Already running transactions will not have access to the dbi.
241 * * There *must* only ever be one active transaction opening dbi's (using mdb_dbi_open), and that transaction *must*
242 * commit or abort before any other transaction opens a dbi.
243 *
244 * We solve this the following way:
245 * * For read-only transactions we abort the transaction, open the dbi and persist it in the environment, and reopen the transaction (so the dbi is available). This may result in the db content changing unexpectedly and referenced memory becoming unavailable, but isn't a problem as long as we don't rely on memory remaining valid for the duration of the transaction (which is anyways not given since any operation would invalidate the memory region)..
246 * * For write transactions we open the dbi for future use, and then open it as well in the current transaction.
247 * * Write transactions that open the named database multiple times will call this codepath multiple times,
248 * this is ok though because the same dbi will be returned by mdb_dbi_open (We could also start to do a lookup in
249 * Transaction::Private::createdDbs first).
250 */
251 SinkTrace() << "Creating database dynamically: " << dbiName << readOnly;
252 //Only one transaction may ever create dbis at a time.
253 QMutexLocker createDbiLocker(&sCreateDbiLock);
254 //Double checked locking
255 if (sDbis.contains(dbiName)) {
256 dbi = sDbis.value(dbiName);
257 //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created.
258 if (!dbiValidForTransaction(dbi, transaction)) {
259 SinkTrace() << "Found dbi that is not available for the current transaction.";
260 return false;
261 }
262 return true; 228 return true;
263 }
264
265 //Create a transaction to open the dbi
266 MDB_txn *dbiTransaction;
267 if (readOnly) {
268 MDB_env *env = mdb_txn_env(transaction);
269 Q_ASSERT(env);
270 mdb_txn_reset(transaction);
271 if (const int rc = mdb_txn_begin(env, nullptr, MDB_RDONLY, &dbiTransaction)) {
272 SinkError() << "Failed to open transaction: " << QByteArray(mdb_strerror(rc)) << readOnly << transaction;
273 return false;
274 }
275 } else { 229 } else {
276 dbiTransaction = transaction; 230 SinkTrace() << "Found dbi that is not available for the current transaction.";
277 }
278 if (createDbi(dbiTransaction, db, readOnly, allowDuplicates, dbi)) {
279 if (readOnly) { 231 if (readOnly) {
280 mdb_txn_commit(dbiTransaction); 232 //Recovery for read-only transactions. Abort and renew.
281 dbiLocker.unlock(); 233 mdb_txn_reset(transaction);
282 QWriteLocker dbiWriteLocker(&sDbisLock);
283 sDbis.insert(dbiName, dbi);
284 //We reopen the read-only transaction so the dbi becomes available in it.
285 mdb_txn_renew(transaction); 234 mdb_txn_renew(transaction);
286 } else { 235 Q_ASSERT(dbiValidForTransaction(dbi, transaction));
287 createdNewDbi = true; 236 return true;
288 createdNewDbiName = dbiName;
289 } 237 }
290 //Ensure the dbi is valid for the parent transaction 238 //There is no recover path for non-read-only transactions.
291 Q_ASSERT(dbiValidForTransaction(dbi, transaction)); 239 }
240 //Nothing in the code deals well with non-existing databases.
241 Q_ASSERT(false);
242 return false;
243 }
244
245
246 /*
247 * Dynamic creation of databases.
248 * If all databases were defined via the database layout we wouldn't ever end up in here.
249 * However, we rely on this codepath for indexes, synchronization databases and in race-conditions
250 * where the database is not yet fully created when the client initializes it for reading.
251 *
252 * There are a few things to consider:
253 * * dbi's (DataBase Identifier) should be opened once (ideally), and then be persisted in the environment.
254 * * To open a dbi we need a transaction and must commit the transaction. From then on any open transaction will have access to the dbi.
255 * * Already running transactions will not have access to the dbi.
256 * * There *must* only ever be one active transaction opening dbi's (using mdb_dbi_open), and that transaction *must*
257 * commit or abort before any other transaction opens a dbi.
258 *
259 * We solve this the following way:
260 * * For read-only transactions we abort the transaction, open the dbi and persist it in the environment, and reopen the transaction (so the dbi is available). This may result in the db content changing unexpectedly and referenced memory becoming unavailable, but isn't a problem as long as we don't rely on memory remaining valid for the duration of the transaction (which is anyways not given since any operation would invalidate the memory region)..
261 * * For write transactions we open the dbi for future use, and then open it as well in the current transaction.
262 * * Write transactions that open the named database multiple times will call this codepath multiple times,
263 * this is ok though because the same dbi will be returned by mdb_dbi_open (We could also start to do a lookup in
264 * Transaction::Private::createdDbs first).
265 */
266 SinkTrace() << "Creating database dynamically: " << dbiName << readOnly;
267 //Only one transaction may ever create dbis at a time.
268 while (!sCreateDbiLock.tryLock(10)) {
269 //Allow another thread that has already acquired sCreateDbiLock to continue below.
270 //Otherwise we risk a dead-lock if another thread already acquired sCreateDbiLock, but then lost the sDbisLock while upgrading it to a
271 //write lock below
272 dbiLocker.unlock();
273 dbiLocker.relock();
274 }
275 //Double checked locking
276 if (sDbis.contains(dbiName)) {
277 dbi = sDbis.value(dbiName);
278 //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created.
279 sCreateDbiLock.unlock();
280 if (dbiValidForTransaction(dbi, transaction)) {
281 return true;
292 } else { 282 } else {
283 SinkTrace() << "Found dbi that is not available for the current transaction.";
293 if (readOnly) { 284 if (readOnly) {
294 mdb_txn_abort(dbiTransaction); 285 //Recovery for read-only transactions. Abort and renew.
286 mdb_txn_reset(transaction);
295 mdb_txn_renew(transaction); 287 mdb_txn_renew(transaction);
296 } else { 288 Q_ASSERT(dbiValidForTransaction(dbi, transaction));
297 SinkWarning() << "Failed to create the dbi: " << dbiName; 289 return true;
298 } 290 }
299 dbi = 0; 291 //There is no recover path for non-read-only transactions.
300 transaction = 0; 292 Q_ASSERT(false);
301 return false; 293 return false;
302 } 294 }
303 } 295 }
296
297 //Ensure nobody reads sDbis either
298 dbiLocker.unlock();
299 //We risk loosing the lock in here. That's why we tryLock above in the while loop
300 QWriteLocker dbiWriteLocker(&sDbisLock);
301
302 //Create a transaction to open the dbi
303 MDB_txn *dbiTransaction;
304 if (readOnly) {
305 MDB_env *env = mdb_txn_env(transaction);
306 Q_ASSERT(env);
307 mdb_txn_reset(transaction);
308 if (const int rc = mdb_txn_begin(env, nullptr, MDB_RDONLY, &dbiTransaction)) {
309 SinkError() << "Failed to open transaction: " << QByteArray(mdb_strerror(rc)) << readOnly << transaction;
310 sCreateDbiLock.unlock();
311 return false;
312 }
313 } else {
314 dbiTransaction = transaction;
315 }
316 if (createDbi(dbiTransaction, db, readOnly, allowDuplicates, dbi)) {
317 if (readOnly) {
318 mdb_txn_commit(dbiTransaction);
319 Q_ASSERT(!sDbis.contains(dbiName));
320 sDbis.insert(dbiName, dbi);
321 //We reopen the read-only transaction so the dbi becomes available in it.
322 mdb_txn_renew(transaction);
323 } else {
324 createdNewDbi = true;
325 createdNewDbiName = dbiName;
326 }
327 //Ensure the dbi is valid for the parent transaction
328 Q_ASSERT(dbiValidForTransaction(dbi, transaction));
329 } else {
330 if (readOnly) {
331 mdb_txn_abort(dbiTransaction);
332 mdb_txn_renew(transaction);
333 } else {
334 SinkWarning() << "Failed to create the dbi: " << dbiName;
335 }
336 dbi = 0;
337 transaction = 0;
338 sCreateDbiLock.unlock();
339 return false;
340 }
341 sCreateDbiLock.unlock();
304 return true; 342 return true;
305 } 343 }
306}; 344};
diff --git a/tests/storagetest.cpp b/tests/storagetest.cpp
index 4c44322..81acc13 100644
--- a/tests/storagetest.cpp
+++ b/tests/storagetest.cpp
@@ -191,14 +191,16 @@ private slots:
191 191
192 // We repeat the test a bunch of times since failing is relatively random 192 // We repeat the test a bunch of times since failing is relatively random
193 for (int tries = 0; tries < 10; tries++) { 193 for (int tries = 0; tries < 10; tries++) {
194 //clearEnv in combination with the bogus db layouts tests the dynamic named db opening as well.
195 Sink::Storage::DataStore::clearEnv();
194 bool error = false; 196 bool error = false;
195 // Try to concurrently read 197 // Try to concurrently read
196 QList<QFuture<void>> futures; 198 QList<QFuture<void>> futures;
197 const int concurrencyLevel = 20; 199 const int concurrencyLevel = 20;
198 for (int num = 0; num < concurrencyLevel; num++) { 200 for (int num = 0; num < concurrencyLevel; num++) {
199 futures << QtConcurrent::run([this, &error]() { 201 futures << QtConcurrent::run([this, &error]() {
200 Sink::Storage::DataStore storage(testDataPath, dbName, Sink::Storage::DataStore::ReadOnly); 202 Sink::Storage::DataStore storage(testDataPath, {dbName, {{"bogus", 0}}}, Sink::Storage::DataStore::ReadOnly);
201 Sink::Storage::DataStore storage2(testDataPath, dbName + "2", Sink::Storage::DataStore::ReadOnly); 203 Sink::Storage::DataStore storage2(testDataPath, {dbName+ "2", {{"bogus", 0}}}, Sink::Storage::DataStore::ReadOnly);
202 for (int i = 0; i < count; i++) { 204 for (int i = 0; i < count; i++) {
203 if (!verify(storage, i)) { 205 if (!verify(storage, i)) {
204 error = true; 206 error = true;
@@ -757,7 +759,7 @@ private slots:
757 759
758 Sink::Storage::DataStore::clearEnv(); 760 Sink::Storage::DataStore::clearEnv();
759 //Try to read-write dynamic opening of the db. 761 //Try to read-write dynamic opening of the db.
760 //This is the case if we don't have all databases available upon initializatoin and we don't (e.g. because the db hasn't been created yet) 762 //This is the case if we don't have all databases available upon initialization and we don't (e.g. because the db hasn't been created yet)
761 { 763 {
762 // Trick the db into not loading all dbs by passing in a bogus layout. 764 // Trick the db into not loading all dbs by passing in a bogus layout.
763 Sink::Storage::DataStore store(testDataPath, {dbName, {{"bogus", 0}}}, Sink::Storage::DataStore::ReadWrite); 765 Sink::Storage::DataStore store(testDataPath, {dbName, {{"bogus", 0}}}, Sink::Storage::DataStore::ReadWrite);