diff options
author | Christian Mollekopf <chrigi_1@fastmail.fm> | 2018-08-06 13:33:53 +0200 |
---|---|---|
committer | Christian Mollekopf <chrigi_1@fastmail.fm> | 2018-08-06 13:33:53 +0200 |
commit | f2fac63e086b4bd8dfc3ed9c9f31241eb9df9b9c (patch) | |
tree | eeaa0c8a560bf9c0b9814347353465f5dad36cd5 | |
parent | b4341b9ba3f0242ff95fb229a80b4143c1369c34 (diff) | |
download | sink-f2fac63e086b4bd8dfc3ed9c9f31241eb9df9b9c.tar.gz sink-f2fac63e086b4bd8dfc3ed9c9f31241eb9df9b9c.zip |
Fixed threading issues with dynamic db creation.
This patch addresses two problems:
* A potential deadlock.
We had the following code inside a separately protected section:
dbiLocker.unlock();
//Here we could loos the readlock
QWriteLocker dbiWriteLocker(&sDbisLock);
If we lost the lock in between the two lines, the second thread that was
now holding a read-lock on sDbisLock could not enter the protected
section, which was a requirement to release the read-lock, and we'd thus
end up in a deadlock. This is solved using tryLock with intermediate
releases of the read-lock, allowing the original thread to finish.
* When failing to validate a dbi for the current transacation we
simply returned an invalid db (which then in this particular case broke
reading of revision uid's and type's), leading to queries not executing
as they should.
Both problems are unfortunately hard to reproduce, the adjusted test
at leaset allowed me to reproduce the deadlock situation sometimes.
To fix this cleanly we should probably just get rid of dynamic dbi
allocation for good.
-rw-r--r-- | common/storage_lmdb.cpp | 170 | ||||
-rw-r--r-- | tests/storagetest.cpp | 8 |
2 files changed, 109 insertions, 69 deletions
diff --git a/common/storage_lmdb.cpp b/common/storage_lmdb.cpp index f254077..3fee2a9 100644 --- a/common/storage_lmdb.cpp +++ b/common/storage_lmdb.cpp | |||
@@ -219,88 +219,126 @@ public: | |||
219 | bool openDatabase(bool readOnly, std::function<void(const DataStore::Error &error)> errorHandler) | 219 | bool openDatabase(bool readOnly, std::function<void(const DataStore::Error &error)> errorHandler) |
220 | { | 220 | { |
221 | const auto dbiName = name + db; | 221 | const auto dbiName = name + db; |
222 | //Never access sDbis while anything is writing to it. | ||
222 | QReadLocker dbiLocker{&sDbisLock}; | 223 | QReadLocker dbiLocker{&sDbisLock}; |
223 | if (sDbis.contains(dbiName)) { | 224 | if (sDbis.contains(dbiName)) { |
224 | dbi = sDbis.value(dbiName); | 225 | dbi = sDbis.value(dbiName); |
225 | //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created. | 226 | //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created. |
226 | if (!dbiValidForTransaction(dbi, transaction)) { | 227 | if (dbiValidForTransaction(dbi, transaction)) { |
227 | SinkTrace() << "Found dbi that is not available for the current transaction."; | ||
228 | return false; | ||
229 | } | ||
230 | } else { | ||
231 | /* | ||
232 | * Dynamic creation of databases. | ||
233 | * If all databases were defined via the database layout we wouldn't ever end up in here. | ||
234 | * However, we rely on this codepath for indexes, synchronization databases and in race-conditions | ||
235 | * where the database is not yet fully created when the client initializes it for reading. | ||
236 | * | ||
237 | * There are a few things to consider: | ||
238 | * * dbi's (DataBase Identifier) should be opened once (ideally), and then be persisted in the environment. | ||
239 | * * To open a dbi we need a transaction and must commit the transaction. From then on any open transaction will have access to the dbi. | ||
240 | * * Already running transactions will not have access to the dbi. | ||
241 | * * There *must* only ever be one active transaction opening dbi's (using mdb_dbi_open), and that transaction *must* | ||
242 | * commit or abort before any other transaction opens a dbi. | ||
243 | * | ||
244 | * We solve this the following way: | ||
245 | * * For read-only transactions we abort the transaction, open the dbi and persist it in the environment, and reopen the transaction (so the dbi is available). This may result in the db content changing unexpectedly and referenced memory becoming unavailable, but isn't a problem as long as we don't rely on memory remaining valid for the duration of the transaction (which is anyways not given since any operation would invalidate the memory region).. | ||
246 | * * For write transactions we open the dbi for future use, and then open it as well in the current transaction. | ||
247 | * * Write transactions that open the named database multiple times will call this codepath multiple times, | ||
248 | * this is ok though because the same dbi will be returned by mdb_dbi_open (We could also start to do a lookup in | ||
249 | * Transaction::Private::createdDbs first). | ||
250 | */ | ||
251 | SinkTrace() << "Creating database dynamically: " << dbiName << readOnly; | ||
252 | //Only one transaction may ever create dbis at a time. | ||
253 | QMutexLocker createDbiLocker(&sCreateDbiLock); | ||
254 | //Double checked locking | ||
255 | if (sDbis.contains(dbiName)) { | ||
256 | dbi = sDbis.value(dbiName); | ||
257 | //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created. | ||
258 | if (!dbiValidForTransaction(dbi, transaction)) { | ||
259 | SinkTrace() << "Found dbi that is not available for the current transaction."; | ||
260 | return false; | ||
261 | } | ||
262 | return true; | 228 | return true; |
263 | } | ||
264 | |||
265 | //Create a transaction to open the dbi | ||
266 | MDB_txn *dbiTransaction; | ||
267 | if (readOnly) { | ||
268 | MDB_env *env = mdb_txn_env(transaction); | ||
269 | Q_ASSERT(env); | ||
270 | mdb_txn_reset(transaction); | ||
271 | if (const int rc = mdb_txn_begin(env, nullptr, MDB_RDONLY, &dbiTransaction)) { | ||
272 | SinkError() << "Failed to open transaction: " << QByteArray(mdb_strerror(rc)) << readOnly << transaction; | ||
273 | return false; | ||
274 | } | ||
275 | } else { | 229 | } else { |
276 | dbiTransaction = transaction; | 230 | SinkTrace() << "Found dbi that is not available for the current transaction."; |
277 | } | ||
278 | if (createDbi(dbiTransaction, db, readOnly, allowDuplicates, dbi)) { | ||
279 | if (readOnly) { | 231 | if (readOnly) { |
280 | mdb_txn_commit(dbiTransaction); | 232 | //Recovery for read-only transactions. Abort and renew. |
281 | dbiLocker.unlock(); | 233 | mdb_txn_reset(transaction); |
282 | QWriteLocker dbiWriteLocker(&sDbisLock); | ||
283 | sDbis.insert(dbiName, dbi); | ||
284 | //We reopen the read-only transaction so the dbi becomes available in it. | ||
285 | mdb_txn_renew(transaction); | 234 | mdb_txn_renew(transaction); |
286 | } else { | 235 | Q_ASSERT(dbiValidForTransaction(dbi, transaction)); |
287 | createdNewDbi = true; | 236 | return true; |
288 | createdNewDbiName = dbiName; | ||
289 | } | 237 | } |
290 | //Ensure the dbi is valid for the parent transaction | 238 | //There is no recover path for non-read-only transactions. |
291 | Q_ASSERT(dbiValidForTransaction(dbi, transaction)); | 239 | } |
240 | //Nothing in the code deals well with non-existing databases. | ||
241 | Q_ASSERT(false); | ||
242 | return false; | ||
243 | } | ||
244 | |||
245 | |||
246 | /* | ||
247 | * Dynamic creation of databases. | ||
248 | * If all databases were defined via the database layout we wouldn't ever end up in here. | ||
249 | * However, we rely on this codepath for indexes, synchronization databases and in race-conditions | ||
250 | * where the database is not yet fully created when the client initializes it for reading. | ||
251 | * | ||
252 | * There are a few things to consider: | ||
253 | * * dbi's (DataBase Identifier) should be opened once (ideally), and then be persisted in the environment. | ||
254 | * * To open a dbi we need a transaction and must commit the transaction. From then on any open transaction will have access to the dbi. | ||
255 | * * Already running transactions will not have access to the dbi. | ||
256 | * * There *must* only ever be one active transaction opening dbi's (using mdb_dbi_open), and that transaction *must* | ||
257 | * commit or abort before any other transaction opens a dbi. | ||
258 | * | ||
259 | * We solve this the following way: | ||
260 | * * For read-only transactions we abort the transaction, open the dbi and persist it in the environment, and reopen the transaction (so the dbi is available). This may result in the db content changing unexpectedly and referenced memory becoming unavailable, but isn't a problem as long as we don't rely on memory remaining valid for the duration of the transaction (which is anyways not given since any operation would invalidate the memory region).. | ||
261 | * * For write transactions we open the dbi for future use, and then open it as well in the current transaction. | ||
262 | * * Write transactions that open the named database multiple times will call this codepath multiple times, | ||
263 | * this is ok though because the same dbi will be returned by mdb_dbi_open (We could also start to do a lookup in | ||
264 | * Transaction::Private::createdDbs first). | ||
265 | */ | ||
266 | SinkTrace() << "Creating database dynamically: " << dbiName << readOnly; | ||
267 | //Only one transaction may ever create dbis at a time. | ||
268 | while (!sCreateDbiLock.tryLock(10)) { | ||
269 | //Allow another thread that has already acquired sCreateDbiLock to continue below. | ||
270 | //Otherwise we risk a dead-lock if another thread already acquired sCreateDbiLock, but then lost the sDbisLock while upgrading it to a | ||
271 | //write lock below | ||
272 | dbiLocker.unlock(); | ||
273 | dbiLocker.relock(); | ||
274 | } | ||
275 | //Double checked locking | ||
276 | if (sDbis.contains(dbiName)) { | ||
277 | dbi = sDbis.value(dbiName); | ||
278 | //sDbis can potentially contain a dbi that is not valid for this transaction, if this transaction was created before the dbi was created. | ||
279 | sCreateDbiLock.unlock(); | ||
280 | if (dbiValidForTransaction(dbi, transaction)) { | ||
281 | return true; | ||
292 | } else { | 282 | } else { |
283 | SinkTrace() << "Found dbi that is not available for the current transaction."; | ||
293 | if (readOnly) { | 284 | if (readOnly) { |
294 | mdb_txn_abort(dbiTransaction); | 285 | //Recovery for read-only transactions. Abort and renew. |
286 | mdb_txn_reset(transaction); | ||
295 | mdb_txn_renew(transaction); | 287 | mdb_txn_renew(transaction); |
296 | } else { | 288 | Q_ASSERT(dbiValidForTransaction(dbi, transaction)); |
297 | SinkWarning() << "Failed to create the dbi: " << dbiName; | 289 | return true; |
298 | } | 290 | } |
299 | dbi = 0; | 291 | //There is no recover path for non-read-only transactions. |
300 | transaction = 0; | 292 | Q_ASSERT(false); |
301 | return false; | 293 | return false; |
302 | } | 294 | } |
303 | } | 295 | } |
296 | |||
297 | //Ensure nobody reads sDbis either | ||
298 | dbiLocker.unlock(); | ||
299 | //We risk loosing the lock in here. That's why we tryLock above in the while loop | ||
300 | QWriteLocker dbiWriteLocker(&sDbisLock); | ||
301 | |||
302 | //Create a transaction to open the dbi | ||
303 | MDB_txn *dbiTransaction; | ||
304 | if (readOnly) { | ||
305 | MDB_env *env = mdb_txn_env(transaction); | ||
306 | Q_ASSERT(env); | ||
307 | mdb_txn_reset(transaction); | ||
308 | if (const int rc = mdb_txn_begin(env, nullptr, MDB_RDONLY, &dbiTransaction)) { | ||
309 | SinkError() << "Failed to open transaction: " << QByteArray(mdb_strerror(rc)) << readOnly << transaction; | ||
310 | sCreateDbiLock.unlock(); | ||
311 | return false; | ||
312 | } | ||
313 | } else { | ||
314 | dbiTransaction = transaction; | ||
315 | } | ||
316 | if (createDbi(dbiTransaction, db, readOnly, allowDuplicates, dbi)) { | ||
317 | if (readOnly) { | ||
318 | mdb_txn_commit(dbiTransaction); | ||
319 | Q_ASSERT(!sDbis.contains(dbiName)); | ||
320 | sDbis.insert(dbiName, dbi); | ||
321 | //We reopen the read-only transaction so the dbi becomes available in it. | ||
322 | mdb_txn_renew(transaction); | ||
323 | } else { | ||
324 | createdNewDbi = true; | ||
325 | createdNewDbiName = dbiName; | ||
326 | } | ||
327 | //Ensure the dbi is valid for the parent transaction | ||
328 | Q_ASSERT(dbiValidForTransaction(dbi, transaction)); | ||
329 | } else { | ||
330 | if (readOnly) { | ||
331 | mdb_txn_abort(dbiTransaction); | ||
332 | mdb_txn_renew(transaction); | ||
333 | } else { | ||
334 | SinkWarning() << "Failed to create the dbi: " << dbiName; | ||
335 | } | ||
336 | dbi = 0; | ||
337 | transaction = 0; | ||
338 | sCreateDbiLock.unlock(); | ||
339 | return false; | ||
340 | } | ||
341 | sCreateDbiLock.unlock(); | ||
304 | return true; | 342 | return true; |
305 | } | 343 | } |
306 | }; | 344 | }; |
diff --git a/tests/storagetest.cpp b/tests/storagetest.cpp index 4c44322..81acc13 100644 --- a/tests/storagetest.cpp +++ b/tests/storagetest.cpp | |||
@@ -191,14 +191,16 @@ private slots: | |||
191 | 191 | ||
192 | // We repeat the test a bunch of times since failing is relatively random | 192 | // We repeat the test a bunch of times since failing is relatively random |
193 | for (int tries = 0; tries < 10; tries++) { | 193 | for (int tries = 0; tries < 10; tries++) { |
194 | //clearEnv in combination with the bogus db layouts tests the dynamic named db opening as well. | ||
195 | Sink::Storage::DataStore::clearEnv(); | ||
194 | bool error = false; | 196 | bool error = false; |
195 | // Try to concurrently read | 197 | // Try to concurrently read |
196 | QList<QFuture<void>> futures; | 198 | QList<QFuture<void>> futures; |
197 | const int concurrencyLevel = 20; | 199 | const int concurrencyLevel = 20; |
198 | for (int num = 0; num < concurrencyLevel; num++) { | 200 | for (int num = 0; num < concurrencyLevel; num++) { |
199 | futures << QtConcurrent::run([this, &error]() { | 201 | futures << QtConcurrent::run([this, &error]() { |
200 | Sink::Storage::DataStore storage(testDataPath, dbName, Sink::Storage::DataStore::ReadOnly); | 202 | Sink::Storage::DataStore storage(testDataPath, {dbName, {{"bogus", 0}}}, Sink::Storage::DataStore::ReadOnly); |
201 | Sink::Storage::DataStore storage2(testDataPath, dbName + "2", Sink::Storage::DataStore::ReadOnly); | 203 | Sink::Storage::DataStore storage2(testDataPath, {dbName+ "2", {{"bogus", 0}}}, Sink::Storage::DataStore::ReadOnly); |
202 | for (int i = 0; i < count; i++) { | 204 | for (int i = 0; i < count; i++) { |
203 | if (!verify(storage, i)) { | 205 | if (!verify(storage, i)) { |
204 | error = true; | 206 | error = true; |
@@ -757,7 +759,7 @@ private slots: | |||
757 | 759 | ||
758 | Sink::Storage::DataStore::clearEnv(); | 760 | Sink::Storage::DataStore::clearEnv(); |
759 | //Try to read-write dynamic opening of the db. | 761 | //Try to read-write dynamic opening of the db. |
760 | //This is the case if we don't have all databases available upon initializatoin and we don't (e.g. because the db hasn't been created yet) | 762 | //This is the case if we don't have all databases available upon initialization and we don't (e.g. because the db hasn't been created yet) |
761 | { | 763 | { |
762 | // Trick the db into not loading all dbs by passing in a bogus layout. | 764 | // Trick the db into not loading all dbs by passing in a bogus layout. |
763 | Sink::Storage::DataStore store(testDataPath, {dbName, {{"bogus", 0}}}, Sink::Storage::DataStore::ReadWrite); | 765 | Sink::Storage::DataStore store(testDataPath, {dbName, {{"bogus", 0}}}, Sink::Storage::DataStore::ReadWrite); |