From 0aa5b2187dcd7d8c91900dd5cb017481941bfc0f Mon Sep 17 00:00:00 2001
From: Christian Mollekopf
Date: Tue, 21 Nov 2017 15:28:33 +0100
Subject: Storage debugging code

---
 common/storage.h                      | 19 +++++++
 common/storage_lmdb.cpp               | 81 ++++++++++++++++++++++++++++--
 tests/dummyresourcewritebenchmark.cpp | 93 +++++++++++++++++++++++++++++++++--
 3 files changed, 187 insertions(+), 6 deletions(-)

diff --git a/common/storage.h b/common/storage.h
index c39b904..1967a5e 100644
--- a/common/storage.h
+++ b/common/storage.h
@@ -123,6 +123,16 @@ public:
 
     qint64 getSize();
 
+    struct Stat {
+        size_t branchPages;
+        size_t leafPages;
+        size_t overflowPages;
+        size_t numEntries;
+    };
+    Stat stat();
+
+    bool allowsDuplicates() const;
+
 private:
     friend Transaction;
     NamedDatabase(NamedDatabase &other);
@@ -150,6 +160,15 @@ public:
 
     operator bool() const;
 
+    struct Stat {
+        size_t totalPages;
+        size_t freePages;
+        size_t pageSize;
+        NamedDatabase::Stat mainDbStat;
+        NamedDatabase::Stat freeDbStat;
+    };
+    Stat stat();
+
 private:
     Transaction(Transaction &other);
     Transaction &operator=(Transaction &other);
diff --git a/common/storage_lmdb.cpp b/common/storage_lmdb.cpp
index 2745a14..392ac0a 100644
--- a/common/storage_lmdb.cpp
+++ b/common/storage_lmdb.cpp
@@ -496,13 +496,39 @@ qint64 DataStore::NamedDatabase::getSize()
     if (rc) {
         SinkWarning() << "Something went wrong " << QByteArray(mdb_strerror(rc));
     }
-    // std::cout << "overflow_pages: " << stat.ms_overflow_pages << std::endl;
+    return stat.ms_psize * (stat.ms_leaf_pages + stat.ms_branch_pages + stat.ms_overflow_pages);
+}
+
+DataStore::NamedDatabase::Stat DataStore::NamedDatabase::stat()
+{
+    if (!d || !d->transaction) {
+        return {};
+    }
+
+    int rc;
+    MDB_stat stat;
+    rc = mdb_stat(d->transaction, d->dbi, &stat);
+    if (rc) {
+        SinkWarning() << "Something went wrong " << QByteArray(mdb_strerror(rc));
+        return {};
+    }
+    return {stat.ms_branch_pages,
+            stat.ms_leaf_pages,
+            stat.ms_overflow_pages,
+            stat.ms_entries};
     // std::cout << "page size: " << stat.ms_psize << std::endl;
-    // std::cout << "branch_pages: " << stat.ms_branch_pages << std::endl;
     // std::cout << "leaf_pages: " << stat.ms_leaf_pages << std::endl;
+    // std::cout << "branch_pages: " << stat.ms_branch_pages << std::endl;
+    // std::cout << "overflow_pages: " << stat.ms_overflow_pages << std::endl;
     // std::cout << "depth: " << stat.ms_depth << std::endl;
     // std::cout << "entries: " << stat.ms_entries << std::endl;
-    return stat.ms_psize * (stat.ms_leaf_pages + stat.ms_branch_pages + stat.ms_overflow_pages);
+}
+
+bool DataStore::NamedDatabase::allowsDuplicates() const
+{
+    unsigned int flags;
+    mdb_dbi_flags(d->transaction, d->dbi, &flags);
+    return flags & MDB_DUPSORT;
 }
 
 
@@ -708,6 +734,55 @@ QList<QByteArray> DataStore::Transaction::getDatabaseNames() const
 }
 
 
+DataStore::Transaction::Stat DataStore::Transaction::stat()
+{
+    const int freeDbi = 0;
+    const int mainDbi = 1;
+
+    MDB_envinfo mei;
+    mdb_env_info(d->env, &mei);
+
+    MDB_stat mst;
+    mdb_stat(d->transaction, freeDbi, &mst);
+    auto freeStat = NamedDatabase::Stat{mst.ms_branch_pages,
+                                        mst.ms_leaf_pages,
+                                        mst.ms_overflow_pages,
+                                        mst.ms_entries};
+
+    mdb_stat(d->transaction, mainDbi, &mst);
+    auto mainStat = NamedDatabase::Stat{mst.ms_branch_pages,
+                                        mst.ms_leaf_pages,
+                                        mst.ms_overflow_pages,
+                                        mst.ms_entries};
+
+    MDB_cursor *cursor;
+    MDB_val key, data;
+    size_t freePages = 0, *iptr;
+
+    int rc = mdb_cursor_open(d->transaction, freeDbi, &cursor);
+    if (rc) {
+        fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
+        return {};
+    }
+
+    while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) {
+        iptr = static_cast<size_t *>(data.mv_data);
+        freePages += *iptr;
+        size_t pg, prev;
+        ssize_t i, j, span = 0;
+        j = *iptr++;
+        for (i = j, prev = 1; --i >= 0; ) {
+            pg = iptr[i];
+            prev = pg;
+            pg += span;
+            for (; i >= span && iptr[i - span] == pg; span++, pg++) ;
+        }
+    }
+    mdb_cursor_close(cursor);
+    return {mei.me_last_pgno + 1, freePages, mst.ms_psize, mainStat, freeStat};
+}
+
+
 class DataStore::Private
 {
 public:
diff --git a/tests/dummyresourcewritebenchmark.cpp b/tests/dummyresourcewritebenchmark.cpp
index 87bb57a..07f57f6 100644
--- a/tests/dummyresourcewritebenchmark.cpp
+++ b/tests/dummyresourcewritebenchmark.cpp
@@ -27,16 +27,21 @@
 #include "getrssusage.h"
 #include "utils.h"
 
-static QByteArray createEntityBuffer(int &bufferSize)
+static QByteArray createEntityBuffer(size_t attachmentSize, int &bufferSize)
 {
+    uint8_t rawData[attachmentSize];
     flatbuffers::FlatBufferBuilder eventFbb;
     eventFbb.Clear();
     {
+        uint8_t *rawDataPtr = Q_NULLPTR;
+        auto data = eventFbb.CreateUninitializedVector<uint8_t>(attachmentSize, &rawDataPtr);
         auto summary = eventFbb.CreateString("summary");
         Sink::ApplicationDomain::Buffer::EventBuilder eventBuilder(eventFbb);
         eventBuilder.add_summary(summary);
+        eventBuilder.add_attachment(data);
         auto eventLocation = eventBuilder.Finish();
         Sink::ApplicationDomain::Buffer::FinishEventBuffer(eventFbb, eventLocation);
+        memcpy((void *)rawDataPtr, rawData, attachmentSize);
     }
 
     flatbuffers::FlatBufferBuilder localFbb;
@@ -84,7 +89,7 @@ class DummyResourceWriteBenchmark : public QObject
         DummyResource resource(Sink::ResourceContext{"sink.dummy.instance1", "sink.dummy", Sink::AdaptorFactoryRegistry::instance().getFactories("sink.dummy")});
 
         int bufferSize = 0;
-        auto command = createEntityBuffer(bufferSize);
+        auto command = createEntityBuffer(0, bufferSize);
 
         const auto startingRss = getCurrentRSS();
         for (int i = 0; i < num; i++) {
@@ -114,7 +119,7 @@ class DummyResourceWriteBenchmark : public QObject
         std::cout << "Rss without db [kb]: " << rssWithoutDb / 1024 << std::endl;
         std::cout << "Percentage peak rss error: " << percentageRssError << std::endl;
 
-        auto onDisk = DummyResource::diskUsage("sink.dummy.instance1");
+        auto onDisk = Sink::Storage::DataStore(Sink::storageLocation(), "sink.dummy.instance1", Sink::Storage::DataStore::ReadOnly).diskUsage();
         auto writeAmplification = static_cast<double>(onDisk) / static_cast<double>(bufferSizeTotal);
         std::cout << "On disk [kb]: " << onDisk / 1024 << std::endl;
         std::cout << "Buffer size total [kb]: " << bufferSizeTotal / 1024 << std::endl;
@@ -165,6 +170,84 @@ class DummyResourceWriteBenchmark : public QObject
         // std::system("exec pmap -x \"$PPID\"");
     }
 
+    void testDiskUsage(int num)
+    {
+        auto resourceId = "testDiskUsage";
+        DummyResource::removeFromDisk(resourceId);
+
+        {
+            DummyResource resource(Sink::ResourceContext{resourceId, "sink.dummy", Sink::AdaptorFactoryRegistry::instance().getFactories("sink.dummy")});
+
+            int bufferSize = 0;
+            auto command = createEntityBuffer(1000, bufferSize);
+
+            for (int i = 0; i < num; i++) {
+                resource.processCommand(Sink::Commands::CreateEntityCommand, command);
+            }
+
+            // Wait until all messages have been processed
+            resource.processAllMessages().exec().waitForFinished();
+        }
+
+        qint64 totalDbSizes = 0;
+        qint64 totalKeysAndValues = 0;
+        QMap<QByteArray, qint64> dbSizes;
+        Sink::Storage::DataStore storage(Sink::storageLocation(), resourceId, Sink::Storage::DataStore::ReadOnly);
+        auto transaction = storage.createTransaction(Sink::Storage::DataStore::ReadOnly);
+        auto stat = transaction.stat();
+
+        std::cout << "Free pages: " << stat.freePages << std::endl;
+        std::cout << "Total pages: " << stat.totalPages << std::endl;
+        auto totalUsedSize = stat.pageSize * (stat.totalPages - stat.freePages);
+        std::cout << "Used size: " << totalUsedSize << std::endl;
+
+        auto freeDbSize = stat.pageSize * (stat.freeDbStat.leafPages + stat.freeDbStat.overflowPages + stat.freeDbStat.branchPages);
+        std::cout << "Free db size: " << freeDbSize << std::endl;
+        auto mainDbSize = stat.pageSize * (stat.mainDbStat.leafPages + stat.mainDbStat.overflowPages + stat.mainDbStat.branchPages);
+        std::cout << "Main db size: " << mainDbSize << std::endl;
+
+        totalDbSizes += mainDbSize;
+        QList<QByteArray> databases = transaction.getDatabaseNames();
+        for (const auto &databaseName : databases) {
+            auto db = transaction.openDatabase(databaseName);
+            const auto size = db.getSize();
+            dbSizes.insert(databaseName, size);
+            totalDbSizes += size;
+
+            qint64 keySizes = 0;
+            qint64 valueSizes = 0;
+            db.scan({}, [&] (const QByteArray &key, const QByteArray &data) {
+                    keySizes += key.size();
+                    valueSizes += data.size();
+                    return true;
+                },
+                [&](const Sink::Storage::DataStore::Error &e) {
+                    qWarning() << "Error while reading" << e;
+                },
+                false, false);
+
+            auto s = db.stat();
+            auto usedPages = (s.leafPages + s.branchPages + s.overflowPages);
+
+            std::cout << std::endl;
+            std::cout << "Db: " << databaseName.toStdString() << (db.allowsDuplicates() ? " DUP" : "") << std::endl;
+            std::cout << "Used pages " << usedPages << std::endl;
+            std::cout << "Used size " << (keySizes + valueSizes) / 4096.0 << std::endl;
+            std::cout << "Entries " << s.numEntries << std::endl;
+            totalKeysAndValues += (keySizes + valueSizes);
+        }
+        std::cout << std::endl;
+
+        auto mainStoreOnDisk = Sink::Storage::DataStore(Sink::storageLocation(), resourceId, Sink::Storage::DataStore::ReadOnly).diskUsage();
+        auto totalOnDisk = DummyResource::diskUsage(resourceId);
+        std::cout << "Calculated key + value size: " << totalKeysAndValues << std::endl;
+        std::cout << "Calculated total db sizes: " << totalDbSizes << std::endl;
+        std::cout << "Main store on disk: " << mainStoreOnDisk << std::endl;
+        std::cout << "Total on disk: " << totalOnDisk << std::endl;
+        std::cout << "Used size amplification: " << static_cast<double>(totalUsedSize) / static_cast<double>(totalKeysAndValues) << std::endl;
+        std::cout << "Write amplification: " << static_cast<double>(mainStoreOnDisk) / static_cast<double>(totalKeysAndValues) << std::endl;
+        std::cout << std::endl;
+    }
 private slots:
     void initTestCase()
     {
@@ -201,6 +284,10 @@ private slots:
         HAWD::Formatter::print(dataset);
     }
 
+    void testDiskUsage()
+    {
+        testDiskUsage(1000);
+    }
 
     // This allows to run individual parts without doing a cleanup, but still cleaning up normally
     void testCleanupForCompleteTest()
-- 
cgit v1.2.3