summaryrefslogtreecommitdiffstats
path: root/tests/dummyresourcewritebenchmark.cpp
blob: e0ec50361e17cd4acffac8c4afa6ee02a710fbb5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
#include <QtTest>

#include <QString>
#include <QDateTime>
#include <QElapsedTimer>

#include <cmath>
#include <iostream>

#include "dummyresource/resourcefactory.h"
#include "dummyresource/domainadaptor.h"
#include "store.h"
#include "commands.h"
#include "entitybuffer.h"
#include "log.h"
#include "resourceconfig.h"
#include "definitions.h"
#include "facadefactory.h"
#include "adaptorfactoryregistry.h"

#include "hawd/dataset.h"
#include "hawd/formatter.h"

#include "event_generated.h"
#include "mail_generated.h"
#include "entity_generated.h"
#include "metadata_generated.h"
#include "createentity_generated.h"

#include "getrssusage.h"
#include "utils.h"

#include <KMime/Message>

/**
 * Assemble a serialized CreateEntity command that carries one small mail.
 *
 * The mail is a KMime message with a fixed subject and body; its encoded
 * content is stored in a Mail flatbuffer, which is wrapped into an entity
 * buffer, which in turn becomes the delta of a CreateEntity command.
 *
 * @param attachmentSize Not read anywhere in this function.
 * @param bufferSize     Out-parameter; set to the size of the assembled
 *                       entity buffer (i.e. before it is wrapped into the
 *                       CreateEntity command).
 * @return A QByteArray constructed from the finished command buffer's
 *         pointer and size.
 */
static QByteArray createEntityBuffer(size_t attachmentSize, int &bufferSize)
{
    // Step 1: the Mail payload.
    flatbuffers::FlatBufferBuilder mailFbb;
    mailFbb.Clear();
    {
        auto message = KMime::Message::Ptr::create();
        message->subject()->from7BitString("Some subject");
        message->setBody("This is the body now.");
        message->assemble();

        const auto encoded = message->encodedContent();

        // The same string offset is used for both subject and messageId.
        auto summaryOffset = mailFbb.CreateString("summary");
        auto mimeOffset = mailFbb.CreateString(encoded.constData(), encoded.length());

        Sink::ApplicationDomain::Buffer::MailBuilder mailBuilder(mailFbb);
        mailBuilder.add_subject(summaryOffset);
        mailBuilder.add_messageId(summaryOffset);
        mailBuilder.add_mimeMessage(mimeOffset);
        Sink::ApplicationDomain::Buffer::FinishMailBuffer(mailFbb, mailBuilder.Finish());
    }

    // Step 2: wrap the Mail payload into an entity buffer and report its size.
    flatbuffers::FlatBufferBuilder entityFbb;
    Sink::EntityBuffer::assembleEntityBuffer(entityFbb, 0, 0, 0, 0, mailFbb.GetBufferPointer(), mailFbb.GetSize());
    bufferSize = entityFbb.GetSize();

    // Step 3: build the CreateEntity command around the entity buffer.
    flatbuffers::FlatBufferBuilder commandFbb;
    auto typeOffset = commandFbb.CreateString(Sink::ApplicationDomain::getTypeName<Sink::ApplicationDomain::Mail>().toStdString().data());
    auto deltaOffset = commandFbb.CreateVector<uint8_t>(entityFbb.GetBufferPointer(), entityFbb.GetSize());
    Sink::Commands::CreateEntityBuilder commandBuilder(commandFbb);
    commandBuilder.add_domainType(typeOffset);
    commandBuilder.add_delta(deltaOffset);
    Sink::Commands::FinishCreateEntityBuffer(commandFbb, commandBuilder.Finish());

    const auto *rawCommand = reinterpret_cast<const char *>(commandFbb.GetBufferPointer());
    return QByteArray(rawCommand, commandFbb.GetSize());
}

/**
 * Benchmark writing in the synchronizer process.
 */
class DummyResourceWriteBenchmark : public QObject
{
    Q_OBJECT

    QList<double> mRssGrowthPerEntity;
    QList<double> mTimePerEntity;
    QDateTime mTimeStamp{QDateTime::currentDateTimeUtc()};

    void writeInProcess(int num, const QDateTime &timestamp)
    {
        DummyResource::removeFromDisk("sink.dummy.instance1");

        QTime time;
        time.start();
        DummyResource resource(Sink::ResourceContext{"sink.dummy.instance1", "sink.dummy", Sink::AdaptorFactoryRegistry::instance().getFactories("sink.dummy")});

        int bufferSize = 0;
        auto command = createEntityBuffer(0, bufferSize);

        const auto startingRss = getCurrentRSS();
        for (int i = 0; i < num; i++) {
            resource.processCommand(Sink::Commands::CreateEntityCommand, command);
        }
        auto appendTime = time.elapsed();
        Q_UNUSED(appendTime);
        auto bufferSizeTotal = bufferSize * num;

        // Wait until all messages have been processed
        resource.processAllMessages().exec().waitForFinished();

        auto allProcessedTime = time.elapsed();

        const auto finalRss = getCurrentRSS();
        const auto rssGrowth = finalRss - startingRss;
        // Since the database is memory mapped it is attributted to the resident set size.
        const auto rssWithoutDb = finalRss - DummyResource::diskUsage("sink.dummy.instance1");
        const auto peakRss = getPeakRSS();
        // How much peak deviates from final rss in percent
        const auto percentageRssError = static_cast<double>(peakRss - finalRss) * 100.0 / static_cast<double>(finalRss);
        auto rssGrowthPerEntity = rssGrowth / num;
        std::cout << "Current Rss usage [kb]: " << finalRss / 1024 << std::endl;
        std::cout << "Peak Rss usage [kb]: " << peakRss / 1024 << std::endl;
        std::cout << "Rss growth [kb]: " << rssGrowth / 1024 << std::endl;
        std::cout << "Rss growth per entity [byte]: " << rssGrowthPerEntity << std::endl;
        std::cout << "Rss without db [kb]: " << rssWithoutDb / 1024 << std::endl;
        std::cout << "Percentage peak rss error: " << percentageRssError << std::endl;

        auto onDisk = Sink::Storage::DataStore(Sink::storageLocation(), "sink.dummy.instance1", Sink::Storage::DataStore::ReadOnly).diskUsage();
        auto writeAmplification = static_cast<double>(onDisk) / static_cast<double>(bufferSizeTotal);
        std::cout << "On disk [kb]: " << onDisk / 1024 << std::endl;
        std::cout << "Buffer size total [kb]: " << bufferSizeTotal / 1024 << std::endl;
        std::cout << "Write amplification: " << writeAmplification << std::endl;


        mTimePerEntity << static_cast<double>(allProcessedTime) / static_cast<double>(num);
        mRssGrowthPerEntity << rssGrowthPerEntity;

        {
            HAWD::Dataset dataset("dummy_write_perf", m_hawdState);
            HAWD::Dataset::Row row = dataset.row();
            row.setValue("rows", num);
            row.setValue("append", (qreal)num/appendTime);
            row.setValue("total", (qreal)num/allProcessedTime);
            row.setTimestamp(timestamp);
            dataset.insertRow(row);
            HAWD::Formatter::print(dataset);
        }

        {
            HAWD::Dataset dataset("dummy_write_memory", m_hawdState);
            HAWD::Dataset::Row row = dataset.row();
            row.setValue("rows", num);
            row.setValue("rss", QVariant::fromValue(finalRss / 1024));
            row.setValue("peakRss", QVariant::fromValue(peakRss / 1024));
            row.setValue("percentagePeakRssError", percentageRssError);
            row.setValue("rssGrowthPerEntity", QVariant::fromValue(rssGrowthPerEntity));
            row.setValue("rssWithoutDb", rssWithoutDb / 1024);
            row.setTimestamp(timestamp);
            dataset.insertRow(row);
            HAWD::Formatter::print(dataset);
        }

        {
            HAWD::Dataset dataset("dummy_write_disk", m_hawdState);
            HAWD::Dataset::Row row = dataset.row();
            row.setValue("rows", num);
            row.setValue("onDisk", onDisk / 1024);
            row.setValue("bufferSize", bufferSizeTotal / 1024);
            row.setValue("writeAmplification", writeAmplification);
            row.setTimestamp(timestamp);
            dataset.insertRow(row);
            HAWD::Formatter::print(dataset);
        }

        // Print memory layout, RSS is what is in memory
        // std::system("exec pmap -x \"$PPID\"");
    }

    void testDiskUsage(int num)
    {
        auto resourceId = "testDiskUsage";
        DummyResource::removeFromDisk(resourceId);

        {
            DummyResource resource(Sink::ResourceContext{resourceId, "sink.dummy", Sink::AdaptorFactoryRegistry::instance().getFactories("sink.dummy")});

            int bufferSize = 0;
            auto command = createEntityBuffer(1000, bufferSize);

            for (int i = 0; i < num; i++) {
                resource.processCommand(Sink::Commands::CreateEntityCommand, command);
            }

            // Wait until all messages have been processed
            resource.processAllMessages().exec().waitForFinished();
        }

        qint64 totalDbSizes = 0;
        qint64 totalKeysAndValues = 0;
        QMap<QByteArray, qint64> dbSizes;
        Sink::Storage::DataStore storage(Sink::storageLocation(), resourceId, Sink::Storage::DataStore::ReadOnly);
        auto transaction = storage.createTransaction(Sink::Storage::DataStore::ReadOnly);
        auto stat = transaction.stat();

        std::cout << "Free pages: " << stat.freePages << std::endl;
        std::cout << "Total pages: " << stat.totalPages << std::endl;
        auto totalUsedSize = stat.pageSize * (stat.totalPages - stat.freePages);
        std::cout << "Used size: " << totalUsedSize << std::endl;

        auto freeDbSize = stat.pageSize * (stat.freeDbStat.leafPages + stat.freeDbStat.overflowPages + stat.freeDbStat.branchPages);
        std::cout << "Free db size: " << freeDbSize << std::endl;
        auto mainDbSize = stat.pageSize * (stat.mainDbStat.leafPages + stat.mainDbStat.overflowPages + stat.mainDbStat.branchPages);
        std::cout << "Main db size: " << mainDbSize << std::endl;

        totalDbSizes += mainDbSize;
        QList<QByteArray> databases = transaction.getDatabaseNames();
        for (const auto &databaseName : databases) {
            auto db = transaction.openDatabase(databaseName);
            const auto size = db.getSize();
            dbSizes.insert(databaseName, size);
            totalDbSizes += size;

            qint64 keySizes = 0;
            qint64 valueSizes = 0;
            db.scan({}, [&] (const QByteArray &key, const QByteArray &data) {
                    keySizes += key.size();
                    valueSizes += data.size();
                    return true;
                },
                [&](const Sink::Storage::DataStore::Error &e) {
                    qWarning() << "Error while reading" << e;
                },
                false, false);

            auto s = db.stat();
            auto usedPages = (s.leafPages + s.branchPages + s.overflowPages);

            std::cout << std::endl;
            std::cout << "Db: " << databaseName.toStdString() << (db.allowsDuplicates() ? " DUP" : "") << std::endl;
            std::cout << "Used pages " << usedPages << std::endl;
            std::cout << "Used size " << (keySizes + valueSizes) / 4096.0 << std::endl;
            std::cout << "Entries " << s.numEntries << std::endl;
            totalKeysAndValues += (keySizes + valueSizes);
        }
        std::cout << std::endl;

        auto mainStoreOnDisk = Sink::Storage::DataStore(Sink::storageLocation(), resourceId, Sink::Storage::DataStore::ReadOnly).diskUsage();
        auto totalOnDisk = DummyResource::diskUsage(resourceId);
        std::cout << "Calculated key + value size: " << totalKeysAndValues << std::endl;
        std::cout << "Calculated total db sizes: " << totalDbSizes << std::endl;
        std::cout << "Main store on disk: " << mainStoreOnDisk << std::endl;
        std::cout << "Total on disk: " << totalOnDisk << std::endl;
        std::cout << "Used size amplification: " << static_cast<double>(totalUsedSize) / static_cast<double>(totalKeysAndValues) << std::endl;
        std::cout << "Write amplification: " << static_cast<double>(mainStoreOnDisk) / static_cast<double>(totalKeysAndValues) << std::endl;
        std::cout << std::endl;
    }

private slots:
    void initTestCase()
    {
        Sink::Log::setDebugOutputLevel(Sink::Log::Warning);
        auto factory = Sink::ResourceFactory::load("sink.dummy");
        QVERIFY(factory);
    }

    void cleanup()
    {
    }

    void runBenchmarks()
    {
        writeInProcess(5000, mTimeStamp);
    }

    void ensureUsedMemoryRemainsStable()
    {
        auto rssStandardDeviation = sqrt(variance(mRssGrowthPerEntity));
        auto timeStandardDeviation = sqrt(variance(mTimePerEntity));
        HAWD::Dataset dataset("dummy_write_summary", m_hawdState);
        HAWD::Dataset::Row row = dataset.row();
        row.setValue("rssStandardDeviation", rssStandardDeviation);
        row.setValue("rssMaxDifference", maxDifference(mRssGrowthPerEntity));
        row.setValue("timeStandardDeviation", timeStandardDeviation);
        row.setValue("timeMaxDifference", maxDifference(mTimePerEntity));
        row.setTimestamp(mTimeStamp);
        dataset.insertRow(row);
        HAWD::Formatter::print(dataset);
    }

    void testDiskUsage()
    {
        testDiskUsage(1000);
    }

    // This allows to run individual parts without doing a cleanup, but still cleaning up normally
    void testCleanupForCompleteTest()
    {
        DummyResource::removeFromDisk("sink.dummy.instance1");
    }

private:
    HAWD::State m_hawdState;
};

// Provides main() for this executable and runs DummyResourceWriteBenchmark's test slots.
QTEST_MAIN(DummyResourceWriteBenchmark)
// moc output for the Q_OBJECT class declared in this source file.
#include "dummyresourcewritebenchmark.moc"