diff options
author | Rémi Nicole <nicole@kolabsystems.com> | 2018-06-19 11:04:17 +0200 |
---|---|---|
committer | Christian Mollekopf <chrigi_1@fastmail.fm> | 2018-06-19 11:10:47 +0200 |
commit | 077e3cb30ace5f6ee20ee15e0d32d2bfb197fde0 (patch) | |
tree | 3cfdaf0912ef22dba71755b4332354d579f6e7cf /common/typeindex.cpp | |
parent | 1ff4456e5dc2b9a9dfa80047f9e5a4a9e1395cdf (diff) | |
download | sink-077e3cb30ace5f6ee20ee15e0d32d2bfb197fde0.tar.gz sink-077e3cb30ace5f6ee20ee15e0d32d2bfb197fde0.zip |
Implement Overlap queries
Summary:
Notes:
- Introduces the concept of queries on multiple properties (which meant changing query's internals a bit)
- Dates are stored as well as the "reference" in the index to allow quick filtering without fetching the whole entity
- Buckets are weeks starting on Monday (guaranteed by the use of the Julian calendar)
- Some size improvements are definitely possible (dates are padded numbers again, not using integer databases, Julian calendar starts at a very old date, etc.)
Test Plan: Tested in querytest
Reviewers: cmollekopf
Reviewed By: cmollekopf
Tags: #sink
Differential Revision: https://phabricator.kde.org/D13477
Diffstat (limited to 'common/typeindex.cpp')
-rw-r--r-- | common/typeindex.cpp | 122 |
1 files changed, 116 insertions, 6 deletions
diff --git a/common/typeindex.cpp b/common/typeindex.cpp index 41821cb..6aa3796 100644 --- a/common/typeindex.cpp +++ b/common/typeindex.cpp | |||
@@ -53,6 +53,12 @@ static QByteArray getByteArray(const QVariant &value) | |||
53 | return "toplevel"; | 53 | return "toplevel"; |
54 | } | 54 | } |
55 | 55 | ||
56 | template <typename T> | ||
57 | static QByteArray padNumber(T number) | ||
58 | { | ||
59 | static T uint_num_digits = (T)std::log10(std::numeric_limits<T>::max()) + 1; | ||
60 | return QByteArray::number(number).rightJustified(uint_num_digits, '0'); | ||
61 | } | ||
56 | 62 | ||
57 | static QByteArray toSortableByteArrayImpl(const QDateTime &date) | 63 | static QByteArray toSortableByteArrayImpl(const QDateTime &date) |
58 | { | 64 | { |
@@ -60,8 +66,7 @@ static QByteArray toSortableByteArrayImpl(const QDateTime &date) | |||
60 | if (!date.isValid()) { | 66 | if (!date.isValid()) { |
61 | return QByteArray::number(std::numeric_limits<unsigned int>::max()); | 67 | return QByteArray::number(std::numeric_limits<unsigned int>::max()); |
62 | } | 68 | } |
63 | static unsigned int uint_num_digits = (unsigned int)std::log10(std::numeric_limits<unsigned int>::max()) + 1; | 69 | return padNumber(std::numeric_limits<unsigned int>::max() - date.toTime_t()); |
64 | return QByteArray::number(std::numeric_limits<unsigned int>::max() - date.toTime_t()).rightJustified(uint_num_digits, '0'); | ||
65 | } | 70 | } |
66 | 71 | ||
67 | static QByteArray toSortableByteArray(const QVariant &value) | 72 | static QByteArray toSortableByteArray(const QVariant &value) |
@@ -99,6 +104,22 @@ QByteArray TypeIndex::sortedIndexName(const QByteArray &property) const | |||
99 | return mType + ".index." + property + ".sorted"; | 104 | return mType + ".index." + property + ".sorted"; |
100 | } | 105 | } |
101 | 106 | ||
107 | QByteArray TypeIndex::sampledPeriodIndexName(const QByteArray &rangeBeginProperty, const QByteArray &rangeEndProperty) const | ||
108 | { | ||
109 | return mType + ".index." + rangeBeginProperty + ".range." + rangeEndProperty; | ||
110 | } | ||
111 | |||
112 | static unsigned int bucketOf(const QVariant &value) | ||
113 | { | ||
114 | switch (value.type()) { | ||
115 | case QMetaType::QDateTime: | ||
116 | return value.value<QDateTime>().date().toJulianDay() / 7; | ||
117 | default: | ||
118 | SinkError() << "Not knowing how to get the bucket of a" << value.typeName(); | ||
119 | return {}; | ||
120 | } | ||
121 | } | ||
122 | |||
102 | template <> | 123 | template <> |
103 | void TypeIndex::addProperty<QByteArray>(const QByteArray &property) | 124 | void TypeIndex::addProperty<QByteArray>(const QByteArray &property) |
104 | { | 125 | { |
@@ -202,6 +223,41 @@ void TypeIndex::addPropertyWithSorting<ApplicationDomain::Reference, QDateTime>( | |||
202 | addPropertyWithSorting<QByteArray, QDateTime>(property, sortProperty); | 223 | addPropertyWithSorting<QByteArray, QDateTime>(property, sortProperty); |
203 | } | 224 | } |
204 | 225 | ||
226 | template <> | ||
227 | void TypeIndex::addSampledPeriodIndex<QDateTime, QDateTime>( | ||
228 | const QByteArray &beginProperty, const QByteArray &endProperty) | ||
229 | { | ||
230 | auto indexer = [=](bool add, const QByteArray &identifier, const QVariant &begin, | ||
231 | const QVariant &end, Sink::Storage::DataStore::Transaction &transaction) { | ||
232 | SinkTraceCtx(mLogCtx) << "Adding entity to sampled period index"; | ||
233 | const auto beginDate = begin.toDateTime(); | ||
234 | const auto endDate = end.toDateTime(); | ||
235 | |||
236 | auto beginBucket = bucketOf(beginDate); | ||
237 | auto endBucket = bucketOf(endDate); | ||
238 | |||
239 | if (beginBucket > endBucket) { | ||
240 | SinkError() << "End bucket greater than begin bucket"; | ||
241 | return; | ||
242 | } | ||
243 | |||
244 | Index index(sampledPeriodIndexName(beginProperty, endProperty), transaction); | ||
245 | for (auto bucket = beginBucket; bucket <= endBucket; ++bucket) { | ||
246 | QByteArray bucketKey = padNumber(bucket); | ||
247 | if (add) { | ||
248 | SinkTraceCtx(mLogCtx) << "Adding entity to bucket:" << bucketKey; | ||
249 | index.add(bucketKey, identifier); | ||
250 | } else { | ||
251 | SinkTraceCtx(mLogCtx) << "Removing entity from bucket:" << bucketKey; | ||
252 | index.remove(bucketKey, identifier); | ||
253 | } | ||
254 | } | ||
255 | }; | ||
256 | |||
257 | mSampledPeriodProperties.insert({ beginProperty, endProperty }); | ||
258 | mSampledPeriodIndexer.insert({ beginProperty, endProperty }, indexer); | ||
259 | } | ||
260 | |||
205 | void TypeIndex::updateIndex(bool add, const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId) | 261 | void TypeIndex::updateIndex(bool add, const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId) |
206 | { | 262 | { |
207 | for (const auto &property : mProperties) { | 263 | for (const auto &property : mProperties) { |
@@ -209,6 +265,12 @@ void TypeIndex::updateIndex(bool add, const QByteArray &identifier, const Sink:: | |||
209 | auto indexer = mIndexer.value(property); | 265 | auto indexer = mIndexer.value(property); |
210 | indexer(add, identifier, value, transaction); | 266 | indexer(add, identifier, value, transaction); |
211 | } | 267 | } |
268 | for (const auto &properties : mSampledPeriodProperties) { | ||
269 | const auto beginValue = entity.getProperty(properties.first); | ||
270 | const auto endValue = entity.getProperty(properties.second); | ||
271 | auto indexer = mSampledPeriodIndexer.value(properties); | ||
272 | indexer(add, identifier, beginValue, endValue, transaction); | ||
273 | } | ||
212 | for (const auto &property : mSortedProperties) { | 274 | for (const auto &property : mSortedProperties) { |
213 | const auto value = entity.getProperty(property); | 275 | const auto value = entity.getProperty(property); |
214 | auto indexer = mSortIndexer.value(property); | 276 | auto indexer = mSortIndexer.value(property); |
@@ -312,7 +374,38 @@ static QVector<QByteArray> sortedIndexLookup(Index &index, QueryBase::Comparator | |||
312 | return keys; | 374 | return keys; |
313 | } | 375 | } |
314 | 376 | ||
315 | QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArray> &appliedFilters, QByteArray &appliedSorting, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId) | 377 | static QVector<QByteArray> sampledIndexLookup(Index &index, QueryBase::Comparator filter) |
378 | { | ||
379 | if (filter.comparator != Query::Comparator::Overlap) { | ||
380 | SinkWarning() << "Comparisons other than Overlap not supported on sampled period indexes"; | ||
381 | return {}; | ||
382 | } | ||
383 | |||
384 | QVector<QByteArray> keys; | ||
385 | |||
386 | auto bounds = filter.value.value<QVariantList>(); | ||
387 | |||
388 | QByteArray lowerBound = toSortableByteArray(bounds[0]); | ||
389 | QByteArray upperBound = toSortableByteArray(bounds[1]); | ||
390 | |||
391 | QByteArray lowerBucket = padNumber(bucketOf(bounds[0])); | ||
392 | QByteArray upperBucket = padNumber(bucketOf(bounds[1])); | ||
393 | |||
394 | SinkTrace() << "Looking up from bucket:" << lowerBucket << "to:" << upperBucket; | ||
395 | |||
396 | index.rangeLookup(lowerBucket, upperBucket, | ||
397 | [&](const QByteArray &value) { | ||
398 | keys << value.data(); | ||
399 | }, | ||
400 | [bounds](const Index::Error &error) { | ||
401 | SinkWarning() << "Lookup error in index:" << error.message | ||
402 | << "with bounds:" << bounds[0] << bounds[1]; | ||
403 | }); | ||
404 | |||
405 | return keys; | ||
406 | } | ||
407 | |||
408 | QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArrayList> &appliedFilters, QByteArray &appliedSorting, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId) | ||
316 | { | 409 | { |
317 | const auto baseFilters = query.getBaseFilters(); | 410 | const auto baseFilters = query.getBaseFilters(); |
318 | for (auto it = baseFilters.constBegin(); it != baseFilters.constEnd(); it++) { | 411 | for (auto it = baseFilters.constBegin(); it != baseFilters.constEnd(); it++) { |
@@ -325,11 +418,28 @@ QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArr | |||
325 | } | 418 | } |
326 | } | 419 | } |
327 | 420 | ||
421 | for (auto it = baseFilters.constBegin(); it != baseFilters.constEnd(); it++) { | ||
422 | if (it.value().comparator == QueryBase::Comparator::Overlap) { | ||
423 | if (mSampledPeriodProperties.contains({it.key()[0], it.key()[1]})) { | ||
424 | Index index(sampledPeriodIndexName(it.key()[0], it.key()[1]), transaction); | ||
425 | const auto keys = sampledIndexLookup(index, query.getFilter(it.key())); | ||
426 | // The filter is not completely applied, we need post-filtering | ||
427 | // in the case the overlap period is not completely aligned | ||
428 | // with a week starting on monday | ||
429 | //appliedFilters << it.key(); | ||
430 | SinkTraceCtx(mLogCtx) << "Sampled period index lookup on" << it.key() << "found" << keys.size() << "keys."; | ||
431 | return keys; | ||
432 | } else { | ||
433 | SinkWarning() << "Overlap search without sampled period index"; | ||
434 | } | ||
435 | } | ||
436 | } | ||
437 | |||
328 | for (auto it = mGroupedSortedProperties.constBegin(); it != mGroupedSortedProperties.constEnd(); it++) { | 438 | for (auto it = mGroupedSortedProperties.constBegin(); it != mGroupedSortedProperties.constEnd(); it++) { |
329 | if (query.hasFilter(it.key()) && query.sortProperty() == it.value()) { | 439 | if (query.hasFilter(it.key()) && query.sortProperty() == it.value()) { |
330 | Index index(indexName(it.key(), it.value()), transaction); | 440 | Index index(indexName(it.key(), it.value()), transaction); |
331 | const auto keys = indexLookup(index, query.getFilter(it.key())); | 441 | const auto keys = indexLookup(index, query.getFilter(it.key())); |
332 | appliedFilters << it.key(); | 442 | appliedFilters.insert({it.key()}); |
333 | appliedSorting = it.value(); | 443 | appliedSorting = it.value(); |
334 | SinkTraceCtx(mLogCtx) << "Grouped sorted index lookup on " << it.key() << it.value() << " found " << keys.size() << " keys."; | 444 | SinkTraceCtx(mLogCtx) << "Grouped sorted index lookup on " << it.key() << it.value() << " found " << keys.size() << " keys."; |
335 | return keys; | 445 | return keys; |
@@ -340,7 +450,7 @@ QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArr | |||
340 | if (query.hasFilter(property)) { | 450 | if (query.hasFilter(property)) { |
341 | Index index(sortedIndexName(property), transaction); | 451 | Index index(sortedIndexName(property), transaction); |
342 | const auto keys = sortedIndexLookup(index, query.getFilter(property)); | 452 | const auto keys = sortedIndexLookup(index, query.getFilter(property)); |
343 | appliedFilters << property; | 453 | appliedFilters.insert({property}); |
344 | SinkTraceCtx(mLogCtx) << "Sorted index lookup on " << property << " found " << keys.size() << " keys."; | 454 | SinkTraceCtx(mLogCtx) << "Sorted index lookup on " << property << " found " << keys.size() << " keys."; |
345 | return keys; | 455 | return keys; |
346 | } | 456 | } |
@@ -350,7 +460,7 @@ QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArr | |||
350 | if (query.hasFilter(property)) { | 460 | if (query.hasFilter(property)) { |
351 | Index index(indexName(property), transaction); | 461 | Index index(indexName(property), transaction); |
352 | const auto keys = indexLookup(index, query.getFilter(property)); | 462 | const auto keys = indexLookup(index, query.getFilter(property)); |
353 | appliedFilters << property; | 463 | appliedFilters.insert({property}); |
354 | SinkTraceCtx(mLogCtx) << "Index lookup on " << property << " found " << keys.size() << " keys."; | 464 | SinkTraceCtx(mLogCtx) << "Index lookup on " << property << " found " << keys.size() << " keys."; |
355 | return keys; | 465 | return keys; |
356 | } | 466 | } |