summaryrefslogtreecommitdiffstats
path: root/common/typeindex.cpp
diff options
context:
space:
mode:
authorRémi Nicole <nicole@kolabsystems.com>2018-06-19 11:04:17 +0200
committerChristian Mollekopf <chrigi_1@fastmail.fm>2018-06-19 11:10:47 +0200
commit077e3cb30ace5f6ee20ee15e0d32d2bfb197fde0 (patch)
tree3cfdaf0912ef22dba71755b4332354d579f6e7cf /common/typeindex.cpp
parent1ff4456e5dc2b9a9dfa80047f9e5a4a9e1395cdf (diff)
downloadsink-077e3cb30ace5f6ee20ee15e0d32d2bfb197fde0.tar.gz
sink-077e3cb30ace5f6ee20ee15e0d32d2bfb197fde0.zip
Implement Overlap queries
Summary: Notes: - Introduces the concept of queries on multiple properties (which meant changing query's internals a bit) - Dates are stored as well as the "reference" in the index to allow quick filtering without fetching the whole entity - Buckets are weeks starting on Monday (guaranteed by the use of the Julian calendar) - Some size improvements are definitely possible (dates are padded numbers again, not using integer databases, Julian calendar starts at a very old date, etc.) Test Plan: Tested in querytest Reviewers: cmollekopf Reviewed By: cmollekopf Tags: #sink Differential Revision: https://phabricator.kde.org/D13477
Diffstat (limited to 'common/typeindex.cpp')
-rw-r--r--common/typeindex.cpp122
1 files changed, 116 insertions, 6 deletions
diff --git a/common/typeindex.cpp b/common/typeindex.cpp
index 41821cb..6aa3796 100644
--- a/common/typeindex.cpp
+++ b/common/typeindex.cpp
@@ -53,6 +53,12 @@ static QByteArray getByteArray(const QVariant &value)
53 return "toplevel"; 53 return "toplevel";
54} 54}
55 55
56template <typename T>
57static QByteArray padNumber(T number)
58{
59 static T uint_num_digits = (T)std::log10(std::numeric_limits<T>::max()) + 1;
60 return QByteArray::number(number).rightJustified(uint_num_digits, '0');
61}
56 62
57static QByteArray toSortableByteArrayImpl(const QDateTime &date) 63static QByteArray toSortableByteArrayImpl(const QDateTime &date)
58{ 64{
@@ -60,8 +66,7 @@ static QByteArray toSortableByteArrayImpl(const QDateTime &date)
60 if (!date.isValid()) { 66 if (!date.isValid()) {
61 return QByteArray::number(std::numeric_limits<unsigned int>::max()); 67 return QByteArray::number(std::numeric_limits<unsigned int>::max());
62 } 68 }
63 static unsigned int uint_num_digits = (unsigned int)std::log10(std::numeric_limits<unsigned int>::max()) + 1; 69 return padNumber(std::numeric_limits<unsigned int>::max() - date.toTime_t());
64 return QByteArray::number(std::numeric_limits<unsigned int>::max() - date.toTime_t()).rightJustified(uint_num_digits, '0');
65} 70}
66 71
67static QByteArray toSortableByteArray(const QVariant &value) 72static QByteArray toSortableByteArray(const QVariant &value)
@@ -99,6 +104,22 @@ QByteArray TypeIndex::sortedIndexName(const QByteArray &property) const
99 return mType + ".index." + property + ".sorted"; 104 return mType + ".index." + property + ".sorted";
100} 105}
101 106
107QByteArray TypeIndex::sampledPeriodIndexName(const QByteArray &rangeBeginProperty, const QByteArray &rangeEndProperty) const
108{
109 return mType + ".index." + rangeBeginProperty + ".range." + rangeEndProperty;
110}
111
112static unsigned int bucketOf(const QVariant &value)
113{
114 switch (value.type()) {
115 case QMetaType::QDateTime:
116 return value.value<QDateTime>().date().toJulianDay() / 7;
117 default:
118 SinkError() << "Not knowing how to get the bucket of a" << value.typeName();
119 return {};
120 }
121}
122
102template <> 123template <>
103void TypeIndex::addProperty<QByteArray>(const QByteArray &property) 124void TypeIndex::addProperty<QByteArray>(const QByteArray &property)
104{ 125{
@@ -202,6 +223,41 @@ void TypeIndex::addPropertyWithSorting<ApplicationDomain::Reference, QDateTime>(
202 addPropertyWithSorting<QByteArray, QDateTime>(property, sortProperty); 223 addPropertyWithSorting<QByteArray, QDateTime>(property, sortProperty);
203} 224}
204 225
226template <>
227void TypeIndex::addSampledPeriodIndex<QDateTime, QDateTime>(
228 const QByteArray &beginProperty, const QByteArray &endProperty)
229{
230 auto indexer = [=](bool add, const QByteArray &identifier, const QVariant &begin,
231 const QVariant &end, Sink::Storage::DataStore::Transaction &transaction) {
232 SinkTraceCtx(mLogCtx) << "Adding entity to sampled period index";
233 const auto beginDate = begin.toDateTime();
234 const auto endDate = end.toDateTime();
235
236 auto beginBucket = bucketOf(beginDate);
237 auto endBucket = bucketOf(endDate);
238
239 if (beginBucket > endBucket) {
240 SinkError() << "End bucket greater than begin bucket";
241 return;
242 }
243
244 Index index(sampledPeriodIndexName(beginProperty, endProperty), transaction);
245 for (auto bucket = beginBucket; bucket <= endBucket; ++bucket) {
246 QByteArray bucketKey = padNumber(bucket);
247 if (add) {
248 SinkTraceCtx(mLogCtx) << "Adding entity to bucket:" << bucketKey;
249 index.add(bucketKey, identifier);
250 } else {
251 SinkTraceCtx(mLogCtx) << "Removing entity from bucket:" << bucketKey;
252 index.remove(bucketKey, identifier);
253 }
254 }
255 };
256
257 mSampledPeriodProperties.insert({ beginProperty, endProperty });
258 mSampledPeriodIndexer.insert({ beginProperty, endProperty }, indexer);
259}
260
205void TypeIndex::updateIndex(bool add, const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId) 261void TypeIndex::updateIndex(bool add, const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId)
206{ 262{
207 for (const auto &property : mProperties) { 263 for (const auto &property : mProperties) {
@@ -209,6 +265,12 @@ void TypeIndex::updateIndex(bool add, const QByteArray &identifier, const Sink::
209 auto indexer = mIndexer.value(property); 265 auto indexer = mIndexer.value(property);
210 indexer(add, identifier, value, transaction); 266 indexer(add, identifier, value, transaction);
211 } 267 }
268 for (const auto &properties : mSampledPeriodProperties) {
269 const auto beginValue = entity.getProperty(properties.first);
270 const auto endValue = entity.getProperty(properties.second);
271 auto indexer = mSampledPeriodIndexer.value(properties);
272 indexer(add, identifier, beginValue, endValue, transaction);
273 }
212 for (const auto &property : mSortedProperties) { 274 for (const auto &property : mSortedProperties) {
213 const auto value = entity.getProperty(property); 275 const auto value = entity.getProperty(property);
214 auto indexer = mSortIndexer.value(property); 276 auto indexer = mSortIndexer.value(property);
@@ -312,7 +374,38 @@ static QVector<QByteArray> sortedIndexLookup(Index &index, QueryBase::Comparator
312 return keys; 374 return keys;
313} 375}
314 376
315QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArray> &appliedFilters, QByteArray &appliedSorting, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId) 377static QVector<QByteArray> sampledIndexLookup(Index &index, QueryBase::Comparator filter)
378{
379 if (filter.comparator != Query::Comparator::Overlap) {
380 SinkWarning() << "Comparisons other than Overlap not supported on sampled period indexes";
381 return {};
382 }
383
384 QVector<QByteArray> keys;
385
386 auto bounds = filter.value.value<QVariantList>();
387
388 QByteArray lowerBound = toSortableByteArray(bounds[0]);
389 QByteArray upperBound = toSortableByteArray(bounds[1]);
390
391 QByteArray lowerBucket = padNumber(bucketOf(bounds[0]));
392 QByteArray upperBucket = padNumber(bucketOf(bounds[1]));
393
394 SinkTrace() << "Looking up from bucket:" << lowerBucket << "to:" << upperBucket;
395
396 index.rangeLookup(lowerBucket, upperBucket,
397 [&](const QByteArray &value) {
398 keys << value.data();
399 },
400 [bounds](const Index::Error &error) {
401 SinkWarning() << "Lookup error in index:" << error.message
402 << "with bounds:" << bounds[0] << bounds[1];
403 });
404
405 return keys;
406}
407
408QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArrayList> &appliedFilters, QByteArray &appliedSorting, Sink::Storage::DataStore::Transaction &transaction, const QByteArray &resourceInstanceId)
316{ 409{
317 const auto baseFilters = query.getBaseFilters(); 410 const auto baseFilters = query.getBaseFilters();
318 for (auto it = baseFilters.constBegin(); it != baseFilters.constEnd(); it++) { 411 for (auto it = baseFilters.constBegin(); it != baseFilters.constEnd(); it++) {
@@ -325,11 +418,28 @@ QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArr
325 } 418 }
326 } 419 }
327 420
421 for (auto it = baseFilters.constBegin(); it != baseFilters.constEnd(); it++) {
422 if (it.value().comparator == QueryBase::Comparator::Overlap) {
423 if (mSampledPeriodProperties.contains({it.key()[0], it.key()[1]})) {
424 Index index(sampledPeriodIndexName(it.key()[0], it.key()[1]), transaction);
425 const auto keys = sampledIndexLookup(index, query.getFilter(it.key()));
426 // The filter is not completely applied, we need post-filtering
427 // in the case the overlap period is not completely aligned
428 // with a week starting on monday
429 //appliedFilters << it.key();
430 SinkTraceCtx(mLogCtx) << "Sampled period index lookup on" << it.key() << "found" << keys.size() << "keys.";
431 return keys;
432 } else {
433 SinkWarning() << "Overlap search without sampled period index";
434 }
435 }
436 }
437
328 for (auto it = mGroupedSortedProperties.constBegin(); it != mGroupedSortedProperties.constEnd(); it++) { 438 for (auto it = mGroupedSortedProperties.constBegin(); it != mGroupedSortedProperties.constEnd(); it++) {
329 if (query.hasFilter(it.key()) && query.sortProperty() == it.value()) { 439 if (query.hasFilter(it.key()) && query.sortProperty() == it.value()) {
330 Index index(indexName(it.key(), it.value()), transaction); 440 Index index(indexName(it.key(), it.value()), transaction);
331 const auto keys = indexLookup(index, query.getFilter(it.key())); 441 const auto keys = indexLookup(index, query.getFilter(it.key()));
332 appliedFilters << it.key(); 442 appliedFilters.insert({it.key()});
333 appliedSorting = it.value(); 443 appliedSorting = it.value();
334 SinkTraceCtx(mLogCtx) << "Grouped sorted index lookup on " << it.key() << it.value() << " found " << keys.size() << " keys."; 444 SinkTraceCtx(mLogCtx) << "Grouped sorted index lookup on " << it.key() << it.value() << " found " << keys.size() << " keys.";
335 return keys; 445 return keys;
@@ -340,7 +450,7 @@ QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArr
340 if (query.hasFilter(property)) { 450 if (query.hasFilter(property)) {
341 Index index(sortedIndexName(property), transaction); 451 Index index(sortedIndexName(property), transaction);
342 const auto keys = sortedIndexLookup(index, query.getFilter(property)); 452 const auto keys = sortedIndexLookup(index, query.getFilter(property));
343 appliedFilters << property; 453 appliedFilters.insert({property});
344 SinkTraceCtx(mLogCtx) << "Sorted index lookup on " << property << " found " << keys.size() << " keys."; 454 SinkTraceCtx(mLogCtx) << "Sorted index lookup on " << property << " found " << keys.size() << " keys.";
345 return keys; 455 return keys;
346 } 456 }
@@ -350,7 +460,7 @@ QVector<QByteArray> TypeIndex::query(const Sink::QueryBase &query, QSet<QByteArr
350 if (query.hasFilter(property)) { 460 if (query.hasFilter(property)) {
351 Index index(indexName(property), transaction); 461 Index index(indexName(property), transaction);
352 const auto keys = indexLookup(index, query.getFilter(property)); 462 const auto keys = indexLookup(index, query.getFilter(property));
353 appliedFilters << property; 463 appliedFilters.insert({property});
354 SinkTraceCtx(mLogCtx) << "Index lookup on " << property << " found " << keys.size() << " keys."; 464 SinkTraceCtx(mLogCtx) << "Index lookup on " << property << " found " << keys.size() << " keys.";
355 return keys; 465 return keys;
356 } 466 }