From da0c37dbad121252effa85941de4d49222176179 Mon Sep 17 00:00:00 2001 From: Christian Mollekopf Date: Thu, 20 Oct 2016 13:34:38 +0200 Subject: A new indexer subsystem that can be used for indexes that are more complex than a simple key-value pair. --- common/CMakeLists.txt | 2 + common/domain/mail.cpp | 99 +---------------------------- common/indexer.cpp | 39 ++++++++++++ common/indexer.h | 49 +++++++++++++++ common/mail/threadindexer.cpp | 140 ++++++++++++++++++++++++++++++++++++++++++ common/mail/threadindexer.h | 36 +++++++++++ common/typeindex.cpp | 8 +++ common/typeindex.h | 9 +++ 8 files changed, 286 insertions(+), 96 deletions(-) create mode 100644 common/indexer.cpp create mode 100644 common/indexer.h create mode 100644 common/mail/threadindexer.cpp create mode 100644 common/mail/threadindexer.h diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index e329d93..b5efb62 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -75,6 +75,8 @@ set(command_SRCS specialpurposepreprocessor.cpp datastorequery.cpp storage/entitystore.cpp + indexer.cpp + mail/threadindexer.cpp ${storage_SRCS}) add_library(${PROJECT_NAME} SHARED ${command_SRCS}) diff --git a/common/domain/mail.cpp b/common/domain/mail.cpp index 9d58767..4b33f06 100644 --- a/common/domain/mail.cpp +++ b/common/domain/mail.cpp @@ -35,6 +35,7 @@ #include "entitybuffer.h" #include "datastorequery.h" #include "entity_generated.h" +#include "mail/threadindexer.h" #include "mail_generated.h" @@ -59,6 +60,8 @@ void TypeImplementation::configureIndex(TypeIndex &index) index.addProperty(Mail::ParentMessageId::name); index.addProperty(); + + index.addSecondaryPropertyIndexer(); index.addSecondaryProperty(); index.addSecondaryProperty(); } @@ -74,102 +77,6 @@ static TypeIndex &getIndex() return *index; } -static QString stripOffPrefixes(const QString &subject) -{ - //TODO this hardcoded list is probably not good enough (especially regarding internationalization) - //TODO this whole routine, 
including internationalized re/fwd ... should go into some library. - //We'll require the same for generating reply/forward subjects in kube - static QStringList defaultReplyPrefixes = QStringList() << QLatin1String("Re\\s*:") - << QLatin1String("Re\\[\\d+\\]:") - << QLatin1String("Re\\d+:"); - - static QStringList defaultForwardPrefixes = QStringList() << QLatin1String("Fwd:") - << QLatin1String("FW:"); - - QStringList replyPrefixes; // = GlobalSettings::self()->replyPrefixes(); - if (replyPrefixes.isEmpty()) { - replyPrefixes = defaultReplyPrefixes; - } - - QStringList forwardPrefixes; // = GlobalSettings::self()->forwardPrefixes(); - if (forwardPrefixes.isEmpty()) { - forwardPrefixes = defaultReplyPrefixes; - } - - const QStringList prefixRegExps = replyPrefixes + forwardPrefixes; - - // construct a big regexp that - // 1. is anchored to the beginning of str (sans whitespace) - // 2. matches at least one of the part regexps in prefixRegExps - const QString bigRegExp = QString::fromLatin1("^(?:\\s+|(?:%1))+\\s*").arg(prefixRegExps.join(QLatin1String(")|(?:"))); - - static QString regExpPattern; - static QRegExp regExp; - - regExp.setCaseSensitivity(Qt::CaseInsensitive); - if (regExpPattern != bigRegExp) { - // the prefixes have changed, so update the regexp - regExpPattern = bigRegExp; - regExp.setPattern(regExpPattern); - } - - if(regExp.isValid()) { - QString tmp = subject; - if (regExp.indexIn( tmp ) == 0) { - return tmp.remove(0, regExp.matchedLength()); - } - } else { - SinkWarning() << "bigRegExp = \"" - << bigRegExp << "\"\n" - << "prefix regexp is invalid!"; - } - - return subject; -} - - -static void updateThreadingIndex(const QByteArray &identifier, const BufferAdaptor &bufferAdaptor, Sink::Storage::DataStore::Transaction &transaction) -{ - auto messageId = bufferAdaptor.getProperty(Mail::MessageId::name); - auto parentMessageId = bufferAdaptor.getProperty(Mail::ParentMessageId::name); - auto subject = bufferAdaptor.getProperty(Mail::Subject::name); - - 
auto normalizedSubject = stripOffPrefixes(subject.toString()).toUtf8(); - - QVector thread; - - //a child already registered our thread. - thread = getIndex().secondaryLookup(messageId, transaction); - - //If parent is already available, add to thread of parent - if (thread.isEmpty() && parentMessageId.isValid()) { - thread = getIndex().secondaryLookup(parentMessageId, transaction); - SinkTrace() << "Found parent"; - } - if (thread.isEmpty()) { - //Try to lookup the thread by subject: - thread = getIndex().secondaryLookup(normalizedSubject, transaction); - if (thread.isEmpty()) { - SinkTrace() << "Created a new thread "; - thread << QUuid::createUuid().toByteArray(); - } else { - } - } - - //We should have found the thread by now - if (!thread.isEmpty()) { - if (parentMessageId.isValid()) { - //Register parent with thread for when it becomes available - getIndex().index(parentMessageId, thread.first(), transaction); - } - getIndex().index(messageId, thread.first(), transaction); - getIndex().index(thread.first(), messageId, transaction); - getIndex().index(normalizedSubject, thread.first(), transaction); - } else { - SinkWarning() << "Couldn't find a thread for: " << messageId; - } -} - QSharedPointer::Buffer> > TypeImplementation::initializeReadPropertyMapper() { auto propertyMapper = QSharedPointer >::create(); diff --git a/common/indexer.cpp b/common/indexer.cpp new file mode 100644 index 0000000..1b223b3 --- /dev/null +++ b/common/indexer.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 Christian Mollekopf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include "indexer.h" + +using namespace Sink; + +void Indexer::setup(TypeIndex *index, Storage::DataStore::Transaction *transaction) /* Stores the type index and transaction pointers that index() and transaction() hand out afterwards. */ +{ + mTypeIndex = index; + mTransaction = transaction; +} + +Storage::DataStore::Transaction &Indexer::transaction() /* Returns the transaction stored by setup(); asserts that the stored pointer is non-null. */ +{ + Q_ASSERT(mTransaction); + return *mTransaction; +} + +TypeIndex &Indexer::index() /* Returns the type index stored by setup(); asserts that the stored pointer is non-null. */ +{ + Q_ASSERT(mTypeIndex); + return *mTypeIndex; +} diff --git a/common/indexer.h b/common/indexer.h new file mode 100644 index 0000000..7e148d1 --- /dev/null +++ b/common/indexer.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015 Christian Mollekopf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */ +#pragma once + +#include "storage.h" +#include + +class TypeIndex; +namespace Sink { +namespace ApplicationDomain { + class ApplicationDomainType; +} + +class Indexer /* Interface for custom indexers; TypeIndex calls setup() on an indexer before invoking add() or remove() on it. */ +{ +public: + typedef QSharedPointer Ptr; + virtual void add(const ApplicationDomain::ApplicationDomainType &entity) = 0; + virtual void modify(const ApplicationDomain::ApplicationDomainType &old, const ApplicationDomain::ApplicationDomainType &entity) = 0; + virtual void remove(const ApplicationDomain::ApplicationDomainType &entity) = 0; + +protected: + Storage::DataStore::Transaction &transaction(); /* transaction given to setup() */ + TypeIndex &index(); /* type index given to setup() */ + +private: + friend class ::TypeIndex; + void setup(TypeIndex *, Storage::DataStore::Transaction *); + Storage::DataStore::Transaction *mTransaction = nullptr; /* null until setup() runs, so the Q_ASSERT in transaction() can detect use before setup */ + TypeIndex *mTypeIndex = nullptr; /* null until setup() runs, so the Q_ASSERT in index() can detect use before setup */ +}; + +} diff --git a/common/mail/threadindexer.cpp b/common/mail/threadindexer.cpp new file mode 100644 index 0000000..4a18625 --- /dev/null +++ b/common/mail/threadindexer.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Christian Mollekopf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */ +#include "threadindexer.h" + +#include "typeindex.h" +#include "log.h" + +SINK_DEBUG_AREA("threadindex") + +using namespace Sink; +using namespace Sink::ApplicationDomain; + +static QString stripOffPrefixes(const QString &subject) /* Returns subject without its leading run of whitespace and reply/forward prefixes (matched case-insensitively); returns subject unchanged when nothing matches at position 0 or the regexp is invalid. */ +{ + //TODO this hardcoded list is probably not good enough (especially regarding internationalization) + //TODO this whole routine, including internationalized re/fwd ... should go into some library. + //We'll require the same for generating reply/forward subjects in kube + static QStringList defaultReplyPrefixes = QStringList() << QLatin1String("Re\\s*:") + << QLatin1String("Re\\[\\d+\\]:") + << QLatin1String("Re\\d+:"); + + static QStringList defaultForwardPrefixes = QStringList() << QLatin1String("Fwd:") + << QLatin1String("FW:"); + + QStringList replyPrefixes; // = GlobalSettings::self()->replyPrefixes(); + if (replyPrefixes.isEmpty()) { + replyPrefixes = defaultReplyPrefixes; + } + + QStringList forwardPrefixes; // = GlobalSettings::self()->forwardPrefixes(); + if (forwardPrefixes.isEmpty()) { + forwardPrefixes = defaultForwardPrefixes; /* no configured list, so use the Fwd:/FW: defaults */ + } + + const QStringList prefixRegExps = replyPrefixes + forwardPrefixes; + + // construct a big regexp that + // 1. is anchored to the beginning of str (sans whitespace) + // 2.
matches at least one of the part regexps in prefixRegExps + const QString bigRegExp = QString::fromLatin1("^(?:\\s+|(?:%1))+\\s*").arg(prefixRegExps.join(QLatin1String(")|(?:"))); + + static QString regExpPattern; + static QRegExp regExp; + + regExp.setCaseSensitivity(Qt::CaseInsensitive); + if (regExpPattern != bigRegExp) { + // the prefixes have changed, so update the regexp + regExpPattern = bigRegExp; + regExp.setPattern(regExpPattern); + } + + if(regExp.isValid()) { + QString tmp = subject; + if (regExp.indexIn( tmp ) == 0) { + return tmp.remove(0, regExp.matchedLength()); + } + } else { + SinkWarning() << "bigRegExp = \"" + << bigRegExp << "\"\n" + << "prefix regexp is invalid!"; + } + + return subject; +} + + +void ThreadIndexer::updateThreadingIndex(const QByteArray &identifier, const ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction) /* Finds the thread for entity by message id, then parent message id, then normalized subject, creates a new thread id if none is found, and records the mappings in the type index. identifier is currently unused. */ +{ + auto messageId = entity.getProperty(Mail::MessageId::name); + auto parentMessageId = entity.getProperty(Mail::ParentMessageId::name); + auto subject = entity.getProperty(Mail::Subject::name); + + auto normalizedSubject = stripOffPrefixes(subject.toString()).toUtf8(); + + QVector thread; + + //a child already registered our thread.
+ thread = index().secondaryLookup(messageId, transaction); + + //If parent is already available, add to thread of parent + if (thread.isEmpty() && parentMessageId.isValid()) { + thread = index().secondaryLookup(parentMessageId, transaction); + SinkTrace() << "Found parent"; + } + if (thread.isEmpty()) { + //Try to lookup the thread by subject: + thread = index().secondaryLookup(normalizedSubject, transaction); + if (thread.isEmpty()) { + SinkTrace() << "Created a new thread "; + thread << QUuid::createUuid().toByteArray(); + } else { + } + } + + //We should have found the thread by now + if (!thread.isEmpty()) { + if (parentMessageId.isValid()) { + //Register parent with thread for when it becomes available + index().index(parentMessageId, thread.first(), transaction); + } + index().index(messageId, thread.first(), transaction); + index().index(thread.first(), messageId, transaction); + index().index(normalizedSubject, thread.first(), transaction); + } else { + SinkWarning() << "Couldn't find a thread for: " << messageId; + } +} + + +void ThreadIndexer::add(const ApplicationDomain::ApplicationDomainType &entity) /* Files the added entity under a thread using the current transaction. */ +{ + updateThreadingIndex(entity.identifier(), entity, transaction()); +} + +void ThreadIndexer::modify(const ApplicationDomain::ApplicationDomainType &old, const ApplicationDomain::ApplicationDomainType &entity) /* Currently does nothing. */ +{ + +} + +void ThreadIndexer::remove(const ApplicationDomain::ApplicationDomainType &entity) /* Currently does nothing. */ +{ + +} + diff --git a/common/mail/threadindexer.h b/common/mail/threadindexer.h new file mode 100644 index 0000000..064ae71 --- /dev/null +++ b/common/mail/threadindexer.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 Christian Mollekopf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#pragma once + +#include "indexer.h" + +namespace Sink { + +class ThreadIndexer : public Indexer /* Indexer that assigns each added mail to a thread, looked up by message id, parent message id or normalized subject. */ +{ +public: + typedef QSharedPointer Ptr; + virtual void add(const ApplicationDomain::ApplicationDomainType &entity) Q_DECL_OVERRIDE; + virtual void modify(const ApplicationDomain::ApplicationDomainType &old, const ApplicationDomain::ApplicationDomainType &entity) Q_DECL_OVERRIDE; + virtual void remove(const ApplicationDomain::ApplicationDomainType &entity) Q_DECL_OVERRIDE; +private: + void updateThreadingIndex(const QByteArray &identifier, const ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction); /* helper called by add() */ +}; + +} diff --git a/common/typeindex.cpp b/common/typeindex.cpp index 7920efc..8f5de4f 100644 --- a/common/typeindex.cpp +++ b/common/typeindex.cpp @@ -121,6 +121,10 @@ void TypeIndex::add(const QByteArray &identifier, const Sink::ApplicationDomain: auto indexer = mSortIndexer.value(it.key() + it.value()); indexer(identifier, value, sortValue, transaction); } + for (const auto &indexer : mCustomIndexer) { /* hand each custom indexer this index and the transaction, then let it index the entity */ + indexer->setup(this, &transaction); + indexer->add(entity); + } } void TypeIndex::remove(const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction) @@ -138,6 +142,10 @@ void TypeIndex::remove(const QByteArray &identifier, const Sink::ApplicationDoma Index(indexName(it.key(), it.value()), transaction).remove(propertyValue.toByteArray() + sortValue.toByteArray(),
identifier); } } + for (const auto &indexer : mCustomIndexer) { /* hand each custom indexer this index and the transaction, then let it process the removal */ + indexer->setup(this, &transaction); + indexer->remove(entity); + } } static QVector indexLookup(Index &index, Query::Comparator filter) diff --git a/common/typeindex.h b/common/typeindex.h index e11e673..041e04a 100644 --- a/common/typeindex.h +++ b/common/typeindex.h @@ -22,6 +22,7 @@ #include "storage.h" #include "query.h" #include "log.h" +#include "indexer.h" #include class TypeIndex @@ -51,6 +52,13 @@ public: { mSecondaryProperties.insert(Left::name, Right::name); } + + template + void addSecondaryPropertyIndexer() /* Appends a newly created CustomIndexer to mCustomIndexer; add() and remove() iterate that list. */ + { + mCustomIndexer << CustomIndexer::Ptr::create(); + } + void add(const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction); void remove(const QByteArray &identifier, const Sink::ApplicationDomain::ApplicationDomainType &entity, Sink::Storage::DataStore::Transaction &transaction); @@ -84,6 +92,7 @@ private: QMap mSortedProperties; // QMap mSecondaryProperties; + QList mCustomIndexer; QHash> mIndexer; QHash> mSortIndexer; }; -- cgit v1.2.3