From 6fc76bc690e5a2e7748936fa835338d820c7e7de Mon Sep 17 00:00:00 2001
From: Christian Mollekopf
Date: Wed, 21 Sep 2016 14:47:06 +0200
Subject: Merge mails by subject

Add stripOffPrefixes(), which removes leading reply/forward prefixes
("Re:", "Re[n]:", "Ren:", "Fwd:", "FW:", matched case-insensitively and
repeatedly) from a subject.

updateThreadingIndex() now also maintains a "subjectThreadId" index keyed
by the prefix-stripped subject. When a mail's parent is not (yet) in the
msgId index, the thread is first looked up by normalized subject; only if
that lookup yields nothing is a new thread id created.

Add MailThreadTest::testIndexInMixedOrder(), which indexes the last mail
of a three-mail thread first and the first mail second, and expects a
single thread leader in both cases.

---
Reviewer notes (this section is not part of the commit message):

 * Fixed: the fallback for an empty forwardPrefixes list assigned
   defaultReplyPrefixes, so defaultForwardPrefixes was never used and
   "Fwd:"/"FW:" were never stripped. It now assigns
   defaultForwardPrefixes. The change is line-count neutral, so hunk
   headers and the diffstat are unchanged.
 * NOTE(review): this copy of the patch was whitespace-collapsed; line
   breaks were rebuilt from the +/- markers and agree with the hunk
   header counts, but indentation depth is reconstructed - confirm
   against the tree before applying.
 * NOTE(review): template argument lists look stripped by the extraction
   (e.g. "syncThen>(", "QList &mails", "request()", "sort()",
   "fetchAll(query)", "TypeImplementation::index"), as does the author
   address. They cannot be recovered from this text and are left as
   found - restore from the original commit.
 * NOTE(review): in the subject-match branch msgIdThreadIdIndex.add(
   messageId, thread) is followed by the same unconditional call after
   the if/else; presumably redundant - verify Index::add semantics.
 * NOTE(review): an empty normalized subject is used as an index key
   like any other; presumably all subject-less mails without a known
   parent would then share one thread - confirm this is intended.

 common/domain/mail.cpp   |  79 ++++++++++++++++++++++++++++++++----
 tests/mailthreadtest.cpp | 101 ++++++++++++++++++++++++++++++++++++++++++-----
 tests/mailthreadtest.h   |   1 +
 3 files changed, 165 insertions(+), 16 deletions(-)

diff --git a/common/domain/mail.cpp b/common/domain/mail.cpp
index 859ebef..0c737fa 100644
--- a/common/domain/mail.cpp
+++ b/common/domain/mail.cpp
@@ -63,34 +63,98 @@ static TypeIndex &getIndex()
     return *index;
 }
 
+static QString stripOffPrefixes(const QString &subject)
+{
+    //TODO this hardcoded list is probably not good enough (especially regarding internationalization)
+    //TODO this whole routine, including internationalized re/fwd ... should go into some library.
+    //We'll require the same for generating reply/forward subjects in kube
+    static QStringList defaultReplyPrefixes = QStringList() << QLatin1String("Re\\s*:")
+                                                            << QLatin1String("Re\\[\\d+\\]:")
+                                                            << QLatin1String("Re\\d+:");
+
+    static QStringList defaultForwardPrefixes = QStringList() << QLatin1String("Fwd:")
+                                                              << QLatin1String("FW:");
+
+    QStringList replyPrefixes; // = GlobalSettings::self()->replyPrefixes();
+    if (replyPrefixes.isEmpty()) {
+        replyPrefixes = defaultReplyPrefixes;
+    }
+
+    QStringList forwardPrefixes; // = GlobalSettings::self()->forwardPrefixes();
+    if (forwardPrefixes.isEmpty()) {
+        forwardPrefixes = defaultForwardPrefixes;
+    }
+
+    const QStringList prefixRegExps = replyPrefixes + forwardPrefixes;
+
+    // construct a big regexp that
+    // 1. is anchored to the beginning of str (sans whitespace)
+    // 2. matches at least one of the part regexps in prefixRegExps
+    const QString bigRegExp = QString::fromLatin1("^(?:\\s+|(?:%1))+\\s*").arg(prefixRegExps.join(QLatin1String(")|(?:")));
+
+    static QString regExpPattern;
+    static QRegExp regExp;
+
+    regExp.setCaseSensitivity(Qt::CaseInsensitive);
+    if (regExpPattern != bigRegExp) {
+        // the prefixes have changed, so update the regexp
+        regExpPattern = bigRegExp;
+        regExp.setPattern(regExpPattern);
+    }
+
+    if(regExp.isValid()) {
+        QString tmp = subject;
+        if (regExp.indexIn( tmp ) == 0) {
+            return tmp.remove(0, regExp.matchedLength());
+        }
+    } else {
+        SinkWarning() << "bigRegExp = \""
+                      << bigRegExp << "\"\n"
+                      << "prefix regexp is invalid!";
+    }
+
+    return subject;
+}
+
+
 static void updateThreadingIndex(const QByteArray &identifier, const BufferAdaptor &bufferAdaptor, Sink::Storage::Transaction &transaction)
 {
     auto messageId = bufferAdaptor.getProperty(Mail::MessageId::name).toByteArray();
     auto parentMessageId = bufferAdaptor.getProperty(Mail::ParentMessageId::name).toByteArray();
+    auto subject = bufferAdaptor.getProperty(Mail::Subject::name).toString();
 
     Index msgIdIndex("msgId", transaction);
     Index msgIdThreadIdIndex("msgIdThreadId", transaction);
+    Index subjectThreadIdIndex("subjectThreadId", transaction);
 
     //Add the message to the index
     Q_ASSERT(msgIdIndex.lookup(messageId).isEmpty());
     msgIdIndex.add(messageId, identifier);
 
-    //If parent is already available, add to thread of parent
+    auto normalizedSubject = stripOffPrefixes(subject).toUtf8();
+
     QByteArray thread;
+    //If parent is already available, add to thread of parent
     if (!parentMessageId.isEmpty() && !msgIdIndex.lookup(parentMessageId).isEmpty()) {
         thread = msgIdThreadIdIndex.lookup(parentMessageId);
         msgIdThreadIdIndex.add(messageId, thread);
+        subjectThreadIdIndex.add(normalizedSubject, thread);
     } else {
-        thread = QUuid::createUuid().toByteArray();
-        if (!parentMessageId.isEmpty()) {
-            //Register parent with thread for when it becomes available
-            msgIdThreadIdIndex.add(parentMessageId, thread);
+        //Try to lookup the thread by subject:
+        thread = subjectThreadIdIndex.lookup(normalizedSubject);
+        if (!thread.isEmpty()) {
+            msgIdThreadIdIndex.add(messageId, thread);
+        } else {
+            thread = QUuid::createUuid().toByteArray();
+            subjectThreadIdIndex.add(normalizedSubject, thread);
+            if (!parentMessageId.isEmpty()) {
+                //Register parent with thread for when it becomes available
+                msgIdThreadIdIndex.add(parentMessageId, thread);
+            }
         }
     }
     Q_ASSERT(!thread.isEmpty());
     msgIdThreadIdIndex.add(messageId, thread);
-
-    //Look for parentMessageId and resolve to local id if available
 }
 
 void TypeImplementation::index(const QByteArray &identifier, const BufferAdaptor &bufferAdaptor, Sink::Storage::Transaction &transaction)
@@ -173,6 +237,7 @@ protected:
             if (rootCollection->contains(thread)) {
                 auto date = rootCollection->value(thread);
                 //The mail we have in our result already is newer, so we can ignore this one
+                //This is always true during the initial query if the set has been sorted by date.
                 if (date > getProperty(entity.entity(), ApplicationDomain::Mail::Date::name).toDateTime()) {
                     return false;
                 }
diff --git a/tests/mailthreadtest.cpp b/tests/mailthreadtest.cpp
index 1bbe713..a3df56b 100644
--- a/tests/mailthreadtest.cpp
+++ b/tests/mailthreadtest.cpp
@@ -78,17 +78,100 @@ void MailThreadTest::testListThreadLeader()
     auto job = Store::fetchAll(query).syncThen>([](const QList &mails) {
         QCOMPARE(mails.size(), 1);
         QVERIFY(mails.first()->getSubject().startsWith(QString("ThreadLeader")));
-        const auto data = mails.first()->getMimeMessage();
-        QVERIFY(!data.isEmpty());
-
-        KMime::Message m;
-        m.setContent(data);
-        m.parse();
-        QCOMPARE(mails.first()->getSubject(), m.subject(true)->asUnicodeString());
-        QVERIFY(!mails.first()->getFolder().isEmpty());
-        QVERIFY(mails.first()->getDate().isValid());
     });
     VERIFYEXEC(job);
 }
 
+/*
+ * Thread:
+ * 1.
+ * 2.
+ * 3.
+ *
+ * 3. first, should result in a new thread.
+ * 1. second, should be merged by subject
+ * 2. last, should complete the thread.
+ */
+void MailThreadTest::testIndexInMixedOrder()
+{
+    auto folder = Folder::create(mResourceInstanceIdentifier);
+    folder.setName("folder");
+    VERIFYEXEC(Store::create(folder));
+
+    auto message1 = KMime::Message::Ptr::create();
+    message1->subject(true)->fromUnicodeString("1", "utf8");
+    message1->messageID(true)->generate("foobar.com");
+    message1->date(true)->setDateTime(QDateTime::currentDateTimeUtc());
+    message1->assemble();
+
+    auto message2 = KMime::Message::Ptr::create();
+    message2->subject(true)->fromUnicodeString("Re: 1", "utf8");
+    message2->messageID(true)->generate("foobar.com");
+    message2->inReplyTo(true)->appendIdentifier(message1->messageID(true)->identifier());
+    message2->date(true)->setDateTime(QDateTime::currentDateTimeUtc().addSecs(1));
+    message2->assemble();
+
+    auto message3 = KMime::Message::Ptr::create();
+    message3->subject(true)->fromUnicodeString("Re: Re: 1", "utf8");
+    message3->messageID(true)->generate("foobar.com");
+    message3->inReplyTo(true)->appendIdentifier(message2->messageID(true)->identifier());
+    message3->date(true)->setDateTime(QDateTime::currentDateTimeUtc().addSecs(2));
+    message3->assemble();
+
+    {
+        auto mail = Mail::create(mResourceInstanceIdentifier);
+        mail.setMimeMessage(message3->encodedContent());
+        mail.setFolder(folder);
+        VERIFYEXEC(Store::create(mail));
+    }
+    VERIFYEXEC(ResourceControl::flushMessageQueue(QByteArrayList() << mResourceInstanceIdentifier));
+
+    Sink::Query query;
+    query.resources << mResourceInstanceIdentifier;
+    query.request().request().request().request();
+    query.threadLeaderOnly = true;
+    query.sort();
+    query.filter(folder);
+
+    {
+        auto job = Store::fetchAll(query)
+            .syncThen>([=](const QList &mails) {
+                QCOMPARE(mails.size(), 1);
+                auto mail = *mails.first();
+                QCOMPARE(mail.getSubject(), QString::fromLatin1("Re: Re: 1"));
+            });
+        VERIFYEXEC(job);
+    }
+
+    {
+        auto mail = Mail::create(mResourceInstanceIdentifier);
+        mail.setMimeMessage(message1->encodedContent());
+        mail.setFolder(folder);
+        VERIFYEXEC(Store::create(mail));
+    }
+
+    VERIFYEXEC(ResourceControl::flushMessageQueue(QByteArrayList() << mResourceInstanceIdentifier));
+    {
+        auto job = Store::fetchAll(query)
+            .syncThen>([=](const QList &mails) {
+                QCOMPARE(mails.size(), 1);
+                auto mail = *mails.first();
+                QCOMPARE(mail.getSubject(), QString::fromLatin1("Re: Re: 1"));
+            });
+        VERIFYEXEC(job);
+        //TODO ensure we also find message 1 as part of thread.
+    }
+
+    /* VERIFYEXEC(Store::remove(mail)); */
+    /* VERIFYEXEC(ResourceControl::flushMessageQueue(QByteArrayList() << mResourceInstanceIdentifier)); */
+    /* { */
+    /*     auto job = Store::fetchAll(Query::RequestedProperties(QByteArrayList() << Mail::Folder::name << Mail::Subject::name)) */
+    /*         .syncThen>([=](const QList &mails) { */
+    /*             QCOMPARE(mails.size(), 0); */
+    /*         }); */
+    /*     VERIFYEXEC(job); */
+    /* } */
+    /* VERIFYEXEC(Store::flushReplayQueue(QByteArrayList() << mResourceInstanceIdentifier)); */
+}
+
 #include "mailthreadtest.moc"
diff --git a/tests/mailthreadtest.h b/tests/mailthreadtest.h
index d6b9c24..8730ec6 100644
--- a/tests/mailthreadtest.h
+++ b/tests/mailthreadtest.h
@@ -51,6 +51,7 @@ private slots:
     void cleanup();
 
     void testListThreadLeader();
+    void testIndexInMixedOrder();
 };
 
 }
-- 
cgit v1.2.3