From 6051c1247cde61bcc8e483eb4166e5a297c0ecc6 Mon Sep 17 00:00:00 2001
From: Christian Mollekopf
Date: Thu, 13 Oct 2016 18:38:35 +0200
Subject: Xapian based fulltext indexing

This cuts into the sync performance by about 40%, but gives us fast fulltext searching for all local content.
---
 common/mailpreprocessor.cpp | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

(limited to 'common/mailpreprocessor.cpp')

diff --git a/common/mailpreprocessor.cpp b/common/mailpreprocessor.cpp
index 8f5a77d..58cb15b 100644
--- a/common/mailpreprocessor.cpp
+++ b/common/mailpreprocessor.cpp
@@ -21,9 +21,11 @@
 
 #include
 #include
+#include <QTextDocument>
 #include
 
 #include "pipeline.h"
+#include "fulltextindex.h"
 #include "definitions.h"
 #include "applicationdomaintype.h"
 
@@ -45,13 +47,34 @@ static QList getContactList(const KMime:
     return list;
 }
 
+static QList<QPair<QString, QString>> processPart(KMime::Content* content)
+{
+    if (KMime::Headers::ContentType* type = content->contentType(false)) {
+        if (type->isMultipart() && !type->isSubtype("encrypted")) {
+            QList<QPair<QString, QString>> list;
+            for (const auto c : content->contents()) {
+                list << processPart(c);
+            }
+            return list;
+        } else if (type->isHTMLText()) {
+            // Only get HTML content, if no plain text content
+            QTextDocument doc;
+            doc.setHtml(content->decodedText());
+            return {{{}, {doc.toPlainText()}}};
+        } else if (type->isEmpty()) {
+            return {{{}, {content->decodedText()}}};
+        }
+    }
+    return {};
+}
+
 void MailPropertyExtractor::updatedIndexedProperties(Sink::ApplicationDomain::Mail &mail, const QByteArray &data)
 {
     if (data.isEmpty()) {
         return;
     }
     auto msg = KMime::Message::Ptr(new KMime::Message);
-    msg->setHead(KMime::CRLFtoLF(data));
+    msg->setContent(KMime::CRLFtoLF(data));
     msg->parse();
     if (!msg) {
         return;
@@ -103,6 +126,20 @@ void MailPropertyExtractor::updatedIndexedProperties(Sink::ApplicationDomain::Ma
     if (!parentMessageId.isEmpty()) {
         mail.setExtractedParentMessageId(parentMessageId);
     }
+    QList<QPair<QString, QString>> contentToIndex;
+    contentToIndex.append({{}, msg->subject()->asUnicodeString()});
+    if (KMime::Content* mainBody = msg->mainBodyPart("text/plain")) {
+        contentToIndex.append({{}, mainBody->decodedText()});
+    } else {
+        contentToIndex << processPart(msg.data());
+    }
+    contentToIndex.append({{}, msg->from(true)->asUnicodeString()});
+    contentToIndex.append({{}, msg->to(true)->asUnicodeString()});
+    contentToIndex.append({{}, msg->cc(true)->asUnicodeString()});
+    contentToIndex.append({{}, msg->bcc(true)->asUnicodeString()});
+
+    //Prepare content for indexing;
+    mail.setProperty("index", QVariant::fromValue(contentToIndex));
 }
 
 void MailPropertyExtractor::newEntity(Sink::ApplicationDomain::Mail &mail)
@@ -114,4 +151,3 @@ void MailPropertyExtractor::modifiedEntity(const Sink::ApplicationDomain::Mail &
 {
     updatedIndexedProperties(newMail, newMail.getMimeMessage());
 }
-
-- 
cgit v1.2.3