From e452707fdfbd61be1e5633b516b653b7337e7865 Mon Sep 17 00:00:00 2001 From: Christian Mollekopf Date: Mon, 29 May 2017 16:17:04 +0200 Subject: Reduced the messagetreeparser to approximately what we actually require While in a much more manageable state it's still not pretty. However, further refactoring can now gradually happen as we need to do further work on it. Things that should happen eventually: * Simplify the logic that creates the messageparts (we don't need the whole formatter plugin complexity) * Get rid of the nodehelper (let the parts hold the necessary data) * Get rid of partmetadata (let the part handle it) --- .../mime/mimetreeparser/objecttreeparser.cpp | 549 +++++++++++++++++++++ 1 file changed, 549 insertions(+) create mode 100644 framework/src/domain/mime/mimetreeparser/objecttreeparser.cpp (limited to 'framework/src/domain/mime/mimetreeparser/objecttreeparser.cpp') diff --git a/framework/src/domain/mime/mimetreeparser/objecttreeparser.cpp b/framework/src/domain/mime/mimetreeparser/objecttreeparser.cpp new file mode 100644 index 00000000..914298b9 --- /dev/null +++ b/framework/src/domain/mime/mimetreeparser/objecttreeparser.cpp @@ -0,0 +1,549 @@ +/* + objecttreeparser.cpp + + This file is part of KMail, the KDE mail client. + Copyright (c) 2003 Marc Mutz + Copyright (C) 2002-2004 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.net + Copyright (c) 2009 Andras Mantia + Copyright (c) 2015 Sandro Knauß + + KMail is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KMail is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +// MessageViewer includes + +#include "objecttreeparser.h" + +#include "bodypartformatterbasefactory.h" +#include "nodehelper.h" +#include "messagepart.h" +#include "partnodebodypart.h" + +#include "mimetreeparser_debug.h" + +#include "utils.h" +#include "bodypartformatter.h" +#include "util.h" + +#include +#include + +// KDE includes + +// Qt includes +#include +#include +#include +#include + +using namespace MimeTreeParser; + + +ObjectTreeParser::ObjectTreeParser() + : mNodeHelper(nullptr), + mTopLevelContent(nullptr), + mShowOnlyOneMimePart(false), + mHasPendingAsyncJobs(false), + mAllowAsync(false) +{ + init(); +} + +ObjectTreeParser::ObjectTreeParser(MimeTreeParser::NodeHelper *nodeHelper, + bool showOnlyOneMimePart) + : mNodeHelper(nodeHelper), + mTopLevelContent(nullptr), + mShowOnlyOneMimePart(showOnlyOneMimePart), + mHasPendingAsyncJobs(false), + mAllowAsync(false) +{ + init(); +} + +void ObjectTreeParser::init() +{ + if (!mNodeHelper) { + mNodeHelper = new NodeHelper(); + mDeleteNodeHelper = true; + } else { + mDeleteNodeHelper = false; + } +} + +ObjectTreeParser::~ObjectTreeParser() +{ + if (mDeleteNodeHelper) { + delete mNodeHelper; + 
mNodeHelper = nullptr; + } +} + +void ObjectTreeParser::setAllowAsync(bool allow) +{ + Q_ASSERT(!mHasPendingAsyncJobs); + mAllowAsync = allow; +} + +bool ObjectTreeParser::allowAsync() const +{ + return mAllowAsync; +} + +bool ObjectTreeParser::hasPendingAsyncJobs() const +{ + return mHasPendingAsyncJobs; +} + +QString ObjectTreeParser::plainTextContent() const +{ + return mPlainTextContent; +} + +QString ObjectTreeParser::htmlContent() const +{ + return mHtmlContent; +} + +static void print(KMime::Content *node, const QString prefix = {}) +{ + QByteArray mediaType("text"); + QByteArray subType("plain"); + if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && + !node->contentType()->subType().isEmpty()) { + mediaType = node->contentType()->mediaType(); + subType = node->contentType()->subType(); + } + qWarning() << prefix << "!" << mediaType << subType; + for (const auto c: node->contents()) { + print(c, prefix + QLatin1String(" ")); + } +} + +static void print(const MessagePart &messagePart, const QByteArray pre = {}) +{ + qWarning() << pre << "#" << messagePart.metaObject()->className(); + for (const auto &p: messagePart.subParts()) { + print(*p, pre + " "); + } +} + +void ObjectTreeParser::print() +{ + if (mTopLevelContent) { + ::print(mTopLevelContent); + } + if (mParsedPart) { + ::print(*mParsedPart); + } +} + +static KMime::Content *find(KMime::Content *node, const std::function &select) +{ + QByteArray mediaType("text"); + QByteArray subType("plain"); + if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && + !node->contentType()->subType().isEmpty()) { + mediaType = node->contentType()->mediaType(); + subType = node->contentType()->subType(); + } + if (select(node)) { + return node; + } + for (const auto c: node->contents()) { + if (const auto n = find(c, select)) { + return n; + } + } + return nullptr; +} + + +KMime::Content *ObjectTreeParser::find(const std::function &select) +{ + return 
::find(mTopLevelContent, select); +} + +/* + * Collect message parts bottom up. + * Filter to avoid evaluating a subtree. + * Select parts to include it in the result set. Selecting a part in a branch will keep any parent parts from being selected. + */ +static QVector collect(MessagePart::Ptr start, const std::function &filter, const std::function &select) +{ + MessagePartPtr ptr = start.dynamicCast(); + Q_ASSERT(ptr); + if (!filter(ptr)) { + return {}; + } + + QVector list; + if (ptr) { + for (const auto &p: ptr->subParts()) { + list << ::collect(p, filter, select); + } + } + //Don't consider this part if we already selected a subpart + if (list.isEmpty()) { + if (select(ptr)) { + list << start; + } + } + return list; +} + +static bool isAttachment(MessagePart::Ptr part) +{ + //TODO + // show everything but the first text/plain body as attachment + if (part->disposition() == MessagePart::Inline) { + return false; + } + if (part->disposition() == MessagePart::Attachment) { + return true; + } + // text/* w/o filename parameter should go inline + if (part->node()) { + const auto ct = part->node()->contentType(false); + if (ct && ct->isText() && ct->name().trimmed().isEmpty() && part->filename().trimmed().isEmpty()) { + return false; + } + return true; + } + return false; +} + +QVector ObjectTreeParser::collectContentParts() +{ + QVector contentParts = ::collect(mParsedPart, + [] (const MessagePartPtr &part) { + // return p->type() != "EncapsulatedPart"; + return true; + }, + [] (const MessagePartPtr &part) { + if (const auto attachment = dynamic_cast(part.data())) { + return false; + } else if (const auto text = dynamic_cast(part.data())) { + auto enc = dynamic_cast(text->parentPart()); + if (enc && enc->error()) { + return false; + } + return true; + } else if (const auto alternative = dynamic_cast(part.data())) { + return true; + } else if (const auto html = dynamic_cast(part.data())) { + return true; + } else if (const auto enc = dynamic_cast(part.data())) { + if 
(enc->error()) { + return true; + } + //If we have a textpart with encrypted and unencrypted subparts we want to return the textpart + if (dynamic_cast(enc->parentPart())) { + return false; + } + } else if (const auto sig = dynamic_cast(part.data())) { + //Signatures without subparts already contain the text + return !sig->hasSubParts(); + } + return false; + }); + return contentParts; +} + +QVector ObjectTreeParser::collectAttachmentParts() +{ + QVector contentParts = ::collect(mParsedPart, + [] (const MessagePartPtr &part) { + return true; + }, + [] (const MessagePartPtr &part) { + if (const auto attachment = dynamic_cast(part.data())) { + return true; + } + return false; + }); + return contentParts; +} + +void ObjectTreeParser::decryptParts() +{ + ::collect(mParsedPart, + [] (const MessagePartPtr &part) { return true; }, + [] (const MessagePartPtr &part) { + if (const auto enc = dynamic_cast(part.data())) { + enc->startDecryption(); + } + return false; + }); + print(); + ::collect(mParsedPart, + [] (const MessagePartPtr &part) { return true; }, + [] (const MessagePartPtr &part) { + if (const auto enc = dynamic_cast(part.data())) { + enc->startVerification(); + } + return false; + }); +} + +void ObjectTreeParser::importCertificates() +{ + QVector contentParts = ::collect(mParsedPart, + [] (const MessagePartPtr &part) { return true; }, + [] (const MessagePartPtr &part) { + if (const auto cert = dynamic_cast(part.data())) { + cert->import(); + } + return false; + }); +} + + +QString ObjectTreeParser::resolveCidLinks(const QString &html) +{ + auto text = html; + const auto rx = QRegExp(QLatin1String("(src)\\s*=\\s*(\"|')(cid:[^\"']+)\\2")); + int pos = 0; + while ((pos = rx.indexIn(text, pos)) != -1) { + const auto link = QUrl(rx.cap(3)); + pos += rx.matchedLength(); + auto cid = link.path(); + auto mailMime = const_cast(find([=] (KMime::Content *c) { + if (!c || !c->contentID(false)) { + return false; + } + return 
QString::fromLatin1(c->contentID(false)->identifier()) == cid; + })); + if (mailMime) { + const auto ct = mailMime->contentType(false); + if (!ct) { + qWarning() << "No content type, skipping"; + continue; + } + QMimeDatabase mimeDb; + const auto mimetype = mimeDb.mimeTypeForName(QString::fromLatin1(ct->mimeType())).name(); + if (mimetype.startsWith(QLatin1String("image/"))) { + //We reencode to base64 below. + const auto data = mailMime->decodedContent(); + if (data.isEmpty()) { + qWarning() << "Attachment is empty."; + continue; + } + text.replace(rx.cap(0), QString::fromLatin1("src=\"data:%1;base64,%2\"").arg(mimetype, QString::fromLatin1(data.toBase64()))); + } + } else { + qWarning() << "Failed to find referenced attachment: " << cid; + } + } + return text; +} + +//----------------------------------------------------------------------------- + +void ObjectTreeParser::parseObjectTree(const QByteArray &mimeMessage) +{ + const auto mailData = KMime::CRLFtoLF(mimeMessage); + mMsg = KMime::Message::Ptr(new KMime::Message); + mMsg->setContent(mailData); + mMsg->parse(); + parseObjectTree(mMsg.data()); +} + +void ObjectTreeParser::parseObjectTree(KMime::Content *node) +{ + mTopLevelContent = node; + mParsedPart = parseObjectTreeInternal(node, showOnlyOneMimePart()); + + //Gather plaintext and html content + if (mParsedPart) { + //Find relevant plaintext parts and set plaintext + if (auto mp = toplevelTextNode(mParsedPart)) { + if (auto _mp = mp.dynamicCast()) { + mPlainTextContent += _mp->mNode->decodedText(); + mPlainTextContentCharset += NodeHelper::charset(_mp->mNode); + } else if (auto _mp = mp.dynamicCast()) { + if (_mp->mChildNodes.contains(Util::MultipartPlain)) { + mPlainTextContent += _mp->mChildNodes[Util::MultipartPlain]->decodedText(); + mPlainTextContentCharset += NodeHelper::charset(_mp->mChildNodes[Util::MultipartPlain]); + } + } + } + + //Find html parts and copy content + QVector contentParts = ::collect(mParsedPart, + [] (const MessagePartPtr &part) 
{ + return true; + }, + [] (const MessagePartPtr &part) { + if (const auto html = dynamic_cast(part.data())) { + return true; + } + return false; + }); + for (const auto &part : contentParts) { + mHtmlContent += part->text(); + mHtmlContentCharset = part->charset(); + } + } +} + +MessagePartPtr ObjectTreeParser::parsedPart() const +{ + return mParsedPart; +} + +MessagePartPtr ObjectTreeParser::processType(KMime::Content *node, const QByteArray &mediaType, const QByteArray &subType, bool onlyOneMimePart) +{ + static MimeTreeParser::BodyPartFormatterBaseFactory factory; + const auto sub = factory.subtypeRegistry(mediaType.constData()); + auto range = sub.equal_range(subType.constData()); + for (auto it = range.first; it != range.second; ++it) { + const auto formatter = (*it).second; + if (!formatter) { + continue; + } + PartNodeBodyPart part(this, mTopLevelContent, node, mNodeHelper); + if (const MessagePart::Ptr result = formatter->process(part)) { + return result; + } + } + return {}; +} + +MessagePart::Ptr ObjectTreeParser::parseObjectTreeInternal(KMime::Content *node, bool onlyOneMimePart) +{ + if (!node) { + return MessagePart::Ptr(); + } + + // reset pending async jobs state (we'll rediscover pending jobs as we go) + mHasPendingAsyncJobs = false; + + // reset "processed" flags for... + if (onlyOneMimePart) { + // ... this node and all descendants + mNodeHelper->setNodeUnprocessed(node, false); + if (!node->contents().isEmpty()) { + mNodeHelper->setNodeUnprocessed(node, true); + } + } else if (!node->parent()) { + // ...this node and all it's siblings and descendants + mNodeHelper->setNodeUnprocessed(node, true); + } + + const bool isRoot = node->isTopLevel(); + auto parsedPart = MessagePart::Ptr(new MessagePartList(this)); + parsedPart->setIsRoot(isRoot); + KMime::Content *parent = node->parent(); + auto contents = parent ? 
parent->contents() : KMime::Content::List(); + if (contents.isEmpty()) { + contents.append(node); + } + int i = contents.indexOf(const_cast(node)); + for (; i < contents.size(); ++i) { + node = contents.at(i); + if (mNodeHelper->nodeProcessed(node)) { + continue; + } + + QByteArray mediaType("text"); + QByteArray subType("plain"); + if (node->contentType(false) && !node->contentType()->mediaType().isEmpty() && + !node->contentType()->subType().isEmpty()) { + mediaType = node->contentType()->mediaType(); + subType = node->contentType()->subType(); + } + + //Try the specific type handler + if (auto mp = processType(node, mediaType, subType, onlyOneMimePart)) { + if (mp) { + parsedPart->appendSubPart(mp); + } + //Fallback to the generic handler + } else if (auto mp = processType(node, mediaType, "*", onlyOneMimePart)) { + if (mp) { + parsedPart->appendSubPart(mp); + } + //Fallback to the default handler + } else { + if (auto mp = defaultHandling(node, onlyOneMimePart)) { + parsedPart->appendSubPart(mp); + } + } + mNodeHelper->setNodeProcessed(node, false); + + if (onlyOneMimePart) { + break; + } + } + + return parsedPart; +} + +MessagePart::Ptr ObjectTreeParser::defaultHandling(KMime::Content *node, bool onlyOneMimePart) +{ + if (node->contentType()->mimeType() == QByteArrayLiteral("application/octet-stream") && + (node->contentType()->name().endsWith(QLatin1String("p7m")) || + node->contentType()->name().endsWith(QLatin1String("p7s")) || + node->contentType()->name().endsWith(QLatin1String("p7c")) + )) { + if (auto mp = processType(node, "application", "pkcs7-mime", onlyOneMimePart)) { + return mp; + } + } + + const auto mp = AttachmentMessagePart::Ptr(new AttachmentMessagePart(this, node)); + return mp; +} + +const QTextCodec *ObjectTreeParser::codecFor(KMime::Content *node) const +{ + Q_ASSERT(node); + return mNodeHelper->codec(node); +} + +QByteArray ObjectTreeParser::plainTextContentCharset() const +{ + return mPlainTextContentCharset; +} + +QByteArray 
ObjectTreeParser::htmlContentCharset() const +{ + return mHtmlContentCharset; +} + +bool ObjectTreeParser::showOnlyOneMimePart() const +{ + return mShowOnlyOneMimePart; +} + +void ObjectTreeParser::setShowOnlyOneMimePart(bool show) +{ + mShowOnlyOneMimePart = show; +} + +MimeTreeParser::NodeHelper *ObjectTreeParser::nodeHelper() const +{ + return mNodeHelper; +} -- cgit v1.2.3