summary refs log tree commit diff stats
path: root/common/mailpreprocessor.cpp
diff options
context:
space:
mode:
author Christian Mollekopf <chrigi_1@fastmail.fm> 2016-09-20 17:18:21 +0200
committer Christian Mollekopf <chrigi_1@fastmail.fm> 2016-09-20 17:18:21 +0200
commit ebc5c48c03b6145e604da7c313b35321d0a71142 (patch)
tree 1cee00a9fa4faa4995c0a50f01703ac5672c8797 /common/mailpreprocessor.cpp
parent 4a14a6fade947aa830d3f21598a4a6ba7316b933 (diff)
downloadsink-ebc5c48c03b6145e604da7c313b35321d0a71142.tar.gz
sink-ebc5c48c03b6145e604da7c313b35321d0a71142.zip
A first draft of the threading algorithm.
Diffstat (limited to 'common/mailpreprocessor.cpp')
-rw-r--r-- common/mailpreprocessor.cpp 102
1 files changed, 76 insertions, 26 deletions
diff --git a/common/mailpreprocessor.cpp b/common/mailpreprocessor.cpp
index 2863ad4..0534338 100644
--- a/common/mailpreprocessor.cpp
+++ b/common/mailpreprocessor.cpp
@@ -36,48 +36,98 @@ QString MailPropertyExtractor::getFilePathFromMimeMessagePath(const QString &s)
36 return s; 36 return s;
37} 37}
38 38
39void MailPropertyExtractor::updatedIndexedProperties(Sink::ApplicationDomain::Mail &mail) 39struct MimeMessageReader {
40{ 40 MimeMessageReader(const QString &mimeMessagePath)
41 const auto mimeMessagePath = getFilePathFromMimeMessagePath(mail.getMimeMessagePath()); 41 : f(mimeMessagePath),
42 if (mimeMessagePath.isNull()) { 42 mapped(0),
43 SinkTrace() << "No mime message"; 43 mappedSize(0)
44 return; 44 {
45 } 45 if (mimeMessagePath.isNull()) {
46 SinkTrace() << "Updating indexed properties " << mimeMessagePath; 46 SinkTrace() << "No mime message";
47 QFile f(mimeMessagePath); 47 return;
48 if (!f.open(QIODevice::ReadOnly)) { 48 }
49 SinkWarning() << "Failed to open the file: " << mimeMessagePath; 49 SinkTrace() << "Updating indexed properties " << mimeMessagePath;
50 return; 50 if (!f.open(QIODevice::ReadOnly)) {
51 } 51 SinkWarning() << "Failed to open the file: " << mimeMessagePath;
52 if (!f.size()) { 52 return;
53 SinkWarning() << "The file is empty."; 53 }
54 return; 54 if (!f.size()) {
55 SinkWarning() << "The file is empty.";
56 return;
57 }
58 mappedSize = qMin((qint64)8000, f.size());
59 mapped = f.map(0, mappedSize);
60 if (!mapped) {
61 SinkWarning() << "Failed to map the file: " << f.errorString();
62 return;
63 }
55 } 64 }
56 const auto mappedSize = qMin((qint64)8000, f.size()); 65
57 auto mapped = f.map(0, mappedSize); 66 KMime::Message::Ptr mimeMessage()
58 if (!mapped) { 67 {
59 SinkWarning() << "Failed to map the file: " << f.errorString(); 68 if (!mapped) {
60 return; 69 return KMime::Message::Ptr();
70 }
71 Q_ASSERT(mapped);
72 Q_ASSERT(mappedSize);
73 auto msg = KMime::Message::Ptr(new KMime::Message);
74 msg->setHead(KMime::CRLFtoLF(QByteArray::fromRawData(reinterpret_cast<const char*>(mapped), mappedSize)));
75 msg->parse();
76 return msg;
61 } 77 }
62 78
63 KMime::Message *msg = new KMime::Message; 79 QFile f;
64 msg->setHead(KMime::CRLFtoLF(QByteArray::fromRawData(reinterpret_cast<const char*>(mapped), mappedSize))); 80 uchar *mapped;
65 msg->parse(); 81 qint64 mappedSize;
82};
66 83
84static void updatedIndexedProperties(Sink::ApplicationDomain::Mail &mail, KMime::Message::Ptr msg)
85{
67 mail.setExtractedSubject(msg->subject(true)->asUnicodeString()); 86 mail.setExtractedSubject(msg->subject(true)->asUnicodeString());
68 mail.setExtractedSender(msg->from(true)->asUnicodeString()); 87 mail.setExtractedSender(msg->from(true)->asUnicodeString());
69 mail.setExtractedSenderName(msg->from(true)->asUnicodeString()); 88 mail.setExtractedSenderName(msg->from(true)->asUnicodeString());
70 mail.setExtractedDate(msg->date(true)->dateTime()); 89 mail.setExtractedDate(msg->date(true)->dateTime());
90
91 //The rest should never change, unless we didn't have the headers available initially.
92 auto messageId = msg->messageID(true)->identifier();
93
94 //Ensure the messageId is unique.
95 //If there already is one with the same id we'd have to assign a new message id, which probably doesn't make any sense.
96
97 //The last is the parent
98 auto references = msg->references(true)->identifiers();
99
100 //The first is the parent
101 auto inReplyTo = msg->inReplyTo(true)->identifiers();
102 QByteArray parentMessageId;
103 if (!references.isEmpty()) {
104 parentMessageId = references.last();
105 //TODO we could use the rest of the references header to complete the ancestry in case we have missing parents.
106 } else {
107 if (!inReplyTo.isEmpty()) {
108 //According to RFC5256 we should ignore all but the first
109 parentMessageId = inReplyTo.first();
110 }
111 }
112
113 mail.setExtractedMessageId(messageId);
114 if (!parentMessageId.isEmpty()) {
115 mail.setExtractedParentMessageId(parentMessageId);
116 }
71} 117}
72 118
73void MailPropertyExtractor::newEntity(Sink::ApplicationDomain::Mail &mail, Sink::Storage::Transaction &transaction) 119void MailPropertyExtractor::newEntity(Sink::ApplicationDomain::Mail &mail, Sink::Storage::Transaction &transaction)
74{ 120{
75 updatedIndexedProperties(mail); 121 MimeMessageReader mimeMessageReader(getFilePathFromMimeMessagePath(mail.getMimeMessagePath()));
122 auto msg = mimeMessageReader.mimeMessage();
123 updatedIndexedProperties(mail, msg);
76} 124}
77 125
78void MailPropertyExtractor::modifiedEntity(const Sink::ApplicationDomain::Mail &oldMail, Sink::ApplicationDomain::Mail &newMail,Sink::Storage::Transaction &transaction) 126void MailPropertyExtractor::modifiedEntity(const Sink::ApplicationDomain::Mail &oldMail, Sink::ApplicationDomain::Mail &newMail,Sink::Storage::Transaction &transaction)
79{ 127{
80 updatedIndexedProperties(newMail); 128 MimeMessageReader mimeMessageReader(getFilePathFromMimeMessagePath(newMail.getMimeMessagePath()));
129 auto msg = mimeMessageReader.mimeMessage();
130 updatedIndexedProperties(newMail, msg);
81} 131}
82 132
83 133