From 5d529c9252c386abaa76ec1a981ff99f50ce3dc0 Mon Sep 17 00:00:00 2001
From: Christian Mollekopf
Date: Tue, 23 May 2017 10:51:59 +0200
Subject: Linkify plain text mails

---
 framework/src/CMakeLists.txt               |   1 +
 framework/src/domain/htmlutils.cpp         | 279 +++++++++++++++++++++++++++++
 framework/src/domain/htmlutils.h           |  25 +++
 framework/src/domain/messageparser_new.cpp |   3 +-
 4 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 framework/src/domain/htmlutils.cpp
 create mode 100644 framework/src/domain/htmlutils.h

(limited to 'framework/src')

diff --git a/framework/src/CMakeLists.txt b/framework/src/CMakeLists.txt
index 10773f8a..54dcd4dc 100644
--- a/framework/src/CMakeLists.txt
+++ b/framework/src/CMakeLists.txt
@@ -36,6 +36,7 @@ set(SRCS
     domain/contactcontroller.cpp
     domain/controller.cpp
     domain/peoplemodel.cpp
+    domain/htmlutils.cpp
     accounts/accountfactory.cpp
     accounts/accountsmodel.cpp
     fabric.cpp
diff --git a/framework/src/domain/htmlutils.cpp b/framework/src/domain/htmlutils.cpp
new file mode 100644
index 00000000..156bcc48
--- /dev/null
+++ b/framework/src/domain/htmlutils.cpp
@@ -0,0 +1,279 @@
+/*
+    Copyright (c) 2017 Christian Mollekopf
+
+    This library is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Library General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    This library is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+    License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB. If not, write to the
+    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301, USA.
+*/
+#include "htmlutils.h"
+
+#include <QMap>
+
+/**
+ * Decodes the named entities amp, lt, gt, quot, apos and nbsp into the characters
+ * they stand for. Any other "&name;" sequence is dropped from the result.
+ */
+static QString resolveEntities(const QString &in)
+{
+    QString out;
+
+    for(int i = 0; i < in.length(); ++i) {
+        if(in.at(i) != '&') {
+            out += in.at(i);
+            continue;
+        }
+
+        // find a semicolon
+        const int n = in.indexOf(';', i + 1);
+        if(n == -1) {
+            // a stray '&' is plain text: keep it rather than dropping the rest of the input
+            out += in.at(i);
+            continue;
+        }
+        const QString type = in.mid(i + 1, n - i - 1);
+        i = n; // should be n+1, but we'll let the loop increment do it
+
+        if(type == "amp")
+            out += '&';
+        else if(type == "lt")
+            out += '<';
+        else if(type == "gt")
+            out += '>';
+        else if(type == "quot")
+            out += '\"';
+        else if(type == "apos")
+            out += '\'';
+        else if(type == "nbsp")
+            out += QChar(0xa0);
+    }
+
+    return out;
+}
+
+/**
+ * Case-insensitively checks whether str2 occurs in str1 at position at.
+ */
+static bool linkify_pmatch(const QString &str1, int at, const QString &str2)
+{
+    if(str2.length() > (str1.length() - at))
+        return false;
+
+    for(int n = 0; n < str2.length(); ++n) {
+        if(str1.at(n + at).toLower() != str2.at(n).toLower())
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Checks whether c is one of the characters in charlist.
+ */
+static bool linkify_isOneOf(const QChar &c, const QString &charlist)
+{
+    return charlist.contains(c);
+}
+
+/**
+ * Percent-encodes a few dangerous html characters (" ' ` < >) and copies
+ * everything else unchanged.
+ */
+static QString linkify_htmlsafe(const QString &in)
+{
+    QString out;
+
+    for(int n = 0; n < in.length(); ++n) {
+        const QChar c = in.at(n);
+        if(linkify_isOneOf(c, "\"\'`<>")) {
+            // hex encode; these characters are all ASCII, so the code point is the byte value
+            out.append(QString::asprintf("%%%02X", static_cast<unsigned int>(c.unicode())));
+        } else {
+            out.append(c);
+        }
+    }
+
+    return out;
+}
+
+/**
+ * Rejects link candidates that are empty or end in a dot.
+ */
+static bool linkify_okUrl(const QString &url)
+{
+    // endsWith() copes with an empty string, indexing the last character does not
+    return !url.isEmpty() && !url.endsWith(QChar('.'));
+}
+
+/**
+ * Makes sure that there is an '@' and a '.' after it, that there is at least
+ * one char for each of the three sections, and that there is no "..".
+ */
+static bool linkify_okEmail(const QString &addy)
+{
+    const int n = addy.indexOf('@');
+    if(n <= 0)
+        return false; // no '@', or nothing in front of it
+    const int d = addy.indexOf('.', n + 1);
+    if(d == -1 || d == n + 1)
+        return false; // no '.', or nothing between the '@' and the '.'
+    if(d >= addy.length() - 1)
+        return false; // nothing after the '.'
+    if(addy.contains(".."))
+        return false;
+
+    return true;
+}
+
+namespace {
+
+/** A link prefix recognised by HtmlUtils::linkify(). */
+struct LinkPrefix {
+    const char *match; // text to look for, compared case-insensitively
+    int skip;          // how far the scan position moves once it matched
+    const char *href;  // what gets put in front of the link target
+};
+
+const LinkPrefix linkPrefixes[] = {
+    {"xmpp:", 5, ""},
+    {"mailto:", 7, ""},
+    {"http://", 7, ""},
+    {"https://", 8, ""},
+    {"ftp://", 6, ""},
+    {"news://", 7, ""},
+    {"ed2k://", 7, ""},
+    {"magnet:", 7, ""},
+    // bare host names get a scheme and are scanned from their first character
+    {"www.", 0, "http://"},
+    {"ftp.", 0, "ftp://"},
+};
+const int linkPrefixCount = sizeof(linkPrefixes) / sizeof(linkPrefixes[0]);
+
+}
+
+/**
+ * takes a richtext string and heuristically adds links for uris of common protocols
+ * @return a richtext string with link markup added
+ */
+QString HtmlUtils::linkify(const QString &in)
+{
+    QString out = in;
+
+    for(int n = 0; n < out.length(); ++n) {
+        int x1 = n; // start of the candidate
+        int x2;     // one past its end
+        QString href;
+
+        bool isUrl = false;
+        for(int p = 0; p < linkPrefixCount; ++p) {
+            if(linkify_pmatch(out, n, linkPrefixes[p].match)) {
+                n += linkPrefixes[p].skip;
+                href = linkPrefixes[p].href;
+                isUrl = true;
+                break;
+            }
+        }
+
+        if(isUrl) {
+            // make sure the previous char is not alphanumeric
+            if(x1 > 0 && out.at(x1 - 1).isLetterOrNumber())
+                continue;
+
+            // find whitespace (or end)
+            QMap<QChar, int> brackets;
+            brackets['('] = brackets[')'] = brackets['['] = brackets[']'] = brackets['{'] = brackets['}'] = 0;
+            QMap<QChar, QChar> openingBracket;
+            openingBracket[')'] = '(';
+            openingBracket[']'] = '[';
+            openingBracket['}'] = '{';
+            for(x2 = n; x2 < out.length(); ++x2) {
+                if(out.at(x2).isSpace() || linkify_isOneOf(out.at(x2), "\"\'`<>")
+                    || linkify_pmatch(out, x2, "&quot;") || linkify_pmatch(out, x2, "&apos;")
+                    || linkify_pmatch(out, x2, "&gt;") || linkify_pmatch(out, x2, "&lt;") ) {
+                    break;
+                }
+                // contains() instead of keys().contains(): no list copy per character
+                if(brackets.contains(out.at(x2))) {
+                    ++brackets[out.at(x2)];
+                }
+            }
+            const int len = x2 - x1;
+            const QString pre = resolveEntities(out.mid(x1, len));
+
+            // go backward hacking off unwanted punctuation
+            int cutoff;
+            for(cutoff = pre.length() - 1; cutoff >= 0; --cutoff) {
+                const QChar c = pre.at(cutoff);
+                if(!linkify_isOneOf(c, "!?,.()[]{}<>\""))
+                    break;
+                // a closing bracket stays unless closers of its kind outnumber the openers
+                if(linkify_isOneOf(c, ")]}")
+                    && brackets[c] - brackets[openingBracket[c]] <= 0 ) {
+                    break; // in theory, there could be == above, but these are urls, not math ;)
+                }
+                if(brackets.contains(c)) {
+                    --brackets[c];
+                }
+            }
+            ++cutoff;
+
+            const QString link = pre.mid(0, cutoff);
+            if(!linkify_okUrl(link)) {
+                n = x1 + link.length();
+                continue;
+            }
+            href += link;
+            // attributes need to be encoded too.
+            href = href.toHtmlEscaped();
+            href = linkify_htmlsafe(href);
+            const QString linked = QString("<a href=\"%1\">").arg(href) + link.toHtmlEscaped() + "</a>" + pre.mid(cutoff).toHtmlEscaped();
+            out.replace(x1, len, linked);
+            n = x1 + linked.length() - 1;
+        } else if(linkify_pmatch(out, n, "@")) {
+            // NOTE(review): scheme kept as found; confirm the link handler understands it (mailto: may be what is wanted)
+            href = "x-psi-atstyle:";
+
+            // go backward till we find the beginning
+            if(x1 == 0)
+                continue;
+            --x1;
+            for(; x1 >= 0; --x1) {
+                if(!linkify_isOneOf(out.at(x1), "_.-+") && !out.at(x1).isLetterOrNumber())
+                    break;
+            }
+            ++x1;
+
+            // go forward till we find the end
+            x2 = n + 1;
+            for(; x2 < out.length(); ++x2) {
+                if(!linkify_isOneOf(out.at(x2), "_.-+") && !out.at(x2).isLetterOrNumber())
+                    break;
+            }
+
+            const int len = x2 - x1;
+            const QString link = out.mid(x1, len);
+
+            if(!linkify_okEmail(link)) {
+                n = x1 + link.length();
+                continue;
+            }
+
+            href += link;
+            const QString linked = QString("<a href=\"%1\">").arg(href) + link + "</a>";
+            out.replace(x1, len, linked);
+            n = x1 + linked.length() - 1;
+        }
+    }
+
+    return out;
+}
diff --git a/framework/src/domain/htmlutils.h b/framework/src/domain/htmlutils.h
new file mode 100644
index 00000000..b59da1dc
--- /dev/null
+++ b/framework/src/domain/htmlutils.h
@@ -0,0 +1,25 @@
+/*
+    Copyright (c) 2017 Christian Mollekopf
+
+    This library is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Library General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    This library is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+    License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB.
If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ +#pragma once + +#include + +namespace HtmlUtils { + /* takes a richtext string, heuristically adds links for uris of common protocols and returns the richtext string with the link markup added */ QString linkify(const QString &in); +} diff --git a/framework/src/domain/messageparser_new.cpp b/framework/src/domain/messageparser_new.cpp index c353becf..7e7dbfa6 100644 --- a/framework/src/domain/messageparser_new.cpp +++ b/framework/src/domain/messageparser_new.cpp @@ -19,6 +19,7 @@ #include "messageparser.h" #include "mimetreeparser/interface.h" +#include "htmlutils.h" #include #include @@ -470,7 +471,7 @@ QVariant NewModel::data(const QModelIndex &index, int role) const } } else { //We assume plain //We alwas do richtext (so we get highlighted links and stuff). - return Qt::convertFromPlainText(text); + return HtmlUtils::linkify(Qt::convertFromPlainText(text)); } return text; } -- cgit v1.2.3