summaryrefslogtreecommitdiffstats
path: root/framework/src/domain/mime/htmlutils.cpp
diff options
context:
space:
mode:
authorChristian Mollekopf <chrigi_1@fastmail.fm>2017-05-23 21:00:50 +0200
committerChristian Mollekopf <chrigi_1@fastmail.fm>2017-05-23 21:00:50 +0200
commit31bf3102fe8f8cdd3f1448f0f22f182d0c2820d2 (patch)
treeb5b508c3f065e0f51c8ce40aaf97d7070b5f9ef5 /framework/src/domain/mime/htmlutils.cpp
parent1948369d4da2d0bc23b6af93683982b0e65d4992 (diff)
downloadkube-31bf3102fe8f8cdd3f1448f0f22f182d0c2820d2.tar.gz
kube-31bf3102fe8f8cdd3f1448f0f22f182d0c2820d2.zip
Moved MIME related stuff to a mime subdir
Diffstat (limited to 'framework/src/domain/mime/htmlutils.cpp')
-rw-r--r--framework/src/domain/mime/htmlutils.cpp286
1 files changed, 286 insertions, 0 deletions
diff --git a/framework/src/domain/mime/htmlutils.cpp b/framework/src/domain/mime/htmlutils.cpp
new file mode 100644
index 00000000..156bcc48
--- /dev/null
+++ b/framework/src/domain/mime/htmlutils.cpp
@@ -0,0 +1,286 @@
1/*
2 Copyright (c) 2017 Christian Mollekopf <mollekopf@kolabsys.com>
3
4 This library is free software; you can redistribute it and/or modify it
5 under the terms of the GNU Library General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or (at your
7 option) any later version.
8
9 This library is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 License for more details.
13
14 You should have received a copy of the GNU Library General Public License
15 along with this library; see the file COPYING.LIB. If not, write to the
16 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 02110-1301, USA.
18*/
19#include "htmlutils.h"
20
21#include <QTextDocument>
22
23static QString resolveEntities(const QString &in)
24{
25 QString out;
26
27 for(int i = 0; i < (int)in.length(); ++i) {
28 if(in[i] == '&') {
29 // find a semicolon
30 ++i;
31 int n = in.indexOf(';', i);
32 if(n == -1)
33 break;
34 QString type = in.mid(i, (n-i));
35 i = n; // should be n+1, but we'll let the loop increment do it
36
37 if(type == "amp")
38 out += '&';
39 else if(type == "lt")
40 out += '<';
41 else if(type == "gt")
42 out += '>';
43 else if(type == "quot")
44 out += '\"';
45 else if(type == "apos")
46 out += '\'';
47 else if(type == "nbsp")
48 out += 0xa0;
49 } else {
50 out += in[i];
51 }
52 }
53
54 return out;
55}
56
57
58static bool linkify_pmatch(const QString &str1, int at, const QString &str2)
59{
60 if(str2.length() > (str1.length()-at))
61 return false;
62
63 for(int n = 0; n < (int)str2.length(); ++n) {
64 if(str1.at(n+at).toLower() != str2.at(n).toLower())
65 return false;
66 }
67
68 return true;
69}
70
71static bool linkify_isOneOf(const QChar &c, const QString &charlist)
72{
73 for(int i = 0; i < (int)charlist.length(); ++i) {
74 if(c == charlist.at(i))
75 return true;
76 }
77
78 return false;
79}
80
81// encodes a few dangerous html characters
82static QString linkify_htmlsafe(const QString &in)
83{
84 QString out;
85
86 for(int n = 0; n < in.length(); ++n) {
87 if(linkify_isOneOf(in.at(n), "\"\'`<>")) {
88 // hex encode
89 QString hex;
90 hex.sprintf("%%%02X", in.at(n).toLatin1());
91 out.append(hex);
92 } else {
93 out.append(in.at(n));
94 }
95 }
96
97 return out;
98}
99
100static bool linkify_okUrl(const QString &url)
101{
102 if(url.at(url.length()-1) == '.')
103 return false;
104
105 return true;
106}
107
108static bool linkify_okEmail(const QString &addy)
109{
110 // this makes sure that there is an '@' and a '.' after it, and that there is
111 // at least one char for each of the three sections
112 int n = addy.indexOf('@');
113 if(n == -1 || n == 0)
114 return false;
115 int d = addy.indexOf('.', n+1);
116 if(d == -1 || d == 0)
117 return false;
118 if((addy.length()-1) - d <= 0)
119 return false;
120 if(addy.indexOf("..") != -1)
121 return false;
122
123 return true;
124}
125
126/**
127 * takes a richtext string and heuristically adds links for uris of common protocols
128 * @return a richtext string with link markup added
129 */
130QString HtmlUtils::linkify(const QString &in)
131{
132 QString out = in;
133 int x1, x2;
134 bool isUrl, isAtStyle;
135 QString linked, link, href;
136
137 for(int n = 0; n < (int)out.length(); ++n) {
138 isUrl = false;
139 isAtStyle = false;
140 x1 = n;
141
142 if(linkify_pmatch(out, n, "xmpp:")) {
143 n += 5;
144 isUrl = true;
145 href = "";
146 }
147 else if(linkify_pmatch(out, n, "mailto:")) {
148 n += 7;
149 isUrl = true;
150 href = "";
151 }
152 else if(linkify_pmatch(out, n, "http://")) {
153 n += 7;
154 isUrl = true;
155 href = "";
156 }
157 else if(linkify_pmatch(out, n, "https://")) {
158 n += 8;
159 isUrl = true;
160 href = "";
161 }
162 else if(linkify_pmatch(out, n, "ftp://")) {
163 n += 6;
164 isUrl = true;
165 href = "";
166 }
167 else if(linkify_pmatch(out, n, "news://")) {
168 n += 7;
169 isUrl = true;
170 href = "";
171 }
172 else if (linkify_pmatch(out, n, "ed2k://")) {
173 n += 7;
174 isUrl = true;
175 href = "";
176 }
177 else if (linkify_pmatch(out, n, "magnet:")) {
178 n += 7;
179 isUrl = true;
180 href = "";
181 }
182 else if(linkify_pmatch(out, n, "www.")) {
183 isUrl = true;
184 href = "http://";
185 }
186 else if(linkify_pmatch(out, n, "ftp.")) {
187 isUrl = true;
188 href = "ftp://";
189 }
190 else if(linkify_pmatch(out, n, "@")) {
191 isAtStyle = true;
192 href = "x-psi-atstyle:";
193 }
194
195 if(isUrl) {
196 // make sure the previous char is not alphanumeric
197 if(x1 > 0 && out.at(x1-1).isLetterOrNumber())
198 continue;
199
200 // find whitespace (or end)
201 QMap<QChar, int> brackets;
202 brackets['('] = brackets[')'] = brackets['['] = brackets[']'] = brackets['{'] = brackets['}'] = 0;
203 QMap<QChar, QChar> openingBracket;
204 openingBracket[')'] = '(';
205 openingBracket[']'] = '[';
206 openingBracket['}'] = '{';
207 for(x2 = n; x2 < (int)out.length(); ++x2) {
208 if(out.at(x2).isSpace() || linkify_isOneOf(out.at(x2), "\"\'`<>")
209 || linkify_pmatch(out, x2, "&quot;") || linkify_pmatch(out, x2, "&apos;")
210 || linkify_pmatch(out, x2, "&gt;") || linkify_pmatch(out, x2, "&lt;") ) {
211 break;
212 }
213 if(brackets.keys().contains(out.at(x2))) {
214 ++brackets[out.at(x2)];
215 }
216 }
217 int len = x2-x1;
218 QString pre = resolveEntities(out.mid(x1, x2-x1));
219
220 // go backward hacking off unwanted punctuation
221 int cutoff;
222 for(cutoff = pre.length()-1; cutoff >= 0; --cutoff) {
223 if(!linkify_isOneOf(pre.at(cutoff), "!?,.()[]{}<>\""))
224 break;
225 if(linkify_isOneOf(pre.at(cutoff), ")]}")
226 && brackets[pre.at(cutoff)] - brackets[openingBracket[pre.at(cutoff)]] <= 0 ) {
227 break; // in theory, there could be == above, but these are urls, not math ;)
228 }
229 if(brackets.keys().contains(pre.at(cutoff))) {
230 --brackets[pre.at(cutoff)];
231 }
232
233 }
234 ++cutoff;
235 //++x2;
236
237 link = pre.mid(0, cutoff);
238 if(!linkify_okUrl(link)) {
239 n = x1 + link.length();
240 continue;
241 }
242 href += link;
243 // attributes need to be encoded too.
244 href = href.toHtmlEscaped();
245 href = linkify_htmlsafe(href);
246 //printf("link: [%s], href=[%s]\n", link.latin1(), href.latin1());
247 linked = QString("<a href=\"%1\">").arg(href) + link.toHtmlEscaped() + "</a>" + pre.mid(cutoff).toHtmlEscaped();
248 out.replace(x1, len, linked);
249 n = x1 + linked.length() - 1;
250 } else if(isAtStyle) {
251 // go backward till we find the beginning
252 if(x1 == 0)
253 continue;
254 --x1;
255 for(; x1 >= 0; --x1) {
256 if(!linkify_isOneOf(out.at(x1), "_.-+") && !out.at(x1).isLetterOrNumber())
257 break;
258 }
259 ++x1;
260
261 // go forward till we find the end
262 x2 = n + 1;
263 for(; x2 < (int)out.length(); ++x2) {
264 if(!linkify_isOneOf(out.at(x2), "_.-+") && !out.at(x2).isLetterOrNumber())
265 break;
266 }
267
268 int len = x2-x1;
269 link = out.mid(x1, len);
270 //link = resolveEntities(link);
271
272 if(!linkify_okEmail(link)) {
273 n = x1 + link.length();
274 continue;
275 }
276
277 href += link;
278 //printf("link: [%s], href=[%s]\n", link.latin1(), href.latin1());
279 linked = QString("<a href=\"%1\">").arg(href) + link + "</a>";
280 out.replace(x1, len, linked);
281 n = x1 + linked.length() - 1;
282 }
283 }
284
285 return out;
286}