From 5722805a3f6aa144c5b47dc47f16cd21c057bbd6 Mon Sep 17 00:00:00 2001 From: Aaron Seigo Date: Fri, 25 Dec 2015 11:21:23 +0100 Subject: proper tokenization of input --- akonadish/syntaxtree.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/akonadish/syntaxtree.cpp b/akonadish/syntaxtree.cpp index 4188e5f..495ad22 100644 --- a/akonadish/syntaxtree.cpp +++ b/akonadish/syntaxtree.cpp @@ -170,7 +170,47 @@ Syntax::List SyntaxTree::nearestSyntax(const QStringList &words, const QString & QStringList SyntaxTree::tokenize(const QString &text) { - //TODO: properly tokenize (e.g. "foo bar" should not become ['"foo', 'bar"'] - return text.split(" "); + //TODO: properly tokenize (e.g. "foo bar" should not become ['"foo', 'bar"']a + static const QVector quoters = QVector() << '"' << '\''; + QStringList tokens; + QString acc; + QChar closer; + for (int i = 0; i < text.size(); ++i) { + const QChar c = text.at(i); + if (c == '\\') { + ++i; + if (i < text.size()) { + acc.append(text.at(i)); + } + } else if (!closer.isNull()) { + if (c == closer) { + acc = acc.trimmed(); + if (!acc.isEmpty()) { + tokens << acc; + } + acc.clear(); + closer = QChar(); + } else { + acc.append(c); + } + } else if (c.isSpace()) { + acc = acc.trimmed(); + if (!acc.isEmpty()) { + tokens << acc; + } + acc.clear(); + } else if (quoters.contains(c)) { + closer = c; + } else { + acc.append(c); + } + } + + acc = acc.trimmed(); + if (!acc.isEmpty()) { + tokens << acc; + } + + return tokens; } -- cgit v1.2.3