From a98d2f585db4397801f851ac6bb3e669795af477 Mon Sep 17 00:00:00 2001 From: Jean-Marcel Flach Date: Fri, 26 Jun 2015 15:37:47 +0200 Subject: [PATCH] Implement URL parser fixes #9470 --- lib/base/CMakeLists.txt | 2 +- lib/base/array.cpp | 2 +- lib/base/url-characters.hpp | 42 +++++ lib/base/url.cpp | 310 ++++++++++++++++++++++++++++++++++ lib/base/url.hpp | 71 ++++++++ lib/base/utility.cpp | 25 ++- lib/base/utility.hpp | 2 +- lib/cli/repositoryutility.cpp | 2 +- test/CMakeLists.txt | 46 ++--- test/base-url.cpp | 79 +++++++++ 10 files changed, 548 insertions(+), 33 deletions(-) create mode 100644 lib/base/url-characters.hpp create mode 100644 lib/base/url.cpp create mode 100644 lib/base/url.hpp create mode 100644 test/base-url.cpp diff --git a/lib/base/CMakeLists.txt b/lib/base/CMakeLists.txt index 900d3535b..5d92176e4 100644 --- a/lib/base/CMakeLists.txt +++ b/lib/base/CMakeLists.txt @@ -31,7 +31,7 @@ set(base_SOURCES scriptutils.cpp serializer.cpp socket.cpp socketevents.cpp stacktrace.cpp statsfunction.cpp stdiostream.cpp stream.cpp streamlogger.cpp streamlogger.thpp string.cpp string-script.cpp sysloglogger.cpp sysloglogger.thpp tcpsocket.cpp thinmutex.cpp threadpool.cpp timer.cpp - tlsstream.cpp tlsutility.cpp type.cpp unixsocket.cpp utility.cpp value.cpp + tlsstream.cpp tlsutility.cpp type.cpp unixsocket.cpp url.cpp utility.cpp value.cpp value-operators.cpp workqueue.cpp ) diff --git a/lib/base/array.cpp b/lib/base/array.cpp index 5c22330c3..55a53b71e 100644 --- a/lib/base/array.cpp +++ b/lib/base/array.cpp @@ -31,7 +31,7 @@ REGISTER_PRIMITIVE_TYPE(Array, Array::GetPrototype()); /** * Restrieves a value from an array. * - * @param index The index.. + * @param index The index. * @returns The value. 
*/
 Value Array::Get(unsigned int index) const
diff --git a/lib/base/url-characters.hpp b/lib/base/url-characters.hpp
new file mode 100644
index 000000000..a497655c4
--- /dev/null
+++ b/lib/base/url-characters.hpp
@@ -0,0 +1,47 @@
+/******************************************************************************
+ * Icinga 2                                                                   *
+ * Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org)    *
+ *                                                                            *
+ * This program is free software; you can redistribute it and/or              *
+ * modify it under the terms of the GNU General Public License                *
+ * as published by the Free Software Foundation; either version 2             *
+ * of the License, or (at your option) any later version.                     *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU General Public License for more details.                               *
+ *                                                                            *
+ * You should have received a copy of the GNU General Public License          *
+ * along with this program; if not, write to the Free Software Foundation     *
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
+ ******************************************************************************/
+#ifndef URL_CHARACTERS_H
+#define URL_CHARACTERS_H
+
+/*
+ * Character sets used to validate the individual components of a URL.
+ * Every macro expands to adjacent string literals, so writing macro names
+ * side by side concatenates their character sets.
+ */
+#define ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define NUMERIC "0123456789"
+
+#define UNRESERVED ALPHA NUMERIC "-._~"
+#define GEN_DELIMS ":/?#[]@"
+#define SUB_DELIMS "!$&'()*+,;="
+#define RESERVED GEN_DELIMS SUB_DELIMS
+#define PCHAR UNRESERVED SUB_DELIMS ":@"
+
+#define ACSCHEME ALPHA NUMERIC ".-+"
+
+//authority = [ userinfo "@" ] host [ ":" port ]
+#define ACUSERINFO UNRESERVED SUB_DELIMS ":"
+#define ACHOST UNRESERVED SUB_DELIMS
+#define ACPORT NUMERIC
+
+#define ACPATHSEGMENT PCHAR
+#define ACQUERY PCHAR "/?"
+#define ACFRAGMENT PCHAR "/?"
+
+#endif /* URL_CHARACTERS_H */
diff --git a/lib/base/url.cpp b/lib/base/url.cpp
new file mode 100644
index 000000000..d88ab7844
--- /dev/null
+++ b/lib/base/url.cpp
@@ -0,0 +1,357 @@
+/******************************************************************************
+ * Icinga 2                                                                   *
+ * Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org)    *
+ *                                                                            *
+ * This program is free software; you can redistribute it and/or              *
+ * modify it under the terms of the GNU General Public License                *
+ * as published by the Free Software Foundation; either version 2             *
+ * of the License, or (at your option) any later version.                     *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU General Public License for more details.                               *
+ *                                                                            *
+ * You should have received a copy of the GNU General Public License          *
+ * along with this program; if not, write to the Free Software Foundation     *
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
+ ******************************************************************************/
+
+#include "base/url.hpp"
+#include "base/url-characters.hpp"
+#include "base/array.hpp"
+#include "base/utility.hpp"
+#include "base/objectlock.hpp"
+#include <boost/tokenizer.hpp>
+#include <boost/foreach.hpp>
+
+using namespace icinga;
+
+/**
+ * Escapes a decoded query key or value for use in Url::Format().
+ *
+ * '&' and '=' are valid query characters but delimit the key/value pairs, so
+ * they are escaped here together with every character outside ACQUERY.
+ *
+ * @param component The decoded key or value.
+ * @returns The escaped component.
+ */
+static String EscapeQueryComponent(const String& component)
+{
+	return Utility::EscapeString(component, UNRESERVED "!$'()*+,;:@/?", false);
+}
+
+/**
+ * Parses a URL of the form [scheme:][//authority]/path[?query][#fragment].
+ *
+ * @param base_url The URL to parse.
+ * @throws std::invalid_argument if the URL is empty or a component is invalid.
+ */
+Url::Url(const String& base_url)
+{
+	String url = base_url;
+
+	if (url.GetLength() == 0)
+		BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Empty URL."));
+
+	size_t pHelper = url.Find(":");
+
+	/* A ':' behind the first '/', '?' or '#' belongs to a later component
+	 * (for example a query value) and does not terminate a scheme. */
+	if (pHelper != String::NPos && url.FindFirstOf("/?#") < pHelper)
+		pHelper = String::NPos;
+
+	if (pHelper == String::NPos) {
+		m_Scheme = "";
+	} else {
+		if (!ParseScheme(url.SubStr(0, pHelper)))
+			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Scheme."));
+		url = url.SubStr(pHelper + 1);
+	}
+
+	/* The URL may end right after the scheme ("http:"); check for that
+	 * before dereferencing Begin(). */
+	if (url.IsEmpty() || *url.Begin() != '/')
+		BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL: '/' expected after scheme."));
+
+	/* A lone "/" has no path segments; Format() emits "/" for an empty path. */
+	if (url.GetLength() == 1)
+		return;
+
+	if (*(url.Begin() + 1) != '/')
+		m_Authority = "";
+	else {
+		pHelper = url.Find("/", 2);
+
+		if (pHelper == String::NPos)
+			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL: Missing '/' after authority."));
+
+		if (!ParseAuthority(url.SubStr(0, pHelper)))
+			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Authority"));
+
+		url = url.SubStr(pHelper);
+	}
+
+	if (*url.Begin() == '/') {
+		pHelper = url.FindFirstOf("#?");
+		if (!ParsePath(url.SubStr(1, pHelper - 1)))
+			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Path"));
+
+		if (pHelper != String::NPos)
+			url = url.SubStr(pHelper);
+	} else
+		BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL: Missing path."));
+
+	if (*url.Begin() == '?') {
+		pHelper = url.Find("#");
+		if (!ParseQuery(url.SubStr(1, pHelper - 1)))
+			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Query"));
+
+		if (pHelper != String::NPos)
+			url = url.SubStr(pHelper);
+	}
+
+	if (*url.Begin() == '#') {
+		if (!ParseFragment(url.SubStr(1)))
+			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Fragment"));
+	}
+}
+
+String Url::GetScheme(void) const
+{
+	return m_Scheme;
+}
+
+String Url::GetAuthority(void) const
+{
+	return m_Authority;
+}
+
+const std::vector<String>& Url::GetPath(void) const
+{
+	return m_Path;
+}
+
+const std::map<String, Value>& Url::GetQuery(void) const
+{
+	return m_Query;
+}
+
+/* Returns the value stored for a query key, or Empty if the key is absent. */
+Value Url::GetQueryElement(const String& name) const
+{
+	std::map<String, Value>::const_iterator it = m_Query.find(name);
+
+	if (it == m_Query.end())
+		return Empty;
+
+	return it->second;
+}
+
+String Url::GetFragment(void) const
+{
+	return m_Fragment;
+}
+
+/**
+ * Assembles the URL string from the parsed components, escaping path
+ * segments, query keys and values, and the fragment.
+ *
+ * @returns The formatted URL.
+ */
+String Url::Format(void) const
+{
+	String url = "";
+
+	if (!m_Scheme.IsEmpty())
+		url += m_Scheme + ":";
+
+	if (!m_Authority.IsEmpty())
+		url += "//" + m_Authority;
+
+	if (m_Path.empty())
+		url += "/";
+	else {
+		BOOST_FOREACH (const String& p, m_Path) {
+			url += "/";
+			url += Utility::EscapeString(p, ACPATHSEGMENT, false);
+		}
+	}
+
+	String param = "";
+	if (!m_Query.empty()) {
+		typedef std::pair<const String, Value> kv_pair;
+
+		BOOST_FOREACH (const kv_pair& kv, m_Query) {
+			String key = EscapeQueryComponent(kv.first);
+			if (param.IsEmpty())
+				param = "?";
+			else
+				param += "&";
+
+			Value val = kv.second;
+
+			if (val.IsEmpty())
+				param += key;
+			else {
+				if (val.IsObjectType<Array>()) {
+					Array::Ptr arr = val;
+					String temp = "";
+
+					ObjectLock olock(arr);
+					BOOST_FOREACH (const String& sArrIn, arr) {
+						if (!temp.IsEmpty())
+							temp += "&";
+
+						temp += key + "[]=" + EscapeQueryComponent(sArrIn);
+					}
+
+					param += temp;
+				} else
+					param += key + "=" + EscapeQueryComponent(kv.second);
+			}
+		}
+	}
+
+	url += param;
+
+	if (!m_Fragment.IsEmpty())
+		url += "#" + Utility::EscapeString(m_Fragment, ACFRAGMENT, false);
+
+	return url;
+}
+
+/* Stores the scheme; it must start with a letter and use ACSCHEME characters only. */
+bool Url::ParseScheme(const String& scheme)
+{
+	m_Scheme = scheme;
+
+	if (scheme.FindFirstOf(ALPHA) != 0)
+		return false;
+
+	return (ValidateToken(scheme, ACSCHEME));
+}
+
+/* Stores the authority without its leading "//" and validates it against ACHOST. */
+bool Url::ParseAuthority(const String& authority)
+{
+	//TODO parse all Authorities
+	m_Authority = authority.SubStr(2);
+	return (ValidateToken(m_Authority, ACHOST));
+}
+
+/**
+ * Splits the path at '/' and appends the non-empty segments to m_Path.
+ *
+ * Segments are validated in their encoded form, so that '%' escapes are
+ * accepted, and are unescaped afterwards.
+ */
+bool Url::ParsePath(const String& path)
+{
+	std::string pathStr = path;
+	boost::char_separator<char> sep("/");
+	boost::tokenizer<boost::char_separator<char> > tokens(pathStr, sep);
+
+	BOOST_FOREACH(const String& token, tokens) {
+		if (token.IsEmpty())
+			continue;
+
+		if (!ValidateToken(token, ACPATHSEGMENT "%"))
+			return false;
+
+		m_Path.push_back(Utility::UnescapeString(token));
+	}
+
+	return true;
+}
+
+/**
+ * Parses "key=value" pairs separated by '&' into m_Query.
+ *
+ * A key ending in "[]" collects its values in an Array. Keys and values are
+ * validated in encoded form and unescaped exactly once. Returns false for an
+ * empty key, a "key=" without value, a repeated scalar key, or a key used
+ * both as scalar and as array.
+ */
+bool Url::ParseQuery(const String& query)
+{
+	//Tokenizer does not like String AT ALL
+	std::string queryStr = query;
+	boost::char_separator<char> sep("&");
+	boost::tokenizer<boost::char_separator<char> > tokens(queryStr, sep);
+
+	BOOST_FOREACH(const String& token, tokens) {
+		size_t pHelper = token.Find("=");
+
+		/* Without '=' SubStr(0, NPos) yields the whole token as the key. */
+		String key = token.SubStr(0, pHelper);
+		String value = Empty;
+
+		if (pHelper != String::NPos) {
+			if (pHelper == token.GetLength() - 1)
+				return false;
+
+			value = token.SubStr(pHelper + 1);
+
+			if (!ValidateToken(value, ACQUERY "%"))
+				return false;
+
+			value = Utility::UnescapeString(value);
+		}
+
+		const size_t keyLength = key.GetLength();
+		const bool isArray = keyLength >= 2 && key.SubStr(keyLength - 2) == "[]";
+
+		if (isArray)
+			key = key.SubStr(0, keyLength - 2);
+
+		/* Also rejects a bare "[]" and brackets anywhere else in the key. */
+		if (key.IsEmpty() || !ValidateToken(key, ACQUERY "%"))
+			return false;
+
+		key = Utility::UnescapeString(key);
+
+		std::map<String, Value>::iterator it = m_Query.find(key);
+
+		if (isArray) {
+			if (it == m_Query.end()) {
+				Array::Ptr tmp = new Array();
+				tmp->Add(value);
+				m_Query[key] = tmp;
+			} else if (it->second.IsObjectType<Array>()) {
+				Array::Ptr arr = it->second;
+				arr->Add(value);
+			} else
+				return false;
+		} else {
+			if (it != m_Query.end())
+				return false;
+
+			m_Query[key] = value;
+		}
+	}
+
+	return true;
+}
+
+/* Validates the fragment in encoded form and stores it unescaped. */
+bool Url::ParseFragment(const String& fragment)
+{
+	if (!ValidateToken(fragment, ACFRAGMENT "%"))
+		return false;
+
+	m_Fragment = Utility::UnescapeString(fragment);
+
+	return true;
+}
+
+/* Returns true if every character of token is contained in symbols. */
+bool Url::ValidateToken(const String& token, const String& symbols)
+{
+	BOOST_FOREACH (const char c, token.CStr()) {
+		if (symbols.FindFirstOf(c) == String::NPos)
+			return false;
+	}
+
+	return true;
+}
diff --git a/lib/base/url.hpp b/lib/base/url.hpp
new file mode 100644
index 000000000..aa68d2819
--- /dev/null
+++ b/lib/base/url.hpp
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * Icinga 2                                                                   *
+ * Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org)    *
+ *                                                                            *
+ * This program is free software; you can redistribute it and/or              *
+ * modify it under the terms of the GNU General Public License                *
+ * as published by the Free Software Foundation; either version 2             *
+ * of the License, or (at your option) any later version.                     *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU General Public License for more details.                               *
+ *                                                                            *
+ * You should have received a copy of the GNU General Public License          *
+ * along with this program; if not, write to the Free Software Foundation     *
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*
+ ******************************************************************************/
+
+#ifndef URL_H
+#define URL_H
+
+#include "base/i2-base.hpp"
+#include "base/object.hpp"
+#include "base/string.hpp"
+#include "base/value.hpp"
+#include <map>
+#include <vector>
+
+namespace icinga
+{
+
+/**
+ * Parses a URL into scheme, authority, path, query and fragment (for the API).
+ *
+ * @ingroup base
+ */
+class I2_BASE_API Url : public Object
+{
+public:
+	DECLARE_PTR_TYPEDEFS(Url);
+
+	Url(const String& url); /* throws std::invalid_argument on a malformed URL */
+
+	String Format(void) const;
+
+	String GetScheme(void) const;
+	String GetAuthority(void) const;
+	const std::vector<String>& GetPath(void) const;
+	const std::map<String, Value>& GetQuery(void) const;
+	Value GetQueryElement(const String& name) const; /* Empty if name is absent */
+	String GetFragment(void) const;
+
+private:
+	String m_Scheme;
+	String m_Authority;
+	std::vector<String> m_Path;
+	std::map<String, Value> m_Query; /* a value is a String, or an Array for "key[]" */
+	String m_Fragment;
+
+	bool ParseScheme(const String& scheme);
+	bool ParseAuthority(const String& authority);
+	bool ParsePath(const String& path);
+	bool ParseQuery(const String& query);
+	bool ParseFragment(const String& fragment);
+
+	static bool ValidateToken(const String& token, const String& symbols);
+};
+
+}
+#endif /* URL_H */
diff --git a/lib/base/utility.cpp b/lib/base/utility.cpp
index b03567632..177b67b32 100644
--- a/lib/base/utility.cpp
+++ b/lib/base/utility.cpp
@@ -1147,16 +1147,27 @@ static int HexDecode(char hc)
 	BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid hex character."));
 }
 
-String Utility::EscapeString(const String& s, const String& chars)
+/**
+ * Escapes characters of a string as '%' followed by their HexEncode() output.
+ *
+ * @param s The string to escape.
+ * @param chars The character set that selects what gets escaped.
+ * @param illegal If true, the characters listed in chars are escaped; if
+ *                false, all characters NOT listed in chars are escaped.
+ * @returns The escaped string; '%' itself is escaped in both modes.
+ */
+String Utility::EscapeString(const String& s, const String& chars, const bool illegal)
 {
 	std::ostringstream result;
 
 	BOOST_FOREACH(char ch, s) {
-		if (chars.FindFirstOf(ch) != String::NPos || ch == '%') {
+		const bool listed = chars.FindFirstOf(ch) != String::NPos;
+
+		if (listed == illegal || ch == '%') {
 			result << '%';
 			HexEncode(ch, result);
 		} else
 			result << ch;
 	}
 
 	return result.str();
diff --git a/lib/base/utility.hpp b/lib/base/utility.hpp
index 5633d8290..f616801e2 100644
--- a/lib/base/utility.hpp
+++ b/lib/base/utility.hpp
@@ -104,7 +104,7 @@ public:
 	static String EscapeShellCmd(const String& s);
 	static String EscapeShellArg(const String& s);
 
-	static String EscapeString(const String& s, const String& chars);
+	static String EscapeString(const String& s, const String& chars, const bool illegal = true);
 	static String UnescapeString(const String& s);
 
 	static void SetThreadName(const String& name, bool os = true);
diff --git a/lib/cli/repositoryutility.cpp b/lib/cli/repositoryutility.cpp
index d1fa88b3a..d0eb423df 100644
--- a/lib/cli/repositoryutility.cpp
+++ b/lib/cli/repositoryutility.cpp
@@ -519,7 +519,7 @@ Dictionary::Ptr RepositoryUtility::GetObjectFromRepository(const String& filenam
 
 String RepositoryUtility::EscapeName(const String& name)
 {
-	return Utility::EscapeString(name, "<>:\"/\\|?*");
+	return Utility::EscapeString(name, "<>:\"/\\|?*", true);
 }
 
 String RepositoryUtility::UnescapeName(const String& name)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index adab1dd7c..ed13a9716 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -22,8 +22,8 @@ set(base_test_SOURCES
   base-json.cpp base-match.cpp base-netstring.cpp base-object.cpp
   base-serialize.cpp base-shellescape.cpp base-stacktrace.cpp
   base-stream.cpp base-string.cpp base-timer.cpp base-type.cpp
-  base-value.cpp config-ops.cpp icinga-macros.cpp icinga-perfdata.cpp
-  remote-apiuser.cpp test.cpp
+  base-url.cpp base-value.cpp config-ops.cpp icinga-macros.cpp
+  icinga-perfdata.cpp remote-apiuser.cpp test.cpp
 )
 
 set(livestatus_test_SOURCES
@@ -79,7 +79,7 @@ add_boost_test(base
     base_string/clear
     base_string/append
     base_string/trim
-    base_string/contains
+    base_string/contains
     base_string/replace
     base_string/index
     base_string/find
@@ -87,27 +87,31 @@ add_boost_test(base base_timer/interval base_timer/invoke base_timer/scope - base_type/gettype - base_type/assign - base_type/byname - base_type/instantiate + base_type/gettype + base_type/assign + base_type/byname + base_type/instantiate + base_url/id_and_path + base_url/parameters + base_url/format + base_url/illegal_legal_strings base_value/scalar base_value/convert base_value/format - config_ops/simple - config_ops/advanced - icinga_macros/simple - icinga_perfdata/empty - icinga_perfdata/simple - icinga_perfdata/quotes - icinga_perfdata/multiple - icinga_perfdata/uom - icinga_perfdata/warncritminmax - icinga_perfdata/ignore_invalid_warn_crit_min_max - icinga_perfdata/invalid - icinga_perfdata/multi - remote_apiuser/get_password - remote_apiuser/check_password + config_ops/simple + config_ops/advanced + icinga_macros/simple + icinga_perfdata/empty + icinga_perfdata/simple + icinga_perfdata/quotes + icinga_perfdata/multiple + icinga_perfdata/uom + icinga_perfdata/warncritminmax + icinga_perfdata/ignore_invalid_warn_crit_min_max + icinga_perfdata/invalid + icinga_perfdata/multi + remote_apiuser/get_password + remote_apiuser/check_password ) if(ICINGA2_WITH_LIVESTATUS) diff --git a/test/base-url.cpp b/test/base-url.cpp new file mode 100644 index 000000000..50f239c15 --- /dev/null +++ b/test/base-url.cpp @@ -0,0 +1,79 @@ +/****************************************************************************** + * Icinga 2 * + * Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org) * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of the GNU General Public License * + * as published by the Free Software Foundation; either version 2 * + * of the License, or (at your option) any later version. 
*
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU General Public License for more details.                               *
+ *                                                                            *
+ * You should have received a copy of the GNU General Public License          *
+ * along with this program; if not, write to the Free Software Foundation     *
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
+ ******************************************************************************/
+
+#include "base/url.hpp"
+#include "base/array.hpp"
+#include <boost/test/unit_test.hpp>
+#include <boost/foreach.hpp>
+
+using namespace icinga;
+
+BOOST_AUTO_TEST_SUITE(base_url)
+
+BOOST_AUTO_TEST_CASE(id_and_path)
+{
+	Url::Ptr url = new Url("http://icinga.org/foo/bar/baz?hurr=durr");
+
+	BOOST_CHECK(url->GetScheme() == "http");
+
+	BOOST_CHECK(url->GetAuthority() == "icinga.org");
+
+	std::vector<String> PathCorrect;
+	PathCorrect.push_back("foo");
+	PathCorrect.push_back("bar");
+	PathCorrect.push_back("baz");
+
+	BOOST_CHECK(url->GetPath() == PathCorrect);
+}
+
+BOOST_AUTO_TEST_CASE(parameters)
+{
+	Url::Ptr url = new Url("https://icinga.org/hya/?rain=karl&rair=robert&foo[]=bar");
+
+	BOOST_CHECK(url->GetQueryElement("rair") == "robert");
+	BOOST_CHECK(url->GetQueryElement("rain") == "karl");
+	BOOST_CHECK(url->GetQueryElement("foo").IsObjectType<Array>());
+	Array::Ptr test = url->GetQueryElement("foo");
+	BOOST_CHECK(test->GetLength() == 1);
+	BOOST_CHECK(test->Get(0) == "bar");
+}
+
+BOOST_AUTO_TEST_CASE(format)
+{
+	Url::Ptr url = new Url("http://foo.bar/baz/?hop=top&flop=sop#iLIKEtrains");
+	BOOST_CHECK_NO_THROW(Url::Ptr(new Url(url->Format()))); /* formatted URL must parse again */
+
+	url = new Url("//main.args/////////?k[]=one&k[]=two#three");
+	BOOST_CHECK_NO_THROW(Url::Ptr(new Url(url->Format())));
+
+	url = new Url("/foo/bar/index.php?blaka");
+	BOOST_CHECK_NO_THROW(Url::Ptr(new Url(url->Format())));
+}
+
+BOOST_AUTO_TEST_CASE(illegal_legal_strings)
+{
+	BOOST_CHECK_THROW(new Url("/?foo=barr&foo[]=bazz"), std::invalid_argument);
+	BOOST_CHECK_THROW(new Url("/?]=gar"), std::invalid_argument);
+	BOOST_CHECK_NO_THROW(Url::Ptr(new Url("/?foo=baz??&\?\?=/?"))); //Valid
+	BOOST_CHECK_THROW(new Url("/?foo=bar&foo=ba"), std::invalid_argument);
+	BOOST_CHECK_THROW(new Url("/?foo=bar&[]=d"), std::invalid_argument);
+	BOOST_CHECK_THROW(new Url("/?fo=&bar=garOA"), std::invalid_argument);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
-- 
2.40.0