Implement URL parser

fixes #9470
This commit is contained in:
Jean-Marcel Flach 2015-06-26 15:37:47 +02:00
parent 2560a9733d
commit a98d2f585d
10 changed files with 548 additions and 33 deletions

View File

@ -31,7 +31,7 @@ set(base_SOURCES
scriptutils.cpp serializer.cpp socket.cpp socketevents.cpp stacktrace.cpp
statsfunction.cpp stdiostream.cpp stream.cpp streamlogger.cpp streamlogger.thpp string.cpp string-script.cpp
sysloglogger.cpp sysloglogger.thpp tcpsocket.cpp thinmutex.cpp threadpool.cpp timer.cpp
tlsstream.cpp tlsutility.cpp type.cpp unixsocket.cpp utility.cpp value.cpp
tlsstream.cpp tlsutility.cpp type.cpp unixsocket.cpp url.cpp utility.cpp value.cpp
value-operators.cpp workqueue.cpp
)

View File

@ -31,7 +31,7 @@ REGISTER_PRIMITIVE_TYPE(Array, Array::GetPrototype());
/**
* Retrieves a value from an array.
*
* @param index The index..
* @param index The index.
* @returns The value.
*/
Value Array::Get(unsigned int index) const

View File

@ -0,0 +1,42 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#ifndef URL_CHARACTERS_H
#define URL_CHARACTERS_H

/*
 * Character classes used to validate and escape URL components.
 *
 * Every macro expands to a sequence of adjacent string literals, which the
 * compiler concatenates into one string listing the characters of the class.
 * The class names follow the grammar of RFC 3986.
 */

/* Basic building blocks. */
#define ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define NUMERIC "0123456789"

#define UNRESERVED ALPHA NUMERIC "-._~"
#define GEN_DELIMS ":/?#[]@"
#define SUB_DELIMS "!$&'()*+,;="
/* Fixed: used to read SUB-DELIMS, which does not name any macro. */
#define RESERVED GEN_DELIMS SUB_DELIMS
#define PCHAR UNRESERVED SUB_DELIMS ":@"

/* Characters accepted in the individual URL components ("AC" prefix). */
#define ACSCHEME ALPHA NUMERIC ".-+"

//authority = [ userinfo "@" ] host [ ":" port ]
#define ACUSERINFO UNRESERVED SUB_DELIMS ":"
#define ACHOST UNRESERVED SUB_DELIMS
#define ACPORT NUMERIC

#define ACPATHSEGMENT PCHAR
#define ACQUERY PCHAR "/?"
#define ACFRAGMENT PCHAR "/?"

#endif /* URL_CHARACTERS_H */

310
lib/base/url.cpp Normal file
View File

@ -0,0 +1,310 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#include "base/url.hpp"
#include "base/url-characters.hpp"
#include "base/array.hpp"
#include "base/utility.hpp"
#include "base/objectlock.hpp"
#include <boost/tokenizer.hpp>
#include <boost/foreach.hpp>
using namespace icinga;
/**
 * Parses a URL of the form
 *   [scheme ":"] ["//" authority] "/" path ["?" query] ["#" fragment]
 * and stores the individual components.
 *
 * @param base_url The URL to parse.
 * @throws std::invalid_argument If the URL is empty or one of its
 *         components is malformed.
 */
Url::Url(const String& base_url)
{
	String url = base_url;

	if (url.GetLength() == 0)
		BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Empty URL."));

	/* A scheme can only appear in front of the first '/'. For URLs that
	 * start with '/' a ':' further to the right (e.g. inside the query)
	 * must not be mistaken for the scheme delimiter. */
	size_t pHelper = String::NPos;

	if (*url.Begin() != '/')
		pHelper = url.Find(":");

	if (pHelper == String::NPos) {
		m_Scheme = "";
	} else {
		if (!ParseScheme(url.SubStr(0, pHelper)))
			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Scheme."));

		url = url.SubStr(pHelper + 1);
	}

	/* "scheme:" without anything behind it leaves an empty string, so the
	 * length has to be checked before Begin() is dereferenced. */
	if (url.GetLength() == 0 || *url.Begin() != '/')
		BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL: '/' expected after scheme."));

	/* A lone "/" is the root: the path stays empty, which Format() prints
	 * as "/" again. */
	if (url.GetLength() == 1)
		return;

	if (*(url.Begin() + 1) != '/')
		m_Authority = "";
	else {
		pHelper = url.Find("/", 2);

		if (pHelper == String::NPos)
			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL: Missing '/' after authority."));

		if (!ParseAuthority(url.SubStr(0, pHelper)))
			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Authority"));

		url = url.SubStr(pHelper);
	}

	/* At this point url always starts with '/': either it did so from the
	 * beginning or it was cut at the '/' that terminates the authority. */
	pHelper = url.FindFirstOf("#?");

	/* For pHelper == NPos the length wraps around to a huge value and
	 * SubStr() returns everything up to the end of the string. */
	if (!ParsePath(url.SubStr(1, pHelper - 1)))
		BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Path"));

	if (pHelper != String::NPos)
		url = url.SubStr(pHelper);

	if (*url.Begin() == '?') {
		pHelper = url.Find("#");

		if (!ParseQuery(url.SubStr(1, pHelper - 1)))
			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Query"));

		if (pHelper != String::NPos)
			url = url.SubStr(pHelper);
	}

	if (*url.Begin() == '#') {
		if (!ParseFragment(url.SubStr(1)))
			BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid URL Fragment"));
	}
}
/**
 * Returns the scheme component of the URL.
 *
 * @returns The scheme; empty if the parsed URL did not contain one.
 */
String Url::GetScheme(void) const
{
return m_Scheme;
}
/**
 * Returns the authority component of the URL.
 *
 * @returns The authority without the leading "//"; empty if the parsed URL
 *          did not contain one.
 */
String Url::GetAuthority(void) const
{
return m_Authority;
}
/**
 * Returns the path of the URL, split into its segments.
 *
 * @returns A reference to the stored segment list, in the order the
 *          segments appeared in the URL.
 */
const std::vector<String>& Url::GetPath(void) const
{
return m_Path;
}
/**
 * Returns all query parameters of the URL.
 *
 * @returns A reference to the stored map from parameter name to value.
 *          Parameters written as "name[]=value" are stored as an Array.
 */
const std::map<String,Value>& Url::GetQuery(void) const
{
return m_Query;
}
/**
 * Looks up a single query parameter.
 *
 * @param name The name of the parameter.
 * @returns The stored value, or Empty if there is no such parameter.
 */
Value Url::GetQueryElement(const String& name) const
{
	std::map<String, Value>::const_iterator pos = m_Query.find(name);

	if (pos != m_Query.end())
		return pos->second;

	return Empty;
}
/**
 * Returns the fragment component of the URL.
 *
 * @returns The fragment without the leading '#'; empty if the parsed URL
 *          did not contain one.
 */
String Url::GetFragment(void) const
{
return m_Fragment;
}
/**
 * Assembles the URL string from the stored components.
 *
 * Path segments, query keys/values and the fragment are percent-escaped:
 * every character that is not part of the component's allowed set gets
 * encoded. Array parameters are written as repeated "key[]=value" pairs.
 *
 * @returns The formatted URL.
 */
String Url::Format(void) const
{
	String url = "";

	if (!m_Scheme.IsEmpty())
		url += m_Scheme + ":";

	if (!m_Authority.IsEmpty())
		url += "//" + m_Authority;

	if (m_Path.empty())
		url += "/";
	else {
		/* Bind by reference; segments must not be copied per iteration. */
		BOOST_FOREACH (const String& segment, m_Path) {
			url += "/";
			url += Utility::EscapeString(segment, ACPATHSEGMENT, false);
		}
	}

	String param = "";

	/* Use the map's real element type (pair<const String, Value>). Binding
	 * to pair<String, Value> would silently copy every entry. */
	typedef std::map<String, Value>::value_type kv_pair;

	BOOST_FOREACH (const kv_pair& kv, m_Query) {
		String key = Utility::EscapeString(kv.first, ACQUERY, false);

		if (param.IsEmpty())
			param = "?";
		else
			param += "&";

		const Value& val = kv.second;

		if (val.IsEmpty()) {
			/* Parameter without a value, e.g. "?flag". */
			param += key;
		} else if (val.IsObjectType<Array>()) {
			Array::Ptr arr = val;
			String temp = "";

			ObjectLock olock(arr);
			BOOST_FOREACH (const String& sArrIn, arr) {
				if (!temp.IsEmpty())
					temp += "&";

				temp += key + "[]=" + Utility::EscapeString(sArrIn, ACQUERY, false);
			}

			param += temp;
		} else
			param += key + "=" + Utility::EscapeString(val, ACQUERY, false);
	}

	url += param;

	if (!m_Fragment.IsEmpty())
		url += "#" + Utility::EscapeString(m_Fragment, ACFRAGMENT, false);

	return url;
}
/**
 * Stores the scheme and checks that it is well-formed.
 *
 * The scheme is stored even if it turns out to be invalid.
 *
 * @param scheme The scheme without the trailing ':'.
 * @returns true if the scheme starts with a letter and consists only of
 *          characters from ACSCHEME, false otherwise.
 */
bool Url::ParseScheme(const String& scheme)
{
	m_Scheme = scheme;

	/* The first letter found has to sit at position 0. */
	const bool startsWithLetter = (scheme.FindFirstOf(ALPHA) == 0);

	return startsWithLetter && ValidateToken(scheme, ACSCHEME);
}
bool Url::ParseAuthority(const String& authority)
{
//TODO parse all Authorities
m_Authority = authority.SubStr(2);
return (ValidateToken(m_Authority, ACHOST));
}
/**
 * Splits the path at '/' and appends the decoded segments to m_Path.
 *
 * Empty segments (repeated slashes) are skipped. Each segment is validated
 * in its encoded form, so percent-escapes such as "%20" are accepted, and is
 * percent-decoded only afterwards.
 *
 * @param path The path without the leading '/'.
 * @returns true if every segment is valid, false otherwise. Segments seen
 *          before an invalid one remain in m_Path.
 */
bool Url::ParsePath(const String& path)
{
	/* boost::tokenizer needs a std::string to work on. */
	std::string pathStr = path;
	boost::char_separator<char> sep("/");
	boost::tokenizer<boost::char_separator<char> > tokens(pathStr, sep);

	BOOST_FOREACH(const String& token, tokens) {
		if (token.IsEmpty())
			continue;

		/* Validate before decoding: '%' introduces an escape sequence and
		 * is therefore legal in the raw segment, while the decoded text may
		 * contain any character. */
		if (!ValidateToken(token, ACPATHSEGMENT "%"))
			return false;

		m_Path.push_back(Utility::UnescapeString(token));
	}

	return true;
}
/**
 * Parses the query string into m_Query.
 *
 * The query is split at '&'. Each part is either "key", "key=value" or
 * "key[]=value". Keys with a "[]" suffix are collected into an Array under
 * the key without the suffix; all other keys may appear only once. Keys and
 * values are validated in their encoded form and percent-decoded exactly
 * once.
 *
 * @param query The query without the leading '?'.
 * @returns false if a part is malformed (empty key, "key=" without a value,
 *          illegal characters, misplaced "[]", duplicate key, or mixing
 *          "key" and "key[]"), true otherwise.
 */
bool Url::ParseQuery(const String& query)
{
	//Tokenizer does not like String AT ALL
	std::string queryStr = query;
	boost::char_separator<char> sep("&");
	boost::tokenizer<boost::char_separator<char> > tokens(queryStr, sep);

	BOOST_FOREACH(const String& token, tokens) {
		size_t pHelper = token.Find("=");

		/* Without '=' pHelper is NPos and the whole token is the key. */
		String key = token.SubStr(0, pHelper);
		String value = Empty;

		if (pHelper != String::NPos) {
			/* "key=" with nothing behind the '=' is not accepted. */
			if (pHelper == token.GetLength() - 1)
				return false;

			value = token.SubStr(pHelper + 1);

			/* Validate the encoded form ('%' starts an escape sequence),
			 * then decode exactly once. */
			if (!ValidateToken(value, ACQUERY "%"))
				return false;

			value = Utility::UnescapeString(value);
		}

		if (key.IsEmpty())
			return false;

		bool isArray = false;
		pHelper = key.Find("[]");

		if (pHelper != String::NPos) {
			/* "[]" is only meaningful as the suffix of a non-empty name. */
			if (pHelper == 0 || pHelper != key.GetLength() - 2)
				return false;

			key = key.SubStr(0, pHelper);
			isArray = true;
		}

		if (!ValidateToken(key, ACQUERY "%"))
			return false;

		key = Utility::UnescapeString(key);

		std::map<String, Value>::iterator it = m_Query.find(key);

		if (isArray) {
			if (it == m_Query.end()) {
				Array::Ptr tmp = new Array();
				tmp->Add(value);
				m_Query[key] = tmp;
			} else if (it->second.IsObjectType<Array>()) {
				Array::Ptr arr = it->second;
				arr->Add(value);
			} else {
				/* The key was already used as a scalar parameter. */
				return false;
			}
		} else {
			/* Scalar parameters must be unique. */
			if (it != m_Query.end())
				return false;

			m_Query[key] = value;
		}
	}

	return true;
}
/**
 * Validates the fragment and stores its decoded form.
 *
 * The fragment is checked in its encoded form, so percent-escapes are
 * accepted; it is only decoded and stored once it has been found valid.
 *
 * @param fragment The fragment without the leading '#'.
 * @returns true if the fragment is valid, false otherwise.
 */
bool Url::ParseFragment(const String& fragment)
{
	/* '%' introduces an escape sequence and is legal in the raw text. */
	if (!ValidateToken(fragment, ACFRAGMENT "%"))
		return false;

	m_Fragment = Utility::UnescapeString(fragment);

	return true;
}
/**
 * Checks that a token consists only of allowed characters.
 *
 * The whole token is inspected, including anything behind an embedded NUL
 * byte. Walking the C string instead would stop at the first NUL and let
 * the remainder pass unchecked.
 *
 * @param token The text to check.
 * @param symbols The set of allowed characters.
 * @returns true if every character of token occurs in symbols (always true
 *          for an empty token), false otherwise.
 */
bool Url::ValidateToken(const String& token, const String& symbols)
{
	BOOST_FOREACH (const char c, token) {
		if (symbols.FindFirstOf(c) == String::NPos)
			return false;
	}

	return true;
}

71
lib/base/url.hpp Normal file
View File

@ -0,0 +1,71 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#ifndef URL_H
#define URL_H
#include "base/i2-base.hpp"
#include "base/object.hpp"
#include "base/string.hpp"
#include "base/value.hpp"
#include <map>
#include <vector>
namespace icinga
{
/**
* A URL class to use with the API.
*
* Splits a URL string into scheme, authority, path, query and fragment and
* can format these components back into a string.
*
* @ingroup base
*/
class I2_BASE_API Url : public Object
{
public:
DECLARE_PTR_TYPEDEFS(Url);
/* Parses the given URL; throws std::invalid_argument if it is malformed. */
Url(const String& url);
/* Builds the URL string from the stored components. */
String Format(void) const;
/* Accessors for the individual components. */
String GetScheme(void) const;
String GetAuthority(void) const;
const std::vector<String>& GetPath(void) const;
const std::map<String,Value>& GetQuery(void) const;
/* Returns the value of one query parameter, or Empty if it is not set. */
Value GetQueryElement(const String& name) const;
String GetFragment(void) const;
private:
/* Scheme without the trailing ':'. */
String m_Scheme;
/* Authority without the leading "//". */
String m_Authority;
/* Path segments in the order they appear in the URL. */
std::vector<String> m_Path;
/* Query parameters; "name[]" parameters are stored as Array values. */
std::map<String,Value> m_Query;
/* Fragment without the leading '#'. */
String m_Fragment;
/* Component parsers used by the constructor; each returns false if its
* input is invalid. */
bool ParseScheme(const String& scheme);
bool ParseAuthority(const String& authority);
bool ParsePath(const String& path);
bool ParseQuery(const String& query);
bool ParseFragment(const String& fragment);
/* Returns true if token only consists of characters listed in symbols. */
static bool ValidateToken(const String& token, const String& symbols);
};
}
#endif /* URL_H */

View File

@ -1147,16 +1147,25 @@ static int HexDecode(char hc)
BOOST_THROW_EXCEPTION(std::invalid_argument("Invalid hex character."));
}
String Utility::EscapeString(const String& s, const String& chars)
String Utility::EscapeString(const String& s, const String& chars, const bool illegal)
{
std::ostringstream result;
BOOST_FOREACH(char ch, s) {
if (chars.FindFirstOf(ch) != String::NPos || ch == '%') {
result << '%';
HexEncode(ch, result);
} else
result << ch;
if (illegal) {
BOOST_FOREACH(char ch, s) {
if (chars.FindFirstOf(ch) != String::NPos || ch == '%') {
result << '%';
HexEncode(ch, result);
} else
result << ch;
}
} else {
BOOST_FOREACH(char ch, s) {
if (chars.FindFirstOf(ch) == String::NPos || ch == '%') {
result << '%';
HexEncode(ch, result);
} else
result << ch;
}
}
return result.str();

View File

@ -104,7 +104,7 @@ public:
static String EscapeShellCmd(const String& s);
static String EscapeShellArg(const String& s);
static String EscapeString(const String& s, const String& chars);
static String EscapeString(const String& s, const String& chars, const bool illegal);
static String UnescapeString(const String& s);
static void SetThreadName(const String& name, bool os = true);

View File

@ -519,7 +519,7 @@ Dictionary::Ptr RepositoryUtility::GetObjectFromRepository(const String& filenam
String RepositoryUtility::EscapeName(const String& name)
{
return Utility::EscapeString(name, "<>:\"/\\|?*");
return Utility::EscapeString(name, "<>:\"/\\|?*", true);
}
String RepositoryUtility::UnescapeName(const String& name)

View File

@ -22,8 +22,8 @@ set(base_test_SOURCES
base-json.cpp base-match.cpp base-netstring.cpp base-object.cpp
base-serialize.cpp base-shellescape.cpp base-stacktrace.cpp
base-stream.cpp base-string.cpp base-timer.cpp base-type.cpp
base-value.cpp config-ops.cpp icinga-macros.cpp icinga-perfdata.cpp
remote-apiuser.cpp test.cpp
base-url.cpp base-value.cpp config-ops.cpp icinga-macros.cpp
icinga-perfdata.cpp remote-apiuser.cpp test.cpp
)
set(livestatus_test_SOURCES
@ -79,7 +79,7 @@ add_boost_test(base
base_string/clear
base_string/append
base_string/trim
base_string/contains
base_string/contains
base_string/replace
base_string/index
base_string/find
@ -87,27 +87,31 @@ add_boost_test(base
base_timer/interval
base_timer/invoke
base_timer/scope
base_type/gettype
base_type/assign
base_type/byname
base_type/instantiate
base_type/gettype
base_type/assign
base_type/byname
base_type/instantiate
base_url/id_and_path
base_url/parameters
base_url/format
base_url/illegal_legal_strings
base_value/scalar
base_value/convert
base_value/format
config_ops/simple
config_ops/advanced
icinga_macros/simple
icinga_perfdata/empty
icinga_perfdata/simple
icinga_perfdata/quotes
icinga_perfdata/multiple
icinga_perfdata/uom
icinga_perfdata/warncritminmax
icinga_perfdata/ignore_invalid_warn_crit_min_max
icinga_perfdata/invalid
icinga_perfdata/multi
remote_apiuser/get_password
remote_apiuser/check_password
config_ops/simple
config_ops/advanced
icinga_macros/simple
icinga_perfdata/empty
icinga_perfdata/simple
icinga_perfdata/quotes
icinga_perfdata/multiple
icinga_perfdata/uom
icinga_perfdata/warncritminmax
icinga_perfdata/ignore_invalid_warn_crit_min_max
icinga_perfdata/invalid
icinga_perfdata/multi
remote_apiuser/get_password
remote_apiuser/check_password
)
if(ICINGA2_WITH_LIVESTATUS)

79
test/base-url.cpp Normal file
View File

@ -0,0 +1,79 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-2015 Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#include "base/url.hpp"
#include "base/array.hpp"
#include <boost/test/unit_test.hpp>
#include <boost/foreach.hpp>
using namespace icinga;
BOOST_AUTO_TEST_SUITE(base_url)
/* Scheme, authority and path segments are extracted from a full URL. */
BOOST_AUTO_TEST_CASE(id_and_path)
{
	Url::Ptr parsed = new Url("http://icinga.org/foo/bar/baz?hurr=durr");

	BOOST_CHECK(parsed->GetScheme() == "http");
	BOOST_CHECK(parsed->GetAuthority() == "icinga.org");

	/* Build the expected segment list in one go. */
	const char *segments[] = { "foo", "bar", "baz" };
	std::vector<String> expectedPath(segments, segments + 3);

	BOOST_CHECK(parsed->GetPath() == expectedPath);
}
/* Scalar and array ("name[]") query parameters are parsed correctly. */
BOOST_AUTO_TEST_CASE(parameters)
{
	Url::Ptr parsed = new Url("https://icinga.org/hya/?rain=karl&rair=robert&foo[]=bar");

	/* Scalar parameters. */
	BOOST_CHECK(parsed->GetQueryElement("rair") == "robert");
	BOOST_CHECK(parsed->GetQueryElement("rain") == "karl");

	/* "foo[]" ends up as an array stored under "foo". */
	BOOST_CHECK(parsed->GetQueryElement("foo").IsObjectType<Array>());

	Array::Ptr fooValues = parsed->GetQueryElement("foo");
	BOOST_CHECK(fooValues->GetLength() == 1);
	BOOST_CHECK(fooValues->Get(0) == "bar");
}
/*
 * Formatting a parsed URL must yield a string that parses again and formats
 * to the very same text. The re-parsed objects are held in Url::Ptr so they
 * are released again; a bare `new Url(...)` inside BOOST_CHECK would leak
 * and only test for a non-null pointer, which is always true.
 */
BOOST_AUTO_TEST_CASE(format)
{
	Url::Ptr url = new Url("http://foo.bar/baz/?hop=top&flop=sop#iLIKEtrains");
	Url::Ptr reparsed = new Url(url->Format());
	BOOST_CHECK(reparsed->Format() == url->Format());

	url = new Url("//main.args/////////?k[]=one&k[]=two#three");
	reparsed = new Url(url->Format());
	BOOST_CHECK(reparsed->Format() == url->Format());

	url = new Url("/foo/bar/index.php?blaka");
	reparsed = new Url(url->Format());
	BOOST_CHECK(reparsed->Format() == url->Format());
}
/*
 * Malformed query strings are rejected with std::invalid_argument, while a
 * query that merely looks odd is accepted and parsed correctly.
 */
BOOST_AUTO_TEST_CASE(illegal_legal_strings)
{
	BOOST_CHECK_THROW(new Url("/?foo=barr&foo[]=bazz"), std::invalid_argument);
	BOOST_CHECK_THROW(new Url("/?]=gar"), std::invalid_argument);

	/* Valid: '?' and '/' are legal inside keys and values. The URL is held
	 * in a Url::Ptr so the object is released again, and its content is
	 * checked rather than just the pointer. "\?\?" avoids trigraphs. */
	Url::Ptr valid = new Url("/?foo=baz??&\?\?=/?");
	BOOST_CHECK(valid->GetQueryElement("foo") == "baz\?\?");
	BOOST_CHECK(valid->GetQueryElement("\?\?") == "/?");

	BOOST_CHECK_THROW(new Url("/?foo=bar&foo=ba"), std::invalid_argument);
	BOOST_CHECK_THROW(new Url("/?foo=bar&[]=d"), std::invalid_argument);
	BOOST_CHECK_THROW(new Url("/?fo=&bar=garOA"), std::invalid_argument);
}
BOOST_AUTO_TEST_SUITE_END()