From 958df6ad9192285e0a19caa362f9f0c9e63422d4 Mon Sep 17 00:00:00 2001
From: Mattes D
Date: Fri, 25 Dec 2015 18:50:25 +0100
Subject: Added the cUrlParser class, exported to Lua API.

---
 src/Bindings/CMakeLists.txt     |   2 +-
 src/Bindings/ManualBindings.cpp | 159 ++++++++++++++++++++++++++++++++
 src/HTTPServer/CMakeLists.txt   |   8 +-
 src/HTTPServer/UrlParser.cpp    | 200 ++++++++++++++++++++++++++++++++++++++++
 src/HTTPServer/UrlParser.h      |  58 ++++++++++++
 5 files changed, 424 insertions(+), 3 deletions(-)
 create mode 100644 src/HTTPServer/UrlParser.cpp
 create mode 100644 src/HTTPServer/UrlParser.h

(limited to 'src')

diff --git a/src/Bindings/CMakeLists.txt b/src/Bindings/CMakeLists.txt
index a53e82581..702d38365 100644
--- a/src/Bindings/CMakeLists.txt
+++ b/src/Bindings/CMakeLists.txt
@@ -154,5 +154,5 @@ endif()
 
 if(NOT MSVC)
 	add_library(Bindings ${SRCS} ${HDRS})
-	target_link_libraries(Bindings lua sqlite tolualib mbedtls)
+	target_link_libraries(Bindings lua sqlite tolualib mbedtls HTTPServer)
 endif()
diff --git a/src/Bindings/ManualBindings.cpp b/src/Bindings/ManualBindings.cpp
index 3a595c1d2..42e7e9bd2 100644
--- a/src/Bindings/ManualBindings.cpp
+++ b/src/Bindings/ManualBindings.cpp
@@ -36,6 +36,7 @@
 #include "../StringCompression.h"
 #include "../CommandOutput.h"
 #include "../BuildInfo.h"
+#include "../HTTPServer/UrlParser.h"
 
 
 
@@ -1956,6 +1957,155 @@ static int tolua_get_HTTPRequest_FormData(lua_State* tolua_S)
 
 
 
+static int tolua_cUrlParser_GetDefaultPort(lua_State * a_LuaState)
+{
+	// API function signature:
+	// cUrlParser:GetDefaultPort("scheme") -> number (0 if the scheme is not known)
+
+	// Check params:
+	cLuaState L(a_LuaState);
+	if (
+		!L.CheckParamUserTable(1, "cUrlParser") ||
+		!L.CheckParamString(2) ||
+		!L.CheckParamEnd(3)
+	)
+	{
+		return 0;
+	}
+
+	// Read params from Lua:
+	AString scheme;
+	L.GetStackValue(2, scheme);
+
+	// Execute and push result:
+	L.Push(cUrlParser::GetDefaultPort(scheme));
+	return 1;
+}
+
+
+
+
+
+static int tolua_cUrlParser_IsKnownScheme(lua_State * a_LuaState)
+{
+	// API function signature:
+	// cUrlParser:IsKnownScheme("scheme") -> bool
+
+	// Check params:
+	cLuaState L(a_LuaState);
+	if (
+		!L.CheckParamUserTable(1, "cUrlParser") ||
+		!L.CheckParamString(2) ||
+		!L.CheckParamEnd(3)
+	)
+	{
+		return 0;
+	}
+
+	// Read params from Lua:
+	AString scheme;
+	L.GetStackValue(2, scheme);
+
+	// Execute and push result:
+	L.Push(cUrlParser::IsKnownScheme(scheme));
+	return 1;
+}
+
+
+
+
+
+static int tolua_cUrlParser_Parse(lua_State * a_LuaState)
+{
+	// API function signature:
+	// cUrlParser:Parse("url") -> "scheme", "user", "password", "host", portnum, "path", "query", "fragment"
+	// On error, returns nil and error message
+
+	// Check params:
+	cLuaState L(a_LuaState);
+	if (
+		!L.CheckParamUserTable(1, "cUrlParser") ||
+		!L.CheckParamString(2) ||
+		!L.CheckParamEnd(3)
+	)
+	{
+		return 0;
+	}
+
+	// Read params from Lua:
+	AString url;
+	L.GetStackValue(2, url);
+
+	// Execute and push result:
+	AString scheme, username, password, host, path, query, fragment;
+	UInt16 port = 0;  // Defensive init; a successful Parse() always assigns the port
+	auto res = cUrlParser::Parse(url, scheme, username, password, host, port, path, query, fragment);
+	if (!res.first)
+	{
+		// Error, return nil and error msg:
+		L.PushNil();
+		L.Push(res.second);
+		return 2;
+	}
+	L.Push(scheme);
+	L.Push(username);
+	L.Push(password);
+	L.Push(host);
+	L.Push(port);
+	L.Push(path);
+	L.Push(query);
+	L.Push(fragment);
+	return 8;
+}
+
+
+
+
+
+static int tolua_cUrlParser_ParseAuthorityPart(lua_State * a_LuaState)
+{
+	// API function signature:
+	// cUrlParser:ParseAuthorityPart("authority") -> "user", "password", "host", portnum
+	// On error, returns nil and error message
+	// Parts not specified in the "authority" are left empty / zero
+
+	// Check params:
+	cLuaState L(a_LuaState);
+	if (
+		!L.CheckParamUserTable(1, "cUrlParser") ||
+		!L.CheckParamString(2) ||
+		!L.CheckParamEnd(3)
+	)
+	{
+		return 0;
+	}
+
+	// Read params from Lua:
+	AString authPart;
+	L.GetStackValue(2, authPart);
+
+	// Execute and push result:
+	AString username, password, host;
+	UInt16 port = 0;  // The parser does not write the port when the authority has none, so it must start at zero
+	auto res = cUrlParser::ParseAuthorityPart(authPart, username, password, host, port);
+	if (!res.first)
+	{
+		// Error, return nil and error msg:
+		L.PushNil();
+		L.Push(res.second);
+		return 2;
+	}
+	L.Push(username);
+	L.Push(password);
+	L.Push(host);
+	L.Push(port);
+	return 4;
+}
+
+
+
+
+
 static int tolua_cWebAdmin_GetPlugins(lua_State * tolua_S)
 {
 	cWebAdmin * self = reinterpret_cast<cWebAdmin *>(tolua_tousertype(tolua_S, 1, nullptr));
@@ -3224,9 +3374,11 @@ void cManualBindings::Bind(lua_State * tolua_S)
 		tolua_usertype(tolua_S, "cCryptoHash");
 		tolua_usertype(tolua_S, "cLineBlockTracer");
 		tolua_usertype(tolua_S, "cStringCompression");
+		tolua_usertype(tolua_S, "cUrlParser");
 		tolua_cclass(tolua_S, "cCryptoHash", "cCryptoHash", "", nullptr);
 		tolua_cclass(tolua_S, "cLineBlockTracer", "cLineBlockTracer", "", nullptr);
 		tolua_cclass(tolua_S, "cStringCompression", "cStringCompression", "", nullptr);
+		tolua_cclass(tolua_S, "cUrlParser", "cUrlParser", "", nullptr);
 
 		// Globals:
 		tolua_function(tolua_S, "Clamp", tolua_Clamp);
@@ -3390,6 +3542,13 @@ void cManualBindings::Bind(lua_State * tolua_S)
 			tolua_function(tolua_S, "InflateString", tolua_InflateString);
 		tolua_endmodule(tolua_S);
 
+		tolua_beginmodule(tolua_S, "cUrlParser");
+			tolua_function(tolua_S, "GetDefaultPort", tolua_cUrlParser_GetDefaultPort);
+			tolua_function(tolua_S, "IsKnownScheme", tolua_cUrlParser_IsKnownScheme);
+			tolua_function(tolua_S, "Parse", tolua_cUrlParser_Parse);
+			tolua_function(tolua_S, "ParseAuthorityPart", tolua_cUrlParser_ParseAuthorityPart);
+		tolua_endmodule(tolua_S);
+
 		tolua_beginmodule(tolua_S, "cWebAdmin");
 			tolua_function(tolua_S, "GetHTMLEscapedString", tolua_AllToLua_cWebAdmin_GetHTMLEscapedString);
 			tolua_function(tolua_S, "GetPlugins", tolua_cWebAdmin_GetPlugins);
diff --git a/src/HTTPServer/CMakeLists.txt b/src/HTTPServer/CMakeLists.txt
index 6788d50bf..b875b9145 100644
--- a/src/HTTPServer/CMakeLists.txt
+++ 
b/src/HTTPServer/CMakeLists.txt
@@ -12,7 +12,9 @@ SET (SRCS
 	HTTPServer.cpp
 	MultipartParser.cpp
 	NameValueParser.cpp
-	SslHTTPConnection.cpp)
+	SslHTTPConnection.cpp
+	UrlParser.cpp
+)
 
 SET (HDRS
 	EnvelopeParser.h
@@ -22,7 +24,9 @@ SET (HDRS
 	HTTPServer.h
 	MultipartParser.h
 	NameValueParser.h
-	SslHTTPConnection.h)
+	SslHTTPConnection.h
+	UrlParser.h
+)
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
 	set_source_files_properties(HTTPServer.cpp PROPERTIES COMPILE_FLAGS "-Wno-error=global-constructors ")
diff --git a/src/HTTPServer/UrlParser.cpp b/src/HTTPServer/UrlParser.cpp
new file mode 100644
index 000000000..05db3e413
--- /dev/null
+++ b/src/HTTPServer/UrlParser.cpp
@@ -0,0 +1,200 @@
+
+// UrlParser.cpp
+
+// Implements the cUrlParser class that parses string URL into individual parts
+
+#include "Globals.h"
+#include "UrlParser.h"
+
+
+
+
+
+UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme)
+{
+	if (a_Scheme == "http")
+	{
+		return 80;
+	}
+	else if (a_Scheme == "https")
+	{
+		return 443;
+	}
+	else if (a_Scheme == "ftp")
+	{
+		return 21;
+	}
+	else if (a_Scheme == "mailto")
+	{
+		return 25;
+	}
+	return 0;
+}
+
+
+
+
+
+std::pair<bool, AString> cUrlParser::ParseAuthorityPart(
+	const AString & a_AuthorityPart,
+	AString & a_Username,
+	AString & a_Password,
+	AString & a_Host,
+	UInt16 & a_Port
+)
+{
+	/*
+	a_AuthorityPart format:
+		[user:password@]host[:port]
+	host can be an IPv4, hostname, or an IPv6 enclosed in brackets (a_Host receives it without the brackets)
+	Assume only the password can contain an additional at-sign; a_Port is only written if a port is present
+	*/
+
+	// Split the authority on the last at-sign, if present:
+	auto idxLastAtSign = a_AuthorityPart.find_last_of('@');
+	auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign);
+	auto srvrPart = (idxLastAtSign == AString::npos) ? a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1);
+
+	// User credentials are completely optional:
+	auto idxCredColon = credPart.find(':');
+	a_Username = credPart.substr(0, idxCredColon);
+	a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1);
+
+	// Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first
+	if (srvrPart.empty())
+	{
+		// No host information at all. Bail out with success
+		a_Host.clear();
+		return std::make_pair(true, AString());
+	}
+	if (srvrPart[0] == '[')
+	{
+		// [IPv6] host, search for the closing bracket; the address between the brackets is the host
+		auto idxClosingBracket = srvrPart.find(']');
+		if (idxClosingBracket == AString::npos)
+		{
+			return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket");
+		}
+		a_Host = srvrPart.substr(1, idxClosingBracket - 1);
+		auto portPart = srvrPart.substr(idxClosingBracket + 1);
+		if (portPart.empty())
+		{
+			// No port was specified, return success
+			return std::make_pair(true, AString());
+		}
+		if (portPart[0] != ':')
+		{
+			return std::make_pair(false, "Invalid port format after IPv6 address, missing colon");
+		}
+		if (!StringToInteger(portPart.substr(1), a_Port))  // Skip only the leading colon
+		{
+			return std::make_pair(false, "Failed to parse port number after IPv6 address");
+		}
+		return std::make_pair(true, AString());
+	}
+
+	// Not an [IPv6] address, split on the last colon:
+	auto idxLastColon = srvrPart.find_last_of(':');
+	a_Host = srvrPart.substr(0, idxLastColon);
+	if (idxLastColon == AString::npos)
+	{
+		// No port was specified, return success
+		return std::make_pair(true, AString());
+	}
+	auto portPart = srvrPart.substr(idxLastColon + 1);
+	if (!StringToInteger(portPart, a_Port))
+	{
+		return std::make_pair(false, "Failed to parse port number after hostname");
+	}
+	return std::make_pair(true, AString());
+}
+
+
+
+
+
+std::pair<bool, AString> cUrlParser::Parse(
+	const AString & a_Url,
+	AString & a_Scheme,
+	AString & a_Username,
+	AString & a_Password,
+	AString & a_Host,
+	UInt16 & a_Port,
+	AString & a_Path,
+	AString & a_Query,
+	AString & a_Fragment
+)
+{
+	// Find the scheme - the text before the first colon (stored lowercased):
+	auto idxColon = a_Url.find(':');
+	if (idxColon == AString::npos)
+	{
+		return std::make_pair(false, "Cannot parse the Scheme part of the URL");
+	}
+	a_Scheme = StrToLower(a_Url.substr(0, idxColon));
+	a_Port = GetDefaultPort(a_Scheme);
+	if (a_Port == 0)
+	{
+		return std::make_pair(false, Printf("Unknown URL scheme: \"%s\"", a_Scheme.c_str()));
+	}
+
+	// If the next two chars are a double-slash, skip them:
+	auto authStart = idxColon + 1;
+	if (a_Url.substr(authStart, 2) == "//")
+	{
+		authStart += 2;
+	}
+
+	// The Authority part follows the Scheme, until the first slash (which may come immediately, giving an empty authority):
+	auto idxFirstSlash = a_Url.find('/', authStart);
+	if (idxFirstSlash == AString::npos)
+	{
+		// No slash, the whole end of the Url is the authority part
+		idxFirstSlash = a_Url.size();
+	}
+
+	// Parse the Authority part into individual components; a_Port keeps the scheme default unless a port is given:
+	auto res = ParseAuthorityPart(
+		a_Url.substr(authStart, idxFirstSlash - authStart),
+		a_Username, a_Password,
+		a_Host, a_Port
+	);
+	if (!res.first)
+	{
+		return res;
+	}
+
+	// Parse the rest into a path, query and fragment:
+	a_Path.clear();
+	a_Query.clear();
+	a_Fragment.clear();
+	if (idxFirstSlash == a_Url.size())
+	{
+		// No additional data, bail out with success
+		return std::make_pair(true, AString());
+	}
+	auto idxPathEnd = a_Url.find_first_of("?#", idxFirstSlash + 1);
+	if (idxPathEnd == AString::npos)
+	{
+		a_Path = a_Url.substr(idxFirstSlash);
+		return std::make_pair(true, AString());
+	}
+	a_Path = a_Url.substr(idxFirstSlash, idxPathEnd - idxFirstSlash);
+	auto idxHash = a_Url.find('#', idxPathEnd);
+	if (idxHash == AString::npos)
+	{
+		a_Query = a_Url.substr(idxPathEnd + 1);
+		return std::make_pair(true, AString());
+	}
+	if (idxHash > idxPathEnd)
+	{
+		a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1);
+	}
+	a_Fragment = a_Url.substr(idxHash + 1);
+	return std::make_pair(true, AString());
+}
+
+
+
+
+
diff --git a/src/HTTPServer/UrlParser.h b/src/HTTPServer/UrlParser.h
new file mode 100644
index 000000000..15a63e05d
--- /dev/null
+++ b/src/HTTPServer/UrlParser.h
@@ -0,0 +1,58 @@
+
+// UrlParser.h
+
+// Declares the cUrlParser class that parses string URL into individual parts
+
+
+
+
+
+#pragma once
+
+
+
+
+
+class cUrlParser
+{
+public:
+	/** Returns true if the specified scheme (http, ftp, mailto, ...) is recognized by the URL parser.
+	Is case sensitive, known schemes are always lowercase. */
+	static bool IsKnownScheme(const AString & a_Scheme) { return (GetDefaultPort(a_Scheme) > 0); }
+
+	/** Returns the default port used by the specified scheme / protocol.
+	If the scheme is not known, 0 is returned. */
+	static UInt16 GetDefaultPort(const AString & a_Scheme);
+
+	/** Parses the given Authority part of a URL ("[user:password@]host[:port]") into individual components.
+	Returns true on success, returns false and error message on failure.
+	a_Port is left unmodified if no port is given; an [IPv6] host is returned without the brackets. */
+	static std::pair<bool, AString> ParseAuthorityPart(
+		const AString & a_AuthorityPart,
+		AString & a_Username,
+		AString & a_Password,
+		AString & a_Host,
+		UInt16 & a_Port
+	);
+
+	/** Parses the given URL into individual components.
+	Returns true on success,
+	returns false and error message on failure.
+	Fails if the scheme (protocol) is not known; the scheme is returned lowercased.
+	If port is missing, the default port for the specific scheme is applied. */
+	static std::pair<bool, AString> Parse(
+		const AString & a_Url,
+		AString & a_Scheme,
+		AString & a_Username,
+		AString & a_Password,
+		AString & a_Host,
+		UInt16 & a_Port,
+		AString & a_Path,
+		AString & a_Query,
+		AString & a_Fragment
+	);
+};
+
+
+
+
-- 
cgit v1.2.3