From b3b723b4531a3781ab847a2652cf192303e78941 Mon Sep 17 00:00:00 2001 From: Mattes D Date: Sun, 25 Dec 2016 18:29:21 +0100 Subject: Refactored to put URL Encoding / Decoding in a single place. (#3491) --- src/Bindings/ManualBindings.cpp | 85 ++++++++++++---- src/HTTP/HTTPFormParser.cpp | 13 ++- src/StringUtils.cpp | 220 +++++++++++++++++++++++++++++----------- src/StringUtils.h | 15 ++- src/WebAdmin.cpp | 25 +---- 5 files changed, 255 insertions(+), 103 deletions(-) (limited to 'src') diff --git a/src/Bindings/ManualBindings.cpp b/src/Bindings/ManualBindings.cpp index 1fe3c0f9f..34357eb65 100644 --- a/src/Bindings/ManualBindings.cpp +++ b/src/Bindings/ManualBindings.cpp @@ -2112,6 +2112,66 @@ static int tolua_cUrlParser_ParseAuthorityPart(lua_State * a_LuaState) +static int tolua_cUrlParser_UrlDecode(lua_State * tolua_S) +{ + // Check the param types: + cLuaState S(tolua_S); + if ( + // Don't care about the first param + !S.CheckParamString(2) || + !S.CheckParamEnd(3) + ) + { + return 0; + } + + // Get the parameters: + AString Input; + S.GetStackValue(2, Input); + + // Convert and return: + auto res = URLDecode(Input); + if (res.first) + { + S.Push(res.second); + } + else + { + S.Push(cLuaState::Nil); + } + return 1; +} + + + + + +static int tolua_cUrlParser_UrlEncode(lua_State * tolua_S) +{ + // Check the param types: + cLuaState S(tolua_S); + if ( + // Don't care about the first param + !S.CheckParamString(2) || + !S.CheckParamEnd(3) + ) + { + return 0; + } + + // Get the parameters: + AString Input; + S.GetStackValue(2, Input); + + // Convert and return: + S.Push(URLEncode(Input)); + return 1; +} + + + + + static int tolua_cWebAdmin_AddWebTab(lua_State * tolua_S) { // Function signatures: @@ -2324,28 +2384,15 @@ static int tolua_cWebAdmin_GetPage(lua_State * tolua_S) -/** Binding for cWebAdmin::GetURLEncodedString. -Manual code required because ToLua generates an extra return value */ +/** Binding for cWebAdmin::GetURLEncodedString. */ static int tolua_cWebAdmin_GetURLEncodedString(lua_State * tolua_S) { - // Check the param types: + // Emit the obsoletion warning: cLuaState S(tolua_S); - if ( - // Don't care whether the first param is a cWebAdmin instance or class - !S.CheckParamString(2) || - !S.CheckParamEnd(3) - ) - { - return 0; - } - - // Get the parameters: - AString Input; - S.GetStackValue(2, Input); + LOGWARNING("cWebAdmin:GetURLEncodedString() is obsolete, use cUrlParser:UrlEncode() instead."); + S.LogStackTrace(); - // Convert and return: - S.Push(cWebAdmin::GetURLEncodedString(Input)); - return 1; + return tolua_cUrlParser_UrlEncode(tolua_S); } @@ -4042,6 +4089,8 @@ void cManualBindings::Bind(lua_State * tolua_S) tolua_function(tolua_S, "IsKnownScheme", tolua_cUrlParser_IsKnownScheme); tolua_function(tolua_S, "Parse", tolua_cUrlParser_Parse); tolua_function(tolua_S, "ParseAuthorityPart", tolua_cUrlParser_ParseAuthorityPart); + tolua_function(tolua_S, "UrlDecode", tolua_cUrlParser_UrlDecode); + tolua_function(tolua_S, "UrlEncode", tolua_cUrlParser_UrlEncode); tolua_endmodule(tolua_S); tolua_beginmodule(tolua_S, "cWebAdmin"); diff --git a/src/HTTP/HTTPFormParser.cpp b/src/HTTP/HTTPFormParser.cpp index ea5da3c18..6ad5d2592 100644 --- a/src/HTTP/HTTPFormParser.cpp +++ b/src/HTTP/HTTPFormParser.cpp @@ -167,13 +167,22 @@ void cHTTPFormParser::ParseFormUrlEncoded(void) case 1: { // Only name present - (*this)[URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '))] = ""; + auto name = URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' ')); + if (name.first) + { + (*this)[name.second] = ""; + } break; } case 2: { // name=value format: - (*this)[URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '))] = URLDecode(ReplaceAllCharOccurrences(Components[1], '+', ' ')); + auto name = URLDecode(Components[0]); + auto value = URLDecode(Components[1]); + if (name.first && value.first) + { + (*this)[name.second] = value.second; + } break; } } diff --git a/src/StringUtils.cpp b/src/StringUtils.cpp index 81999a35d..2fb4f6a9a 100644 --- a/src/StringUtils.cpp +++ b/src/StringUtils.cpp @@ -14,6 +14,42 @@ +/** Returns the value of the single hex digit. +Returns 0xff on failure. */ +static unsigned char HexToDec(char a_HexChar) +{ + switch (a_HexChar) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': return 10; + case 'b': return 11; + case 'c': return 12; + case 'd': return 13; + case 'e': return 14; + case 'f': return 15; + case 'A': return 10; + case 'B': return 11; + case 'C': return 12; + case 'D': return 13; + case 'E': return 14; + case 'F': return 15; + } + return 0xff; +} + + + + + AString & AppendVPrintf(AString & str, const char * format, va_list args) { ASSERT(format != nullptr); @@ -353,44 +389,63 @@ AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UT a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size for (size_t i = 0; i < a_NumShorts; i++) { - int c = GetBEShort(&a_RawData[i * 2]); - if (c < 0x80) - { - a_UTF8.push_back(static_cast(c)); - } - else if (c < 0x800) - { - a_UTF8.push_back(static_cast(192 + c / 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else if (c - 0xd800 < 0x800) - { - // Error, silently drop - } - else if (c < 0x10000) + a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2]))); + } + return a_UTF8; +} + + + + +AString UnicodeCharToUtf8(unsigned a_UnicodeChar) +{ + if (a_UnicodeChar < 0x80) + { + return AString{static_cast(a_UnicodeChar)}; + } + else if (a_UnicodeChar < 0x800) + { + return AString { - a_UTF8.push_back(static_cast(224 + c / 4096)); - a_UTF8.push_back(static_cast(128 + (c / 64) % 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else if (c < 0x110000) + static_cast(192 + a_UnicodeChar / 64), + static_cast(128 + a_UnicodeChar % 64), + }; + } + else if (a_UnicodeChar - 0xd800 < 0x800) + { + // Error + return AString(); + } + else if (a_UnicodeChar < 0x10000) + { + return AString { - a_UTF8.push_back(static_cast(240 + c / 262144)); - a_UTF8.push_back(static_cast(128 + (c / 4096) % 64)); - a_UTF8.push_back(static_cast(128 + (c / 64) % 64)); - a_UTF8.push_back(static_cast(128 + c % 64)); - } - else + static_cast(224 + a_UnicodeChar / 4096), + static_cast(128 + (a_UnicodeChar / 64) % 64), + static_cast(128 + a_UnicodeChar % 64) + }; + } + else if (a_UnicodeChar < 0x110000) + { + return AString { - // Error, silently drop - } + static_cast(240 + a_UnicodeChar / 262144), + static_cast(128 + (a_UnicodeChar / 4096) % 64), + static_cast(128 + (a_UnicodeChar / 64) % 64), + static_cast(128 + a_UnicodeChar % 64), + }; + } + else + { + // Error + return AString(); } - return a_UTF8; } + // UTF-8 conversion code adapted from: // https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c @@ -708,58 +763,99 @@ AString StripColorCodes(const AString & a_Message) -AString URLDecode(const AString & a_String) +std::pair URLDecode(const AString & a_Text) { AString res; - size_t len = a_String.length(); + auto len = a_Text.size(); res.reserve(len); for (size_t i = 0; i < len; i++) { - char ch = a_String[i]; - if ((ch != '%') || (i > len - 3)) + if (a_Text[i] == '+') { - res.push_back(ch); + res.push_back(' '); continue; } - // Decode the hex value: - char hi = a_String[i + 1], lo = a_String[i + 2]; - if ((hi >= '0') && (hi <= '9')) + if (a_Text[i] != '%') { - hi = hi - '0'; + res.push_back(a_Text[i]); + continue; } - else if ((hi >= 'a') && (hi <= 'f')) + if (i + 1 >= len) { - hi = hi - 'a' + 10; + // String too short for an encoded value + return std::make_pair(false, AString()); } - else if ((hi >= 'A') && (hi <= 'F')) + if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U')) { - hi = hi - 'F' + 10; + // Unicode char "%u0xxxx" + if (i + 6 >= len) + { + return std::make_pair(false, AString()); + } + if (a_Text[i + 2] != '0') + { + return std::make_pair(false, AString()); + } + unsigned v1 = HexToDec(a_Text[i + 3]); + unsigned v2 = HexToDec(a_Text[i + 4]); + unsigned v3 = HexToDec(a_Text[i + 5]); + unsigned v4 = HexToDec(a_Text[i + 6]); + if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff)) + { + // Invalid hex numbers + return std::make_pair(false, AString()); + } + res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)); + i = i + 6; } else { - res.push_back(ch); - continue; - } - if ((lo >= '0') && (lo <= '9')) - { - lo = lo - '0'; + // Regular char "%xx": + if (i + 2 >= len) + { + return std::make_pair(false, AString()); + } + auto v1 = HexToDec(a_Text[i + 1]); + auto v2 = HexToDec(a_Text[i + 2]); + if ((v1 == 0xff) || (v2 == 0xff)) + { + // Invalid hex numbers + return std::make_pair(false, AString()); + } + res.push_back(static_cast((v1 << 4) | v2)); + i = i + 2; } - else if ((lo >= 'a') && (lo <= 'f')) + } // for i - a_Text[i] + return std::make_pair(true, res); +} + + + + + +AString URLEncode(const AString & a_Text) +{ + AString res; + auto len = a_Text.size(); + res.reserve(len); + static const char HEX[] = "0123456789abcdef"; + for (size_t i = 0; i < len; ++i) + { + if (isalnum(a_Text[i])) { - lo = lo - 'a' + 10; + res.push_back(a_Text[i]); } - else if ((lo >= 'A') && (lo <= 'F')) + else if (a_Text[i] == ' ') { - lo = lo - 'A' + 10; + res.push_back('+'); } else { - res.push_back(ch); - continue; + res.push_back('%'); + res.push_back(HEX[static_cast(a_Text[i]) >> 4]); + res.push_back(HEX[static_cast(a_Text[i]) & 0x0f]); } - res.push_back(static_cast((hi << 4) | lo)); - i += 2; - } // for i - a_String[] + } return res; } @@ -907,6 +1003,16 @@ short GetBEShort(const char * a_Mem) +unsigned short GetBEUShort(const char * a_Mem) +{ + const Byte * Bytes = reinterpret_cast(a_Mem); + return static_cast((Bytes[0] << 8) | Bytes[1]); +} + + + + + int GetBEInt(const char * a_Mem) { const Byte * Bytes = reinterpret_cast(a_Mem); diff --git a/src/StringUtils.h b/src/StringUtils.h index 8c1925115..e2be2b9c0 100644 --- a/src/StringUtils.h +++ b/src/StringUtils.h @@ -85,6 +85,9 @@ extern void ReplaceString(AString & iHayStack, const AString & iNeedle, const AS /** Converts a stream of BE shorts into UTF-8 string; returns a_UTF8. */ extern AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UTF8); +/** Converts a unicode character to its UTF8 representation. */ +extern AString UnicodeCharToUtf8(unsigned a_UnicodeChar); + /** Converts a UTF-8 string into a UTF-16 BE string. */ extern std::u16string UTF8ToRawBEUTF16(const AString & a_String); @@ -98,8 +101,13 @@ extern AString EscapeString(const AString & a_Message); // tolua_export /** Removes all control codes used by MC for colors and styles. */ extern AString StripColorCodes(const AString & a_Message); // tolua_export -/** URL-Decodes the given string, replacing all "%HH" into the correct characters. Invalid % sequences are left intact */ -extern AString URLDecode(const AString & a_String); // Cannot export to Lua automatically - would generated an extra return value +/** URL-Decodes the given string. +The first value specifies whether the decoding was successful. +The second value is the decoded string, if successful. */ +extern std::pair URLDecode(const AString & a_String); // Exported to Lua as cUrlParser::UrlDecode() + +/** URL-encodes the given string. */ +extern AString URLEncode(const AString & a_Text); /** Replaces all occurrences of char a_From inside a_String with char a_To. */ extern AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To); // Needn't export to Lua, since Lua doesn't have chars anyway @@ -113,6 +121,9 @@ extern AString Base64Encode(const AString & a_Input); // Exported manually due /** Reads two bytes from the specified memory location and interprets them as BigEndian short */ extern short GetBEShort(const char * a_Mem); +/** Reads two bytes from the specified memory location and interprets them as BigEndian unsigned short */ +extern unsigned short GetBEUShort(const char * a_Mem); + /** Reads four bytes from the specified memory location and interprets them as BigEndian int */ extern int GetBEInt(const char * a_Mem); diff --git a/src/WebAdmin.cpp b/src/WebAdmin.cpp index 5c08deb0d..1e60b7c0e 100644 --- a/src/WebAdmin.cpp +++ b/src/WebAdmin.cpp @@ -602,30 +602,7 @@ AString cWebAdmin::GetHTMLEscapedString(const AString & a_Input) AString cWebAdmin::GetURLEncodedString(const AString & a_Input) { - // Translation table from nibble to hex: - static const char Hex[] = "0123456789abcdef"; - - // Preallocate the output to match input: - AString dst; - size_t len = a_Input.length(); - dst.reserve(len); - - // Loop over input and substitute whatever is needed: - for (size_t i = 0; i < len; i++) - { - char ch = a_Input[i]; - if (isalnum(ch) || (ch == '-') || (ch == '_') || (ch == '.') || (ch == '~')) - { - dst.push_back(ch); - } - else - { - dst.push_back('%'); - dst.push_back(Hex[(ch >> 4) & 0x0f]); - dst.push_back(Hex[ch & 0x0f]); - } - } // for i - a_Input[] - return dst; + return URLEncode(a_Input); } -- cgit v1.2.3