Path: tighten URLEncode() and repair URLDecode(); add unit tests.

URLEncode() now passes through only ASCII letters, digits and "-", "_", ".",
"~"; the characters ! * ' ( ) are percent-encoded as well, and hex digits are
emitted in upper case.

URLDecode() previously returned a copy of its input. It now returns the decoded
buffer, consumes both hex digits of each escape, treats the first digit as the
high nibble, maps a-f/A-F to 10-15, and stops at the first truncated or
non-hexadecimal escape.

diff --git a/src/common-tests/path_tests.cpp b/src/common-tests/path_tests.cpp
index ffd527ff2..45c23196b 100644
--- a/src/common-tests/path_tests.cpp
+++ b/src/common-tests/path_tests.cpp
@@ -277,3 +277,65 @@ TEST(Path, CreateFileURL)
   ASSERT_EQ(Path::CreateFileURL("/foo/bar"), "file:///foo/bar");
 #endif
 }
+
+TEST(Path, URLEncode)
+{
+  // Basic cases
+  ASSERT_EQ(Path::URLEncode("hello world"), "hello%20world");
+  ASSERT_EQ(Path::URLEncode(""), "");
+  ASSERT_EQ(Path::URLEncode("abcABC123"), "abcABC123");
+
+  // Special characters
+  ASSERT_EQ(Path::URLEncode("!@#$%^&*()"), "%21%40%23%24%25%5E%26%2A%28%29");
+  ASSERT_EQ(Path::URLEncode("[]{}<>"), "%5B%5D%7B%7D%3C%3E");
+  ASSERT_EQ(Path::URLEncode(",./?;:'\""), "%2C.%2F%3F%3B%3A%27%22");
+
+  // Unicode characters
+  ASSERT_EQ(Path::URLEncode("こんにちは"), "%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF");
+  ASSERT_EQ(Path::URLEncode("über"), "%C3%BCber");
+
+  // Additional special characters
+  ASSERT_EQ(Path::URLEncode("=&?"), "%3D%26%3F");
+  ASSERT_EQ(Path::URLEncode("\\|`"), "%5C%7C%60");
+  ASSERT_EQ(Path::URLEncode("§±€"), "%C2%A7%C2%B1%E2%82%AC");
+  ASSERT_EQ(Path::URLEncode("%20%2F%3F"), "%2520%252F%253F");
+  ASSERT_EQ(Path::URLEncode("tab\tline\nreturn\r"), "tab%09line%0Areturn%0D");
+
+  // Mixed content
+  ASSERT_EQ(Path::URLEncode("path/to/my file.txt"), "path%2Fto%2Fmy%20file.txt");
+  ASSERT_EQ(Path::URLEncode("user+name@example.com"), "user%2Bname%40example.com");
+}
+
+TEST(Path, URLDecode)
+{
+  // Basic cases
+  ASSERT_EQ(Path::URLDecode("hello%20world"), "hello world");
+  ASSERT_EQ(Path::URLDecode(""), "");
+  ASSERT_EQ(Path::URLDecode("abcABC123"), "abcABC123");
+
+  // Special characters
+  ASSERT_EQ(Path::URLDecode("%21%40%23%24%25%5E%26%2A%28%29"), "!@#$%^&*()");
+  ASSERT_EQ(Path::URLDecode("%5B%5D%7B%7D%3C%3E"), "[]{}<>");
+  ASSERT_EQ(Path::URLDecode("%2C%2F%3F%3B%3A%27%22"), ",/?;:'\"");
+
+  // Additional special characters
+  ASSERT_EQ(Path::URLDecode("%3D%26%3F"), "=&?");
+  ASSERT_EQ(Path::URLDecode("%5C%7C%60"), "\\|`");
+  ASSERT_EQ(Path::URLDecode("%C2%A7%C2%B1%E2%82%AC"), "§±€");
+  ASSERT_EQ(Path::URLDecode("%2520%252F%253F"), "%20%2F%3F");
+  ASSERT_EQ(Path::URLDecode("tab%09line%0Areturn%0D"), "tab\tline\nreturn\r");
+
+  // Unicode characters
+  ASSERT_EQ(Path::URLDecode("%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF"), "こんにちは");
+  ASSERT_EQ(Path::URLDecode("%C3%BCber"), "über");
+
+  // Mixed content
+  ASSERT_EQ(Path::URLDecode("path%2Fto%2Fmy%20file.txt"), "path/to/my file.txt");
+  ASSERT_EQ(Path::URLDecode("user%2Bname%40example.com"), "user+name@example.com");
+
+  // Invalid decode cases - decoder should stop at first error
+  ASSERT_EQ(Path::URLDecode("hello%2G"), "hello");    // Invalid hex char 'G'
+  ASSERT_EQ(Path::URLDecode("test%"), "test");        // Incomplete escape sequence
+  ASSERT_EQ(Path::URLDecode("path%%20name"), "path"); // Invalid % followed by valid sequence
+  ASSERT_EQ(Path::URLDecode("abc%2"), "abc");         // Truncated escape sequence
+}
diff --git a/src/common/file_system.cpp b/src/common/file_system.cpp
index 57ffe3133..6aae65853 100644
--- a/src/common/file_system.cpp
+++ b/src/common/file_system.cpp
@@ -1067,7 +1067,7 @@ std::string Path::URLEncode(std::string_view str)
   {
     const char c = str[i];
     if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' || c == '_' ||
-        c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
+        c == '.' || c == '~')
     {
       ret.push_back(c);
     }
@@ -1077,8 +1077,8 @@ std::string Path::URLEncode(std::string_view str)
 
       const unsigned char n1 = static_cast<unsigned char>(c) >> 4;
       const unsigned char n2 = static_cast<unsigned char>(c) & 0x0F;
-      ret.push_back((n1 >= 10) ? ('a' + (n1 - 10)) : ('0' + n1));
-      ret.push_back((n2 >= 10) ? ('a' + (n2 - 10)) : ('0' + n2));
+      ret.push_back((n1 >= 10) ? ('A' + (n1 - 10)) : ('0' + n1));
+      ret.push_back((n2 >= 10) ? ('A' + (n2 - 10)) : ('0' + n2));
     }
   }
 
@@ -1090,34 +1090,29 @@ std::string Path::URLDecode(std::string_view str)
   std::string ret;
   ret.reserve(str.length());
 
-  for (size_t i = 0, l = str.size(); i < l; i++)
+  for (size_t i = 0, l = str.size(); i < l;)
   {
-    const char c = str[i];
-    if (c == '+')
+    const char c = str[i++];
+    if (c == '%')
     {
-      ret.push_back(c);
-    }
-    else if (c == '%')
-    {
-      if ((i + 2) >= str.length())
+      if ((i + 2) > str.length())
         break;
 
-      const char clower = str[i + 1];
-      const char cupper = str[i + 2];
-      const unsigned char lower =
-        (clower >= '0' && clower <= '9') ?
-          static_cast<unsigned char>(clower - '0') :
-          ((clower >= 'a' && clower <= 'f') ?
-             static_cast<unsigned char>(clower - 'a') :
-             ((clower >= 'A' && clower <= 'F') ? static_cast<unsigned char>(clower - 'A') : 0));
-      const unsigned char upper =
-        (cupper >= '0' && cupper <= '9') ?
-          static_cast<unsigned char>(cupper - '0') :
-          ((cupper >= 'a' && cupper <= 'f') ?
-             static_cast<unsigned char>(cupper - 'a') :
-             ((cupper >= 'A' && cupper <= 'F') ? static_cast<unsigned char>(cupper - 'A') : 0));
-      const char dch = static_cast<char>(lower | (upper << 4));
-      ret.push_back(dch);
+      // return -1 which will be negative when or'ed with anything else, so it becomes invalid.
+      static constexpr auto to_nibble = [](char ch) -> int {
+        return (ch >= '0' && ch <= '9') ?
+                 static_cast<int>(ch - '0') :
+                 ((ch >= 'a' && ch <= 'f') ? (static_cast<int>(ch - 'a') + 0xa) :
+                                             ((ch >= 'A' && ch <= 'F') ? (static_cast<int>(ch - 'A') + 0xa) : -1));
+      };
+
+      const int upper = to_nibble(str[i++]);
+      const int lower = to_nibble(str[i++]);
+      const int dch = lower | (upper << 4);
+      if (dch < 0)
+        break;
+
+      ret.push_back(static_cast<char>(dch));
     }
     else
     {
@@ -1125,7 +1120,7 @@ std::string Path::URLDecode(std::string_view str)
     }
   }
 
-  return std::string(str);
+  return ret;
 }
 
 std::string Path::CreateFileURL(std::string_view path)