mirror of
https://github.com/stenzek/duckstation.git
synced 2025-06-06 19:45:33 +00:00
StringUtil: Add DecodeUTF16String() and BE variants
This commit is contained in:
parent
17dfb95d6a
commit
e3c0eed3df
@ -1,8 +1,9 @@
|
|||||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
|
||||||
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
|
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
|
||||||
|
|
||||||
#include "string_util.h"
|
#include "string_util.h"
|
||||||
#include "assert.h"
|
#include "assert.h"
|
||||||
|
#include "bitutils.h"
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
@ -20,6 +21,16 @@
|
|||||||
#include "windows_headers.h"
|
#include "windows_headers.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace StringUtil {
|
||||||
|
|
||||||
|
template<bool swap>
|
||||||
|
static size_t DecodeUTF16Impl(const void* bytes, size_t pos, size_t size, char32_t* ch);
|
||||||
|
|
||||||
|
template<bool swap>
|
||||||
|
static std::string DecodeUTF16StringImpl(const void* bytes, size_t size);
|
||||||
|
|
||||||
|
} // namespace StringUtil
|
||||||
|
|
||||||
bool StringUtil::WildcardMatch(const char* subject, const char* mask, bool case_sensitive /*= true*/)
|
bool StringUtil::WildcardMatch(const char* subject, const char* mask, bool case_sensitive /*= true*/)
|
||||||
{
|
{
|
||||||
if (case_sensitive)
|
if (case_sensitive)
|
||||||
@ -629,17 +640,20 @@ size_t StringUtil::EncodeAndAppendUTF16(void* utf16, size_t pos, size_t size, ch
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t StringUtil::DecodeUTF16(const void* bytes, size_t pos, size_t length, char32_t* ch)
|
template<bool swap>
|
||||||
|
size_t StringUtil::DecodeUTF16Impl(const void* bytes, size_t pos, size_t size, char32_t* ch)
|
||||||
{
|
{
|
||||||
const u8* const utf16_bytes = std::assume_aligned<sizeof(u16)>(static_cast<const u8*>(bytes)) + pos * sizeof(u16);
|
const u8* const utf16_bytes = std::assume_aligned<sizeof(u16)>(static_cast<const u8*>(bytes)) + pos * sizeof(u16);
|
||||||
|
|
||||||
u16 high;
|
u16 high;
|
||||||
std::memcpy(&high, utf16_bytes, sizeof(high));
|
std::memcpy(&high, utf16_bytes, sizeof(high));
|
||||||
|
if constexpr (swap)
|
||||||
|
high = ByteSwap(high);
|
||||||
|
|
||||||
// High surrogate?
|
// High surrogate?
|
||||||
if (high >= 0xD800 && high <= 0xDBFF) [[unlikely]]
|
if (high >= 0xD800 && high <= 0xDBFF) [[unlikely]]
|
||||||
{
|
{
|
||||||
if (length < 2) [[unlikely]]
|
if ((size - pos) < 2) [[unlikely]]
|
||||||
{
|
{
|
||||||
// Missing low surrogate.
|
// Missing low surrogate.
|
||||||
*ch = UNICODE_REPLACEMENT_CHARACTER;
|
*ch = UNICODE_REPLACEMENT_CHARACTER;
|
||||||
@ -648,6 +662,9 @@ size_t StringUtil::DecodeUTF16(const void* bytes, size_t pos, size_t length, cha
|
|||||||
|
|
||||||
u16 low;
|
u16 low;
|
||||||
std::memcpy(&low, utf16_bytes + sizeof(u16), sizeof(low));
|
std::memcpy(&low, utf16_bytes + sizeof(u16), sizeof(low));
|
||||||
|
if constexpr (swap)
|
||||||
|
low = ByteSwap(low);
|
||||||
|
|
||||||
if (low >= 0xDC00 && low <= 0xDFFF) [[likely]]
|
if (low >= 0xDC00 && low <= 0xDFFF) [[likely]]
|
||||||
{
|
{
|
||||||
*ch = static_cast<char32_t>(((static_cast<u32>(high) - 0xD800u) << 10) + ((static_cast<u32>(low) - 0xDC00)) +
|
*ch = static_cast<char32_t>(((static_cast<u32>(high) - 0xD800u) << 10) + ((static_cast<u32>(low) - 0xDC00)) +
|
||||||
@ -669,6 +686,44 @@ size_t StringUtil::DecodeUTF16(const void* bytes, size_t pos, size_t length, cha
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<bool swap>
|
||||||
|
std::string StringUtil::DecodeUTF16StringImpl(const void* bytes, size_t size)
|
||||||
|
{
|
||||||
|
std::string dest;
|
||||||
|
dest.reserve(size);
|
||||||
|
|
||||||
|
const size_t u16_size = size / 2;
|
||||||
|
for (size_t pos = 0; pos < u16_size;)
|
||||||
|
{
|
||||||
|
char32_t codepoint;
|
||||||
|
const size_t byte_len = DecodeUTF16Impl<swap>(bytes, pos, u16_size, &codepoint);
|
||||||
|
StringUtil::EncodeAndAppendUTF8(dest, codepoint);
|
||||||
|
pos += byte_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t StringUtil::DecodeUTF16(const void* bytes, size_t pos, size_t size, char32_t* codepoint)
|
||||||
|
{
|
||||||
|
return DecodeUTF16Impl<false>(bytes, pos, size, codepoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t StringUtil::DecodeUTF16BE(const void* bytes, size_t pos, size_t size, char32_t* codepoint)
|
||||||
|
{
|
||||||
|
return DecodeUTF16Impl<true>(bytes, pos, size, codepoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string StringUtil::DecodeUTF16String(const void* bytes, size_t size)
|
||||||
|
{
|
||||||
|
return DecodeUTF16StringImpl<false>(bytes, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string StringUtil::DecodeUTF16BEString(const void* bytes, size_t size)
|
||||||
|
{
|
||||||
|
return DecodeUTF16StringImpl<true>(bytes, size);
|
||||||
|
}
|
||||||
|
|
||||||
std::string StringUtil::Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis /*= "..."*/)
|
std::string StringUtil::Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis /*= "..."*/)
|
||||||
{
|
{
|
||||||
std::string ret;
|
std::string ret;
|
||||||
|
@ -461,8 +461,13 @@ size_t DecodeUTF8(const std::string& str, size_t offset, char32_t* ch);
|
|||||||
size_t EncodeAndAppendUTF16(void* utf16, size_t pos, size_t size, char32_t codepoint);
|
size_t EncodeAndAppendUTF16(void* utf16, size_t pos, size_t size, char32_t codepoint);
|
||||||
|
|
||||||
/// Decodes UTF-16 to a single unicode codepoint.
|
/// Decodes UTF-16 to a single unicode codepoint.
|
||||||
/// Returns the number of bytes the codepoint took in the original string.
|
/// Returns the number of 16-bit units the codepoint took in the original string.
|
||||||
size_t DecodeUTF16(const void* bytes, size_t pos, size_t size, char32_t* codepoint);
|
size_t DecodeUTF16(const void* bytes, size_t pos, size_t size, char32_t* codepoint);
|
||||||
|
size_t DecodeUTF16BE(const void* bytes, size_t pos, size_t size, char32_t* codepoint);
|
||||||
|
|
||||||
|
/// Decodes a UTF-16 string to a UTF-8 string.
|
||||||
|
std::string DecodeUTF16String(const void* bytes, size_t size);
|
||||||
|
std::string DecodeUTF16BEString(const void* bytes, size_t size);
|
||||||
|
|
||||||
// Replaces the end of a string with ellipsis if it exceeds the specified length.
|
// Replaces the end of a string with ellipsis if it exceeds the specified length.
|
||||||
std::string Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis = "...");
|
std::string Ellipsise(const std::string_view str, u32 max_length, const char* ellipsis = "...");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user