Merge pull request #18682 from hrydgard/string-optimizations

More string_view optimizations
This commit is contained in:
Henrik Rydgård 2024-01-12 15:03:46 +01:00 committed by GitHub
commit 83999b854b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 89 additions and 128 deletions

View File

@ -206,27 +206,15 @@ int u8_charnum(const char *s, int offset)
return charnum;
}
/* Counts the characters in s by decoding it one sequence at a time.
 * Counting stops at the first sequence that decodes to 0, which is not counted. */
int u8_strlen(const char *s)
{
	int byteOffset = 0;
	int numChars = 0;
	// All the work happens in the loop header: decode, test for 0, bump the count.
	for (; u8_nextchar(s, &byteOffset) != 0; ++numChars) {
	}
	return numChars;
}
/* Reads the next UTF-8 sequence out of a buffer, updating an index.
 *
 * s     - buffer holding the UTF-8 bytes; it does not need to be NUL-terminated.
 * index - in: byte offset of the sequence to decode; out: offset of the byte after it.
 * size  - number of valid bytes in s.
 *
 * The byte at *index is read before any bounds test, so the caller must guarantee
 * *index < size. After that, bytes of the form 10xxxxxx are folded in for as long as
 * they stay inside the buffer. No validation of the sequence is performed.
 *
 * NOTE(review): sz is not capped, so a long run of continuation bytes indexes
 * offsetsFromUTF8 with a large value - confirm against the table's length.
 */
uint32_t u8_nextchar(const char *s, int *index, size_t size) {
	uint32_t ch = 0;
	int sz = 0;
	int i = *index;
	do {
		ch = (ch << 6) + (unsigned char)s[i++];
		sz++;
		// Explicit cast: the int/size_t comparison was being converted implicitly.
	} while ((size_t)i < size && ((s[i]) & 0xC0) == 0x80);
	*index = i;
	return ch - offsetsFromUTF8[sz - 1];
}
@ -234,7 +222,6 @@ uint32_t u8_nextchar(const char *s, int *index) {
uint32_t u8_nextchar_unsafe(const char *s, int *i) {
uint32_t ch = (unsigned char)s[(*i)++];
int sz = 1;
if (ch >= 0xF0) {
sz++;
ch &= ~0x10;
@ -253,7 +240,6 @@ uint32_t u8_nextchar_unsafe(const char *s, int *i) {
ch <<= 6;
ch += ((unsigned char)s[(*i)++]) & 0x3F;
}
return ch;
}
@ -367,48 +353,6 @@ int u8_unescape(char *buf, int sz, char *src)
return c;
}
/* Looks for the character ch in the NUL-terminated string s.
 * Returns a pointer to the first byte of the matching sequence, or NULL when the
 * end of the string is reached. *charn receives the number of characters that
 * were decoded before the match (or in total when there is no match). */
const char *u8_strchr(const char *s, uint32_t ch, int *charn)
{
	int cursor = 0;
	*charn = 0;
	while (s[cursor] != '\0') {
		// Remember where this sequence begins; u8_nextchar advances cursor past it.
		const int start = cursor;
		if (u8_nextchar(s, &cursor) == ch)
			return s + start;
		++*charn;
	}
	return NULL;
}
/* Length-bounded variant of the character search: scans at most sz bytes of s.
 * Returns a pointer to the first byte of the sequence that decodes to ch, or NULL
 * if none is found. *charn receives the number of sequences decoded before the
 * match (or in total when there is no match). */
const char *u8_memchr(const char *s, uint32_t ch, size_t sz, int *charn)
{
	*charn = 0;
	for (size_t pos = 0; pos < sz; ++*charn) {
		const size_t start = pos;
		uint32_t decoded = 0;
		int numBytes = 0;
		// Fold in the first byte, then every following byte for which isutf() is
		// false, without ever reading past sz.
		for (;;) {
			decoded = (decoded << 6) + (unsigned char)s[pos++];
			++numBytes;
			if (pos >= sz || isutf(s[pos]))
				break;
		}
		decoded -= offsetsFromUTF8[numBytes - 1];
		if (decoded == ch)
			return &s[start];
	}
	return NULL;
}
int u8_is_locale_utf8(const char *locale)
{
/* this code based on libutf8 */
@ -428,10 +372,12 @@ int u8_is_locale_utf8(const char *locale)
return 0;
}
bool AnyEmojiInString(const char *s, size_t byteCount) {
// Builds a UTF8 cursor over a C string, starting at byte 0. The byte length is taken
// from strlen(c), so c must be non-null and NUL-terminated; the length is narrowed to int.
UTF8::UTF8(const char *c) : c_(c), size_((int)strlen(c)), index_(0) {}
bool AnyEmojiInString(std::string_view str, size_t byteCount) {
int i = 0;
while (i < byteCount) {
uint32_t c = u8_nextchar(s, &i);
uint32_t c = u8_nextchar(str.data(), &i, str.size());
if (CodepointIsProbablyEmoji(c)) {
return true;
}
@ -439,7 +385,7 @@ bool AnyEmojiInString(const char *s, size_t byteCount) {
return false;
}
int UTF8StringNonASCIICount(const char *utf8string) {
int UTF8StringNonASCIICount(std::string_view utf8string) {
UTF8 utf(utf8string);
int count = 0;
while (!utf.end()) {
@ -450,7 +396,7 @@ int UTF8StringNonASCIICount(const char *utf8string) {
return count;
}
bool UTF8StringHasNonASCII(const char *utf8string) {
bool UTF8StringHasNonASCII(std::string_view utf8string) {
return UTF8StringNonASCIICount(utf8string) > 0;
}
@ -478,25 +424,21 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr) {
return s;
}
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source) {
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source) {
int len = (int)source.size();
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, NULL, 0);
MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, dest, std::min((int)destSize, size));
destSize -= 1; // account for the \0.
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.data(), len, NULL, 0);
MultiByteToWideChar(CP_UTF8, 0, source.data(), len, dest, std::min((int)destSize, size));
dest[size] = 0;
}
// C-string overload: converts the NUL-terminated UTF-8 `source` into `dest`, which
// holds `destSize` wchar_t units. The terminator is converted along with the text.
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const char *source) {
	// +1 so the trailing zero is part of the input handed to the converter.
	const int byteCount = 1 + (int)strlen(source);
	const int needed = (int)MultiByteToWideChar(CP_UTF8, 0, source, byteCount, NULL, 0);
	const int capacity = (int)destSize;
	// Never offer the converter more room than the caller's buffer has.
	const int outUnits = needed < capacity ? needed : capacity;
	MultiByteToWideChar(CP_UTF8, 0, source, byteCount, dest, outUnits);
}
std::wstring ConvertUTF8ToWString(const std::string &source) {
std::wstring ConvertUTF8ToWString(const std::string_view source) {
int len = (int)source.size();
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, NULL, 0);
int size = (int)MultiByteToWideChar(CP_UTF8, 0, source.data(), len, NULL, 0);
std::wstring str;
str.resize(size);
if (size > 0) {
MultiByteToWideChar(CP_UTF8, 0, source.c_str(), len, &str[0], size);
MultiByteToWideChar(CP_UTF8, 0, source.data(), source.size(), &str[0], size);
}
return str;
}
@ -517,8 +459,8 @@ std::string ConvertUCS2ToUTF8(const std::u16string &wstr) {
return s;
}
std::string SanitizeUTF8(const std::string &utf8string) {
UTF8 utf(utf8string.c_str());
std::string SanitizeUTF8(std::string_view utf8string) {
UTF8 utf(utf8string);
std::string s;
// Worst case.
s.resize(utf8string.size() * 4);
@ -533,11 +475,11 @@ std::string SanitizeUTF8(const std::string &utf8string) {
return s;
}
static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, const std::string &source) {
static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, std::string_view source) {
const char16_t *const orig = dest;
const char16_t *const destEnd = dest + destSize;
UTF8 utf(source.c_str());
UTF8 utf(source);
char16_t *destw = (char16_t *)dest;
const char16_t *const destwEnd = destw + destSize;
@ -550,7 +492,7 @@ static size_t ConvertUTF8ToUCS2Internal(char16_t *dest, size_t destSize, const s
destw += UTF16LE::encodeUCS2(destw, c);
}
// No ++ to not count the terminal in length.
// No ++ to not count the null-terminator in length.
if (destw < destEnd) {
*destw = 0;
}
@ -562,11 +504,11 @@ void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, const std::string &sourc
ConvertUTF8ToUCS2Internal(dest, destSize, source);
}
std::u16string ConvertUTF8ToUCS2(const std::string &source) {
std::u16string ConvertUTF8ToUCS2(std::string_view source) {
std::u16string dst;
// utf-8 won't be less bytes than there are characters. But need +1 for terminator.
dst.resize(source.size() + 1, 0);
size_t realLen = ConvertUTF8ToUCS2Internal(&dst[0], source.size() + 1, source);
// utf-8 won't be less bytes than there are characters.
dst.resize(source.size(), 0);
size_t realLen = ConvertUTF8ToUCS2Internal(&dst[0], source.size(), source);
dst.resize(realLen);
return dst;
}
@ -595,11 +537,11 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr) {
return s;
}
static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, const std::string &source) {
static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, std::string_view source) {
const wchar_t *const orig = dest;
const wchar_t *const destEnd = dest + destSize;
UTF8 utf(source.c_str());
UTF8 utf(source);
if (sizeof(wchar_t) == 2) {
char16_t *destw = (char16_t *)dest;
@ -628,12 +570,13 @@ static size_t ConvertUTF8ToWStringInternal(wchar_t *dest, size_t destSize, const
return dest - orig;
}
std::wstring ConvertUTF8ToWString(const std::string &source) {
std::wstring ConvertUTF8ToWString(std::string_view source) {
std::wstring dst;
// utf-8 won't be less bytes than there are characters. But need +1 for terminator.
dst.resize(source.size() + 1, 0);
size_t realLen = ConvertUTF8ToWStringInternal(&dst[0], source.size() + 1, source);
// utf-8 won't be less bytes than there are characters.
dst.resize(source.size(), 0);
size_t realLen = ConvertUTF8ToWStringInternal(&dst[0], source.size(), source);
dst.resize(realLen);
dst[realLen] = 0;
return dst;
}

View File

@ -18,11 +18,11 @@
#include <cstdint>
#include <string>
#include <string_view>
uint32_t u8_nextchar(const char *s, int *i);
uint32_t u8_nextchar(const char *s, int *i, size_t size);
uint32_t u8_nextchar_unsafe(const char *s, int *i);
int u8_wc_toutf8(char *dest, uint32_t ch);
int u8_strlen(const char *s);
void u8_inc(const char *s, int *i);
void u8_dec(const char *s, int *i);
@ -33,21 +33,23 @@ inline bool CodepointIsProbablyEmoji(uint32_t c) {
return c > 0xFFFF;
}
bool AnyEmojiInString(const char *s, size_t byteCount);
bool AnyEmojiInString(std::string_view str, size_t byteCount);
class UTF8 {
public:
static const uint32_t INVALID = (uint32_t)-1;
UTF8(const char *c) : c_(c), index_(0) {}
UTF8(const char *c, int index) : c_(c), index_(index) {}
bool end() const { return c_[index_] == 0; }
// TODO: Try to get rid of this constructor.
explicit UTF8(const char *c);
explicit UTF8(std::string_view view) : c_(view.data()), size_((int)view.size()), index_(0) {}
explicit UTF8(std::string_view view, int index) : c_(view.data()), size_((int)view.size()), index_(index) {}
bool end() const { return index_ == size_; }
// Returns true if the next character is outside BMP and Planes 1 - 16.
bool invalid() const {
unsigned char c = (unsigned char)c_[index_];
return (c >= 0x80 && c <= 0xC1) || c >= 0xF5;
}
uint32_t next() {
return u8_nextchar(c_, &index_);
return u8_nextchar(c_, &index_, size_);
}
// Allow invalid continuation bytes.
uint32_t next_unsafe() {
@ -55,7 +57,7 @@ public:
}
uint32_t peek() const {
int tempIndex = index_;
return u8_nextchar(c_, &tempIndex);
return u8_nextchar(c_, &tempIndex, size_);
}
void fwd() {
u8_inc(c_, &index_);
@ -64,7 +66,7 @@ public:
u8_dec(c_, &index_);
}
int length() const {
return u8_strlen(c_);
return size_;
}
int byteIndex() const {
return index_;
@ -88,16 +90,16 @@ public:
private:
const char *c_;
int index_;
int size_;
};
int UTF8StringNonASCIICount(const char *utf8string);
int UTF8StringNonASCIICount(std::string_view utf8string);
bool UTF8StringHasNonASCII(const char *utf8string);
bool UTF8StringHasNonASCII(std::string_view utf8string);
// Removes overlong encodings and similar.
std::string SanitizeUTF8(const std::string &utf8string);
std::string SanitizeUTF8(std::string_view utf8string);
std::string CodepointToUTF8(uint32_t codePoint);
@ -107,14 +109,13 @@ std::string CodepointToUTF8(uint32_t codePoint);
std::string ConvertWStringToUTF8(const std::wstring &wstr);
std::string ConvertWStringToUTF8(const wchar_t *wstr);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const char *source);
std::wstring ConvertUTF8ToWString(const std::string &source);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source);
std::wstring ConvertUTF8ToWString(std::string_view source);
#else
// Used by SymbolMap/assembler
std::wstring ConvertUTF8ToWString(const std::string &source);
std::wstring ConvertUTF8ToWString(std::string_view source);
std::string ConvertWStringToUTF8(const std::wstring &wstr);
#endif
@ -122,5 +123,5 @@ std::string ConvertWStringToUTF8(const std::wstring &wstr);
std::string ConvertUCS2ToUTF8(const std::u16string &wstr);
// Dest size in units, not bytes.
void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, const std::string &source);
std::u16string ConvertUTF8ToUCS2(const std::string &source);
void ConvertUTF8ToUCS2(char16_t *dest, size_t destSize, std::string_view source);
std::u16string ConvertUTF8ToUCS2(std::string_view source);

View File

@ -215,24 +215,34 @@ const ParsedIniLine *Section::GetLine(const char* key) const {
}
// Stores newValue under key, formatted as "0x" followed by 8 zero-padded hex digits.
// Formatting goes through a stack buffer and the value is stored exactly once.
void Section::Set(const char* key, uint32_t newValue) {
	char temp[128];
	snprintf(temp, sizeof(temp), "0x%08x", newValue);
	Set(key, static_cast<const char *>(temp));
}
// Stores newValue under key, formatted as "0x" followed by 16 zero-padded hex digits.
// Formatting goes through a stack buffer and the value is stored exactly once.
void Section::Set(const char* key, uint64_t newValue) {
	char temp[128];
	snprintf(temp, sizeof(temp), "0x%016" PRIx64, newValue);
	Set(key, static_cast<const char *>(temp));
}
// Stores newValue under key using "%f" formatting. NaN and infinity are rejected by
// a debug assertion. The value is formatted into a stack buffer and stored exactly once.
void Section::Set(const char* key, float newValue) {
	_dbg_assert_(!my_isnanorinf(newValue));
	// The largest finite float prints 39 integer digits with "%f", well within 128 bytes.
	char temp[128];
	snprintf(temp, sizeof(temp), "%f", newValue);
	Set(key, static_cast<const char *>(temp));
}
// Stores newValue under key using "%f" formatting. The value is formatted into a
// stack buffer and stored exactly once.
void Section::Set(const char* key, double newValue) {
	// "%f" never switches to exponent notation: the largest finite double prints 309
	// integer digits, so with a sign, the decimal point, six decimals and the NUL the
	// worst case is 318 bytes. A 128-byte buffer would silently truncate such values.
	char temp[320];
	snprintf(temp, sizeof(temp), "%f", newValue);
	Set(key, static_cast<const char *>(temp));
}
// Stores newValue under key as a decimal integer ("%d"). The value is formatted into
// a stack buffer and stored exactly once.
void Section::Set(const char* key, int newValue) {
	char temp[128];
	snprintf(temp, sizeof(temp), "%d", newValue);
	Set(key, static_cast<const char *>(temp));
}
void Section::Set(const char* key, const char* newValue) {

View File

@ -215,10 +215,12 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
}
// Find the first file in the directory.
WIN32_FIND_DATA ffd;
std::wstring wpath = directory.ToWString();
wpath += L"\\*";
#if PPSSPP_PLATFORM(UWP)
HANDLE hFind = FindFirstFileExFromAppW((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
HANDLE hFind = FindFirstFileExFromAppW(wpath.c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
#else
HANDLE hFind = FindFirstFileEx((directory.ToWString() + L"\\*").c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
HANDLE hFind = FindFirstFileEx(wpath.c_str(), FindExInfoStandard, &ffd, FindExSearchNameMatch, NULL, 0);
#endif
if (hFind == INVALID_HANDLE_VALUE) {
#if PPSSPP_PLATFORM(UWP)

View File

@ -136,7 +136,7 @@ void TextDrawerWin32::MeasureString(const char *str, size_t len, float *w, float
std::string toMeasure = ReplaceAll(std::string(str, len), "&&", "&");
std::vector<std::string> lines;
std::vector<std::string_view> lines;
SplitString(toMeasure, '\n', lines);
int extW = 0, extH = 0;
@ -177,20 +177,20 @@ void TextDrawerWin32::MeasureStringRect(const char *str, size_t len, const Bound
TEXTMETRIC metrics{};
GetTextMetrics(ctx_->hDC, &metrics);
std::vector<std::string> lines;
std::vector<std::string_view> lines;
SplitString(toMeasure, '\n', lines);
int total_w = 0;
int total_h = 0;
CacheKey key{ "", fontHash_};
for (size_t i = 0; i < lines.size(); i++) {
CacheKey key{ lines[i], fontHash_ };
key.text = lines[i];
TextMeasureEntry *entry;
auto iter = sizeCache_.find(key);
if (iter != sizeCache_.end()) {
entry = iter->second.get();
} else {
SIZE size;
std::wstring wstr = ConvertUTF8ToWString(lines[i].length() == 0 ? " " : ReplaceAll(lines[i], "&&", "&"));
std::wstring wstr = ConvertUTF8ToWString(lines[i].empty() ? " " : ReplaceAll(lines[i], "&&", "&"));
GetTextExtentPoint32(ctx_->hDC, wstr.c_str(), (int)wstr.size(), &size);
entry = new TextMeasureEntry();

View File

@ -284,6 +284,7 @@ std::string_view StripQuotes(std::string_view s) {
return s;
}
// NOTE: str must live at least as long as all uses of output.
void SplitString(std::string_view str, const char delim, std::vector<std::string_view> &output) {
size_t next = 0;
for (size_t pos = 0, len = str.length(); pos < len; ++pos) {

View File

@ -81,8 +81,9 @@ std::string StripQuotes(const std::string &s);
std::string_view StripSpaces(std::string_view s);
std::string_view StripQuotes(std::string_view s);
// TODO: Make this a lot more efficient by outputting string_views.
// NOTE: str must live at least as long as all uses of output.
void SplitString(std::string_view str, const char delim, std::vector<std::string_view> &output);
// Try to avoid this when possible, in favor of the string_view version.
void SplitString(std::string_view str, const char delim, std::vector<std::string> &output);
void GetQuotedStrings(const std::string& str, std::vector<std::string>& output);

View File

@ -658,7 +658,7 @@ bool MainUI::event(QEvent *e) {
default:
if (str.size()) {
int pos = 0;
int unicode = u8_nextchar(str.c_str(), &pos);
int unicode = u8_nextchar(str.c_str(), &pos, str.size());
NativeKey(KeyInput(DEVICE_ID_KEYBOARD, unicode));
}
break;

View File

@ -867,7 +867,7 @@ static void ProcessSDLEvent(SDL_Window *window, const SDL_Event &event, InputSta
case SDL_TEXTINPUT:
{
int pos = 0;
int c = u8_nextchar(event.text.text, &pos);
int c = u8_nextchar(event.text.text, &pos, strlen(event.text.text));
KeyInput key;
key.flags = KEY_CHAR;
key.unicodeChar = c;

View File

@ -34,8 +34,8 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
@ -76,6 +76,7 @@
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@ -97,6 +98,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../../../;../../ext;../../;../../../../ext;../prebuilt;../../../../ext/zstd/lib</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>

View File

@ -34,15 +34,15 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
@ -78,6 +78,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../../../;../../ext;../../;../../../../ext;../prebuilt</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@ -98,6 +99,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../../../;../../ext;../../;../../../../ext;../prebuilt</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>

@ -1 +1 @@
Subproject commit 64fcb9ea3cf990e65343057ace9271ff3b77428e
Subproject commit 68c70da18f7ff814e9e424fd04ad5f294e307018

View File

@ -24,7 +24,6 @@ set(ALL_SOURCE_FILES
${SRC_DIR}/rc_compat.h
${SRC_DIR}/rc_compat.h
${SRC_DIR}/rc_compat.c
${SRC_DIR}/rc_util.h
${SRC_DIR}/rc_util.c
${SRC_DIR}/rc_client.c
${SRC_DIR}/rc_client_internal.h