From f8eac3027669b1284c79600c5f9b96b2b92c96a7 Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Wed, 9 Oct 2024 09:43:23 -0700 Subject: [PATCH] Added SDL_StepBackUTF8() --- include/SDL3/SDL_stdinc.h | 32 ++++++++++++++++++++++++++++++- src/dynapi/SDL_dynapi.sym | 1 + src/dynapi/SDL_dynapi_overrides.h | 1 + src/dynapi/SDL_dynapi_procs.h | 1 + src/stdlib/SDL_string.c | 20 +++++++++++++++++++ 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/include/SDL3/SDL_stdinc.h b/include/SDL3/SDL_stdinc.h index b443101369..1e21f8b82a 100644 --- a/include/SDL3/SDL_stdinc.h +++ b/include/SDL3/SDL_stdinc.h @@ -2472,13 +2472,14 @@ extern SDL_DECLSPEC char * SDLCALL SDL_strpbrk(const char *str, const char *brea /** * The Unicode REPLACEMENT CHARACTER codepoint. * - * SDL_StepUTF8() reports this codepoint when it encounters a UTF-8 string + * SDL_StepUTF8() and SDL_StepBackUTF8() report this codepoint when they encounter a UTF-8 string * with encoding errors. * * This tends to render as something like a question mark in most places. * * \since This macro is available since SDL 3.0.0. * + * \sa SDL_StepBackUTF8 * \sa SDL_StepUTF8 */ #define SDL_INVALID_UNICODE_CODEPOINT 0xFFFD @@ -2528,6 +2529,35 @@ extern SDL_DECLSPEC char * SDLCALL SDL_strpbrk(const char *str, const char *brea */ extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen); +/** + * Decode a UTF-8 string in reverse, one Unicode codepoint at a time. + * + * This will go to the start of the previous Unicode codepoint in the string, move `*pstr` to that location and return that codepoint. + * + * If the resulting codepoint is zero (already at the start of the string), it will not move `*pstr` at all. + * + * Generally this function is called in a loop until it returns zero, + * adjusting its parameter each iteration. + * + * If an invalid UTF-8 sequence is encountered, this function returns + * SDL_INVALID_UNICODE_CODEPOINT. 
+ * + * Several things can generate invalid UTF-8 sequences, including overlong + * encodings, the use of UTF-16 surrogate values, and truncated data. Please + * refer to + * [RFC3629](https://www.ietf.org/rfc/rfc3629.txt) + * for details. + * + * \param start a pointer to the beginning of the UTF-8 string. + * \param pstr a pointer to a UTF-8 string pointer to be read and adjusted. + * \returns the previous Unicode codepoint in the string, or 0 if `pstr` is NULL or `*pstr` is not past `start`. + * + * \threadsafety It is safe to call this function from any thread. + * + * \since This function is available since SDL 3.0.0. + */ +extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepBackUTF8(const char *start, const char **pstr); + /** * Convert a single Unicode codepoint to UTF-8. * diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index ab7704b53d..c20dff93ff 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -1176,6 +1176,7 @@ SDL3_0.0.0 { SDL_wcsnstr; SDL_wcsstr; SDL_wcstol; + SDL_StepBackUTF8; # extra symbols go here (don't modify this line) local: *; }; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index ab9ac53528..556ac2cb74 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -1201,3 +1201,4 @@ #define SDL_wcsnstr SDL_wcsnstr_REAL #define SDL_wcsstr SDL_wcsstr_REAL #define SDL_wcstol SDL_wcstol_REAL +#define SDL_StepBackUTF8 SDL_StepBackUTF8_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index 2c1ef580ca..0486ce96aa 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -1207,3 +1207,4 @@ SDL_DYNAPI_PROC(size_t,SDL_wcsnlen,(const wchar_t *a, size_t b),(a,b),return) SDL_DYNAPI_PROC(wchar_t*,SDL_wcsnstr,(const wchar_t *a, const wchar_t *b, size_t c),(a,b,c),return) SDL_DYNAPI_PROC(wchar_t*,SDL_wcsstr,(const wchar_t *a, const wchar_t *b),(a,b),return) SDL_DYNAPI_PROC(long,SDL_wcstol,(const wchar_t *a, wchar_t **b, int c),(a,b,c),return) 
+SDL_DYNAPI_PROC(Uint32,SDL_StepBackUTF8,(const char *a, const char **b),(a,b),return) diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c index 03ed3c13ac..ec6b5e703e 100644 --- a/src/stdlib/SDL_string.c +++ b/src/stdlib/SDL_string.c @@ -265,6 +265,26 @@ Uint32 SDL_StepUTF8(const char **pstr, size_t *pslen) return result; } +Uint32 SDL_StepBackUTF8(const char *start, const char **pstr) +{ + if (!pstr || *pstr <= start) { + return 0; + } + + // Step back over the previous UTF-8 character: skip continuation bytes (10xxxxxx) until a non-continuation byte or `start` is reached + const char *str = *pstr; + do { + if (str == start) { + break; + } + --str; + } while ((*str & 0xC0) == 0x80); + + size_t length = (*pstr - str); // number of bytes stepped over + *pstr = str; // publish the new position before decoding + return StepUTF8(&str, length); // decodes via the local copy, so *pstr stays at the start of the sequence +} + #if (SDL_SIZEOF_WCHAR_T == 2) static Uint32 StepUTF16(const Uint16 **_str, const size_t slen) {