diff --git a/include/SDL3/SDL_stdinc.h b/include/SDL3/SDL_stdinc.h index c3628cc6b7..482ae8cf59 100644 --- a/include/SDL3/SDL_stdinc.h +++ b/include/SDL3/SDL_stdinc.h @@ -1309,6 +1309,35 @@ extern SDL_DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *st */ extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen); +/** + * Convert a single Unicode codepoint to UTF-8. + * + * The buffer pointed to by `dst` must be at least 4 bytes long, as this + * function may generate between 1 and 4 bytes of output. + * + * This function returns the first byte _after_ the newly-written UTF-8 + * sequence, which is useful for encoding multiple codepoints in a loop, or + * knowing where to write a null-terminator character to end the string (in + * either case, plan to have a buffer of _more_ than 4 bytes!). + * + * If `codepoint` is an invalid value (outside the Unicode range, or a UTF-16 + * surrogate value, etc.), this will use U+FFFD (REPLACEMENT CHARACTER) for + * the codepoint instead, and not set an error. + * + * If `dst` is NULL, this returns NULL immediately without writing to the + * pointer and without setting an error. + * + * \param codepoint a Unicode codepoint to convert to UTF-8. + * \param dst the location to write the encoded UTF-8. Must point to at least 4 bytes! + * \returns a pointer to the first byte past the newly-written UTF-8 sequence, or NULL if `dst` is NULL. + * + * \threadsafety It is safe to call this function from any thread. + * + * \since This function is available since SDL 3.0.0. + */ +extern SDL_DECLSPEC char * SDLCALL SDL_UCS4ToUTF8(Uint32 codepoint, char *dst); + + extern SDL_DECLSPEC int SDLCALL SDL_sscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, ...) 
SDL_SCANF_VARARG_FUNC(2); extern SDL_DECLSPEC int SDLCALL SDL_vsscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, va_list ap) SDL_SCANF_VARARG_FUNCV(2); extern SDL_DECLSPEC int SDLCALL SDL_snprintf(SDL_OUT_Z_CAP(maxlen) char *text, size_t maxlen, SDL_PRINTF_FORMAT_STRING const char *fmt, ... ) SDL_PRINTF_VARARG_FUNC(3); diff --git a/src/SDL_utils.c b/src/SDL_utils.c index 0aaa3dc391..222cf49a3e 100644 --- a/src/SDL_utils.c +++ b/src/SDL_utils.c @@ -101,31 +101,6 @@ SDL_bool SDL_endswith(const char *string, const char *suffix) return SDL_FALSE; } -char *SDL_UCS4ToUTF8(Uint32 ch, char *dst) -{ - Uint8 *p = (Uint8 *)dst; - if (ch <= 0x7F) { - *p = (Uint8)ch; - ++dst; - } else if (ch <= 0x7FF) { - p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); - p[1] = 0x80 | (Uint8)(ch & 0x3F); - dst += 2; - } else if (ch <= 0xFFFF) { - p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); - p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); - p[2] = 0x80 | (Uint8)(ch & 0x3F); - dst += 3; - } else { - p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); - p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); - p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); - p[3] = 0x80 | (Uint8)(ch & 0x3F); - dst += 4; - } - return dst; -} - /* Assume we can wrap SDL_AtomicInt values and cast to Uint32 */ SDL_COMPILE_TIME_ASSERT(sizeof_object_id, sizeof(int) == sizeof(Uint32)); diff --git a/src/SDL_utils_c.h b/src/SDL_utils_c.h index fb83c30be9..35d8543e8a 100644 --- a/src/SDL_utils_c.h +++ b/src/SDL_utils_c.h @@ -32,8 +32,6 @@ extern void SDL_CalculateFraction(float x, int *numerator, int *denominator); extern SDL_bool SDL_endswith(const char *string, const char *suffix); -extern char *SDL_UCS4ToUTF8(Uint32 ch, char *dst); - typedef enum { SDL_OBJECT_TYPE_UNKNOWN, diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index 1f5f8ca542..284d1bda56 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -811,6 +811,7 @@ SDL3_0.0.0 { SDL_TryLockRWLockForWriting; SDL_TryLockSpinlock; SDL_TryWaitSemaphore; + 
SDL_UCS4ToUTF8; SDL_UnbindAudioStream; SDL_UnbindAudioStreams; SDL_UnloadObject; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index 36fec9b648..701d5fe329 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -836,6 +836,7 @@ #define SDL_TryLockRWLockForWriting SDL_TryLockRWLockForWriting_REAL #define SDL_TryLockSpinlock SDL_TryLockSpinlock_REAL #define SDL_TryWaitSemaphore SDL_TryWaitSemaphore_REAL +#define SDL_UCS4ToUTF8 SDL_UCS4ToUTF8_REAL #define SDL_UnbindAudioStream SDL_UnbindAudioStream_REAL #define SDL_UnbindAudioStreams SDL_UnbindAudioStreams_REAL #define SDL_UnloadObject SDL_UnloadObject_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index 16c54b3658..75fd300e19 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -846,6 +846,7 @@ SDL_DYNAPI_PROC(int,SDL_TryLockRWLockForReading,(SDL_RWLock *a),(a),return) SDL_DYNAPI_PROC(int,SDL_TryLockRWLockForWriting,(SDL_RWLock *a),(a),return) SDL_DYNAPI_PROC(SDL_bool,SDL_TryLockSpinlock,(SDL_SpinLock *a),(a),return) SDL_DYNAPI_PROC(int,SDL_TryWaitSemaphore,(SDL_Semaphore *a),(a),return) +SDL_DYNAPI_PROC(char*,SDL_UCS4ToUTF8,(Uint32 a, char *b),(a,b),return) SDL_DYNAPI_PROC(void,SDL_UnbindAudioStream,(SDL_AudioStream *a),(a),) SDL_DYNAPI_PROC(void,SDL_UnbindAudioStreams,(SDL_AudioStream **a, int b),(a,b),) SDL_DYNAPI_PROC(void,SDL_UnloadObject,(void *a),(a),) diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c index 554a746ff4..cd6cd1868c 100644 --- a/src/stdlib/SDL_string.c +++ b/src/stdlib/SDL_string.c @@ -42,6 +42,42 @@ SDL_COMPILE_TIME_ASSERT(sizeof_wchar_t, sizeof(wchar_t) == SDL_SIZEOF_WCHAR_T); +char *SDL_UCS4ToUTF8(Uint32 codepoint, char *dst) +{ + if (!dst) { + return NULL; // nothing to write to; the documented contract is to return NULL without setting an error. + } else if (codepoint > 0x10FFFF) { // Outside the range of Unicode codepoints (also, larger than can be encoded in 4 bytes of UTF-8!). 
+ codepoint = SDL_INVALID_UNICODE_CODEPOINT; + } else if ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) { // UTF-16 surrogate values are illegal in UTF-8. + codepoint = SDL_INVALID_UNICODE_CODEPOINT; + } + + Uint8 *p = (Uint8 *)dst; // write through an unsigned byte view of dst. + if (codepoint <= 0x7F) { // 1 byte: 0xxxxxxx + *p = (Uint8)codepoint; + ++dst; + } else if (codepoint <= 0x7FF) { // 2 bytes: 110xxxxx 10xxxxxx + p[0] = 0xC0 | (Uint8)((codepoint >> 6) & 0x1F); + p[1] = 0x80 | (Uint8)(codepoint & 0x3F); + dst += 2; + } else if (codepoint <= 0xFFFF) { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx + p[0] = 0xE0 | (Uint8)((codepoint >> 12) & 0x0F); + p[1] = 0x80 | (Uint8)((codepoint >> 6) & 0x3F); + p[2] = 0x80 | (Uint8)(codepoint & 0x3F); + dst += 3; + } else { // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + SDL_assert(codepoint <= 0x10FFFF); // guaranteed by the range check at the top of this function. + p[0] = 0xF0 | (Uint8)((codepoint >> 18) & 0x07); + p[1] = 0x80 | (Uint8)((codepoint >> 12) & 0x3F); + p[2] = 0x80 | (Uint8)((codepoint >> 6) & 0x3F); + p[3] = 0x80 | (Uint8)(codepoint & 0x3F); + dst += 4; + } + + return dst; +} + + // this expects `from` and `to` to be UTF-32 encoding! int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to) { diff --git a/src/video/emscripten/SDL_emscriptenevents.c b/src/video/emscripten/SDL_emscriptenevents.c index f531e47b54..74fc3f1568 100644 --- a/src/video/emscripten/SDL_emscriptenevents.c +++ b/src/video/emscripten/SDL_emscriptenevents.c @@ -543,9 +543,9 @@ static EM_BOOL Emscripten_HandleKeyPress(int eventType, const EmscriptenKeyboard if (SDL_TextInputActive(window_data->window)) { char text[5]; - if (SDL_UCS4ToUTF8(keyEvent->charCode, text)) { - SDL_SendKeyboardText(text); - } + char *end = SDL_UCS4ToUTF8(keyEvent->charCode, text); + *end = '\0'; /* the encoder writes at most 4 bytes and no terminator; text[5] leaves room for this one. */ + SDL_SendKeyboardText(text); return EM_TRUE; } return EM_FALSE; diff --git a/src/video/windows/SDL_windowsevents.c b/src/video/windows/SDL_windowsevents.c index 8e6485b3cb..32a3507617 100644 --- a/src/video/windows/SDL_windowsevents.c +++ b/src/video/windows/SDL_windowsevents.c @@ -1286,9 +1286,9 @@ LRESULT CALLBACK WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lPara } else { if 
(SDL_TextInputActive(data->window)) { char text[5]; - if (SDL_UCS4ToUTF8((Uint32)wParam, text) != text) { - SDL_SendKeyboardText(text); - } + char *end = SDL_UCS4ToUTF8((Uint32)wParam, text); + *end = '\0'; /* the encoder writes at most 4 bytes and no terminator; text[5] leaves room for this one. */ + SDL_SendKeyboardText(text); } returnCode = 0; }