From c6c1e673c0a861ae0524c15106ea7be3967dd8b6 Mon Sep 17 00:00:00 2001
From: Brick <6098371+0x1F9F1@users.noreply.github.com>
Date: Sun, 13 Aug 2023 14:36:48 +0100
Subject: [PATCH] Optimized SDL_Convert_*_to_*_Scalar

They are now all branch-less and avoid float multiplication/conversion where possible
---
 src/audio/SDL_audiotypecvt.c | 183 ++++++++++++++++++++++++++++-------
 1 file changed, 146 insertions(+), 37 deletions(-)

diff --git a/src/audio/SDL_audiotypecvt.c b/src/audio/SDL_audiotypecvt.c
index 74377a2008..c70c298c93 100644
--- a/src/audio/SDL_audiotypecvt.c
+++ b/src/audio/SDL_audiotypecvt.c
@@ -39,50 +39,155 @@
 #define NEED_SCALAR_CONVERTER_FALLBACKS 1
 #endif
 
-#define DIVBY128 0.0078125f
-#define DIVBY32768 0.000030517578125f
-#define DIVBY8388607 0.00000011920930376163766f
-
 #if NEED_SCALAR_CONVERTER_FALLBACKS
 
-/* these all convert backwards because (currently) float32 is >= to the size of anything it converts to, so it lets us safely convert in-place. */
-#define AUDIOCVT_TOFLOAT_SCALAR(from, fromtype, equation) \
-    static void SDL_Convert_##from##_to_F32_Scalar(float *dst, const fromtype *src, int num_samples) { \
-        int i; \
-        LOG_DEBUG_AUDIO_CONVERT(#from, "F32"); \
-        for (i = num_samples - 1; i >= 0; --i) { \
-            dst[i] = equation; \
-        } \
+/* This code builds and inspects raw float bit patterns through union float_bits, so it requires that floats are in the IEEE-754 binary32 format. The assert below only checks that float is 32 bits wide; the bit layout itself is assumed. */
+SDL_COMPILE_TIME_ASSERT(float_bits, sizeof(float) == sizeof(Uint32));
+
+union float_bits {
+    Uint32 u32;
+    float f32;
+};
+
+static void SDL_Convert_S8_to_F32_Scalar(float *dst, const Sint8 *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("S8", "F32");
+
+    for (i = num_samples - 1; i >= 0; --i) { /* backwards: a float is at least as wide as the source sample, so this is safe when converting in-place */
+        /* 1) Construct a float in the range [65536.0, 65538.0): 0x47800000 is 65536.0f, where one mantissa step is 1/128, and XOR-ing in 0x80 flips the sample's sign bit so -128..127 becomes 0..255 steps
+         * 2) Subtract 65537.0 to shift the float range to [-1.0, 1.0) */
+        union float_bits x = { .u32 = (Uint8)src[i] ^ 0x47800080u };
+        dst[i] = x.f32 - 65537.0f;
     }
+}
 
-AUDIOCVT_TOFLOAT_SCALAR(S8, Sint8, ((float)src[i]) * DIVBY128)
-AUDIOCVT_TOFLOAT_SCALAR(U8, Uint8, (((float)src[i]) * DIVBY128) - 1.0f)
-AUDIOCVT_TOFLOAT_SCALAR(S16, Sint16, ((float)src[i]) * DIVBY32768)
-AUDIOCVT_TOFLOAT_SCALAR(S32, Sint32, ((float)(src[i] >> 8)) * DIVBY8388607)
-#undef AUDIOCVT_FROMFLOAT_SCALAR
+static void SDL_Convert_U8_to_F32_Scalar(float *dst, const Uint8 *src, int num_samples)
+{
+    int i;
 
-/* these all convert forwards because (currently) float32 is >= to the size of anything it converts from, so it lets us safely convert in-place. */
-#define AUDIOCVT_FROMFLOAT_SCALAR(to, totype, clampmin, clampmax, equation) \
-    static void SDL_Convert_F32_to_##to##_Scalar(totype *dst, const float *src, int num_samples) { \
-        int i; \
-        LOG_DEBUG_AUDIO_CONVERT("F32", #to); \
-        for (i = 0; i < num_samples; i++) { \
-            const float sample = src[i]; \
-            if (sample >= 1.0f) { \
-                dst[i] = (totype) (clampmax); \
-            } else if (sample <= -1.0f) { \
-                dst[i] = (totype) (clampmin); \
-            } else { \
-                dst[i] = (totype) (equation); \
-            } \
-        } \
+    LOG_DEBUG_AUDIO_CONVERT("U8", "F32");
+
+    for (i = num_samples - 1; i >= 0; --i) { /* backwards: a float is at least as wide as the source sample, so this is safe when converting in-place */
+        /* 1) Construct a float in the range [65536.0, 65538.0): 0x47800000 is 65536.0f, where one mantissa step is 1/128, so the 8 sample bits count steps of 1/128 above 65536.0
+         * 2) Subtract 65537.0 to shift the float range to [-1.0, 1.0) */
+        union float_bits x = { .u32 = (Uint8)src[i] ^ 0x47800000u };
+        dst[i] = x.f32 - 65537.0f;
     }
+}
 
-AUDIOCVT_FROMFLOAT_SCALAR(S8, Sint8, -128, 127, sample * 127.0f);
-AUDIOCVT_FROMFLOAT_SCALAR(U8, Uint8, 0, 255, (sample + 1.0f) * 127.0f);
-AUDIOCVT_FROMFLOAT_SCALAR(S16, Sint16, -32768, 32767, sample * 32767.0f);
-AUDIOCVT_FROMFLOAT_SCALAR(S32, Sint32, -2147483648LL, 2147483647, ((Sint32)(sample * 8388607.0f)) << 8);
-#undef AUDIOCVT_FROMFLOAT_SCALAR
+static void SDL_Convert_S16_to_F32_Scalar(float *dst, const Sint16 *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("S16", "F32");
+
+    for (i = num_samples - 1; i >= 0; --i) { /* backwards: a float is at least as wide as the source sample, so this is safe when converting in-place */
+        /* 1) Construct a float in the range [256.0, 258.0): 0x43800000 is 256.0f, where one mantissa step is 1/32768, and XOR-ing in 0x8000 flips the sample's sign bit so -32768..32767 becomes 0..65535 steps
+         * 2) Subtract 257.0 to shift the float range to [-1.0, 1.0) */
+        union float_bits x = { .u32 = (Uint16)src[i] ^ 0x43808000u };
+        dst[i] = x.f32 - 257.0f;
+    }
+}
+
+static void SDL_Convert_S32_to_F32_Scalar(float *dst, const Sint32 *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("S32", "F32");
+
+    for (i = num_samples - 1; i >= 0; --i) { /* backwards: a float is as wide as the source sample, so this is safe when converting in-place */
+        dst[i] = (float)src[i] * 0x1p-31f; /* 0x1p-31f is 2^-31, so the result lies in [-1.0, 1.0] */
+    }
+}
+
+/* Create a bit-mask based on the sign-bit: 0xFFFFFFFF when bit 31 of x is set, 0 otherwise. Should optimize to a single arithmetic-shift-right */
+#define SIGNMASK(x) (Uint32)(0u - ((Uint32)(x) >> 31))
+
+static void SDL_Convert_F32_to_S8_Scalar(Sint8 *dst, const float *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("F32", "S8");
+
+    for (i = 0; i < num_samples; ++i) { /* forwards: the output sample is no wider than a float, so this is safe when converting in-place */
+        /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]; floats there are spaced 1/128 apart, so the addition quantizes the sample to steps of 1/128
+         * 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128] by subtracting 0x47C00000, the bit pattern of 98304.0f
+         * 3) Clamp the value to [-128, 127]: z gets its sign bit set only when y is outside that range, and XOR-ing y with such a z leaves 0x7F (y too large) or 0x80 (y too small) in the low byte. NOTE(review): this relies on the sum being a positive float; samples in (-196608.0, -98304.0) look like they would clamp to 127 instead of -128 - confirm */
+        union float_bits x = { .f32 = src[i] + 98304.0f };
+
+        Uint32 y = x.u32 - 0x47C00000u;
+        Uint32 z = 0x7Fu - (y ^ SIGNMASK(y));
+        y = y ^ (z & SIGNMASK(z));
+
+        dst[i] = (Sint8)(y & 0xFF);
+    }
+}
+
+static void SDL_Convert_F32_to_U8_Scalar(Uint8 *dst, const float *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("F32", "U8");
+
+    for (i = 0; i < num_samples; ++i) { /* forwards: the output sample is no wider than a float, so this is safe when converting in-place */
+        union float_bits x = { .f32 = src[i] + 98304.0f };
+
+        /* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]; floats there are spaced 1/128 apart, so the addition quantizes the sample to steps of 1/128
+         * 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128] by subtracting 0x47C00000, the bit pattern of 98304.0f
+         * 3) Clamp the value to [-128, 127]: z gets its sign bit set only when y is outside that range, and XOR-ing y with such a z leaves 0x7F (y too large) or 0x80 (y too small) in the low byte
+         * 4) Shift the integer range from [-128, 127] to [0, 255] by flipping bit 7 (the XOR with 0x80, folded into the clamp expression). NOTE(review): as in step 3, this relies on the sum being a positive float; samples in (-196608.0, -98304.0) look like they would give 255 instead of 0 - confirm */
+        Uint32 y = x.u32 - 0x47C00000u;
+        Uint32 z = 0x7Fu - (y ^ SIGNMASK(y));
+        y = (y ^ 0x80u) ^ (z & SIGNMASK(z));
+
+        dst[i] = (Uint8)(y & 0xFF);
+    }
+}
+
+static void SDL_Convert_F32_to_S16_Scalar(Sint16 *dst, const float *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("F32", "S16");
+
+    for (i = 0; i < num_samples; ++i) { /* forwards: the output sample is no wider than a float, so this is safe when converting in-place */
+        /* 1) Shift the float range from [-1.0, 1.0] to [383.0, 385.0]; floats there are spaced 1/32768 apart, so the addition quantizes the sample to steps of 1/32768
+         * 2) Shift the integer range from [0x43BF8000, 0x43C08000] to [-32768, 32768] by subtracting 0x43C00000, the bit pattern of 384.0f
+         * 3) Clamp values outside the [-32768, 32767] range: z gets its sign bit set only for those values, and XOR-ing y with such a z leaves 0x7FFF (y too large) or 0x8000 (y too small) in the low 16 bits. NOTE(review): this relies on the sum being a positive float; samples in (-768.0, -384.0) look like they would clamp to 32767 instead of -32768 - confirm */
+        union float_bits x = { .f32 = src[i] + 384.0f };
+
+        Uint32 y = x.u32 - 0x43C00000u;
+        Uint32 z = 0x7FFFu - (y ^ SIGNMASK(y));
+        y = y ^ (z & SIGNMASK(z));
+
+        dst[i] = (Sint16)(y & 0xFFFF);
+    }
+}
+
+static void SDL_Convert_F32_to_S32_Scalar(Sint32 *dst, const float *src, int num_samples)
+{
+    int i;
+
+    LOG_DEBUG_AUDIO_CONVERT("F32", "S32");
+
+    for (i = 0; i < num_samples; ++i) { /* forwards: the output sample is no wider than a float, so this is safe when converting in-place */
+        union float_bits x = { .f32 = src[i] };
+
+        /* 1) Shift the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0] by adding 31 to the exponent field (0x0F800000 is 31 << 23)
+         * 2) Set values outside the [-2147483648.0, 2147483647.0] range to -2147483648.0 (bit pattern 0xCF000000); z is left non-zero only for such values
+         * 3) Convert the float to an integer, and fixup values outside the valid range: when z has its sign bit set (positive overflow) every bit is inverted, turning -2147483648 into 2147483647 */
+        Uint32 y = x.u32 + 0x0F800000u;
+        Uint32 z = y - 0xCF000000u;
+        z &= SIGNMASK(y ^ z);
+        x.u32 = y - z;
+
+        dst[i] = (Sint32)((Uint32)(Sint32)x.f32 ^ SIGNMASK(z));
+    }
+}
+
+#undef SIGNMASK
 
 #endif /* NEED_SCALAR_CONVERTER_FALLBACKS */
 
@@ -426,6 +531,10 @@ static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 *dst, const
 #endif
 
 #ifdef SDL_NEON_INTRINSICS
+#define DIVBY128 0x1p-7f
+#define DIVBY32768 0x1p-15f
+#define DIVBY8388607 0x1.000002p-23f
+
 static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples)
 {
     int i;