diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c index 5d6d92da8b..1cefd45603 100644 --- a/src/video/SDL_blit_A.c +++ b/src/video/SDL_blit_A.c @@ -22,6 +22,7 @@ #ifdef SDL_HAVE_BLIT_A +#include "SDL_pixels_c.h" #include "SDL_surface_c.h" // Functions to perform alpha blended blitting @@ -968,6 +969,10 @@ static void Blit8888to8888PixelAlphaSwizzle(SDL_BlitInfo *info) int dstskip = info->dst_skip; const SDL_PixelFormatDetails *srcfmt = info->src_fmt; const SDL_PixelFormatDetails *dstfmt = info->dst_fmt; + bool fill_alpha = !dstfmt->Amask; + Uint32 dstAmask, dstAshift; + + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); while (height--) { int i = 0; @@ -976,6 +981,9 @@ static void Blit8888to8888PixelAlphaSwizzle(SDL_BlitInfo *info) Uint32 src32 = *(Uint32 *)src; Uint32 dst32 = *(Uint32 *)dst; ALPHA_BLEND_SWIZZLE_8888(src32, dst32, srcfmt, dstfmt); + if (fill_alpha) { + dst32 |= dstAmask; + } *(Uint32 *)dst = dst32; src += 4; dst += 4; @@ -998,6 +1006,10 @@ static void SDL_TARGETING("sse4.1") Blit8888to8888PixelAlphaSwizzleSSE41(SDL_Bli int dstskip = info->dst_skip; const SDL_PixelFormatDetails *srcfmt = info->src_fmt; const SDL_PixelFormatDetails *dstfmt = info->dst_fmt; + bool fill_alpha = !dstfmt->Amask; + Uint32 dstAmask, dstAshift; + + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); // The byte offsets for the start of each pixel const __m128i mask_offsets = _mm_set_epi8( @@ -1011,7 +1023,7 @@ static void SDL_TARGETING("sse4.1") Blit8888to8888PixelAlphaSwizzleSSE41(SDL_Bli mask_offsets); const __m128i alpha_splat_mask = _mm_add_epi8(_mm_set1_epi8(srcfmt->Ashift >> 3), mask_offsets); - const __m128i alpha_fill_mask = _mm_set1_epi32((int)dstfmt->Amask); + const __m128i alpha_fill_mask = _mm_set1_epi32((int)dstAmask); while (height--) { int i = 0; @@ -1057,7 +1069,11 @@ static void SDL_TARGETING("sse4.1") Blit8888to8888PixelAlphaSwizzleSSE41(SDL_Bli dst_hi = _mm_mulhi_epu16(dst_hi, _mm_set1_epi16(257)); // Blend the pixels together and save the result - _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(dst_lo, dst_hi)); + dst128 = _mm_packus_epi16(dst_lo, dst_hi); + if (fill_alpha) { + dst128 = _mm_or_si128(dst128, alpha_fill_mask); + } + _mm_storeu_si128((__m128i *)dst, dst128); src += 16; dst += 16; @@ -1067,6 +1083,9 @@ static void SDL_TARGETING("sse4.1") Blit8888to8888PixelAlphaSwizzleSSE41(SDL_Bli Uint32 src32 = *(Uint32 *)src; Uint32 dst32 = *(Uint32 *)dst; ALPHA_BLEND_SWIZZLE_8888(src32, dst32, srcfmt, dstfmt); + if (fill_alpha) { + dst32 |= dstAmask; + } *(Uint32 *)dst = dst32; src += 4; dst += 4; @@ -1091,6 +1110,10 @@ static void SDL_TARGETING("avx2") Blit8888to8888PixelAlphaSwizzleAVX2(SDL_BlitIn int dstskip = info->dst_skip; const SDL_PixelFormatDetails *srcfmt = info->src_fmt; const SDL_PixelFormatDetails *dstfmt = info->dst_fmt; + bool fill_alpha = !dstfmt->Amask; + Uint32 dstAmask, dstAshift; + + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); // The byte offsets for the start of each pixel const __m256i mask_offsets = _mm256_set_epi8( @@ -1104,7 +1127,7 @@ static void SDL_TARGETING("avx2") Blit8888to8888PixelAlphaSwizzleAVX2(SDL_BlitIn mask_offsets); const __m256i alpha_splat_mask = _mm256_add_epi8(_mm256_set1_epi8(srcfmt->Ashift >> 3), mask_offsets); - const __m256i alpha_fill_mask = _mm256_set1_epi32((int)dstfmt->Amask); + const __m256i alpha_fill_mask = _mm256_set1_epi32((int)dstAmask); while (height--) { int i = 0; @@ -1150,7 +1173,11 @@ static void SDL_TARGETING("avx2") Blit8888to8888PixelAlphaSwizzleAVX2(SDL_BlitIn dst_hi = _mm256_mulhi_epu16(dst_hi, _mm256_set1_epi16(257)); // Blend the pixels together and save the result - _mm256_storeu_si256((__m256i *)dst, _mm256_packus_epi16(dst_lo, dst_hi)); + dst256 = _mm256_packus_epi16(dst_lo, dst_hi); + if (fill_alpha) { + dst256 = _mm256_or_si256(dst256, alpha_fill_mask); + } + _mm256_storeu_si256((__m256i *)dst, dst256); src += 32; dst += 32; @@ -1160,6 +1187,9 @@ static void SDL_TARGETING("avx2") Blit8888to8888PixelAlphaSwizzleAVX2(SDL_BlitIn Uint32 src32 = *(Uint32 *)src; Uint32 dst32 = *(Uint32 *)dst; ALPHA_BLEND_SWIZZLE_8888(src32, dst32, srcfmt, dstfmt); + if (fill_alpha) { + dst32 |= dstAmask; + } *(Uint32 *)dst = dst32; src += 4; dst += 4; @@ -1184,6 +1214,10 @@ static void Blit8888to8888PixelAlphaSwizzleNEON(SDL_BlitInfo *info) int dstskip = info->dst_skip; const SDL_PixelFormatDetails *srcfmt = info->src_fmt; const SDL_PixelFormatDetails *dstfmt = info->dst_fmt; + bool fill_alpha = !dstfmt->Amask; + Uint32 dstAmask, dstAshift; + + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); // The byte offsets for the start of each pixel const uint8x16_t mask_offsets = vreinterpretq_u8_u64(vcombine_u64( @@ -1197,7 +1231,7 @@ static void Blit8888to8888PixelAlphaSwizzleNEON(SDL_BlitInfo *info) ((srcfmt->Bshift >> 3) << dstfmt->Bshift)))); const uint8x16_t alpha_splat_mask = vaddq_u8(vdupq_n_u8(srcfmt->Ashift >> 3), mask_offsets); - const uint8x16_t alpha_fill_mask = vreinterpretq_u8_u32(vdupq_n_u32(dstfmt->Amask)); + const uint8x16_t alpha_fill_mask = vreinterpretq_u8_u32(vdupq_n_u32(dstAmask)); while (height--) { int i = 0; @@ -1242,6 +1276,10 @@ static void Blit8888to8888PixelAlphaSwizzleNEON(SDL_BlitInfo *info) // temp = vraddhn_u16(res_lo, vrshrq_n_u16(res_lo, 8)); // dst128 = vraddhn_high_u16(temp, res_hi, vrshrq_n_u16(res_hi, 8)); + if (fill_alpha) { + dst128 = vorrq_u8(dst128, alpha_fill_mask); + } + // Save the result vst1q_u8(dst, dst128); @@ -1266,6 +1304,10 @@ static void Blit8888to8888PixelAlphaSwizzleNEON(SDL_BlitInfo *info) dst32 = vaddhn_u16(res, vshrq_n_u16(res, 8)); + if (fill_alpha) { + dst32 = vorr_u8(dst32, vget_low_u8(alpha_fill_mask)); + } + // Save the result, only low 32-bits vst1_lane_u32((Uint32*)dst, vreinterpret_u32_u8(dst32), 0); diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index b1cdc1d222..9b7104f109 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -2568,36 +2568,6 @@ static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info) dstAmask; \ } while (0) -#if defined(SDL_SSE4_1_INTRINSICS) || defined(SDL_AVX2_INTRINSICS) || (defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8)) -static void Get8888AlphaMaskAndShift(const SDL_PixelFormatDetails *fmt, Uint32 *mask, Uint32 *shift) -{ - if (fmt->Amask) { - *mask = fmt->Amask; - *shift = fmt->Ashift; - } else { - *mask = ~(fmt->Rmask | fmt->Gmask | fmt->Bmask); - switch (*mask) { - case 0x000000FF: - *shift = 0; - break; - case 0x0000FF00: - *shift = 8; - break; - case 0x00FF0000: - *shift = 16; - break; - case 0xFF000000: - *shift = 24; - break; - default: - // Should never happen - *shift = 0; - break; - } - } -} -#endif // SSE4.1, AVX2, and NEON implementations of Blit8888to8888PixelSwizzle - #ifdef SDL_SSE4_1_INTRINSICS static void SDL_TARGETING("sse4.1") Blit8888to8888PixelSwizzleSSE41(SDL_BlitInfo *info) @@ -2614,8 +2584,8 @@ static void SDL_TARGETING("sse4.1") Blit8888to8888PixelSwizzleSSE41(SDL_BlitInfo Uint32 srcAmask, srcAshift; Uint32 dstAmask, dstAshift; - Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift); - Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); + SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift); + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); // The byte offsets for the start of each pixel const __m128i mask_offsets = _mm_set_epi8( @@ -2689,8 +2659,8 @@ static void SDL_TARGETING("avx2") Blit8888to8888PixelSwizzleAVX2(SDL_BlitInfo *i Uint32 srcAmask, srcAshift; Uint32 dstAmask, dstAshift; - Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift); - Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); + SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift); + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); // The byte offsets for the start of each pixel const __m256i mask_offsets = _mm256_set_epi8( @@ -2764,8 +2734,8 @@ static void Blit8888to8888PixelSwizzleNEON(SDL_BlitInfo *info) Uint32 srcAmask, srcAshift; Uint32 dstAmask, dstAshift; - Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift); - Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); + SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift); + SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift); // The byte offsets for the start of each pixel const uint8x16_t mask_offsets = vreinterpretq_u8_u64(vcombine_u64( diff --git a/src/video/SDL_pixels.c b/src/video/SDL_pixels.c index 5fede2255a..5e341f4363 100644 --- a/src/video/SDL_pixels.c +++ b/src/video/SDL_pixels.c @@ -692,6 +692,34 @@ void SDL_QuitPixelFormatDetails(void) } } +void SDL_Get8888AlphaMaskAndShift(const SDL_PixelFormatDetails *fmt, Uint32 *mask, Uint32 *shift) +{ + if (fmt->Amask) { + *mask = fmt->Amask; + *shift = fmt->Ashift; + } else { + *mask = ~(fmt->Rmask | fmt->Gmask | fmt->Bmask); + switch (*mask) { + case 0x000000FF: + *shift = 0; + break; + case 0x0000FF00: + *shift = 8; + break; + case 0x00FF0000: + *shift = 16; + break; + case 0xFF000000: + *shift = 24; + break; + default: + // Should never happen + *shift = 0; + break; + } + } +} + SDL_Colorspace SDL_GetDefaultColorspaceForFormat(SDL_PixelFormat format) { if (SDL_ISPIXELFORMAT_FOURCC(format)) { diff --git a/src/video/SDL_pixels_c.h b/src/video/SDL_pixels_c.h index 92c3401691..11be7f3f2b 100644 --- a/src/video/SDL_pixels_c.h +++ b/src/video/SDL_pixels_c.h @@ -29,7 +29,7 @@ // Pixel format functions -extern bool SDL_CalculateSurfaceSize(SDL_PixelFormat format, int width, int height, size_t *size, size_t *pitch, bool minimalPitch); +extern void SDL_Get8888AlphaMaskAndShift(const SDL_PixelFormatDetails *fmt, Uint32 *mask, Uint32 *shift); extern SDL_Colorspace SDL_GetDefaultColorspaceForFormat(SDL_PixelFormat pixel_format); extern void SDL_QuitPixelFormatDetails(void); diff --git a/src/video/SDL_surface_c.h b/src/video/SDL_surface_c.h index 27dc88a625..9e73a4bca7 100644 --- a/src/video/SDL_surface_c.h +++ b/src/video/SDL_surface_c.h @@ -83,6 +83,7 @@ struct SDL_Surface // Surface functions extern bool SDL_SurfaceValid(SDL_Surface *surface); extern void SDL_UpdateSurfaceLockFlag(SDL_Surface *surface); +extern bool SDL_CalculateSurfaceSize(SDL_PixelFormat format, int width, int height, size_t *size, size_t *pitch, bool minimalPitch); extern float SDL_GetDefaultSDRWhitePoint(SDL_Colorspace colorspace); extern float SDL_GetSurfaceSDRWhitePoint(SDL_Surface *surface, SDL_Colorspace colorspace); extern float SDL_GetDefaultHDRHeadroom(SDL_Colorspace colorspace);