From 5cec91e27a970233446d3d122f52bf3661ac5281 Mon Sep 17 00:00:00 2001
From: Isaac Aronson
Date: Tue, 10 Oct 2023 08:09:10 -0500
Subject: [PATCH] Implement accurate, performant 32-bit scalar blitter for ARGB dst case

---
 src/video/SDL_blit.h   | 36 ++++++++++++++++++++++++++++++++++++
 src/video/SDL_blit_A.c | 46 ++++++++++++++++++++++++++++++++--------------
 2 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h
index 04525d7ee0..1d677cb7be 100644
--- a/src/video/SDL_blit.h
+++ b/src/video/SDL_blit.h
@@ -493,6 +493,17 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
         } \
     }
 
+/* Convert any 32-bit 4-bpp pixel to ARGB format.
+ * src is evaluated once; dst must be an lvalue and receives the channels packed
+ * as (A << 24) | (R << 16) | (G << 8) | B. Channels are widened to Uint32 before
+ * shifting because a promoted Uint8 shifted left by 24 can overflow int. */
+#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst) \
+    do { \
+        const Uint32 px_ = (src); \
+        Uint8 a_, r_, g_, b_; \
+        RGBA_FROM_PIXEL(px_, srcfmt, r_, g_, b_, a_); \
+        (dst) = ((Uint32)a_ << 24) | ((Uint32)r_ << 16) | ((Uint32)g_ << 8) | (Uint32)b_; \
+    } while (0)
 /* Blend a single color channel or alpha value */
 #define ALPHA_BLEND_CHANNEL(sC, dC, sA) \
     do { \
@@ -509,6 +520,31 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
         ALPHA_BLEND_CHANNEL(sG, dG, A); \
         ALPHA_BLEND_CHANNEL(sB, dB, A); \
     } while (0)
+/* Blend the ARGB values of two 32-bit pixels.
+ * src is the source ARGB pixel (evaluated once and left unmodified); dst is an
+ * lvalue holding the destination ARGB pixel and receives the blended result.
+ * Two channels are blended per 32-bit word (R|B and A|G), each computed as
+ * (s * sA + d * (255 - sA)) / 255. The source alpha lane is forced to 0xFF so
+ * the same formula also yields the resulting alpha. */
+#define ALPHA_BLEND_ARGB_PIXELS(src, dst) \
+    do { \
+        const Uint32 s_ = (src); \
+        const Uint32 d_ = (dst); \
+        const Uint32 sA_ = s_ >> 24; \
+        const Uint32 sRB_ = s_ & 0x00FF00FF; \
+        const Uint32 dRB_ = d_ & 0x00FF00FF; \
+        const Uint32 sGA_ = ((s_ | 0xFF000000) >> 8) & 0x00FF00FF; \
+        const Uint32 dGA_ = (d_ >> 8) & 0x00FF00FF; \
+        Uint32 rRB_ = ((sRB_ - dRB_) * sA_) + (dRB_ << 8) - dRB_; \
+        Uint32 rGA_ = ((sGA_ - dGA_) * sA_) + (dGA_ << 8) - dGA_; \
+        rRB_ += 0x00010001; \
+        rRB_ += (rRB_ >> 8) & 0x00FF00FF; \
+        rRB_ = (rRB_ >> 8) & 0x00FF00FF; \
+        rGA_ += 0x00010001; \
+        rGA_ += (rGA_ >> 8) & 0x00FF00FF; \
+        rGA_ &= 0xFF00FF00; \
+        (dst) = rRB_ | rGA_; \
+    } while (0)
 
 /* Blend the RGBA values of two pixels */
 #define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 3707ca891c..7268fa5ad9 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -1073,22 +1073,40 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
     }
 
     while (height--) {
-        /* *INDENT-OFF* */ /* clang-format off */
-        DUFFS_LOOP4(
-        {
-            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
-            if (sA) {
-                DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
-                ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
-                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
-            }
-            src += srcbpp;
-            dst += dstbpp;
-        },
-        width);
-        /* *INDENT-ON* */ /* clang-format on */
+        if (srcbpp == 4 && dstbpp == 4 && dstfmt->Ashift == 24 && dstfmt->Rshift == 16 && dstfmt->Gshift == 8 &&
+            dstfmt->Bshift == 0) {
+            /* Fast path: 4-byte source blended onto a 4-byte ARGB-ordered destination */
+            /* *INDENT-OFF* */ /* clang-format off */
+            DUFFS_LOOP4(
+            {
+                Uint32 blended = *(Uint32 *)dst;
+                PIXEL_TO_ARGB_PIXEL(*(Uint32 *)src, srcfmt, Pixel);
+                ALPHA_BLEND_ARGB_PIXELS(Pixel, blended);
+                *(Uint32 *)dst = blended;
+                src += srcbpp;
+                dst += dstbpp;
+            },
+            width);
+            /* *INDENT-ON* */ /* clang-format on */
+        } else {
+            /* *INDENT-OFF* */ /* clang-format off */
+            DUFFS_LOOP4(
+            {
+                DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
+                if (sA) {
+                    DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
+                    ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
+                    ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
+                }
+                src += srcbpp;
+                dst += dstbpp;
+            },
+            width);
+            /* *INDENT-ON* */ /* clang-format on */
+        }
+        /* The per-row skip must be applied after either path */
         src += srcskip;
         dst += dstskip;
     }
     if (freeFormat) {
         SDL_DestroyPixelFormat(dstfmt);