From badc3b82c51ef55c06ef1e1fffe4cd14617d964c Mon Sep 17 00:00:00 2001 From: Michael Fitzmayer Date: Thu, 16 Apr 2026 21:27:14 +0200 Subject: [PATCH] [N-Gage] Micro-optimize rendering back-end - Skip SDL_GetRenderScale call in Copy() fast path - Cache last clear color to avoid redundant SetBrushColor calls - Add whole-image bounds pre-check to skip per-pixel checks in rotation - Simplify color packing in DrawPoints/FillRects to reduce overhead --- src/render/ngage/SDL_render_ngage.cpp | 52 +++---- src/render/ngage/SDL_render_ngage_c.hpp | 3 + src/render/ngage/SDL_render_ops.cpp | 174 ++++++++++++++++++------ 3 files changed, 161 insertions(+), 68 deletions(-) diff --git a/src/render/ngage/SDL_render_ngage.cpp b/src/render/ngage/SDL_render_ngage.cpp index e9dcd547ab..88fe359275 100644 --- a/src/render/ngage/SDL_render_ngage.cpp +++ b/src/render/ngage/SDL_render_ngage.cpp @@ -174,7 +174,7 @@ CRenderer *CRenderer::NewL() return self; } -CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0), iLastColorR(-1), iLastColorG(-1), iLastColorB(-1), iLinePointsBuffer(0), iLinePointsBufferCapacity(0), iLastDrawColor(0) +CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0), iLastColorR(-1), iLastColorG(-1), iLastColorB(-1), iLinePointsBuffer(0), iLinePointsBufferCapacity(0), iLastDrawColor(0), iLastClearColor(0xFFFFFFFF) { } @@ -313,7 +313,11 @@ void 
CRenderer::AbortNow(RDirectScreenAccess::TTerminationReasons aReason) void CRenderer::Clear(TUint32 iColor) { if (iRenderer && iRenderer->Gc()) { - iRenderer->Gc()->SetBrushColor(iColor); + // Skip redundant SetBrushColor if color hasn't changed. + if (iColor != iLastClearColor) { + iRenderer->Gc()->SetBrushColor(iColor); + iLastClearColor = iColor; + } iRenderer->Gc()->Clear(); } } @@ -534,20 +538,24 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec SDL_FColor *c = &texture->color; - // Get render scale once. - float sx; - float sy; - SDL_GetRenderScale(renderer, &sx, &sy); - // Fast path 1: No transformations needed; direct BitBlt. - if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f && sx == 1.f && sy == 1.f) { - TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); - TPoint aDest(dstrect->x, dstrect->y); - iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); - return true; + if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f) { + // Only check render scale if color mod passes. + float sx; + float sy; + SDL_GetRenderScale(renderer, &sx, &sy); + if (sx == 1.f && sy == 1.f) { + TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); + TPoint aDest(dstrect->x, dstrect->y); + iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); + return true; + } } // Slow path: Transformations needed. + float sx; + float sy; + SDL_GetRenderScale(renderer, &sx, &sy); int w = phdata->cachedWidth; int h = phdata->cachedHeight; int pitch = phdata->cachedPitch; @@ -617,13 +625,13 @@ bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE SDL_FColor *c = &texture->color; // Pre-calculate common checks. 
- const bool isIdentityScale = (copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1)); - const bool isNoRotation = (copydata->angle == 0); const bool isNoFlip = (!copydata->flip); + const bool isNoRotation = (copydata->angle == 0); const bool isNoColorMod = (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f); + const bool isIdentityScale = (copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1)); // Fast path 1: No transformations needed; direct BitBlt. - if (isNoFlip && isIdentityScale && isNoRotation && isNoColorMod) { + if (isNoFlip && isNoRotation && isNoColorMod && isIdentityScale) { TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); @@ -799,11 +807,8 @@ void CRenderer::DrawPoints(NGAGE_Vertex *aVerts, const TInt aCount) bool colorSet = false; for (TInt i = 0; i < aCount; i++, aVerts++) { - Uint8 ca = aVerts->color.a; - Uint8 cr = aVerts->color.r; - Uint8 cg = aVerts->color.g; - Uint8 cb = aVerts->color.b; - TUint32 aColor = (ca << 24) | (cb << 16) | (cg << 8) | cr; + TUint32 aColor = (TUint32(aVerts->color.a) << 24) | (TUint32(aVerts->color.b) << 16) | + (TUint32(aVerts->color.g) << 8) | TUint32(aVerts->color.r); // Only set pen color when it changes. if (!colorSet || aColor != currentColor) { @@ -830,11 +835,8 @@ void CRenderer::FillRects(NGAGE_Vertex *aVerts, const TInt aCount) TSize size(aVerts[i + 1].x, aVerts[i + 1].y); TRect rect(pos, size); - Uint8 ca = aVerts[i].color.a; - Uint8 cr = aVerts[i].color.r; - Uint8 cg = aVerts[i].color.g; - Uint8 cb = aVerts[i].color.b; - TUint32 aColor = (ca << 24) | (cb << 16) | (cg << 8) | cr; + TUint32 aColor = (TUint32(aVerts[i].color.a) << 24) | (TUint32(aVerts[i].color.b) << 16) | + (TUint32(aVerts[i].color.g) << 8) | TUint32(aVerts[i].color.r); // Only set colors when they change. 
if (!colorSet || aColor != currentColor) { diff --git a/src/render/ngage/SDL_render_ngage_c.hpp b/src/render/ngage/SDL_render_ngage_c.hpp index 4b1d38ddcf..7fbfc88799 100644 --- a/src/render/ngage/SDL_render_ngage_c.hpp +++ b/src/render/ngage/SDL_render_ngage_c.hpp @@ -111,6 +111,9 @@ class CRenderer : public MDirectScreenAccess // Cached draw color to avoid redundant SetPenColor/SetBrushColor calls. TUint32 iLastDrawColor; + // Cached clear color to avoid redundant SetBrushColor calls. + TUint32 iLastClearColor; + // Helper methods. bool EnsureWorkBufferCapacity(TInt aRequiredSize); bool EnsureTempBitmapCapacity(TInt aWidth, TInt aHeight); diff --git a/src/render/ngage/SDL_render_ops.cpp b/src/render/ngage/SDL_render_ops.cpp index 54fbddb900..6bb925749c 100644 --- a/src/render/ngage/SDL_render_ops.cpp +++ b/src/render/ngage/SDL_render_ops.cpp @@ -98,6 +98,22 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F const bool flipHorizontal = (flip & SDL_FLIP_HORIZONTAL) != 0; const bool flipVertical = (flip & SDL_FLIP_VERTICAL) != 0; + // Fast path: No flip; just copy entire buffer. + if (!flipHorizontal && !flipVertical) { + Mem::Copy(dest, source, pitch * height); + return; + } + + // Fast path: Vertical-only flip; copy rows in reverse order. + if (flipVertical && !flipHorizontal) { + for (int y = 0; y < height; ++y) { + const int src_y = height - 1 - y; + Mem::Copy(&dst_pixels[y * pitchPixels], &src_pixels[src_y * pitchPixels], pitch); + } + return; + } + + // Slow path: Horizontal or both flips; need pixel-level operations. // Pre-calculate width/height bounds for horizontal/vertical flipping. const int width_m1 = width - 1; const int height_m1 = height - 1; @@ -150,6 +166,27 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T // Pre-calculate pitch in pixels to avoid repeated division. 
const TInt pitchPixels = pitch >> 1; + // Pre-check if rotation keeps all pixels within bounds to skip per-pixel checks. + // Calculate the four corners of the image after rotation around center. + bool allInBounds = true; + if (angle != 0) { + // Check corners: (0,0), (width-1,0), (0,height-1), (width-1,height-1) + TFixed corners_x[4] = { -center_x, Int2Fix(width - 1) - center_x, -center_x, Int2Fix(width - 1) - center_x }; + TFixed corners_y[4] = { -center_y, -center_y, Int2Fix(height - 1) - center_y, Int2Fix(height - 1) - center_y }; + + for (int i = 0; i < 4; ++i) { + TFixed rot_x = FixMul(corners_x[i], cos_angle) - FixMul(corners_y[i], sin_angle) + center_x; + TFixed rot_y = FixMul(corners_x[i], sin_angle) + FixMul(corners_y[i], cos_angle) + center_y; + int final_x = Fix2Int(rot_x); + int final_y = Fix2Int(rot_y); + + if (final_x < 0 || final_x >= width || final_y < 0 || final_y >= height) { + allInBounds = false; + break; + } + } + } + // Incremental DDA: Calculate per-pixel increments. // As we move right (x+1), the rotated position changes by (cos, -sin). const TFixed dx_cos = cos_angle; @@ -172,55 +209,99 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T int x = 0; - // Process 4 pixels at once. - for (; x < width - 3; x += 4) { - // Pixel 0 - int final_x0 = Fix2Int(src_x); - int final_y0 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; + if (allInBounds) { + // Fast path: No bounds checking needed. 
+ for (; x < width - 3; x += 4) { + // Pixel 0 + int final_x0 = Fix2Int(src_x); + int final_y0 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; - // Pixel 1 - int final_x1 = Fix2Int(src_x); - int final_y1 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; + // Pixel 1 + int final_x1 = Fix2Int(src_x); + int final_y1 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; - // Pixel 2 - int final_x2 = Fix2Int(src_x); - int final_y2 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; + // Pixel 2 + int final_x2 = Fix2Int(src_x); + int final_y2 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; - // Pixel 3 - int final_x3 = Fix2Int(src_x); - int final_y3 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; + // Pixel 3 + int final_x3 = Fix2Int(src_x); + int final_y3 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; - // Write all 4 pixels with bounds checking. - dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ? src_pixels[final_y0 * pitchPixels + final_x0] : 0; - dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ? src_pixels[final_y1 * pitchPixels + final_x1] : 0; - dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ? src_pixels[final_y2 * pitchPixels + final_x2] : 0; - dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ? src_pixels[final_y3 * pitchPixels + final_x3] : 0; - } - - // Handle remaining pixels. - for (; x < width; ++x) { - // Convert to integer coordinates. - int final_x = Fix2Int(src_x); - int final_y = Fix2Int(src_y); - - // Check bounds. - if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { - dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x]; - } else { - dst_pixels[dstRowOffset + x] = 0; + // Write all 4 pixels without bounds checking. 
+ dst_pixels[dstRowOffset + x] = src_pixels[final_y0 * pitchPixels + final_x0]; + dst_pixels[dstRowOffset + x + 1] = src_pixels[final_y1 * pitchPixels + final_x1]; + dst_pixels[dstRowOffset + x + 2] = src_pixels[final_y2 * pitchPixels + final_x2]; + dst_pixels[dstRowOffset + x + 3] = src_pixels[final_y3 * pitchPixels + final_x3]; } - // Incremental step: move to next pixel (just additions, no multiplications!). - src_x += dx_cos; - src_y += dx_sin; + // Handle remaining pixels. + for (; x < width; ++x) { + int final_x = Fix2Int(src_x); + int final_y = Fix2Int(src_y); + dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x]; + src_x += dx_cos; + src_y += dx_sin; + } + } else { + // Slow path: Bounds checking required. + for (; x < width - 3; x += 4) { + // Pixel 0 + int final_x0 = Fix2Int(src_x); + int final_y0 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; + + // Pixel 1 + int final_x1 = Fix2Int(src_x); + int final_y1 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; + + // Pixel 2 + int final_x2 = Fix2Int(src_x); + int final_y2 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; + + // Pixel 3 + int final_x3 = Fix2Int(src_x); + int final_y3 = Fix2Int(src_y); + src_x += dx_cos; + src_y += dx_sin; + + // Write all 4 pixels with bounds checking. + dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ? src_pixels[final_y0 * pitchPixels + final_x0] : 0; + dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ? src_pixels[final_y1 * pitchPixels + final_x1] : 0; + dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ? src_pixels[final_y2 * pitchPixels + final_x2] : 0; + dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ? src_pixels[final_y3 * pitchPixels + final_x3] : 0; + } + + // Handle remaining pixels. 
+ for (; x < width; ++x) { + // Convert to integer coordinates. + int final_x = Fix2Int(src_x); + int final_y = Fix2Int(src_y); + + // Check bounds. + if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { + dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x]; + } else { + dst_pixels[dstRowOffset + x] = 0; + } + + // Incremental step: move to next pixel (just additions, no multiplications!). + src_x += dx_cos; + src_y += dx_sin; + } } } } @@ -230,6 +311,13 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix TUint16 *src_pixels = static_cast<TUint16 *>(source); TUint16 *dst_pixels = static_cast<TUint16 *>(dest); + // Fast path: Identity scale; just copy entire buffer. + const TFixed identity = Int2Fix(1); + if (scale_x == identity && scale_y == identity) { + Mem::Copy(dest, source, pitch * height); + return; + } + // Pre-calculate pitch in pixels to avoid repeated division. const TInt pitchPixels = pitch >> 1;