From 4870f81d9caf5838e174a88fd4b780b9cc3869f6 Mon Sep 17 00:00:00 2001 From: Michael Fitzmayer Date: Wed, 15 Apr 2026 19:44:24 +0200 Subject: [PATCH] [N-Gage] Optimize rendering back-end - Remove SDL_Surface member from NGAGE_TextureData structure and update all functions that currently use surface->pixels to instead access bitmap->DataAddress() directly. This eliminates the intermediate copy step (Mem::Copy from surface to bitmap) in rendering operations. - Eliminate per-frame allocations in Copy/CopyEx methods. These buffers are now allocated once and resized only when needed. --- src/render/ngage/SDL_render_ngage.c | 60 ++++--- src/render/ngage/SDL_render_ngage.cpp | 207 +++++++++++++++++------- src/render/ngage/SDL_render_ngage_c.h | 11 +- src/render/ngage/SDL_render_ngage_c.hpp | 8 + src/render/ngage/SDL_render_ops.cpp | 59 +++---- 5 files changed, 222 insertions(+), 123 deletions(-) diff --git a/src/render/ngage/SDL_render_ngage.c b/src/render/ngage/SDL_render_ngage.c index 3d8d59c03f..74a4ce5771 100644 --- a/src/render/ngage/SDL_render_ngage.c +++ b/src/render/ngage/SDL_render_ngage.c @@ -160,13 +160,6 @@ static bool NGAGE_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SD return false; } - SDL_Surface *surface = SDL_CreateSurface(texture->w, texture->h, texture->format); - if (!surface) { - SDL_free(data); - return false; - } - - data->surface = surface; texture->internal = data; return true; @@ -447,29 +440,25 @@ static bool NGAGE_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, co { NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal; - SDL_Surface *surface = phdata->surface; - Uint8 *src, *dst; - int row; - size_t length; - - if (SDL_MUSTLOCK(surface)) { - if (!SDL_LockSurface(surface)) { - return false; - } + if (!phdata) { + return false; } - src = (Uint8 *)pixels; - dst = (Uint8 *)surface->pixels + - rect->y * surface->pitch + - rect->x * surface->fmt->bytes_per_pixel; - length = (size_t)rect->w * 
surface->fmt->bytes_per_pixel; - for (row = 0; row < rect->h; ++row) { + void *bitmapData = NGAGE_GetBitmapDataAddress(phdata); + int bitmapPitch = NGAGE_GetBitmapPitch(phdata); + + if (!bitmapData || bitmapPitch == 0) { + return false; + } + + Uint8 *src = (Uint8 *)pixels; + Uint8 *dst = (Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2; // 2 bytes per pixel for EColor4K + + size_t length = (size_t)rect->w * 2; // 2 bytes per pixel for EColor4K + for (int row = 0; row < rect->h; ++row) { SDL_memcpy(dst, src, length); src += pitch; - dst += surface->pitch; - } - if (SDL_MUSTLOCK(surface)) { - SDL_UnlockSurface(surface); + dst += bitmapPitch; } return true; @@ -478,12 +467,20 @@ static bool NGAGE_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, co static bool NGAGE_LockTexture(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rect *rect, void **pixels, int *pitch) { NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal; - SDL_Surface *surface = phdata->surface; - *pixels = - (void *)((Uint8 *)surface->pixels + rect->y * surface->pitch + - rect->x * surface->fmt->bytes_per_pixel); - *pitch = surface->pitch; + if (!phdata) { + return false; + } + + void *bitmapData = NGAGE_GetBitmapDataAddress(phdata); + int bitmapPitch = NGAGE_GetBitmapPitch(phdata); + + if (!bitmapData || bitmapPitch == 0) { + return false; + } + + *pixels = (void *)((Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2); // 2 bytes per pixel for EColor4K + *pitch = bitmapPitch; return true; } @@ -512,7 +509,6 @@ static void NGAGE_DestroyTexture(SDL_Renderer *renderer, SDL_Texture *texture) { NGAGE_TextureData *data = (NGAGE_TextureData *)texture->internal; if (data) { - SDL_DestroySurface(data->surface); NGAGE_DestroyTextureData(data); SDL_free(data); texture->internal = 0; diff --git a/src/render/ngage/SDL_render_ngage.cpp b/src/render/ngage/SDL_render_ngage.cpp index 2198c02b3b..18f5084139 100644 --- a/src/render/ngage/SDL_render_ngage.cpp +++ 
b/src/render/ngage/SDL_render_ngage.cpp @@ -72,6 +72,39 @@ void NGAGE_DestroyTextureData(NGAGE_TextureData *data) } } +void *NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data) +{ + if (data && data->bitmap) { + return data->bitmap->DataAddress(); + } + return NULL; +} + +int NGAGE_GetBitmapPitch(NGAGE_TextureData *data) +{ + if (data && data->bitmap) { + TSize size = data->bitmap->SizeInPixels(); + return data->bitmap->ScanLineLength(size.iWidth, data->bitmap->DisplayMode()); + } + return 0; +} + +int NGAGE_GetBitmapWidth(NGAGE_TextureData *data) +{ + if (data && data->bitmap) { + return data->bitmap->SizeInPixels().iWidth; + } + return 0; +} + +int NGAGE_GetBitmapHeight(NGAGE_TextureData *data) +{ + if (data && data->bitmap) { + return data->bitmap->SizeInPixels().iHeight; + } + return 0; +} + void NGAGE_DrawLines(NGAGE_Vertex *verts, const int count) { gRenderer->DrawLines(verts, count); @@ -127,12 +160,19 @@ CRenderer *CRenderer::NewL() return self; } -CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0) {} +CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0) {} CRenderer::~CRenderer() { delete iRenderer; iRenderer = 0; + + // Free work buffers. + SDL_free(iWorkBuffer1); + SDL_free(iWorkBuffer2); + iWorkBuffer1 = 0; + iWorkBuffer2 = 0; + iWorkBufferSize = 0; } void CRenderer::ConstructL() @@ -251,6 +291,36 @@ void CRenderer::Clear(TUint32 iColor) } } +bool CRenderer::EnsureWorkBufferCapacity(TInt aRequiredSize) +{ + if (aRequiredSize <= iWorkBufferSize) { + return true; + } + + // Free old buffers. + SDL_free(iWorkBuffer1); + SDL_free(iWorkBuffer2); + + // Allocate new buffers. 
+ iWorkBuffer1 = SDL_calloc(1, aRequiredSize); + if (!iWorkBuffer1) { + iWorkBuffer2 = 0; + iWorkBufferSize = 0; + return false; + } + + iWorkBuffer2 = SDL_calloc(1, aRequiredSize); + if (!iWorkBuffer2) { + SDL_free(iWorkBuffer1); + iWorkBuffer1 = 0; + iWorkBufferSize = 0; + return false; + } + + iWorkBufferSize = aRequiredSize; + return true; +} + #ifdef __cplusplus extern "C" { #endif @@ -293,65 +363,72 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec } NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal; - if (!phdata) { + if (!phdata || !phdata->bitmap) { return false; } SDL_FColor *c = &texture->color; - int w = phdata->surface->w; - int h = phdata->surface->h; - int pitch = phdata->surface->pitch; - void *source = phdata->surface->pixels; + + // Get render scale. + float sx; + float sy; + SDL_GetRenderScale(renderer, &sx, &sy); + + // Fast path: No transformations needed; direct BitBlt. + if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f && + sx == 1.f && sy == 1.f) { + TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); + TPoint aDest(dstrect->x, dstrect->y); + iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); + return true; + } + + // Slow path: Transformations needed. + int w = phdata->cachedWidth; + int h = phdata->cachedHeight; + int pitch = phdata->cachedPitch; + void *source = phdata->cachedDataAddress; void *dest; if (!source) { return false; } - void *pixel_buffer_a = SDL_calloc(1, pitch * h); - if (!pixel_buffer_a) { + // Ensure work buffers have sufficient capacity. 
+ TInt bufferSize = pitch * h; + if (!EnsureWorkBufferCapacity(bufferSize)) { return false; } - dest = pixel_buffer_a; - void *pixel_buffer_b = SDL_calloc(1, pitch * h); - if (!pixel_buffer_b) { - SDL_free(pixel_buffer_a); - return false; - } + dest = iWorkBuffer1; + bool useBuffer1 = true; if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) { ApplyColorMod(dest, source, pitch, w, h, texture->color); source = dest; + useBuffer1 = !useBuffer1; } - float sx; - float sy; - SDL_GetRenderScale(renderer, &sx, &sy); - if (sx != 1.f || sy != 1.f) { TFixed scale_x = Real2Fix(sx); TFixed scale_y = Real2Fix(sy); TFixed center_x = Int2Fix(w / 2); TFixed center_y = Int2Fix(h / 2); - dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a; - + dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2; ApplyScale(dest, source, pitch, w, h, center_x, center_y, scale_x, scale_y); source = dest; + useBuffer1 = !useBuffer1; } - Mem::Copy(phdata->bitmap->DataAddress(), source, pitch * h); - SDL_free(pixel_buffer_a); - SDL_free(pixel_buffer_b); + // Copy the transformed pixels from the work buffer back into the texture's bitmap so that BitBlt can draw them. + // NOTE(review): the source pixels were read from this same bitmap, so the transformation appears to persist in the texture across draws; confirm. + // A temporary bitmap (or a render target) for the transformed data would avoid both the overwrite and this copy. 
+ Mem::Copy(phdata->cachedDataAddress, source, pitch * h); - if (phdata->bitmap) { - TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); - TPoint aDest(dstrect->x, dstrect->y); - iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); - } + TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); + TPoint aDest(dstrect->x, dstrect->y); + iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); return true; } @@ -359,65 +436,78 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE_CopyExData *copydata) { NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal; - if (!phdata) { + if (!phdata || !phdata->bitmap) { return false; } SDL_FColor *c = &texture->color; - int w = phdata->surface->w; - int h = phdata->surface->h; - int pitch = phdata->surface->pitch; - void *source = phdata->surface->pixels; + + // Fast path: No transformations needed; direct BitBlt. + if (!copydata->flip && + copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1) && + copydata->angle == 0 && + c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f) { + TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); + TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); + iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); + return true; + } + + // Slow path: Transformations needed. + int w = phdata->cachedWidth; + int h = phdata->cachedHeight; + int pitch = phdata->cachedPitch; + void *source = phdata->cachedDataAddress; void *dest; if (!source) { return false; } - void *pixel_buffer_a = SDL_calloc(1, pitch * h); - if (!pixel_buffer_a) { + // Ensure work buffers have sufficient capacity. 
+ TInt bufferSize = pitch * h; + if (!EnsureWorkBufferCapacity(bufferSize)) { return false; } - dest = pixel_buffer_a; - void *pixel_buffer_b = SDL_calloc(1, pitch * h); - if (!pixel_buffer_a) { - SDL_free(pixel_buffer_a); - return false; - } + dest = iWorkBuffer1; + bool useBuffer1 = true; if (copydata->flip) { ApplyFlip(dest, source, pitch, w, h, copydata->flip); source = dest; + useBuffer1 = !useBuffer1; } - if (copydata->scale_x != 1.f || copydata->scale_y != 1.f) { - dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a; + if (copydata->scale_x != Int2Fix(1) || copydata->scale_y != Int2Fix(1)) { + dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2; ApplyScale(dest, source, pitch, w, h, copydata->center.x, copydata->center.y, copydata->scale_x, copydata->scale_y); source = dest; + useBuffer1 = !useBuffer1; } if (copydata->angle) { - dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a; + dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2; ApplyRotation(dest, source, pitch, w, h, copydata->center.x, copydata->center.y, copydata->angle); source = dest; + useBuffer1 = !useBuffer1; } if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) { - dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a; + dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2; ApplyColorMod(dest, source, pitch, w, h, texture->color); source = dest; + useBuffer1 = !useBuffer1; } - Mem::Copy(phdata->bitmap->DataAddress(), source, pitch * h); - SDL_free(pixel_buffer_a); - SDL_free(pixel_buffer_b); + // Copy the transformed pixels from the work buffer back into the texture's bitmap so that BitBlt can draw them. + // NOTE(review): the source pixels were read from this same bitmap, so the transformation appears to persist in the texture across draws; confirm. + // A temporary bitmap (or a render target) for the transformed data would avoid both the overwrite and this copy. 
+ Mem::Copy(phdata->cachedDataAddress, source, pitch * h); - if (phdata->bitmap) { - TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); - TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); - iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); - } + TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); + TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); + iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); return true; } @@ -440,6 +530,13 @@ bool CRenderer::CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aW return false; } + // Cache texture properties to avoid repeated API calls. + TSize bitmapSize = aTextureData->bitmap->SizeInPixels(); + aTextureData->cachedWidth = bitmapSize.iWidth; + aTextureData->cachedHeight = bitmapSize.iHeight; + aTextureData->cachedPitch = aTextureData->bitmap->ScanLineLength(aWidth, aTextureData->bitmap->DisplayMode()); + aTextureData->cachedDataAddress = aTextureData->bitmap->DataAddress(); + return true; } diff --git a/src/render/ngage/SDL_render_ngage_c.h b/src/render/ngage/SDL_render_ngage_c.h index 32ce861525..4d56e472ca 100644 --- a/src/render/ngage/SDL_render_ngage_c.h +++ b/src/render/ngage/SDL_render_ngage_c.h @@ -58,7 +58,12 @@ typedef struct CFbsBitmap CFbsBitmap; typedef struct NGAGE_TextureData { CFbsBitmap *bitmap; - SDL_Surface *surface; + + // Cached properties to avoid repeated API calls. 
+ int cachedWidth; + int cachedHeight; + int cachedPitch; + void *cachedDataAddress; } NGAGE_TextureData; @@ -89,6 +94,10 @@ bool NGAGE_Copy(SDL_Renderer *renderer, SDL_Texture *texture, SDL_Rect *srcrect, bool NGAGE_CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, NGAGE_CopyExData *copydata); bool NGAGE_CreateTextureData(NGAGE_TextureData *data, const int width, const int height); void NGAGE_DestroyTextureData(NGAGE_TextureData *data); +void *NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data); +int NGAGE_GetBitmapPitch(NGAGE_TextureData *data); +int NGAGE_GetBitmapWidth(NGAGE_TextureData *data); +int NGAGE_GetBitmapHeight(NGAGE_TextureData *data); void NGAGE_DrawLines(NGAGE_Vertex *verts, const int count); void NGAGE_DrawPoints(NGAGE_Vertex *verts, const int count); void NGAGE_FillRects(NGAGE_Vertex *verts, const int count); diff --git a/src/render/ngage/SDL_render_ngage_c.hpp b/src/render/ngage/SDL_render_ngage_c.hpp index 63aef670d3..007b7145a7 100644 --- a/src/render/ngage/SDL_render_ngage_c.hpp +++ b/src/render/ngage/SDL_render_ngage_c.hpp @@ -86,6 +86,14 @@ class CRenderer : public MDirectScreenAccess // Screen saver. TBool iSuspendScreenSaver; + + // Work buffers for texture transformations (reusable to avoid per-frame allocations). + void *iWorkBuffer1; + void *iWorkBuffer2; + TInt iWorkBufferSize; + + // Helper method to ensure work buffers have sufficient capacity. 
+ bool EnsureWorkBufferCapacity(TInt aRequiredSize); }; #endif // ngage_video_render_ngage_c_hpp diff --git a/src/render/ngage/SDL_render_ops.cpp b/src/render/ngage/SDL_render_ops.cpp index 3874700bd2..8a2244436b 100644 --- a/src/render/ngage/SDL_render_ops.cpp +++ b/src/render/ngage/SDL_render_ops.cpp @@ -20,8 +20,8 @@ */ #include "SDL_internal.h" -#include <3dtypes.h> #include "SDL_render_ops.hpp" +#include <3dtypes.h> void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color) { @@ -32,18 +32,22 @@ void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, S TFixed gf = Real2Fix(color.g); TFixed bf = Real2Fix(color.b); - for (int y = 0; y < height; ++y) - { - for (int x = 0; x < width; ++x) - { - TUint16 pixel = src_pixels[y * pitch / 2 + x]; + // Pre-calculate pitch in pixels to avoid repeated division. + const TInt pitchPixels = pitch >> 1; + + for (int y = 0; y < height; ++y) { + // Calculate row offset once per row. + TInt rowOffset = y * pitchPixels; + + for (int x = 0; x < width; ++x) { + TUint16 pixel = src_pixels[rowOffset + x]; TUint8 r = (pixel & 0xF800) >> 8; TUint8 g = (pixel & 0x07E0) >> 3; TUint8 b = (pixel & 0x001F) << 3; r = FixMul(r, rf); g = FixMul(g, gf); b = FixMul(b, bf); - dst_pixels[y * pitch / 2 + x] = (r << 8) | (g << 3) | (b >> 3); + dst_pixels[rowOffset + x] = (r << 8) | (g << 3) | (b >> 3); } } } @@ -53,20 +57,16 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F TUint16 *src_pixels = static_cast(source); TUint16 *dst_pixels = static_cast(dest); - for (int y = 0; y < height; ++y) - { - for (int x = 0; x < width; ++x) - { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { int src_x = x; int src_y = y; - if (flip & SDL_FLIP_HORIZONTAL) - { + if (flip & SDL_FLIP_HORIZONTAL) { src_x = width - 1 - x; } - if (flip & SDL_FLIP_VERTICAL) - { + if (flip & SDL_FLIP_VERTICAL) { src_y = height - 1 - y; } @@ -83,15 +83,12 @@ void 
ApplyRotation(void *dest, void *source, int pitch, int width, int height, T TFixed cos_angle = 0; TFixed sin_angle = 0; - if (angle != 0) - { + if (angle != 0) { FixSinCos(angle, sin_angle, cos_angle); } - for (int y = 0; y < height; ++y) - { - for (int x = 0; x < width; ++x) - { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { // Translate point to origin. TFixed translated_x = Int2Fix(x) - center_x; TFixed translated_y = Int2Fix(y) - center_y; @@ -105,12 +102,9 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T int final_y = Fix2Int(rotated_y + center_y); // Check bounds. - if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) - { + if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { dst_pixels[y * pitch / 2 + x] = src_pixels[final_y * pitch / 2 + final_x]; - } - else - { + } else { dst_pixels[y * pitch / 2 + x] = 0; } } @@ -122,10 +116,8 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix TUint16 *src_pixels = static_cast(source); TUint16 *dst_pixels = static_cast(dest); - for (int y = 0; y < height; ++y) - { - for (int x = 0; x < width; ++x) - { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { // Translate point to origin. TFixed translated_x = Int2Fix(x) - center_x; TFixed translated_y = Int2Fix(y) - center_y; @@ -139,12 +131,9 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix int final_y = Fix2Int(scaled_y + center_y); // Check bounds. - if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) - { + if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { dst_pixels[y * pitch / 2 + x] = src_pixels[final_y * pitch / 2 + final_x]; - } - else - { + } else { dst_pixels[y * pitch / 2 + x] = 0; } }