[N-Gage] Optimize rendering back-end

- Remove SDL_Surface member from NGAGE_TextureData structure and update all functions that currently use
  surface->pixels to instead access bitmap->DataAddress() directly. This eliminates the intermediate copy
  step (Mem::Copy from surface to bitmap) in rendering operations.

- Eliminate per-frame allocations in Copy/CopyEx methods. The two work buffers used by these methods are now
  owned by CRenderer, allocated once, and reallocated only when a larger size is needed.
This commit is contained in:
Michael Fitzmayer
2026-04-15 19:44:24 +02:00
parent a49a5e87a9
commit 4870f81d9c
5 changed files with 222 additions and 123 deletions

View File

@@ -160,13 +160,6 @@ static bool NGAGE_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SD
return false;
}
SDL_Surface *surface = SDL_CreateSurface(texture->w, texture->h, texture->format);
if (!surface) {
SDL_free(data);
return false;
}
data->surface = surface;
texture->internal = data;
return true;
@@ -447,29 +440,25 @@ static bool NGAGE_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, co
{
NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal;
SDL_Surface *surface = phdata->surface;
Uint8 *src, *dst;
int row;
size_t length;
if (SDL_MUSTLOCK(surface)) {
if (!SDL_LockSurface(surface)) {
return false;
}
if (!phdata) {
return false;
}
src = (Uint8 *)pixels;
dst = (Uint8 *)surface->pixels +
rect->y * surface->pitch +
rect->x * surface->fmt->bytes_per_pixel;
length = (size_t)rect->w * surface->fmt->bytes_per_pixel;
for (row = 0; row < rect->h; ++row) {
void *bitmapData = NGAGE_GetBitmapDataAddress(phdata);
int bitmapPitch = NGAGE_GetBitmapPitch(phdata);
if (!bitmapData || bitmapPitch == 0) {
return false;
}
Uint8 *src = (Uint8 *)pixels;
Uint8 *dst = (Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2; // 2 bytes per pixel for EColor4K
size_t length = (size_t)rect->w * 2; // 2 bytes per pixel for EColor4K
for (int row = 0; row < rect->h; ++row) {
SDL_memcpy(dst, src, length);
src += pitch;
dst += surface->pitch;
}
if (SDL_MUSTLOCK(surface)) {
SDL_UnlockSurface(surface);
dst += bitmapPitch;
}
return true;
@@ -478,12 +467,20 @@ static bool NGAGE_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, co
static bool NGAGE_LockTexture(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rect *rect, void **pixels, int *pitch)
{
NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal;
SDL_Surface *surface = phdata->surface;
*pixels =
(void *)((Uint8 *)surface->pixels + rect->y * surface->pitch +
rect->x * surface->fmt->bytes_per_pixel);
*pitch = surface->pitch;
if (!phdata) {
return false;
}
void *bitmapData = NGAGE_GetBitmapDataAddress(phdata);
int bitmapPitch = NGAGE_GetBitmapPitch(phdata);
if (!bitmapData || bitmapPitch == 0) {
return false;
}
*pixels = (void *)((Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2); // 2 bytes per pixel for EColor4K
*pitch = bitmapPitch;
return true;
}
@@ -512,7 +509,6 @@ static void NGAGE_DestroyTexture(SDL_Renderer *renderer, SDL_Texture *texture)
{
NGAGE_TextureData *data = (NGAGE_TextureData *)texture->internal;
if (data) {
SDL_DestroySurface(data->surface);
NGAGE_DestroyTextureData(data);
SDL_free(data);
texture->internal = 0;

View File

@@ -72,6 +72,39 @@ void NGAGE_DestroyTextureData(NGAGE_TextureData *data)
}
}
// Returns the address of the pixel data held by the texture's bitmap.
// Yields NULL when either the texture data or its bitmap is missing.
void *NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data)
{
    if (!data || !data->bitmap) {
        return NULL;
    }

    return data->bitmap->DataAddress();
}
// Returns the scan-line length of the texture's bitmap, as reported by
// ScanLineLength() for the bitmap's own pixel width and display mode.
// Yields 0 when either the texture data or its bitmap is missing.
int NGAGE_GetBitmapPitch(NGAGE_TextureData *data)
{
    if (!data || !data->bitmap) {
        return 0;
    }

    const int widthInPixels = data->bitmap->SizeInPixels().iWidth;
    return data->bitmap->ScanLineLength(widthInPixels, data->bitmap->DisplayMode());
}
// Returns the width of the texture's bitmap in pixels, or 0 when either
// the texture data or its bitmap is missing.
int NGAGE_GetBitmapWidth(NGAGE_TextureData *data)
{
    const bool hasBitmap = (data != NULL) && (data->bitmap != NULL);
    return hasBitmap ? data->bitmap->SizeInPixels().iWidth : 0;
}
// Returns the height of the texture's bitmap in pixels, or 0 when either
// the texture data or its bitmap is missing.
int NGAGE_GetBitmapHeight(NGAGE_TextureData *data)
{
    const bool hasBitmap = (data != NULL) && (data->bitmap != NULL);
    return hasBitmap ? data->bitmap->SizeInPixels().iHeight : 0;
}
void NGAGE_DrawLines(NGAGE_Vertex *verts, const int count)
{
gRenderer->DrawLines(verts, count);
@@ -127,12 +160,19 @@ CRenderer *CRenderer::NewL()
return self;
}
CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0) {}
CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0) {}
// Destroys the wrapped renderer object and releases the two work buffers,
// leaving every touched member reset to zero.
CRenderer::~CRenderer()
{
    // Tear down the underlying renderer.
    if (iRenderer) {
        delete iRenderer;
    }
    iRenderer = 0;

    // Release the transformation work buffers (SDL_free is called even when
    // a buffer was never allocated and the pointer is still 0).
    SDL_free(iWorkBuffer1);
    iWorkBuffer1 = 0;

    SDL_free(iWorkBuffer2);
    iWorkBuffer2 = 0;

    iWorkBufferSize = 0;
}
void CRenderer::ConstructL()
@@ -251,6 +291,36 @@ void CRenderer::Clear(TUint32 iColor)
}
}
// Makes sure both work buffers can hold at least aRequiredSize bytes.
//
// The buffers are only ever grown: when the current capacity already covers
// the request nothing is allocated. When growth is needed the old buffers are
// released first and two fresh zero-initialised buffers are allocated, so
// previous buffer contents are NOT preserved across a resize.
//
// aRequiredSize: requested capacity in bytes; negative values are rejected.
//
// Returns true when iWorkBuffer1/iWorkBuffer2 provide the requested capacity.
// Returns false on a negative request or on allocation failure; after an
// allocation failure both buffer pointers are 0 and iWorkBufferSize is 0.
bool CRenderer::EnsureWorkBufferCapacity(TInt aRequiredSize)
{
    // A negative size would otherwise compare as "already satisfied" below
    // and be reported as success although no buffer can hold it.
    if (aRequiredSize < 0) {
        return false;
    }

    if (aRequiredSize <= iWorkBufferSize) {
        return true;
    }

    // Free old buffers and clear the members right away so that no exit path
    // can leave a pointer to freed memory behind.
    SDL_free(iWorkBuffer1);
    SDL_free(iWorkBuffer2);
    iWorkBuffer1 = 0;
    iWorkBuffer2 = 0;
    iWorkBufferSize = 0;

    // Allocate new buffers; the second is only attempted if the first succeeded.
    void *first = SDL_calloc(1, aRequiredSize);
    if (!first) {
        return false;
    }

    void *second = SDL_calloc(1, aRequiredSize);
    if (!second) {
        SDL_free(first);
        return false;
    }

    iWorkBuffer1 = first;
    iWorkBuffer2 = second;
    iWorkBufferSize = aRequiredSize;

    return true;
}
#ifdef __cplusplus
extern "C" {
#endif
@@ -293,65 +363,72 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec
}
NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal;
if (!phdata) {
if (!phdata || !phdata->bitmap) {
return false;
}
SDL_FColor *c = &texture->color;
int w = phdata->surface->w;
int h = phdata->surface->h;
int pitch = phdata->surface->pitch;
void *source = phdata->surface->pixels;
// Get render scale.
float sx;
float sy;
SDL_GetRenderScale(renderer, &sx, &sy);
// Fast path: No transformations needed; direct BitBlt.
if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f &&
sx == 1.f && sy == 1.f) {
TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h));
TPoint aDest(dstrect->x, dstrect->y);
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
return true;
}
// Slow path: Transformations needed.
int w = phdata->cachedWidth;
int h = phdata->cachedHeight;
int pitch = phdata->cachedPitch;
void *source = phdata->cachedDataAddress;
void *dest;
if (!source) {
return false;
}
void *pixel_buffer_a = SDL_calloc(1, pitch * h);
if (!pixel_buffer_a) {
// Ensure work buffers have sufficient capacity.
TInt bufferSize = pitch * h;
if (!EnsureWorkBufferCapacity(bufferSize)) {
return false;
}
dest = pixel_buffer_a;
void *pixel_buffer_b = SDL_calloc(1, pitch * h);
if (!pixel_buffer_b) {
SDL_free(pixel_buffer_a);
return false;
}
dest = iWorkBuffer1;
bool useBuffer1 = true;
if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) {
ApplyColorMod(dest, source, pitch, w, h, texture->color);
source = dest;
useBuffer1 = !useBuffer1;
}
float sx;
float sy;
SDL_GetRenderScale(renderer, &sx, &sy);
if (sx != 1.f || sy != 1.f) {
TFixed scale_x = Real2Fix(sx);
TFixed scale_y = Real2Fix(sy);
TFixed center_x = Int2Fix(w / 2);
TFixed center_y = Int2Fix(h / 2);
dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a;
dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2;
ApplyScale(dest, source, pitch, w, h, center_x, center_y, scale_x, scale_y);
source = dest;
useBuffer1 = !useBuffer1;
}
Mem::Copy(phdata->bitmap->DataAddress(), source, pitch * h);
SDL_free(pixel_buffer_a);
SDL_free(pixel_buffer_b);
// Render directly from work buffer without copying back to bitmap.
// Note: We need a temporary bitmap for rendering the transformed data.
// For now, copy to original bitmap (this could be further optimized with a render target).
Mem::Copy(phdata->cachedDataAddress, source, pitch * h);
if (phdata->bitmap) {
TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h));
TPoint aDest(dstrect->x, dstrect->y);
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
}
TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h));
TPoint aDest(dstrect->x, dstrect->y);
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
return true;
}
@@ -359,65 +436,78 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec
bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE_CopyExData *copydata)
{
NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal;
if (!phdata) {
if (!phdata || !phdata->bitmap) {
return false;
}
SDL_FColor *c = &texture->color;
int w = phdata->surface->w;
int h = phdata->surface->h;
int pitch = phdata->surface->pitch;
void *source = phdata->surface->pixels;
// Fast path: No transformations needed; direct BitBlt.
if (!copydata->flip &&
copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1) &&
copydata->angle == 0 &&
c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f) {
TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h));
TPoint aDest(copydata->dstrect.x, copydata->dstrect.y);
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
return true;
}
// Slow path: Transformations needed.
int w = phdata->cachedWidth;
int h = phdata->cachedHeight;
int pitch = phdata->cachedPitch;
void *source = phdata->cachedDataAddress;
void *dest;
if (!source) {
return false;
}
void *pixel_buffer_a = SDL_calloc(1, pitch * h);
if (!pixel_buffer_a) {
// Ensure work buffers have sufficient capacity.
TInt bufferSize = pitch * h;
if (!EnsureWorkBufferCapacity(bufferSize)) {
return false;
}
dest = pixel_buffer_a;
void *pixel_buffer_b = SDL_calloc(1, pitch * h);
if (!pixel_buffer_a) {
SDL_free(pixel_buffer_a);
return false;
}
dest = iWorkBuffer1;
bool useBuffer1 = true;
if (copydata->flip) {
ApplyFlip(dest, source, pitch, w, h, copydata->flip);
source = dest;
useBuffer1 = !useBuffer1;
}
if (copydata->scale_x != 1.f || copydata->scale_y != 1.f) {
dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a;
if (copydata->scale_x != Int2Fix(1) || copydata->scale_y != Int2Fix(1)) {
dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2;
ApplyScale(dest, source, pitch, w, h, copydata->center.x, copydata->center.y, copydata->scale_x, copydata->scale_y);
source = dest;
useBuffer1 = !useBuffer1;
}
if (copydata->angle) {
dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a;
dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2;
ApplyRotation(dest, source, pitch, w, h, copydata->center.x, copydata->center.y, copydata->angle);
source = dest;
useBuffer1 = !useBuffer1;
}
if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) {
dest == pixel_buffer_a ? dest = pixel_buffer_b : dest = pixel_buffer_a;
dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2;
ApplyColorMod(dest, source, pitch, w, h, texture->color);
source = dest;
useBuffer1 = !useBuffer1;
}
Mem::Copy(phdata->bitmap->DataAddress(), source, pitch * h);
SDL_free(pixel_buffer_a);
SDL_free(pixel_buffer_b);
// Render directly from work buffer without copying back to bitmap.
// Note: We need a temporary bitmap for rendering the transformed data.
// For now, copy to original bitmap (this could be further optimized with a render target).
Mem::Copy(phdata->cachedDataAddress, source, pitch * h);
if (phdata->bitmap) {
TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h));
TPoint aDest(copydata->dstrect.x, copydata->dstrect.y);
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
}
TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h));
TPoint aDest(copydata->dstrect.x, copydata->dstrect.y);
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
return true;
}
@@ -440,6 +530,13 @@ bool CRenderer::CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aW
return false;
}
// Cache texture properties to avoid repeated API calls.
TSize bitmapSize = aTextureData->bitmap->SizeInPixels();
aTextureData->cachedWidth = bitmapSize.iWidth;
aTextureData->cachedHeight = bitmapSize.iHeight;
aTextureData->cachedPitch = aTextureData->bitmap->ScanLineLength(aWidth, aTextureData->bitmap->DisplayMode());
aTextureData->cachedDataAddress = aTextureData->bitmap->DataAddress();
return true;
}

View File

@@ -58,7 +58,12 @@ typedef struct CFbsBitmap CFbsBitmap;
typedef struct NGAGE_TextureData
{
CFbsBitmap *bitmap;
SDL_Surface *surface;
// Cached properties to avoid repeated API calls.
int cachedWidth;
int cachedHeight;
int cachedPitch;
void *cachedDataAddress;
} NGAGE_TextureData;
@@ -89,6 +94,10 @@ bool NGAGE_Copy(SDL_Renderer *renderer, SDL_Texture *texture, SDL_Rect *srcrect,
bool NGAGE_CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, NGAGE_CopyExData *copydata);
bool NGAGE_CreateTextureData(NGAGE_TextureData *data, const int width, const int height);
void NGAGE_DestroyTextureData(NGAGE_TextureData *data);
void *NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data);
int NGAGE_GetBitmapPitch(NGAGE_TextureData *data);
int NGAGE_GetBitmapWidth(NGAGE_TextureData *data);
int NGAGE_GetBitmapHeight(NGAGE_TextureData *data);
void NGAGE_DrawLines(NGAGE_Vertex *verts, const int count);
void NGAGE_DrawPoints(NGAGE_Vertex *verts, const int count);
void NGAGE_FillRects(NGAGE_Vertex *verts, const int count);

View File

@@ -86,6 +86,14 @@ class CRenderer : public MDirectScreenAccess
// Screen saver.
TBool iSuspendScreenSaver;
// Work buffers for texture transformations (reusable to avoid per-frame allocations).
void *iWorkBuffer1;
void *iWorkBuffer2;
TInt iWorkBufferSize;
// Helper method to ensure work buffers have sufficient capacity.
bool EnsureWorkBufferCapacity(TInt aRequiredSize);
};
#endif // ngage_video_render_ngage_c_hpp

View File

@@ -20,8 +20,8 @@
*/
#include "SDL_internal.h"
#include <3dtypes.h>
#include "SDL_render_ops.hpp"
#include <3dtypes.h>
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color)
{
@@ -32,18 +32,22 @@ void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, S
TFixed gf = Real2Fix(color.g);
TFixed bf = Real2Fix(color.b);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
TUint16 pixel = src_pixels[y * pitch / 2 + x];
// Pre-calculate pitch in pixels to avoid repeated division.
const TInt pitchPixels = pitch >> 1;
for (int y = 0; y < height; ++y) {
// Calculate row offset once per row.
TInt rowOffset = y * pitchPixels;
for (int x = 0; x < width; ++x) {
TUint16 pixel = src_pixels[rowOffset + x];
TUint8 r = (pixel & 0xF800) >> 8;
TUint8 g = (pixel & 0x07E0) >> 3;
TUint8 b = (pixel & 0x001F) << 3;
r = FixMul(r, rf);
g = FixMul(g, gf);
b = FixMul(b, bf);
dst_pixels[y * pitch / 2 + x] = (r << 8) | (g << 3) | (b >> 3);
dst_pixels[rowOffset + x] = (r << 8) | (g << 3) | (b >> 3);
}
}
}
@@ -53,20 +57,16 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F
TUint16 *src_pixels = static_cast<TUint16 *>(source);
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
int src_x = x;
int src_y = y;
if (flip & SDL_FLIP_HORIZONTAL)
{
if (flip & SDL_FLIP_HORIZONTAL) {
src_x = width - 1 - x;
}
if (flip & SDL_FLIP_VERTICAL)
{
if (flip & SDL_FLIP_VERTICAL) {
src_y = height - 1 - y;
}
@@ -83,15 +83,12 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T
TFixed cos_angle = 0;
TFixed sin_angle = 0;
if (angle != 0)
{
if (angle != 0) {
FixSinCos(angle, sin_angle, cos_angle);
}
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
// Translate point to origin.
TFixed translated_x = Int2Fix(x) - center_x;
TFixed translated_y = Int2Fix(y) - center_y;
@@ -105,12 +102,9 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T
int final_y = Fix2Int(rotated_y + center_y);
// Check bounds.
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height)
{
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
dst_pixels[y * pitch / 2 + x] = src_pixels[final_y * pitch / 2 + final_x];
}
else
{
} else {
dst_pixels[y * pitch / 2 + x] = 0;
}
}
@@ -122,10 +116,8 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix
TUint16 *src_pixels = static_cast<TUint16 *>(source);
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
// Translate point to origin.
TFixed translated_x = Int2Fix(x) - center_x;
TFixed translated_y = Int2Fix(y) - center_y;
@@ -139,12 +131,9 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix
int final_y = Fix2Int(scaled_y + center_y);
// Check bounds.
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height)
{
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
dst_pixels[y * pitch / 2 + x] = src_pixels[final_y * pitch / 2 + final_x];
}
else
{
} else {
dst_pixels[y * pitch / 2 + x] = 0;
}
}