[N-Gage] Add LUT color mod, cardinal rotation cache and loop unrolling

- Implement lookup tables for faster color modulation
- Cache 0°/90°/180°/270° rotations for speedup on common angles
- Add dirty rectangle tracking infrastructure
- Process 4 pixels at a time in all transform operations
This commit is contained in:
Michael Fitzmayer
2026-04-15 20:34:23 +02:00
parent e5c8523b36
commit 5bd1a65e6f
6 changed files with 319 additions and 40 deletions

View File

@@ -461,6 +461,10 @@ static bool NGAGE_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, co
dst += bitmapPitch;
}
// Mark texture as dirty.
phdata->isDirty = true;
phdata->dirtyRect = *rect;
return true;
}
@@ -481,11 +485,21 @@ static bool NGAGE_LockTexture(SDL_Renderer *renderer, SDL_Texture *texture, cons
*pixels = (void *)((Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2); // 2 bytes per pixel for EColor4K
*pitch = bitmapPitch;
// Store the lock rectangle for dirty tracking.
phdata->dirtyRect = *rect;
return true;
}
// Finish a lock on a texture.
// Nothing is copied back here; the backend data is only flagged as modified.
// `renderer` is not used by this function.
static void NGAGE_UnlockTexture(SDL_Renderer *renderer, SDL_Texture *texture)
{
    NGAGE_TextureData *data = (NGAGE_TextureData *)texture->internal;

    // No backend data attached to this texture: nothing to flag.
    if (!data) {
        return;
    }

    // There is no way to tell whether the caller wrote to the locked
    // pixels, so conservatively treat every unlock as a modification.
    data->isDirty = true;
}
static bool NGAGE_SetRenderTarget(SDL_Renderer *renderer, SDL_Texture *texture)

View File

@@ -69,6 +69,14 @@ void NGAGE_DestroyTextureData(NGAGE_TextureData *data)
if (data) {
delete data->bitmap;
data->bitmap = NULL;
// Free cardinal rotation cache.
for (int i = 0; i < 4; i++) {
if (data->cardinalRotations[i]) {
delete data->cardinalRotations[i];
data->cardinalRotations[i] = NULL;
}
}
}
}
@@ -160,7 +168,7 @@ CRenderer *CRenderer::NewL()
return self;
}
CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0) {}
CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0), iLastColorR(-1), iLastColorG(-1), iLastColorB(-1) {}
CRenderer::~CRenderer()
{
@@ -361,6 +369,94 @@ bool CRenderer::EnsureTempBitmapCapacity(TInt aWidth, TInt aHeight)
return true;
}
// Rebuild the color modulation lookup tables for the given per-channel
// factors (16.16 fixed-point).
//
// iColorModLUT holds three consecutive 256-entry tables: red at offset 0,
// green at offset 256 and blue at offset 512. Entry i of each table is
// i * factor, clamped to the range [0, 255].
//
// The factors are stored in iLastColorR/G/B so that callers can compare
// against them and skip the rebuild when the color has not changed.
void CRenderer::BuildColorModLUT(TFixed rf, TFixed gf, TFixed bf)
{
    for (int i = 0; i < 256; i++) {
        const TFixed val = i << 16; // Convert to fixed-point.

        const int r = Fix2Int(FixMul(val, rf));
        const int g = Fix2Int(FixMul(val, gf));
        const int b = Fix2Int(FixMul(val, bf));

        // Clamp on both sides before narrowing: a factor above 1.0 would
        // exceed 255, and a negative factor would yield a negative value
        // that wraps around to a bright color when cast to TUint8.
        iColorModLUT[i] = static_cast<TUint8>(SDL_clamp(r, 0, 255));       // R
        iColorModLUT[i + 256] = static_cast<TUint8>(SDL_clamp(g, 0, 255)); // G
        iColorModLUT[i + 512] = static_cast<TUint8>(SDL_clamp(b, 0, 255)); // B
    }

    // Remember the last color to avoid rebuilding unnecessarily.
    iLastColorR = rf;
    iLastColorG = gf;
    iLastColorB = bf;
}
// Return a bitmap containing the texture rotated by a multiple of 90 degrees.
//
// aAngleIndex selects the rotation: 0 = 0 degrees, 1 = 90, 2 = 180, 3 = 270.
// Rotated bitmaps are created on first use and stored in
// aTextureData->cardinalRotations[], which owns them; the caller must not
// delete the returned bitmap.
//
// Returns NULL when the arguments are invalid or the rotated bitmap cannot
// be created.
//
// NOTE(review): cached rotations are not invalidated here when the texture
// pixels change. isDirty is set elsewhere on update/unlock but is not
// consulted by this function -- confirm who is expected to drop stale
// entries.
CFbsBitmap* CRenderer::GetCardinalRotation(NGAGE_TextureData *aTextureData, TInt aAngleIndex)
{
    // The index is used directly as a subscript into a four-slot array,
    // so anything outside 0..3 must be rejected up front.
    if (!aTextureData || aAngleIndex < 0 || aAngleIndex >= 4) {
        return NULL;
    }

    // A 0 degree rotation is the texture itself. Hand back the original
    // bitmap instead of allocating and filling a pixel-identical copy.
    if (aAngleIndex == 0) {
        return aTextureData->bitmap;
    }

    // Check if already cached.
    if (aTextureData->cardinalRotations[aAngleIndex]) {
        return aTextureData->cardinalRotations[aAngleIndex];
    }

    const TInt w = aTextureData->cachedWidth;
    const TInt h = aTextureData->cachedHeight;
    const TUint16 *src = (const TUint16 *)aTextureData->cachedDataAddress;
    if (!src || w <= 0 || h <= 0) {
        return NULL;
    }

    // Create rotated bitmap.
    CFbsBitmap *rotated = new CFbsBitmap();
    if (!rotated) {
        return NULL;
    }

    // For 90 and 270 degree rotations, swap width/height.
    const TSize size = (aAngleIndex == 2) ? TSize(w, h) : TSize(h, w);

    TInt error = rotated->Create(size, EColor4K);
    if (error != KErrNone) {
        delete rotated;
        return NULL;
    }

    TUint16 *dst = (TUint16 *)rotated->DataAddress();
    if (!dst) {
        delete rotated;
        return NULL;
    }

    // Pitches are converted from bytes to 16-bit pixels.
    const TInt srcPitch = aTextureData->cachedPitch >> 1;
    const TInt dstPitch = rotated->ScanLineLength(size.iWidth, rotated->DisplayMode()) >> 1;

    // Rotate the bitmap data. The angle is selected once, outside the
    // pixel loops, rather than once per pixel.
    switch (aAngleIndex) {
    case 1: // 90 degrees: (x, y) -> (h - 1 - y, x)
        for (TInt y = 0; y < h; ++y) {
            const TUint16 *srcRow = src + y * srcPitch;
            const TInt dstX = h - 1 - y;
            for (TInt x = 0; x < w; ++x) {
                dst[x * dstPitch + dstX] = srcRow[x];
            }
        }
        break;

    case 2: // 180 degrees: (x, y) -> (w - 1 - x, h - 1 - y)
        for (TInt y = 0; y < h; ++y) {
            const TUint16 *srcRow = src + y * srcPitch;
            TUint16 *dstRow = dst + (h - 1 - y) * dstPitch;
            for (TInt x = 0; x < w; ++x) {
                dstRow[w - 1 - x] = srcRow[x];
            }
        }
        break;

    default: // 270 degrees (index 3): (x, y) -> (y, w - 1 - x)
        for (TInt y = 0; y < h; ++y) {
            const TUint16 *srcRow = src + y * srcPitch;
            for (TInt x = 0; x < w; ++x) {
                dst[(w - 1 - x) * dstPitch + y] = srcRow[x];
            }
        }
        break;
    }

    aTextureData->cardinalRotations[aAngleIndex] = rotated;
    return rotated;
}
#ifdef __cplusplus
extern "C" {
#endif
@@ -444,7 +540,16 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec
bool useBuffer1 = true;
if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) {
ApplyColorMod(dest, source, pitch, w, h, texture->color);
TFixed rf = Real2Fix(c->r);
TFixed gf = Real2Fix(c->g);
TFixed bf = Real2Fix(c->b);
// Build LUT if color changed.
if (rf != iLastColorR || gf != iLastColorG || bf != iLastColorB) {
BuildColorModLUT(rf, gf, bf);
}
ApplyColorMod(dest, source, pitch, w, h, texture->color, iColorModLUT);
source = dest;
useBuffer1 = !useBuffer1;
}
@@ -486,6 +591,39 @@ bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE
SDL_FColor *c = &texture->color;
// Check for cardinal rotation cache opportunity (0°, 90°, 180°, 270°).
TInt angleIndex = -1;
TFixed angle = copydata->angle;
if (!copydata->flip &&
copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1) &&
c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f) {
// Check whether the angle is (within a small tolerance of) a cardinal angle.
// Angle is in fixed-point radians: 0, π/2, π, 3π/2; 2π is treated as 0.
TFixed zero = 0;
TFixed pi_2 = Real2Fix(M_PI / 2.0);
TFixed pi = Real2Fix(M_PI);
TFixed pi3_2 = Real2Fix(3.0 * M_PI / 2.0);
TFixed pi2 = Real2Fix(2.0 * M_PI);
if (angle == zero) angleIndex = 0;
else if (SDL_abs(angle - pi_2) < 100) angleIndex = 1; // 90°
else if (SDL_abs(angle - pi) < 100) angleIndex = 2; // 180°
else if (SDL_abs(angle - pi3_2) < 100) angleIndex = 3; // 270°
else if (SDL_abs(angle - pi2) < 100) angleIndex = 0; // 360° = 0°
if (angleIndex >= 0) {
CFbsBitmap *cached = GetCardinalRotation(phdata, angleIndex);
if (cached) {
TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h));
TPoint aDest(copydata->dstrect.x, copydata->dstrect.y);
iRenderer->Gc()->BitBlt(aDest, cached, aSource);
return true;
}
}
}
// Fast path: No transformations needed; direct BitBlt.
if (!copydata->flip &&
copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1) &&
@@ -538,8 +676,17 @@ bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE
}
if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) {
TFixed rf = Real2Fix(c->r);
TFixed gf = Real2Fix(c->g);
TFixed bf = Real2Fix(c->b);
// Build LUT if color changed.
if (rf != iLastColorR || gf != iLastColorG || bf != iLastColorB) {
BuildColorModLUT(rf, gf, bf);
}
dest = useBuffer1 ? iWorkBuffer1 : iWorkBuffer2;
ApplyColorMod(dest, source, pitch, w, h, texture->color);
ApplyColorMod(dest, source, pitch, w, h, texture->color, iColorModLUT);
source = dest;
useBuffer1 = !useBuffer1;
}
@@ -585,6 +732,18 @@ bool CRenderer::CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aW
aTextureData->cachedPitch = aTextureData->bitmap->ScanLineLength(aWidth, aTextureData->bitmap->DisplayMode());
aTextureData->cachedDataAddress = aTextureData->bitmap->DataAddress();
// Initialize cardinal rotation cache to NULL.
for (int i = 0; i < 4; i++) {
aTextureData->cardinalRotations[i] = NULL;
}
// Initialize dirty tracking.
aTextureData->isDirty = true; // New textures start dirty
aTextureData->dirtyRect.x = 0;
aTextureData->dirtyRect.y = 0;
aTextureData->dirtyRect.w = aWidth;
aTextureData->dirtyRect.h = aHeight;
return true;
}

View File

@@ -65,6 +65,13 @@ typedef struct NGAGE_TextureData
int cachedPitch;
void *cachedDataAddress;
// Cardinal rotation cache (0°, 90°, 180°, 270°) - created on demand.
CFbsBitmap *cardinalRotations[4];
// Dirty tracking to avoid redundant rendering.
bool isDirty;
SDL_Rect dirtyRect;
} NGAGE_TextureData;
typedef struct NGAGE_CopyExData

View File

@@ -23,6 +23,7 @@
#define ngage_video_render_ngage_c_hpp
#include "SDL_render_ngage_c.h"
#include <3dtypes.h>
#include <NRenderer.h>
#include <e32std.h>
#include <w32std.h>
@@ -97,9 +98,17 @@ class CRenderer : public MDirectScreenAccess
TInt iTempRenderBitmapWidth;
TInt iTempRenderBitmapHeight;
// Color modulation lookup tables (pre-calculated to avoid per-pixel FixMul).
TUint8 iColorModLUT[768]; // 256 entries each for R, G, B
TFixed iLastColorR;
TFixed iLastColorG;
TFixed iLastColorB;
// Helper methods.
bool EnsureWorkBufferCapacity(TInt aRequiredSize);
bool EnsureTempBitmapCapacity(TInt aWidth, TInt aHeight);
void BuildColorModLUT(TFixed rf, TFixed gf, TFixed bf);
CFbsBitmap *GetCardinalRotation(NGAGE_TextureData *aTextureData, TInt aAngleIndex);
};
#endif // ngage_video_render_ngage_c_hpp

View File

@@ -23,30 +23,58 @@
#include "SDL_render_ops.hpp"
#include <3dtypes.h>
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color)
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color, const TUint8 *colorLUT)
{
TUint16 *src_pixels = static_cast<TUint16 *>(source);
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
TFixed rf = Real2Fix(color.r);
TFixed gf = Real2Fix(color.g);
TFixed bf = Real2Fix(color.b);
// Pre-calculate pitch in pixels to avoid repeated division.
const TInt pitchPixels = pitch >> 1;
const int totalPixels = width * height;
// Process 4 pixels at a time (loop unrolling).
int pixelIndex = 0;
for (int y = 0; y < height; ++y) {
// Calculate row offset once per row.
TInt rowOffset = y * pitchPixels;
int x = 0;
for (int x = 0; x < width; ++x) {
// Unrolled loop: process 4 pixels at once.
for (; x < width - 3; x += 4) {
// Pixel 0
TUint16 p0 = src_pixels[rowOffset + x];
TUint8 r0 = colorLUT[(p0 & 0xF800) >> 8];
TUint8 g0 = colorLUT[256 + ((p0 & 0x07E0) >> 3)];
TUint8 b0 = colorLUT[512 + ((p0 & 0x001F) << 3)];
dst_pixels[rowOffset + x] = (r0 << 8) | (g0 << 3) | (b0 >> 3);
// Pixel 1
TUint16 p1 = src_pixels[rowOffset + x + 1];
TUint8 r1 = colorLUT[(p1 & 0xF800) >> 8];
TUint8 g1 = colorLUT[256 + ((p1 & 0x07E0) >> 3)];
TUint8 b1 = colorLUT[512 + ((p1 & 0x001F) << 3)];
dst_pixels[rowOffset + x + 1] = (r1 << 8) | (g1 << 3) | (b1 >> 3);
// Pixel 2
TUint16 p2 = src_pixels[rowOffset + x + 2];
TUint8 r2 = colorLUT[(p2 & 0xF800) >> 8];
TUint8 g2 = colorLUT[256 + ((p2 & 0x07E0) >> 3)];
TUint8 b2 = colorLUT[512 + ((p2 & 0x001F) << 3)];
dst_pixels[rowOffset + x + 2] = (r2 << 8) | (g2 << 3) | (b2 >> 3);
// Pixel 3
TUint16 p3 = src_pixels[rowOffset + x + 3];
TUint8 r3 = colorLUT[(p3 & 0xF800) >> 8];
TUint8 g3 = colorLUT[256 + ((p3 & 0x07E0) >> 3)];
TUint8 b3 = colorLUT[512 + ((p3 & 0x001F) << 3)];
dst_pixels[rowOffset + x + 3] = (r3 << 8) | (g3 << 3) | (b3 >> 3);
}
// Handle remaining pixels.
for (; x < width; ++x) {
TUint16 pixel = src_pixels[rowOffset + x];
TUint8 r = (pixel & 0xF800) >> 8;
TUint8 g = (pixel & 0x07E0) >> 3;
TUint8 b = (pixel & 0x001F) << 3;
r = FixMul(r, rf);
g = FixMul(g, gf);
b = FixMul(b, bf);
TUint8 r = colorLUT[(pixel & 0xF800) >> 8];
TUint8 g = colorLUT[256 + ((pixel & 0x07E0) >> 3)];
TUint8 b = colorLUT[512 + ((pixel & 0x001F) << 3)];
dst_pixels[rowOffset + x] = (r << 8) | (g << 3) | (b >> 3);
}
}
@@ -57,20 +85,40 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F
TUint16 *src_pixels = static_cast<TUint16 *>(source);
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
// Pre-calculate pitch in pixels to avoid repeated division.
const TInt pitchPixels = pitch >> 1;
// Pre-calculate flip flags to avoid repeated bitwise operations.
const bool flipHorizontal = (flip & SDL_FLIP_HORIZONTAL) != 0;
const bool flipVertical = (flip & SDL_FLIP_VERTICAL) != 0;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
int src_x = x;
int src_y = y;
// Calculate destination row offset once per row.
TInt dstRowOffset = y * pitchPixels;
if (flip & SDL_FLIP_HORIZONTAL) {
src_x = width - 1 - x;
}
// Calculate source Y coordinate once per row.
int src_y = flipVertical ? (height - 1 - y) : y;
TInt srcRowOffset = src_y * pitchPixels;
if (flip & SDL_FLIP_VERTICAL) {
src_y = height - 1 - y;
}
int x = 0;
dst_pixels[y * pitch / 2 + x] = src_pixels[src_y * pitch / 2 + src_x];
// Unrolled loop: process 4 pixels at once.
for (; x < width - 3; x += 4) {
int src_x0 = flipHorizontal ? (width - 1 - x) : x;
int src_x1 = flipHorizontal ? (width - 2 - x) : (x + 1);
int src_x2 = flipHorizontal ? (width - 3 - x) : (x + 2);
int src_x3 = flipHorizontal ? (width - 4 - x) : (x + 3);
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + src_x0];
dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + src_x1];
dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + src_x2];
dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + src_x3];
}
// Handle remaining pixels.
for (; x < width; ++x) {
int src_x = flipHorizontal ? (width - 1 - x) : x;
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + src_x];
}
}
}
@@ -132,25 +180,67 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix
TUint16 *src_pixels = static_cast<TUint16 *>(source);
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
// Pre-calculate pitch in pixels to avoid repeated division.
const TInt pitchPixels = pitch >> 1;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
// Translate point to origin.
// Calculate destination row offset once per row.
TInt dstRowOffset = y * pitchPixels;
// Pre-calculate translated_y for the entire row.
TFixed translated_y = Int2Fix(y) - center_y;
TFixed scaled_y = FixDiv(translated_y, scale_y);
int final_y = Fix2Int(scaled_y + center_y);
// Check if this row is within bounds.
bool rowInBounds = (final_y >= 0 && final_y < height);
TInt srcRowOffset = final_y * pitchPixels;
int x = 0;
// Unrolled loop: process 4 pixels at once.
for (; x < width - 3; x += 4) {
// Pixel 0
TFixed translated_x0 = Int2Fix(x) - center_x;
TFixed scaled_x0 = FixDiv(translated_x0, scale_x);
int final_x0 = Fix2Int(scaled_x0 + center_x);
// Pixel 1
TFixed translated_x1 = Int2Fix(x + 1) - center_x;
TFixed scaled_x1 = FixDiv(translated_x1, scale_x);
int final_x1 = Fix2Int(scaled_x1 + center_x);
// Pixel 2
TFixed translated_x2 = Int2Fix(x + 2) - center_x;
TFixed scaled_x2 = FixDiv(translated_x2, scale_x);
int final_x2 = Fix2Int(scaled_x2 + center_x);
// Pixel 3
TFixed translated_x3 = Int2Fix(x + 3) - center_x;
TFixed scaled_x3 = FixDiv(translated_x3, scale_x);
int final_x3 = Fix2Int(scaled_x3 + center_x);
// Write all 4 pixels
dst_pixels[dstRowOffset + x] = (rowInBounds && final_x0 >= 0 && final_x0 < width) ?
src_pixels[srcRowOffset + final_x0] : 0;
dst_pixels[dstRowOffset + x + 1] = (rowInBounds && final_x1 >= 0 && final_x1 < width) ?
src_pixels[srcRowOffset + final_x1] : 0;
dst_pixels[dstRowOffset + x + 2] = (rowInBounds && final_x2 >= 0 && final_x2 < width) ?
src_pixels[srcRowOffset + final_x2] : 0;
dst_pixels[dstRowOffset + x + 3] = (rowInBounds && final_x3 >= 0 && final_x3 < width) ?
src_pixels[srcRowOffset + final_x3] : 0;
}
// Handle remaining pixels.
for (; x < width; ++x) {
TFixed translated_x = Int2Fix(x) - center_x;
TFixed translated_y = Int2Fix(y) - center_y;
// Scale point.
TFixed scaled_x = FixDiv(translated_x, scale_x);
TFixed scaled_y = FixDiv(translated_y, scale_y);
// Translate point back.
int final_x = Fix2Int(scaled_x + center_x);
int final_y = Fix2Int(scaled_y + center_y);
// Check bounds.
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
dst_pixels[y * pitch / 2 + x] = src_pixels[final_y * pitch / 2 + final_x];
if (rowInBounds && final_x >= 0 && final_x < width) {
dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + final_x];
} else {
dst_pixels[y * pitch / 2 + x] = 0;
dst_pixels[dstRowOffset + x] = 0;
}
}
}

View File

@@ -24,7 +24,7 @@
#include <3dtypes.h>
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color);
void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color, const TUint8 *colorLUT);
void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_FlipMode flip);
void ApplyRotation(void *dest, void *source, int pitch, int width, int height, TFixed center_x, TFixed center_y, TFixed angle);
void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFixed center_x, TFixed center_y, TFixed scale_x, TFixed scale_y);