From 6e65c3fac4d3f0c9eb017c7d11b2ef49a960e422 Mon Sep 17 00:00:00 2001 From: Michael Fitzmayer Date: Sat, 18 Apr 2026 12:52:22 +0200 Subject: [PATCH] [N-Gage] Remove optimisations except for native texture handling prior to some rework of the rendering back-end [N-Gage] Set proper brush style to draw filled rects properly. [N-Gage] Add persistent buffers to avoid per-frame memory allocations (which are expensive) [N-Gage] Add support for SDL_TEXTUREACCESS_TARGET, fixes #13165 [N-Gage] Update README, add hint that the compiler does not support aggregate initializations for structs (knowing this, avoids a lot of headache during debugging) [N-Gage] Add basic fast-path optimisations for render operations. [N-Gage] Fix line drawing. --- docs/README-ngage.md | 11 +- src/render/ngage/SDL_render_ngage.c | 66 +-- src/render/ngage/SDL_render_ngage.cpp | 688 +++++++----------------- src/render/ngage/SDL_render_ngage_c.h | 26 +- src/render/ngage/SDL_render_ngage_c.hpp | 46 +- src/render/ngage/SDL_render_ops.cpp | 537 +++++++++--------- src/render/ngage/SDL_render_ops.hpp | 2 +- 7 files changed, 509 insertions(+), 867 deletions(-) diff --git a/docs/README-ngage.md b/docs/README-ngage.md index beed7af0af..eebbcabd81 100644 --- a/docs/README-ngage.md +++ b/docs/README-ngage.md @@ -33,14 +33,6 @@ software renderer has been removed. The outcome is a significantly leaner and more efficient SDL port, which we hope will breathe new life into this beloved yet obscure platform. -## To the Stubborn Legends of the DC Scene - -This port is lovingly dedicated to the ever-nostalgic Dreamcast homebrew scene -- -because if we managed to pull this off for the N-Gage (yes, the N-Gage), surely -you guys can stop clinging to SDL2 like it's a rare Shenmue prototype and finally -make the leap to SDL3. It's 2025, not 1999 -- and let's be honest, you're rocking -a state-of-the-art C23 compiler. The irony writes itself. 
- ## Existing Issues and Limitations - For now, the new @@ -62,3 +54,6 @@ a state-of-the-art C23 compiler. The irony writes itself. expected to be resolved in a future update. - Dependency tracking is currently non-functional. + +- The compiler doesn't support aggregate initialization for structs, so + each field must be assigned explicitly. diff --git a/src/render/ngage/SDL_render_ngage.c b/src/render/ngage/SDL_render_ngage.c index b960a84d87..b3710c39dc 100644 --- a/src/render/ngage/SDL_render_ngage.c +++ b/src/render/ngage/SDL_render_ngage.c @@ -155,7 +155,7 @@ static bool NGAGE_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SD return false; } - if (!NGAGE_CreateTextureData(data, texture->w, texture->h)) { + if (!NGAGE_CreateTextureData(data, texture->w, texture->h, texture->access)) { SDL_free(data); return false; } @@ -283,8 +283,12 @@ static bool NGAGE_QueueCopyEx(SDL_Renderer *renderer, SDL_RenderCommand *cmd, SD verts->dstrect.h = (int)dstrect->h; verts->angle = Real2Fix(angle); - verts->center.x = Real2Fix(center->x); - verts->center.y = Real2Fix(center->y); + // Convert center from destination-space to source-space. + // Center is relative to dstrect, but rotation is applied in source texture space. 
+ float center_x_src = (center->x / dstrect->w) * srcquad->w; + float center_y_src = (center->y / dstrect->h) * srcquad->h; + verts->center.x = Real2Fix(center_x_src); + verts->center.y = Real2Fix(center_y_src); verts->scale_x = Real2Fix(scale_x); verts->scale_y = Real2Fix(scale_y); @@ -444,27 +448,24 @@ static bool NGAGE_UpdateTexture(SDL_Renderer *renderer, SDL_Texture *texture, co return false; } - void *bitmapData = NGAGE_GetBitmapDataAddress(phdata); - int bitmapPitch = NGAGE_GetBitmapPitch(phdata); - - if (!bitmapData || bitmapPitch == 0) { + Uint8 *dst = (Uint8 *)NGAGE_GetBitmapDataAddress(phdata); + if (!dst) { return false; } - Uint8 *src = (Uint8 *)pixels; - Uint8 *dst = (Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2; // 2 bytes per pixel for EColor4K + const int bytes_per_pixel = 2; + const int bitmap_pitch = texture->w * bytes_per_pixel; - size_t length = (size_t)rect->w * 2; // 2 bytes per pixel for EColor4K + const Uint8 *src = (const Uint8 *)pixels; + dst += rect->y * bitmap_pitch + rect->x * bytes_per_pixel; + + const size_t length = (size_t)rect->w * bytes_per_pixel; for (int row = 0; row < rect->h; ++row) { SDL_memcpy(dst, src, length); src += pitch; - dst += bitmapPitch; + dst += bitmap_pitch; } - // Mark texture as dirty. - phdata->isDirty = true; - phdata->dirtyRect = *rect; - return true; } @@ -476,34 +477,39 @@ static bool NGAGE_LockTexture(SDL_Renderer *renderer, SDL_Texture *texture, cons return false; } - void *bitmapData = NGAGE_GetBitmapDataAddress(phdata); - int bitmapPitch = NGAGE_GetBitmapPitch(phdata); - - if (!bitmapData || bitmapPitch == 0) { + Uint8 *data = (Uint8 *)NGAGE_GetBitmapDataAddress(phdata); + if (!data) { return false; } - *pixels = (void *)((Uint8 *)bitmapData + rect->y * bitmapPitch + rect->x * 2); // 2 bytes per pixel for EColor4K - *pitch = bitmapPitch; - - // Store the lock rectangle for dirty tracking. 
- phdata->dirtyRect = *rect; + const int bytes_per_pixel = 2; + const int bitmap_pitch = texture->w * bytes_per_pixel; + *pixels = (void *)(data + rect->y * bitmap_pitch + rect->x * bytes_per_pixel); + *pitch = bitmap_pitch; return true; } static void NGAGE_UnlockTexture(SDL_Renderer *renderer, SDL_Texture *texture) { - NGAGE_TextureData *phdata = (NGAGE_TextureData *)texture->internal; - - if (phdata) { - // Mark texture as dirty after unlock (assume it was modified). - phdata->isDirty = true; - } } static bool NGAGE_SetRenderTarget(SDL_Renderer *renderer, SDL_Texture *texture) { + NGAGE_RendererData *data = (NGAGE_RendererData *)renderer->internal; + + if (texture) { + NGAGE_TextureData *texturedata = (NGAGE_TextureData *)texture->internal; + if (!texturedata || !texturedata->gc) { + return SDL_SetError("Texture is not a render target"); + } + data->current_target = texture; + NGAGE_SetRenderTargetInternal(texturedata); + } else { + data->current_target = NULL; + NGAGE_SetRenderTargetInternal(NULL); + } + return true; } diff --git a/src/render/ngage/SDL_render_ngage.cpp b/src/render/ngage/SDL_render_ngage.cpp index 88fe359275..a4b9a6d0a1 100644 --- a/src/render/ngage/SDL_render_ngage.cpp +++ b/src/render/ngage/SDL_render_ngage.cpp @@ -59,57 +59,33 @@ bool NGAGE_CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, NGAGE_CopyExData return gRenderer->CopyEx(renderer, texture, copydata); } -bool NGAGE_CreateTextureData(NGAGE_TextureData *data, const int width, const int height) +bool NGAGE_CreateTextureData(NGAGE_TextureData *data, const int width, const int height, const int access) { - return gRenderer->CreateTextureData(data, width, height); + return gRenderer->CreateTextureData(data, width, height, access); } void NGAGE_DestroyTextureData(NGAGE_TextureData *data) { if (data) { + if (data->gc) { + delete data->gc; + data->gc = NULL; + } + if (data->device) { + delete data->device; + data->device = NULL; + } delete data->bitmap; data->bitmap = NULL; - - // Free 
cardinal rotation cache. - for (int i = 0; i < 4; i++) { - if (data->cardinalRotations[i]) { - delete data->cardinalRotations[i]; - data->cardinalRotations[i] = NULL; - } - } } } void *NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data) { - if (data) { - return data->bitmap->DataAddress(); + if (!data || !data->bitmap) { + return NULL; } - return NULL; -} - -int NGAGE_GetBitmapPitch(NGAGE_TextureData *data) -{ - if (data) { - return data->cachedPitch; - } - return 0; -} - -int NGAGE_GetBitmapWidth(NGAGE_TextureData *data) -{ - if (data) { - return data->cachedWidth; - } - return 0; -} - -int NGAGE_GetBitmapHeight(NGAGE_TextureData *data) -{ - if (data) { - return data->cachedHeight; - } - return 0; + return data->bitmap->DataAddress(); } void NGAGE_DrawLines(NGAGE_Vertex *verts, const int count) @@ -139,7 +115,9 @@ void NGAGE_SetClipRect(const SDL_Rect *rect) void NGAGE_SetDrawColor(const Uint32 color) { - gRenderer->SetDrawColor(color); + if (gRenderer) { + gRenderer->SetDrawColor(color); + } } void NGAGE_PumpEventsInternal() @@ -152,19 +130,17 @@ void NGAGE_SuspendScreenSaverInternal(bool suspend) gRenderer->SuspendScreenSaver(suspend); } +void NGAGE_SetRenderTargetInternal(NGAGE_TextureData *target) +{ + if (gRenderer) { + gRenderer->SetRenderTarget(target); + } +} + #ifdef __cplusplus } #endif -// Pre-calculated fixed-point angle constants for cardinal rotation checks. -// These avoid repeated Real2Fix conversions in CopyEx hot path. 
-static const TFixed kAngleZero = 0; -static const TFixed kAnglePi_2 = Real2Fix(M_PI / 2.0); // 90 degrees -static const TFixed kAnglePi = Real2Fix(M_PI); // 180 degrees -static const TFixed kAnglePi3_2 = Real2Fix(3.0 * M_PI / 2.0); // 270 degrees -static const TFixed kAnglePi2 = Real2Fix(2.0 * M_PI); // 360 degrees -static const TFixed kAngleTolerance = 100; // Tolerance for angle comparison - CRenderer *CRenderer::NewL() { CRenderer *self = new (ELeave) CRenderer(); @@ -174,32 +150,16 @@ CRenderer *CRenderer::NewL() return self; } -CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0), iLastColorR(-1), iLastColorG(-1), iLastColorB(-1), iLinePointsBuffer(0), iLinePointsBufferCapacity(0), iLastDrawColor(0), iLastClearColor(0xFFFFFFFF) -{ -} +CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iCurrentRenderTarget(0), iPixelBufferA(0), iPixelBufferB(0), iPixelBufferSize(0), iPointsBuffer(0), iPointsBufferSize(0) {} CRenderer::~CRenderer() { delete iRenderer; iRenderer = 0; - // Free work buffers. - SDL_free(iWorkBuffer1); - SDL_free(iWorkBuffer2); - iWorkBuffer1 = 0; - iWorkBuffer2 = 0; - iWorkBufferSize = 0; - - // Free temp render bitmap. - delete iTempRenderBitmap; - iTempRenderBitmap = 0; - iTempRenderBitmapWidth = 0; - iTempRenderBitmapHeight = 0; - - // Free line points buffer. 
- delete[] iLinePointsBuffer; - iLinePointsBuffer = 0; - iLinePointsBufferCapacity = 0; + SDL_free(iPixelBufferA); + SDL_free(iPixelBufferB); + delete[] iPointsBuffer; } void CRenderer::ConstructL() @@ -312,188 +272,13 @@ void CRenderer::AbortNow(RDirectScreenAccess::TTerminationReasons aReason) void CRenderer::Clear(TUint32 iColor) { - if (iRenderer && iRenderer->Gc()) { - // Skip redundant SetBrushColor if color hasn't changed. - if (iColor != iLastClearColor) { - iRenderer->Gc()->SetBrushColor(iColor); - iLastClearColor = iColor; - } - iRenderer->Gc()->Clear(); + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { + gc->SetBrushColor(iColor); + gc->Clear(); } } -bool CRenderer::EnsureWorkBufferCapacity(TInt aRequiredSize) -{ - if (aRequiredSize <= iWorkBufferSize) { - return true; - } - - // Free old buffers. - SDL_free(iWorkBuffer1); - SDL_free(iWorkBuffer2); - - // Allocate new buffers. - iWorkBuffer1 = SDL_calloc(1, aRequiredSize); - if (!iWorkBuffer1) { - iWorkBuffer2 = 0; - iWorkBufferSize = 0; - return false; - } - - iWorkBuffer2 = SDL_calloc(1, aRequiredSize); - if (!iWorkBuffer2) { - SDL_free(iWorkBuffer1); - iWorkBuffer1 = 0; - iWorkBufferSize = 0; - return false; - } - - iWorkBufferSize = aRequiredSize; - return true; -} - -bool CRenderer::EnsureLinePointsCapacity(TInt aRequiredCount) -{ - if (aRequiredCount <= iLinePointsBufferCapacity) { - return true; - } - - // Free old buffer. - delete[] iLinePointsBuffer; - - // Allocate new buffer. - iLinePointsBuffer = new TPoint[aRequiredCount]; - if (!iLinePointsBuffer) { - iLinePointsBufferCapacity = 0; - return false; - } - - iLinePointsBufferCapacity = aRequiredCount; - return true; -} - -bool CRenderer::EnsureTempBitmapCapacity(TInt aWidth, TInt aHeight) -{ - if (iTempRenderBitmap && - iTempRenderBitmapWidth >= aWidth && - iTempRenderBitmapHeight >= aHeight) { - return true; - } - - // Delete old bitmap. - delete iTempRenderBitmap; - iTempRenderBitmap = 0; - - // Create new bitmap. 
- iTempRenderBitmap = new CFbsBitmap(); - if (!iTempRenderBitmap) { - iTempRenderBitmapWidth = 0; - iTempRenderBitmapHeight = 0; - return false; - } - - TInt error = iTempRenderBitmap->Create(TSize(aWidth, aHeight), EColor4K); - if (error != KErrNone) { - delete iTempRenderBitmap; - iTempRenderBitmap = 0; - iTempRenderBitmapWidth = 0; - iTempRenderBitmapHeight = 0; - return false; - } - - iTempRenderBitmapWidth = aWidth; - iTempRenderBitmapHeight = aHeight; - return true; -} - -void CRenderer::BuildColorModLUT(TFixed rf, TFixed gf, TFixed bf) -{ - // Build lookup tables for R, G, B channels. - for (int i = 0; i < 256; i++) { - TFixed val = i << 16; // Convert to fixed-point - iColorModLUT[i] = (TUint8)SDL_min(Fix2Int(FixMul(val, rf)), 255); // R - iColorModLUT[i + 256] = (TUint8)SDL_min(Fix2Int(FixMul(val, gf)), 255); // G - iColorModLUT[i + 512] = (TUint8)SDL_min(Fix2Int(FixMul(val, bf)), 255); // B - } - - // Remember the last color to avoid rebuilding unnecessarily. - iLastColorR = rf; - iLastColorG = gf; - iLastColorB = bf; -} - -CFbsBitmap *CRenderer::GetCardinalRotation(NGAGE_TextureData *aTextureData, TInt aAngleIndex) -{ - // Check if already cached. - if (aTextureData->cardinalRotations[aAngleIndex]) { - return aTextureData->cardinalRotations[aAngleIndex]; - } - - // Create rotated bitmap. - CFbsBitmap *rotated = new CFbsBitmap(); - if (!rotated) { - return NULL; - } - - TInt w = aTextureData->cachedWidth; - TInt h = aTextureData->cachedHeight; - TSize size(w, h); - - // For 90 and 270 degree rotations, swap width/height. - if (aAngleIndex == 1 || aAngleIndex == 3) { - size = TSize(h, w); - } - - TInt error = rotated->Create(size, EColor4K); - if (error != KErrNone) { - delete rotated; - return NULL; - } - - // Rotate the bitmap data. 
- TUint16 *src = (TUint16 *)aTextureData->cachedDataAddress; - TUint16 *dst = (TUint16 *)rotated->DataAddress(); - TInt srcPitch = aTextureData->cachedPitch >> 1; - TInt dstPitch = rotated->ScanLineLength(size.iWidth, rotated->DisplayMode()) >> 1; - - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - TUint16 pixel = src[y * srcPitch + x]; - int dstX = 0; - int dstY = 0; - - switch (aAngleIndex) { - case 0: // 0 degrees - dstX = x; - dstY = y; - break; - case 1: // 90 degrees - dstX = h - 1 - y; - dstY = x; - break; - case 2: // 180 degrees - dstX = w - 1 - x; - dstY = h - 1 - y; - break; - case 3: // 270 degrees - dstX = y; - dstY = w - 1 - x; - break; - default: - // Should never happen, but initialize to avoid warnings - dstX = x; - dstY = y; - break; - } - - dst[dstY * dstPitch + dstX] = pixel; - } - } - - aTextureData->cardinalRotations[aAngleIndex] = rotated; - return rotated; -} - #ifdef __cplusplus extern "C" { #endif @@ -508,9 +293,13 @@ Uint32 NGAGE_ConvertColor(float r, float g, float b, float a, float color_scale) TFixed bf = Real2Fix(b); TFixed af = Real2Fix(a); - rf = SDL_clamp(FixMul(rf, scalef), 0, ff); - gf = SDL_clamp(FixMul(gf, scalef), 0, ff); - bf = SDL_clamp(FixMul(bf, scalef), 0, ff); + rf = FixMul(rf, scalef); + gf = FixMul(gf, scalef); + bf = FixMul(bf, scalef); + + rf = SDL_clamp(rf, 0, ff); + gf = SDL_clamp(gf, 0, ff); + bf = SDL_clamp(bf, 0, ff); af = SDL_clamp(af, 0, ff); rf = FixMul(rf, ff) >> 16; @@ -537,81 +326,70 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec } SDL_FColor *c = &texture->color; - - // Fast path 1: No transformations needed; direct BitBlt. - if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f) { - // Only check render scale if color mod passes. 
- float sx; - float sy; - SDL_GetRenderScale(renderer, &sx, &sy); - if (sx == 1.f && sy == 1.f) { - TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); - TPoint aDest(dstrect->x, dstrect->y); - iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); - return true; - } - } - - // Slow path: Transformations needed. - float sx; - float sy; - SDL_GetRenderScale(renderer, &sx, &sy); - int w = phdata->cachedWidth; - int h = phdata->cachedHeight; - int pitch = phdata->cachedPitch; - void *source = phdata->cachedDataAddress; + int w = texture->w; + int h = texture->h; + const int bytes_per_pixel = 2; + int pitch = w * bytes_per_pixel; + void *source = phdata->bitmap->DataAddress(); void *dest; if (!source) { return false; } - // Ensure work buffers have sufficient capacity. - TInt bufferSize = pitch * h; - if (!EnsureWorkBufferCapacity(bufferSize)) { - return false; + TInt required_size = pitch * h; + if (required_size > iPixelBufferSize) { + void *new_buffer_a = SDL_realloc(iPixelBufferA, required_size); + if (!new_buffer_a) { + return false; + } + iPixelBufferA = new_buffer_a; + + void *new_buffer_b = SDL_realloc(iPixelBufferB, required_size); + if (!new_buffer_b) { + return false; + } + iPixelBufferB = new_buffer_b; + + iPixelBufferSize = required_size; } - dest = iWorkBuffer1; + dest = iPixelBufferA; if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) { - TFixed rf = Real2Fix(c->r); - TFixed gf = Real2Fix(c->g); - TFixed bf = Real2Fix(c->b); + ApplyColorMod(dest, source, pitch, w, h, texture->color); - // Build LUT if color changed. - if (rf != iLastColorR || gf != iLastColorG || bf != iLastColorB) { - BuildColorModLUT(rf, gf, bf); - } - - ApplyColorMod(dest, source, pitch, w, h, texture->color, iColorModLUT); source = dest; - dest = (dest == iWorkBuffer1) ? 
iWorkBuffer2 : iWorkBuffer1; } + float sx; + float sy; + SDL_GetRenderScale(renderer, &sx, &sy); + if (sx != 1.f || sy != 1.f) { TFixed scale_x = Real2Fix(sx); TFixed scale_y = Real2Fix(sy); TFixed center_x = Int2Fix(w / 2); TFixed center_y = Int2Fix(h / 2); + dest == iPixelBufferA ? dest = iPixelBufferB : dest = iPixelBufferA; + ApplyScale(dest, source, pitch, w, h, center_x, center_y, scale_x, scale_y); + source = dest; } - // Use temp bitmap to avoid destroying source texture. - if (!EnsureTempBitmapCapacity(w, h)) { - return false; + Mem::Copy(phdata->bitmap->DataAddress(), source, pitch * h); + + if (phdata->bitmap) { + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { + TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); + TPoint aDest(dstrect->x, dstrect->y); + gc->BitBlt(aDest, phdata->bitmap, aSource); + } } - // Copy transformed data to temp bitmap. - Mem::Copy(iTempRenderBitmap->DataAddress(), source, pitch * h); - - // Render from temp bitmap, preserving original texture. - TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h)); - TPoint aDest(dstrect->x, dstrect->y); - iRenderer->Gc()->BitBlt(aDest, iTempRenderBitmap, aSource); - return true; } @@ -623,118 +401,74 @@ bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE } SDL_FColor *c = &texture->color; - - // Pre-calculate common checks. - const bool isNoFlip = (!copydata->flip); - const bool isNoRotation = (copydata->angle == 0); - const bool isNoColorMod = (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f); - const bool isIdentityScale = (copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1)); - - // Fast path 1: No transformations needed; direct BitBlt. 
- if (isNoFlip && isNoRotation && isNoColorMod && isIdentityScale) { - TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); - TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); - iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource); - return true; - } - - // Fast path 2: Check for cardinal rotation cache opportunity (0°, 90°, 180°, 270°). - if (isNoFlip && isIdentityScale && isNoColorMod && !isNoRotation) { - TFixed angle = copydata->angle; - TInt angleIndex = -1; - - // Check cardinal angles with tolerance - optimized for early exit. - if (SDL_abs(angle - kAngleZero) < kAngleTolerance) { - angleIndex = 0; // 0° - } else if (SDL_abs(angle - kAnglePi_2) < kAngleTolerance) { - angleIndex = 1; // 90° - } else if (SDL_abs(angle - kAnglePi) < kAngleTolerance) { - angleIndex = 2; // 180° - } else if (SDL_abs(angle - kAnglePi3_2) < kAngleTolerance) { - angleIndex = 3; // 270° - } else if (SDL_abs(angle - kAnglePi2) < kAngleTolerance) { - angleIndex = 0; // 360° = 0° - } - - if (angleIndex >= 0) { - CFbsBitmap *cached = GetCardinalRotation(phdata, angleIndex); - if (cached) { - TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); - TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); - iRenderer->Gc()->BitBlt(aDest, cached, aSource); - return true; - } - } - } - - // Slow path: Transformations needed. - int w = phdata->cachedWidth; - int h = phdata->cachedHeight; - int pitch = phdata->cachedPitch; - void *source = phdata->cachedDataAddress; + int w = texture->w; + int h = texture->h; + const int bytes_per_pixel = 2; + int pitch = w * bytes_per_pixel; + void *source = phdata->bitmap->DataAddress(); void *dest; if (!source) { return false; } - // Ensure work buffers have sufficient capacity. 
- TInt bufferSize = pitch * h; - if (!EnsureWorkBufferCapacity(bufferSize)) { - return false; + TInt required_size = pitch * h; + if (required_size > iPixelBufferSize) { + void *new_buffer_a = SDL_realloc(iPixelBufferA, required_size); + if (!new_buffer_a) { + return false; + } + iPixelBufferA = new_buffer_a; + + void *new_buffer_b = SDL_realloc(iPixelBufferB, required_size); + if (!new_buffer_b) { + return false; + } + iPixelBufferB = new_buffer_b; + + iPixelBufferSize = required_size; } - dest = iWorkBuffer1; + dest = iPixelBufferA; if (copydata->flip) { ApplyFlip(dest, source, pitch, w, h, copydata->flip); source = dest; - dest = (dest == iWorkBuffer1) ? iWorkBuffer2 : iWorkBuffer1; } - if (!isIdentityScale) { + if (copydata->scale_x != 1.f || copydata->scale_y != 1.f) { + dest == iPixelBufferA ? dest = iPixelBufferB : dest = iPixelBufferA; ApplyScale(dest, source, pitch, w, h, copydata->center.x, copydata->center.y, copydata->scale_x, copydata->scale_y); source = dest; - dest = (dest == iWorkBuffer1) ? iWorkBuffer2 : iWorkBuffer1; } if (copydata->angle) { + dest == iPixelBufferA ? dest = iPixelBufferB : dest = iPixelBufferA; ApplyRotation(dest, source, pitch, w, h, copydata->center.x, copydata->center.y, copydata->angle); source = dest; - dest = (dest == iWorkBuffer1) ? iWorkBuffer2 : iWorkBuffer1; } - if (!isNoColorMod) { - TFixed rf = Real2Fix(c->r); - TFixed gf = Real2Fix(c->g); - TFixed bf = Real2Fix(c->b); - - // Build LUT if color changed. - if (rf != iLastColorR || gf != iLastColorG || bf != iLastColorB) { - BuildColorModLUT(rf, gf, bf); - } - - ApplyColorMod(dest, source, pitch, w, h, texture->color, iColorModLUT); + if (c->a != 1.f || c->r != 1.f || c->g != 1.f || c->b != 1.f) { + dest == iPixelBufferA ? dest = iPixelBufferB : dest = iPixelBufferA; + ApplyColorMod(dest, source, pitch, w, h, texture->color); source = dest; } - // Use temp bitmap to avoid destroying source texture. 
- if (!EnsureTempBitmapCapacity(w, h)) { - return false; + Mem::Copy(phdata->bitmap->DataAddress(), source, pitch * h); + + if (phdata->bitmap) { + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { + TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); + TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); + gc->BitBlt(aDest, phdata->bitmap, aSource); + } } - // Copy transformed data to temp bitmap. - Mem::Copy(iTempRenderBitmap->DataAddress(), source, pitch * h); - - // Render from temp bitmap, preserving original texture. - TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h)); - TPoint aDest(copydata->dstrect.x, copydata->dstrect.y); - iRenderer->Gc()->BitBlt(aDest, iTempRenderBitmap, aSource); - return true; } -bool CRenderer::CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aWidth, const TInt aHeight) +bool CRenderer::CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aWidth, const TInt aHeight, const TInt aAccess) { if (!aTextureData) { return false; @@ -752,101 +486,88 @@ bool CRenderer::CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aW return false; } - // Cache texture properties to avoid repeated API calls. - TSize bitmapSize = aTextureData->bitmap->SizeInPixels(); - aTextureData->cachedWidth = bitmapSize.iWidth; - aTextureData->cachedHeight = bitmapSize.iHeight; - aTextureData->cachedPitch = aTextureData->bitmap->ScanLineLength(aWidth, aTextureData->bitmap->DisplayMode()); - aTextureData->cachedDataAddress = aTextureData->bitmap->DataAddress(); + if (aAccess == SDL_TEXTUREACCESS_TARGET) { + TRAPD(err1, aTextureData->device = CFbsBitmapDevice::NewL(aTextureData->bitmap)); + if (err1 != KErrNone || !aTextureData->device) { + delete aTextureData->bitmap; + aTextureData->bitmap = NULL; + return false; + } - // Initialize cardinal rotation cache to NULL. 
- for (int i = 0; i < 4; i++) { - aTextureData->cardinalRotations[i] = NULL; + TRAPD(err2, aTextureData->gc = CFbsBitGc::NewL()); + if (err2 != KErrNone || !aTextureData->gc) { + delete aTextureData->device; + aTextureData->device = NULL; + delete aTextureData->bitmap; + aTextureData->bitmap = NULL; + return false; + } + + aTextureData->gc->Activate(aTextureData->device); + } else { + aTextureData->gc = NULL; + aTextureData->device = NULL; } - // Initialize dirty tracking. - aTextureData->isDirty = true; // New textures start dirty. - aTextureData->dirtyRect.x = 0; - aTextureData->dirtyRect.y = 0; - aTextureData->dirtyRect.w = aWidth; - aTextureData->dirtyRect.h = aHeight; - return true; } void CRenderer::DrawLines(NGAGE_Vertex *aVerts, const TInt aCount) { - if (iRenderer && iRenderer->Gc()) { - // Ensure reusable buffer has sufficient capacity. - if (!EnsureLinePointsCapacity(aCount)) { - return; + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { + gc->SetPenStyle(CGraphicsContext::ESolidPen); + + // Draw lines as pairs of points (start, end) + for (TInt i = 0; i < aCount - 1; i += 2) { + TPoint start(aVerts[i].x, aVerts[i].y); + TPoint end(aVerts[i + 1].x, aVerts[i + 1].y); + + TRgb color = TRgb(aVerts[i].color.r, aVerts[i].color.g, aVerts[i].color.b); + + gc->SetPenColor(color); + gc->DrawLine(start, end); } - - // Fill points from vertex data. - for (TInt i = 0; i < aCount; i++) { - iLinePointsBuffer[i] = TPoint(aVerts[i].x, aVerts[i].y); - } - - // Pack color once - all vertices use the same color in polyline. - Uint8 ca = aVerts->color.a; - Uint8 cr = aVerts->color.r; - Uint8 cg = aVerts->color.g; - Uint8 cb = aVerts->color.b; - TUint32 aColor = (ca << 24) | (cb << 16) | (cg << 8) | cr; - - iRenderer->Gc()->SetPenColor(aColor); - iRenderer->Gc()->DrawPolyLineNoEndPoint(iLinePointsBuffer, aCount); } } void CRenderer::DrawPoints(NGAGE_Vertex *aVerts, const TInt aCount) { - if (iRenderer && iRenderer->Gc()) { - // Batch points by color to minimize SetPenColor calls. 
- TUint32 currentColor = 0; - bool colorSet = false; - + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { for (TInt i = 0; i < aCount; i++, aVerts++) { - TUint32 aColor = (TUint32(aVerts->color.a) << 24) | (TUint32(aVerts->color.b) << 16) | - (TUint32(aVerts->color.g) << 8) | TUint32(aVerts->color.r); + TUint32 aColor = (((TUint8)aVerts->color.a << 24) | + ((TUint8)aVerts->color.b << 16) | + ((TUint8)aVerts->color.g << 8) | + (TUint8)aVerts->color.r); - // Only set pen color when it changes. - if (!colorSet || aColor != currentColor) { - iRenderer->Gc()->SetPenColor(aColor); - currentColor = aColor; - colorSet = true; - } - - iRenderer->Gc()->Plot(TPoint(aVerts->x, aVerts->y)); + gc->SetPenColor(aColor); + gc->Plot(TPoint(aVerts->x, aVerts->y)); } } } void CRenderer::FillRects(NGAGE_Vertex *aVerts, const TInt aCount) { - if (iRenderer && iRenderer->Gc()) { - // Batch rectangles by color to minimize SetPenColor/SetBrushColor calls. - TUint32 currentColor = 0; - bool colorSet = false; - - // Process rectangles (each rect uses 2 vertices: position and size). - for (TInt i = 0; i < aCount; i += 2) { + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { + for (TInt i = 0; i < aCount; i++, aVerts++) { TPoint pos(aVerts[i].x, aVerts[i].y); - TSize size(aVerts[i + 1].x, aVerts[i + 1].y); + TSize size( + aVerts[i + 1].x, + aVerts[i + 1].y); TRect rect(pos, size); - TUint32 aColor = (TUint32(aVerts[i].color.a) << 24) | (TUint32(aVerts[i].color.b) << 16) | - (TUint32(aVerts[i].color.g) << 8) | TUint32(aVerts[i].color.r); + TUint32 aColor = (((TUint8)aVerts->color.a << 24) | + ((TUint8)aVerts->color.b << 16) | + ((TUint8)aVerts->color.g << 8) | + (TUint8)aVerts->color.r); - // Only set colors when they change. 
- if (!colorSet || aColor != currentColor) { - iRenderer->Gc()->SetPenColor(aColor); - iRenderer->Gc()->SetBrushColor(aColor); - currentColor = aColor; - colorSet = true; - } - - iRenderer->Gc()->DrawRect(rect); + gc->SetPenColor(aColor); + gc->SetBrushColor(aColor); + gc->SetBrushStyle(CGraphicsContext::ESolidBrush); + gc->SetPenStyle(CGraphicsContext::ENullPen); + gc->DrawRect(rect); } } } @@ -862,65 +583,64 @@ void CRenderer::Flip() return; } + iRenderer->Gc()->UseFont(iFont); + if (iShowFPS && iRenderer->Gc()) { UpdateFPS(); - iRenderer->Gc()->UseFont(iFont); - TBuf<64> info; iRenderer->Gc()->SetPenStyle(CGraphicsContext::ESolidPen); - iRenderer->Gc()->SetBrushStyle(CGraphicsContext::ESolidBrush); - iRenderer->Gc()->SetBrushColor(KRgbBlack); + iRenderer->Gc()->SetBrushStyle(CGraphicsContext::ENullBrush); iRenderer->Gc()->SetPenColor(KRgbCyan); - // Draw FPS background and text. TRect aTextRect(TPoint(3, 203 - iFont->HeightInPixels()), TSize(45, iFont->HeightInPixels() + 2)); + iRenderer->Gc()->SetBrushStyle(CGraphicsContext::ESolidBrush); + iRenderer->Gc()->SetBrushColor(KRgbBlack); iRenderer->Gc()->DrawRect(aTextRect); + // Draw messages. info.Format(_L("FPS: %d"), iFPS); iRenderer->Gc()->DrawText(info, TPoint(5, 203)); - - iRenderer->Gc()->DiscardFont(); + } else { + // This is a workaround that helps regulating the FPS. + iRenderer->Gc()->DrawText(_L(""), TPoint(0, 0)); } - + iRenderer->Gc()->DiscardFont(); iRenderer->Flip(iDirectScreen); - // Keep the backlight on when screen saver is suspended. + // Keep the backlight on. if (iSuspendScreenSaver) { User::ResetInactivityTime(); } - - // Yield to other threads and active objects briefly. + // Suspend the current thread for a short while. + // Give some time to other threads and active objects. User::After(0); } void CRenderer::SetDrawColor(TUint32 iColor) { - if (iRenderer && iRenderer->Gc()) { - // Skip redundant calls if color hasn't changed. 
- if (iColor == iLastDrawColor) { - return; - } - - iRenderer->Gc()->SetPenColor(iColor); - iRenderer->Gc()->SetBrushColor(iColor); - iRenderer->Gc()->SetBrushStyle(CGraphicsContext::ESolidBrush); + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { + gc->SetPenColor(iColor); + gc->SetBrushColor(iColor); + gc->SetBrushStyle(CGraphicsContext::ESolidBrush); + } + if (iRenderer) { TRAPD(err, iRenderer->SetCurrentColor(iColor)); if (err != KErrNone) { return; } - - iLastDrawColor = iColor; } } void CRenderer::SetClipRect(TInt aX, TInt aY, TInt aWidth, TInt aHeight) { - if (iRenderer && iRenderer->Gc()) { + CFbsBitGc *gc = GetCurrentGc(); + if (gc) { TRect viewportRect(aX, aY, aX + aWidth, aY + aHeight); - iRenderer->Gc()->SetClippingRect(viewportRect); + gc->SetClippingRect(viewportRect); } } @@ -928,17 +648,10 @@ void CRenderer::UpdateFPS() { static TTime lastTime; static TInt frameCount = 0; - static TBool initialized = EFalse; TTime currentTime; - const TUint KOneSecond = 1000000; // 1s in microseconds. + const TUint KOneSecond = 1000000; // 1s in microseconds. currentTime.HomeTime(); - - if (!initialized) { - lastTime = currentTime; - initialized = ETrue; - } - ++frameCount; TTimeIntervalMicroSeconds timeDiff = currentTime.MicroSecondsFrom(lastTime); @@ -958,6 +671,19 @@ void CRenderer::SuspendScreenSaver(TBool aSuspend) iSuspendScreenSaver = aSuspend; } +void CRenderer::SetRenderTarget(NGAGE_TextureData *aTarget) +{ + iCurrentRenderTarget = aTarget; +} + +CFbsBitGc *CRenderer::GetCurrentGc() +{ + if (iCurrentRenderTarget && iCurrentRenderTarget->gc) { + return iCurrentRenderTarget->gc; + } + return iRenderer ? 
iRenderer->Gc() : NULL; +} + static SDL_Scancode ConvertScancode(int key) { SDL_Keycode keycode; @@ -1058,8 +784,8 @@ void CRenderer::HandleEvent(const TWsEvent &aWsEvent) case EEventKeyUp: /* Key events */ timestamp = SDL_GetPerformanceCounter(); SDL_SendKeyboardKey(timestamp, 1, aWsEvent.Key()->iCode, ConvertScancode(aWsEvent.Key()->iScanCode), false); - break; + break; case EEventFocusGained: DisableKeyBlocking(); if (!iDirectScreen->IsActive()) { diff --git a/src/render/ngage/SDL_render_ngage_c.h b/src/render/ngage/SDL_render_ngage_c.h index fa87a34f30..20f1af17cf 100644 --- a/src/render/ngage/SDL_render_ngage_c.h +++ b/src/render/ngage/SDL_render_ngage_c.h @@ -34,6 +34,7 @@ extern "C" { typedef struct NGAGE_RendererData { SDL_Rect *viewport; + SDL_Texture *current_target; } NGAGE_RendererData; @@ -54,23 +55,14 @@ typedef struct NGAGE_Vertex } NGAGE_Vertex; typedef struct CFbsBitmap CFbsBitmap; +typedef struct CFbsBitGc CFbsBitGc; +typedef struct CFbsDevice CFbsDevice; typedef struct NGAGE_TextureData { CFbsBitmap *bitmap; - - // Cached properties to avoid repeated API calls. - int cachedWidth; - int cachedHeight; - int cachedPitch; - void *cachedDataAddress; - - // Cardinal rotation cache (0°, 90°, 180°, 270°) - created on demand. - CFbsBitmap *cardinalRotations[4]; - - // Dirty tracking to avoid redundant rendering. 
- bool isDirty; - SDL_Rect dirtyRect; + CFbsBitGc *gc; + CFbsDevice *device; } NGAGE_TextureData; @@ -99,12 +91,9 @@ void NGAGE_Clear(const Uint32 color); Uint32 NGAGE_ConvertColor(float r, float g, float b, float a, float color_scale); bool NGAGE_Copy(SDL_Renderer *renderer, SDL_Texture *texture, SDL_Rect *srcrect, SDL_Rect *dstrect); bool NGAGE_CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, NGAGE_CopyExData *copydata); -bool NGAGE_CreateTextureData(NGAGE_TextureData *data, const int width, const int height); +bool NGAGE_CreateTextureData(NGAGE_TextureData *data, const int width, const int height, const int access); void NGAGE_DestroyTextureData(NGAGE_TextureData *data); -void *NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data); -int NGAGE_GetBitmapPitch(NGAGE_TextureData *data); -int NGAGE_GetBitmapWidth(NGAGE_TextureData *data); -int NGAGE_GetBitmapHeight(NGAGE_TextureData *data); +void* NGAGE_GetBitmapDataAddress(NGAGE_TextureData *data); void NGAGE_DrawLines(NGAGE_Vertex *verts, const int count); void NGAGE_DrawPoints(NGAGE_Vertex *verts, const int count); void NGAGE_FillRects(NGAGE_Vertex *verts, const int count); @@ -113,6 +102,7 @@ void NGAGE_SetClipRect(const SDL_Rect *rect); void NGAGE_SetDrawColor(const Uint32 color); void NGAGE_PumpEventsInternal(void); void NGAGE_SuspendScreenSaverInternal(bool suspend); +void NGAGE_SetRenderTargetInternal(NGAGE_TextureData *target); #ifdef __cplusplus } diff --git a/src/render/ngage/SDL_render_ngage_c.hpp b/src/render/ngage/SDL_render_ngage_c.hpp index 7fbfc88799..b7776ec589 100644 --- a/src/render/ngage/SDL_render_ngage_c.hpp +++ b/src/render/ngage/SDL_render_ngage_c.hpp @@ -23,7 +23,6 @@ #define ngage_video_render_ngage_c_hpp #include "SDL_render_ngage_c.h" -#include <3dtypes.h> #include #include #include @@ -38,7 +37,7 @@ class CRenderer : public MDirectScreenAccess void Clear(TUint32 iColor); bool Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rect *srcrect, const SDL_Rect *dstrect); bool 
CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE_CopyExData *copydata); - bool CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aWidth, const TInt aHeight); + bool CreateTextureData(NGAGE_TextureData *aTextureData, const TInt aWidth, const TInt aHeight, const TInt aAccess); void DrawLines(NGAGE_Vertex *aVerts, const TInt aCount); void DrawPoints(NGAGE_Vertex *aVerts, const TInt aCount); void FillRects(NGAGE_Vertex *aVerts, const TInt aCount); @@ -48,6 +47,10 @@ class CRenderer : public MDirectScreenAccess void UpdateFPS(); void SuspendScreenSaver(TBool aSuspend); + // Render target management. + void SetRenderTarget(NGAGE_TextureData *aTarget); + CFbsBitGc* GetCurrentGc(); + // Event handling. void DisableKeyBlocking(); void HandleEvent(const TWsEvent &aWsEvent); @@ -88,38 +91,15 @@ class CRenderer : public MDirectScreenAccess // Screen saver. TBool iSuspendScreenSaver; - // Work buffers for texture transformations (reusable to avoid per-frame allocations). - void *iWorkBuffer1; - void *iWorkBuffer2; - TInt iWorkBufferSize; + // Render target. + NGAGE_TextureData *iCurrentRenderTarget; - // Temporary render bitmap to avoid destroying source textures. - CFbsBitmap *iTempRenderBitmap; - TInt iTempRenderBitmapWidth; - TInt iTempRenderBitmapHeight; - - // Color modulation lookup tables (pre-calculated to avoid per-pixel FixMul). - TUint8 iColorModLUT[768]; // 256 entries each for R, G, B - TFixed iLastColorR; - TFixed iLastColorG; - TFixed iLastColorB; - - // Reusable line points buffer to avoid per-frame allocations in DrawLines. - TPoint *iLinePointsBuffer; - TInt iLinePointsBufferCapacity; - - // Cached draw color to avoid redundant SetPenColor/SetBrushColor calls. - TUint32 iLastDrawColor; - - // Cached clear color to avoid redundant SetBrushColor calls. - TUint32 iLastClearColor; - - // Helper methods. 
- bool EnsureWorkBufferCapacity(TInt aRequiredSize); - bool EnsureTempBitmapCapacity(TInt aWidth, TInt aHeight); - bool EnsureLinePointsCapacity(TInt aRequiredCount); - void BuildColorModLUT(TFixed rf, TFixed gf, TFixed bf); - CFbsBitmap *GetCardinalRotation(NGAGE_TextureData *aTextureData, TInt aAngleIndex); + // Persistent buffers to avoid per-frame allocations. + void *iPixelBufferA; + void *iPixelBufferB; + TInt iPixelBufferSize; + TPoint *iPointsBuffer; + TInt iPointsBufferSize; }; #endif // ngage_video_render_ngage_c_hpp diff --git a/src/render/ngage/SDL_render_ops.cpp b/src/render/ngage/SDL_render_ops.cpp index 6bb925749c..3b998d8132 100644 --- a/src/render/ngage/SDL_render_ops.cpp +++ b/src/render/ngage/SDL_render_ops.cpp @@ -23,65 +23,43 @@ #include "SDL_render_ops.hpp" #include <3dtypes.h> -void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color, const TUint8 *colorLUT) +void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color) { TUint16 *src_pixels = static_cast(source); TUint16 *dst_pixels = static_cast(dest); - // Pre-calculate pitch in pixels to avoid repeated division. - const TInt pitchPixels = pitch >> 1; - - // Pre-calculate LUT offsets to reduce addressing calculations. - const TUint8 *lut_r = colorLUT; - const TUint8 *lut_g = colorLUT + 256; - const TUint8 *lut_b = colorLUT + 512; - - // Process 4 pixels at a time. - for (int y = 0; y < height; ++y) { - const TInt rowOffset = y * pitchPixels; - int x = 0; - - // Process 4 pixels at once with optimized bit manipulation. - for (; x < width - 3; x += 4) { - // Load 4 pixels at once. - TUint16 p0 = src_pixels[rowOffset + x]; - TUint16 p1 = src_pixels[rowOffset + x + 1]; - TUint16 p2 = src_pixels[rowOffset + x + 2]; - TUint16 p3 = src_pixels[rowOffset + x + 3]; - - // Pixel 0: Extract and modulate RGB4444 components. 
- // RGB4444 format: RRRR GGGG BBBB xxxx - TUint8 r0 = lut_r[(p0 >> 8) & 0xF0]; // Extract R (bits 12-15), shift to byte position - TUint8 g0 = lut_g[(p0 >> 3) & 0xF8]; // Extract G (bits 6-9), scale to 8-bit - TUint8 b0 = lut_b[(p0 << 3) & 0xF8]; // Extract B (bits 0-3), scale to 8-bit - dst_pixels[rowOffset + x] = ((r0 & 0xF0) << 8) | ((g0 & 0xF0) << 3) | ((b0 & 0xF0) >> 1); - - // Pixel 1 - TUint8 r1 = lut_r[(p1 >> 8) & 0xF0]; - TUint8 g1 = lut_g[(p1 >> 3) & 0xF8]; - TUint8 b1 = lut_b[(p1 << 3) & 0xF8]; - dst_pixels[rowOffset + x + 1] = ((r1 & 0xF0) << 8) | ((g1 & 0xF0) << 3) | ((b1 & 0xF0) >> 1); - - // Pixel 2 - TUint8 r2 = lut_r[(p2 >> 8) & 0xF0]; - TUint8 g2 = lut_g[(p2 >> 3) & 0xF8]; - TUint8 b2 = lut_b[(p2 << 3) & 0xF8]; - dst_pixels[rowOffset + x + 2] = ((r2 & 0xF0) << 8) | ((g2 & 0xF0) << 3) | ((b2 & 0xF0) >> 1); - - // Pixel 3 - TUint8 r3 = lut_r[(p3 >> 8) & 0xF0]; - TUint8 g3 = lut_g[(p3 >> 3) & 0xF8]; - TUint8 b3 = lut_b[(p3 << 3) & 0xF8]; - dst_pixels[rowOffset + x + 3] = ((r3 & 0xF0) << 8) | ((g3 & 0xF0) << 3) | ((b3 & 0xF0) >> 1); + // Fast path: no color modulation (white color). + if (color.r == 1.0f && color.g == 1.0f && color.b == 1.0f) { + if (dest != source) { + for (int y = 0; y < height; ++y) { + TUint16 *src_row = src_pixels + (y * pitch / 2); + TUint16 *dst_row = dst_pixels + (y * pitch / 2); + for (int x = 0; x < width; ++x) { + dst_row[x] = src_row[x]; + } + } } + return; + } - // Handle remaining pixels. 
- for (; x < width; ++x) { - TUint16 pixel = src_pixels[rowOffset + x]; - TUint8 r = lut_r[(pixel >> 8) & 0xF0]; - TUint8 g = lut_g[(pixel >> 3) & 0xF8]; - TUint8 b = lut_b[(pixel << 3) & 0xF8]; - dst_pixels[rowOffset + x] = ((r & 0xF0) << 8) | ((g & 0xF0) << 3) | ((b & 0xF0) >> 1); + TFixed rf = Real2Fix(color.r); + TFixed gf = Real2Fix(color.g); + TFixed bf = Real2Fix(color.b); + + int pitch_offset = pitch / 2; + + for (int y = 0; y < height; ++y) { + int row_offset = y * pitch_offset; + for (int x = 0; x < width; ++x) { + int idx = row_offset + x; + TUint16 pixel = src_pixels[idx]; + TUint8 r = (pixel & 0xF800) >> 8; + TUint8 g = (pixel & 0x07E0) >> 3; + TUint8 b = (pixel & 0x001F) << 3; + r = FixMul(r, rf); + g = FixMul(g, gf); + b = FixMul(b, bf); + dst_pixels[idx] = (r << 8) | (g << 3) | (b >> 3); } } } @@ -91,62 +69,56 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F TUint16 *src_pixels = static_cast(source); TUint16 *dst_pixels = static_cast(dest); - // Pre-calculate pitch in pixels to avoid repeated division. - const TInt pitchPixels = pitch >> 1; - - // Pre-calculate flip flags to avoid repeated bitwise operations. - const bool flipHorizontal = (flip & SDL_FLIP_HORIZONTAL) != 0; - const bool flipVertical = (flip & SDL_FLIP_VERTICAL) != 0; - - // Fast path: No flip; just copy entire buffer. - if (!flipHorizontal && !flipVertical) { - Mem::Copy(dest, source, pitch * height); - return; - } - - // Fast path: Vertical-only flip; copy rows in reverse order. - if (flipVertical && !flipHorizontal) { - for (int y = 0; y < height; ++y) { - const int src_y = height - 1 - y; - Mem::Copy(&dst_pixels[y * pitchPixels], &src_pixels[src_y * pitchPixels], pitch); - } - return; - } - - // Slow path: Horizontal or both flips; need pixel-level operations. - // Pre-calculate width/height bounds for horizontal/vertical flipping. 
- const int width_m1 = width - 1; - const int height_m1 = height - 1; - - for (int y = 0; y < height; ++y) { - // Calculate destination row offset once per row. - const TInt dstRowOffset = y * pitchPixels; - - // Calculate source Y coordinate once per row. - const int src_y = flipVertical ? (height_m1 - y) : y; - const TInt srcRowOffset = src_y * pitchPixels; - - int x = 0; - - // Process 4 pixels at once. - for (; x < width - 3; x += 4) { - if (flipHorizontal) { - dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + (width_m1 - x)]; - dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + (width_m1 - x - 1)]; - dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + (width_m1 - x - 2)]; - dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + (width_m1 - x - 3)]; - } else { - dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + x]; - dst_pixels[dstRowOffset + x + 1] = src_pixels[srcRowOffset + x + 1]; - dst_pixels[dstRowOffset + x + 2] = src_pixels[srcRowOffset + x + 2]; - dst_pixels[dstRowOffset + x + 3] = src_pixels[srcRowOffset + x + 3]; + // Fast path: no flip. + if (flip == SDL_FLIP_NONE) { + if (dest != source) { + for (int y = 0; y < height; ++y) { + TUint16 *src_row = src_pixels + (y * pitch / 2); + TUint16 *dst_row = dst_pixels + (y * pitch / 2); + for (int x = 0; x < width; ++x) { + dst_row[x] = src_row[x]; + } } } + return; + } - // Handle remaining pixels. - for (; x < width; ++x) { - const int src_x = flipHorizontal ? (width_m1 - x) : x; - dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + src_x]; + int pitch_offset = pitch / 2; + + // Fast path: horizontal flip only. 
+ if (flip == SDL_FLIP_HORIZONTAL) { + for (int y = 0; y < height; ++y) { + int dst_row_offset = y * pitch_offset; + int src_row_offset = y * pitch_offset; + int width_minus_1 = width - 1; + for (int x = 0; x < width; ++x) { + dst_pixels[dst_row_offset + x] = src_pixels[src_row_offset + (width_minus_1 - x)]; + } + } + return; + } + + // Fast path: vertical flip only. + if (flip == SDL_FLIP_VERTICAL) { + int height_minus_1 = height - 1; + for (int y = 0; y < height; ++y) { + int dst_row_offset = y * pitch_offset; + int src_row_offset = (height_minus_1 - y) * pitch_offset; + for (int x = 0; x < width; ++x) { + dst_pixels[dst_row_offset + x] = src_pixels[src_row_offset + x]; + } + } + return; + } + + // Both horizontal and vertical flip + int width_minus_1 = width - 1; + int height_minus_1 = height - 1; + for (int y = 0; y < height; ++y) { + int dst_row_offset = y * pitch_offset; + int src_row_offset = (height_minus_1 - y) * pitch_offset; + for (int x = 0; x < width; ++x) { + dst_pixels[dst_row_offset + x] = src_pixels[src_row_offset + (width_minus_1 - x)]; } } } @@ -156,152 +128,151 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T TUint16 *src_pixels = static_cast(source); TUint16 *dst_pixels = static_cast(dest); - TFixed cos_angle = 0; - TFixed sin_angle = 0; - - if (angle != 0) { - FixSinCos(angle, sin_angle, cos_angle); - } - - // Pre-calculate pitch in pixels to avoid repeated division. - const TInt pitchPixels = pitch >> 1; - - // Pre-check if rotation keeps all pixels within bounds to skip per-pixel checks. - // Calculate the four corners of the image after rotation around center. 
- bool allInBounds = true; - if (angle != 0) { - // Check corners: (0,0), (width-1,0), (0,height-1), (width-1,height-1) - TFixed corners_x[4] = { -center_x, Int2Fix(width - 1) - center_x, -center_x, Int2Fix(width - 1) - center_x }; - TFixed corners_y[4] = { -center_y, -center_y, Int2Fix(height - 1) - center_y, Int2Fix(height - 1) - center_y }; - - for (int i = 0; i < 4; ++i) { - TFixed rot_x = FixMul(corners_x[i], cos_angle) - FixMul(corners_y[i], sin_angle) + center_x; - TFixed rot_y = FixMul(corners_x[i], sin_angle) + FixMul(corners_y[i], cos_angle) + center_y; - int final_x = Fix2Int(rot_x); - int final_y = Fix2Int(rot_y); - - if (final_x < 0 || final_x >= width || final_y < 0 || final_y >= height) { - allInBounds = false; - break; + // Fast path: no rotation. + if (angle == 0) { + if (dest != source) { + int pitch_offset = pitch / 2; + for (int y = 0; y < height; ++y) { + TUint16 *src_row = src_pixels + (y * pitch_offset); + TUint16 *dst_row = dst_pixels + (y * pitch_offset); + for (int x = 0; x < width; ++x) { + dst_row[x] = src_row[x]; + } } } + return; } - // Incremental DDA: Calculate per-pixel increments. - // As we move right (x+1), the rotated position changes by (cos, -sin). - const TFixed dx_cos = cos_angle; - const TFixed dx_sin = -sin_angle; + // Fast paths for 90-degree rotations + TFixed angle_90 = Int2Fix(90); + TFixed angle_180 = Int2Fix(180); + TFixed angle_270 = Int2Fix(270); + TFixed angle_360 = Int2Fix(360); + + // Normalize angle to 0-360 range + TFixed normalized_angle = angle; + while (normalized_angle < 0) { + normalized_angle += angle_360; + } + while (normalized_angle >= angle_360) { + normalized_angle -= angle_360; + } + + int pitch_offset = pitch / 2; + + // Fast path: 90-degree rotation (clockwise). + if (normalized_angle == angle_90) { + TFixed center_x_int = Fix2Int(center_x); + TFixed center_y_int = Fix2Int(center_y); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + // Translate to origin. 
+ int tx = x - center_x_int; + int ty = y - center_y_int; + // Rotate 90 degrees clockwise: (x, y) -> (y, -x). + int rx = ty; + int ry = -tx; + // Translate back. + int src_x = rx + center_x_int; + int src_y = ry + center_y_int; + if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) { + dst_pixels[y * pitch_offset + x] = src_pixels[src_y * pitch_offset + src_x]; + } else { + dst_pixels[y * pitch_offset + x] = 0; + } + } + } + return; + } + + // Fast path: 180-degree rotation. + if (normalized_angle == angle_180) { + TFixed center_x_int = Fix2Int(center_x); + TFixed center_y_int = Fix2Int(center_y); + for (int y = 0; y < height; ++y) { + int dst_row_offset = y * pitch_offset; + for (int x = 0; x < width; ++x) { + // Translate to origin + int tx = x - center_x_int; + int ty = y - center_y_int; + // Rotate 180 degrees: (x, y) -> (-x, -y) + int rx = -tx; + int ry = -ty; + // Translate back + int src_x = rx + center_x_int; + int src_y = ry + center_y_int; + if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) { + dst_pixels[dst_row_offset + x] = src_pixels[src_y * pitch_offset + src_x]; + } else { + dst_pixels[dst_row_offset + x] = 0; + } + } + } + return; + } + + // Fast path: 270-degree rotation (clockwise). + if (normalized_angle == angle_270) { + TFixed center_x_int = Fix2Int(center_x); + TFixed center_y_int = Fix2Int(center_y); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + // Translate to origin. + int tx = x - center_x_int; + int ty = y - center_y_int; + // Rotate 270 degrees clockwise (or 90 counter-clockwise): (x, y) -> (-y, x). + int rx = -ty; + int ry = tx; + // Translate back. 
+ int src_x = rx + center_x_int; + int src_y = ry + center_y_int; + if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) { + dst_pixels[y * pitch_offset + x] = src_pixels[src_y * pitch_offset + src_x]; + } else { + dst_pixels[y * pitch_offset + x] = 0; + } + } + } + return; + } + + TFixed cos_angle = 0; + TFixed sin_angle = 0; + FixSinCos(angle, sin_angle, cos_angle); + + // Pre-calculate the translation of center to origin. + TFixed neg_center_x = -center_x; + TFixed neg_center_y = -center_y; for (int y = 0; y < height; ++y) { - // Calculate destination row offset once per row. - const TInt dstRowOffset = y * pitchPixels; + int dst_row_offset = y * pitch_offset; + TFixed y_fixed = Int2Fix(y) + neg_center_y; - // Calculate starting position for this row. - // For y, rotation transforms: x' = x*cos - y*sin, y' = x*sin + y*cos - // At x=0: x' = -y*sin, y' = y*cos (relative to center) - const TFixed translated_y = Int2Fix(y) - center_y; - const TFixed row_start_x = center_x - FixMul(translated_y, sin_angle); - const TFixed row_start_y = center_y + FixMul(translated_y, cos_angle); + // Pre-calculate these values for the entire row. + TFixed cos_mul_ty = FixMul(y_fixed, cos_angle); + TFixed sin_mul_ty = FixMul(y_fixed, sin_angle); - // Start at x=0 position. - TFixed src_x = row_start_x; - TFixed src_y = row_start_y; + // Starting position for the row (x=0). + // rotated_x = cos(angle) * (0 - center_x) + sin(angle) * (y - center_y) + center_x + // rotated_y = cos(angle) * (y - center_y) - sin(angle) * (0 - center_x) + center_y + TFixed rotated_x = sin_mul_ty + center_x + FixMul(neg_center_x, cos_angle); + TFixed rotated_y = cos_mul_ty + center_y - FixMul(neg_center_x, sin_angle); - int x = 0; + for (int x = 0; x < width; ++x) { + // Convert to integer coordinates. + int final_x = Fix2Int(rotated_x); + int final_y = Fix2Int(rotated_y); - if (allInBounds) { - // Fast path: No bounds checking needed. 
- for (; x < width - 3; x += 4) { - // Pixel 0 - int final_x0 = Fix2Int(src_x); - int final_y0 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Pixel 1 - int final_x1 = Fix2Int(src_x); - int final_y1 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Pixel 2 - int final_x2 = Fix2Int(src_x); - int final_y2 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Pixel 3 - int final_x3 = Fix2Int(src_x); - int final_y3 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Write all 4 pixels without bounds checking. - dst_pixels[dstRowOffset + x] = src_pixels[final_y0 * pitchPixels + final_x0]; - dst_pixels[dstRowOffset + x + 1] = src_pixels[final_y1 * pitchPixels + final_x1]; - dst_pixels[dstRowOffset + x + 2] = src_pixels[final_y2 * pitchPixels + final_x2]; - dst_pixels[dstRowOffset + x + 3] = src_pixels[final_y3 * pitchPixels + final_x3]; + // Check bounds and copy pixel. + if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { + dst_pixels[dst_row_offset + x] = src_pixels[final_y * pitch_offset + final_x]; + } else { + dst_pixels[dst_row_offset + x] = 0; } - // Handle remaining pixels. - for (; x < width; ++x) { - int final_x = Fix2Int(src_x); - int final_y = Fix2Int(src_y); - dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x]; - src_x += dx_cos; - src_y += dx_sin; - } - } else { - // Slow path: Bounds checking required. - for (; x < width - 3; x += 4) { - // Pixel 0 - int final_x0 = Fix2Int(src_x); - int final_y0 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Pixel 1 - int final_x1 = Fix2Int(src_x); - int final_y1 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Pixel 2 - int final_x2 = Fix2Int(src_x); - int final_y2 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Pixel 3 - int final_x3 = Fix2Int(src_x); - int final_y3 = Fix2Int(src_y); - src_x += dx_cos; - src_y += dx_sin; - - // Write all 4 pixels with bounds checking. 
- dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ? src_pixels[final_y0 * pitchPixels + final_x0] : 0; - dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ? src_pixels[final_y1 * pitchPixels + final_x1] : 0; - dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ? src_pixels[final_y2 * pitchPixels + final_x2] : 0; - dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ? src_pixels[final_y3 * pitchPixels + final_x3] : 0; - } - - // Handle remaining pixels. - for (; x < width; ++x) { - // Convert to integer coordinates. - int final_x = Fix2Int(src_x); - int final_y = Fix2Int(src_y); - - // Check bounds. - if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { - dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x]; - } else { - dst_pixels[dstRowOffset + x] = 0; - } - - // Incremental step: move to next pixel (just additions, no multiplications!). - src_x += dx_cos; - src_y += dx_sin; - } + // Increment to next pixel (add rotation matrix column). + rotated_x += cos_angle; + rotated_y -= sin_angle; } } } @@ -311,72 +282,46 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix TUint16 *src_pixels = static_cast(source); TUint16 *dst_pixels = static_cast(dest); - // Fast path: Identity scale; just copy entire buffer. - const TFixed identity = Int2Fix(1); - if (scale_x == identity && scale_y == identity) { - Mem::Copy(dest, source, pitch * height); + TFixed one_fixed = Int2Fix(1); + + // Fast path: no scaling (1.0x scale). 
+ if (scale_x == one_fixed && scale_y == one_fixed) { + if (dest != source) { + for (int y = 0; y < height; ++y) { + TUint16 *src_row = src_pixels + (y * pitch / 2); + TUint16 *dst_row = dst_pixels + (y * pitch / 2); + for (int x = 0; x < width; ++x) { + dst_row[x] = src_row[x]; + } + } + } return; } - // Pre-calculate pitch in pixels to avoid repeated division. - const TInt pitchPixels = pitch >> 1; - - // Pre-calculate inverse scale factors to use FixMul instead of FixDiv. - // This is MUCH faster on N-Gage hardware (no division per pixel!). - TFixed inv_scale_x = FixDiv(Int2Fix(1), scale_x); - TFixed inv_scale_y = FixDiv(Int2Fix(1), scale_y); - - // Pre-calculate center offset to reduce operations per pixel. - TFixed center_x_fixed = center_x; - TFixed center_y_fixed = center_y; + int pitch_offset = pitch / 2; for (int y = 0; y < height; ++y) { - // Calculate destination row offset once per row. - TInt dstRowOffset = y * pitchPixels; + int dst_row_offset = y * pitch_offset; + TFixed y_fixed = Int2Fix(y); + TFixed translated_y = y_fixed - center_y; + TFixed scaled_y = FixDiv(translated_y, scale_y); - // Use inverse scale factor (multiply instead of divide). - TFixed translated_y = Int2Fix(y) - center_y_fixed; - TFixed scaled_y = FixMul(translated_y, inv_scale_y); - int final_y = Fix2Int(scaled_y + center_y_fixed); + for (int x = 0; x < width; ++x) { + // Translate point to origin. + TFixed translated_x = Int2Fix(x) - center_x; - // Check if this row is within bounds. - bool rowInBounds = (final_y >= 0 && final_y < height); - TInt srcRowOffset = final_y * pitchPixels; + // Scale point. + TFixed scaled_x = FixDiv(translated_x, scale_x); - // Incremental DDA for X: pre-calculate starting position and increment. - TFixed src_x_start = FixMul(-center_x_fixed, inv_scale_x) + center_x_fixed; - TFixed src_x = src_x_start; + // Translate point back. 
+ int final_x = Fix2Int(scaled_x + center_x); + int final_y = Fix2Int(scaled_y + center_y); - int x = 0; - - // Process 4 pixels at once. - for (; x < width - 3; x += 4) { - // Process 4 pixels using incremental approach. - int final_x0 = Fix2Int(src_x); - src_x += inv_scale_x; - int final_x1 = Fix2Int(src_x); - src_x += inv_scale_x; - int final_x2 = Fix2Int(src_x); - src_x += inv_scale_x; - int final_x3 = Fix2Int(src_x); - src_x += inv_scale_x; - - // Write all 4 pixels with bounds checking. - dst_pixels[dstRowOffset + x] = (rowInBounds && final_x0 >= 0 && final_x0 < width) ? src_pixels[srcRowOffset + final_x0] : 0; - dst_pixels[dstRowOffset + x + 1] = (rowInBounds && final_x1 >= 0 && final_x1 < width) ? src_pixels[srcRowOffset + final_x1] : 0; - dst_pixels[dstRowOffset + x + 2] = (rowInBounds && final_x2 >= 0 && final_x2 < width) ? src_pixels[srcRowOffset + final_x2] : 0; - dst_pixels[dstRowOffset + x + 3] = (rowInBounds && final_x3 >= 0 && final_x3 < width) ? src_pixels[srcRowOffset + final_x3] : 0; - } - - // Handle remaining pixels. - for (; x < width; ++x) { - int final_x = Fix2Int(src_x); - src_x += inv_scale_x; - - if (rowInBounds && final_x >= 0 && final_x < width) { - dst_pixels[dstRowOffset + x] = src_pixels[srcRowOffset + final_x]; + // Check bounds. 
+ if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) { + dst_pixels[dst_row_offset + x] = src_pixels[final_y * pitch_offset + final_x]; } else { - dst_pixels[dstRowOffset + x] = 0; + dst_pixels[dst_row_offset + x] = 0; } } } diff --git a/src/render/ngage/SDL_render_ops.hpp b/src/render/ngage/SDL_render_ops.hpp index ae580f65e7..65e92e5bca 100644 --- a/src/render/ngage/SDL_render_ops.hpp +++ b/src/render/ngage/SDL_render_ops.hpp @@ -24,7 +24,7 @@ #include <3dtypes.h> -void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color, const TUint8 *colorLUT); +void ApplyColorMod(void *dest, void *source, int pitch, int width, int height, SDL_FColor color); void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_FlipMode flip); void ApplyRotation(void *dest, void *source, int pitch, int width, int height, TFixed center_x, TFixed center_y, TFixed angle); void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFixed center_x, TFixed center_y, TFixed scale_x, TFixed scale_y);