mirror of
https://github.com/libsdl-org/SDL.git
synced 2026-04-19 23:05:33 +02:00
[N-Gage] Micro-optimize rendering back-end
- Skip SDL_GetRenderScale call in Copy() fast path
- Cache last clear color to avoid redundant SetBrushColor calls
- Add whole-image bounds pre-check to skip per-pixel checks in rotation
- Simplify color packing in DrawPoints/FillRects to reduce overhead
This commit is contained in:
@@ -174,7 +174,7 @@ CRenderer *CRenderer::NewL()
|
||||
return self;
|
||||
}
|
||||
|
||||
CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0), iLastColorR(-1), iLastColorG(-1), iLastColorB(-1), iLinePointsBuffer(0), iLinePointsBufferCapacity(0), iLastDrawColor(0)
|
||||
CRenderer::CRenderer() : iRenderer(0), iDirectScreen(0), iScreenGc(0), iWsSession(), iWsWindowGroup(), iWsWindowGroupID(0), iWsWindow(), iWsScreen(0), iWsEventStatus(), iWsEvent(), iShowFPS(EFalse), iFPS(0), iFont(0), iWorkBuffer1(0), iWorkBuffer2(0), iWorkBufferSize(0), iTempRenderBitmap(0), iTempRenderBitmapWidth(0), iTempRenderBitmapHeight(0), iLastColorR(-1), iLastColorG(-1), iLastColorB(-1), iLinePointsBuffer(0), iLinePointsBufferCapacity(0), iLastDrawColor(0), iLastClearColor(0xFFFFFFFF)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -313,7 +313,11 @@ void CRenderer::AbortNow(RDirectScreenAccess::TTerminationReasons aReason)
|
||||
// Applies iColor as the brush colour of the renderer's graphics context and
// then calls Clear() on that context. Does nothing when iRenderer or its Gc()
// is null.
//
// Reading note: this hunk is rendered without +/- markers. Going by the hunk
// header counts (-313,7 +313,11), the first, unconditional SetBrushColor call
// below is the pre-change line, and the guarded block that follows it is the
// replacement introduced by this commit.
void CRenderer::Clear(TUint32 iColor)
|
||||
{
|
||||
if (iRenderer && iRenderer->Gc()) {
|
||||
iRenderer->Gc()->SetBrushColor(iColor);
|
||||
// Skip the SetBrushColor call when iColor equals the colour recorded by the
// previous Clear() (iLastClearColor, initialised to 0xFFFFFFFF in the
// constructor).
// NOTE(review): iLastClearColor is only updated here. If any other code path
// changes the brush colour on this same Gc between two clears, skipping the
// call would clear with that other colour - confirm against the draw paths.
// NOTE(review): a first call with iColor == 0xFFFFFFFF matches the initial
// cache value and never sets the brush - confirm the Gc's default brush
// colour makes that harmless.
|
||||
if (iColor != iLastClearColor) {
|
||||
iRenderer->Gc()->SetBrushColor(iColor);
|
||||
iLastClearColor = iColor;
|
||||
}
|
||||
iRenderer->Gc()->Clear();
|
||||
}
|
||||
}
|
||||
@@ -534,20 +538,24 @@ bool CRenderer::Copy(SDL_Renderer *renderer, SDL_Texture *texture, const SDL_Rec
|
||||
|
||||
SDL_FColor *c = &texture->color;
|
||||
|
||||
// Get render scale once.
|
||||
float sx;
|
||||
float sy;
|
||||
SDL_GetRenderScale(renderer, &sx, &sy);
|
||||
|
||||
// Fast path 1: No transformations needed; direct BitBlt.
|
||||
if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f && sx == 1.f && sy == 1.f) {
|
||||
TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h));
|
||||
TPoint aDest(dstrect->x, dstrect->y);
|
||||
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
|
||||
return true;
|
||||
if (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f) {
|
||||
// Only check render scale if color mod passes.
|
||||
float sx;
|
||||
float sy;
|
||||
SDL_GetRenderScale(renderer, &sx, &sy);
|
||||
if (sx == 1.f && sy == 1.f) {
|
||||
TRect aSource(TPoint(srcrect->x, srcrect->y), TSize(srcrect->w, srcrect->h));
|
||||
TPoint aDest(dstrect->x, dstrect->y);
|
||||
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Slow path: Transformations needed.
|
||||
float sx;
|
||||
float sy;
|
||||
SDL_GetRenderScale(renderer, &sx, &sy);
|
||||
int w = phdata->cachedWidth;
|
||||
int h = phdata->cachedHeight;
|
||||
int pitch = phdata->cachedPitch;
|
||||
@@ -617,13 +625,13 @@ bool CRenderer::CopyEx(SDL_Renderer *renderer, SDL_Texture *texture, const NGAGE
|
||||
SDL_FColor *c = &texture->color;
|
||||
|
||||
// Pre-calculate common checks.
|
||||
const bool isIdentityScale = (copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1));
|
||||
const bool isNoRotation = (copydata->angle == 0);
|
||||
const bool isNoFlip = (!copydata->flip);
|
||||
const bool isNoRotation = (copydata->angle == 0);
|
||||
const bool isNoColorMod = (c->a == 1.f && c->r == 1.f && c->g == 1.f && c->b == 1.f);
|
||||
const bool isIdentityScale = (copydata->scale_x == Int2Fix(1) && copydata->scale_y == Int2Fix(1));
|
||||
|
||||
// Fast path 1: No transformations needed; direct BitBlt.
|
||||
if (isNoFlip && isIdentityScale && isNoRotation && isNoColorMod) {
|
||||
if (isNoFlip && isNoRotation && isNoColorMod && isIdentityScale) {
|
||||
TRect aSource(TPoint(copydata->srcrect.x, copydata->srcrect.y), TSize(copydata->srcrect.w, copydata->srcrect.h));
|
||||
TPoint aDest(copydata->dstrect.x, copydata->dstrect.y);
|
||||
iRenderer->Gc()->BitBlt(aDest, phdata->bitmap, aSource);
|
||||
@@ -799,11 +807,8 @@ void CRenderer::DrawPoints(NGAGE_Vertex *aVerts, const TInt aCount)
|
||||
bool colorSet = false;
|
||||
|
||||
for (TInt i = 0; i < aCount; i++, aVerts++) {
|
||||
Uint8 ca = aVerts->color.a;
|
||||
Uint8 cr = aVerts->color.r;
|
||||
Uint8 cg = aVerts->color.g;
|
||||
Uint8 cb = aVerts->color.b;
|
||||
TUint32 aColor = (ca << 24) | (cb << 16) | (cg << 8) | cr;
|
||||
TUint32 aColor = (TUint32(aVerts->color.a) << 24) | (TUint32(aVerts->color.b) << 16) |
|
||||
(TUint32(aVerts->color.g) << 8) | TUint32(aVerts->color.r);
|
||||
|
||||
// Only set pen color when it changes.
|
||||
if (!colorSet || aColor != currentColor) {
|
||||
@@ -830,11 +835,8 @@ void CRenderer::FillRects(NGAGE_Vertex *aVerts, const TInt aCount)
|
||||
TSize size(aVerts[i + 1].x, aVerts[i + 1].y);
|
||||
TRect rect(pos, size);
|
||||
|
||||
Uint8 ca = aVerts[i].color.a;
|
||||
Uint8 cr = aVerts[i].color.r;
|
||||
Uint8 cg = aVerts[i].color.g;
|
||||
Uint8 cb = aVerts[i].color.b;
|
||||
TUint32 aColor = (ca << 24) | (cb << 16) | (cg << 8) | cr;
|
||||
TUint32 aColor = (TUint32(aVerts[i].color.a) << 24) | (TUint32(aVerts[i].color.b) << 16) |
|
||||
(TUint32(aVerts[i].color.g) << 8) | TUint32(aVerts[i].color.r);
|
||||
|
||||
// Only set colors when they change.
|
||||
if (!colorSet || aColor != currentColor) {
|
||||
|
||||
@@ -111,6 +111,9 @@ class CRenderer : public MDirectScreenAccess
|
||||
// Cached draw color to avoid redundant SetPenColor/SetBrushColor calls.
|
||||
TUint32 iLastDrawColor;
|
||||
|
||||
// Cached clear color to avoid redundant SetBrushColor calls.
|
||||
TUint32 iLastClearColor;
|
||||
|
||||
// Helper methods.
|
||||
bool EnsureWorkBufferCapacity(TInt aRequiredSize);
|
||||
bool EnsureTempBitmapCapacity(TInt aWidth, TInt aHeight);
|
||||
|
||||
@@ -98,6 +98,22 @@ void ApplyFlip(void *dest, void *source, int pitch, int width, int height, SDL_F
|
||||
const bool flipHorizontal = (flip & SDL_FLIP_HORIZONTAL) != 0;
|
||||
const bool flipVertical = (flip & SDL_FLIP_VERTICAL) != 0;
|
||||
|
||||
// Fast path: No flip; just copy entire buffer.
|
||||
if (!flipHorizontal && !flipVertical) {
|
||||
Mem::Copy(dest, source, pitch * height);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fast path: Vertical-only flip; copy rows in reverse order.
|
||||
if (flipVertical && !flipHorizontal) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
const int src_y = height - 1 - y;
|
||||
Mem::Copy(&dst_pixels[y * pitchPixels], &src_pixels[src_y * pitchPixels], pitch);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow path: Horizontal or both flips; need pixel-level operations.
|
||||
// Pre-calculate width/height bounds for horizontal/vertical flipping.
|
||||
const int width_m1 = width - 1;
|
||||
const int height_m1 = height - 1;
|
||||
@@ -150,6 +166,27 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T
|
||||
// Pre-calculate pitch in pixels to avoid repeated division.
|
||||
const TInt pitchPixels = pitch >> 1;
|
||||
|
||||
// Pre-check if rotation keeps all pixels within bounds to skip per-pixel checks.
|
||||
// Calculate the four corners of the image after rotation around center.
|
||||
bool allInBounds = true;
|
||||
if (angle != 0) {
|
||||
// Check corners: (0,0), (width-1,0), (0,height-1), (width-1,height-1)
|
||||
TFixed corners_x[4] = { -center_x, Int2Fix(width - 1) - center_x, -center_x, Int2Fix(width - 1) - center_x };
|
||||
TFixed corners_y[4] = { -center_y, -center_y, Int2Fix(height - 1) - center_y, Int2Fix(height - 1) - center_y };
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
TFixed rot_x = FixMul(corners_x[i], cos_angle) - FixMul(corners_y[i], sin_angle) + center_x;
|
||||
TFixed rot_y = FixMul(corners_x[i], sin_angle) + FixMul(corners_y[i], cos_angle) + center_y;
|
||||
int final_x = Fix2Int(rot_x);
|
||||
int final_y = Fix2Int(rot_y);
|
||||
|
||||
if (final_x < 0 || final_x >= width || final_y < 0 || final_y >= height) {
|
||||
allInBounds = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Incremental DDA: Calculate per-pixel increments.
|
||||
// As we move right (x+1), the rotated position changes by (cos, -sin).
|
||||
const TFixed dx_cos = cos_angle;
|
||||
@@ -172,55 +209,99 @@ void ApplyRotation(void *dest, void *source, int pitch, int width, int height, T
|
||||
|
||||
int x = 0;
|
||||
|
||||
// Process 4 pixels at once.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Pixel 0
|
||||
int final_x0 = Fix2Int(src_x);
|
||||
int final_y0 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
if (allInBounds) {
|
||||
// Fast path: No bounds checking needed.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Pixel 0
|
||||
int final_x0 = Fix2Int(src_x);
|
||||
int final_y0 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 1
|
||||
int final_x1 = Fix2Int(src_x);
|
||||
int final_y1 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
// Pixel 1
|
||||
int final_x1 = Fix2Int(src_x);
|
||||
int final_y1 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 2
|
||||
int final_x2 = Fix2Int(src_x);
|
||||
int final_y2 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
// Pixel 2
|
||||
int final_x2 = Fix2Int(src_x);
|
||||
int final_y2 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 3
|
||||
int final_x3 = Fix2Int(src_x);
|
||||
int final_y3 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
// Pixel 3
|
||||
int final_x3 = Fix2Int(src_x);
|
||||
int final_y3 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Write all 4 pixels with bounds checking.
|
||||
dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ? src_pixels[final_y0 * pitchPixels + final_x0] : 0;
|
||||
dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ? src_pixels[final_y1 * pitchPixels + final_x1] : 0;
|
||||
dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ? src_pixels[final_y2 * pitchPixels + final_x2] : 0;
|
||||
dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ? src_pixels[final_y3 * pitchPixels + final_x3] : 0;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
// Convert to integer coordinates.
|
||||
int final_x = Fix2Int(src_x);
|
||||
int final_y = Fix2Int(src_y);
|
||||
|
||||
// Check bounds.
|
||||
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x];
|
||||
} else {
|
||||
dst_pixels[dstRowOffset + x] = 0;
|
||||
// Write all 4 pixels without bounds checking.
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y0 * pitchPixels + final_x0];
|
||||
dst_pixels[dstRowOffset + x + 1] = src_pixels[final_y1 * pitchPixels + final_x1];
|
||||
dst_pixels[dstRowOffset + x + 2] = src_pixels[final_y2 * pitchPixels + final_x2];
|
||||
dst_pixels[dstRowOffset + x + 3] = src_pixels[final_y3 * pitchPixels + final_x3];
|
||||
}
|
||||
|
||||
// Incremental step: move to next pixel (just additions, no multiplications!).
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
int final_x = Fix2Int(src_x);
|
||||
int final_y = Fix2Int(src_y);
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x];
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
}
|
||||
} else {
|
||||
// Slow path: Bounds checking required.
|
||||
for (; x < width - 3; x += 4) {
|
||||
// Pixel 0
|
||||
int final_x0 = Fix2Int(src_x);
|
||||
int final_y0 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 1
|
||||
int final_x1 = Fix2Int(src_x);
|
||||
int final_y1 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 2
|
||||
int final_x2 = Fix2Int(src_x);
|
||||
int final_y2 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Pixel 3
|
||||
int final_x3 = Fix2Int(src_x);
|
||||
int final_y3 = Fix2Int(src_y);
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
|
||||
// Write all 4 pixels with bounds checking.
|
||||
dst_pixels[dstRowOffset + x] = (final_x0 >= 0 && final_x0 < width && final_y0 >= 0 && final_y0 < height) ? src_pixels[final_y0 * pitchPixels + final_x0] : 0;
|
||||
dst_pixels[dstRowOffset + x + 1] = (final_x1 >= 0 && final_x1 < width && final_y1 >= 0 && final_y1 < height) ? src_pixels[final_y1 * pitchPixels + final_x1] : 0;
|
||||
dst_pixels[dstRowOffset + x + 2] = (final_x2 >= 0 && final_x2 < width && final_y2 >= 0 && final_y2 < height) ? src_pixels[final_y2 * pitchPixels + final_x2] : 0;
|
||||
dst_pixels[dstRowOffset + x + 3] = (final_x3 >= 0 && final_x3 < width && final_y3 >= 0 && final_y3 < height) ? src_pixels[final_y3 * pitchPixels + final_x3] : 0;
|
||||
}
|
||||
|
||||
// Handle remaining pixels.
|
||||
for (; x < width; ++x) {
|
||||
// Convert to integer coordinates.
|
||||
int final_x = Fix2Int(src_x);
|
||||
int final_y = Fix2Int(src_y);
|
||||
|
||||
// Check bounds.
|
||||
if (final_x >= 0 && final_x < width && final_y >= 0 && final_y < height) {
|
||||
dst_pixels[dstRowOffset + x] = src_pixels[final_y * pitchPixels + final_x];
|
||||
} else {
|
||||
dst_pixels[dstRowOffset + x] = 0;
|
||||
}
|
||||
|
||||
// Incremental step: move to next pixel (just additions, no multiplications!).
|
||||
src_x += dx_cos;
|
||||
src_y += dx_sin;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -230,6 +311,13 @@ void ApplyScale(void *dest, void *source, int pitch, int width, int height, TFix
|
||||
TUint16 *src_pixels = static_cast<TUint16 *>(source);
|
||||
TUint16 *dst_pixels = static_cast<TUint16 *>(dest);
|
||||
|
||||
// Fast path: Identity scale; just copy entire buffer.
|
||||
const TFixed identity = Int2Fix(1);
|
||||
if (scale_x == identity && scale_y == identity) {
|
||||
Mem::Copy(dest, source, pitch * height);
|
||||
return;
|
||||
}
|
||||
|
||||
// Pre-calculate pitch in pixels to avoid repeated division.
|
||||
const TInt pitchPixels = pitch >> 1;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user