Replace most of the renderqueue logic with GX command queues. Details:

- Removed the double-buffered GPU cmdbuffer (might be added back later)
- Added a global gxCmdQueue which by default is configured to run
  commands as they arrive (in the future it might be double buffered)
- Added C3D_FrameSync for explicitly waiting for "vblank"
- Repurposed C3D_FRAME_SYNCDRAW to perform C3D_FrameSync
- Added C3D_FrameSplit for splitting/submitting the gpu cmdlist in the
  middle of a renderqueue frame
- C3D_RenderTargetSetClear is still supported; however, it's unofficially
  deprecated (the clear is performed after drawing/transferring instead of
  before drawing, which is pretty counter-intuitive)
- C3D_RenderTargetSetOutput is explicitly NOT deprecated since it's
  necessary to avoid screen tearing when transferring to the screen
  framebuffers (this stems from a Nintendo design flaw where screen swap
  processing is done immediately after GX transfers finish)
This commit is contained in:
fincs 2017-03-26 20:15:01 +02:00
parent 9f8f5e320a
commit 2fc57e99b9
4 changed files with 203 additions and 285 deletions

View File

@ -5,30 +5,33 @@ typedef struct C3D_RenderTarget_tag C3D_RenderTarget;
struct C3D_RenderTarget_tag struct C3D_RenderTarget_tag
{ {
C3D_RenderTarget *next, *prev, *link, *frame[2]; C3D_RenderTarget *next, *prev;
C3D_FrameBuf frameBuf; C3D_FrameBuf frameBuf;
u32 transferFlags; bool used;
u32 clearColor, clearDepth;
C3D_ClearBits clearBits;
bool ownsColor, ownsDepth; bool ownsColor, ownsDepth;
bool drawOk, transferOk;
bool linked; bool linked;
gfxScreen_t screen; gfxScreen_t screen;
gfx3dSide_t side; gfx3dSide_t side;
C3D_ClearBits clearBits;
u32 transferFlags;
u32 clearColor, clearDepth;
}; };
// Flags for C3D_FrameBegin // Flags for C3D_FrameBegin
enum enum
{ {
C3D_FRAME_SYNCDRAW = BIT(0), // Do not render the frame until the previous has finished rendering C3D_FRAME_SYNCDRAW = BIT(0), // Perform C3D_FrameSync before checking the GPU status
C3D_FRAME_NONBLOCK = BIT(1), // Return false instead of waiting for the GPU to finish rendering C3D_FRAME_NONBLOCK = BIT(1), // Return false instead of waiting if the GPU is busy
}; };
float C3D_FrameRate(float fps); float C3D_FrameRate(float fps);
void C3D_FrameSync(void);
bool C3D_FrameBegin(u8 flags); bool C3D_FrameBegin(u8 flags);
bool C3D_FrameDrawOn(C3D_RenderTarget* target); bool C3D_FrameDrawOn(C3D_RenderTarget* target);
void C3D_FrameSplit(u8 flags);
void C3D_FrameEnd(u8 flags); void C3D_FrameEnd(u8 flags);
float C3D_GetDrawingTime(void); float C3D_GetDrawingTime(void);

View File

@ -1,4 +1,5 @@
#include "internal.h" #include "internal.h"
#include <stdlib.h>
#include <c3d/base.h> #include <c3d/base.h>
#include <c3d/effect.h> #include <c3d/effect.h>
#include <c3d/uniforms.h> #include <c3d/uniforms.h>
@ -76,12 +77,24 @@ bool C3D_Init(size_t cmdBufSize)
if (ctx->flags & C3DiF_Active) if (ctx->flags & C3DiF_Active)
return false; return false;
ctx->cmdBufSize = cmdBufSize/8; // Half of the size of the cmdbuf, in words cmdBufSize = (cmdBufSize + 0xF) &~ 0xF; // 0x10-byte align
ctx->cmdBufSize = cmdBufSize/4;
ctx->cmdBuf = (u32*)linearAlloc(cmdBufSize); ctx->cmdBuf = (u32*)linearAlloc(cmdBufSize);
ctx->cmdBufUsage = 0; ctx->cmdBufUsage = 0;
if (!ctx->cmdBuf) return false; if (!ctx->cmdBuf)
return false;
ctx->gxQueue.maxEntries = 32;
ctx->gxQueue.entries = (gxCmdEntry_s*)malloc(ctx->gxQueue.maxEntries*sizeof(gxCmdEntry_s));
if (!ctx->gxQueue.entries)
{
linearFree(ctx->cmdBuf);
return false;
}
GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0); GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0);
GX_BindQueue(&ctx->gxQueue);
gxCmdQueueRun(&ctx->gxQueue);
ctx->flags = C3DiF_Active | C3DiF_TexEnvBuf | C3DiF_TexEnvAll | C3DiF_Effect | C3DiF_TexStatus | C3DiF_TexAll; ctx->flags = C3DiF_Active | C3DiF_TexEnvBuf | C3DiF_TexEnvAll | C3DiF_Effect | C3DiF_TexStatus | C3DiF_TexAll;
@ -270,10 +283,13 @@ void C3Di_UpdateContext(void)
C3D_UpdateUniforms(GPU_GEOMETRY_SHADER); C3D_UpdateUniforms(GPU_GEOMETRY_SHADER);
} }
void C3Di_FinalizeFrame(u32** pBuf, u32* pSize) bool C3Di_SplitFrame(u32** pBuf, u32* pSize)
{ {
C3D_Context* ctx = C3Di_GetContext(); C3D_Context* ctx = C3Di_GetContext();
if (!gpuCmdBufOffset)
return false; // Nothing was drawn
if (ctx->flags & C3DiF_DrawUsed) if (ctx->flags & C3DiF_DrawUsed)
{ {
ctx->flags &= ~C3DiF_DrawUsed; ctx->flags &= ~C3DiF_DrawUsed;
@ -282,32 +298,29 @@ void C3Di_FinalizeFrame(u32** pBuf, u32* pSize)
GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1); GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1);
} }
GPUCMD_Finalize(); GPUCMD_Split(pBuf, pSize);
GPUCMD_GetBuffer(pBuf, NULL, pSize); u32 totalCmdBufSize = *pBuf + *pSize - ctx->cmdBuf;
ctx->cmdBufUsage = (float)(*pSize) / ctx->cmdBufSize; ctx->cmdBufUsage = (float)totalCmdBufSize / ctx->cmdBufSize;
*pSize *= 4; return true;
ctx->flags ^= C3DiF_CmdBuffer;
u32* buf = ctx->cmdBuf;
if (ctx->flags & C3DiF_CmdBuffer)
buf += ctx->cmdBufSize;
GPUCMD_SetBuffer(buf, ctx->cmdBufSize, 0);
} }
void C3D_FlushAsync(void) void C3D_FlushAsync(void)
{ {
if (!(C3Di_GetContext()->flags & C3DiF_Active)) C3D_Context* ctx = C3Di_GetContext();
if (!(ctx->flags & C3DiF_Active))
return; return;
u32* cmdBuf; u32* cmdBuf;
u32 cmdBufSize; u32 cmdBufSize;
C3Di_FinalizeFrame(&cmdBuf, &cmdBufSize); C3Di_SplitFrame(&cmdBuf, &cmdBufSize);
GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0);
//take advantage of GX_FlushCacheRegions to flush gsp heap //take advantage of GX_FlushCacheRegions to flush gsp heap
extern u32 __ctru_linear_heap; extern u32 __ctru_linear_heap;
extern u32 __ctru_linear_heap_size; extern u32 __ctru_linear_heap_size;
GX_FlushCacheRegions(cmdBuf, cmdBufSize, (u32 *) __ctru_linear_heap, __ctru_linear_heap_size, NULL, 0); GX_FlushCacheRegions(cmdBuf, cmdBufSize*4, (u32 *) __ctru_linear_heap, __ctru_linear_heap_size, NULL, 0);
GX_ProcessCommandList(cmdBuf, cmdBufSize, 0x0); GX_ProcessCommandList(cmdBuf, cmdBufSize*4, 0x0);
} }
float C3D_GetCmdBufUsage(void) float C3D_GetCmdBufUsage(void)
@ -324,6 +337,10 @@ void C3D_Fini(void)
C3Di_RenderQueueExit(); C3Di_RenderQueueExit();
aptUnhook(&hookCookie); aptUnhook(&hookCookie);
gxCmdQueueStop(&ctx->gxQueue);
gxCmdQueueWait(&ctx->gxQueue, -1);
GX_BindQueue(NULL);
free(ctx->gxQueue.entries);
linearFree(ctx->cmdBuf); linearFree(ctx->cmdBuf);
ctx->flags = 0; ctx->flags = 0;
} }

View File

@ -27,6 +27,7 @@ typedef struct
typedef struct typedef struct
{ {
gxCmdQueue_s gxQueue;
u32* cmdBuf; u32* cmdBuf;
size_t cmdBufSize; size_t cmdBufSize;
float cmdBufUsage; float cmdBufUsage;
@ -73,7 +74,6 @@ enum
C3DiF_LightEnv = BIT(10), C3DiF_LightEnv = BIT(10),
C3DiF_VshCode = BIT(11), C3DiF_VshCode = BIT(11),
C3DiF_GshCode = BIT(12), C3DiF_GshCode = BIT(12),
C3DiF_CmdBuffer = BIT(13),
C3DiF_TexStatus = BIT(14), C3DiF_TexStatus = BIT(14),
C3DiF_ProcTex = BIT(15), C3DiF_ProcTex = BIT(15),
C3DiF_ProcTexColorLut = BIT(16), C3DiF_ProcTexColorLut = BIT(16),
@ -116,4 +116,4 @@ void C3Di_DirtyUniforms(GPU_SHADER_TYPE type);
void C3Di_LoadShaderUniforms(shaderInstance_s* si); void C3Di_LoadShaderUniforms(shaderInstance_s* si);
void C3Di_ClearShaderUniforms(GPU_SHADER_TYPE type); void C3Di_ClearShaderUniforms(GPU_SHADER_TYPE type);
void C3Di_FinalizeFrame(u32** pBuf, u32* pSize); bool C3Di_SplitFrame(u32** pBuf, u32* pSize);

View File

@ -3,88 +3,24 @@
#include <c3d/renderqueue.h> #include <c3d/renderqueue.h>
#include <stdlib.h> #include <stdlib.h>
static const u8 colorFmtSizes[] = {2,1,0,0,0};
static const u8 depthFmtSizes[] = {0,0,1,2};
static C3D_RenderTarget *firstTarget, *lastTarget; static C3D_RenderTarget *firstTarget, *lastTarget;
static C3D_RenderTarget *linkedTarget[3]; static C3D_RenderTarget *linkedTarget[3];
static C3D_RenderTarget *transferQueue, *clearQueue;
static TickCounter gpuTime, cpuTime; static TickCounter gpuTime, cpuTime;
static struct #define STAGE_HAS_TRANSFER(n) BIT(0+(n))
{ #define STAGE_HAS_ANY_TRANSFER (7<<0)
C3D_RenderTarget* targetList; #define STAGE_NEED_TRANSFER(n) BIT(3+(n))
u32* cmdBuf; #define STAGE_NEED_TOP_TRANSFER (STAGE_NEED_TRANSFER(0)|STAGE_NEED_TRANSFER(1))
u32 cmdBufSize; #define STAGE_NEED_BOT_TRANSFER STAGE_NEED_TRANSFER(2)
u8 flags; #define STAGE_WAIT_TRANSFER BIT(6)
} queuedFrame[2];
static u8 queueSwap, queuedCount, queuedState;
static bool initialized; static bool initialized;
static bool inFrame, inSafeTransfer, inSafeClear; static bool inFrame, inSafeTransfer, measureGpuTime;
static u8 frameStage;
static float framerate = 60.0f; static float framerate = 60.0f;
static float framerateCounter[2] = { 60.0f, 60.0f }; static float framerateCounter[2] = { 60.0f, 60.0f };
static u32 frameCounter[2];
static void onRenderFinish(void* unused);
static void onTransferFinish(void* unused);
static void onClearDone(void* unused);
static void performDraw(void)
{
gspSetEventCallback(GSPGPU_EVENT_P3D, onRenderFinish, NULL, true);
GX_ProcessCommandList(queuedFrame[queueSwap].cmdBuf, queuedFrame[queueSwap].cmdBufSize, queuedFrame[queueSwap].flags);
osTickCounterStart(&gpuTime);
}
static void performTransfer(void)
{
if (inSafeTransfer) return; // Let the safe transfer finish handler retry this
C3D_FrameBuf* frameBuf = &transferQueue->frameBuf;
u32* outputFrameBuf = (u32*)gfxGetFramebuffer(transferQueue->screen, transferQueue->side, NULL, NULL);
if (transferQueue->side == GFX_LEFT)
gfxConfigScreen(transferQueue->screen, false);
u32 dim = GX_BUFFER_DIM((u32)frameBuf->width, (u32)frameBuf->height);
gspSetEventCallback(GSPGPU_EVENT_PPF, onTransferFinish, NULL, true);
GX_DisplayTransfer((u32*)frameBuf->colorBuf, dim, outputFrameBuf, dim, transferQueue->transferFlags);
}
static void performClear(void)
{
if (inSafeClear) return; // Let the safe clear finish handler retry this
C3D_RenderTarget* target = clearQueue;
while (target && !target->clearBits)
{
target->drawOk = true;
target = target->link;
clearQueue = target;
}
if (!target) return;
C3D_FrameBuf* frameBuf = &target->frameBuf;
u32 size = (u32)frameBuf->width * frameBuf->height;
u32 cfs = colorFmtSizes[frameBuf->colorFmt];
u32 dfs = depthFmtSizes[frameBuf->depthFmt];
void* colorBufEnd = (u8*)frameBuf->colorBuf + size*(2+cfs);
void* depthBufEnd = (u8*)frameBuf->depthBuf + size*(2+dfs);
gspSetEventCallback(GSPGPU_EVENT_PSC0, onClearDone, NULL, true);
if (target->clearBits & C3D_CLEAR_COLOR)
{
if (target->clearBits & C3D_CLEAR_DEPTH)
GX_MemoryFill(
(u32*)frameBuf->colorBuf, target->clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8),
(u32*)frameBuf->depthBuf, target->clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8));
else
GX_MemoryFill(
(u32*)frameBuf->colorBuf, target->clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8),
NULL, 0, NULL, 0);
} else
GX_MemoryFill(
(u32*)frameBuf->depthBuf, target->clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8),
NULL, 0, NULL, 0);
}
static bool framerateLimit(int id) static bool framerateLimit(int id)
{ {
@ -97,158 +33,123 @@ static bool framerateLimit(int id)
return false; return false;
} }
static void updateFrameQueue(void) static void C3Di_TargetTransfer(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side)
{ {
C3D_RenderTarget* a; C3D_FrameBufTransfer(&target->frameBuf, screen, side, target->transferFlags);
if (queuedState>0) return; // Still rendering if (target->clearBits)
C3D_FrameBufClear(&target->frameBuf, target->clearBits, target->clearColor, target->clearDepth);
// Check that all targets are OK to be drawn on
for (a = queuedFrame[queueSwap].targetList; a; a = a->frame[queueSwap])
if (!a->drawOk)
return; // Nope, we can't start rendering yet
// Start rendering the frame
queuedState=1;
for (a = queuedFrame[queueSwap].targetList; a; a = a->frame[queueSwap])
a->drawOk = false;
performDraw();
}
static void transferTarget(C3D_RenderTarget* target)
{
C3D_RenderTarget* a;
target->transferOk = false;
target->link = NULL;
if (!transferQueue)
{
transferQueue = target;
performTransfer();
return;
}
for (a = transferQueue; a->link; a = a->link);
a->link = target;
}
static void clearTarget(C3D_RenderTarget* target)
{
C3D_RenderTarget* a;
target->link = NULL;
if (!clearQueue)
{
clearQueue = target;
performClear();
return;
}
for (a = clearQueue; a->link; a = a->link);
a->link = target;
} }
static void onVBlank0(C3D_UNUSED void* unused) static void onVBlank0(C3D_UNUSED void* unused)
{ {
if (!linkedTarget[0] || !framerateLimit(0)) return; gxCmdQueue_s* queue = &C3Di_GetContext()->gxQueue;
if (frameStage & STAGE_NEED_TOP_TRANSFER)
if (gfxIs3D())
{ {
if (linkedTarget[1] && linkedTarget[1]->transferOk) gxCmdQueueStop(queue);
transferTarget(linkedTarget[1]); C3D_RenderTarget *left = linkedTarget[0], *right = linkedTarget[1];
else if (linkedTarget[0]->transferOk) if (left && !(frameStage&STAGE_NEED_TRANSFER(0)))
left = NULL;
if (right && !(frameStage&STAGE_NEED_TRANSFER(1)))
right = NULL;
if (gfxIs3D() && !right)
right = left;
frameStage &= ~STAGE_NEED_TOP_TRANSFER;
if (left || right)
{ {
// Use a temporary copy of the left framebuffer to fill in the missing right image. frameStage |= STAGE_WAIT_TRANSFER;
static C3D_RenderTarget temp; if (left)
memcpy(&temp, linkedTarget[0], sizeof(temp)); C3Di_TargetTransfer(left, GFX_TOP, GFX_LEFT);
temp.side = GFX_RIGHT; if (right)
temp.clearBits = false; C3Di_TargetTransfer(right, GFX_TOP, GFX_RIGHT);
transferTarget(&temp); gfxConfigScreen(GFX_TOP, false);
} }
gxCmdQueueRun(queue);
} }
if (linkedTarget[0]->transferOk) if (framerateLimit(0))
transferTarget(linkedTarget[0]); frameCounter[0]++;
} }
static void onVBlank1(C3D_UNUSED void* unused) static void onVBlank1(C3D_UNUSED void* unused)
{ {
if (linkedTarget[2] && framerateLimit(1) && linkedTarget[2]->transferOk) gxCmdQueue_s* queue = &C3Di_GetContext()->gxQueue;
transferTarget(linkedTarget[2]); if (frameStage & STAGE_NEED_BOT_TRANSFER)
}
void onRenderFinish(C3D_UNUSED void* unused)
{
C3D_RenderTarget *a, *next;
osTickCounterUpdate(&gpuTime);
// The following check should never trigger
if (queuedState!=1) svcBreak(USERBREAK_PANIC);
for (a = queuedFrame[queueSwap].targetList; a; a = next)
{ {
next = a->frame[queueSwap]; gxCmdQueueStop(queue);
a->frame[queueSwap] = NULL; frameStage &= ~STAGE_NEED_BOT_TRANSFER;
if (a->linked) C3D_RenderTarget* target = linkedTarget[2];
a->transferOk = true; if (target)
else if (a->clearBits) {
clearTarget(a); frameStage |= STAGE_WAIT_TRANSFER;
else C3Di_TargetTransfer(target, GFX_BOTTOM, GFX_LEFT);
a->drawOk = true; gfxConfigScreen(GFX_BOTTOM, false);
}
gxCmdQueueRun(queue);
} }
if (framerateLimit(1))
// Consume the frame that has been just rendered frameCounter[1]++;
memset(&queuedFrame[queueSwap], 0, sizeof(queuedFrame[queueSwap]));
queueSwap ^= 1;
queuedCount--;
queuedState = 0;
// Update the frame queue if there are still frames to render
if (queuedCount>0)
updateFrameQueue();
} }
void onTransferFinish(C3D_UNUSED void* unused) static void onQueueFinish(gxCmdQueue_s* queue)
{ {
C3D_RenderTarget* target = transferQueue; if (measureGpuTime)
{
osTickCounterUpdate(&gpuTime);
measureGpuTime = false;
}
if (inSafeTransfer) if (inSafeTransfer)
{ {
inSafeTransfer = false; inSafeTransfer = false;
// Try again if there are queued transfers if (inFrame)
if (target) {
performTransfer(); gxCmdQueueStop(queue);
return; gxCmdQueueClear(queue);
}
} }
transferQueue = target->link; else if (frameStage & STAGE_WAIT_TRANSFER)
if (target->clearBits) frameStage &= ~STAGE_WAIT_TRANSFER;
clearTarget(target);
else else
target->drawOk = true; {
if (transferQueue) u8 needs = frameStage & STAGE_HAS_ANY_TRANSFER;
performTransfer(); frameStage = (frameStage&~STAGE_HAS_ANY_TRANSFER) | (needs<<3);
if (target->drawOk && queuedCount>0 && queuedState==0) }
updateFrameQueue();
} }
void onClearDone(C3D_UNUSED void* unused) void C3D_FrameSync(void)
{ {
C3D_RenderTarget* target = clearQueue; u32 cur[2];
if (inSafeClear) u32 start[2] = { frameCounter[0], frameCounter[1] };
do
{ {
inSafeClear = false; gspWaitForAnyEvent();
// Try again if there are queued clears cur[0] = frameCounter[0];
if (target) cur[1] = frameCounter[1];
performClear(); } while (cur[0]==start[0] || cur[1]==start[1]);
return; }
}
clearQueue = target->link; static bool C3Di_WaitAndClearQueue(s64 timeout)
target->drawOk = true; {
if (clearQueue) gxCmdQueue_s* queue = &C3Di_GetContext()->gxQueue;
performClear(); if (!gxCmdQueueWait(queue, timeout))
if (queuedCount>0 && queuedState==0) return false;
updateFrameQueue(); if (timeout==0 && frameStage)
return false;
while (frameStage)
gspWaitForAnyEvent();
gxCmdQueueStop(queue);
gxCmdQueueClear(queue);
return true;
} }
static void C3Di_RenderQueueInit(void) static void C3Di_RenderQueueInit(void)
{ {
gspSetEventCallback(GSPGPU_EVENT_VBlank0, onVBlank0, NULL, false); gspSetEventCallback(GSPGPU_EVENT_VBlank0, onVBlank0, NULL, false);
gspSetEventCallback(GSPGPU_EVENT_VBlank1, onVBlank1, NULL, false); gspSetEventCallback(GSPGPU_EVENT_VBlank1, onVBlank1, NULL, false);
gxCmdQueueSetCallback(&C3Di_GetContext()->gxQueue, onQueueFinish, NULL);
} }
static void C3Di_RenderTargetDestroy(C3D_RenderTarget* target);
void C3Di_RenderQueueExit(void) void C3Di_RenderQueueExit(void)
{ {
int i; int i;
@ -257,22 +158,20 @@ void C3Di_RenderQueueExit(void)
if (!initialized) if (!initialized)
return; return;
C3Di_WaitAndClearQueue(-1);
for (a = firstTarget; a; a = next) for (a = firstTarget; a; a = next)
{ {
next = a->next; next = a->next;
C3D_RenderTargetDelete(a); C3Di_RenderTargetDestroy(a);
} }
gspSetEventCallback(GSPGPU_EVENT_VBlank0, NULL, NULL, false); gspSetEventCallback(GSPGPU_EVENT_VBlank0, NULL, NULL, false);
gspSetEventCallback(GSPGPU_EVENT_VBlank1, NULL, NULL, false); gspSetEventCallback(GSPGPU_EVENT_VBlank1, NULL, NULL, false);
gxCmdQueueSetCallback(&C3Di_GetContext()->gxQueue, NULL, NULL);
for (i = 0; i < 3; i ++) for (i = 0; i < 3; i ++)
linkedTarget[i] = NULL; linkedTarget[i] = NULL;
memset(queuedFrame, 0, sizeof(queuedFrame));
queueSwap = 0;
queuedCount = 0;
queuedState = 0;
initialized = false; initialized = false;
} }
@ -280,8 +179,7 @@ void C3Di_RenderQueueWaitDone(void)
{ {
if (!initialized) if (!initialized)
return; return;
while (queuedCount || transferQueue || clearQueue) C3Di_WaitAndClearQueue(-1);
gspWaitForAnyEvent();
} }
static bool checkRenderQueueInit(void) static bool checkRenderQueueInit(void)
@ -315,15 +213,12 @@ float C3D_FrameRate(float fps)
bool C3D_FrameBegin(u8 flags) bool C3D_FrameBegin(u8 flags)
{ {
if (inFrame) return false; if (inFrame) return false;
int maxCount = (flags & C3D_FRAME_SYNCDRAW) ? 1 : 2; if (flags & C3D_FRAME_SYNCDRAW)
while (queuedCount >= maxCount) C3D_FrameSync();
{ if (!C3Di_WaitAndClearQueue((flags & C3D_FRAME_NONBLOCK) ? 0 : -1))
if (flags & C3D_FRAME_NONBLOCK) return false;
return false;
gspWaitForP3D();
}
osTickCounterStart(&cpuTime);
inFrame = true; inFrame = true;
osTickCounterStart(&cpuTime);
return true; return true;
} }
@ -331,50 +226,59 @@ bool C3D_FrameDrawOn(C3D_RenderTarget* target)
{ {
if (!inFrame) return false; if (!inFrame) return false;
// Queue the target in the frame if it hasn't already been. target->used = true;
int pos = queueSwap^queuedCount;
if (!target->frame[pos])
{
if (!queuedFrame[pos].targetList)
queuedFrame[pos].targetList = target;
else
{
C3D_RenderTarget* a;
for (a = queuedFrame[pos].targetList; a->frame[pos]; a = a->frame[pos]);
a->frame[pos] = target;
}
}
C3D_SetFrameBuf(&target->frameBuf); C3D_SetFrameBuf(&target->frameBuf);
C3D_SetViewport(0, 0, target->frameBuf.width, target->frameBuf.height); C3D_SetViewport(0, 0, target->frameBuf.width, target->frameBuf.height);
return true; return true;
} }
void C3D_FrameSplit(u8 flags)
{
u32 *cmdBuf, cmdBufSize;
if (!inFrame) return;
if (C3Di_SplitFrame(&cmdBuf, &cmdBufSize))
GX_ProcessCommandList(cmdBuf, cmdBufSize*4, flags);
}
void C3D_FrameEnd(u8 flags) void C3D_FrameEnd(u8 flags)
{ {
if (!inFrame) return; C3D_Context* ctx = C3Di_GetContext();
C3D_FrameSplit(flags);
inFrame = false; inFrame = false;
osTickCounterUpdate(&cpuTime); osTickCounterUpdate(&cpuTime);
int pos = queueSwap^queuedCount;
if (!queuedFrame[pos].targetList) return;
// Add the frame to the queue
queuedCount++;
C3Di_FinalizeFrame(&queuedFrame[pos].cmdBuf, &queuedFrame[pos].cmdBufSize);
queuedFrame[pos].flags = flags;
// Flush the entire linear memory if the user did not explicitly mandate to flush the command list // Flush the entire linear memory if the user did not explicitly mandate to flush the command list
if (!(flags & GX_CMDLIST_FLUSH)) if (!(flags & GX_CMDLIST_FLUSH))
{ {
// Take advantage of GX_FlushCacheRegions to flush gsp heap
extern u32 __ctru_linear_heap; extern u32 __ctru_linear_heap;
extern u32 __ctru_linear_heap_size; extern u32 __ctru_linear_heap_size;
GX_FlushCacheRegions(queuedFrame[queueSwap].cmdBuf, queuedFrame[queueSwap].cmdBufSize, (u32 *) __ctru_linear_heap, __ctru_linear_heap_size, NULL, 0); GSPGPU_FlushDataCache((void*)__ctru_linear_heap, __ctru_linear_heap_size);
} }
// Update the frame queue int i;
updateFrameQueue(); C3D_RenderTarget* target;
for (i = 2; i >= 0; i --)
{
target = linkedTarget[i];
if (!target || !target->used)
continue;
target->used = false;
frameStage |= STAGE_HAS_TRANSFER(i);
}
for (target = firstTarget; target; target = target->next)
{
if (!target->used || !target->clearBits)
continue;
target->used = false;
C3D_FrameBufClear(&target->frameBuf, target->clearBits, target->clearColor, target->clearDepth);
}
GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0);
measureGpuTime = true;
osTickCounterStart(&gpuTime);
gxCmdQueueRun(&ctx->gxQueue);
} }
float C3D_GetDrawingTime(void) float C3D_GetDrawingTime(void)
@ -397,7 +301,6 @@ static C3D_RenderTarget* C3Di_RenderTargetNew(void)
static void C3Di_RenderTargetFinishInit(C3D_RenderTarget* target) static void C3Di_RenderTargetFinishInit(C3D_RenderTarget* target)
{ {
target->drawOk = true;
target->prev = lastTarget; target->prev = lastTarget;
target->next = NULL; target->next = NULL;
if (lastTarget) if (lastTarget)
@ -411,15 +314,14 @@ C3D_RenderTarget* C3D_RenderTargetCreate(int width, int height, GPU_COLORBUF col
{ {
if (!checkRenderQueueInit()) goto _fail0; if (!checkRenderQueueInit()) goto _fail0;
u32 size = width*height;
GPU_DEPTHBUF depthFmtReal = GPU_RB_DEPTH16; GPU_DEPTHBUF depthFmtReal = GPU_RB_DEPTH16;
void* depthBuf = NULL; void* depthBuf = NULL;
void* colorBuf = vramAlloc(size*(2+colorFmtSizes[colorFmt])); void* colorBuf = vramAlloc(C3D_CalcColorBufSize(width,height,colorFmt));
if (!colorBuf) goto _fail0; if (!colorBuf) goto _fail0;
if (C3D_DEPTHTYPE_OK(depthFmt)) if (C3D_DEPTHTYPE_OK(depthFmt))
{ {
depthFmtReal = C3D_DEPTHTYPE_VAL(depthFmt); depthFmtReal = C3D_DEPTHTYPE_VAL(depthFmt);
depthBuf = vramAlloc(size*(2+depthFmtSizes[depthFmtReal])); depthBuf = vramAlloc(C3D_CalcDepthBufSize(width,height,depthFmtReal));
if (!depthBuf) goto _fail1; if (!depthBuf) goto _fail1;
} }
@ -459,8 +361,7 @@ C3D_RenderTarget* C3D_RenderTargetCreateFromTex(C3D_Tex* tex, GPU_TEXFACE face,
if (C3D_DEPTHTYPE_OK(depthFmt)) if (C3D_DEPTHTYPE_OK(depthFmt))
{ {
GPU_DEPTHBUF depthFmtReal = C3D_DEPTHTYPE_VAL(depthFmt); GPU_DEPTHBUF depthFmtReal = C3D_DEPTHTYPE_VAL(depthFmt);
u32 size = (u32)fb->width*fb->height; void* depthBuf = vramAlloc(C3D_CalcDepthBufSize(fb->width,fb->height,depthFmtReal));
void* depthBuf = vramAlloc(size*(2+depthFmtSizes[depthFmtReal]));
if (!depthBuf) if (!depthBuf)
{ {
free(target); free(target);
@ -475,13 +376,8 @@ C3D_RenderTarget* C3D_RenderTargetCreateFromTex(C3D_Tex* tex, GPU_TEXFACE face,
return target; return target;
} }
void C3D_RenderTargetDelete(C3D_RenderTarget* target) void C3Di_RenderTargetDestroy(C3D_RenderTarget* target)
{ {
target->clearBits = 0;
target->linked = false;
while (!target->drawOk)
gspWaitForAnyEvent();
if (target->ownsColor) if (target->ownsColor)
vramFree(target->frameBuf.colorBuf); vramFree(target->frameBuf.colorBuf);
if (target->ownsDepth) if (target->ownsDepth)
@ -494,6 +390,14 @@ void C3D_RenderTargetDelete(C3D_RenderTarget* target)
free(target); free(target);
} }
void C3D_RenderTargetDelete(C3D_RenderTarget* target)
{
if (inFrame)
svcBreak(USERBREAK_PANIC); // Shouldn't happen.
C3Di_WaitAndClearQueue(-1);
C3Di_RenderTargetDestroy(target);
}
void C3D_RenderTargetSetClear(C3D_RenderTarget* target, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth) void C3D_RenderTargetSetClear(C3D_RenderTarget* target, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth)
{ {
if (!target->frameBuf.colorBuf) clearBits &= ~C3D_CLEAR_COLOR; if (!target->frameBuf.colorBuf) clearBits &= ~C3D_CLEAR_COLOR;
@ -504,11 +408,8 @@ void C3D_RenderTargetSetClear(C3D_RenderTarget* target, C3D_ClearBits clearBits,
target->clearColor = clearColor; target->clearColor = clearColor;
target->clearDepth = clearDepth; target->clearDepth = clearDepth;
if (clearBits &~ oldClearBits && target->drawOk) if (clearBits &~ oldClearBits)
{ C3D_FrameBufClear(&target->frameBuf, clearBits, clearColor, clearDepth);
target->drawOk = false;
clearTarget(target);
}
} }
void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags) void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags)
@ -527,27 +428,24 @@ void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx
void C3D_SafeDisplayTransfer(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 flags) void C3D_SafeDisplayTransfer(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 flags)
{ {
while (transferQueue || inSafeTransfer) C3Di_WaitAndClearQueue(-1);
gspWaitForPPF();
inSafeTransfer = true; inSafeTransfer = true;
gspSetEventCallback(GSPGPU_EVENT_PPF, onTransferFinish, NULL, true);
GX_DisplayTransfer(inadr, indim, outadr, outdim, flags); GX_DisplayTransfer(inadr, indim, outadr, outdim, flags);
gxCmdQueueRun(&C3Di_GetContext()->gxQueue);
} }
void C3D_SafeTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags) void C3D_SafeTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
{ {
while (transferQueue || inSafeTransfer) C3Di_WaitAndClearQueue(-1);
gspWaitForPPF();
inSafeTransfer = true; inSafeTransfer = true;
gspSetEventCallback(GSPGPU_EVENT_PPF, onTransferFinish, NULL, true);
GX_TextureCopy(inadr, indim, outadr, outdim, size, flags); GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
gxCmdQueueRun(&C3Di_GetContext()->gxQueue);
} }
void C3D_SafeMemoryFill(u32* buf0a, u32 buf0v, u32* buf0e, u16 control0, u32* buf1a, u32 buf1v, u32* buf1e, u16 control1) void C3D_SafeMemoryFill(u32* buf0a, u32 buf0v, u32* buf0e, u16 control0, u32* buf1a, u32 buf1v, u32* buf1e, u16 control1)
{ {
while (clearQueue || inSafeClear) C3Di_WaitAndClearQueue(-1);
gspWaitForAnyEvent(); inSafeTransfer = true;
inSafeClear = true;
gspSetEventCallback(buf0a ? GSPGPU_EVENT_PSC0 : GSPGPU_EVENT_PSC1, onClearDone, NULL, true);
GX_MemoryFill(buf0a, buf0v, buf0e, control0, buf1a, buf1v, buf1e, control1); GX_MemoryFill(buf0a, buf0v, buf0e, control0, buf1a, buf1v, buf1e, control1);
gxCmdQueueRun(&C3Di_GetContext()->gxQueue);
} }