From f4232926c3d3f31b28a7ae7bc3b6c9a658f6eb51 Mon Sep 17 00:00:00 2001
From: fincs
Date: Sun, 26 Mar 2017 19:48:34 +0200
Subject: [PATCH] Add GX command queue system for batching GX commands

---
Reviewer notes (this region is ignored when the patch is applied):

 * gxqueue.c: the first two #include lines had lost their header names in
   this copy of the patch. They are restored as <stdlib.h> and <string.h>;
   <string.h> is required by the memcpy() call in gxCmdQueueAdd(), while
   <stdlib.h> is an assumption -- TODO confirm against the repository.
 * gxCmdQueueWait(): `timeout` is documented in gx.h as nanoseconds, but it
   is added unconverted to the value of svcGetSystemTick(). Confirm that
   the tick unit matches the documented unit.
 * gxCmdQueueStop() and gxCmdQueueWait() never read their `queue` argument;
   both operate on the file-level curQueue / isRunning state.
 * gxCmdQueueRun() writes curQueue, isActive and isRunning before taking
   queueLock, whereas gxCmdQueueInterrupt() tests isRunning before locking.
 * gxCmdQueueDoCommands() ignores the result of gspSubmitGxCommand() and
   advances curEntry regardless.
 * gx.c: the weak gxCmdQueueAdd() stub has an empty body, and
   submitGxCommand() returns 0 after calling it.
 * Whitespace inside lines (tabs, column alignment) was reconstructed by
   convention; apply with whitespace tolerance if a hunk is rejected.

 libctru/include/3ds/gpu/gx.h     |  85 ++++++++++++++++++++--
 libctru/source/gpu/gx.c          |  45 ++++++++----
 libctru/source/gpu/gxqueue.c     | 120 +++++++++++++++++++++++++++++++
 libctru/source/services/gspgpu.c |   6 ++
 4 files changed, 240 insertions(+), 16 deletions(-)
 create mode 100644 libctru/source/gpu/gxqueue.c

diff --git a/libctru/include/3ds/gpu/gx.h b/libctru/include/3ds/gpu/gx.h
index 75a8d26..a714844 100644
--- a/libctru/include/3ds/gpu/gx.h
+++ b/libctru/include/3ds/gpu/gx.h
@@ -60,13 +60,90 @@ typedef enum
 /// Creates a transfer scaling flag.
 #define GX_TRANSFER_SCALING(x) ((x)<<24)
 
-/// Command list flag bit 0.
-#define GX_CMDLIST_BIT0  BIT(0)
+/// Updates gas additive blend results.
+#define GX_CMDLIST_UPDATE_GAS_ACC BIT(0)
 /// Flushes the command list.
-#define GX_CMDLIST_FLUSH BIT(1)
+#define GX_CMDLIST_FLUSH          BIT(1)
 
 extern u32* gxCmdBuf; ///< GX command buffer.
 
+/// GX command entry
+typedef union
+{
+	u32 data[8]; ///< Raw command data
+	struct
+	{
+		u8 type; ///< Command type
+		u8 unk1;
+		u8 unk2;
+		u8 unk3;
+		u32 args[7]; ///< Command arguments
+	};
+} gxCmdEntry_s;
+
+/// GX command queue structure
+typedef struct tag_gxCmdQueue_s
+{
+	gxCmdEntry_s* entries; ///< Pointer to array of GX command entries
+	u16 maxEntries; ///< Capacity of the command array
+	u16 numEntries; ///< Number of commands in the queue
+	u16 curEntry; ///< Index of the first pending command to be submitted to GX
+	u16 lastEntry; ///< Number of commands completed by GX
+	void (* callback)(struct tag_gxCmdQueue_s*); ///< User callback
+	void* user; ///< Data for user callback
+} gxCmdQueue_s;
+
+/**
+ * @brief Clears a GX command queue.
+ * @param queue The GX command queue.
+ */
+void gxCmdQueueClear(gxCmdQueue_s* queue);
+
+/**
+ * @brief Adds a command to a GX command queue.
+ * @param queue The GX command queue.
+ * @param entry The GX command to add.
+ */
+void gxCmdQueueAdd(gxCmdQueue_s* queue, const gxCmdEntry_s* entry);
+
+/**
+ * @brief Runs a GX command queue, causing it to begin processing incoming commands as they arrive.
+ * @param queue The GX command queue.
+ */
+void gxCmdQueueRun(gxCmdQueue_s* queue);
+
+/**
+ * @brief Stops a GX command queue from processing incoming commands.
+ * @param queue The GX command queue.
+ */
+void gxCmdQueueStop(gxCmdQueue_s* queue);
+
+/**
+ * @brief Waits for a GX command queue to finish executing pending commands.
+ * @param queue The GX command queue.
+ * @param timeout Optional timeout (in nanoseconds) to wait (specify -1 for no timeout).
+ * @return false if timeout expired, true otherwise.
+ */
+bool gxCmdQueueWait(gxCmdQueue_s* queue, s64 timeout);
+
+/**
+ * @brief Sets the completion callback for a GX command queue.
+ * @param queue The GX command queue.
+ * @param callback The completion callback.
+ * @param user User data.
+ */
+static inline void gxCmdQueueSetCallback(gxCmdQueue_s* queue, void (* callback)(gxCmdQueue_s*), void* user)
+{
+	queue->callback = callback;
+	queue->user = user;
+}
+
+/**
+ * @brief Selects a command queue to which GX_* functions will add commands instead of immediately submitting them to GX.
+ * @param queue The GX command queue. (Pass NULL to remove the bound command queue)
+ */
+void GX_BindQueue(gxCmdQueue_s* queue);
+
 /**
  * @brief Requests a DMA.
  * @param src Source to DMA from.
@@ -120,7 +197,7 @@ Result GX_DisplayTransfer(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 fl
 Result GX_TextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags);
 
 /**
- * @brief Flushes the cache regions of three buffers.
+ * @brief Flushes the cache regions of three buffers. (This command cannot be queued in a GX command queue)
  * @param buf0a Address of the first buffer.
  * @param buf0s Size of the first buffer.
  * @param buf1a Address of the second buffer.
diff --git a/libctru/source/gpu/gx.c b/libctru/source/gpu/gx.c
index ae3bc1e..7e836e0 100644
--- a/libctru/source/gpu/gx.c
+++ b/libctru/source/gpu/gx.c
@@ -10,37 +10,58 @@
 #include <3ds/services/gspgpu.h>
 
 u32* gxCmdBuf;
+static gxCmdQueue_s* boundQueue;
+
+// Dummy version to avoid linking in gxqueue.c if not actually used
+__attribute__((weak)) void gxCmdQueueAdd(gxCmdQueue_s* queue, const gxCmdEntry_s* entry)
+{
+}
+
+void GX_BindQueue(gxCmdQueue_s* queue)
+{
+	boundQueue = queue;
+}
+
+static Result submitGxCommand(u32 gxCommand[0x8])
+{
+	if (boundQueue)
+	{
+		gxCmdQueueAdd(boundQueue, (const gxCmdEntry_s*)gxCommand);
+		return 0;
+	}
+	else
+		return gspSubmitGxCommand(gxCmdBuf, gxCommand);
+}
 
 Result GX_RequestDma(u32* src, u32* dst, u32 length)
 {
 	u32 gxCommand[0x8];
-	gxCommand[0]=0x00; //CommandID
+	gxCommand[0]=0x01000100 | 0x00; //CommandID
 	gxCommand[1]=(u32)src; //source address
 	gxCommand[2]=(u32)dst; //destination address
 	gxCommand[3]=length; //size
 	gxCommand[4]=gxCommand[5]=gxCommand[6]=gxCommand[7]=0x0;
 
-	return gspSubmitGxCommand(gxCmdBuf, gxCommand);
+	return submitGxCommand(gxCommand);
 }
 
 Result GX_ProcessCommandList(u32* buf0a, u32 buf0s, u8 flags)
 {
 	u32 gxCommand[0x8];
-	gxCommand[0]=0x01; //CommandID
+	gxCommand[0]=0x01000100 | 0x01; //CommandID
 	gxCommand[1]=(u32)buf0a; //buf0 address
 	gxCommand[2]=(u32)buf0s; //buf0 size
 	gxCommand[3]=flags&1; //written to GSP module state
 	gxCommand[4]=gxCommand[5]=gxCommand[6]=0x0;
 	gxCommand[7]=(flags>>1)&1; //when non-zero, call svcFlushProcessDataCache() with the specified buffer
 
-	return gspSubmitGxCommand(gxCmdBuf, gxCommand);
+	return submitGxCommand(gxCommand);
 }
 
 Result GX_MemoryFill(u32* buf0a, u32 buf0v, u32* buf0e, u16 control0, u32* buf1a, u32 buf1v, u32* buf1e, u16 control1)
 {
 	u32 gxCommand[0x8];
-	// gxCommand[0]=0x02; //CommandID
-	gxCommand[0]=0x01000102; //CommandID
+	gxCommand[0]=0x01000100 | 0x02; //CommandID
 	gxCommand[1]=(u32)buf0a; //buf0 address
 	gxCommand[2]=buf0v; //buf0 value
 	gxCommand[3]=(u32)buf0e; //buf0 end addr
@@ -49,14 +70,14 @@ Result GX_MemoryFill(u32* buf0a, u32 buf0v, u32* buf0e, u16 control0, u32* buf1a
 	gxCommand[6]=(u32)buf1e; //buf1 end addr
 	gxCommand[7]=(control0)|(control1<<16);
 
-	return gspSubmitGxCommand(gxCmdBuf, gxCommand);
+	return submitGxCommand(gxCommand);
 }
 
 // Flags, for applications this is 0x1001000 for the main screen, and 0x1000 for the sub screen.
 Result GX_DisplayTransfer(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 flags)
 {
 	u32 gxCommand[0x8];
-	gxCommand[0]=0x03; //CommandID
+	gxCommand[0]=0x01000100 | 0x03; //CommandID
 	gxCommand[1]=(u32)inadr;
 	gxCommand[2]=(u32)outadr;
 	gxCommand[3]=indim;
@@ -64,13 +85,13 @@ Result GX_DisplayTransfer(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 fl
 	gxCommand[5]=flags;
 	gxCommand[6]=gxCommand[7]=0x0;
 
-	return gspSubmitGxCommand(gxCmdBuf, gxCommand);
+	return submitGxCommand(gxCommand);
 }
 
 Result GX_TextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
 {
 	u32 gxCommand[0x8];
-	gxCommand[0]=0x04; //CommandID
+	gxCommand[0]=0x01000100 | 0x04; //CommandID
 	gxCommand[1]=(u32)inadr;
 	gxCommand[2]=(u32)outadr;
 	gxCommand[3]=size;
@@ -79,13 +100,13 @@ Result GX_TextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size,
 	gxCommand[6]=flags;
 	gxCommand[7]=0x0;
 
-	return gspSubmitGxCommand(gxCmdBuf, gxCommand);
+	return submitGxCommand(gxCommand);
 }
 
 Result GX_FlushCacheRegions(u32* buf0a, u32 buf0s, u32* buf1a, u32 buf1s, u32* buf2a, u32 buf2s)
 {
 	u32 gxCommand[0x8];
-	gxCommand[0]=0x05; //CommandID
+	gxCommand[0]=0x00000100 | 0x05; //CommandID
 	gxCommand[1]=(u32)buf0a; //buf0 address
 	gxCommand[2]=(u32)buf0s; //buf0 size
 	gxCommand[3]=(u32)buf1a; //buf1 address
diff --git a/libctru/source/gpu/gxqueue.c b/libctru/source/gpu/gxqueue.c
new file mode 100644
index 0000000..cc79f83
--- /dev/null
+++ b/libctru/source/gpu/gxqueue.c
@@ -0,0 +1,120 @@
+#include <stdlib.h>
+#include <string.h>
+#include <3ds/types.h>
+#include <3ds/svc.h>
+#include <3ds/synchronization.h>
+#include <3ds/gpu/gx.h>
+#include <3ds/services/gspgpu.h>
+
+#define MAX_PARALLEL_CMDS 3
+
+extern u32* gxCmdBuf;
+static gxCmdQueue_s* curQueue;
+static bool isActive, isRunning, shouldStop;
+static LightLock queueLock = 1;
+
+static void gxCmdQueueDoCommands(void)
+{
+	if (shouldStop)
+		return;
+	int batchSize = curQueue->lastEntry+MAX_PARALLEL_CMDS-curQueue->curEntry;
+	while (curQueue->curEntry < curQueue->numEntries && batchSize--)
+	{
+		gxCmdEntry_s* entry = &curQueue->entries[curQueue->curEntry++];
+		gspSubmitGxCommand(gxCmdBuf, entry->data);
+	}
+}
+
+void gxCmdQueueInterrupt(GSPGPU_Event irq)
+{
+	if (!isRunning || irq==GSPGPU_EVENT_PSC1 || irq==GSPGPU_EVENT_VBlank0 || irq==GSPGPU_EVENT_VBlank1)
+		return;
+	gxCmdQueue_s* runCb = NULL;
+	LightLock_Lock(&queueLock);
+	curQueue->lastEntry++;
+	if (shouldStop)
+	{
+		curQueue = NULL;
+		isActive = false;
+		isRunning = false;
+		shouldStop = false;
+	}
+	else if (curQueue->lastEntry < curQueue->numEntries)
+		gxCmdQueueDoCommands();
+	else
+	{
+		runCb = curQueue;
+		isRunning = false;
+	}
+	LightLock_Unlock(&queueLock);
+	if (runCb && runCb->callback)
+		runCb->callback(runCb);
+}
+
+void gxCmdQueueClear(gxCmdQueue_s* queue)
+{
+	if (queue==curQueue && isRunning)
+		svcBreak(USERBREAK_PANIC); // Shouldn't happen.
+	queue->numEntries = 0;
+	queue->curEntry = 0;
+	queue->lastEntry = 0;
+}
+
+void gxCmdQueueAdd(gxCmdQueue_s* queue, const gxCmdEntry_s* entry)
+{
+	if (queue->numEntries == queue->maxEntries)
+		svcBreak(USERBREAK_PANIC); // Shouldn't happen.
+	memcpy(&queue->entries[queue->numEntries], entry, sizeof(gxCmdEntry_s));
+	LightLock_Lock(&queueLock);
+	queue->numEntries++;
+	if (queue==curQueue && isActive && !isRunning)
+	{
+		isRunning = true;
+		gxCmdQueueDoCommands();
+	}
+	LightLock_Unlock(&queueLock);
+}
+
+void gxCmdQueueRun(gxCmdQueue_s* queue)
+{
+	if (isRunning)
+		return;
+	curQueue = queue;
+	isActive = true;
+	if (queue->lastEntry < queue->numEntries)
+	{
+		isRunning = true;
+		LightLock_Lock(&queueLock);
+		gxCmdQueueDoCommands();
+		LightLock_Unlock(&queueLock);
+	} else
+		isRunning = false;
+}
+
+void gxCmdQueueStop(gxCmdQueue_s* queue)
+{
+	if (!curQueue)
+		return;
+	LightLock_Lock(&queueLock);
+	if (!isRunning)
+	{
+		curQueue = NULL;
+		isActive = false;
+	} else
+		shouldStop = true;
+	LightLock_Unlock(&queueLock);
+}
+
+bool gxCmdQueueWait(gxCmdQueue_s* queue, s64 timeout)
+{
+	u64 deadline = U64_MAX;
+	if (timeout >= 0)
+		deadline = svcGetSystemTick() + timeout;
+	while (isRunning)
+	{
+		if (timeout >= 0 && (s64)(u64)(svcGetSystemTick()-deadline) >= 0)
+			return false;
+		gspWaitForAnyEvent();
+	}
+	return true;
+}
diff --git a/libctru/source/services/gspgpu.c b/libctru/source/services/gspgpu.c
index e185f10..8d86689 100644
--- a/libctru/source/services/gspgpu.c
+++ b/libctru/source/services/gspgpu.c
@@ -144,6 +144,11 @@ static int popInterrupt()
 	return curEvt;
 }
 
+// Dummy version to avoid linking in gxqueue.c if not actually used
+__attribute__((weak)) void gxCmdQueueInterrupt(GSPGPU_Event irq)
+{
+}
+
 void gspEventThreadMain(void *arg)
 {
 	while (gspRunEvents)
@@ -160,6 +165,7 @@ void gspEventThreadMain(void *arg)
 
 			if (curEvt < GSPGPU_EVENT_MAX)
 			{
+				gxCmdQueueInterrupt((GSPGPU_Event)curEvt);
 				if (gspEventCb[curEvt])
 				{
 					ThreadFunc func = gspEventCb[curEvt];