From 59231120822511a1c760dd09f6c39e22daeeda05 Mon Sep 17 00:00:00 2001 From: fincs Date: Sat, 21 Nov 2015 00:49:33 +0100 Subject: [PATCH] Several improvements to shaderProgram, see details: - shaderProgramSetGshInputPermutation() was added - shaderInstanceGetUniformLocation() return type corrected - shaderInstanceSetBool bug fixed (true/false were incorrectly mapped) - shaderInstance now has bitmasks indicating which int/bool uniforms are used by int/bool constants in the DVLE - shaderProgramUse() was revamped: - Shader unit setup/code/opdesc upload code was separated into a new function called shaderProgramConfigure() - shaderProgramUse() calls shaderProgramConfigure() and afterwards uploads DVLE constants - GPU wrappers that perform uniform management will want to use shaderProgramConfigure() instead of shaderProgramUse() and later read the shaderInstance uniform usage bitmasks to set the constants. --- libctru/include/3ds/gpu/registers.h | 2 +- libctru/include/3ds/gpu/shaderProgram.h | 24 +++++++- libctru/source/gpu/shaderProgram.c | 78 ++++++++++++++++++------- 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/libctru/include/3ds/gpu/registers.h b/libctru/include/3ds/gpu/registers.h index 9a61a79..52bf566 100644 --- a/libctru/include/3ds/gpu/registers.h +++ b/libctru/include/3ds/gpu/registers.h @@ -1,6 +1,6 @@ /** * @file registers.h - * @param GPU registers. + * @description GPU registers. */ #pragma once diff --git a/libctru/include/3ds/gpu/shaderProgram.h b/libctru/include/3ds/gpu/shaderProgram.h index 0393107..70d2457 100644 --- a/libctru/include/3ds/gpu/shaderProgram.h +++ b/libctru/include/3ds/gpu/shaderProgram.h @@ -19,8 +19,10 @@ typedef struct { DVLE_s* dvle; ///< Shader DVLE. u16 boolUniforms; ///< Boolean uniforms. + u16 boolUniformMask; ///< Used boolean uniform mask. u32 intUniforms[4]; ///< Integer uniforms. float24Uniform_s* float24Uniforms; ///< 24-bit float uniforms. + u8 intUniformMask; ///< Used integer uniform mask. 
u8 numFloat24Uniforms; ///< Float uniform count. }shaderInstance_s; @@ -29,7 +31,8 @@ typedef struct { shaderInstance_s* vertexShader; ///< Vertex shader. shaderInstance_s* geometryShader; ///< Geometry shader. - u8 geometryShaderInputStride; ///< Geometry shader input stride. + u32 geoShaderInputPermutation[2]; ///< Geometry shader input permutation. + u8 geoShaderInputStride; ///< Geometry shader input stride. }shaderProgram_s; /** @@ -66,7 +69,7 @@ Result shaderInstanceGetBool(shaderInstance_s* si, int id, bool* value); * @param si Shader instance to use. * @param name Name of the uniform. */ -Result shaderInstanceGetUniformLocation(shaderInstance_s* si, const char* name); +s8 shaderInstanceGetUniformLocation(shaderInstance_s* si, const char* name); /** * @brief Initializes a shader program. @@ -96,7 +99,22 @@ Result shaderProgramSetVsh(shaderProgram_s* sp, DVLE_s* dvle); Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride); /** - * @brief Sets the active shader program. + * @brief Configures the permutation of the input attributes of the geometry shader of a shader program. + * @param sp Shader program to use. + * @param permutation Attribute permutation to use. + */ +Result shaderProgramSetGshInputPermutation(shaderProgram_s* sp, u64 permutation); + +/** + * @brief Configures the shader units to use the specified shader program. + * @param sp Shader program to use. + * @param sendVshCode When true, the vertex shader's code and operand descriptors are uploaded. + * @param sendGshCode When true, the geometry shader's code and operand descriptors are uploaded. + */ +Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode); + +/** + * @brief Same as shaderProgramConfigure, but always loading code/operand descriptors and uploading DVLE constants afterwards. * @param sp Shader program to use. 
*/ Result shaderProgramUse(shaderProgram_s* sp); diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c index 5f54829..abefbe1 100644 --- a/libctru/source/gpu/shaderProgram.c +++ b/libctru/source/gpu/shaderProgram.c @@ -1,6 +1,7 @@ #include #include #include <3ds/types.h> +#include <3ds/result.h> #include <3ds/gpu/registers.h> #include <3ds/gpu/shaderProgram.h> @@ -14,12 +15,14 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) si->dvle = dvle; - si->boolUniforms = 0xFFFF; + si->boolUniforms = 0; + si->boolUniformMask = 0; si->intUniforms[0] = 0x00000000; si->intUniforms[1] = 0x00000000; si->intUniforms[2] = 0x00000000; si->intUniforms[3] = 0x00000000; si->float24Uniforms = NULL; + si->intUniformMask = 0; int i; DVLE_constEntry_s* cnst = dvle->constTableData; @@ -34,7 +37,11 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) shaderInstanceSetBool(si, cnst[i].id, cnst[i].data[0]&1); break; case DVLE_CONST_u8: - if(cnst[i].id<4)si->intUniforms[cnst[i].id] = cnst[i].data[0]; + if(cnst[i].id<4) + { + si->intUniforms[cnst[i].id] = cnst[i].data[0]; + si->intUniformMask |= (1<boolUniforms &= ~(1<boolUniforms |= (value)<boolUniformMask |= (1<15)return -2; if(!value)return -3; - *value = !((si->boolUniforms>>id)&1); + *value = ((si->boolUniforms>>id)&1); return 0; } -Result shaderInstanceGetUniformLocation(shaderInstance_s* si, const char* name) +s8 shaderInstanceGetUniformLocation(shaderInstance_s* si, const char* name) { if(!si)return -1; @@ -157,19 +165,28 @@ Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride) sp->geometryShader = (shaderInstance_s*)malloc(sizeof(shaderInstance_s)); if(!sp->geometryShader)return -3; - sp->geometryShaderInputStride = stride; + sp->geoShaderInputPermutation[0] = 0x76543210; + sp->geoShaderInputPermutation[1] = 0xFEDCBA98; + sp->geoShaderInputStride = stride; return shaderInstanceInit(sp->geometryShader, dvle); } -Result shaderProgramUse(shaderProgram_s* sp) 
+Result shaderProgramSetGshInputPermutation(shaderProgram_s* sp, u64 permutation) +{ + if(!sp || !sp->geometryShader)return -1; + + sp->geoShaderInputPermutation[0] = permutation & 0xFFFFFFFF; + sp->geoShaderInputPermutation[1] = permutation>>32; + return 0; +} + +Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode) { if(!sp)return -1; if(!sp->vertexShader)return -2; - int i; - // configure geostage // has to be done first or else VSH registers might only reconfigure 3 of the 4 shader units ! if(!sp->geometryShader) @@ -184,11 +201,11 @@ Result shaderProgramUse(shaderProgram_s* sp) // setup vertex shader stuff no matter what const DVLE_s* vshDvle = sp->vertexShader->dvle; const DVLP_s* vshDvlp = vshDvle->dvlp; - GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize); - GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize); - GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms); - GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4); - for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4); + if (sendVshCode) + { + GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize); + GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize); + } GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF)); GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask); @@ -209,19 +226,19 @@ Result shaderProgramUse(shaderProgram_s* sp) // setup both vertex and geometry shader const DVLE_s* gshDvle = sp->geometryShader->dvle; const DVLP_s* gshDvlp = gshDvle->dvlp; - GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize); - GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize); - 
GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms); - GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4); - for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4); + if (sendGshCode) + { + GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize); + GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize); + } GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF)); GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask); GPU_SetShaderOutmap((u32*)gshDvle->outmapData); //GSH input attributes stuff - GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geometryShaderInputStride-1)); - GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2); + GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geoShaderInputStride-1)); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, sp->geoShaderInputPermutation, 2); GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? GPUCMD_AddWrite(GPUREG_006F, 0x01030703); // ? 
@@ -230,6 +247,27 @@ Result shaderProgramUse(shaderProgram_s* sp) return 0; } +Result shaderProgramUse(shaderProgram_s* sp) +{ + Result rc = shaderProgramConfigure(sp, true, true); + if (R_FAILED(rc)) return rc; + + int i; + + // Set up uniforms + GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|~sp->vertexShader->boolUniforms); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4); + for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4); + if (sp->geometryShader) + { + GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|~sp->geometryShader->boolUniforms); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4); + for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4); + } + + return 0; +} + void GPU_SetShaderOutmap(u32 outmapData[8]) { GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1);