From 80e6bcfd34c8033a0a944ab13c9d8974d4f139bf Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 19:16:23 -0800 Subject: [PATCH] shaderProgram --- libctru/include/3ds.h | 1 + libctru/include/3ds/gpu/gpu.h | 10 ++- libctru/include/3ds/gpu/shaderProgram.h | 9 ++ libctru/include/3ds/gpu/shbin.h | 19 ++++- libctru/source/gpu/gpu.c | 31 +++++++ libctru/source/gpu/shaderProgram.c | 109 +++++++++++++++++++++++- libctru/source/gpu/shbin.c | 81 ++++++++---------- 7 files changed, 211 insertions(+), 49 deletions(-) diff --git a/libctru/include/3ds.h b/libctru/include/3ds.h index 83e6d05..20e6b28 100644 --- a/libctru/include/3ds.h +++ b/libctru/include/3ds.h @@ -35,6 +35,7 @@ extern "C" { #include <3ds/gpu/gx.h> #include <3ds/gpu/gpu.h> #include <3ds/gpu/shbin.h> +#include <3ds/gpu/shaderProgram.h> #include <3ds/sdmc.h> diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index dc590e0..6373240 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -203,6 +203,11 @@ typedef enum{ GPU_UNKPRIM = 0x0300 // ? }GPU_Primitive_t; +typedef enum{ + GPU_VERTEX_SHADER=0x0, + GPU_GEOMETRY_SHADER=0x1 +}GPU_SHADER_TYPE; + void GPU_SetUniform(u32 startreg, u32* data, u32 numreg); void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u32 h); @@ -232,5 +237,8 @@ void GPU_SetTexEnv(u8 id, u16 rgbSources, u16 alphaSources, u16 rgbOperands, u16 void GPU_DrawArray(GPU_Primitive_t primitive, u32 n); void GPU_DrawElements(GPU_Primitive_t primitive, u32* indexArray, u32 n); - void GPU_FinishDrawing(); + +void GPU_SetShaderOutmap(u32 outmapData[8]); +void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length); +void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length); diff --git a/libctru/include/3ds/gpu/shaderProgram.h b/libctru/include/3ds/gpu/shaderProgram.h index e5f2cba..b6f0315 100644 --- a/libctru/include/3ds/gpu/shaderProgram.h +++ b/libctru/include/3ds/gpu/shaderProgram.h @@ -3,11 +3,20 @@ #include <3ds/types.h> #include <3ds/gpu/shbin.h> +typedef struct +{ + u32 id; + u32 data[3]; +}float24Uniform_s; + // this structure describes an instance of either a vertex or geometry shader typedef struct { DVLE_s* dvle; u16 boolUniforms; + u32 intUniforms[4]; + float24Uniform_s* float24Uniforms; + u8 numFloat24Uniforms; }shaderInstance_s; // this structure describes an instance of a full shader program diff --git a/libctru/include/3ds/gpu/shbin.h b/libctru/include/3ds/gpu/shbin.h index 89cbde9..ed41114 100644 --- a/libctru/include/3ds/gpu/shbin.h +++ b/libctru/include/3ds/gpu/shbin.h @@ -1,10 +1,18 @@ #pragma once +#include <3ds/gpu/gpu.h> + typedef enum{ - VERTEX_SHDR=0x0, - GEOMETRY_SHDR=0x1 + VERTEX_SHDR=GPU_VERTEX_SHADER, + GEOMETRY_SHDR=GPU_GEOMETRY_SHADER }DVLE_type; +typedef enum{ + DVLE_CONST_BOOL=0x0, + DVLE_CONST_u8=0x1, + DVLE_CONST_FLOAT24=0x2, +}DVLE_constantType; + typedef enum{ RESULT_POSITION = 0x0, RESULT_NORMALQUAT = 0x1, @@ -24,7 +32,8 @@ typedef struct{ }DVLP_s; typedef struct{ - u32 header; + u16 type; + u16 id; u32 data[4]; }DVLE_constEntry_s; @@ -43,6 +52,7 @@ typedef struct{ typedef struct{ DVLE_type type; + DVLP_s* dvlp; u32 mainOffset, endmainOffset; u32 constTableSize; DVLE_constEntry_s* constTableData; @@ -51,6 +61,8 @@ typedef struct{ u32 uniformTableSize; DVLE_uniformEntry_s* uniformTableData; char* symbolTableData; + u8 outmapMask; + u32 outmapData[8]; }DVLE_s; typedef struct{ @@ -69,3 +81,4 @@ void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type); void DVLE_SendOutmap(DVLE_s* dvle); void DVLE_SendConstants(DVLE_s* dvle); +void DVLE_GenerateOutmap(DVLE_s* dvle); diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index ba36fe1..b1f3dbb 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -509,3 +509,34 @@ void GPU_FinishDrawing() GPUCMD_AddSingleParam(0x000F0110, 0x00000001); GPUCMD_AddSingleParam(0x000F0063, 0x00000001); } + +void GPU_SetShaderOutmap(u32 outmapData[8]) +{ + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1); + GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8); +} + +void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length) +{ + if(!data)return; + + u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); + + GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, offset); + + int i; + for(i=0;i +#include #include <3ds/types.h> +#include <3ds/gpu/registers.h> #include <3ds/gpu/shaderProgram.h> Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) @@ -7,7 +9,64 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) if(!si || !dvle)return -1; si->dvle = dvle; + si->boolUniforms = 0xFFFF; + si->intUniforms[0] = 0x00000000; + si->intUniforms[1] = 0x00000000; + si->intUniforms[2] = 0x00000000; + si->intUniforms[3] = 0x00000000; + si->float24Uniforms = NULL; + + int i; + DVLE_constEntry_s* cnst = dvle->constTableData; + if(cnst) + { + int float24cnt=0; + for(i=0; iconstTableSize; i++) + { + switch(cnst[i].type) + { + case DVLE_CONST_BOOL: + shaderInstanceSetBool(si, cnst[i].id, cnst[i].data[0]&1); + break; + case DVLE_CONST_u8: + if(cnst[i].id<4)si->intUniforms[cnst[i].id] = cnst[i].data[0]; + break; + case DVLE_CONST_FLOAT24: + float24cnt++; + break; + } + } + + if(float24cnt) + { + si->float24Uniforms = malloc(sizeof(float24Uniform_s)*float24cnt); + if(!si->float24Uniforms) + { + float24cnt = 0; + u32 rev[3]; + u8* rev8=(u8*)rev; + for(i=0; iconstTableSize; i++) + { + if(cnst[i].type==DVLE_CONST_FLOAT24) + { + memcpy(&rev8[0], &cnst[i].data[0], 3); + memcpy(&rev8[3], &cnst[i].data[1], 3); + memcpy(&rev8[6], &cnst[i].data[2], 3); + memcpy(&rev8[9], &cnst[i].data[3], 3); + + si->float24Uniforms[float24cnt].id = cnst[i].id; + si->float24Uniforms[float24cnt].data[0] = rev[2]; + si->float24Uniforms[float24cnt].data[1] = rev[1]; + si->float24Uniforms[float24cnt].data[2] = rev[0]; + + float24cnt++; + } + } + } + si->numFloat24Uniforms = float24cnt; + } + } return 0; } @@ -16,6 +75,7 @@ Result shaderInstanceFree(shaderInstance_s* si) { if(!si)return -1; + if(si->float24Uniforms)free(si->float24Uniforms); free(si); return 0; @@ -97,11 +157,58 @@ Result shaderProgramUse(shaderProgram_s* sp) if(!sp->vertexShader)return -2; + int i; + + // setup vertex shader stuff no matter what + const DVLE_s* vshDvle = sp->vertexShader->dvle; + const DVLP_s* vshDvlp = vshDvle->dvlp; + GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize); + GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize); + GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4); + for(i=0; ivertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4); + GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF)); + GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask); + + GPUCMD_AddWrite(GPUREG_024A, vshDvle->outmapData[0]-1); // ? + GPUCMD_AddWrite(GPUREG_0251, vshDvle->outmapData[0]-1); // ? + + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); // ? + GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // ? + if(!sp->geometryShader) { - // only deal with vertex shader + // finish setting up vertex shader alone + GPU_SetShaderOutmap((u32*)vshDvle->outmapData); + + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000); + + GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? + GPUCMD_AddWrite(GPUREG_006F, 0x00000703); // ? }else{ // setup both vertex and geometry shader + const DVLE_s* gshDvle = sp->geometryShader->dvle; + const DVLP_s* gshDvlp = gshDvle->dvlp; + GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize); + GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize); + GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4); + for(i=0; igeometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4); + GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF)); + GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask); + + GPU_SetShaderOutmap((u32*)gshDvle->outmapData); + + //GSH input attributes stuff + GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000003); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2); + + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000001); + + GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? + GPUCMD_AddWrite(GPUREG_006F, 0x01030703); // ? } return 0; diff --git a/libctru/source/gpu/shbin.c b/libctru/source/gpu/shbin.c index e825f85..9c51f9b 100644 --- a/libctru/source/gpu/shbin.c +++ b/libctru/source/gpu/shbin.c @@ -25,15 +25,18 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) ret->DVLP.codeSize=dvlpData[3]; ret->DVLP.codeData=&dvlpData[dvlpData[2]/4]; ret->DVLP.opdescSize=dvlpData[5]; - ret->DVLP.opcdescData=&dvlpData[dvlpData[4]/4]; + ret->DVLP.opcdescData=(u32*)malloc(sizeof(u32)*ret->DVLP.opdescSize); + if(!ret->DVLP.opcdescData)goto clean2; + int i; for(i=0;iDVLP.opdescSize;i++)ret->DVLP.opcdescData[i]=dvlpData[dvlpData[4]/4+i*2]; //parse DVLE - int i; for(i=0;inumDVLE;i++) { DVLE_s* dvle=&ret->DVLE[i]; u32* dvleData=&shbinData[shbinData[2+i]/4]; + dvle->dvlp=&ret->DVLP; + dvle->type=(dvleData[1]>>16)&0xFF; dvle->mainOffset=dvleData[2]; dvle->endmainOffset=dvleData[3]; @@ -48,9 +51,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) dvle->uniformTableData=(DVLE_uniformEntry_s*)&dvleData[dvleData[12]/4]; dvle->symbolTableData=(char*)&dvleData[dvleData[14]/4]; + + DVLE_GenerateOutmap(dvle); } goto exit; + clean2: + free(ret->DVLE); clean1: free(ret); ret=NULL; @@ -58,6 +65,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) return ret; } +//TODO +void SHDR_FreeDVLB(DVLB_s* dvlb) +{ + if(!dvlb)return; + +} + s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) { if(!dvlb || !name)return -1; @@ -76,51 +90,31 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type) { if(!dvlp)return; - - u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); - - GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, 0x00000000); - - int i; - for(i=0;icodeSize;i+=0x80)GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_DATA)+regOffset, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80); - - GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001); + + GPU_SendShaderCode(type, dvlp->codeData, 0, dvlp->codeSize); } void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type) { if(!dvlp)return; - u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); - - GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, 0x00000000); - - u32 param[0x80]; - - int i; - //TODO : should probably preprocess this - for(i=0;iopdescSize;i++)param[i]=dvlp->opcdescData[i*2]; - - GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OPDESCS_DATA)+regOffset, param, dvlp->opdescSize); + GPU_SendOperandDescriptors(type, dvlp->opcdescData, 0, dvlp->opdescSize); } -void DVLE_SendOutmap(DVLE_s* dvle) +void DVLE_GenerateOutmap(DVLE_s* dvle) { if(!dvle)return; - u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); - - u32 param[0x8]={0x00000000,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F, - 0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F}; + memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData)); int i; u8 numAttr=0; u8 maxAttr=0; u8 attrMask=0; - //TODO : should probably preprocess this + for(i=0;ioutTableSize;i++) { - u32* out=¶m[dvle->outTableData[i].regID+1]; + u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1]; u32 mask=0x00000000; u8 tmpmask=dvle->outTableData[i].mask; mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; @@ -148,17 +142,24 @@ void DVLE_SendOutmap(DVLE_s* dvle) if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; } - param[0]=numAttr; + dvle->outmapData[0]=numAttr; + dvle->outmapMask=attrMask; +} + +void DVLE_SendOutmap(DVLE_s* dvle) +{ + if(!dvle)return; + + u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); if(dvle->type==VERTEX_SHDR) { - GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //? - GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //? + GPUCMD_AddWrite(GPUREG_024A, dvle->outmapData[0]-1); //? + GPUCMD_AddWrite(GPUREG_0251, dvle->outmapData[0]-1); //? } - GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, attrMask); - GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, numAttr-1); - GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, param, 8); + GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, dvle->outmapMask); + GPU_SetShaderOutmap(dvle->outmapData); } void DVLE_SendConstants(DVLE_s* dvle) @@ -180,7 +181,7 @@ void DVLE_SendConstants(DVLE_s* dvle) memcpy(&rev8[6], &cnst->data[2], 3); memcpy(&rev8[9], &cnst->data[3], 3); - param[0x0]=(cnst->header>>16)&0xFF; + param[0x0]=(cnst->id)&0xFF; param[0x1]=rev[2]; param[0x2]=rev[1]; param[0x3]=rev[0]; @@ -196,7 +197,6 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0); - GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); @@ -215,10 +215,3 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) GPUCMD_AddWrite(GPUREG_0064, 0x00000001); GPUCMD_AddWrite(GPUREG_006F, 0x00000703); } - -//TODO -void SHDR_FreeDVLB(DVLB_s* dvlb) -{ - if(!dvlb)return; - -}