Enhanced and corrected shader code to fully support geometry shaders

This commit is contained in:
fincs 2016-07-20 17:35:26 +02:00
parent 88a9c58bec
commit 7aad0b4968
4 changed files with 193 additions and 148 deletions

View File

@ -33,18 +33,8 @@ typedef struct
shaderInstance_s* geometryShader; ///< Geometry shader.
u32 geoShaderInputPermutation[2]; ///< Geometry shader input permutation.
u8 geoShaderInputStride; ///< Geometry shader input stride.
u8 geoShaderMode; ///< Geometry shader operation mode.
}shaderProgram_s;
/**
 * @brief Geometry shader operation modes.
 *
 * The numeric values are significant: the selected mode is stored in
 * shaderProgram_s::geoShaderMode and compared/written as a raw integer.
 */
typedef enum
{
	/// Normal operation.
	GSH_NORMAL = 0,

	/// Particle system.
	GSH_PARTICLE = 1,

	/// Loop subdivision surface.
	GSH_SUBDIVISION_LOOP = 2,

	/// Catmull-Clark subdivision surface.
	GSH_SUBDIVISION_CATMULL_CLARK = 3,
} geoShaderMode;
/**
* @brief Initializes a shader instance.
* @param si Shader instance to initialize.
@ -104,7 +94,7 @@ Result shaderProgramSetVsh(shaderProgram_s* sp, DVLE_s* dvle);
* @brief Sets the geometry shader of a shader program.
* @param sp Shader program to use.
* @param dvle Geometry shader to set.
* @param stride Stride of the geometry shader.
* @param stride Input stride of the shader (pass 0 to match the number of outputs of the vertex shader).
*/
Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride);
@ -115,13 +105,6 @@ Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride);
*/
Result shaderProgramSetGshInputPermutation(shaderProgram_s* sp, u64 permutation);
/**
* @brief Configures the operation mode of the geometry shader of a shader program.
* @param sp Shader program to use.
* @param mode Operation mode to use.
*/
Result shaderProgramSetGshMode(shaderProgram_s* sp, geoShaderMode mode);
/**
* @brief Configures the shader units to use the specified shader program.
* @param sp Shader program to use.

View File

@ -28,9 +28,18 @@ typedef enum{
RESULT_TEXCOORD0W = 0x4, ///< Texture coordinate 0 W.
RESULT_TEXCOORD1 = 0x5, ///< Texture coordinate 1.
RESULT_TEXCOORD2 = 0x6, ///< Texture coordinate 2.
RESULT_VIEW = 0x8 ///< View.
RESULT_VIEW = 0x8, ///< View.
RESULT_DUMMY = 0x9, ///< Dummy attribute (used as passthrough for geometry shader input).
}DVLE_outputAttribute_t;
/**
 * @brief Geometry shader operation modes.
 *
 * The numeric values are significant: they are used as raw integers
 * (stored in DVLE_s::gshMode), so they must not be renumbered.
 */
typedef enum
{
	/// Point processing mode.
	GSH_POINT = 0,

	/// Variable-size primitive processing mode.
	GSH_VARIABLE_PRIM = 1,

	/// Fixed-size primitive processing mode.
	GSH_FIXED_PRIM = 2,
} DVLE_geoShaderMode;
/// DVLP data.
typedef struct{
u32 codeSize; ///< Code size.
@ -64,6 +73,11 @@ typedef struct{
/// DVLE data.
typedef struct{
DVLE_type type; ///< DVLE type.
bool mergeOutmaps; ///< true = merge vertex/geometry shader outmaps ('dummy' output attribute is present).
DVLE_geoShaderMode gshMode; ///< Geometry shader operation mode.
u8 gshFixedVtxStart; ///< Starting float uniform register number for storing the fixed-size primitive vertex array.
u8 gshVariableVtxNum; ///< Number of fully-defined vertices in the variable-size primitive vertex array.
u8 gshFixedVtxNum; ///< Number of vertices in the fixed-size primitive vertex array.
DVLP_s* dvlp; ///< Contained DVLPs.
u32 mainOffset; ///< Offset of the start of the main function.
u32 endmainOffset; ///< Offset of the end of the main function.

View File

@ -5,7 +5,7 @@
#include <3ds/gpu/registers.h>
#include <3ds/gpu/shaderProgram.h>
static void GPU_SetShaderOutmap(u32 outmapData[8]);
static void GPU_SetShaderOutmap(const u32 outmapData[8]);
static void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);
static void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);
@ -168,7 +168,6 @@ Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride)
sp->geoShaderInputPermutation[0] = 0x76543210;
sp->geoShaderInputPermutation[1] = 0xFEDCBA98;
sp->geoShaderInputStride = stride;
sp->geoShaderMode = GSH_NORMAL;
return shaderInstanceInit(sp->geometryShader, dvle);
}
@ -182,82 +181,149 @@ Result shaderProgramSetGshInputPermutation(shaderProgram_s* sp, u64 permutation)
return 0;
}
Result shaderProgramSetGshMode(shaderProgram_s* sp, geoShaderMode mode)
static inline void shaderProgramUploadDvle(const DVLE_s* dvle)
{
if(!sp || !sp->geometryShader)return -1;
const DVLP_s* dvlp = dvle->dvlp;
// Limit vertex shader code size to the first 512 instructions
int codeSize = dvle->type == GEOMETRY_SHDR ? dvlp->codeSize : (dvlp->codeSize < 512 ? dvlp->codeSize : 512);
GPU_SendShaderCode(dvle->type, dvlp->codeData, 0, codeSize);
GPU_SendOperandDescriptors(dvle->type, dvlp->opcdescData, 0, dvlp->opdescSize);
}
sp->geoShaderMode = mode & 3;
return 0;
/**
 * @brief Merges a vertex shader outmap and a geometry shader outmap into one.
 *
 * Each outmap is an array of 8 words: word 0 is the number of entries and
 * words 1..7 are the entries themselves. A word equal to 0x1F1F1F1F (the fill
 * value outmaps are initialized with) marks the end of the used entries.
 *
 * Entries are appended to the destination in three passes:
 *  1. entries present in both shaders (in geometry shader order),
 *  2. entries present only in the geometry shader,
 *  3. entries present only in the vertex shader.
 *
 * @param outmapData Destination outmap (8 words). Word 0 must already hold the
 *                   current entry count (normally 0); new entries are appended
 *                   after it and the count is updated in place.
 * @param vshOutmap  Vertex shader outmap (8 words, read-only).
 * @param gshOutmap  Geometry shader outmap (8 words, read-only).
 *
 * @note The destination only has room for 7 entries. Once it is full, merging
 *       stops and any remaining entries are dropped instead of being written
 *       past the end of the array.
 */
static inline void shaderProgramMergeOutmaps(u32* outmapData, const u32* vshOutmap, const u32* gshOutmap)
{
	// Words 1..7 hold entries; word 0 holds the running entry count
	const u32 maxEntries = 7;
	int i, j;

	// Find and copy attributes common to both vertex and geometry shader
	u32 vsh_common = 0, gsh_common = 0;
	for (i = 1; i < 8; i ++)
	{
		u32 mask = gshOutmap[i];
		if (mask == 0x1F1F1F1F)
			break;

		for (j = 1; j < 8; j ++)
		{
			if (vshOutmap[j] != mask)
				continue;

			// Destination full: nothing more can be merged
			if (outmapData[0] >= maxEntries)
				return;

			outmapData[0] ++;
			outmapData[outmapData[0]] = mask;
			vsh_common |= BIT(j);
			gsh_common |= BIT(i);
			break;
		}
	}

	// Find and copy attributes that are exclusive to the geometry shader
	for (i = 1; i < 8; i ++)
	{
		u32 mask = gshOutmap[i];
		if (mask == 0x1F1F1F1F)
			break;
		if (gsh_common & BIT(i))
			continue;

		if (outmapData[0] >= maxEntries)
			return;

		outmapData[0] ++;
		outmapData[outmapData[0]] = mask;
	}

	// Find and copy attributes that are exclusive to the vertex shader
	for (i = 1; i < 8; i ++)
	{
		u32 mask = vshOutmap[i];
		if (mask == 0x1F1F1F1F)
			break;
		if (vsh_common & BIT(i))
			continue;

		if (outmapData[0] >= maxEntries)
			return;

		outmapData[0] ++;
		outmapData[outmapData[0]] = mask;
	}
}
Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode)
{
if(!sp)return -1;
if (!sp || !sp->vertexShader) return -1;
if(!sp->vertexShader)return -2;
// configure geostage
// has to be done first or else VSH registers might only reconfigure 3 of the 4 shader units !
if(!sp->geometryShader)
{
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);
GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, 0x00000000);
}else{
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002);
GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, 0x00000001);
}
// setup vertex shader stuff no matter what
// Get pointers to relevant structures
const DVLE_s* vshDvle = sp->vertexShader->dvle;
const DVLP_s* vshDvlp = vshDvle->dvlp;
const DVLE_s* gshDvle = sp->geometryShader ? sp->geometryShader->dvle : NULL;
const DVLE_s* mainDvle = gshDvle ? gshDvle : vshDvle;
// Variables for working with the outmap
u32 outmapData[8];
u32 outmapMode = mainDvle->outmapMode;
u32 outmapClock = mainDvle->outmapClock;
// Initialize geometry engine - do this early in order to ensure all 4 units are correctly initialized
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x3, gshDvle ? 2 : 0);
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 0x3, 0);
GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, gshDvle ? 1 : 0);
// Set up vertex shader code blob (if necessary)
if (sendVshCode)
{
GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize);
GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize);
}
shaderProgramUploadDvle(vshDvle);
// Set up vertex shader entrypoint & outmap mask
GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL1, vshDvle->outmapData[0]-1);
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL2, vshDvle->outmapData[0]-1);
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL1, vshDvle->outmapData[0]-1); // ?
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL2, vshDvle->outmapData[0]-1); // ?
bool subdivision = sp->geoShaderMode >= GSH_SUBDIVISION_LOOP;
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, subdivision ? 0x80000000 : 0); // Enable or disable subdivision
u32 gshMisc = 0;
if (subdivision)
gshMisc = 1;
else if (sp->geoShaderMode == GSH_PARTICLE)
gshMisc = 0x01004302;
GPUCMD_AddWrite(GPUREG_GSH_MISC0, gshMisc);
GPUCMD_AddWrite(GPUREG_GSH_MISC1, sp->geoShaderMode);
if(!sp->geometryShader)
// Set up geometry shader (if present)
if (gshDvle)
{
// finish setting up vertex shader alone
GPU_SetShaderOutmap((u32*)vshDvle->outmapData);
GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, vshDvle->outmapMode);
GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, vshDvle->outmapClock);
}else{
// setup both vertex and geometry shader
const DVLE_s* gshDvle = sp->geometryShader->dvle;
const DVLP_s* gshDvlp = gshDvle->dvlp;
// Set up geometry shader code blob (if necessary)
if (sendGshCode)
{
GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize);
GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize);
}
shaderProgramUploadDvle(gshDvle);
// Set up geometry shader entrypoint & outmap mask
GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF));
GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask);
}
GPU_SetShaderOutmap((u32*)gshDvle->outmapData);
// Merge vertex shader & geometry shader outmaps if requested
if (gshDvle && gshDvle->mergeOutmaps)
{
// Clear outmap
memset(outmapData, 0x1F, sizeof(outmapData));
outmapData[0] = 0;
//GSH input attributes stuff
GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geoShaderInputStride-1)|(subdivision?0x100:0));
// Merge outmaps
shaderProgramMergeOutmaps(outmapData, vshDvle->outmapData, gshDvle->outmapData);
outmapMode |= vshDvle->outmapMode;
outmapClock |= vshDvle->outmapClock;
} else
memcpy(outmapData, mainDvle->outmapData, sizeof(outmapData));
// Upload and configure outmap
GPU_SetShaderOutmap(outmapData);
GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, outmapMode);
GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, outmapClock);
// Configure geostage
if (gshDvle)
{
// Input stride: use value if specified, otherwise use number of outputs in vertex shader
int stride = sp->geoShaderInputStride ? sp->geoShaderInputStride : vshDvle->outmapData[0];
// Enable or disable variable-size primitive processing
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xA, gshDvle->gshMode == GSH_VARIABLE_PRIM ? 0x80000000 : 0);
// Set up geoshader processing mode
u32 misc = gshDvle->gshMode;
if (misc == GSH_FIXED_PRIM)
misc |= 0x01000000 | ((u32)gshDvle->gshFixedVtxStart<<16) | ((stride-1)<<12) | ((u32)(gshDvle->gshFixedVtxNum-1)<<8);
GPUCMD_AddWrite(GPUREG_GSH_MISC0, misc);
// Set up variable-size primitive mode parameters
GPUCMD_AddWrite(GPUREG_GSH_MISC1, gshDvle->gshMode == GSH_VARIABLE_PRIM ? (gshDvle->gshVariableVtxNum-1) : 0);
// Set up geoshader input
GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000 | (gshDvle->gshMode ? 0x0100 : 0) | (stride-1));
// Set up geoshader permutation
GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, sp->geoShaderInputPermutation, 2);
GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, gshDvle->outmapMode);
GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, gshDvle->outmapClock);
} else
{
// Defaults for when geostage is disabled
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xA, 0);
GPUCMD_AddWrite(GPUREG_GSH_MISC0, 0);
GPUCMD_AddWrite(GPUREG_GSH_MISC1, 0);
GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0xA0000000);
}
return 0;
@ -271,12 +337,12 @@ Result shaderProgramUse(shaderProgram_s* sp)
int i;
// Set up uniforms
GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|~sp->vertexShader->boolUniforms);
GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms);
GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4);
for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4);
if (sp->geometryShader)
{
GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|~sp->geometryShader->boolUniforms);
GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms);
GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4);
for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4);
}
@ -284,7 +350,7 @@ Result shaderProgramUse(shaderProgram_s* sp)
return 0;
}
void GPU_SetShaderOutmap(u32 outmapData[8])
void GPU_SetShaderOutmap(const u32 outmapData[8])
{
GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1);
GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8);

View File

@ -38,9 +38,18 @@ DVLB_s* DVLB_ParseFile(u32* shbinData, u32 shbinSize)
dvle->dvlp=&ret->DVLP;
dvle->type=(dvleData[1]>>16)&0xFF;
dvle->mergeOutmaps=(dvleData[1]>>24)&1;
dvle->mainOffset=dvleData[2];
dvle->endmainOffset=dvleData[3];
if(dvle->type==GEOMETRY_SHDR)
{
dvle->gshMode=dvleData[5]&0xFF;
dvle->gshFixedVtxStart=(dvleData[5]>>8)&0xFF;
dvle->gshVariableVtxNum=(dvleData[5]>>16)&0xFF;
dvle->gshFixedVtxNum=(dvleData[5]>>24)&0xFF;
}
dvle->constTableSize=dvleData[7];
dvle->constTableData=(DVLE_constEntry_s*)&dvleData[dvleData[6]/4];
@ -91,78 +100,51 @@ void DVLE_GenerateOutmap(DVLE_s* dvle)
{
if (!dvle) return;
// Initialize outmap data
memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData));
dvle->outmapData[0] = 0;
dvle->outmapMask = 0;
dvle->outmapMode = 0;
dvle->outmapClock = 0;
int i;
u8 numAttr=0;
u8 maxAttr=0;
u8 attrMask=0;
u32 attrMode=0;
u32 attrClock=0;
int i, j, k;
for (i = 0; i < dvle->outTableSize; i ++)
{
u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1];
u32 mask=0x00000000;
u8 tmpmask=dvle->outTableData[i].mask;
mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
int type = dvle->outTableData[i].type;
int mask = dvle->outTableData[i].mask;
int regID = dvle->outTableData[i].regID;
u32* out = &dvle->outmapData[regID+1];
if(*out==0x1F1F1F1F)numAttr++;
u32 val=0x1F1F1F1F;
switch(dvle->outTableData[i].type)
if (!(dvle->outmapMask & BIT(regID)))
{
case RESULT_POSITION: val=0x03020100; break;
case RESULT_NORMALQUAT: val=0x07060504; break;
case RESULT_COLOR: val=0x0B0A0908; break;
case RESULT_TEXCOORD0: val=0x1F1F0D0C; break;
case RESULT_TEXCOORD0W: val=0x10101010; break;
case RESULT_TEXCOORD1: val=0x1F1F0F0E; break;
case RESULT_TEXCOORD2: val=0x1F1F1716; break;
case RESULT_VIEW: val=0x1F141312; break;
dvle->outmapMask |= BIT(regID);
dvle->outmapData[0] ++;
}
*out=((*out)&~mask)|(val&mask);
switch(dvle->outTableData[i].type)
int sem = 0x1F, num = 0;
switch (type)
{
case RESULT_POSITION:
if ((*out & 0xFF0000)==0x020000)
attrClock |= BIT(0);
break;
case RESULT_COLOR:
attrClock |= BIT(1);
break;
case RESULT_TEXCOORD0:
attrMode = 1;
attrClock |= BIT(8);
break;
case RESULT_TEXCOORD1:
attrMode = 1;
attrClock |= BIT(9);
break;
case RESULT_TEXCOORD2:
attrMode = 1;
attrClock |= BIT(10);
break;
case RESULT_TEXCOORD0W:
attrMode = 1;
attrClock |= BIT(16);
break;
case RESULT_NORMALQUAT:
case RESULT_VIEW:
attrClock |= BIT(24);
break;
case RESULT_POSITION: sem = 0x00; num = 4; break;
case RESULT_NORMALQUAT: sem = 0x04; num = 4; dvle->outmapClock |= BIT(24); break;
case RESULT_COLOR: sem = 0x08; num = 4; dvle->outmapClock |= BIT(1); break;
case RESULT_TEXCOORD0: sem = 0x0C; num = 2; dvle->outmapClock |= BIT(8); dvle->outmapMode = 1; break;
case RESULT_TEXCOORD0W: sem = 0x10; num = 1; dvle->outmapClock |= BIT(16); dvle->outmapMode = 1; break;
case RESULT_TEXCOORD1: sem = 0x0E; num = 2; dvle->outmapClock |= BIT(9); dvle->outmapMode = 1; break;
case RESULT_TEXCOORD2: sem = 0x16; num = 2; dvle->outmapClock |= BIT(10); dvle->outmapMode = 1; break;
case RESULT_VIEW: sem = 0x12; num = 3; dvle->outmapClock |= BIT(24); break;
default: continue;
}
attrMask|=1<<dvle->outTableData[i].regID;
if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1;
for (j = 0, k = 0; j < 4 && k < num; j ++)
{
if (mask & BIT(j))
{
*out &= ~(0xFF << (j*8));
*out |= (sem++) << (j*8);
k ++;
if (type==RESULT_POSITION && k==3)
dvle->outmapClock |= BIT(0);
}
}
}
dvle->outmapData[0]=numAttr;
dvle->outmapMask=attrMask;
dvle->outmapMode=attrMode;
dvle->outmapClock=attrClock;
}