shaderProgram

This commit is contained in:
smea 2015-01-02 19:16:23 -08:00
parent cd05cc45ff
commit 80e6bcfd34
7 changed files with 211 additions and 49 deletions

View File

@ -35,6 +35,7 @@ extern "C" {
#include <3ds/gpu/gx.h> #include <3ds/gpu/gx.h>
#include <3ds/gpu/gpu.h> #include <3ds/gpu/gpu.h>
#include <3ds/gpu/shbin.h> #include <3ds/gpu/shbin.h>
#include <3ds/gpu/shaderProgram.h>
#include <3ds/sdmc.h> #include <3ds/sdmc.h>

View File

@ -203,6 +203,11 @@ typedef enum{
GPU_UNKPRIM = 0x0300 // ? GPU_UNKPRIM = 0x0300 // ?
}GPU_Primitive_t; }GPU_Primitive_t;
// selects which shader stage a GPU_Send* call targets
typedef enum
{
	GPU_VERTEX_SHADER   = 0x0,
	GPU_GEOMETRY_SHADER = 0x1
}GPU_SHADER_TYPE;
void GPU_SetUniform(u32 startreg, u32* data, u32 numreg); void GPU_SetUniform(u32 startreg, u32* data, u32 numreg);
void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u32 h); void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u32 h);
@ -232,5 +237,8 @@ void GPU_SetTexEnv(u8 id, u16 rgbSources, u16 alphaSources, u16 rgbOperands, u16
void GPU_DrawArray(GPU_Primitive_t primitive, u32 n); void GPU_DrawArray(GPU_Primitive_t primitive, u32 n);
void GPU_DrawElements(GPU_Primitive_t primitive, u32* indexArray, u32 n); void GPU_DrawElements(GPU_Primitive_t primitive, u32* indexArray, u32 n);
void GPU_FinishDrawing(); void GPU_FinishDrawing();
void GPU_SetShaderOutmap(u32 outmapData[8]);
void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);
void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);

View File

@ -3,11 +3,20 @@
#include <3ds/types.h> #include <3ds/types.h>
#include <3ds/gpu/shbin.h> #include <3ds/gpu/shbin.h>
// one float24 uniform ready to be written to the GPU : shaderInstanceInit fills
// it from a DVLE constant table entry and shaderProgramUse sends it as 4 words
typedef struct
{
	u32 id;      // copied from the constant entry's id field
	u32 data[3]; // the entry's four values, 3 bytes each, packed into 3 words (stored in reverse word order)
}float24Uniform_s;
// this structure describes an instance of either a vertex or geometry shader // this structure describes an instance of either a vertex or geometry shader
typedef struct typedef struct
{ {
DVLE_s* dvle; DVLE_s* dvle;
u16 boolUniforms; u16 boolUniforms;
u32 intUniforms[4];
float24Uniform_s* float24Uniforms;
u8 numFloat24Uniforms;
}shaderInstance_s; }shaderInstance_s;
// this structure describes an instance of a full shader program // this structure describes an instance of a full shader program

View File

@ -1,10 +1,18 @@
#pragma once #pragma once
#include <3ds/gpu/gpu.h>
typedef enum{ typedef enum{
VERTEX_SHDR=0x0, VERTEX_SHDR=GPU_VERTEX_SHADER,
GEOMETRY_SHDR=0x1 GEOMETRY_SHDR=GPU_GEOMETRY_SHADER
}DVLE_type; }DVLE_type;
// kind of value held by a DVLE constant table entry (DVLE_constEntry_s.type)
typedef enum
{
	DVLE_CONST_BOOL    = 0x0, // boolean uniform
	DVLE_CONST_u8      = 0x1, // integer uniform (stored in shaderInstance_s.intUniforms)
	DVLE_CONST_FLOAT24 = 0x2, // float24 uniform
}DVLE_constantType;
typedef enum{ typedef enum{
RESULT_POSITION = 0x0, RESULT_POSITION = 0x0,
RESULT_NORMALQUAT = 0x1, RESULT_NORMALQUAT = 0x1,
@ -24,7 +32,8 @@ typedef struct{
}DVLP_s; }DVLP_s;
typedef struct{ typedef struct{
u32 header; u16 type;
u16 id;
u32 data[4]; u32 data[4];
}DVLE_constEntry_s; }DVLE_constEntry_s;
@ -43,6 +52,7 @@ typedef struct{
typedef struct{ typedef struct{
DVLE_type type; DVLE_type type;
DVLP_s* dvlp;
u32 mainOffset, endmainOffset; u32 mainOffset, endmainOffset;
u32 constTableSize; u32 constTableSize;
DVLE_constEntry_s* constTableData; DVLE_constEntry_s* constTableData;
@ -51,6 +61,8 @@ typedef struct{
u32 uniformTableSize; u32 uniformTableSize;
DVLE_uniformEntry_s* uniformTableData; DVLE_uniformEntry_s* uniformTableData;
char* symbolTableData; char* symbolTableData;
u8 outmapMask;
u32 outmapData[8];
}DVLE_s; }DVLE_s;
typedef struct{ typedef struct{
@ -69,3 +81,4 @@ void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type);
void DVLE_SendOutmap(DVLE_s* dvle); void DVLE_SendOutmap(DVLE_s* dvle);
void DVLE_SendConstants(DVLE_s* dvle); void DVLE_SendConstants(DVLE_s* dvle);
void DVLE_GenerateOutmap(DVLE_s* dvle);

View File

@ -509,3 +509,34 @@ void GPU_FinishDrawing()
GPUCMD_AddSingleParam(0x000F0110, 0x00000001); GPUCMD_AddSingleParam(0x000F0110, 0x00000001);
GPUCMD_AddSingleParam(0x000F0063, 0x00000001); GPUCMD_AddSingleParam(0x000F0063, 0x00000001);
} }
// Queues the GPU commands that upload a shader output map.
//
// outmapData[0] is used as the output attribute count : that value minus one
// goes to GPUREG_PRIMITIVE_CONFIG as a masked write, then all 8 words are
// written incrementally starting at GPUREG_SH_OUTMAP_TOTAL.
//
// A NULL outmapData is ignored, like NULL data in GPU_SendShaderCode and
// GPU_SendOperandDescriptors.
void GPU_SetShaderOutmap(u32 outmapData[8])
{
	if(!outmapData)return;

	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1);
	GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8);
}
// Queues the commands that upload shader code for the given stage.
//
// type   : stage to target; the geometry shader uses the vertex shader
//          registers shifted by -0x30
// data   : code words to send (nothing is queued when NULL)
// offset : value written to the code transfer config register before the data
// length : number of words in data; they are sent in bursts of at most 0x80
void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length)
{
	if(data==NULL)return;

	u32 stageOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0);

	GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+stageOffset, offset);

	int sent=0;
	while(sent<length)
	{
		// last burst may be shorter than 0x80 words
		GPUCMD_AddWrites(GPUREG_VSH_CODETRANSFER_DATA+stageOffset, &data[sent], ((length-sent)<0x80)?(length-sent):0x80);
		sent+=0x80;
	}

	// signal the end of the code transfer
	GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+stageOffset, 0x00000001);
}
// Queues the commands that upload operand descriptors for the given stage.
//
// type   : stage to target; the geometry shader uses the vertex shader
//          registers shifted by -0x30
// data   : descriptor words to send (nothing is queued when NULL)
// offset : value written to the opdescs config register before the data
// length : number of words in data, sent in a single burst
void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length)
{
	if(data==NULL)return;

	u32 stageOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0);

	GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+stageOffset, offset);
	GPUCMD_AddWrites(GPUREG_VSH_OPDESCS_DATA+stageOffset, data, length);
}

View File

@ -1,5 +1,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <3ds/types.h> #include <3ds/types.h>
#include <3ds/gpu/registers.h>
#include <3ds/gpu/shaderProgram.h> #include <3ds/gpu/shaderProgram.h>
// Initializes a shader instance from a DVLE.
//
// Uniform state starts from defaults (all 16 bool bits set, int uniforms
// zeroed, no float24 uniforms) and is then overridden by the DVLE's constant
// table, if it has one :
//  - DVLE_CONST_BOOL    entries go through shaderInstanceSetBool
//  - DVLE_CONST_u8      entries with id<4 are stored in intUniforms
//  - DVLE_CONST_FLOAT24 entries are packed into a freshly allocated
//    float24Uniforms array (owned by the instance)
//
// returns 0 on success, -1 if si or dvle is NULL, -2 if the float24 uniform
// array could not be allocated (the instance is then left with no float24
// uniforms)
Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle)
{
	if(!si || !dvle)return -1;

	si->dvle = dvle;

	si->boolUniforms = 0xFFFF;
	si->intUniforms[0] = 0x00000000;
	si->intUniforms[1] = 0x00000000;
	si->intUniforms[2] = 0x00000000;
	si->intUniforms[3] = 0x00000000;
	si->float24Uniforms = NULL;
	// must always be defined : shaderProgramUse loops on it unconditionally
	si->numFloat24Uniforms = 0;

	DVLE_constEntry_s* cnst = dvle->constTableData;
	if(!cnst)return 0;

	u32 i;
	int float24cnt=0;

	// first pass : apply bool/int constants and count float24 constants
	for(i=0; i<dvle->constTableSize; i++)
	{
		switch(cnst[i].type)
		{
			case DVLE_CONST_BOOL:
				shaderInstanceSetBool(si, cnst[i].id, cnst[i].data[0]&1);
				break;
			case DVLE_CONST_u8:
				if(cnst[i].id<4)si->intUniforms[cnst[i].id] = cnst[i].data[0];
				break;
			case DVLE_CONST_FLOAT24:
				float24cnt++;
				break;
			default:
				break;
		}
	}

	if(!float24cnt)return 0;

	si->float24Uniforms = malloc(sizeof(float24Uniform_s)*float24cnt);
	if(!si->float24Uniforms)return -2;

	// second pass : pack every float24 constant into the array
	float24cnt = 0;
	u32 rev[3];
	u8* rev8=(u8*)rev;
	for(i=0; i<dvle->constTableSize; i++)
	{
		if(cnst[i].type!=DVLE_CONST_FLOAT24)continue;

		// take the first 3 bytes of each of the 4 data words (4*3 = 12 bytes = 3 words)...
		memcpy(&rev8[0], &cnst[i].data[0], 3);
		memcpy(&rev8[3], &cnst[i].data[1], 3);
		memcpy(&rev8[6], &cnst[i].data[2], 3);
		memcpy(&rev8[9], &cnst[i].data[3], 3);

		// ...and store the packed words in reverse order
		si->float24Uniforms[float24cnt].id = cnst[i].id;
		si->float24Uniforms[float24cnt].data[0] = rev[2];
		si->float24Uniforms[float24cnt].data[1] = rev[1];
		si->float24Uniforms[float24cnt].data[2] = rev[0];

		float24cnt++;
	}
	si->numFloat24Uniforms = float24cnt;

	return 0;
}
@ -16,6 +75,7 @@ Result shaderInstanceFree(shaderInstance_s* si)
{ {
if(!si)return -1; if(!si)return -1;
if(si->float24Uniforms)free(si->float24Uniforms);
free(si); free(si);
return 0; return 0;
@ -97,11 +157,58 @@ Result shaderProgramUse(shaderProgram_s* sp)
if(!sp->vertexShader)return -2; if(!sp->vertexShader)return -2;
int i;
// setup vertex shader stuff no matter what
const DVLE_s* vshDvle = sp->vertexShader->dvle;
const DVLP_s* vshDvlp = vshDvle->dvlp;
GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize);
GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize);
GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms);
GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4);
for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4);
GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);
GPUCMD_AddWrite(GPUREG_024A, vshDvle->outmapData[0]-1); // ?
GPUCMD_AddWrite(GPUREG_0251, vshDvle->outmapData[0]-1); // ?
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); // ?
GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // ?
if(!sp->geometryShader) if(!sp->geometryShader)
{ {
// only deal with vertex shader // finish setting up vertex shader alone
GPU_SetShaderOutmap((u32*)vshDvle->outmapData);
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);
GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000);
GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ?
GPUCMD_AddWrite(GPUREG_006F, 0x00000703); // ?
}else{ }else{
// setup both vertex and geometry shader // setup both vertex and geometry shader
const DVLE_s* gshDvle = sp->geometryShader->dvle;
const DVLP_s* gshDvlp = gshDvle->dvlp;
GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize);
GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize);
GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms);
GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4);
for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4);
GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF));
GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask);
GPU_SetShaderOutmap((u32*)gshDvle->outmapData);
//GSH input attributes stuff
GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000003);
GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2);
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002);
GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000001);
GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ?
GPUCMD_AddWrite(GPUREG_006F, 0x01030703); // ?
} }
return 0; return 0;

View File

@ -25,15 +25,18 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize)
ret->DVLP.codeSize=dvlpData[3]; ret->DVLP.codeSize=dvlpData[3];
ret->DVLP.codeData=&dvlpData[dvlpData[2]/4]; ret->DVLP.codeData=&dvlpData[dvlpData[2]/4];
ret->DVLP.opdescSize=dvlpData[5]; ret->DVLP.opdescSize=dvlpData[5];
ret->DVLP.opcdescData=&dvlpData[dvlpData[4]/4]; ret->DVLP.opcdescData=(u32*)malloc(sizeof(u32)*ret->DVLP.opdescSize);
if(!ret->DVLP.opcdescData)goto clean2;
int i; for(i=0;i<ret->DVLP.opdescSize;i++)ret->DVLP.opcdescData[i]=dvlpData[dvlpData[4]/4+i*2];
//parse DVLE //parse DVLE
int i;
for(i=0;i<ret->numDVLE;i++) for(i=0;i<ret->numDVLE;i++)
{ {
DVLE_s* dvle=&ret->DVLE[i]; DVLE_s* dvle=&ret->DVLE[i];
u32* dvleData=&shbinData[shbinData[2+i]/4]; u32* dvleData=&shbinData[shbinData[2+i]/4];
dvle->dvlp=&ret->DVLP;
dvle->type=(dvleData[1]>>16)&0xFF; dvle->type=(dvleData[1]>>16)&0xFF;
dvle->mainOffset=dvleData[2]; dvle->mainOffset=dvleData[2];
dvle->endmainOffset=dvleData[3]; dvle->endmainOffset=dvleData[3];
@ -48,9 +51,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize)
dvle->uniformTableData=(DVLE_uniformEntry_s*)&dvleData[dvleData[12]/4]; dvle->uniformTableData=(DVLE_uniformEntry_s*)&dvleData[dvleData[12]/4];
dvle->symbolTableData=(char*)&dvleData[dvleData[14]/4]; dvle->symbolTableData=(char*)&dvleData[dvleData[14]/4];
DVLE_GenerateOutmap(dvle);
} }
goto exit; goto exit;
clean2:
free(ret->DVLE);
clean1: clean1:
free(ret); free(ret);
ret=NULL; ret=NULL;
@ -58,6 +65,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize)
return ret; return ret;
} }
// Releases a DVLB returned by SHDR_ParseSHBIN.
//
// Frees the heap blocks owned by the DVLB : the operand descriptor array, the
// DVLE array and the DVLB itself. Code and table pointers that point into the
// caller's shbin buffer are left untouched; that buffer stays owned by the
// caller. A NULL dvlb is ignored.
void SHDR_FreeDVLB(DVLB_s* dvlb)
{
	if(!dvlb)return;

	free(dvlb->DVLP.opcdescData);
	free(dvlb->DVLE);
	free(dvlb);
}
s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID)
{ {
if(!dvlb || !name)return -1; if(!dvlb || !name)return -1;
@ -76,51 +90,31 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID)
void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type) void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type)
{ {
if(!dvlp)return; if(!dvlp)return;
u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); GPU_SendShaderCode(type, dvlp->codeData, 0, dvlp->codeSize);
GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, 0x00000000);
int i;
for(i=0;i<dvlp->codeSize;i+=0x80)GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_DATA)+regOffset, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80);
GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001);
} }
void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type) void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type)
{ {
if(!dvlp)return; if(!dvlp)return;
u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); GPU_SendOperandDescriptors(type, dvlp->opcdescData, 0, dvlp->opdescSize);
GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, 0x00000000);
u32 param[0x80];
int i;
//TODO : should probably preprocess this
for(i=0;i<dvlp->opdescSize;i++)param[i]=dvlp->opcdescData[i*2];
GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OPDESCS_DATA)+regOffset, param, dvlp->opdescSize);
} }
void DVLE_SendOutmap(DVLE_s* dvle) void DVLE_GenerateOutmap(DVLE_s* dvle)
{ {
if(!dvle)return; if(!dvle)return;
u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData));
u32 param[0x8]={0x00000000,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,
0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F};
int i; int i;
u8 numAttr=0; u8 numAttr=0;
u8 maxAttr=0; u8 maxAttr=0;
u8 attrMask=0; u8 attrMask=0;
//TODO : should probably preprocess this
for(i=0;i<dvle->outTableSize;i++) for(i=0;i<dvle->outTableSize;i++)
{ {
u32* out=&param[dvle->outTableData[i].regID+1]; u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1];
u32 mask=0x00000000; u32 mask=0x00000000;
u8 tmpmask=dvle->outTableData[i].mask; u8 tmpmask=dvle->outTableData[i].mask;
mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
@ -148,17 +142,24 @@ void DVLE_SendOutmap(DVLE_s* dvle)
if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1;
} }
param[0]=numAttr; dvle->outmapData[0]=numAttr;
dvle->outmapMask=attrMask;
}
void DVLE_SendOutmap(DVLE_s* dvle)
{
if(!dvle)return;
u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0);
if(dvle->type==VERTEX_SHDR) if(dvle->type==VERTEX_SHDR)
{ {
GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //? GPUCMD_AddWrite(GPUREG_024A, dvle->outmapData[0]-1); //?
GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //? GPUCMD_AddWrite(GPUREG_0251, dvle->outmapData[0]-1); //?
} }
GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, attrMask); GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, dvle->outmapMask);
GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, numAttr-1); GPU_SetShaderOutmap(dvle->outmapData);
GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, param, 8);
} }
void DVLE_SendConstants(DVLE_s* dvle) void DVLE_SendConstants(DVLE_s* dvle)
@ -180,7 +181,7 @@ void DVLE_SendConstants(DVLE_s* dvle)
memcpy(&rev8[6], &cnst->data[2], 3); memcpy(&rev8[6], &cnst->data[2], 3);
memcpy(&rev8[9], &cnst->data[3], 3); memcpy(&rev8[9], &cnst->data[3], 3);
param[0x0]=(cnst->header>>16)&0xFF; param[0x0]=(cnst->id)&0xFF;
param[0x1]=rev[2]; param[0x1]=rev[2];
param[0x2]=rev[1]; param[0x2]=rev[1];
param[0x3]=rev[0]; param[0x3]=rev[0];
@ -196,7 +197,6 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id)
u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0); u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0);
GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);
GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0);
@ -215,10 +215,3 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id)
GPUCMD_AddWrite(GPUREG_0064, 0x00000001); GPUCMD_AddWrite(GPUREG_0064, 0x00000001);
GPUCMD_AddWrite(GPUREG_006F, 0x00000703); GPUCMD_AddWrite(GPUREG_006F, 0x00000703);
} }
// TODO : releasing a DVLB is not implemented yet; apart from tolerating a
// NULL argument this function currently does nothing
void SHDR_FreeDVLB(DVLB_s* dvlb)
{
	if(dvlb==NULL)
	{
		return;
	}
}