From 5f10c8ee74732fbf3b575d2c54ace830abb471f4 Mon Sep 17 00:00:00 2001 From: smea Date: Wed, 17 Dec 2014 22:47:41 -0800 Subject: [PATCH 01/21] added registers.h --- libctru/include/3ds/gpu/gpu.h | 6 ++++ libctru/include/3ds/gpu/shdr.h | 4 +-- libctru/source/gpu/gpu.c | 7 +--- libctru/source/gpu/shdr.c | 58 ++++++++++++++++++++-------------- 4 files changed, 43 insertions(+), 32 deletions(-) diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index 5fd3071..ac25969 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -1,8 +1,14 @@ #pragma once +#include "3ds/gpu/registers.h" + +//GPU void GPU_Init(Handle *gsphandle); void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize); +//GPUCMD +#define GPUCMD_HEADER(consec, mask, reg) (((consec)<<31)|(((mask)&0xF)<<16)|((reg)&0x3FF)) + void GPUCMD_SetBuffer(u32* adr, u32 size, u32 offset); void GPUCMD_SetBufferOffset(u32 offset); void GPUCMD_GetBuffer(u32** adr, u32* size, u32* offset); diff --git a/libctru/include/3ds/gpu/shdr.h b/libctru/include/3ds/gpu/shdr.h index 02b5128..1a89ddb 100644 --- a/libctru/include/3ds/gpu/shdr.h +++ b/libctru/include/3ds/gpu/shdr.h @@ -61,8 +61,8 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id); void SHDR_FreeDVLB(DVLB_s* dvlb); s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID); -void DVLP_SendCode(DVLP_s* dvlp); -void DVLP_SendOpDesc(DVLP_s* dvlp); +void DVLP_SendCode(DVLP_s* dvlp, SHDR_type type); +void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type); void DVLE_SendOutmap(DVLE_s* dvle); void DVLE_SendConstants(DVLE_s* dvle); diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 97d14db..6e0586d 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -467,23 +467,18 @@ void GPU_SetTexEnv(u8 id, u16 rgbSources, u16 alphaSources, u16 rgbOperands, u16 void GPU_DrawArray(GPU_Primitive_t primitive, u32 n) { - // //? 
- // GPUCMD_AddSingleParam(0x00040080, 0x00010000); //set primitive type GPUCMD_AddSingleParam(0x0002025E, primitive); GPUCMD_AddSingleParam(0x0002025F, 0x00000001); - //index buffer not used for drawArrays but 0x000F0227 still required - GPUCMD_AddSingleParam(0x000F0227, 0x80000000); //pass number of vertices GPUCMD_AddSingleParam(0x000F0228, n); + //all the following except 0x000F022E might be useless GPUCMD_AddSingleParam(0x00010253, 0x00000001); - GPUCMD_AddSingleParam(0x00010245, 0x00000000); GPUCMD_AddSingleParam(0x000F022E, 0x00000001); GPUCMD_AddSingleParam(0x00010245, 0x00000001); GPUCMD_AddSingleParam(0x000F0231, 0x00000001); - GPUCMD_AddSingleParam(0x000F0111, 0x00000001); } diff --git a/libctru/source/gpu/shdr.c b/libctru/source/gpu/shdr.c index a9d2de5..0efe3fa 100644 --- a/libctru/source/gpu/shdr.c +++ b/libctru/source/gpu/shdr.c @@ -71,23 +71,27 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) return -1; } -void DVLP_SendCode(DVLP_s* dvlp) +void DVLP_SendCode(DVLP_s* dvlp, SHDR_type type) { if(!dvlp)return; - GPUCMD_AddSingleParam(0x000F02CB, 0x00000000); + u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); + + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_CONFIG)+regOffset, 0x00000000); int i; - for(i=0;i<dvlp->codeSize;i+=0x80)GPUCMD_Add(0x000F02CC, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80); + for(i=0;i<dvlp->codeSize;i+=0x80)GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_DATA)+regOffset, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80); - GPUCMD_AddSingleParam(0x000F02BF, 0x00000001); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_END)+regOffset, 0x00000001); } -void DVLP_SendOpDesc(DVLP_s* dvlp) +void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type) { if(!dvlp)return; - GPUCMD_AddSingleParam(0x000F02D5, 0x00000000); + u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); + + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, 
GPUREG_VSH_OPDESCS_CONFIG)+regOffset, 0x00000000); u32 param[0x20]; @@ -95,13 +99,15 @@ void DVLP_SendOpDesc(DVLP_s* dvlp) //TODO : should probably preprocess this for(i=0;i<dvlp->opdescSize;i++)param[i]=dvlp->opcdescData[i*2]; - GPUCMD_Add(0x000F02D6, param, dvlp->opdescSize); + GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OPDESCS_DATA)+regOffset, param, dvlp->opdescSize); } void DVLE_SendOutmap(DVLE_s* dvle) { if(!dvle)return; + u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); + u32 param[0x7]={0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F, 0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F}; @@ -131,18 +137,20 @@ void DVLE_SendOutmap(DVLE_s* dvle) if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; } - GPUCMD_AddSingleParam(0x000F0251, numAttr-1); //? - GPUCMD_AddSingleParam(0x000F024A, numAttr-1); //? - GPUCMD_AddSingleParam(0x000F02BD, attrMask); //? - GPUCMD_AddSingleParam(0x0001025E, numAttr-1); //? - GPUCMD_AddSingleParam(0x000F004F, numAttr); //? - GPUCMD_Add(0x800F0050, param, 0x00000007); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_0251), numAttr-1); //? + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_024A), numAttr-1); //? 
+ GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OUTMAP_MASK)+regOffset, attrMask); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x1, GPUREG_PRIMITIVE_CONFIG), numAttr-1); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_SH_OUTMAP_TOTAL), numAttr); + GPUCMD_Add(GPUCMD_HEADER(1, 0xF, GPUREG_SH_OUTMAP_O0), param, 0x00000007); } void DVLE_SendConstants(DVLE_s* dvle) { if(!dvle)return; + u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); + u32 param[4]; u32 rev[3]; u8* rev8=(u8*)rev; @@ -161,7 +169,7 @@ void DVLE_SendConstants(DVLE_s* dvle) param[0x2]=rev[1]; param[0x3]=rev[0]; - GPUCMD_Add(0x800F02C0, param, 0x00000004); + GPUCMD_Add(GPUCMD_HEADER(1, 0xF, GPUREG_VSH_FLOATUNIFORM_CONFIG)+regOffset, param, 0x00000004); } } @@ -170,24 +178,26 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) if(!dvlb || id>dvlb->numDVLE)return; DVLE_s* dvle=&dvlb->DVLE[id]; - //? - GPUCMD_AddSingleParam(0x00010229, 0x00000000); - GPUCMD_AddSingleParam(0x00010244, 0x00000000); + u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0); - DVLP_SendCode(&dvlb->DVLP); - DVLP_SendOpDesc(&dvlb->DVLP); + + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x1, GPUREG_GEOSTAGE_CONFIG), 0x00000000); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x1, GPUREG_0244), (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); + + DVLP_SendCode(&dvlb->DVLP, dvlb->DVLE[id].type); + DVLP_SendOpDesc(&dvlb->DVLP, dvlb->DVLE[id].type); DVLE_SendConstants(dvle); - GPUCMD_AddSingleParam(0x00080229, 0x00000000); - GPUCMD_AddSingleParam(0x000F02BA, 0x7FFF0000|(dvle->mainOffset&0xFFFF)); //set entrypoint + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x8, GPUREG_GEOSTAGE_CONFIG), 0x00000000); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_ENTRYPOINT)-regOffset, 0x7FFF0000|(dvle->mainOffset&0xFFFF)); //set entrypoint - GPUCMD_AddSingleParam(0x000F0252, 0x00000000); // should all be part of DVLE_SendOutmap ? 
+ GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_0252), 0x00000000); // should all be part of DVLE_SendOutmap ? DVLE_SendOutmap(dvle); //? - GPUCMD_AddSingleParam(0x000F0064, 0x00000001); - GPUCMD_AddSingleParam(0x000F006F, 0x00000703); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_0064), 0x00000001); + GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_006F), 0x00000703); } //TODO From 91f01e8280876cb277c592f056a43c1bfba81e31 Mon Sep 17 00:00:00 2001 From: smea Date: Wed, 17 Dec 2014 22:48:05 -0800 Subject: [PATCH 02/21] added registers.h (for real this time) --- libctru/include/3ds/gpu/registers.h | 729 ++++++++++++++++++++++++++++ 1 file changed, 729 insertions(+) create mode 100644 libctru/include/3ds/gpu/registers.h diff --git a/libctru/include/3ds/gpu/registers.h b/libctru/include/3ds/gpu/registers.h new file mode 100644 index 0000000..abbf61e --- /dev/null +++ b/libctru/include/3ds/gpu/registers.h @@ -0,0 +1,729 @@ +#pragma once + +#define GPUREG_0000 0x0000 +#define GPUREG_0001 0x0001 +#define GPUREG_0002 0x0002 +#define GPUREG_0003 0x0003 +#define GPUREG_0004 0x0004 +#define GPUREG_0005 0x0005 +#define GPUREG_0006 0x0006 +#define GPUREG_0007 0x0007 +#define GPUREG_0008 0x0008 +#define GPUREG_0009 0x0009 +#define GPUREG_000A 0x000A +#define GPUREG_000B 0x000B +#define GPUREG_000C 0x000C +#define GPUREG_000D 0x000D +#define GPUREG_000E 0x000E +#define GPUREG_000F 0x000F +#define GPUREG_FINALIZE 0x0010 +#define GPUREG_0011 0x0011 +#define GPUREG_0012 0x0012 +#define GPUREG_0013 0x0013 +#define GPUREG_0014 0x0014 +#define GPUREG_0015 0x0015 +#define GPUREG_0016 0x0016 +#define GPUREG_0017 0x0017 +#define GPUREG_0018 0x0018 +#define GPUREG_0019 0x0019 +#define GPUREG_001A 0x001A +#define GPUREG_001B 0x001B +#define GPUREG_001C 0x001C +#define GPUREG_001D 0x001D +#define GPUREG_001E 0x001E +#define GPUREG_001F 0x001F +#define GPUREG_0020 0x0020 +#define GPUREG_0021 0x0021 +#define GPUREG_0022 0x0022 +#define GPUREG_0023 0x0023 +#define GPUREG_0024 
0x0024 +#define GPUREG_0025 0x0025 +#define GPUREG_0026 0x0026 +#define GPUREG_0027 0x0027 +#define GPUREG_0028 0x0028 +#define GPUREG_0029 0x0029 +#define GPUREG_002A 0x002A +#define GPUREG_002B 0x002B +#define GPUREG_002C 0x002C +#define GPUREG_002D 0x002D +#define GPUREG_002E 0x002E +#define GPUREG_002F 0x002F +#define GPUREG_0030 0x0030 +#define GPUREG_0031 0x0031 +#define GPUREG_0032 0x0032 +#define GPUREG_0033 0x0033 +#define GPUREG_0034 0x0034 +#define GPUREG_0035 0x0035 +#define GPUREG_0036 0x0036 +#define GPUREG_0037 0x0037 +#define GPUREG_0038 0x0038 +#define GPUREG_0039 0x0039 +#define GPUREG_003A 0x003A +#define GPUREG_003B 0x003B +#define GPUREG_003C 0x003C +#define GPUREG_003D 0x003D +#define GPUREG_003E 0x003E +#define GPUREG_003F 0x003F +#define GPUREG_FACECULLING_CONFIG 0x0040 +#define GPUREG_0041 0x0041 +#define GPUREG_0042 0x0042 +#define GPUREG_0043 0x0043 +#define GPUREG_0044 0x0044 +#define GPUREG_0045 0x0045 +#define GPUREG_0046 0x0046 +#define GPUREG_0047 0x0047 +#define GPUREG_0048 0x0048 +#define GPUREG_0049 0x0049 +#define GPUREG_004A 0x004A +#define GPUREG_004B 0x004B +#define GPUREG_004C 0x004C +#define GPUREG_DEPTHRANGE_NEAR 0x004D +#define GPUREG_DEPTHRANGE_FAR 0x004E +#define GPUREG_SH_OUTMAP_TOTAL 0x004F +#define GPUREG_SH_OUTMAP_O0 0x0050 +#define GPUREG_SH_OUTMAP_O1 0x0051 +#define GPUREG_SH_OUTMAP_O2 0x0052 +#define GPUREG_SH_OUTMAP_O3 0x0053 +#define GPUREG_SH_OUTMAP_O4 0x0054 +#define GPUREG_SH_OUTMAP_O5 0x0055 +#define GPUREG_SH_OUTMAP_O6 0x0056 +#define GPUREG_0057 0x0057 +#define GPUREG_0058 0x0058 +#define GPUREG_0059 0x0059 +#define GPUREG_005A 0x005A +#define GPUREG_005B 0x005B +#define GPUREG_005C 0x005C +#define GPUREG_005D 0x005D +#define GPUREG_005E 0x005E +#define GPUREG_005F 0x005F +#define GPUREG_0060 0x0060 +#define GPUREG_0061 0x0061 +#define GPUREG_0062 0x0062 +#define GPUREG_0063 0x0063 +#define GPUREG_0064 0x0064 +#define GPUREG_SCISSORTEST_MODE 0x0065 +#define GPUREG_SCISSORTEST_POS 0x0066 +#define 
GPUREG_SCISSORTEST_DIM 0x0067 +#define GPUREG_0068 0x0068 +#define GPUREG_0069 0x0069 +#define GPUREG_006A 0x006A +#define GPUREG_006B 0x006B +#define GPUREG_006C 0x006C +#define GPUREG_006D 0x006D +#define GPUREG_006E 0x006E +#define GPUREG_006F 0x006F +#define GPUREG_0070 0x0070 +#define GPUREG_0071 0x0071 +#define GPUREG_0072 0x0072 +#define GPUREG_0073 0x0073 +#define GPUREG_0074 0x0074 +#define GPUREG_0075 0x0075 +#define GPUREG_0076 0x0076 +#define GPUREG_0077 0x0077 +#define GPUREG_0078 0x0078 +#define GPUREG_0079 0x0079 +#define GPUREG_007A 0x007A +#define GPUREG_007B 0x007B +#define GPUREG_007C 0x007C +#define GPUREG_007D 0x007D +#define GPUREG_007E 0x007E +#define GPUREG_007F 0x007F +#define GPUREG_TEXUNITS_CONFIG 0x0080 +#define GPUREG_0081 0x0081 +#define GPUREG_TEXUNIT0_DIM 0x0082 +#define GPUREG_TEXUNIT0_PARAM 0x0083 +#define GPUREG_0084 0x0084 +#define GPUREG_TEXUNIT0_LOC 0x0085 +#define GPUREG_0086 0x0086 +#define GPUREG_0087 0x0087 +#define GPUREG_0088 0x0088 +#define GPUREG_0089 0x0089 +#define GPUREG_008A 0x008A +#define GPUREG_008B 0x008B +#define GPUREG_008C 0x008C +#define GPUREG_008D 0x008D +#define GPUREG_TEXUNIT0_TYPE 0x008E +#define GPUREG_008F 0x008F +#define GPUREG_0090 0x0090 +#define GPUREG_0091 0x0091 +#define GPUREG_TEXUNIT1_DIM 0x0092 +#define GPUREG_TEXUNIT1_PARAM 0x0093 +#define GPUREG_0094 0x0094 +#define GPUREG_TEXUNIT1_LOC 0x0095 +#define GPUREG_TEXUNIT1_TYPE 0x0096 +#define GPUREG_0097 0x0097 +#define GPUREG_0098 0x0098 +#define GPUREG_0099 0x0099 +#define GPUREG_TEXUNIT2_DIM 0x009A +#define GPUREG_TEXUNIT2_PARAM 0x009B +#define GPUREG_009C 0x009C +#define GPUREG_TEXUNIT2_LOC 0x009D +#define GPUREG_TEXUNIT2_TYPE 0x009E +#define GPUREG_009F 0x009F +#define GPUREG_00A0 0x00A0 +#define GPUREG_00A1 0x00A1 +#define GPUREG_00A2 0x00A2 +#define GPUREG_00A3 0x00A3 +#define GPUREG_00A4 0x00A4 +#define GPUREG_00A5 0x00A5 +#define GPUREG_00A6 0x00A6 +#define GPUREG_00A7 0x00A7 +#define GPUREG_00A8 0x00A8 +#define GPUREG_00A9 0x00A9 
+#define GPUREG_00AA 0x00AA +#define GPUREG_00AB 0x00AB +#define GPUREG_00AC 0x00AC +#define GPUREG_00AD 0x00AD +#define GPUREG_00AE 0x00AE +#define GPUREG_00AF 0x00AF +#define GPUREG_00B0 0x00B0 +#define GPUREG_00B1 0x00B1 +#define GPUREG_00B2 0x00B2 +#define GPUREG_00B3 0x00B3 +#define GPUREG_00B4 0x00B4 +#define GPUREG_00B5 0x00B5 +#define GPUREG_00B6 0x00B6 +#define GPUREG_00B7 0x00B7 +#define GPUREG_00B8 0x00B8 +#define GPUREG_00B9 0x00B9 +#define GPUREG_00BA 0x00BA +#define GPUREG_00BB 0x00BB +#define GPUREG_00BC 0x00BC +#define GPUREG_00BD 0x00BD +#define GPUREG_00BE 0x00BE +#define GPUREG_00BF 0x00BF +#define GPUREG_TEXENV0_CONFIG0 0x00C0 +#define GPUREG_TEXENV0_CONFIG1 0x00C1 +#define GPUREG_TEXENV0_CONFIG2 0x00C2 +#define GPUREG_TEXENV0_CONFIG3 0x00C3 +#define GPUREG_TEXENV0_CONFIG4 0x00C4 +#define GPUREG_00C5 0x00C5 +#define GPUREG_00C6 0x00C6 +#define GPUREG_00C7 0x00C7 +#define GPUREG_TEXENV1_CONFIG0 0x00C8 +#define GPUREG_TEXENV1_CONFIG1 0x00C9 +#define GPUREG_TEXENV1_CONFIG2 0x00CA +#define GPUREG_TEXENV1_CONFIG3 0x00CB +#define GPUREG_TEXENV1_CONFIG4 0x00CC +#define GPUREG_00CD 0x00CD +#define GPUREG_00CE 0x00CE +#define GPUREG_00CF 0x00CF +#define GPUREG_TEXENV2_CONFIG0 0x00D0 +#define GPUREG_TEXENV2_CONFIG1 0x00D1 +#define GPUREG_TEXENV2_CONFIG2 0x00D2 +#define GPUREG_TEXENV2_CONFIG3 0x00D3 +#define GPUREG_TEXENV2_CONFIG4 0x00D4 +#define GPUREG_00D5 0x00D5 +#define GPUREG_00D6 0x00D6 +#define GPUREG_00D7 0x00D7 +#define GPUREG_TEXENV3_CONFIG0 0x00D8 +#define GPUREG_TEXENV3_CONFIG1 0x00D9 +#define GPUREG_TEXENV3_CONFIG2 0x00DA +#define GPUREG_TEXENV3_CONFIG3 0x00DB +#define GPUREG_TEXENV3_CONFIG4 0x00DC +#define GPUREG_00DD 0x00DD +#define GPUREG_00DE 0x00DE +#define GPUREG_00DF 0x00DF +#define GPUREG_00E0 0x00E0 +#define GPUREG_00E1 0x00E1 +#define GPUREG_00E2 0x00E2 +#define GPUREG_00E3 0x00E3 +#define GPUREG_00E4 0x00E4 +#define GPUREG_00E5 0x00E5 +#define GPUREG_00E6 0x00E6 +#define GPUREG_00E7 0x00E7 +#define GPUREG_00E8 0x00E8 +#define 
GPUREG_00E9 0x00E9 +#define GPUREG_00EA 0x00EA +#define GPUREG_00EB 0x00EB +#define GPUREG_00EC 0x00EC +#define GPUREG_00ED 0x00ED +#define GPUREG_00EE 0x00EE +#define GPUREG_00EF 0x00EF +#define GPUREG_TEXENV4_CONFIG0 0x00F0 +#define GPUREG_TEXENV4_CONFIG1 0x00F1 +#define GPUREG_TEXENV4_CONFIG2 0x00F2 +#define GPUREG_TEXENV4_CONFIG3 0x00F3 +#define GPUREG_TEXENV4_CONFIG4 0x00F4 +#define GPUREG_00F5 0x00F5 +#define GPUREG_00F6 0x00F6 +#define GPUREG_00F7 0x00F7 +#define GPUREG_TEXENV5_CONFIG0 0x00F8 +#define GPUREG_TEXENV5_CONFIG1 0x00F9 +#define GPUREG_TEXENV5_CONFIG2 0x00FA +#define GPUREG_TEXENV5_CONFIG3 0x00FB +#define GPUREG_TEXENV5_CONFIG4 0x00FC +#define GPUREG_00FD 0x00FD +#define GPUREG_00FE 0x00FE +#define GPUREG_00FF 0x00FF +#define GPUREG_COLOROUTPUT_CONFIG 0x0100 +#define GPUREG_BLEND_CONFIG 0x0101 +#define GPUREG_COLORLOGICOP_CONFIG 0x0102 +#define GPUREG_BLEND_COLOR 0x0103 +#define GPUREG_ALPHATEST_CONFIG 0x0104 +#define GPUREG_STENCILTEST_CONFIG 0x0105 +#define GPUREG_STENCILOP_CONFIG 0x0106 +#define GPUREG_DEPTHTEST_CONFIG 0x0107 +#define GPUREG_0108 0x0108 +#define GPUREG_0109 0x0109 +#define GPUREG_010A 0x010A +#define GPUREG_010B 0x010B +#define GPUREG_010C 0x010C +#define GPUREG_010D 0x010D +#define GPUREG_010E 0x010E +#define GPUREG_010F 0x010F +#define GPUREG_0110 0x0110 +#define GPUREG_0111 0x0111 +#define GPUREG_0112 0x0112 +#define GPUREG_0113 0x0113 +#define GPUREG_0114 0x0114 +#define GPUREG_0115 0x0115 +#define GPUREG_DEPTHBUFFER_FORMAT 0x0116 +#define GPUREG_COLORBUFFER_FORMAT 0x0117 +#define GPUREG_0118 0x0118 +#define GPUREG_0119 0x0119 +#define GPUREG_011A 0x011A +#define GPUREG_011B 0x011B +#define GPUREG_DEPTHBUFFER_LOC 0x011C +#define GPUREG_COLORBUFFER_LOC 0x011D +#define GPUREG_OUTBUFFER_DIM 0x011E +#define GPUREG_011F 0x011F +#define GPUREG_0120 0x0120 +#define GPUREG_0121 0x0121 +#define GPUREG_0122 0x0122 +#define GPUREG_0123 0x0123 +#define GPUREG_0124 0x0124 +#define GPUREG_0125 0x0125 +#define GPUREG_0126 0x0126 +#define 
GPUREG_0127 0x0127 +#define GPUREG_0128 0x0128 +#define GPUREG_0129 0x0129 +#define GPUREG_012A 0x012A +#define GPUREG_012B 0x012B +#define GPUREG_012C 0x012C +#define GPUREG_012D 0x012D +#define GPUREG_012E 0x012E +#define GPUREG_012F 0x012F +#define GPUREG_0130 0x0130 +#define GPUREG_0131 0x0131 +#define GPUREG_0132 0x0132 +#define GPUREG_0133 0x0133 +#define GPUREG_0134 0x0134 +#define GPUREG_0135 0x0135 +#define GPUREG_0136 0x0136 +#define GPUREG_0137 0x0137 +#define GPUREG_0138 0x0138 +#define GPUREG_0139 0x0139 +#define GPUREG_013A 0x013A +#define GPUREG_013B 0x013B +#define GPUREG_013C 0x013C +#define GPUREG_013D 0x013D +#define GPUREG_013E 0x013E +#define GPUREG_013F 0x013F +#define GPUREG_0140 0x0140 +#define GPUREG_0141 0x0141 +#define GPUREG_0142 0x0142 +#define GPUREG_0143 0x0143 +#define GPUREG_0144 0x0144 +#define GPUREG_0145 0x0145 +#define GPUREG_0146 0x0146 +#define GPUREG_0147 0x0147 +#define GPUREG_0148 0x0148 +#define GPUREG_0149 0x0149 +#define GPUREG_014A 0x014A +#define GPUREG_014B 0x014B +#define GPUREG_014C 0x014C +#define GPUREG_014D 0x014D +#define GPUREG_014E 0x014E +#define GPUREG_014F 0x014F +#define GPUREG_0150 0x0150 +#define GPUREG_0151 0x0151 +#define GPUREG_0152 0x0152 +#define GPUREG_0153 0x0153 +#define GPUREG_0154 0x0154 +#define GPUREG_0155 0x0155 +#define GPUREG_0156 0x0156 +#define GPUREG_0157 0x0157 +#define GPUREG_0158 0x0158 +#define GPUREG_0159 0x0159 +#define GPUREG_015A 0x015A +#define GPUREG_015B 0x015B +#define GPUREG_015C 0x015C +#define GPUREG_015D 0x015D +#define GPUREG_015E 0x015E +#define GPUREG_015F 0x015F +#define GPUREG_0160 0x0160 +#define GPUREG_0161 0x0161 +#define GPUREG_0162 0x0162 +#define GPUREG_0163 0x0163 +#define GPUREG_0164 0x0164 +#define GPUREG_0165 0x0165 +#define GPUREG_0166 0x0166 +#define GPUREG_0167 0x0167 +#define GPUREG_0168 0x0168 +#define GPUREG_0169 0x0169 +#define GPUREG_016A 0x016A +#define GPUREG_016B 0x016B +#define GPUREG_016C 0x016C +#define GPUREG_016D 0x016D +#define GPUREG_016E 
0x016E +#define GPUREG_016F 0x016F +#define GPUREG_0170 0x0170 +#define GPUREG_0171 0x0171 +#define GPUREG_0172 0x0172 +#define GPUREG_0173 0x0173 +#define GPUREG_0174 0x0174 +#define GPUREG_0175 0x0175 +#define GPUREG_0176 0x0176 +#define GPUREG_0177 0x0177 +#define GPUREG_0178 0x0178 +#define GPUREG_0179 0x0179 +#define GPUREG_017A 0x017A +#define GPUREG_017B 0x017B +#define GPUREG_017C 0x017C +#define GPUREG_017D 0x017D +#define GPUREG_017E 0x017E +#define GPUREG_017F 0x017F +#define GPUREG_0180 0x0180 +#define GPUREG_0181 0x0181 +#define GPUREG_0182 0x0182 +#define GPUREG_0183 0x0183 +#define GPUREG_0184 0x0184 +#define GPUREG_0185 0x0185 +#define GPUREG_0186 0x0186 +#define GPUREG_0187 0x0187 +#define GPUREG_0188 0x0188 +#define GPUREG_0189 0x0189 +#define GPUREG_018A 0x018A +#define GPUREG_018B 0x018B +#define GPUREG_018C 0x018C +#define GPUREG_018D 0x018D +#define GPUREG_018E 0x018E +#define GPUREG_018F 0x018F +#define GPUREG_0190 0x0190 +#define GPUREG_0191 0x0191 +#define GPUREG_0192 0x0192 +#define GPUREG_0193 0x0193 +#define GPUREG_0194 0x0194 +#define GPUREG_0195 0x0195 +#define GPUREG_0196 0x0196 +#define GPUREG_0197 0x0197 +#define GPUREG_0198 0x0198 +#define GPUREG_0199 0x0199 +#define GPUREG_019A 0x019A +#define GPUREG_019B 0x019B +#define GPUREG_019C 0x019C +#define GPUREG_019D 0x019D +#define GPUREG_019E 0x019E +#define GPUREG_019F 0x019F +#define GPUREG_01A0 0x01A0 +#define GPUREG_01A1 0x01A1 +#define GPUREG_01A2 0x01A2 +#define GPUREG_01A3 0x01A3 +#define GPUREG_01A4 0x01A4 +#define GPUREG_01A5 0x01A5 +#define GPUREG_01A6 0x01A6 +#define GPUREG_01A7 0x01A7 +#define GPUREG_01A8 0x01A8 +#define GPUREG_01A9 0x01A9 +#define GPUREG_01AA 0x01AA +#define GPUREG_01AB 0x01AB +#define GPUREG_01AC 0x01AC +#define GPUREG_01AD 0x01AD +#define GPUREG_01AE 0x01AE +#define GPUREG_01AF 0x01AF +#define GPUREG_01B0 0x01B0 +#define GPUREG_01B1 0x01B1 +#define GPUREG_01B2 0x01B2 +#define GPUREG_01B3 0x01B3 +#define GPUREG_01B4 0x01B4 +#define GPUREG_01B5 0x01B5 
+#define GPUREG_01B6 0x01B6 +#define GPUREG_01B7 0x01B7 +#define GPUREG_01B8 0x01B8 +#define GPUREG_01B9 0x01B9 +#define GPUREG_01BA 0x01BA +#define GPUREG_01BB 0x01BB +#define GPUREG_01BC 0x01BC +#define GPUREG_01BD 0x01BD +#define GPUREG_01BE 0x01BE +#define GPUREG_01BF 0x01BF +#define GPUREG_01C0 0x01C0 +#define GPUREG_01C1 0x01C1 +#define GPUREG_01C2 0x01C2 +#define GPUREG_01C3 0x01C3 +#define GPUREG_01C4 0x01C4 +#define GPUREG_01C5 0x01C5 +#define GPUREG_01C6 0x01C6 +#define GPUREG_01C7 0x01C7 +#define GPUREG_01C8 0x01C8 +#define GPUREG_01C9 0x01C9 +#define GPUREG_01CA 0x01CA +#define GPUREG_01CB 0x01CB +#define GPUREG_01CC 0x01CC +#define GPUREG_01CD 0x01CD +#define GPUREG_01CE 0x01CE +#define GPUREG_01CF 0x01CF +#define GPUREG_01D0 0x01D0 +#define GPUREG_01D1 0x01D1 +#define GPUREG_01D2 0x01D2 +#define GPUREG_01D3 0x01D3 +#define GPUREG_01D4 0x01D4 +#define GPUREG_01D5 0x01D5 +#define GPUREG_01D6 0x01D6 +#define GPUREG_01D7 0x01D7 +#define GPUREG_01D8 0x01D8 +#define GPUREG_01D9 0x01D9 +#define GPUREG_01DA 0x01DA +#define GPUREG_01DB 0x01DB +#define GPUREG_01DC 0x01DC +#define GPUREG_01DD 0x01DD +#define GPUREG_01DE 0x01DE +#define GPUREG_01DF 0x01DF +#define GPUREG_01E0 0x01E0 +#define GPUREG_01E1 0x01E1 +#define GPUREG_01E2 0x01E2 +#define GPUREG_01E3 0x01E3 +#define GPUREG_01E4 0x01E4 +#define GPUREG_01E5 0x01E5 +#define GPUREG_01E6 0x01E6 +#define GPUREG_01E7 0x01E7 +#define GPUREG_01E8 0x01E8 +#define GPUREG_01E9 0x01E9 +#define GPUREG_01EA 0x01EA +#define GPUREG_01EB 0x01EB +#define GPUREG_01EC 0x01EC +#define GPUREG_01ED 0x01ED +#define GPUREG_01EE 0x01EE +#define GPUREG_01EF 0x01EF +#define GPUREG_01F0 0x01F0 +#define GPUREG_01F1 0x01F1 +#define GPUREG_01F2 0x01F2 +#define GPUREG_01F3 0x01F3 +#define GPUREG_01F4 0x01F4 +#define GPUREG_01F5 0x01F5 +#define GPUREG_01F6 0x01F6 +#define GPUREG_01F7 0x01F7 +#define GPUREG_01F8 0x01F8 +#define GPUREG_01F9 0x01F9 +#define GPUREG_01FA 0x01FA +#define GPUREG_01FB 0x01FB +#define GPUREG_01FC 0x01FC +#define 
GPUREG_01FD 0x01FD +#define GPUREG_01FE 0x01FE +#define GPUREG_01FF 0x01FF +#define pipeline 0xGeometry +#define GPUREG_ATTRIBBUFFERS_LOC 0x0200 +#define GPUREG_ATTRIBBUFFERS_FORMAT_LOW 0x0201 +#define GPUREG_ATTRIBBUFFERS_FORMAT_HIGH 0x0202 +#define GPUREG_ATTRIBBUFFER0_CONFIG0 0x0203 +#define GPUREG_ATTRIBBUFFER0_CONFIG1 0x0204 +#define GPUREG_ATTRIBBUFFER0_CONFIG2 0x0205 +#define GPUREG_ATTRIBBUFFER1_CONFIG0 0x0206 +#define GPUREG_ATTRIBBUFFER1_CONFIG1 0x0207 +#define GPUREG_ATTRIBBUFFER1_CONFIG2 0x0208 +#define GPUREG_ATTRIBBUFFER2_CONFIG0 0x0209 +#define GPUREG_ATTRIBBUFFER2_CONFIG1 0x020A +#define GPUREG_ATTRIBBUFFER2_CONFIG2 0x020B +#define GPUREG_ATTRIBBUFFER3_CONFIG0 0x020C +#define GPUREG_ATTRIBBUFFER3_CONFIG1 0x020D +#define GPUREG_ATTRIBBUFFER3_CONFIG2 0x020E +#define GPUREG_ATTRIBBUFFER4_CONFIG0 0x020F +#define GPUREG_ATTRIBBUFFER4_CONFIG1 0x0210 +#define GPUREG_ATTRIBBUFFER4_CONFIG2 0x0211 +#define GPUREG_ATTRIBBUFFER5_CONFIG0 0x0212 +#define GPUREG_ATTRIBBUFFER5_CONFIG1 0x0213 +#define GPUREG_ATTRIBBUFFER5_CONFIG2 0x0214 +#define GPUREG_ATTRIBBUFFER6_CONFIG0 0x0215 +#define GPUREG_ATTRIBBUFFER6_CONFIG1 0x0216 +#define GPUREG_ATTRIBBUFFER6_CONFIG2 0x0217 +#define GPUREG_ATTRIBBUFFER7_CONFIG0 0x0218 +#define GPUREG_ATTRIBBUFFER7_CONFIG1 0x0219 +#define GPUREG_ATTRIBBUFFER7_CONFIG2 0x021A +#define GPUREG_ATTRIBBUFFER8_CONFIG0 0x021B +#define GPUREG_ATTRIBBUFFER8_CONFIG1 0x021C +#define GPUREG_ATTRIBBUFFER8_CONFIG2 0x021D +#define GPUREG_ATTRIBBUFFER9_CONFIG0 0x021E +#define GPUREG_ATTRIBBUFFER9_CONFIG1 0x021F +#define GPUREG_ATTRIBBUFFER9_CONFIG2 0x0220 +#define GPUREG_ATTRIBBUFFERA_CONFIG0 0x0221 +#define GPUREG_ATTRIBBUFFERA_CONFIG1 0x0222 +#define GPUREG_ATTRIBBUFFERA_CONFIG2 0x0223 +#define GPUREG_ATTRIBBUFFERB_CONFIG0 0x0224 +#define GPUREG_ATTRIBBUFFERB_CONFIG1 0x0225 +#define GPUREG_ATTRIBBUFFERB_CONFIG2 0x0226 +#define GPUREG_INDEXBUFFER_CONFIG 0x0227 +#define GPUREG_NUMVERTICES 0x0228 +#define GPUREG_GEOSTAGE_CONFIG 0x0229 +#define GPUREG_022A 
0x022A +#define GPUREG_022B 0x022B +#define GPUREG_022C 0x022C +#define GPUREG_022D 0x022D +#define GPUREG_DRAWARRAYS 0x022E +#define GPUREG_DRAWELEMENTS 0x022F +#define GPUREG_0230 0x0230 +#define GPUREG_0231 0x0231 +#define GPUREG_0232 0x0232 +#define GPUREG_0233 0x0233 +#define GPUREG_0234 0x0234 +#define GPUREG_0235 0x0235 +#define GPUREG_0236 0x0236 +#define GPUREG_0237 0x0237 +#define GPUREG_0238 0x0238 +#define GPUREG_0239 0x0239 +#define GPUREG_023A 0x023A +#define GPUREG_023B 0x023B +#define GPUREG_023C 0x023C +#define GPUREG_023D 0x023D +#define GPUREG_023E 0x023E +#define GPUREG_023F 0x023F +#define GPUREG_0240 0x0240 +#define GPUREG_0241 0x0241 +#define GPUREG_0242 0x0242 +#define GPUREG_0243 0x0243 +#define GPUREG_0244 0x0244 +#define GPUREG_0245 0x0245 +#define GPUREG_0246 0x0246 +#define GPUREG_0247 0x0247 +#define GPUREG_0248 0x0248 +#define GPUREG_0249 0x0249 +#define GPUREG_024A 0x024A +#define GPUREG_024B 0x024B +#define GPUREG_024C 0x024C +#define GPUREG_024D 0x024D +#define GPUREG_024E 0x024E +#define GPUREG_024F 0x024F +#define GPUREG_0250 0x0250 +#define GPUREG_0251 0x0251 +#define GPUREG_0252 0x0252 +#define GPUREG_0253 0x0253 +#define GPUREG_0254 0x0254 +#define GPUREG_0255 0x0255 +#define GPUREG_0256 0x0256 +#define GPUREG_0257 0x0257 +#define GPUREG_0258 0x0258 +#define GPUREG_0259 0x0259 +#define GPUREG_025A 0x025A +#define GPUREG_025B 0x025B +#define GPUREG_025C 0x025C +#define GPUREG_025D 0x025D +#define GPUREG_PRIMITIVE_CONFIG 0x025E +#define GPUREG_025F 0x025F +#define GPUREG_0260 0x0260 +#define GPUREG_0261 0x0261 +#define GPUREG_0262 0x0262 +#define GPUREG_0263 0x0263 +#define GPUREG_0264 0x0264 +#define GPUREG_0265 0x0265 +#define GPUREG_0266 0x0266 +#define GPUREG_0267 0x0267 +#define GPUREG_0268 0x0268 +#define GPUREG_0269 0x0269 +#define GPUREG_026A 0x026A +#define GPUREG_026B 0x026B +#define GPUREG_026C 0x026C +#define GPUREG_026D 0x026D +#define GPUREG_026E 0x026E +#define GPUREG_026F 0x026F +#define GPUREG_0270 0x0270 
+#define GPUREG_0271 0x0271 +#define GPUREG_0272 0x0272 +#define GPUREG_0273 0x0273 +#define GPUREG_0274 0x0274 +#define GPUREG_0275 0x0275 +#define GPUREG_0276 0x0276 +#define GPUREG_0277 0x0277 +#define GPUREG_0278 0x0278 +#define GPUREG_0279 0x0279 +#define GPUREG_027A 0x027A +#define GPUREG_027B 0x027B +#define GPUREG_027C 0x027C +#define GPUREG_027D 0x027D +#define GPUREG_027E 0x027E +#define GPUREG_027F 0x027F +#define GPUREG_GSH_BOOLUNIFORM 0x0280 +#define GPUREG_GSH_INTUNIFORM_I0 0x0281 +#define GPUREG_GSH_INTUNIFORM_I1 0x0282 +#define GPUREG_GSH_INTUNIFORM_I2 0x0283 +#define GPUREG_GSH_INTUNIFORM_I3 0x0284 +#define GPUREG_0285 0x0285 +#define GPUREG_0286 0x0286 +#define GPUREG_0287 0x0287 +#define GPUREG_0288 0x0288 +#define GPUREG_GSH_INPUTBUFFER_CONFIG 0x0289 +#define GPUREG_GSH_ENTRYPOINT 0x028A +#define GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW 0x028B +#define GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH 0x028C +#define GPUREG_GSH_OUTMAP_MASK 0x028D +#define GPUREG_028E 0x028E +#define GPUREG_GSH_CODETRANSFER_END 0x028F +#define GPUREG_GSH_FLOATUNIFORM_CONFIG 0x0290 +#define GPUREG_GSH_FLOATUNIFORM_DATA 0x0291 +#define GPUREG_0299 0x0299 +#define GPUREG_029A 0x029A +#define GPUREG_GSH_CODETRANSFER_CONFIG 0x029B +#define GPUREG_GSH_CODETRANSFER_DATA 0x029C +#define GPUREG_02A4 0x02A4 +#define GPUREG_GSH_OPDESCS_CONFIG 0x02A5 +#define GPUREG_GSH_OPDESCS_DATA 0x02A6 +#define GPUREG_02AE 0x02AE +#define GPUREG_02AF 0x02AF +#define GPUREG_VSH_BOOLUNIFORM 0x02B0 +#define GPUREG_VSH_INTUNIFORM_I0 0x02B1 +#define GPUREG_VSH_INTUNIFORM_I1 0x02B2 +#define GPUREG_VSH_INTUNIFORM_I2 0x02B3 +#define GPUREG_VSH_INTUNIFORM_I3 0x02B4 +#define GPUREG_02B5 0x02B5 +#define GPUREG_02B6 0x02B6 +#define GPUREG_02B7 0x02B7 +#define GPUREG_02B8 0x02B8 +#define GPUREG_VSH_INPUTBUFFER_CONFIG 0x02B9 +#define GPUREG_VSH_ENTRYPOINT 0x02BA +#define GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW 0x02BB +#define GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH 0x02BC +#define GPUREG_VSH_OUTMAP_MASK 0x02BD 
+#define GPUREG_02BE 0x02BE +#define GPUREG_VSH_CODETRANSFER_END 0x02BF +#define GPUREG_VSH_FLOATUNIFORM_CONFIG 0x02C0 +#define GPUREG_VSH_FLOATUNIFORM_DATA 0x02C1 +#define GPUREG_02C9 0x02C9 +#define GPUREG_02CA 0x02CA +#define GPUREG_VSH_CODETRANSFER_CONFIG 0x02CB +#define GPUREG_VSH_CODETRANSFER_DATA 0x02CC +#define GPUREG_02D4 0x02D4 +#define GPUREG_VSH_OPDESCS_CONFIG 0x02D5 +#define GPUREG_VSH_OPDESCS_DATA 0x02D6 +#define GPUREG_02DE 0x02DE +#define GPUREG_02DF 0x02DF +#define GPUREG_02E0 0x02E0 +#define GPUREG_02E1 0x02E1 +#define GPUREG_02E2 0x02E2 +#define GPUREG_02E3 0x02E3 +#define GPUREG_02E4 0x02E4 +#define GPUREG_02E5 0x02E5 +#define GPUREG_02E6 0x02E6 +#define GPUREG_02E7 0x02E7 +#define GPUREG_02E8 0x02E8 +#define GPUREG_02E9 0x02E9 +#define GPUREG_02EA 0x02EA +#define GPUREG_02EB 0x02EB +#define GPUREG_02EC 0x02EC +#define GPUREG_02ED 0x02ED +#define GPUREG_02EE 0x02EE +#define GPUREG_02EF 0x02EF +#define GPUREG_02F0 0x02F0 +#define GPUREG_02F1 0x02F1 +#define GPUREG_02F2 0x02F2 +#define GPUREG_02F3 0x02F3 +#define GPUREG_02F4 0x02F4 +#define GPUREG_02F5 0x02F5 +#define GPUREG_02F6 0x02F6 +#define GPUREG_02F7 0x02F7 +#define GPUREG_02F8 0x02F8 +#define GPUREG_02F9 0x02F9 +#define GPUREG_02FA 0x02FA +#define GPUREG_02FB 0x02FB +#define GPUREG_02FC 0x02FC +#define GPUREG_02FD 0x02FD +#define GPUREG_02FE 0x02FE +#define GPUREG_02FF 0x02FF From db11dd9bbcf429480b23b03d9659436cd8ba7013 Mon Sep 17 00:00:00 2001 From: smea Date: Thu, 18 Dec 2014 21:11:53 -0800 Subject: [PATCH 03/21] GPUCMD_AddMaskedWrite, GPUCMD_AddWrite, GPUCMD_AddMaskedWrites, GPUCMD_AddWrites, GPUCMD_AddMaskedIncrementalWrites, GPUCMD_AddIncrementalWrites --- libctru/include/3ds/gpu/gpu.h | 14 +++++++++++--- libctru/source/gpu/gpu.c | 11 +++-------- libctru/source/gpu/shdr.c | 34 +++++++++++++++++----------------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index ac25969..dc590e0 100644 --- 
a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -7,7 +7,7 @@ void GPU_Init(Handle *gsphandle); void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize); //GPUCMD -#define GPUCMD_HEADER(consec, mask, reg) (((consec)<<31)|(((mask)&0xF)<<16)|((reg)&0x3FF)) +#define GPUCMD_HEADER(incremental, mask, reg) (((incremental)<<31)|(((mask)&0xF)<<16)|((reg)&0x3FF)) void GPUCMD_SetBuffer(u32* adr, u32 size, u32 offset); void GPUCMD_SetBufferOffset(u32 offset); @@ -15,10 +15,18 @@ void GPUCMD_GetBuffer(u32** adr, u32* size, u32* offset); void GPUCMD_AddRawCommands(u32* cmd, u32 size); void GPUCMD_Run(u32* gxbuf); void GPUCMD_FlushAndRun(u32* gxbuf); -void GPUCMD_Add(u32 cmd, u32* param, u32 paramlength); -void GPUCMD_AddSingleParam(u32 cmd, u32 param); +void GPUCMD_Add(u32 header, u32* param, u32 paramlength); void GPUCMD_Finalize(); +#define GPUCMD_AddSingleParam(header, param) GPUCMD_Add((header), (u32[]){(u32)(param)}, 1) + +#define GPUCMD_AddMaskedWrite(reg, mask, val) GPUCMD_AddSingleParam(GPUCMD_HEADER(0, (mask), (reg)), (val)) +#define GPUCMD_AddWrite(reg, val) GPUCMD_AddMaskedWrite((reg), 0xF, (val)) +#define GPUCMD_AddMaskedWrites(reg, mask, vals, num) GPUCMD_Add(GPUCMD_HEADER(0, (mask), (reg)), (vals), (num)) +#define GPUCMD_AddWrites(reg, vals, num) GPUCMD_AddMaskedWrites((reg), 0xF, (vals), (num)) +#define GPUCMD_AddMaskedIncrementalWrites(reg, mask, vals, num) GPUCMD_Add(GPUCMD_HEADER(1, (mask), (reg)), (vals), (num)) +#define GPUCMD_AddIncrementalWrites(reg, vals, num) GPUCMD_AddMaskedIncrementalWrites((reg), 0xF, (vals), (num)) + //tex param #define GPU_TEXTURE_MAG_FILTER(v) (((v)&0x1)<<1) //takes a GPU_TEXTURE_FILTER_PARAM #define GPU_TEXTURE_MIN_FILTER(v) (((v)&0x1)<<2) //takes a GPU_TEXTURE_FILTER_PARAM diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 6e0586d..303da75 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -60,7 +60,7 @@ void GPUCMD_FlushAndRun(u32* gxbuf) GX_SetCommandList_Last(gxbuf, 
gpuCmdBuf, gpuCmdBufOffset*4, 0x0); } -void GPUCMD_Add(u32 cmd, u32* param, u32 paramlength) +void GPUCMD_Add(u32 header, u32* param, u32 paramlength) { u32 zero=0x0; @@ -73,10 +73,10 @@ void GPUCMD_Add(u32 cmd, u32* param, u32 paramlength) if(!gpuCmdBuf || gpuCmdBufOffset+paramlength+1>gpuCmdBufSize)return; paramlength--; - cmd|=(paramlength&0x7ff)<<20; + header|=(paramlength&0x7ff)<<20; gpuCmdBuf[gpuCmdBufOffset]=param[0]; - gpuCmdBuf[gpuCmdBufOffset+1]=cmd; + gpuCmdBuf[gpuCmdBufOffset+1]=header; if(paramlength)memcpy(&gpuCmdBuf[gpuCmdBufOffset+2], &param[1], paramlength*4); @@ -85,11 +85,6 @@ void GPUCMD_Add(u32 cmd, u32* param, u32 paramlength) if(paramlength&1)gpuCmdBuf[gpuCmdBufOffset++]=0x00000000; //alignment } -void GPUCMD_AddSingleParam(u32 cmd, u32 param) -{ - GPUCMD_Add(cmd, &param, 1); -} - void GPUCMD_Finalize() { GPUCMD_AddSingleParam(0x0008025E, 0x00000000); diff --git a/libctru/source/gpu/shdr.c b/libctru/source/gpu/shdr.c index 0efe3fa..60952e8 100644 --- a/libctru/source/gpu/shdr.c +++ b/libctru/source/gpu/shdr.c @@ -77,12 +77,12 @@ void DVLP_SendCode(DVLP_s* dvlp, SHDR_type type) u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_CONFIG)+regOffset, 0x00000000); + GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, 0x00000000); int i; for(i=0;i<dvlp->codeSize;i+=0x80)GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_DATA)+regOffset, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_END)+regOffset, 0x00000001); + GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001); } void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type) @@ -91,7 +91,7 @@ void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type) u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OPDESCS_CONFIG)+regOffset, 0x00000000); + 
GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, 0x00000000); u32 param[0x20]; @@ -137,12 +137,12 @@ void DVLE_SendOutmap(DVLE_s* dvle) if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; } - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_0251), numAttr-1); //? - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_024A), numAttr-1); //? - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OUTMAP_MASK)+regOffset, attrMask); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x1, GPUREG_PRIMITIVE_CONFIG), numAttr-1); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_SH_OUTMAP_TOTAL), numAttr); - GPUCMD_Add(GPUCMD_HEADER(1, 0xF, GPUREG_SH_OUTMAP_O0), param, 0x00000007); + GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //? + GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //? + GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, attrMask); + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, numAttr-1); + GPUCMD_AddWrite(GPUREG_SH_OUTMAP_TOTAL, numAttr); + GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_O0, param, 0x00000007); } void DVLE_SendConstants(DVLE_s* dvle) @@ -169,7 +169,7 @@ void DVLE_SendConstants(DVLE_s* dvle) param[0x2]=rev[1]; param[0x3]=rev[0]; - GPUCMD_Add(GPUCMD_HEADER(1, 0xF, GPUREG_VSH_FLOATUNIFORM_CONFIG)+regOffset, param, 0x00000004); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG+regOffset, param, 0x00000004); } } @@ -181,23 +181,23 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x1, GPUREG_GEOSTAGE_CONFIG), 0x00000000); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x1, GPUREG_0244), (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); DVLP_SendCode(&dvlb->DVLP, dvlb->DVLE[id].type); DVLP_SendOpDesc(&dvlb->DVLP, dvlb->DVLE[id].type); DVLE_SendConstants(dvle); - 
GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0x8, GPUREG_GEOSTAGE_CONFIG), 0x00000000); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_ENTRYPOINT)-regOffset, 0x7FFF0000|(dvle->mainOffset&0xFFFF)); //set entrypoint + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); + GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT-regOffset, 0x7FFF0000|(dvle->mainOffset&0xFFFF)); //set entrypoint - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_0252), 0x00000000); // should all be part of DVLE_SendOutmap ? + GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // should all be part of DVLE_SendOutmap ? DVLE_SendOutmap(dvle); //? - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_0064), 0x00000001); - GPUCMD_AddSingleParam(GPUCMD_HEADER(0, 0xF, GPUREG_006F), 0x00000703); + GPUCMD_AddWrite(GPUREG_0064, 0x00000001); + GPUCMD_AddWrite(GPUREG_006F, 0x00000703); } //TODO From e3017fa30456d88f3bda09d3a9d2f4649718d16a Mon Sep 17 00:00:00 2001 From: smea Date: Thu, 18 Dec 2014 22:35:13 -0800 Subject: [PATCH 04/21] made small adjustments to shdr.c --- libctru/source/gpu/shdr.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/libctru/source/gpu/shdr.c b/libctru/source/gpu/shdr.c index 60952e8..bcbac36 100644 --- a/libctru/source/gpu/shdr.c +++ b/libctru/source/gpu/shdr.c @@ -93,7 +93,7 @@ void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type) GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, 0x00000000); - u32 param[0x20]; + u32 param[0x80]; int i; //TODO : should probably preprocess this @@ -108,8 +108,8 @@ void DVLE_SendOutmap(DVLE_s* dvle) u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); - u32 param[0x7]={0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F, - 0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F}; + u32 param[0x8]={0x00000000,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F, + 0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F}; int i; u8 numAttr=0; @@ -118,7 +118,7 @@ void DVLE_SendOutmap(DVLE_s* dvle) //TODO : should probably preprocess this for(i=0;ioutTableSize;i++) 
{ - u32* out=&param[dvle->outTableData[i].regID]; + u32* out=&param[dvle->outTableData[i].regID+1]; if(*out==0x1F1F1F1F)numAttr++; @@ -137,12 +137,17 @@ void DVLE_SendOutmap(DVLE_s* dvle) if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; } - GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //? - GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //? + param[0]=numAttr; + + if(dvle->type==VERTEX_SHDR) + { + GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //? + GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //? + } + GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, attrMask); GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, numAttr-1); - GPUCMD_AddWrite(GPUREG_SH_OUTMAP_TOTAL, numAttr); - GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_O0, param, 0x00000007); + GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, param, 8); } void DVLE_SendConstants(DVLE_s* dvle) @@ -169,7 +174,7 @@ void DVLE_SendConstants(DVLE_s* dvle) param[0x2]=rev[1]; param[0x3]=rev[0]; - GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG+regOffset, param, 0x00000004); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG+regOffset, param, 4); } } @@ -191,7 +196,7 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT-regOffset, 0x7FFF0000|(dvle->mainOffset&0xFFFF)); //set entrypoint - GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // should all be part of DVLE_SendOutmap ? + GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // gsh related ? 
DVLE_SendOutmap(dvle); From 05e886c9fa8bf37a08586974baea1de44b97be13 Mon Sep 17 00:00:00 2001 From: smea Date: Sat, 27 Dec 2014 11:22:48 -0800 Subject: [PATCH 05/21] fixed up DVLE_SendOutmap --- libctru/include/3ds/gpu/shdr.h | 8 ++++++-- libctru/source/gpu/shdr.c | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/libctru/include/3ds/gpu/shdr.h b/libctru/include/3ds/gpu/shdr.h index 1a89ddb..ecfe41f 100644 --- a/libctru/include/3ds/gpu/shdr.h +++ b/libctru/include/3ds/gpu/shdr.h @@ -7,10 +7,13 @@ typedef enum{ typedef enum{ RESULT_POSITION = 0x0, + RESULT_NORMALQUAT = 0x1, RESULT_COLOR = 0x2, RESULT_TEXCOORD0 = 0x3, + RESULT_TEXCOORD0W = 0x4, RESULT_TEXCOORD1 = 0x5, - RESULT_TEXCOORD2 = 0x6 + RESULT_TEXCOORD2 = 0x6, + RESULT_VIEW = 0x8 }SHDR_outType; typedef struct{ @@ -28,7 +31,8 @@ typedef struct{ typedef struct{ u16 type; u16 regID; - u32 header; + u8 mask; + u8 unk[3]; }DVLE_outEntry_s; typedef struct{ diff --git a/libctru/source/gpu/shdr.c b/libctru/source/gpu/shdr.c index bcbac36..c38638e 100644 --- a/libctru/source/gpu/shdr.c +++ b/libctru/source/gpu/shdr.c @@ -119,19 +119,28 @@ void DVLE_SendOutmap(DVLE_s* dvle) for(i=0;ioutTableSize;i++) { u32* out=¶m[dvle->outTableData[i].regID+1]; + u32 mask=0x00000000; + u8 tmpmask=dvle->outTableData[i].mask; + mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; + mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; + mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; + mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; if(*out==0x1F1F1F1F)numAttr++; - //desc could include masking/swizzling info not currently taken into account - //also TODO : map out other output register values + u32 val=0x1F1F1F1F; switch(dvle->outTableData[i].type) { - case RESULT_POSITION: *out=0x03020100; break; - case RESULT_COLOR: *out=0x0B0A0908; break; - case RESULT_TEXCOORD0: *out=0x1F1F0D0C; break; - case RESULT_TEXCOORD1: *out=0x1F1F0F0E; break; - case RESULT_TEXCOORD2: *out=0x1F1F1716; break; + 
case RESULT_POSITION: val=0x03020100; break; + case RESULT_NORMALQUAT: val=0x07060504; break; + case RESULT_COLOR: val=0x0B0A0908; break; + case RESULT_TEXCOORD0: val=0x1F1F0D0C; break; + case RESULT_TEXCOORD0W: val=0x10101010; break; + case RESULT_TEXCOORD1: val=0x1F1F0F0E; break; + case RESULT_TEXCOORD2: val=0x1F1F1716; break; + case RESULT_VIEW: val=0x1F141312; break; } + *out=((*out)&~mask)|(val&mask); attrMask|=1<<dvle->outTableData[i].regID; if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; From 28fee060065263814b2d4dcef538fa32403d10a1 Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 16:41:07 -0800 Subject: [PATCH 06/21] minifix --- libctru/source/gpu/gpu.c | 2 ++ libctru/source/gpu/shdr.c | 1 + 2 files changed, 3 insertions(+) diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 5a1c978..11703cb 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -468,6 +468,8 @@ void GPU_DrawArray(GPU_Primitive_t primitive, u32 n) //set primitive type GPUCMD_AddSingleParam(0x0002025E, primitive); GPUCMD_AddSingleParam(0x0002025F, 0x00000001); + //index buffer address register should be cleared (except bit 31) before drawing + GPUCMD_AddSingleParam(0x000F0227, 0x80000000); //pass number of vertices GPUCMD_AddSingleParam(0x000F0228, n); diff --git a/libctru/source/gpu/shdr.c b/libctru/source/gpu/shdr.c index 566520d..f79fbcd 100644 --- a/libctru/source/gpu/shdr.c +++ b/libctru/source/gpu/shdr.c @@ -53,6 +53,7 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) goto exit; clean1: free(ret); + ret=NULL; exit: return ret; } From cd05cc45fffb4d45452caddcb3e9ea7c077d1e72 Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 17:15:44 -0800 Subject: [PATCH 07/21] started work on shaderProgram --- libctru/include/3ds.h | 2 +- libctru/include/3ds/gpu/shaderProgram.h | 29 ++++++ libctru/include/3ds/gpu/{shdr.h => shbin.h} | 11 +- libctru/source/gpu/gpu.c | 2 +- libctru/source/gpu/shaderProgram.c | 108 
++++++++++++++++++++ libctru/source/gpu/{shdr.c => shbin.c} | 6 +- 6 files changed, 147 insertions(+), 11 deletions(-) create mode 100644 libctru/include/3ds/gpu/shaderProgram.h rename libctru/include/3ds/gpu/{shdr.h => shbin.h} (88%) create mode 100644 libctru/source/gpu/shaderProgram.c rename libctru/source/gpu/{shdr.c => shbin.c} (97%) diff --git a/libctru/include/3ds.h b/libctru/include/3ds.h index 1713255..83e6d05 100644 --- a/libctru/include/3ds.h +++ b/libctru/include/3ds.h @@ -34,7 +34,7 @@ extern "C" { #include <3ds/gpu/gx.h> #include <3ds/gpu/gpu.h> -#include <3ds/gpu/shdr.h> +#include <3ds/gpu/shbin.h> #include <3ds/sdmc.h> diff --git a/libctru/include/3ds/gpu/shaderProgram.h b/libctru/include/3ds/gpu/shaderProgram.h new file mode 100644 index 0000000..e5f2cba --- /dev/null +++ b/libctru/include/3ds/gpu/shaderProgram.h @@ -0,0 +1,29 @@ +#pragma once + +#include <3ds/types.h> +#include <3ds/gpu/shbin.h> + +// this structure describes an instance of either a vertex or geometry shader +typedef struct +{ + DVLE_s* dvle; + u16 boolUniforms; +}shaderInstance_s; + +// this structure describes an instance of a full shader program +typedef struct +{ + shaderInstance_s* vertexShader; + shaderInstance_s* geometryShader; +}shaderProgram_s; + +Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle); +Result shaderInstanceFree(shaderInstance_s* si); +Result shaderInstanceSetBool(shaderInstance_s* si, int id, bool value); +Result shaderInstanceGetBool(shaderInstance_s* si, int id, bool* value); + +Result shaderProgramInit(shaderProgram_s* sp); +Result shaderProgramFree(shaderProgram_s* sp); +Result shaderProgramSetVsh(shaderProgram_s* sp, DVLE_s* dvle); +Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle); +Result shaderProgramUse(shaderProgram_s* sp); diff --git a/libctru/include/3ds/gpu/shdr.h b/libctru/include/3ds/gpu/shbin.h similarity index 88% rename from libctru/include/3ds/gpu/shdr.h rename to libctru/include/3ds/gpu/shbin.h index 
ecfe41f..89cbde9 100644 --- a/libctru/include/3ds/gpu/shdr.h +++ b/libctru/include/3ds/gpu/shbin.h @@ -3,7 +3,7 @@ typedef enum{ VERTEX_SHDR=0x0, GEOMETRY_SHDR=0x1 -}SHDR_type; +}DVLE_type; typedef enum{ RESULT_POSITION = 0x0, @@ -14,7 +14,7 @@ typedef enum{ RESULT_TEXCOORD1 = 0x5, RESULT_TEXCOORD2 = 0x6, RESULT_VIEW = 0x8 -}SHDR_outType; +}DVLE_outputAttribute_t; typedef struct{ u32 codeSize; @@ -42,7 +42,7 @@ typedef struct{ }DVLE_uniformEntry_s; typedef struct{ - SHDR_type type; + DVLE_type type; u32 mainOffset, endmainOffset; u32 constTableSize; DVLE_constEntry_s* constTableData; @@ -59,14 +59,13 @@ typedef struct{ DVLE_s* DVLE; }DVLB_s; - DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize); void SHDR_UseProgram(DVLB_s* dvlb, u8 id); void SHDR_FreeDVLB(DVLB_s* dvlb); s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID); -void DVLP_SendCode(DVLP_s* dvlp, SHDR_type type); -void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type); +void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type); +void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type); void DVLE_SendOutmap(DVLE_s* dvle); void DVLE_SendConstants(DVLE_s* dvle); diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 11703cb..ba36fe1 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -7,7 +7,7 @@ #include <3ds/types.h> #include <3ds/gpu/gpu.h> #include <3ds/gpu/gx.h> -#include <3ds/gpu/shdr.h> +#include <3ds/gpu/shbin.h> u32* gpuCmdBuf; u32 gpuCmdBufSize; diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c new file mode 100644 index 0000000..5a28802 --- /dev/null +++ b/libctru/source/gpu/shaderProgram.c @@ -0,0 +1,108 @@ +#include +#include <3ds/types.h> +#include <3ds/gpu/shaderProgram.h> + +Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) +{ + if(!si || !dvle)return -1; + + si->dvle = dvle; + si->boolUniforms = 0xFFFF; + + return 0; +} + +Result shaderInstanceFree(shaderInstance_s* si) +{ + if(!si)return -1; + + 
free(si); + + return 0; +} + +Result shaderInstanceSetBool(shaderInstance_s* si, int id, bool value) +{ + if(!si)return -1; + if(id<0 || id>15)return -2; + + si->boolUniforms &= ~(1<boolUniforms |= (!value)<15)return -2; + if(!value)return -3; + + *value = !((si->boolUniforms>>id)&1); + + return 0; +} + +Result shaderProgramInit(shaderProgram_s* sp) +{ + if(!sp)return -1; + + sp->vertexShader = NULL; + sp->geometryShader = NULL; + + return 0; +} + +Result shaderProgramFree(shaderProgram_s* sp) +{ + if(!sp)return -1; + + shaderInstanceFree(sp->vertexShader); + shaderInstanceFree(sp->geometryShader); + + free(sp); + + return 0; +} + +Result shaderProgramSetVsh(shaderProgram_s* sp, DVLE_s* dvle) +{ + if(!sp || !dvle)return -1; + if(dvle->type != VERTEX_SHDR)return -2; + + if(sp->vertexShader)shaderInstanceFree(sp->vertexShader); + + sp->vertexShader = (shaderInstance_s*)malloc(sizeof(shaderInstance_s)); + if(!sp->vertexShader)return -3; + + return shaderInstanceInit(sp->vertexShader, dvle); +} + +Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle) +{ + if(!sp || !dvle)return -1; + if(dvle->type != GEOMETRY_SHDR)return -2; + + if(sp->geometryShader)shaderInstanceFree(sp->geometryShader); + + sp->geometryShader = (shaderInstance_s*)malloc(sizeof(shaderInstance_s)); + if(!sp->geometryShader)return -3; + + return shaderInstanceInit(sp->geometryShader, dvle); +} + +Result shaderProgramUse(shaderProgram_s* sp) +{ + if(!sp)return -1; + + if(!sp->vertexShader)return -2; + + if(!sp->geometryShader) + { + // only deal with vertex shader + }else{ + // setup both vertex and geometry shader + } + + return 0; +} diff --git a/libctru/source/gpu/shdr.c b/libctru/source/gpu/shbin.c similarity index 97% rename from libctru/source/gpu/shdr.c rename to libctru/source/gpu/shbin.c index f79fbcd..e825f85 100644 --- a/libctru/source/gpu/shdr.c +++ b/libctru/source/gpu/shbin.c @@ -6,7 +6,7 @@ #include #include <3ds/types.h> #include <3ds/gpu/gpu.h> -#include <3ds/gpu/shdr.h> 
+#include <3ds/gpu/shbin.h> //please don't feed this an invalid SHBIN DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) @@ -73,7 +73,7 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) return -1; } -void DVLP_SendCode(DVLP_s* dvlp, SHDR_type type) +void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type) { if(!dvlp)return; @@ -87,7 +87,7 @@ void DVLP_SendCode(DVLP_s* dvlp, SHDR_type type) GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001); } -void DVLP_SendOpDesc(DVLP_s* dvlp, SHDR_type type) +void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type) { if(!dvlp)return; From 80e6bcfd34c8033a0a944ab13c9d8974d4f139bf Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 19:16:23 -0800 Subject: [PATCH 08/21] shaderProgram --- libctru/include/3ds.h | 1 + libctru/include/3ds/gpu/gpu.h | 10 ++- libctru/include/3ds/gpu/shaderProgram.h | 9 ++ libctru/include/3ds/gpu/shbin.h | 19 ++++- libctru/source/gpu/gpu.c | 31 +++++++ libctru/source/gpu/shaderProgram.c | 109 +++++++++++++++++++++++- libctru/source/gpu/shbin.c | 81 ++++++++---------- 7 files changed, 211 insertions(+), 49 deletions(-) diff --git a/libctru/include/3ds.h b/libctru/include/3ds.h index 83e6d05..20e6b28 100644 --- a/libctru/include/3ds.h +++ b/libctru/include/3ds.h @@ -35,6 +35,7 @@ extern "C" { #include <3ds/gpu/gx.h> #include <3ds/gpu/gpu.h> #include <3ds/gpu/shbin.h> +#include <3ds/gpu/shaderProgram.h> #include <3ds/sdmc.h> diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index dc590e0..6373240 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -203,6 +203,11 @@ typedef enum{ GPU_UNKPRIM = 0x0300 // ? 
}GPU_Primitive_t; +typedef enum{ + GPU_VERTEX_SHADER=0x0, + GPU_GEOMETRY_SHADER=0x1 +}GPU_SHADER_TYPE; + void GPU_SetUniform(u32 startreg, u32* data, u32 numreg); void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u32 h); @@ -232,5 +237,8 @@ void GPU_SetTexEnv(u8 id, u16 rgbSources, u16 alphaSources, u16 rgbOperands, u16 void GPU_DrawArray(GPU_Primitive_t primitive, u32 n); void GPU_DrawElements(GPU_Primitive_t primitive, u32* indexArray, u32 n); - void GPU_FinishDrawing(); + +void GPU_SetShaderOutmap(u32 outmapData[8]); +void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length); +void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length); diff --git a/libctru/include/3ds/gpu/shaderProgram.h b/libctru/include/3ds/gpu/shaderProgram.h index e5f2cba..b6f0315 100644 --- a/libctru/include/3ds/gpu/shaderProgram.h +++ b/libctru/include/3ds/gpu/shaderProgram.h @@ -3,11 +3,20 @@ #include <3ds/types.h> #include <3ds/gpu/shbin.h> +typedef struct +{ + u32 id; + u32 data[3]; +}float24Uniform_s; + // this structure describes an instance of either a vertex or geometry shader typedef struct { DVLE_s* dvle; u16 boolUniforms; + u32 intUniforms[4]; + float24Uniform_s* float24Uniforms; + u8 numFloat24Uniforms; }shaderInstance_s; // this structure describes an instance of a full shader program diff --git a/libctru/include/3ds/gpu/shbin.h b/libctru/include/3ds/gpu/shbin.h index 89cbde9..ed41114 100644 --- a/libctru/include/3ds/gpu/shbin.h +++ b/libctru/include/3ds/gpu/shbin.h @@ -1,10 +1,18 @@ #pragma once +#include <3ds/gpu/gpu.h> + typedef enum{ - VERTEX_SHDR=0x0, - GEOMETRY_SHDR=0x1 + VERTEX_SHDR=GPU_VERTEX_SHADER, + GEOMETRY_SHDR=GPU_GEOMETRY_SHADER }DVLE_type; +typedef enum{ + DVLE_CONST_BOOL=0x0, + DVLE_CONST_u8=0x1, + DVLE_CONST_FLOAT24=0x2, +}DVLE_constantType; + typedef enum{ RESULT_POSITION = 0x0, RESULT_NORMALQUAT = 0x1, @@ -24,7 +32,8 @@ typedef struct{ }DVLP_s; typedef struct{ - u32 header; + 
u16 type; + u16 id; u32 data[4]; }DVLE_constEntry_s; @@ -43,6 +52,7 @@ typedef struct{ typedef struct{ DVLE_type type; + DVLP_s* dvlp; u32 mainOffset, endmainOffset; u32 constTableSize; DVLE_constEntry_s* constTableData; @@ -51,6 +61,8 @@ typedef struct{ u32 uniformTableSize; DVLE_uniformEntry_s* uniformTableData; char* symbolTableData; + u8 outmapMask; + u32 outmapData[8]; }DVLE_s; typedef struct{ @@ -69,3 +81,4 @@ void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type); void DVLE_SendOutmap(DVLE_s* dvle); void DVLE_SendConstants(DVLE_s* dvle); +void DVLE_GenerateOutmap(DVLE_s* dvle); diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index ba36fe1..b1f3dbb 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -509,3 +509,34 @@ void GPU_FinishDrawing() GPUCMD_AddSingleParam(0x000F0110, 0x00000001); GPUCMD_AddSingleParam(0x000F0063, 0x00000001); } + +void GPU_SetShaderOutmap(u32 outmapData[8]) +{ + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1); + GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8); +} + +void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length) +{ + if(!data)return; + + u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); + + GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, offset); + + int i; + for(i=0;i +#include #include <3ds/types.h> +#include <3ds/gpu/registers.h> #include <3ds/gpu/shaderProgram.h> Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) @@ -7,7 +9,64 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) if(!si || !dvle)return -1; si->dvle = dvle; + si->boolUniforms = 0xFFFF; + si->intUniforms[0] = 0x00000000; + si->intUniforms[1] = 0x00000000; + si->intUniforms[2] = 0x00000000; + si->intUniforms[3] = 0x00000000; + si->float24Uniforms = NULL; + + int i; + DVLE_constEntry_s* cnst = dvle->constTableData; + if(cnst) + { + int float24cnt=0; + for(i=0; iconstTableSize; i++) + { + switch(cnst[i].type) + { + case 
DVLE_CONST_BOOL: + shaderInstanceSetBool(si, cnst[i].id, cnst[i].data[0]&1); + break; + case DVLE_CONST_u8: + if(cnst[i].id<4)si->intUniforms[cnst[i].id] = cnst[i].data[0]; + break; + case DVLE_CONST_FLOAT24: + float24cnt++; + break; + } + } + + if(float24cnt) + { + si->float24Uniforms = malloc(sizeof(float24Uniform_s)*float24cnt); + if(!si->float24Uniforms) + { + float24cnt = 0; + u32 rev[3]; + u8* rev8=(u8*)rev; + for(i=0; iconstTableSize; i++) + { + if(cnst[i].type==DVLE_CONST_FLOAT24) + { + memcpy(&rev8[0], &cnst[i].data[0], 3); + memcpy(&rev8[3], &cnst[i].data[1], 3); + memcpy(&rev8[6], &cnst[i].data[2], 3); + memcpy(&rev8[9], &cnst[i].data[3], 3); + + si->float24Uniforms[float24cnt].id = cnst[i].id; + si->float24Uniforms[float24cnt].data[0] = rev[2]; + si->float24Uniforms[float24cnt].data[1] = rev[1]; + si->float24Uniforms[float24cnt].data[2] = rev[0]; + + float24cnt++; + } + } + } + si->numFloat24Uniforms = float24cnt; + } + } return 0; } @@ -16,6 +75,7 @@ Result shaderInstanceFree(shaderInstance_s* si) { if(!si)return -1; + if(si->float24Uniforms)free(si->float24Uniforms); free(si); return 0; @@ -97,11 +157,58 @@ Result shaderProgramUse(shaderProgram_s* sp) if(!sp->vertexShader)return -2; + int i; + + // setup vertex shader stuff no matter what + const DVLE_s* vshDvle = sp->vertexShader->dvle; + const DVLP_s* vshDvlp = vshDvle->dvlp; + GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize); + GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize); + GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4); + for(i=0; ivertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4); + GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF)); + GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, 
vshDvle->outmapMask); + + GPUCMD_AddWrite(GPUREG_024A, vshDvle->outmapData[0]-1); // ? + GPUCMD_AddWrite(GPUREG_0251, vshDvle->outmapData[0]-1); // ? + + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); // ? + GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // ? + if(!sp->geometryShader) { - // only deal with vertex shader + // finish setting up vertex shader alone + GPU_SetShaderOutmap((u32*)vshDvle->outmapData); + + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000); + + GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? + GPUCMD_AddWrite(GPUREG_006F, 0x00000703); // ? }else{ // setup both vertex and geometry shader + const DVLE_s* gshDvle = sp->geometryShader->dvle; + const DVLP_s* gshDvlp = gshDvle->dvlp; + GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize); + GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize); + GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4); + for(i=0; igeometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4); + GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF)); + GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask); + + GPU_SetShaderOutmap((u32*)gshDvle->outmapData); + + //GSH input attributes stuff + GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000003); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2); + + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000001); + + GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? + GPUCMD_AddWrite(GPUREG_006F, 0x01030703); // ? 
} return 0; diff --git a/libctru/source/gpu/shbin.c b/libctru/source/gpu/shbin.c index e825f85..9c51f9b 100644 --- a/libctru/source/gpu/shbin.c +++ b/libctru/source/gpu/shbin.c @@ -25,15 +25,18 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) ret->DVLP.codeSize=dvlpData[3]; ret->DVLP.codeData=&dvlpData[dvlpData[2]/4]; ret->DVLP.opdescSize=dvlpData[5]; - ret->DVLP.opcdescData=&dvlpData[dvlpData[4]/4]; + ret->DVLP.opcdescData=(u32*)malloc(sizeof(u32)*ret->DVLP.opdescSize); + if(!ret->DVLP.opcdescData)goto clean2; + int i; for(i=0;iDVLP.opdescSize;i++)ret->DVLP.opcdescData[i]=dvlpData[dvlpData[4]/4+i*2]; //parse DVLE - int i; for(i=0;inumDVLE;i++) { DVLE_s* dvle=&ret->DVLE[i]; u32* dvleData=&shbinData[shbinData[2+i]/4]; + dvle->dvlp=&ret->DVLP; + dvle->type=(dvleData[1]>>16)&0xFF; dvle->mainOffset=dvleData[2]; dvle->endmainOffset=dvleData[3]; @@ -48,9 +51,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) dvle->uniformTableData=(DVLE_uniformEntry_s*)&dvleData[dvleData[12]/4]; dvle->symbolTableData=(char*)&dvleData[dvleData[14]/4]; + + DVLE_GenerateOutmap(dvle); } goto exit; + clean2: + free(ret->DVLE); clean1: free(ret); ret=NULL; @@ -58,6 +65,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) return ret; } +//TODO +void SHDR_FreeDVLB(DVLB_s* dvlb) +{ + if(!dvlb)return; + +} + s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) { if(!dvlb || !name)return -1; @@ -76,51 +90,31 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type) { if(!dvlp)return; - - u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); - - GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, 0x00000000); - - int i; - for(i=0;icodeSize;i+=0x80)GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_DATA)+regOffset, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80); - - GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001); + + 
GPU_SendShaderCode(type, dvlp->codeData, 0, dvlp->codeSize); } void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type) { if(!dvlp)return; - u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0); - - GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, 0x00000000); - - u32 param[0x80]; - - int i; - //TODO : should probably preprocess this - for(i=0;iopdescSize;i++)param[i]=dvlp->opcdescData[i*2]; - - GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OPDESCS_DATA)+regOffset, param, dvlp->opdescSize); + GPU_SendOperandDescriptors(type, dvlp->opcdescData, 0, dvlp->opdescSize); } -void DVLE_SendOutmap(DVLE_s* dvle) +void DVLE_GenerateOutmap(DVLE_s* dvle) { if(!dvle)return; - u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); - - u32 param[0x8]={0x00000000,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F, - 0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F}; + memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData)); int i; u8 numAttr=0; u8 maxAttr=0; u8 attrMask=0; - //TODO : should probably preprocess this + for(i=0;ioutTableSize;i++) { - u32* out=¶m[dvle->outTableData[i].regID+1]; + u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1]; u32 mask=0x00000000; u8 tmpmask=dvle->outTableData[i].mask; mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1; @@ -148,17 +142,24 @@ void DVLE_SendOutmap(DVLE_s* dvle) if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1; } - param[0]=numAttr; + dvle->outmapData[0]=numAttr; + dvle->outmapMask=attrMask; +} + +void DVLE_SendOutmap(DVLE_s* dvle) +{ + if(!dvle)return; + + u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); if(dvle->type==VERTEX_SHDR) { - GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //? - GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //? + GPUCMD_AddWrite(GPUREG_024A, dvle->outmapData[0]-1); //? + GPUCMD_AddWrite(GPUREG_0251, dvle->outmapData[0]-1); //? 
} - GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, attrMask); - GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, numAttr-1); - GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, param, 8); + GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, dvle->outmapMask); + GPU_SetShaderOutmap(dvle->outmapData); } void DVLE_SendConstants(DVLE_s* dvle) @@ -180,7 +181,7 @@ void DVLE_SendConstants(DVLE_s* dvle) memcpy(&rev8[6], &cnst->data[2], 3); memcpy(&rev8[9], &cnst->data[3], 3); - param[0x0]=(cnst->header>>16)&0xFF; + param[0x0]=(cnst->id)&0xFF; param[0x1]=rev[2]; param[0x2]=rev[1]; param[0x3]=rev[0]; @@ -196,7 +197,6 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0); - GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); @@ -215,10 +215,3 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id) GPUCMD_AddWrite(GPUREG_0064, 0x00000001); GPUCMD_AddWrite(GPUREG_006F, 0x00000703); } - -//TODO -void SHDR_FreeDVLB(DVLB_s* dvlb) -{ - if(!dvlb)return; - -} From a1830973c7a10b06b5b193edc367f288ca97f717 Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 19:29:02 -0800 Subject: [PATCH 09/21] minifix --- libctru/source/gpu/shaderProgram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c index 769ee79..338c119 100644 --- a/libctru/source/gpu/shaderProgram.c +++ b/libctru/source/gpu/shaderProgram.c @@ -41,7 +41,7 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) if(float24cnt) { si->float24Uniforms = malloc(sizeof(float24Uniform_s)*float24cnt); - if(!si->float24Uniforms) + if(si->float24Uniforms) { float24cnt = 0; u32 rev[3]; @@ -55,7 +55,7 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle) memcpy(&rev8[6], &cnst[i].data[2], 3); memcpy(&rev8[9], &cnst[i].data[3], 3); - si->float24Uniforms[float24cnt].id = 
cnst[i].id; + si->float24Uniforms[float24cnt].id = cnst[i].id&0xFF; si->float24Uniforms[float24cnt].data[0] = rev[2]; si->float24Uniforms[float24cnt].data[1] = rev[1]; si->float24Uniforms[float24cnt].data[2] = rev[0]; From d195917ec1bc7a3c25cb12bee1d3480f1e9144ef Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 19:39:37 -0800 Subject: [PATCH 10/21] minifix + geoshader stride --- libctru/include/3ds/gpu/shaderProgram.h | 3 ++- libctru/source/gpu/shaderProgram.c | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libctru/include/3ds/gpu/shaderProgram.h b/libctru/include/3ds/gpu/shaderProgram.h index b6f0315..c69e14e 100644 --- a/libctru/include/3ds/gpu/shaderProgram.h +++ b/libctru/include/3ds/gpu/shaderProgram.h @@ -24,6 +24,7 @@ typedef struct { shaderInstance_s* vertexShader; shaderInstance_s* geometryShader; + u8 geometryShaderInputStride; }shaderProgram_s; Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle); @@ -34,5 +35,5 @@ Result shaderInstanceGetBool(shaderInstance_s* si, int id, bool* value); Result shaderProgramInit(shaderProgram_s* sp); Result shaderProgramFree(shaderProgram_s* sp); Result shaderProgramSetVsh(shaderProgram_s* sp, DVLE_s* dvle); -Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle); +Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride); Result shaderProgramUse(shaderProgram_s* sp); diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c index 338c119..cb53476 100644 --- a/libctru/source/gpu/shaderProgram.c +++ b/libctru/source/gpu/shaderProgram.c @@ -87,7 +87,7 @@ Result shaderInstanceSetBool(shaderInstance_s* si, int id, bool value) if(id<0 || id>15)return -2; si->boolUniforms &= ~(1<boolUniforms |= (!value)<boolUniforms |= (value)<vertexShader, dvle); } -Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle) +Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride) { if(!sp || !dvle)return -1; if(dvle->type != 
GEOMETRY_SHDR)return -2; @@ -148,6 +148,8 @@ Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle) sp->geometryShader = (shaderInstance_s*)malloc(sizeof(shaderInstance_s)); if(!sp->geometryShader)return -3; + sp->geometryShaderInputStride = stride; + return shaderInstanceInit(sp->geometryShader, dvle); } @@ -201,7 +203,7 @@ Result shaderProgramUse(shaderProgram_s* sp) GPU_SetShaderOutmap((u32*)gshDvle->outmapData); //GSH input attributes stuff - GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000003); + GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geometryShaderInputStride-1)); GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2); GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002); From 8b84747df3c3cbee5b6e603a79e794902a424785 Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 21:14:03 -0800 Subject: [PATCH 11/21] updated gpu.c to use new convention --- libctru/include/3ds/gpu/registers.h | 1 - libctru/source/gpu/gpu.c | 286 ++++++++++++++-------------- 2 files changed, 141 insertions(+), 146 deletions(-) diff --git a/libctru/include/3ds/gpu/registers.h b/libctru/include/3ds/gpu/registers.h index abbf61e..1390a8f 100644 --- a/libctru/include/3ds/gpu/registers.h +++ b/libctru/include/3ds/gpu/registers.h @@ -512,7 +512,6 @@ #define GPUREG_01FD 0x01FD #define GPUREG_01FE 0x01FE #define GPUREG_01FF 0x01FF -#define pipeline 0xGeometry #define GPUREG_ATTRIBBUFFERS_LOC 0x0200 #define GPUREG_ATTRIBBUFFERS_FORMAT_LOW 0x0201 #define GPUREG_ATTRIBBUFFERS_FORMAT_HIGH 0x0202 diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index b1f3dbb..e32265b 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -90,11 +90,11 @@ void GPUCMD_Add(u32 header, u32* param, u32 paramlength) void GPUCMD_Finalize() { - GPUCMD_AddSingleParam(0x0008025E, 0x00000000); - GPUCMD_AddSingleParam(0x000F0111, 0x00000001); - GPUCMD_AddSingleParam(0x000F0110, 0x00000001); - 
GPUCMD_AddSingleParam(0x000F0010, 0x12345678); - GPUCMD_AddSingleParam(0x000F0010, 0x12345678); //not the cleanest way of guaranteeing 0x10-byte size but whatever good enough for now + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0x00000000); + GPUCMD_AddWrite(GPUREG_0111, 0x00000001); + GPUCMD_AddWrite(GPUREG_0110, 0x00000001); + GPUCMD_AddWrite(GPUREG_FINALIZE, 0x12345678); + GPUCMD_AddWrite(GPUREG_FINALIZE, 0x12345678); //not the cleanest way of guaranteeing 0x10-byte size but whatever good enough for now } extern u32 gpuResetSequence[]; @@ -109,55 +109,55 @@ void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize) GPUCMD_SetBuffer(gpuBuf, gpuBufSize, 0); - GPUCMD_AddSingleParam(0x000D0080, 0x00011000); + GPUCMD_AddMaskedWrite(GPUREG_TEXUNITS_CONFIG, 0xD, 0x00011000); - for(i=0x1;i<0xC;i++)GPUCMD_AddSingleParam(0x000F0080+i, 0x00000000); - GPUCMD_AddSingleParam(0x000F008C, 0x00FF0000); - GPUCMD_AddSingleParam(0x000F008D, 0x00000000); - GPUCMD_AddSingleParam(0x000F008E, 0x00000000); + for(i=0x1;i<0xC;i++)GPUCMD_AddWrite(GPUREG_TEXUNITS_CONFIG+i, 0x00000000); + GPUCMD_AddWrite(GPUREG_008C, 0x00FF0000); + GPUCMD_AddWrite(GPUREG_008D, 0x00000000); + GPUCMD_AddWrite(GPUREG_TEXUNIT0_TYPE, 0x00000000); - for(i=0x0;i<0xF;i++)GPUCMD_AddSingleParam(0x000F0090+i, 0x00000000); + for(i=0x0;i<0xF;i++)GPUCMD_AddWrite(GPUREG_0090+i, 0x00000000); - GPUCMD_AddSingleParam(0x00010245, 0x00000001); - GPUCMD_AddSingleParam(0x00010244, 0x00000000); - GPUCMD_AddSingleParam(0x00080289, 0x80000000); - GPUCMD_AddSingleParam(0x000B0229, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0245, 0x1, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x8, 0x80000000); + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xB, 0x00000000); - GPUCMD_AddSingleParam(0x000F0252, 0x00000000); - GPUCMD_AddSingleParam(0x000F0251, 0x00000000); - GPUCMD_AddSingleParam(0x000F0254, 0x00000000); - GPUCMD_AddSingleParam(0x00010253, 
0x00000000); + GPUCMD_AddWrite(GPUREG_0252, 0x00000000); + GPUCMD_AddWrite(GPUREG_0251, 0x00000000); + GPUCMD_AddWrite(GPUREG_0254, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0253, 0x1, 0x00000000); - GPUCMD_AddSingleParam(0x000F0242, 0x00000000); - GPUCMD_AddSingleParam(0x000F024A, 0x00000000); + GPUCMD_AddWrite(GPUREG_0242, 0x00000000); + GPUCMD_AddWrite(GPUREG_024A, 0x00000000); - GPUCMD_AddSingleParam(0x0005025E, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x5, 0x00000000); - GPUCMD_Add(0x800F0101, zero, 0x00000007); + GPUCMD_AddIncrementalWrites(GPUREG_BLEND_CONFIG, zero, 0x00000007); - GPUCMD_AddSingleParam(0x000F011F, 0x00010140); - GPUCMD_AddSingleParam(0x000F0100, 0x00E40100); - GPUCMD_AddSingleParam(0x000F0101, 0x01010000); - GPUCMD_AddSingleParam(0x000F0107, 0x00001F40); - GPUCMD_AddSingleParam(0x000F0105, 0xFF00FF10); + GPUCMD_AddWrite(GPUREG_011F, 0x00010140); + GPUCMD_AddWrite(GPUREG_COLOROUTPUT_CONFIG, 0x00E40100); + GPUCMD_AddWrite(GPUREG_BLEND_CONFIG, 0x01010000); + GPUCMD_AddWrite(GPUREG_DEPTHTEST_CONFIG, 0x00001F40); + GPUCMD_AddWrite(GPUREG_STENCILTEST_CONFIG, 0xFF00FF10); - GPUCMD_AddSingleParam(0x00010061, 0x00000003); - GPUCMD_AddSingleParam(0x00010062, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0061, 0x1, 0x00000003); + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0x00000000); - GPUCMD_AddSingleParam(0x000F0065, 0x00000000); - GPUCMD_AddSingleParam(0x000F0066, 0x00000000); - GPUCMD_AddSingleParam(0x000F0067, 0x00000000); + GPUCMD_AddWrite(GPUREG_SCISSORTEST_MODE, 0x00000000); + GPUCMD_AddWrite(GPUREG_SCISSORTEST_POS, 0x00000000); + GPUCMD_AddWrite(GPUREG_SCISSORTEST_DIM, 0x00000000); - GPUCMD_AddSingleParam(0x00010118, 0x00000000); - GPUCMD_AddSingleParam(0x000F011B, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0118, 0x1, 0x00000000); + GPUCMD_AddWrite(GPUREG_011B, 0x00000000); - GPUCMD_AddSingleParam(0x0007006A, 0x00FFFFFF); + GPUCMD_AddMaskedWrite(GPUREG_006A, 0x7, 0x00FFFFFF); - GPUCMD_AddSingleParam(0x000F0102, 0x00000003); 
+ GPUCMD_AddWrite(GPUREG_COLORLOGICOP_CONFIG, 0x00000003); - GPUCMD_AddSingleParam(0x00080126, 0x03000000); + GPUCMD_AddMaskedWrite(GPUREG_0126, 0x8, 0x03000000); - GPUCMD_Add(0x800F0040, zero, 0x00000010); + GPUCMD_AddIncrementalWrites(GPUREG_FACECULLING_CONFIG, zero, 0x00000010); param[0x0]=0x1F1F1F1F; param[0x1]=0x1F1F1F1F; @@ -166,54 +166,51 @@ void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize) param[0x4]=0x1F1F1F1F; param[0x5]=0x1F1F1F1F; param[0x6]=0x1F1F1F1F; - GPUCMD_Add(0x800F0050, param, 0x00000007); + GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_O0, param, 0x00000007); - GPUCMD_AddSingleParam(0x000F0058, 0x00000100); - GPUCMD_AddSingleParam(0x000F004C, 0x00000001); - GPUCMD_AddSingleParam(0x000F006F, 0x00000000); + GPUCMD_AddWrite(GPUREG_0058, 0x00000100); + GPUCMD_AddWrite(GPUREG_004C, 0x00000001); + GPUCMD_AddWrite(GPUREG_006F, 0x00000000); - GPUCMD_AddSingleParam(0x00020060, 0x00000000); - GPUCMD_AddSingleParam(0x000C0069, 0x00020000); + GPUCMD_AddMaskedWrite(GPUREG_0060, 0x2, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0069, 0xC, 0x00020000); - GPUCMD_AddSingleParam(0x000F0113, 0x0000000F); - GPUCMD_AddSingleParam(0x000F0112, 0x0000000F); - GPUCMD_AddSingleParam(0x000F0114, 0x00000003); - GPUCMD_AddSingleParam(0x000F0115, 0x00000003); + GPUCMD_AddWrite(GPUREG_0113, 0x0000000F); + GPUCMD_AddWrite(GPUREG_0112, 0x0000000F); + GPUCMD_AddWrite(GPUREG_0114, 0x00000003); + GPUCMD_AddWrite(GPUREG_0115, 0x00000003); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000000); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000100); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000200); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000300); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000400); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 
0x00000008); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000500); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F01C5, 0x00000600); - for(i=0;i<32;i++)GPUCMD_Add(0x800F01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000000); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000100); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000200); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000300); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000400); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000500); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_01C5, 0x00000600); + for(i=0;i<32;i++)GPUCMD_AddIncrementalWrites(GPUREG_01C8, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F0290, 0x80000000); - for(i=0;i<48;i++)GPUCMD_Add(0x800F0291, zero, 0x00000008); - GPUCMD_AddSingleParam(0x000F02CB, 0x00000000); + GPUCMD_AddWrite(GPUREG_GSH_FLOATUNIFORM_CONFIG, 0x80000000); + for(i=0;i<48;i++)GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_DATA, zero, 0x00000008); + GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG, 0x00000000); for(i=0;i<4;i++)GPUCMD_Add(0x000F02CC, zero, 0x00000080); - GPUCMD_AddSingleParam(0x000F029B, 0x00000200); + GPUCMD_AddWrite(GPUREG_GSH_CODETRANSFER_CONFIG, 0x00000200); for(i=0;i<28;i++)GPUCMD_Add(0x000F029C, zero, 0x00000080); + GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END, 0x00000000); - GPUCMD_AddSingleParam(0x000F02BF, 0x00000000); - GPUCMD_AddSingleParam(0x000F02B1, 0x00000000); - GPUCMD_AddSingleParam(0x000F02B2, 0x00000000); - GPUCMD_AddSingleParam(0x000F02B3, 0x00000000); - GPUCMD_AddSingleParam(0x000F02B4, 
0x00000000); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, zero, 4); param[0x0]=0xFFFFFFFF; param[0x1]=0xFFFFFFFF; - GPUCMD_Add(0x800F028B, param, 0x00000002); + GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, param, 0x00000002); - GPUCMD_Add(0x800F0205, zero, 0x00000024); + GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFER0_CONFIG2, zero, 0x00000024); for(i=0;i>3; param[0x1]=((u32)colorBuffer)>>3; param[0x2]=f116e; - GPUCMD_Add(0x800F011C, param, 0x00000003); + GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, param, 0x00000003); - GPUCMD_AddSingleParam(0x000F006E, f116e); - GPUCMD_AddSingleParam(0x000F0116, 0x00000003); //depth buffer format - GPUCMD_AddSingleParam(0x000F0117, 0x00000002); //color buffer format - GPUCMD_AddSingleParam(0x000F011B, 0x00000000); //? + GPUCMD_AddWrite(GPUREG_006E, f116e); + GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 0x00000003); //depth buffer format + GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0x00000002); //color buffer format + GPUCMD_AddWrite(GPUREG_011B, 0x00000000); //? 
param[0x0]=f32tof24(fw/2); param[0x1]=computeInvValue(fw); param[0x2]=f32tof24(fh/2); param[0x3]=computeInvValue(fh); - GPUCMD_Add(0x800F0041, param, 0x00000004); + GPUCMD_AddIncrementalWrites(GPUREG_0041, param, 0x00000004); - GPUCMD_AddSingleParam(0x000F0068, (y<<16)|(x&0xFFFF)); + GPUCMD_AddWrite(GPUREG_0068, (y<<16)|(x&0xFFFF)); param[0x0]=0x00000000; param[0x1]=0x00000000; param[0x2]=((h-1)<<16)|((w-1)&0xFFFF); - GPUCMD_Add(0x800F0065, param, 0x00000003); + GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, param, 0x00000003); //enable depth buffer param[0x0]=0x0000000F; param[0x1]=0x0000000F; param[0x2]=0x00000002; param[0x3]=0x00000002; - GPUCMD_Add(0x800F0112, param, 0x00000004); + GPUCMD_AddIncrementalWrites(GPUREG_0112, param, 0x00000004); } void GPU_SetScissorTest(GPU_SCISSORMODE mode, u32 x, u32 y, u32 w, u32 h) @@ -315,59 +312,59 @@ void GPU_SetScissorTest(GPU_SCISSORMODE mode, u32 x, u32 y, u32 w, u32 h) param[0x0] = mode; param[0x1] = (y<<16)|(x&0xFFFF); param[0x2] = ((h-1)<<16)|((w-1)&0xFFFF); - GPUCMD_Add(0x800F0065, param, 0x00000003); + GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, param, 0x00000003); } void GPU_DepthRange(float nearVal, float farVal) { - GPUCMD_AddSingleParam(0x000F006D, 0x00000001); //? - GPUCMD_AddSingleParam(0x000F004D, f32tof24(nearVal)); - GPUCMD_AddSingleParam(0x000F004E, f32tof24(farVal)); + GPUCMD_AddWrite(GPUREG_006D, 0x00000001); //? 
+ GPUCMD_AddWrite(GPUREG_DEPTHRANGE_NEAR, f32tof24(nearVal)); + GPUCMD_AddWrite(GPUREG_DEPTHRANGE_FAR, f32tof24(farVal)); } void GPU_SetAlphaTest(bool enable, GPU_TESTFUNC function, u8 ref) { - GPUCMD_AddSingleParam(0x000F0104, (enable&1)|((function&7)<<4)|(ref<<8)); + GPUCMD_AddWrite(GPUREG_ALPHATEST_CONFIG, (enable&1)|((function&7)<<4)|(ref<<8)); } void GPU_SetStencilTest(bool enable, GPU_TESTFUNC function, u8 ref, u8 mask, u8 replace) { - GPUCMD_AddSingleParam(0x000F0105, (enable&1)|((function&7)<<4)|(replace<<8)|(ref<<16)|(mask<<24)); + GPUCMD_AddWrite(GPUREG_STENCILTEST_CONFIG, (enable&1)|((function&7)<<4)|(replace<<8)|(ref<<16)|(mask<<24)); } void GPU_SetStencilOp(GPU_STENCILOP sfail, GPU_STENCILOP dfail, GPU_STENCILOP pass) { - GPUCMD_AddSingleParam(0x000F0106, sfail | (dfail << 4) | (pass << 8)); + GPUCMD_AddWrite(GPUREG_STENCILOP_CONFIG, sfail | (dfail << 4) | (pass << 8)); } void GPU_SetDepthTestAndWriteMask(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask) { - GPUCMD_AddSingleParam(0x000F0107, (enable&1)|((function&7)<<4)|(writemask<<8)); + GPUCMD_AddWrite(GPUREG_DEPTHTEST_CONFIG, (enable&1)|((function&7)<<4)|(writemask<<8)); } void GPU_SetAlphaBlending(GPU_BLENDEQUATION colorEquation, GPU_BLENDEQUATION alphaEquation, GPU_BLENDFACTOR colorSrc, GPU_BLENDFACTOR colorDst, GPU_BLENDFACTOR alphaSrc, GPU_BLENDFACTOR alphaDst) { - GPUCMD_AddSingleParam(0x000F0101, colorEquation | (alphaEquation<<8) | (colorSrc<<16) | (colorDst<<20) | (alphaSrc<<24) | (alphaDst<<28)); - GPUCMD_AddSingleParam(0x00020100, 0x00000100); + GPUCMD_AddWrite(GPUREG_BLEND_CONFIG, colorEquation | (alphaEquation<<8) | (colorSrc<<16) | (colorDst<<20) | (alphaSrc<<24) | (alphaDst<<28)); + GPUCMD_AddMaskedWrite(GPUREG_COLOROUTPUT_CONFIG, 0x2, 0x00000100); } void GPU_SetColorLogicOp(GPU_LOGICOP op) { - GPUCMD_AddSingleParam(0x000F0102, op); - GPUCMD_AddSingleParam(0x00020100, 0x00000000); + GPUCMD_AddWrite(GPUREG_COLORLOGICOP_CONFIG, op); + 
GPUCMD_AddMaskedWrite(GPUREG_COLOROUTPUT_CONFIG, 0x2, 0x00000000); } void GPU_SetBlendingColor(u8 r, u8 g, u8 b, u8 a) { - GPUCMD_AddSingleParam(0x000F0103, r | (g << 8) | (b << 16) | (a << 24)); + GPUCMD_AddWrite(GPUREG_BLEND_COLOR, r | (g << 8) | (b << 16) | (a << 24)); } void GPU_SetTextureEnable(GPU_TEXUNIT units) { - GPUCMD_AddSingleParam(0x0002006F, units<<8); // enables texcoord outputs - GPUCMD_AddSingleParam(0x000F0080, 0x00011000|units); // enables texture units + GPUCMD_AddMaskedWrite(GPUREG_006F, 0x2, units<<8); // enables texcoord outputs + GPUCMD_AddWrite(GPUREG_TEXUNITS_CONFIG, 0x00011000|units); // enables texture units } void GPU_SetTexture(GPU_TEXUNIT unit, u32* data, u16 width, u16 height, u32 param, GPU_TEXCOLOR colorType) @@ -375,24 +372,24 @@ void GPU_SetTexture(GPU_TEXUNIT unit, u32* data, u16 width, u16 height, u32 para switch (unit) { case GPU_TEXUNIT0: - GPUCMD_AddSingleParam(0x000F008E, colorType); - GPUCMD_AddSingleParam(0x000F0085, ((u32)data)>>3); - GPUCMD_AddSingleParam(0x000F0082, (width)|(height<<16)); - GPUCMD_AddSingleParam(0x000F0083, param); + GPUCMD_AddWrite(GPUREG_TEXUNIT0_TYPE, colorType); + GPUCMD_AddWrite(GPUREG_TEXUNIT0_LOC, ((u32)data)>>3); + GPUCMD_AddWrite(GPUREG_TEXUNIT0_DIM, (width)|(height<<16)); + GPUCMD_AddWrite(GPUREG_TEXUNIT0_PARAM, param); break; case GPU_TEXUNIT1: - GPUCMD_AddSingleParam(0x000F0096, colorType); - GPUCMD_AddSingleParam(0x000F0095, ((u32)data)>>3); - GPUCMD_AddSingleParam(0x000F0092, (width)|(height<<16)); - GPUCMD_AddSingleParam(0x000F0093, param); + GPUCMD_AddWrite(GPUREG_TEXUNIT1_TYPE, colorType); + GPUCMD_AddWrite(GPUREG_TEXUNIT1_LOC, ((u32)data)>>3); + GPUCMD_AddWrite(GPUREG_TEXUNIT1_DIM, (width)|(height<<16)); + GPUCMD_AddWrite(GPUREG_TEXUNIT1_PARAM, param); break; case GPU_TEXUNIT2: - GPUCMD_AddSingleParam(0x000F009E, colorType); - GPUCMD_AddSingleParam(0x000F009D, ((u32)data)>>3); - GPUCMD_AddSingleParam(0x000F009A, (width)|(height<<16)); - GPUCMD_AddSingleParam(0x000F009B, param); + 
GPUCMD_AddWrite(GPUREG_TEXUNIT2_TYPE, colorType); + GPUCMD_AddWrite(GPUREG_TEXUNIT2_LOC, ((u32)data)>>3); + GPUCMD_AddWrite(GPUREG_TEXUNIT2_DIM, (width)|(height<<16)); + GPUCMD_AddWrite(GPUREG_TEXUNIT2_PARAM, param); break; } } @@ -427,23 +424,22 @@ void GPU_SetAttributeBuffers(u8 totalAttributes, u32* baseAddress, u64 attribute param[3*(i+1)+2]=(bufferNumAttributes[i]<<28)|((stride&0xFFF)<<16)|((bufferPermutations[i]>>32)&0xFFFF); } - GPUCMD_Add(0x800F0200, param, 0x00000027); + GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFERS_LOC, param, 0x00000027); - GPUCMD_AddSingleParam(0x000B02B9, 0xA0000000|(totalAttributes-1)); - GPUCMD_AddSingleParam(0x000F0242, (totalAttributes-1)); + GPUCMD_AddMaskedWrite(GPUREG_VSH_INPUTBUFFER_CONFIG, 0xB, 0xA0000000|(totalAttributes-1)); + GPUCMD_AddWrite(GPUREG_0242, (totalAttributes-1)); - GPUCMD_AddSingleParam(0x000F02BB, attributePermutation&0xFFFFFFFF); - GPUCMD_AddSingleParam(0x000F02BC, (attributePermutation>>32)&0xFFFF); + GPUCMD_AddIncrementalWrites(GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){attributePermutation&0xFFFFFFFF, (attributePermutation>>32)&0xFFFF}), 2); } void GPU_SetAttributeBuffersAddress(u32* baseAddress) { - GPUCMD_AddSingleParam(0x000F0200, ((u32)baseAddress)>>3); + GPUCMD_AddWrite(GPUREG_ATTRIBBUFFERS_LOC, ((u32)baseAddress)>>3); } void GPU_SetFaceCulling(GPU_CULLMODE mode) { - GPUCMD_AddSingleParam(0x000F0040, mode&0x3); + GPUCMD_AddWrite(GPUREG_FACECULLING_CONFIG, mode&0x3); } const u8 GPU_TEVID[]={0xC0,0xC8,0xD0,0xD8,0xF0,0xF8}; @@ -460,54 +456,54 @@ void GPU_SetTexEnv(u8 id, u16 rgbSources, u16 alphaSources, u16 rgbOperands, u16 param[0x3]=constantColor; param[0x4]=0x00000000; // ? 
- GPUCMD_Add(0x800F0000|GPU_TEVID[id], param, 0x00000005); + GPUCMD_AddIncrementalWrites(GPUREG_0000|GPU_TEVID[id], param, 0x00000005); } void GPU_DrawArray(GPU_Primitive_t primitive, u32 n) { //set primitive type - GPUCMD_AddSingleParam(0x0002025E, primitive); - GPUCMD_AddSingleParam(0x0002025F, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x2, primitive); + GPUCMD_AddMaskedWrite(GPUREG_025F, 0x2, 0x00000001); //index buffer address register should be cleared (except bit 31) before drawing - GPUCMD_AddSingleParam(0x000F0227, 0x80000000); + GPUCMD_AddWrite(GPUREG_INDEXBUFFER_CONFIG, 0x80000000); //pass number of vertices - GPUCMD_AddSingleParam(0x000F0228, n); + GPUCMD_AddWrite(GPUREG_NUMVERTICES, n); //all the following except 0x000F022E might be useless - GPUCMD_AddSingleParam(0x00010253, 0x00000001); - GPUCMD_AddSingleParam(0x00010245, 0x00000000); - GPUCMD_AddSingleParam(0x000F022E, 0x00000001); - GPUCMD_AddSingleParam(0x00010245, 0x00000001); - GPUCMD_AddSingleParam(0x000F0231, 0x00000001); - GPUCMD_AddSingleParam(0x000F0111, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_0253, 0x1, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_0245, 0x1, 0x00000000); + GPUCMD_AddWrite(GPUREG_DRAWARRAYS, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_0245, 0x1, 0x00000001); + GPUCMD_AddWrite(GPUREG_0231, 0x00000001); + GPUCMD_AddWrite(GPUREG_0111, 0x00000001); } void GPU_DrawElements(GPU_Primitive_t primitive, u32* indexArray, u32 n) { //set primitive type - GPUCMD_AddSingleParam(0x0002025E, primitive); - GPUCMD_AddSingleParam(0x0002025F, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x2, primitive); + GPUCMD_AddMaskedWrite(GPUREG_025F, 0x2, 0x00000001); //index buffer (TODO : support multiple types) - GPUCMD_AddSingleParam(0x000F0227, 0x80000000|((u32)indexArray)); + GPUCMD_AddWrite(GPUREG_INDEXBUFFER_CONFIG, 0x80000000|((u32)indexArray)); //pass number of vertices - GPUCMD_AddSingleParam(0x000F0228, n); + GPUCMD_AddWrite(GPUREG_NUMVERTICES, n); - 
GPUCMD_AddSingleParam(0x00020229, 0x00000100); - GPUCMD_AddSingleParam(0x00020253, 0x00000100); + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x2, 0x00000100); + GPUCMD_AddMaskedWrite(GPUREG_0253, 0x2, 0x00000100); - GPUCMD_AddSingleParam(0x00010245, 0x00000000); - GPUCMD_AddSingleParam(0x000F022F, 0x00000001); - GPUCMD_AddSingleParam(0x00010245, 0x00000001); - GPUCMD_AddSingleParam(0x000F0231, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_0245, 0x1, 0x00000000); + GPUCMD_AddWrite(GPUREG_DRAWELEMENTS, 0x00000001); + GPUCMD_AddMaskedWrite(GPUREG_0245, 0x1, 0x00000001); + GPUCMD_AddWrite(GPUREG_0231, 0x00000001); // CHECKME: does this one also require command 0x0111 at the end? } void GPU_FinishDrawing() { - GPUCMD_AddSingleParam(0x000F0111, 0x00000001); - GPUCMD_AddSingleParam(0x000F0110, 0x00000001); - GPUCMD_AddSingleParam(0x000F0063, 0x00000001); + GPUCMD_AddWrite(GPUREG_0111, 0x00000001); + GPUCMD_AddWrite(GPUREG_0110, 0x00000001); + GPUCMD_AddWrite(GPUREG_0063, 0x00000001); } void GPU_SetShaderOutmap(u32 outmapData[8]) From 5df4902c4e3eb99f91c0fe8c535cf2f374e63f1b Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 21:16:17 -0800 Subject: [PATCH 12/21] GPU_SetUniform -> GPU_SetFloatUniform --- libctru/include/3ds/gpu/gpu.h | 2 +- libctru/source/gpu/gpu.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index 6373240..932a48a 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -208,7 +208,7 @@ typedef enum{ GPU_GEOMETRY_SHADER=0x1 }GPU_SHADER_TYPE; -void GPU_SetUniform(u32 startreg, u32* data, u32 numreg); +void GPU_SetFloatUniform(GPU_SHADER_TYPE type, u32 startreg, u32* data, u32 numreg); void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u32 h); diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index e32265b..14ae0fa 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -218,12 
+218,14 @@ void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize) GPUCMD_Run(gpuBuf); } -void GPU_SetUniform(u32 startreg, u32* data, u32 numreg) +void GPU_SetFloatUniform(GPU_SHADER_TYPE type, u32 startreg, u32* data, u32 numreg) { if(!data)return; - GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG, 0x80000000|startreg); - GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA, data, numreg*4); + u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); + + GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG-regOffset, 0x80000000|startreg); + GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA-regOffset, data, numreg*4); } //TODO : fix From b7ab3f9a54456388b48ad5798174f4c487ae18f5 Mon Sep 17 00:00:00 2001 From: smea Date: Fri, 2 Jan 2015 21:22:37 -0800 Subject: [PATCH 13/21] shbin.c cleanup, shaderInstanceGetUniformLocation --- libctru/include/3ds/gpu/shaderProgram.h | 1 + libctru/include/3ds/gpu/shbin.h | 12 +--- libctru/source/gpu/shaderProgram.c | 7 ++ libctru/source/gpu/shbin.c | 94 ++----------------------- 4 files changed, 15 insertions(+), 99 deletions(-) diff --git a/libctru/include/3ds/gpu/shaderProgram.h b/libctru/include/3ds/gpu/shaderProgram.h index c69e14e..e4a747a 100644 --- a/libctru/include/3ds/gpu/shaderProgram.h +++ b/libctru/include/3ds/gpu/shaderProgram.h @@ -31,6 +31,7 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle); Result shaderInstanceFree(shaderInstance_s* si); Result shaderInstanceSetBool(shaderInstance_s* si, int id, bool value); Result shaderInstanceGetBool(shaderInstance_s* si, int id, bool* value); +Result shaderInstanceGetUniformLocation(shaderInstance_s* si, const char* name); Result shaderProgramInit(shaderProgram_s* sp); Result shaderProgramFree(shaderProgram_s* sp); diff --git a/libctru/include/3ds/gpu/shbin.h b/libctru/include/3ds/gpu/shbin.h index ed41114..b055567 100644 --- a/libctru/include/3ds/gpu/shbin.h +++ b/libctru/include/3ds/gpu/shbin.h @@ -71,14 +71,8 @@ typedef struct{ DVLE_s* DVLE; }DVLB_s; -DVLB_s* SHDR_ParseSHBIN(u32* 
shbinData, u32 shbinSize); -void SHDR_UseProgram(DVLB_s* dvlb, u8 id); -void SHDR_FreeDVLB(DVLB_s* dvlb); -s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID); +DVLB_s* DVLB_ParseFile(u32* shbinData, u32 shbinSize); +void DVLB_Free(DVLB_s* dvlb); -void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type); -void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type); - -void DVLE_SendOutmap(DVLE_s* dvle); -void DVLE_SendConstants(DVLE_s* dvle); +s8 DVLE_GetUniformRegister(DVLE_s* dvle, const char* name); void DVLE_GenerateOutmap(DVLE_s* dvle); diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c index cb53476..66466e5 100644 --- a/libctru/source/gpu/shaderProgram.c +++ b/libctru/source/gpu/shaderProgram.c @@ -103,6 +103,13 @@ Result shaderInstanceGetBool(shaderInstance_s* si, int id, bool* value) return 0; } +Result shaderInstanceGetUniformLocation(shaderInstance_s* si, const char* name) +{ + if(!si)return -1; + + return DVLE_GetUniformRegister(si->dvle, name); +} + Result shaderProgramInit(shaderProgram_s* sp) { if(!sp)return -1; diff --git a/libctru/source/gpu/shbin.c b/libctru/source/gpu/shbin.c index 9c51f9b..1875a4d 100644 --- a/libctru/source/gpu/shbin.c +++ b/libctru/source/gpu/shbin.c @@ -9,7 +9,7 @@ #include <3ds/gpu/shbin.h> //please don't feed this an invalid SHBIN -DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) +DVLB_s* DVLB_ParseFile(u32* shbinData, u32 shbinSize) { if(!shbinData)return NULL; DVLB_s* ret=malloc(sizeof(DVLB_s)); @@ -66,17 +66,15 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize) } //TODO -void SHDR_FreeDVLB(DVLB_s* dvlb) +void DVLB_Free(DVLB_s* dvlb) { if(!dvlb)return; } -s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) +s8 DVLE_GetUniformRegister(DVLE_s* dvle, const char* name) { - if(!dvlb || !name)return -1; - - DVLE_s* dvle=&dvlb->DVLE[programID]; + if(!dvle || !name)return -1; int i; DVLE_uniformEntry_s* u=dvle->uniformTableData; for(i=0;iuniformTableSize;i++) 
@@ -87,20 +85,6 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID) return -1; } -void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type) -{ - if(!dvlp)return; - - GPU_SendShaderCode(type, dvlp->codeData, 0, dvlp->codeSize); -} - -void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type) -{ - if(!dvlp)return; - - GPU_SendOperandDescriptors(type, dvlp->opcdescData, 0, dvlp->opdescSize); -} - void DVLE_GenerateOutmap(DVLE_s* dvle) { if(!dvle)return; @@ -145,73 +129,3 @@ void DVLE_GenerateOutmap(DVLE_s* dvle) dvle->outmapData[0]=numAttr; dvle->outmapMask=attrMask; } - -void DVLE_SendOutmap(DVLE_s* dvle) -{ - if(!dvle)return; - - u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); - - if(dvle->type==VERTEX_SHDR) - { - GPUCMD_AddWrite(GPUREG_024A, dvle->outmapData[0]-1); //? - GPUCMD_AddWrite(GPUREG_0251, dvle->outmapData[0]-1); //? - } - - GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, dvle->outmapMask); - GPU_SetShaderOutmap(dvle->outmapData); -} - -void DVLE_SendConstants(DVLE_s* dvle) -{ - if(!dvle)return; - - u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); - - u32 param[4]; - u32 rev[3]; - u8* rev8=(u8*)rev; - - int i; - DVLE_constEntry_s* cnst=dvle->constTableData; - for(i=0;iconstTableSize;i++,cnst++) - { - memcpy(&rev8[0], &cnst->data[0], 3); - memcpy(&rev8[3], &cnst->data[1], 3); - memcpy(&rev8[6], &cnst->data[2], 3); - memcpy(&rev8[9], &cnst->data[3], 3); - - param[0x0]=(cnst->id)&0xFF; - param[0x1]=rev[2]; - param[0x2]=rev[1]; - param[0x3]=rev[0]; - - GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG+regOffset, param, 4); - } -} - -void SHDR_UseProgram(DVLB_s* dvlb, u8 id) -{ - if(!dvlb || id>dvlb->numDVLE)return; - DVLE_s* dvle=&dvlb->DVLE[id]; - - u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0); - - GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); - GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0); - - DVLP_SendCode(&dvlb->DVLP, dvlb->DVLE[id].type); - 
DVLP_SendOpDesc(&dvlb->DVLP, dvlb->DVLE[id].type); - DVLE_SendConstants(dvle); - - GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); - GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT-regOffset, 0x7FFF0000|(dvle->mainOffset&0xFFFF)); //set entrypoint - - GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // gsh related ? - - DVLE_SendOutmap(dvle); - - //? - GPUCMD_AddWrite(GPUREG_0064, 0x00000001); - GPUCMD_AddWrite(GPUREG_006F, 0x00000703); -} From 100f72c1931ae59896d1449f170f00a8e9ad105e Mon Sep 17 00:00:00 2001 From: smea Date: Sat, 3 Jan 2015 18:00:55 -0800 Subject: [PATCH 14/21] fixed linear/vram mem align --- libctru/source/allocator/mem_pool.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libctru/source/allocator/mem_pool.cpp b/libctru/source/allocator/mem_pool.cpp index 2c32638..a2c312d 100644 --- a/libctru/source/allocator/mem_pool.cpp +++ b/libctru/source/allocator/mem_pool.cpp @@ -39,6 +39,7 @@ bool MemPool::Allocate(MemChunk& chunk, u32 size, int align) { auto addr = b->base; u32 begWaste = (u32)addr & alignM; + if (begWaste > 0) begWaste = alignM + 1 - begWaste; addr += begWaste; u32 bSize = b->size - begWaste; if (bSize < size) continue; From 8d274afd132547fccbd105b071a4d5ecac1550b6 Mon Sep 17 00:00:00 2001 From: smea Date: Sat, 3 Jan 2015 18:06:22 -0800 Subject: [PATCH 15/21] fixed shaderProgramUse so that programs without geoshaders will play nice with those with --- libctru/source/gpu/shaderProgram.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c index 66466e5..95ff7c8 100644 --- a/libctru/source/gpu/shaderProgram.c +++ b/libctru/source/gpu/shaderProgram.c @@ -168,6 +168,17 @@ Result shaderProgramUse(shaderProgram_s* sp) int i; + // configure geostage + // has to be done first or else VSH registers might only reconfigure 3 of the 4 shader units ! 
+ if(!sp->geometryShader) + { + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000); + }else{ + GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002); + GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000001); + } + // setup vertex shader stuff no matter what const DVLE_s* vshDvle = sp->vertexShader->dvle; const DVLP_s* vshDvlp = vshDvle->dvlp; @@ -190,9 +201,6 @@ Result shaderProgramUse(shaderProgram_s* sp) // finish setting up vertex shader alone GPU_SetShaderOutmap((u32*)vshDvle->outmapData); - GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000); - GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000); - GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? GPUCMD_AddWrite(GPUREG_006F, 0x00000703); // ? }else{ @@ -213,9 +221,6 @@ Result shaderProgramUse(shaderProgram_s* sp) GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geometryShaderInputStride-1)); GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2); - GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002); - GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000001); - GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ? GPUCMD_AddWrite(GPUREG_006F, 0x01030703); // ? 
} From b175fdbca5a37c54f7ffa6f5143b4c3b9d179003 Mon Sep 17 00:00:00 2001 From: fincs Date: Sun, 1 Mar 2015 23:19:29 +0100 Subject: [PATCH 16/21] Correct error in GPU_SetFloatUniform --- libctru/source/gpu/gpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 14ae0fa..d2ff78c 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -222,10 +222,10 @@ void GPU_SetFloatUniform(GPU_SHADER_TYPE type, u32 startreg, u32* data, u32 numr { if(!data)return; - u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); + int regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); - GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG-regOffset, 0x80000000|startreg); - GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA-regOffset, data, numreg*4); + GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG+regOffset, 0x80000000|startreg); + GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA+regOffset, data, numreg*4); } //TODO : fix From 9abad5bbaf757fe20fd94291d3054b20ca3b2235 Mon Sep 17 00:00:00 2001 From: fincs Date: Sun, 1 Mar 2015 23:20:01 +0100 Subject: [PATCH 17/21] Update GPU example (untested) --- examples/gpu/source/gs.c | 9 +++++---- examples/gpu/source/gs.h | 2 +- examples/gpu/source/main.c | 18 ++++++++++-------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/examples/gpu/source/gs.c b/examples/gpu/source/gs.c index 4eabd45..b7c91d3 100644 --- a/examples/gpu/source/gs.c +++ b/examples/gpu/source/gs.c @@ -32,15 +32,16 @@ void initBufferMatrixList() bufferMatrixListLength=0; } -void gsInit(DVLB_s* shader) +void gsInit(shaderProgram_s* shader) { gsInitMatrixStack(); initBufferMatrixList(); svcCreateMutex(&linearAllocMutex, false); if(shader) { - gsMatrixStackRegisters[0]=SHDR_GetUniformRegister(shader, "projection", 0); - gsMatrixStackRegisters[1]=SHDR_GetUniformRegister(shader, "modelview", 0); + gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, 
"projection"); + gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview"); + shaderProgramUse(shader); } } @@ -235,7 +236,7 @@ static void gsSetUniformMatrix(u32 startreg, float* m) param[0xe]=m[13]; param[0xf]=m[12]; - GPU_SetUniform(startreg, (u32*)param, 4); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4); } static int gsUpdateTransformation() diff --git a/examples/gpu/source/gs.h b/examples/gpu/source/gs.h index 6976fca..2da15bd 100644 --- a/examples/gpu/source/gs.h +++ b/examples/gpu/source/gs.h @@ -24,7 +24,7 @@ typedef struct }gsVbo_s; -void gsInit(DVLB_s* shader); +void gsInit(shaderProgram_s* shader); void gsExit(void); void gsStartFrame(void); diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c index 3662ccd..9f56189 100644 --- a/examples/gpu/source/main.c +++ b/examples/gpu/source/main.c @@ -25,7 +25,8 @@ #define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0)) //shader structure -DVLB_s* shader; +DVLB_s* dvlb; +shaderProgram_s shader; //texture data pointer u32* texData; //vbo structure @@ -139,9 +140,6 @@ void renderFrame() GPUCMD_AddSingleParam(0x00010062, 0); GPUCMD_AddSingleParam(0x000F0118, 0); - //setup shader - SHDR_UseProgram(shader, 0); - GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); @@ -168,8 +166,8 @@ void renderFrame() //setup lighting (this is specific to our shader) vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))); - GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightDirection", 0), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1); - GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightAmbient", 0), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), 
(u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1); //initialize projection matrix to standard perspective stuff gsMatrixMode(GS_PROJECTION); @@ -199,10 +197,12 @@ int main(int argc, char** argv) gfxSet3D(true); //load our vertex shader binary - shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin, test_vsh_shbin_size); + dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size); + shaderProgramInit(&shader); + shaderProgramSetVsh(&shader, &dvlb->DVLE[0]); //initialize GS - gsInit(shader); + gsInit(&shader); //allocate our GPU command buffers //they *have* to be on the linear heap @@ -318,6 +318,8 @@ int main(int argc, char** argv) } gsExit(); + shaderProgramFree(&shader); + DVLB_Free(dvlb); gfxExit(); return 0; } From 386a700673fdab59f208b50e0c6c4536f8a175d4 Mon Sep 17 00:00:00 2001 From: fincs Date: Mon, 2 Mar 2015 00:54:29 +0100 Subject: [PATCH 18/21] Fix GPU example --- examples/gpu/data/test.vsh | 10 +++++----- examples/gpu/source/main.c | 25 +++++++++++++++---------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/examples/gpu/data/test.vsh b/examples/gpu/data/test.vsh index 37293ae..a142641 100644 --- a/examples/gpu/data/test.vsh +++ b/examples/gpu/data/test.vsh @@ -2,11 +2,11 @@ .const c20, 1.0, 0.0, 0.5, 1.0 ; setup outmap - .out o0, result.position, 0x0 - .out o1, result.color, 0x0 - .out o2, result.texcoord0, 0x0 - .out o3, result.texcoord1, 0x0 - .out o4, result.texcoord2, 0x0 + .out o0, result.position, 0xF + .out o1, result.color, 0xF + .out o2, result.texcoord0, 0x3 + .out o3, result.texcoord1, 0x3 + .out o4, result.texcoord2, 0x3 ; setup uniform map (not required) .uniform c0, c3, projection diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c index 9f56189..411d7aa 100644 --- a/examples/gpu/source/main.c +++ b/examples/gpu/source/main.c @@ -137,8 +137,8 
@@ void renderFrame() GPU_SetBlendingColor(0,0,0,0); GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); - GPUCMD_AddSingleParam(0x00010062, 0); - GPUCMD_AddSingleParam(0x000F0118, 0); + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); @@ -196,14 +196,6 @@ int main(int argc, char** argv) //let GFX know we're ok with doing stereoscopic 3D rendering gfxSet3D(true); - //load our vertex shader binary - dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size); - shaderProgramInit(&shader); - shaderProgramSetVsh(&shader, &dvlb->DVLE[0]); - - //initialize GS - gsInit(&shader); - //allocate our GPU command buffers //they *have* to be on the linear heap u32 gpuCmdSize=0x40000; @@ -213,6 +205,19 @@ int main(int argc, char** argv) //actually reset the GPU GPU_Reset(NULL, gpuCmd, gpuCmdSize); + //load our vertex shader binary + dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size); + shaderProgramInit(&shader); + shaderProgramSetVsh(&shader, &dvlb->DVLE[0]); + + //initialize GS + gsInit(&shader); + + // Flush the command buffer so that the shader upload gets executed + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); + //create texture texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned memcpy(texData, texture_bin, texture_bin_size); From 44d2606039f533dff17335e940f8a806636b5a9b Mon Sep 17 00:00:00 2001 From: fincs Date: Fri, 6 Mar 2015 16:32:57 +0100 Subject: [PATCH 19/21] Minor correction in GPU_SendShaderCode/SendOperandDescriptors --- libctru/source/gpu/gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index d2ff78c..f05ab9d 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -518,7 +518,7 
@@ void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length) { if(!data)return; - u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); + int regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, offset); @@ -532,7 +532,7 @@ void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 { if(!data)return; - u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); + int regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, offset); From 624dc1c1b33c3d989a8d235ac2f609f1c3553981 Mon Sep 17 00:00:00 2001 From: fincs Date: Sat, 7 Mar 2015 16:59:52 +0100 Subject: [PATCH 20/21] GPU_DepthRange() -> GPU_DepthMap() --- examples/gpu/source/main.c | 2 +- libctru/include/3ds/gpu/gpu.h | 2 +- libctru/include/3ds/gpu/registers.h | 4 ++-- libctru/source/gpu/gpu.c | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c index 411d7aa..c42849e 100644 --- a/examples/gpu/source/main.c +++ b/examples/gpu/source/main.c @@ -130,7 +130,7 @@ void renderFrame() { GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); - GPU_DepthRange(-1.0f, 0.0f); + GPU_DepthMap(-1.0f, 0.0f); GPU_SetFaceCulling(GPU_CULL_BACK_CCW); GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index 932a48a..8d59c4d 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -214,7 +214,7 @@ void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u3 void GPU_SetScissorTest(GPU_SCISSORMODE mode, u32 x, u32 y, u32 w, u32 h); -void GPU_DepthRange(float nearVal, float farVal); +void GPU_DepthMap(float zScale, float zOffset); void GPU_SetAlphaTest(bool enable, GPU_TESTFUNC 
function, u8 ref); void GPU_SetDepthTestAndWriteMask(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask); // GPU_WRITEMASK values can be ORed together void GPU_SetStencilTest(bool enable, GPU_TESTFUNC function, u8 ref, u8 mask, u8 replace); diff --git a/libctru/include/3ds/gpu/registers.h b/libctru/include/3ds/gpu/registers.h index 1390a8f..89cd378 100644 --- a/libctru/include/3ds/gpu/registers.h +++ b/libctru/include/3ds/gpu/registers.h @@ -77,8 +77,8 @@ #define GPUREG_004A 0x004A #define GPUREG_004B 0x004B #define GPUREG_004C 0x004C -#define GPUREG_DEPTHRANGE_NEAR 0x004D -#define GPUREG_DEPTHRANGE_FAR 0x004E +#define GPUREG_DEPTHMAP_SCALE 0x004D +#define GPUREG_DEPTHMAP_OFFSET 0x004E #define GPUREG_SH_OUTMAP_TOTAL 0x004F #define GPUREG_SH_OUTMAP_O0 0x0050 #define GPUREG_SH_OUTMAP_O1 0x0051 diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index f05ab9d..c913bc9 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -317,11 +317,11 @@ void GPU_SetScissorTest(GPU_SCISSORMODE mode, u32 x, u32 y, u32 w, u32 h) GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, param, 0x00000003); } -void GPU_DepthRange(float nearVal, float farVal) +void GPU_DepthMap(float zScale, float zOffset) { GPUCMD_AddWrite(GPUREG_006D, 0x00000001); //? 
- GPUCMD_AddWrite(GPUREG_DEPTHRANGE_NEAR, f32tof24(nearVal)); - GPUCMD_AddWrite(GPUREG_DEPTHRANGE_FAR, f32tof24(farVal)); + GPUCMD_AddWrite(GPUREG_DEPTHMAP_SCALE, f32tof24(zScale)); + GPUCMD_AddWrite(GPUREG_DEPTHMAP_OFFSET, f32tof24(zOffset)); } void GPU_SetAlphaTest(bool enable, GPU_TESTFUNC function, u8 ref) From b085943d4d0c58fa37ff8825bc25f9aaedaa3234 Mon Sep 17 00:00:00 2001 From: fincs Date: Sat, 7 Mar 2015 20:39:28 +0100 Subject: [PATCH 21/21] Correct shaderProgramFree() and DVLB_Free() --- libctru/source/gpu/shaderProgram.c | 2 -- libctru/source/gpu/shbin.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libctru/source/gpu/shaderProgram.c b/libctru/source/gpu/shaderProgram.c index 95ff7c8..cd5fd86 100644 --- a/libctru/source/gpu/shaderProgram.c +++ b/libctru/source/gpu/shaderProgram.c @@ -127,8 +127,6 @@ Result shaderProgramFree(shaderProgram_s* sp) shaderInstanceFree(sp->vertexShader); shaderInstanceFree(sp->geometryShader); - free(sp); - return 0; } diff --git a/libctru/source/gpu/shbin.c b/libctru/source/gpu/shbin.c index 1875a4d..d6f66dc 100644 --- a/libctru/source/gpu/shbin.c +++ b/libctru/source/gpu/shbin.c @@ -69,7 +69,9 @@ DVLB_s* DVLB_ParseFile(u32* shbinData, u32 shbinSize) void DVLB_Free(DVLB_s* dvlb) { if(!dvlb)return; - + if(dvlb->DVLP.opcdescData)free(dvlb->DVLP.opcdescData); + if(dvlb->DVLE)free(dvlb->DVLE); + free(dvlb); } s8 DVLE_GetUniformRegister(DVLE_s* dvle, const char* name)