diff --git a/examples/gpu/README.md b/examples/gpu/README.md deleted file mode 100644 index 2a118cc..0000000 --- a/examples/gpu/README.md +++ /dev/null @@ -1,11 +0,0 @@ -gpu -======= - -example of how to use the GPU with libctru - -before trying to compile, make sure to download aemstro -( https://github.com/smealum/aemstro reflog: 51bfeef9e1a0149726dca43b50919bd45917015a ) -and update AEMSTRO environment variable with the proper path - -You'll also need to install Python 3 and have that in your path. - diff --git a/examples/gpu/data/test.vsh b/examples/gpu/data/test.vsh deleted file mode 100644 index a142641..0000000 --- a/examples/gpu/data/test.vsh +++ /dev/null @@ -1,57 +0,0 @@ -; setup constants - .const c20, 1.0, 0.0, 0.5, 1.0 - -; setup outmap - .out o0, result.position, 0xF - .out o1, result.color, 0xF - .out o2, result.texcoord0, 0x3 - .out o3, result.texcoord1, 0x3 - .out o4, result.texcoord2, 0x3 - -; setup uniform map (not required) - .uniform c0, c3, projection - .uniform c4, c7, modelview - .uniform c8, c8, lightDirection - .uniform c9, c9, lightAmbient - - .vsh vmain, end_vmain - -;code - vmain: - mov r1, v0 (0x4) - mov r1, c20 (0x3) - ; temp = modvMtx * in.pos - dp4 r0, c4, r1 (0x0) - dp4 r0, c5, r1 (0x1) - dp4 r0, c6, r1 (0x2) - mov r0, c20 (0x3) - ; result.pos = projMtx * temp - dp4 o0, c0, r0 (0x0) - dp4 o0, c1, r0 (0x1) - dp4 o0, c2, r0 (0x2) - dp4 o0, c3, r0 (0x3) - ; result.texcoord = in.texcoord - mov o2, v1 (0x5) - mov o3, c20 (0x7) - mov o4, c20 (0x7) - ; result.color = crappy lighting - dp3 r0, c8, v2 (0x4) - max r0, c20, r0 (0x9) - mul r0, c9, r0 (0x4) - add o1, c9, r0 (0x4) - mov o1, c20 (0x3) - nop - end - end_vmain: - -;operand descriptors - .opdesc x___, xyzw, xyzw ; 0x0 - .opdesc _y__, xyzw, xyzw ; 0x1 - .opdesc __z_, xyzw, xyzw ; 0x2 - .opdesc ___w, xyzw, xyzw ; 0x3 - .opdesc xyz_, xyzw, xyzw ; 0x4 - .opdesc xyzw, xyzw, xyzw ; 0x5 - .opdesc x_zw, xyzw, xyzw ; 0x6 - .opdesc xyzw, yyyw, xyzw ; 0x7 - .opdesc xyz_, wwww, wwww ; 0x8 - 
.opdesc xyz_, yyyy, xyzw ; 0x9 diff --git a/examples/gpu/data/texture.bin b/examples/gpu/data/texture.bin deleted file mode 100644 index 4a3312b..0000000 Binary files a/examples/gpu/data/texture.bin and /dev/null differ diff --git a/examples/gpu/source/_gs.s b/examples/gpu/source/_gs.s deleted file mode 100644 index 1b9c92b..0000000 --- a/examples/gpu/source/_gs.s +++ /dev/null @@ -1,16 +0,0 @@ -.section ".text" -.arm -.align 4 -.global _vboMemcpy50 - -# r0 : dst -# r1 : src -# fixed size 0x50 -_vboMemcpy50: - push {r4-r11} - ldmia r1!, {r2-r12} - stmia r0!, {r2-r12} - ldmia r1!, {r2-r12} - stmia r0!, {r2-r12} - pop {r4-r11} - bx lr diff --git a/examples/gpu/source/gs.c b/examples/gpu/source/gs.c deleted file mode 100644 index b7c91d3..0000000 --- a/examples/gpu/source/gs.c +++ /dev/null @@ -1,432 +0,0 @@ -#include -#include -#include -#include <3ds.h> - -#include "gs.h" -#include "math.h" - -#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4) - -static void gsInitMatrixStack(); - -Handle linearAllocMutex; - -static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]; - -typedef struct -{ - u32 offset; - mtx44 data; -}bufferMatrix_s; - -bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE]; -int bufferMatrixListLength; - -//---------------------- -// GS SYSTEM STUFF -//---------------------- - -void initBufferMatrixList() -{ - bufferMatrixListLength=0; -} - -void gsInit(shaderProgram_s* shader) -{ - gsInitMatrixStack(); - initBufferMatrixList(); - svcCreateMutex(&linearAllocMutex, false); - if(shader) - { - gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection"); - gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview"); - shaderProgramUse(shader); - } -} - -void gsExit(void) -{ - svcCloseHandle(linearAllocMutex); -} - -void gsStartFrame(void) -{ - GPUCMD_SetBufferOffset(0); - initBufferMatrixList(); -} - -void* gsLinearAlloc(size_t size) -{ - void* ret=NULL; - - 
svcWaitSynchronization(linearAllocMutex, U64_MAX); - ret=linearAlloc(size); - svcReleaseMutex(linearAllocMutex); - - return ret; -} - -void gsLinearFree(void* mem) -{ - svcWaitSynchronization(linearAllocMutex, U64_MAX); - linearFree(mem); - svcReleaseMutex(linearAllocMutex); -} - -//---------------------- -// MATRIX STACK STUFF -//---------------------- - -static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE]; -static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04}; -static u8 gsMatrixStackOffsets[GS_MATRIXTYPES]; -static bool gsMatrixStackUpdated[GS_MATRIXTYPES]; -static GS_MATRIX gsCurrentMatrixType; - -static void gsInitMatrixStack() -{ - int i; - for(i=0; i=GS_MATRIXTYPES)return NULL; - - return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]]; -} - -int gsLoadMatrix(GS_MATRIX m, float* data) -{ - if(m<0 || m>=GS_MATRIXTYPES || !data)return -1; - - memcpy(gsGetMatrix(m), data, sizeof(mtx44)); - - gsMatrixStackUpdated[m]=true; - - return 0; -} - -int gsPushMatrix() -{ - const GS_MATRIX m=gsCurrentMatrixType; - if(m<0 || m>=GS_MATRIXTYPES)return -1; - if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1; - - float* cur=gsGetMatrix(m); - gsMatrixStackOffsets[m]++; - memcpy(gsGetMatrix(m), cur, sizeof(mtx44)); - - return 0; -} - -int gsPopMatrix() -{ - const GS_MATRIX m=gsCurrentMatrixType; - if(m<0 || m>=GS_MATRIXTYPES)return -1; - if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1; - - gsMatrixStackOffsets[m]--; - - gsMatrixStackUpdated[m]=true; - - return 0; -} - -int gsMatrixMode(GS_MATRIX m) -{ - if(m<0 || m>=GS_MATRIXTYPES)return -1; - - gsCurrentMatrixType=m; - - return 0; -} - -//------------------------ -// MATRIX TRANSFORM STUFF -//------------------------ - -int gsMultMatrix(float* data) -{ - if(!data)return -1; - - mtx44 tmp; - multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp); - memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, 
sizeof(mtx44)); - - gsMatrixStackUpdated[gsCurrentMatrixType]=true; - - return 0; -} - -void gsLoadIdentity() -{ - loadIdentity44(gsGetMatrix(gsCurrentMatrixType)); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsProjectionMatrix(float fovy, float aspect, float near, float far) -{ - initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsRotateX(float x) -{ - rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsRotateY(float y) -{ - rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsRotateZ(float z) -{ - rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsScale(float x, float y, float z) -{ - scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsTranslate(float x, float y, float z) -{ - translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -//---------------------- -// MATRIX RENDER STUFF -//---------------------- - -static void gsSetUniformMatrix(u32 startreg, float* m) -{ - float param[16]; - - param[0x0]=m[3]; //w - param[0x1]=m[2]; //z - param[0x2]=m[1]; //y - param[0x3]=m[0]; //x - - param[0x4]=m[7]; - param[0x5]=m[6]; - param[0x6]=m[5]; - param[0x7]=m[4]; - - param[0x8]=m[11]; - param[0x9]=m[10]; - param[0xa]=m[9]; - param[0xb]=m[8]; - - param[0xc]=m[15]; - param[0xd]=m[14]; - param[0xe]=m[13]; - param[0xf]=m[12]; - - GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4); -} - -static int gsUpdateTransformation() -{ - GS_MATRIX m; - for(m=0; mdata=NULL; - vbo->currentSize=0; - vbo->maxSize=0; - vbo->commands=NULL; - vbo->commandsSize=0; - - return 0; -} - -int gsVboCreate(gsVbo_s* vbo, u32 size) -{ - if(!vbo)return 
-1; - - vbo->data=gsLinearAlloc(size); - vbo->numVertices=0; - vbo->currentSize=0; - vbo->maxSize=size; - - return 0; -} - -void* gsVboGetOffset(gsVbo_s* vbo) -{ - if(!vbo)return NULL; - - return (void*)(&((u8*)vbo->data)[vbo->currentSize]); -} - -int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units) -{ - if(!vbo || !data || !size)return -1; - if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1; - - memcpy(gsVboGetOffset(vbo), data, size); - vbo->currentSize+=size; - vbo->numVertices+=units; - - return 0; -} - -int gsVboFlushData(gsVbo_s* vbo) -{ - if(!vbo)return -1; - - //unnecessary if we use flushAndRun - // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize); - - return 0; -} - -int gsVboDestroy(gsVbo_s* vbo) -{ - if(!vbo)return -1; - - if(vbo->commands)free(vbo->commands); - if(vbo->data)gsLinearFree(vbo->data); - gsVboInit(vbo); - - return 0; -} - -extern u32 debugValue[]; - -void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n) -{ - //set attribute buffer address - GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3); - //set primitive type - GPUCMD_AddSingleParam(0x0002025E, primitive); - GPUCMD_AddSingleParam(0x0002025F, 0x00000001); - //index buffer not used for drawArrays but 0x000F0227 still required - GPUCMD_AddSingleParam(0x000F0227, 0x80000000); - //pass number of vertices - GPUCMD_AddSingleParam(0x000F0228, n); - - GPUCMD_AddSingleParam(0x00010253, 0x00000001); - - GPUCMD_AddSingleParam(0x00010245, 0x00000000); - GPUCMD_AddSingleParam(0x000F022E, 0x00000001); - GPUCMD_AddSingleParam(0x00010245, 0x00000001); - GPUCMD_AddSingleParam(0x000F0231, 0x00000001); - - // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff -} - -//not thread safe -int gsVboPrecomputeCommands(gsVbo_s* vbo) -{ - if(!vbo || vbo->commands)return -1; - - static u32 tmpBuffer[128]; - - u32* savedAdr; u32 savedSize, savedOffset; - GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset); - 
GPUCMD_SetBuffer(tmpBuffer, 128, 0); - - GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); - - GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); - vbo->commands=memalign(0x4, vbo->commandsSize*4); - if(!vbo->commands)return -1; - memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4); - - GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset); - - return 0; -} - -extern u32* gpuCmdBuf; -extern u32 gpuCmdBufSize; -extern u32 gpuCmdBufOffset; - -void _vboMemcpy50(u32* dst, u32* src); - -void _GPUCMD_AddRawCommands(u32* cmd, u32 size) -{ - if(!cmd || !size)return; - - if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd); - else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4); - gpuCmdBufOffset+=size; -} - -int gsVboDraw(gsVbo_s* vbo) -{ - if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1; - - gsUpdateTransformation(); - - gsVboPrecomputeCommands(vbo); - - // u64 val=svcGetSystemTick(); - if(vbo->commands) - { - _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize); - }else{ - GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); - } - // debugValue[5]+=(u32)(svcGetSystemTick()-val); - // debugValue[6]++; - - return 0; -} diff --git a/examples/gpu/source/gs.h b/examples/gpu/source/gs.h deleted file mode 100644 index 2da15bd..0000000 --- a/examples/gpu/source/gs.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef GS_H -#define GS_H - -#include <3ds.h> -#include "math.h" - -#define GS_MATRIXSTACK_SIZE (8) - -typedef enum -{ - GS_PROJECTION = 0, - GS_MODELVIEW = 1, - GS_MATRIXTYPES -}GS_MATRIX; - -typedef struct -{ - u8* data; - u32 currentSize; // in bytes - u32 maxSize; // in bytes - u32 numVertices; - u32* commands; - u32 commandsSize; -}gsVbo_s; - - -void gsInit(shaderProgram_s* shader); -void gsExit(void); - -void gsStartFrame(void); -void gsAdjustBufferMatrices(mtx44 transformation); - -void* gsLinearAlloc(size_t size); -void gsLinearFree(void* mem); - -float* gsGetMatrix(GS_MATRIX m); -int gsLoadMatrix(GS_MATRIX m, 
float* data); -int gsPushMatrix(); -int gsPopMatrix(); -int gsMatrixMode(GS_MATRIX m); - -void gsLoadIdentity(); -void gsProjectionMatrix(float fovy, float aspect, float near, float far); -void gsRotateX(float x); -void gsRotateY(float y); -void gsRotateZ(float z); -void gsScale(float x, float y, float z); -void gsTranslate(float x, float y, float z); -int gsMultMatrix(float* data); - -int gsVboInit(gsVbo_s* vbo); -int gsVboCreate(gsVbo_s* vbo, u32 size); -int gsVboFlushData(gsVbo_s* vbo); -int gsVboDestroy(gsVbo_s* vbo); -int gsVboDraw(gsVbo_s* vbo); -void* gsVboGetOffset(gsVbo_s* vbo); -int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units); - -#endif diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c deleted file mode 100644 index a045fe3..0000000 --- a/examples/gpu/source/main.c +++ /dev/null @@ -1,354 +0,0 @@ -/////////////////////////////////////// -// GPU example // -/////////////////////////////////////// - -//this example is meant to show how to use the GPU to render a 3D object -//it also shows how to do stereoscopic 3D -//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft -//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited. 
- -#include -#include -#include -#include -#include <3ds.h> - -#include "math.h" -#include "gs.h" - -#include "test_vsh_shbin.h" -#include "texture_bin.h" - -//will be moved into ctrulib at some point -#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080) - -#define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0)) - -//transfer from GPU output buffer to actual framebuffer flags -#define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ - GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ - GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X)) - -//shader structure -DVLB_s* dvlb; -shaderProgram_s shader; -//texture data pointer -u32* texData; -//vbo structure -gsVbo_s vbo; - -//GPU framebuffer address -u32* gpuOut=(u32*)0x1F119400; -//GPU depth buffer address -u32* gpuDOut=(u32*)0x1F370800; - -//angle for the vertex lighting (cf test.vsh) -float lightAngle; -//object position and rotation angle -vect3Df_s position, angle; - -//vertex structure -typedef struct -{ - vect3Df_s position; - float texcoord[2]; - vect3Df_s normal; -}vertex_s; - -//object data (cube) -//obviously this doesn't have to be defined manually, but we will here for the purposes of the example -//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z} -//we're drawing triangles so three lines = one triangle -const vertex_s modelVboData[]= -{ - //first face (PZ) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, 
(float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - //second face (MZ) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - //third face (PX) - //first triangle - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - //fourth face (MX) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - //second triangle - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 
0.0f}}, - //fifth face (PY) - //first triangle - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - //sixth face (MY) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - //second triangle - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, -}; - -//stolen from staplebutt -void GPU_SetDummyTexEnv(u8 num) -{ - GPU_SetTexEnv(num, - GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), - GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), - GPU_TEVOPERANDS(0,0,0), - GPU_TEVOPERANDS(0,0,0), - GPU_REPLACE, - GPU_REPLACE, - 0xFFFFFFFF); -} - -// topscreen -void renderFrame() -{ - GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); - - GPU_DepthMap(-1.0f, 0.0f); - GPU_SetFaceCulling(GPU_CULL_BACK_CCW); - GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); - GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); - GPU_SetBlendingColor(0,0,0,0); - GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); - - GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); - 
GPUCMD_AddWrite(GPUREG_0118, 0); - - GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); - GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); - - GPU_SetTextureEnable(GPU_TEXUNIT0); - - GPU_SetTexEnv(0, - GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), - GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), - GPU_TEVOPERANDS(0,0,0), - GPU_TEVOPERANDS(0,0,0), - GPU_MODULATE, GPU_MODULATE, - 0xFFFFFFFF); - GPU_SetDummyTexEnv(1); - GPU_SetDummyTexEnv(2); - GPU_SetDummyTexEnv(3); - GPU_SetDummyTexEnv(4); - GPU_SetDummyTexEnv(5); - - //texturing stuff - GPU_SetTexture( - GPU_TEXUNIT0, //texture unit - (u32*)osConvertVirtToPhys((u32)texData), //data buffer - 128, //texture width - 128, //texture height - GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params - GPU_RGBA8 //texture pixel format - ); - - GPU_SetAttributeBuffers( - 3, //3 attributes: vertices, texcoords, and normals - (u32*)osConvertVirtToPhys((u32)texData), //mesh buffer - GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position) - GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord) - GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal) - 0xFFC, - 0x210, - 1, - (u32[]){0x00000000}, - (u64[]){0x210}, - (u8[]){3} - ); - - //setup lighting (this is specific to our shader) - vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))); - GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1); - GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1); - - //initialize projection matrix to standard perspective stuff - 
gsMatrixMode(GS_PROJECTION); - gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f); - gsRotateZ(M_PI/2); //because framebuffer is sideways... - - //draw object - gsMatrixMode(GS_MODELVIEW); - gsPushMatrix(); - gsTranslate(position.x, position.y, position.z); - gsRotateX(angle.x); - gsRotateY(angle.y); - gsVboDraw(&vbo); - gsPopMatrix(); - GPU_FinishDrawing(); -} - -int main(int argc, char** argv) -{ - - gfxInitDefault(); - - //initialize GPU - GPU_Init(NULL); - - //let GFX know we're ok with doing stereoscopic 3D rendering - gfxSet3D(true); - - //allocate our GPU command buffers - //they *have* to be on the linear heap - u32 gpuCmdSize=0x40000; - u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4); - u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4); - - //actually reset the GPU - GPU_Reset(NULL, gpuCmd, gpuCmdSize); - - //load our vertex shader binary - dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size); - shaderProgramInit(&shader); - shaderProgramSetVsh(&shader, &dvlb->DVLE[0]); - - //initialize GS - gsInit(&shader); - - // Flush the command buffer so that the shader upload gets executed - GPUCMD_Finalize(); - GPUCMD_FlushAndRun(NULL); - gspWaitForP3D(); - - //create texture - texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned - memcpy(texData, texture_bin, texture_bin_size); - - //create VBO - gsVboInit(&vbo); - gsVboCreate(&vbo, sizeof(modelVboData)); - gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s)); - gsVboFlushData(&vbo); - - //initialize object position and angle - position=vect3Df(0.0f, 0.0f, -2.0f); - angle=vect3Df(M_PI/4, M_PI/4, 0.0f); - - //background color (blue) - u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF); - - while(aptMainLoop()) - { - //get current 3D slider state - float slider=CONFIG_3D_SLIDERSTATE; - - //controls - hidScanInput(); - //START to exit to hbmenu - if(keysDown()&KEY_START)break; - - //A/B to change vertex 
lighting angle - if(keysHeld()&KEY_A)lightAngle+=0.1f; - if(keysHeld()&KEY_B)lightAngle-=0.1f; - - //D-PAD to rotate object - if(keysHeld()&KEY_DOWN)angle.x+=0.05f; - if(keysHeld()&KEY_UP)angle.x-=0.05f; - if(keysHeld()&KEY_LEFT)angle.y+=0.05f; - if(keysHeld()&KEY_RIGHT)angle.y-=0.05f; - - //R/L to bring object closer to or move it further from the camera - if(keysHeld()&KEY_R)position.z+=0.1f; - if(keysHeld()&KEY_L)position.z-=0.1f; - - //generate our GPU command buffer for this frame - gsStartFrame(); - renderFrame(); - GPUCMD_Finalize(); - - if(slider>0.0f) - { - //new and exciting 3D ! - //make a copy of left gpu buffer - u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset); - memcpy(gpuCmdRight, gpuCmd, offset*4); - - //setup interaxial - float interaxial=slider*0.12f; - - //adjust left gpu buffer fo 3D ! - {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} - - //draw left framebuffer - GPUCMD_FlushAndRun(NULL); - - //while GPU starts drawing the left buffer, adjust right one for 3D ! 
- GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset); - {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} - - //we wait for the left buffer to finish drawing - gspWaitForP3D(); - GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); - gspWaitForPPF(); - - //we draw the right buffer, wait for it to finish and then switch back to left one - //clear the screen - GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); - gspWaitForPSC0(); - - //draw the right framebuffer - GPUCMD_FlushAndRun(NULL); - gspWaitForP3D(); - - //transfer from GPU output buffer to actual framebuffer - GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); - gspWaitForPPF(); - GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); - }else{ - //boring old 2D ! 
- - //draw the frame - GPUCMD_FlushAndRun(NULL); - gspWaitForP3D(); - - //clear the screen - GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); - gspWaitForPPF(); - } - - //clear the screen - GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); - gspWaitForPSC0(); - gfxSwapBuffersGpu(); - - gspWaitForEvent(GSPEVENT_VBlank0, true); - } - - gsExit(); - shaderProgramFree(&shader); - DVLB_Free(dvlb); - gfxExit(); - return 0; -} diff --git a/examples/gpu/source/math.c b/examples/gpu/source/math.c deleted file mode 100644 index 13ab3dd..0000000 --- a/examples/gpu/source/math.c +++ /dev/null @@ -1,148 +0,0 @@ -#include -#include - -#include "math.h" - -void loadIdentity44(float* m) -{ - if(!m)return; - - memset(m, 0x00, 16*4); - m[0]=m[5]=m[10]=m[15]=1.0f; -} - -void multMatrix44(float* m1, float* m2, float* m) //4x4 -{ - int i, j; - for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); - -} - -void translateMatrix(float* tm, float x, float y, float z) -{ - float rm[16], m[16]; - - loadIdentity44(rm); - rm[3]=x; - rm[7]=y; - rm[11]=z; - - multMatrix44(tm,rm,m); - memcpy(tm,m,16*sizeof(float)); -} - -// 00 01 02 03 -// 04 05 06 07 -// 08 09 10 11 -// 12 13 14 15 - -void rotateMatrixX(float* tm, float x, bool r) -{ - float rm[16], m[16]; - memset(rm, 0x00, 16*4); - rm[0]=1.0f; - rm[5]=cos(x); - rm[6]=sin(x); - rm[9]=-sin(x); - rm[10]=cos(x); - rm[15]=1.0f; - if(!r)multMatrix44(tm,rm,m); - else multMatrix44(rm,tm,m); - memcpy(tm,m,16*sizeof(float)); -} - -void rotateMatrixY(float* tm, float x, bool r) -{ - float rm[16], m[16]; - memset(rm, 0x00, 16*4); - rm[0]=cos(x); - rm[2]=sin(x); - rm[5]=1.0f; - rm[8]=-sin(x); - 
rm[10]=cos(x); - rm[15]=1.0f; - if(!r)multMatrix44(tm,rm,m); - else multMatrix44(rm,tm,m); - memcpy(tm,m,16*sizeof(float)); -} - -void rotateMatrixZ(float* tm, float x, bool r) -{ - float rm[16], m[16]; - memset(rm, 0x00, 16*4); - rm[0]=cos(x); - rm[1]=sin(x); - rm[4]=-sin(x); - rm[5]=cos(x); - rm[10]=1.0f; - rm[15]=1.0f; - if(!r)multMatrix44(tm,rm,m); - else multMatrix44(rm,tm,m); - memcpy(tm,m,16*sizeof(float)); -} - -void scaleMatrix(float* tm, float x, float y, float z) -{ - tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x; - tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y; - tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z; -} - -void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far) -{ - float top = near*tan(fovy/2); - float right = (top*aspect); - - float mp[4*4]; - - mp[0x0] = near/right; - mp[0x1] = 0.0f; - mp[0x2] = 0.0f; - mp[0x3] = 0.0f; - - mp[0x4] = 0.0f; - mp[0x5] = near/top; - mp[0x6] = 0.0f; - mp[0x7] = 0.0f; - - mp[0x8] = 0.0f; - mp[0x9] = 0.0f; - mp[0xA] = -(far+near)/(far-near); - mp[0xB] = -2.0f*(far*near)/(far-near); - - mp[0xC] = 0.0f; - mp[0xD] = 0.0f; - mp[0xE] = -1.0f; - mp[0xF] = 0.0f; - - float mp2[4*4]; - loadIdentity44(mp2); - mp2[0xA]=0.5; - mp2[0xB]=-0.5; - - multMatrix44(mp2, mp, m); -} - -vect3Df_s getMatrixColumn(float* m, u8 i) -{ - if(!m || i>=4)return vect3Df(0,0,0); - return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]); -} - -vect3Df_s getMatrixRow(float* m, u8 i) -{ - if(!m || i>=4)return vect3Df(0,0,0); - return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]); -} - -vect4Df_s getMatrixColumn4(float* m, u8 i) -{ - if(!m || i>=4)return vect4Df(0,0,0,0); - return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]); -} - -vect4Df_s getMatrixRow4(float* m, u8 i) -{ - if(!m || i>=4)return vect4Df(0,0,0,0); - return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]); -} diff --git a/examples/gpu/source/math.h b/examples/gpu/source/math.h deleted file mode 100644 index 8137b90..0000000 --- a/examples/gpu/source/math.h +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef 
MATH_H -#define MATH_H - -#include <3ds/types.h> -#include - -typedef float mtx44[4][4]; -typedef float mtx33[3][3]; - -typedef struct -{ - s32 x, y, z; -}vect3Di_s; - -static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z) -{ - return (vect3Di_s){x,y,z}; -} - -static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v) -{ - return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z}; -} - -static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v) -{ - return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z}; -} - -static inline vect3Di_s vmuli(vect3Di_s v, s32 f) -{ - return (vect3Di_s){v.x*f,v.y*f,v.z*f}; -} - -typedef struct -{ - float x, y, z; -}vect3Df_s; - -static inline vect3Df_s vect3Df(float x, float y, float z) -{ - return (vect3Df_s){x,y,z}; -} - -static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v) -{ - return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z}; -} - -static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v) -{ - return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z}; -} - -static inline vect3Df_s vmulf(vect3Df_s v, float f) -{ - return (vect3Df_s){v.x*f,v.y*f,v.z*f}; -} - -static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2) -{ - return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z}; -} - -static inline float vmagf(vect3Df_s v) -{ - return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); -} - -static inline float vdistf(vect3Df_s v1, vect3Df_s v2) -{ - return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z)); -} - -static inline vect3Df_s vnormf(vect3Df_s v) -{ - const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); - return (vect3Df_s){v.x/l,v.y/l,v.z/l}; -} - -typedef struct -{ - float x, y, z, w; -}vect4Df_s; - -static inline vect4Df_s vect4Df(float x, float y, float z, float w) -{ - return (vect4Df_s){x,y,z,w}; -} - -static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v) -{ - return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w}; -} - -static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v) -{ - return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w}; -} - -static inline vect4Df_s 
vmulf4(vect4Df_s v, float f) -{ - return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f}; -} - -static inline float vdotf4(vect4Df_s v1, vect4Df_s v2) -{ - return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w; -} - -static inline vect4Df_s vnormf4(vect4Df_s v) -{ - const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w); - return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}; -} - -//interstuff -static inline vect3Di_s vf2i(vect3Df_s v) -{ - return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)}; -} - -static inline vect3Df_s vi2f(vect3Di_s v) -{ - return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z}; -} - -void loadIdentity44(float* m); -void multMatrix44(float* m1, float* m2, float* m); - -void translateMatrix(float* tm, float x, float y, float z); -void rotateMatrixX(float* tm, float x, bool r); -void rotateMatrixY(float* tm, float x, bool r); -void rotateMatrixZ(float* tm, float x, bool r); -void scaleMatrix(float* tm, float x, float y, float z); - -void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far); - -vect3Df_s getMatrixColumn(float* m, u8 i); -vect3Df_s getMatrixRow(float* m, u8 i); -vect4Df_s getMatrixColumn4(float* m, u8 i); -vect4Df_s getMatrixRow4(float* m, u8 i); - -#endif diff --git a/examples/gpu/Makefile b/examples/graphics/gpu/geoshader/Makefile similarity index 89% rename from examples/gpu/Makefile rename to examples/graphics/gpu/geoshader/Makefile index 19c9ac9..1e4e9ef 100644 --- a/examples/gpu/Makefile +++ b/examples/graphics/gpu/geoshader/Makefile @@ -75,6 +75,7 @@ export DEPSDIR := $(CURDIR)/$(BUILD) CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica))) BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) #--------------------------------------------------------------------------------- @@ 
-91,7 +92,7 @@ else endif #--------------------------------------------------------------------------------- -export OFILES := $(addsuffix .o,$(BINFILES)) \ +export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \ $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ @@ -156,17 +157,18 @@ $(OUTPUT).elf : $(OFILES) @echo $(notdir $<) @$(bin2o) -# WARNING: This is not the right way to do this! TODO: Do it right! #--------------------------------------------------------------------------------- -%_vsh.h %.vsh.o : %.vsh +# rule for assembling GPU shaders #--------------------------------------------------------------------------------- +%.shbin.o: %.pica @echo $(notdir $<) - @python3 $(AEMSTRO)/aemstro_as.py $< ../$(notdir $<).shbin - @bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@ - @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h - @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h - @echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h - @rm ../$(notdir $<).shbin + $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<))) + $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<))) + @picasso $(CURBIN) $< $(CURH) + @bin2s $(CURBIN) | $(AS) -o $@ + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . 
_)`.h -include $(DEPENDS) diff --git a/examples/graphics/gpu/geoshader/README.md b/examples/graphics/gpu/geoshader/README.md new file mode 100644 index 0000000..5e3b6dd --- /dev/null +++ b/examples/graphics/gpu/geoshader/README.md @@ -0,0 +1,6 @@ +# GPU example + +This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up. +Users of earlier versions of devkitARM need to install the tool, which can be found at the address below: + +https://github.com/fincs/picasso/releases diff --git a/examples/graphics/gpu/geoshader/source/3dmath.c b/examples/graphics/gpu/geoshader/source/3dmath.c new file mode 100644 index 0000000..eb8d03f --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/3dmath.c @@ -0,0 +1,172 @@ +#include "3dmath.h" + +void m4x4_identity(matrix_4x4* out) +{ + m4x4_zeros(out); + out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f; +} + +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b) +{ + int i, j; + for (i = 0; i < 4; i ++) + for (j = 0; j < 4; j ++) + out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; +} + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z) +{ + matrix_4x4 tm, om; + + m4x4_identity(&tm); + tm.r[0].w = x; + tm.r[1].w = y; + tm.r[2].w = z; + + m4x4_multiply(&om, mtx, &tm); + m4x4_copy(mtx, &om); +} + +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z) +{ + int i; + for (i = 0; i < 4; i ++) + { + mtx->r[i].x *= x; + mtx->r[i].y *= y; + mtx->r[i].z *= z; + } +} + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = 1.0f; + rm.r[1].y = cosAngle; + rm.r[1].z = sinAngle; + rm.r[2].y = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + 
m4x4_copy(mtx, &om); +} + +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].z = sinAngle; + rm.r[1].y = 1.0f; + rm.r[2].x = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].y = sinAngle; + rm.r[1].x = -sinAngle; + rm.r[1].y = cosAngle; + rm.r[2].z = 1.0f; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far) +{ + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard orthogonal projection matrix + mp.r[0].x = 2.0f / (right - left); + mp.r[0].w = (left + right) / (left - right); + mp.r[1].y = 2.0f / (top - bottom); + mp.r[1].w = (bottom + top) / (bottom - top); + mp.r[2].z = 2.0f / (near - far); + mp.r[2].w = (far + near) / (far - near); + mp.r[3].w = 1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2, mp3; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(&mp3, &mp2, &mp); + + // Fix the 3DS screens' orientation by swapping the X and Y axis + m4x4_identity(&mp2); + mp2.r[0].x = 0.0; + mp2.r[0].y = 1.0; + mp2.r[1].x = -1.0; // flipped + mp2.r[1].y = 0.0; + m4x4_multiply(mtx, &mp2, &mp3); +} + +void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far) +{ + // Notes: + // We are passed "fovy" and the "aspect ratio". 
However, the 3DS screens are sideways, + // and so are these parameters -- in fact, they are actually the fovx and the inverse + // of the aspect ratio. Therefore the formula for the perspective projection matrix + // had to be modified to be expressed in these terms instead. + + // Notes: + // fovx = 2 atan(tan(fovy/2)*w/h) + // fovy = 2 atan(tan(fovx/2)*h/w) + // invaspect = h/w + + // a0,0 = h / (w*tan(fovy/2)) = + // = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2)) = + // = h / (w*tan( atan(tan(fovx/2)*h/w) )) = + // = h / (w * tan(fovx/2)*h/w) = + // = 1 / tan(fovx/2) + + // a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2)) + + float fovx_tan = tanf(fovx / 2); + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard perspective projection matrix + mp.r[0].x = 1.0f / fovx_tan; + mp.r[1].y = 1.0f / (fovx_tan*invaspect); + mp.r[2].z = (near + far) / (near - far); + mp.r[2].w = (2 * near * far) / (near - far); + mp.r[3].z = -1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(mtx, &mp2, &mp); + + // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation + m4x4_rotate_z(mtx, M_PI / 2, true); +} diff --git a/examples/graphics/gpu/geoshader/source/3dmath.h b/examples/graphics/gpu/geoshader/source/3dmath.h new file mode 100644 index 0000000..a9a8596 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/3dmath.h @@ -0,0 +1,56 @@ +/* + * Bare-bones simplistic 3D math library + * This library is common to all libctru GPU examples + */ + +#pragma once +#include +#include +#include + +typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f; +typedef struct { vector_4f r[4]; } matrix_4x4; + +static inline float v4f_dp4(const vector_4f* a, const vector_4f* b) +{ + return a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w; +} + +static inline float v4f_mod4(const vector_4f* a) +{ + return sqrtf(v4f_dp4(a,a)); +} + +static inline void 
v4f_norm4(vector_4f* vec) +{ + float m = v4f_mod4(vec); + if (m == 0.0) return; + vec->x /= m; + vec->y /= m; + vec->z /= m; + vec->w /= m; +} + +static inline void m4x4_zeros(matrix_4x4* out) +{ + memset(out, 0, sizeof(*out)); +} + +static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in) +{ + memcpy(out, in, sizeof(*out)); +} + +void m4x4_identity(matrix_4x4* out); +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b); + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z); +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z); + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide); + +// Special versions of the projection matrices that take the 3DS' screen orientation into account +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far); +void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far); diff --git a/examples/graphics/gpu/geoshader/source/gpu.c b/examples/graphics/gpu/geoshader/source/gpu.c new file mode 100644 index 0000000..c583ae5 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/gpu.c @@ -0,0 +1,93 @@ +#include "gpu.h" + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static u32 *colorBuf, *depthBuf; +static u32 *cmdBuf; + +void gpuInit(void) +{ + colorBuf = vramAlloc(400*240*4); + depthBuf = vramAlloc(400*240*4); + cmdBuf = linearAlloc(0x40000*4); + + GPU_Init(NULL); + GPU_Reset(NULL, cmdBuf, 0x40000); +} + +void gpuExit(void) +{ + linearFree(cmdBuf); + vramFree(depthBuf); + vramFree(colorBuf); +} + +void gpuClearBuffers(u32 clearColor) +{ + 
GX_SetMemoryFill(NULL, + colorBuf, clearColor, &colorBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, + depthBuf, 0, &depthBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); + gspWaitForPSC0(); // Wait for the fill to complete +} + +void gpuFrameBegin(void) +{ + // Configure the viewport and the depth linear conversion function + GPU_SetViewport( + (u32*)osConvertVirtToPhys((u32)depthBuf), + (u32*)osConvertVirtToPhys((u32)colorBuf), + 0, 0, 240, 400); // The top screen is physically 240x400 pixels + GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0 + + // Configure some boilerplate + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + // This is unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + // Configure alpha blending and test + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + int i; + for (i = 0; i < 6; i ++) + GPU_SetDummyTexEnv(i); +} + +void gpuFrameEnd(void) +{ + // Finish rendering + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); // Wait for the rendering to complete + + // Transfer the GPU output to the framebuffer + GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400), + (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400), + DISPLAY_TRANSFER_FLAGS); + gspWaitForPPF(); // Wait for the transfer to complete + + // Reset the command buffer + GPUCMD_SetBufferOffset(0); +}; + +void GPU_SetDummyTexEnv(int id) +{ + GPU_SetTexEnv(id, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + 
GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} diff --git a/examples/graphics/gpu/geoshader/source/gpu.h b/examples/graphics/gpu/geoshader/source/gpu.h new file mode 100644 index 0000000..845d139 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/gpu.h @@ -0,0 +1,26 @@ +/* + * Bare-bones simplistic GPU wrapper + * This library is common to all libctru GPU examples + */ + +#pragma once +#include +#include <3ds.h> +#include "3dmath.h" + +void gpuInit(void); +void gpuExit(void); + +void gpuClearBuffers(u32 clearColor); + +void gpuFrameBegin(void); +void gpuFrameEnd(void); + +// Configures the specified fixed-function fragment shading substage to be a no-operation +void GPU_SetDummyTexEnv(int id); + +// Uploads a uniform matrix +static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix) +{ + GPU_SetFloatUniform(type, location, (u32*)matrix, 4); +} diff --git a/examples/graphics/gpu/geoshader/source/gshader.pica b/examples/graphics/gpu/geoshader/source/gshader.pica new file mode 100644 index 0000000..3b65db3 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/gshader.pica @@ -0,0 +1,91 @@ +; Example PICA200 geometry shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf myconst(0.0, 1.0, -1.0, 0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones +.alias half myconst.wwww + +; Outputs - this time the type *is* used +.out outpos position +.out outclr color + +; Inputs: we will receive the following inputs: +; v0-v1: position/color of the first vertex +; v2-v3: position/color of the second vertex +; v4-v5: position/color of the third vertex + +.proc main + ; Calculate the midpoints of the vertices + mov r4, v0 + add r4, v2, r4 + mul r4, half, r4 + mov r5, v2 + add r5, v4, r5 + mul r5, half, r5 + mov r6, v4 + add r6, v0, r6 + mul r6, half, r6 + + ; Emit the first triangle + mov r0, v0 + mov r1, r4 + mov r2, r6 + call emit_triangle 
+ + ; Emit the second triangle + mov r0, r4 + mov r1, v2 + mov r2, r5 + call emit_triangle + + ; Emit the third triangle + mov r0, r6 + mov r1, r5 + mov r2, v4 + call emit_triangle + + ; We're finished + end +.end + +.proc emit_triangle + ; Emit the first vertex + setemit 0 + mov r8, r0 + mov r9, v1 + call process_vertex + emit + + ; Emit the second vertex + setemit 1 + mov r8, r1 + mov r9, v3 + call process_vertex + emit + + ; Emit the third vertex and finish the primitive + setemit 2, prim + mov r8, r2 + mov r9, v5 + call process_vertex + emit +.end + +; Subroutine +; Inputs: +; r8: vertex position +; r9: vertex color +.proc process_vertex + ; outpos = projectionMatrix * r8 + dp4 outpos.x, projection[0], r8 + dp4 outpos.y, projection[1], r8 + dp4 outpos.z, projection[2], r8 + dp4 outpos.w, projection[3], r8 + + ; outclr = r9 + mov outclr, r9 +.end diff --git a/examples/graphics/gpu/geoshader/source/main.c b/examples/graphics/gpu/geoshader/source/main.c new file mode 100644 index 0000000..a2c9998 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/main.c @@ -0,0 +1,139 @@ +/* + * ~~ Simple libctru GPU geometry shader example ~~ + * This example demonstrates the basics of using the PICA200 in a 3DS homebrew + * application in order to render a basic scene using a geoshader. + * The example geoshader receives the vertices of a triangle and emits three + * smaller triangles, thus forming a 'triforce' shape. 
+ */ + +#include "gpu.h" +#include "vshader_shbin.h" +#include "gshader_shbin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +typedef struct { float position[3]; float color[4]; } vertex; + +static const vertex vertex_list[] = +{ + { {200.0f, 200.0f, 0.5f}, {1.0f, 0.0f, 0.0f, 1.0f} }, + { {100.0f, 40.0f, 0.5f}, {0.0f, 1.0f, 0.0f, 1.0f} }, + { {300.0f, 40.0f, 0.5f}, {0.0f, 0.0f, 1.0f, 1.0f} }, +}; + +#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) + +static DVLB_s *vshader_dvlb, *gshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static matrix_4x4 projection; + +static void* vbo_data; + +static void sceneInit(void) +{ + // Load the shaders and create a shader program + // The geoshader stride is set to 6 so that it processes a triangle at a time + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + gshader_dvlb = DVLB_ParseFile((u32*)gshader_shbin, gshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + shaderProgramSetGsh(&program, &gshader_dvlb->DVLE[0], 6); + + // Get the location of the projection matrix uniform + uLoc_projection = shaderInstanceGetUniformLocation(program.geometryShader, "projection"); + + // Compute the projection matrix + m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0); + + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); +} + +static void sceneRender(void) +{ + // Bind the shader program + shaderProgramUse(&program); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + 
GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_REPLACE, GPU_REPLACE, // RGB, Alpha + 0xFFFFFFFF); + + // Configure the "attribute buffers" (that is, the vertex input buffers) + GPU_SetAttributeBuffers( + 2, // Number of inputs per vertex + (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO + GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | + GPU_ATTRIBFMT(1, 4, GPU_FLOAT), // Format of the inputs (a 3-element float position and a 4-element float color) + 0xFFC, // Unused attribute mask, in our case bits 0 and 1 are cleared since both attributes are used + 0x10, // Attribute permutations (here it is the identity) + 1, // Number of buffers + (u32[]) { 0x0 }, // Buffer offsets (placeholders) + (u64[]) { 0x10 }, // Attribute permutations for each buffer (identity again) + (u8[]) { 2 }); // Number of attributes for each buffer + + // Upload the projection matrix + GPU_SetFloatUniformMatrix(GPU_GEOMETRY_SHADER, uLoc_projection, &projection); + + // Draw the VBO - GPU_UNKPRIM allows the geoshader to control primitive emission + GPU_DrawArray(GPU_UNKPRIM, vertex_list_count); +} + +static void sceneExit(void) +{ + // Free the VBO + linearFree(vbo_data); + + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); + DVLB_Free(gshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + gpuInit(); + + // Initialize the scene + sceneInit(); + gpuClearBuffers(CLEAR_COLOR); + + // Main loop + while (aptMainLoop()) + { + gspWaitForVBlank(); // Synchronize with the start of VBlank + gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame that we rendered last frame is now visible + hidScanInput(); // Read the user input + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + gpuFrameBegin(); + sceneRender(); + gpuFrameEnd(); + gpuClearBuffers(CLEAR_COLOR); + + // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering) + 
//gfxFlushBuffers(); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + gpuExit(); + gfxExit(); + return 0; +} diff --git a/examples/graphics/gpu/geoshader/source/vshader.pica b/examples/graphics/gpu/geoshader/source/vshader.pica new file mode 100644 index 0000000..8bcbc22 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/vshader.pica @@ -0,0 +1,24 @@ +; Example PICA200 vertex shader + +; Constants +.constf myconst(0.0, 1.0, -1.0, -0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +; Outputs - since we are also using a geoshader the output type isn't really used +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias inclr v1 + +.proc main + ; Pass through both inputs to the geoshader + mov outpos.xyz, inpos + mov outpos.w, ones + mov outclr, inclr + + ; We're finished + end +.end diff --git a/examples/graphics/gpu/simple_tri/Makefile b/examples/graphics/gpu/simple_tri/Makefile new file mode 100644 index 0000000..1e4e9ef --- /dev/null +++ b/examples/graphics/gpu/simple_tri/Makefile @@ -0,0 +1,177 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# +# NO_SMDH: if set to anything, no SMDH file is generated. 
+# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. +# If not set, it attempts to use one of the following (in this order): +# - .png +# - icon.png +# - /default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -fomit-frame-pointer -ffast-math \ + $(ARCH) + +CFLAGS += $(INCLUDE) -DARM11 -D_3DS + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach 
dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \ + $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +.PHONY: $(BUILD) clean all + 
+#--------------------------------------------------------------------------------- +all: $(BUILD) + +$(BUILD): + @[ -d $@ ] || mkdir -p $@ + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... + @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf + + +#--------------------------------------------------------------------------------- +else + +DEPENDS := $(OFILES:.o=.d) + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +ifeq ($(strip $(NO_SMDH)),) +$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh +else +$(OUTPUT).3dsx : $(OUTPUT).elf +endif + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rule for assembling GPU shaders +#--------------------------------------------------------------------------------- +%.shbin.o: %.pica + @echo $(notdir $<) + $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<))) + $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<))) + @picasso $(CURBIN) $< $(CURH) + @bin2s $(CURBIN) | $(AS) -o $@ + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . 
_)`_size";" >> `(echo $(CURBIN) | tr . _)`.h + +-include $(DEPENDS) + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/examples/graphics/gpu/simple_tri/README.md b/examples/graphics/gpu/simple_tri/README.md new file mode 100644 index 0000000..5e3b6dd --- /dev/null +++ b/examples/graphics/gpu/simple_tri/README.md @@ -0,0 +1,6 @@ +# GPU example + +This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up. +Users of earlier versions of devkitARM need to install the tool, which can be found at the address below: + +https://github.com/fincs/picasso/releases diff --git a/examples/graphics/gpu/simple_tri/source/3dmath.c b/examples/graphics/gpu/simple_tri/source/3dmath.c new file mode 100644 index 0000000..eb8d03f --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/3dmath.c @@ -0,0 +1,172 @@ +#include "3dmath.h" + +void m4x4_identity(matrix_4x4* out) +{ + m4x4_zeros(out); + out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f; +} + +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b) +{ + int i, j; + for (i = 0; i < 4; i ++) + for (j = 0; j < 4; j ++) + out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; +} + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z) +{ + matrix_4x4 tm, om; + + m4x4_identity(&tm); + tm.r[0].w = x; + tm.r[1].w = y; + tm.r[2].w = z; + + m4x4_multiply(&om, mtx, &tm); + m4x4_copy(mtx, &om); +} + +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z) +{ + int i; + for (i = 0; i < 4; i ++) + { + mtx->r[i].x *= x; + mtx->r[i].y *= y; + mtx->r[i].z *= z; + } +} + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); 
+ rm.r[0].x = 1.0f; + rm.r[1].y = cosAngle; + rm.r[1].z = sinAngle; + rm.r[2].y = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].z = sinAngle; + rm.r[1].y = 1.0f; + rm.r[2].x = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].y = sinAngle; + rm.r[1].x = -sinAngle; + rm.r[1].y = cosAngle; + rm.r[2].z = 1.0f; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far) +{ + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard orthogonal projection matrix + mp.r[0].x = 2.0f / (right - left); + mp.r[0].w = (left + right) / (left - right); + mp.r[1].y = 2.0f / (top - bottom); + mp.r[1].w = (bottom + top) / (bottom - top); + mp.r[2].z = 2.0f / (near - far); + mp.r[2].w = (far + near) / (far - near); + mp.r[3].w = 1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2, mp3; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(&mp3, &mp2, &mp); + + // Fix the 3DS screens' orientation by swapping the X and Y axis + m4x4_identity(&mp2); + mp2.r[0].x = 0.0; + mp2.r[0].y = 1.0; + mp2.r[1].x = -1.0; // flipped + mp2.r[1].y = 0.0; + m4x4_multiply(mtx, &mp2, &mp3); +} + +void m4x4_persp_tilt(matrix_4x4* mtx, 
float fovx, float invaspect, float near, float far) +{ + // Notes: + // We are passed "fovy" and the "aspect ratio". However, the 3DS screens are sideways, + // and so are these parameters -- in fact, they are actually the fovx and the inverse + // of the aspect ratio. Therefore the formula for the perspective projection matrix + // had to be modified to be expressed in these terms instead. + + // Notes: + // fovx = 2 atan(tan(fovy/2)*w/h) + // fovy = 2 atan(tan(fovx/2)*h/w) + // invaspect = h/w + + // a0,0 = h / (w*tan(fovy/2)) = + // = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2)) = + // = h / (w*tan( atan(tan(fovx/2)*h/w) )) = + // = h / (w * tan(fovx/2)*h/w) = + // = 1 / tan(fovx/2) + + // a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2)) + + float fovx_tan = tanf(fovx / 2); + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard perspective projection matrix + mp.r[0].x = 1.0f / fovx_tan; + mp.r[1].y = 1.0f / (fovx_tan*invaspect); + mp.r[2].z = (near + far) / (near - far); + mp.r[2].w = (2 * near * far) / (near - far); + mp.r[3].z = -1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(mtx, &mp2, &mp); + + // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation + m4x4_rotate_z(mtx, M_PI / 2, true); +} diff --git a/examples/graphics/gpu/simple_tri/source/3dmath.h b/examples/graphics/gpu/simple_tri/source/3dmath.h new file mode 100644 index 0000000..a9a8596 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/3dmath.h @@ -0,0 +1,56 @@ +/* + * Bare-bones simplistic 3D math library + * This library is common to all libctru GPU examples + */ + +#pragma once +#include <string.h> +#include <stdbool.h> +#include <math.h> + +typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f; +typedef struct { vector_4f r[4]; } matrix_4x4; + +static inline float v4f_dp4(const vector_4f* a, const vector_4f* b) +{ + return a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w; +} + 
+static inline float v4f_mod4(const vector_4f* a) +{ + return sqrtf(v4f_dp4(a,a)); +} + +static inline void v4f_norm4(vector_4f* vec) +{ + float m = v4f_mod4(vec); + if (m == 0.0) return; + vec->x /= m; + vec->y /= m; + vec->z /= m; + vec->w /= m; +} + +static inline void m4x4_zeros(matrix_4x4* out) +{ + memset(out, 0, sizeof(*out)); +} + +static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in) +{ + memcpy(out, in, sizeof(*out)); +} + +void m4x4_identity(matrix_4x4* out); +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b); + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z); +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z); + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide); + +// Special versions of the projection matrices that take the 3DS' screen orientation into account +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far); +void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far); diff --git a/examples/graphics/gpu/simple_tri/source/gpu.c b/examples/graphics/gpu/simple_tri/source/gpu.c new file mode 100644 index 0000000..c583ae5 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/gpu.c @@ -0,0 +1,93 @@ +#include "gpu.h" + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static u32 *colorBuf, *depthBuf; +static u32 *cmdBuf; + +void gpuInit(void) +{ + colorBuf = vramAlloc(400*240*4); + depthBuf = vramAlloc(400*240*4); + cmdBuf = linearAlloc(0x40000*4); + + GPU_Init(NULL); + GPU_Reset(NULL, cmdBuf, 0x40000); +} + +void gpuExit(void) +{ + 
linearFree(cmdBuf); + vramFree(depthBuf); + vramFree(colorBuf); +} + +void gpuClearBuffers(u32 clearColor) +{ + GX_SetMemoryFill(NULL, + colorBuf, clearColor, &colorBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, + depthBuf, 0, &depthBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); + gspWaitForPSC0(); // Wait for the fill to complete +} + +void gpuFrameBegin(void) +{ + // Configure the viewport and the depth linear conversion function + GPU_SetViewport( + (u32*)osConvertVirtToPhys((u32)depthBuf), + (u32*)osConvertVirtToPhys((u32)colorBuf), + 0, 0, 240, 400); // The top screen is physically 240x400 pixels + GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0 + + // Configure some boilerplate + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + // This is unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + // Configure alpha blending and test + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + int i; + for (i = 0; i < 6; i ++) + GPU_SetDummyTexEnv(i); +} + +void gpuFrameEnd(void) +{ + // Finish rendering + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); // Wait for the rendering to complete + + // Transfer the GPU output to the framebuffer + GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400), + (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400), + DISPLAY_TRANSFER_FLAGS); + gspWaitForPPF(); // Wait for the transfer to complete + + // Reset the command buffer + GPUCMD_SetBufferOffset(0); +} + +void GPU_SetDummyTexEnv(int id) +{ + GPU_SetTexEnv(id, + 
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} diff --git a/examples/graphics/gpu/simple_tri/source/gpu.h b/examples/graphics/gpu/simple_tri/source/gpu.h new file mode 100644 index 0000000..845d139 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/gpu.h @@ -0,0 +1,26 @@ +/* + * Bare-bones simplistic GPU wrapper + * This library is common to all libctru GPU examples + */ + +#pragma once +#include <string.h> +#include <3ds.h> +#include "3dmath.h" + +void gpuInit(void); +void gpuExit(void); + +void gpuClearBuffers(u32 clearColor); + +void gpuFrameBegin(void); +void gpuFrameEnd(void); + +// Configures the specified fixed-function fragment shading substage to be a no-operation +void GPU_SetDummyTexEnv(int id); + +// Uploads a uniform matrix +static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix) +{ + GPU_SetFloatUniform(type, location, (u32*)matrix, 4); +} diff --git a/examples/graphics/gpu/simple_tri/source/main.c b/examples/graphics/gpu/simple_tri/source/main.c new file mode 100644 index 0000000..d82cc12 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/main.c @@ -0,0 +1,131 @@ +/* + * ~~ Simple libctru GPU triangle example ~~ + * This example demonstrates the basics of using the PICA200 in a 3DS homebrew + * application in order to render a basic scene consisting of a white solid triangle. 
+ */ + +#include "gpu.h" +#include "vshader_shbin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +typedef struct { float x, y, z; } vertex; + +static const vertex vertex_list[] = +{ + { 200.0f, 200.0f, 0.5f }, + { 100.0f, 40.0f, 0.5f }, + { 300.0f, 40.0f, 0.5f }, +}; + +#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static matrix_4x4 projection; + +static void* vbo_data; + +static void sceneInit(void) +{ + // Load the vertex shader and create a shader program + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + + // Get the location of the projection matrix uniform + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + + // Compute the projection matrix + m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0); + + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); +} + +static void sceneRender(void) +{ + // Bind the shader program + shaderProgramUse(&program); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_REPLACE, GPU_REPLACE, // RGB, Alpha + 0xFFFFFFFF); + + // Configure the "attribute buffers" (that is, the vertex input buffers) + GPU_SetAttributeBuffers( + 1, // Number of inputs per vertex + (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO + GPU_ATTRIBFMT(0, 3, GPU_FLOAT), // Format of the inputs (in 
this case the only input is a 3-element float vector) + 0xFFE, // Unused attribute mask, in our case bit 0 is cleared since it is used + 0x0, // Attribute permutations (here it is the identity) + 1, // Number of buffers + (u32[]) { 0x0 }, // Buffer offsets (placeholders) + (u64[]) { 0x0 }, // Attribute permutations for each buffer (identity again) + (u8[]) { 1 }); // Number of attributes for each buffer + + // Upload the projection matrix + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection); + + // Draw the VBO + GPU_DrawArray(GPU_TRIANGLES, vertex_list_count); +} + +static void sceneExit(void) +{ + // Free the VBO + linearFree(vbo_data); + + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + gpuInit(); + + // Initialize the scene + sceneInit(); + gpuClearBuffers(CLEAR_COLOR); + + // Main loop + while (aptMainLoop()) + { + gspWaitForVBlank(); // Synchronize with the start of VBlank + gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame that we rendered last frame is now visible + hidScanInput(); // Read the user input + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + gpuFrameBegin(); + sceneRender(); + gpuFrameEnd(); + gpuClearBuffers(CLEAR_COLOR); + + // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering) + //gfxFlushBuffers(); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + gpuExit(); + gfxExit(); + return 0; +} diff --git a/examples/graphics/gpu/simple_tri/source/vshader.pica b/examples/graphics/gpu/simple_tri/source/vshader.pica new file mode 100644 index 0000000..cdd9759 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/vshader.pica @@ -0,0 +1,34 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf 
myconst(0.0, 1.0, -1.0, -0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +; Outputs +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; outpos = projectionMatrix * inpos + dp4 outpos.x, projection[0], r0 + dp4 outpos.y, projection[1], r0 + dp4 outpos.z, projection[2], r0 + dp4 outpos.w, projection[3], r0 + + ; outclr = solid white color + mov outclr, ones + + ; We're finished + end +.end diff --git a/examples/graphics/gpu/textured_cube/Makefile b/examples/graphics/gpu/textured_cube/Makefile new file mode 100644 index 0000000..1e4e9ef --- /dev/null +++ b/examples/graphics/gpu/textured_cube/Makefile @@ -0,0 +1,177 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# +# NO_SMDH: if set to anything, no SMDH file is generated. +# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. 
+# If not set, it attempts to use one of the following (in this order): +# - <Project name>.png +# - icon.png +# - <libctru folder>/default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -fomit-frame-pointer -ffast-math \ + $(ARCH) + +CFLAGS += $(INCLUDE) -DARM11 -D_3DS + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard 
$(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \ + $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +.PHONY: $(BUILD) clean all + +#--------------------------------------------------------------------------------- +all: $(BUILD) + +$(BUILD): + @[ -d $@ ] || mkdir -p $@ + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... 
+ @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf + + +#--------------------------------------------------------------------------------- +else + +DEPENDS := $(OFILES:.o=.d) + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +ifeq ($(strip $(NO_SMDH)),) +$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh +else +$(OUTPUT).3dsx : $(OUTPUT).elf +endif + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rule for assembling GPU shaders +#--------------------------------------------------------------------------------- +%.shbin.o: %.pica + @echo $(notdir $<) + $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<))) + $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<))) + @picasso $(CURBIN) $< $(CURH) + @bin2s $(CURBIN) | $(AS) -o $@ + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . 
_)`.h + +-include $(DEPENDS) + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/examples/graphics/gpu/textured_cube/README.md b/examples/graphics/gpu/textured_cube/README.md new file mode 100644 index 0000000..5e3b6dd --- /dev/null +++ b/examples/graphics/gpu/textured_cube/README.md @@ -0,0 +1,6 @@ +# GPU example + +This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up. +Users of earlier versions of devkitARM need to install the tool, which can be found in the address below: + +https://github.com/fincs/picasso/releases diff --git a/examples/graphics/gpu/textured_cube/data/kitten.bin b/examples/graphics/gpu/textured_cube/data/kitten.bin new file mode 100644 index 0000000..a87ac4a Binary files /dev/null and b/examples/graphics/gpu/textured_cube/data/kitten.bin differ diff --git a/examples/graphics/gpu/textured_cube/source/3dmath.c b/examples/graphics/gpu/textured_cube/source/3dmath.c new file mode 100644 index 0000000..eb8d03f --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/3dmath.c @@ -0,0 +1,172 @@ +#include "3dmath.h" + +void m4x4_identity(matrix_4x4* out) +{ + m4x4_zeros(out); + out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f; +} + +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b) +{ + int i, j; + for (i = 0; i < 4; i ++) + for (j = 0; j < 4; j ++) + out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; +} + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z) +{ + matrix_4x4 tm, om; + + m4x4_identity(&tm); + tm.r[0].w = x; + tm.r[1].w = y; + tm.r[2].w = z; + + m4x4_multiply(&om, mtx, &tm); + m4x4_copy(mtx, &om); +} + +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z) +{ + int i; + for (i = 0; i < 4; i ++) + { + mtx->r[i].x *= 
x; + mtx->r[i].y *= y; + mtx->r[i].z *= z; + } +} + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = 1.0f; + rm.r[1].y = cosAngle; + rm.r[1].z = sinAngle; + rm.r[2].y = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].z = sinAngle; + rm.r[1].y = 1.0f; + rm.r[2].x = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].y = sinAngle; + rm.r[1].x = -sinAngle; + rm.r[1].y = cosAngle; + rm.r[2].z = 1.0f; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far) +{ + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard orthogonal projection matrix + mp.r[0].x = 2.0f / (right - left); + mp.r[0].w = (left + right) / (left - right); + mp.r[1].y = 2.0f / (top - bottom); + mp.r[1].w = (bottom + top) / (bottom - top); + mp.r[2].z = 2.0f / (near - far); + mp.r[2].w = (far + near) / (far - near); + mp.r[3].w = 1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2, mp3; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(&mp3, &mp2, &mp); + + // Fix the 3DS screens' 
orientation by swapping the X and Y axis + m4x4_identity(&mp2); + mp2.r[0].x = 0.0; + mp2.r[0].y = 1.0; + mp2.r[1].x = -1.0; // flipped + mp2.r[1].y = 0.0; + m4x4_multiply(mtx, &mp2, &mp3); +} + +void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far) +{ + // Notes: + // We are passed "fovy" and the "aspect ratio". However, the 3DS screens are sideways, + // and so are these parameters -- in fact, they are actually the fovx and the inverse + // of the aspect ratio. Therefore the formula for the perspective projection matrix + // had to be modified to be expressed in these terms instead. + + // Notes: + // fovx = 2 atan(tan(fovy/2)*w/h) + // fovy = 2 atan(tan(fovx/2)*h/w) + // invaspect = h/w + + // a0,0 = h / (w*tan(fovy/2)) = + // = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2)) = + // = h / (w*tan( atan(tan(fovx/2)*h/w) )) = + // = h / (w * tan(fovx/2)*h/w) = + // = 1 / tan(fovx/2) + + // a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2)) + + float fovx_tan = tanf(fovx / 2); + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard perspective projection matrix + mp.r[0].x = 1.0f / fovx_tan; + mp.r[1].y = 1.0f / (fovx_tan*invaspect); + mp.r[2].z = (near + far) / (near - far); + mp.r[2].w = (2 * near * far) / (near - far); + mp.r[3].z = -1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(mtx, &mp2, &mp); + + // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation + m4x4_rotate_z(mtx, M_PI / 2, true); +} diff --git a/examples/graphics/gpu/textured_cube/source/3dmath.h b/examples/graphics/gpu/textured_cube/source/3dmath.h new file mode 100644 index 0000000..a9a8596 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/3dmath.h @@ -0,0 +1,56 @@ +/* + * Bare-bones simplistic 3D math library + * This library is common to all libctru GPU examples + */ + +#pragma once +#include <string.h> +#include <stdbool.h> +#include <math.h> + +typedef 
union { struct { float w, z, y, x; }; float c[4]; } vector_4f; +typedef struct { vector_4f r[4]; } matrix_4x4; + +static inline float v4f_dp4(const vector_4f* a, const vector_4f* b) +{ + return a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w; +} + +static inline float v4f_mod4(const vector_4f* a) +{ + return sqrtf(v4f_dp4(a,a)); +} + +static inline void v4f_norm4(vector_4f* vec) +{ + float m = v4f_mod4(vec); + if (m == 0.0) return; + vec->x /= m; + vec->y /= m; + vec->z /= m; + vec->w /= m; +} + +static inline void m4x4_zeros(matrix_4x4* out) +{ + memset(out, 0, sizeof(*out)); +} + +static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in) +{ + memcpy(out, in, sizeof(*out)); +} + +void m4x4_identity(matrix_4x4* out); +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b); + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z); +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z); + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide); + +// Special versions of the projection matrices that take the 3DS' screen orientation into account +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far); +void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far); diff --git a/examples/graphics/gpu/textured_cube/source/gpu.c b/examples/graphics/gpu/textured_cube/source/gpu.c new file mode 100644 index 0000000..c583ae5 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/gpu.c @@ -0,0 +1,93 @@ +#include "gpu.h" + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static u32 
*colorBuf, *depthBuf; +static u32 *cmdBuf; + +void gpuInit(void) +{ + colorBuf = vramAlloc(400*240*4); + depthBuf = vramAlloc(400*240*4); + cmdBuf = linearAlloc(0x40000*4); + + GPU_Init(NULL); + GPU_Reset(NULL, cmdBuf, 0x40000); +} + +void gpuExit(void) +{ + linearFree(cmdBuf); + vramFree(depthBuf); + vramFree(colorBuf); +} + +void gpuClearBuffers(u32 clearColor) +{ + GX_SetMemoryFill(NULL, + colorBuf, clearColor, &colorBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, + depthBuf, 0, &depthBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); + gspWaitForPSC0(); // Wait for the fill to complete +} + +void gpuFrameBegin(void) +{ + // Configure the viewport and the depth linear conversion function + GPU_SetViewport( + (u32*)osConvertVirtToPhys((u32)depthBuf), + (u32*)osConvertVirtToPhys((u32)colorBuf), + 0, 0, 240, 400); // The top screen is physically 240x400 pixels + GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0 + + // Configure some boilerplate + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + // This is unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + // Configure alpha blending and test + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + int i; + for (i = 0; i < 6; i ++) + GPU_SetDummyTexEnv(i); +} + +void gpuFrameEnd(void) +{ + // Finish rendering + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); // Wait for the rendering to complete + + // Transfer the GPU output to the framebuffer + GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400), + (u32*)gfxGetFramebuffer(GFX_TOP, 
GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400), + DISPLAY_TRANSFER_FLAGS); + gspWaitForPPF(); // Wait for the transfer to complete + + // Reset the command buffer + GPUCMD_SetBufferOffset(0); +} + +void GPU_SetDummyTexEnv(int id) +{ + GPU_SetTexEnv(id, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} diff --git a/examples/graphics/gpu/textured_cube/source/gpu.h b/examples/graphics/gpu/textured_cube/source/gpu.h new file mode 100644 index 0000000..845d139 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/gpu.h @@ -0,0 +1,26 @@ +/* + * Bare-bones simplistic GPU wrapper + * This library is common to all libctru GPU examples + */ + +#pragma once +#include <string.h> +#include <3ds.h> +#include "3dmath.h" + +void gpuInit(void); +void gpuExit(void); + +void gpuClearBuffers(u32 clearColor); + +void gpuFrameBegin(void); +void gpuFrameEnd(void); + +// Configures the specified fixed-function fragment shading substage to be a no-operation +void GPU_SetDummyTexEnv(int id); + +// Uploads a uniform matrix +static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix) +{ + GPU_SetFloatUniform(type, location, (u32*)matrix, 4); +} diff --git a/examples/graphics/gpu/textured_cube/source/main.c b/examples/graphics/gpu/textured_cube/source/main.c new file mode 100644 index 0000000..d8274b7 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/main.c @@ -0,0 +1,244 @@ +/* + * ~~ Simple libctru GPU textured cube example ~~ + * This example demonstrates the basics of using the PICA200 in a 3DS homebrew + * application in order to render a basic scene consisting of a rotating + * textured cube which is also shaded using a simple shading algorithm. + * The shading algorithm is explained in the vertex shader source code. 
+ */ + +#include "gpu.h" +#include "vshader_shbin.h" +#include "kitten_bin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +typedef struct { float position[3]; float texcoord[2]; float normal[3]; } vertex; + +static const vertex vertex_list[] = +{ + // First face (PZ) + // First triangle + { {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} }, + { {+0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, +1.0f} }, + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} }, + // Second triangle + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} }, + { {-0.5f, +0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, +1.0f} }, + { {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} }, + + // Second face (MZ) + // First triangle + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} }, + { {-0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, -1.0f} }, + { {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} }, + // Second triangle + { {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} }, + { {+0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, -1.0f} }, + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} }, + + // Third face (PX) + // First triangle + { {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} }, + // Second triangle + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} }, + + // Fourth face (MX) + // First triangle + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} }, + // Second triangle + { {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, -0.5f, -0.5f}, 
{0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} }, + + // Fifth face (PY: the face at y=0.5, normal along positive Y) + // First triangle + { {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} }, + { {-0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, +1.0f, 0.0f} }, + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} }, + // Second triangle + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} }, + { {+0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, +1.0f, 0.0f} }, + { {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} }, + + // Sixth face (MY: the face at y=-0.5, normal along negative Y) + // First triangle + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} }, + { {+0.5f, -0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, -1.0f, 0.0f} }, + { {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} }, + // Second triangle + { {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} }, + { {-0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, -1.0f, 0.0f} }, + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} }, +}; + +#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection, uLoc_modelView; +static int uLoc_lightVec, uLoc_lightHalfVec, uLoc_lightClr, uLoc_material; +static matrix_4x4 projection; +static matrix_4x4 material = +{ + { + { { 0.0f, 0.2f, 0.2f, 0.2f } }, // Ambient (material[0], aliased as mat_amb in the vertex shader) + { { 0.0f, 0.4f, 0.4f, 0.4f } }, // Diffuse (material[1], aliased as mat_dif) + { { 0.0f, 0.8f, 0.8f, 0.8f } }, // Specular (material[2], aliased as mat_spe) + { { 1.0f, 0.0f, 0.0f, 0.0f } }, // Emission (material[3], aliased as mat_emi) + } +}; + +static void* vbo_data; +static void* tex_data; +static float angleX = 0.0, angleY = 0.0; + +static void sceneInit(void) +{ + // Parse the embedded vertex shader binary (vshader_shbin) and create a shader program that uses its first DVLE + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + + // Look up the location of each vertex shader uniform by name + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + uLoc_modelView = 
shaderInstanceGetUniformLocation(program.vertexShader, "modelView"); + uLoc_lightVec = shaderInstanceGetUniformLocation(program.vertexShader, "lightVec"); + uLoc_lightHalfVec = shaderInstanceGetUniformLocation(program.vertexShader, "lightHalfVec"); + uLoc_lightClr = shaderInstanceGetUniformLocation(program.vertexShader, "lightClr"); + uLoc_material = shaderInstanceGetUniformLocation(program.vertexShader, "material"); + + // Compute the projection matrix (arguments are presumably field of view in radians, aspect ratio, near and far planes; TODO confirm in 3dmath.h) + m4x4_persp_tilt(&projection, 80.0f*M_PI/180.0f, 400.0f/240.0f, 0.01f, 1000.0f); + + // Create the VBO (vertex buffer object): copy vertex_list into a linearAlloc'd buffer (NOTE: the allocation result is not checked) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); + + // Load the texture: copy the embedded kitten_bin data into a linearAlloc'd buffer (NOTE: the allocation result is not checked) + tex_data = linearAlloc(kitten_bin_size); + memcpy(tex_data, kitten_bin, kitten_bin_size); +} + +static void sceneRender(void) +{ + // Bind the shader program + shaderProgramUse(&program); + + // Configure the first fragment shading substage to blend the texture color with + // the vertex color (calculated by the vertex shader using a lighting algorithm) + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_MODULATE, GPU_MODULATE, // RGB, Alpha + 0xFFFFFFFF); + + // Configure the first texture unit with the 64x64 RGBA8 texture (the GPU is handed its physical address) + GPU_SetTextureEnable(GPU_TEXUNIT0); + GPU_SetTexture( + GPU_TEXUNIT0, + (u32*)osConvertVirtToPhys((u32)tex_data), + 64, // Width + 64, // Height + GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_WRAP_S(GPU_REPEAT) | GPU_TEXTURE_WRAP_T(GPU_REPEAT), // Flags + GPU_RGBA8 // Pixel format + ); + + // Configure the "attribute buffers" (that is, the vertex input buffers) + GPU_SetAttributeBuffers( + 3, // Number of inputs per vertex + (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of 
the VBO + GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // Format of the inputs: attribute 0 is the position (3 floats) + GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // attribute 1 is the texture coordinate (2 floats) + GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // attribute 2 is the normal (3 floats) + 0xFFC, // Unused attribute mask, in our case bits 0~2 are cleared since they are used + 0x210, // Attribute permutations (here it is the identity, passing each attribute in order) + 1, // Number of buffers + (u32[]) { 0x0 }, // Buffer offsets (placeholders) + (u64[]) { 0x210 }, // Attribute permutations for each buffer (identity again) + (u8[]) { 3 }); // Number of attributes for each buffer + + // Calculate the modelView matrix: translate along Z (the distance oscillates with sinf(angleX)), then rotate about X and Y + matrix_4x4 modelView; + m4x4_identity(&modelView); + m4x4_translate(&modelView, 0.0, 0.0, -2.0 + 0.5*sinf(angleX)); + m4x4_rotate_x(&modelView, angleX, true); + m4x4_rotate_y(&modelView, angleY, true); + + // Rotate the cube each frame (by pi/180 about X and pi/360 about Y) + angleX += M_PI / 180; + angleY += M_PI / 360; + + // Upload the uniforms (matrices through GPU_SetFloatUniformMatrix, light parameters as single four-float vectors) + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection); + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_modelView, &modelView); + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_material, &material); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightVec, (u32*)(float[]){0.0f, -1.0f, 0.0f, 0.0f}, 1); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightHalfVec, (u32*)(float[]){0.0f, -1.0f, 0.0f, 0.0f}, 1); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightClr, (u32*)(float[]){1.0f, 1.0f, 1.0f, 1.0f}, 1); + + // Draw the VBO as a list of triangles (vertex_list_count vertices) + GPU_DrawArray(GPU_TRIANGLES, vertex_list_count); +} + +static void sceneExit(void) +{ + // Free the texture + linearFree(tex_data); + + // Free the VBO + linearFree(vbo_data); + + // Free the shader program and the parsed shader binary + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + gpuInit(); + + // Initialize the scene + sceneInit(); + gpuClearBuffers(CLEAR_COLOR); + + // Main loop + while (aptMainLoop()) + { + gspWaitForVBlank(); // Synchronize with the start of VBlank + gfxSwapBuffersGpu(); // Swap 
the framebuffers so that the frame that we rendered last frame is now visible + hidScanInput(); // Read the user input + + // Respond to user input: leave the main loop when hidKeysDown() reports KEY_START + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene, then clear the buffers with CLEAR_COLOR + gpuFrameBegin(); + sceneRender(); + gpuFrameEnd(); + gpuClearBuffers(CLEAR_COLOR); + + // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering) + //gfxFlushBuffers(); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + gpuExit(); + gfxExit(); + return 0; +} diff --git a/examples/graphics/gpu/textured_cube/source/vshader.pica b/examples/graphics/gpu/textured_cube/source/vshader.pica new file mode 100644 index 0000000..0f0ac5d --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/vshader.pica @@ -0,0 +1,90 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4], modelView[4] +.fvec lightVec, lightHalfVec, lightClr, material[4] +.alias mat_amb material[0] +.alias mat_dif material[1] +.alias mat_spe material[2] +.alias mat_emi material[3] + +; Constants +.constf myconst(0.0, 1.0, -1.0, -0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +; Outputs +.out outpos position +.out outtc0 texcoord0 +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias intex v1 +.alias innrm v2 + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; r1 = modelView * r0 (inpos with w forced to 1.0) + dp4 r1.x, modelView[0], r0 + dp4 r1.y, modelView[1], r0 + dp4 r1.z, modelView[2], r0 + dp4 r1.w, modelView[3], r0 + + ; outpos = projection * r1 + dp4 outpos.x, projection[0], r1 + dp4 outpos.y, projection[1], r1 + dp4 outpos.z, projection[2], r1 + dp4 outpos.w, projection[3], r1 + + ; outtc0 = intex + mov outtc0, intex + + ; Transform the normal vector with the modelView matrix + ; r1 = normalize(modelView * innrm) + mov r0.xyz, innrm 
+ mov r0.w, zeros + dp4 r1.x, modelView[0], r0 + dp4 r1.y, modelView[1], r0 + dp4 r1.z, modelView[2], r0 + mov r1.w, zeros + dp3 r2, r1, r1 ; r2 = x^2+y^2+z^2 for each component + rsq r2, r2 ; r2 = 1/sqrt(r2) for each component + mul r1, r2, r1 ; r1 = r1*r2 (the normalized normal) + + ; Calculate the diffuse level (r0.x) and the shininess level (r0.y) + ; r0.x = max(0, -(lightVec * r1)) + ; r0.y = max(0, (-lightHalfVec) * r1) ^ 2 + dp3 r0.x, lightVec, r1 + add r0.x, zeros, -r0 + dp3 r0.y, -lightHalfVec, r1 + max r0, zeros, r0 + mul r0.y, r0, r0 + + ; Accumulate the vertex color in r1, initializing it to the emission color + mov r1, mat_emi + + ; r1 += specularColor * lightClr * shininessLevel + mul r2, lightClr, r0.yyyy + mul r2, mat_spe, r2 + add r1, r2, r1 + + ; r1 += diffuseColor * lightClr * diffuseLevel + mul r2, lightClr, r0.xxxx + mul r2, mat_dif, r2 + add r1, r2, r1 + + ; r1 += ambientColor * lightClr + mov r2, lightClr + mul r2, mat_amb, r2 + add r1, r2, r1 + + ; outclr = min(ones, r1): caps r1 at 1.0 (only the upper bound is applied here) + min outclr, ones, r1 + + ; We're finished + end +.end