diff --git a/examples/gpu/data/test.vsh b/examples/gpu/data/test.vsh index 686b22b..0da0e64 100644 --- a/examples/gpu/data/test.vsh +++ b/examples/gpu/data/test.vsh @@ -1,7 +1,7 @@ -; make sure you update aemstro_as for this (27/05/14) +; make sure you update aemstro_as for this (15/11/14) ; setup constants - .const 5, 0.0, 0.0, -0.99, 1.0 + .const 20, 1.0, 0.0, 0.5, 1.0 ; setup outmap .out o0, result.position @@ -9,35 +9,37 @@ .out o2, result.texcoord0 .out o3, result.texcoord1 .out o4, result.texcoord2 - -; setup uniform map (not required) - .uniform 0x10, 0x13, mdlvMtx - .uniform 0x14, 0x17, projMtx + +; setup uniform map (required to use SHDR_GetUniformRegister) + .uniform 0, 3, projection ; c0-c3 = projection matrix + .uniform 4, 7, modelview ; c4-c7 = modelview matrix + .uniform 8, 8, lightDirection ; c8 = light direction vector + .uniform 9, 9, lightAmbient ; c9 = light ambient color ;code main: - mov d1A, d00 (0x4) - mov d1A, d25 (0x3) + mov r1, v0 (0x6) + mov r1, c20 (0x3) ; tempreg = mdlvMtx * in.pos - dp4 d10, d44, d1A (0x0) - dp4 d10, d45, d1A (0x1) - dp4 d10, d46, d1A (0x2) - mov d10, d25 (0x3) + dp4 r0, c4, r1 (0x0) + dp4 r0, c5, r1 (0x1) + dp4 r0, c6, r1 (0x2) + mov r0, c20 (0x3) ; result.pos = projMtx * tempreg - dp4 d00, d40, d10 (0x0) - dp4 d00, d41, d10 (0x1) - dp4 d00, d42, d10 (0x2) - dp4 d00, d43, d10 (0x3) + dp4 o0, c0, r0 (0x0) + dp4 o0, c1, r0 (0x1) + dp4 o0, c2, r0 (0x2) + dp4 o0, c3, r0 (0x3) ; result.texcoord = in.texcoord - mov d02, d01 (0x5) - mov d03, d25 (0x7) - mov d04, d25 (0x7) + mov o2, v1 (0x5) + mov o3, c20 (0x7) + mov o4, c20 (0x7) ; result.color = crappy lighting - dp3 d1A, d44, d02 (0x0) - dp3 d1A, d45, d02 (0x1) - dp3 d1A, d46, d02 (0x2) - dp4 d01, d00, d1A (0x6) - mov d01, d25 (0x3) + dp3 r0, c8, v2 (0x6) + max r0, c20, r0 (0x4) + mul r0, c9, r0 (0x8) + add o1, c9, r0 (0x6) + mov o1, c20 (0x3) flush end endmain: @@ -47,8 +49,8 @@ .opdesc _y__, xyzw, xyzw ; 0x1 .opdesc __z_, xyzw, xyzw ; 0x2 .opdesc ___w, xyzw, xyzw ; 0x3 - .opdesc xyz_, xyzw, xyzw ; 0x4 + .opdesc xyz_, yyyy, xyzw ; 0x4 .opdesc xyzw, xyzw, xyzw ; 0x5 .opdesc xyz_, xyzw, xyzw ; 0x6 .opdesc xyzw, yyyw, xyzw ; 0x7 - .opdesc xyzw, wwww, wwww ; 0x8 + .opdesc xyz_, wwww, xyzw ; 0x8 diff --git a/examples/gpu/data/texture.bin b/examples/gpu/data/texture.bin new file mode 100644 index 0000000..4a3312b Binary files /dev/null and b/examples/gpu/data/texture.bin differ diff --git a/examples/gpu/source/_gs.s b/examples/gpu/source/_gs.s new file mode 100644 index 0000000..1b9c92b --- /dev/null +++ b/examples/gpu/source/_gs.s @@ -0,0 +1,16 @@ +.section ".text" +.arm +.align 4 +.global _vboMemcpy50 + +# r0 : dst +# r1 : src +# fixed size 0x50 +_vboMemcpy50: + push {r4-r11} + ldmia r1!, {r2-r12} + stmia r0!, {r2-r12} + ldmia r1!, {r2-r12} + stmia r0!, {r2-r12} + pop {r4-r11} + bx lr diff --git a/examples/gpu/source/gs.c b/examples/gpu/source/gs.c new file mode 100644 index 0000000..4eabd45 --- /dev/null +++ b/examples/gpu/source/gs.c @@ -0,0 +1,431 @@ +#include +#include +#include +#include <3ds.h> + +#include "gs.h" +#include "math.h" + +#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4) + +static void gsInitMatrixStack(); + +Handle linearAllocMutex; + +static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]; + +typedef struct +{ + u32 offset; + mtx44 data; +}bufferMatrix_s; + +bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE]; +int bufferMatrixListLength; + +//---------------------- +// GS SYSTEM STUFF +//---------------------- + +void initBufferMatrixList() +{ + bufferMatrixListLength=0; +} + +void gsInit(DVLB_s* shader) +{ + gsInitMatrixStack(); + initBufferMatrixList(); + svcCreateMutex(&linearAllocMutex, false); + if(shader) + { + gsMatrixStackRegisters[0]=SHDR_GetUniformRegister(shader, "projection", 0); + gsMatrixStackRegisters[1]=SHDR_GetUniformRegister(shader, "modelview", 0); + } +} + +void gsExit(void) +{ + svcCloseHandle(linearAllocMutex); +} + +void gsStartFrame(void) +{ + GPUCMD_SetBufferOffset(0); + initBufferMatrixList(); +} + +void* gsLinearAlloc(size_t size) +{ + void* ret=NULL; + + svcWaitSynchronization(linearAllocMutex, U64_MAX); + ret=linearAlloc(size); + svcReleaseMutex(linearAllocMutex); + + return ret; +} + +void gsLinearFree(void* mem) +{ + svcWaitSynchronization(linearAllocMutex, U64_MAX); + linearFree(mem); + svcReleaseMutex(linearAllocMutex); +} + +//---------------------- +// MATRIX STACK STUFF +//---------------------- + +static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE]; +static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04}; +static u8 gsMatrixStackOffsets[GS_MATRIXTYPES]; +static bool gsMatrixStackUpdated[GS_MATRIXTYPES]; +static GS_MATRIX gsCurrentMatrixType; + +static void gsInitMatrixStack() +{ + int i; + for(i=0; i=GS_MATRIXTYPES)return NULL; + + return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]]; +} + +int gsLoadMatrix(GS_MATRIX m, float* data) +{ + if(m<0 || m>=GS_MATRIXTYPES || !data)return -1; + + memcpy(gsGetMatrix(m), data, sizeof(mtx44)); + + gsMatrixStackUpdated[m]=true; + + return 0; +} + +int gsPushMatrix() +{ + const GS_MATRIX m=gsCurrentMatrixType; + if(m<0 || m>=GS_MATRIXTYPES)return -1; + if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1; + + float* cur=gsGetMatrix(m); + gsMatrixStackOffsets[m]++; + memcpy(gsGetMatrix(m), cur, sizeof(mtx44)); + + return 0; +} + +int gsPopMatrix() +{ + const GS_MATRIX m=gsCurrentMatrixType; + if(m<0 || m>=GS_MATRIXTYPES)return -1; + if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1; + + gsMatrixStackOffsets[m]--; + + gsMatrixStackUpdated[m]=true; + + return 0; +} + +int gsMatrixMode(GS_MATRIX m) +{ + if(m<0 || m>=GS_MATRIXTYPES)return -1; + + gsCurrentMatrixType=m; + + return 0; +} + +//------------------------ +// MATRIX TRANSFORM STUFF +//------------------------ + +int gsMultMatrix(float* data) +{ + if(!data)return -1; + + mtx44 tmp; + multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp); + memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44)); + + gsMatrixStackUpdated[gsCurrentMatrixType]=true; + + return 0; +} + +void gsLoadIdentity() +{ + loadIdentity44(gsGetMatrix(gsCurrentMatrixType)); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsProjectionMatrix(float fovy, float aspect, float near, float far) +{ + initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsRotateX(float x) +{ + rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsRotateY(float y) +{ + rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsRotateZ(float z) +{ + rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsScale(float x, float y, float z) +{ + scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsTranslate(float x, float y, float z) +{ + translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +//---------------------- +// MATRIX RENDER STUFF +//---------------------- + +static void gsSetUniformMatrix(u32 startreg, float* m) +{ + float param[16]; + + param[0x0]=m[3]; //w + param[0x1]=m[2]; //z + param[0x2]=m[1]; //y + param[0x3]=m[0]; //x + + param[0x4]=m[7]; + param[0x5]=m[6]; + param[0x6]=m[5]; + param[0x7]=m[4]; + + param[0x8]=m[11]; + param[0x9]=m[10]; + param[0xa]=m[9]; + param[0xb]=m[8]; + + param[0xc]=m[15]; + param[0xd]=m[14]; + param[0xe]=m[13]; + param[0xf]=m[12]; + + GPU_SetUniform(startreg, (u32*)param, 4); +} + +static int gsUpdateTransformation() +{ + GS_MATRIX m; + for(m=0; mdata=NULL; + vbo->currentSize=0; + vbo->maxSize=0; + vbo->commands=NULL; + vbo->commandsSize=0; + + return 0; +} + +int gsVboCreate(gsVbo_s* vbo, u32 size) +{ + if(!vbo)return -1; + + vbo->data=gsLinearAlloc(size); + vbo->numVertices=0; + vbo->currentSize=0; + vbo->maxSize=size; + + return 0; +} + +void* gsVboGetOffset(gsVbo_s* vbo) +{ + if(!vbo)return NULL; + + return (void*)(&((u8*)vbo->data)[vbo->currentSize]); +} + +int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units) +{ + if(!vbo || !data || !size)return -1; + if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1; + + memcpy(gsVboGetOffset(vbo), data, size); + vbo->currentSize+=size; + vbo->numVertices+=units; + + return 0; +} + +int gsVboFlushData(gsVbo_s* vbo) +{ + if(!vbo)return -1; + + //unnecessary if we use flushAndRun + // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize); + + return 0; +} + +int gsVboDestroy(gsVbo_s* vbo) +{ + if(!vbo)return -1; + + if(vbo->commands)free(vbo->commands); + if(vbo->data)gsLinearFree(vbo->data); + gsVboInit(vbo); + + return 0; +} + +extern u32 debugValue[]; + +void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n) +{ + //set attribute buffer address + GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3); + //set primitive type + GPUCMD_AddSingleParam(0x0002025E, primitive); + GPUCMD_AddSingleParam(0x0002025F, 0x00000001); + //index buffer not used for drawArrays but 0x000F0227 still required + GPUCMD_AddSingleParam(0x000F0227, 0x80000000); + //pass number of vertices + GPUCMD_AddSingleParam(0x000F0228, n); + + GPUCMD_AddSingleParam(0x00010253, 0x00000001); + + GPUCMD_AddSingleParam(0x00010245, 0x00000000); + GPUCMD_AddSingleParam(0x000F022E, 0x00000001); + GPUCMD_AddSingleParam(0x00010245, 0x00000001); + GPUCMD_AddSingleParam(0x000F0231, 0x00000001); + + // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff +} + +//not thread safe +int gsVboPrecomputeCommands(gsVbo_s* vbo) +{ + if(!vbo || vbo->commands)return -1; + + static u32 tmpBuffer[128]; + + u32* savedAdr; u32 savedSize, savedOffset; + GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset); + GPUCMD_SetBuffer(tmpBuffer, 128, 0); + + GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); + + GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); + vbo->commands=memalign(0x4, vbo->commandsSize*4); + if(!vbo->commands)return -1; + memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4); + + GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset); + + return 0; +} + +extern u32* gpuCmdBuf; +extern u32 gpuCmdBufSize; +extern u32 gpuCmdBufOffset; + +void _vboMemcpy50(u32* dst, u32* src); + +void _GPUCMD_AddRawCommands(u32* cmd, u32 size) +{ + if(!cmd || !size)return; + + if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd); + else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4); + gpuCmdBufOffset+=size; +} + +int gsVboDraw(gsVbo_s* vbo) +{ + if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1; + + gsUpdateTransformation(); + + gsVboPrecomputeCommands(vbo); + + // u64 val=svcGetSystemTick(); + if(vbo->commands) + { + _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize); + }else{ + GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); + } + // debugValue[5]+=(u32)(svcGetSystemTick()-val); + // debugValue[6]++; + + return 0; +} diff --git a/examples/gpu/source/gs.h b/examples/gpu/source/gs.h new file mode 100644 index 0000000..6976fca --- /dev/null +++ b/examples/gpu/source/gs.h @@ -0,0 +1,59 @@ +#ifndef GS_H +#define GS_H + +#include <3ds.h> +#include "math.h" + +#define GS_MATRIXSTACK_SIZE (8) + +typedef enum +{ + GS_PROJECTION = 0, + GS_MODELVIEW = 1, + GS_MATRIXTYPES +}GS_MATRIX; + +typedef struct +{ + u8* data; + u32 currentSize; // in bytes + u32 maxSize; // in bytes + u32 numVertices; + u32* commands; + u32 commandsSize; +}gsVbo_s; + + +void gsInit(DVLB_s* shader); +void gsExit(void); + +void gsStartFrame(void); +void gsAdjustBufferMatrices(mtx44 transformation); + +void* gsLinearAlloc(size_t size); +void gsLinearFree(void* mem); + +float* gsGetMatrix(GS_MATRIX m); +int gsLoadMatrix(GS_MATRIX m, float* data); +int gsPushMatrix(); +int gsPopMatrix(); +int gsMatrixMode(GS_MATRIX m); + +void gsLoadIdentity(); +void gsProjectionMatrix(float fovy, float aspect, float near, float far); +void gsRotateX(float x); +void gsRotateY(float y); +void gsRotateZ(float z); +void gsScale(float x, float y, float z); +void gsTranslate(float x, float y, float z); +int gsMultMatrix(float* data); + +int gsVboInit(gsVbo_s* vbo); +int gsVboCreate(gsVbo_s* vbo, u32 size); +int gsVboFlushData(gsVbo_s* vbo); +int gsVboDestroy(gsVbo_s* vbo); +int gsVboDraw(gsVbo_s* vbo); +void* gsVboGetOffset(gsVbo_s* vbo); +int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units); + +#endif diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c index c059540..17a4bee 100644 --- a/examples/gpu/source/main.c +++ b/examples/gpu/source/main.c @@ -1,201 +1,325 @@ +/////////////////////////////////////// +// GPU example // +/////////////////////////////////////// + +//this example is meant to show how to use the GPU to render a 3D object +//it also shows how to do stereoscopic 3D +//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft +//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited. + #include #include #include -#include #include <3ds.h> + #include "math.h" +#include "gs.h" + #include "test_vsh_shbin.h" -#include "test_png_bin.h" -#include "mdl.h" +#include "texture_bin.h" +//will be moved into ctrulib at some point +#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080) + +#define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0)) + +//shader structure DVLB_s* shader; -float* vertArray; +//texture data pointer u32* texData; +//vbo structure +gsVbo_s vbo; -void setUniformMatrix(u32 startreg, float* m) -{ - float param[16]; - - param[0x0]=m[3]; //w - param[0x1]=m[2]; //z - param[0x2]=m[1]; //y - param[0x3]=m[0]; //x - - param[0x4]=m[7]; - param[0x5]=m[6]; - param[0x6]=m[5]; - param[0x7]=m[4]; - - param[0x8]=m[11]; - param[0x9]=m[10]; - param[0xa]=m[9]; - param[0xb]=m[8]; - - param[0xc]=m[15]; - param[0xd]=m[14]; - param[0xe]=m[13]; - param[0xf]=m[12]; - - GPU_SetUniform(startreg, (u32*)param, 4); -} - -float angle=0.0f; -float angleZ=0.0f; -float tx, ty, tz; - +//GPU framebuffer address u32* gpuOut=(u32*)0x1F119400; +//GPU depth buffer address u32* gpuDOut=(u32*)0x1F370800; -// topscreen -void doFrame1() +//angle for the vertex lighting (cf test.vsh) +float lightAngle; +//object position and rotation angle +vect3Df_s position, angle; + +//vertex structure +typedef struct { - //general setup - GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); + vect3Df_s position; + float texcoord[2]; + vect3Df_s normal; +}vertex_s; - GPU_DepthRange(-1.0f, 0.0f); +//object data (cube) +//obviously this doesn't have to be defined manually, but we will here for the purposes of the example +//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z} +//we're drawing triangles so three lines = one triangle +const vertex_s modelVboData[]= +{ + //first face (PZ) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + //second face (MZ) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + //third face (PX) + //first triangle + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + //fourth face (MX) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + //second triangle + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + //fifth face (PY) + //first triangle + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + //sixth face (MY) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + //second triangle + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, +}; - GPU_SetFaceCulling(GPU_CULL_BACK_CCW); - GPU_SetStencilTest(false, GPU_ALWAYS, 0x00); - GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); - - // ? - GPUCMD_AddSingleParam(0x00010062, 0x00000000); //param always 0x0 according to code - GPUCMD_AddSingleParam(0x000F0118, 0x00000000); +//stolen from staplebutt +void GPU_SetDummyTexEnv(u8 num) +{ + GPU_SetTexEnv(num, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0,0,0), + GPU_TEVOPERANDS(0,0,0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} +// topscreen +void renderFrame() +{ + GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); + + GPU_DepthRange(-1.0f, 0.0f); + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + GPUCMD_AddSingleParam(0x00010062, 0); + GPUCMD_AddSingleParam(0x000F0118, 0); + //setup shader - SHDR_UseProgram(shader, 0); + SHDR_UseProgram(shader, 0); + + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + GPU_SetTextureEnable(GPU_TEXUNIT0); + + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), + GPU_TEVOPERANDS(0,0,0), + GPU_TEVOPERANDS(0,0,0), + GPU_MODULATE, GPU_MODULATE, + 0xFFFFFFFF); + GPU_SetDummyTexEnv(1); + GPU_SetDummyTexEnv(2); + GPU_SetDummyTexEnv(3); + GPU_SetDummyTexEnv(4); + GPU_SetDummyTexEnv(5); - //attribute buffers - GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)vertArray), + //texturing stuff + GPU_SetTexture(GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texData),128,128,GPU_TEXTURE_MAG_FILTER(GPU_NEAREST)|GPU_TEXTURE_MIN_FILTER(GPU_NEAREST),GPU_RGBA8); + GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)texData), GPU_ATTRIBFMT(0, 3, GPU_FLOAT)|GPU_ATTRIBFMT(1, 2, GPU_FLOAT)|GPU_ATTRIBFMT(2, 3, GPU_FLOAT), 0xFFC, 0x210, 1, (u32[]){0x00000000}, (u64[]){0x210}, (u8[]){3}); - //? - GPUCMD_AddSingleParam(0x000F0100, 0x00E40100); - GPUCMD_AddSingleParam(0x000F0101, 0x01010000); - GPUCMD_AddSingleParam(0x000F0104, 0x00000010); - - //texturing stuff - GPUCMD_AddSingleParam(0x0002006F, 0x00000100); - GPUCMD_AddSingleParam(0x000F0080, 0x00011001); //enables/disables texturing - - //texenv - GPU_SetTexEnv(3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000); - GPU_SetTexEnv(4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000); - GPU_SetTexEnv(5, GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), - GPU_TEVOPERANDS(0,0,0), GPU_TEVOPERANDS(0,0,0), GPU_MODULATE, GPU_MODULATE, 0xFFFFFFFF); + //setup lighting (this is specific to our shader) + vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))); + GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightDirection", 0), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 4); + GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightAmbient", 0), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 4); - //texturing stuff - GPU_SetTexture((u32*)osConvertVirtToPhys((u32)texData),256,256,0x6,GPU_RGBA8); + //initialize projection matrix to standard perspective stuff + gsMatrixMode(GS_PROJECTION); + gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f); + gsRotateZ(M_PI/2); //because framebuffer is sideways... - //setup matrices - float modelView[16]; - float projection[16]; - - loadIdentity44(modelView); - loadIdentity44(projection); - - translateMatrix(modelView, tx, ty, tz); - rotateMatrixX(modelView, angle); - rotateMatrixZ(modelView, angleZ); - - initProjectionMatrix(projection, 1.3962634f, 240.0f/400.0f, 0.01f, 10.0f); - - setUniformMatrix(0x24, modelView); - setUniformMatrix(0x20, projection); - - //draw first model - GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3); - // GPU_DrawElements(GPU_TRIANGLES, (u32*)(((u32)((void*)indArray-(void*)gspHeap))+0x20000000-base), 6); - - //setup matrices - loadIdentity44(modelView); - loadIdentity44(projection); - - translateMatrix(modelView, tx, -ty, tz); - rotateMatrixX(modelView, -angle); - rotateMatrixZ(modelView, -angleZ); - - setUniformMatrix(0x24, modelView); - - //draw second - GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3); - - //finalize stuff ? - GPU_FinishDrawing(); + //draw object + gsMatrixMode(GS_MODELVIEW); + gsPushMatrix(); + gsTranslate(position.x, position.y, position.z); + gsRotateX(angle.x); + gsRotateY(angle.y); + gsVboDraw(&vbo); + gsPopMatrix(); + GPU_FinishDrawing(); } -void demoControls(void) -{ - hidScanInput(); - u32 PAD=hidKeysHeld(); - - if(PAD&KEY_UP)tx+=0.1f; - if(PAD&KEY_DOWN)tx-=0.1f; - - if(PAD&KEY_LEFT)ty+=0.1f; - if(PAD&KEY_RIGHT)ty-=0.1f; - - if(PAD&KEY_R)tz+=0.1f; - if(PAD&KEY_L)tz-=0.1f; - - if(PAD&KEY_A)angle+=0.1f; - if(PAD&KEY_Y)angle-=0.1f; - - if(PAD&KEY_X)angleZ+=0.1f; - if(PAD&KEY_B)angleZ-=0.1f; -} - -extern u32* gxCmdBuf; - -int main() +int main(int argc, char** argv) { + //setup services srvInit(); aptInit(); gfxInit(); hidInit(NULL); - + + //initialize GPU GPU_Init(NULL); + //let GFX know we're ok with doing stereoscopic 3D rendering + gfxSet3D(true); + + //load our vertex shader binary + shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin, test_vsh_shbin_size); + + //initialize GS + gsInit(shader); + + //allocate our GPU command buffers + //they *have* to be on the linear heap u32 gpuCmdSize=0x40000; u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4); + u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4); - GPU_Reset(gxCmdBuf, gpuCmd, gpuCmdSize); + //actually reset the GPU + GPU_Reset(NULL, gpuCmd, gpuCmdSize); - vertArray=(float*)linearAlloc(0x100000); - texData=(u32*)linearAlloc(0x100000); + //create texture + texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned + memcpy(texData, texture_bin, texture_bin_size); - memcpy(texData, test_png_bin, test_png_bin_size); - memcpy(vertArray, mdlData, sizeof(mdlData)); - GSPGPU_FlushDataCache(NULL, mdlData, sizeof(mdlData)); - GSPGPU_FlushDataCache(NULL, test_png_bin, test_png_bin_size); + //create VBO + gsVboInit(&vbo); + gsVboCreate(&vbo, sizeof(modelVboData)); + gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s)); + gsVboFlushData(&vbo); - tx=ty=0.0f; tz=-0.1f; - shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin,test_vsh_shbin_size); + //initialize object position and angle + position=vect3Df(0.0f, 0.0f, -2.0f); + angle=vect3Df(M_PI/4, M_PI/4, 0.0f); - GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); - gspWaitForPSC0(); - gfxSwapBuffersGpu(); + //background color (blue) + u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF); while(aptMainLoop()) { - demoControls(); + //get current 3D slider state + float slider=CONFIG_3D_SLIDERSTATE; - GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); - gspWaitForPSC0(); + //controls + hidScanInput(); + //START to exit to hbmenu + if(keysDown()&KEY_START)break; - GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); - doFrame1(); + //A/B to change vertex lighting angle + if(keysHeld()&KEY_A)lightAngle+=0.1f; + if(keysHeld()&KEY_B)lightAngle-=0.1f; + + //D-PAD to rotate object + if(keysHeld()&KEY_RIGHT)angle.x+=0.05f; + if(keysHeld()&KEY_LEFT)angle.x-=0.05f; + if(keysHeld()&KEY_UP)angle.y+=0.05f; + if(keysHeld()&KEY_DOWN)angle.y-=0.05f; + + //R/L to bring object closer to or move it further from the camera + if(keysHeld()&KEY_R)position.z+=0.1f; + if(keysHeld()&KEY_L)position.z-=0.1f; + + //generate our GPU command buffer for this frame + gsStartFrame(); + renderFrame(); GPUCMD_Finalize(); - GPUCMD_Run(gxCmdBuf); - gspWaitForP3D(); + if(slider>0.0f) + { + //new and exciting 3D ! + //make a copy of left gpu buffer + u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset); + memcpy(gpuCmdRight, gpuCmd, offset*4); + + //setup interaxial + float interaxial=slider*0.12f; + + //adjust left gpu buffer fo 3D ! + {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} + + //draw left framebuffer + GPUCMD_FlushAndRun(NULL); + + //while GPU starts drawing the left buffer, adjust right one for 3D ! + GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset); + {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} + + //we wait for the left buffer to finish drawing + gspWaitForP3D(); + GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); + gspWaitForPPF(); + + //we draw the right buffer, wait for it to finish and then switch back to left one + //clear the screen + GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); + gspWaitForPSC0(); + + //draw the right framebuffer + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); + + //transfer from GPU output buffer to actual framebuffer + GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), 0x019001E0, 0x01001000); + gspWaitForPPF(); + GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); + }else{ + //boring old 2D ! + + //draw the frame + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); + + //clear the screen + GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); + gspWaitForPPF(); + } + + //clear the screen + GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); + gspWaitForPSC0(); gfxSwapBuffersGpu(); - GX_SetDisplayTransfer(gxCmdBuf, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); - gspWaitForPPF(); - gspWaitForVBlank(); + + gspWaitForEvent(GSPEVENT_VBlank0, true); } + gsExit(); hidExit(); gfxExit(); aptExit(); diff --git a/examples/gpu/source/math.c b/examples/gpu/source/math.c index 9c0977a..13ab3dd 100644 --- a/examples/gpu/source/math.c +++ b/examples/gpu/source/math.c @@ -15,6 +15,7 @@ void multMatrix44(float* m1, float* m2, float* m) //4x4 { int i, j; for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); + } void translateMatrix(float* tm, float x, float y, float z) @@ -26,11 +27,16 @@ void translateMatrix(float* tm, float x, float y, float z) rm[7]=y; rm[11]=z; - multMatrix44(rm,tm,m); + multMatrix44(tm,rm,m); memcpy(tm,m,16*sizeof(float)); } -void rotateMatrixX(float* tm, float x) +// 00 01 02 03 +// 04 05 06 07 +// 08 09 10 11 +// 12 13 14 15 + +void rotateMatrixX(float* tm, float x, bool r) { float rm[16], m[16]; memset(rm, 0x00, 16*4); @@ -40,11 +46,27 @@ void rotateMatrixX(float* tm, float x) rm[9]=-sin(x); rm[10]=cos(x); rm[15]=1.0f; - multMatrix44(tm,rm,m); + if(!r)multMatrix44(tm,rm,m); + else multMatrix44(rm,tm,m); memcpy(tm,m,16*sizeof(float)); } -void rotateMatrixZ(float* tm, float x) +void rotateMatrixY(float* tm, float x, bool r) +{ + float rm[16], m[16]; + memset(rm, 0x00, 16*4); + rm[0]=cos(x); + rm[2]=sin(x); + rm[5]=1.0f; + rm[8]=-sin(x); + rm[10]=cos(x); + rm[15]=1.0f; + if(!r)multMatrix44(tm,rm,m); + else multMatrix44(rm,tm,m); + memcpy(tm,m,16*sizeof(float)); +} + +void rotateMatrixZ(float* tm, float x, bool r) { float rm[16], m[16]; memset(rm, 0x00, 16*4); @@ -54,7 +76,8 @@ void rotateMatrixZ(float* tm, float x) rm[5]=cos(x); rm[10]=1.0f; rm[15]=1.0f; - multMatrix44(tm,rm,m); + if(!r)multMatrix44(tm,rm,m); + else multMatrix44(rm,tm,m); memcpy(tm,m,16*sizeof(float)); } @@ -69,27 +92,57 @@ void initProjectionMatrix(float* m, float fovy, float aspect, float near, float { float top = near*tan(fovy/2); float right = (top*aspect); + + float mp[4*4]; - *(m++) = near/right; - *(m++) = 0.0f; - *(m++) = 0.0f; - *(m++) = 0.0f; + mp[0x0] = near/right; + mp[0x1] = 0.0f; + mp[0x2] = 0.0f; + mp[0x3] = 0.0f; - *(m++) = 0.0f; - *(m++) = near/top; - *(m++) = 0.0f; - *(m++) = 0.0f; + mp[0x4] = 0.0f; + mp[0x5] = near/top; + mp[0x6] = 0.0f; + mp[0x7] = 0.0f; - *(m++) = 0.0f; - *(m++) = 0.0f; - // *(m++) = -(far+near)/(far-near); - *(m++) = 0.0f; - // *(m++) = -2.0f*(far*near)/(far-near); - // *(m++) = 1.0f; - *(m++) = -1.0f; + mp[0x8] = 0.0f; + mp[0x9] = 0.0f; + mp[0xA] = -(far+near)/(far-near); + mp[0xB] = -2.0f*(far*near)/(far-near); - *(m++) = 0.0f; - *(m++) = 0.0f; - *(m++) = -1.0f; - *(m++) = 0.0f; + mp[0xC] = 0.0f; + mp[0xD] = 0.0f; + mp[0xE] = -1.0f; + mp[0xF] = 0.0f; + + float mp2[4*4]; + loadIdentity44(mp2); + mp2[0xA]=0.5; + mp2[0xB]=-0.5; + + multMatrix44(mp2, mp, m); +} + +vect3Df_s getMatrixColumn(float* m, u8 i) +{ + if(!m || i>=4)return vect3Df(0,0,0); + return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]); +} + +vect3Df_s getMatrixRow(float* m, u8 i) +{ + if(!m || i>=4)return vect3Df(0,0,0); + return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]); +} + +vect4Df_s getMatrixColumn4(float* m, u8 i) +{ + if(!m || i>=4)return vect4Df(0,0,0,0); + return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]); +} + +vect4Df_s getMatrixRow4(float* m, u8 i) +{ + if(!m || i>=4)return vect4Df(0,0,0,0); + return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]); } diff --git a/examples/gpu/source/math.h b/examples/gpu/source/math.h index 5eed360..8137b90 100644 --- a/examples/gpu/source/math.h +++ b/examples/gpu/source/math.h @@ -1,13 +1,144 @@ #ifndef MATH_H +#define MATH_H + +#include <3ds/types.h> +#include + +typedef float mtx44[4][4]; +typedef float mtx33[3][3]; + +typedef struct +{ + s32 x, y, z; +}vect3Di_s; + +static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z) +{ + return (vect3Di_s){x,y,z}; +} + +static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v) +{ + return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z}; +} + +static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v) +{ + return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z}; +} + +static inline vect3Di_s vmuli(vect3Di_s v, s32 f) +{ + return (vect3Di_s){v.x*f,v.y*f,v.z*f}; +} + +typedef struct +{ + float x, y, z; +}vect3Df_s; + +static inline vect3Df_s vect3Df(float x, float y, float z) +{ + return (vect3Df_s){x,y,z}; +} + +static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v) +{ + return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z}; +} + +static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v) +{ + return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z}; +} + +static inline vect3Df_s vmulf(vect3Df_s v, float f) +{ + return (vect3Df_s){v.x*f,v.y*f,v.z*f}; +} + +static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2) +{ + return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z}; +} + +static inline float vmagf(vect3Df_s v) +{ + return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); +} + +static inline float vdistf(vect3Df_s v1, vect3Df_s v2) +{ + return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z)); +} + +static inline vect3Df_s vnormf(vect3Df_s v) +{ + const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); + return (vect3Df_s){v.x/l,v.y/l,v.z/l}; +} + +typedef struct +{ + float x, y, z, w; +}vect4Df_s; + +static inline vect4Df_s vect4Df(float x, float y, float z, float w) +{ + return (vect4Df_s){x,y,z,w}; +} + +static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v) +{ + return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w}; +} + +static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v) +{ + return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w}; +} + +static inline vect4Df_s vmulf4(vect4Df_s v, float f) +{ + return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f}; +} + +static inline float vdotf4(vect4Df_s v1, vect4Df_s v2) +{ + return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w; +} + +static inline vect4Df_s vnormf4(vect4Df_s v) +{ + const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w); + return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}; +} + +//interstuff +static inline vect3Di_s vf2i(vect3Df_s v) +{ + return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)}; +} + +static inline vect3Df_s vi2f(vect3Di_s v) +{ + return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z}; +} void loadIdentity44(float* m); void multMatrix44(float* m1, float* m2, float* m); void translateMatrix(float* tm, float x, float y, float z); -void rotateMatrixX(float* tm, float x); -void rotateMatrixZ(float* tm, float x); +void rotateMatrixX(float* tm, float x, bool r); +void rotateMatrixY(float* tm, float x, bool r); +void rotateMatrixZ(float* tm, float x, bool r); void scaleMatrix(float* tm, float x, float y, float z); void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far); +vect3Df_s getMatrixColumn(float* m, u8 i); +vect3Df_s getMatrixRow(float* m, u8 i); +vect4Df_s getMatrixColumn4(float* m, u8 i); +vect4Df_s getMatrixRow4(float* m, u8 i); + #endif