Add new GPU examples

This commit is contained in:
fincs 2015-07-22 19:41:59 +02:00
parent 10b81077e3
commit bf7d686e88
35 changed files with 2177 additions and 1230 deletions

View File

@ -1,11 +0,0 @@
gpu
=======
Example of how to use the GPU with libctru.
Before trying to compile, make sure to download aemstro
( https://github.com/smealum/aemstro commit: 51bfeef9e1a0149726dca43b50919bd45917015a )
and set the AEMSTRO environment variable to the path where it is located.
You'll also need to install Python 3 and have it in your PATH.

View File

@ -1,57 +0,0 @@
; Vertex shader for the GPU example.
; It multiplies the input position by the modelview and projection matrices,
; forwards the texture coordinate and derives the output color from a dot
; product between the lightDirection uniform and input v2.
; The number in parentheses after each instruction is an index into the
; operand descriptor table at the end of this file.
; setup constants
.const c20, 1.0, 0.0, 0.5, 1.0
; setup outmap
.out o0, result.position, 0xF
.out o1, result.color, 0xF
.out o2, result.texcoord0, 0x3
.out o3, result.texcoord1, 0x3
.out o4, result.texcoord2, 0x3
; setup uniform map (not required)
.uniform c0, c3, projection
.uniform c4, c7, modelview
.uniform c8, c8, lightDirection
.uniform c9, c9, lightAmbient
.vsh vmain, end_vmain
;code
vmain:
; r1.xyz = v0.xyz (descriptor 0x4), then r1.w is taken from c20 (descriptor 0x3)
; NOTE(review): presumably this forces w to 1.0 - confirm the component order of .const
mov r1, v0 (0x4)
mov r1, c20 (0x3)
; temp = modvMtx * in.pos
; one dp4 per output component: descriptors 0x0/0x1/0x2 write x/y/z only
dp4 r0, c4, r1 (0x0)
dp4 r0, c5, r1 (0x1)
dp4 r0, c6, r1 (0x2)
; r0.w is taken from c20 rather than computed from a fourth matrix row
mov r0, c20 (0x3)
; result.pos = projMtx * temp
dp4 o0, c0, r0 (0x0)
dp4 o0, c1, r0 (0x1)
dp4 o0, c2, r0 (0x2)
dp4 o0, c3, r0 (0x3)
; result.texcoord = in.texcoord
mov o2, v1 (0x5)
; texcoord1/texcoord2 are filled from the constant c20 (descriptor 0x7 reads it as yyyw)
mov o3, c20 (0x7)
mov o4, c20 (0x7)
; result.color = crappy lighting
; r0.xyz = dot3(lightDirection, v2)
dp3 r0, c8, v2 (0x4)
; clamp from below against c20 read with the yyyy swizzle (descriptor 0x9)
max r0, c20, r0 (0x9)
; color.xyz = lightAmbient + lightAmbient * clamped dot product
mul r0, c9, r0 (0x4)
add o1, c9, r0 (0x4)
; color.w is taken from c20 (descriptor 0x3)
mov o1, c20 (0x3)
nop
end
end_vmain:
;operand descriptors
; each entry: destination write mask, source 1 swizzle, source 2 swizzle
.opdesc x___, xyzw, xyzw ; 0x0
.opdesc _y__, xyzw, xyzw ; 0x1
.opdesc __z_, xyzw, xyzw ; 0x2
.opdesc ___w, xyzw, xyzw ; 0x3
.opdesc xyz_, xyzw, xyzw ; 0x4
.opdesc xyzw, xyzw, xyzw ; 0x5
.opdesc x_zw, xyzw, xyzw ; 0x6
.opdesc xyzw, yyyw, xyzw ; 0x7
.opdesc xyz_, wwww, wwww ; 0x8
.opdesc xyz_, yyyy, xyzw ; 0x9

Binary file not shown.

View File

@ -1,16 +0,0 @@
@-----------------------------------------------------------------------
@ void _vboMemcpy50(u32* dst, u32* src)
@ Syntax: GNU as, ARM (A32) state
@ In:     r0 = dst, r1 = src (both word-aligned)
@ Out:    nothing; r0 and r1 are advanced by 0x50
@ Clobb:  r2, r3 (r4-r11 are saved and restored)
@
@ Copies exactly 0x50 bytes (20 words) as two bursts of 10 words.
@ The register list must be r2-r11 (10 registers): r2-r12 would move
@ 11 words per burst, i.e. 0x58 bytes in total, reading and writing
@ 8 bytes past the end of both buffers.
@-----------------------------------------------------------------------
.section ".text"
.arm
.align 4
.global _vboMemcpy50
_vboMemcpy50:
	push {r4-r11}               @ r4-r11 are callee-saved
	ldmia r1!, {r2-r11}         @ words 0-9   (0x28 bytes)
	stmia r0!, {r2-r11}
	ldmia r1!, {r2-r11}         @ words 10-19 (0x28 bytes)
	stmia r0!, {r2-r11}
	pop {r4-r11}
	bx lr

View File

@ -1,432 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <3ds.h>
#include "gs.h"
#include "math.h"
#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
static void gsInitMatrixStack();
Handle linearAllocMutex;
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
// Record of one projection matrix upload, kept so it can be rewritten later
// by gsAdjustBufferMatrices().
typedef struct
{
u32 offset; // command buffer offset reported by GPUCMD_GetBuffer() just before the upload was emitted
mtx44 data; // copy of the matrix that was uploaded at that offset
}bufferMatrix_s;
bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
int bufferMatrixListLength;
//----------------------
// GS SYSTEM STUFF
//----------------------
// Forgets every recorded matrix upload by resetting the list length to zero.
void initBufferMatrixList()
{
	bufferMatrixListLength = 0;
}
// Initializes the GS layer: resets the matrix stacks and the recorded matrix
// list, and creates the mutex used by gsLinearAlloc()/gsLinearFree().
// shader : optional; when non-NULL the "projection" and "modelview" uniform
//          locations of its vertex shader are stored as the matrix upload
//          registers and the program is passed to shaderProgramUse().
void gsInit(shaderProgram_s* shader)
{
	gsInitMatrixStack();
	initBufferMatrixList();
	svcCreateMutex(&linearAllocMutex, false);

	if(!shader)return;

	gsMatrixStackRegisters[GS_PROJECTION]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
	gsMatrixStackRegisters[GS_MODELVIEW]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");
	shaderProgramUse(shader);
}
// Shuts the GS layer down by closing the mutex handle created in gsInit().
void gsExit(void)
{
	svcCloseHandle(linearAllocMutex);
}
// Begins a new frame: clears the list of recorded matrix uploads and rewinds
// the GPU command buffer write offset to 0.
void gsStartFrame(void)
{
	initBufferMatrixList();
	GPUCMD_SetBufferOffset(0);
}
// linearAlloc() wrapper that holds linearAllocMutex for the duration of the call.
// Returns whatever linearAlloc() returned.
void* gsLinearAlloc(size_t size)
{
	svcWaitSynchronization(linearAllocMutex, U64_MAX);
	void* const block=linearAlloc(size);
	svcReleaseMutex(linearAllocMutex);

	return block;
}
// linearFree() wrapper that holds linearAllocMutex for the duration of the call.
void gsLinearFree(void* mem)
{
	svcWaitSynchronization(linearAllocMutex, U64_MAX);

	linearFree(mem);

	svcReleaseMutex(linearAllocMutex);
}
//----------------------
// MATRIX STACK STUFF
//----------------------
static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
static GS_MATRIX gsCurrentMatrixType;
// Resets every matrix stack to depth 0 with an identity matrix at the bottom,
// flags each one as needing upload and selects GS_PROJECTION as current type.
static void gsInitMatrixStack()
{
	int type;

	for(type=0; type<GS_MATRIXTYPES; type++)
	{
		loadIdentity44((float*)gsMatrixStacks[type][0]);
		gsMatrixStackOffsets[type]=0;
		gsMatrixStackUpdated[type]=true;
	}

	gsCurrentMatrixType=GS_PROJECTION;
}
// Returns a pointer to the matrix on top of stack m as 16 floats,
// or NULL when m is not a valid matrix type.
float* gsGetMatrix(GS_MATRIX m)
{
	const bool validType=(m>=0 && m<GS_MATRIXTYPES);

	if(!validType)return NULL;

	const u8 top=gsMatrixStackOffsets[m];
	return (float*)gsMatrixStacks[m][top];
}
// Overwrites the top of stack m with the 16 floats at data and flags the
// stack for upload. Returns 0 on success, -1 when m is invalid or data is NULL.
int gsLoadMatrix(GS_MATRIX m, float* data)
{
	// gsGetMatrix() yields NULL exactly when m is out of range
	float* const top=gsGetMatrix(m);

	if(!top || !data)return -1;

	memcpy(top, data, sizeof(mtx44));
	gsMatrixStackUpdated[m]=true;
	return 0;
}
int gsPushMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1;
float* cur=gsGetMatrix(m);
gsMatrixStackOffsets[m]++;
memcpy(gsGetMatrix(m), cur, sizeof(mtx44));
return 0;
}
int gsPopMatrix()
{
const GS_MATRIX m=gsCurrentMatrixType;
if(m<0 || m>=GS_MATRIXTYPES)return -1;
if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
gsMatrixStackOffsets[m]--;
gsMatrixStackUpdated[m]=true;
return 0;
}
// Selects which matrix stack the transform functions operate on.
// Returns 0 on success, -1 (leaving the selection unchanged) when m is invalid.
int gsMatrixMode(GS_MATRIX m)
{
	if(m>=0 && m<GS_MATRIXTYPES)
	{
		gsCurrentMatrixType=m;
		return 0;
	}
	return -1;
}
//------------------------
// MATRIX TRANSFORM STUFF
//------------------------
// Replaces the current matrix C with C * data (as computed by multMatrix44)
// and flags the current stack for upload.
// Returns 0 on success, -1 when data is NULL.
int gsMultMatrix(float* data)
{
	if(!data)return -1;

	mtx44 product;
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	// multiply into a temporary: multMatrix44 must not write over its inputs
	multMatrix44(current, data, (float*)product);
	memcpy(current, (float*)product, sizeof(mtx44));

	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
	return 0;
}
// Resets the current matrix to identity and flags its stack for upload.
void gsLoadIdentity()
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	loadIdentity44(current);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Overwrites the current matrix with the result of initProjectionMatrix()
// for the given parameters and flags its stack for upload.
void gsProjectionMatrix(float fovy, float aspect, float near, float far)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	initProjectionMatrix(current, fovy, aspect, near, far);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Applies rotateMatrixX() with angle x (non-reversed order) to the current
// matrix and flags its stack for upload.
void gsRotateX(float x)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	rotateMatrixX(current, x, false);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Applies rotateMatrixY() with angle y (non-reversed order) to the current
// matrix and flags its stack for upload.
void gsRotateY(float y)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	rotateMatrixY(current, y, false);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Applies rotateMatrixZ() with angle z (non-reversed order) to the current
// matrix and flags its stack for upload.
void gsRotateZ(float z)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	rotateMatrixZ(current, z, false);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Applies scaleMatrix() with factors (x, y, z) to the current matrix and
// flags its stack for upload.
void gsScale(float x, float y, float z)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	scaleMatrix(current, x, y, z);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
// Applies translateMatrix() with offset (x, y, z) to the current matrix and
// flags its stack for upload.
void gsTranslate(float x, float y, float z)
{
	float* const current=gsGetMatrix(gsCurrentMatrixType);

	translateMatrix(current, x, y, z);
	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
}
//----------------------
// MATRIX RENDER STUFF
//----------------------
// Uploads the 4x4 matrix m to four consecutive vertex shader float uniform
// registers starting at startreg.
// Each row of four floats is sent with its components reversed
// (x,y,z,w becomes w,z,y,x); the row order itself is unchanged.
static void gsSetUniformMatrix(u32 startreg, float* m)
{
	float param[16];
	int row, comp;

	for(row=0; row<4; row++)
	{
		for(comp=0; comp<4; comp++)
		{
			param[row*4+comp]=m[row*4+(3-comp)];
		}
	}

	GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
}
static int gsUpdateTransformation()
{
GS_MATRIX m;
for(m=0; m<GS_MATRIXTYPES; m++)
{
if(gsMatrixStackUpdated[m])
{
if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
{
GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
bufferMatrixListLength++;
}
gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
gsMatrixStackUpdated[m]=false;
}
}
return 0;
}
// Rewrites every recorded projection matrix upload in the current command
// buffer with (recorded matrix * transformation), then restores the buffer
// write offset to where it was on entry.
void gsAdjustBufferMatrices(mtx44 transformation)
{
	u32* cmdBuffer;
	u32 endOffset;
	int entry;

	GPUCMD_GetBuffer(&cmdBuffer, NULL, &endOffset);

	for(entry=0; entry<bufferMatrixListLength; entry++)
	{
		const u32 start=bufferMatrixList[entry].offset;

		//TODO : better check, need to account for param size
		if(start+2>=endOffset)continue;

		mtx44 adjusted;
		multMatrix44((float*)bufferMatrixList[entry].data, (float*)transformation, (float*)adjusted);

		// re-emit the uniform upload over the original one
		GPUCMD_SetBufferOffset(start);
		gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
	}

	GPUCMD_SetBufferOffset(endOffset);
}
//----------------------
// VBO STUFF
//----------------------
// Puts vbo into the empty state: no vertex data, no vertices, no precomputed commands.
// Does not free anything; use gsVboDestroy() on a VBO that owns memory.
// Returns 0 on success, -1 when vbo is NULL.
int gsVboInit(gsVbo_s* vbo)
{
	if(!vbo)return -1;

	vbo->data=NULL;
	vbo->currentSize=0;
	vbo->maxSize=0;
	// also reset the vertex count so a destroyed/re-initialized VBO carries no stale value
	vbo->numVertices=0;
	vbo->commands=NULL;
	vbo->commandsSize=0;

	return 0;
}
// Allocates size bytes of vertex storage for vbo through gsLinearAlloc()
// and resets its fill level and vertex count.
// Returns 0 on success, -1 when vbo is NULL or the allocation failed
// (in which case the capacity is left at 0 so no data can be added).
int gsVboCreate(gsVbo_s* vbo, u32 size)
{
	if(!vbo)return -1;

	vbo->data=gsLinearAlloc(size);
	vbo->numVertices=0;
	vbo->currentSize=0;

	if(!vbo->data)
	{
		// do not advertise capacity that does not exist
		vbo->maxSize=0;
		return -1;
	}

	vbo->maxSize=size;
	return 0;
}
// Returns the address of the first unused byte of the VBO storage
// (data + currentSize), or NULL when vbo is NULL.
void* gsVboGetOffset(gsVbo_s* vbo)
{
	if(vbo==NULL)return NULL;

	u8* const base=(u8*)vbo->data;
	return (void*)(base+vbo->currentSize);
}
// Appends size bytes from data to the VBO and adds units to its vertex count.
// Returns 0 on success, -1 when an argument is NULL/zero, the VBO has no
// storage, or the remaining capacity is smaller than size.
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
{
	if(!vbo || !vbo->data || !data || !size)return -1;

	// All operands are unsigned: check the ordering first so the subtraction
	// cannot wrap around and make a full/corrupt VBO look like it has room.
	if(vbo->currentSize>vbo->maxSize)return -1;
	if(vbo->maxSize-vbo->currentSize<size)return -1;

	memcpy(gsVboGetOffset(vbo), data, size);
	vbo->currentSize+=size;
	vbo->numVertices+=units;

	return 0;
}
// Currently only validates its argument: the explicit data cache flush is
// disabled because it is unnecessary when flushAndRun is used.
// Returns 0, or -1 when vbo is NULL.
int gsVboFlushData(gsVbo_s* vbo)
{
	if(vbo==NULL)return -1;

	//unnecessary if we use flushAndRun
	// GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);

	return 0;
}
// Frees the precomputed command list and the vertex storage owned by vbo,
// then puts it back into the empty state via gsVboInit().
// Returns 0, or -1 when vbo is NULL.
int gsVboDestroy(gsVbo_s* vbo)
{
	if(!vbo)return -1;

	if(vbo->commands!=NULL)
	{
		free(vbo->commands);
	}
	if(vbo->data!=NULL)
	{
		gsLinearFree(vbo->data);
	}

	gsVboInit(vbo);
	return 0;
}
extern u32 debugValue[];
// Appends the raw GPU commands for a non-indexed draw to the current command buffer.
// primitive : primitive type, written with command header 0x0002025E
// data      : vertex data (virtual address); its physical address shifted right by 3
//             is written with command header 0x000F0200
// n         : number of vertices, written with command header 0x000F0228
// NOTE(review): the purpose of the remaining writes is not documented here and
// their order presumably matters - confirm against GPU register documentation
// before changing or reordering them.
void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
{
//set attribute buffer address
GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
//set primitive type
GPUCMD_AddSingleParam(0x0002025E, primitive);
GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
//index buffer not used for drawArrays but 0x000F0227 still required
GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
//pass number of vertices
GPUCMD_AddSingleParam(0x000F0228, n);
GPUCMD_AddSingleParam(0x00010253, 0x00000001);
// header 0x00010245 is written with 0 before and with 1 after the 0x000F022E write
GPUCMD_AddSingleParam(0x00010245, 0x00000000);
GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
GPUCMD_AddSingleParam(0x00010245, 0x00000001);
GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
}
//not thread safe
// Records the draw commands for vbo once, into a heap copy (vbo->commands),
// so later draws can paste them into the command buffer.
// The commands are generated into a static scratch buffer by temporarily
// redirecting the GPU command buffer, which is why this is not thread safe.
// Returns 0 on success, -1 when vbo is NULL, commands already exist, or the
// allocation for the copy failed.
int gsVboPrecomputeCommands(gsVbo_s* vbo)
{
	if(!vbo || vbo->commands)return -1;

	static u32 tmpBuffer[128];

	u32* savedAdr; u32 savedSize, savedOffset;
	GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);

	// generate the draw commands into the scratch buffer
	GPUCMD_SetBuffer(tmpBuffer, 128, 0);
	GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
	GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);

	// Restore the real command buffer BEFORE anything that can fail, so an
	// allocation failure never leaves the GPU command buffer pointing at tmpBuffer.
	GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);

	vbo->commands=memalign(0x4, vbo->commandsSize*4);
	if(!vbo->commands)
	{
		// keep the "no precomputed commands" state consistent
		vbo->commandsSize=0;
		return -1;
	}
	memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);

	return 0;
}
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufSize;
extern u32 gpuCmdBufOffset;
void _vboMemcpy50(u32* dst, u32* src);
// Copies size 32-bit words from cmd to the current write position of the GPU
// command buffer and advances the write offset by size.
// A 0x50-byte payload goes through the specialised _vboMemcpy50 routine;
// any other length uses memcpy. Does nothing when cmd is NULL or size is 0.
void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
{
	if(!cmd || !size)return;

	u32* const dst=&gpuCmdBuf[gpuCmdBufOffset];

	if(size*4!=0x50)
	{
		memcpy(dst, cmd, size*4);
	}else{
		_vboMemcpy50(dst, cmd);
	}

	gpuCmdBufOffset+=size;
}
// Draws vbo as triangles: uploads any pending matrices, makes sure the
// precomputed command list exists, then pastes it into the command buffer.
// Falls back to emitting the draw commands directly when no precomputed
// list is available.
// Returns 0 on success, -1 when vbo is NULL or holds no data.
int gsVboDraw(gsVbo_s* vbo)
{
	if(!vbo)return -1;
	if(!vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;

	gsUpdateTransformation();

	// no-op (returns -1) when the commands were already computed
	gsVboPrecomputeCommands(vbo);

	// u64 val=svcGetSystemTick();

	if(!vbo->commands)
	{
		GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
	}else{
		_GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
	}

	// debugValue[5]+=(u32)(svcGetSystemTick()-val);
	// debugValue[6]++;

	return 0;
}

View File

@ -1,59 +0,0 @@
#ifndef GS_H
#define GS_H
#include <3ds.h>
#include "math.h"
#define GS_MATRIXSTACK_SIZE (8)
// Identifies one of the matrix stacks managed by GS.
typedef enum
{
GS_PROJECTION = 0,
GS_MODELVIEW = 1,
GS_MATRIXTYPES // number of matrix types; not a valid stack identifier
}GS_MATRIX;
// Vertex buffer object: vertex storage plus a cached copy of its draw commands.
typedef struct
{
u8* data; // vertex storage, allocated by gsVboCreate()
u32 currentSize; // in bytes
u32 maxSize; // in bytes
u32 numVertices; // sum of the unit counts passed to gsVboAddData()
u32* commands; // precomputed draw commands, NULL until gsVboDraw() builds them
u32 commandsSize; // length of commands in 32-bit words
}gsVbo_s;
void gsInit(shaderProgram_s* shader);
void gsExit(void);
void gsStartFrame(void);
void gsAdjustBufferMatrices(mtx44 transformation);
void* gsLinearAlloc(size_t size);
void gsLinearFree(void* mem);
float* gsGetMatrix(GS_MATRIX m);
int gsLoadMatrix(GS_MATRIX m, float* data);
int gsPushMatrix();
int gsPopMatrix();
int gsMatrixMode(GS_MATRIX m);
void gsLoadIdentity();
void gsProjectionMatrix(float fovy, float aspect, float near, float far);
void gsRotateX(float x);
void gsRotateY(float y);
void gsRotateZ(float z);
void gsScale(float x, float y, float z);
void gsTranslate(float x, float y, float z);
int gsMultMatrix(float* data);
int gsVboInit(gsVbo_s* vbo);
int gsVboCreate(gsVbo_s* vbo, u32 size);
int gsVboFlushData(gsVbo_s* vbo);
int gsVboDestroy(gsVbo_s* vbo);
int gsVboDraw(gsVbo_s* vbo);
void* gsVboGetOffset(gsVbo_s* vbo);
int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
#endif

View File

@ -1,354 +0,0 @@
///////////////////////////////////////
// GPU example //
///////////////////////////////////////
//this example is meant to show how to use the GPU to render a 3D object
//it also shows how to do stereoscopic 3D
//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <3ds.h>
#include "math.h"
#include "gs.h"
#include "test_vsh_shbin.h"
#include "texture_bin.h"
//will be moved into ctrulib at some point
#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080)
#define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0))
//transfer from GPU output buffer to actual framebuffer flags
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
//shader structure
DVLB_s* dvlb;
shaderProgram_s shader;
//texture data pointer
u32* texData;
//vbo structure
gsVbo_s vbo;
//GPU framebuffer address
u32* gpuOut=(u32*)0x1F119400;
//GPU depth buffer address
u32* gpuDOut=(u32*)0x1F370800;
//angle for the vertex lighting (cf test.vsh)
float lightAngle;
//object position and rotation angle
vect3Df_s position, angle;
//vertex structure
// One vertex as stored in the VBO: 3 position floats, 2 texcoord floats and
// 3 normal floats, in that order.
typedef struct
{
vect3Df_s position;
float texcoord[2];
vect3Df_s normal;
}vertex_s;
//object data (cube)
//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
//we're drawing triangles so three lines = one triangle
const vertex_s modelVboData[]=
{
//first face (PZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
//second face (MZ)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
//third face (PX)
//first triangle
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
//fourth face (MX)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//second triangle
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
//fifth face (PY)
//first triangle
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
//sixth face (MY)
//first triangle
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
//second triangle
{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
};
//stolen from staplebutt
// Configures texture environment stage num with GPU_PREVIOUS as its source
// and GPU_REPLACE as its combine function, for both color and alpha.
void GPU_SetDummyTexEnv(u8 num)
{
	const u32 previousOnly=GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
	const u32 defaultOperands=GPU_TEVOPERANDS(0,0,0);

	GPU_SetTexEnv(num,
		previousOnly, previousOnly,
		defaultOperands, defaultOperands,
		GPU_REPLACE, GPU_REPLACE,
		0xFFFFFFFF);
}
// topscreen
// Builds the GPU commands for one frame into the current command buffer:
// fixed-function state, texture stage setup, texture and vertex attribute
// bindings, lighting uniforms, the projection/modelview matrices and finally
// the draw of the global VBO.
// Reads the globals gpuOut, gpuDOut, texData, shader, lightAngle, position,
// angle and vbo.
void renderFrame()
{
GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
GPU_DepthMap(-1.0f, 0.0f);
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
GPU_SetTextureEnable(GPU_TEXUNIT0);
// stage 0 modulates texture unit 0 with the primary (vertex) color
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
GPU_TEVOPERANDS(0,0,0),
GPU_TEVOPERANDS(0,0,0),
GPU_MODULATE, GPU_MODULATE,
0xFFFFFFFF);
// stages 1-5 just forward the previous stage's result
GPU_SetDummyTexEnv(1);
GPU_SetDummyTexEnv(2);
GPU_SetDummyTexEnv(3);
GPU_SetDummyTexEnv(4);
GPU_SetDummyTexEnv(5);
//texturing stuff
GPU_SetTexture(
GPU_TEXUNIT0, //texture unit
(u32*)osConvertVirtToPhys((u32)texData), //data buffer
128, //texture width
128, //texture height
GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
GPU_RGBA8 //texture pixel format
);
// NOTE(review): the buffer address passed below is texData, not vbo.data;
// GPU_DrawArrayDirectly() later writes the VBO's own physical address with
// command header 0x000F0200 - confirm that this is what makes the draw read the VBO.
GPU_SetAttributeBuffers(
3, //3 attributes: vertices, texcoords, and normals
(u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal)
0xFFC,
0x210,
1,
(u32[]){0x00000000},
(u64[]){0x210},
(u8[]){3}
);
//setup lighting (this is specific to our shader)
vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle)));
// both uniforms are passed with their components in reverse order (last element is x)
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1);
//initialize projection matrix to standard perspective stuff
gsMatrixMode(GS_PROJECTION);
gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
gsRotateZ(M_PI/2); //because framebuffer is sideways...
//draw object
gsMatrixMode(GS_MODELVIEW);
// push/pop keeps the object transform from accumulating across frames
gsPushMatrix();
gsTranslate(position.x, position.y, position.z);
gsRotateX(angle.x);
gsRotateY(angle.y);
gsVboDraw(&vbo);
gsPopMatrix();
GPU_FinishDrawing();
}
// Program entry point.
// Sets up graphics, the GPU command buffers, the shader, the texture and the
// cube VBO, then runs the frame loop until START is pressed:
//   - reads input and updates light angle / object rotation / object distance
//   - builds the frame's command buffer with renderFrame()
//   - renders either one view (2D) or a left and a right view (3D slider > 0)
//   - transfers the result to the framebuffer(s) and clears the render target
// On exit every resource allocated here is released.
// Always returns 0.
int main(int argc, char** argv)
{
	gfxInitDefault();

	//initialize GPU
	GPU_Init(NULL);

	//let GFX know we're ok with doing stereoscopic 3D rendering
	gfxSet3D(true);

	//allocate our GPU command buffers
	//they *have* to be on the linear heap
	u32 gpuCmdSize=0x40000;
	u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
	u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);

	//actually reset the GPU
	GPU_Reset(NULL, gpuCmd, gpuCmdSize);

	//load our vertex shader binary
	dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
	shaderProgramInit(&shader);
	shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);

	//initialize GS
	gsInit(&shader);

	// Flush the command buffer so that the shader upload gets executed
	GPUCMD_Finalize();
	GPUCMD_FlushAndRun(NULL);
	gspWaitForP3D();

	//create texture
	texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
	memcpy(texData, texture_bin, texture_bin_size);

	//create VBO
	gsVboInit(&vbo);
	gsVboCreate(&vbo, sizeof(modelVboData));
	gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
	gsVboFlushData(&vbo);

	//initialize object position and angle
	position=vect3Df(0.0f, 0.0f, -2.0f);
	angle=vect3Df(M_PI/4, M_PI/4, 0.0f);

	//background color (blue)
	u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);

	while(aptMainLoop())
	{
		//get current 3D slider state
		float slider=CONFIG_3D_SLIDERSTATE;

		//controls
		hidScanInput();
		//START to exit to hbmenu
		if(keysDown()&KEY_START)break;

		//A/B to change vertex lighting angle
		if(keysHeld()&KEY_A)lightAngle+=0.1f;
		if(keysHeld()&KEY_B)lightAngle-=0.1f;

		//D-PAD to rotate object
		if(keysHeld()&KEY_DOWN)angle.x+=0.05f;
		if(keysHeld()&KEY_UP)angle.x-=0.05f;
		if(keysHeld()&KEY_LEFT)angle.y+=0.05f;
		if(keysHeld()&KEY_RIGHT)angle.y-=0.05f;

		//R/L to bring object closer to or move it further from the camera
		if(keysHeld()&KEY_R)position.z+=0.1f;
		if(keysHeld()&KEY_L)position.z-=0.1f;

		//generate our GPU command buffer for this frame
		gsStartFrame();
		renderFrame();
		GPUCMD_Finalize();

		if(slider>0.0f)
		{
			//new and exciting 3D !
			//make a copy of left gpu buffer
			u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
			memcpy(gpuCmdRight, gpuCmd, offset*4);

			//setup interaxial
			float interaxial=slider*0.12f;

			//adjust left gpu buffer for 3D !
			{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}

			//draw left framebuffer
			GPUCMD_FlushAndRun(NULL);

			//while GPU starts drawing the left buffer, adjust right one for 3D !
			GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
			{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}

			//we wait for the left buffer to finish drawing
			gspWaitForP3D();
			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
			gspWaitForPPF();

			//we draw the right buffer, wait for it to finish and then switch back to left one
			//clear the screen
			GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
			gspWaitForPSC0();

			//draw the right framebuffer
			GPUCMD_FlushAndRun(NULL);
			gspWaitForP3D();

			//transfer from GPU output buffer to actual framebuffer
			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
			gspWaitForPPF();
			GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
		}else{
			//boring old 2D !

			//draw the frame
			GPUCMD_FlushAndRun(NULL);
			gspWaitForP3D();

			//transfer from GPU output buffer to actual framebuffer
			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
			gspWaitForPPF();
		}

		//clear the screen
		GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
		gspWaitForPSC0();

		gfxSwapBuffersGpu();
		gspWaitForEvent(GSPEVENT_VBlank0, true);
	}

	//release everything we allocated
	//the VBO has to go before gsExit(), which closes the mutex gsLinearFree() relies on
	gsVboDestroy(&vbo);
	linearFree(texData);
	texData=NULL;

	gsExit();
	shaderProgramFree(&shader);
	DVLB_Free(dvlb);

	//all GPU work has completed at this point, so the command buffers can go too
	linearFree(gpuCmdRight);
	linearFree(gpuCmd);

	gfxExit();
	return 0;
}

View File

@ -1,148 +0,0 @@
#include <math.h>
#include <string.h>
#include "math.h"
// Writes the 4x4 identity matrix into the 16 floats at m.
// Does nothing when m is NULL.
void loadIdentity44(float* m)
{
	int idx;

	if(!m)return;

	// diagonal elements sit at indices 0, 5, 10 and 15
	for(idx=0; idx<16; idx++)
	{
		m[idx]=(idx%5==0)?1.0f:0.0f;
	}
}
// 4x4 matrix product: m = m1 * m2, with element (row, col) stored at index row*4+col.
// m must not overlap m1 or m2, since the result is written while the inputs are still being read.
void multMatrix44(float* m1, float* m2, float* m) //4x4
{
	int col, row;

	for(col=0; col<4; col++)
	{
		for(row=0; row<4; row++)
		{
			const float* lhs=&m1[row*4];

			m[row*4+col]=(lhs[0]*m2[col+0*4])+(lhs[1]*m2[col+1*4])+(lhs[2]*m2[col+2*4])+(lhs[3]*m2[col+3*4]);
		}
	}
}
// Replaces tm with tm * T, where T is the identity matrix with (x, y, z)
// stored at indices 3, 7 and 11.
void translateMatrix(float* tm, float x, float y, float z)
{
	float translation[16]={
		1.0f, 0.0f, 0.0f, x,
		0.0f, 1.0f, 0.0f, y,
		0.0f, 0.0f, 1.0f, z,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float product[16];

	multMatrix44(tm, translation, product);
	memcpy(tm, product, 16*sizeof(float));
}
// 00 01 02 03
// 04 05 06 07
// 08 09 10 11
// 12 13 14 15
// Combines tm with a rotation matrix R built from angle x that leaves
// row/column 0 untouched. tm becomes tm * R when r is false, R * tm when r is true.
void rotateMatrixX(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rm[16]={
		1.0f, 0.0f, 0.0f, 0.0f,
		0.0f, c,    s,    0.0f,
		0.0f, -s,   c,    0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float m[16];

	if(r)multMatrix44(rm,tm,m);
	else multMatrix44(tm,rm,m);

	memcpy(tm,m,16*sizeof(float));
}
// Combines tm with a rotation matrix R built from angle x that leaves
// row/column 1 untouched. tm becomes tm * R when r is false, R * tm when r is true.
void rotateMatrixY(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rm[16]={
		c,    0.0f, s,    0.0f,
		0.0f, 1.0f, 0.0f, 0.0f,
		-s,   0.0f, c,    0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float m[16];

	if(r)multMatrix44(rm,tm,m);
	else multMatrix44(tm,rm,m);

	memcpy(tm,m,16*sizeof(float));
}
// Combines tm with a rotation matrix R built from angle x that leaves
// row/column 2 untouched. tm becomes tm * R when r is false, R * tm when r is true.
void rotateMatrixZ(float* tm, float x, bool r)
{
	const float c=cos(x);
	const float s=sin(x);
	float rm[16]={
		c,    s,    0.0f, 0.0f,
		-s,   c,    0.0f, 0.0f,
		0.0f, 0.0f, 1.0f, 0.0f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};
	float m[16];

	if(r)multMatrix44(rm,tm,m);
	else multMatrix44(tm,rm,m);

	memcpy(tm,m,16*sizeof(float));
}
// Scales tm in place: in each of the four rows, element 0 is multiplied by x,
// element 1 by y and element 2 by z. Element 3 of every row is left unchanged.
void scaleMatrix(float* tm, float x, float y, float z)
{
	int row;

	for(row=0; row<4; row++)
	{
		float* const line=&tm[row*4];

		line[0]*=x;
		line[1]*=y;
		line[2]*=z;
	}
}
// Writes a perspective projection matrix into m.
// A perspective matrix is built from fovy, aspect, near and far,
// then multiplied on the left by a matrix that replaces row 2 with
// 0.5*row2 - 0.5*row3.
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
{
	const float top = near*tan(fovy/2);
	const float right = (top*aspect);

	float perspective[4*4]={
		near/right, 0.0f,     0.0f,                     0.0f,
		0.0f,       near/top, 0.0f,                     0.0f,
		0.0f,       0.0f,     -(far+near)/(far-near),   -2.0f*(far*near)/(far-near),
		0.0f,       0.0f,     -1.0f,                    0.0f,
	};

	float depthRemap[4*4]={
		1.0f, 0.0f, 0.0f, 0.0f,
		0.0f, 1.0f, 0.0f, 0.0f,
		0.0f, 0.0f, 0.5f, -0.5f,
		0.0f, 0.0f, 0.0f, 1.0f,
	};

	multMatrix44(depthRemap, perspective, m);
}
// Returns elements i*4+0, i*4+1 and i*4+2 of m as a 3D vector,
// or a zero vector when m is NULL or i is not in 0..3.
vect3Df_s getMatrixColumn(float* m, u8 i)
{
	if(m && i<4)
	{
		const float* group=&m[i*4];
		return vect3Df(group[0],group[1],group[2]);
	}
	return vect3Df(0,0,0);
}
// Returns elements i, i+4 and i+8 of m as a 3D vector,
// or a zero vector when m is NULL or i is not in 0..3.
vect3Df_s getMatrixRow(float* m, u8 i)
{
	if(m && i<4)
	{
		return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]);
	}
	return vect3Df(0,0,0);
}
// Returns elements i*4+0 .. i*4+3 of m as a 4D vector,
// or a zero vector when m is NULL or i is not in 0..3.
vect4Df_s getMatrixColumn4(float* m, u8 i)
{
	if(m && i<4)
	{
		const float* group=&m[i*4];
		return vect4Df(group[0],group[1],group[2],group[3]);
	}
	return vect4Df(0,0,0,0);
}
// Returns elements i, i+4, i+8 and i+12 of m as a 4D vector,
// or a zero vector when m is NULL or i is not in 0..3.
vect4Df_s getMatrixRow4(float* m, u8 i)
{
	if(m && i<4)
	{
		return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]);
	}
	return vect4Df(0,0,0,0);
}

View File

@ -1,144 +0,0 @@
#ifndef MATH_H
#define MATH_H
#include <3ds/types.h>
#include <math.h>
typedef float mtx44[4][4];
typedef float mtx33[3][3];
// 3D vector with signed 32-bit integer components.
typedef struct
{
	s32 x, y, z;
}vect3Di_s;

// Builds a vector from its three components.
static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
{
	vect3Di_s out={x,y,z};
	return out;
}

// Component-wise sum u+v.
static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
{
	vect3Di_s out={u.x+v.x,u.y+v.y,u.z+v.z};
	return out;
}

// Component-wise difference u-v.
static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
{
	vect3Di_s out={u.x-v.x,u.y-v.y,u.z-v.z};
	return out;
}

// Every component of v multiplied by the scalar f.
static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
{
	vect3Di_s out={v.x*f,v.y*f,v.z*f};
	return out;
}
typedef struct
{
float x, y, z;
}vect3Df_s;
static inline vect3Df_s vect3Df(float x, float y, float z)
{
return (vect3Df_s){x,y,z};
}
static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
}
static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
{
return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
}
static inline vect3Df_s vmulf(vect3Df_s v, float f)
{
return (vect3Df_s){v.x*f,v.y*f,v.z*f};
}
static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
{
return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
}
static inline float vmagf(vect3Df_s v)
{
return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
}
static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
{
return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
}
static inline vect3Df_s vnormf(vect3Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
return (vect3Df_s){v.x/l,v.y/l,v.z/l};
}
typedef struct
{
float x, y, z, w;
}vect4Df_s;
static inline vect4Df_s vect4Df(float x, float y, float z, float w)
{
return (vect4Df_s){x,y,z,w};
}
static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
}
static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
{
return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
}
static inline vect4Df_s vmulf4(vect4Df_s v, float f)
{
return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
}
static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
{
return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
}
static inline vect4Df_s vnormf4(vect4Df_s v)
{
const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
}
//interstuff
// Converts a float vector to an integer vector, rounding each component
// down with floorf() before the conversion.
static inline vect3Di_s vf2i(vect3Df_s v)
{
	vect3Di_s out={(s32)floorf(v.x),(s32)floorf(v.y),(s32)floorf(v.z)};
	return out;
}
// Converts an integer vector to a float vector, component by component.
static inline vect3Df_s vi2f(vect3Di_s v)
{
	vect3Df_s out={(float)v.x,(float)v.y,(float)v.z};
	return out;
}
void loadIdentity44(float* m);
void multMatrix44(float* m1, float* m2, float* m);
void translateMatrix(float* tm, float x, float y, float z);
void rotateMatrixX(float* tm, float x, bool r);
void rotateMatrixY(float* tm, float x, bool r);
void rotateMatrixZ(float* tm, float x, bool r);
void scaleMatrix(float* tm, float x, float y, float z);
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
vect3Df_s getMatrixColumn(float* m, u8 i);
vect3Df_s getMatrixRow(float* m, u8 i);
vect4Df_s getMatrixColumn4(float* m, u8 i);
vect4Df_s getMatrixRow4(float* m, u8 i);
#endif

View File

@ -75,6 +75,7 @@ export DEPSDIR := $(CURDIR)/$(BUILD)
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
@ -91,7 +92,7 @@ else
endif
#---------------------------------------------------------------------------------
export OFILES := $(addsuffix .o,$(BINFILES)) \
export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \
$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
@ -156,17 +157,18 @@ $(OUTPUT).elf : $(OFILES)
@echo $(notdir $<)
@$(bin2o)
# WARNING: This is not the right way to do this! TODO: Do it right!
#---------------------------------------------------------------------------------
%_vsh.h %.vsh.o : %.vsh
# rule for assembling GPU shaders
#---------------------------------------------------------------------------------
%.shbin.o: %.pica
@echo $(notdir $<)
@python3 $(AEMSTRO)/aemstro_as.py $< ../$(notdir $<).shbin
@bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@
@echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h
@echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h
@echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h
@rm ../$(notdir $<).shbin
$(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<)))
$(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<)))
@picasso $(CURBIN) $< $(CURH)
@bin2s $(CURBIN) | $(AS) -o $@
@echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
@echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
@echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
-include $(DEPENDS)

View File

@ -0,0 +1,6 @@
# GPU example
This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up.
Users of earlier versions of devkitARM need to install the tool separately; it can be found at the address below:
https://github.com/fincs/picasso/releases

View File

@ -0,0 +1,172 @@
#include "3dmath.h"
/* Overwrites *out with the 4x4 identity matrix. */
void m4x4_identity(matrix_4x4* out)
{
    /* Clear everything, then place the ones on the main diagonal */
    m4x4_zeros(out);
    out->r[3].w = 1.0f;
    out->r[2].z = 1.0f;
    out->r[1].y = 1.0f;
    out->r[0].x = 1.0f;
}
/*
 * Computes the matrix product out = a * b.
 *
 * r[] holds the rows of a matrix: element c[i] of row j of the result is the
 * dot product of row j of `a` with the c[i] elements of the four rows of `b`.
 *
 * The product is accumulated in a local temporary and only stored at the end,
 * so `out` may safely be the same object as `a` and/or `b`.
 */
void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b)
{
    matrix_4x4 result;
    int i, j;
    for (j = 0; j < 4; j ++)
    {
        const vector_4f* row = &a->r[j];
        for (i = 0; i < 4; i ++)
            result.r[j].c[i] = row->x*b->r[0].c[i] + row->y*b->r[1].c[i] + row->z*b->r[2].c[i] + row->w*b->r[3].c[i];
    }
    /* Store last: the inputs are no longer needed, so aliasing is harmless */
    *out = result;
}
/*
 * Post-multiplies *mtx by a translation matrix: mtx = mtx * T, where T is the
 * identity with x, y and z stored in the w element of rows 0, 1 and 2.
 */
void m4x4_translate(matrix_4x4* mtx, float x, float y, float z)
{
    matrix_4x4 translation, product;

    m4x4_identity(&translation);
    translation.r[2].w = z;
    translation.r[1].w = y;
    translation.r[0].w = x;

    m4x4_multiply(&product, mtx, &translation);
    m4x4_copy(mtx, &product);
}
/*
 * Scales *mtx in place: the x, y and z elements of every row are multiplied
 * by the matching factor. The w elements are left untouched.
 */
void m4x4_scale(matrix_4x4* mtx, float x, float y, float z)
{
    vector_4f* row = mtx->r;
    vector_4f* const rowsEnd = mtx->r + 4;

    for (; row != rowsEnd; ++row)
    {
        row->x *= x;
        row->y *= y;
        row->z *= z;
    }
}
/*
 * Multiplies *mtx in place by a rotation about the X axis.
 * `angle` is passed straight to cosf/sinf (radians).
 * bRightSide selects the side: true gives mtx = mtx * R, false gives mtx = R * mtx.
 */
void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rot, product;

    /* Start from the identity and overwrite the 2x2 block that mixes Y and Z */
    m4x4_identity(&rot);
    rot.r[1].y = c;
    rot.r[1].z = s;
    rot.r[2].y = -s;
    rot.r[2].z = c;

    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
    m4x4_multiply(&product, lhs, rhs);
    m4x4_copy(mtx, &product);
}
/*
 * Multiplies *mtx in place by a rotation about the Y axis.
 * `angle` is passed straight to cosf/sinf (radians).
 * bRightSide selects the side: true gives mtx = mtx * R, false gives mtx = R * mtx.
 */
void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rot, product;

    /* Start from the identity and overwrite the 2x2 block that mixes X and Z */
    m4x4_identity(&rot);
    rot.r[0].x = c;
    rot.r[0].z = s;
    rot.r[2].x = -s;
    rot.r[2].z = c;

    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
    m4x4_multiply(&product, lhs, rhs);
    m4x4_copy(mtx, &product);
}
/*
 * Multiplies *mtx in place by a rotation about the Z axis.
 * `angle` is passed straight to cosf/sinf (radians).
 * bRightSide selects the side: true gives mtx = mtx * R, false gives mtx = R * mtx.
 */
void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rot, product;

    /* Start from the identity and overwrite the 2x2 block that mixes X and Y */
    m4x4_identity(&rot);
    rot.r[0].x = c;
    rot.r[0].y = s;
    rot.r[1].x = -s;
    rot.r[1].y = c;

    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
    m4x4_multiply(&product, lhs, rhs);
    m4x4_copy(mtx, &product);
}
/*
 * Builds an orthographic projection adapted to the 3DS screens and stores it
 * in *mtx. The result is swapXY * depthFix * ortho, i.e. three stages:
 *   1. an orthographic projection of the given box,
 *   2. a remap of the depth range to [-1, 0],
 *   3. a swap of the X and Y axes (one of them negated) to account for the
 *      screens' orientation.
 */
void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far)
{
    matrix_4x4 ortho, depthFix, swapXY, orthoDepth;

    // Stage 1: orthographic projection (row 3 keeps the identity's w = 1)
    m4x4_identity(&ortho);
    ortho.r[0].x = 2.0f / (right - left);
    ortho.r[0].w = (left + right) / (left - right);
    ortho.r[1].y = 2.0f / (top - bottom);
    ortho.r[1].w = (bottom + top) / (bottom - top);
    ortho.r[2].z = 2.0f / (near - far);
    ortho.r[2].w = (far + near) / (far - near);

    // Stage 2: z' = 0.5*z - 0.5*w
    m4x4_identity(&depthFix);
    depthFix.r[2].w = -0.5;
    depthFix.r[2].z = 0.5;
    m4x4_multiply(&orthoDepth, &depthFix, &ortho);

    // Stage 3: x' = y, y' = -x (flipped); z and w pass through
    m4x4_zeros(&swapXY);
    swapXY.r[0].y = 1.0;
    swapXY.r[1].x = -1.0;
    swapXY.r[2].z = 1.0f;
    swapXY.r[3].w = 1.0f;
    m4x4_multiply(mtx, &swapXY, &orthoDepth);
}
/*
 * Builds a perspective projection adapted to the sideways 3DS screens and
 * stores it in *mtx.
 *
 * Callers pass what they think of as "fovy" and the "aspect ratio". Because
 * the screens are sideways, those values are really the fovx and the inverse
 * aspect ratio (invaspect = h/w), so the standard matrix is re-expressed in
 * those terms:
 *   fovy = 2 atan(tan(fovx/2)*h/w)
 *   a0,0 = h / (w*tan(fovy/2)) = h / (w * tan(fovx/2)*h/w) = 1 / tan(fovx/2)
 *   a1,1 = 1 / tan(fovy/2)     = w / (h*tan(fovx/2))
 */
void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far)
{
    const float tanHalfFov = tanf(fovx / 2);
    matrix_4x4 persp, depthFix;

    // Perspective projection written in terms of fovx and invaspect
    m4x4_zeros(&persp);
    persp.r[0].x = 1.0f / tanHalfFov;
    persp.r[1].y = 1.0f / (tanHalfFov*invaspect);
    persp.r[2].z = (near + far) / (near - far);
    persp.r[2].w = (2 * near * far) / (near - far);
    persp.r[3].z = -1.0f;

    // Remap the depth range to [-1, 0]: z' = 0.5*z - 0.5*w
    m4x4_identity(&depthFix);
    depthFix.r[2].w = -0.5;
    depthFix.r[2].z = 0.5;
    m4x4_multiply(mtx, &depthFix, &persp);

    // Quarter turn CCW about Z to compensate for the screens' orientation
    m4x4_rotate_z(mtx, M_PI / 2, true);
}

View File

@ -0,0 +1,56 @@
/*
* Bare-bones simplistic 3D math library
* This library is common to all libctru GPU examples
*/
#pragma once
#include <string.h>
#include <stdbool.h>
#include <math.h>
/*
 * 4-component float vector. The named members are declared in w, z, y, x
 * order, so c[0] aliases w and c[3] aliases x.
 */
typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f;
/* 4x4 float matrix stored as four vector_4f entries, r[0] to r[3]. */
typedef struct { vector_4f r[4]; } matrix_4x4;

/* Returns the 4-component dot product of *a and *b. */
static inline float v4f_dp4(const vector_4f* a, const vector_4f* b)
{
    float sum = a->x*b->x;
    sum += a->y*b->y;
    sum += a->z*b->z;
    sum += a->w*b->w;
    return sum;
}

/* Returns the Euclidean length of *a, taken over all four components. */
static inline float v4f_mod4(const vector_4f* a)
{
    const float lengthSquared = v4f_dp4(a, a);
    return sqrtf(lengthSquared);
}

/* Scales *vec in place to unit length; a zero-length vector is left unchanged. */
static inline void v4f_norm4(vector_4f* vec)
{
    const float length = v4f_mod4(vec);
    if (length != 0.0)
    {
        int k;
        for (k = 0; k < 4; k ++)
            vec->c[k] /= length;
    }
}

/* Sets every element of *out to zero. */
static inline void m4x4_zeros(matrix_4x4* out)
{
    memset(out, 0, sizeof(matrix_4x4));
}

/* Copies the matrix *in into *out. */
static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in)
{
    memcpy(out, in, sizeof(matrix_4x4));
}
// Sets *out to the identity matrix
void m4x4_identity(matrix_4x4* out);
// Matrix product: out = a * b
void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b);
// Multiplies mtx on the right by a translation by (x, y, z)
void m4x4_translate(matrix_4x4* mtx, float x, float y, float z);
// Multiplies the x, y and z elements of every row of mtx by the given factors
void m4x4_scale(matrix_4x4* mtx, float x, float y, float z);
// Rotations about the X, Y and Z axes; the angle is passed to cosf/sinf.
// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide);
void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide);
void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide);
// Special versions of the projection matrices that take the 3DS' screen orientation into account
void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far);
// Note: the definition names the second and third parameters fovx and invaspect,
// because with the screens sideways that is what the passed values really are.
void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far);

View File

@ -0,0 +1,93 @@
#include "gpu.h"
// Flags passed to GX_SetDisplayTransfer in gpuFrameEnd: no vertical flip,
// non-tiled output, no raw copy, RGBA8 input, RGB8 output, no scaling.
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
// Color and depth render targets (allocated with vramAlloc in gpuInit)
static u32 *colorBuf, *depthBuf;
// GPU command buffer (allocated with linearAlloc in gpuInit)
static u32 *cmdBuf;
// Allocates the color/depth buffers and the command buffer, then initializes
// the GPU and hands it the command buffer.
void gpuInit(void)
{
    enum
    {
        RENDER_TARGET_BYTES = 400*240*4,
        CMDBUF_SIZE         = 0x40000, // value given to GPU_Reset; 4x as many bytes are allocated
    };

    colorBuf = vramAlloc(RENDER_TARGET_BYTES);
    depthBuf = vramAlloc(RENDER_TARGET_BYTES);
    cmdBuf = linearAlloc(CMDBUF_SIZE*4);

    GPU_Init(NULL);
    GPU_Reset(NULL, cmdBuf, CMDBUF_SIZE);
}
// Releases the buffers allocated by gpuInit, in reverse order of allocation
void gpuExit(void)
{
linearFree(cmdBuf);
vramFree(depthBuf);
vramFree(colorBuf);
}
// Fills the color buffer with clearColor and the depth buffer with 0,
// and blocks until the fill has completed.
void gpuClearBuffers(u32 clearColor)
{
    // Both buffers hold 240*400 32-bit entries; the fill takes start and end pointers
    u32* const colorEnd = colorBuf + 240*400;
    u32* const depthEnd = depthBuf + 240*400;

    GX_SetMemoryFill(NULL,
        colorBuf, clearColor, colorEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH,
        depthBuf, 0, depthEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);

    gspWaitForPSC0(); // Wait for the fill to complete
}
// Queues the per-frame GPU state: viewport, depth mapping, culling, stencil,
// blending, depth/alpha tests, and a no-op configuration for all six TexEnv stages.
// Scene code is expected to run between gpuFrameBegin and gpuFrameEnd.
void gpuFrameBegin(void)
{
// Configure the viewport and the depth linear conversion function
GPU_SetViewport(
(u32*)osConvertVirtToPhys((u32)depthBuf),
(u32*)osConvertVirtToPhys((u32)colorBuf),
0, 0, 240, 400); // The top screen is physically 240x400 pixels
GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0
// Configure some boilerplate
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
// This is unknown
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
// Configure alpha blending and test
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
// Reset all six TexEnv stages to a pass-through; the scene overrides the ones it needs
int i;
for (i = 0; i < 6; i ++)
GPU_SetDummyTexEnv(i);
}
// Ends the frame: submits the queued GPU commands, waits for rendering to
// finish, copies the color buffer to the top-left framebuffer and rewinds
// the command buffer for the next frame.
void gpuFrameEnd(void)
{
    // Finish rendering
    GPU_FinishDrawing();
    GPUCMD_Finalize();
    GPUCMD_FlushAndRun(NULL);
    gspWaitForP3D(); // Wait for the rendering to complete

    // Transfer the GPU output to the framebuffer
    GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400),
        (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400),
        DISPLAY_TRANSFER_FLAGS);
    gspWaitForPPF(); // Wait for the transfer to complete

    // Reset the command buffer
    GPUCMD_SetBufferOffset(0);
}
// Configures TexEnv stage `id` as a no-operation: both the RGB and the alpha
// combiner replace their output with the previous stage's result.
void GPU_SetDummyTexEnv(int id)
{
    const int passPrevious = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
    const int plainOperands = GPU_TEVOPERANDS(0, 0, 0);

    GPU_SetTexEnv(id,
        passPrevious, passPrevious,   // RGB, alpha sources
        plainOperands, plainOperands, // RGB, alpha operands
        GPU_REPLACE, GPU_REPLACE,     // RGB, alpha combine functions
        0xFFFFFFFF);
}

View File

@ -0,0 +1,26 @@
/*
* Bare-bones simplistic GPU wrapper
* This library is common to all libctru GPU examples
*/
#pragma once
#include <string.h>
#include <3ds.h>
#include "3dmath.h"
// Allocates the color, depth and command buffers and initializes the GPU
void gpuInit(void);
// Frees the buffers allocated by gpuInit
void gpuExit(void);
// Fills the color buffer with clearColor and the depth buffer with 0, then waits for the fill
void gpuClearBuffers(u32 clearColor);
// Queues the default per-frame GPU state; call before issuing draw commands
void gpuFrameBegin(void);
// Runs the queued commands, waits for them, and transfers the result to the top screen framebuffer
void gpuFrameEnd(void);
// Configures the specified fixed-function fragment shading substage to be a no-operation
void GPU_SetDummyTexEnv(int id);
// Uploads a 4x4 matrix as a float uniform: the four vectors of `matrix`
// are passed to GPU_SetFloatUniform starting at `location`.
static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix)
{
    u32* const rawData = (u32*)matrix;
    const int numVectors = sizeof(matrix->r) / sizeof(matrix->r[0]); // 4
    GPU_SetFloatUniform(type, location, rawData, numVectors);
}

View File

@ -0,0 +1,91 @@
; Example PICA200 geometry shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.5)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
.alias half myconst.wwww
; Outputs - this time the type *is* used
.out outpos position
.out outclr color
; Inputs: we will receive the following inputs:
; v0-v1: position/color of the first vertex
; v2-v3: position/color of the second vertex
; v4-v5: position/color of the third vertex
; Entry point of the geometry shader.
; Takes the three input positions (v0, v2, v4), computes the midpoint of each
; edge and emits three smaller triangles, each made of one original vertex
; and the midpoints of its two adjacent edges.
; Register usage:
;   r4 = (v0 + v2) / 2, r5 = (v2 + v4) / 2, r6 = (v4 + v0) / 2
;   r0-r2 = positions handed to emit_triangle
.proc main
; Calculate the midpoints of the vertices
mov r4, v0
add r4, v2, r4
mul r4, half, r4
mov r5, v2
add r5, v4, r5
mul r5, half, r5
mov r6, v4
add r6, v0, r6
mul r6, half, r6
; Emit the first triangle
; (v0, mid(v0,v2), mid(v4,v0))
mov r0, v0
mov r1, r4
mov r2, r6
call emit_triangle
; Emit the second triangle
; (mid(v0,v2), v2, mid(v2,v4))
mov r0, r4
mov r1, v2
mov r2, r5
call emit_triangle
; Emit the third triangle
; (mid(v4,v0), mid(v2,v4), v4)
mov r0, r6
mov r1, r5
mov r2, v4
call emit_triangle
; We're finished
end
.end
; Subroutine: emits one triangle
; Inputs:
;   r0, r1, r2: positions of the first, second and third vertex
; The colors are always read from v1, v3 and v5 (the colors of the original
; input vertices), even when the position passed in is an edge midpoint.
; Overwrites r8 and r9, which carry the arguments of process_vertex.
.proc emit_triangle
; Emit the first vertex
setemit 0
mov r8, r0
mov r9, v1
call process_vertex
emit
; Emit the second vertex
setemit 1
mov r8, r1
mov r9, v3
call process_vertex
emit
; Emit the third vertex and finish the primitive
setemit 2, prim
mov r8, r2
mov r9, v5
call process_vertex
emit
.end
; Subroutine: computes the outputs of a single vertex
; Inputs:
; r8: vertex position
; r9: vertex color
; Outputs:
; outpos: position transformed by the projection matrix uniform
; outclr: copy of r9
.proc process_vertex
; outpos = projectionMatrix * r8
; (one dot product per matrix row)
dp4 outpos.x, projection[0], r8
dp4 outpos.y, projection[1], r8
dp4 outpos.z, projection[2], r8
dp4 outpos.w, projection[3], r8
; outclr = r9
mov outclr, r9
.end

View File

@ -0,0 +1,139 @@
/*
* ~~ Simple libctru GPU geometry shader example ~~
* This example demonstrates the basics of using the PICA200 in a 3DS homebrew
* application in order to render a basic scene using a geoshader.
* The example geoshader receives the vertices of a triangle and emits three
* smaller triangles, thus forming a 'triforce' shape.
*/
#include "gpu.h"
#include "vshader_shbin.h"
#include "gshader_shbin.h"
#define CLEAR_COLOR 0x68B0D8FF
typedef struct { float position[3]; float color[4]; } vertex;
static const vertex vertex_list[] =
{
{ {200.0f, 200.0f, 0.5f}, {1.0f, 0.0f, 0.0f, 1.0f} },
{ {100.0f, 40.0f, 0.5f}, {0.0f, 1.0f, 0.0f, 1.0f} },
{ {300.0f, 40.0f, 0.5f}, {0.0f, 0.0f, 1.0f, 1.0f} },
};
#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
static DVLB_s *vshader_dvlb, *gshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection;
static matrix_4x4 projection;
static void* vbo_data;
// One-time scene setup: loads both shaders into a shader program, looks up the
// projection uniform of the geometry shader, computes the projection matrix
// and copies the vertex data into a linearAlloc'd buffer.
static void sceneInit(void)
{
// Load the shaders and create a shader program
// The geoshader stride is set to 6 so that it processes a triangle at a time
vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
gshader_dvlb = DVLB_ParseFile((u32*)gshader_shbin, gshader_shbin_size);
shaderProgramInit(&program);
shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
shaderProgramSetGsh(&program, &gshader_dvlb->DVLE[0], 6);
// Get the location of the projection matrix uniform
// (it belongs to the geometry shader here, not the vertex shader)
uLoc_projection = shaderInstanceGetUniformLocation(program.geometryShader, "projection");
// Compute the projection matrix
m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0);
// Create the VBO (vertex buffer object)
vbo_data = linearAlloc(sizeof(vertex_list));
memcpy(vbo_data, vertex_list, sizeof(vertex_list));
}
// Queues the GPU commands that draw the scene: binds the shader program,
// sets up TexEnv stage 0 and the vertex inputs, uploads the projection matrix
// and issues the draw call.
static void sceneRender(void)
{
// Bind the shader program
shaderProgramUse(&program);
// Configure the first fragment shading substage to just pass through the vertex color
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels
GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha
GPU_TEVOPERANDS(0, 0, 0), // RGB
GPU_TEVOPERANDS(0, 0, 0), // Alpha
GPU_REPLACE, GPU_REPLACE, // RGB, Alpha
0xFFFFFFFF);
// Configure the "attribute buffers" (that is, the vertex input buffers)
GPU_SetAttributeBuffers(
2, // Number of inputs per vertex
(u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) |
GPU_ATTRIBFMT(1, 4, GPU_FLOAT), // Format of the inputs (a 3-element float position followed by a 4-element float color)
0xFFC, // Unused attribute mask, in our case bits 0 and 1 are cleared since both attributes are used
0x10, // Attribute permutations (here it is the identity)
1, // Number of buffers
(u32[]) { 0x0 }, // Buffer offsets (placeholders)
(u64[]) { 0x10 }, // Attribute permutations for each buffer (identity again)
(u8[]) { 2 }); // Number of attributes for each buffer
// Upload the projection matrix
GPU_SetFloatUniformMatrix(GPU_GEOMETRY_SHADER, uLoc_projection, &projection);
// Draw the VBO - GPU_UNKPRIM allows the geoshader to control primitive emission
GPU_DrawArray(GPU_UNKPRIM, vertex_list_count);
}
// Releases everything created by sceneInit: the VBO, the shader program and
// both parsed shader binaries.
static void sceneExit(void)
{
// Free the VBO
linearFree(vbo_data);
// Free the shader program
shaderProgramFree(&program);
DVLB_Free(vshader_dvlb);
DVLB_Free(gshader_dvlb);
}
// Program entry point: initializes graphics and the scene, renders one frame
// per loop iteration until START is pressed (or aptMainLoop returns false),
// then tears everything down in reverse order.
int main()
{
// Initialize graphics
gfxInitDefault();
gpuInit();
// Initialize the scene
sceneInit();
// Clear once up front; afterwards the buffers are cleared at the end of every frame
gpuClearBuffers(CLEAR_COLOR);
// Main loop
while (aptMainLoop())
{
gspWaitForVBlank(); // Synchronize with the start of VBlank
gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame that we rendered last frame is now visible
hidScanInput(); // Read the user input
// Respond to user input
u32 kDown = hidKeysDown();
if (kDown & KEY_START)
break; // break in order to return to hbmenu
// Render the scene
gpuFrameBegin();
sceneRender();
gpuFrameEnd();
gpuClearBuffers(CLEAR_COLOR);
// Flush the framebuffers out of the data cache (not necessary with pure GPU rendering)
//gfxFlushBuffers();
}
// Deinitialize the scene
sceneExit();
// Deinitialize graphics
gpuExit();
gfxExit();
return 0;
}

View File

@ -0,0 +1,24 @@
; Example PICA200 vertex shader
; Constants
.constf myconst(0.0, 1.0, -1.0, -0.5)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs - since we are also using a geoshader the output type isn't really used
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
; Entry point of the vertex shader.
; No transformation happens here: the position (with w forced to 1.0) and the
; color are handed on unchanged.
.proc main
; Pass through both inputs to the geoshader
mov outpos.xyz, inpos
; w = 1.0 (taken from the constant vector)
mov outpos.w, ones
mov outclr, inclr
; We're finished
end
.end

View File

@ -0,0 +1,177 @@
#---------------------------------------------------------------------------------
.SUFFIXES:
#---------------------------------------------------------------------------------
ifeq ($(strip $(DEVKITARM)),)
$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
endif
TOPDIR ?= $(CURDIR)
include $(DEVKITARM)/3ds_rules
#---------------------------------------------------------------------------------
# TARGET is the name of the output
# BUILD is the directory where object files & intermediate files will be placed
# SOURCES is a list of directories containing source code
# DATA is a list of directories containing data files
# INCLUDES is a list of directories containing header files
#
# NO_SMDH: if set to anything, no SMDH file is generated.
# APP_TITLE is the name of the app stored in the SMDH file (Optional)
# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
# ICON is the filename of the icon (.png), relative to the project folder.
# If not set, it attempts to use one of the following (in this order):
# - <Project name>.png
# - icon.png
# - <libctru folder>/default_icon.png
#---------------------------------------------------------------------------------
TARGET := $(notdir $(CURDIR))
BUILD := build
SOURCES := source
DATA := data
INCLUDES := include
#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard
CFLAGS := -g -Wall -O2 -mword-relocations \
-fomit-frame-pointer -ffast-math \
$(ARCH)
CFLAGS += $(INCLUDE) -DARM11 -D_3DS
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
ASFLAGS := -g $(ARCH)
LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
LIBS := -lctru -lm
#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing
# include and lib
#---------------------------------------------------------------------------------
LIBDIRS := $(CTRULIB)
#---------------------------------------------------------------------------------
# no real need to edit anything past this point unless you need to add additional
# rules for different file extensions
#---------------------------------------------------------------------------------
ifneq ($(BUILD),$(notdir $(CURDIR)))
#---------------------------------------------------------------------------------
export OUTPUT := $(CURDIR)/$(TARGET)
export TOPDIR := $(CURDIR)
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
$(foreach dir,$(DATA),$(CURDIR)/$(dir))
export DEPSDIR := $(CURDIR)/$(BUILD)
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
# use CXX for linking C++ projects, CC for standard C
#---------------------------------------------------------------------------------
ifeq ($(strip $(CPPFILES)),)
#---------------------------------------------------------------------------------
export LD := $(CC)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export LD := $(CXX)
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \
$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
-I$(CURDIR)/$(BUILD)
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
ifeq ($(strip $(ICON)),)
icons := $(wildcard *.png)
ifneq (,$(findstring $(TARGET).png,$(icons)))
export APP_ICON := $(TOPDIR)/$(TARGET).png
else
ifneq (,$(findstring icon.png,$(icons)))
export APP_ICON := $(TOPDIR)/icon.png
endif
endif
else
export APP_ICON := $(TOPDIR)/$(ICON)
endif
ifeq ($(strip $(NO_SMDH)),)
export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
endif
.PHONY: $(BUILD) clean all
#---------------------------------------------------------------------------------
all: $(BUILD)
$(BUILD):
@[ -d $@ ] || mkdir -p $@
@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
#---------------------------------------------------------------------------------
clean:
@echo clean ...
@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf
#---------------------------------------------------------------------------------
else
DEPENDS := $(OFILES:.o=.d)
#---------------------------------------------------------------------------------
# main targets
#---------------------------------------------------------------------------------
ifeq ($(strip $(NO_SMDH)),)
$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh
else
$(OUTPUT).3dsx : $(OUTPUT).elf
endif
$(OUTPUT).elf : $(OFILES)
#---------------------------------------------------------------------------------
# you need a rule like this for each extension you use as binary data
#---------------------------------------------------------------------------------
%.bin.o : %.bin
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
#---------------------------------------------------------------------------------
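# rule for assembling GPU shaders: picasso turns <name>.pica into <name>.shbin,
# which is embedded as <name>.shbin.o; a <name>_shbin.h header declaring the
# embedded data's symbols (<name>_shbin, <name>_shbin_end, <name>_shbin_size)
# is generated alongside it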
#---------------------------------------------------------------------------------
%.shbin.o: %.pica
@echo $(notdir $<)
$(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<)))
$(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<)))
@picasso $(CURBIN) $< $(CURH)
@bin2s $(CURBIN) | $(AS) -o $@
@echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
@echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
@echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
-include $(DEPENDS)
#---------------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------------

View File

@ -0,0 +1,6 @@
# GPU example
This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up.
Users of earlier versions of devkitARM need to install the tool separately; it can be found at the address below:
https://github.com/fincs/picasso/releases

View File

@ -0,0 +1,172 @@
#include "3dmath.h"
/* Overwrites *out with the 4x4 identity matrix. */
void m4x4_identity(matrix_4x4* out)
{
    /* Clear everything, then place the ones on the main diagonal */
    m4x4_zeros(out);
    out->r[3].w = 1.0f;
    out->r[2].z = 1.0f;
    out->r[1].y = 1.0f;
    out->r[0].x = 1.0f;
}
/*
 * Computes the matrix product out = a * b.
 *
 * r[] holds the rows of a matrix: element c[i] of row j of the result is the
 * dot product of row j of `a` with the c[i] elements of the four rows of `b`.
 *
 * The product is accumulated in a local temporary and only stored at the end,
 * so `out` may safely be the same object as `a` and/or `b`.
 */
void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b)
{
    matrix_4x4 result;
    int i, j;
    for (j = 0; j < 4; j ++)
    {
        const vector_4f* row = &a->r[j];
        for (i = 0; i < 4; i ++)
            result.r[j].c[i] = row->x*b->r[0].c[i] + row->y*b->r[1].c[i] + row->z*b->r[2].c[i] + row->w*b->r[3].c[i];
    }
    /* Store last: the inputs are no longer needed, so aliasing is harmless */
    *out = result;
}
/*
 * Post-multiplies *mtx by a translation matrix: mtx = mtx * T, where T is the
 * identity with x, y and z stored in the w element of rows 0, 1 and 2.
 */
void m4x4_translate(matrix_4x4* mtx, float x, float y, float z)
{
    matrix_4x4 translation, product;

    m4x4_identity(&translation);
    translation.r[2].w = z;
    translation.r[1].w = y;
    translation.r[0].w = x;

    m4x4_multiply(&product, mtx, &translation);
    m4x4_copy(mtx, &product);
}
/*
 * Scales *mtx in place: the x, y and z elements of every row are multiplied
 * by the matching factor. The w elements are left untouched.
 */
void m4x4_scale(matrix_4x4* mtx, float x, float y, float z)
{
    vector_4f* row = mtx->r;
    vector_4f* const rowsEnd = mtx->r + 4;

    for (; row != rowsEnd; ++row)
    {
        row->x *= x;
        row->y *= y;
        row->z *= z;
    }
}
/*
 * Multiplies *mtx in place by a rotation about the X axis.
 * `angle` is passed straight to cosf/sinf (radians).
 * bRightSide selects the side: true gives mtx = mtx * R, false gives mtx = R * mtx.
 */
void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rot, product;

    /* Start from the identity and overwrite the 2x2 block that mixes Y and Z */
    m4x4_identity(&rot);
    rot.r[1].y = c;
    rot.r[1].z = s;
    rot.r[2].y = -s;
    rot.r[2].z = c;

    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
    m4x4_multiply(&product, lhs, rhs);
    m4x4_copy(mtx, &product);
}
/*
 * Multiplies *mtx in place by a rotation about the Y axis.
 * `angle` is passed straight to cosf/sinf (radians).
 * bRightSide selects the side: true gives mtx = mtx * R, false gives mtx = R * mtx.
 */
void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rot, product;

    /* Start from the identity and overwrite the 2x2 block that mixes X and Z */
    m4x4_identity(&rot);
    rot.r[0].x = c;
    rot.r[0].z = s;
    rot.r[2].x = -s;
    rot.r[2].z = c;

    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
    m4x4_multiply(&product, lhs, rhs);
    m4x4_copy(mtx, &product);
}
/*
 * Multiplies *mtx in place by a rotation about the Z axis.
 * `angle` is passed straight to cosf/sinf (radians).
 * bRightSide selects the side: true gives mtx = mtx * R, false gives mtx = R * mtx.
 */
void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rot, product;

    /* Start from the identity and overwrite the 2x2 block that mixes X and Y */
    m4x4_identity(&rot);
    rot.r[0].x = c;
    rot.r[0].y = s;
    rot.r[1].x = -s;
    rot.r[1].y = c;

    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
    m4x4_multiply(&product, lhs, rhs);
    m4x4_copy(mtx, &product);
}
/*
 * Builds an orthographic projection adapted to the 3DS screens and stores it
 * in *mtx. The result is swapXY * depthFix * ortho, i.e. three stages:
 *   1. an orthographic projection of the given box,
 *   2. a remap of the depth range to [-1, 0],
 *   3. a swap of the X and Y axes (one of them negated) to account for the
 *      screens' orientation.
 */
void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far)
{
    matrix_4x4 ortho, depthFix, swapXY, orthoDepth;

    // Stage 1: orthographic projection (row 3 keeps the identity's w = 1)
    m4x4_identity(&ortho);
    ortho.r[0].x = 2.0f / (right - left);
    ortho.r[0].w = (left + right) / (left - right);
    ortho.r[1].y = 2.0f / (top - bottom);
    ortho.r[1].w = (bottom + top) / (bottom - top);
    ortho.r[2].z = 2.0f / (near - far);
    ortho.r[2].w = (far + near) / (far - near);

    // Stage 2: z' = 0.5*z - 0.5*w
    m4x4_identity(&depthFix);
    depthFix.r[2].w = -0.5;
    depthFix.r[2].z = 0.5;
    m4x4_multiply(&orthoDepth, &depthFix, &ortho);

    // Stage 3: x' = y, y' = -x (flipped); z and w pass through
    m4x4_zeros(&swapXY);
    swapXY.r[0].y = 1.0;
    swapXY.r[1].x = -1.0;
    swapXY.r[2].z = 1.0f;
    swapXY.r[3].w = 1.0f;
    m4x4_multiply(mtx, &swapXY, &orthoDepth);
}
/*
 * Builds a perspective projection adapted to the sideways 3DS screens and
 * stores it in *mtx.
 *
 * Callers pass what they think of as "fovy" and the "aspect ratio". Because
 * the screens are sideways, those values are really the fovx and the inverse
 * aspect ratio (invaspect = h/w), so the standard matrix is re-expressed in
 * those terms:
 *   fovy = 2 atan(tan(fovx/2)*h/w)
 *   a0,0 = h / (w*tan(fovy/2)) = h / (w * tan(fovx/2)*h/w) = 1 / tan(fovx/2)
 *   a1,1 = 1 / tan(fovy/2)     = w / (h*tan(fovx/2))
 */
void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far)
{
    const float tanHalfFov = tanf(fovx / 2);
    matrix_4x4 persp, depthFix;

    // Perspective projection written in terms of fovx and invaspect
    m4x4_zeros(&persp);
    persp.r[0].x = 1.0f / tanHalfFov;
    persp.r[1].y = 1.0f / (tanHalfFov*invaspect);
    persp.r[2].z = (near + far) / (near - far);
    persp.r[2].w = (2 * near * far) / (near - far);
    persp.r[3].z = -1.0f;

    // Remap the depth range to [-1, 0]: z' = 0.5*z - 0.5*w
    m4x4_identity(&depthFix);
    depthFix.r[2].w = -0.5;
    depthFix.r[2].z = 0.5;
    m4x4_multiply(mtx, &depthFix, &persp);

    // Quarter turn CCW about Z to compensate for the screens' orientation
    m4x4_rotate_z(mtx, M_PI / 2, true);
}

View File

@ -0,0 +1,56 @@
/*
* Bare-bones simplistic 3D math library
* This library is common to all libctru GPU examples
*/
#pragma once
#include <string.h>
#include <stdbool.h>
#include <math.h>
/*
 * 4-component float vector. The named members are declared in w, z, y, x
 * order, so c[0] aliases w and c[3] aliases x.
 */
typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f;
/* 4x4 float matrix stored as four vector_4f entries, r[0] to r[3]. */
typedef struct { vector_4f r[4]; } matrix_4x4;

/* Returns the 4-component dot product of *a and *b. */
static inline float v4f_dp4(const vector_4f* a, const vector_4f* b)
{
    float sum = a->x*b->x;
    sum += a->y*b->y;
    sum += a->z*b->z;
    sum += a->w*b->w;
    return sum;
}

/* Returns the Euclidean length of *a, taken over all four components. */
static inline float v4f_mod4(const vector_4f* a)
{
    const float lengthSquared = v4f_dp4(a, a);
    return sqrtf(lengthSquared);
}

/* Scales *vec in place to unit length; a zero-length vector is left unchanged. */
static inline void v4f_norm4(vector_4f* vec)
{
    const float length = v4f_mod4(vec);
    if (length != 0.0)
    {
        int k;
        for (k = 0; k < 4; k ++)
            vec->c[k] /= length;
    }
}

/* Sets every element of *out to zero. */
static inline void m4x4_zeros(matrix_4x4* out)
{
    memset(out, 0, sizeof(matrix_4x4));
}

/* Copies the matrix *in into *out. */
static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in)
{
    memcpy(out, in, sizeof(matrix_4x4));
}
// Sets *out to the identity matrix
void m4x4_identity(matrix_4x4* out);
// Matrix product: out = a * b
void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b);
// Multiplies mtx on the right by a translation by (x, y, z)
void m4x4_translate(matrix_4x4* mtx, float x, float y, float z);
// Multiplies the x, y and z elements of every row of mtx by the given factors
void m4x4_scale(matrix_4x4* mtx, float x, float y, float z);
// Rotations about the X, Y and Z axes; the angle is passed to cosf/sinf.
// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide);
void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide);
void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide);
// Special versions of the projection matrices that take the 3DS' screen orientation into account
void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far);
// Note: the definition names the second and third parameters fovx and invaspect,
// because with the screens sideways that is what the passed values really are.
void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far);

View File

@ -0,0 +1,93 @@
#include "gpu.h"
// Flags passed to GX_SetDisplayTransfer in gpuFrameEnd: no vertical flip,
// non-tiled output, no raw copy, RGBA8 input, RGB8 output, no scaling.
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
// Color and depth render targets (allocated with vramAlloc in gpuInit)
static u32 *colorBuf, *depthBuf;
// GPU command buffer (allocated with linearAlloc in gpuInit)
static u32 *cmdBuf;
// Allocates the color/depth buffers and the command buffer, then initializes
// the GPU and hands it the command buffer.
void gpuInit(void)
{
    enum
    {
        RENDER_TARGET_BYTES = 400*240*4,
        CMDBUF_SIZE         = 0x40000, // value given to GPU_Reset; 4x as many bytes are allocated
    };

    colorBuf = vramAlloc(RENDER_TARGET_BYTES);
    depthBuf = vramAlloc(RENDER_TARGET_BYTES);
    cmdBuf = linearAlloc(CMDBUF_SIZE*4);

    GPU_Init(NULL);
    GPU_Reset(NULL, cmdBuf, CMDBUF_SIZE);
}
// Releases the buffers allocated by gpuInit, in reverse order of allocation
void gpuExit(void)
{
linearFree(cmdBuf);
vramFree(depthBuf);
vramFree(colorBuf);
}
// Fills the color buffer with clearColor and the depth buffer with 0,
// and blocks until the fill has completed.
void gpuClearBuffers(u32 clearColor)
{
    // Both buffers hold 240*400 32-bit entries; the fill takes start and end pointers
    u32* const colorEnd = colorBuf + 240*400;
    u32* const depthEnd = depthBuf + 240*400;

    GX_SetMemoryFill(NULL,
        colorBuf, clearColor, colorEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH,
        depthBuf, 0, depthEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);

    gspWaitForPSC0(); // Wait for the fill to complete
}
// Queues the per-frame GPU state: viewport, depth mapping, culling, stencil,
// blending, depth/alpha tests, and a no-op configuration for all six TexEnv stages.
// Scene code is expected to run between gpuFrameBegin and gpuFrameEnd.
void gpuFrameBegin(void)
{
// Configure the viewport and the depth linear conversion function
GPU_SetViewport(
(u32*)osConvertVirtToPhys((u32)depthBuf),
(u32*)osConvertVirtToPhys((u32)colorBuf),
0, 0, 240, 400); // The top screen is physically 240x400 pixels
GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0
// Configure some boilerplate
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
// This is unknown
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
// Configure alpha blending and test
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
// Reset all six TexEnv stages to a pass-through; the scene overrides the ones it needs
int i;
for (i = 0; i < 6; i ++)
GPU_SetDummyTexEnv(i);
}
// Ends the frame: submits the queued GPU commands, waits for rendering to
// finish, copies the color buffer to the top-left framebuffer and rewinds
// the command buffer for the next frame.
void gpuFrameEnd(void)
{
    // Finish rendering
    GPU_FinishDrawing();
    GPUCMD_Finalize();
    GPUCMD_FlushAndRun(NULL);
    gspWaitForP3D(); // Wait for the rendering to complete

    // Transfer the GPU output to the framebuffer
    GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400),
        (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400),
        DISPLAY_TRANSFER_FLAGS);
    gspWaitForPPF(); // Wait for the transfer to complete

    // Reset the command buffer
    GPUCMD_SetBufferOffset(0);
}
// Configures TexEnv stage `id` as a no-operation: both the RGB and the alpha
// combiner replace their output with the previous stage's result.
void GPU_SetDummyTexEnv(int id)
{
    const int passPrevious = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
    const int plainOperands = GPU_TEVOPERANDS(0, 0, 0);

    GPU_SetTexEnv(id,
        passPrevious, passPrevious,   // RGB, alpha sources
        plainOperands, plainOperands, // RGB, alpha operands
        GPU_REPLACE, GPU_REPLACE,     // RGB, alpha combine functions
        0xFFFFFFFF);
}

View File

@ -0,0 +1,26 @@
/*
* Bare-bones simplistic GPU wrapper
* This library is common to all libctru GPU examples
*/
#pragma once
#include <string.h>
#include <3ds.h>
#include "3dmath.h"
void gpuInit(void);
void gpuExit(void);
void gpuClearBuffers(u32 clearColor);
void gpuFrameBegin(void);
void gpuFrameEnd(void);
// Configures the specified fixed-function fragment shading substage to be a no-operation
void GPU_SetDummyTexEnv(int id);
// Uploads an uniform matrix
static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix)
{
    // The matrix is uploaded as 4 float vectors (one per row) starting at `location`
    u32* const rows = (u32*)matrix;
    GPU_SetFloatUniform(type, location, rows, 4);
}

View File

@ -0,0 +1,131 @@
/*
* ~~ Simple libctru GPU triangle example ~~
* This example demonstrates the basics of using the PICA200 in a 3DS homebrew
* application in order to render a basic scene consisting of a white solid triangle.
*/
#include "gpu.h"
#include "vshader_shbin.h"
#define CLEAR_COLOR 0x68B0D8FF
// A vertex carries only a 3-component float position
typedef struct { float x, y, z; } vertex;
// The single triangle drawn by this example. Coordinates are expressed in the space
// set up by the orthographic projection in sceneInit() (x in 0..400, y in 0..240, z in 0..1).
static const vertex vertex_list[] =
{
{ 200.0f, 200.0f, 0.5f },
{ 100.0f, 40.0f, 0.5f },
{ 300.0f, 40.0f, 0.5f },
};
// Number of vertices in vertex_list
#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
static DVLB_s* vshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection;
static matrix_4x4 projection;
static void* vbo_data;
// Creates everything the scene needs: the shader program, the projection
// matrix and a linear-memory copy of the vertex data.
static void sceneInit(void)
{
    const size_t vboSize = sizeof(vertex_list);

    // Parse the embedded shader binary and bind its first DVLE as the vertex shader
    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
    shaderProgramInit(&program);
    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);

    // Look up the "projection" uniform once so it can be uploaded every frame
    uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");

    // Orthographic projection: x in [0,400], y in [0,240], z in [0,1]
    m4x4_ortho_tilt(&projection, 0.0f, 400.0f, 0.0f, 240.0f, 0.0f, 1.0f);

    // Create the VBO (vertex buffer object) in linear memory and fill it
    vbo_data = linearAlloc(vboSize);
    memcpy(vbo_data, vertex_list, vboSize);
}
static void sceneRender(void)
{
// Bind the shader program
shaderProgramUse(&program);
// Configure the first fragment shading substage to just pass through the vertex color
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels
GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha
GPU_TEVOPERANDS(0, 0, 0), // RGB
GPU_TEVOPERANDS(0, 0, 0), // Alpha
GPU_REPLACE, GPU_REPLACE, // RGB, Alpha
0xFFFFFFFF);
// Configure the "attribute buffers" (that is, the vertex input buffers)
GPU_SetAttributeBuffers(
1, // Number of inputs per vertex
(u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO
GPU_ATTRIBFMT(0, 3, GPU_FLOAT), // Format of the inputs (in this case the only input is a 3-element float vector)
0xFFE, // Unused attribute mask, in our case bit 0 is cleared since it is used
0x0, // Attribute permutations (here it is the identity)
1, // Number of buffers
(u32[]) { 0x0 }, // Buffer offsets (placeholders)
(u64[]) { 0x0 }, // Attribute permutations for each buffer (identity again)
(u8[]) { 1 }); // Number of attributes for each buffer
// Upload the projection matrix
GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection);
// Draw the VBO
GPU_DrawArray(GPU_TRIANGLES, vertex_list_count);
}
// Releases everything sceneInit() created.
static void sceneExit(void)
{
// Free the VBO
linearFree(vbo_data);
// Free the shader program
shaderProgramFree(&program);
// Free the parsed shader binary last: the program was bound to its DVLE[0] in sceneInit()
DVLB_Free(vshader_dvlb);
}
// Entry point: sets up graphics and the scene, renders one frame per VBlank
// until START is pressed or aptMainLoop() returns false, then tears everything down.
int main()
{
    // Bring up graphics, then the scene
    gfxInitDefault();
    gpuInit();
    sceneInit();
    gpuClearBuffers(CLEAR_COLOR);

    for (;;)
    {
        if (!aptMainLoop())
            break;

        gspWaitForVBlank();  // Synchronize with the start of VBlank
        gfxSwapBuffersGpu(); // Show the frame that was rendered during the previous iteration
        hidScanInput();      // Read the user input

        // START returns to hbmenu
        if (hidKeysDown() & KEY_START)
            break;

        // Render the scene, then clear the buffers ready for the next frame
        gpuFrameBegin();
        sceneRender();
        gpuFrameEnd();
        gpuClearBuffers(CLEAR_COLOR);

        // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering)
        //gfxFlushBuffers();
    }

    // Tear down in reverse order of initialization
    sceneExit();
    gpuExit();
    gfxExit();
    return 0;
}

View File

@ -0,0 +1,34 @@
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, -0.5)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
; main: vertex shader entry point.
; Transforms the input position by the projection matrix and outputs a constant color.
.proc main
; Force the w component of inpos to be 1.0
; (r0 = (inpos.x, inpos.y, inpos.z, 1.0))
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
; One dp4 per matrix row; each writes a single component of outpos
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outclr = solid white color (all four components set to 1.0)
mov outclr, ones
; We're finished
end
.end

View File

@ -0,0 +1,177 @@
#---------------------------------------------------------------------------------
.SUFFIXES:
#---------------------------------------------------------------------------------
ifeq ($(strip $(DEVKITARM)),)
$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
endif
TOPDIR ?= $(CURDIR)
include $(DEVKITARM)/3ds_rules
#---------------------------------------------------------------------------------
# TARGET is the name of the output
# BUILD is the directory where object files & intermediate files will be placed
# SOURCES is a list of directories containing source code
# DATA is a list of directories containing data files
# INCLUDES is a list of directories containing header files
#
# NO_SMDH: if set to anything, no SMDH file is generated.
# APP_TITLE is the name of the app stored in the SMDH file (Optional)
# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
# ICON is the filename of the icon (.png), relative to the project folder.
# If not set, it attempts to use one of the following (in this order):
# - <Project name>.png
# - icon.png
# - <libctru folder>/default_icon.png
#---------------------------------------------------------------------------------
TARGET := $(notdir $(CURDIR))
BUILD := build
SOURCES := source
DATA := data
INCLUDES := include
#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard
CFLAGS := -g -Wall -O2 -mword-relocations \
-fomit-frame-pointer -ffast-math \
$(ARCH)
CFLAGS += $(INCLUDE) -DARM11 -D_3DS
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
ASFLAGS := -g $(ARCH)
LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
LIBS := -lctru -lm
#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing
# include and lib
#---------------------------------------------------------------------------------
LIBDIRS := $(CTRULIB)
#---------------------------------------------------------------------------------
# no real need to edit anything past this point unless you need to add additional
# rules for different file extensions
#---------------------------------------------------------------------------------
ifneq ($(BUILD),$(notdir $(CURDIR)))
#---------------------------------------------------------------------------------
export OUTPUT := $(CURDIR)/$(TARGET)
export TOPDIR := $(CURDIR)
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
$(foreach dir,$(DATA),$(CURDIR)/$(dir))
export DEPSDIR := $(CURDIR)/$(BUILD)
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
# use CXX for linking C++ projects, CC for standard C
#---------------------------------------------------------------------------------
ifeq ($(strip $(CPPFILES)),)
#---------------------------------------------------------------------------------
export LD := $(CC)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export LD := $(CXX)
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \
$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
-I$(CURDIR)/$(BUILD)
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
ifeq ($(strip $(ICON)),)
icons := $(wildcard *.png)
ifneq (,$(findstring $(TARGET).png,$(icons)))
export APP_ICON := $(TOPDIR)/$(TARGET).png
else
ifneq (,$(findstring icon.png,$(icons)))
export APP_ICON := $(TOPDIR)/icon.png
endif
endif
else
export APP_ICON := $(TOPDIR)/$(ICON)
endif
ifeq ($(strip $(NO_SMDH)),)
export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
endif
.PHONY: $(BUILD) clean all
#---------------------------------------------------------------------------------
all: $(BUILD)
$(BUILD):
@[ -d $@ ] || mkdir -p $@
@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
#---------------------------------------------------------------------------------
clean:
@echo clean ...
@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf
#---------------------------------------------------------------------------------
else
DEPENDS := $(OFILES:.o=.d)
#---------------------------------------------------------------------------------
# main targets
#---------------------------------------------------------------------------------
ifeq ($(strip $(NO_SMDH)),)
$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh
else
$(OUTPUT).3dsx : $(OUTPUT).elf
endif
$(OUTPUT).elf : $(OFILES)
#---------------------------------------------------------------------------------
# you need a rule like this for each extension you use as binary data
#---------------------------------------------------------------------------------
%.bin.o : %.bin
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
#---------------------------------------------------------------------------------
# rule for assembling GPU shaders
#---------------------------------------------------------------------------------
# Builds <name>.shbin.o from <name>.pica:
#  1. CURBIN / CURH are set to <name>.shbin and <name>.psh.h (directory part stripped).
#  2. picasso is run with CURBIN, the .pica source and CURH as its arguments
#     (CURH is presumably the generated header output -- confirm against the picasso usage text).
#  3. bin2s turns the shader binary into assembly, which is piped to $(AS) to produce the object.
#  4. A header named after CURBIN with dots replaced by underscores (e.g. vshader_shbin.h) is
#     written, declaring the <name>_shbin_end, <name>_shbin and <name>_shbin_size symbols.
#     The sed expression prefixes the symbol with '_' when the file name starts with a digit.
%.shbin.o: %.pica
@echo $(notdir $<)
$(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<)))
$(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<)))
@picasso $(CURBIN) $< $(CURH)
@bin2s $(CURBIN) | $(AS) -o $@
@echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
@echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
@echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
-include $(DEPENDS)
#---------------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------------

View File

@ -0,0 +1,6 @@
# GPU example
This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up.
Users of earlier versions of devkitARM need to install the tool themselves; it can be found at the address below:
https://github.com/fincs/picasso/releases

Binary file not shown.

View File

@ -0,0 +1,172 @@
#include "3dmath.h"
// Sets *out to the 4x4 identity matrix.
void m4x4_identity(matrix_4x4* out)
{
    m4x4_zeros(out);

    // Ones on the main diagonal
    out->r[0].x = 1.0f;
    out->r[1].y = 1.0f;
    out->r[2].z = 1.0f;
    out->r[3].w = 1.0f;
}
// Computes out = a * b.
//
// Rows are stored in r[0..3]. Within a row, c[k] addresses one component, and the
// same k selects the same column in every row, so c[] is used to walk the columns of b.
//
// The product is built in a local matrix and copied to *out at the end, so `out`
// may safely alias `a` and/or `b` (writing directly into *out would overwrite
// inputs that are still needed for the remaining elements).
void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b)
{
    matrix_4x4 result;
    int row, col;

    for (row = 0; row < 4; row ++)
        for (col = 0; col < 4; col ++)
            result.r[row].c[col] =
                a->r[row].x*b->r[0].c[col] + a->r[row].y*b->r[1].c[col] +
                a->r[row].z*b->r[2].c[col] + a->r[row].w*b->r[3].c[col];

    *out = result;
}
// Replaces *mtx with (*mtx) * T, where T is the translation matrix for (x, y, z).
void m4x4_translate(matrix_4x4* mtx, float x, float y, float z)
{
    matrix_4x4 translation, product;

    // T = identity with the offsets in the w column of the first three rows
    m4x4_identity(&translation);
    translation.r[0].w = x;
    translation.r[1].w = y;
    translation.r[2].w = z;

    // Multiply into a temporary, then store the result back in place
    m4x4_multiply(&product, mtx, &translation);
    m4x4_copy(mtx, &product);
}
// Scales the x, y and z components of every row of *mtx in place;
// the w component of each row is left untouched.
void m4x4_scale(matrix_4x4* mtx, float x, float y, float z)
{
    vector_4f* row = mtx->r;
    vector_4f* const rowsEnd = mtx->r + 4;

    for (; row != rowsEnd; ++row)
    {
        row->x *= x;
        row->y *= y;
        row->z *= z;
    }
}
// Combines *mtx in place with a rotation matrix about the X axis.
//   bRightSide == true : mtx = mtx * R
//   bRightSide == false: mtx = R * mtx
// where (c = cos(angle), s = sin(angle), columns in x,y,z,w order)
//   R = | 1  0  0  0 |
//       | 0  c  s  0 |
//       | 0 -s  c  0 |
//       | 0  0  0  1 |
void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rotation, product;

    // Start from identity and overwrite the 2x2 block touching y and z
    m4x4_identity(&rotation);
    rotation.r[1].y = c;
    rotation.r[1].z = s;
    rotation.r[2].y = -s;
    rotation.r[2].z = c;

    if (bRightSide)
        m4x4_multiply(&product, mtx, &rotation);
    else
        m4x4_multiply(&product, &rotation, mtx);

    m4x4_copy(mtx, &product);
}
// Combines *mtx in place with a rotation matrix about the Y axis.
//   bRightSide == true : mtx = mtx * R
//   bRightSide == false: mtx = R * mtx
// where (c = cos(angle), s = sin(angle), columns in x,y,z,w order)
//   R = |  c  0  s  0 |
//       |  0  1  0  0 |
//       | -s  0  c  0 |
//       |  0  0  0  1 |
void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rotation, product;

    // Start from identity and overwrite the 2x2 block touching x and z
    m4x4_identity(&rotation);
    rotation.r[0].x = c;
    rotation.r[0].z = s;
    rotation.r[2].x = -s;
    rotation.r[2].z = c;

    if (bRightSide)
        m4x4_multiply(&product, mtx, &rotation);
    else
        m4x4_multiply(&product, &rotation, mtx);

    m4x4_copy(mtx, &product);
}
// Combines *mtx in place with a rotation matrix about the Z axis.
//   bRightSide == true : mtx = mtx * R
//   bRightSide == false: mtx = R * mtx
// where (c = cos(angle), s = sin(angle), columns in x,y,z,w order)
//   R = |  c  s  0  0 |
//       | -s  c  0  0 |
//       |  0  0  1  0 |
//       |  0  0  0  1 |
void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide)
{
    const float c = cosf(angle);
    const float s = sinf(angle);
    matrix_4x4 rotation, product;

    // Start from identity and overwrite the 2x2 block touching x and y
    m4x4_identity(&rotation);
    rotation.r[0].x = c;
    rotation.r[0].y = s;
    rotation.r[1].x = -s;
    rotation.r[1].y = c;

    if (bRightSide)
        m4x4_multiply(&product, mtx, &rotation);
    else
        m4x4_multiply(&product, &rotation, mtx);

    m4x4_copy(mtx, &product);
}
// Builds an orthographic projection for the given clip volume and stores it in *mtx.
// The result is composed as: tilt * depthFix * ortho, where
//   ortho    maps the volume to the [-1, 1] cube,
//   depthFix remaps z to the [-1, 0] range,
//   tilt     swaps the X and Y axes to account for the 3DS screens' orientation.
void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far)
{
    matrix_4x4 ortho, depthFix, orthoDepth, tilt;

    // Orthogonal projection matrix
    m4x4_zeros(&ortho);
    ortho.r[0].x = 2.0f / (right - left);
    ortho.r[0].w = (left + right) / (left - right);
    ortho.r[1].y = 2.0f / (top - bottom);
    ortho.r[1].w = (bottom + top) / (bottom - top);
    ortho.r[2].z = 2.0f / (near - far);
    ortho.r[2].w = (far + near) / (far - near);
    ortho.r[3].w = 1.0f;

    // Fix depth range to [-1, 0]: z' = 0.5*z - 0.5
    m4x4_identity(&depthFix);
    depthFix.r[2].z = 0.5f;
    depthFix.r[2].w = -0.5f;
    m4x4_multiply(&orthoDepth, &depthFix, &ortho);

    // Fix the 3DS screens' orientation by swapping the X and Y axis
    m4x4_identity(&tilt);
    tilt.r[0].x = 0.0f;
    tilt.r[0].y = 1.0f;
    tilt.r[1].x = -1.0f; // flipped
    tilt.r[1].y = 0.0f;
    m4x4_multiply(mtx, &tilt, &orthoDepth);
}
// Builds a perspective projection that accounts for the sideways 3DS screens
// and stores it in *mtx.
void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far)
{
    // Callers think in terms of "fovy" and "aspect ratio". Because the 3DS screens are
    // sideways, what they pass is really the fovx and the inverse of the aspect ratio,
    // so the usual perspective formula is re-expressed in those terms:
    //
    //   fovx = 2 atan(tan(fovy/2)*w/h)
    //   fovy = 2 atan(tan(fovx/2)*h/w)
    //   invaspect = h/w
    //
    //   a0,0 = h / (w*tan(fovy/2))
    //        = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2))
    //        = h / (w*tan(atan(tan(fovx/2)*h/w)))
    //        = h / (w * tan(fovx/2)*h/w)
    //        = 1 / tan(fovx/2)
    //
    //   a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2))
    const float halfFovTan = tanf(fovx / 2);
    matrix_4x4 persp, depthFix;

    // Perspective projection matrix
    m4x4_zeros(&persp);
    persp.r[0].x = 1.0f / halfFovTan;
    persp.r[1].y = 1.0f / (halfFovTan*invaspect);
    persp.r[2].z = (near + far) / (near - far);
    persp.r[2].w = (2 * near * far) / (near - far);
    persp.r[3].z = -1.0f;

    // Fix depth range to [-1, 0]: z' = 0.5*z - 0.5*w
    m4x4_identity(&depthFix);
    depthFix.r[2].z = 0.5f;
    depthFix.r[2].w = -0.5f;
    m4x4_multiply(mtx, &depthFix, &persp);

    // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation
    m4x4_rotate_z(mtx, M_PI / 2, true);
}

View File

@ -0,0 +1,56 @@
/*
* Bare-bones simplistic 3D math library
* This library is common to all libctru GPU examples
*/
#pragma once
#include <string.h>
#include <stdbool.h>
#include <math.h>
// 4-component float vector, accessible by name or by index.
// The named members are declared in w, z, y, x order, so c[0] aliases w and c[3] aliases x.
// (Presumably this mirrors the component order the GPU expects for float uniforms -- confirm.)
// Brace initializers therefore list components as { w, z, y, x }.
typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f;
// 4x4 float matrix stored as four row vectors r[0]..r[3]
typedef struct { vector_4f r[4]; } matrix_4x4;
// Returns the 4-component dot product of *a and *b.
static inline float v4f_dp4(const vector_4f* a, const vector_4f* b)
{
    // Accumulate in x, y, z, w order
    float dot = a->x*b->x;
    dot += a->y*b->y;
    dot += a->z*b->z;
    dot += a->w*b->w;
    return dot;
}
// Returns the Euclidean length of the 4-component vector *a.
static inline float v4f_mod4(const vector_4f* a)
{
    const float lengthSquared = v4f_dp4(a, a);
    return sqrtf(lengthSquared);
}
// Normalizes *vec in place to unit length (all four components).
// A zero-length vector is left unchanged.
static inline void v4f_norm4(vector_4f* vec)
{
    const float length = v4f_mod4(vec);

    if (length != 0.0)
    {
        vec->x /= length;
        vec->y /= length;
        vec->z /= length;
        vec->w /= length;
    }
}
// Clears every byte of *out, which sets all 16 elements to 0.0f.
static inline void m4x4_zeros(matrix_4x4* out)
{
    memset(out, 0, sizeof(matrix_4x4));
}
// Copies the whole matrix *in into *out.
static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in)
{
    memcpy(out, in, sizeof(matrix_4x4));
}
void m4x4_identity(matrix_4x4* out);
void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b);
void m4x4_translate(matrix_4x4* mtx, float x, float y, float z);
void m4x4_scale(matrix_4x4* mtx, float x, float y, float z);
void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide);
void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide);
void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide);
// Special versions of the projection matrices that take the 3DS' screen orientation into account
void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far);
void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far);

View File

@ -0,0 +1,93 @@
#include "gpu.h"
// Flags for the display transfer performed in gpuFrameEnd():
// RGBA8 input, RGB8 output, with vertical flip, tiled output, raw copy and scaling all disabled.
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
// Render targets (allocated in VRAM by gpuInit, 400*240 32-bit entries each)
static u32 *colorBuf, *depthBuf;
// GPU command buffer (allocated in linear memory by gpuInit)
static u32 *cmdBuf;
void gpuInit(void)
{
colorBuf = vramAlloc(400*240*4);
depthBuf = vramAlloc(400*240*4);
cmdBuf = linearAlloc(0x40000*4);
GPU_Init(NULL);
GPU_Reset(NULL, cmdBuf, 0x40000);
}
// Frees the buffers allocated by gpuInit(), in reverse order of allocation.
void gpuExit(void)
{
// Command buffer (linear memory)
linearFree(cmdBuf);
// Render targets (VRAM)
vramFree(depthBuf);
vramFree(colorBuf);
}
// Fills the entire color buffer with clearColor and the entire depth buffer with 0,
// then blocks until the PSC0 event has been signalled.
void gpuClearBuffers(u32 clearColor)
{
    const u32 fillFlags = GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH;
    u32* const colorEnd = &colorBuf[240*400]; // one past the last color pixel
    u32* const depthEnd = &depthBuf[240*400]; // one past the last depth value

    GX_SetMemoryFill(NULL,
        colorBuf, clearColor, colorEnd, fillFlags,
        depthBuf, 0, depthEnd, fillFlags);

    // Wait for the fill to complete
    gspWaitForPSC0();
}
void gpuFrameBegin(void)
{
// Configure the viewport and the depth linear conversion function
GPU_SetViewport(
(u32*)osConvertVirtToPhys((u32)depthBuf),
(u32*)osConvertVirtToPhys((u32)colorBuf),
0, 0, 240, 400); // The top screen is physically 240x400 pixels
GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0
// Configure some boilerplate
GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
GPU_SetBlendingColor(0,0,0,0);
GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
// This is unknown
GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
GPUCMD_AddWrite(GPUREG_0118, 0);
// Configure alpha blending and test
GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
int i;
for (i = 0; i < 6; i ++)
GPU_SetDummyTexEnv(i);
}
// Ends the current frame: finalizes and runs the GPU command buffer, waits for
// rendering to finish, copies the color buffer to the top-left framebuffer and
// rewinds the command buffer so the next frame starts from offset 0.
void gpuFrameEnd(void)
{
    // Finish rendering
    GPU_FinishDrawing();
    GPUCMD_Finalize();
    GPUCMD_FlushAndRun(NULL);
    gspWaitForP3D(); // Wait for the rendering to complete

    // Transfer the GPU output to the framebuffer
    GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400),
        (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400),
        DISPLAY_TRANSFER_FLAGS);
    gspWaitForPPF(); // Wait for the transfer to complete

    // Reset the command buffer
    GPUCMD_SetBufferOffset(0);
}
void GPU_SetDummyTexEnv(int id)
{
GPU_SetTexEnv(id,
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
GPU_TEVOPERANDS(0, 0, 0),
GPU_TEVOPERANDS(0, 0, 0),
GPU_REPLACE,
GPU_REPLACE,
0xFFFFFFFF);
}

View File

@ -0,0 +1,26 @@
/*
* Bare-bones simplistic GPU wrapper
* This library is common to all libctru GPU examples
*/
#pragma once
#include <string.h>
#include <3ds.h>
#include "3dmath.h"
void gpuInit(void);
void gpuExit(void);
void gpuClearBuffers(u32 clearColor);
void gpuFrameBegin(void);
void gpuFrameEnd(void);
// Configures the specified fixed-function fragment shading substage to be a no-operation
void GPU_SetDummyTexEnv(int id);
// Uploads an uniform matrix
static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix)
{
    // The matrix is uploaded as 4 float vectors (one per row) starting at `location`
    u32* const rows = (u32*)matrix;
    GPU_SetFloatUniform(type, location, rows, 4);
}

View File

@ -0,0 +1,244 @@
/*
* ~~ Simple libctru GPU textured cube example ~~
* This example demonstrates the basics of using the PICA200 in a 3DS homebrew
* application in order to render a basic scene consisting of a rotating
* textured cube which is also shaded using a simple shading algorithm.
* The shading algorithm is explained in the vertex shader source code.
*/
#include "gpu.h"
#include "vshader_shbin.h"
#include "kitten_bin.h"
#define CLEAR_COLOR 0x68B0D8FF
// One cube vertex: 8 floats laid out as position (3), texture coordinate (2), normal (3).
// The order and sizes must match the GPU_ATTRIBFMT entries passed to GPU_SetAttributeBuffers in sceneRender().
typedef struct { float position[3]; float texcoord[2]; float normal[3]; } vertex;
static const vertex vertex_list[] =
{
// First face (PZ)
// First triangle
{ {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} },
{ {+0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, +1.0f} },
{ {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} },
// Second triangle
{ {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} },
{ {-0.5f, +0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, +1.0f} },
{ {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} },
// Second face (MZ)
// First triangle
{ {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} },
{ {-0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, -1.0f} },
{ {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} },
// Second triangle
{ {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} },
{ {+0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, -1.0f} },
{ {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} },
// Third face (PX)
// First triangle
{ {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} },
{ {+0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} },
{ {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} },
// Second triangle
{ {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} },
{ {+0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} },
{ {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} },
// Fourth face (MX)
// First triangle
{ {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} },
{ {-0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} },
{ {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} },
// Second triangle
{ {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} },
{ {-0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} },
{ {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} },
// Fifth face (PY)
// First triangle
{ {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} },
{ {-0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, +1.0f, 0.0f} },
{ {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} },
// Second triangle
{ {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} },
{ {+0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, +1.0f, 0.0f} },
{ {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} },
// Sixth face (MY)
// First triangle
{ {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} },
{ {+0.5f, -0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, -1.0f, 0.0f} },
{ {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} },
// Second triangle
{ {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} },
{ {-0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, -1.0f, 0.0f} },
{ {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} },
};
#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
static DVLB_s* vshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection, uLoc_modelView;
static int uLoc_lightVec, uLoc_lightHalfVec, uLoc_lightClr, uLoc_material;
static matrix_4x4 projection;
// Material colors, uploaded to the shader's "material" uniform (one row per color).
// Each row initializes vector_4f's named members, which are declared in w, z, y, x order,
// so the first value of every row is the w component and the remaining three are z, y, x.
static matrix_4x4 material =
{
{
{ { 0.0f, 0.2f, 0.2f, 0.2f } }, // Ambient
{ { 0.0f, 0.4f, 0.4f, 0.4f } }, // Diffuse
{ { 0.0f, 0.8f, 0.8f, 0.8f } }, // Specular
{ { 1.0f, 0.0f, 0.0f, 0.0f } }, // Emission
}
};
static void* vbo_data;
static void* tex_data;
static float angleX = 0.0, angleY = 0.0;
// Creates everything the scene needs: the shader program and its uniform locations,
// the projection matrix, and linear-memory copies of the vertex data and the texture.
static void sceneInit(void)
{
    const size_t vboSize = sizeof(vertex_list);

    // Parse the embedded shader binary and bind its first DVLE as the vertex shader
    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
    shaderProgramInit(&program);
    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);

    // Look up every uniform once so they can be uploaded each frame
    uLoc_projection   = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
    uLoc_modelView    = shaderInstanceGetUniformLocation(program.vertexShader, "modelView");
    uLoc_lightVec     = shaderInstanceGetUniformLocation(program.vertexShader, "lightVec");
    uLoc_lightHalfVec = shaderInstanceGetUniformLocation(program.vertexShader, "lightHalfVec");
    uLoc_lightClr     = shaderInstanceGetUniformLocation(program.vertexShader, "lightClr");
    uLoc_material     = shaderInstanceGetUniformLocation(program.vertexShader, "material");

    // Perspective projection: 80 degree field of view, 400:240 screen, depth range 0.01..1000
    m4x4_persp_tilt(&projection, 80.0f*M_PI/180.0f, 400.0f/240.0f, 0.01f, 1000.0f);

    // Create the VBO (vertex buffer object) in linear memory and fill it
    vbo_data = linearAlloc(vboSize);
    memcpy(vbo_data, vertex_list, vboSize);

    // Copy the embedded texture into linear memory
    tex_data = linearAlloc(kitten_bin_size);
    memcpy(tex_data, kitten_bin, kitten_bin_size);
}
static void sceneRender(void)
{
// Bind the shader program
shaderProgramUse(&program);
// Configure the first fragment shading substage to blend the texture color with
// the vertex color (calculated by the vertex shader using a lighting algorithm)
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
GPU_SetTexEnv(0,
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha
GPU_TEVOPERANDS(0, 0, 0), // RGB
GPU_TEVOPERANDS(0, 0, 0), // Alpha
GPU_MODULATE, GPU_MODULATE, // RGB, Alpha
0xFFFFFFFF);
// Configure the first texture unit
GPU_SetTextureEnable(GPU_TEXUNIT0);
GPU_SetTexture(
GPU_TEXUNIT0,
(u32*)osConvertVirtToPhys((u32)tex_data),
64, // Width
64, // Height
GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_WRAP_S(GPU_REPEAT) | GPU_TEXTURE_WRAP_T(GPU_REPEAT), // Flags
GPU_RGBA8 // Pixel format
);
// Configure the "attribute buffers" (that is, the vertex input buffers)
GPU_SetAttributeBuffers(
3, // Number of inputs per vertex
(u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO
GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // Format of the inputs
GPU_ATTRIBFMT(1, 2, GPU_FLOAT) |
GPU_ATTRIBFMT(2, 3, GPU_FLOAT),
0xFFC, // Unused attribute mask, in our case bits 0~2 are cleared since they are used
0x210, // Attribute permutations (here it is the identity, passing each attribute in order)
1, // Number of buffers
(u32[]) { 0x0 }, // Buffer offsets (placeholders)
(u64[]) { 0x210 }, // Attribute permutations for each buffer (identity again)
(u8[]) { 3 }); // Number of attributes for each buffer
// Calculate the modelView matrix
matrix_4x4 modelView;
m4x4_identity(&modelView);
m4x4_translate(&modelView, 0.0, 0.0, -2.0 + 0.5*sinf(angleX));
m4x4_rotate_x(&modelView, angleX, true);
m4x4_rotate_y(&modelView, angleY, true);
// Rotate the cube each frame
angleX += M_PI / 180;
angleY += M_PI / 360;
// Upload the uniforms
GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection);
GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_material, &material);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightVec, (u32*)(float[]){0.0f, -1.0f, 0.0f, 0.0f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightHalfVec, (u32*)(float[]){0.0f, -1.0f, 0.0f, 0.0f}, 1);
GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightClr, (u32*)(float[]){1.0f, 1.0f, 1.0f, 1.0f}, 1);
// Draw the VBO
GPU_DrawArray(GPU_TRIANGLES, vertex_list_count);
}
// Releases everything sceneInit() created.
static void sceneExit(void)
{
// Free the texture
linearFree(tex_data);
// Free the VBO
linearFree(vbo_data);
// Free the shader program
shaderProgramFree(&program);
// Free the parsed shader binary last: the program was bound to its DVLE[0] in sceneInit()
DVLB_Free(vshader_dvlb);
}
// Entry point: sets up graphics and the scene, renders one frame per VBlank
// until START is pressed or aptMainLoop() returns false, then tears everything down.
int main()
{
    // Bring up graphics, then the scene
    gfxInitDefault();
    gpuInit();
    sceneInit();
    gpuClearBuffers(CLEAR_COLOR);

    for (;;)
    {
        if (!aptMainLoop())
            break;

        gspWaitForVBlank();  // Synchronize with the start of VBlank
        gfxSwapBuffersGpu(); // Show the frame that was rendered during the previous iteration
        hidScanInput();      // Read the user input

        // START returns to hbmenu
        if (hidKeysDown() & KEY_START)
            break;

        // Render the scene, then clear the buffers ready for the next frame
        gpuFrameBegin();
        sceneRender();
        gpuFrameEnd();
        gpuClearBuffers(CLEAR_COLOR);

        // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering)
        //gfxFlushBuffers();
    }

    // Tear down in reverse order of initialization
    sceneExit();
    gpuExit();
    gfxExit();
    return 0;
}

View File

@ -0,0 +1,90 @@
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4], modelView[4]
.fvec lightVec, lightHalfVec, lightClr, material[4]
.alias mat_amb material[0]
.alias mat_dif material[1]
.alias mat_spe material[2]
.alias mat_emi material[3]
; Constants
.constf myconst(0.0, 1.0, -1.0, -0.5)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outtc0 texcoord0
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias intex v1
.alias innrm v2
; main: vertex shader entry point.
; Transforms the vertex position, passes the texture coordinate through and computes
; a per-vertex color from the material and a single light.
; Register usage: r0 and r1 are reused several times below, so the order of the
; sections matters; r2 is scratch.
.proc main
; Force the w component of inpos to be 1.0
mov r0.xyz, inpos
mov r0.w, ones
; r1 = modelView * inpos
dp4 r1.x, modelView[0], r0
dp4 r1.y, modelView[1], r0
dp4 r1.z, modelView[2], r0
dp4 r1.w, modelView[3], r0
; outpos = projection * r1
dp4 outpos.x, projection[0], r1
dp4 outpos.y, projection[1], r1
dp4 outpos.z, projection[2], r1
dp4 outpos.w, projection[3], r1
; outtc0 = intex
mov outtc0, intex
; Transform the normal vector with the modelView matrix
; r1 = normalize(modelView * innrm)
; (w is forced to zero so the translation part of modelView has no effect)
mov r0.xyz, innrm
mov r0.w, zeros
dp4 r1.x, modelView[0], r0
dp4 r1.y, modelView[1], r0
dp4 r1.z, modelView[2], r0
mov r1.w, zeros
dp3 r2, r1, r1 ; r2 = x^2+y^2+z^2 for each component
rsq r2, r2 ; r2 = 1/sqrt(r2), likewise for each component
mul r1, r2, r1 ; r1 = r1*r2
; Calculate the diffuse level (r0.x) and the shininess level (r0.y)
; r0.x = max(0, -(lightVec * r1))
; r0.y = max(0, (-lightHalfVec) * r1) ^ 2
dp3 r0.x, lightVec, r1
add r0.x, zeros, -r0 ; negate: r0.x = 0 - r0.x
dp3 r0.y, -lightHalfVec, r1
max r0, zeros, r0 ; clamp negative levels to zero
mul r0.y, r0, r0 ; square the shininess level
; Accumulate the vertex color in r1, initializing it to the emission color
mov r1, mat_emi
; r1 += specularColor * lightClr * shininessLevel
mul r2, lightClr, r0.yyyy
mul r2, mat_spe, r2
add r1, r2, r1
; r1 += diffuseColor * lightClr * diffuseLevel
mul r2, lightClr, r0.xxxx
mul r2, mat_dif, r2
add r1, r2, r1
; r1 += ambientColor * lightClr
mov r2, lightClr
mul r2, mat_amb, r2
add r1, r2, r1
; outclr = min(1, r1), i.e. clamp each component of r1 to at most 1.0
min outclr, ones, r1
; We're finished
end
.end