diff --git a/examples/gpu/Makefile b/examples/gpu/Makefile index becbe2b..c0eae79 100644 --- a/examples/gpu/Makefile +++ b/examples/gpu/Makefile @@ -1,73 +1,139 @@ -CC = arm-none-eabi-gcc -LINK = arm-none-eabi-gcc -AS = arm-none-eabi-as -OBJCOPY = arm-none-eabi-objcopy -CTRULIB = ../libctru -AEMSTROPATH = ../../aemstro -CFLAGS += -Wall -std=c99 -march=armv6 -O3 -I"$(CTRULIB)/include" -I$(DEVKITPRO)/libnds/include -# LDFLAGS += --script=ccd00.ld -L"$(DEVKITARM)/arm-none-eabi/lib" -L"$(DEVKITARM)/lib/gcc/arm-none-eabi/4.7.1" -L"$(CTRULIB)/lib" -# LDFLAGS += --script=ccd00.ld -L"$(DEVKITARM)/arm-none-eabi/lib" -L"$(CTRULIB)/lib" -LDFLAGS += -nostartfiles --specs=ccd00.specs -L"$(DEVKITARM)/arm-none-eabi/lib" -L"$(CTRULIB)/lib" +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- -CFILES = $(wildcard source/*.c) -VSHFILES = $(wildcard source/*.vsh) -SHBINFILES = $(VSHFILES:source/%.vsh=data/%.shbin) -BINFILES = $(wildcard data/*.bin) -OFILES = $(BINFILES:data/%.bin=build/%.bin.o) -OFILES += $(SHBINFILES:data/%.shbin=build/%.shbin.o) -OFILES += $(CFILES:source/%.c=build/%.o) -DFILES = $(CFILES:source/%.c=build/%.d) -SFILES = $(wildcard source/*.s) -OFILES += $(SFILES:source/%.s=build/%.o) -PROJECTNAME = ${shell basename "$(CURDIR)"} +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +ifeq ($(strip $(CTRULIB)),) +# THIS IS TEMPORARY - in the future it should be at $(DEVKITPRO)/libctru +$(error "Please set CTRULIB in your environment. export CTRULIB=libctru") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules #--------------------------------------------------------------------------------- -# canned command sequence for binary data, taken from devkitARM +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# SPECS is the directory containing the important build and link files #--------------------------------------------------------------------------------- -define bin2o - bin2s $< | $(AS) -o $(@) - echo "extern const u8" `(echo $( source/`(echo $(> source/`(echo $(> source/`(echo $( build/$*.d - -build/%.o: source/%.s - $(CC) $(CFLAGS) -c $< -o $@ - @$(CC) -MM $< > build/$*.d - -build/%.shbin.o: data/%.shbin +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o : %.bin +#--------------------------------------------------------------------------------- @echo $(notdir $<) @$(bin2o) -build/%.bin.o: data/%.bin - @echo $(notdir $<) - @$(bin2o) +-include $(DEPENDS) + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/examples/gpu/source/crt0.s b/examples/gpu/source/crt0.s deleted file mode 100644 index 557c327..0000000 --- a/examples/gpu/source/crt0.s +++ /dev/null @@ -1,12 +0,0 @@ -.section ".init" -.arm -.align 4 -.global _init -.global _start - -_start: - blx __libc_init_array - blx main - -_init: - bx lr diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c index 81655f2..b3329c3 100644 --- a/examples/gpu/source/main.c +++ b/examples/gpu/source/main.c @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include <3ds/types.h> #include <3ds/srv.h> #include <3ds/APT.h> @@ -7,98 +11,281 @@ #include <3ds/HID.h> #include <3ds/SHDR.h> #include <3ds/svc.h> +#include <3ds/os.h> +#include <3ds/gfx.h> #include "costable.h" #include "test_shbin.h" +#include "test_png_bin.h" +#include "mdl.h" u8* gspHeap; u32* gxCmdBuf; -u8 currentBuffer; -u8* topLeftFramebuffers[2]; -u8* topLeftFramebuffersPA[2]; - Handle gspEvent, gspSharedMemHandle; -void gspGpuInit() +DVLB_s* shader; + +float* vertArray; +float* colorArray; +u16* indArray; +u32* texData; + +void loadIdentity44(float* m) { - gspInit(); + if(!m)return; - GSPGPU_AcquireRight(NULL, 0x0); - GSPGPU_SetLcdForceBlack(NULL, 0x0); - - //set subscreen to blue - u32 regData=0x01FF0000; - GSPGPU_WriteHWRegs(NULL, 0x202A04, ®Data, 4); - - //grab main left screen framebuffer addresses - GSPGPU_ReadHWRegs(NULL, 0x400468, (u32*)&topLeftFramebuffersPA, 8); - - //convert PA to VA (assuming FB in VRAM) - topLeftFramebuffers[0]=topLeftFramebuffersPA[0]+0x7000000; - topLeftFramebuffers[1]=topLeftFramebuffersPA[1]+0x7000000; - - //setup our gsp shared mem section - u8 threadID; - svcCreateEvent(&gspEvent, 0x0); - GSPGPU_RegisterInterruptRelayQueue(NULL, gspEvent, 0x1, &gspSharedMemHandle, &threadID); - svcMapMemoryBlock(gspSharedMemHandle, 0x10002000, 0x3, 0x10000000); - - //map GSP heap - svcControlMemory((u32*)&gspHeap, 0x0, 0x0, 0x2000000, 0x10003, 0x3); - - //wait until we can write stuff to it - svcWaitSynchronization1(gspEvent, 0x55bcb0); - - //GSP shared mem : 0x2779F000 - gxCmdBuf=(u32*)(0x10002000+0x800+threadID*0x200); - - currentBuffer=0; + memset(m, 0x00, 16*4); + m[0]=m[5]=m[10]=m[15]=1.0f; } -void gspGpuExit() +void multMatrix44(float* m1, float* m2, float* m) //4x4 { - GSPGPU_UnregisterInterruptRelayQueue(NULL); + int i, j; + for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); +} - //unmap GSP shared mem - svcUnmapMemoryBlock(gspSharedMemHandle, 0x10002000); - svcCloseHandle(gspSharedMemHandle); - svcCloseHandle(gspEvent); +void translateMatrix(float* tm, float x, float y, float z) +{ + float rm[16], m[16]; + + loadIdentity44(rm); + rm[3]=x; + rm[7]=y; + rm[11]=z; - gspExit(); - - //free GSP heap - svcControlMemory((u32*)&gspHeap, (u32)gspHeap, 0x0, 0x2000000, MEMOP_FREE, 0x0); + multMatrix44(rm,tm,m); + memcpy(tm,m,16*sizeof(float)); } -void swapBuffers() +void rotateMatrixX(float* tm, float x) { - u32 regData; - GSPGPU_ReadHWRegs(NULL, 0x400478, ®Data, 4); - regData^=1; - currentBuffer=regData&1; - GSPGPU_WriteHWRegs(NULL, 0x400478, ®Data, 4); + float rm[16], m[16]; + memset(rm, 0x00, 16*4); + rm[0]=1.0f; + rm[5]=cos(x); + rm[6]=sin(x); + rm[9]=-sin(x); + rm[10]=cos(x); + rm[15]=1.0f; + multMatrix44(tm,rm,m); + memcpy(tm,m,16*sizeof(float)); } +void rotateMatrixZ(float* tm, float x) +{ + float rm[16], m[16]; + memset(rm, 0x00, 16*4); + rm[0]=cos(x); + rm[1]=sin(x); + rm[4]=-sin(x); + rm[5]=cos(x); + rm[10]=1.0f; + rm[15]=1.0f; + multMatrix44(tm,rm,m); + memcpy(tm,m,16*sizeof(float)); +} + +void scaleMatrix(float* tm, float x, float y, float z) +{ + tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x; + tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y; + tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z; +} + +void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far) +{ + float top = near*tan(fovy/2); + float right = (top*aspect); + + *(m++) = near/right; + *(m++) = 0.0f; + *(m++) = 0.0f; + *(m++) = 0.0f; + + *(m++) = 0.0f; + *(m++) = near/top; + *(m++) = 0.0f; + *(m++) = 0.0f; + + *(m++) = 0.0f; + *(m++) = 0.0f; + // *(m++) = -(far+near)/(far-near); + *(m++) = 0.0f; + // *(m++) = -2.0f*(far*near)/(far-near); + // *(m++) = 1.0f; + *(m++) = -1.0f; + + *(m++) = 0.0f; + *(m++) = 0.0f; + *(m++) = -1.0f; + *(m++) = 0.0f; +} + +void setUniformMatrix(u32 startreg, float* m) +{ + float param[16]; + + param[0x0]=m[3]; //w + param[0x1]=m[2]; //z + param[0x2]=m[1]; //y + param[0x3]=m[0]; //x + + param[0x4]=m[7]; + param[0x5]=m[6]; + param[0x6]=m[5]; + param[0x7]=m[4]; + + param[0x8]=m[11]; + param[0x9]=m[10]; + param[0xa]=m[9]; + param[0xb]=m[8]; + + param[0xc]=m[15]; + param[0xd]=m[14]; + param[0xe]=m[13]; + param[0xf]=m[12]; + + GPU_SetUniform(startreg, (u32*)param, 4); +} + +float angle=0.0f; +float angleZ=0.0f; +float tx, ty, tz; + +u32* gpuOut=(u32*)0x1F119400; +u32* gpuDOut=(u32*)0x1F370800; + +// topscreen +void doFrame1() +{ + static u32 zero[0x400]; + memset(zero, 0x00, 0x400*4); + + //general setup + GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); + + GPU_DepthRange(-1.0f, 0.0f); + + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00); + GPU_SetDepthTest(true, GPU_GREATER, 0x1F); + + // ? + GPUCMD_AddSingleParam(0x00010062, 0x00000000); //param always 0x0 according to code + GPUCMD_AddSingleParam(0x000F0118, 0x00000000); + + //setup shader + SHDR_UseProgram(shader, 0); + + //attribute buffers + GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)vertArray), + GPU_ATTRIBFMT(0, 3, GPU_FLOAT)|GPU_ATTRIBFMT(1, 2, GPU_FLOAT)|GPU_ATTRIBFMT(2, 3, GPU_FLOAT), + 0xFFC, 0x210, 1, (u32[]){0x00000000}, (u64[]){0x210}, (u8[]){3}); + + //? + GPUCMD_AddSingleParam(0x000F0100, 0x00E40100); + GPUCMD_AddSingleParam(0x000F0101, 0x01010000); + GPUCMD_AddSingleParam(0x000F0104, 0x00000010); + + //texturing stuff + GPUCMD_AddSingleParam(0x0002006F, 0x00000100); + GPUCMD_AddSingleParam(0x000F0080, 0x00011001); //enables/disables texturing + + //texenv + GPU_SetTexEnv(3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000); + GPU_SetTexEnv(4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000); + GPU_SetTexEnv(5, GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), + GPU_TEVOPERANDS(0,0,0), GPU_TEVOPERANDS(0,0,0), GPU_MODULATE, GPU_MODULATE, 0xFFFFFFFF); + + //texturing stuff + GPU_SetTexture((u32*)osConvertVirtToPhys((u32)texData),256,256,0x6,GPU_RGBA8); + + //setup matrices + float modelView[16]; + float projection[16]; + + loadIdentity44(modelView); + loadIdentity44(projection); + + translateMatrix(modelView, tx, ty, tz); + rotateMatrixX(modelView, angle); + rotateMatrixZ(modelView, angleZ); + + initProjectionMatrix(projection, 1.3962634f, 240.0f/400.0f, 0.01f, 10.0f); + + setUniformMatrix(0x20, modelView); + // setUniformMatrix(0x24, projection); + setUniformMatrix(0x80, projection); + + //draw first model + GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3); + // GPU_DrawElements(GPU_TRIANGLES, (u32*)(((u32)((void*)indArray-(void*)gspHeap))+0x20000000-base), 6); + + //setup matrices + loadIdentity44(modelView); + loadIdentity44(projection); + + translateMatrix(modelView, tx, -ty, tz); + rotateMatrixX(modelView, -angle); + rotateMatrixZ(modelView, -angleZ); + + setUniformMatrix(0x20, modelView); + + //draw second + GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3); + + //finalize stuff ? + GPUCMD_AddSingleParam(0x000F0111, 0x00000001); + GPUCMD_AddSingleParam(0x000F0110, 0x00000001); + GPUCMD_AddSingleParam(0x0008025E, 0x00000000); +} + +void doFrame3() +{ + GPUCMD_AddSingleParam(0x0008025E, 0x00000000); +} + +void doModel() +{ + memcpy(vertArray, mdlData, sizeof(mdlData)); +} + +extern u32* gpuCmdBuf; +extern u32 gpuCmdBufSize; +extern u32 gpuCmdBufOffset; + int main() { - srvInit(); - - aptInit(APPID_APPLICATION); - - gspGpuInit(); - + srvInit(); + aptInit(); + gfxInit(); hidInit(NULL); - aptSetupEventHandler(); - + GPU_Init(NULL); - u32* gpuCmd=(u32*)(&gspHeap[0x100000]); + u32* gpuCmd=(u32*)(&gspHeap[0x200000]); u32 gpuCmdSize=0x10000; GPU_Reset(gxCmdBuf, gpuCmd, gpuCmdSize); - - DVLB_s* shader=SHDR_ParseSHBIN((u32*)test_shbin,test_shbin_size); + + vertArray=(float*)&gpuCmd[gpuCmdSize]; + colorArray=(float*)&vertArray[0x300]; + indArray=(u16*)&colorArray[0x100]; + texData=(u32*)&indArray[0x10000]; + + memset(vertArray, 0x00, 0x500*4); + memcpy(texData, test_png_bin, test_png_bin_size); + + doModel(); + + tx=ty=0.0f; + tz=-0.1f; + shader=SHDR_ParseSHBIN((u32*)test_shbin,test_shbin_size); + + GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); + + gfxSwapBuffersGpu(); APP_STATUS status; while((status=aptGetStatus())!=APP_EXITING) @@ -108,37 +295,47 @@ int main() u32 PAD=hidSharedMem[7]; u32 regData=PAD|0x01000000; + if(!PAD)regData=0x0; GSPGPU_WriteHWRegs(NULL, 0x202A04, ®Data, 4); + if(PAD&KEY_UP)tx+=0.1f; + if(PAD&KEY_DOWN)tx-=0.1f; + + if(PAD&KEY_LEFT)ty+=0.1f; + if(PAD&KEY_RIGHT)ty-=0.1f; + + if(PAD&KEY_R)tz+=0.1f; + if(PAD&KEY_L)tz-=0.1f; + + if(PAD&KEY_A)angle+=0.1f; + if(PAD&KEY_Y)angle-=0.1f; + + if(PAD&KEY_X)angleZ+=0.1f; + if(PAD&KEY_B)angleZ-=0.1f; + + GX_SetDisplayTransfer(gxCmdBuf, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); + GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); + + svcSleepThread(1000000); //not sure how to do proper GPU (v)sync yet + + // GPUCMD_SetBuffer((u32*)gspHeap, gpuCmdSize, 0); + // GPUCMD_AddSingleParam(0x0008025E, 0x00000000); + // GPUCMD_Finalize(); + // GPUCMD_Run(gxCmdBuf); + GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); - - //depth buffer in VRAM, color buffer in FCRAM (gspHeap) - //(no real reasoning behind this configuration) - GPU_SetViewport((u32*)0x18000000,(u32*)0x20000000,0,0,240*2,400); - SHDR_UseProgram(shader, 0); - GPUCMD_AddSingleParam(0x0008025E, 0x00000000); - + doFrame1(); GPUCMD_Finalize(); GPUCMD_Run(gxCmdBuf); - GX_SetDisplayTransfer(gxCmdBuf, (u32*)gspHeap, GX_BUFFER_DIM(480,400), (u32*)topLeftFramebuffers[currentBuffer], GX_BUFFER_DIM(480,400), 0x01001000); - - swapBuffers(); + gfxSwapBuffersGpu(); } - else if(status == APP_SUSPENDING) - { - aptReturnToMenu(); - } - else if(status == APP_SLEEPMODE) - { - aptWaitStatusEvent(); - } - svcSleepThread(16666666); + svcSleepThread(16666666/2); } hidExit(); - gspGpuExit(); + gfxExit(); aptExit(); - svcExitProcess(); + srvExit(); return 0; } diff --git a/examples/gpu/source/test.vsh b/examples/gpu/source/test.vsh index 8501c47..3f9f5f8 100644 --- a/examples/gpu/source/test.vsh +++ b/examples/gpu/source/test.vsh @@ -1,19 +1,19 @@ ; make sure you update aemstro_as for this (27/05/14) - + ; setup constants .const 5, 0.0, 0.0, -0.99, 1.0 - + ; setup outmap .out o0, result.position .out o1, result.color .out o2, result.texcoord0 .out o3, result.texcoord1 .out o4, result.texcoord2 - + ; setup uniform map (not required) .uniform 0x10, 0x13, mdlvMtx .uniform 0x14, 0x17, projMtx - + ;code main: mov d1A, d00 (0x4) @@ -41,7 +41,7 @@ flush end endmain: - + ;operand descriptors .opdesc x___, xyzw, xyzw ; 0x0 .opdesc _y__, xyzw, xyzw ; 0x1