added WIP GPU example code

This commit is contained in:
smea 2014-08-25 21:47:50 -07:00
parent b9d6ffe8f2
commit 4f397abd63
4 changed files with 410 additions and 159 deletions

View File

@ -1,73 +1,139 @@
CC = arm-none-eabi-gcc
LINK = arm-none-eabi-gcc
AS = arm-none-eabi-as
OBJCOPY = arm-none-eabi-objcopy
CTRULIB = ../libctru
AEMSTROPATH = ../../aemstro
CFLAGS += -Wall -std=c99 -march=armv6 -O3 -I"$(CTRULIB)/include" -I$(DEVKITPRO)/libnds/include
# LDFLAGS += --script=ccd00.ld -L"$(DEVKITARM)/arm-none-eabi/lib" -L"$(DEVKITARM)/lib/gcc/arm-none-eabi/4.7.1" -L"$(CTRULIB)/lib"
# LDFLAGS += --script=ccd00.ld -L"$(DEVKITARM)/arm-none-eabi/lib" -L"$(CTRULIB)/lib"
LDFLAGS += -nostartfiles --specs=ccd00.specs -L"$(DEVKITARM)/arm-none-eabi/lib" -L"$(CTRULIB)/lib"
#---------------------------------------------------------------------------------
.SUFFIXES:
#---------------------------------------------------------------------------------
CFILES = $(wildcard source/*.c)
VSHFILES = $(wildcard source/*.vsh)
SHBINFILES = $(VSHFILES:source/%.vsh=data/%.shbin)
BINFILES = $(wildcard data/*.bin)
OFILES = $(BINFILES:data/%.bin=build/%.bin.o)
OFILES += $(SHBINFILES:data/%.shbin=build/%.shbin.o)
OFILES += $(CFILES:source/%.c=build/%.o)
DFILES = $(CFILES:source/%.c=build/%.d)
SFILES = $(wildcard source/*.s)
OFILES += $(SFILES:source/%.s=build/%.o)
PROJECTNAME = ${shell basename "$(CURDIR)"}
ifeq ($(strip $(DEVKITARM)),)
$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
endif
ifeq ($(strip $(CTRULIB)),)
# THIS IS TEMPORARY - in the future it should be at $(DEVKITPRO)/libctru
$(error "Please set CTRULIB in your environment. export CTRULIB=<path to>libctru")
endif
TOPDIR ?= $(CURDIR)
include $(DEVKITARM)/3ds_rules
#---------------------------------------------------------------------------------
# canned command sequence for binary data, taken from devkitARM
# TARGET is the name of the output
# BUILD is the directory where object files & intermediate files will be placed
# SOURCES is a list of directories containing source code
# DATA is a list of directories containing data files
# INCLUDES is a list of directories containing header files
# SPECS is the directory containing the important build and link files
#---------------------------------------------------------------------------------
# Canned recipe that turns the first prerequisite ($<) into the object file $@:
# the output of `bin2s $<` is piped straight into $(AS).
# It then (re)writes a C header in source/ whose name is the input file name
# with every '.' replaced by '_' (for example foo.bin -> source/foo_bin.h).
# The header declares three externs derived from that name: <name>_end[],
# <name>[] and <name>_size.  The sed step prefixes the symbol name with '_'
# when the file name starts with a digit.
# NOTE(review): the generated header is an extra output that is not the target
# of any rule, so make cannot track it -- confirm nothing depends on its
# timestamp or on it existing before the first build.
define bin2o
bin2s $< | $(AS) -o $(@)
echo "extern const u8" `(echo $(<F) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > source/`(echo $(<F) | tr . _)`.h
echo "extern const u8" `(echo $(<F) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> source/`(echo $(<F) | tr . _)`.h
echo "extern const u32" `(echo $(<F) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> source/`(echo $(<F) | tr . _)`.h
endef
export TARGET := $(shell basename $(CURDIR))
BUILD := build
SOURCES := source
DATA := data
INCLUDES := include
.PHONY:=all dir
all: dir $(PROJECTNAME).bin
#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
ARCH := -march=armv6k -mtune=mpcore
dir:
@mkdir -p build
@mkdir -p data
CFLAGS := -g -Wall -O2 -mword-relocations -save-temps \
-fomit-frame-pointer -ffast-math \
$(ARCH)
$(PROJECTNAME).bin: $(PROJECTNAME).elf
$(OBJCOPY) -O binary $< $@
CFLAGS += $(INCLUDE) -DARM11 -D_3DS
$(PROJECTNAME).elf: $(SHBINFILES) $(OFILES)
# $(LINK) $(LDFLAGS) -o $(PROJECTNAME).elf $(filter-out build/crt0.o, $(OFILES)) -lctru -lc -lgcc
$(LINK) $(LDFLAGS) -o $(PROJECTNAME).elf $(filter-out build/crt0.o, $(OFILES)) -g -lctru -lm
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
ASFLAGS := -g $(ARCH)
LDFLAGS = -specs=3dsx.specs -g $(ARCH) \
-Wl,-Map,$(TARGET).map
LIBS := -lctru -lm
#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing
# include and lib
#---------------------------------------------------------------------------------
LIBDIRS := $(CTRULIB)
#---------------------------------------------------------------------------------
# no real need to edit anything past this point unless you need to add additional
# rules for different file extensions
#---------------------------------------------------------------------------------
ifneq ($(BUILD),$(notdir $(CURDIR)))
#---------------------------------------------------------------------------------
export OUTPUT := $(CURDIR)/$(TARGET)
export TOPDIR := $(CURDIR)
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
$(foreach dir,$(DATA),$(CURDIR)/$(dir))
export DEPSDIR := $(CURDIR)/$(BUILD)
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
# use CXX for linking C++ projects, CC for standard C
#---------------------------------------------------------------------------------
ifeq ($(strip $(CPPFILES)),)
#---------------------------------------------------------------------------------
export LD := $(CC)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export LD := $(CXX)
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
export OFILES := $(addsuffix .o,$(BINFILES)) \
$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
-I$(CURDIR)/$(BUILD)
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
.PHONY: $(BUILD) clean all
#---------------------------------------------------------------------------------
all: $(BUILD)
# Create the build directory when it does not exist yet, then re-run this same
# Makefile from inside it.
# The sub-make is started through $(MAKE) instead of a literal `make` so that
# the jobserver (-j), dry-run (-n) and other command-line flags are passed down
# and the same make binary is used for the nested invocation.
$(BUILD):
	@[ -d $@ ] || mkdir -p $@
	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
#---------------------------------------------------------------------------------
clean:
@rm -f build/*.o build/*.d
@rm -f $(PROJECTNAME).elf $(PROJECTNAME).bin
@echo "all cleaned up !"
@echo clean ...
@rm -fr $(BUILD) $(TARGET).3dsx $(TARGET).elf
#---------------------------------------------------------------------------------
else
DEPENDS := $(OFILES:.o=.d)
#---------------------------------------------------------------------------------
# main targets
#---------------------------------------------------------------------------------
$(OUTPUT).3dsx : $(OUTPUT).elf
$(OUTPUT).elf : $(OFILES)
-include $(DFILES)
data/%.shbin: source/%.vsh
@python $(AEMSTROPATH)/aemstro_as.py $< $@
build/%.o: source/%.c
$(CC) $(CFLAGS) -c $< -o $@
@$(CC) -MM $< > build/$*.d
build/%.o: source/%.s
$(CC) $(CFLAGS) -c $< -o $@
@$(CC) -MM $< > build/$*.d
build/%.shbin.o: data/%.shbin
#---------------------------------------------------------------------------------
# you need a rule like this for each extension you use as binary data
#---------------------------------------------------------------------------------
%.bin.o : %.bin
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
build/%.bin.o: data/%.bin
@echo $(notdir $<)
@$(bin2o)
-include $(DEPENDS)
#---------------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------------

View File

@ -1,12 +0,0 @@
@ Minimal startup stub, emitted into the ".init" section and assembled as ARM
@ (not Thumb) code.  Both _init and _start are exported.
.section ".init"
.arm
.align 4
.global _init
.global _start
@ Entry point: branch-and-link to __libc_init_array, then to main.
_start:
blx __libc_init_array
blx main
@ NOTE(review): there is no instruction between the call to main and _init, so
@ if main ever returns execution falls through into _init below -- confirm
@ that main never returns (or add an explicit exit here).
@ _init itself does nothing and returns to its caller.
_init:
bx lr

View File

@ -1,3 +1,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <3ds/types.h>
#include <3ds/srv.h>
#include <3ds/APT.h>
@ -7,98 +11,281 @@
#include <3ds/HID.h>
#include <3ds/SHDR.h>
#include <3ds/svc.h>
#include <3ds/os.h>
#include <3ds/gfx.h>
#include "costable.h"
#include "test_shbin.h"
#include "test_png_bin.h"
#include "mdl.h"
u8* gspHeap;
u32* gxCmdBuf;
u8 currentBuffer;
u8* topLeftFramebuffers[2];
u8* topLeftFramebuffersPA[2];
Handle gspEvent, gspSharedMemHandle;
void gspGpuInit()
DVLB_s* shader;
float* vertArray;
float* colorArray;
u16* indArray;
u32* texData;
void loadIdentity44(float* m)
{
gspInit();
if(!m)return;
GSPGPU_AcquireRight(NULL, 0x0);
GSPGPU_SetLcdForceBlack(NULL, 0x0);
//set subscreen to blue
u32 regData=0x01FF0000;
GSPGPU_WriteHWRegs(NULL, 0x202A04, &regData, 4);
//grab main left screen framebuffer addresses
GSPGPU_ReadHWRegs(NULL, 0x400468, (u32*)&topLeftFramebuffersPA, 8);
//convert PA to VA (assuming FB in VRAM)
topLeftFramebuffers[0]=topLeftFramebuffersPA[0]+0x7000000;
topLeftFramebuffers[1]=topLeftFramebuffersPA[1]+0x7000000;
//setup our gsp shared mem section
u8 threadID;
svcCreateEvent(&gspEvent, 0x0);
GSPGPU_RegisterInterruptRelayQueue(NULL, gspEvent, 0x1, &gspSharedMemHandle, &threadID);
svcMapMemoryBlock(gspSharedMemHandle, 0x10002000, 0x3, 0x10000000);
//map GSP heap
svcControlMemory((u32*)&gspHeap, 0x0, 0x0, 0x2000000, 0x10003, 0x3);
//wait until we can write stuff to it
svcWaitSynchronization1(gspEvent, 0x55bcb0);
//GSP shared mem : 0x2779F000
gxCmdBuf=(u32*)(0x10002000+0x800+threadID*0x200);
currentBuffer=0;
memset(m, 0x00, 16*4);
m[0]=m[5]=m[10]=m[15]=1.0f;
}
void gspGpuExit()
void multMatrix44(float* m1, float* m2, float* m) //4x4
{
GSPGPU_UnregisterInterruptRelayQueue(NULL);
int i, j;
for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]);
}
//unmap GSP shared mem
svcUnmapMemoryBlock(gspSharedMemHandle, 0x10002000);
svcCloseHandle(gspSharedMemHandle);
svcCloseHandle(gspEvent);
void translateMatrix(float* tm, float x, float y, float z)
{
float rm[16], m[16];
loadIdentity44(rm);
rm[3]=x;
rm[7]=y;
rm[11]=z;
gspExit();
//free GSP heap
svcControlMemory((u32*)&gspHeap, (u32)gspHeap, 0x0, 0x2000000, MEMOP_FREE, 0x0);
multMatrix44(rm,tm,m);
memcpy(tm,m,16*sizeof(float));
}
void swapBuffers()
void rotateMatrixX(float* tm, float x)
{
u32 regData;
GSPGPU_ReadHWRegs(NULL, 0x400478, &regData, 4);
regData^=1;
currentBuffer=regData&1;
GSPGPU_WriteHWRegs(NULL, 0x400478, &regData, 4);
float rm[16], m[16];
memset(rm, 0x00, 16*4);
rm[0]=1.0f;
rm[5]=cos(x);
rm[6]=sin(x);
rm[9]=-sin(x);
rm[10]=cos(x);
rm[15]=1.0f;
multMatrix44(tm,rm,m);
memcpy(tm,m,16*sizeof(float));
}
/*
 * Applies a rotation about the Z axis to the 4x4 matrix tm (16 floats).
 *
 * tm : matrix to update in place
 * x  : rotation angle, passed to cos()/sin() (radians)
 *
 * A rotation matrix is built locally, multMatrix44(tm, rotation, result) is
 * evaluated, and the result is copied back over tm.
 */
void rotateMatrixZ(float* tm, float x)
{
	float rot[16], result[16];
	const float c=cos(x);
	const float s=sin(x);

	/* start from an all-zero matrix and fill in only the non-zero entries */
	memset(rot, 0x00, sizeof(rot));

	rot[0]=c;
	rot[1]=s;
	rot[4]=-s;
	rot[5]=c;
	rot[10]=1.0f;
	rot[15]=1.0f;

	/* multiply into a scratch matrix, then copy it back over tm */
	multMatrix44(tm,rot,result);
	memcpy(tm,result,sizeof(result));
}
/*
 * Scales the 4x4 matrix tm (16 floats) in place.
 *
 * Within every group of four consecutive floats, element 0 is multiplied by
 * x, element 1 by y and element 2 by z; element 3 is left untouched.
 */
void scaleMatrix(float* tm, float x, float y, float z)
{
	int group;

	for(group=0;group<4;group++)
	{
		float* v=&tm[group*4];

		v[0]*=x;
		v[1]*=y;
		v[2]*=z;
	}
}
void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
{
float top = near*tan(fovy/2);
float right = (top*aspect);
*(m++) = near/right;
*(m++) = 0.0f;
*(m++) = 0.0f;
*(m++) = 0.0f;
*(m++) = 0.0f;
*(m++) = near/top;
*(m++) = 0.0f;
*(m++) = 0.0f;
*(m++) = 0.0f;
*(m++) = 0.0f;
// *(m++) = -(far+near)/(far-near);
*(m++) = 0.0f;
// *(m++) = -2.0f*(far*near)/(far-near);
// *(m++) = 1.0f;
*(m++) = -1.0f;
*(m++) = 0.0f;
*(m++) = 0.0f;
*(m++) = -1.0f;
*(m++) = 0.0f;
}
/*
 * Uploads the 4x4 matrix m (16 floats) through GPU_SetUniform, starting at
 * startreg.
 *
 * Each group of four consecutive floats is copied in reversed order
 * (element 3 first, element 0 last, i.e. w,z,y,x) into a temporary array,
 * which is then handed to GPU_SetUniform with a count of 4.
 * NOTE(review): the count presumably means four 4-component registers --
 * confirm against GPU_SetUniform.
 */
void setUniformMatrix(u32 startreg, float* m)
{
	float param[16];
	int group, k;

	for(group=0;group<4;group++)
	{
		for(k=0;k<4;k++)
		{
			/* reverse the component order inside this group of four */
			param[group*4+k]=m[group*4+(3-k)];
		}
	}

	GPU_SetUniform(startreg, (u32*)param, 4);
}
float angle=0.0f;
float angleZ=0.0f;
float tx, ty, tz;
u32* gpuOut=(u32*)0x1F119400;
u32* gpuDOut=(u32*)0x1F370800;
// topscreen
/*
 * Queues the GPU commands for one frame.
 *
 * Sets up the viewport (gpuDOut / gpuOut converted with osConvertVirtToPhys),
 * depth/stencil/culling state, the shader program, the attribute buffer
 * (vertArray), the texture environment and the texture (texData), then draws
 * the model twice: once with (tx, ty, tz, angle, angleZ) and once with ty and
 * both angles negated.  The modelView matrix is uploaded at uniform 0x20 for
 * each draw; the projection matrix is uploaded once at uniform 0x80.
 *
 * Several writes below go to raw register ids whose meaning is not documented
 * here; their values and order are kept exactly as they were.
 *
 * Compared with the previous version, a static 0x400-word scratch buffer that
 * was cleared every frame but never read has been removed, as has a second
 * reset of the local projection matrix whose result was never used.
 */
void doFrame1()
{
	//general setup
	GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);

	GPU_DepthRange(-1.0f, 0.0f);

	GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
	GPU_SetStencilTest(false, GPU_ALWAYS, 0x00);
	GPU_SetDepthTest(true, GPU_GREATER, 0x1F);

	// ? (purpose of these two registers is unknown)
	GPUCMD_AddSingleParam(0x00010062, 0x00000000); //param always 0x0 according to code
	GPUCMD_AddSingleParam(0x000F0118, 0x00000000);

	//setup shader
	SHDR_UseProgram(shader, 0);

	//attribute buffers
	GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)vertArray),
		GPU_ATTRIBFMT(0, 3, GPU_FLOAT)|GPU_ATTRIBFMT(1, 2, GPU_FLOAT)|GPU_ATTRIBFMT(2, 3, GPU_FLOAT),
		0xFFC, 0x210, 1, (u32[]){0x00000000}, (u64[]){0x210}, (u8[]){3});

	//? (purpose of these three registers is unknown)
	GPUCMD_AddSingleParam(0x000F0100, 0x00E40100);
	GPUCMD_AddSingleParam(0x000F0101, 0x01010000);
	GPUCMD_AddSingleParam(0x000F0104, 0x00000010);

	//texturing stuff
	GPUCMD_AddSingleParam(0x0002006F, 0x00000100);
	GPUCMD_AddSingleParam(0x000F0080, 0x00011001); //enables/disables texturing

	//texenv
	GPU_SetTexEnv(3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000);
	GPU_SetTexEnv(4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000);
	GPU_SetTexEnv(5, GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
		GPU_TEVOPERANDS(0,0,0), GPU_TEVOPERANDS(0,0,0), GPU_MODULATE, GPU_MODULATE, 0xFFFFFFFF);

	//texturing stuff
	GPU_SetTexture((u32*)osConvertVirtToPhys((u32)texData),256,256,0x6,GPU_RGBA8);

	//setup matrices
	float modelView[16];
	float projection[16];

	loadIdentity44(modelView);
	loadIdentity44(projection);

	translateMatrix(modelView, tx, ty, tz);
	rotateMatrixX(modelView, angle);
	rotateMatrixZ(modelView, angleZ);

	initProjectionMatrix(projection, 1.3962634f, 240.0f/400.0f, 0.01f, 10.0f);

	setUniformMatrix(0x20, modelView);
	setUniformMatrix(0x80, projection);

	//draw first model
	GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3);

	//setup matrices for the second draw (ty and both angles negated);
	//the projection uniform uploaded above is left as it is
	loadIdentity44(modelView);

	translateMatrix(modelView, tx, -ty, tz);
	rotateMatrixX(modelView, -angle);
	rotateMatrixZ(modelView, -angleZ);

	setUniformMatrix(0x20, modelView);

	//draw second
	GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3);

	//finalize stuff ? (exact meaning of these registers is unknown)
	GPUCMD_AddSingleParam(0x000F0111, 0x00000001);
	GPUCMD_AddSingleParam(0x000F0110, 0x00000001);
	GPUCMD_AddSingleParam(0x0008025E, 0x00000000);
}
// Queues a single GPU command: register id 0x0008025E with parameter 0.
// NOTE(review): the meaning of this register is not documented here; the same
// write is the last command issued by doFrame1 -- confirm its purpose.
void doFrame3()
{
GPUCMD_AddSingleParam(0x0008025E, 0x00000000);
}
// Copies the whole mdlData array (sizeof(mdlData) bytes) to the start of the
// global vertArray buffer.
// NOTE(review): assumes vertArray has already been pointed at a buffer at
// least sizeof(mdlData) bytes large -- no size check is done here; confirm
// against the caller.
void doModel()
{
memcpy(vertArray, mdlData, sizeof(mdlData));
}
extern u32* gpuCmdBuf;
extern u32 gpuCmdBufSize;
extern u32 gpuCmdBufOffset;
int main()
{
srvInit();
aptInit(APPID_APPLICATION);
gspGpuInit();
srvInit();
aptInit();
gfxInit();
hidInit(NULL);
aptSetupEventHandler();
GPU_Init(NULL);
u32* gpuCmd=(u32*)(&gspHeap[0x100000]);
u32* gpuCmd=(u32*)(&gspHeap[0x200000]);
u32 gpuCmdSize=0x10000;
GPU_Reset(gxCmdBuf, gpuCmd, gpuCmdSize);
DVLB_s* shader=SHDR_ParseSHBIN((u32*)test_shbin,test_shbin_size);
vertArray=(float*)&gpuCmd[gpuCmdSize];
colorArray=(float*)&vertArray[0x300];
indArray=(u16*)&colorArray[0x100];
texData=(u32*)&indArray[0x10000];
memset(vertArray, 0x00, 0x500*4);
memcpy(texData, test_png_bin, test_png_bin_size);
doModel();
tx=ty=0.0f;
tz=-0.1f;
shader=SHDR_ParseSHBIN((u32*)test_shbin,test_shbin_size);
GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201);
gfxSwapBuffersGpu();
APP_STATUS status;
while((status=aptGetStatus())!=APP_EXITING)
@ -108,37 +295,47 @@ int main()
u32 PAD=hidSharedMem[7];
u32 regData=PAD|0x01000000;
if(!PAD)regData=0x0;
GSPGPU_WriteHWRegs(NULL, 0x202A04, &regData, 4);
if(PAD&KEY_UP)tx+=0.1f;
if(PAD&KEY_DOWN)tx-=0.1f;
if(PAD&KEY_LEFT)ty+=0.1f;
if(PAD&KEY_RIGHT)ty-=0.1f;
if(PAD&KEY_R)tz+=0.1f;
if(PAD&KEY_L)tz-=0.1f;
if(PAD&KEY_A)angle+=0.1f;
if(PAD&KEY_Y)angle-=0.1f;
if(PAD&KEY_X)angleZ+=0.1f;
if(PAD&KEY_B)angleZ-=0.1f;
GX_SetDisplayTransfer(gxCmdBuf, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000);
GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201);
svcSleepThread(1000000); //not sure how to do proper GPU (v)sync yet
// GPUCMD_SetBuffer((u32*)gspHeap, gpuCmdSize, 0);
// GPUCMD_AddSingleParam(0x0008025E, 0x00000000);
// GPUCMD_Finalize();
// GPUCMD_Run(gxCmdBuf);
GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
//depth buffer in VRAM, color buffer in FCRAM (gspHeap)
//(no real reasoning behind this configuration)
GPU_SetViewport((u32*)0x18000000,(u32*)0x20000000,0,0,240*2,400);
SHDR_UseProgram(shader, 0);
GPUCMD_AddSingleParam(0x0008025E, 0x00000000);
doFrame1();
GPUCMD_Finalize();
GPUCMD_Run(gxCmdBuf);
GX_SetDisplayTransfer(gxCmdBuf, (u32*)gspHeap, GX_BUFFER_DIM(480,400), (u32*)topLeftFramebuffers[currentBuffer], GX_BUFFER_DIM(480,400), 0x01001000);
swapBuffers();
gfxSwapBuffersGpu();
}
else if(status == APP_SUSPENDING)
{
aptReturnToMenu();
}
else if(status == APP_SLEEPMODE)
{
aptWaitStatusEvent();
}
svcSleepThread(16666666);
svcSleepThread(16666666/2);
}
hidExit();
gspGpuExit();
gfxExit();
aptExit();
svcExitProcess();
srvExit();
return 0;
}

View File

@ -1,19 +1,19 @@
; make sure you update aemstro_as for this (27/05/14)
; setup constants
.const 5, 0.0, 0.0, -0.99, 1.0
; setup outmap
.out o0, result.position
.out o1, result.color
.out o2, result.texcoord0
.out o3, result.texcoord1
.out o4, result.texcoord2
; setup uniform map (not required)
.uniform 0x10, 0x13, mdlvMtx
.uniform 0x14, 0x17, projMtx
;code
main:
mov d1A, d00 (0x4)
@ -41,7 +41,7 @@
flush
end
endmain:
;operand descriptors
.opdesc x___, xyzw, xyzw ; 0x0
.opdesc _y__, xyzw, xyzw ; 0x1