libctru/libctru/source/gpu/gpu.c

/*
  gpu.c _ Advanced GPU commands.
*/

#include <stdlib.h>
#include <string.h>
#include <3ds/types.h>
#include <3ds/gpu/gpu.h>
#include <3ds/gpu/gx.h>
#include <3ds/gpu/shbin.h>

u32* gpuCmdBuf;
u32 gpuCmdBufSize;
u32 gpuCmdBufOffset;

void GPUCMD_SetBuffer(u32* adr, u32 size, u32 offset)
{
	gpuCmdBuf=adr;
	gpuCmdBufSize=size;
	gpuCmdBufOffset=offset;
}

void GPUCMD_SetBufferOffset(u32 offset)
{
	gpuCmdBufOffset=offset;
}

void GPUCMD_GetBuffer(u32** adr, u32* size, u32* offset)
{
	if(adr)*adr=gpuCmdBuf;
	if(size)*size=gpuCmdBufSize;
	if(offset)*offset=gpuCmdBufOffset;
}

void GPUCMD_AddRawCommands(const u32* cmd, u32 size)
{
	if(!cmd || !size)return;

	memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4);
	gpuCmdBufOffset+=size;
}

void GPUCMD_Run(void)
{
	GX_ProcessCommandList(gpuCmdBuf, gpuCmdBufOffset*4, GX_CMDLIST_FLUSH);
}

extern u32 __ctru_linear_heap;
extern u32 __ctru_linear_heap_size;

void GPUCMD_FlushAndRun(void)
{
	//take advantage of GX_FlushCacheRegions to flush gsp heap
	GX_FlushCacheRegions(gpuCmdBuf, gpuCmdBufOffset*4, (u32 *) __ctru_linear_heap, __ctru_linear_heap_size, NULL, 0);
	GX_ProcessCommandList(gpuCmdBuf, gpuCmdBufOffset*4, 0x0);
}

void GPUCMD_Add(u32 header, const u32* param, u32 paramlength)
{
	u32 zero=0x0;

	if(!param || !paramlength)
	{
		paramlength=1;
		param=&zero;
	}

	if(!gpuCmdBuf || gpuCmdBufOffset+paramlength+1>gpuCmdBufSize)return;

	paramlength--;
	header|=(paramlength&0x7ff)<<20;

	gpuCmdBuf[gpuCmdBufOffset]=param[0];
	gpuCmdBuf[gpuCmdBufOffset+1]=header;

	if(paramlength)memcpy(&gpuCmdBuf[gpuCmdBufOffset+2], &param[1], paramlength*4);

	gpuCmdBufOffset+=paramlength+2;

	if(paramlength&1)gpuCmdBuf[gpuCmdBufOffset++]=0x00000000; //alignment
}

void GPUCMD_Finalize(void)
{
	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0x00000000);
	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 0x00000001);
	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 0x00000001);
	GPUCMD_AddWrite(GPUREG_FINALIZE, 0x12345678);
	GPUCMD_AddWrite(GPUREG_FINALIZE, 0x12345678); //not the cleanest way of guaranteeing 0x10-byte size but whatever good enough for now
}

static inline u32 floatrawbits(float f)
{
	union { float f; u32 i; } s;
	s.f = f;
	return s.i;
}

// f16 has:
//  - 1 sign bit
//  - 5 exponent bits
//  - 10 mantissa bits
u32 f32tof16(float f)
{
	u32 i = floatrawbits(f);

	u32 mantissa = (i << 9) >>  9;
	s32 exponent = (i << 1) >> 24;
	u32 sign     = (i << 0) >> 31;

	// Truncate mantissa
	mantissa >>= 13;

	// Re-bias exponent
	exponent = exponent - 127 + 15;
	if (exponent < 0)
	{
		// Underflow: flush to zero
		return sign << 15;
	}
	else if (exponent > 0x1F)
	{
		// Overflow: saturate to infinity
		return sign << 15 | 0x1F << 10;
	}

	return sign << 15 | exponent << 10 | mantissa;
}

// f20 has:
//  - 1 sign bit
//  - 7 exponent bits
//  - 12 mantissa bits
u32 f32tof20(float f)
{
	u32 i = floatrawbits(f);

	u32 mantissa = (i << 9) >>  9;
	s32 exponent = (i << 1) >> 24;
	u32 sign     = (i << 0) >> 31;

	// Truncate mantissa
	mantissa >>= 11;

	// Re-bias exponent
	exponent = exponent - 127 + 63;
	if (exponent < 0)
	{
		// Underflow: flush to zero
		return sign << 19;
	}
	else if (exponent > 0x7F)
	{
		// Overflow: saturate to infinity
		return sign << 19 | 0x7F << 12;
	}

	return sign << 19 | exponent << 12 | mantissa;
}

// f24 has:
//  - 1 sign bit
//  - 7 exponent bits
//  - 16 mantissa bits
u32 f32tof24(float f)
{
	u32 i = floatrawbits(f);

	u32 mantissa = (i << 9) >>  9;
	s32 exponent = (i << 1) >> 24;
	u32 sign     = (i << 0) >> 31;

	// Truncate mantissa
	mantissa >>= 7;

	// Re-bias exponent
	exponent = exponent - 127 + 63;
	if (exponent < 0)
	{
		// Underflow: flush to zero
		return sign << 23;
	}
	else if (exponent > 0x7F)
	{
		// Overflow: saturate to infinity
		return sign << 23 | 0x7F << 16;
	}

	return sign << 23 | exponent << 16 | mantissa;
}

// f31 has:
//  - 1 sign bit
//  - 7 exponent bits
//  - 23 mantissa bits
u32 f32tof31(float f)
{
	u32 i = floatrawbits(f);

	u32 mantissa = (i << 9) >>  9;
	s32 exponent = (i << 1) >> 24;
	u32 sign     = (i << 0) >> 31;

	// Re-bias exponent
	exponent = exponent - 127 + 63;
	if (exponent < 0)
	{
		// Underflow: flush to zero
		return sign << 30;
	}
	else if (exponent > 0x7F)
	{
		// Overflow: saturate to infinity
		return sign << 30 | 0x7F << 23;
	}

	return sign << 30 | exponent << 23 | mantissa;
}