Merge branch 'master' into great-refactor

Conflicts: libctru/source/gpu/gpu.c
2015-09-05 22:14:34 +02:00 · 2015-09-05 22:14:34 +02:00 · 15af3eff30
commit 15af3eff30
parent 2707bfad93 57eb921f1a
3 changed files with 56 additions and 31 deletions
--- a/libctru/include/3ds/gpu/gpu.h
+++ b/libctru/include/3ds/gpu/gpu.h
@ -20,7 +20,7 @@ void GPUCMD_Add(u32 header, u32* param, u32 paramlength);
 void GPUCMD_Finalize(void);
 u32 f32tof24(float f);
-u32 computeInvValue(u32 val);
+u32 f32tof31(float f);
 #define GPUCMD_AddSingleParam(header, param) GPUCMD_Add((header), (u32[]){(u32)(param)}, 1)
--- a/libctru/source/gpu/gpu-old.c
+++ b/libctru/source/gpu/gpu-old.c
@ -58,9 +58,9 @@ void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u3
 	GPUCMD_AddWrite(GPUREG_011B, 0x00000000); //?
 	param[0x0]=f32tof24(fw/2);
-	param[0x1]=computeInvValue(fw);
+	param[0x1]=f32tof31(2.0f / fw) << 1;
 	param[0x2]=f32tof24(fh/2);
-	param[0x3]=computeInvValue(fh);
+	param[0x3]=f32tof31(2.0f / fh) << 1;
 	GPUCMD_AddIncrementalWrites(GPUREG_0041, param, 0x00000004);
 	GPUCMD_AddWrite(GPUREG_0068, (y<<16)|(x&0xFFFF));
--- a/libctru/source/gpu/gpu.c
+++ b/libctru/source/gpu/gpu.c
@ -90,38 +90,63 @@ void GPUCMD_Finalize(void)
 	GPUCMD_AddWrite(GPUREG_FINALIZE, 0x12345678); //not the cleanest way of guaranteeing 0x10-byte size but whatever good enough for now
 }
-//TODO : fix
+// f24 has:
 //  - 1 sign bit
 //  - 7 exponent bits
 //  - 16 mantissa bits
 u32 f32tof24(float f)
 {
-	if(!f)return 0;
+	u32 i;
-	u32 v=*((u32*)&f);
+	memcpy(&i, &f, 4);
 	u8 s=v>>31;
 	u32 exp=((v>>23)&0xFF)-0x40;
 	u32 man=(v>>7)&0xFFFF;
-	if(exp>=0)return man|(exp<<16)|(s<<23);
+	u32 mantissa = (i << 9) >>  9;
-	else return s<<23;
+	s32 exponent = (i << 1) >> 24;
-}
+	u32 sign     = (i << 0) >> 31;
-u32 computeInvValue(u32 val)
+	// Truncate mantissa
-{
+	mantissa >>= 7;
-	//usual values
+
-	if(val==240)return 0x38111111;
+	// Re-bias exponent
-	if(val==480)return 0x37111111;
+	exponent = exponent - 127 + 63;
-	if(val==400)return 0x3747ae14;
+	if (exponent < 0)
 	//but let's not limit ourselves to the usual
 	float fval=2.0/val;
 	u32 tmp1,tmp2;
 	u32 tmp3=*((u32*)&fval);
 	tmp1=(tmp3<<9)>>9;
 	tmp2=tmp3&(~0x80000000);
 	if(tmp2)
 	{
-		tmp1=(tmp3<<9)>>9;
+		// Underflow: flush to zero
-		int tmp=((tmp3<<1)>>24)-0x40;
+		return sign << 23;
 		if(tmp<0)return ((tmp3>>31)<<30)<<1;
 		else tmp2=tmp;
 	}
-	tmp3>>=31;
+	else if (exponent > 0x7F)
-	return (tmp1|(tmp2<<23)|(tmp3<<30))<<1;
+	{
 		// Overflow: saturate to infinity
 		return sign << 23 | 0x7F << 16;
 	}
 	return sign << 23 | exponent << 16 | mantissa;
 }
 // f31 has:
 //  - 1 sign bit
 //  - 7 exponent bits
 //  - 23 mantissa bits
 u32 f32tof31(float f)
 {
 	u32 i;
 	memcpy(&i, &f, 4);
 	u32 mantissa = (i << 9) >>  9;
 	s32 exponent = (i << 1) >> 24;
 	u32 sign     = (i << 0) >> 31;
 	// Re-bias exponent
 	exponent = exponent - 127 + 63;
 	if (exponent < 0)
 	{
 		// Underflow: flush to zero
 		return sign << 30;
 	}
 	else if (exponent > 0x7F)
 	{
 		// Overflow: saturate to infinity
 		return sign << 30 | 0x7F << 23;
 	}
 	return sign << 30 | exponent << 23 | mantissa;
 }