diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index 0267188..b9a869e 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -20,7 +20,7 @@ void GPUCMD_Add(u32 header, u32* param, u32 paramlength); void GPUCMD_Finalize(void); u32 f32tof24(float f); -u32 computeInvValue(u32 val); +u32 f32tof31(float f); #define GPUCMD_AddSingleParam(header, param) GPUCMD_Add((header), (u32[]){(u32)(param)}, 1) diff --git a/libctru/source/gpu/gpu-old.c b/libctru/source/gpu/gpu-old.c index daee5f0..96cd54e 100644 --- a/libctru/source/gpu/gpu-old.c +++ b/libctru/source/gpu/gpu-old.c @@ -58,9 +58,9 @@ void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u3 GPUCMD_AddWrite(GPUREG_011B, 0x00000000); //? param[0x0]=f32tof24(fw/2); - param[0x1]=computeInvValue(fw); + param[0x1]=f32tof31(2.0f / fw) << 1; param[0x2]=f32tof24(fh/2); - param[0x3]=computeInvValue(fh); + param[0x3]=f32tof31(2.0f / fh) << 1; GPUCMD_AddIncrementalWrites(GPUREG_0041, param, 0x00000004); GPUCMD_AddWrite(GPUREG_0068, (y<<16)|(x&0xFFFF)); diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index ecf8410..401ea60 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -90,38 +90,63 @@ void GPUCMD_Finalize(void) GPUCMD_AddWrite(GPUREG_FINALIZE, 0x12345678); //not the cleanest way of guaranteeing 0x10-byte size but whatever good enough for now } -//TODO : fix +// f24 has: +// - 1 sign bit +// - 7 exponent bits +// - 16 mantissa bits u32 f32tof24(float f) { - if(!f)return 0; - u32 v=*((u32*)&f); - u8 s=v>>31; - u32 exp=((v>>23)&0xFF)-0x40; - u32 man=(v>>7)&0xFFFF; + u32 i; + memcpy(&i, &f, 4); - if(exp>=0)return man|(exp<<16)|(s<<23); - else return s<<23; -} + u32 mantissa = (i << 9) >> 9; + s32 exponent = (i << 1) >> 24; + u32 sign = (i << 0) >> 31; -u32 computeInvValue(u32 val) -{ - //usual values - if(val==240)return 0x38111111; - if(val==480)return 0x37111111; - if(val==400)return 0x3747ae14; - //but let's not limit ourselves to the usual - float fval=2.0/val; - u32 tmp1,tmp2; - u32 tmp3=*((u32*)&fval); - tmp1=(tmp3<<9)>>9; - tmp2=tmp3&(~0x80000000); - if(tmp2) + // Truncate mantissa + mantissa >>= 7; + + // Re-bias exponent + exponent = exponent - 127 + 63; + if (exponent < 0) { - tmp1=(tmp3<<9)>>9; - int tmp=((tmp3<<1)>>24)-0x40; - if(tmp<0)return ((tmp3>>31)<<30)<<1; - else tmp2=tmp; + // Underflow: flush to zero + return sign << 23; } - tmp3>>=31; - return (tmp1|(tmp2<<23)|(tmp3<<30))<<1; + else if (exponent > 0x7F) + { + // Overflow: saturate to infinity + return sign << 23 | 0x7F << 16; + } + + return sign << 23 | exponent << 16 | mantissa; +} + +// f31 has: +// - 1 sign bit +// - 7 exponent bits +// - 23 mantissa bits +u32 f32tof31(float f) +{ + u32 i; + memcpy(&i, &f, 4); + + u32 mantissa = (i << 9) >> 9; + s32 exponent = (i << 1) >> 24; + u32 sign = (i << 0) >> 31; + + // Re-bias exponent + exponent = exponent - 127 + 63; + if (exponent < 0) + { + // Underflow: flush to zero + return sign << 30; + } + else if (exponent > 0x7F) + { + // Overflow: saturate to infinity + return sign << 30 | 0x7F << 23; + } + + return sign << 30 | exponent << 23 | mantissa; }