From 4109bf5b66d1d29ad816f327be49c0b82ec28f30 Mon Sep 17 00:00:00 2001 From: Lectem Date: Mon, 20 Jul 2015 00:20:35 +0200 Subject: [PATCH 1/8] Fixed wrap_s and wrap_t Those were swapped. --- libctru/include/3ds/gpu/gpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libctru/include/3ds/gpu/gpu.h b/libctru/include/3ds/gpu/gpu.h index dc99f54..8370721 100644 --- a/libctru/include/3ds/gpu/gpu.h +++ b/libctru/include/3ds/gpu/gpu.h @@ -30,8 +30,8 @@ void GPUCMD_Finalize(); //tex param #define GPU_TEXTURE_MAG_FILTER(v) (((v)&0x1)<<1) //takes a GPU_TEXTURE_FILTER_PARAM #define GPU_TEXTURE_MIN_FILTER(v) (((v)&0x1)<<2) //takes a GPU_TEXTURE_FILTER_PARAM -#define GPU_TEXTURE_WRAP_S(v) (((v)&0x3)<<8) //takes a GPU_TEXTURE_WRAP_PARAM -#define GPU_TEXTURE_WRAP_T(v) (((v)&0x3)<<12) //takes a GPU_TEXTURE_WRAP_PARAM +#define GPU_TEXTURE_WRAP_S(v) (((v)&0x3)<<12) //takes a GPU_TEXTURE_WRAP_PARAM +#define GPU_TEXTURE_WRAP_T(v) (((v)&0x3)<<8) //takes a GPU_TEXTURE_WRAP_PARAM typedef enum { From bf7d686e8876c00e74e9c2f3745b3314c5eacbb1 Mon Sep 17 00:00:00 2001 From: fincs Date: Wed, 22 Jul 2015 19:41:59 +0200 Subject: [PATCH 2/8] Add new GPU examples --- examples/gpu/README.md | 11 - examples/gpu/data/test.vsh | 57 --- examples/gpu/data/texture.bin | Bin 65536 -> 0 bytes examples/gpu/source/_gs.s | 16 - examples/gpu/source/gs.c | 432 ------------------ examples/gpu/source/gs.h | 59 --- examples/gpu/source/main.c | 354 -------------- examples/gpu/source/math.c | 148 ------ examples/gpu/source/math.h | 144 ------ .../{gpu => graphics/gpu/geoshader}/Makefile | 20 +- examples/graphics/gpu/geoshader/README.md | 6 + .../graphics/gpu/geoshader/source/3dmath.c | 172 +++++++ .../graphics/gpu/geoshader/source/3dmath.h | 56 +++ examples/graphics/gpu/geoshader/source/gpu.c | 93 ++++ examples/graphics/gpu/geoshader/source/gpu.h | 26 ++ .../gpu/geoshader/source/gshader.pica | 91 ++++ examples/graphics/gpu/geoshader/source/main.c | 139 ++++++ 
.../gpu/geoshader/source/vshader.pica | 24 + examples/graphics/gpu/simple_tri/Makefile | 177 +++++++ examples/graphics/gpu/simple_tri/README.md | 6 + .../graphics/gpu/simple_tri/source/3dmath.c | 172 +++++++ .../graphics/gpu/simple_tri/source/3dmath.h | 56 +++ examples/graphics/gpu/simple_tri/source/gpu.c | 93 ++++ examples/graphics/gpu/simple_tri/source/gpu.h | 26 ++ .../graphics/gpu/simple_tri/source/main.c | 131 ++++++ .../gpu/simple_tri/source/vshader.pica | 34 ++ examples/graphics/gpu/textured_cube/Makefile | 177 +++++++ examples/graphics/gpu/textured_cube/README.md | 6 + .../gpu/textured_cube/data/kitten.bin | Bin 0 -> 16384 bytes .../gpu/textured_cube/source/3dmath.c | 172 +++++++ .../gpu/textured_cube/source/3dmath.h | 56 +++ .../graphics/gpu/textured_cube/source/gpu.c | 93 ++++ .../graphics/gpu/textured_cube/source/gpu.h | 26 ++ .../graphics/gpu/textured_cube/source/main.c | 244 ++++++++++ .../gpu/textured_cube/source/vshader.pica | 90 ++++ 35 files changed, 2177 insertions(+), 1230 deletions(-) delete mode 100644 examples/gpu/README.md delete mode 100644 examples/gpu/data/test.vsh delete mode 100644 examples/gpu/data/texture.bin delete mode 100644 examples/gpu/source/_gs.s delete mode 100644 examples/gpu/source/gs.c delete mode 100644 examples/gpu/source/gs.h delete mode 100644 examples/gpu/source/main.c delete mode 100644 examples/gpu/source/math.c delete mode 100644 examples/gpu/source/math.h rename examples/{gpu => graphics/gpu/geoshader}/Makefile (89%) create mode 100644 examples/graphics/gpu/geoshader/README.md create mode 100644 examples/graphics/gpu/geoshader/source/3dmath.c create mode 100644 examples/graphics/gpu/geoshader/source/3dmath.h create mode 100644 examples/graphics/gpu/geoshader/source/gpu.c create mode 100644 examples/graphics/gpu/geoshader/source/gpu.h create mode 100644 examples/graphics/gpu/geoshader/source/gshader.pica create mode 100644 examples/graphics/gpu/geoshader/source/main.c create mode 100644 
examples/graphics/gpu/geoshader/source/vshader.pica create mode 100644 examples/graphics/gpu/simple_tri/Makefile create mode 100644 examples/graphics/gpu/simple_tri/README.md create mode 100644 examples/graphics/gpu/simple_tri/source/3dmath.c create mode 100644 examples/graphics/gpu/simple_tri/source/3dmath.h create mode 100644 examples/graphics/gpu/simple_tri/source/gpu.c create mode 100644 examples/graphics/gpu/simple_tri/source/gpu.h create mode 100644 examples/graphics/gpu/simple_tri/source/main.c create mode 100644 examples/graphics/gpu/simple_tri/source/vshader.pica create mode 100644 examples/graphics/gpu/textured_cube/Makefile create mode 100644 examples/graphics/gpu/textured_cube/README.md create mode 100644 examples/graphics/gpu/textured_cube/data/kitten.bin create mode 100644 examples/graphics/gpu/textured_cube/source/3dmath.c create mode 100644 examples/graphics/gpu/textured_cube/source/3dmath.h create mode 100644 examples/graphics/gpu/textured_cube/source/gpu.c create mode 100644 examples/graphics/gpu/textured_cube/source/gpu.h create mode 100644 examples/graphics/gpu/textured_cube/source/main.c create mode 100644 examples/graphics/gpu/textured_cube/source/vshader.pica diff --git a/examples/gpu/README.md b/examples/gpu/README.md deleted file mode 100644 index 2a118cc..0000000 --- a/examples/gpu/README.md +++ /dev/null @@ -1,11 +0,0 @@ -gpu -======= - -example of how to use the GPU with libctru - -before trying to compile, make sure to download aemstro -( https://github.com/smealum/aemstro reflog: 51bfeef9e1a0149726dca43b50919bd45917015a ) -and update AEMSTRO environment variable with the proper path - -You'll also need to install Python 3 and have that in your path. 
- diff --git a/examples/gpu/data/test.vsh b/examples/gpu/data/test.vsh deleted file mode 100644 index a142641..0000000 --- a/examples/gpu/data/test.vsh +++ /dev/null @@ -1,57 +0,0 @@ -; setup constants - .const c20, 1.0, 0.0, 0.5, 1.0 - -; setup outmap - .out o0, result.position, 0xF - .out o1, result.color, 0xF - .out o2, result.texcoord0, 0x3 - .out o3, result.texcoord1, 0x3 - .out o4, result.texcoord2, 0x3 - -; setup uniform map (not required) - .uniform c0, c3, projection - .uniform c4, c7, modelview - .uniform c8, c8, lightDirection - .uniform c9, c9, lightAmbient - - .vsh vmain, end_vmain - -;code - vmain: - mov r1, v0 (0x4) - mov r1, c20 (0x3) - ; temp = modvMtx * in.pos - dp4 r0, c4, r1 (0x0) - dp4 r0, c5, r1 (0x1) - dp4 r0, c6, r1 (0x2) - mov r0, c20 (0x3) - ; result.pos = projMtx * temp - dp4 o0, c0, r0 (0x0) - dp4 o0, c1, r0 (0x1) - dp4 o0, c2, r0 (0x2) - dp4 o0, c3, r0 (0x3) - ; result.texcoord = in.texcoord - mov o2, v1 (0x5) - mov o3, c20 (0x7) - mov o4, c20 (0x7) - ; result.color = crappy lighting - dp3 r0, c8, v2 (0x4) - max r0, c20, r0 (0x9) - mul r0, c9, r0 (0x4) - add o1, c9, r0 (0x4) - mov o1, c20 (0x3) - nop - end - end_vmain: - -;operand descriptors - .opdesc x___, xyzw, xyzw ; 0x0 - .opdesc _y__, xyzw, xyzw ; 0x1 - .opdesc __z_, xyzw, xyzw ; 0x2 - .opdesc ___w, xyzw, xyzw ; 0x3 - .opdesc xyz_, xyzw, xyzw ; 0x4 - .opdesc xyzw, xyzw, xyzw ; 0x5 - .opdesc x_zw, xyzw, xyzw ; 0x6 - .opdesc xyzw, yyyw, xyzw ; 0x7 - .opdesc xyz_, wwww, wwww ; 0x8 - .opdesc xyz_, yyyy, xyzw ; 0x9 diff --git a/examples/gpu/data/texture.bin b/examples/gpu/data/texture.bin deleted file mode 100644 index 4a3312bd8d77116a79b265fd9a6b02514845fa08..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 65536 zcmeIb3D{;ubuRjSf43mwHK7}5rreW9NltF=lQ<BM6Eb$Lka|-jisAqarFZ z5+;#Blm=v!K}A6nh@#9$Gj~IGGbm~RnT!h94Ky>>ecyMjw|0H?e{Cm+oJXG=ny08; zRcm1BM$-DP~oeP#5iuaxl>50e=D9B} 
zqrZMj8NGPhGFtbUW%LY`ZQfZ%+rFoamV^FM(A`zUKfVGwuL4%uhA|VQigem9;*&SBD?=!}g~WH*Kd6Xggze32NbozZp}u;gjCpzCFH;F^Av9 zpR&}ZKha*y_@P{hW8zjk(Pk_ew@cw4jE#N@%vbT4W67hK;+QgcOghS_?WFY>$Ek<> zS$e#RzveS&VL!2tU<0uVuk3*g_=52izvc_Z;j>A3AAsF)t9CKg4ZY?NeMs8svuro~ zmOmSR*yQ~z;-9fs{Ad&LhYhRe2T!wpV!i~g=Y!T0W0(7tY-)~eqvI!=WCLV7-n1Wm zZfh>JeVi-yWgZW596!)-&7)}k6SoTbwEoei?Ti`s0__KEn}47m{J}oZYX3(JHD7{1 z_5;d9o7h{MaqM|&oVLw&w(Yw2PxVL0x13sYUlIL^@^tOjwm<6Fx!PpZ& z=&|-`bL^M1*FWdG;~ndt?eL}c6xyHjRhVc${lRlp6IaekDHjx}EAKV)(oJh7gl zEt*D-edl_J7kFW}+KxTrNBr9FVKZYWdr-r6&WC{?m^gnX{6Tuy(##jPV?BU}b}Ig~ z7y7iHp}vys!3G`Ej`T~lm3^ekBHy+1pW`_<`kFige%hb&zwZCEpZIBSlz#u}_dChc z`2c!xK4|Tg%{hO50k8R@q23xh}O|SMOzv#i^{jinUU+)Bd zt_R{r3^IO>m)kM_ajl^B6TZ{@$vLF84}W^T#C{2xIe*xeOyX+hzxM*i*?#D2*}yfy z`Ow;Ie^Ty*&Gbq1dA_h-Q~ZU$x!$4uv-Ur&7huu+i8M9YHsKCk7HG@f714L?#c%C+XuWRqTkdW{MP!fKUVznT7}mvlk)+xnt16N zu3pb!3^qL-*E{Pf{#-LFbR2Ey&-p+4F~%7GMeyH`L*|Ib+*1&LVa^z9KcEdw%z4b* zrd}f|_H2XBw&%Rj^^cF~8?F({c6~YW0`egTdOEPLdkbig$IZqM{%*dgW+#9V2uJCOq+=TQ#CT#7jmb0y|XuX%SFz4q^v3$69VMP>A&S0M*_ zW`h?#fDQi#@UKUI$R6M#c3$%mE6#s0A8?%We{}Qx<+Ph_E2Fy}Dl;EDp=Q%NTy}$w z$3J_pjE}wmc0OLlA3dPV?De&>@6ub!Xy-%AoE;8PuA()&2)MZx=nuv%YvpRp@7}*> zQ4Zv}K!0&vFMq6zcRRd{uZw!L=TT+TgHJB37aVkQ886Fz(H&MsdwjlZg#OVdk5(Od z_4&xd;giohzeZl2`Sr6i|9(!^%)3X|uB!RD@GgEJa3c;g7r+&yj1k9iLdKta!jmY_->-jH7-p~C0T*Tc&E-4=>=^rx>Z zn}Bvc`eoZ6{bd=y2Kn|&USHNn--11lEbH&G5aqD4>47Jfwcha|@E%b%{P-bdqkWDk z^D(yRNA?5%5oLYIB<;Li7fSv!_d2F*y4{{-{FQ5PzvBF|;G+kY4R$)X%-`$SvhfZF zihcv|%-{9!GLL$2ykM8ll})zWtE_YIsb#|*4k(+wbH}p&UdNZU&;Nc|Z|{>}_k(3- z1@56-%X>RZ;S03iw`&<=E%_Xwdnw#I-t)0CUIM@FdjjVFPa~e1r}x9R@G0l2d`mw! 
zg?*aCrirmxd-I8i`_hNY=&bLT(GtYOv9c`oVXqo3zCYrZa$8;69~s+;vB|b+%-6PK zZ-uSA=FIEPR#to5^6Oahda%2tZI4OYwslOr9qyLKx?FqAwfpONLH;I|JP*nr>3`o} zXnaID-f!xjlX%?jafi-~t!XKfC2j1_Ny6)K#x;Fr9kcBjd?k$5)BCX6n{z95SibS7 z_Ic7cvfa{5($Rx^$>1F_qdmZ$b(t=9EwTCd;4V7^*Qf9M+Qj&)_4Djzu6r+)h-eOK|;oG$Upw&an0 z*>9f{CuPx2T_;ca!@3nG>ZT0Iq%UUE(*9xG{&alp|Bzu@xF-Zp$get6AKRYCUIT5P zc`ILHjDE_FxP=XkEZI(-oE!833VlGmj7yeQu8&1i{a~AeM>LS9xsdS>dgEX{_=NPc z#=S@8TIltX-hG3tLB8T-`xp})W1M!%ufUq)@(b~B4m2?h`$Lc9QD>vqeia?@(qG0) zI{7o_Tf-;Zxz2QNsT-U0ZTM=^-mp`B;O)m8H_0KUM!x#g?(Cv3nli!D_T#wvqUo1U z8S5_BnAIkKgNHh!op`){vp;lkUZtJ#abj)doNN~UXQhPdmQlzAJ`8Quyfj-a2lqZEgAV^YxFE@v+E(mLf+w>xPLrDvaDi zm*JY4Sdjyb{%#lLJ>$21sEq!fts@6|!OM|rzopFI^N2F~t2f27#}~Y!%-ij-vI%mb z4UuPTu*VU|TafSkH{{WOx}|cYO%6P%jQ-@6;CTabrO%f+ulw(11LP&^?+RMxOFZ7= z$g(MNrS*3{40+TMWxUnfkpmrFX7)X?Z2F;nA@eZMBfmm^G#@z-b2a8bx<`J>HD$98 z?pZcO4#Zq&edIu--2k$Khq`!-d{0E zA_s!~f3!uJIpbP9TYX)bdGAihiQqTJ3HKs+zF7MmA1kkr*@9_ zW783i)-r=%u)Km zGTDaAUBCCQW!D@3wcK+j?lYoqHyrPdayQy6_uqeidEoy0%F6p9CV>ZX$dCCyx^zX^ z_}cr*W=jL(j@RE+c20S>Jp4$x<&h`KefQm8R;~mF(xYs4?aHzViurlG$@A3BS7VEU*p1yBS<7xY5;cw#4Inl?fZ};);<@NM^yT{wNTddQLd-}dT zji>E1ey-omu;%OhXZ)tw-_7UBFZ$%ivka|=f5ZIg@%HT&>$KyZzHd+CY5RaOFMTlSztz~^_^-#?w_B{!j(hsPJ&mXB6JEv(_SXIT-x0s+KhJ+&Q}KL$%@aB= z$Nu>X?f(xx^iX-vSr_4&{E4#ctgn^#f9G zaxI@V&;M@wnnks)Q-|vhU&HG90+^>=7wMYD*LcoNeC?NYdyU)6_{VSK`49c^&!?9U zoP%S_TKc&B_vn2q%2JFG^P?+Qln;Iz@2@U<9DQFe|M;DOe$tM-PSQONyfeypq`Z)i z@w^^qtjufOq9>29-FS`VWA-;Z;%&yni|aAVR6EBlBkF;M?RHFU>+pRW%1pYRw(9EQ zH^}YeGfs*XYCK;X-GCg3?W>UkAs3p1^Y@s~Qjr6-_@xJQZ9X&y?=~<88e#n?KSeGy zVh(gA`e=89ne%Ak8MkvH;bt6w)4pR4WnWWH`eU#O7+IvP^Lpc`ax?$;xbGA6;qpBn$qwU74kZ0`p=vho+@ns0tx*Bm_VY1wY+w_^^D z-1?eto>6q1`J40YM^-@Ix6TmH!z=DB|AYBE=$D)g`m-;cDu0^JU)9&*dH5IilpRc(xNGZu=Jt6p)A$QtIupws?}XPj7D#M0rEzbz-9&2sIu{o?Cv zk(|6wrTbcvVOv92*vq*{c^pH`NXJ4PT%+ped?rm`Z*_<7$U|O^H!{RKn@@C-m1{5U z(Yp!k&!TxJe^7p44}8Xvea0nwL!ar|IWQN$(Pv)GJX`rSWrCNqtljV0c7|Ok7kGqA zcp9vUul139AzOZh{3-h9BtGZWuKV+h`(5|rPquBju6udzcioRa)&9-BHv7ddW?w%s 
zhkf+XN6RCRJW{@X`qyS%pFG)kK7?mX-~Ptw<-rFZESFz!-Yog-zx2FwG|rkBAA9UE zl_ghRE^Ng2k-vL)*=4(bD9f(Avb_K8Z!YhA^BcwkKeP9V!4KXM`$)@zy+dUw zuB)=m{hPM35c@3rAN;_8SNQiu`!(fxTfHXjY3-lF51F*}x!{rgu=)AqM_D)J3GY6z z7kE|p`>dudTZx%_KF<{me%Z6niDd@<8krxwlIb>VrHnXNkw5(5_Ui2471&v1uki!( z3vm2`R{=k4p$+CYecvYk3({ZYdBK*i2LG$YOaAe`h@I>m;y2US&seqi9nW}{gzprQ z7VnN$|FMl?f6!m@S&jdsydJOHyQlquciW2h`CAb`+MWHoZ~uX^xgUW+h?6o z9>ctP{#M(T=WYGk;Qt?=Dlh!M{-AvGv{TER_wHQ&@BjK=%NX;O@iJb<((z|s8*jxd z{h@O;&)bN<)=S2FwBMJ@3wHb`oNwaUwH?{?=M?ic5B@q zrtMYSi|7yDE7*6Ry=B{z-Jg2?Gfw%kaX0*L5$?{jR@p;_>B~cqjVnC!j{oq>J#CA4G+4x!G8&v=59}a6`%~VK#}Vgb z%tS-{lxd&v`)hcvKCaLA+xP!*ZoAIosGa{KFOT~G7;j`*W|f~~ZNG9zVqo4!45@dV zI3zdodp-Zh`v=0u`O4p`EB_z#JHK|_=GXk4z56GhzvJDId_LdiZN^sf*u)rd1lEj| z+O!SpEbc3uTkk{ozJhD(HIC4W_fi`DArJB$6Y7P0w|Q)xLktmj`VkoF`X!&{!Z3eA zPOcl#$8jq&+Y@oc@2c!#QteO8WQT`vb)>rC|@whMG`vlZ)9W8D2rwuJ` z;iB#+loq|ba83Ok}>-!F_omac=&lm1@-H$)nw&l9+<+=ZkJMNfzebm!G z)&BKf7p+<~4%>do;9S?p`|P(IpqZ}E8BuRdbo{=oesiuNAZCSKCCV_n-m zAdSx>;#WKNo#5v&Wu{!76G+qPsk*$!(uQ{b29I$?9X4|B(|Lxrq#s<1ZkjKsyOo_Z zq_>WWhggZdqo1Xt8MlmHzwB=C)b~jfyY)*K^-jT6abQgM(OH|e!(bd9H)hLKzj;wp zwsjkOYFk5So3*ad+r4t`u`Gnx5q1%Mpi3N^P}|@b=A3Hy*Zai z2l+3iUe(Z(a*d<*3*W89dG+PrJacM(UB~|y<=<64t{Iq<_4{5O|JHPTJ^fSdU+?+P zd9~|49`1MDk3ZSA<+|?Wx!<+&YUI~vBEMd@&;PN`&gA`py^hwn^MCCd%Ky0*r|h>{ zheLm?!Ca&5ItSgECo`}AeAY)r{tvyr|8VBXCn*09x*X>o0R6FdKrZ(y%Jo|3xZBvX zD;@ir{Yd9WwFh3=MmpMHIoxk_-0o$u-wt|Gr~EM-x8uSAIf18N89uuWi)Uf7*D`Z4F-t|PzZ^LN|_;CJ|tm+QGdYrTKpJb&l+ z`5N8EraEycKLw5ZtlxO7SKphc@>4(F1K_yy@;w2)AK?2Fd_REC|Ks_3`+UFszMlJC z*Sx>x-|4d^K9lDsUYBrw=6s=D)wY$hp8wapUhe_${sZ1G!t;OE?Kr>Hpty3qq)hw8 z>oWSGUwRuqdTyX!*G?Sgdpy1d;hg6f`_X5=D-N0qwkzp!{+hPEri>T|#!26xk2rE} zu?~G9Q+^V@PKPjsEtY3o_H(tPk#8Gh*M$9p{1^+_ft&U>c=A~K5*sl~Hfrb9b8sJE zG0xfU*Brq3)A08fZoOhrnSA(0`Wx zYA1s9b&3RXS$-Dm)iu@%hE{eVeo_S0}yO z;=k(3^46EWw7hexSCrqzcTm=Q>j%p_wtPi-C(3$n{l_v7WqjzFW&DwY%lNx^KKPZ3 z%6P}mpnSHBPx~&)q6vEPkeBkv%l2r;LqU&n+9gqNtiEjbc>DGQUf9sLd-}dz>0v8v z?%Tqf_E!98&wJ|!gkRV{n)pwJ4IO^kYJ2Cs_5C`w|7O|$)kU&>)X3}c_U#G0u%Wgy 
zo;|(uYt9=!U+?MpJnh;mE-hDGeO0;Q)RO}*Y#>(IkK=b`n=xdp+IWTij6dVKXyAWK z@W1<&e_h`E@!iV$Z~n)!)`va?UeGfBr!EqYWPM=o}MWc*A?4ZTUcMY-)PloVY9@QROR_G0RI_CK%{Uw^zLmSO6pP*kh@tkxipX*Mx z^-b-I?=1^&`9)cHJATgun$O*EXF2qSyUJrfy}dlP^5^A|haWDh@LSsZ@Gi=L2Uu*k zb*Jp0v7VM^z}DXjwp`W&e;hj&vg0Cri~8bw%EH?o3cP9qzuFI#haY-a#oy29+oOvx zCs7vO_Mmu1uwCujArE~$ec!GzlV4kV;yB_I*DsF4U{A*0yt)oy-XHn>OHkVnAqO&% zcmLTU+#k4NRe9m%_*QbY7x;mB(w;Tr5Bl^Md^Qi)`=|Zr)^frvc*ptPpN9R2xBd6v z%Dc-7JlAL8-gSBA-IuK>p+A@uUB*R}I%<=M{vxt=q>XaBl;;r;;gaMY1^ zV;wvH&hyT2-H$zv{5qF#zvI5?IRvtdPs$fV7@zyIHS-+v+h z4q%`EpIb)R@B09QPeNCXKVs%NRIiuX_z@rHU-~uXNAk4uhdi`_wh+5)i(Cwzs$1Zu!*7W7j$N)TX}dw;b8E z4A)#YqA0eY;W0{Fi_`Ah8_8eyE9#*Q@OMj3KV@68<+tFot;W*E(RhVVdO~*Hms7WK zhi=>IIhpPBMQdx-X&V~9w0N~d7X8)@$iDD(|^Y`=b!mNGIX)jqaG zKgg|kb54ZqTyv0-CHzF+un%MWottfHXe_JJVN9`Vbeg8()!1bG?B_v$%q{N+t-WCn z^mO*i4%iz!#GUaXCib^=&igutX(Mr|KjL&gNTNwJ&oowjYB`%2GZq+Tq(= zLEpFk?bqVd^!0tt3*Fzh-M_lJr|;XV-Mt8kNXmIFyKy9A$DT^41AMHk;Fg1Ms?z^t9+>cs1(l3(T$j}(u8jrf3 z|5Jwb7^~Z=Eth_>PWPp5*+4tk4_)T3{Vm3%3k>9e?UYSk)~0psevaw={v+ad_|Cum z{e6DlFY|B4b;Q39$Nc=@%)e1nhvn;-vaKg;_nTJNMT!l4r~3*=pyqe}fCF>5%CGu4 z&c6f5aeN;je;*Lf|9|!K^|&u^Zv9Sw&AaRK|G*p9QR#2suKwfvVLPwUXoG0eZr^us z?fVPXGra#FIQwz>YS7)pJLLzC>}MbKiwAwE6>GQ6!?wmUHrbAA@s!1Jm*lf-$Fs`t zctbB)>__S8gfG`eov)gc4KDJr5oB~6}mTRZgQFG(Q2egs$T$8>n?5BM0=f)t~P9JH&7kPE~ zE9PEcu`eSp51&L|V9|L(dK!Oat}C0W&d@7fonsOgc}e2}o7h&qEI)9JSO%IHM>~CF zddVR_?V-(<&34kZec?a)ZIDHou@1tYRWJLAv6EB#+HunKC3)+80N%rBF>+jZfqR-P zjmJ!I_Ohn^Q~T2et)ai zN?!0E02#6W&5NH%lIVxt@H2U*))p2VK>p{{*L#?n)4ko3J)-iPsHEDI|1Jy z`#$(lbgl-*@l9x>=)C50qR*3*r}HJx5j>Ch{K@kPkDJ_&*CUlCXh;5=YNX5SBk6@6 z@z--t%AU|~9dYiB>jmqNy6WS#j?UL9NADUZA91E!UkilpY1aTD#~25DZ9gzI{tX{M zC+*1Vt=gA<(=`F@wQa6>PG*1gY2c(yrj6?m(hPJOGoSR*?->Wn=D3d=`zLf|e~W?R zv{@x}w8!GlejnSX@>Qp^(o|g`+x^l_d5j%-Jx5HRdA0NKiRb&w@A=+6|IVTE^gjRR z`~Q=3(t3@xZ7b%Szd4V@E4f(Hs3(43uxGpn;QJ4Qe%>D#-cRszeb z-?gSs>>JwRc(Z1l=sU%$jW_$E-+90FGw+}M`-9>8099|^e~9%343d{RCgy+WaqQ&R zj9vAy;v2l8lTB=UzN3~6X^-tq+bo-Pdz 
zisT25mRE8~!{@S+k@P7qu;=`?EbFK;>F0)zKId4<(Ef*ZuXlpS^yJC4RQYrM#QqXK zZ+%+rC9cF#?P5NZdrXWC*n~&xj5 z7Q9;b;ICsmPMMnLup@i{jMmFqItQDyCm4tKAJ&%L)D^GfS@$5n?#uL{Ws;w9upDDi zKX7#U_XK9j$h_RO^KaMv^|149*Zue;uFzled)!|zPTMNI>`xq)>$;ck{?*~@>7Q!< z?|naj`Pl{MenR+1ox=44#m!SWc zi=sXjzYn_>?{0COqUM@8lH>S$;89S3x$~u^!P!eiqU;{No?sJLQzgeI<+bPwqw9)6uUzG33TRFzkcT4n5*0X4)m* zfsTD4AAOYPy)yT;0c-4gtt=heeqv{7>lHHP+a`isTIkC#6eswFKrWM z*YwFC!#)i^3^HmggeT_+?eLmoTU7QQ)17VP!I5$tw6HnTry2YEvluKC=$Mk1A z*77&@ST6ZyGOj(QikUdfoAN}9W9#jCWtVAL``C6nw$`2F(n)!2Q@TR07o4{hmU7{{Zg+;68x! zZ`VV9efE_1@6XHo0;}rx_v9nTiT+@G<-?rUjX$g3JQqN}dHwzXuIWC1ZoTh79Q-=~ zt9$-maWZ}`i9a<-PtNZ0(Ys`(1Yuv^bc+yXt8x#7%w*#hVJ8#zB*V0=* zZL%+oE!$z+fJJl-ZN){ovenqK?&Jjx_cqH!4PNKPEe@|I$4Rz>FKA+ogs;+fK3}A4 z$>km@n^Pb8vzWKG$ww^7FFkFWe6**LCt9{Gv(}s=IVbEd%N0!%@8p@ZJ$0GKeoWqo zdqd-SB>K(<>Q4;JtNC2N)(tkrrnx_G)Wuhn1Si? zW0618Kl0V$hsx34!E+_NXZemJa2~7nMSPUEoU$zXaqj)WvTOA>Gxyo~6XnQ557oP+ ze7|(zA&01}_505&>;LhK%IKhP;5>O$nfJ$kt}@yk&p-D#c2<7X=VLwIzTMOJ?Vi4G z6KjjNZ_j&f;@$ID;jQ?AeYD4Mv+Onh;c5RQ{jp{J33{nDj1uRiiiUqPN7`M2gY@@&rC z=)bre{gqRdr%&YPnD@qO{QUc`H(YydIRbO}b9?VuX2t)xFDk$PvMs~@osTRhei_%0 z$p8Og?}P9>7vBWIcQUBg_2}>SE~Dd-_kZNTGXDBf^=ps9?|!@|dCwA`_mSKa)JJ(- zha7A61Dz{HkNu4`uLV@+UQa%aM=cpC%g5R)DVqiUh<3C0fnPW&C$IA$-N3R#_!`@?6UgNz>w}_siW0zA6DGrW6W#|>+t)aGyJ6URD)A-upg+C$1QJSlAP2t@tm`>x8gSb8t2q6 z-R&`b7V~j}ctzb8>!7V72Uun{$+YY+}JNkEdU% zF42Zv_IX=N7HOIi@@eaUQ~pmK<~PrT4C-%m>KcL9AlYv@HHWj0TfRoG_&C>6x7wsl z8n@+liP_13*gvVlRE>1)UlR?7?j z+mH5P zp`#4P9++DGz-!#$hbn{iBxd)iZu$am`rP)Cj~IxTy0eyl60^oRMmtm|uH+3K@`ula zAH3r8Gr}u=G4M7zE!*>IjPDoo@6gWNgmWRzd!v6kw#IX{SoY5x+|~_k7$RzEW57{9ci z--2|!`29NCRNMGAHvJ`j@<<;12U&yv9vJAs+w%AJ_U+!@zTMOJ?Vi4Gj}b50OT4s! z_(yvj5pDWIcsu-f=Xfo|b95DQ=#!9h^L=N&qui|9VO{6E_FDB?WE-W&`{o}#P+qz0 zUd`o|*IzwrzViOVHF)1!_aT-n);i$Y{co4zH#`j!v0~+dp!nC+fN*B(|#V;I_1@IjCd-|PMR98X{AvYuRlL=oU8W=_}t$(tUvIbU%yKj_)J}bk;WK+!P(`o7Qjrs~hWMLHa67c6yCBHuLuivVU01;;~P% zo~66@8|Y?C%ekW8K@9qh*|o4)hkd}ciay!lG408~$ZH?T4Vn5|5~O8Z*`Mb-<5XYD ztv-;v5;M+Dh5!Gd=gu-`Dd! 
z{E$nX^d;z;=l>jEmd{GB%l82K=ikeg#QOoO8y5!JNmUA|a?~kb)qD1Q zKSgUM@1Y&_CG;PCVL3|g2V9KzTJU_HYmI-;0(1UX$zPNHWs8?UFZ5gfF(IG&sfV?2 zaXj(qy#THwzMsI_=QLshy$bs{*7oJtv_9!;=q0z~b)WVS%R=4C^KsDnoI#qFE^t|1 z#cb@B-RQ6Tocpxbg#4i4-o*BTvm>4&|6gLT?D(H9sd?Hv5fbM&O%I3FmVU!Bj0qlw#KPuLc?hdJl- zbjvef?qVgIfv@pb*wy%wJ~SOMX3PUeqgQ@_EcwO!9Cxg2yL}z99&rco4djgGquj3Y%>~H6p=4eV`vKXn`ve$|yn1$i-{;?vhdcisp1((2Vs6BoiMZAE=or=-ag)uA zAD{p8x&QEeLF;wx`xCCC-}eXdTr1fbyT+ewyu*(9h_UVOZd`1Y=fpf@5)AEuGqsK(WDI7%WHMfC61Im;iG1pJdq<)A9!Ys zah^bz?vI2_`?amfJE4O-#M;)at5?mp^XldKp1<=MKVQEQ@BQ;xPd!@aH2EFZ{1KY=t(c|sgvLD|s6aF@Sv>&CX@7sL8oib+GpZ+uc z4QVe*#1H-ecHwutXm4XbV>g3#jW_9A`{h5_Z+_CFsJ-V2vVEHU%At|J-}naJ)4v$M`$yi*{Y>?W z8ZX+;IZJ!q_>vcujsI%v^2~Q^Kj6=J3qQV@M*GJHeWg6-Js&Eg6Yy^24*0J5aTi41 z&%Gr7HuCrbdzJBpca)jS?#1;E-e*`6f48$a!{<8fcD$*Xh9z*F}*jJ163SKkyK+25T% zCU_F3=sMXI5Ao+%^aVcaRBd})r(DxVJt?Q^kA!jJfrDZd!+J?(0h5;^eVyPn<1Z#cQ5v{;_SWla4ZG;YmFc z@*7)&cXFOtj_TCi^0(veZ|#g@$Jc%w=C}EMJw^;VZgI8#Y3z%0aa?27dr7vDHTRX( z$_YCw=9DX+`MA+FVH5iypZTxm7w0i^9~O*_mg9Ld-p!i%9&(mj@wbAJ|1g(%+FH+= zpcjvABQN(B&@u;F8@bY)wdW%j!ZihQs;9l@ z$NOXYz8;^$EC0{;@0WG?x$YAnKeztOGd*|JPdxMqaoogQb#lFk<~qv9`GCLg-yJyk zeShe!_aSuO;mG_xK%4&uo%3(!_sp;T{Cx)J5ckLL^yB$F|1O`u(--)O5q{*iH1T_g znG0@@>$ewe9q}Uu+HyVW{bBRFb}Xa)aF60u?}Yrz^x% zk>}iF@eP3&z5?fBoC{xu@17DD{?5Df)7PgjD|0`35ONBf4^LfM#_P;4qh~HCqpkn8 zj9&fTGJ4)$mC;|lp^RVtmNK)&>&s}1*Or+--=>WJ9OvhKk1OlE_Y-BkoewRW9B@L} z5Pvsh^e3+r|IADNri@?pt}=spHusF>W%P=-l+l7`m-Ti!q&(yAb}Ae4y^H-%D05!? 
zI@tY=vI$}|^Xk7XzrEu@WdYuwU9j)5W&SRQmG$v`R@NKrdU%tSW1T@NiA>~eV7 zV7qeVXO5!=?fhL9q94a78}52Ung8)a(Z8^42%PMjzw1KKA66FN?}2Z) z09dvE(N;>Y_1h$H>THOiWE zb7FmP+-t@2HQPzw@^a43T>KOKb`gKiektxv@Nb+@p5=ez`uJv%`cijdFc#Gu@A_?n z<^bn`a8Wk#02}*ILe9W%n?f%6(oWXY0bPTQVH;_KuJMccY>zQc=nZ+s8dwH>wGHg{ zi}ct(mRa==*nE$m#c!LcT-w4yyRwMBwo`9IBODEF)3ZMoFLYXtEn z9mm>n%4y}2CuP%i;idiTPyFPUKN`Nluf~PCx1YAN(00y&)Z^OzP`i=seql)Lp>J|d z4|J3LVK05r`pW)j@N&GRNxaowwtqa}AN&@w95d_D`=9tNz?k3kn`RIDpig$u7fEA& zud9~V$H9{}a2~g~EpwtT*HHF}KG&V~TDEKNJGE}{Vt$gY&bbDAi>1mUZPElEvBUO; zPqf6Iw6bx+h9QRZwPjnL=qs-DtJ|&{4B~VDwCm^ASLSQ?7h`wbVl_SL@W+5X_I1a@ zesnBby5O}x9fzrYGY0D8G5t6VzxSx5mk-RFJS`uODQ`|o-^ykTr|PkNF<#@9{;P5K zp3Z*Ck*v@e$HTm-JdOUiPD^@V6qc&PKDAE*6aDA8W9)5PHdBXfhCCl@Ec;{6PhZ!* zwA)xLBikd~pB~}<^~~kSRo$1mM!3JtZv+CFWZ*TfD9? z2O6!jUKv0A8D+fnyOe*;+xfF)-p3Cv3y>>qgx>}}{k;3yS|36^LJ&G%Ul-qGd=|$o%#JT!tV`r?ztv@6?IA`bokzcRw`Twf=UcU14BQdY^{y(rS{8r>@#G2P1I!|1W@4lXQ8|;7| z@pmgdN8?&3&yhS|UXJSxlsP9~Sw?5yQua9>`S83yKz{zcGQ!^o9dGmYGWWOsPyT7o z@`*2gO*Q{Fv)P}P(Q{r{{`tn6%4pL+5-sWHuJ>P5cs$X5_6y5sbCf^CH-rB0FXH(3 z{v?js{@U{L+xwpgoWS#~TgvbJ{Z3`G_k0ZN7ykSq^4_HnmJxo}I>P<)@nyKC!M&)_ z4Tu4+Z7#jH{Lb5VDC;vukAe1alzULzLwA9?Tcf(HAJTx5w{xjbGxQum>?UzvBP9T^sgS|26S%{OA16whA-t&7!r) zW7-S6rfb`^k9IY@ZGZT-eVz$f^v{~`bM9*XHt}olSAP4el{XLmkiVMwPTnT|!iei5 z+3o8j+Tpn8v24$HYK+J92jj_O+nw=kZ4dg!f0D_$(8Pa@>vs+Iua2L-YyH*xv$ZL0 zu2~v>8IE~7jtU#9IX8|{uA>4d{+D? 
zuA7O|*U`jIdr`|zI_4Tizx(J5*@nKX)#fq%Wgpab8-Jd^hW&djt!K1lZ?}I>FMm$$rik`;YQ^`)lS;S_;h}W*dyM#)?@wid`5fv{5E`K z8JY{&@5U^aPusZ%u5tc$`(N)5^V?r-o9lnVU+KSg{uAJ594`+d`n^B>lKo&Q)bk~i!p!s0c+ z^-ipAr(bA`u)Cja?Pci?K3h;9=LyDcL=GaGV_j5x=>zo3H{J_-ynVa3m-yTH+tc^$ z-hW+t{GD6M=(J_{dlk%U?ai&j{S0lg9=j@aDJ^sF*-v7t* z`D-7>--lU<{Ga(b-uuV%;EnkmfIeOm?L7a{X0P{I_CH!iCx5rhU4Qd37w -#include -#include -#include <3ds.h> - -#include "gs.h" -#include "math.h" - -#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4) - -static void gsInitMatrixStack(); - -Handle linearAllocMutex; - -static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]; - -typedef struct -{ - u32 offset; - mtx44 data; -}bufferMatrix_s; - -bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE]; -int bufferMatrixListLength; - -//---------------------- -// GS SYSTEM STUFF -//---------------------- - -void initBufferMatrixList() -{ - bufferMatrixListLength=0; -} - -void gsInit(shaderProgram_s* shader) -{ - gsInitMatrixStack(); - initBufferMatrixList(); - svcCreateMutex(&linearAllocMutex, false); - if(shader) - { - gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection"); - gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview"); - shaderProgramUse(shader); - } -} - -void gsExit(void) -{ - svcCloseHandle(linearAllocMutex); -} - -void gsStartFrame(void) -{ - GPUCMD_SetBufferOffset(0); - initBufferMatrixList(); -} - -void* gsLinearAlloc(size_t size) -{ - void* ret=NULL; - - svcWaitSynchronization(linearAllocMutex, U64_MAX); - ret=linearAlloc(size); - svcReleaseMutex(linearAllocMutex); - - return ret; -} - -void gsLinearFree(void* mem) -{ - svcWaitSynchronization(linearAllocMutex, U64_MAX); - linearFree(mem); - svcReleaseMutex(linearAllocMutex); -} - -//---------------------- -// MATRIX STACK STUFF -//---------------------- - -static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE]; -static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04}; -static u8 
gsMatrixStackOffsets[GS_MATRIXTYPES]; -static bool gsMatrixStackUpdated[GS_MATRIXTYPES]; -static GS_MATRIX gsCurrentMatrixType; - -static void gsInitMatrixStack() -{ - int i; - for(i=0; i=GS_MATRIXTYPES)return NULL; - - return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]]; -} - -int gsLoadMatrix(GS_MATRIX m, float* data) -{ - if(m<0 || m>=GS_MATRIXTYPES || !data)return -1; - - memcpy(gsGetMatrix(m), data, sizeof(mtx44)); - - gsMatrixStackUpdated[m]=true; - - return 0; -} - -int gsPushMatrix() -{ - const GS_MATRIX m=gsCurrentMatrixType; - if(m<0 || m>=GS_MATRIXTYPES)return -1; - if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1; - - float* cur=gsGetMatrix(m); - gsMatrixStackOffsets[m]++; - memcpy(gsGetMatrix(m), cur, sizeof(mtx44)); - - return 0; -} - -int gsPopMatrix() -{ - const GS_MATRIX m=gsCurrentMatrixType; - if(m<0 || m>=GS_MATRIXTYPES)return -1; - if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1; - - gsMatrixStackOffsets[m]--; - - gsMatrixStackUpdated[m]=true; - - return 0; -} - -int gsMatrixMode(GS_MATRIX m) -{ - if(m<0 || m>=GS_MATRIXTYPES)return -1; - - gsCurrentMatrixType=m; - - return 0; -} - -//------------------------ -// MATRIX TRANSFORM STUFF -//------------------------ - -int gsMultMatrix(float* data) -{ - if(!data)return -1; - - mtx44 tmp; - multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp); - memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44)); - - gsMatrixStackUpdated[gsCurrentMatrixType]=true; - - return 0; -} - -void gsLoadIdentity() -{ - loadIdentity44(gsGetMatrix(gsCurrentMatrixType)); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsProjectionMatrix(float fovy, float aspect, float near, float far) -{ - initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsRotateX(float x) -{ - rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), 
x, false); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsRotateY(float y) -{ - rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsRotateZ(float z) -{ - rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsScale(float x, float y, float z) -{ - scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -void gsTranslate(float x, float y, float z) -{ - translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); - gsMatrixStackUpdated[gsCurrentMatrixType]=true; -} - -//---------------------- -// MATRIX RENDER STUFF -//---------------------- - -static void gsSetUniformMatrix(u32 startreg, float* m) -{ - float param[16]; - - param[0x0]=m[3]; //w - param[0x1]=m[2]; //z - param[0x2]=m[1]; //y - param[0x3]=m[0]; //x - - param[0x4]=m[7]; - param[0x5]=m[6]; - param[0x6]=m[5]; - param[0x7]=m[4]; - - param[0x8]=m[11]; - param[0x9]=m[10]; - param[0xa]=m[9]; - param[0xb]=m[8]; - - param[0xc]=m[15]; - param[0xd]=m[14]; - param[0xe]=m[13]; - param[0xf]=m[12]; - - GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4); -} - -static int gsUpdateTransformation() -{ - GS_MATRIX m; - for(m=0; mdata=NULL; - vbo->currentSize=0; - vbo->maxSize=0; - vbo->commands=NULL; - vbo->commandsSize=0; - - return 0; -} - -int gsVboCreate(gsVbo_s* vbo, u32 size) -{ - if(!vbo)return -1; - - vbo->data=gsLinearAlloc(size); - vbo->numVertices=0; - vbo->currentSize=0; - vbo->maxSize=size; - - return 0; -} - -void* gsVboGetOffset(gsVbo_s* vbo) -{ - if(!vbo)return NULL; - - return (void*)(&((u8*)vbo->data)[vbo->currentSize]); -} - -int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units) -{ - if(!vbo || !data || !size)return -1; - if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1; - - memcpy(gsVboGetOffset(vbo), data, size); - vbo->currentSize+=size; - 
vbo->numVertices+=units; - - return 0; -} - -int gsVboFlushData(gsVbo_s* vbo) -{ - if(!vbo)return -1; - - //unnecessary if we use flushAndRun - // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize); - - return 0; -} - -int gsVboDestroy(gsVbo_s* vbo) -{ - if(!vbo)return -1; - - if(vbo->commands)free(vbo->commands); - if(vbo->data)gsLinearFree(vbo->data); - gsVboInit(vbo); - - return 0; -} - -extern u32 debugValue[]; - -void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n) -{ - //set attribute buffer address - GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3); - //set primitive type - GPUCMD_AddSingleParam(0x0002025E, primitive); - GPUCMD_AddSingleParam(0x0002025F, 0x00000001); - //index buffer not used for drawArrays but 0x000F0227 still required - GPUCMD_AddSingleParam(0x000F0227, 0x80000000); - //pass number of vertices - GPUCMD_AddSingleParam(0x000F0228, n); - - GPUCMD_AddSingleParam(0x00010253, 0x00000001); - - GPUCMD_AddSingleParam(0x00010245, 0x00000000); - GPUCMD_AddSingleParam(0x000F022E, 0x00000001); - GPUCMD_AddSingleParam(0x00010245, 0x00000001); - GPUCMD_AddSingleParam(0x000F0231, 0x00000001); - - // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff -} - -//not thread safe -int gsVboPrecomputeCommands(gsVbo_s* vbo) -{ - if(!vbo || vbo->commands)return -1; - - static u32 tmpBuffer[128]; - - u32* savedAdr; u32 savedSize, savedOffset; - GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset); - GPUCMD_SetBuffer(tmpBuffer, 128, 0); - - GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); - - GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); - vbo->commands=memalign(0x4, vbo->commandsSize*4); - if(!vbo->commands)return -1; - memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4); - - GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset); - - return 0; -} - -extern u32* gpuCmdBuf; -extern u32 gpuCmdBufSize; -extern u32 gpuCmdBufOffset; - -void _vboMemcpy50(u32* dst, u32* src); - -void 
_GPUCMD_AddRawCommands(u32* cmd, u32 size) -{ - if(!cmd || !size)return; - - if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd); - else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4); - gpuCmdBufOffset+=size; -} - -int gsVboDraw(gsVbo_s* vbo) -{ - if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1; - - gsUpdateTransformation(); - - gsVboPrecomputeCommands(vbo); - - // u64 val=svcGetSystemTick(); - if(vbo->commands) - { - _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize); - }else{ - GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); - } - // debugValue[5]+=(u32)(svcGetSystemTick()-val); - // debugValue[6]++; - - return 0; -} diff --git a/examples/gpu/source/gs.h b/examples/gpu/source/gs.h deleted file mode 100644 index 2da15bd..0000000 --- a/examples/gpu/source/gs.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef GS_H -#define GS_H - -#include <3ds.h> -#include "math.h" - -#define GS_MATRIXSTACK_SIZE (8) - -typedef enum -{ - GS_PROJECTION = 0, - GS_MODELVIEW = 1, - GS_MATRIXTYPES -}GS_MATRIX; - -typedef struct -{ - u8* data; - u32 currentSize; // in bytes - u32 maxSize; // in bytes - u32 numVertices; - u32* commands; - u32 commandsSize; -}gsVbo_s; - - -void gsInit(shaderProgram_s* shader); -void gsExit(void); - -void gsStartFrame(void); -void gsAdjustBufferMatrices(mtx44 transformation); - -void* gsLinearAlloc(size_t size); -void gsLinearFree(void* mem); - -float* gsGetMatrix(GS_MATRIX m); -int gsLoadMatrix(GS_MATRIX m, float* data); -int gsPushMatrix(); -int gsPopMatrix(); -int gsMatrixMode(GS_MATRIX m); - -void gsLoadIdentity(); -void gsProjectionMatrix(float fovy, float aspect, float near, float far); -void gsRotateX(float x); -void gsRotateY(float y); -void gsRotateZ(float z); -void gsScale(float x, float y, float z); -void gsTranslate(float x, float y, float z); -int gsMultMatrix(float* data); - -int gsVboInit(gsVbo_s* vbo); -int gsVboCreate(gsVbo_s* vbo, u32 size); -int gsVboFlushData(gsVbo_s* vbo); -int 
gsVboDestroy(gsVbo_s* vbo); -int gsVboDraw(gsVbo_s* vbo); -void* gsVboGetOffset(gsVbo_s* vbo); -int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units); - -#endif diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c deleted file mode 100644 index a045fe3..0000000 --- a/examples/gpu/source/main.c +++ /dev/null @@ -1,354 +0,0 @@ -/////////////////////////////////////// -// GPU example // -/////////////////////////////////////// - -//this example is meant to show how to use the GPU to render a 3D object -//it also shows how to do stereoscopic 3D -//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft -//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited. - -#include -#include -#include -#include -#include <3ds.h> - -#include "math.h" -#include "gs.h" - -#include "test_vsh_shbin.h" -#include "texture_bin.h" - -//will be moved into ctrulib at some point -#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080) - -#define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0)) - -//transfer from GPU output buffer to actual framebuffer flags -#define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ - GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ - GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X)) - -//shader structure -DVLB_s* dvlb; -shaderProgram_s shader; -//texture data pointer -u32* texData; -//vbo structure -gsVbo_s vbo; - -//GPU framebuffer address -u32* gpuOut=(u32*)0x1F119400; -//GPU depth buffer address -u32* gpuDOut=(u32*)0x1F370800; - -//angle for the vertex lighting (cf test.vsh) -float lightAngle; -//object position and rotation angle -vect3Df_s position, angle; - -//vertex structure -typedef struct -{ - vect3Df_s position; - float texcoord[2]; - vect3Df_s normal; -}vertex_s; - -//object data (cube) -//obviously this 
doesn't have to be defined manually, but we will here for the purposes of the example -//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z} -//we're drawing triangles so three lines = one triangle -const vertex_s modelVboData[]= -{ - //first face (PZ) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, - //second face (MZ) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, - //third face (PX) - //first triangle - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, 
+0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, - //fourth face (MX) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - //second triangle - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, - //fifth face (PY) - //first triangle - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - //second triangle - {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, - //sixth face (MY) - //first triangle - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - //second triangle - {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, - {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 
-1.0f, 0.0f}}, -}; - -//stolen from staplebutt -void GPU_SetDummyTexEnv(u8 num) -{ - GPU_SetTexEnv(num, - GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), - GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), - GPU_TEVOPERANDS(0,0,0), - GPU_TEVOPERANDS(0,0,0), - GPU_REPLACE, - GPU_REPLACE, - 0xFFFFFFFF); -} - -// topscreen -void renderFrame() -{ - GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); - - GPU_DepthMap(-1.0f, 0.0f); - GPU_SetFaceCulling(GPU_CULL_BACK_CCW); - GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); - GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); - GPU_SetBlendingColor(0,0,0,0); - GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); - - GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); - GPUCMD_AddWrite(GPUREG_0118, 0); - - GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); - GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); - - GPU_SetTextureEnable(GPU_TEXUNIT0); - - GPU_SetTexEnv(0, - GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), - GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), - GPU_TEVOPERANDS(0,0,0), - GPU_TEVOPERANDS(0,0,0), - GPU_MODULATE, GPU_MODULATE, - 0xFFFFFFFF); - GPU_SetDummyTexEnv(1); - GPU_SetDummyTexEnv(2); - GPU_SetDummyTexEnv(3); - GPU_SetDummyTexEnv(4); - GPU_SetDummyTexEnv(5); - - //texturing stuff - GPU_SetTexture( - GPU_TEXUNIT0, //texture unit - (u32*)osConvertVirtToPhys((u32)texData), //data buffer - 128, //texture width - 128, //texture height - GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params - GPU_RGBA8 //texture pixel format - ); - - GPU_SetAttributeBuffers( - 3, //3 attributes: vertices, texcoords, and normals - (u32*)osConvertVirtToPhys((u32)texData), //mesh buffer - GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position) - GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input 
attribute register 1 (v1): 2 floats (texcoord) - GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal) - 0xFFC, - 0x210, - 1, - (u32[]){0x00000000}, - (u64[]){0x210}, - (u8[]){3} - ); - - //setup lighting (this is specific to our shader) - vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))); - GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1); - GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1); - - //initialize projection matrix to standard perspective stuff - gsMatrixMode(GS_PROJECTION); - gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f); - gsRotateZ(M_PI/2); //because framebuffer is sideways... - - //draw object - gsMatrixMode(GS_MODELVIEW); - gsPushMatrix(); - gsTranslate(position.x, position.y, position.z); - gsRotateX(angle.x); - gsRotateY(angle.y); - gsVboDraw(&vbo); - gsPopMatrix(); - GPU_FinishDrawing(); -} - -int main(int argc, char** argv) -{ - - gfxInitDefault(); - - //initialize GPU - GPU_Init(NULL); - - //let GFX know we're ok with doing stereoscopic 3D rendering - gfxSet3D(true); - - //allocate our GPU command buffers - //they *have* to be on the linear heap - u32 gpuCmdSize=0x40000; - u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4); - u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4); - - //actually reset the GPU - GPU_Reset(NULL, gpuCmd, gpuCmdSize); - - //load our vertex shader binary - dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size); - shaderProgramInit(&shader); - shaderProgramSetVsh(&shader, &dvlb->DVLE[0]); - - //initialize GS - gsInit(&shader); - - // Flush the command buffer so that the shader upload gets executed - GPUCMD_Finalize(); - GPUCMD_FlushAndRun(NULL); - gspWaitForP3D(); - - //create texture - 
texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned - memcpy(texData, texture_bin, texture_bin_size); - - //create VBO - gsVboInit(&vbo); - gsVboCreate(&vbo, sizeof(modelVboData)); - gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s)); - gsVboFlushData(&vbo); - - //initialize object position and angle - position=vect3Df(0.0f, 0.0f, -2.0f); - angle=vect3Df(M_PI/4, M_PI/4, 0.0f); - - //background color (blue) - u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF); - - while(aptMainLoop()) - { - //get current 3D slider state - float slider=CONFIG_3D_SLIDERSTATE; - - //controls - hidScanInput(); - //START to exit to hbmenu - if(keysDown()&KEY_START)break; - - //A/B to change vertex lighting angle - if(keysHeld()&KEY_A)lightAngle+=0.1f; - if(keysHeld()&KEY_B)lightAngle-=0.1f; - - //D-PAD to rotate object - if(keysHeld()&KEY_DOWN)angle.x+=0.05f; - if(keysHeld()&KEY_UP)angle.x-=0.05f; - if(keysHeld()&KEY_LEFT)angle.y+=0.05f; - if(keysHeld()&KEY_RIGHT)angle.y-=0.05f; - - //R/L to bring object closer to or move it further from the camera - if(keysHeld()&KEY_R)position.z+=0.1f; - if(keysHeld()&KEY_L)position.z-=0.1f; - - //generate our GPU command buffer for this frame - gsStartFrame(); - renderFrame(); - GPUCMD_Finalize(); - - if(slider>0.0f) - { - //new and exciting 3D ! - //make a copy of left gpu buffer - u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset); - memcpy(gpuCmdRight, gpuCmd, offset*4); - - //setup interaxial - float interaxial=slider*0.12f; - - //adjust left gpu buffer fo 3D ! - {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} - - //draw left framebuffer - GPUCMD_FlushAndRun(NULL); - - //while GPU starts drawing the left buffer, adjust right one for 3D ! 
- GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset); - {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} - - //we wait for the left buffer to finish drawing - gspWaitForP3D(); - GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); - gspWaitForPPF(); - - //we draw the right buffer, wait for it to finish and then switch back to left one - //clear the screen - GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); - gspWaitForPSC0(); - - //draw the right framebuffer - GPUCMD_FlushAndRun(NULL); - gspWaitForP3D(); - - //transfer from GPU output buffer to actual framebuffer - GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); - gspWaitForPPF(); - GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); - }else{ - //boring old 2D ! 
- - //draw the frame - GPUCMD_FlushAndRun(NULL); - gspWaitForP3D(); - - //clear the screen - GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); - gspWaitForPPF(); - } - - //clear the screen - GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); - gspWaitForPSC0(); - gfxSwapBuffersGpu(); - - gspWaitForEvent(GSPEVENT_VBlank0, true); - } - - gsExit(); - shaderProgramFree(&shader); - DVLB_Free(dvlb); - gfxExit(); - return 0; -} diff --git a/examples/gpu/source/math.c b/examples/gpu/source/math.c deleted file mode 100644 index 13ab3dd..0000000 --- a/examples/gpu/source/math.c +++ /dev/null @@ -1,148 +0,0 @@ -#include -#include - -#include "math.h" - -void loadIdentity44(float* m) -{ - if(!m)return; - - memset(m, 0x00, 16*4); - m[0]=m[5]=m[10]=m[15]=1.0f; -} - -void multMatrix44(float* m1, float* m2, float* m) //4x4 -{ - int i, j; - for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); - -} - -void translateMatrix(float* tm, float x, float y, float z) -{ - float rm[16], m[16]; - - loadIdentity44(rm); - rm[3]=x; - rm[7]=y; - rm[11]=z; - - multMatrix44(tm,rm,m); - memcpy(tm,m,16*sizeof(float)); -} - -// 00 01 02 03 -// 04 05 06 07 -// 08 09 10 11 -// 12 13 14 15 - -void rotateMatrixX(float* tm, float x, bool r) -{ - float rm[16], m[16]; - memset(rm, 0x00, 16*4); - rm[0]=1.0f; - rm[5]=cos(x); - rm[6]=sin(x); - rm[9]=-sin(x); - rm[10]=cos(x); - rm[15]=1.0f; - if(!r)multMatrix44(tm,rm,m); - else multMatrix44(rm,tm,m); - memcpy(tm,m,16*sizeof(float)); -} - -void rotateMatrixY(float* tm, float x, bool r) -{ - float rm[16], m[16]; - memset(rm, 0x00, 16*4); - rm[0]=cos(x); - rm[2]=sin(x); - rm[5]=1.0f; - rm[8]=-sin(x); - 
rm[10]=cos(x); - rm[15]=1.0f; - if(!r)multMatrix44(tm,rm,m); - else multMatrix44(rm,tm,m); - memcpy(tm,m,16*sizeof(float)); -} - -void rotateMatrixZ(float* tm, float x, bool r) -{ - float rm[16], m[16]; - memset(rm, 0x00, 16*4); - rm[0]=cos(x); - rm[1]=sin(x); - rm[4]=-sin(x); - rm[5]=cos(x); - rm[10]=1.0f; - rm[15]=1.0f; - if(!r)multMatrix44(tm,rm,m); - else multMatrix44(rm,tm,m); - memcpy(tm,m,16*sizeof(float)); -} - -void scaleMatrix(float* tm, float x, float y, float z) -{ - tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x; - tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y; - tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z; -} - -void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far) -{ - float top = near*tan(fovy/2); - float right = (top*aspect); - - float mp[4*4]; - - mp[0x0] = near/right; - mp[0x1] = 0.0f; - mp[0x2] = 0.0f; - mp[0x3] = 0.0f; - - mp[0x4] = 0.0f; - mp[0x5] = near/top; - mp[0x6] = 0.0f; - mp[0x7] = 0.0f; - - mp[0x8] = 0.0f; - mp[0x9] = 0.0f; - mp[0xA] = -(far+near)/(far-near); - mp[0xB] = -2.0f*(far*near)/(far-near); - - mp[0xC] = 0.0f; - mp[0xD] = 0.0f; - mp[0xE] = -1.0f; - mp[0xF] = 0.0f; - - float mp2[4*4]; - loadIdentity44(mp2); - mp2[0xA]=0.5; - mp2[0xB]=-0.5; - - multMatrix44(mp2, mp, m); -} - -vect3Df_s getMatrixColumn(float* m, u8 i) -{ - if(!m || i>=4)return vect3Df(0,0,0); - return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]); -} - -vect3Df_s getMatrixRow(float* m, u8 i) -{ - if(!m || i>=4)return vect3Df(0,0,0); - return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]); -} - -vect4Df_s getMatrixColumn4(float* m, u8 i) -{ - if(!m || i>=4)return vect4Df(0,0,0,0); - return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]); -} - -vect4Df_s getMatrixRow4(float* m, u8 i) -{ - if(!m || i>=4)return vect4Df(0,0,0,0); - return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]); -} diff --git a/examples/gpu/source/math.h b/examples/gpu/source/math.h deleted file mode 100644 index 8137b90..0000000 --- a/examples/gpu/source/math.h +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef 
MATH_H -#define MATH_H - -#include <3ds/types.h> -#include - -typedef float mtx44[4][4]; -typedef float mtx33[3][3]; - -typedef struct -{ - s32 x, y, z; -}vect3Di_s; - -static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z) -{ - return (vect3Di_s){x,y,z}; -} - -static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v) -{ - return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z}; -} - -static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v) -{ - return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z}; -} - -static inline vect3Di_s vmuli(vect3Di_s v, s32 f) -{ - return (vect3Di_s){v.x*f,v.y*f,v.z*f}; -} - -typedef struct -{ - float x, y, z; -}vect3Df_s; - -static inline vect3Df_s vect3Df(float x, float y, float z) -{ - return (vect3Df_s){x,y,z}; -} - -static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v) -{ - return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z}; -} - -static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v) -{ - return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z}; -} - -static inline vect3Df_s vmulf(vect3Df_s v, float f) -{ - return (vect3Df_s){v.x*f,v.y*f,v.z*f}; -} - -static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2) -{ - return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z}; -} - -static inline float vmagf(vect3Df_s v) -{ - return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); -} - -static inline float vdistf(vect3Df_s v1, vect3Df_s v2) -{ - return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z)); -} - -static inline vect3Df_s vnormf(vect3Df_s v) -{ - const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); - return (vect3Df_s){v.x/l,v.y/l,v.z/l}; -} - -typedef struct -{ - float x, y, z, w; -}vect4Df_s; - -static inline vect4Df_s vect4Df(float x, float y, float z, float w) -{ - return (vect4Df_s){x,y,z,w}; -} - -static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v) -{ - return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w}; -} - -static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v) -{ - return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w}; -} - -static inline vect4Df_s 
vmulf4(vect4Df_s v, float f) -{ - return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f}; -} - -static inline float vdotf4(vect4Df_s v1, vect4Df_s v2) -{ - return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w; -} - -static inline vect4Df_s vnormf4(vect4Df_s v) -{ - const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w); - return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}; -} - -//interstuff -static inline vect3Di_s vf2i(vect3Df_s v) -{ - return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)}; -} - -static inline vect3Df_s vi2f(vect3Di_s v) -{ - return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z}; -} - -void loadIdentity44(float* m); -void multMatrix44(float* m1, float* m2, float* m); - -void translateMatrix(float* tm, float x, float y, float z); -void rotateMatrixX(float* tm, float x, bool r); -void rotateMatrixY(float* tm, float x, bool r); -void rotateMatrixZ(float* tm, float x, bool r); -void scaleMatrix(float* tm, float x, float y, float z); - -void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far); - -vect3Df_s getMatrixColumn(float* m, u8 i); -vect3Df_s getMatrixRow(float* m, u8 i); -vect4Df_s getMatrixColumn4(float* m, u8 i); -vect4Df_s getMatrixRow4(float* m, u8 i); - -#endif diff --git a/examples/gpu/Makefile b/examples/graphics/gpu/geoshader/Makefile similarity index 89% rename from examples/gpu/Makefile rename to examples/graphics/gpu/geoshader/Makefile index 19c9ac9..1e4e9ef 100644 --- a/examples/gpu/Makefile +++ b/examples/graphics/gpu/geoshader/Makefile @@ -75,6 +75,7 @@ export DEPSDIR := $(CURDIR)/$(BUILD) CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica))) BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) #--------------------------------------------------------------------------------- @@ 
-91,7 +92,7 @@ else endif #--------------------------------------------------------------------------------- -export OFILES := $(addsuffix .o,$(BINFILES)) \ +export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \ $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ @@ -156,17 +157,18 @@ $(OUTPUT).elf : $(OFILES) @echo $(notdir $<) @$(bin2o) -# WARNING: This is not the right way to do this! TODO: Do it right! #--------------------------------------------------------------------------------- -%_vsh.h %.vsh.o : %.vsh +# rule for assembling GPU shaders #--------------------------------------------------------------------------------- +%.shbin.o: %.pica @echo $(notdir $<) - @python3 $(AEMSTRO)/aemstro_as.py $< ../$(notdir $<).shbin - @bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@ - @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h - @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h - @echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h - @rm ../$(notdir $<).shbin + $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<))) + $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<))) + @picasso $(CURBIN) $< $(CURH) + @bin2s $(CURBIN) | $(AS) -o $@ + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . 
_)`.h -include $(DEPENDS) diff --git a/examples/graphics/gpu/geoshader/README.md b/examples/graphics/gpu/geoshader/README.md new file mode 100644 index 0000000..5e3b6dd --- /dev/null +++ b/examples/graphics/gpu/geoshader/README.md @@ -0,0 +1,6 @@ +# GPU example + +This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up. +Users of earlier versions of devkitARM need to install the tool, which can be found in the address below: + +https://github.com/fincs/picasso/releases diff --git a/examples/graphics/gpu/geoshader/source/3dmath.c b/examples/graphics/gpu/geoshader/source/3dmath.c new file mode 100644 index 0000000..eb8d03f --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/3dmath.c @@ -0,0 +1,172 @@ +#include "3dmath.h" + +void m4x4_identity(matrix_4x4* out) +{ + m4x4_zeros(out); + out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f; +} + +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b) +{ + int i, j; + for (i = 0; i < 4; i ++) + for (j = 0; j < 4; j ++) + out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; +} + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z) +{ + matrix_4x4 tm, om; + + m4x4_identity(&tm); + tm.r[0].w = x; + tm.r[1].w = y; + tm.r[2].w = z; + + m4x4_multiply(&om, mtx, &tm); + m4x4_copy(mtx, &om); +} + +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z) +{ + int i; + for (i = 0; i < 4; i ++) + { + mtx->r[i].x *= x; + mtx->r[i].y *= y; + mtx->r[i].z *= z; + } +} + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = 1.0f; + rm.r[1].y = cosAngle; + rm.r[1].z = sinAngle; + rm.r[2].y = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + 
m4x4_copy(mtx, &om); +} + +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].z = sinAngle; + rm.r[1].y = 1.0f; + rm.r[2].x = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].y = sinAngle; + rm.r[1].x = -sinAngle; + rm.r[1].y = cosAngle; + rm.r[2].z = 1.0f; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far) +{ + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard orthogonal projection matrix + mp.r[0].x = 2.0f / (right - left); + mp.r[0].w = (left + right) / (left - right); + mp.r[1].y = 2.0f / (top - bottom); + mp.r[1].w = (bottom + top) / (bottom - top); + mp.r[2].z = 2.0f / (near - far); + mp.r[2].w = (far + near) / (far - near); + mp.r[3].w = 1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2, mp3; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(&mp3, &mp2, &mp); + + // Fix the 3DS screens' orientation by swapping the X and Y axis + m4x4_identity(&mp2); + mp2.r[0].x = 0.0; + mp2.r[0].y = 1.0; + mp2.r[1].x = -1.0; // flipped + mp2.r[1].y = 0.0; + m4x4_multiply(mtx, &mp2, &mp3); +} + +void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far) +{ + // Notes: + // We are passed "fovy" and the "aspect ratio". 
However, the 3DS screens are sideways, + // and so are these parameters -- in fact, they are actually the fovx and the inverse + // of the aspect ratio. Therefore the formula for the perspective projection matrix + // had to be modified to be expressed in these terms instead. + + // Notes: + // fovx = 2 atan(tan(fovy/2)*w/h) + // fovy = 2 atan(tan(fovx/2)*h/w) + // invaspect = h/w + + // a0,0 = h / (w*tan(fovy/2)) = + // = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2)) = + // = h / (w*tan( atan(tan(fovx/2)*h/w) )) = + // = h / (w * tan(fovx/2)*h/w) = + // = 1 / tan(fovx/2) + + // a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2)) + + float fovx_tan = tanf(fovx / 2); + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard perspective projection matrix + mp.r[0].x = 1.0f / fovx_tan; + mp.r[1].y = 1.0f / (fovx_tan*invaspect); + mp.r[2].z = (near + far) / (near - far); + mp.r[2].w = (2 * near * far) / (near - far); + mp.r[3].z = -1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(mtx, &mp2, &mp); + + // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation + m4x4_rotate_z(mtx, M_PI / 2, true); +} diff --git a/examples/graphics/gpu/geoshader/source/3dmath.h b/examples/graphics/gpu/geoshader/source/3dmath.h new file mode 100644 index 0000000..a9a8596 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/3dmath.h @@ -0,0 +1,56 @@ +/* + * Bare-bones simplistic 3D math library + * This library is common to all libctru GPU examples + */ + +#pragma once +#include +#include +#include + +typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f; +typedef struct { vector_4f r[4]; } matrix_4x4; + +static inline float v4f_dp4(const vector_4f* a, const vector_4f* b) +{ + return a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w; +} + +static inline float v4f_mod4(const vector_4f* a) +{ + return sqrtf(v4f_dp4(a,a)); +} + +static inline void 
v4f_norm4(vector_4f* vec) +{ + float m = v4f_mod4(vec); + if (m == 0.0) return; + vec->x /= m; + vec->y /= m; + vec->z /= m; + vec->w /= m; +} + +static inline void m4x4_zeros(matrix_4x4* out) +{ + memset(out, 0, sizeof(*out)); +} + +static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in) +{ + memcpy(out, in, sizeof(*out)); +} + +void m4x4_identity(matrix_4x4* out); +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b); + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z); +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z); + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide); + +// Special versions of the projection matrices that take the 3DS' screen orientation into account +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far); +void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far); diff --git a/examples/graphics/gpu/geoshader/source/gpu.c b/examples/graphics/gpu/geoshader/source/gpu.c new file mode 100644 index 0000000..c583ae5 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/gpu.c @@ -0,0 +1,93 @@ +#include "gpu.h" + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static u32 *colorBuf, *depthBuf; +static u32 *cmdBuf; + +void gpuInit(void) +{ + colorBuf = vramAlloc(400*240*4); + depthBuf = vramAlloc(400*240*4); + cmdBuf = linearAlloc(0x40000*4); + + GPU_Init(NULL); + GPU_Reset(NULL, cmdBuf, 0x40000); +} + +void gpuExit(void) +{ + linearFree(cmdBuf); + vramFree(depthBuf); + vramFree(colorBuf); +} + +void gpuClearBuffers(u32 clearColor) +{ + 
GX_SetMemoryFill(NULL, + colorBuf, clearColor, &colorBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, + depthBuf, 0, &depthBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); + gspWaitForPSC0(); // Wait for the fill to complete +} + +void gpuFrameBegin(void) +{ + // Configure the viewport and the depth linear conversion function + GPU_SetViewport( + (u32*)osConvertVirtToPhys((u32)depthBuf), + (u32*)osConvertVirtToPhys((u32)colorBuf), + 0, 0, 240, 400); // The top screen is physically 240x400 pixels + GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0 + + // Configure some boilerplate + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + // This is unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + // Configure alpha blending and test + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + int i; + for (i = 0; i < 6; i ++) + GPU_SetDummyTexEnv(i); +} + +void gpuFrameEnd(void) +{ + // Finish rendering + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); // Wait for the rendering to complete + + // Transfer the GPU output to the framebuffer + GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400), + (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400), + DISPLAY_TRANSFER_FLAGS); + gspWaitForPPF(); // Wait for the transfer to complete + + // Reset the command buffer + GPUCMD_SetBufferOffset(0); +}; + +void GPU_SetDummyTexEnv(int id) +{ + GPU_SetTexEnv(id, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + 
GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} diff --git a/examples/graphics/gpu/geoshader/source/gpu.h b/examples/graphics/gpu/geoshader/source/gpu.h new file mode 100644 index 0000000..845d139 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/gpu.h @@ -0,0 +1,26 @@ +/* + * Bare-bones simplistic GPU wrapper + * This library is common to all libctru GPU examples + */ + +#pragma once +#include +#include <3ds.h> +#include "3dmath.h" + +void gpuInit(void); +void gpuExit(void); + +void gpuClearBuffers(u32 clearColor); + +void gpuFrameBegin(void); +void gpuFrameEnd(void); + +// Configures the specified fixed-function fragment shading substage to be a no-operation +void GPU_SetDummyTexEnv(int id); + +// Uploads an uniform matrix +static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix) +{ + GPU_SetFloatUniform(type, location, (u32*)matrix, 4); +} diff --git a/examples/graphics/gpu/geoshader/source/gshader.pica b/examples/graphics/gpu/geoshader/source/gshader.pica new file mode 100644 index 0000000..3b65db3 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/gshader.pica @@ -0,0 +1,91 @@ +; Example PICA200 geometry shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf myconst(0.0, 1.0, -1.0, 0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones +.alias half myconst.wwww + +; Outputs - this time the type *is* used +.out outpos position +.out outclr color + +; Inputs: we will receive the following inputs: +; v0-v1: position/color of the first vertex +; v2-v3: position/color of the second vertex +; v4-v5: position/color of the third vertex + +.proc main + ; Calculate the midpoints of the vertices + mov r4, v0 + add r4, v2, r4 + mul r4, half, r4 + mov r5, v2 + add r5, v4, r5 + mul r5, half, r5 + mov r6, v4 + add r6, v0, r6 + mul r6, half, r6 + + ; Emit the first triangle + mov r0, v0 + mov r1, r4 + mov r2, r6 + call emit_triangle 
+ + ; Emit the second triangle + mov r0, r4 + mov r1, v2 + mov r2, r5 + call emit_triangle + + ; Emit the third triangle + mov r0, r6 + mov r1, r5 + mov r2, v4 + call emit_triangle + + ; We're finished + end +.end + +.proc emit_triangle + ; Emit the first vertex + setemit 0 + mov r8, r0 + mov r9, v1 + call process_vertex + emit + + ; Emit the second vertex + setemit 1 + mov r8, r1 + mov r9, v3 + call process_vertex + emit + + ; Emit the third vertex and finish the primitive + setemit 2, prim + mov r8, r2 + mov r9, v5 + call process_vertex + emit +.end + +; Subroutine +; Inputs: +; r8: vertex position +; r9: vertex color +.proc process_vertex + ; outpos = projectionMatrix * r8 + dp4 outpos.x, projection[0], r8 + dp4 outpos.y, projection[1], r8 + dp4 outpos.z, projection[2], r8 + dp4 outpos.w, projection[3], r8 + + ; outclr = r9 + mov outclr, r9 +.end diff --git a/examples/graphics/gpu/geoshader/source/main.c b/examples/graphics/gpu/geoshader/source/main.c new file mode 100644 index 0000000..a2c9998 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/main.c @@ -0,0 +1,139 @@ +/* + * ~~ Simple libctru GPU geometry shader example ~~ + * This example demonstrates the basics of using the PICA200 in a 3DS homebrew + * application in order to render a basic scene using a geoshader. + * The example geoshader receives the vertices of a triangle and emits three + * smaller triangles, thus forming a 'triforce' shape. 
+ */ + +#include "gpu.h" +#include "vshader_shbin.h" +#include "gshader_shbin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +typedef struct { float position[3]; float color[4]; } vertex; + +static const vertex vertex_list[] = +{ + { {200.0f, 200.0f, 0.5f}, {1.0f, 0.0f, 0.0f, 1.0f} }, + { {100.0f, 40.0f, 0.5f}, {0.0f, 1.0f, 0.0f, 1.0f} }, + { {300.0f, 40.0f, 0.5f}, {0.0f, 0.0f, 1.0f, 1.0f} }, +}; + +#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) + +static DVLB_s *vshader_dvlb, *gshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static matrix_4x4 projection; + +static void* vbo_data; + +static void sceneInit(void) +{ + // Load the shaders and create a shader program + // The geoshader stride is set to 6 so that it processes a triangle at a time + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + gshader_dvlb = DVLB_ParseFile((u32*)gshader_shbin, gshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + shaderProgramSetGsh(&program, &gshader_dvlb->DVLE[0], 6); + + // Get the location of the projection matrix uniform + uLoc_projection = shaderInstanceGetUniformLocation(program.geometryShader, "projection"); + + // Compute the projection matrix + m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0); + + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); +} + +static void sceneRender(void) +{ + // Bind the shader program + shaderProgramUse(&program); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + 
GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_REPLACE, GPU_REPLACE, // RGB, Alpha + 0xFFFFFFFF); + + // Configure the "attribute buffers" (that is, the vertex input buffers) + GPU_SetAttributeBuffers( + 2, // Number of inputs per vertex + (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO + GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | + GPU_ATTRIBFMT(1, 4, GPU_FLOAT), // Format of the inputs (in this case the only input is a 3-element float vector) + 0xFFC, // Unused attribute mask, in our case bit 0 is cleared since it is used + 0x10, // Attribute permutations (here it is the identity) + 1, // Number of buffers + (u32[]) { 0x0 }, // Buffer offsets (placeholders) + (u64[]) { 0x10 }, // Attribute permutations for each buffer (identity again) + (u8[]) { 2 }); // Number of attributes for each buffer + + // Upload the projection matrix + GPU_SetFloatUniformMatrix(GPU_GEOMETRY_SHADER, uLoc_projection, &projection); + + // Draw the VBO - GPU_UNKPRIM allows the geoshader to control primitive emission + GPU_DrawArray(GPU_UNKPRIM, vertex_list_count); +} + +static void sceneExit(void) +{ + // Free the VBO + linearFree(vbo_data); + + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); + DVLB_Free(gshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + gpuInit(); + + // Initialize the scene + sceneInit(); + gpuClearBuffers(CLEAR_COLOR); + + // Main loop + while (aptMainLoop()) + { + gspWaitForVBlank(); // Synchronize with the start of VBlank + gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame that we rendered last frame is now visible + hidScanInput(); // Read the user input + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + gpuFrameBegin(); + sceneRender(); + gpuFrameEnd(); + gpuClearBuffers(CLEAR_COLOR); + + // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering) + 
//gfxFlushBuffers(); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + gpuExit(); + gfxExit(); + return 0; +} diff --git a/examples/graphics/gpu/geoshader/source/vshader.pica b/examples/graphics/gpu/geoshader/source/vshader.pica new file mode 100644 index 0000000..8bcbc22 --- /dev/null +++ b/examples/graphics/gpu/geoshader/source/vshader.pica @@ -0,0 +1,24 @@ +; Example PICA200 vertex shader + +; Constants +.constf myconst(0.0, 1.0, -1.0, -0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +; Outputs - since we are also using a geoshader the output type isn't really used +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias inclr v1 + +.proc main + ; Pass through both inputs to the geoshader + mov outpos.xyz, inpos + mov outpos.w, ones + mov outclr, inclr + + ; We're finished + end +.end diff --git a/examples/graphics/gpu/simple_tri/Makefile b/examples/graphics/gpu/simple_tri/Makefile new file mode 100644 index 0000000..1e4e9ef --- /dev/null +++ b/examples/graphics/gpu/simple_tri/Makefile @@ -0,0 +1,177 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# +# NO_SMDH: if set to anything, no SMDH file is generated. 
+# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. +# If not set, it attempts to use one of the following (in this order): +# - .png +# - icon.png +# - /default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -fomit-frame-pointer -ffast-math \ + $(ARCH) + +CFLAGS += $(INCLUDE) -DARM11 -D_3DS + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach 
dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \ + $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +.PHONY: $(BUILD) clean all + 
+#--------------------------------------------------------------------------------- +all: $(BUILD) + +$(BUILD): + @[ -d $@ ] || mkdir -p $@ + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... + @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf + + +#--------------------------------------------------------------------------------- +else + +DEPENDS := $(OFILES:.o=.d) + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +ifeq ($(strip $(NO_SMDH)),) +$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh +else +$(OUTPUT).3dsx : $(OUTPUT).elf +endif + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rule for assembling GPU shaders +#--------------------------------------------------------------------------------- +%.shbin.o: %.pica + @echo $(notdir $<) + $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<))) + $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<))) + @picasso $(CURBIN) $< $(CURH) + @bin2s $(CURBIN) | $(AS) -o $@ + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . 
_)`_size";" >> `(echo $(CURBIN) | tr . _)`.h + +-include $(DEPENDS) + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/examples/graphics/gpu/simple_tri/README.md b/examples/graphics/gpu/simple_tri/README.md new file mode 100644 index 0000000..5e3b6dd --- /dev/null +++ b/examples/graphics/gpu/simple_tri/README.md @@ -0,0 +1,6 @@ +# GPU example + +This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up. +Users of earlier versions of devkitARM need to install the tool, which can be found in the address below: + +https://github.com/fincs/picasso/releases diff --git a/examples/graphics/gpu/simple_tri/source/3dmath.c b/examples/graphics/gpu/simple_tri/source/3dmath.c new file mode 100644 index 0000000..eb8d03f --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/3dmath.c @@ -0,0 +1,172 @@ +#include "3dmath.h" + +void m4x4_identity(matrix_4x4* out) +{ + m4x4_zeros(out); + out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f; +} + +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b) +{ + int i, j; + for (i = 0; i < 4; i ++) + for (j = 0; j < 4; j ++) + out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; +} + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z) +{ + matrix_4x4 tm, om; + + m4x4_identity(&tm); + tm.r[0].w = x; + tm.r[1].w = y; + tm.r[2].w = z; + + m4x4_multiply(&om, mtx, &tm); + m4x4_copy(mtx, &om); +} + +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z) +{ + int i; + for (i = 0; i < 4; i ++) + { + mtx->r[i].x *= x; + mtx->r[i].y *= y; + mtx->r[i].z *= z; + } +} + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); 
+ rm.r[0].x = 1.0f; + rm.r[1].y = cosAngle; + rm.r[1].z = sinAngle; + rm.r[2].y = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].z = sinAngle; + rm.r[1].y = 1.0f; + rm.r[2].x = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].y = sinAngle; + rm.r[1].x = -sinAngle; + rm.r[1].y = cosAngle; + rm.r[2].z = 1.0f; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far) +{ + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard orthogonal projection matrix + mp.r[0].x = 2.0f / (right - left); + mp.r[0].w = (left + right) / (left - right); + mp.r[1].y = 2.0f / (top - bottom); + mp.r[1].w = (bottom + top) / (bottom - top); + mp.r[2].z = 2.0f / (near - far); + mp.r[2].w = (far + near) / (far - near); + mp.r[3].w = 1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2, mp3; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(&mp3, &mp2, &mp); + + // Fix the 3DS screens' orientation by swapping the X and Y axis + m4x4_identity(&mp2); + mp2.r[0].x = 0.0; + mp2.r[0].y = 1.0; + mp2.r[1].x = -1.0; // flipped + mp2.r[1].y = 0.0; + m4x4_multiply(mtx, &mp2, &mp3); +} + +void m4x4_persp_tilt(matrix_4x4* mtx, 
float fovx, float invaspect, float near, float far) +{ + // Notes: + // We are passed "fovy" and the "aspect ratio". However, the 3DS screens are sideways, + // and so are these parameters -- in fact, they are actually the fovx and the inverse + // of the aspect ratio. Therefore the formula for the perspective projection matrix + // had to be modified to be expressed in these terms instead. + + // Notes: + // fovx = 2 atan(tan(fovy/2)*w/h) + // fovy = 2 atan(tan(fovx/2)*h/w) + // invaspect = h/w + + // a0,0 = h / (w*tan(fovy/2)) = + // = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2)) = + // = h / (w*tan( atan(tan(fovx/2)*h/w) )) = + // = h / (w * tan(fovx/2)*h/w) = + // = 1 / tan(fovx/2) + + // a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2)) + + float fovx_tan = tanf(fovx / 2); + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard perspective projection matrix + mp.r[0].x = 1.0f / fovx_tan; + mp.r[1].y = 1.0f / (fovx_tan*invaspect); + mp.r[2].z = (near + far) / (near - far); + mp.r[2].w = (2 * near * far) / (near - far); + mp.r[3].z = -1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(mtx, &mp2, &mp); + + // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation + m4x4_rotate_z(mtx, M_PI / 2, true); +} diff --git a/examples/graphics/gpu/simple_tri/source/3dmath.h b/examples/graphics/gpu/simple_tri/source/3dmath.h new file mode 100644 index 0000000..a9a8596 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/3dmath.h @@ -0,0 +1,56 @@ +/* + * Bare-bones simplistic 3D math library + * This library is common to all libctru GPU examples + */ + +#pragma once +#include +#include +#include + +typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f; +typedef struct { vector_4f r[4]; } matrix_4x4; + +static inline float v4f_dp4(const vector_4f* a, const vector_4f* b) +{ + return a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w; +} + 
+static inline float v4f_mod4(const vector_4f* a) +{ + return sqrtf(v4f_dp4(a,a)); +} + +static inline void v4f_norm4(vector_4f* vec) +{ + float m = v4f_mod4(vec); + if (m == 0.0) return; + vec->x /= m; + vec->y /= m; + vec->z /= m; + vec->w /= m; +} + +static inline void m4x4_zeros(matrix_4x4* out) +{ + memset(out, 0, sizeof(*out)); +} + +static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in) +{ + memcpy(out, in, sizeof(*out)); +} + +void m4x4_identity(matrix_4x4* out); +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b); + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z); +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z); + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide); + +// Special versions of the projection matrices that take the 3DS' screen orientation into account +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far); +void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far); diff --git a/examples/graphics/gpu/simple_tri/source/gpu.c b/examples/graphics/gpu/simple_tri/source/gpu.c new file mode 100644 index 0000000..c583ae5 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/gpu.c @@ -0,0 +1,93 @@ +#include "gpu.h" + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static u32 *colorBuf, *depthBuf; +static u32 *cmdBuf; + +void gpuInit(void) +{ + colorBuf = vramAlloc(400*240*4); + depthBuf = vramAlloc(400*240*4); + cmdBuf = linearAlloc(0x40000*4); + + GPU_Init(NULL); + GPU_Reset(NULL, cmdBuf, 0x40000); +} + +void gpuExit(void) +{ + 
linearFree(cmdBuf); + vramFree(depthBuf); + vramFree(colorBuf); +} + +void gpuClearBuffers(u32 clearColor) +{ + GX_SetMemoryFill(NULL, + colorBuf, clearColor, &colorBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, + depthBuf, 0, &depthBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); + gspWaitForPSC0(); // Wait for the fill to complete +} + +void gpuFrameBegin(void) +{ + // Configure the viewport and the depth linear conversion function + GPU_SetViewport( + (u32*)osConvertVirtToPhys((u32)depthBuf), + (u32*)osConvertVirtToPhys((u32)colorBuf), + 0, 0, 240, 400); // The top screen is physically 240x400 pixels + GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0 + + // Configure some boilerplate + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + // This is unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + // Configure alpha blending and test + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + int i; + for (i = 0; i < 6; i ++) + GPU_SetDummyTexEnv(i); +} + +void gpuFrameEnd(void) +{ + // Finish rendering + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); // Wait for the rendering to complete + + // Transfer the GPU output to the framebuffer + GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400), + (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400), + DISPLAY_TRANSFER_FLAGS); + gspWaitForPPF(); // Wait for the transfer to complete + + // Reset the command buffer + GPUCMD_SetBufferOffset(0); +}; + +void GPU_SetDummyTexEnv(int id) +{ + GPU_SetTexEnv(id, + 
GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} diff --git a/examples/graphics/gpu/simple_tri/source/gpu.h b/examples/graphics/gpu/simple_tri/source/gpu.h new file mode 100644 index 0000000..845d139 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/gpu.h @@ -0,0 +1,26 @@ +/* + * Bare-bones simplistic GPU wrapper + * This library is common to all libctru GPU examples + */ + +#pragma once +#include +#include <3ds.h> +#include "3dmath.h" + +void gpuInit(void); +void gpuExit(void); + +void gpuClearBuffers(u32 clearColor); + +void gpuFrameBegin(void); +void gpuFrameEnd(void); + +// Configures the specified fixed-function fragment shading substage to be a no-operation +void GPU_SetDummyTexEnv(int id); + +// Uploads an uniform matrix +static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix) +{ + GPU_SetFloatUniform(type, location, (u32*)matrix, 4); +} diff --git a/examples/graphics/gpu/simple_tri/source/main.c b/examples/graphics/gpu/simple_tri/source/main.c new file mode 100644 index 0000000..d82cc12 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/main.c @@ -0,0 +1,131 @@ +/* + * ~~ Simple libctru GPU triangle example ~~ + * This example demonstrates the basics of using the PICA200 in a 3DS homebrew + * application in order to render a basic scene consisting of a white solid triangle. 
+ */ + +#include "gpu.h" +#include "vshader_shbin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +typedef struct { float x, y, z; } vertex; + +static const vertex vertex_list[] = +{ + { 200.0f, 200.0f, 0.5f }, + { 100.0f, 40.0f, 0.5f }, + { 300.0f, 40.0f, 0.5f }, +}; + +#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static matrix_4x4 projection; + +static void* vbo_data; + +static void sceneInit(void) +{ + // Load the vertex shader and create a shader program + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + + // Get the location of the projection matrix uniform + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + + // Compute the projection matrix + m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0); + + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); +} + +static void sceneRender(void) +{ + // Bind the shader program + shaderProgramUse(&program); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels + GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_REPLACE, GPU_REPLACE, // RGB, Alpha + 0xFFFFFFFF); + + // Configure the "attribute buffers" (that is, the vertex input buffers) + GPU_SetAttributeBuffers( + 1, // Number of inputs per vertex + (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO + GPU_ATTRIBFMT(0, 3, GPU_FLOAT), // Format of the inputs (in 
this case the only input is a 3-element float vector) + 0xFFE, // Unused attribute mask, in our case bit 0 is cleared since it is used + 0x0, // Attribute permutations (here it is the identity) + 1, // Number of buffers + (u32[]) { 0x0 }, // Buffer offsets (placeholders) + (u64[]) { 0x0 }, // Attribute permutations for each buffer (identity again) + (u8[]) { 1 }); // Number of attributes for each buffer + + // Upload the projection matrix + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection); + + // Draw the VBO + GPU_DrawArray(GPU_TRIANGLES, vertex_list_count); +} + +static void sceneExit(void) +{ + // Free the VBO + linearFree(vbo_data); + + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + gpuInit(); + + // Initialize the scene + sceneInit(); + gpuClearBuffers(CLEAR_COLOR); + + // Main loop + while (aptMainLoop()) + { + gspWaitForVBlank(); // Synchronize with the start of VBlank + gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame that we rendered last frame is now visible + hidScanInput(); // Read the user input + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + gpuFrameBegin(); + sceneRender(); + gpuFrameEnd(); + gpuClearBuffers(CLEAR_COLOR); + + // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering) + //gfxFlushBuffers(); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + gpuExit(); + gfxExit(); + return 0; +} diff --git a/examples/graphics/gpu/simple_tri/source/vshader.pica b/examples/graphics/gpu/simple_tri/source/vshader.pica new file mode 100644 index 0000000..cdd9759 --- /dev/null +++ b/examples/graphics/gpu/simple_tri/source/vshader.pica @@ -0,0 +1,34 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf 
myconst(0.0, 1.0, -1.0, -0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +; Outputs +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; outpos = projectionMatrix * inpos + dp4 outpos.x, projection[0], r0 + dp4 outpos.y, projection[1], r0 + dp4 outpos.z, projection[2], r0 + dp4 outpos.w, projection[3], r0 + + ; outclr = solid white color + mov outclr, ones + + ; We're finished + end +.end diff --git a/examples/graphics/gpu/textured_cube/Makefile b/examples/graphics/gpu/textured_cube/Makefile new file mode 100644 index 0000000..1e4e9ef --- /dev/null +++ b/examples/graphics/gpu/textured_cube/Makefile @@ -0,0 +1,177 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# +# NO_SMDH: if set to anything, no SMDH file is generated. +# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. 
+# If not set, it attempts to use one of the following (in this order): +# - .png +# - icon.png +# - /default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -fomit-frame-pointer -ffast-math \ + $(ARCH) + +CFLAGS += $(INCLUDE) -DARM11 -D_3DS + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard 
$(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \ + $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +.PHONY: $(BUILD) clean all + +#--------------------------------------------------------------------------------- +all: $(BUILD) + +$(BUILD): + @[ -d $@ ] || mkdir -p $@ + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... 
+ @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf + + +#--------------------------------------------------------------------------------- +else + +DEPENDS := $(OFILES:.o=.d) + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +ifeq ($(strip $(NO_SMDH)),) +$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh +else +$(OUTPUT).3dsx : $(OUTPUT).elf +endif + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rule for assembling GPU shaders +#--------------------------------------------------------------------------------- +%.shbin.o: %.pica + @echo $(notdir $<) + $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<))) + $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<))) + @picasso $(CURBIN) $< $(CURH) + @bin2s $(CURBIN) | $(AS) -o $@ + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . 
_)`.h + +-include $(DEPENDS) + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/examples/graphics/gpu/textured_cube/README.md b/examples/graphics/gpu/textured_cube/README.md new file mode 100644 index 0000000..5e3b6dd --- /dev/null +++ b/examples/graphics/gpu/textured_cube/README.md @@ -0,0 +1,6 @@ +# GPU example + +This is a simple GPU example using the `picasso` shader assembler which comes with devkitARM r45 and up. +Users of earlier versions of devkitARM need to install the tool, which can be found in the address below: + +https://github.com/fincs/picasso/releases diff --git a/examples/graphics/gpu/textured_cube/data/kitten.bin b/examples/graphics/gpu/textured_cube/data/kitten.bin new file mode 100644 index 0000000000000000000000000000000000000000..a87ac4a8f17788e646796732120bda82629e0c97 GIT binary patch literal 16384 zcmYLwb$ngNm4BQ#w#CfM%(9r7nHit-#LO%UY>{P2woEce=9uCbVu<57GsU?%$vHRF|IfQkI_gQiq_=`Kn6K z?N$RRw)v?`+XUZ7MS9*>Q&0u}Ie&F*eBQ68q)KDS|G&>!P?Rb!J5+cP)%$P z(So4c0yG6*(OwMI#fzcZ*b%Oco#DFJ6{#z2^V1eGrELK^&tzg-Am<9u5L9|TL>JEm zs!K0~>EXE$>Jr4cDNppbh3epWA)D7;WZNELfE^KfP>RyS&PaVIN9$v|xE5)M7sBF@{Qv3DjauCF&yW;yu)b_vuSY5r&d#j3HE_jiDZAj9n3wBkrRN!BdSf zg?f@1ubDw3#X?exH<45mETEal-^tdJT8g!#m0=IfbUWx|J3&3g0vhaVrddHd%MqH{ zj?gV|hi<+r3`)Ex(*UZmCTxaKji*kOrOvTxiI%+28tU=J(BOO;38v7ZEUh$aXlB_% zJA;(6G&60Xo#z6rYrS>%+*8hFe=QoEl@`-kt)Xi)D2n%&PohT^kHL zHi!BUIMjv0Q64OGgbig_$#_lN&*mf#QBapg{^l5`2J#w0;M&YNo8sWnkqWogM0j?l zkrxj47S7ocK^g1?TR?8<~=ce-TLo+w$h#=*8d2{!HA2g-Qg6HQa_9D9YlbU5^hF`01S z7%|q7)V3=gLarUDT~`7edeg|~bLdOuxFp4O`|d==HBzCgWKBJVt~Q;Cl66;-WXth( zy{Wv1@+ixeauoOUB*MNw1r7sg3hFe(^>VybC*^TGn`GIMAX&7>gK{AF4g;yM>ErVr zN{21)weBUK^NYUV3%NosK^=RO;nj`i?>d`9g2Oci?X^x1e?dDUvzYZ_4*V=xp8|FB!MQNJdT3Fyn92wm4XDA1(OI zthjb->S@V!8FwYZhU>NBdJP&QC8Nej7&S-3xG739lt)T>vT*ieU?`7*G38h^aSuC^ 
zVUryMle#z<^1or85d=e7H0-zs*8NF}eX{48MYHYVv+T=2-e5V`$UWincHp`LIx-N` zmWA`@)^YyKDu(CwplY-l$2aD&b>;}B_wU2zrFHDCPr>rx6)c`wM*s9Q>ITOkADc$Q z!~$eP6F7P00?G&Xp=aN+q+J~%X_Waxw>At~m4T9aiI1dG=n0KdU#J#&L6`qMt;zuC z@&BmJ|Eb_}477MYXqN4kl=Ivr^=ucYiFRGN(O>>5Nu{YH7J)l*# z8(Kx|=eR*N%Nd#lit#Y4i-KNFFwcM}7;p|T7xjQP<_BAo#bZ!?->I?dxMOl}A8d z7D`=1U{n_dJ)v7g05qwSChws>&?xkRhM12!YO=3Y6{yfvhjVE2JQHKpxh^r5>ywld zY^9wsmQYEwgHoKeq!eYr-&Vv4Oa8WjQi>gRCRjl=-4RMjHrN$!$=^THT}gJ*i_wq9mL+Lq&tYc zgBtfmDcKgvsSZ#{bKsoZzdU!Sag5kQ(O1oMf?B4lf~S_{2IUk-C?`4d9w+KY8t*8H z`BamfiAx-t;mT{2oymPq_kc>Wi=@JFDk-j#GW*X*7-CzvjzU;4r1vG#MPbgW>6t6s>Pdg zKP|al3vB0@oxJab1T*YNwxq5WP!p6`rkZLYZA-Dmb17ER^J(_jMr_-bVu=@$IG+9I zQ!J(DGaT_krk(VBmJ_xSUD6VbK+xSmC|gum)6k8BxO^}w#BYg zYY4tl7Wag>td_++&#~jRAF04rtu#yGv^g}1sam{GE6*9)yjPcas=;fT0)-(3s*kW)qs@-l~4SqOGl zMZnwN24OLLPdSfvo+C8!9H3L=LOC9=uJea}nFp+D`0f@sL9f&u8pSTqtsu5ndcm+Z z0ERWbFp&jeU`B?r;slh87GeK#KYU!x5a?qMd1V@WeC*)f6ovL#8LZ@CC>SZg#`-Aa zHJLbcZ~zxi&cdob6m|{au#<NQ`yk+I{-C-hpgnj5H!)u$H)<3V&i)NJk!)j!mPas{DtLgk&TamY}^=1~YeO_y_yLGb9AQ5g`bUjzD2W0TOevQQ6yz(%yDNrDvkB zvJ^ejUATMq8lJxU02>#!VDILP)2CLjJ1hvB=QeQV&P}XtUclPPQ&`@-45zL%96L6P zis5R!boCUL4lSU2ViG+QV`v?nCT|0CYl}F4@f5lzC($%Aj=F(q49y=z&%y~bjqO9@ zo;5U$FQRYd0xHMXQQSL^n(0lXHZP-Xb^~2&FQakpCCKKkBdz-gl9~@Ax#u{NTGlbM zwv5RGhcJKm81`+P!{q8FMpjhB|OfD#RIKlWDLi`o&%forPS5?$r62T(ulG z+GU(i*bQvrnTR!N&y3JH~$a8wA}+>(m;E(%8Pa;ZCB2tS04nUhCrCo z?=)%(gBjmZEBc-Z{h2Ut3a8H*i9H89v3YD5rPC!?JkkZT<_OrfMZl7}x)B#r#&a>U z-iDZwEYvJkLq`?}L(0?PnoKy>l;bShVqnn`3mamkHrHUv_g}X!0cJf3uD6AGKQ@AUXJ^p7zYYU)b%erpjhG>}oLe73Z+itg8uL+-pMdF!R+JVbV|KV6UG3#)m*=9dPljVF zPrw48wyi%9TZSbY)la3v`w;e zl8}|1jPU4Sq~vgKy`2!waZ$7{GICPU-d>NS>_oJ+)gw429R3mh$jnVcaBLV#s!P$` z-GZp(IP?z@=NfAj`!+t>4g+I-bhcI@f*2j2k&Lp+e0YZj!rsvq0Rf(E06v4&Y1M%tfKg99^8_=8&SGZu04}}qGW&;c z;_^kj`Sg99xO5Zme*O{O{pb_acXd(L7P6~quzGqE7jE9f!toP0Pdqw#^*X9Ld$4i& zDth+Lm#@l%Hy(gUeeZ2hUJGlS! 
zV_bRTO}z5fyLkA?=eYg$d${xDDc<<(3%t&`KmPXD*mvqW4qdvBm2-D+;N0t&I(`S~ zjce#%zK%rMoPvM!@?(r0yo9;a?_zT05|S$SAVoHhq}nO?7SAEN_7EJ>Mv>gQhWzeL z;`Ci?z4A6TUVa3#h&E(5FGAl>hOoRYoW6P&m+n5s-h-#`<_90+{O!lMaO(-)`0x`v z`tVB(9e4>Por^em<1rdXR`Ko^U*XQ%AL7wRpW?NrpJLP_M?jb`qfHOC5qddc z#AafwDKYjL-rCS+5Z7#*V--#G#TZ-iZHUROM<*5YCXCd;o^d*X?_yqS#`0`PgWB_9 zM_(kcD-RCTLtGd0+i|S@Pcom$7QgM-gj~_LCKbQ!IhR8VV{+8bPUuPO7h^0*H7fm~ zTITzs?ZP;x=&KQP)GK|7y}r<+9id6SA!8BRv{#<-H7mq*Via*p#2G|PL=iVB@DjL6 zto4vIa)`&|3(Qt$tXIS)Gz&c?b=nx3`CgI+@!5!Sb%^aIv_b#~wKS4{h!BnXBB|?2T z1!@B+P#H^y>O?x!M^m9ZodxwhnY@-p9{VHdP@Bkx%1jP4r?a6tlMBt6TuHN(``R2S z=~ghV!)L2WT+LG)^5Mq5<4gfuXN%xL%=K9)hv$4Lb}v@IV6Ffrvw1L_D}?cEJ}juCE!SYmzQbHG zj2DVvzPEt17?yKIux4{yD22mP8Jrf&;IOXtrol2>*325bi5h<)G>cm4*#V}L@iWe-(ow)hw9KdR*mxV1OzTt za!dsR7Ag=lTT0)i2$kh=$ZF5VU`G*#x+*Y2-*9rQ9xF3#SlHJOS#1dpEe)ZxGzWY7 zD>20D)BRP%#!8$!whu|kkyu_BMqF|fg7;S=WU>fBOH~M<@0(d2hmp==$Qv^;+))Vs z1J#UYCL?rR4v)$hWambqI4=U~dP>--wiEkCYB4|Aj_HAFEbr~Z{)I96wrRNj@)^vo zEFw2I7FD%H2n*+b#zYT$=Ew2Mn-8IHtOH#W1NxWM(9+i<_Ey8e#X*6cDmvPTC0;uD zc%h-L7;U{RC@9Iq;mxBcZ)!lktcK_JD%z(e;Y>XBqOTno9R*oiD{jAb8@w%>w5sa)Yaq-S=4DFr4 z`0_Gs^TY7qv(mS;Kv-rD0#efO^sArYozK6*2VZ=Gw?F+1$1lHvgu*7;8gF2HZTX_Go&vEzhV|>c{K4w26vl?q>FQ8{?mbiKZ*Jz&{p$+rotFN&2 z+8vy}coAEdUc&Kn=dkh8C4Bjt-(czLX?*tUZ!xiX0LzCC;=uYkT>N|ymzj>2Zr{Sy z`}g_Yv||6p1~iQfpkr!`jIv4?+uC7wOe|`924G}mi)zM7F5kUNTi_ASzH$@mFWtb; ze*XhH4o=~d-+qtojd}FU%+p4?fwb}(WY@Og)>|K;ZfO9^XD=bKuLM5KeK`_WJ=@dZ zKwNbot~voJl9<)-;Vyih(%h{U$V+fYK$Cz;k5w8%KZy@3d>|0SM6UG^= zsJB^Tlp-b~_$K6wF(T$6at(qC`#_NcVH}@12ub8X6nx_PkCYf&>?>*IctX3>PtwWv zR>-yF`h|QeQgb#DdlA$`@VKYe++#s)Xm45Y8JIAyq@aw=>I(azB8V{%>RlNOgYrO0 zw~&5pWr$=zOgF3vW1J%tMlxZSiI_*Iq+b>w`jSCKkYrR#|C0D>EMu-ptdF*!CNWTt z`>olVESZtd_>g2yYQnuTV4h2}GYzs;rQF(3$xP_M=VZ*i*5%w{e+`+7GpiwA7ABci z^Eq&Su{TE4Q{;b)nRnBp?`uRnHfG$}Y>;#;8y1tqQDU;~bP@A-aj+uatd;L6^O-ih zW6cd-$&%|JJy7dlWhptXhG!RJ^bb{}a_P@j!vgq-exH)$Zp+3hWb zGqKNQUl}~;ue&YR!1tgW-ur72c%%jH`^w?9z&QB63ivW!;J-q@@@Ol94>ck9SQ`S5 
zwIJvOe;;o{_-3bKelc(K*&c*azTk&!b|7S<9YLJeZ>tjl=Xw<_=;9#4j<>+CF&*I> zl+m7r7u2>xO-F_PqZZ5^jSx!NLF2?v%8Xj5g`JlfcGF+B@wJ4Ft0_8~N?>Yjjh40w zoVmCG4HMQd7-?djICbgz1^7n?qIP6hSc@7QDa^@(qkZ`6485>;) zrS1u*x)G4(ht!l{L>ANL4|PZ6m0=`Z8bruT1Bkvdh?trLm^0oG%3Qpir!|~?nU8TW zhn<@(EM2S+8WVzua4*CrMa{^;wFVRdCMZr#3y&65YQc9?O}SnA&?gOig98k;JZSKW_;$B#nZ+KkfbYCL}D zZT#`if5H0ZIoy5nDN39AcrFj3P}Ys5jkDOZxPq>+S(Mc`W9i6g)OGZuXMBq1^{ZGR zc2!lTz$d^5BZG1n+B(BGDxERzL|8>uLfh6KXEs++KRCp_+`=pLo%J02VeAry+{P|= z#^fM4ArqtXhhXLrjth4m;kU&1;}@^v)}yyEwSOITd)D#s@BWCxcb?+i-~16DeDfVH z-g*she*Hb({QTEAck>l|^v&0J_scJ6f1JTX;#|k{42Jg~Kw(Q8Hm|&duBkor;}2nS z{VCe_Tpt!9AfoT~SUS7k|D>t!s;}qmWlc?(-MkRgrl9o=CaBL!RtA1n;ed`N& z^!b;#Ps}_^j646}K5dZ8m_BqIr>OV&tFPev!^h}ZWNf^u9@jtr26w;x2i$n@CjI13 z@Qd$%z|Vj9fou3JzW%Sj;^+VNC%pf|AK;ysij2x?TzL8sW;eGGmXQS$S9kPL|Kj#e zeEO&Vz#-Zlum0>C)H4TOKQ;kB>K%~GyiITzTK7y7?~4(Un~(CIe(X}$WZZ;)nX@O9 z4NMptNk&KpV@gfU$gFQdH}^EZvm39{A0Iz-7`gTB7+hF~nP(tk^Q*9Q<{bQ5v(d|V zMO05QVlwj(n@RgWBpgX4)o33XL1b$_<7sOs+S?AF*kmkiorix?CS(Jn_~^Ue;~HZZ zdpAy^Z+-~|_AW4W^@2}AGF-xmzX8NNzfgEaC&4o|9X|1Ch{-FZJyQf(FJoX+(}<+c z+&nOZm@b}K!Lf+SDMhBN9l3QKNGT~pL_;Q$$|?|2$2y8OVm zPEE0JAQmX((+;sEPCAME6!!{yhK+WFBXPxnwK;-s&oP2Niyb_RQCRbSGu9}HxP@i2 zn3H2#g{&mTKUf38`z>j+h#DU&UNd3N#EjT#CMR~YFL2d}I82@-aM!#+tou|B)j%>>Oz~K zCR8%6q8(Ht>;&Q~G1!21gP}Y^GAIj%5o<(rifK1j1S4UmcXtjoiXSlNsGC09oj;gd8EaD z(63@`2lrFIoN@7*FiAa^HEe~xjFosYX5tB*k{`#2S`z(o#_0Jznelm;&=xY~^Rnqm zg~w!x4MW` zLkPXlkEm<>ub=Ef6!~Io#6~B=h|6Kb--xqpmxmBMTZJ&jzyqer5i(PW&;#`dU!&}W zIz+5C!hf<3V`L7B|m9;3~%)s5Io1BfH;CSDjo*v$zjr1C7|{n@M=h$KJi z_9P;2?LpZ6S;Rb?K^8GN{m~M7o*ae=u{b6<82OCz%iAh(aAg9w?_9vitpm7m?=t%L zHe+sa6oVJ0QB;+U>7`L@Ufw`SO(Ev?4-oYLDHK!B|+G#TIRWqM9#7OyTAMeSKt2x--!F^%b&Xc7*D_ZM|}BD|AyDT z{1txp*Z;u)h$f9<`em}Y*Vofy}(e;FCIHTd9%f5M@gucCAD07~0> zP|-h%wmth;t5S~!VtqdX<`owX-k~Bf17#!X~g80GcVBrZ3j<87E}`VtBI#F zxP&FbFFB3)nuWyTDkRGbh|d}Djmv_$PdLJ}ST`YSMNUIGYCHN+-`9p?*Z3cqn}Do` zH5*aU$Slc+wQn%|;!_xBlEW(|9;J;f$f}m1gL)J-G;<9JC~j_tTO`kYzbHiJl)=m= 
z2o-JJXdj+H-$)-;kDfs1Ko64gaxgTz7sd5;=ouTsz{D&vi%SuimPOl{2gZi&nWMMn zd2dI&vSb~QeLL+7!6y~DN;}pCS+SnR`X^dCTm@J9W~~$SLjR?Qn#>6xe+620^NA&qDjCswtH68k`Y0z(`U)z>O z{G;F7mH~s7EUqn6GVY>}-I^^Kx8=f!y6H9jUlV*b`q){LW>dPPk}m2UT%}zp%;RS` zDQX0COIat$+6L_+U)D#mZZXwCdNGc*j7ioU%Ubh7=JHt&sg>tN+u%n#z=*gl;tOW9 zcZ6*sY$0KnFvh^T48{#g{8=BzJO;7diet>GS(8B9RS?1;CJUWj6iUp&VoSt#lycTg8mBdJrs+Pk9N&}6PfyTFTe>YSsJ zITP9mVonX}E7qcsMZH;rpvj}$^S#aN?Pe2>M;dld5u8tz<#wh%*4#N9_upc_%DPkIoRRkSsgc->p`|2Fx2rqwEw06}o zwqTAbo`XJ%)y#dAFkVAjjj`s7&5=2H<82mQ5tlIn2%xX$du#s zN3&qY+)DODIRaz65jvPh3}u|noca6|)_drxvTu&azHAhfCLpOJ2QAH|uytnruBImQ z<%|pm=Roj@e>4V`-?>Ff6)ASnS2JiCV%_QBZE0byxr*vTA)fs->D z`nxeOJB94Za&(Li(;sg}+ukKCZX8GFzC|3n_Zn^zvuEgsE?vHY&hb$kxq5|o$2AS~ z;s9goGbcCbqqkvx<0wYgk6`Z1S={*KQ@s1#_o&}9g(+S?_0|)ddiXlt`Tl#_0e{Aa z^uNFSr+>vOpMQ;uPu|Bx;`~L%{>P6TW}Kh}(@-HBmYj3Yey)vxjDSKlxu{xYVS%Rk8&z~sgzX0|S%YkohL zMC|<4dpO2?^e3lpeQ=NO#5~65W{^=-jOu2_ ze0(6MT~N{>W9*-G$p-oeNsNcK8P7~LHK>`fo;V;7J2iBnV`Ks)Z9|y5dGb4eRV3#Z z;QpI$<3$Y(ge4^*JS_#2OAC1I%}4m%AHK(v58uVpPd`FlRViW^4`}M?KpyL7Y>1O) zjB7i~iAVGY+xQ-)%kmh*8b!9O7_K$+VXH&n&ODuQRWNb}CJ^Jtl zJe#x_Q#TT|{EUAaRR%$q?HON>aRg1`@$tU>!`<+%OJ+`-HPQ+ij6=uQ zQB+%s`PF3<)8^eROQv5Kir|I};&}`V=xggJa^uV&b3Sc0O`df+j4^1@rqL(%=rPWq z$KQH93w7vEYZCLdyOJ5-W-gif>NK(*jQXfCPodkC4DE(gXg4N7lRBu>h4EShlsQg` z@pvtHB2=s6p(c-oN_8}}@TLDo>Ks>3^%##6wvoXbg7AVxiWM zEUA&N+K?cr&`(zqa_J*0(SP1aj8~P%aBLLki-8iq4^gJAvXeHN60u+xZ3HFqmF47d z?aED*Lz%l|97|cs^~}x76C_RcwMq5s5}?{dz4$Cu83Rz`eR}dl#s?Dl4N8J!#r_Mi zwyfK?l$4VB{Xn8McCZNy7T-Fkq}nU&1N9UKNj1+KhMBBGOy;)Ks_7FR)fi*bN1q#9HDp>kd>?Z28TCJ(Lryu`Ah%IP9#bRaE0Q4{Eutl1efC{2U)i zse-weJl1Vyuy&NX=%=xUBGEzGQNw&gQQ+TSsEbvMRa{SSmNYY1$C$|PBlvyAj#zVk zFK5SZ5KQ@fffZ~2?a3408}Pf0cz#z9Wg@91I7lzV@mq!%{&%=u(ZpJH8As3&jk;>G zPEypQ@|zfbx8lRv5Wc%yi^6{3dPLoZD(fKC%KcgM$a+Un$IACyo3SNrzJnUHjrEyt zFl61k7Hwx;+NB!I%cwDzrON++N=+bCXftcjW>sQrOO5x5Z()=(ovr*mwHH|ryJV^;MY+SKYjtnr|YufcWcu>QiB7_2|YdI{PYkSjoSKOi&p|!J zXY+q;paWMw)+REKpsc4Pxs}C8+7@bfQF)ih_e;9QY=#DK3k!jnw-4rytRjUzqOCX2 
z?c_vkSJzNXIUnk$M@|PoeHtU+7CnY(xB+cE~~?8Nv-3Culc zXB+YRTG}c*)YYLwoLbyGg^AS_9KUfL>z6Mff_9D{akqcpEE@a!kV+dWFqJi{)FUD% zA2r>5m}E{oHopjMV`E5RjYLFNu9RL;EoIl%Gmc%4#FBEUacn}WW{i7+csb8G*3N99 zftKsp{fQ| zBV)$sS=a6)_Pda_-!SWU*H9>Lz~J7!XyMrAfj&$SLtFd1kdU2Cdou+~>nqIhZ(;xH zBF1SS9z1>+eXNlvudAlrmW%MXIHVU8z{b;;-!XVGw-bSg)Esz4#v+(|=bxB>^omO6 z=4;S5GsS&itST!P!+WP$Ya&N8=dI~zk&+7XBvJq18x;bN06)pj(@QdUa%FCT_C{JN z^9=MYy}fBSxC;S5LTDL`r{Vq z&e|*8sUphHh4x4;wCKZY53^6IGgSh$kvvIntWZ)K%w`REHnBfV((1{O4Cr_7Vmx?f zYa)~xH&&vLzmwk&D>2@nM890Qjb{qKy%g8f=>w`!o?;xoA694VShYJ1$}LGyZ-`~x zc{1zd=&Q@)CA)@1#wX(WEi~-}%GRzY?l&i}mYi{m`Z&o>76;SnXvvy3fjWJDU4CDq z)5{nU&o<+sBIxz!z<#^}W&?RJ>1W;8RE^{~RxWu@%UD;&?_v7$CBZZ5$|knN@H-3E z)l;@wt}p%H2+6FRdHkXf$*737OGP-8v%Msx(hzAE@mo7TK+?|(lvFaj{;rkb`~Lxn CZh=bx literal 0 HcmV?d00001 diff --git a/examples/graphics/gpu/textured_cube/source/3dmath.c b/examples/graphics/gpu/textured_cube/source/3dmath.c new file mode 100644 index 0000000..eb8d03f --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/3dmath.c @@ -0,0 +1,172 @@ +#include "3dmath.h" + +void m4x4_identity(matrix_4x4* out) +{ + m4x4_zeros(out); + out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f; +} + +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b) +{ + int i, j; + for (i = 0; i < 4; i ++) + for (j = 0; j < 4; j ++) + out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; +} + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z) +{ + matrix_4x4 tm, om; + + m4x4_identity(&tm); + tm.r[0].w = x; + tm.r[1].w = y; + tm.r[2].w = z; + + m4x4_multiply(&om, mtx, &tm); + m4x4_copy(mtx, &om); +} + +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z) +{ + int i; + for (i = 0; i < 4; i ++) + { + mtx->r[i].x *= x; + mtx->r[i].y *= y; + mtx->r[i].z *= z; + } +} + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float 
sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = 1.0f; + rm.r[1].y = cosAngle; + rm.r[1].z = sinAngle; + rm.r[2].y = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].z = sinAngle; + rm.r[1].y = 1.0f; + rm.r[2].x = -sinAngle; + rm.r[2].z = cosAngle; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide) +{ + matrix_4x4 rm, om; + + float cosAngle = cosf(angle); + float sinAngle = sinf(angle); + + m4x4_zeros(&rm); + rm.r[0].x = cosAngle; + rm.r[0].y = sinAngle; + rm.r[1].x = -sinAngle; + rm.r[1].y = cosAngle; + rm.r[2].z = 1.0f; + rm.r[3].w = 1.0f; + + if (bRightSide) m4x4_multiply(&om, mtx, &rm); + else m4x4_multiply(&om, &rm, mtx); + m4x4_copy(mtx, &om); +} + +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far) +{ + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard orthogonal projection matrix + mp.r[0].x = 2.0f / (right - left); + mp.r[0].w = (left + right) / (left - right); + mp.r[1].y = 2.0f / (top - bottom); + mp.r[1].w = (bottom + top) / (bottom - top); + mp.r[2].z = 2.0f / (near - far); + mp.r[2].w = (far + near) / (far - near); + mp.r[3].w = 1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2, mp3; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(&mp3, &mp2, &mp); + + // Fix the 3DS screens' orientation by swapping the X and Y axis + m4x4_identity(&mp2); + mp2.r[0].x = 0.0; + mp2.r[0].y = 1.0; + mp2.r[1].x = -1.0; // flipped + mp2.r[1].y = 0.0; + m4x4_multiply(mtx, &mp2, &mp3); 
+} + +void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far) +{ + // Notes: + // We are passed "fovy" and the "aspect ratio". However, the 3DS screens are sideways, + // and so are these parameters -- in fact, they are actually the fovx and the inverse + // of the aspect ratio. Therefore the formula for the perspective projection matrix + // had to be modified to be expressed in these terms instead. + + // Notes: + // fovx = 2 atan(tan(fovy/2)*w/h) + // fovy = 2 atan(tan(fovx/2)*h/w) + // invaspect = h/w + + // a0,0 = h / (w*tan(fovy/2)) = + // = h / (w*tan(2 atan(tan(fovx/2)*h/w) / 2)) = + // = h / (w*tan( atan(tan(fovx/2)*h/w) )) = + // = h / (w * tan(fovx/2)*h/w) = + // = 1 / tan(fovx/2) + + // a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2)) + + float fovx_tan = tanf(fovx / 2); + matrix_4x4 mp; + m4x4_zeros(&mp); + + // Build standard perspective projection matrix + mp.r[0].x = 1.0f / fovx_tan; + mp.r[1].y = 1.0f / (fovx_tan*invaspect); + mp.r[2].z = (near + far) / (near - far); + mp.r[2].w = (2 * near * far) / (near - far); + mp.r[3].z = -1.0f; + + // Fix depth range to [-1, 0] + matrix_4x4 mp2; + m4x4_identity(&mp2); + mp2.r[2].z = 0.5; + mp2.r[2].w = -0.5; + m4x4_multiply(mtx, &mp2, &mp); + + // Rotate the matrix one quarter of a turn CCW in order to fix the 3DS screens' orientation + m4x4_rotate_z(mtx, M_PI / 2, true); +} diff --git a/examples/graphics/gpu/textured_cube/source/3dmath.h b/examples/graphics/gpu/textured_cube/source/3dmath.h new file mode 100644 index 0000000..a9a8596 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/3dmath.h @@ -0,0 +1,56 @@ +/* + * Bare-bones simplistic 3D math library + * This library is common to all libctru GPU examples + */ + +#pragma once +#include <string.h> +#include <stdbool.h> +#include <math.h> + +typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f; +typedef struct { vector_4f r[4]; } matrix_4x4; + +static inline float v4f_dp4(const vector_4f* a, const vector_4f* b) +{ + return
a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w; +} + +static inline float v4f_mod4(const vector_4f* a) +{ + return sqrtf(v4f_dp4(a,a)); +} + +static inline void v4f_norm4(vector_4f* vec) +{ + float m = v4f_mod4(vec); + if (m == 0.0) return; + vec->x /= m; + vec->y /= m; + vec->z /= m; + vec->w /= m; +} + +static inline void m4x4_zeros(matrix_4x4* out) +{ + memset(out, 0, sizeof(*out)); +} + +static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in) +{ + memcpy(out, in, sizeof(*out)); +} + +void m4x4_identity(matrix_4x4* out); +void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b); + +void m4x4_translate(matrix_4x4* mtx, float x, float y, float z); +void m4x4_scale(matrix_4x4* mtx, float x, float y, float z); + +void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide); +void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide); + +// Special versions of the projection matrices that take the 3DS' screen orientation into account +void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far); +void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far); diff --git a/examples/graphics/gpu/textured_cube/source/gpu.c b/examples/graphics/gpu/textured_cube/source/gpu.c new file mode 100644 index 0000000..c583ae5 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/gpu.c @@ -0,0 +1,93 @@ +#include "gpu.h" + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static u32 *colorBuf, *depthBuf; +static u32 *cmdBuf; + +void gpuInit(void) +{ + colorBuf = vramAlloc(400*240*4); + depthBuf = vramAlloc(400*240*4); + cmdBuf = linearAlloc(0x40000*4); + + GPU_Init(NULL); + 
GPU_Reset(NULL, cmdBuf, 0x40000); +} + +void gpuExit(void) +{ + linearFree(cmdBuf); + vramFree(depthBuf); + vramFree(colorBuf); +} + +void gpuClearBuffers(u32 clearColor) +{ + GX_SetMemoryFill(NULL, + colorBuf, clearColor, &colorBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, + depthBuf, 0, &depthBuf[240*400], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); + gspWaitForPSC0(); // Wait for the fill to complete +} + +void gpuFrameBegin(void) +{ + // Configure the viewport and the depth linear conversion function + GPU_SetViewport( + (u32*)osConvertVirtToPhys((u32)depthBuf), + (u32*)osConvertVirtToPhys((u32)colorBuf), + 0, 0, 240, 400); // The top screen is physically 240x400 pixels + GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0 + + // Configure some boilerplate + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + // This is unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + // Configure alpha blending and test + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + int i; + for (i = 0; i < 6; i ++) + GPU_SetDummyTexEnv(i); +} + +void gpuFrameEnd(void) +{ + // Finish rendering + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); // Wait for the rendering to complete + + // Transfer the GPU output to the framebuffer + GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400), + (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400), + DISPLAY_TRANSFER_FLAGS); + gspWaitForPPF(); // Wait for the transfer to complete + + // Reset the command buffer + GPUCMD_SetBufferOffset(0); +}
+ +void GPU_SetDummyTexEnv(int id) +{ + GPU_SetTexEnv(id, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} diff --git a/examples/graphics/gpu/textured_cube/source/gpu.h b/examples/graphics/gpu/textured_cube/source/gpu.h new file mode 100644 index 0000000..845d139 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/gpu.h @@ -0,0 +1,26 @@ +/* + * Bare-bones simplistic GPU wrapper + * This library is common to all libctru GPU examples + */ + +#pragma once +#include <string.h> +#include <3ds.h> +#include "3dmath.h" + +void gpuInit(void); +void gpuExit(void); + +void gpuClearBuffers(u32 clearColor); + +void gpuFrameBegin(void); +void gpuFrameEnd(void); + +// Configures the specified fixed-function fragment shading substage to be a no-operation +void GPU_SetDummyTexEnv(int id); + +// Uploads a uniform matrix +static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix) +{ + GPU_SetFloatUniform(type, location, (u32*)matrix, 4); +} diff --git a/examples/graphics/gpu/textured_cube/source/main.c b/examples/graphics/gpu/textured_cube/source/main.c new file mode 100644 index 0000000..d8274b7 --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/main.c @@ -0,0 +1,244 @@ +/* + * ~~ Simple libctru GPU textured cube example ~~ + * This example demonstrates the basics of using the PICA200 in a 3DS homebrew + * application in order to render a basic scene consisting of a rotating + * textured cube which is also shaded using a simple shading algorithm. + * The shading algorithm is explained in the vertex shader source code.
+ */ + +#include "gpu.h" +#include "vshader_shbin.h" +#include "kitten_bin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +typedef struct { float position[3]; float texcoord[2]; float normal[3]; } vertex; + +static const vertex vertex_list[] = +{ + // First face (PZ) + // First triangle + { {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} }, + { {+0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, +1.0f} }, + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} }, + // Second triangle + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} }, + { {-0.5f, +0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, +1.0f} }, + { {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} }, + + // Second face (MZ) + // First triangle + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} }, + { {-0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, -1.0f} }, + { {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} }, + // Second triangle + { {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} }, + { {+0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, -1.0f} }, + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} }, + + // Third face (PX) + // First triangle + { {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} }, + // Second triangle + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} }, + { {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} }, + + // Fourth face (MX) + // First triangle + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} }, + // Second triangle + { {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} }, + { {-0.5f, -0.5f, -0.5f}, 
{0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} }, + + // Fifth face (PY) + // First triangle + { {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} }, + { {-0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, +1.0f, 0.0f} }, + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} }, + // Second triangle + { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} }, + { {+0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, +1.0f, 0.0f} }, + { {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} }, + + // Sixth face (MY) + // First triangle + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} }, + { {+0.5f, -0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, -1.0f, 0.0f} }, + { {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} }, + // Second triangle + { {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} }, + { {-0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, -1.0f, 0.0f} }, + { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} }, +}; + +#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0])) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection, uLoc_modelView; +static int uLoc_lightVec, uLoc_lightHalfVec, uLoc_lightClr, uLoc_material; +static matrix_4x4 projection; +static matrix_4x4 material = +{ + { + { { 0.0f, 0.2f, 0.2f, 0.2f } }, // Ambient + { { 0.0f, 0.4f, 0.4f, 0.4f } }, // Diffuse + { { 0.0f, 0.8f, 0.8f, 0.8f } }, // Specular + { { 1.0f, 0.0f, 0.0f, 0.0f } }, // Emission + } +}; + +static void* vbo_data; +static void* tex_data; +static float angleX = 0.0, angleY = 0.0; + +static void sceneInit(void) +{ + // Load the vertex shader and create a shader program + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + uLoc_modelView = 
shaderInstanceGetUniformLocation(program.vertexShader, "modelView"); + uLoc_lightVec = shaderInstanceGetUniformLocation(program.vertexShader, "lightVec"); + uLoc_lightHalfVec = shaderInstanceGetUniformLocation(program.vertexShader, "lightHalfVec"); + uLoc_lightClr = shaderInstanceGetUniformLocation(program.vertexShader, "lightClr"); + uLoc_material = shaderInstanceGetUniformLocation(program.vertexShader, "material"); + + // Compute the projection matrix + m4x4_persp_tilt(&projection, 80.0f*M_PI/180.0f, 400.0f/240.0f, 0.01f, 1000.0f); + + // Create the VBO (vertex buffer object) + vbo_data = linearAlloc(sizeof(vertex_list)); + memcpy(vbo_data, vertex_list, sizeof(vertex_list)); + + // Load the texture + tex_data = linearAlloc(kitten_bin_size); + memcpy(tex_data, kitten_bin, kitten_bin_size); +} + +static void sceneRender(void) +{ + // Bind the shader program + shaderProgramUse(&program); + + // Configure the first fragment shading substage to blend the texture color with + // the vertex color (calculated by the vertex shader using a lighting algorithm) + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_MODULATE, GPU_MODULATE, // RGB, Alpha + 0xFFFFFFFF); + + // Configure the first texture unit + GPU_SetTextureEnable(GPU_TEXUNIT0); + GPU_SetTexture( + GPU_TEXUNIT0, + (u32*)osConvertVirtToPhys((u32)tex_data), + 64, // Width + 64, // Height + GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_WRAP_S(GPU_REPEAT) | GPU_TEXTURE_WRAP_T(GPU_REPEAT), // Flags + GPU_RGBA8 // Pixel format + ); + + // Configure the "attribute buffers" (that is, the vertex input buffers) + GPU_SetAttributeBuffers( + 3, // Number of inputs per vertex + (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of 
the VBO + GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // Format of the inputs + GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | + GPU_ATTRIBFMT(2, 3, GPU_FLOAT), + 0xFFC, // Unused attribute mask, in our case bits 0~2 are cleared since they are used + 0x210, // Attribute permutations (here it is the identity, passing each attribute in order) + 1, // Number of buffers + (u32[]) { 0x0 }, // Buffer offsets (placeholders) + (u64[]) { 0x210 }, // Attribute permutations for each buffer (identity again) + (u8[]) { 3 }); // Number of attributes for each buffer + + // Calculate the modelView matrix + matrix_4x4 modelView; + m4x4_identity(&modelView); + m4x4_translate(&modelView, 0.0, 0.0, -2.0 + 0.5*sinf(angleX)); + m4x4_rotate_x(&modelView, angleX, true); + m4x4_rotate_y(&modelView, angleY, true); + + // Rotate the cube each frame + angleX += M_PI / 180; + angleY += M_PI / 360; + + // Upload the uniforms + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection); + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_modelView, &modelView); + GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_material, &material); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightVec, (u32*)(float[]){0.0f, -1.0f, 0.0f, 0.0f}, 1); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightHalfVec, (u32*)(float[]){0.0f, -1.0f, 0.0f, 0.0f}, 1); + GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightClr, (u32*)(float[]){1.0f, 1.0f, 1.0f, 1.0f}, 1); + + // Draw the VBO + GPU_DrawArray(GPU_TRIANGLES, vertex_list_count); +} + +static void sceneExit(void) +{ + // Free the texture + linearFree(tex_data); + + // Free the VBO + linearFree(vbo_data); + + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + gpuInit(); + + // Initialize the scene + sceneInit(); + gpuClearBuffers(CLEAR_COLOR); + + // Main loop + while (aptMainLoop()) + { + gspWaitForVBlank(); // Synchronize with the start of VBlank + gfxSwapBuffersGpu(); // Swap 
the framebuffers so that the frame that we rendered last frame is now visible + hidScanInput(); // Read the user input + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + gpuFrameBegin(); + sceneRender(); + gpuFrameEnd(); + gpuClearBuffers(CLEAR_COLOR); + + // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering) + //gfxFlushBuffers(); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + gpuExit(); + gfxExit(); + return 0; +} diff --git a/examples/graphics/gpu/textured_cube/source/vshader.pica b/examples/graphics/gpu/textured_cube/source/vshader.pica new file mode 100644 index 0000000..0f0ac5d --- /dev/null +++ b/examples/graphics/gpu/textured_cube/source/vshader.pica @@ -0,0 +1,90 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4], modelView[4] +.fvec lightVec, lightHalfVec, lightClr, material[4] +.alias mat_amb material[0] +.alias mat_dif material[1] +.alias mat_spe material[2] +.alias mat_emi material[3] + +; Constants +.constf myconst(0.0, 1.0, -1.0, -0.5) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +; Outputs +.out outpos position +.out outtc0 texcoord0 +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias intex v1 +.alias innrm v2 + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; r1 = modelView * inpos + dp4 r1.x, modelView[0], r0 + dp4 r1.y, modelView[1], r0 + dp4 r1.z, modelView[2], r0 + dp4 r1.w, modelView[3], r0 + + ; outpos = projection * r1 + dp4 outpos.x, projection[0], r1 + dp4 outpos.y, projection[1], r1 + dp4 outpos.z, projection[2], r1 + dp4 outpos.w, projection[3], r1 + + ; outtc0 = intex + mov outtc0, intex + + ; Transform the normal vector with the modelView matrix + ; r1 = normalize(modelView * innrm) + mov r0.xyz, innrm
+ mov r0.w, zeros + dp4 r1.x, modelView[0], r0 + dp4 r1.y, modelView[1], r0 + dp4 r1.z, modelView[2], r0 + mov r1.w, zeros + dp3 r2, r1, r1 ; r2 = x^2+y^2+z^2 for each component + rsq r2, r2 ; r2 = 1/sqrt(r2) '' + mul r1, r2, r1 ; r1 = r1*r2 + + ; Calculate the diffuse level (r0.x) and the shininess level (r0.y) + ; r0.x = max(0, -(lightVec * r1)) + ; r0.y = max(0, (-lightHalfVec[i]) * r1) ^ 2 + dp3 r0.x, lightVec, r1 + add r0.x, zeros, -r0 + dp3 r0.y, -lightHalfVec, r1 + max r0, zeros, r0 + mul r0.y, r0, r0 + + ; Accumulate the vertex color in r1, initializing it to the emission color + mov r1, mat_emi + + ; r1 += specularColor * lightClr * shininessLevel + mul r2, lightClr, r0.yyyy + mul r2, mat_spe, r2 + add r1, r2, r1 + + ; r1 += diffuseColor * lightClr * diffuseLevel + mul r2, lightClr, r0.xxxx + mul r2, mat_dif, r2 + add r1, r2, r1 + + ; r1 += ambientColor * lightClr + mov r2, lightClr + mul r2, mat_amb, r2 + add r1, r2, r1 + + ; outclr = clamp r1 to [0,1] + min outclr, ones, r1 + + ; We're finished + end +.end From 894d5342a1f50c4ed93be88f6c9f4e7d31e98a25 Mon Sep 17 00:00:00 2001 From: Lectem Date: Sat, 11 Jul 2015 20:37:31 +0200 Subject: [PATCH 3/8] fixed y2r doc and added csnd pan/vol ranges --- libctru/include/3ds/services/csnd.h | 4 ++++ libctru/include/3ds/services/y2r.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libctru/include/3ds/services/csnd.h b/libctru/include/3ds/services/csnd.h index 9264a27..e3258a0 100644 --- a/libctru/include/3ds/services/csnd.h +++ b/libctru/include/3ds/services/csnd.h @@ -147,6 +147,10 @@ void CSND_SetCapRegs(u32 capUnit, u32 flags, u32 addr, u32 size); Result CSND_SetDspFlags(bool waitDone); Result CSND_UpdateInfo(bool waitDone); +/** + * @param vol The volume, ranges from 0.0 to 1.0 included + * @param pan The pan, ranges from -1.0 to 1.0 included + */ Result csndPlaySound(int chn, u32 flags, u32 sampleRate, float vol, float pan, void* data0, void* data1, u32 size); void csndGetDspFlags(u32* 
outSemFlags, u32* outIrqFlags); // Requires previous CSND_UpdateInfo() diff --git a/libctru/include/3ds/services/y2r.h b/libctru/include/3ds/services/y2r.h index 62cf3bf..613336c 100644 --- a/libctru/include/3ds/services/y2r.h +++ b/libctru/include/3ds/services/y2r.h @@ -233,7 +233,7 @@ Result Y2RU_GetTransferEndEvent(Handle* end_event); * @param src_buf A pointer to the beginning of your Y data buffer. * @param image_size The total size of the data buffer. * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. - * @param transfer_unit Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. * * This specifies the Y data buffer for the planar input formats (INPUT_YUV42*_INDIV_*). * The actual transfer will only happen after calling @ref Y2RU_StartConversion. @@ -245,7 +245,7 @@ Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, u16 transfer_unit, * @param src_buf A pointer to the beginning of your Y data buffer. * @param image_size The total size of the data buffer. * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. - * @param transfer_unit Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. * * This specifies the U data buffer for the planar input formats (INPUT_YUV42*_INDIV_*). * The actual transfer will only happen after calling @ref Y2RU_StartConversion. 
From 2d7a53d75f90555cb35e1897f5262278e29cf9d6 Mon Sep 17 00:00:00 2001 From: Lectem Date: Thu, 23 Jul 2015 21:23:50 +0200 Subject: [PATCH 4/8] Documented GX TRANSFER_SCALE and FILL_CONTROL --- libctru/include/3ds/gpu/gx.h | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/libctru/include/3ds/gpu/gx.h b/libctru/include/3ds/gpu/gx.h index 561473f..e2298b4 100644 --- a/libctru/include/3ds/gpu/gx.h +++ b/libctru/include/3ds/gpu/gx.h @@ -1,7 +1,15 @@ +/** + * @file gx.h + */ + #pragma once #define GX_BUFFER_DIM(w, h) (((h)<<16)|((w)&0xFFFF)) +/** + * @brief Pixel formats + * @sa GSP_FramebufferFormats + */ typedef enum { GX_TRANSFER_FMT_RGBA8 = 0, @@ -11,20 +19,29 @@ typedef enum GX_TRANSFER_FMT_RGBA4 = 4 } GX_TRANSFER_FORMAT; +/** + * @brief Anti-aliasing modes + * + * Please remember that the framebuffer is sideways. + * Hence if you activate 2x1 anti-aliasing the destination dimensions are w = 240*2 and h = 400 + */ typedef enum { - GX_TRANSFER_SCALE_NO = 0, - GX_TRANSFER_SCALE_X = 1, - GX_TRANSFER_SCALE_Y = 2 + GX_TRANSFER_SCALE_NO = 0, ///< No anti-aliasing + GX_TRANSFER_SCALE_X = 1, ///< 2x1 anti-aliasing + GX_TRANSFER_SCALE_XY = 2, ///< 2x2 anti-aliasing } GX_TRANSFER_SCALE; +/** + * @brief GX memory fill control flags + */ typedef enum { - GX_FILL_TRIGGER = 0x001, - GX_FILL_FINISHED = 0x002, - GX_FILL_16BIT_DEPTH = 0x000, - GX_FILL_24BIT_DEPTH = 0x100, - GX_FILL_32BIT_DEPTH = 0x200, + GX_FILL_TRIGGER = 0x001, ///< Trigger the PPF event + GX_FILL_FINISHED = 0x002, ///< Indicates if the memory fill is complete. You should not use it when requesting a transfer.
+ GX_FILL_16BIT_DEPTH = 0x000, ///< The buffer has a 16 bit per pixel depth + GX_FILL_24BIT_DEPTH = 0x100, ///< The buffer has a 24 bit per pixel depth + GX_FILL_32BIT_DEPTH = 0x200, ///< The buffer has a 32 bit per pixel depth } GX_FILL_CONTROL; #define GX_TRANSFER_FLIP_VERT(x) ((x)<<0) From ea7b0eac5d7208d2de459872166a0f54e2c8e6fc Mon Sep 17 00:00:00 2001 From: Lectem Date: Thu, 23 Jul 2015 22:41:13 +0200 Subject: [PATCH 5/8] added gpu examples to doc --- libctru/include/3ds.h | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/libctru/include/3ds.h b/libctru/include/3ds.h index d43bfe5..9b54fd1 100644 --- a/libctru/include/3ds.h +++ b/libctru/include/3ds.h @@ -53,12 +53,52 @@ extern "C" { * @example app_launch/source/main.c * @example audio/mic/source/main.c * @example get_system_language/source/main.c - * @example gpu/source/main.c * @example graphics/bitmap/24bit-color/source/main.c * @example graphics/printing/hello-world/source/main.c * @example graphics/printing/both-screen-text/source/main.c * @example graphics/printing/colored-text/source/main.c * @example graphics/printing/multiple-windows-text/source/main.c + + * @example graphics/gpu/geoshader/source/main.c + graphics/gpu/geoshader/source/gpu.h + @include graphics/gpu/geoshader/source/gpu.h + graphics/gpu/geoshader/source/gpu.c + @include graphics/gpu/geoshader/source/gpu.c + graphics/gpu/geoshader/source/3dmath.h + @include graphics/gpu/geoshader/source/3dmath.h + graphics/gpu/geoshader/source/3dmath.c + @include graphics/gpu/geoshader/source/3dmath.c + graphics/gpu/geoshader/source/vshader.pica + @include graphics/gpu/geoshader/source/vshader.pica + graphics/gpu/geoshader/source/gshader.pica + @include graphics/gpu/geoshader/source/gshader.pica + + + * @example graphics/gpu/simple_tri/source/main.c + graphics/gpu/simple_tri/source/gpu.h + @include graphics/gpu/simple_tri/source/gpu.h + graphics/gpu/simple_tri/source/gpu.c + @include 
graphics/gpu/simple_tri/source/gpu.c + graphics/gpu/simple_tri/source/3dmath.h + @include graphics/gpu/simple_tri/source/3dmath.h + graphics/gpu/simple_tri/source/3dmath.c + @include graphics/gpu/simple_tri/source/3dmath.c + graphics/gpu/simple_tri/source/vshader.pica + @include graphics/gpu/simple_tri/source/vshader.pica + + + * @example graphics/gpu/textured_cube/source/main.c + graphics/gpu/textured_cube/source/gpu.h + @include graphics/gpu/textured_cube/source/gpu.h + graphics/gpu/textured_cube/source/gpu.c + @include graphics/gpu/textured_cube/source/gpu.c + graphics/gpu/textured_cube/source/3dmath.h + @include graphics/gpu/textured_cube/source/3dmath.h + graphics/gpu/textured_cube/source/3dmath.c + @include graphics/gpu/textured_cube/source/3dmath.c + graphics/gpu/textured_cube/source/vshader.pica + @include graphics/gpu/textured_cube/source/vshader.pica + * @example http/source/main.c * @example input/read-controls/source/main.c * @example input/touch-screen/source/main.c From 73e0267ae472a3c8ca0b897ab5967427458861b6 Mon Sep 17 00:00:00 2001 From: aroulin Date: Fri, 24 Jul 2015 21:49:49 +0200 Subject: [PATCH 6/8] Fix GPU_Reset having wrong GSP command buffer --- libctru/source/gpu/gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 49d5809..5610c27 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -215,7 +215,7 @@ void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize) for(i=0;i Date: Tue, 4 Aug 2015 19:31:46 +0200 Subject: [PATCH 7/8] Revert 73e0267a because it breaks all existing GPU code --- libctru/source/gpu/gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libctru/source/gpu/gpu.c b/libctru/source/gpu/gpu.c index 5610c27..49d5809 100644 --- a/libctru/source/gpu/gpu.c +++ b/libctru/source/gpu/gpu.c @@ -215,7 +215,7 @@ void GPU_Reset(u32* gxbuf, u32* gpuBuf, u32 gpuBufSize) for(i=0;i Date: Mon, 10 Aug 2015 00:32:41 +0200 Subject: 
[PATCH 8/8] Fixed transfer unit and gap type and added a warning --- libctru/include/3ds/services/y2r.h | 30 ++++++++++++++++++++---------- libctru/source/services/y2r.c | 10 +++++----- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/libctru/include/3ds/services/y2r.h b/libctru/include/3ds/services/y2r.h index 613336c..0ba78f1 100644 --- a/libctru/include/3ds/services/y2r.h +++ b/libctru/include/3ds/services/y2r.h @@ -105,8 +105,8 @@ typedef struct Y2R_OutputFormat output_format : 8; ///< Value passed to @ref Y2RU_SetOutputFormat Y2R_Rotation rotation : 8; ///< Value passed to @ref Y2RU_SetRotation Y2R_BlockAlignment block_alignment : 8; ///< Value passed to @ref Y2RU_SetBlockAlignment - u16 input_line_width; ///< Value passed to @ref Y2RU_SetInputLineWidth - u16 input_lines; ///< Value passed to @ref Y2RU_SetInputLines + s16 input_line_width; ///< Value passed to @ref Y2RU_SetInputLineWidth + s16 input_lines; ///< Value passed to @ref Y2RU_SetInputLines Y2R_StandardCoefficient standard_coefficient : 8; ///< Value passed to @ref Y2RU_SetStandardCoefficient u8 unused; u16 alpha; ///< Value passed to @ref Y2RU_SetAlpha @@ -235,10 +235,12 @@ Result Y2RU_GetTransferEndEvent(Handle* end_event); * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. * + * @warning transfer_unit+transfer_gap must be less than 32768 (0x8000) + * * This specifies the Y data buffer for the planar input formats (INPUT_YUV42*_INDIV_*). * The actual transfer will only happen after calling @ref Y2RU_StartConversion. */ -Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap); +Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap); /** * @brief Configures the U plane buffer. 
@@ -247,46 +249,54 @@ Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, u16 transfer_unit, * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. * + * @warning transfer_unit+transfer_gap must be less than 32768 (0x8000) + * * This specifies the U data buffer for the planar input formats (INPUT_YUV42*_INDIV_*). * The actual transfer will only happen after calling @ref Y2RU_StartConversion. */ -Result Y2RU_SetSendingU(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap); +Result Y2RU_SetSendingU(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap); /** * @brief Configures the V plane buffer. * @param src_buf A pointer to the beginning of your Y data buffer. * @param image_size The total size of the data buffer. * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. - * @param transfer_unit Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * + * @warning transfer_unit+transfer_gap must be less than 32768 (0x8000) * * This specifies the V data buffer for the planar input formats (INPUT_YUV42*_INDIV_*). * The actual transfer will only happen after calling @ref Y2RU_StartConversion. */ -Result Y2RU_SetSendingV(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap); +Result Y2RU_SetSendingV(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap); /** * @brief Configures the YUYV source buffer. * @param src_buf A pointer to the beginning of your Y data buffer. 
* @param image_size The total size of the data buffer. * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. - * @param transfer_unit Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * + * @warning transfer_unit+transfer_gap must be less than 32768 (0x8000) * * This specifies the YUYV data buffer for the packed input format @ref INPUT_YUV422_BATCH. * The actual transfer will only happen after calling @ref Y2RU_StartConversion. */ -Result Y2RU_SetSendingYUYV(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap); +Result Y2RU_SetSendingYUYV(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap); /** * @brief Configures the destination buffer. * @param src_buf A pointer to the beginning of your destination buffer in FCRAM * @param image_size The total size of the data buffer. * @param transfer_unit Specifies the size of 1 DMA transfer. Usually set to 1 line. This has to be a divisor of image_size. - * @param transfer_unit Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. + * @param transfer_gap Specifies the gap (offset) to be added after each transfer. Can be used to convert images with stride or only a part of it. * * This specifies the destination buffer of the conversion. * The actual transfer will only happen after calling @ref Y2RU_StartConversion. * The buffer does NOT need to be allocated in the linear heap. 
* + * @warning transfer_unit+transfer_gap must be less than 32768 (0x8000) + * * @note * It seems that depending on the size of the image and of the transfer unit,\n * it is possible for the end of conversion interrupt to be triggered right after the conversion began.\n @@ -294,7 +304,7 @@ Result Y2RU_SetSendingYUYV(const void* src_buf, u32 image_size, u16 transfer_uni * * @note Setting a transfer_unit of 4 or 8 lines seems to bring the best results in terms of speed for a 400x240 image. */ -Result Y2RU_SetReceiving(void* dst_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap); +Result Y2RU_SetReceiving(void* dst_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap); /** * @brief Checks if the DMA has finished sending the Y buffer. diff --git a/libctru/source/services/y2r.c b/libctru/source/services/y2r.c index 11d0109..8f679e9 100644 --- a/libctru/source/services/y2r.c +++ b/libctru/source/services/y2r.c @@ -120,7 +120,7 @@ Result Y2RU_GetTransferEndEvent(Handle* end_event) return cmdbuf[1]; } -Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap) +Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap) { Result ret = 0; u32* cmdbuf = getThreadCommandBuffer(); @@ -136,7 +136,7 @@ Result Y2RU_SetSendingY(const void* src_buf, u32 image_size, u16 transfer_unit, return cmdbuf[1]; } -Result Y2RU_SetSendingU(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap) +Result Y2RU_SetSendingU(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap) { Result ret = 0; u32* cmdbuf = getThreadCommandBuffer(); @@ -152,7 +152,7 @@ Result Y2RU_SetSendingU(const void* src_buf, u32 image_size, u16 transfer_unit, return cmdbuf[1]; } -Result Y2RU_SetSendingV(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap) +Result Y2RU_SetSendingV(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap) { Result ret = 0; u32* cmdbuf 
= getThreadCommandBuffer(); @@ -168,7 +168,7 @@ Result Y2RU_SetSendingV(const void* src_buf, u32 image_size, u16 transfer_unit, return cmdbuf[1]; } -Result Y2RU_SetSendingYUYV(const void* src_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap) +Result Y2RU_SetSendingYUYV(const void* src_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap) { Result ret = 0; u32* cmdbuf = getThreadCommandBuffer(); @@ -228,7 +228,7 @@ Result Y2RU_IsDoneSendingV(bool* is_done) return cmdbuf[1]; } -Result Y2RU_SetReceiving(void* dst_buf, u32 image_size, u16 transfer_unit, u16 transfer_gap) +Result Y2RU_SetReceiving(void* dst_buf, u32 image_size, s16 transfer_unit, s16 transfer_gap) { Result ret = 0; u32* cmdbuf = getThreadCommandBuffer();