From c1ad708045456800bf0666163cd35b4afa5b98f6 Mon Sep 17 00:00:00 2001 From: smea Date: Wed, 19 Nov 2014 17:13:43 -0800 Subject: [PATCH] rewrote GPU examples/gpu/data/test.vsh --- examples/gpu/data/test.vsh | 54 +++-- examples/gpu/data/texture.bin | Bin 0 -> 65536 bytes examples/gpu/source/_gs.s | 16 ++ examples/gpu/source/gs.c | 431 ++++++++++++++++++++++++++++++++++ examples/gpu/source/gs.h | 59 +++++ examples/gpu/source/main.c | 420 +++++++++++++++++++++------------ examples/gpu/source/math.c | 101 ++++++-- examples/gpu/source/math.h | 135 ++++++++++- 8 files changed, 1016 insertions(+), 200 deletions(-) create mode 100644 examples/gpu/data/texture.bin create mode 100644 examples/gpu/source/_gs.s create mode 100644 examples/gpu/source/gs.c create mode 100644 examples/gpu/source/gs.h diff --git a/examples/gpu/data/test.vsh b/examples/gpu/data/test.vsh index 686b22b..0da0e64 100644 --- a/examples/gpu/data/test.vsh +++ b/examples/gpu/data/test.vsh @@ -1,7 +1,7 @@ -; make sure you update aemstro_as for this (27/05/14) +; make sure you update aemstro_as for this (15/11/14) ; setup constants - .const 5, 0.0, 0.0, -0.99, 1.0 + .const 20, 1.0, 0.0, 0.5, 1.0 ; setup outmap .out o0, result.position @@ -9,35 +9,37 @@ .out o2, result.texcoord0 .out o3, result.texcoord1 .out o4, result.texcoord2 - -; setup uniform map (not required) - .uniform 0x10, 0x13, mdlvMtx - .uniform 0x14, 0x17, projMtx + +; setup uniform map (required to use SHDR_GetUniformRegister) + .uniform 0, 3, projection ; c0-c3 = projection matrix + .uniform 4, 7, modelview ; c4-c7 = modelview matrix + .uniform 8, 8, lightDirection ; c8 = light direction vector + .uniform 9, 9, lightAmbient ; c9 = light ambient color ;code main: - mov d1A, d00 (0x4) - mov d1A, d25 (0x3) + mov r1, v0 (0x6) + mov r1, c20 (0x3) ; tempreg = mdlvMtx * in.pos - dp4 d10, d44, d1A (0x0) - dp4 d10, d45, d1A (0x1) - dp4 d10, d46, d1A (0x2) - mov d10, d25 (0x3) + dp4 r0, c4, r1 (0x0) + dp4 r0, c5, r1 (0x1) + dp4 r0, c6, r1 (0x2) + mov 
r0, c20 (0x3) ; result.pos = projMtx * tempreg - dp4 d00, d40, d10 (0x0) - dp4 d00, d41, d10 (0x1) - dp4 d00, d42, d10 (0x2) - dp4 d00, d43, d10 (0x3) + dp4 o0, c0, r0 (0x0) + dp4 o0, c1, r0 (0x1) + dp4 o0, c2, r0 (0x2) + dp4 o0, c3, r0 (0x3) ; result.texcoord = in.texcoord - mov d02, d01 (0x5) - mov d03, d25 (0x7) - mov d04, d25 (0x7) + mov o2, v1 (0x5) + mov o3, c20 (0x7) + mov o4, c20 (0x7) ; result.color = crappy lighting - dp3 d1A, d44, d02 (0x0) - dp3 d1A, d45, d02 (0x1) - dp3 d1A, d46, d02 (0x2) - dp4 d01, d00, d1A (0x6) - mov d01, d25 (0x3) + dp3 r0, c8, v2 (0x6) + max r0, c20, r0 (0x4) + mul r0, c9, r0 (0x8) + add o1, c9, r0 (0x6) + mov o1, c20 (0x3) flush end endmain: @@ -47,8 +49,8 @@ .opdesc _y__, xyzw, xyzw ; 0x1 .opdesc __z_, xyzw, xyzw ; 0x2 .opdesc ___w, xyzw, xyzw ; 0x3 - .opdesc xyz_, xyzw, xyzw ; 0x4 + .opdesc xyz_, yyyy, xyzw ; 0x4 .opdesc xyzw, xyzw, xyzw ; 0x5 .opdesc xyz_, xyzw, xyzw ; 0x6 .opdesc xyzw, yyyw, xyzw ; 0x7 - .opdesc xyzw, wwww, wwww ; 0x8 + .opdesc xyz_, wwww, xyzw ; 0x8 diff --git a/examples/gpu/data/texture.bin b/examples/gpu/data/texture.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a3312bd8d77116a79b265fd9a6b02514845fa08 GIT binary patch literal 65536 zcmeIb3D{;ubuRjSf43mwHK7}5rreW9NltF=lQ<BM6Eb$Lka|-jisAqarFZ z5+;#Blm=v!K}A6nh@#9$Gj~IGGbm~RnT!h94Ky>>ecyMjw|0H?e{Cm+oJXG=ny08; zRcm1BM$-DP~oeP#5iuaxl>50e=D9B} zqrZMj8NGPhGFtbUW%LY`ZQfZ%+rFoamV^FM(A`zUKfVGwuL4%uhA|VQigem9;*&SBD?=!}g~WH*Kd6Xggze32NbozZp}u;gjCpzCFH;F^Av9 zpR&}ZKha*y_@P{hW8zjk(Pk_ew@cw4jE#N@%vbT4W67hK;+QgcOghS_?WFY>$Ek<> zS$e#RzveS&VL!2tU<0uVuk3*g_=52izvc_Z;j>A3AAsF)t9CKg4ZY?NeMs8svuro~ zmOmSR*yQ~z;-9fs{Ad&LhYhRe2T!wpV!i~g=Y!T0W0(7tY-)~eqvI!=WCLV7-n1Wm zZfh>JeVi-yWgZW596!)-&7)}k6SoTbwEoei?Ti`s0__KEn}47m{J}oZYX3(JHD7{1 z_5;d9o7h{MaqM|&oVLw&w(Yw2PxVL0x13sYUlIL^@^tOjwm<6Fx!PpZ& z=&|-`bL^M1*FWdG;~ndt?eL}c6xyHjRhVc${lRlp6IaekDHjx}EAKV)(oJh7gl zEt*D-edl_J7kFW}+KxTrNBr9FVKZYWdr-r6&WC{?m^gnX{6Tuy(##jPV?BU}b}Ig~ 
z7y7iHp}vys!3G`Ej`T~lm3^ekBHy+1pW`_<`kFige%hb&zwZCEpZIBSlz#u}_dChc z`2c!xK4|Tg%{hO50k8R@q23xh}O|SMOzv#i^{jinUU+)Bd zt_R{r3^IO>m)kM_ajl^B6TZ{@$vLF84}W^T#C{2xIe*xeOyX+hzxM*i*?#D2*}yfy z`Ow;Ie^Ty*&Gbq1dA_h-Q~ZU$x!$4uv-Ur&7huu+i8M9YHsKCk7HG@f714L?#c%C+XuWRqTkdW{MP!fKUVznT7}mvlk)+xnt16N zu3pb!3^qL-*E{Pf{#-LFbR2Ey&-p+4F~%7GMeyH`L*|Ib+*1&LVa^z9KcEdw%z4b* zrd}f|_H2XBw&%Rj^^cF~8?F({c6~YW0`egTdOEPLdkbig$IZqM{%*dgW+#9V2uJCOq+=TQ#CT#7jmb0y|XuX%SFz4q^v3$69VMP>A&S0M*_ zW`h?#fDQi#@UKUI$R6M#c3$%mE6#s0A8?%We{}Qx<+Ph_E2Fy}Dl;EDp=Q%NTy}$w z$3J_pjE}wmc0OLlA3dPV?De&>@6ub!Xy-%AoE;8PuA()&2)MZx=nuv%YvpRp@7}*> zQ4Zv}K!0&vFMq6zcRRd{uZw!L=TT+TgHJB37aVkQ886Fz(H&MsdwjlZg#OVdk5(Od z_4&xd;giohzeZl2`Sr6i|9(!^%)3X|uB!RD@GgEJa3c;g7r+&yj1k9iLdKta!jmY_->-jH7-p~C0T*Tc&E-4=>=^rx>Z zn}Bvc`eoZ6{bd=y2Kn|&USHNn--11lEbH&G5aqD4>47Jfwcha|@E%b%{P-bdqkWDk z^D(yRNA?5%5oLYIB<;Li7fSv!_d2F*y4{{-{FQ5PzvBF|;G+kY4R$)X%-`$SvhfZF zihcv|%-{9!GLL$2ykM8ll})zWtE_YIsb#|*4k(+wbH}p&UdNZU&;Nc|Z|{>}_k(3- z1@56-%X>RZ;S03iw`&<=E%_Xwdnw#I-t)0CUIM@FdjjVFPa~e1r}x9R@G0l2d`mw! 
zg?*aCrirmxd-I8i`_hNY=&bLT(GtYOv9c`oVXqo3zCYrZa$8;69~s+;vB|b+%-6PK zZ-uSA=FIEPR#to5^6Oahda%2tZI4OYwslOr9qyLKx?FqAwfpONLH;I|JP*nr>3`o} zXnaID-f!xjlX%?jafi-~t!XKfC2j1_Ny6)K#x;Fr9kcBjd?k$5)BCX6n{z95SibS7 z_Ic7cvfa{5($Rx^$>1F_qdmZ$b(t=9EwTCd;4V7^*Qf9M+Qj&)_4Djzu6r+)h-eOK|;oG$Upw&an0 z*>9f{CuPx2T_;ca!@3nG>ZT0Iq%UUE(*9xG{&alp|Bzu@xF-Zp$get6AKRYCUIT5P zc`ILHjDE_FxP=XkEZI(-oE!833VlGmj7yeQu8&1i{a~AeM>LS9xsdS>dgEX{_=NPc z#=S@8TIltX-hG3tLB8T-`xp})W1M!%ufUq)@(b~B4m2?h`$Lc9QD>vqeia?@(qG0) zI{7o_Tf-;Zxz2QNsT-U0ZTM=^-mp`B;O)m8H_0KUM!x#g?(Cv3nli!D_T#wvqUo1U z8S5_BnAIkKgNHh!op`){vp;lkUZtJ#abj)doNN~UXQhPdmQlzAJ`8Quyfj-a2lqZEgAV^YxFE@v+E(mLf+w>xPLrDvaDi zm*JY4Sdjyb{%#lLJ>$21sEq!fts@6|!OM|rzopFI^N2F~t2f27#}~Y!%-ij-vI%mb z4UuPTu*VU|TafSkH{{WOx}|cYO%6P%jQ-@6;CTabrO%f+ulw(11LP&^?+RMxOFZ7= z$g(MNrS*3{40+TMWxUnfkpmrFX7)X?Z2F;nA@eZMBfmm^G#@z-b2a8bx<`J>HD$98 z?pZcO4#Zq&edIu--2k$Khq`!-d{0E zA_s!~f3!uJIpbP9TYX)bdGAihiQqTJ3HKs+zF7MmA1kkr*@9_ zW783i)-r=%u)Km zGTDaAUBCCQW!D@3wcK+j?lYoqHyrPdayQy6_uqeidEoy0%F6p9CV>ZX$dCCyx^zX^ z_}cr*W=jL(j@RE+c20S>Jp4$x<&h`KefQm8R;~mF(xYs4?aHzViurlG$@A3BS7VEU*p1yBS<7xY5;cw#4Inl?fZ};);<@NM^yT{wNTddQLd-}dT zji>E1ey-omu;%OhXZ)tw-_7UBFZ$%ivka|=f5ZIg@%HT&>$KyZzHd+CY5RaOFMTlSztz~^_^-#?w_B{!j(hsPJ&mXB6JEv(_SXIT-x0s+KhJ+&Q}KL$%@aB= z$Nu>X?f(xx^iX-vSr_4&{E4#ctgn^#f9G zaxI@V&;M@wnnks)Q-|vhU&HG90+^>=7wMYD*LcoNeC?NYdyU)6_{VSK`49c^&!?9U zoP%S_TKc&B_vn2q%2JFG^P?+Qln;Iz@2@U<9DQFe|M;DOe$tM-PSQONyfeypq`Z)i z@w^^qtjufOq9>29-FS`VWA-;Z;%&yni|aAVR6EBlBkF;M?RHFU>+pRW%1pYRw(9EQ zH^}YeGfs*XYCK;X-GCg3?W>UkAs3p1^Y@s~Qjr6-_@xJQZ9X&y?=~<88e#n?KSeGy zVh(gA`e=89ne%Ak8MkvH;bt6w)4pR4WnWWH`eU#O7+IvP^Lpc`ax?$;xbGA6;qpBn$qwU74kZ0`p=vho+@ns0tx*Bm_VY1wY+w_^^D z-1?eto>6q1`J40YM^-@Ix6TmH!z=DB|AYBE=$D)g`m-;cDu0^JU)9&*dH5IilpRc(xNGZu=Jt6p)A$QtIupws?}XPj7D#M0rEzbz-9&2sIu{o?Cv zk(|6wrTbcvVOv92*vq*{c^pH`NXJ4PT%+ped?rm`Z*_<7$U|O^H!{RKn@@C-m1{5U z(Yp!k&!TxJe^7p44}8Xvea0nwL!ar|IWQN$(Pv)GJX`rSWrCNqtljV0c7|Ok7kGqA zcp9vUul139AzOZh{3-h9BtGZWuKV+h`(5|rPquBju6udzcioRa)&9-BHv7ddW?w%s 
zhkf+XN6RCRJW{@X`qyS%pFG)kK7?mX-~Ptw<-rFZESFz!-Yog-zx2FwG|rkBAA9UE zl_ghRE^Ng2k-vL)*=4(bD9f(Avb_K8Z!YhA^BcwkKeP9V!4KXM`$)@zy+dUw zuB)=m{hPM35c@3rAN;_8SNQiu`!(fxTfHXjY3-lF51F*}x!{rgu=)AqM_D)J3GY6z z7kE|p`>dudTZx%_KF<{me%Z6niDd@<8krxwlIb>VrHnXNkw5(5_Ui2471&v1uki!( z3vm2`R{=k4p$+CYecvYk3({ZYdBK*i2LG$YOaAe`h@I>m;y2US&seqi9nW}{gzprQ z7VnN$|FMl?f6!m@S&jdsydJOHyQlquciW2h`CAb`+MWHoZ~uX^xgUW+h?6o z9>ctP{#M(T=WYGk;Qt?=Dlh!M{-AvGv{TER_wHQ&@BjK=%NX;O@iJb<((z|s8*jxd z{h@O;&)bN<)=S2FwBMJ@3wHb`oNwaUwH?{?=M?ic5B@q zrtMYSi|7yDE7*6Ry=B{z-Jg2?Gfw%kaX0*L5$?{jR@p;_>B~cqjVnC!j{oq>J#CA4G+4x!G8&v=59}a6`%~VK#}Vgb z%tS-{lxd&v`)hcvKCaLA+xP!*ZoAIosGa{KFOT~G7;j`*W|f~~ZNG9zVqo4!45@dV zI3zdodp-Zh`v=0u`O4p`EB_z#JHK|_=GXk4z56GhzvJDId_LdiZN^sf*u)rd1lEj| z+O!SpEbc3uTkk{ozJhD(HIC4W_fi`DArJB$6Y7P0w|Q)xLktmj`VkoF`X!&{!Z3eA zPOcl#$8jq&+Y@oc@2c!#QteO8WQT`vb)>rC|@whMG`vlZ)9W8D2rwuJ` z;iB#+loq|ba83Ok}>-!F_omac=&lm1@-H$)nw&l9+<+=ZkJMNfzebm!G z)&BKf7p+<~4%>do;9S?p`|P(IpqZ}E8BuRdbo{=oesiuNAZCSKCCV_n-m zAdSx>;#WKNo#5v&Wu{!76G+qPsk*$!(uQ{b29I$?9X4|B(|Lxrq#s<1ZkjKsyOo_Z zq_>WWhggZdqo1Xt8MlmHzwB=C)b~jfyY)*K^-jT6abQgM(OH|e!(bd9H)hLKzj;wp zwsjkOYFk5So3*ad+r4t`u`Gnx5q1%Mpi3N^P}|@b=A3Hy*Zai z2l+3iUe(Z(a*d<*3*W89dG+PrJacM(UB~|y<=<64t{Iq<_4{5O|JHPTJ^fSdU+?+P zd9~|49`1MDk3ZSA<+|?Wx!<+&YUI~vBEMd@&;PN`&gA`py^hwn^MCCd%Ky0*r|h>{ zheLm?!Ca&5ItSgECo`}AeAY)r{tvyr|8VBXCn*09x*X>o0R6FdKrZ(y%Jo|3xZBvX zD;@ir{Yd9WwFh3=MmpMHIoxk_-0o$u-wt|Gr~EM-x8uSAIf18N89uuWi)Uf7*D`Z4F-t|PzZ^LN|_;CJ|tm+QGdYrTKpJb&l+ z`5N8EraEycKLw5ZtlxO7SKphc@>4(F1K_yy@;w2)AK?2Fd_REC|Ks_3`+UFszMlJC z*Sx>x-|4d^K9lDsUYBrw=6s=D)wY$hp8wapUhe_${sZ1G!t;OE?Kr>Hpty3qq)hw8 z>oWSGUwRuqdTyX!*G?Sgdpy1d;hg6f`_X5=D-N0qwkzp!{+hPEri>T|#!26xk2rE} zu?~G9Q+^V@PKPjsEtY3o_H(tPk#8Gh*M$9p{1^+_ft&U>c=A~K5*sl~Hfrb9b8sJE zG0xfU*Brq3)A08fZoOhrnSA(0`Wx zYA1s9b&3RXS$-Dm)iu@%hE{eVeo_S0}yO z;=k(3^46EWw7hexSCrqzcTm=Q>j%p_wtPi-C(3$n{l_v7WqjzFW&DwY%lNx^KKPZ3 z%6P}mpnSHBPx~&)q6vEPkeBkv%l2r;LqU&n+9gqNtiEjbc>DGQUf9sLd-}dz>0v8v z?%Tqf_E!98&wJ|!gkRV{n)pwJ4IO^kYJ2Cs_5C`w|7O|$)kU&>)X3}c_U#G0u%Wgy 
zo;|(uYt9=!U+?MpJnh;mE-hDGeO0;Q)RO}*Y#>(IkK=b`n=xdp+IWTij6dVKXyAWK z@W1<&e_h`E@!iV$Z~n)!)`va?UeGfBr!EqYWPM=o}MWc*A?4ZTUcMY-)PloVY9@QROR_G0RI_CK%{Uw^zLmSO6pP*kh@tkxipX*Mx z^-b-I?=1^&`9)cHJATgun$O*EXF2qSyUJrfy}dlP^5^A|haWDh@LSsZ@Gi=L2Uu*k zb*Jp0v7VM^z}DXjwp`W&e;hj&vg0Cri~8bw%EH?o3cP9qzuFI#haY-a#oy29+oOvx zCs7vO_Mmu1uwCujArE~$ec!GzlV4kV;yB_I*DsF4U{A*0yt)oy-XHn>OHkVnAqO&% zcmLTU+#k4NRe9m%_*QbY7x;mB(w;Tr5Bl^Md^Qi)`=|Zr)^frvc*ptPpN9R2xBd6v z%Dc-7JlAL8-gSBA-IuK>p+A@uUB*R}I%<=M{vxt=q>XaBl;;r;;gaMY1^ zV;wvH&hyT2-H$zv{5qF#zvI5?IRvtdPs$fV7@zyIHS-+v+h z4q%`EpIb)R@B09QPeNCXKVs%NRIiuX_z@rHU-~uXNAk4uhdi`_wh+5)i(Cwzs$1Zu!*7W7j$N)TX}dw;b8E z4A)#YqA0eY;W0{Fi_`Ah8_8eyE9#*Q@OMj3KV@68<+tFot;W*E(RhVVdO~*Hms7WK zhi=>IIhpPBMQdx-X&V~9w0N~d7X8)@$iDD(|^Y`=b!mNGIX)jqaG zKgg|kb54ZqTyv0-CHzF+un%MWottfHXe_JJVN9`Vbeg8()!1bG?B_v$%q{N+t-WCn z^mO*i4%iz!#GUaXCib^=&igutX(Mr|KjL&gNTNwJ&oowjYB`%2GZq+Tq(= zLEpFk?bqVd^!0tt3*Fzh-M_lJr|;XV-Mt8kNXmIFyKy9A$DT^41AMHk;Fg1Ms?z^t9+>cs1(l3(T$j}(u8jrf3 z|5Jwb7^~Z=Eth_>PWPp5*+4tk4_)T3{Vm3%3k>9e?UYSk)~0psevaw={v+ad_|Cum z{e6DlFY|B4b;Q39$Nc=@%)e1nhvn;-vaKg;_nTJNMT!l4r~3*=pyqe}fCF>5%CGu4 z&c6f5aeN;je;*Lf|9|!K^|&u^Zv9Sw&AaRK|G*p9QR#2suKwfvVLPwUXoG0eZr^us z?fVPXGra#FIQwz>YS7)pJLLzC>}MbKiwAwE6>GQ6!?wmUHrbAA@s!1Jm*lf-$Fs`t zctbB)>__S8gfG`eov)gc4KDJr5oB~6}mTRZgQFG(Q2egs$T$8>n?5BM0=f)t~P9JH&7kPE~ zE9PEcu`eSp51&L|V9|L(dK!Oat}C0W&d@7fonsOgc}e2}o7h&qEI)9JSO%IHM>~CF zddVR_?V-(<&34kZec?a)ZIDHou@1tYRWJLAv6EB#+HunKC3)+80N%rBF>+jZfqR-P zjmJ!I_Ohn^Q~T2et)ai zN?!0E02#6W&5NH%lIVxt@H2U*))p2VK>p{{*L#?n)4ko3J)-iPsHEDI|1Jy z`#$(lbgl-*@l9x>=)C50qR*3*r}HJx5j>Ch{K@kPkDJ_&*CUlCXh;5=YNX5SBk6@6 z@z--t%AU|~9dYiB>jmqNy6WS#j?UL9NADUZA91E!UkilpY1aTD#~25DZ9gzI{tX{M zC+*1Vt=gA<(=`F@wQa6>PG*1gY2c(yrj6?m(hPJOGoSR*?->Wn=D3d=`zLf|e~W?R zv{@x}w8!GlejnSX@>Qp^(o|g`+x^l_d5j%-Jx5HRdA0NKiRb&w@A=+6|IVTE^gjRR z`~Q=3(t3@xZ7b%Szd4V@E4f(Hs3(43uxGpn;QJ4Qe%>D#-cRszeb z-?gSs>>JwRc(Z1l=sU%$jW_$E-+90FGw+}M`-9>8099|^e~9%343d{RCgy+WaqQ&R zj9vAy;v2l8lTB=UzN3~6X^-tq+bo-Pdz 
zisT25mRE8~!{@S+k@P7qu;=`?EbFK;>F0)zKId4<(Ef*ZuXlpS^yJC4RQYrM#QqXK zZ+%+rC9cF#?P5NZdrXWC*n~&xj5 z7Q9;b;ICsmPMMnLup@i{jMmFqItQDyCm4tKAJ&%L)D^GfS@$5n?#uL{Ws;w9upDDi zKX7#U_XK9j$h_RO^KaMv^|149*Zue;uFzled)!|zPTMNI>`xq)>$;ck{?*~@>7Q!< z?|naj`Pl{MenR+1ox=44#m!SWc zi=sXjzYn_>?{0COqUM@8lH>S$;89S3x$~u^!P!eiqU;{No?sJLQzgeI<+bPwqw9)6uUzG33TRFzkcT4n5*0X4)m* zfsTD4AAOYPy)yT;0c-4gtt=heeqv{7>lHHP+a`isTIkC#6eswFKrWM z*YwFC!#)i^3^HmggeT_+?eLmoTU7QQ)17VP!I5$tw6HnTry2YEvluKC=$Mk1A z*77&@ST6ZyGOj(QikUdfoAN}9W9#jCWtVAL``C6nw$`2F(n)!2Q@TR07o4{hmU7{{Zg+;68x! zZ`VV9efE_1@6XHo0;}rx_v9nTiT+@G<-?rUjX$g3JQqN}dHwzXuIWC1ZoTh79Q-=~ zt9$-maWZ}`i9a<-PtNZ0(Ys`(1Yuv^bc+yXt8x#7%w*#hVJ8#zB*V0=* zZL%+oE!$z+fJJl-ZN){ovenqK?&Jjx_cqH!4PNKPEe@|I$4Rz>FKA+ogs;+fK3}A4 z$>km@n^Pb8vzWKG$ww^7FFkFWe6**LCt9{Gv(}s=IVbEd%N0!%@8p@ZJ$0GKeoWqo zdqd-SB>K(<>Q4;JtNC2N)(tkrrnx_G)Wuhn1Si? zW0618Kl0V$hsx34!E+_NXZemJa2~7nMSPUEoU$zXaqj)WvTOA>Gxyo~6XnQ557oP+ ze7|(zA&01}_505&>;LhK%IKhP;5>O$nfJ$kt}@yk&p-D#c2<7X=VLwIzTMOJ?Vi4G z6KjjNZ_j&f;@$ID;jQ?AeYD4Mv+Onh;c5RQ{jp{J33{nDj1uRiiiUqPN7`M2gY@@&rC z=)bre{gqRdr%&YPnD@qO{QUc`H(YydIRbO}b9?VuX2t)xFDk$PvMs~@osTRhei_%0 z$p8Og?}P9>7vBWIcQUBg_2}>SE~Dd-_kZNTGXDBf^=ps9?|!@|dCwA`_mSKa)JJ(- zha7A61Dz{HkNu4`uLV@+UQa%aM=cpC%g5R)DVqiUh<3C0fnPW&C$IA$-N3R#_!`@?6UgNz>w}_siW0zA6DGrW6W#|>+t)aGyJ6URD)A-upg+C$1QJSlAP2t@tm`>x8gSb8t2q6 z-R&`b7V~j}ctzb8>!7V72Uun{$+YY+}JNkEdU% zF42Zv_IX=N7HOIi@@eaUQ~pmK<~PrT4C-%m>KcL9AlYv@HHWj0TfRoG_&C>6x7wsl z8n@+liP_13*gvVlRE>1)UlR?7?j z+mH5P zp`#4P9++DGz-!#$hbn{iBxd)iZu$am`rP)Cj~IxTy0eyl60^oRMmtm|uH+3K@`ula zAH3r8Gr}u=G4M7zE!*>IjPDoo@6gWNgmWRzd!v6kw#IX{SoY5x+|~_k7$RzEW57{9ci z--2|!`29NCRNMGAHvJ`j@<<;12U&yv9vJAs+w%AJ_U+!@zTMOJ?Vi4Gj}b50OT4s! z_(yvj5pDWIcsu-f=Xfo|b95DQ=#!9h^L=N&qui|9VO{6E_FDB?WE-W&`{o}#P+qz0 zUd`o|*IzwrzViOVHF)1!_aT-n);i$Y{co4zH#`j!v0~+dp!nC+fN*B(|#V;I_1@IjCd-|PMR98X{AvYuRlL=oU8W=_}t$(tUvIbU%yKj_)J}bk;WK+!P(`o7Qjrs~hWMLHa67c6yCBHuLuivVU01;;~P% zo~66@8|Y?C%ekW8K@9qh*|o4)hkd}ciay!lG408~$ZH?T4Vn5|5~O8Z*`Mb-<5XYD ztv-;v5;M+Dh5!Gd=gu-`Dd! 
z{E$nX^d;z;=l>jEmd{GB%l82K=ikeg#QOoO8y5!JNmUA|a?~kb)qD1Q zKSgUM@1Y&_CG;PCVL3|g2V9KzTJU_HYmI-;0(1UX$zPNHWs8?UFZ5gfF(IG&sfV?2 zaXj(qy#THwzMsI_=QLshy$bs{*7oJtv_9!;=q0z~b)WVS%R=4C^KsDnoI#qFE^t|1 z#cb@B-RQ6Tocpxbg#4i4-o*BTvm>4&|6gLT?D(H9sd?Hv5fbM&O%I3FmVU!Bj0qlw#KPuLc?hdJl- zbjvef?qVgIfv@pb*wy%wJ~SOMX3PUeqgQ@_EcwO!9Cxg2yL}z99&rco4djgGquj3Y%>~H6p=4eV`vKXn`ve$|yn1$i-{;?vhdcisp1((2Vs6BoiMZAE=or=-ag)uA zAD{p8x&QEeLF;wx`xCCC-}eXdTr1fbyT+ewyu*(9h_UVOZd`1Y=fpf@5)AEuGqsK(WDI7%WHMfC61Im;iG1pJdq<)A9!Ys zah^bz?vI2_`?amfJE4O-#M;)at5?mp^XldKp1<=MKVQEQ@BQ;xPd!@aH2EFZ{1KY=t(c|sgvLD|s6aF@Sv>&CX@7sL8oib+GpZ+uc z4QVe*#1H-ecHwutXm4XbV>g3#jW_9A`{h5_Z+_CFsJ-V2vVEHU%At|J-}naJ)4v$M`$yi*{Y>?W z8ZX+;IZJ!q_>vcujsI%v^2~Q^Kj6=J3qQV@M*GJHeWg6-Js&Eg6Yy^24*0J5aTi41 z&%Gr7HuCrbdzJBpca)jS?#1;E-e*`6f48$a!{<8fcD$*Xh9z*F}*jJ163SKkyK+25T% zCU_F3=sMXI5Ao+%^aVcaRBd})r(DxVJt?Q^kA!jJfrDZd!+J?(0h5;^eVyPn<1Z#cQ5v{;_SWla4ZG;YmFc z@*7)&cXFOtj_TCi^0(veZ|#g@$Jc%w=C}EMJw^;VZgI8#Y3z%0aa?27dr7vDHTRX( z$_YCw=9DX+`MA+FVH5iypZTxm7w0i^9~O*_mg9Ld-p!i%9&(mj@wbAJ|1g(%+FH+= zpcjvABQN(B&@u;F8@bY)wdW%j!ZihQs;9l@ z$NOXYz8;^$EC0{;@0WG?x$YAnKeztOGd*|JPdxMqaoogQb#lFk<~qv9`GCLg-yJyk zeShe!_aSuO;mG_xK%4&uo%3(!_sp;T{Cx)J5ckLL^yB$F|1O`u(--)O5q{*iH1T_g znG0@@>$ewe9q}Uu+HyVW{bBRFb}Xa)aF60u?}Yrz^x% zk>}iF@eP3&z5?fBoC{xu@17DD{?5Df)7PgjD|0`35ONBf4^LfM#_P;4qh~HCqpkn8 zj9&fTGJ4)$mC;|lp^RVtmNK)&>&s}1*Or+--=>WJ9OvhKk1OlE_Y-BkoewRW9B@L} z5Pvsh^e3+r|IADNri@?pt}=spHusF>W%P=-l+l7`m-Ti!q&(yAb}Ae4y^H-%D05!? 
zI@tY=vI$}|^Xk7XzrEu@WdYuwU9j)5W&SRQmG$v`R@NKrdU%tSW1T@NiA>~eV7 zV7qeVXO5!=?fhL9q94a78}52Ung8)a(Z8^42%PMjzw1KKA66FN?}2Z) z09dvE(N;>Y_1h$H>THOiWE zb7FmP+-t@2HQPzw@^a43T>KOKb`gKiektxv@Nb+@p5=ez`uJv%`cijdFc#Gu@A_?n z<^bn`a8Wk#02}*ILe9W%n?f%6(oWXY0bPTQVH;_KuJMccY>zQc=nZ+s8dwH>wGHg{ zi}ct(mRa==*nE$m#c!LcT-w4yyRwMBwo`9IBODEF)3ZMoFLYXtEn z9mm>n%4y}2CuP%i;idiTPyFPUKN`Nluf~PCx1YAN(00y&)Z^OzP`i=seql)Lp>J|d z4|J3LVK05r`pW)j@N&GRNxaowwtqa}AN&@w95d_D`=9tNz?k3kn`RIDpig$u7fEA& zud9~V$H9{}a2~g~EpwtT*HHF}KG&V~TDEKNJGE}{Vt$gY&bbDAi>1mUZPElEvBUO; zPqf6Iw6bx+h9QRZwPjnL=qs-DtJ|&{4B~VDwCm^ASLSQ?7h`wbVl_SL@W+5X_I1a@ zesnBby5O}x9fzrYGY0D8G5t6VzxSx5mk-RFJS`uODQ`|o-^ykTr|PkNF<#@9{;P5K zp3Z*Ck*v@e$HTm-JdOUiPD^@V6qc&PKDAE*6aDA8W9)5PHdBXfhCCl@Ec;{6PhZ!* zwA)xLBikd~pB~}<^~~kSRo$1mM!3JtZv+CFWZ*TfD9? z2O6!jUKv0A8D+fnyOe*;+xfF)-p3Cv3y>>qgx>}}{k;3yS|36^LJ&G%Ul-qGd=|$o%#JT!tV`r?ztv@6?IA`bokzcRw`Twf=UcU14BQdY^{y(rS{8r>@#G2P1I!|1W@4lXQ8|;7| z@pmgdN8?&3&yhS|UXJSxlsP9~Sw?5yQua9>`S83yKz{zcGQ!^o9dGmYGWWOsPyT7o z@`*2gO*Q{Fv)P}P(Q{r{{`tn6%4pL+5-sWHuJ>P5cs$X5_6y5sbCf^CH-rB0FXH(3 z{v?js{@U{L+xwpgoWS#~TgvbJ{Z3`G_k0ZN7ykSq^4_HnmJxo}I>P<)@nyKC!M&)_ z4Tu4+Z7#jH{Lb5VDC;vukAe1alzULzLwA9?Tcf(HAJTx5w{xjbGxQum>?UzvBP9T^sgS|26S%{OA16whA-t&7!r) zW7-S6rfb`^k9IY@ZGZT-eVz$f^v{~`bM9*XHt}olSAP4el{XLmkiVMwPTnT|!iei5 z+3o8j+Tpn8v24$HYK+J92jj_O+nw=kZ4dg!f0D_$(8Pa@>vs+Iua2L-YyH*xv$ZL0 zu2~v>8IE~7jtU#9IX8|{uA>4d{+D? 
zuA7O|*U`jIdr`|zI_4Tizx(J5*@nKX)#fq%Wgpab8-Jd^hW&djt!K1lZ?}I>FMm$$rik`;YQ^`)lS;S_;h}W*dyM#)?@wid`5fv{5E`K z8JY{&@5U^aPusZ%u5tc$`(N)5^V?r-o9lnVU+KSg{uAJ594`+d`n^B>lKo&Q)bk~i!p!s0c+ z^-ipAr(bA`u)Cja?Pci?K3h;9=LyDcL=GaGV_j5x=>zo3H{J_-ynVa3m-yTH+tc^$ z-hW+t{GD6M=(J_{dlk%U?ai&j{S0lg9=j@aDJ^sF*-v7t* z`D-7>--lU<{Ga(b-uuV%;EnkmfIeOm?L7a{X0P{I_CH!iCx5rhU4Qd37w +#include +#include +#include <3ds.h> + +#include "gs.h" +#include "math.h" + +#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4) + +static void gsInitMatrixStack(); + +Handle linearAllocMutex; + +static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]; + +typedef struct +{ + u32 offset; + mtx44 data; +}bufferMatrix_s; + +bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE]; +int bufferMatrixListLength; + +//---------------------- +// GS SYSTEM STUFF +//---------------------- + +void initBufferMatrixList() +{ + bufferMatrixListLength=0; +} + +void gsInit(DVLB_s* shader) +{ + gsInitMatrixStack(); + initBufferMatrixList(); + svcCreateMutex(&linearAllocMutex, false); + if(shader) + { + gsMatrixStackRegisters[0]=SHDR_GetUniformRegister(shader, "projection", 0); + gsMatrixStackRegisters[1]=SHDR_GetUniformRegister(shader, "modelview", 0); + } +} + +void gsExit(void) +{ + svcCloseHandle(linearAllocMutex); +} + +void gsStartFrame(void) +{ + GPUCMD_SetBufferOffset(0); + initBufferMatrixList(); +} + +void* gsLinearAlloc(size_t size) +{ + void* ret=NULL; + + svcWaitSynchronization(linearAllocMutex, U64_MAX); + ret=linearAlloc(size); + svcReleaseMutex(linearAllocMutex); + + return ret; +} + +void gsLinearFree(void* mem) +{ + svcWaitSynchronization(linearAllocMutex, U64_MAX); + linearFree(mem); + svcReleaseMutex(linearAllocMutex); +} + +//---------------------- +// MATRIX STACK STUFF +//---------------------- + +static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE]; +static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04}; +static u8 gsMatrixStackOffsets[GS_MATRIXTYPES]; +static bool 
gsMatrixStackUpdated[GS_MATRIXTYPES]; +static GS_MATRIX gsCurrentMatrixType; + +static void gsInitMatrixStack() +{ + int i; + for(i=0; i=GS_MATRIXTYPES)return NULL; + + return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]]; +} + +int gsLoadMatrix(GS_MATRIX m, float* data) +{ + if(m<0 || m>=GS_MATRIXTYPES || !data)return -1; + + memcpy(gsGetMatrix(m), data, sizeof(mtx44)); + + gsMatrixStackUpdated[m]=true; + + return 0; +} + +int gsPushMatrix() +{ + const GS_MATRIX m=gsCurrentMatrixType; + if(m<0 || m>=GS_MATRIXTYPES)return -1; + if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1; + + float* cur=gsGetMatrix(m); + gsMatrixStackOffsets[m]++; + memcpy(gsGetMatrix(m), cur, sizeof(mtx44)); + + return 0; +} + +int gsPopMatrix() +{ + const GS_MATRIX m=gsCurrentMatrixType; + if(m<0 || m>=GS_MATRIXTYPES)return -1; + if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1; + + gsMatrixStackOffsets[m]--; + + gsMatrixStackUpdated[m]=true; + + return 0; +} + +int gsMatrixMode(GS_MATRIX m) +{ + if(m<0 || m>=GS_MATRIXTYPES)return -1; + + gsCurrentMatrixType=m; + + return 0; +} + +//------------------------ +// MATRIX TRANSFORM STUFF +//------------------------ + +int gsMultMatrix(float* data) +{ + if(!data)return -1; + + mtx44 tmp; + multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp); + memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44)); + + gsMatrixStackUpdated[gsCurrentMatrixType]=true; + + return 0; +} + +void gsLoadIdentity() +{ + loadIdentity44(gsGetMatrix(gsCurrentMatrixType)); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsProjectionMatrix(float fovy, float aspect, float near, float far) +{ + initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsRotateX(float x) +{ + rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false); + 
gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsRotateY(float y) +{ + rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsRotateZ(float z) +{ + rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsScale(float x, float y, float z) +{ + scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +void gsTranslate(float x, float y, float z) +{ + translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); + gsMatrixStackUpdated[gsCurrentMatrixType]=true; +} + +//---------------------- +// MATRIX RENDER STUFF +//---------------------- + +static void gsSetUniformMatrix(u32 startreg, float* m) +{ + float param[16]; + + param[0x0]=m[3]; //w + param[0x1]=m[2]; //z + param[0x2]=m[1]; //y + param[0x3]=m[0]; //x + + param[0x4]=m[7]; + param[0x5]=m[6]; + param[0x6]=m[5]; + param[0x7]=m[4]; + + param[0x8]=m[11]; + param[0x9]=m[10]; + param[0xa]=m[9]; + param[0xb]=m[8]; + + param[0xc]=m[15]; + param[0xd]=m[14]; + param[0xe]=m[13]; + param[0xf]=m[12]; + + GPU_SetUniform(startreg, (u32*)param, 4); +} + +static int gsUpdateTransformation() +{ + GS_MATRIX m; + for(m=0; mdata=NULL; + vbo->currentSize=0; + vbo->maxSize=0; + vbo->commands=NULL; + vbo->commandsSize=0; + + return 0; +} + +int gsVboCreate(gsVbo_s* vbo, u32 size) +{ + if(!vbo)return -1; + + vbo->data=gsLinearAlloc(size); + vbo->numVertices=0; + vbo->currentSize=0; + vbo->maxSize=size; + + return 0; +} + +void* gsVboGetOffset(gsVbo_s* vbo) +{ + if(!vbo)return NULL; + + return (void*)(&((u8*)vbo->data)[vbo->currentSize]); +} + +int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units) +{ + if(!vbo || !data || !size)return -1; + if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1; + + memcpy(gsVboGetOffset(vbo), data, size); + vbo->currentSize+=size; + vbo->numVertices+=units; + + return 0; +} + +int 
gsVboFlushData(gsVbo_s* vbo) +{ + if(!vbo)return -1; + + //unnecessary if we use flushAndRun + // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize); + + return 0; +} + +int gsVboDestroy(gsVbo_s* vbo) +{ + if(!vbo)return -1; + + if(vbo->commands)free(vbo->commands); + if(vbo->data)gsLinearFree(vbo->data); + gsVboInit(vbo); + + return 0; +} + +extern u32 debugValue[]; + +void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n) +{ + //set attribute buffer address + GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3); + //set primitive type + GPUCMD_AddSingleParam(0x0002025E, primitive); + GPUCMD_AddSingleParam(0x0002025F, 0x00000001); + //index buffer not used for drawArrays but 0x000F0227 still required + GPUCMD_AddSingleParam(0x000F0227, 0x80000000); + //pass number of vertices + GPUCMD_AddSingleParam(0x000F0228, n); + + GPUCMD_AddSingleParam(0x00010253, 0x00000001); + + GPUCMD_AddSingleParam(0x00010245, 0x00000000); + GPUCMD_AddSingleParam(0x000F022E, 0x00000001); + GPUCMD_AddSingleParam(0x00010245, 0x00000001); + GPUCMD_AddSingleParam(0x000F0231, 0x00000001); + + // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff +} + +//not thread safe +int gsVboPrecomputeCommands(gsVbo_s* vbo) +{ + if(!vbo || vbo->commands)return -1; + + static u32 tmpBuffer[128]; + + u32* savedAdr; u32 savedSize, savedOffset; + GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset); + GPUCMD_SetBuffer(tmpBuffer, 128, 0); + + GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); + + GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); + vbo->commands=memalign(0x4, vbo->commandsSize*4); + if(!vbo->commands)return -1; + memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4); + + GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset); + + return 0; +} + +extern u32* gpuCmdBuf; +extern u32 gpuCmdBufSize; +extern u32 gpuCmdBufOffset; + +void _vboMemcpy50(u32* dst, u32* src); + +void _GPUCMD_AddRawCommands(u32* cmd, u32 size) +{ + if(!cmd || 
!size)return; + + if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd); + else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4); + gpuCmdBufOffset+=size; +} + +int gsVboDraw(gsVbo_s* vbo) +{ + if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1; + + gsUpdateTransformation(); + + gsVboPrecomputeCommands(vbo); + + // u64 val=svcGetSystemTick(); + if(vbo->commands) + { + _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize); + }else{ + GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); + } + // debugValue[5]+=(u32)(svcGetSystemTick()-val); + // debugValue[6]++; + + return 0; +} diff --git a/examples/gpu/source/gs.h b/examples/gpu/source/gs.h new file mode 100644 index 0000000..6976fca --- /dev/null +++ b/examples/gpu/source/gs.h @@ -0,0 +1,59 @@ +#ifndef GS_H +#define GS_H + +#include <3ds.h> +#include "math.h" + +#define GS_MATRIXSTACK_SIZE (8) + +typedef enum +{ + GS_PROJECTION = 0, + GS_MODELVIEW = 1, + GS_MATRIXTYPES +}GS_MATRIX; + +typedef struct +{ + u8* data; + u32 currentSize; // in bytes + u32 maxSize; // in bytes + u32 numVertices; + u32* commands; + u32 commandsSize; +}gsVbo_s; + + +void gsInit(DVLB_s* shader); +void gsExit(void); + +void gsStartFrame(void); +void gsAdjustBufferMatrices(mtx44 transformation); + +void* gsLinearAlloc(size_t size); +void gsLinearFree(void* mem); + +float* gsGetMatrix(GS_MATRIX m); +int gsLoadMatrix(GS_MATRIX m, float* data); +int gsPushMatrix(); +int gsPopMatrix(); +int gsMatrixMode(GS_MATRIX m); + +void gsLoadIdentity(); +void gsProjectionMatrix(float fovy, float aspect, float near, float far); +void gsRotateX(float x); +void gsRotateY(float y); +void gsRotateZ(float z); +void gsScale(float x, float y, float z); +void gsTranslate(float x, float y, float z); +int gsMultMatrix(float* data); + +int gsVboInit(gsVbo_s* vbo); +int gsVboCreate(gsVbo_s* vbo, u32 size); +int gsVboFlushData(gsVbo_s* vbo); +int gsVboDestroy(gsVbo_s* vbo); +int gsVboDraw(gsVbo_s* vbo); +void* 
gsVboGetOffset(gsVbo_s* vbo); +int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units); + +#endif diff --git a/examples/gpu/source/main.c b/examples/gpu/source/main.c index c059540..17a4bee 100644 --- a/examples/gpu/source/main.c +++ b/examples/gpu/source/main.c @@ -1,201 +1,325 @@ +/////////////////////////////////////// +// GPU example // +/////////////////////////////////////// + +//this example is meant to show how to use the GPU to render a 3D object +//it also shows how to do stereoscopic 3D +//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft +//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited. + #include #include #include -#include #include <3ds.h> + #include "math.h" +#include "gs.h" + #include "test_vsh_shbin.h" -#include "test_png_bin.h" -#include "mdl.h" +#include "texture_bin.h" +//will be moved into ctrulib at some point +#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080) + +#define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0)) + +//shader structure DVLB_s* shader; -float* vertArray; +//texture data pointer u32* texData; +//vbo structure +gsVbo_s vbo; -void setUniformMatrix(u32 startreg, float* m) -{ - float param[16]; - - param[0x0]=m[3]; //w - param[0x1]=m[2]; //z - param[0x2]=m[1]; //y - param[0x3]=m[0]; //x - - param[0x4]=m[7]; - param[0x5]=m[6]; - param[0x6]=m[5]; - param[0x7]=m[4]; - - param[0x8]=m[11]; - param[0x9]=m[10]; - param[0xa]=m[9]; - param[0xb]=m[8]; - - param[0xc]=m[15]; - param[0xd]=m[14]; - param[0xe]=m[13]; - param[0xf]=m[12]; - - GPU_SetUniform(startreg, (u32*)param, 4); -} - -float angle=0.0f; -float angleZ=0.0f; -float tx, ty, tz; - +//GPU framebuffer address u32* gpuOut=(u32*)0x1F119400; +//GPU depth buffer address u32* gpuDOut=(u32*)0x1F370800; -// topscreen -void doFrame1() +//angle for the vertex lighting (cf test.vsh) +float lightAngle; +//object position and rotation angle 
+vect3Df_s position, angle; + +//vertex structure +typedef struct { - //general setup - GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); + vect3Df_s position; + float texcoord[2]; + vect3Df_s normal; +}vertex_s; - GPU_DepthRange(-1.0f, 0.0f); +//object data (cube) +//obviously this doesn't have to be defined manually, but we will here for the purposes of the example +//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z} +//we're drawing triangles so three lines = one triangle +const vertex_s modelVboData[]= +{ + //first face (PZ) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, + //second face (MZ) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, + //third face (PX) + //first triangle + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 
0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, + //fourth face (MX) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + //second triangle + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, + //fifth face (PY) + //first triangle + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + //second triangle + {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, + //sixth face (MY) + //first triangle + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, 
(float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + //second triangle + {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, + {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, +}; - GPU_SetFaceCulling(GPU_CULL_BACK_CCW); - GPU_SetStencilTest(false, GPU_ALWAYS, 0x00); - GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); - - // ? - GPUCMD_AddSingleParam(0x00010062, 0x00000000); //param always 0x0 according to code - GPUCMD_AddSingleParam(0x000F0118, 0x00000000); +//stolen from staplebutt +void GPU_SetDummyTexEnv(u8 num) +{ + GPU_SetTexEnv(num, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0,0,0), + GPU_TEVOPERANDS(0,0,0), + GPU_REPLACE, + GPU_REPLACE, + 0xFFFFFFFF); +} +// topscreen +void renderFrame() +{ + GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); + + GPU_DepthRange(-1.0f, 0.0f); + GPU_SetFaceCulling(GPU_CULL_BACK_CCW); + GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); + GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP); + GPU_SetBlendingColor(0,0,0,0); + GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); + + GPUCMD_AddSingleParam(0x00010062, 0); + GPUCMD_AddSingleParam(0x000F0118, 0); + //setup shader - SHDR_UseProgram(shader, 0); + SHDR_UseProgram(shader, 0); + + GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); + + GPU_SetTextureEnable(GPU_TEXUNIT0); + + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), + GPU_TEVOPERANDS(0,0,0), + GPU_TEVOPERANDS(0,0,0), + GPU_MODULATE, GPU_MODULATE, + 0xFFFFFFFF); + 
GPU_SetDummyTexEnv(1); + GPU_SetDummyTexEnv(2); + GPU_SetDummyTexEnv(3); + GPU_SetDummyTexEnv(4); + GPU_SetDummyTexEnv(5); - //attribute buffers - GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)vertArray), + //texturing stuff + GPU_SetTexture(GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texData),128,128,GPU_TEXTURE_MAG_FILTER(GPU_NEAREST)|GPU_TEXTURE_MIN_FILTER(GPU_NEAREST),GPU_RGBA8); + GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)texData), GPU_ATTRIBFMT(0, 3, GPU_FLOAT)|GPU_ATTRIBFMT(1, 2, GPU_FLOAT)|GPU_ATTRIBFMT(2, 3, GPU_FLOAT), 0xFFC, 0x210, 1, (u32[]){0x00000000}, (u64[]){0x210}, (u8[]){3}); - //? - GPUCMD_AddSingleParam(0x000F0100, 0x00E40100); - GPUCMD_AddSingleParam(0x000F0101, 0x01010000); - GPUCMD_AddSingleParam(0x000F0104, 0x00000010); - - //texturing stuff - GPUCMD_AddSingleParam(0x0002006F, 0x00000100); - GPUCMD_AddSingleParam(0x000F0080, 0x00011001); //enables/disables texturing - - //texenv - GPU_SetTexEnv(3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000); - GPU_SetTexEnv(4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000); - GPU_SetTexEnv(5, GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), - GPU_TEVOPERANDS(0,0,0), GPU_TEVOPERANDS(0,0,0), GPU_MODULATE, GPU_MODULATE, 0xFFFFFFFF); + //setup lighting (this is specific to our shader) + vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))); + GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightDirection", 0), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 4); + GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightAmbient", 0), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 4); - //texturing stuff - GPU_SetTexture((u32*)osConvertVirtToPhys((u32)texData),256,256,0x6,GPU_RGBA8); + //initialize projection matrix to standard perspective stuff + gsMatrixMode(GS_PROJECTION); + gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f); + gsRotateZ(M_PI/2); 
//because framebuffer is sideways... - //setup matrices - float modelView[16]; - float projection[16]; - - loadIdentity44(modelView); - loadIdentity44(projection); - - translateMatrix(modelView, tx, ty, tz); - rotateMatrixX(modelView, angle); - rotateMatrixZ(modelView, angleZ); - - initProjectionMatrix(projection, 1.3962634f, 240.0f/400.0f, 0.01f, 10.0f); - - setUniformMatrix(0x24, modelView); - setUniformMatrix(0x20, projection); - - //draw first model - GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3); - // GPU_DrawElements(GPU_TRIANGLES, (u32*)(((u32)((void*)indArray-(void*)gspHeap))+0x20000000-base), 6); - - //setup matrices - loadIdentity44(modelView); - loadIdentity44(projection); - - translateMatrix(modelView, tx, -ty, tz); - rotateMatrixX(modelView, -angle); - rotateMatrixZ(modelView, -angleZ); - - setUniformMatrix(0x24, modelView); - - //draw second - GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3); - - //finalize stuff ? - GPU_FinishDrawing(); + //draw object + gsMatrixMode(GS_MODELVIEW); + gsPushMatrix(); + gsTranslate(position.x, position.y, position.z); + gsRotateX(angle.x); + gsRotateY(angle.y); + gsVboDraw(&vbo); + gsPopMatrix(); + GPU_FinishDrawing(); } -void demoControls(void) -{ - hidScanInput(); - u32 PAD=hidKeysHeld(); - - if(PAD&KEY_UP)tx+=0.1f; - if(PAD&KEY_DOWN)tx-=0.1f; - - if(PAD&KEY_LEFT)ty+=0.1f; - if(PAD&KEY_RIGHT)ty-=0.1f; - - if(PAD&KEY_R)tz+=0.1f; - if(PAD&KEY_L)tz-=0.1f; - - if(PAD&KEY_A)angle+=0.1f; - if(PAD&KEY_Y)angle-=0.1f; - - if(PAD&KEY_X)angleZ+=0.1f; - if(PAD&KEY_B)angleZ-=0.1f; -} - -extern u32* gxCmdBuf; - -int main() +int main(int argc, char** argv) { + //setup services srvInit(); aptInit(); gfxInit(); hidInit(NULL); - + + //initialize GPU GPU_Init(NULL); + //let GFX know we're ok with doing stereoscopic 3D rendering + gfxSet3D(true); + + //load our vertex shader binary + shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin, test_vsh_shbin_size); + + //initialize GS + gsInit(shader); + + //allocate our GPU command buffers + //they *have* to be on 
the linear heap u32 gpuCmdSize=0x40000; u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4); + u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4); - GPU_Reset(gxCmdBuf, gpuCmd, gpuCmdSize); + //actually reset the GPU + GPU_Reset(NULL, gpuCmd, gpuCmdSize); - vertArray=(float*)linearAlloc(0x100000); - texData=(u32*)linearAlloc(0x100000); + //create texture + texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned + memcpy(texData, texture_bin, texture_bin_size); - memcpy(texData, test_png_bin, test_png_bin_size); - memcpy(vertArray, mdlData, sizeof(mdlData)); - GSPGPU_FlushDataCache(NULL, mdlData, sizeof(mdlData)); - GSPGPU_FlushDataCache(NULL, test_png_bin, test_png_bin_size); + //create VBO + gsVboInit(&vbo); + gsVboCreate(&vbo, sizeof(modelVboData)); + gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s)); + gsVboFlushData(&vbo); - tx=ty=0.0f; tz=-0.1f; - shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin,test_vsh_shbin_size); + //initialize object position and angle + position=vect3Df(0.0f, 0.0f, -2.0f); + angle=vect3Df(M_PI/4, M_PI/4, 0.0f); - GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); - gspWaitForPSC0(); - gfxSwapBuffersGpu(); + //background color (blue) + u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF); while(aptMainLoop()) { - demoControls(); + //get current 3D slider state + float slider=CONFIG_3D_SLIDERSTATE; - GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); - gspWaitForPSC0(); + //controls + hidScanInput(); + //START to exit to hbmenu + if(keysDown()&KEY_START)break; - GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); - doFrame1(); + //A/B to change vertex lighting angle + if(keysHeld()&KEY_A)lightAngle+=0.1f; + if(keysHeld()&KEY_B)lightAngle-=0.1f; + + //D-PAD to rotate object + 
if(keysHeld()&KEY_RIGHT)angle.x+=0.05f; + if(keysHeld()&KEY_LEFT)angle.x-=0.05f; + if(keysHeld()&KEY_UP)angle.y+=0.05f; + if(keysHeld()&KEY_DOWN)angle.y-=0.05f; + + //R/L to bring object closer to or move it further from the camera + if(keysHeld()&KEY_R)position.z+=0.1f; + if(keysHeld()&KEY_L)position.z-=0.1f; + + //generate our GPU command buffer for this frame + gsStartFrame(); + renderFrame(); GPUCMD_Finalize(); - GPUCMD_Run(gxCmdBuf); - gspWaitForP3D(); + if(slider>0.0f) + { + //new and exciting 3D ! + //make a copy of left gpu buffer + u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset); + memcpy(gpuCmdRight, gpuCmd, offset*4); + + //setup interaxial + float interaxial=slider*0.12f; + + //adjust left gpu buffer for 3D ! + {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} + + //draw left framebuffer + GPUCMD_FlushAndRun(NULL); + + //while GPU starts drawing the left buffer, adjust right one for 3D ! + GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset); + {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} + + //we wait for the left buffer to finish drawing + gspWaitForP3D(); + GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); + gspWaitForPPF(); + + //we draw the right buffer, wait for it to finish and then switch back to left one + //clear the screen + GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); + gspWaitForPSC0(); + + //draw the right framebuffer + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); + + //transfer from GPU output buffer to actual framebuffer + GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), 0x019001E0, 0x01001000); + gspWaitForPPF(); + GPUCMD_SetBuffer(gpuCmd,
gpuCmdSize, 0); + }else{ + //boring old 2D ! + + //draw the frame + GPUCMD_FlushAndRun(NULL); + gspWaitForP3D(); + + //transfer from GPU output buffer to actual framebuffer + GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); + gspWaitForPPF(); + } + + //clear the screen + GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201); + gspWaitForPSC0(); gfxSwapBuffersGpu(); - GX_SetDisplayTransfer(gxCmdBuf, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000); - gspWaitForPPF(); - gspWaitForVBlank(); + + gspWaitForEvent(GSPEVENT_VBlank0, true); } + gsExit(); hidExit(); gfxExit(); aptExit(); diff --git a/examples/gpu/source/math.c b/examples/gpu/source/math.c index 9c0977a..13ab3dd 100644 --- a/examples/gpu/source/math.c +++ b/examples/gpu/source/math.c @@ -15,6 +15,7 @@ void multMatrix44(float* m1, float* m2, float* m) //4x4 { int i, j; for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); + } void translateMatrix(float* tm, float x, float y, float z) @@ -26,11 +27,16 @@ void translateMatrix(float* tm, float x, float y, float z) rm[7]=y; rm[11]=z; - multMatrix44(rm,tm,m); + multMatrix44(tm,rm,m); memcpy(tm,m,16*sizeof(float)); } -void rotateMatrixX(float* tm, float x) +// 00 01 02 03 +// 04 05 06 07 +// 08 09 10 11 +// 12 13 14 15 + +void rotateMatrixX(float* tm, float x, bool r) { float rm[16], m[16]; memset(rm, 0x00, 16*4); @@ -40,11 +46,27 @@ void rotateMatrixX(float* tm, float x) rm[9]=-sin(x); rm[10]=cos(x); rm[15]=1.0f; - multMatrix44(tm,rm,m); + if(!r)multMatrix44(tm,rm,m); + else multMatrix44(rm,tm,m); memcpy(tm,m,16*sizeof(float)); } -void rotateMatrixZ(float* tm, float x) +void rotateMatrixY(float* tm, float x, bool r) +{ + float rm[16], m[16]; + memset(rm, 0x00, 16*4); + rm[0]=cos(x); + rm[2]=sin(x);
+ rm[5]=1.0f; + rm[8]=-sin(x); + rm[10]=cos(x); + rm[15]=1.0f; + if(!r)multMatrix44(tm,rm,m); + else multMatrix44(rm,tm,m); + memcpy(tm,m,16*sizeof(float)); +} + +void rotateMatrixZ(float* tm, float x, bool r) { float rm[16], m[16]; memset(rm, 0x00, 16*4); @@ -54,7 +76,8 @@ void rotateMatrixZ(float* tm, float x) rm[5]=cos(x); rm[10]=1.0f; rm[15]=1.0f; - multMatrix44(tm,rm,m); + if(!r)multMatrix44(tm,rm,m); + else multMatrix44(rm,tm,m); memcpy(tm,m,16*sizeof(float)); } @@ -69,27 +92,57 @@ void initProjectionMatrix(float* m, float fovy, float aspect, float near, float { float top = near*tan(fovy/2); float right = (top*aspect); + + float mp[4*4]; - *(m++) = near/right; - *(m++) = 0.0f; - *(m++) = 0.0f; - *(m++) = 0.0f; + mp[0x0] = near/right; + mp[0x1] = 0.0f; + mp[0x2] = 0.0f; + mp[0x3] = 0.0f; - *(m++) = 0.0f; - *(m++) = near/top; - *(m++) = 0.0f; - *(m++) = 0.0f; + mp[0x4] = 0.0f; + mp[0x5] = near/top; + mp[0x6] = 0.0f; + mp[0x7] = 0.0f; - *(m++) = 0.0f; - *(m++) = 0.0f; - // *(m++) = -(far+near)/(far-near); - *(m++) = 0.0f; - // *(m++) = -2.0f*(far*near)/(far-near); - // *(m++) = 1.0f; - *(m++) = -1.0f; + mp[0x8] = 0.0f; + mp[0x9] = 0.0f; + mp[0xA] = -(far+near)/(far-near); + mp[0xB] = -2.0f*(far*near)/(far-near); - *(m++) = 0.0f; - *(m++) = 0.0f; - *(m++) = -1.0f; - *(m++) = 0.0f; + mp[0xC] = 0.0f; + mp[0xD] = 0.0f; + mp[0xE] = -1.0f; + mp[0xF] = 0.0f; + + float mp2[4*4]; + loadIdentity44(mp2); + mp2[0xA]=0.5; + mp2[0xB]=-0.5; + + multMatrix44(mp2, mp, m); +} + +vect3Df_s getMatrixColumn(float* m, u8 i) +{ + if(!m || i>=4)return vect3Df(0,0,0); + return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]); +} + +vect3Df_s getMatrixRow(float* m, u8 i) +{ + if(!m || i>=4)return vect3Df(0,0,0); + return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]); +} + +vect4Df_s getMatrixColumn4(float* m, u8 i) +{ + if(!m || i>=4)return vect4Df(0,0,0,0); + return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]); +} + +vect4Df_s getMatrixRow4(float* m, u8 i) +{ + if(!m || i>=4)return vect4Df(0,0,0,0); + return 
vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]); } diff --git a/examples/gpu/source/math.h b/examples/gpu/source/math.h index 5eed360..8137b90 100644 --- a/examples/gpu/source/math.h +++ b/examples/gpu/source/math.h @@ -1,13 +1,144 @@ #ifndef MATH_H +#define MATH_H + +#include <3ds/types.h> +#include <math.h> + +typedef float mtx44[4][4]; +typedef float mtx33[3][3]; + +typedef struct +{ + s32 x, y, z; +}vect3Di_s; + +static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z) +{ + return (vect3Di_s){x,y,z}; +} + +static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v) +{ + return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z}; +} + +static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v) +{ + return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z}; +} + +static inline vect3Di_s vmuli(vect3Di_s v, s32 f) +{ + return (vect3Di_s){v.x*f,v.y*f,v.z*f}; +} + +typedef struct +{ + float x, y, z; +}vect3Df_s; + +static inline vect3Df_s vect3Df(float x, float y, float z) +{ + return (vect3Df_s){x,y,z}; +} + +static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v) +{ + return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z}; +} + +static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v) +{ + return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z}; +} + +static inline vect3Df_s vmulf(vect3Df_s v, float f) +{ + return (vect3Df_s){v.x*f,v.y*f,v.z*f}; +} + +static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2) +{ + return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z}; +} + +static inline float vmagf(vect3Df_s v) +{ + return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); +} + +static inline float vdistf(vect3Df_s v1, vect3Df_s v2) +{ + return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z)); +} + +static inline vect3Df_s vnormf(vect3Df_s v) +{ + const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); + return (vect3Df_s){v.x/l,v.y/l,v.z/l}; +} + +typedef struct +{ + float x, y, z, w; +}vect4Df_s; + +static inline vect4Df_s vect4Df(float x, float y, float z, float w) +{ + return (vect4Df_s){x,y,z,w}; +} + +static inline vect4Df_s
vaddf4(vect4Df_s u, vect4Df_s v) +{ + return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w}; +} + +static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v) +{ + return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w}; +} + +static inline vect4Df_s vmulf4(vect4Df_s v, float f) +{ + return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f}; +} + +static inline float vdotf4(vect4Df_s v1, vect4Df_s v2) +{ + return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w; +} + +static inline vect4Df_s vnormf4(vect4Df_s v) +{ + const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w); + return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}; +} + +//interstuff +static inline vect3Di_s vf2i(vect3Df_s v) +{ + return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)}; +} + +static inline vect3Df_s vi2f(vect3Di_s v) +{ + return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z}; +} void loadIdentity44(float* m); void multMatrix44(float* m1, float* m2, float* m); void translateMatrix(float* tm, float x, float y, float z); -void rotateMatrixX(float* tm, float x); -void rotateMatrixZ(float* tm, float x); +void rotateMatrixX(float* tm, float x, bool r); +void rotateMatrixY(float* tm, float x, bool r); +void rotateMatrixZ(float* tm, float x, bool r); void scaleMatrix(float* tm, float x, float y, float z); void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far); +vect3Df_s getMatrixColumn(float* m, u8 i); +vect3Df_s getMatrixRow(float* m, u8 i); +vect4Df_s getMatrixColumn4(float* m, u8 i); +vect4Df_s getMatrixRow4(float* m, u8 i); + #endif