From 4613bd1e02c82ea568dfa4bd56623fed82229654 Mon Sep 17 00:00:00 2001 From: SnapperTT <4939563+SnapperTT@users.noreply.github.com> Date: Mon, 29 Aug 2022 23:44:55 +1000 Subject: [PATCH] Parallelised compute shader in example 48 (#2906) * Parallelised compute shader in example 48 * Ex48 - Dispatch numToDraw/64 workgroups of 64 local threads * fixed vs build (again) --- examples/48-drawindirect/cs_drawindirect.sc | 17 ++++++++++++----- examples/48-drawindirect/drawindirect.cpp | 10 +++++++--- .../runtime/shaders/dx11/cs_drawindirect.bin | Bin 2249 -> 2589 bytes .../runtime/shaders/essl/cs_drawindirect.bin | Bin 2841 -> 3029 bytes .../runtime/shaders/glsl/cs_drawindirect.bin | Bin 3009 -> 3197 bytes .../runtime/shaders/metal/cs_drawindirect.bin | Bin 2359 -> 2579 bytes .../runtime/shaders/spirv/cs_drawindirect.bin | Bin 3947 -> 4227 bytes 7 files changed, 19 insertions(+), 8 deletions(-) diff --git a/examples/48-drawindirect/cs_drawindirect.sc b/examples/48-drawindirect/cs_drawindirect.sc index 76b82f955..989bb3005 100644 --- a/examples/48-drawindirect/cs_drawindirect.sc +++ b/examples/48-drawindirect/cs_drawindirect.sc @@ -14,19 +14,26 @@ BUFFER_WR(instanceBufferOut, vec4, 2); uniform vec4 u_drawParams; -NUM_THREADS(1, 1, 1) +// Use 64*1*1 local threads +NUM_THREADS(64, 1, 1) void main() { + int tId = int(gl_GlobalInvocationID.x); int numDrawItems = int(u_drawParams.x); int sideSize = int(u_drawParams.y); float time = u_drawParams.z; - // Prepare draw mtx - + // Work out the amount of work we're going to do here int maxToDraw = min(sideSize*sideSize, numDrawItems); - for (int k = 0; k < maxToDraw; k++) { + int numToDrawPerThread = maxToDraw/64 + 1; + + int idxStart = tId*numToDrawPerThread; + int idxMax = min(maxToDraw, (tId+1)*numToDrawPerThread); + + // Prepare draw mtx + for (int k = idxStart; k < idxMax; k++) { int yy = k / sideSize; int xx = k % sideSize; @@ -58,7 +65,7 @@ void main() // Fill indirect buffer - for (int k = 0; k < maxToDraw; k++) { + for (int k = idxStart; k < idxMax; k++) { drawIndexedIndirect( // Target location params: indirectBuffer, // target buffer diff --git a/examples/48-drawindirect/drawindirect.cpp b/examples/48-drawindirect/drawindirect.cpp index 3065fed24..eefd5e172 100644 --- a/examples/48-drawindirect/drawindirect.cpp +++ b/examples/48-drawindirect/drawindirect.cpp @@ -344,26 +344,30 @@ public: // The model matrix for each instance is also set on compute // you could modify this to, eg, do frustrum culling on the GPU float ud[4] = { float(m_nDrawElements), float(m_sideSize), float(time), 0 }; + uint32_t numToDraw = (m_sideSize*m_sideSize); + bgfx::setUniform(u_drawParams, ud); bgfx::setBuffer(0, m_object_list_buffer, bgfx::Access::Read); bgfx::setBuffer(1, m_indirect_buffer_handle, bgfx::Access::Write); bgfx::setBuffer(2, m_instance_buffer, bgfx::Access::Write); - bgfx::dispatch(0, m_indirect_program); + // Dispatch the call. We are using 64 local threads on the GPU to process the object list + // So lets dispatch ceil(numToDraw/64) workgroups of 64 local threads + bgfx::dispatch(0, m_indirect_program, uint32_t(numToDraw/64 + 1), 1, 1); // Submit our 1 draw call // Set vertex and index buffer. bgfx::setIndexBuffer(m_ibh); bgfx::setVertexBuffer(0, m_vbh); - bgfx::setInstanceDataBuffer(m_instance_buffer, 0, m_sideSize*m_sideSize); + bgfx::setInstanceDataBuffer(m_instance_buffer, 0, numToDraw); // Set render states. bgfx::setState(BGFX_STATE_DEFAULT); // Submit primitive for rendering to view 0. // note that this submission requires the draw count - bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(m_sideSize*m_sideSize)); + bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(numToDraw)); } else { diff --git a/examples/runtime/shaders/dx11/cs_drawindirect.bin b/examples/runtime/shaders/dx11/cs_drawindirect.bin index c7b570e39cbc0a7d32ff2af45b7a1c738ca24eaa..5df326d7050d0590248d0ec3b0fc79a2966fa449 100755 GIT binary patch literal 2589 zcmaKtO>10b6vxlp$t3Ai&9tJ6R&;!+XslXNQ4!nRPGW4K#zKgVs}dJJd|FA|ORB$)M67O2R(@%Y{d#i!<hugs^w%P)<59&&;#Bm#;??3{alh#J$)>r`rVT-!v;K{2hwrTP&qWN#3GM@3k<0n97d5X%95D{- zz*G7^{y+ZMs0VZOz<8!Hs*vBTu?*E;a%P`HeVh8UdCR_)Isnrf<1PAq&>OHir}gCh zdw+lbg`zjEaVcW7l z7dbV++n%4f`+Dx@e|2^JO4hNPhy5btmugRb%~%#KwQ=kEodOFpvA=3eGw#dWveqY- z&0kgQAB1y8HdU>!4oN>?#skT-Pgus@b!FghWzDkf`W>13<~=e2S&~0_{#j`8R&~Ukou3b4;9SPh4yOC3S9E}VC~nNw5f2@^MqQk_6Uf*v z%izTE-dmUb(mbulmpv?elb-y(Z`M06I_?I(>?^+Xv7KpMb7o(#VK&%!wzi=*Y`A;w zpYI;A9G~4k>^r@C`1&16EMn3V=e?h=-oNQx>fDPN5BTU8cW)w@YbNd;_4=Ov52?p< zU3;jtc{*a@p6tQCH=NF#SmjgI>XT%|03Y$mMV~w;`djAmE|SaGu=iY;WoiH3pv8eb zHl+@1U_=LoQb$eJfzAJ4;4S;Z`B9Z&+??qfT(bE;g^k!%t*;K@Z%ociHGw9km_lpr+_5A4<4n}o${gAd6DtC%GmFd?|N@P{POdI4BY8?+r2@QSVq@ldY#wnie8teFHNc2 z8|$^E=jq9dbCVjlE;;GaGWYY6x+#*Kc+uxMp;^?Fzm^SW96&sN8LM)e9l_ zUgbV%%9nB?8=IS(10p$#oeP*Dkxn$C`Bz70Jx^LWFIQH|!^7{(8uFHQEDLMN#l6Uw z?$L4_219ozfWlqqkdmBFf8Ktx{Ag$?M$JNDKWnzV5M&oSDCK zmJ_a&PiyWsipeS_?ewR~H12zsQ-9!y`C7K|`3ZI;sgm1ssh@BgJ`k2~P;dLg{!{8V z{Wb#`$!3@XVzUvhs6Nc_%m{sob7-u7wo}><=BrgXth^_1!P}f237YXyAFST{voRcN z&|WJ0ILh1JU8a(y!rPu?xX3mSeqtbU(8i0xSGPxAid;0Ls#gT@8_!adR@QW z?AgwBLgxg`0g;Y%#nX4@1WP+jqHBENqaHcX2bWstwKJr@ajlk+ms+L`yqybsj`25g zi4O2!;uyNoNDK{e4BZw(Tm1c}w^+(0`uFH!y4jEU)tCRFSaPSG{xlgJn;SUTpwj!b zx9kaHe&DlWe5;rX?C3T)t9@MLSw*BeHq$MOiq9yX-jVC+Qo`@m`rp)t=2|MjSP$^g qJBLfqOHpEZKx$D)Mp0@aNNa9l1w>Ha%tS$3!O)ru zq%}gI$eT5?vIiNLyP|p_*&*d~VUn0o<(s6vZ2f diff --git a/examples/runtime/shaders/glsl/cs_drawindirect.bin b/examples/runtime/shaders/glsl/cs_drawindirect.bin index 74133748d3de909acfe96437a670a47ff97d6f79..1e6d997537632ddfa64f787bbb94cb384d881ab2 100755 GIT binary patch delta 276 zcmX>o{#Qc4IoN}n0S>ZwHVQmuV=^B6nlwB}07FH+C| f*^&)%I@Bg>AkPNG0~)yb1-lxv2sW9?a@>mnQzcpf delta 91 zcmew>aZp^qIoN}n0S?x2ZxndU#%MVCE1S*aNcP#XY57G88ku<|3fT&_3I^6d%0?kK Zu_7ejr6{p{^8yYvW&w23$+x%{0{~T38k7J4 diff --git a/examples/runtime/shaders/metal/cs_drawindirect.bin b/examples/runtime/shaders/metal/cs_drawindirect.bin index 85a18267e120d4c1fdcb57e6ca776c42a3bfbe0a..c871d30b2d7aa0ce2887dbc0fc47c2b127c5c355 100755 GIT binary patch literal 2579 zcmb_dU2oeq6jk;o?Tzx zP~wi~E|)V#@3FkND?yiWX`1$Q?>XZ;FemU9X08rl=CcX&r?5RCcZ6GQO|T#^swUWN zHEh~deb=m{Dx3Q11Ei-uwS~;H&rMh2SXS2AZCmEcr8|fSXqE}#v$v3@aso&Y0t%CdUH+r8`6!lUp|}LlnzG;XyE92p9U{4LpQ=!CVqS*96vumLo!H0$q|4 zIxS;980TN@%~M^$ScHPEH`1X2kOowlF>mgBDP1B6M3AIj9K>Gi3GRop!MoFK`a0;0 z++*w**(b3%#TKzgR}e>uh;8H0`9~|B|Q*KByfkH2G{Xe&KQjBIf)oYTMCzwdiy}-Z*F&xHD~jRGVI^=IbU_zSBI^?T0g7OwryE2t?%+-&20bx delta 773 zcmZ`%%Sr-K7|shbY7inuNCpQ>IHRMT%Xk^aRxOHX(b^D^KoC+PbJe1in;;1OcW7O! zUZP#JEa(k6v~G~9;Q)|o&{zc-*%qXkWZhw#pIcX&almN~ULl9)%@>ee7_$%IEN?br9dh#)yf zs^;x#SDt1CaA%I^f8pKpVFz=R+(kyWzuKYNLeTe7$ghnfaWqu0Sd+1auS!eM?~zR%SL9AQ@~Kc1`sP zSr#c0{)37cW1Eqt$f9l&TEeqx`BumprJz;>w3=9-5q){pce2z*FfJS$B*PucCQ_xO znpT=M6ITExMlHknf9R{RA5pALv!Q)8O&aD7yqkycY$ih--pq*xrtxe}4K}zaYIgpj zxU)rp`&a?#F1*AVNjzKDsT{X3I3L>tK4N7Jp2TbVyonb*Bw;yX(YP@#p+nW5V(VR$ PGL-sXU|B#bDs2qPHjk0NMsO|$KJ-tV6AHucVT=brQXopbKF=PvJi&4SrO ziu5!TLsu?d+EuxGUZtzDY(+odx{?~%UR}{$sV-@qxw5UTwd=-}-E~DnF`(pzwO5z6 zceO6*u5oom{b{1+G^^5GX{pXA`XyrEh@zSc_sL_NLXTW(Yh{dqWj*icSlryvd3&X! zrMjwfNu|5Jv)VEX``K7nGoOjIu4twvVlHPh6}CPX_Hs3SLZAGOTmi-o^+&Eu;Z zzaPzf-LIa#b7`YM#V6pczw#Jt9Dh6w(_$RDXHHBT`Qh~AX+D=E-Fo_r#cDMDc$&Ur z=+&*)`Eah4*m#g}^o}WZRnlEQG2=Ss*)$*R&!LwyUW|JIa$TlRLeBj?#7&7e-la6{ zM`JZR;l^7;n>P5S>Bd#Rm9};Hd%b!mx=--yN^SWEk>`4J>l*w9r)YtU#ZehcJ&Gk5s|GVg|srB7WuWr3Qq_-Ag)>qyA zE9N-XV=t|dru{y8Ib#LCKVhLCKzH2GdyrjQ*ZNm_b5l1j&xV+Jc?Q%S=Lo&yh?$rA z(S+&e8Ho9~x7GFgJ=uuqr|$kXmi(Z)n(UbBW6=4P`&WB)_rI9=yUrtU-Y|K`v@58B zam?MvIi3KqHq$^o1LKnad{Aq5ExMS`3=sEcGrD`&eLI4tuj9-Hm#d=d+kz|>e%B#~ zpL;s|t_PP3zd6Wa;dcXa___DPZ!Wmp+LY^jWPNQznTU%KpQ0$S0lGk3`xXLllc4U36%cE(Y9U0b%zw6ftCO*_}2lTs; z`IWVh^Zu~rVb8}(VeGJ9h0L$Cm-Bwnekdzpyn7OV2C}gOzc=B-`rxZGdt-?!BOm?=)M?>us>S4)0|4a(&0yk$BIBwlU7TnV&W>PSoao zF!Qa|rgr{zCLeQ_k661BZqC|7tf<>a=I*>4Q(Nm6cjHIs-Yt8Z!EF$T&6Sg8G7GXpM#0-&tiRn9Dcl6%-_VGR$g}v_+ZR(MKppW;-_sH^#>3yFxB%bi)`$S%QapNk-ynaO1w^rK$ zx-5}7{+JST}3zZ(7`W>hf1SZCGCK*>YAj5pd@BQP%pbmTkfvyAq_VD`4{-1qJp5`?|J85r>)0#ocp@&`@Ux8o_Xfkn^!L% zm+~_zH4og=(OcWJs@7ZU>Kg&wP`M^{cK7wyy4%++8Cbu5UGK_){)RLvjjnv-JG(nN zd)Kx1*L@9X+GgW6UY-!q!|-!BzeaE3eiYJnbSzl1^Cu< zcdqa0?OL*-r~LsS?C6TuE^8ugMDIcko<`zcr~glZ;F|F2^I6id+^a#XB|&&+z4ma%3z&%`n+ z==x_2`RaIjf%juK(I(K;i#n8ga_*>l)}+)EtLI+n|AD^Li+YrLa-nB!N)BiqBfhO@rZesWjwn%Gmm=do@^H>+It=p8shA9{u7;6{_n^#x~dS!tc7BSKz7G z#$3ezb24vyt8MJsg~o}O()MlqU`Xq zNzQqSJ=b#Pd8T81gV8;MZM~jV8|U17T*DtQ z)}oC!$g?zqSkI$p&~w&Gw0k=fUSBuZnp}=87kO7;N1o>|@~(u}7kRU=pMiR-{&u|ezCl|+B@^SZooFzdF{&Ff`aSG^=pNbZ)WD!a}hSbbS+Jv@1kpt z_$AnCNBmN3epS3a-&NzA**EoW$$sw@wS!-tdGDQ)zpaQ@JK|Rq@$##vpXYETwm$C! zbM1GqUdO&2?D+EA@_N@~E@IY#<f8aAk8w|8%SGMSWAm%(uFpHpdoc3vgIhq0`1`T>Rq^_~1J#H* zY``8qhb}nzn8PV-xzOyv=2vOz^Byhd@E}}yR{Dx~ea=CRnDYR(b9gN8-$U4P-p5hf zO<;L*ec#;Y&0v0MHqE*k6MOkEwrh86<96YX@wR}=8f^vhtJb6MG_5?#+p^!YY;4rw zk<9zf7~}WB7US%IE94H&^E90Et=Fdh{ymd(oU{JW zdN%XU*_hCZy8Q&_x*XG3>lSz3Zm@UDK{)Tr!}QOA{hoQ3>0ih#F6a9hd3&%UZ+kxb zd*S4d6k5-NLrcF}a=w?L^#b;Atry|s{VvA5UIK@fezoLkw9tAPd$`s+TpTKd(Jv+hwFzXxhroAUc{2u}Paz4yL-d{53`y9dU6 zOM90V-;;N+^~d+*yX+^t`kv@FUS3`8Sl4^l=GJRFy80#`1^bP6JbnHh6yM4B!TP(n zcm2EL7`9v+@2ubR53$SK@*DdRKK+sVF}7Uf9>*?oCl%v{RTe9 eRm*YZ%!x7FOUI3~5PR=l{mrd)V|8;)NdEx-T#I1<