From 201a07064374cac76f1e6edcc8800b7b03489dae Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 30 Aug 2023 11:29:54 -0400 Subject: [PATCH 1/5] Start adding "How to determine the most efficient device." --- sphinx-doc/how-to.rst | 1 + .../determine-the-most-efficient-device.py | 39 ++++++++++++++++++ .../determine-the-most-efficient-device.rst | 7 ++++ sphinx-doc/howto/spheres.gsd | Bin 0 -> 54567 bytes 4 files changed, 47 insertions(+) create mode 100644 sphinx-doc/howto/determine-the-most-efficient-device.py create mode 100644 sphinx-doc/howto/determine-the-most-efficient-device.rst create mode 100644 sphinx-doc/howto/spheres.gsd diff --git a/sphinx-doc/how-to.rst b/sphinx-doc/how-to.rst index 584a4643c9..121e253694 100644 --- a/sphinx-doc/how-to.rst +++ b/sphinx-doc/how-to.rst @@ -7,6 +7,7 @@ How-to .. toctree:: :maxdepth: 1 + howto/determine-the-most-efficient-device howto/molecular howto/cpppotential howto/custom-md-potential diff --git a/sphinx-doc/howto/determine-the-most-efficient-device.py b/sphinx-doc/howto/determine-the-most-efficient-device.py new file mode 100644 index 0000000000..9eec9e92a0 --- /dev/null +++ b/sphinx-doc/howto/determine-the-most-efficient-device.py @@ -0,0 +1,39 @@ +import hoomd +import argparse + +kT = 1.2 + +# Parse command line arguments. 
+parser = argparse.ArgumentParser() +parser.add_argument('--device', default='CPU') +parser.add_argument('--replicate', default=1, type=int) +parser.add_argument('--steps', default=10_000, type=int) +args = parser.parse_args() + +# Create WCA MD simulation +device = getattr(hoomd.device, args.device)() +simulation = hoomd.Simulation(device=device, seed=1) +simulation.create_state_from_gsd(filename='spheres.gsd') +simulation.state.replicate(nx=args.replicate, ny=args.replicate, nz=args.replicate,) +simulation.state.thermalize_particle_momenta(filter=hoomd.filter.All(), kT=kT) + +cell = hoomd.md.nlist.Cell(buffer=0.2) +lj = hoomd.md.pair.LJ(nlist=cell) +lj.params[('A', 'A')] = dict(sigma=1, epsilon=1) +lj.r_cut[('A', 'A')] = 2**(1/6) + +constant_volume = hoomd.md.methods.ConstantVolume( + filter=hoomd.filter.All(), + thermostat=hoomd.md.methods.thermostats.Bussi(kT=kT)) + +simulation.operations.integrator = hoomd.md.Integrator(dt=0.001, methods=[constant_volume], forces=[lj]) + +# Wait until GPU kernel parameter autotuning is complete. +if args.device == 'GPU': + simulation.run(100) + while not simulation.operations.is_tuning_complete: + simulation.run(100) + +# Run the benchmark and print the performance. +simulation.run(args.steps) +device.notice(f'TPS: {simulation.tps}') diff --git a/sphinx-doc/howto/determine-the-most-efficient-device.rst b/sphinx-doc/howto/determine-the-most-efficient-device.rst new file mode 100644 index 0000000000..6424260001 --- /dev/null +++ b/sphinx-doc/howto/determine-the-most-efficient-device.rst @@ -0,0 +1,7 @@ +.. Copyright (c) 2009-2023 The Regents of the University of Michigan. +.. Part of HOOMD-blue, released under the BSD 3-Clause License. 
+ +How to determine the most efficient device +========================================== + + diff --git a/sphinx-doc/howto/spheres.gsd b/sphinx-doc/howto/spheres.gsd new file mode 100644 index 0000000000000000000000000000000000000000..eca7ea884902bc9d8fbc55e191cf9960734c94c2 GIT binary patch literal 54567 zcmeFZbzGER_bv>GpoEGjSSVOvVPK%b?4=ke28tjC3MwEXAt9J_r<8Pe3DO|5FFNdQ z?C!vhGr#jZzxVT;_xyAIdfz|J7d~*$J!{R(J=g5D_S)CF_T00)_dkEd#Kr#SkLSM* zVqyyax-sbA=aT=^K|)M_?b_8#rtR44YNiqwq-@o6! znAp&Nz39KrCI6o*_-}i^|FDbyH)tX+segO+{|~!_SpWa+@oN8J?<3adf3r{cw_QSH z`4{H@cIo%8qx!$?|Mkee|MC1{|7~~um*0QcrNpHFb@I>ue-QWwfqxMA2Z4VO_y>W1 z5cmgye-QWwfqxMA2Z4VO_`e*1oeuU^HrB3A7A`gp_S2nREFH!E*SmJv*jd^;i;T|y zFT)N8cQHo`Cl{NYdo7)(ulbMLE(aVfo&V$9(ZSi~zn=a-?y|A7u(tfa9QuFW{yzx( z|Bk@yfAeO2dZ!yK`tQI0<*Gyui^TqJ>%`Jq%ji^|8yWj0z%{WGfur+*0jg}FSQFBE zGjRJv2(9{3h0M<-P%Zn#eyDDs#a0=*s``8AX;d?WvCZ_(J_}Q+opeqQVUQ@I8)2<< zW>++mlGCRBXEN!eiK1Y(MJ09rv}BT%%Jfq1IKllTuX~%!`oEY=o5%KRd{W^`1uoqP z4%DZw1sOPTxr(F??1WZ7FRD1(2FJd^bTeiNt6u=6jnfw|`z8$UEs&OU3Bd6&neUiG7m9voZOtibxS*<^33fF-<5cuMj#m3BMu z=F5S!+jbZ|HSWgE9l}M(B~7UM@Z= z9p@(+>gmy^t?1`pk1uU$w12pee!pD76Ss50;pYpmbAC87B>WmXLQ_#PxRrZu)~CB8 zvf^0 zdnUwFy5m)zc3(&omPb?9is4dOJw8fopix8Ysn5M+nD4Bo&euBVce$G^HLGdr;Bs_0 z_vf$QYSHWwnkdf+V&YYS2-%l~kGsDLBrXB2SI6*cR_54mQBRh!gK6j0{j_G65LU;w z!0w=p;74Ez9vrUbYAd6Jj}CUy{Qc38znX=y1LI)!Gf&X=+LV048^|YcFjO~fzzFx{ z)a)z^v2Kpyhv9gpI}7(R4~ z1Dbw&apS-^7>?S3!MAtv-uJz%-$O6xzo=$=ynGPQa+vNZ7+_$GI6L8}5np%{)ksHy$kCyL;#g!`3y&fg# z=uSec#yrZ}uLE^93rXFnq$cgiC!UKXPp>fAbUBcf{E&h1Ur#(#uffQdiI`S!64w?z z7qp98!0UQ7WR(xXZ%j2RuXmIF4j!2IKiG5 z-x6?z<49jTi!u*v;?C_(xScXj;NJX`@#cCA76c13wyEjt6CaO)5#7wyD-Rm)6EH~X zAUf~5V{w))DW?|;JhUQl>tqMs_Q?^9xo5))wF$28g1D<4s%`d zsJ5|%XGYCMza28zXq(NpG^mhO#Bow12`V@@4s#?zNw+eGml(_O;oCdlFzF~s*Nowh zB9m!tX&gdl%8*)n4!UfH3#?=une@~VRT?76jsfgVj9f0mpFPKuD z5VG&vaNyN68Z@qfCJUTtipO8x_9KrXB~Q_aff@MQUBO-bN+4aVL(7XVaV_^S{E_!- z9P>s8KGG*?#FSyQHA@VcL%JyVbd6xYuMQ^MN}|-G(^%8)JfV$DB1wr!!o@vBl>Kta 
z)b1X86kSAX4+PTqHHGxl^bnn~%b+u+V_En23UoYN1b3-yTJUirkErKx>vSgDJvG?% zs~a0G=95R>D(EX`p`y2y<*hWs^nJO=irz~hC6;{B@o2iyXFLfG$0BudDt(KrrW&n* z+$yUMTSq=*N3F!Lvi1$zA6JPxWzKY2tO#e;O`brt)Wj z)B!)Rp4}Al4S0KH2pgBTkTz-KQrYH3f|3&WJ}88WLoTk3IxeX3YoTqsifM(T zJm>dzQcvD^dN6Mx9bfZ}jj2iF0YiwLYe}Hg$xZatY5;}pYeLcRLTE_LAU=DDi z*vGsKGJNfeRh~7x?DjSqS?Vn?pLvYB-PYm0)&jbtk%|f0>WFf>!IfW>(4xOta8?S& zl3VuF+%^#&p$&BNiziN%NAiVPQS@T}U_8?wjEur52srwdNgw>g1GXK)R}U4ze&&iIgRK4CfCX!#Y71_S9BW{f>#)&=^LUZwo0>Z7AC^Z#`{W zS4J(C$*_yor?$L9*m3MJpR)BllbonegD(uD?%rzJRZszig-#SNk$|dUyQxqphu`ng zAl8$JqsKI9R-zN!Yr`p}&qh?7UyX|Td~}31lH3JLNd6_9{kj4Ti;~&9t{A)w&7sim zO)OJY9WCtRDN%C5Od9nz1VxRuBze6NV-LLK z)>0O9+B20DPAsOsD;#K>jT62FWFl#~ItD!-jl6H-A*zWJcQ4-41wKnKMafV zhV`&$7*zMi)o;o)Z*4Oi7Olq3u{XHEOO9uT3vp8}nFVexMqOA0M!XJWkH()wnUy#9 z$?fF>-=BimU@!I{B@M%>YJk`ae6O<#RraZ*rB17afBUM^^9d2OC4O`x?#!XR;o{Ks z4a6j|3>XNG;{GlzY$}~g(j8G`6dTMgyUnK6`N{m}^F+MO=>vh>3JQO=2Gt%hNZOK4 z8IFdmK-L@0qqoqb3r=kQO+{!(er3My7_fEG*+eHO0^ovrp~5l ziB=}_;hezSbQ`_nrsP!~ixY=pnUdEST6(aQd*`;}UTHEnjC7$1Q(LL|M;wZSCt{gL zH`dONCDoc3$W&J1n8qB~Ur%Yg5Dr)*gi@bFr-jPNk99TL{Ydi92(mCdN=kF{>A@dI zdR@1M>*XEC*RB8*uQU=g9QVc2-5TV5K1?8awi_+(b!>_Jbr#uu42};%VHcdoP0V!Q zv?Pw*5{?vHjB|w7wK_JVXAU!aXGJd+&vK2!(fsVM3Y`D+gim(Pp_sKN=&8zqHy1V)D=D^OC#5)sP~i0Wc&1v07xK}lPOWE8(;x8Vo8Jm#Ej5|? 
z1bs;86v610AM*~EWKPKuIIg#vpK0GqSerqvqid10dnz3&=oPMw@uZNAp@P5RecA0n zPuQ6LqTKwt2u%)-+_W(YMK+a)6?eU7#2hRL5Xni)H0YZ zOC+i&U^l0hv0s}{lE>CO79=+cU4t^AASh)q`^&KEj|Y|4>eALXQuM)HpVr4O*1g=Q zO_$TO(JYGnhyivO8JdTWhhMS5Bf>DO%?l;!2IO(9l+{S9;I${3P&0)a7jh~*6+pYrP9!JwPkhFAf9jNMLhR~LVRS_p=E{oV z-^Y)fzdK`@)&qW`B!D0MZ)*?h8XsDJU@V%n?U7=0gRRraL^>7H{Fr?5pS1)}w2wjdQz&U_$qQHR z?Zn4VNi2s;^I_p2i(`IrSt`+jgTYco#QZ=t*mj%_494N{E>R(I^!QN|>37ucP{6UeYNX zu}@>`v#jub%PuM@I?bc|go)}%&VlOv$;^2g}wAj=XEpNPO?*~&_X=OqO+|8gq zDw`%LHb8Ez9X)DXN9%6nQ?61t$*D)`DF1QAFSP`^mzPa8q4OxiDH=bY+R!p-S%f_` zC1tZ|N2!hR2(LT{6;=-|%~_}Ueb z>pL|BnweAT^Ape;p@Fbb)hG)Qt&5*Vf-UxiNqsGgn^;YaS8}*jT^9RWFRHmL(h;8c ziMfxIrZajXU!}fucxe$;RX1TXWs{s|9PR2;i@9(8v08T;x?KJU-IaXsL2D{@#OCl# z4xAf}iKC_qlQ7k}lLl-_frDcOQYMGu$Jkg5Gi;}TE$;lAeG(Sv=h4-|(|r8KCbV9h z$b&9~vyn1-FbX?{H#g)d^>Gyrk4Qim;;7_hl(3)hgKp{>X*8|@MSSW{Pva7)k3=o? zoQ&g{MlBfE6v)S4+Y9s5WO8z^;79Z0@hd9~4?QEWX8jrz#iWzGRSnH{3dO*xWk@@} z1lu-N@!MTb1&Xi!@vKk1C7vsv+;TU~VjJf6OVv2D)sk=*% z)E*tEENSJ#F_I#89Ko70&i6dgBIz&9boQY%iSs6(+!&hP8fhKC&R7Mt;T;XW48+yxpkg#JudW~Om9oaJW$fppGi;lr! zmjeGB-vQsYO{~dN0>?e}AZS4Y_O9H-)~yPsfX^vd8I{1MbhXkf|0LnpUwNcjG9EYG z9&#;Z6WSD$1AmQNigRe7@&(J`XjqPKU*^(;FKZAu(wz_KbBa{AkH*FLRH5sITpD=B zAH1zy@Z;|rp6Qvu&4-b2Ox%4wK;;CVBcXuQuiLP2S2tF7s*=p@e8Iwbd5~TrnlELC z3@C+4-oMOp7xRjzt(nq-H&ouD#Ox`M%gYu~UR5Gi8{7y`SOoTjXb#F$rUkP8} zc9hl)bi-u5=e+H`Yom&OBBs7kf=zt_x)PJA-<>vg<7^T_9FmZrCe9nuU9eV|f@!mJ zQS)7!Jg1eQLo8(vv?3`1y02p;!FtU`KGScy@bvV6{&c|UFPehdI;_9(14B2@CrjZBf zU7xw=3r*a;^Pd0ip2+pyMPiP*Gl>_h#*~H_3^;p`iLJ|~9hqHJ*IUCZo^ZhqjXDhc zwGcfy&G4voLe2&~c7LEgofqZVu`{yy#i8lw8DD@zi<7$9GWB%2O^WS(z^F}YHf&}Zuiw40`*P|FDSzI|*AUg-s?#gq7rid^CD^k|q=?&1T! 
z4MDYM5RI5K45`umNn%GSPF{b-%$BCIyQuuN#Qs$t%KCRoMfZSXVVWiF%pRCV99_~8kV;fnsQ}= zcjBp(CaN7)#J&`aKfR9}rE6fjYXe0E&R`X*lljLFwX{=Pl1X)5U>Ot6u-@$FtjA;z z`JPRolA8%AO%0}BQu~=!m?jSTOgkMAaK{uHv!^YQ`y~+pn1RScT%*-21}e zw-R{v*$tQ`aKx9^7}B!ZiRnXPq3!mE+liat#9BGLw9ch-g%!MT_;QR?Po*12G!Ume zlsBbt?7BILekHoIPz8|w*L2D8llP8BM>_oh0uaZq357)Tzf+l z1&g@PzMhLP?nf0p9CL%eT_pwQ!y{vuBdU2M>ySmq+jKx5M>q1f7U#q7ALB^la&BQuu(OZ!u}fE$7j<#$Jhu?{#LG!FF^z7_$bn3LA?9Cw%kO^oM%KXH=zLf} ztEVrb#U5VxdD{z@21QU$XfY-`#!y^ zU4n6R3{H2#@n*&$EHC`b%|pjSYDyio-ZH8R>Berpw5{`_j7lcVKV`X@Iu52Rto9r%3=Ed|aoyPB9H%heqS{lwz1~sG|>Co49x5Zi<-P zNp|mBK<%My%_$u^=(QD};+NCwmtkPs>_^At4<0DAddK*u>G6B`=6mw&*d%}3w7MvYeAbhISNEb%^;+21JVKA-dP?TQP%dH$G*Ph5)SrE-|u8i~@ca=1BPntx8YDzw(| z=AOspam{uz@`o;gz0@G=`Z1TXHiTnhX9WZUG+;JPil%*t$IVfyc%)p&PU)|L^uYiY zUDANBJ+W9kDvk@RrAW)HhN4GZ70!0Bprl?M7=6{iX326qKPSP@XeF^_*=xyXb}b#q z38cBv?R-eP4Qhu*Qej5|vz{~@61!*O^;Lf^^THmQnz4M%pbuQazZzbp{o%S>2a5BO zQ1QwiVk5Fhe8E|MYQjQ(M(n4cZnQW4aBozNUx}@EbkSp7Dt!O_s^HC;MNnI}6?V?M z1s|&lu%_V~%YWKIw-yA6IDj*J*T!y2>t016haPbMLlN}CU>`1ebs!+Pn=hI$8v`%R z;nv5qaP;U!!QP+Zm^{Un9EJu_oRfg5o=XO6NfkJ2%JLsNr}@uTOUNEQ&b=*nv4D-a zC^a6}n48tZ&3bG3ne@XnDQp%i$t$HW$sqoyx197O+A!jYh7hW5`CetJ-Y zlu3zjQPhXkt_+-AzLZV(8iBfLY4ClQ#rpQFpk=L6Ot_{WKKLn-riCNYw{$VVycD{m zR7d9qh>_O&T9_ZwN5H#bc@i`S{O1=kT9l*u4yZgRR0j&-G#;!rJzGHe$Cv;D3E?X3Z(-dBbngdB+Sb3 z1FtY+sTH9@!SrS1P}@nl2U^L>R*elQucy^+aX6_g&lj9%N7dCrk{n_}9nP^ZVu{ox z6DrDu38=1~iV2S1%~NxQNDsI4#tx5RyA67y@I7Wl_Jfg7L&##FnhBwuAp~;Z*t^x zIF0eHz8xZdHJPToyvmP$+C%fi<00!QB7{Z7V0JP&nJbfNp1j5S{EsF@hHR0tLCJ0tC=L4PN6N4ky8I_ zJOW$Eae59(9Sf$P%BiHaY%FTmrXuHD8Gc%H;Ktzq-ZseyN^e%;Y>FrKbMQyk=T08% zKv*i9g=4pWaxw1`wu2v|rM8p#z8lA(@^&Ru7Z0UjJud{dp#^;D=zGlh{7l5(d&KVD z$fkY~>#(`ljCOklGr{E|o7ai@a8sWhg{r|gm_;R$mud>z@xG0u7B*95Kt9=j4&?7Qd%^te5ZWpl z%RT`DGLTzIkDS~2^?(ut|4pQ{p2z%6a3rnpNFlSsdzn(lCdhnR%{E>=h|QfNp?@x# zc=uOck|3%b4*TLfmlujXn*+@PZ$W`Y3?60{LMXO{e<>B!B7+0z^z`cj>B4?^)wZ1m zEc7Dts$arO^K&W2`a1uX5rv0WQlP6DOuj!J^Hmv!RF)@48i&##d$ATl+HQ1%*`Rf5 
zA$Pu>j7^I@p#51$wju4fDg(+3X`x;Is@VET3bPK@qxsEnWGfWVlZp&dN z9>)XnOv&}^aQLYz>aB0Xr%#G#XEt;$U=eO?@`Aly85MkW!6SDm=!m%0 zmfK5Vh7!zNbpqeoKl9)>9-n(&h%?+g2|uwf2w-Rz3TuRMh>w#(yrZ6t+yPvR>3w&7?} z8(lG;$fdT4@sO$ULTO8Xa@gX}l;?^1<3Ih#t4$GW_L#y~+=PNn7jhf>1pXx^4Na>a zu*1SAE_N{(PrEj;;o^#zpiz&dxlfp>k|LF@yvs`hec_*&B&aZ1%g-&p&fV-&5q(>L zAGIWuUHXMD=qrcCW9o5Xw6^Y^`_b4xCj`akTwo^B0)&Pfrw6%*&@Q%!v=(3Cdq`y##_!+9PstF6L&Y^GO%_vt#mUT&LxoP)oI(Qm<`*wcA!G zo-X9F-uw9OjonPWwGKj$T%Pw%Q}EHv23a@kscwe^Rn2QbzQ-zha6O4u?Q-Vb+N#X1 z`4H{ue}!MTH<NB!4x&)Sl0bn^fotDbRC04C^an z@yBQ_8{@g3$?Ig$y5tIy(OoGRV<(3XskQ7%k$_e&3c<~X9b_^*f$|deQXfer>{&R0 zltjGu&A%V{Jcsqn^mrmNGm_aAmq2{3^j%n-dIBpfP75~PpF~4{$>Ge_0D*aftFY)$ z2i7k?&L6y~qJrB!yg>FCWoqo9Xuog#*X9EPr9-31Y>PO{IeeOnEm;Z4ilewB;@>`* zHlXN2Gme%@lDJwr3@`daM$wPk^6zt=lBQCS!-G10GGZ}@mBxs1wMCebL z4E4CzY|=7M3N9anj6f$ox~+x2&fNw@`$=Tdlt9W4&hl{=g1Ozze42T5E&JRxl`L+y zki~=oD$H-y-J{))70a}9l@;lj(A@>uZ>7w*wuU*a%jQ=V3V2s}yI{h5b+%DcikS}# zAj?{D%zt@@7fY)Mr>@yb-k$Y*L;of+T)CcgsAR*+T$+1c)uAgdv*531K*zV_l3=qv zCQeRa_x9gn^gA4j<4o8xXC=D%^EP{=uSqV)HE`WvD)j$r44yQlV=p54kSXyL8{CFI zBgFCefD{=Tb@B=M2QXpod~&SP#A*p;w&3a*a?kPxOTHwG_wr`xs#cJ?*A1TuX0%Pf z`6lI$!a?ofwD3q0Q*CUa8*fDMo->KYJQ009S~mp=22D)&Q=aY_wd0haSpoIna-KQk z7|pgdLb%>Sw)IE``R^>C${7(D}gYGhcPBgqE6Z#eJD% z-MAnjHq?D-v>aQDBMMpc+?vR`E1RNP<4Du&51ZG|7e_^FCcnfHw5Q+^dm1OI2Xej8 zo1y@(QU2WTKn&adL?Ge)azxBT`DoF-wBky z$iWQlCidH`jQm|1XyFNSrnE@JhosdBOGAWIx;K-qY*0jVRD!U*?`gU-TExZQSV<0Q zCty4Is_@dZ9mp#>L2@_p+2YH-psY+<Tc-{c5Wuynoyib4H5MxN>N)?%hZOi!_LhmNT2E}9KXZ{Z-xe9 zbM!a{Y>UUy)>;M!S4@1HBT0UlyC9=h8nc~k9WLs!c{-6r}cU2{|zVo87(;J~6 zS|=wbJ9KicAjWS?1&Q&c3)jqY7UB0tB1*;O6NWc(qZB zTGJ%Sy*Ey1aHEEA@t#4(A}%`Q&uC~@HPVaw!}-1QhTP?2G#Vs62#ky65w^d9mNsjU z+`xWt@JfVUn=iHQcjxtLO*C80tuaJv5r*sgz)dlT&LA4v4>KrF&Va^SwQ#Ycv+yo0 zhJJ_z;K8Q-obH!VTSqLfd%YLG%o}NNri$)`8Ao{gNfn&yyvz2Eu0hwNZh9+Qd!lsy~I9sXpkVXpScvj-bNuC{NupoH`?NAoq*W>exww z)~GDF+XvI0FY=I-ZzG>PD>6H;$38VQV6m{8o+nS`GT{}pA*O(-UFfFN#1$~h%f}TD z!li~b%5`W&KL_ljH{n-jy!WBzV>EOrJDtQQir+ 
zyRBq5^NE11n89}}O(K{1BZamyn=xHFlAIN`Aai&#UnlK?HzF;A-WpE`*0)f`!B_m6 zPjsX5t7f*W>?uD`?6h#^#wL=OC4jVpDfI-8#uHt6Y?xk%%5&?;Z}3+BAR~#2Tl&)S z=O);qZX-AvSV}Sjt+D*?ecq~DNaMbG^KlKcVC}MkjyLtk`3W=dXQ*M2?;AE)L|L-L+7k#;4VKOU~e~!)5#Q<$BA;(d%Mq)GZy1of|FL;7BEw+^o%1=k?)Y%e}bgh*3P% zP)ON@eOUZAPuS>#Cu~^88)rguBJ0Er!GO9kG^C`4=ASphF#S{}XE%(@H=W_uhi#yz zbcFhsy<|f~bLEngLReqTrOJ=lRBR`}7^fA&;VxBd(UFZ<+9EErur1@iZ}@|b4yVDP z$DsLgD6T|Ipi@_;^B42uuxQj%_OiNJFnP`ZS~lE_jDB=Nv%fd)r>%nf)&|xmZ#_QG z@?^g~J?Xmc0(4II79<%hq12mR{Jc^-xBWGiZYmzZlSwYnT|I?84sO>ywqh*pE~v!q znOe9W5Y2k)0}<&Giu$LOyt~UESMrhs9)tC8LA-?Wm^}V=o@S*>RQRLn4x!|XX|zp$ zIvQ+tV9n$ZdY}4QSgV+ciK+t9nem(nhihX$E#Vy#H-H+#~#?yZ|b;U6=m+7OQocM0#l>+-Y@%3RXch37U;q|`_KNue}~5A1)FH5~Eb zQLzVvEp`uB#F0_-ZQ@+2c>RU#i!MVSl|=5Mwgfvfw+U1-)DiY%8GHRMp1AZ);j$0? z=)0&N_~O+J=5juhCMe9s2Dde6S!F~kjI!~5z(>KzSk_3t( zZyI^X0XF@7Xxbb*+F_N}bbUEQg2xl#GbfU^ z_U1tCl{p)@yO^F#c7OB~A+OsUGy53O=W-3U7OBV{aF4a=L~wz0Ec!hxB&Bpku*`Nct!N^LG6Am+ z&Y-}dR|PmDk5{c#@X_Z;eUr-r2MHl|_z?B@yki%Jm(sc~%W$!AA$8BJqZ1>dMEani zWNva0E;{Rl8!}dMJ5m41Z*T$53{}B_=k^#LG>LC|{h8mDPR8T;5-g?PIl;6g$z=F9 znXlU(%%kPX$!A6e4f(kTu~*$GTt5&Ob=2AEt1FPSU?N5BSwMU13z2-ko~Epxhn6u- zly)|pMu^6GztRvIpMHb}8BE|Ynq_P-bt5r(9xU8WA=5vRzqM0 z_-x%nuME-KQ^1UzC0J)yHQhg4f`0dZajoM)?3m$mF5_#Cqum&BVFjFO9O=^Jf+oU(qM8riNcc7c!DGtQo zVUlDPv6m$-3`FWASqdO`e4CYzLG?T6@VPEpuO(nVrWETA`1OK+er7Ik9e-SyBi9eY zhJ{on(qjKsB2rRbLyyNFq#?m)b$>6dqaz>9C_q^kMc;>zla9aOQmqG~6~t&Tg7}w3 z+sI8NnBtwI7#HP#`~1B)KHZ7^e4+}2wF!t4Pa>)Lx$u2l&bvi5XHRefzldcplJU8h8Jf zCfXBbJDb@)&3Z1S=fl-6o4__9Mv!w=gLO)X(VtOW2y{#k{F)j}!EfSNN6=`VQnQqT zo1*cjp)J) z9aQKxmF~>Qqa&F%aIynZLWUrVnBDO^ z!Vjy&aQ4w$*k0A6)O8YMFj)uIzh5(nkaRv+s7Qycis^^VUb?cRNGRR6j`D9Q&>tB= z;|~!pp=NiI-kxk@x__m(c&!+ftPR2b)iS!85u!RUbTdzq9YoEBbx3{NCCC_4$}Y5o zKtu#-%8*It+sI5X2Y_FJJI2bOQ}K6ALnI~ zu`Nr1dRt@JBM%L_D|tot{>eyu{boXkY`oc3nE?XRh!h%SDM`L*3;2jNPXyaiMEi7H z9}749Xu?OW5)62BMR;IwExl9~VvGJ?VQ<+pc0_ob;;W_U^!jmB7n>#0m`YQ8MG&o= zT_}(r^prm_*-he)4|0{PEbcd86|vw2jXlH6V6mxJaHr2E=Bqw{{0t3+&ViZyPoxsY 
zWR4}P;v#l+#U`eAtBjc{PbGEhO6GlBna(Zx#*|tEd91-iN*3uK4%ofmH+B!9alN^? zd|8{;R7c|UVi#zh-^j0MY`_IkPW~aRgLFh5{kZ9asm4is%xEo25XINA9tXM;RRn{= zYHnDP!Y*y|L-4&Swr)i;5_lsiUmi_Dt}F~}QlVo-RlFww{DQGP>k{n^lkH4G}|DEoVP2mM{zqSF6J<9D|QL) z+}=*Ff0t9_w$H+?wKjMiTZqH?%LFb)F=SQY0Z*S9{NeB-+*>gnat>-4Fj1955ZjS6pJ> zQb%H@&mnGXTT2dd3()s^ExW&rzu&TeI^-6U!wdy}mz5xX@RMHpi< zANzujz$)(}n_j*i73T~|aa=LY-rYtE!#2_WrSUwkvQm(090b#ZQkqp8guS`>?3v3h z{GKY(_a8}PZs%Vynf5PSXMPdX&l~WiO-FF!*9e?R-ABHM3-NLNQ-LQ*;*;5Y7#`}x zrIeX4>)wao>u+%vvl8KA*+jhhJ&{Zl)sUL+$~$AjsK0fba7>R2Rx9a}#b-UfCPxJ) zZh15N(|&@<9tyao9Km*f`@~g8+-j8AQV7p^A6dWeqlH#=$M9l{JxyD+9)VJ+eC?ws zJ}oJo-&0)A4LhxPkE|}6AwLD_6_!luqb>jC?t;?unXLQBYz$a?i7(g9WF|77_{IVk zQC!<`*`0?N2HxaRT?d8s@8@BeXdSLE(ZSsA8$n^u5}mcMBNN?pu9n_Jy@>_%I6o8T z^^3_^dQBb2q zNUKoXD4AzXk6_CNY4Cu8o%Aa=kA54Kh_oN-xRxG)1N*-7$8U^rc}g*5jgu4YZyiOO zH@8A%TpO>sqC&IZIYU15Bu#l^KtCsl`lhSj^8F!!%zkStJ5nNOj7SP)G4}@|VOfRX zmV~vie_9#4JbATn!|1*&x+#h$9-oUarLDSxC~x|6<_mxFvX`xvH=SeL16#6;lsL*)Z$!3 zKD)AZ2~G~rrP#O>HfwVunxFdNbwdhIwVgpjo~QD!R~8{VIR~lsLAdK*%>KmIlS2A4 z!SC=6VXt@;4vVz6W-51tCH=Y(dXLz7+CdYqyHm7UBF~HHVN2KO(7vuJQhF|nJE0`% z^R6WGlvti+dVy&hd(bltN7j1Ym&UG0rj%E5NTM!Uc0LzghB62mxrkZsuSc~~D%noz zLiZ9q?55*_jf2AJ&mTLX;n5&Ie=sLQjbv1KEhO6`_t@b{E7_O6u7ZfKG9n&Gff;*e z(Mz=;(*86QkAKDqZk}1z057Q| zGF%!@)-j^BplELOj8f5^|KJ#&*sp%A4ldpk3Cv~(B}64(cX85MUj2mnsWjZj$p)`0CS+vHYZF7 zMldJLIe`iyiewOwAQF@e5+ze-lT=X0oO3`O)99Em->Nt7eD~aX-3_SooBPK*-#4?@ zS$plZcNge%S69_w_&_7PeLN38i`Ox2|GhU@rZ2`u4O)^p!N*7#xS-Xt!|=ps0W@oV z7#ch7CyN$;C&nl9q0Q%TXkqHCxPLScCM5U7`?mJb=E{8h5S|L+RWumA))!}gn1QuA z4TkO~oUuuEA$C3639iS~As6)?(eG1+LSEB!yl}D=tTy~aGJD$Mn63Rt(193QMf@K4 zlEDq(pvNHWS3MA?&p80=t_sBBoGu=(+W`(Qcc3l{^l|8k6Y#+N9NllZ8_&T^7}VET z(bBm+Tx=QwCyzFSlOu{CS@$Tqe|b+VTK2=D(G#hTW{hEiz9E|TF@rUyhvD<72GHS(r=mTiGErT{(kojJrOcP@#Mx6Qqg1* zzIggU(D@n&UB@<}hpSoVi{eBtX#SOj7&tLHMI}1ToBC57s@kNb2<2&_ewF^!2v>_+2{&3Xd0L9d9=cCby`B zpN_Tz-J9*Ot3x=(>Ga0*r5(}LzAt$DhL6feNDCf^K`&$$Z|6K#i`cnwmTD>8F}dPir}h$*GS)w>QByVgpvS z+koe1SEwQ0Ka6V6`Q6fTAC!k4Xe!LI29%pM$vn^xZ-w-2_#Pdla(uZ=^|wZ0ME 
zRev-6&CwKAgjmzb3qR7`9ah4q)jqhOYE{Uswi=!^IzR%(b;6a?mcZ^s`-HKd!=Rsg zCM2!7OYgKFNb0qXf^Xve(9YAB;3@6Kxc$!u^qL}+Hp-7cWBtODYzyy8Cf>VCtdy3W=d}}$bsOdls)@e)9rbm&0 zue0!(H$+>eZUrY@yQ+hg<<6GD=l=$*2R_yaqxH`!u02B@$u`2g734r&?<5> zo^fjeOJ3B1F;B$n$%I1ucp;f=yJiJDzAu2H^3 zu!o_>GvUBMCm@ESD9yS@n(QTbK+_#|J_v-b@7)zWb+SOK$8gvXbY6IxAdsW`1aPcq z3v0#=z}Mk%B(B>Sc-}fe(LE&}k|v!cTAwCB@$AF2eWrLHm*HBtwLJiWl171E=c{y% z`22>>)lzWZ)2g(scrCf5h6TL27Dm6+%fpLnDK_h#39*VIZ0BtcvjRU5)ZK>@Ln>jv zl2FiE5f1n^oAw!hg1+h#1Z!Y7Ii7fu*zDX6#nUqBu!|$b_ZkH-Y{w{Y9MFMOIz9|e zMK2(SeQU$=2VJ1WOJ^|fx=zxx!{O|v1EBNUUh*y05oR5lhn7o16#g}IVRef)9ZOeuD8S2Ir#-(!wspw}Zs z)b&Kz-0u_JCf=u5RlLt-{9#LQ`W&XvO0vTXo_@H{a3#4sEdYjJ4M4Mp)nI(ceoV9# zuip#l168aj8d+?``5n5DyJnl=@_a`c6xJO=qFUgTDQU33bFSiw-URXcJVy)+(1iLX zwlJ!v9rkuyO0^0SF}CLO?6nQXK)W-MIRC>q)Dk~8=epYAUJY%CiLr%NU5w#Nuht~< zMjsd~UQ0bE{0h-NS}Yje%m?qWRk87H3*6hz4bAe0f%m1$iV-D%T?|a|pnno99@9(w ze*Xh`{g(kQG83=Q8f^zdQXgmM>`#CxbO9Q!XiEDUoDlC_x1_mY2(el5hNgWt{9Pw|5S<^bDt#0gbZ9#<+>!w*|wV@hXzgG6f4Owxdz8RY8ca24<%+=#|~wpq6PYO!zhmFNybW4^e1mcMng5 zWw(7npLkKn9`5++q`W?RL#;Oy*HuNo;ntA;S8p&j3V@;W2hf5io5|Q7u4vOy2cIl9q>gb9N#N}X*nd$J zF}Kr)fk7I$vIEfmt-ccLx6c&Fe}I|eJ{QJh*mTv*h3Hh5e=inY3C6YW9b{o7va zaQ7+-u^N^zYFso1=S71-K?HU8ccDR-c0A2;(ic zk=J=kVCA~TSr5*4gdtDG>p*5Nz%Rqrf{Sf4Jb7RIzF4z5eK*2W=+&+V`P9M~!p6r@ z^P^eW3oSJvJPU$zwaA)|1NA6uV=oMpht$sZ$eK~Uubx`A!!hHh2Cv@00x}e zogJ4*$>VnM_{?lQ#EhOntmc~I?*3!Il{Uo@ZI(j==T6Wew;3rKDt>kjFT`^P{BS{c z4V>r~1qUs{vFZ9i>~q%>;-U{3HfvczBWG2G(D}YzBU4I{4-MAr3NJQb`mzc zA1eNR*Am+%_!4s`M+{ij6{pM$1s|_MJoQ^9dQ`fdy`#=cs_2>qAMc6x0aQs7&#?sK zJMsO0QE^kO*wq&2o+^azL#eP%h{E$4)@0s|=~%hyRGMOUjC2s+i)M8f-#=XYNbTo` z5Urx?3caJx}B)01@F7f;w`aEuHevI;u9ilaA%Tp@5c0WOf}L8HQ!EBuRt;KoKDX#Hnh zq$$4GbJQxVx%@5J9KI2jp4kDzChP=FA441&Sr@EkcN4DbH^Gd&!(bWG0g6}drmh;j z&|^zF^`5d*al4`TJ2cxDq8s+X2M#{)c8V1&3+}3zJ#`b!)~f+iJcdHIAbY4WWj-;# z;R9cQuq+j|=V6rJe9)?$ zO>Q)@hqx=zaKmIE9yt<=59{nxZ2iMek#aNwEzj?PCPQio+x^7vd;hQj^McFr1FL=903+;O!qK#}9LVMk{ zP*XTTbhg{U;5A9GbzCfUw;2J#_`dY$IcIohb0~Xc2YXU9U^koz3MTVI=E9fzJ7HOm 
z-Nem&BJoW_sM3E8C{h!Il%4L_(K#Rc9`uCyj{~@$ZTqdn%)d7bZ>rv+79}SQW(4* zIGYx%yhoDuR3f3(wQ2gr18_QY1l;M?4j#_-fIo|K;r8SYidh-rHHfA|;Qr+8(4fST z+??hCtHtv`?(glP&KpliT;~fn8VMx))fY1JojsbH1(7`Qp5l~So)A8>H_UJkg<)<6 za8rCPTr;`CcaU?&MDL-lUW5cjDpvFh8oOOp8Z%o><;GzJ%GZ-f~d4&d8pC(NngO{-Pz z4G!beq29|R^!{Lt`6<<4Z$<(J)fex{Pnisj`|N|Gc~Ov!;`@#Zd-2Hb0L83+#$Z1> z6)InQMxAo1!@56WA>#ERd{CH7h8TN6Mo2BR6Q7%5TrdLttDd60bsb?-_k8rpD5l$; zT_E*OdzeGkgO+0>=oOF!r`B{My=o@lYmE%N*TM(I@jH|QMg1}8Up zVYGK9n3-=SQMGev?xp}d?ivl&A6DU#i~^sts;H&>Y?I6viP}c4G2qm(W?ewP_4mo!>6?elOq2OB&CNRtp76!ed3nF zytj*}B1D_EGVOpm9p*!ut8O^mu036{qYD(ft`h>rRe*^T_AAEC&Ja*@Az03Ff=-i|{mhqQTARIKUVR8g1iE1ATMAW*8{_Jh)^u8uDU7(R3m(hF zYvC0e;jZ`|to3_6VQfqUBp)3|Z(l5-Gx9Xyn#XM1EB^N_`O`pndm{07FuBqVO%^fBVi_D%-5(rQGc!7zvD}8UDj$4|Tz(9M2plg|p^#&K=Tk%@R!uRdb z{I7PT2IOLp-b3;G%wdo^cpknzk_~M(MWX4XT$0(>4NmuJ1!nz9$nz*C>})n3JWSru zwyj6wiYMXdHTE;vSlyE*Bt@cbei*Idz6(PA3Sq*2Jv8dK7&cmjD|RQ&fm|UK{o@fQ zz04!d@bXF7}$TF@g}2KfDMB&@XeCh=}3;C$qADDIj`R=?OK9H_M!b|-W~ z&5u3NuzCt!3Tgzi&W*>AjD_G$&XUQq^YM5!1G=PlFC^mgJDzA|;?1i~Q9IWcJ1tE> zz3L0V>U9omeS8cT4Btn7a}9#j^ub_!&l4Lz&8HJJz3AuNU8vs5G#pcq3-3lLh<*PQ zL>B?9XF4c)RQDE!zxRci&3_kKTkXeMkbwzS4aBR3CPPHs?x0Ah1vBXVJ z=-wBF)ZY0OdE&PaA2$DsHm$iI?MC*6dbJ9nn{Ni3%QAv@m4-s}Wjm;QkYI)MkhgmD~He|X{o{? 
z^quhK{%V|(-cfiqJQCfmwFH;d2f-=Qk8E}FgS;UYuqXKz(K_r6tHgWWs)mon4S#8X z#WEXuzQHp1o-iDmtyx3o-)#?pT?d1MPGty7tPhS!J7KF&Rk*l55Dv6k1+RHF`(!ffc>r%;`OCD;ytDb zMEg~LTv5=Tl$`K}3J;IN@P!_9{R;6rOwR@A*CYZiRC`H6W^EUrhcp|$h>z4d;@$-H zPX&^!lOv&{eFARUTm>R3#6ruYO0ca>UD)L72jkYdQlscB_$FRA-#~oC#+4;A6ld0k z!8dO^;_|vNTr`Lm45lw9#gK_-(ge^7c7@(2(%?~4IzD@v3vu;q;b^UVqC3wH&D-^d z;FkzV;XSd_95aY)9*kDCLNQLy8~g+Zs53g1p8cRt2eo}q^(^gB_iDCyJ%;!^nbxty zZ*e%*-kbsJ=gomf;`x_OVXfiHkfrGJdL`Vymjzyl?lAV`8uTy^hT}#NiVIU(Kzf~% zu=l$OjM}Cv95FJ2owxka^yvs#XVeI44Df$mzD)qaJ7bZGyTB*%Q4WH?g#~T1vr0fCEO4=gmfL2V|eMgIrPhN2a~%iVavzNV4SEfoN}MdxAU&i2GrcRo`sl%M%V<9w)OJO~iv=(va5x&dG=`DGY{97V zH~QsVrg$CiLC|Rw3)Vj3bB_jzpZN{EaYgOz81p5VxJ)|%uW1IhvU))JF6o5dcjrUz zp1%nhZ<6qC@+0zkQ!Z$hEW<9Xdeb$gPS9#?F08FQ37R)_#lLnn#HR-r!IIEuoO@P$ zhTeRAQf*QKPWBY<1Kd(f?j6^Gz#SPlCFum&(z#I4&h-$knRx=-i{}uH5j*f@i*@MX zmy1J+lc8koap-noA%rfZbfsA>Y@KKT9?w(px8r{)W>xAA-Yae3*s=SkA{8!VU7!co`aqVecs`QYqK$Yx>Uiz1 zB=cDbjd01OTkm+_C-Hnt-#>z>$w*)DpS=T`dPIxw!r9 z%V4`}APp^v#~P6#cwy56!`tmb@X&8@s8y5>lW%Uqo&zlvdP`?OOx!4Zd@vPHI*8vF zJv2eRH(zL7Z-6aP_Hf9|82X-#!LIMpaFAsr9XcZpJI$~mjUy82DSZ!o_F@;gzRr{U zZeartNkdF<*aKC8gJW zy)=5G{s$|(kT8wpE1H5OiKMMEUeohOvJLxPbb_JFw}AH$U#!!|67KqcQy5*h1AXx` z#n{wV@aDrl@YEP(Ulc%)r}&xpX(4rXivv^f zyT$p_m*MKb6J*c15?U#zA+2WP01NZ%NLKVd8tbr~RH;5Vd*Zy^w9W=o;?lPjjMdZ! 
z)0SSaWNI-vHM1(6ZRUt=qH+}ryiF7hsRJo4ZjaXoohQc2x8voD2eJCwL$K=53Cv#6 z0hjEEg-lqFz0&)UdG*EXLDxsZnr(?fvt}tc#faj{q~8rkow1>YQ8C!BWe+M09YfdU zNQ#t_i~P zhYEdzr7-elJqWapC!@2h!MM|Ydh)|w__OMBvh$h-G<)2fZXOT~XWvEA+beD90*(8M z5~Cc5dT2#EJ=zQRjuztJ{b8^{^EeFnR9`%gmx2q!1emyYG+lqy2|t_e!(AdpZ->7KDUpM-1E69t9M zd6H68ndDErPp|7wfYk?|XP;`{5C>k|4|kH5fOq??xIaL={(Hn(;@@r(H0>bXk4tUP zOS1_qunoaKa~q@GBtOOF8biTi?`1k|`U3K9=6tZJuLG&#ZKNv(GyosF>Y%qY5=Pby zfo_}nh@Oeq>U$v>-OvqkG7`Zsd@U^09|w)2H{7h=t@WEk6RH(sh(1f83S-=mfc7oS}gi6*8C zV1Ad5@MrS_SgciyiCALewn#?I$bVM|;c2)Qnv>#xy= z9QV(Mi)+6VkHfhTnr?%WEv=yWK0ERID0>vPBQffZaQI*vsoMMzneUTSylFBSpBRVYHSu294Uc`rXSGMeC2u>N5_u3Yr~qznSdXh_yJ9DcV^F)N z23#7y9na*u5Yw5t*w%WFFw3$B^u2c!rv=2(1?LW6$e*_2J%MKM#YlYC*60veM%TlP z{^GTe;0BL^_d$!gTVVT#KNOxpkAx?i{e?%z_u#%KTfsLn7Sr=CD>lCNgYjWs$)kiN zr09JfZgm?27rI8n81cC?Mz3|Snx6p-^Yez{y~(&$yr#9)iv)Zh=Zp)FCqm2Hqj7N; zYjkXvhglT>i^cbO4n=-&s!cYWx|U3yET2qA`&LQUXfA%%+bTZSPwNQm6+gcn8)t^| zzSn|rXXBt3WWma>6dvU`V||+})E1vV*Br!iyWhlT0uOfs>72)d=q`BXo%qa^ur`p_ zb0>I>OM$Vjy+B{QF7IJ|@%>ztP<(&GADkXe!`4A|cwfAJU1L)!oQ>ZG_fL7jsznhP zrjw6rKD(i!W}xBI5&@d{^hSRJVNeBeegD5p!fZ*DD$H`~BA-)*mHA7Yq?i7;qVJxnvK_&SEH=9 zEb~Zxq$Sn=jsEAKC*GG~re`%j%k#&Od6g%snuqt0G|Kw0T6M>YuVs44yS!McQ8j}y zUedFrsu#;kQJseQtJ0|AC0}K=63>=Yt4bsH<@Zv(e9wET%duKz8lID5`JVM-e*9j} z@wIXt+cOP|jF+NxjCfuZW;IftUlK3v`N(V4@l3}e^WkfFP94v3lAbN*%c8_9!;DiE z`8rh^RlKTMnEq$HnFou!7mrz<{dr(}reRUeOM5<77R)@#QY)W_)yjV-8L!GynoC-| z?pI({o{X2Gw3N-o_#c_0Jhk#;$ZDl1E%|Kf=3#odhUa5l3hzh_vA(qi5W{~uBbuS}GY_U=k@38z9P^seu;eQ(-b2~XRPOV4)qnpS_&lFu-h7^6bva3+EHBNb zjAxt_rNw&uNaX8zEcYsFPMuEf`J?;t_9(BOpNp^MF|XmVT*LM(N?K)@=Xk##!OFb& z-*bKjInVm>9z16G(qegQO4Fv_!bs$zMx%;QIDq*_+Te#ShPh&%6jn{DK9Oi zXHgl)a{qK>y;cd)`A8W^>B(>gH0Hllv&!|Hz!me10{jY%W#DM^*11IX24rs`4)j z&(F+z^H`NnSv0C@%ficNQ`L|0ESAOlXEpq6KkM~R*7L_i!qVdV{cm_3(hTys_!)l& zm*&gAejc+rp5rk;lQN#w^Ezd?ytQ(@ypMb@^<uu|BL9+w)P5rTwp1 zd{0$=l15sp@)FPIRbjrK*Zd5xuD3E@RegDFS+FV}#k{@>*I*YPXo;C)r$U)fKU7vEczhVkXmvKnRaM}NO0uky}NUcIt;%gX&H zd;QEYWUoi^_!TqDXW{oeW__i|^ZX2|Fkh<-^M1Nu&E)w3w+CCtwwaX+%;p3FlP 
z&+_sp>G>Kq8;g>j$MX8p_I#}>tja^KXL}aqb*gx|t~`6mlN}!(^B(2xt4h!FJm!0; z!jhh!lh>8ki?97vSk-Jl>i(ya-5BXkQEz2pJLGHtASlvGr`QEHA&q-Jr|DTvgSr2|TDbHr(wQ`M=lQ^E|F<;AL zUZbvF*$i?{zL)wa<5cyR>-ky<%je^LSgpK<@l4C-8TTt9e;j!~W&Md&GXAHJTWS4N+pXJSCp@;(vB*PC2K{OO=M_S)J5R8PDe#W>L~hi|twDIaQcxsBt7RHKN_x2`zgLBM zk6(pV&Cc`uJin^%&-n1ONj~zu+*_KTY1F+IrYVc3q-A;*<#pULr5-->Cd1X0e9ACp@UdymN z@_IHW)9^V7tK#Ll()P@UMW&bbYzD^3qr8UYmGR0rrf0D<4a-Y&NsH}Sy%Z&$?U{z> zSdQ09K1?I`ERE;=7-ltWUzW)C%nW4Ajf>qTqUW)2wV>+o9kNG{%^H|cbrHW^HDJs)QvoW5ZlgGS=GR*2#_26sN zX{8+dr>r$hr_7h Date: Wed, 30 Aug 2023 11:31:12 -0400 Subject: [PATCH 2/5] Fix list formatting. --- sphinx-doc/howto/custom-md-potential.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sphinx-doc/howto/custom-md-potential.rst b/sphinx-doc/howto/custom-md-potential.rst index 5c38728ffc..de2d3b754b 100644 --- a/sphinx-doc/howto/custom-md-potential.rst +++ b/sphinx-doc/howto/custom-md-potential.rst @@ -8,15 +8,13 @@ There several different methods to apply arbitrary forces on particles in MD: 1. Implement a C++ :doc:`component <../components>` that evaluates the force/potential. Use one of the `example plugins`_ as a reference. - -1. Use the appropriate tabulated potential: +2. Use the appropriate tabulated potential: * `hoomd.md.pair.Table` * `hoomd.md.bond.Table` * `hoomd.md.angle.Table` * `hoomd.md.dihedral.Table` - -1. Implement a Python subclass `hoomd.md.force.Custom` that evaluates the force/potential. +3. Implement a Python subclass `hoomd.md.force.Custom` that evaluates the force/potential. C++ components provide the highest performance, accuracy, and flexibility. Tabulated potentials provide moderate performance and accuracy is limited by the interpolation. The performance and From 2e33beeb05de85e28cb84eb1cef18efe46248774 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 30 Aug 2023 11:36:50 -0400 Subject: [PATCH 3/5] Run pre-commit. 
--- setup.cfg | 4 +++- .../howto/determine-the-most-efficient-device.py | 15 ++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/setup.cfg b/setup.cfg index 43197363be..1f49224dfb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,9 +25,11 @@ ignore = # do not require docstrings in unit test files # F401 ignore unused imports in __init__.py files (these are for users) # D214 ignore overindented sections in Trigger - this is Google napoleon formatting +# N816 ignore mixed case kT variables +# D10* howto guides do not need docstrings per-file-ignores = */pytest/*.py:D100,D101,D102,D103,D104,D105,D106 - sphinx-doc/howto/*.py:D100,D101,D102,D103,D104,D105,D106 + sphinx-doc/howto/*.py:D100,D101,D102,D103,D104,D105,D106,N816 */__init__.py: F401 hoomd/version.py: F401 hoomd/trigger.py: D214 diff --git a/sphinx-doc/howto/determine-the-most-efficient-device.py b/sphinx-doc/howto/determine-the-most-efficient-device.py index 9eec9e92a0..0bb8277b04 100644 --- a/sphinx-doc/howto/determine-the-most-efficient-device.py +++ b/sphinx-doc/howto/determine-the-most-efficient-device.py @@ -14,19 +14,24 @@ device = getattr(hoomd.device, args.device)() simulation = hoomd.Simulation(device=device, seed=1) simulation.create_state_from_gsd(filename='spheres.gsd') -simulation.state.replicate(nx=args.replicate, ny=args.replicate, nz=args.replicate,) +simulation.state.replicate( + nx=args.replicate, + ny=args.replicate, + nz=args.replicate, +) simulation.state.thermalize_particle_momenta(filter=hoomd.filter.All(), kT=kT) cell = hoomd.md.nlist.Cell(buffer=0.2) lj = hoomd.md.pair.LJ(nlist=cell) lj.params[('A', 'A')] = dict(sigma=1, epsilon=1) -lj.r_cut[('A', 'A')] = 2**(1/6) +lj.r_cut[('A', 'A')] = 2**(1 / 6) constant_volume = hoomd.md.methods.ConstantVolume( - filter=hoomd.filter.All(), - thermostat=hoomd.md.methods.thermostats.Bussi(kT=kT)) + filter=hoomd.filter.All(), + thermostat=hoomd.md.methods.thermostats.Bussi(kT=kT)) -simulation.operations.integrator = 
hoomd.md.Integrator(dt=0.001, methods=[constant_volume], forces=[lj]) +simulation.operations.integrator = hoomd.md.Integrator( + dt=0.001, methods=[constant_volume], forces=[lj]) # Wait until GPU kernel parameter autotuning is complete. if args.device == 'GPU': From b1e784f933ef2a819f900a4de9c5dfdf57fc9301 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 31 Aug 2023 11:41:05 -0400 Subject: [PATCH 4/5] Add "How to determine the most efficient device." --- .../determine-the-most-efficient-device.ipynb | 136 +++ .../determine-the-most-efficient-device.py | 5 +- .../determine-the-most-efficient-device.rst | 137 +++ sphinx-doc/howto/wca-efficiency-131072.svg | 846 +++++++++++++++ sphinx-doc/howto/wca-efficiency-2048.svg | 976 ++++++++++++++++++ 5 files changed, 2099 insertions(+), 1 deletion(-) create mode 100644 sphinx-doc/figures/determine-the-most-efficient-device.ipynb create mode 100644 sphinx-doc/howto/wca-efficiency-131072.svg create mode 100644 sphinx-doc/howto/wca-efficiency-2048.svg diff --git a/sphinx-doc/figures/determine-the-most-efficient-device.ipynb b/sphinx-doc/figures/determine-the-most-efficient-device.ipynb new file mode 100644 index 0000000000..ac411c7e3c --- /dev/null +++ b/sphinx-doc/figures/determine-the-most-efficient-device.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "eeeb56f5-3a6b-4f6e-8873-ef30b0f7fa66", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib\n", + "import numpy\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68b9ef5f-bec7-4eaf-aeba-ced28a10f44d", + "metadata": {}, + "outputs": [], + "source": [ + "matplotlib.style.use('ggplot')\n", + "matplotlib.rcParams.update({'font.size': 12})\n", + "matplotlib.rcParams.update({'xtick.labelsize': 'x-large'})\n", + "matplotlib.rcParams.update({'xtick.major.size': '0'})\n", + "matplotlib.rcParams.update({'ytick.labelsize': 'x-large'})\n", + 
"matplotlib.rcParams.update({'ytick.major.size': '0'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b9b1d3d-1b22-4535-bcd0-1f2558ceeb9c", + "metadata": {}, + "outputs": [], + "source": [ + "CPU_P = [1, 2, 4, 8, 16, 32, 64]\n", + "CPU_TPS = [2699, 4868, 8043, 12585, 18168, 22394, 25031]\n", + "GPU_TPS = 15955\n", + "CPU_eta = [CPU_TPS[i] / (CPU_TPS[0] * CPU_P[i]) for i in range(len(CPU_TPS))]\n", + "\n", + "fig = matplotlib.figure.Figure(figsize=(7, 4.32624056*2), dpi=100)\n", + "ax = fig.add_subplot(2, 1, 1)\n", + "ax.plot(CPU_P, CPU_TPS, 's', color='C0', label='CPU')\n", + "ax.hlines(y=GPU_TPS, xmin=1, xmax=64, color='C1', label='GPU')\n", + "ax.set_xlabel('P')\n", + "ax.set_ylabel('TPS')\n", + "ax.legend()\n", + "\n", + "ax = fig.add_subplot(2, 1, 2)\n", + "ax.plot(CPU_P, CPU_eta, 's', color='C0')\n", + "ax.hlines(y=GPU_TPS / (CPU_TPS[0] * 64), xmin=1, xmax=64, color='C1')\n", + "ax.set_xlabel('P')\n", + "ax.set_ylabel('$\\eta$')\n", + "fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55211aa0-0eea-4522-b529-63111d25d007", + "metadata": {}, + "outputs": [], + "source": [ + "fig.savefig('../howto/wca-efficiency-2048.svg', bbox_inches='tight', facecolor=(1, 1, 1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b937ea0-8ca0-4803-ad29-b7b57a945d03", + "metadata": {}, + "outputs": [], + "source": [ + "CPU_P = [1, 2, 4, 8, 16, 32, 64, 128, 256]\n", + "CPU_TPS = [36.072, 61.988, 143.25, 281.35, 502.48, 910.58, 1451.5, 2216.1, 2706.8]\n", + "GPU_TPS = 7276.5\n", + "CPU_eta = [CPU_TPS[i] / (CPU_TPS[0] * CPU_P[i]) for i in range(len(CPU_TPS))]\n", + "\n", + "fig = matplotlib.figure.Figure(figsize=(7, 4.32624056*2), dpi=100)\n", + "ax = fig.add_subplot(2, 1, 1)\n", + "ax.plot(CPU_P, CPU_TPS, 's', color='C0', label='CPU')\n", + "ax.hlines(y=GPU_TPS, xmin=1, xmax=256, color='C1', label='GPU')\n", + "ax.set_xlabel('P')\n", + "ax.set_ylabel('TPS')\n", + "ax.legend()\n", + "\n", + "ax = 
fig.add_subplot(2, 1, 2)\n", + "ax.plot(CPU_P, CPU_eta, 's', color='C0')\n", + "ax.hlines(y=GPU_TPS / (CPU_TPS[0] * 64), xmin=1, xmax=256, color='C1')\n", + "ax.set_xlabel('P')\n", + "ax.set_ylabel('$\\eta$')\n", + "fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c03bf456-f13e-43c0-9513-e31a31523443", + "metadata": {}, + "outputs": [], + "source": [ + "fig.savefig('../howto/wca-efficiency-131072.svg', bbox_inches='tight', facecolor=(1, 1, 1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7371b729-f9ee-481a-b3af-82b358ef55d2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sphinx-doc/howto/determine-the-most-efficient-device.py b/sphinx-doc/howto/determine-the-most-efficient-device.py index 0bb8277b04..1d593a2eda 100644 --- a/sphinx-doc/howto/determine-the-most-efficient-device.py +++ b/sphinx-doc/howto/determine-the-most-efficient-device.py @@ -39,6 +39,9 @@ while not simulation.operations.is_tuning_complete: simulation.run(100) +# Warm up memory caches and pre-computed quantities. +simulation.run(args.steps) + # Run the benchmark and print the performance. 
simulation.run(args.steps) -device.notice(f'TPS: {simulation.tps}') +device.notice(f'TPS: {simulation.tps:0.5g}') diff --git a/sphinx-doc/howto/determine-the-most-efficient-device.rst b/sphinx-doc/howto/determine-the-most-efficient-device.rst index 6424260001..825f4b286b 100644 --- a/sphinx-doc/howto/determine-the-most-efficient-device.rst +++ b/sphinx-doc/howto/determine-the-most-efficient-device.rst @@ -4,4 +4,141 @@ How to determine the most efficient device ========================================== +Execute benchmarks of your simulation on a variety of device configurations then compare the results +to determine which is the most efficient. Your simulation model, parameters, system size, and +available hardware all impact the resulting performance. When benchmarking, make sure that all GPU +kernels have completed autotuning and that the memory caches have been warmed up before measuring +performance. +For example: + +.. literalinclude:: determine-the-most-efficient-device.py + :language: python + +Example Results (N=2048) +------------------------ + +On AMD EPYC 7742 (PSC Bridges-2) and NVIDIA A100 (NCSA Delta), this script reports +(``$ mpirun -n $P python3 determine-the-most-efficient-device.py --device $PROCESSOR``): + +.. list-table:: + :header-rows: 1 + + * - Processor + - P + - TPS + * - CPU + - 1 + - 2699 + * - CPU + - 2 + - 4868 + * - CPU + - 4 + - 8043 + * - CPU + - 8 + - 12585 + * - CPU + - 16 + - 18168 + * - CPU + - 32 + - 22394 + * - CPU + - 64 + - 25031 + * - GPU + - 1 + - 15955 + +The optimal device selection depends on the metric. When the metric is wall clock time only, choose +the highest performance benchmark. When the metric is a cost, choose based on the efficiency of each +device configuration: + +.. math:: + + \eta = \frac{S}{S_\mathrm{ref}} \cdot \frac{C_\mathrm{ref}}{C} + +where :math:`\eta` is the efficiency, :math:`S` is the performance :math:`C` is the cost and the +:math:`\mathrm{ref}` subscript denotes the reference. 
+ +One cost metric is compute time. Most HPC resources assign a cost by CPU core hours: + +.. math:: + + \frac{C_\mathrm{1\ CPU}}{C_\mathrm{P\ CPUs}} = \frac{1}{P} + +Some HPC resources may assign an effective cost to GPUs. When this is not the case, use the ratio of +available GPU hours to CPU core hours as a substitute. This example will assign a relative cost of + +.. math:: + + \frac{C_\mathrm{1\ CPU}}{C_\mathrm{1\ GPU}} = \frac{1}{64}. + +.. image:: wca-efficiency-2048.svg + :alt: Performance and efficiency of 2048 particle WCA simulations. + +With 2048 particles in this example, the CPU is always more efficient than the GPU and the CPU is +faster than the GPU when :math:`P \ge 16`. Therefore, the CPU is always the optimal choice. Choose a +number of ranks :math:`P` depending on project needs and budgets. Larger values of :math:`P` will +provide results with lower latency at the cost of more CPU core hours. In this example, :math:`P=8` +(:math:`\eta \sim 0.6`) is a middle ground providing a significant reduction in time to solution at +a moderate extra cost in CPU core hours. + +Example Results (N=131,072) +--------------------------- + +The results are very different with 131,072 particles +(``$ mpirun -n $P python3 determine-the-most-efficient-device.py --device $PROCESSOR --replicate=4``): + +.. list-table:: + :header-rows: 1 + + * - Processor + - P + - TPS + * - CPU + - 1 + - 36.072 + * - CPU + - 2 + - 61.988 + * - CPU + - 4 + - 143.25 + * - CPU + - 8 + - 281.35 + * - CPU + - 16 + - 502.48 + * - CPU + - 32 + - 910.58 + * - CPU + - 64 + - 1451.5 + * - CPU + - 128 + - 2216.1 + * - CPU + - 256 + - 2706.8 + * - GPU + - 1 + - 7276.5 + +.. image:: wca-efficiency-131072.svg + :alt: Performance and efficiency of 131,072 particle WCA simulations. + +At this system size, the GPU is always both faster and more efficient than the CPU.
+ +Compare the two examples and notice that the TPS achieved by the GPU is only cut in half when the +system size is increased by a factor of 64. This signals that the smaller system size was not able +to utilize all the parallel processing units on the GPU. + +.. note:: + + Use trial moves per second (`hoomd.hpmc.integrate.HPMCIntegrator.mps`) as the performance + metric when benchmarking HPMC simulations. diff --git a/sphinx-doc/howto/wca-efficiency-131072.svg b/sphinx-doc/howto/wca-efficiency-131072.svg new file mode 100644 index 0000000000..5112bacef8 --- /dev/null +++ b/sphinx-doc/howto/wca-efficiency-131072.svg @@ -0,0 +1,846 @@ + + + + + + + + 2023-08-31T09:51:54.545612 + image/svg+xml + + + Matplotlib v3.7.2, https://matplotlib.org diff --git a/sphinx-doc/howto/wca-efficiency-2048.svg b/sphinx-doc/howto/wca-efficiency-2048.svg new file mode 100644 index 0000000000..14c42f957d --- /dev/null +++ b/sphinx-doc/howto/wca-efficiency-2048.svg @@ -0,0 +1,976 @@ + + + + + + + + 2023-08-31T09:50:49.016562 + image/svg+xml + + + Matplotlib v3.7.2, https://matplotlib.org From b7d2685658fd15e1111927521f4d0d769ab585b4 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 31 Aug 2023 15:37:20 -0400 Subject: [PATCH 5/5] Combine efficiency and cost equations. --- .../determine-the-most-efficient-device.rst | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/sphinx-doc/howto/determine-the-most-efficient-device.rst b/sphinx-doc/howto/determine-the-most-efficient-device.rst index 825f4b286b..b4d9c0a7bd 100644 --- a/sphinx-doc/howto/determine-the-most-efficient-device.rst +++ b/sphinx-doc/howto/determine-the-most-efficient-device.rst @@ -54,27 +54,22 @@ On AMD EPYC 7742 (PSC Bridges-2) and NVIDIA A100 (NCSA Delta), this script repor The optimal device selection depends on the metric. When the metric is wall clock time only, choose the highest performance benchmark.
When the metric is a cost, choose based on the efficiency of each -device configuration: - -.. math:: - - \eta = \frac{S}{S_\mathrm{ref}} \cdot \frac{C_\mathrm{ref}}{C} - -where :math:`\eta` is the efficiency, :math:`S` is the performance :math:`C` is the cost and the -:math:`\mathrm{ref}` subscript denotes the reference. - -One cost metric is compute time. Most HPC resources assign a cost by CPU core hours: - -.. math:: - - \frac{C_\mathrm{1\ CPU}}{C_\mathrm{P\ CPUs}} = \frac{1}{P} +device configuration. +One cost metric is compute time. Most HPC resources assign a cost by CPU core hours. Some HPC resources may assign an effective cost to GPUs. When this is not the case, use the ratio of available GPU hours to CPU core hours as a substitute. This example will assign a relative cost of +1 GPU hour to 64 CPU core hours. The efficiency is: .. math:: - \frac{C_\mathrm{1\ CPU}}{C_\mathrm{1\ GPU}} = \frac{1}{64}. + \eta = + \begin{cases} + \frac{S_\mathrm{P\ CPUs}}{S_\mathrm{1\ CPU}} \cdot \frac{1}{P} & \mathrm{CPU} \\ + \frac{S_\mathrm{P\ GPUs}}{S_\mathrm{1\ CPU}} \cdot \frac{1}{64 P} & \mathrm{GPU} \\ + \end{cases} + +where :math:`S` is the relevant performance metric. .. image:: wca-efficiency-2048.svg :alt: Performance and efficiency of 2048 particle WCA simulations.