From a54c34dd1121d5c0096be0f30b7a9eec0393319d Mon Sep 17 00:00:00 2001
From: Timmy
Date: Tue, 3 Nov 2015 15:55:46 -0600
Subject: [PATCH 01/19] helper messages from program options were not printed out

---
 src/tests/test-blas1.cpp      | 5 +++++
 src/tests/test-blas2.cpp      | 5 +++++
 src/tests/test-blas3.cpp      | 5 +++++
 src/tests/test-conversion.cpp | 5 +++++
 src/tests/test-solvers.cpp    | 5 +++++
 5 files changed, 25 insertions(+)

diff --git a/src/tests/test-blas1.cpp b/src/tests/test-blas1.cpp
index f5913d6..115c17c 100644
--- a/src/tests/test-blas1.cpp
+++ b/src/tests/test-blas1.cpp
@@ -640,6 +640,11 @@ int main (int argc, char* argv[])
     try
     {
         po::store(parsed, vm);
+        if (vm.count("help"))
+        {
+            std::cout << desc << std::endl;
+            return 0;
+        }
         po::notify(vm);
     }
     catch (po::error& error)
diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp
index f2a7c90..090e491 100644
--- a/src/tests/test-blas2.cpp
+++ b/src/tests/test-blas2.cpp
@@ -428,6 +428,11 @@ int main (int argc, char* argv[])
     try
     {
         po::store(parsed, vm);
+        if (vm.count("help"))
+        {
+            std::cout << desc << std::endl;
+            return 0;
+        }
         po::notify(vm);
     }
     catch (po::error& error)
diff --git a/src/tests/test-blas3.cpp b/src/tests/test-blas3.cpp
index 1727d39..8c57ad9 100644
--- a/src/tests/test-blas3.cpp
+++ b/src/tests/test-blas3.cpp
@@ -679,6 +679,11 @@ int main (int argc, char* argv[])
     try
     {
         po::store( parsed, vm );
+        if (vm.count("help"))
+        {
+            std::cout << desc << std::endl;
+            return 0;
+        }
         po::notify( vm );
     }
     catch( po::error& error )
diff --git a/src/tests/test-conversion.cpp b/src/tests/test-conversion.cpp
index d10c518..2fce8fc 100644
--- a/src/tests/test-conversion.cpp
+++ b/src/tests/test-conversion.cpp
@@ -704,6 +704,11 @@ int main (int argc, char* argv[])
     try
     {
         po::store( parsed, vm );
+        if (vm.count("help"))
+        {
+            std::cout << desc << std::endl;
+            return 0;
+        }
         po::notify( vm );
     }
     catch( po::error& error )
diff --git a/src/tests/test-solvers.cpp b/src/tests/test-solvers.cpp
index 20dab6b..87063a7 100644
--- a/src/tests/test-solvers.cpp
+++ b/src/tests/test-solvers.cpp
@@ -288,6 +288,11 @@ int main (int argc, char* argv[])
     try
     {
         po::store(parsed, vm);
+        if (vm.count("help"))
+        {
+            std::cout << desc << std::endl;
+            return 0;
+        }
         po::notify(vm);
     }
     catch (po::error& error)

From f3f08790bad59c72899957c1c8caa47da91273e2 Mon Sep 17 00:00:00 2001
From: Kiran
Date: Fri, 16 Oct 2015 21:57:43 +0530
Subject: [PATCH 02/19] Added performance graphs of Beta2

---
 beta2graphs/CSR2COO.png      | Bin 0 -> 37788 bytes
 beta2graphs/CSR2Dense.png    | Bin 0 -> 21148 bytes
 beta2graphs/Coo2Csr.png      | Bin 0 -> 29307 bytes
 beta2graphs/Dense2Csr.png    | Bin 0 -> 37228 bytes
 beta2graphs/SpGemm.png       | Bin 0 -> 41502 bytes
 beta2graphs/SpMdV_Double.png | Bin 0 -> 40044 bytes
 6 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 beta2graphs/CSR2COO.png
 create mode 100644 beta2graphs/CSR2Dense.png
 create mode 100644 beta2graphs/Coo2Csr.png
 create mode 100644 beta2graphs/Dense2Csr.png
 create mode 100644 beta2graphs/SpGemm.png
 create mode 100644 beta2graphs/SpMdV_Double.png

diff --git a/beta2graphs/CSR2COO.png b/beta2graphs/CSR2COO.png
new file mode 100644
index 0000000000000000000000000000000000000000..1f5ba2a69d89de78ea087fae1f6007031a142fa2
GIT binary patch
literal 37788
[base85-encoded binary patch data for CSR2COO.png, CSR2Dense.png, Coo2Csr.png, Dense2Csr.png, SpGemm.png, and SpMdV_Double.png omitted; the diffstat above lists the file sizes]
zVzfVda5=M$uK3%h9Qdy5eC0O0gU2Hy=pK)G^19vlf=gBww-wX|`bA(yYt|+FMFKn# zjo$^)Ci1kurM2^(RYEx8Q`Y!OxLXAhUSvUyaX@ZI;`ZBXMP1zLClGXgRg;>OUF~}V zbuPj?`czI_wv$CO?wBJM{nLpn;Y*@|xsJhKbL(u=v4&M=uxi>PcMWz()$x86BbPCk zmHAy?L|=IrOMP@xyf}vlx1jjk>PYsuB90SFOLndJN1@|0*OnU`&LG?)z$8EyEIxgwvLECB@D02rahQbvDarZw(xQ&JaKZBc=;lv~GcEAqR)Z=suWQ z(j*{>#6tyQQ>51XWHP*UG@Ydh*yw*Wr>2!#-*njMrl+&AgKjdsso_S|zGkCBlLOSz zGX>P(jj4!~C$dmS1OCqsrICmqicrn?F?WtwkHEp8W2gI!!(>;LEoL7bkkr|Mo>(K7FK zDWdt!_I@|NTMn$!BR2sL830zcX_kB8lG}5HJtTQ^JK7Ir6tp!}A^#>eJ0^W)MkY>( zT|%wOdw7Mj7)6M%<*Q$`HjeT=;a@*)UFzrJf10}al4f9_qR-Nd-u|;UCfhIbyPwNj z6ADALR;VmMdCGqe)vWn)p8@=G-H0Sla;4zp&cJP954&cxMh!l;eiUi{qGtP&Tx2S& z3VBWH;IUIv)_M%`!zN7@mwqu743cK4weU*8EK%#qTNf{YyvKG^D^I;FY9q+6eh(w> zOLQLHNI%n73$nxIpjF*uYifgTpz}c@`7Js_PZR}3;Ma8Aad+a*|KeL_N6E@j5}@oP z(6u+W01B~@xt&zV+k|xp#RTfE?puYY*%94$nj+KCcZd$%3Zd9S_V92{vARSx#(sI# zW!lD`Q*4vr#z5I0?w)4*j(UP}xVXC}r{gk}BQTVyrOB+==MSKTg=1@X4X$YIX@c+4 zw5td1rNQNGfiggW*OJ)Zjr|VJqyw6uKGuiMLWn;(s_jf*zk>`A5?b#mf@YW{NwCQ-2@l-bDU4RF0GuS}ml>O170A1B`xz**Ef_px$&;ULde} z8W`>e8k_Do7~Am|%m3TlUB9ZSi};_f4#-drFbXIO7vDGv<_vmIAv~D9Zugdp0pp8T zeEj_Q_9LhBc8x;=e8!R|-^}}=aKeI}eww-X?q@X{Ky1+^pz8OJer36SYvb*42Y{TI zcj)(D{?R-RF%rxB(-WS3a;p@A`?#p#y5J6_`6h%=eig zExa7$7srI1XE3J8+UpS+c;Ie=5U*A8iwztCcEaFohB+4MlYjo+MrV)j`@TY`p27jG zO~iKUGfpfDOxksZGBRz<5Nx{wWop(=IQb@- zfh^6HGLgEx&xLrczOnTlZiNnHuFM=iNK2rs#MrafA$PS#4Gg0k?w}?tEc*DE1(|{H z+k}Raulrr1a>c9+1Nx0I8|$4uW<#OBxAtIol+L^$hk^Z-0oK&0L0!ZHtL`J30Ta9yI5Jwr&s)zg_(jHe==vF74DUV(KF> zh)hXi$~LClVG10k@NkS&|KG#IM6!oQ#lmaED7-d{r)2>O77CwYh%CYl1~-inL@ZbX z;C;^}?)EY+sRw!ZMej*utL>y#F)G4)lZ)lDB=&NmLyscVpWf8kv+C&J-ib1dl28*DPMz{4TPDG1xY znaJn|PV}5gz<;#!ZN~(&eos-LY{{S3pi6LgGv2to|Gz$J`=DeN$l$;H&1z;W5$A>5w!XDUu-_SV|w?qZj#YzuN7jwoLgdZyN=5jf zs~u=VAFLbxSE2|vS93tNm&qJ--F<85K#YOSrzLdZ^ndc&dWi&42_HW1HUai`ClyMyV#9Q zu3X3&Qojofu-;2Di{k2{*w!}*T8Br2m7sg{9)9vf_an%F3k#`TaNIhsr67W=oLB$l z8*EpHf}rg?fwsJEm?LxYK0uCo@toT5)(Zns=>!b)FVI7Np%rVtIR{=^OW!^tDczs* z&X?vHkztxBqn~{DUtDSJ5kPLIZMe+^vNFmw)C^smUU%*dE(F5j3>4^)AgavMiF;vV zR-0yYMvKjJ95`wcbo!S!IHwM)+v7o32c7kLPB#1#W3cuOch{)Ndsxj4ERv)4usFBy z-OqXXc~;u!9nuoE)MBs4v*xm!Pu^KHcUyZ_v}PS#5pD3~iRnDcWY3xmFiw0DbYcPn zb#dc?DTtIHCT>A_;=fp?;}GT<2E`t=KaLpwb{+Oc-*{bb-}Snn-tI}SfBVB;7r3^* zN9T@1sQ0_!|JJF($i;^J$iMb{U9^>VBqaVgvhGlKhL}+CAW*jVtq%42x3{7*#O-z4 zI+hRbeRl~w?sdrl$kY0@;^~Pp*bFf0n7^}&wB3Yi5pJGx`>=&NiuFcx(ZydHh}2+M zS`5~UELZ1wlmhUBGRlrVU{wb4?I`&A>MEWGE&x82DF<_PWUM(Xv8YjoNgE zGHAxn|As{!jK~-TX!mTM0~hv!H2dRsGQ(dYGW;$9-06lRguU>0)?5V$qTQhA-wmC> zx@q_^;pap1mH)`12(0^SW=m z$@5DLB16WR{UmDjdR*uZ3^#9~{4;qOorZ88qui62+n<6%YgTrwo#~nMC$5cl$A6*e zoL^$mUVUaP8JI*2Uk|QtvZ}V3_O@eb%4FX8I{v-^LYdLWt*plKhe@>dPb{F{g}05B^VnV!)%IS{_a_P)N-SIl+FfctWWi08M6q0p9%MdelYnVi(-dWo^p@9jdZVi(n;5%EkPH14h zsawNj`ZuNcckt~g#sA6U8O>Q*jZkzOP>^wU(%{Y{xK_!^k45OpMY`~%rVJqvH@#$z-kQG;_Qc+(r9^DS@!*0?mCf#l-ia8C#Mf{7GBk7pg# zL;C#q%r6tk^jrMmCYTlhong7F9W+r*m&bd)qBjD1MG{|MF8E|#%y@0#jVQ^xf(KN* zCeosmI{ZzrKfLC_n*jHnFVM6(3#Y{^b9CM431*}R%`;XEeKp~W#UTCBmmc0GCYp!l zdA8Tc4RE5+2)7NSa3AR?oIcb@o%F4)B&ul{@;^<&hem#vR83<*2@(9ZE^UEG_$cv z8<_Ix%5pkC^GrMWDi+WEd?I;zYb>_69rO2kkd1+rHl1%!?PtA;$0l1RN*iyD$FI8t zXqOR~Ij>^!s?!r$=^JA5SlvYDi07w{GbJA#r2bF0AJYu5$e1?|WcoW_RU7|IH^(YD zI#z5s)Hzlx-@4_Mzf0zWT;IiRccbqT@WrK9wYN8<9Qul;jvza|9z}I-FIWzyE}0P^ zHQ4~ZSQ-AZGWLe-Lto$1aplC<=>ESUD^uC$$)1H}N1VJ1#)!*Au%EEZe?x=u zQDD~74C*kq>CHeJ&q>q^p)dZ{%j(@*a9~^Mm}Jj`YSMRDUdR2)BTjlDj8yJ^S&@DN zZ0ibYH@x)v?)wHze)L3UoK3t8L*4}- zs2|R{4U$Y99UZ4IZ_YY&{Qlh{^oV{WZM81HdIc|k!Cc=B!j1HEXotbHM#oUcWOnAw zN!x_j$8@@XqTDz+A<@rJ>Bo*h#{M5DYQcdUg8da$R$1GSeMD6*zSE3CIG# 
zQu<^zFe$0Zp?Ek1xR}P7cyRqDh5{yCg|4y=da}NTG2Td>C_3w#{J~#oBmJUh2U?%zkaq>d%rs z_gen(YAD0<#4q}$&7Yx6369+k?FOH40+8Sl8JdmLY442zvIn3GN z3hADSv6jC}X7EoKysR|x9aH^1U6u9!2eh9^_b|UXJ{te`$^EZ0)^YdwPn9}#@bOmD z>t-ezn`&*beRY@3D}b8-cgxoRZn9^M4~f13hV&}CI$knJmA;&`*Htyf@n^iY@a2SW zz9GGzxKZd0&Z@7WOf6ta17MVzG9k)0Q%*dELi)MWDHzq?E;<#+#uI|6NUZO7Pf-b7 zLOMm};wz@u=oFoPeQ6d%>nsO8ryc#s-L|Iy6%qnsaX&SDC6#PgSR1=2%nJ z>HmY(X-s2l4-cQ#Ak{sihhXTado;A}`IhfK#G~shPCOLv&ofbD;~ZXL;PxdMUL$vO zkM0T5KSo>k0Mb8G1;*s3S2KF?vlRcV>DTLp`_0;`q(C-&r|)c?+-VaC6QB^qRxI7)CF?P z0V8K&$aiBOUuwJRBgv<<07OnaCl;7@gOcLhA1GU@*u@qh6L*Y#Pjam*fGZZ>|LQ zpcSCBF+FP|bk{s_Eh~k2a(F_RHZ%t=ioa=uGC`xJ$NRe)c~Q(f&W+l3D2es6g_l+N zH&h{;C{zxybK@={u}+{a#M1AsdNeh!)(6RR~cYEQ+bl#JZ;d{o?1E(TC+_v$z+T z|M2!z``M=%xBk1ty$zhyvS}@D)XhHz9h2fd^~{B`e=vX zLHOrf+RA=yFAC>wCzs2H71As0{j=JRL51*6JWjiZ!$v8kCK4q&M!@_UwO$Ar zpqW}2rX`A=Nch%}Epg49P_`=n=7jcsVq>s|Gqn_o`p(FTcuLD`l)BzV6B#|g!q0Ma zS5xZG$H9$UU0Ck6kxMk2ss&hjnhG9_q6NwvaP~z1+|tELHG4qYIj46uk&5o6KZ3`g zegTcVS}0-ByFBb43x1SIoB+Q6PmQV<(J$;_19)bSPYc3@mNq|MQkq{CaAz%f?fB&Y zhUNILjUu>p1QnUr=h+n!19$PFQOVD;HNhI!bl^5wL=QvtpYA_xV?-fhCw<^i*cGC$fs6S!1BJg5)0_cV2q=MpXVP z!w7BzFCfXh>js{umWsRhj6LN{C5o`LfHgWW3_Mi#sOu?M^*pMdWYH$+$_{!6>H)#x zcPn)ylW2yQXft>|1WHSmiUGs7 zF3xd05zj%jB*ppD+et+*Kj%Kg0#o!XHf7cJ3i$febZpB7>?Q?r8AKwqZ=u0#I%5=_ z_6r8pf!jiDQ(Ib(JnBotww)H37h6y?p)!KgpnBsLtQ28zCcV4~U1^H%dZMnlp;kO^ zwT;iUfQ2=t>}*p6hs%V0*oLeI5t*3vnh|OFXHjH#j!I##B1b3lxg(2A?TKuYegSN_(XHn_*FUR9bHhAUP^bIRI z=A^26oA%PeQFY7F(x{QIvmPsJvV<{wc91>CGNlgl0*bBR)vN&W+ApuELl)9C>fk=! zC~N=m)uzL6Clv3z|8?g4e#g?VviC>hE`@%A4s~-@*}`!eqpf2&x;@3>UZ3eU2+p2B zBD5(OEE1HZ_~=EL#Ss$fCi8qV%fA)@Zz^(Z(vJMPx&kd}wXq<%i*kM8H1WLjem^ zAHv=pv1Lj>WomilX-tN9c9-W>l(nOFuu+~SJx-2=l;eN%LF9M}!A;aHWtk2~Au!%4X*$zjKq=L^MxMyNfrht*l5#e{HOD%J zI_@v?E^00<*5*BG?#!TMCSa9>IB&6^R}mniaa&VZer=x19fg|8qd4`sEa%Mt=E$HN z)e;FOhK?Fr%R+l8EwC=%TNMl|?%LsU#fRY~*9HyZjab-oBSIbo2zJyCH?0C~$$1A8 z)B4n8L7)(8GM_0nafL$dW2cdhZ6YBmm&%8mxAfeqj20IMq^C)B=v;rDqmdp8jWU4B zgwYsyB1y~e{x!&lkMnbZl7zuwtBXi)W(uKX)u0Af|I?z%w@&T_H#%&pk)fe z3wNw*_{DaOYIE8Up9_4obqtFhDyEm5hP?&`s{?H7T1GdKu&P0AzBGr(>SuZI z8La13^XvKmwL1Nt&=J2%8^2^gdE^pvCq2P|=zD@NC%%CMN3n|FvU7`3xGp7hd$byh z=?ev|=KGOL5ngT01c-2DRGLQ{LZYo<`Y}mcXsXLG^WnhG)W@aY@g*eE{TG;nBx76r z!=**^hGkcnwj+Fh=X$p$g1k~fLO9)GeH;j2d;|w_A4k^_P{7QOYXxB;=!``|;jZpn zJ2-{Z9i-`s#X0(E+!J=BOWmUDRr}Dl6dey)(mDo2E0ZSAjVbKvKp!MgpISK@1|d}n zz9>D*PC8hh6k4v5r22IZc?um$x%u2pMv`gAbx}G-Z6E-My$4& zw%H&6g?6$m%Whc~;wx>+kvGI_ZdBbSL&EF$nFEGt3Y%h^ z1DEh$T&-&@#%+Zr-s$#TOcr4uu7wGRK+K0+|1e4GG?Y|;G%Y?AZc(3o&@0$XYLw_WIZ1pqtyAeaK-Gt2$UoJ`@sc63PCRSz|9G! 
zF1)=KxS>4?jg4{SJ+^z=dSD$fXfNF>Ov?|!D?~SU@}zMjc#g#%x-(?WQnhDLu=q~E z5l({{TyF>HWcLc*(q`K3WRRq7K-#6wUic7({@}9o9Nc;_m~tL^f~w&=63I zRV)d1%%sl~(k+N|Gj2W$nENS-1n+ej0Devb>Qb`39J=fMdUZKb=f+yW)}Y$ejR8;y zZ=(?JE>@lziac=n5HvlB>JhU8#|~?YfRa-?!%Zn#UIeE3K$=lcH-lj69A_dVDdaoL zf#}&J0~yoxHTD{Djak02s>k)wKPF`(V2)%0wCsVWY1gCx+#lc(7p*4r_0V!0UqIjT~)&5Y<43 zEs*q7>?fp~HXTUAW7W;U#Uo+If}8p-aAGa~d^Ba$6LPsi)v2WK$7&nEC-KvnuxnjG zSL?tGGmxwtvhp4>=;()4Fix=OpE7|CmNhZdlo3b-&8qvK>38o2Vl$Y)sSYYahf6>H zXv~peV1Dx|BTVA0BN~^kXLJS#$IbWeOEe4h0Nb-uQb_16dzoLBKLt}p&;rtv3?ecB z6&@I1f2!a0`4+YsP_uwT#)pq>!v928N;G)NQC z717jD!ilAeElBdW9v%{;-#n)%@LQG#OUfbv7$qsbwMRr|Mhs)Ch2Z|PtOIO&VQb$x zR_vMP%NYh`LfgeXKF`KR|?LP9|`jo;$-c~91%%wgUd4CrLcu#ee3KkO6t6CJ_@)*;;zCm7L=e6 zi!L*_cjPZ#DBM7-{4v|0&n~IfH<|H4Tk)#1*v7u#lwS`);}c0&;)TizA42jD2V|a& zaAr{FdKw;OK@7g7khC{xp3^Ea5NgrN3#LCHZg7|7xeYPPIEw-*9R%0NUBPp>y$N44 z%ORIp9$#%BM$R!>EB~op>2sg6*-@Ty-!?;s9>9s#p2Lnr+cc}JGoFcQe$m&&qMw_i zQg9U2f!r@P1zB=*ehBraCF@>sQkU2uNp zV^L-p#-h~mJcIs2I?E!67u7<<@*6UD?UyCE4Pf>Qo+PO1cRh9O5-(qAhx3vuq^IJ1 zn(>RA4Mr6C*p4s>e2C?O$Olq7z`d5G1a>dT^)sKhHmx+3sH>F*`)&c57HYL^YS$Y7 z>H-xAL|irF`lU+8#OxFI3tl zsJ@QBdxle;MuDt+aIG%?g)3Zjo};rqI;1D zNlSScL#Pa^$X`!4l$^jtN0d3}q6T0IEIWQX^lA%646JSye29RU^TTMdKH#%oV-ZI(jD6&=b?pSKVTM za@1E4#LOB#lf2kEsfpIPE~(jil=H}6-AP+X97-uYZLf*ymsLvhGqI!9Ir-o8$ z=?ZcE5@YwfKxFsf7GqkB{|1)<7uDl$W!7Ytb4v_%v2vtj*6*Q;s5wi}TT={=W-D5& zg~oJc^!vI3gVs-5%3&$g8X;yk1PJv%iDnS#KmCA!q`DlDwF_BWM&*{HFFy0IExL2^ z`uCEgtfEynM49t#%;j=|>KW$(gf}oKC)-$)o+$ncUbrQdutzW8uFrO+UQLBt6TJ zRzv8@yV}ih!-Xj^Wvu~4*OacHMa@cO&2lzy)Xv8j4<8H@Espfe&I?cvt_0 zKKJZ|k{ZyiLKgiX7#0ijL!cqCIYSKxh?Y)nY>K!hx#_Gn>T?Lg<>2tT{ttMx^%;AL z{BxT^lc}_YmY=4^uRZ05?(BcoXS)*Tz6+@*4ft-ba%6uU4hQb`9KMRzhHKL^m50h& zn|@kQml5jTpxZ7SzL3Izq&QOMMZb8_PEnfc_+D|YIoWw#Dz$9jYMQtu=8q8XQs%$6t;c1 zXE`LvR^yIKsb?2iVO0(G+f%Gp4ltY=Im!5+f_p?xWV;Ah|X$fBwbI|?zd20q_4*fxqjpXmYZza)Lw*KRa0Bq2)W5PGHsk{{KPlz%DKb}O5* zL_N&xvn}=y8?Bz*jRC|Rl&*aBxuBM-{ynYlJ;-a3fpOF{4R>gNtzg|Sf0bMIb2cY> zWkg1d>Tqyd&*oT2=^E2xgNL={Qt7Cvf8N!inny@yn^0t4xK`FJ@k#gBG4F*y*aMj(iI%ol?uGHu2#j!FvwP>U$w=FcGG7fEe3hWK|#;iUy%b%2;sUXH4#YgI? 
From 07a9c42360c178e73e03490fc34406e6ebf49869 Mon Sep 17 00:00:00 2001
From: Adam Harries
Date: Tue, 10 Nov 2015 15:59:32 +0000
Subject: [PATCH 03/19] Fix compile error on some platforms by explicitly
 including stdexcept for statistical timer headers

---
 src/benchmarks/clsparse-bench/include/statisticalTimer.h | 1 +
 src/include/clsparseTimer.hpp                            | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/benchmarks/clsparse-bench/include/statisticalTimer.h b/src/benchmarks/clsparse-bench/include/statisticalTimer.h
index a100371..d24ff33 100644
--- a/src/benchmarks/clsparse-bench/include/statisticalTimer.h
+++ b/src/benchmarks/clsparse-bench/include/statisticalTimer.h
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <stdexcept>

 /**
  * \file clAmdFft.StatisticalTimer.h
diff --git a/src/include/clsparseTimer.hpp b/src/include/clsparseTimer.hpp
index e9a2a7b..d46a100 100644
--- a/src/include/clsparseTimer.hpp
+++ b/src/include/clsparseTimer.hpp
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include <stdexcept>

 #if defined(__APPLE__) || defined(__MACOSX)
 # include

From 66ad0ee8b68f1d599078cb8cae1956a967381745 Mon Sep 17 00:00:00 2001
From: Kiran
Date: Fri, 4 Dec 2015 16:02:18 +0530
Subject: [PATCH 04/19] My changes integrated on Kent's branch

---
cmake/ExternalBoost.cmake | 18 ++- .../functions/clfunc-xSpMdM.hpp | 14 +- .../functions/clfunc_xBiCGStab.hpp | 8 +- .../clsparse-bench/functions/clfunc_xCG.hpp | 8 +- .../functions/clfunc_xCoo2Csr.hpp | 24 ++-- .../functions/clfunc_xCsr2Coo.hpp | 38 ++--- .../functions/clfunc_xCsr2Dense.hpp | 20 +-- .../functions/clfunc_xDense2Csr.hpp | 42 +++--- .../functions/clfunc_xSpMSpM.hpp | 26 ++-- .../functions/clfunc_xSpMdV.hpp | 14 +- src/benchmarks/clsparse-bench/src/main.cpp | 6 +- src/include/clSPARSE-1x.h | 20 +-- src/include/clSPARSE-2x.h | 20 +-- src/include/clSPARSE-xx.h | 8 ++ src/include/clSPARSE.h | 3 +- src/library/CMakeLists.txt | 43 +++--- src/library/blas1/atomic-reduce.hpp | 38 ++++- src/library/blas1/cldense-axpby.hpp | 20 ++- src/library/blas1/cldense-axpy.hpp | 18 ++- src/library/blas1/cldense-dot.hpp | 28 +++- src/library/blas1/cldense-scale.hpp | 18 ++- src/library/blas1/elementwise-transform.hpp | 32 ++++- src/library/blas1/reduce.hpp | 28 +++- src/library/blas2/csrmv-adaptive.hpp | 32 ++++- src/library/blas2/csrmv-vector.hpp | 51 +++++-- src/library/blas3/clsparse-csrmm.hpp | 27 +++- src/library/blas3/clsparse-spm-spm.cpp | 25 ++-- src/library/include/clSPARSE-1x.hpp | 7 +- src/library/include/clSPARSE-2x.hpp | 4 +- src/library/include/clSPARSE-private.hpp | 8 ++ src/library/include/external/mmio.h | 36 ++--- src/library/internal/clsparse-control.cpp | 7 +- src/library/internal/clsparse-control.hpp | 3 + src/library/internal/clsparse-validate.cpp | 22 ++- src/library/internal/data-types/csr-meta.cpp | 8 +- src/library/internal/data-types/csr-meta.hpp | 14 +- src/library/io/mm-reader.cpp | 134 +++++++++++------- src/library/kernels/blas1.cl | 6 +- src/library/kernels/csrmm_general.cl | 16 +-- src/library/kernels/elementwise_transform.cl | 2 +- .../preconditioners/preconditioner_utils.hpp | 38 ++++- src/library/transform/clsparse-coo2csr.cpp | 16 +-- src/library/transform/clsparse-csr2coo.cpp | 16 +-- src/library/transform/clsparse-csr2dense.cpp | 12 +- src/library/transform/clsparse-dense2csr.cpp | 6 +- src/library/transform/conversion-utils.hpp | 92 +++++++++--- src/library/transform/reduce-by-key.hpp | 71 +++++++++- src/library/transform/scan.hpp | 42 +++++- .../resources/csr_matrix_environment.cpp | 6 +- src/tests/resources/csr_matrix_environment.h | 26 ++-- src/tests/resources/matrix_utils.h | 48 +++---- .../resources/sparse_matrix_environment.cpp | 6 +- .../resources/sparse_matrix_environment.h | 8 +- src/tests/test-blas2.cpp | 30 ++-- src/tests/test-blas3.cpp | 58 ++++---- src/tests/test-conversion.cpp | 68 ++++----- 56 files changed, 948 insertions(+), 491 deletions(-) diff --git a/cmake/ExternalBoost.cmake b/cmake/ExternalBoost.cmake index c5c735c..912e149 100644 --- a/cmake/ExternalBoost.cmake +++ b/cmake/ExternalBoost.cmake @@ -24,7 +24,7 @@ include( ExternalProject ) # ExternalProject # Change this one line to upgrade to newer versions of boost -set( ext.Boost_VERSION "1.58.0" CACHE STRING "Boost version to download/use" ) +set( ext.Boost_VERSION "1.59.0" CACHE STRING "Boost version to download/use" ) mark_as_advanced( ext.Boost_VERSION ) string( REPLACE "." 
"_" ext.Boost_Version_Underscore ${ext.Boost_VERSION} ) @@ -41,7 +41,11 @@ else( ) set( Boost_Ext "tar.bz2" ) endif( ) -set( Boost.Command ./b2 --prefix=/package ) +if( WIN32 ) + set( Boost.Command b2 --prefix=/package ) +else( ) + set( Boost.Command ./b2 --prefix=/package ) +endif( ) if( CMAKE_COMPILER_IS_GNUCXX ) list( APPEND Boost.Command cxxflags=-fPIC -std=c++11 ) @@ -67,6 +71,8 @@ else( ) endif( ) message( STATUS "ExternalBoost using ( " ${Cores} " ) cores to build with" ) +message( STATUS "ExternalBoost building [ program_options, serialization, filesystem, system, regex ] components" ) + list( APPEND Boost.Command -j ${Cores} --with-program_options --with-serialization --with-filesystem --with-system --with-regex ) if( BUILD64 ) @@ -141,20 +147,20 @@ mark_as_advanced( ext.Boost_URL ) set( Boost.Bootstrap "" ) set( ext.MD5_HASH "" ) if( WIN32 ) - set( Boost.Bootstrap ".\\bootstrap.bat" ) + set( Boost.Bootstrap "bootstrap.bat" ) if( CMAKE_VERSION VERSION_LESS "3.1.0" ) # .zip file - set( ext.MD5_HASH "b0605a9323f1e960f7434dbbd95a7a5c" ) + set( ext.MD5_HASH "08d29a2d85db3ebc8c6fdfa3a1f2b83c" ) else( ) # .7z file - set( ext.MD5_HASH "f7255aeb692c1c38fe761c32fb0d3ecd" ) + set( ext.MD5_HASH "0a2e512844f3e30a6240f8139ee983f3" ) endif( ) else( ) set( Boost.Bootstrap "./bootstrap.sh" ) # .tar.bz2 - set( ext.MD5_HASH "b8839650e61e9c1c0a89f371dd475546" ) + set( ext.MD5_HASH "6aa9a5c6a4ca1016edd0ed1178e3cb87" ) if( XCODE_VERSION ) list( APPEND Boost.Bootstrap --with-toolset=clang ) diff --git a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp index 3414873..411bc91 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp @@ -25,7 +25,7 @@ template class xSpMdM: public clsparseFunc { public: - xSpMdM( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, size_t columns, cl_bool keep_explicit_zeroes = true ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr ), num_columns( columns ) + xSpMdM( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, clsparseIdx_t columns, cl_bool keep_explicit_zeroes = true ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr ), num_columns( columns ) { // Create and initialize our timer class, if the external timer shared library loaded if( sparseGetTimer ) @@ -87,7 +87,7 @@ class xSpMdM: public clsparseFunc // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. 
- return ( sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); + return ( sizeof( size_t )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); } std::string bandwidth_formula( ) @@ -104,7 +104,7 @@ class xSpMdM: public clsparseFunc beta = static_cast< T >( pBeta ); // Read sparse data from file and construct a CSR matrix from it - int nnz, row, col; + clsparseIdx_t nnz, row, col; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) ); if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); @@ -119,10 +119,10 @@ class xSpMdM: public clsparseFunc csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( size_t ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, ( csrMtx.num_rows + 1 ) * sizeof( size_t ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); @@ -204,7 +204,7 @@ class xSpMdM: public clsparseFunc if( gpuTimer && cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); + size_t sparseBytes = sizeof( size_t )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); @@ -247,7 +247,7 @@ class xSpMdM: public clsparseFunc // host values T alpha; T beta; - size_t num_columns; + clsparseIdx_t num_columns; cl_bool explicit_zeroes; // OpenCL state diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp index 31fc470..a9cabc4 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp @@ -106,7 +106,7 @@ class xBiCGStab : public clsparseFunc sparseFile = path; // Read sparse data from file and construct a COO matrix from it - int nnz, row, col; + clsparseIdx_t nnz, row, col; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) ); if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); @@ -123,11 +123,11 @@ class xBiCGStab : public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = 
::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -204,7 +204,7 @@ class xBiCGStab : public clsparseFunc if(/* gpuTimer && */cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); + size_t sparseBytes = sizeof(size_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp index cf7bf0c..89adf04 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp @@ -104,7 +104,7 @@ class xCG : public clsparseFunc sparseFile = path; // Read sparse data from file and construct a COO matrix from it - int nnz, row, col; + clsparseIdx_t nnz, row, col; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) ); if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); @@ -122,11 +122,11 @@ class xCG : public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -203,7 +203,7 @@ class xCG : public clsparseFunc if(/* gpuTimer && */cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); + size_t sparseBytes = sizeof(size_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp index fe728fb..0c019d0 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp @@ -109,9 +109,9 @@ class xCoo2Csr: public clsparseFunc cooMatx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, cooMatx.num_nonzeros * sizeof(T), NULL, &status ); cooMatx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - cooMatx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + cooMatx.num_nonzeros * sizeof( size_t ), NULL, &status ); cooMatx.rowIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - cooMatx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + cooMatx.num_nonzeros * sizeof( size_t ), NULL, &status ); if (typeid(T) == typeid(float)) fileError = 
clsparseSCooMatrixfromFile( &cooMatx, path.c_str(), control, explicit_zeroes ); @@ -131,9 +131,9 @@ class xCoo2Csr: public clsparseFunc cooMatx.num_nonzeros * sizeof( T ), NULL, &status ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - cooMatx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + cooMatx.num_nonzeros * sizeof( size_t ), NULL, &status ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - ( cooMatx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + ( cooMatx.num_rows + 1 ) * sizeof( size_t ), NULL, &status ); } @@ -149,12 +149,12 @@ class xCoo2Csr: public clsparseFunc void reset_gpu_write_buffer( ) { - int scalar_i = 0; + size_t scalar_i = 0; T scalar_f = 0; - CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.rowOffsets, &scalar_i, sizeof( int ), 0, - sizeof( int ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); - CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( int ), 0, - sizeof( int ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" ); + CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.rowOffsets, &scalar_i, sizeof( size_t ), 0, + sizeof( size_t ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); + CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( size_t ), 0, + sizeof( size_t ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.values, &scalar_f, sizeof( T ), 0, sizeof( T ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer values" ); } @@ -207,9 +207,9 @@ class xCoo2Csr: public clsparseFunc cl_bool explicit_zeroes; //matrix dimension - int n_rows; - int n_cols; - int n_vals; + clsparseIdx_t n_rows; + clsparseIdx_t n_cols; + clsparseIdx_t n_vals; // OpenCL state cl_command_queue_properties cqProp; diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp index dbf5cb8..ddb4655 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp @@ -107,9 +107,9 @@ class xCsr2Coo : public clsparseFunc sparseFile = path; // Read sparse data from file and construct a CSR matrix from it - int nnz; - int row; - int col; + clsparseIdx_t nnz; + clsparseIdx_t row; + clsparseIdx_t col; clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str()); if (clsparseSuccess != fileError) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); @@ -125,10 +125,10 @@ class xCsr2Coo : public clsparseFunc csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(cl_int), NULL, &status); + csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(cl_int), NULL, &status); + csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); if (typeid(T) == typeid(float)) @@ -141,10 +141,12 @@ class xCsr2Coo : public clsparseFunc if (fileError != 
clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); +#if 0 // Not needed clsparseCsrMetaSize(&csrMtx, control); csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_WRITE, csrMtx.rowBlockSize * sizeof(cl_ulong), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute(&csrMtx, control); +#endif // Initialize the output coo matrix clsparseInitCooMatrix(&cooMtx); @@ -157,11 +159,11 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(status, "::clCreateBuffer cooMtx.values"); cooMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, - cooMtx.num_nonzeros * sizeof(cl_int), NULL, &status); + cooMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer cooMtx.colIndices"); cooMtx.rowIndices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, - cooMtx.num_nonzeros * sizeof(cl_int), NULL, &status); + cooMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer cooMtx.rowIndices"); }// end @@ -176,13 +178,13 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.values, &scalarZero, sizeof(T), 0, cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); - cl_int scalarIntZero = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(cl_int), 0, - cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); + size_t scalarIntZero = 0; + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(size_t), 0, + cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(cl_int), 0, - cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(size_t), 0, + cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); }// end @@ -192,13 +194,13 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.values, &scalar, sizeof(T), 0, cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); - cl_int scalarIntZero = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(cl_int), 0, - cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); + size_t scalarIntZero = 0; + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(size_t), 0, + cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(cl_int), 0, - cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(size_t), 0, + cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); }// end void read_gpu_buffer() @@ -241,7 +243,7 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); - 
CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); + //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(cooMtx.values), "clReleaseMemObject cooMtx.values"); CLSPARSE_V(::clReleaseMemObject(cooMtx.colIndices), "clReleaseMemObject cooMtx.colIndices"); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp index c4caa27..22fc45e 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp @@ -99,7 +99,7 @@ class xCsr2Dense : public clsparseFunc std::string bandwidth_formula() { //return "GiB/s"; - return "GiElements/s"; + return "GiElements/s"; }// end void setup_buffer(double pAlpha, double pBeta, const std::string& path) @@ -107,15 +107,14 @@ class xCsr2Dense : public clsparseFunc sparseFile = path; // Read sparse data from file and construct a CSR matrix from it - int nnz; - int row; - int col; + clsparseIdx_t nnz; + clsparseIdx_t row; + clsparseIdx_t col; clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str()); if (clsparseSuccess != fileError) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); // Now initialize a CSR matrix from the CSR matrix - // VK we have to handle other cases if input mtx file is not in CSR format clsparseInitCsrMatrix(&csrMtx); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; @@ -125,10 +124,10 @@ class xCsr2Dense : public clsparseFunc csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(cl_int), NULL, &status); + csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(cl_int), NULL, &status); + csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); if (typeid(T) == typeid(float)) @@ -140,18 +139,19 @@ class xCsr2Dense : public clsparseFunc if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); - +#if 0 // Not Required clsparseCsrMetaSize(&csrMtx, control); csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_WRITE, csrMtx.rowBlockSize * sizeof(cl_ulong), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute(&csrMtx, control); +#endif // Initialize the output dense matrix cldenseInitMatrix(&denseMtx); denseMtx.major = rowMajor; denseMtx.num_rows = row; denseMtx.num_cols = col; - denseMtx.lead_dim = col; // To Check!! VK; + denseMtx.lead_dim = col; // To Check!! 
VK; denseMtx.values = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, denseMtx.num_rows * denseMtx.num_cols * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer denseMtx.values"); @@ -214,7 +214,7 @@ class xCsr2Dense : public clsparseFunc CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); + //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(denseMtx.values), "clReleaseMemObject denseMtx.values"); } diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp index 8a70bce..9cf6380 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp @@ -27,8 +27,8 @@ class xDense2Csr: public clsparseFunc public: xDense2Csr( PFCLSPARSETIMER sparseGetTimer, size_t profileCount, cl_device_type devType, cl_bool keep_explicit_zeroes = true ): clsparseFunc( devType, CL_QUEUE_PROFILING_ENABLE ), gpuTimer( nullptr ), cpuTimer( nullptr ) { - gpuTimer = nullptr; - cpuTimer = nullptr; + gpuTimer = nullptr; + cpuTimer = nullptr; // Create and initialize our timer class, if the external timer shared library loaded if( sparseGetTimer ) @@ -83,7 +83,7 @@ class xDense2Csr: public clsparseFunc double bandwidth( ) { #if 0 - // Assuming that accesses to the vector always hit in the cache after the first access + // Assuming that accesses to the vector always hit in the cache after the first access // There are NNZ integers in the cols[ ] array // You access each integer value in row_delimiters[ ] once. // There are NNZ float_types in the vals[ ] array @@ -91,14 +91,14 @@ class xDense2Csr: public clsparseFunc // Finally, you write num_rows floats out to DRAM at the end of the kernel. 
return ( sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); #endif - // Number of Elements converted in unit time - return (csrMtx.num_cols * csrMtx.num_rows / time_in_ns()); + // Number of Elements converted in unit time + return (csrMtx.num_cols * csrMtx.num_rows / time_in_ns()); } std::string bandwidth_formula( ) { //return "GiB/s"; - return "GiElements/s"; + return "GiElements/s"; } @@ -107,9 +107,9 @@ class xDense2Csr: public clsparseFunc sparseFile = path; // Read sparse data from file and construct a COO matrix from it - int nnz; - int row; - int col; + clsparseIdx_t nnz; + clsparseIdx_t row; + clsparseIdx_t col; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) ); if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); @@ -120,7 +120,7 @@ class xDense2Csr: public clsparseFunc csrMtx.num_rows = row; csrMtx.num_cols = col; - //clsparseCsrMetaSize( &csrMtx, control ); + //clsparseCsrMetaSize( &csrMtx, control ); cl_int status; csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, @@ -128,11 +128,11 @@ class xDense2Csr: public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -176,11 +176,11 @@ class xDense2Csr: public clsparseFunc CLSPARSE_V(status, "::clCreateBuffer csrMatx.values"); csrMatx.colIndices = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, - csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrMtx.num_nonzeros * sizeof( size_t ), NULL, &status ); CLSPARSE_V(status, "::clCreateBuffer csrMatx.colIndices"); csrMatx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, - (csrMtx.num_rows + 1) * sizeof( cl_int ), NULL, &status ); + (csrMtx.num_rows + 1) * sizeof( size_t ), NULL, &status ); CLSPARSE_V(status, "::clCreateBuffer csrMatx.rowOffsets"); }// End of function @@ -194,14 +194,14 @@ class xDense2Csr: public clsparseFunc void reset_gpu_write_buffer( ) { - int scalar_i = 0; + size_t scalar_i = 0; T scalar_f = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.rowOffsets, &scalar_i, sizeof(int), 0, - sizeof(int) * (csrMatx.num_rows + 1), 0, NULL, NULL), "::clEnqueueFillBuffer row"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.rowOffsets, &scalar_i, sizeof(size_t), 0, + sizeof(size_t) * (csrMatx.num_rows + 1), 0, NULL, NULL), "::clEnqueueFillBuffer row"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.colIndices, &scalar_i, sizeof(int), 0, - sizeof(int) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer col"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.colIndices, &scalar_i, sizeof(size_t), 0, + sizeof(size_t) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer col"); CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.values, &scalar_f, sizeof(T), 0, sizeof(T) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer values"); @@ -247,7 +247,7 @@ class xDense2Csr: public clsparseFunc 
CLSPARSE_V( ::clReleaseMemObject( csrMatx.colIndices ), "clReleaseMemObject csrMatx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMatx.rowOffsets ), "clReleaseMemObject csrMatx.rowOffsets" ); - CLSPARSE_V( ::clReleaseMemObject( A.values ), "clReleaseMemObject A.values" ); + CLSPARSE_V( ::clReleaseMemObject( A.values ), "clReleaseMemObject A.values" ); }// End of function private: @@ -279,7 +279,7 @@ xDense2Csr::xDense2Csr_Function( bool flush ) //call dense2csr clsparseSdense2csr(&A, &csrMatx, control); - if( flush ) + if( flush ) clFinish( queue ); }// end diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp index bebe2a4..f37ce85 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp @@ -99,7 +99,7 @@ class xSpMSpM : public clsparseFunc { // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. - return (sizeof(cl_int)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows)) / time_in_ns(); + return (sizeof(size_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows)) / time_in_ns(); } // end of function std::string bandwidth_formula() @@ -115,12 +115,12 @@ class xSpMSpM : public clsparseFunc { beta = static_cast(pBeta); // Read sparse data from file and construct a COO matrix from it - int nnz, row, col; + clsparseIdx_t nnz, row, col; clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str()); if (fileError != clsparseSuccess) throw clsparse::io_exception("Could not read matrix market header from disk"); - // Now initialise a CSR matrix from the COO matrix + // Now initialize a CSR matrix from the COO matrix clsparseInitCsrMatrix(&csrMtx); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; @@ -133,11 +133,11 @@ class xSpMSpM : public clsparseFunc { CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof(cl_int), NULL, &status); + csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, - (csrMtx.num_rows + 1) * sizeof(cl_int), NULL, &status); + (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); #if 0 csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, @@ -245,18 +245,18 @@ class xSpMSpM : public clsparseFunc { { // C = A * B // But here C = A* A, the A & B matrices are same - int nnzA = csrMtx.num_nonzeros; - int Browptrlen = csrMtx.num_rows + 1; // Number of row offsets + size_t nnzA = csrMtx.num_nonzeros; + size_t Browptrlen = csrMtx.num_rows + 1; // Number of row offsets - std::vector colIdxA(nnzA, 0); - std::vector rowptrB (Browptrlen, 0); + std::vector colIdxA(nnzA, 0); + std::vector rowptrB (Browptrlen, 0); cl_int run_status = 0; run_status = clEnqueueReadBuffer(queue, csrMtx.colIndices, CL_TRUE, 0, - nnzA*sizeof(cl_int), + nnzA*sizeof(size_t), colIdxA.data(), 0, nullptr, nullptr); CLSPARSE_V(run_status, "Reading colIndices from GPU failed"); @@ -265,15 +265,15 @@ class xSpMSpM : public clsparseFunc { run_status = clEnqueueReadBuffer(queue, csrMtx.rowOffsets, 
CL_TRUE, 0, - Browptrlen*sizeof(cl_int), + Browptrlen*sizeof(size_t), rowptrB.data(), 0, nullptr, nullptr); CLSPARSE_V(run_status, "Reading row offsets from GPU failed"); size_t flop = 0; - for (int i = 0; i < nnzA; i++) + for (size_t i = 0; i < nnzA; i++) { - int colIdx = colIdxA[i]; // Get colIdx of A + size_t colIdx = colIdxA[i]; // Get colIdx of A flop += rowptrB[colIdx + 1] - rowptrB[colIdx]; // nnz in 'colIdx'th row of B } diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp index d3b93cb..cd22058 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp @@ -88,7 +88,7 @@ class xSpMdV: public clsparseFunc // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. - return ( sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); + return (sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows)) / time_in_ns(); } std::string bandwidth_formula( ) @@ -105,12 +105,12 @@ class xSpMdV: public clsparseFunc beta = static_cast< T >( pBeta ); // Read sparse data from file and construct a COO matrix from it - int nnz, row, col; + clsparseIdx_t nnz, row, col; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) ); if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); - // Now initialise a CSR matrix from the COO matrix + // Now initialize a CSR matrix from the COO matrix clsparseInitCsrMatrix( &csrMtx ); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; @@ -122,11 +122,11 @@ class xSpMdV: public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - ( csrMtx.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -207,8 +207,8 @@ class xSpMdV: public clsparseFunc if( gpuTimer && cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); - size_t sparseFlops = 2 * csrMtx.num_nonzeros; + clsparseIdx_t sparseBytes = sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); + clsparseIdx_t sparseFlops = 2 * csrMtx.num_nonzeros; cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Print( sparseFlops, "GFLOPs" ); diff --git a/src/benchmarks/clsparse-bench/src/main.cpp b/src/benchmarks/clsparse-bench/src/main.cpp index 9ad9b46..19f4883 100644 --- a/src/benchmarks/clsparse-bench/src/main.cpp +++ b/src/benchmarks/clsparse-bench/src/main.cpp @@ -111,7 +111,7 @@ std::vector< fs::path > 
enumMatrices( const std::string& root_dir ) int main( int argc, char *argv[ ] ) { cl_double alpha, beta; - size_t rows, columns; + clsparseIdx_t rows, columns; size_t profileCount; std::string function; std::string precision; @@ -123,8 +123,8 @@ int main( int argc, char *argv[ ] ) ( "dirpath,d", po::value( &root_dir ), "Matrix directory" ) ( "alpha,a", po::value( &alpha )->default_value( 1.0f ), "specifies the scalar alpha" ) ( "beta,b", po::value( &beta )->default_value( 0.0f ), "specifies the scalar beta" ) - ( "rows", po::value( &rows )->default_value( 16 ), "specifies the number of rows for matrix data" ) - ( "columns", po::value( &columns )->default_value( 16 ), "specifies the number of columns for matrix data" ) + ( "rows", po::value( &rows )->default_value( 16 ), "specifies the number of rows for matrix data" ) + ( "columns", po::value( &columns )->default_value( 16 ), "specifies the number of columns for matrix data" ) ( "function,f", po::value( &function )->default_value( "SpMdV" ), "Sparse functions to test. Options: " "SpMdV, SpMdM, SpMSpM, CG, BiCGStab, Csr2Dense, Dense2Csr, Csr2Coo, Coo2Csr" ) ( "precision,r", po::value( &precision )->default_value( "s" ), "Options: s,d,c,z" ) diff --git a/src/include/clSPARSE-1x.h b/src/include/clSPARSE-1x.h index 05b4b15..6cc77b7 100644 --- a/src/include/clSPARSE-1x.h +++ b/src/include/clSPARSE-1x.h @@ -42,7 +42,7 @@ typedef struct clsparseScalar_ */ typedef struct cldenseVector_ { - cl_int num_values; /*!< Length of dense vector */ + clsparseIdx_t num_values; /*!< Length of dense vector */ cl_mem values; /*!< OpenCL 1.x memory handle */ @@ -61,9 +61,9 @@ typedef struct clsparseCsrMatrix_ { /** @name CSR matrix data */ /**@{*/ - cl_int num_rows; /*!< Number of rows this matrix has if viewed as dense */ - cl_int num_cols; /*!< Number of columns this matrix has if viewed as dense */ - cl_int num_nonzeros; /*!< Number of values in matrix that are non-zero */ + clsparseIdx_t num_rows; /*!< Number of rows this matrix has if viewed as dense */ + clsparseIdx_t num_cols; /*!< Number of columns this matrix has if viewed as dense */ + clsparseIdx_t num_nonzeros; /*!< Number of values in matrix that are non-zero */ /**@}*/ /** @name OpenCL state */ @@ -97,9 +97,9 @@ typedef struct clsparseCooMatrix_ { /** @name COO matrix data */ /**@{*/ - cl_int num_rows; /*!< Number of rows this matrix has if viewed as dense */ - cl_int num_cols; /*!< Number of columns this matrix has if viewed as dense */ - cl_int num_nonzeros; /*!< Number of values in matrix that are non-zero */ + clsparseIdx_t num_rows; /*!< Number of rows this matrix has if viewed as dense */ + clsparseIdx_t num_cols; /*!< Number of columns this matrix has if viewed as dense */ + clsparseIdx_t num_nonzeros; /*!< Number of values in matrix that are non-zero */ /**@}*/ /** @name OpenCL state */ @@ -127,9 +127,9 @@ typedef struct cldenseMatrix_ { /** @name Dense matrix data */ /**@{*/ - size_t num_rows; /*!< Number of rows */ - size_t num_cols; /*!< Number of columns */ - size_t lead_dim; /*! Stride to the next row or column, in units of elements */ + clsparseIdx_t num_rows; /*!< Number of rows */ + clsparseIdx_t num_cols; /*!< Number of columns */ + clsparseIdx_t lead_dim; /*! Stride to the next row or column, in units of elements */ cldenseMajor major; /*! 
Memory layout for dense matrix */ /**@}*/ diff --git a/src/include/clSPARSE-2x.h b/src/include/clSPARSE-2x.h index 9cafbe7..5d905bb 100644 --- a/src/include/clSPARSE-2x.h +++ b/src/include/clSPARSE-2x.h @@ -36,7 +36,7 @@ typedef struct clsparseScalar_ */ typedef struct cldenseVector_ { - cl_int num_values; /*!< Length of dense vector */ + clsparseIdx_t num_values; /*!< Length of dense vector */ void* values; /**< OpenCL 2.0 memory pointer */ } cldenseVector; @@ -50,9 +50,9 @@ typedef struct clsparseCsrMatrix_ { /** @name CSR matrix data */ /**@{*/ - cl_int num_rows; /*!< Number of rows this matrix has if viewed as dense */ - cl_int num_cols; /*!< Number of columns this matrix has if viewed as dense */ - cl_int num_nonzeros; /*!< Number of values in matrix that are non-zero */ + clsparseIdx_t num_rows; /*!< Number of rows this matrix has if viewed as dense */ + clsparseIdx_t num_cols; /*!< Number of columns this matrix has if viewed as dense */ + clsparseIdx_t num_nonzeros; /*!< Number of values in matrix that are non-zero */ /**@}*/ /** @name OpenCL state */ @@ -75,9 +75,9 @@ typedef struct clsparseCooMatrix_ { /** @name COO matrix data */ /**@{*/ - cl_int num_rows; /*!< Number of rows this matrix has if viewed as dense */ - cl_int num_cols; /*!< Number of columns this matrix has if viewed as dense */ - cl_int num_nonzeros; /*!< Number of values in matrix that are non-zero */ + clsparseIdx_t num_rows; /*!< Number of rows this matrix has if viewed as dense */ + clsparseIdx_t num_cols; /*!< Number of columns this matrix has if viewed as dense */ + clsparseIdx_t num_nonzeros; /*!< Number of values in matrix that are non-zero */ /**@}*/ /** @name OpenCL state */ @@ -95,9 +95,9 @@ typedef struct cldenseMatrix_ { /** @name Dense matrix data */ /**@{*/ - size_t num_rows; /*!< Number of rows */ - size_t num_cols; /*!< Number of columns */ - size_t lead_dim; /*! Stride to the next row or column, in units of elements */ + clsparseIdx_t num_rows; /*!< Number of rows */ + clsparseIdx_t num_cols; /*!< Number of columns */ + clsparseIdx_t lead_dim; /*! Stride to the next row or column, in units of elements */ cldenseMajor major; /*! Memory layout for dense matrix */ /**@}*/ diff --git a/src/include/clSPARSE-xx.h b/src/include/clSPARSE-xx.h index 98ebcd9..badf974 100644 --- a/src/include/clSPARSE-xx.h +++ b/src/include/clSPARSE-xx.h @@ -38,4 +38,12 @@ typedef enum _cldenseMajor columnMajor } cldenseMajor; + +#if( CLSPARSE_INDEX_SIZEOF == 8 ) +#error clSPARSE does not yet implement 64-bit indices + typedef cl_ulong clsparseIdx_t; +#else + typedef cl_uint clsparseIdx_t; +#endif + #endif diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index 0d2a9d1..b47906d 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -36,6 +36,7 @@ extern "C" { #if( BUILD_CLVERSION < 200 ) #include "clSPARSE-1x.h" #else +#error clSPARSE does not yet implement OpenCL 2.0 interfaces #include "clSPARSE-2x.h" #endif @@ -496,7 +497,7 @@ extern "C" { * \ingroup FILE */ CLSPARSE_EXPORT clsparseStatus - clsparseHeaderfromFile( cl_int* nnz, cl_int* row, cl_int* col, const char* filePath ); + clsparseHeaderfromFile( clsparseIdx_t* nnz, clsparseIdx_t* row, clsparseIdx_t* col, const char* filePath); /*! 
* \brief Read sparse matrix data from file in single precision COO format diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 5f2e54f..a21a805 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -243,6 +243,11 @@ set( clSPARSE_LIBRARY_TYPE "SHARED" CACHE STRING "Build the clSPARSE library as set_property( CACHE clSPARSE_LIBRARY_TYPE PROPERTY STRINGS SHARED STATIC ) message( STATUS "clSPARSE will build as a '${clSPARSE_LIBRARY_TYPE}' library" ) +# Query the user for which version of OpenCL they wish to build the library for +set( clSPARSE_INDEX_SIZEOF "4" CACHE STRING "Compile the library to assume this index byte size (64-bit indices not implemented yet)" ) +set_property( CACHE clSPARSE_INDEX_SIZEOF PROPERTY STRINGS 4 ) +message( STATUS "clSPARSE will build with '${clSPARSE_INDEX_SIZEOF}' index bitness" ) + add_compiler_export_flags( ) add_library( clSPARSE ${clSPARSE_LIBRARY_TYPE} ${clSPARSE.source.cpp} @@ -263,15 +268,13 @@ add_library( clSPARSE ${clSPARSE_LIBRARY_TYPE} ${clSPARSE.source.cl} ) -# PRIVATE linking prevents transitive library linking of the clBLAS libraries +# PRIVATE linking prevents transitive library linking of dependent libraries target_link_libraries( clSPARSE PRIVATE ${OPENCL_LIBRARIES} ${CMAKE_DL_LIBS} ) #${clBLAS_LIBRARIES} ) -# Package that helps me set visibility for function names exported from shared library -GENERATE_EXPORT_HEADER( clSPARSE ) - set_target_properties( clSPARSE PROPERTIES VERSION ${clSPARSE_VERSION} SOVERSION ${clSPARSE_SOVERSION} ) set_target_properties( clSPARSE PROPERTIES DEBUG_POSTFIX d ) set_target_properties( clSPARSE PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) +target_compile_definitions( clSPARSE PUBLIC CLSPARSE_INDEX_SIZEOF=${clSPARSE_INDEX_SIZEOF} ) # Following Boost conventions of prefixing 'lib' on static built libraries, across all platforms if( ${clSPARSE_LIBRARY_TYPE} STREQUAL "STATIC" ) @@ -298,15 +301,6 @@ endif( ) # VERBATIM # ) -# CPack configuration; include the executable into the package -install( TARGETS clSPARSE - EXPORT clSPARSE-Targets - RUNTIME DESTINATION bin${SUFFIX_BIN} - LIBRARY DESTINATION lib${SUFFIX_LIB} - ARCHIVE DESTINATION lib${SUFFIX_LIB} - INCLUDES DESTINATION include -) - # The following is cmake code to generate a config file package for clSPARSE include( CMakePackageConfigHelpers ) @@ -327,11 +321,16 @@ write_basic_package_version_file( COMPATIBILITY SameMajorVersion ) -# The following installs the basic files of the library -install( FILES - ${CMAKE_CURRENT_BINARY_DIR}/clsparse_export.h - DESTINATION - include +# Package that helps me set visibility for function names exported from shared library +GENERATE_EXPORT_HEADER( clSPARSE ) + +# CPack configuration; include the executable into the package +install( TARGETS clSPARSE + EXPORT clSPARSE-Targets + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB} + INCLUDES DESTINATION include ) # This generates the files that defines the import targets @@ -346,3 +345,11 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/clSPARSEConfigVersion.cmake DESTINATION ${ConfigPackageLocation} ) + +# The following installs the export definitions of the library, so the library may be properly +# dynamically linked +install( FILES + ${CMAKE_CURRENT_BINARY_DIR}/clsparse_export.h + DESTINATION + include +) diff --git a/src/library/blas1/atomic-reduce.hpp b/src/library/blas1/atomic-reduce.hpp index 890de6d..a6452a9 100644 --- 
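Editor's note: the CMake cache variable above is forwarded as a public compile definition (CLSPARSE_INDEX_SIZEOF), and clSPARSE-xx.h turns that definition into the clsparseIdx_t typedef, with the 8-byte branch currently blocked by an #error. A minimal standalone sketch of the same selection pattern follows; MYLIB_INDEX_SIZEOF and mylib_idx_t are hypothetical stand-ins, and plain fixed-width integers replace the cl_uint/cl_ulong OpenCL types.

    // Compile with e.g.  -DMYLIB_INDEX_SIZEOF=4  (the only size the patch allows today).
    #include <cstdint>
    #include <iostream>

    #ifndef MYLIB_INDEX_SIZEOF
    #define MYLIB_INDEX_SIZEOF 4
    #endif

    #if (MYLIB_INDEX_SIZEOF == 8)
    typedef std::uint64_t mylib_idx_t;   // 64-bit indices (clSPARSE #errors on this path for now)
    #else
    typedef std::uint32_t mylib_idx_t;   // 32-bit indices, the current default
    #endif

    int main()
    {
        std::cout << "index width: " << sizeof(mylib_idx_t) * 8 << " bits\n";
    }
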
a/src/library/blas1/atomic-reduce.hpp +++ b/src/library/blas1/atomic-reduce.hpp @@ -41,13 +41,25 @@ atomic_reduce(clsparseScalarPrivate* pR, const clsparseControl control) { assert(wg_size == pX->num_values); - + std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ReduceOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if (typeid(cl_float) == typeid(T)) { std::string options = std::string() + " -DATOMIC_FLOAT"; @@ -77,8 +89,8 @@ atomic_reduce(clsparseScalarPrivate* pR, kWrapper << pR->value; kWrapper << pX->values; - int blocksNum = (pX->num_values + wg_size - 1) / wg_size; - int globalSize = blocksNum * wg_size; + clsparseIdx_t blocksNum = (pX->num_values + wg_size - 1) / wg_size; + clsparseIdx_t globalSize = blocksNum * wg_size; cl::NDRange local(wg_size); cl::NDRange global(globalSize); @@ -107,11 +119,23 @@ atomic_reduce(clsparse::array_base& pR, assert(wg_size == pX.size()); std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ReduceOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if (typeid(cl_float) == typeid(T)) { std::string options = std::string() + " -DATOMIC_FLOAT"; @@ -141,8 +165,8 @@ atomic_reduce(clsparse::array_base& pR, kWrapper << pR.data(); kWrapper << pX.data(); - int blocksNum = (pX.size() + wg_size - 1) / wg_size; - int globalSize = blocksNum * wg_size; + clsparseIdx_t blocksNum = (pX.size() + wg_size - 1) / wg_size; + clsparseIdx_t globalSize = blocksNum * wg_size; cl::NDRange local(wg_size); cl::NDRange global(globalSize); diff --git a/src/library/blas1/cldense-axpby.hpp b/src/library/blas1/cldense-axpby.hpp index c0862fe..d5ecc74 100644 --- a/src/library/blas1/cldense-axpby.hpp +++ b/src/library/blas1/cldense-axpby.hpp @@ -42,11 +42,23 @@ axpby(clsparse::array_base& pR, const int group_size = 256; // this or higher? control->max_wg_size? 
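Editor's note: the branch on control->addressBits that appears in this hunk (and is repeated in nearly every blas1/blas2 file below) always does the same thing: append a -DSIZE_TYPE build option chosen from the device address width. The template arguments of OclTypeTraits are not legible in this flattened hunk, but the GPUADDRESS64WORD case presumably selects the wider OpenCL type. The helper below is only an illustration of that pattern under the assumption that the traits yield the OpenCL C names "ulong" and "uint"; clSPARSE itself keeps the branch inline and append_size_type_define is a hypothetical name.

    #include <iostream>
    #include <string>

    static const unsigned kAddress64 = 64;   // stand-in for GPUADDRESS64WORD

    // Append the SIZE_TYPE kernel define based on the device address width.
    static void append_size_type_define(std::string& params, unsigned address_bits)
    {
        if (address_bits == kAddress64)
            params += " -DSIZE_TYPE=ulong";   // assumed OclTypeTraits name for the 64-bit type
        else
            params += " -DSIZE_TYPE=uint";    // assumed OclTypeTraits name for the 32-bit type
    }

    int main()
    {
        std::string params = "-DVALUE_TYPE=float -DWG_SIZE=256";
        append_size_type_define(params, 64);
        std::cout << params << "\n";   // ... -DSIZE_TYPE=ulong
    }
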
- std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string( group_size ) + " -D" + ElementWiseOperatorTrait::operation; + + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } if(typeid(T) == typeid(cl_double)) { @@ -82,8 +94,8 @@ axpby(clsparse::array_base& pR, << pY.data() << offset; - int blocksNum = (size + group_size - 1) / group_size; - int globalSize = blocksNum * group_size; + clsparseIdx_t blocksNum = (size + group_size - 1) / group_size; + clsparseIdx_t globalSize = blocksNum * group_size; cl::NDRange local(group_size); cl::NDRange global (globalSize); diff --git a/src/library/blas1/cldense-axpy.hpp b/src/library/blas1/cldense-axpy.hpp index 6bbb777..ca2c717 100644 --- a/src/library/blas1/cldense-axpy.hpp +++ b/src/library/blas1/cldense-axpy.hpp @@ -37,11 +37,23 @@ axpy(clsparse::array_base& pR, const int group_size = 256; // this or higher? control->max_wg_size? std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string( group_size ) + " -D" + ElementWiseOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -72,8 +84,8 @@ axpy(clsparse::array_base& pR, << pY.data() << offset; - int blocksNum = (size + group_size - 1) / group_size; - int globalSize = blocksNum * group_size; + clsparseIdx_t blocksNum = (size + group_size - 1) / group_size; + clsparseIdx_t globalSize = blocksNum * group_size; cl::NDRange local(group_size); cl::NDRange global (globalSize); diff --git a/src/library/blas1/cldense-dot.hpp b/src/library/blas1/cldense-dot.hpp index f182ebc..ff77f51 100644 --- a/src/library/blas1/cldense-dot.hpp +++ b/src/library/blas1/cldense-dot.hpp @@ -40,12 +40,24 @@ inner_product (cldenseVectorPrivate* partial, cl_ulong nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DREDUCE_BLOCK_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DN_THREADS=" + std::to_string(nthreads); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -160,12 +172,24 @@ inner_product (clsparse::array_base& partial, cl_ulong nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DREDUCE_BLOCK_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DN_THREADS=" + std::to_string(nthreads); + if (control->addressBits 
== GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); diff --git a/src/library/blas1/cldense-scale.hpp b/src/library/blas1/cldense-scale.hpp index 3a67254..df21e8b 100644 --- a/src/library/blas1/cldense-scale.hpp +++ b/src/library/blas1/cldense-scale.hpp @@ -37,10 +37,22 @@ scale( clsparse::array_base& pResult, //const int group_size = control->max_wg_size; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE="+ OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -69,8 +81,8 @@ scale( clsparse::array_base& pResult, << pAlpha.data() << offset; - int blocksNum = (size + group_size - 1) / group_size; - int globalSize = blocksNum * group_size; + clsparseIdx_t blocksNum = (size + group_size - 1) / group_size; + clsparseIdx_t globalSize = blocksNum * group_size; cl::NDRange local(group_size); cl::NDRange global (globalSize); diff --git a/src/library/blas1/elementwise-transform.hpp b/src/library/blas1/elementwise-transform.hpp index 58b765c..56a349c 100644 --- a/src/library/blas1/elementwise-transform.hpp +++ b/src/library/blas1/elementwise-transform.hpp @@ -60,11 +60,23 @@ elementwise_transform(cldenseVectorPrivate* r, cl_uint wg_size = 256; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ElementWiseOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -84,7 +96,7 @@ elementwise_transform(cldenseVectorPrivate* r, kWrapper << size << r->values << x->values << y->values; - int blocks = (size + wg_size - 1) / wg_size; + clsparseIdx_t blocks = (size + wg_size - 1) / wg_size; cl::NDRange local(wg_size); cl::NDRange global(blocks * wg_size); @@ -127,11 +139,23 @@ elementwise_transform(clsparse::array_base& r, cl_uint wg_size = 256; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ElementWiseOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + cl::Kernel kernel = KernelCache::get(control->queue, "elementwise_transform", "transform", params); @@ -139,7 +163,7 @@ elementwise_transform(clsparse::array_base& r, kWrapper << size << r.data() << x.data() << y.data(); - int blocks = (size + wg_size - 1) / wg_size; + 
clsparseIdx_t blocks = (size + wg_size - 1) / wg_size; cl::NDRange local(wg_size); cl::NDRange global(blocks * wg_size); diff --git a/src/library/blas1/reduce.hpp b/src/library/blas1/reduce.hpp index 3bc5aae..1987fbd 100644 --- a/src/library/blas1/reduce.hpp +++ b/src/library/blas1/reduce.hpp @@ -42,13 +42,25 @@ global_reduce (cldenseVectorPrivate* partial, cl_ulong nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DREDUCE_BLOCK_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DN_THREADS=" + std::to_string(nthreads) + " -D" + ReduceOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -170,13 +182,25 @@ global_reduce (clsparse::array_base& partial, cl_ulong nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DREDUCE_BLOCK_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DN_THREADS=" + std::to_string(nthreads) + " -D" + ReduceOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); diff --git a/src/library/blas2/csrmv-adaptive.hpp b/src/library/blas2/csrmv-adaptive.hpp index 6ecf525..636f712 100644 --- a/src/library/blas2/csrmv-adaptive.hpp +++ b/src/library/blas2/csrmv-adaptive.hpp @@ -39,7 +39,6 @@ csrmv_adaptive( const clsparseScalarPrivate* pAlpha, const cl_uint group_size = 256; std::string params = std::string( ) - + " -DINDEX_TYPE=uint" + " -DROWBITS=" + std::to_string( ROW_BITS ) + " -DWGBITS=" + std::to_string( WG_BITS ) + " -DVALUE_TYPE=" + OclTypeTraits::type @@ -48,6 +47,19 @@ csrmv_adaptive( const clsparseScalarPrivate* pAlpha, + " -DBLOCK_MULTIPLIER=" + std::to_string( BLOCK_MULTIPLIER ) + " -DROWS_FOR_VECTOR=" + std::to_string( ROWS_FOR_VECTOR ); + if( sizeof( clsparseIdx_t ) == 8 ) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type; + params.append(options); + } + std::string options; if(typeid(T) == typeid(cl_double)) { @@ -89,7 +101,7 @@ csrmv_adaptive( const clsparseScalarPrivate* pAlpha, // Setting global work size to half the row block size because we are only // using half the row blocks buffer for actual work. // The other half is used for the extended precision reduction. - cl_uint global_work_size = ( (pCsrMatx->rowBlockSize/2) - 1 ) * group_size; + clsparseIdx_t global_work_size = ((pCsrMatx->rowBlockSize / 2) - 1) * group_size; cl::NDRange local( group_size ); cl::NDRange global( global_work_size > local[ 0 ] ? 
global_work_size : local[ 0 ] ); @@ -121,7 +133,6 @@ csrmv_adaptive( const clsparse::array_base& pAlpha, const cl_uint group_size = 256; std::string params = std::string( ) - + " -DINDEX_TYPE=uint" + " -DROWBITS=" + std::to_string( ROW_BITS ) + " -DWGBITS=" + std::to_string( WG_BITS ) + " -DVALUE_TYPE=" + OclTypeTraits::type @@ -130,6 +141,19 @@ csrmv_adaptive( const clsparse::array_base& pAlpha, + " -DBLOCK_MULTIPLIER=" + std::to_string( BLOCK_MULTIPLIER ) + " -DROWS_FOR_VECTOR=" + std::to_string( ROWS_FOR_VECTOR ); + if( sizeof( clsparseIdx_t ) == 8 ) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type; + params.append(options); + } + std::string options; if(typeid(T) == typeid(cl_double)) { @@ -171,7 +195,7 @@ csrmv_adaptive( const clsparse::array_base& pAlpha, // Setting global work size to half the row block size because we are only // using half the row blocks buffer for actual work. // The other half is used for the extended precision reduction. - cl_uint global_work_size = ( (pCsrMatx->rowBlockSize/2) - 1 ) * group_size; + clsparseIdx_t global_work_size = ((pCsrMatx->rowBlockSize / 2) - 1) * group_size; cl::NDRange local( group_size ); cl::NDRange global( global_work_size > local[ 0 ] ? global_work_size : local[ 0 ] ); diff --git a/src/library/blas2/csrmv-vector.hpp b/src/library/blas2/csrmv-vector.hpp index c376b2a..cdc8ab5 100644 --- a/src/library/blas2/csrmv-vector.hpp +++ b/src/library/blas2/csrmv-vector.hpp @@ -31,10 +31,10 @@ csrmv_vector(const clsparseScalarPrivate* pAlpha, cldenseVectorPrivate* pY, clsparseControl control) { - cl_uint nnz_per_row = pMatx->nnz_per_row(); //average nnz per row - cl_uint wave_size = control->wavefront_size; + clsparseIdx_t nnz_per_row = pMatx->nnz_per_row(); //average nnz per row + clsparseIdx_t wave_size = control->wavefront_size; cl_uint group_size = 256; // 256 gives best performance! - cl_uint subwave_size = wave_size; + clsparseIdx_t subwave_size = wave_size; // adjust subwave_size according to nnz_per_row; // each wavefron will be assigned to the row of the csr matrix @@ -49,13 +49,25 @@ csrmv_vector(const clsparseScalarPrivate* pAlpha, if (nnz_per_row < 4) { subwave_size = 2; } std::string params = std::string() + - "-DINDEX_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); + if (sizeof(clsparseIdx_t) == 8) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params += " -DDOUBLE"; @@ -87,12 +99,12 @@ csrmv_vector(const clsparseScalarPrivate* pAlpha, // subwave takes care of each row in matrix; // predicted number of subwaves to be executed; - cl_uint predicted = subwave_size * pMatx->num_rows; + clsparseIdx_t predicted = subwave_size * pMatx->num_rows; // if NVIDIA is used it does not allow to run the group size // which is not a multiplication of group_size. 
Don't know if that // have an impact on performance - cl_uint global_work_size = + clsparseIdx_t global_work_size = group_size* ((predicted + group_size - 1 ) / group_size); cl::NDRange local(group_size); //cl::NDRange global(predicted > local[0] ? predicted : local[0]); @@ -120,10 +132,10 @@ csrmv_vector(const clsparse::array_base& pAlpha, clsparse::array_base& pY, clsparseControl control) { - cl_uint nnz_per_row = pMatx->nnz_per_row(); //average nnz per row - cl_uint wave_size = control->wavefront_size; + clsparseIdx_t nnz_per_row = pMatx->nnz_per_row(); //average nnz per row + clsparseIdx_t wave_size = control->wavefront_size; cl_uint group_size = 256; // 256 gives best performance! - cl_uint subwave_size = wave_size; + clsparseIdx_t subwave_size = wave_size; // adjust subwave_size according to nnz_per_row; // each wavefron will be assigned to the row of the csr matrix @@ -138,13 +150,26 @@ csrmv_vector(const clsparse::array_base& pAlpha, if (nnz_per_row < 4) { subwave_size = 2; } std::string params = std::string() + - "-DINDEX_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params += " -DDOUBLE"; @@ -178,12 +203,12 @@ csrmv_vector(const clsparse::array_base& pAlpha, // subwave takes care of each row in matrix; // predicted number of subwaves to be executed; - cl_uint predicted = subwave_size * pMatx->num_rows; + clsparseIdx_t predicted = subwave_size * pMatx->num_rows; // if NVIDIA is used it does not allow to run the group size // which is not a multiplication of group_size. Don't know if that // have an impact on performance - cl_uint global_work_size = + clsparseIdx_t global_work_size = group_size* ((predicted + group_size - 1 ) / group_size); cl::NDRange local(group_size); //cl::NDRange global(predicted > local[0] ? predicted : local[0]); diff --git a/src/library/blas3/clsparse-csrmm.hpp b/src/library/blas3/clsparse-csrmm.hpp index 09c5fd7..c5a0b21 100644 --- a/src/library/blas3/clsparse-csrmm.hpp +++ b/src/library/blas3/clsparse-csrmm.hpp @@ -90,10 +90,10 @@ const clsparseScalarPrivate& pBeta, cldenseMatrixPrivate& pDenseC, const clsparseControl control ) { - cl_uint nnz_per_row = pSparseCsrA.nnz_per_row( ); //average nnz per row - cl_uint wave_size = control->wavefront_size; + clsparseIdx_t nnz_per_row = pSparseCsrA.nnz_per_row(); //average nnz per row + clsparseIdx_t wave_size = control->wavefront_size; cl_uint group_size = 256; // 256 gives best performance! 
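Editor's note: two small pieces of launch arithmetic recur in the csrmv/csrmm hunks: shrinking the per-row subwave toward the average number of nonzeros per row, and rounding the predicted thread count up to a whole number of work-groups. Only the final "nnz_per_row < 4 gives subwave 2" step of the cascade is visible in the hunk, so the halving loop below is an illustrative reconstruction, not the library's exact thresholds; all names are stand-ins.

    #include <cstddef>
    #include <iostream>

    typedef std::size_t idx_t;   // stand-in for clsparseIdx_t

    // Illustrative: halve the subwave while it is clearly wider than the average row.
    static idx_t pick_subwave(idx_t nnz_per_row, idx_t wave_size)
    {
        idx_t subwave = wave_size;
        while (subwave > 2 && nnz_per_row < subwave / 2)
            subwave /= 2;
        return subwave;
    }

    // Round the predicted thread count up to a multiple of the work-group size,
    // as the hunk does with group_size * ((predicted + group_size - 1) / group_size).
    static idx_t rounded_global_size(idx_t predicted, idx_t group_size)
    {
        return group_size * ((predicted + group_size - 1) / group_size);
    }

    int main()
    {
        const idx_t rows = 100000, nnz = 1200000, wave = 64, group = 256;
        const idx_t subwave = pick_subwave(nnz / rows, wave);
        std::cout << "subwave " << subwave << ", global size "
                  << rounded_global_size(subwave * rows, group) << "\n";
    }
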
- cl_uint subwave_size = wave_size; + clsparseIdx_t subwave_size = wave_size; // adjust subwave_size according to nnz_per_row; // each wavefron will be assigned to the row of the csr matrix @@ -108,13 +108,26 @@ const clsparseControl control ) if( nnz_per_row < 4 ) { subwave_size = 2; } std::string params = std::string( ) + - "-DINDEX_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string( group_size ) + " -DWAVE_SIZE=" + std::to_string( wave_size ) + " -DSUBWAVE_SIZE=" + std::to_string( subwave_size ); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if( typeid( T ) == typeid( cl_double ) ) { params += " -DDOUBLE"; @@ -142,12 +155,12 @@ const clsparseControl control ) // subwave takes care of each row in matrix; // predicted number of subwaves to be executed; - cl_uint predicted = subwave_size * pSparseCsrA.num_rows; + clsparseIdx_t predicted = subwave_size * pSparseCsrA.num_rows; // if NVIDIA is used it does not allow to run the group size // which is not a multiplication of group_size. Don't know if that // have an impact on performance - cl_uint global_work_size = + clsparseIdx_t global_work_size = group_size* ( ( predicted + group_size - 1 ) / group_size ); cl::NDRange local( group_size ); //cl::NDRange global(predicted > local[0] ? predicted : local[0]); diff --git a/src/library/blas3/clsparse-spm-spm.cpp b/src/library/blas3/clsparse-spm-spm.cpp index d717944..494800a 100644 --- a/src/library/blas3/clsparse-spm-spm.cpp +++ b/src/library/blas3/clsparse-spm-spm.cpp @@ -64,15 +64,14 @@ clsparseStatus compute_nnzCt(int _m, cl_mem csrRowPtrA, cl_mem csrColIndA, cl_me const std::string params = std::string() + "-DINDEX_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type; - - + cl::Kernel kernel = KernelCache::get(control->queue,"SpGEMM_computeNnzCt_kernels", "compute_nnzCt_kernel", params); size_t szLocalWorkSize[1]; size_t szGlobalWorkSize[1]; int num_threads = GROUPSIZE_256; - int num_blocks = ceil((double)_m / (double)num_threads); + size_t num_blocks = ceil((double)_m / (double)num_threads); szLocalWorkSize[0] = num_threads; szGlobalWorkSize[0] = num_blocks * szLocalWorkSize[0]; @@ -466,21 +465,21 @@ clsparseStatus compute_nnzC_Ct_opencl(int *_h_counter_one, cl_mem queue_one, cl_ if (j == 0) { int num_threads = GROUPSIZE_256; - int num_blocks = ceil((double)counter / (double)num_threads); + size_t num_blocks = ceil((double)counter / (double)num_threads); run_status = compute_nnzC_Ct_0(num_threads, num_blocks, j, counter, _h_counter_one[j], queue_one, csrRowPtrC, control); } else if (j == 1) { int num_threads = GROUPSIZE_256; - int num_blocks = ceil((double)counter / (double)num_threads); + size_t num_blocks = ceil((double)counter / (double)num_threads); run_status = compute_nnzC_Ct_1(num_threads, num_blocks, j, counter, _h_counter_one[j], queue_one, csrRowPtrA, csrColIndA, csrValA, csrRowPtrB, csrColIndB, csrValB, csrRowPtrC, csrRowPtrCt, *csrColIndCt, *csrValCt, control); } else if (j > 1 && j <= 32) { int num_threads = 64; //WARPSIZE_NV_2HEAP; - int num_blocks = ceil((double)counter / (double)num_threads); + size_t num_blocks = ceil((double)counter / 
(double)num_threads); run_status = compute_nnzC_Ct_2heap_noncoalesced_local(num_threads, num_blocks, j, counter, _h_counter_one[j], queue_one, csrRowPtrA, csrColIndA, csrValA, csrRowPtrB, csrColIndB, csrValB, csrRowPtrC, csrRowPtrCt, *csrColIndCt, *csrValCt, control); } else if (j > 32 && j <= 64) @@ -676,7 +675,7 @@ int copy_Ct_to_C_opencl(int *counter_one, cl_mem csrValC, cl_mem csrRowPtrC, cl_ if (j == 1) { int num_threads = GROUPSIZE_256; - int num_blocks = ceil((double)counter / (double)num_threads); + size_t num_blocks = ceil((double)counter / (double)num_threads); run_status = copy_Ct_to_C_Single( num_threads, num_blocks, counter, counter_one[j], csrValC, csrRowPtrC, csrColIndC, csrValCt, csrRowPtrCt, csrColIndCt, queue_one, control); } else if (j > 1 && j <= 32) @@ -729,12 +728,12 @@ int copy_Ct_to_C_opencl(int *counter_one, cl_mem csrValC, cl_mem csrRowPtrC, cl_ const clsparseCsrMatrixPrivate* matB = static_cast(sparseMatB); clsparseCsrMatrixPrivate* matC = static_cast(sparseMatC); - int m = matA->num_rows; - int k1 = matA->num_cols; - int k2 = matB->num_rows; - int n = matB->num_cols; - int nnzA = matA->num_nonzeros; - int nnzB = matB->num_nonzeros; + size_t m = matA->num_rows; + size_t k1 = matA->num_cols; + size_t k2 = matB->num_rows; + size_t n = matB->num_cols; + size_t nnzA = matA->num_nonzeros; + size_t nnzB = matB->num_nonzeros; if(k1 != k2) { diff --git a/src/library/include/clSPARSE-1x.hpp b/src/library/include/clSPARSE-1x.hpp index 929ec7d..dcfbd38 100644 --- a/src/library/include/clSPARSE-1x.hpp +++ b/src/library/include/clSPARSE-1x.hpp @@ -196,7 +196,7 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix offValues = offColInd = offRowOff = offRowBlocks = rowBlockSize = 0; } - cl_uint nnz_per_row() const + clsparseIdx_t nnz_per_row() const { return num_nonzeros / num_rows; } @@ -233,7 +233,7 @@ class clsparseCooMatrixPrivate: public clsparseCooMatrix offValues = offColInd = offRowInd = 0; } - cl_uint nnz_per_row( ) const + clsparseIdx_t nnz_per_row( ) const { return num_nonzeros / num_rows; } @@ -259,7 +259,8 @@ class cldenseMatrixPrivate: public cldenseMatrix public: void clear( ) { - num_rows = num_cols = lead_dim = offValues = 0; + num_rows = num_cols = lead_dim = 0; + offValues = 0; major = rowMajor; values = nullptr; } diff --git a/src/library/include/clSPARSE-2x.hpp b/src/library/include/clSPARSE-2x.hpp index 89f5fb0..7a9b47d 100644 --- a/src/library/include/clSPARSE-2x.hpp +++ b/src/library/include/clSPARSE-2x.hpp @@ -182,7 +182,7 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix rowBlockSize = 0; } - cl_uint nnz_per_row() const + clsparseIdx_t nnz_per_row() const { return num_nonzeros / num_rows; } @@ -217,7 +217,7 @@ class clsparseCooMatrixPrivate: public clsparseCooMatrix values = colIndices = rowIndices = nullptr; } - cl_uint nnz_per_row( ) const + clsparseIdx_t nnz_per_row( ) const { return num_nonzeros / num_rows; } diff --git a/src/library/include/clSPARSE-private.hpp b/src/library/include/clSPARSE-private.hpp index 89df7cc..94dd31a 100644 --- a/src/library/include/clSPARSE-private.hpp +++ b/src/library/include/clSPARSE-private.hpp @@ -38,6 +38,12 @@ #include "include/clSPARSE-2x.hpp" #endif +#if( CLSPARSE_INDEX_SIZEOF == 8 ) +#define SIZET "l" +#else +#define SIZET "" +#endif + // Constants used to help generate kernels for the CSR adaptive algorithm; used between coo2csr and csrmv_adaptive const cl_uint WG_BITS = 24; const cl_uint ROW_BITS = 32; @@ -45,4 +51,6 @@ const cl_uint BLKSIZE = 1024; const cl_uint BLOCK_MULTIPLIER = 3; const 
cl_uint ROWS_FOR_VECTOR = 1; +const size_t GPUADDRESS32WORD = 32; +const size_t GPUADDRESS64WORD = 64; #endif diff --git a/src/library/include/external/mmio.h b/src/library/include/external/mmio.h index 2794f0e..4b92cb1 100644 --- a/src/library/include/external/mmio.h +++ b/src/library/include/external/mmio.h @@ -64,7 +64,7 @@ #define mm_set_hermitian(typecode)((typecode)[3]='H') #define mm_clear_typecode(typecode) ((typecode)[0]=(typecode)[1]= \ - (typecode)[2]=' ',(typecode)[3]='G') + (typecode)[2]=' ',(typecode)[3]='G') #define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) @@ -72,26 +72,26 @@ /********************* Matrix Market error codes ***************************/ -#define MM_COULD_NOT_READ_FILE 11 -#define MM_PREMATURE_EOF 12 -#define MM_NOT_MTX 13 -#define MM_NO_HEADER 14 -#define MM_UNSUPPORTED_TYPE 15 -#define MM_LINE_TOO_LONG 16 -#define MM_COULD_NOT_WRITE_FILE 17 +#define MM_COULD_NOT_READ_FILE 11 +#define MM_PREMATURE_EOF 12 +#define MM_NOT_MTX 13 +#define MM_NO_HEADER 14 +#define MM_UNSUPPORTED_TYPE 15 +#define MM_LINE_TOO_LONG 16 +#define MM_COULD_NOT_WRITE_FILE 17 -#define MM_MTX_STR "matrix" -#define MM_ARRAY_STR "array" -#define MM_DENSE_STR "array" +#define MM_MTX_STR "matrix" +#define MM_ARRAY_STR "array" +#define MM_DENSE_STR "array" #define MM_COORDINATE_STR "coordinate" -#define MM_SPARSE_STR "coordinate" -#define MM_COMPLEX_STR "complex" -#define MM_REAL_STR "real" -#define MM_INT_STR "integer" +#define MM_SPARSE_STR "coordinate" +#define MM_COMPLEX_STR "complex" +#define MM_REAL_STR "real" +#define MM_INT_STR "integer" #define MM_GENERAL_STR "general" -#define MM_SYMM_STR "symmetric" -#define MM_HERM_STR "hermitian" -#define MM_SKEW_STR "skew-symmetric" +#define MM_SYMM_STR "symmetric" +#define MM_HERM_STR "hermitian" +#define MM_SKEW_STR "skew-symmetric" #define MM_PATTERN_STR "pattern" #define MM_MAX_LINE_LENGTH 1025 diff --git a/src/library/internal/clsparse-control.cpp b/src/library/internal/clsparse-control.cpp index e118436..13e3e2e 100644 --- a/src/library/internal/clsparse-control.cpp +++ b/src/library/internal/clsparse-control.cpp @@ -81,6 +81,8 @@ clsparseStatus collectEnvParams(clsparseControl control) control->max_compute_units = device.getInfo(); + control->addressBits = device.getInfo(); + #ifdef CL_DEVICE_DOUBLE_FP_CONFIG if ( device.getInfo( ).find("cl_khr_fp64") != std::string::npos || device.getInfo( ).find("cl_amd_fp64") != std::string::npos ) @@ -89,6 +91,8 @@ clsparseStatus collectEnvParams(clsparseControl control) control->dpfp_support = true; } #endif + + return clsparseSuccess; } clsparseControl @@ -114,6 +118,7 @@ clsparseCreateControl( cl_command_queue queue, clsparseStatus *status ) control->async = false; control->extended_precision = false; control->dpfp_support = false; + control->addressBits = 64; // default 64 bits collectEnvParams( control ); @@ -207,7 +212,7 @@ cl_event *event_wait_list ) control->event_wait_list.clear( ); control->event_wait_list.resize( num_events_in_wait_list ); - for( int i = 0; i < num_events_in_wait_list; i++ ) + for( cl_uint i = 0; i < num_events_in_wait_list; i++ ) { control->event_wait_list[ i ] = event_wait_list[ i ]; } diff --git a/src/library/internal/clsparse-control.hpp b/src/library/internal/clsparse-control.hpp index 0c7c705..07c9d83 100644 --- a/src/library/internal/clsparse-control.hpp +++ b/src/library/internal/clsparse-control.hpp @@ -63,6 +63,9 @@ struct _clsparseControl // current device max compute units; cl_uint max_compute_units; + // current device ADDRESS Bits + cl_uint 
addressBits; + //clSPARSE async execution; if true user is responsible to call for WaitForEvent; //otherwise after every kernel call we are syncing internally; cl_bool async; diff --git a/src/library/internal/clsparse-validate.cpp b/src/library/internal/clsparse-validate.cpp index 30794ac..42d7080 100644 --- a/src/library/internal/clsparse-validate.cpp +++ b/src/library/internal/clsparse-validate.cpp @@ -17,6 +17,23 @@ #include "clsparse-validate.hpp" #include "ocl-type-traits.hpp" #include +#include + +template< typename T > +std::string to_string_comma( T num ) +{ + std::string num_string = std::to_string( num ); + + // Signed is important; if length < 3 then we need a negative number + int comma_pos = static_cast( num_string.length( ) ) - 3; + while( comma_pos > 0 ) + { + num_string.insert( comma_pos, "," ); + comma_pos -= 3; + } + + return num_string; +} clsparseStatus validateMemObject(clsparseScalarPrivate &scalar, size_t required_size) @@ -66,8 +83,9 @@ validateMemObject( cl_mem mem, size_t required_size) size_t current_size; clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(current_size), ¤t_size, NULL); - std::cout << "[validateMemObject] Buffer size: " << current_size << " bytes. "; - std::cout << "Required size: " << required_size << " bytes." << std::endl; + + std::cout << "[validateMemObject] Buffer size: " << to_string_comma( current_size ) << " bytes. "; + std::cout << "Required size: " << to_string_comma( required_size ) << " bytes." << std::endl; if (current_size < required_size) return clsparseInvalidSize; } diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index be339b6..fa3dc10 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -26,8 +26,8 @@ clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); - clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - cl_int* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); pCsrMatx->rowBlockSize = pCsrMatx->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); return clsparseSuccess; @@ -45,8 +45,8 @@ clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ) return clsparseOutOfResources; } - clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - cl_int* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); clMemRAII< cl_ulong > rRowBlocks( control->queue( ), pCsrMatx->rowBlocks ); cl_ulong* ulCsrRowBlocks = rRowBlocks.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowBlocksOffset( ), pCsrMatx->rowBlockSize ); diff --git a/src/library/internal/data-types/csr-meta.hpp b/src/library/internal/data-types/csr-meta.hpp index 91a9b06..21d44d2 100644 --- a/src/library/internal/data-types/csr-meta.hpp +++ 
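Editor's note: the to_string_comma helper introduced above walks backwards from the end of the decimal string, inserting a ',' every three digits; the signed comma_pos is what stops the loop cleanly for numbers shorter than four digits. A lightly condensed copy plus a usage check, for illustration only:

    #include <iostream>
    #include <string>

    // Condensed copy of the patch's helper (variable renamed), for illustration.
    template <typename T>
    std::string to_string_comma(T num)
    {
        std::string s = std::to_string(num);
        int comma_pos = static_cast<int>(s.length()) - 3;   // signed: may go negative
        while (comma_pos > 0)
        {
            s.insert(comma_pos, ",");
            comma_pos -= 3;
        }
        return s;
    }

    int main()
    {
        std::cout << to_string_comma(1234567u) << "\n";   // prints 1,234,567
        std::cout << to_string_comma(512u) << "\n";       // prints 512 (loop never runs)
    }
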
b/src/library/internal/data-types/csr-meta.hpp @@ -60,7 +60,7 @@ static inline rowBlockType numThreadsForReduction(const rowBlockType num_rows) return (256 >> (8*sizeof(int)-__builtin_clz(num_rows-1))); #elif defined(_MSC_VER) && (_MSC_VER >= 1400) unsigned long bit_returned; - _BitScanReverse(&bit_returned, (num_rows-1)); + _BitScanReverse( &bit_returned, (num_rows-1) ); return 256 >> (bit_returned+1); #else return flp2(256/num_rows); @@ -91,11 +91,11 @@ static inline rowBlockType numThreadsForReduction(const rowBlockType num_rows) // rowBlockType is currently instantiated as ulong template< typename rowBlockType > -void ComputeRowBlocks( rowBlockType* rowBlocks, size_t& rowBlockSize, const int* rowDelimiters, - const int nRows, const int blkSize, const int blkMultiplier, const int rows_for_vector, const bool allocate_row_blocks = true ) +void ComputeRowBlocks( rowBlockType* rowBlocks, size_t& rowBlockSize, const clsparseIdx_t* rowDelimiters, + const clsparseIdx_t nRows, const int blkSize, const int blkMultiplier, const int rows_for_vector, const bool allocate_row_blocks = true ) { rowBlockType* rowBlocksBase; - int total_row_blocks = 1; // Start at one because of rowBlock[0] + clsparseIdx_t total_row_blocks = 1; // Start at one because of rowBlock[0] if (allocate_row_blocks) { @@ -113,10 +113,10 @@ void ComputeRowBlocks( rowBlockType* rowBlocks, size_t& rowBlockSize, const int* return; } - int consecutive_long_rows = 0; + clsparseIdx_t consecutive_long_rows = 0; for( i = 1; i <= nRows; i++ ) { - int row_length = ( rowDelimiters[ i ] - rowDelimiters[ i - 1 ] ); + clsparseIdx_t row_length = ( rowDelimiters[ i ] - rowDelimiters[ i - 1 ] ); sum += row_length; // The following section of code calculates whether you're moving between @@ -260,7 +260,7 @@ void ComputeRowBlocks( rowBlockType* rowBlocks, size_t& rowBlockSize, const int* rowBlockSize = 2 * total_row_blocks; } -inline size_t ComputeRowBlocksSize( const int* rowDelimiters, const int nRows, const unsigned int blkSize, +inline size_t ComputeRowBlocksSize( const clsparseIdx_t* rowDelimiters, const clsparseIdx_t nRows, const unsigned int blkSize, const unsigned int blkMultiplier, const unsigned int rows_for_vector ) { size_t rowBlockSize; diff --git a/src/library/io/mm-reader.cpp b/src/library/io/mm-reader.cpp index c377520..728fd14 100644 --- a/src/library/io/mm-reader.cpp +++ b/src/library/io/mm-reader.cpp @@ -37,6 +37,7 @@ to copyright protection within the United States. #include #include #include +#include #include #include #include @@ -47,12 +48,17 @@ to copyright protection within the United States. #include "internal/data-types/csr-meta.hpp" #include "internal/clsparse-validate.hpp" +// warning C4996 : 'fopen' : This function or variable may be unsafe. Consider using fopen_s instead. 
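Editor's note: the row-block code above only ever looks at consecutive differences of rowDelimiters, i.e. the per-row nonzero counts, and numThreadsForReduction evaluates to 256 shifted right by ceil(log2(num_rows)), computed with compiler intrinsics (__builtin_clz / _BitScanReverse). The sketch below reproduces both ideas portably with loops; it matches the intrinsic version for num_rows >= 2, and the floor of one thread in the demo is my addition, not library behaviour.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    typedef std::size_t idx_t;   // stand-in for clsparseIdx_t

    // Per-row nonzero counts from CSR row offsets (rowDelimiters holds nRows + 1 entries).
    static std::vector<idx_t> row_lengths(const std::vector<idx_t>& rowDelimiters)
    {
        std::vector<idx_t> len;
        for (std::size_t i = 1; i < rowDelimiters.size(); ++i)
            len.push_back(rowDelimiters[i] - rowDelimiters[i - 1]);
        return len;
    }

    // Loop-based equivalent of 256 >> ceil(log2(num_rows)).
    static idx_t threads_for_reduction(idx_t num_rows)
    {
        idx_t pow2 = 1;
        while (pow2 < num_rows) pow2 <<= 1;               // next power of two >= num_rows
        idx_t threads = 256;
        while (pow2 > 1) { pow2 >>= 1; threads >>= 1; }   // divide by that power of two
        return threads ? threads : 1;                     // demo-only floor of 1
    }

    int main()
    {
        const std::vector<idx_t> offsets = {0, 2, 2, 5, 9};               // 4 rows
        for (idx_t n : row_lengths(offsets)) std::cout << n << " ";       // 2 0 3 4
        std::cout << "\n" << threads_for_reduction(4) << "\n";            // 64
    }
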
+// We use fopen for compatibility between windows & linux +#pragma warning( push ) +#pragma warning( disable : 4996 ) + // Class declarations template struct Coordinate { - int x; - int y; + clsparseIdx_t x; + clsparseIdx_t y; FloatType val; }; @@ -69,9 +75,9 @@ template class MatrixMarketReader { char Typecode[ 4 ]; - int nNZ; - int nRows; - int nCols; + clsparseIdx_t nNZ; + clsparseIdx_t nRows; + clsparseIdx_t nCols; int isSymmetric; int isDoubleMem; Coordinate *unsym_coords; @@ -92,17 +98,17 @@ class MatrixMarketReader int MMReadMtxCrdSize( FILE* infile ); void MMGenerateCOOFromFile( FILE* infile, cl_bool read_explicit_zeroes ); - int GetNumRows( ) + clsparseIdx_t GetNumRows( ) { return nRows; } - int GetNumCols( ) + clsparseIdx_t GetNumCols( ) { return nCols; } - int GetNumNonZeroes( ) + clsparseIdx_t GetNumNonZeroes( ) { return nNZ; } @@ -221,9 +227,9 @@ bool MatrixMarketReader::MMReadFormat( const std::string &filename, c template void FillCoordData( char Typecode[ ], Coordinate *unsym_coords, - int &unsym_actual_nnz, - int ir, - int ic, + clsparseIdx_t &unsym_actual_nnz, + clsparseIdx_t ir, + clsparseIdx_t ic, FloatType val ) { if( mm_is_symmetric( Typecode ) ) @@ -251,23 +257,27 @@ void FillCoordData( char Typecode[ ], template void MatrixMarketReader::MMGenerateCOOFromFile( FILE *infile, cl_bool read_explicit_zeroes ) { - int unsym_actual_nnz = 0; + clsparseIdx_t unsym_actual_nnz = 0; FloatType val; - int ir, ic; + clsparseIdx_t ir, ic; const int exp_zeroes = read_explicit_zeroes; //silence warnings from fscanf (-Wunused-result) - int rv = 0; + clsparseIdx_t rv = 0; - for( int i = 0; i < nNZ; i++ ) + for ( clsparseIdx_t i = 0; i < nNZ; i++) { if( mm_is_real( Typecode ) ) { - if( typeid( FloatType ) == typeid( float ) ) - rv = fscanf( infile, "%d %d %f\n", &ir, &ic, (float*)( &val ) ); + fscanf(infile, "%" SIZET "u", &ir); + fscanf(infile, "%" SIZET "u", &ic); + + if (typeid(FloatType) == typeid(float)) + rv = fscanf(infile, "%f\n", (float*)(&val)); + else if( typeid( FloatType ) == typeid( double ) ) - rv = fscanf( infile, "%d %d %lf\n", &ir, &ic, (double*)( &val ) ); + rv = fscanf( infile, "%lf\n", (double*)( &val ) ); if( exp_zeroes == 0 && val == 0 ) continue; @@ -276,10 +286,13 @@ void MatrixMarketReader::MMGenerateCOOFromFile( FILE *infile, cl_bool } else if( mm_is_integer( Typecode ) ) { + fscanf(infile, "%" SIZET "u", &ir); + fscanf(infile, "%" SIZET "u", &ic); + if(typeid(FloatType) == typeid(float)) - rv = fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)( &val ) ); + rv = fscanf(infile, "%f\n", (float*)( &val ) ); else if(typeid(FloatType) == typeid(double)) - rv = fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)( &val ) ); + rv = fscanf(infile, "%lf\n", (double*)( &val ) ); if( exp_zeroes == 0 && val == 0 ) continue; @@ -289,7 +302,9 @@ void MatrixMarketReader::MMGenerateCOOFromFile( FILE *infile, cl_bool } else if( mm_is_pattern( Typecode ) ) { - rv = fscanf( infile, "%d %d", &ir, &ic ); + rv = fscanf(infile, "%" SIZET "u", &ir); + rv = fscanf(infile, "%" SIZET "u", &ic); + val = static_cast( MAX_RAND_VAL * ( rand( ) / ( RAND_MAX + 1.0 ) ) ); if( exp_zeroes == 0 && val == 0 ) @@ -396,12 +411,27 @@ int MatrixMarketReader::MMReadMtxCrdSize( FILE *infile ) } while( line[ 0 ] == '%' ); /* line[] is either blank or has M,N, nz */ - if( sscanf( line, "%d %d %d", &nRows, &nCols, &nNZ ) == 3 ) +#if defined( _WIN32 ) || defined(_WIN64) + // if( sscanf( line, "%Iu %Iu %Iu", &nRows, &nCols, &nNZ ) == 3 ) // Not working I don't know why? 
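Editor's note: the "%" SIZET "u" formats used in the reader rely on adjacent string-literal concatenation: with 4-byte indices SIZET expands to "" and the format becomes "%u", while the (not yet enabled) 8-byte branch would prepend "l" to give "%lu". A small standalone sketch of that mechanism, using sscanf on an in-memory string instead of a matrix file; MY_INDEX_SIZEOF, MY_SIZET and my_idx_t are illustrative stand-ins.

    #include <cstdio>

    #define MY_INDEX_SIZEOF 4
    #if (MY_INDEX_SIZEOF == 8)
      #define MY_SIZET "l"
      typedef unsigned long my_idx_t;   // assumes an LP64 platform for the 8-byte case
    #else
      #define MY_SIZET ""
      typedef unsigned int my_idx_t;
    #endif

    int main()
    {
        const char* line = "42 7";
        my_idx_t row = 0, col = 0;
        // "%" MY_SIZET "u" concatenates to "%u" here (or "%lu" in the 8-byte branch).
        std::sscanf(line, "%" MY_SIZET "u %" MY_SIZET "u", &row, &col);
        std::printf("row=%u col=%u\n", (unsigned)row, (unsigned)col);
        return 0;
    }
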
+ std::stringstream s(line); + nRows = 0; + nCols = 0; + nNZ = 0; + s >> nRows >> nCols >> nNZ; + if (nRows && nCols && nNZ ) +#else + if( sscanf( line, "%zu %zu %zu", &nRows, &nCols, &nNZ ) == 3 ) +#endif return 0; else do { - num_items_read = fscanf( infile, "%d %d %d", &nRows, &nCols, &nNZ ); + num_items_read = 0; + num_items_read += fscanf( infile, "%" SIZET "u", &nRows ); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + num_items_read += fscanf(infile, "%" SIZET "u", &nCols); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + num_items_read += fscanf(infile, "%" SIZET "u", &nNZ); if( num_items_read == EOF ) return MM_PREMATURE_EOF; } while( num_items_read != 3 ); @@ -413,7 +443,7 @@ int MatrixMarketReader::MMReadMtxCrdSize( FILE *infile ) // Post-condition: clears clsparseCooMatrix, then sets pCooMatx->m, pCooMatx->n // pCooMatx->nnz clsparseStatus -clsparseHeaderfromFile( cl_int* nnz, cl_int* row, cl_int* col, const char* filePath ) +clsparseHeaderfromFile( clsparseIdx_t* nnz, clsparseIdx_t* row, clsparseIdx_t* col, const char* filePath ) { // Check that the file format is matrix market; the only format we can read right now @@ -473,19 +503,19 @@ clsparseSCooMatrixfromFile( clsparseCooMatrix* cooMatx, const char* filePath, cl // Transfers data from CPU buffer to GPU buffers clMemRAII< cl_float > rCooValues( control->queue( ), pCooMatx->values ); - clMemRAII< cl_int > rCooColIndices( control->queue( ), pCooMatx->colIndices ); - clMemRAII< cl_int > rCooRowIndices( control->queue( ), pCooMatx->rowIndices ); + clMemRAII< clsparseIdx_t > rCooColIndices( control->queue( ), pCooMatx->colIndices ); + clMemRAII< clsparseIdx_t > rCooRowIndices( control->queue( ), pCooMatx->rowIndices ); cl_float* fCooValues = rCooValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->valOffset( ), pCooMatx->num_nonzeros ); - cl_int* iCooColIndices = rCooColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); - cl_int* iCooRowIndices = rCooRowIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCooColIndices = rCooColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCooRowIndices = rCooRowIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); Coordinate< cl_float >* coords = mm_reader.GetUnsymCoordinates( ); //JPA:: Coo matrix is need to be sorted as well because we need to have matrix // which is sorted by row and then column, in the mtx files usually is opposite. 
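Editor's note: the Windows branch above falls back to a std::stringstream because the %Iu conversion did not behave as expected; stream extraction sidesteps format specifiers entirely and works unchanged on both platforms. A self-contained sketch of that approach (my_idx_t stands in for clsparseIdx_t, the sizes in main are made up):

    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>

    typedef std::uint32_t my_idx_t;   // stand-in for clsparseIdx_t

    // Parse a Matrix Market size line "rows cols nnz"; returns false on a short or garbled line.
    static bool parse_crd_size(const std::string& line,
                               my_idx_t& rows, my_idx_t& cols, my_idx_t& nnz)
    {
        std::istringstream s(line);
        return static_cast<bool>(s >> rows >> cols >> nnz);
    }

    int main()
    {
        my_idx_t r, c, n;
        if (parse_crd_size("525825 525825 3674625", r, c, n))
            std::cout << r << " x " << c << ", nnz " << n << "\n";
    }
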
std::sort( coords, coords + pCooMatx->num_nonzeros, CoordinateCompare< cl_float > ); - for( cl_int c = 0; c < pCooMatx->num_nonzeros; ++c ) + for( clsparseIdx_t c = 0; c < pCooMatx->num_nonzeros; ++c ) { iCooRowIndices[ c ] = coords[ c ].x; iCooColIndices[ c ] = coords[ c ].y; @@ -523,19 +553,19 @@ clsparseDCooMatrixfromFile( clsparseCooMatrix* cooMatx, const char* filePath, cl // Transfers data from CPU buffer to GPU buffers clMemRAII< cl_double > rCooValues( control->queue( ), pCooMatx->values ); - clMemRAII< cl_int > rCooColIndices( control->queue( ), pCooMatx->colIndices ); - clMemRAII< cl_int > rCooRowIndices( control->queue( ), pCooMatx->rowIndices ); + clMemRAII< clsparseIdx_t > rCooColIndices( control->queue( ), pCooMatx->colIndices ); + clMemRAII< clsparseIdx_t > rCooRowIndices( control->queue( ), pCooMatx->rowIndices ); cl_double* fCooValues = rCooValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->valOffset( ), pCooMatx->num_nonzeros ); - cl_int* iCooColIndices = rCooColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); - cl_int* iCooRowIndices = rCooRowIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCooColIndices = rCooColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCooRowIndices = rCooRowIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); Coordinate< cl_double >* coords = mm_reader.GetUnsymCoordinates( ); //JPA:: Coo matrix is need to be sorted as well because we need to have matrix // which is sorted by row and then column, in the mtx files usually is opposite. 
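Editor's note: CoordinateCompare itself is not shown in this hunk, but the comment states the requirement: entries must be ordered by row first and column second before the COO buffers (or the CSR conversion further down) are filled, because .mtx files are frequently column-ordered. A hedged sketch of such a comparator, with a plain struct in place of the reader's Coordinate template:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    typedef std::uint32_t idx_t;   // stand-in for clsparseIdx_t

    struct Coord { idx_t x; idx_t y; double val; };   // x = row, y = col, as in mm-reader

    // Row-major ordering: by row, then by column within the row.
    static bool coord_less(const Coord& a, const Coord& b)
    {
        return (a.x != b.x) ? (a.x < b.x) : (a.y < b.y);
    }

    int main()
    {
        std::vector<Coord> coords = { {1, 0, 2.0}, {0, 2, 3.0}, {0, 1, 1.0} };
        std::sort(coords.begin(), coords.end(), coord_less);
        for (const Coord& c : coords)
            std::cout << "(" << c.x << "," << c.y << ") " << c.val << "\n";
        // prints (0,1) 1 then (0,2) 3 then (1,0) 2
    }
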
std::sort( coords, coords + pCooMatx->num_nonzeros, CoordinateCompare< cl_double > ); - for( cl_int c = 0; c < pCooMatx->num_nonzeros; ++c ) + for( clsparseIdx_t c = 0; c < pCooMatx->num_nonzeros; ++c ) { iCooRowIndices[ c ] = coords[ c ].x; iCooColIndices[ c ] = coords[ c ].y; @@ -580,12 +610,12 @@ clsparseSCsrMatrixfromFile(clsparseCsrMatrix* csrMatx, const char* filePath, cls return validationStatus; validationStatus = validateMemObject(pCsrMatx->colIndices, - mm_reader.GetNumNonZeroes() * sizeof(cl_int)); + mm_reader.GetNumNonZeroes() * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; - validationStatus = validateMemObject(pCsrMatx->rowOffsets, - (mm_reader.GetNumRows() + 1) * sizeof(cl_int)); + validationStatus = validateMemObject(pCsrMatx->rowOffsets, + (mm_reader.GetNumRows() + 1) * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; } @@ -600,20 +630,20 @@ clsparseSCsrMatrixfromFile(clsparseCsrMatrix* csrMatx, const char* filePath, cls // Transfers data from CPU buffer to GPU buffers clMemRAII< cl_float > rCsrValues( control->queue( ), pCsrMatx->values ); - clMemRAII< cl_int > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); - clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clMemRAII< clsparseIdx_t > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); + clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); cl_float* fCsrValues = rCsrValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->valOffset( ), pCsrMatx->num_nonzeros ); - cl_int* iCsrColIndices = rCsrColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros ); - cl_int* iCsrRowOffsets = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clsparseIdx_t* iCsrColIndices = rCsrColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros ); + clsparseIdx_t* iCsrRowOffsets = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); // The following section of code converts the sparse format from COO to CSR Coordinate< cl_float >* coords = mm_reader.GetUnsymCoordinates( ); std::sort( coords, coords + pCsrMatx->num_nonzeros, CoordinateCompare< cl_float > ); - int current_row = 1; + clsparseIdx_t current_row = 1; iCsrRowOffsets[ 0 ] = 0; - for( int i = 0; i < pCsrMatx->num_nonzeros; i++ ) + for (clsparseIdx_t i = 0; i < pCsrMatx->num_nonzeros; i++) { iCsrColIndices[ i ] = coords[ i ].y; fCsrValues[ i ] = coords[ i ].val; @@ -664,12 +694,12 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl return validationStatus; validationStatus = validateMemObject(pCsrMatx->colIndices, - mm_reader.GetNumNonZeroes() * sizeof(cl_int)); + mm_reader.GetNumNonZeroes() * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; validationStatus = validateMemObject(pCsrMatx->rowOffsets, - (mm_reader.GetNumRows() + 1) * sizeof(cl_int)); + (mm_reader.GetNumRows() + 1) * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; } @@ -683,8 +713,8 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl // Transfers data from CPU buffer to GPU buffers cl_int mapStatus = 0; clMemRAII< cl_double > rCsrValues( control->queue( ), 
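Editor's note: once the coordinates are row-sorted, the CSR row-offset array can be built in a single pass; the hunk shows the loop header and the current_row counter but not the rest of its body, so the count-then-prefix-sum formulation below is a common alternative rather than a verbatim copy of the library's loop. Names are illustrative.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    typedef std::uint32_t idx_t;

    struct Coord { idx_t row; idx_t col; double val; };

    // Build CSR row offsets (num_rows + 1 entries) from row-sorted COO coordinates.
    static std::vector<idx_t> coo_to_csr_offsets(const std::vector<Coord>& coords, idx_t num_rows)
    {
        std::vector<idx_t> offsets(num_rows + 1, 0);
        for (const Coord& c : coords)
            ++offsets[c.row + 1];                  // count nonzeros per row
        for (idx_t r = 0; r < num_rows; ++r)
            offsets[r + 1] += offsets[r];          // prefix sum turns counts into offsets
        return offsets;
    }

    int main()
    {
        const std::vector<Coord> coords = { {0,1,1.0}, {0,2,3.0}, {2,0,2.0} };   // rows 0,0,2
        for (idx_t v : coo_to_csr_offsets(coords, 3)) std::cout << v << " ";     // 0 2 2 3
        std::cout << "\n";
    }
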
pCsrMatx->values); - clMemRAII< cl_int > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); - clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clMemRAII< clsparseIdx_t > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); + clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); cl_double* fCsrValues = rCsrValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, @@ -695,7 +725,7 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl return clsparseInvalidMemObj; } - cl_int* iCsrColIndices = + clsparseIdx_t* iCsrColIndices = rCsrColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros, &mapStatus ); if (mapStatus != CL_SUCCESS) @@ -704,7 +734,7 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl return clsparseInvalidMemObj; } - cl_int* iCsrRowOffsets = + clsparseIdx_t* iCsrRowOffsets = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1, &mapStatus ); if (mapStatus != CL_SUCCESS) @@ -717,9 +747,9 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl Coordinate< cl_double >* coords = mm_reader.GetUnsymCoordinates( ); std::sort( coords, coords + pCsrMatx->num_nonzeros, CoordinateCompare< cl_double > ); - int current_row = 1; + clsparseIdx_t current_row = 1; iCsrRowOffsets[ 0 ] = 0; - for( int i = 0; i < pCsrMatx->num_nonzeros; i++ ) + for (clsparseIdx_t i = 0; i < pCsrMatx->num_nonzeros; i++) { iCsrColIndices[ i ] = coords[ i ].y; fCsrValues[ i ] = coords[ i ].val; @@ -791,3 +821,5 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl // return clsparseSuccess; //} + +#pragma warning( pop ) \ No newline at end of file diff --git a/src/library/kernels/blas1.cl b/src/library/kernels/blas1.cl index a8613d4..df90ed4 100644 --- a/src/library/kernels/blas1.cl +++ b/src/library/kernels/blas1.cl @@ -69,7 +69,7 @@ void axpy(const SIZE_TYPE size, const SIZE_TYPE pZOffset) { - const int index = get_global_id(0); + const SIZE_TYPE index = get_global_id(0); if (index >= size) return; @@ -95,7 +95,7 @@ void axpby(const SIZE_TYPE size, const SIZE_TYPE pZOffset) { - const int index = get_global_id(0); + const SIZE_TYPE index = get_global_id(0); if (index >= size) return; @@ -117,7 +117,7 @@ void scale (const SIZE_TYPE pRSize, __global const VALUE_TYPE* pAlpha, const SIZE_TYPE pAlphaOffset) { - const int i = get_global_id(0); + const SIZE_TYPE i = get_global_id(0); if (i >= pRSize) return; diff --git a/src/library/kernels/csrmm_general.cl b/src/library/kernels/csrmm_general.cl index 8fe67ed..87f3a65 100644 --- a/src/library/kernels/csrmm_general.cl +++ b/src/library/kernels/csrmm_general.cl @@ -75,23 +75,23 @@ csrmv( const INDEX_TYPE num_rows, ) { //const int vectors_per_block = WG_SIZE/SUBWAVE_SIZE; - const int global_id = get_global_id( 0 ); // global workitem id - const int local_id = get_local_id( 0 ); // local workitem id - const int thread_lane = local_id & ( SUBWAVE_SIZE - 1 ); - const int vector_id = global_id / SUBWAVE_SIZE; // global vector id + const INDEX_TYPE global_id = get_global_id( 0 ); // global workitem id + const INDEX_TYPE local_id = get_local_id( 0 ); // local workitem id + const INDEX_TYPE thread_lane = local_id & ( SUBWAVE_SIZE - 1 ); + const INDEX_TYPE vector_id = global_id / SUBWAVE_SIZE; // global vector id //const int vector_lane = local_id / SUBWAVE_SIZE; // vector id 
within the workgroup - const int num_vectors = get_global_size( 0 ) / SUBWAVE_SIZE; + const INDEX_TYPE num_vectors = get_global_size( 0 ) / SUBWAVE_SIZE; const VALUE_TYPE _alpha = alpha[ off_alpha ]; const VALUE_TYPE _beta = beta[ off_beta ]; for( INDEX_TYPE row = vector_id; row < num_rows; row += num_vectors ) { - const int row_start = row_offset[ row ]; - const int row_end = row_offset[ row + 1 ]; + const INDEX_TYPE row_start = row_offset[ row ]; + const INDEX_TYPE row_end = row_offset[ row + 1 ]; VALUE_TYPE sum = (VALUE_TYPE)0; - for( int j = row_start + thread_lane; j < row_end; j += SUBWAVE_SIZE ) + for( INDEX_TYPE j = row_start + thread_lane; j < row_end; j += SUBWAVE_SIZE ) { if( _alpha == 1 ) sum = fma( val[ j ], x[ off_x + ( col[ j ] * ldx ) ], sum ); diff --git a/src/library/kernels/elementwise_transform.cl b/src/library/kernels/elementwise_transform.cl index 1ea47fe..874c4d6 100644 --- a/src/library/kernels/elementwise_transform.cl +++ b/src/library/kernels/elementwise_transform.cl @@ -66,7 +66,7 @@ void transform (const SIZE_TYPE size, __global const VALUE_TYPE* pX, __global const VALUE_TYPE* pY) { - const int index = get_global_id(0); + const SIZE_TYPE index = get_global_id(0); if (index >= size) return; diff --git a/src/library/solvers/preconditioners/preconditioner_utils.hpp b/src/library/solvers/preconditioners/preconditioner_utils.hpp index ab105cb..901431d 100644 --- a/src/library/solvers/preconditioners/preconditioner_utils.hpp +++ b/src/library/solvers/preconditioners/preconditioner_utils.hpp @@ -69,13 +69,26 @@ extract_diagonal(cldenseVectorPrivate* pDiag, std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type - + " -DINDEX_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if (inverse) params.append(" -DOP_DIAG_INVERSE"); @@ -165,13 +178,26 @@ extract_diagonal(clsparse::vector& pDiag, std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type - + " -DINDEX_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if (inverse) params.append(" -DOP_DIAG_INVERSE"); @@ -198,9 +224,9 @@ extract_diagonal(clsparse::vector& pDiag, << pA->colIndices << pA->values; - cl_uint predicted = subwave_size * size; + size_t predicted = subwave_size * size; - cl_uint global_work_size = + size_t global_work_size = wg_size * ((predicted + wg_size - 1 ) / wg_size); cl::NDRange local(wg_size); //cl::NDRange global(predicted > local[0] ? 
predicted : local[0]); diff --git a/src/library/transform/clsparse-coo2csr.cpp b/src/library/transform/clsparse-coo2csr.cpp index 3f95bfc..05aa49f 100644 --- a/src/library/transform/clsparse-coo2csr.cpp +++ b/src/library/transform/clsparse-coo2csr.cpp @@ -40,12 +40,12 @@ clsparseScoo2csr (const clsparseCooMatrix* coo, csr->num_nonzeros = coo->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); csr_col_indices = coo_col_indices; @@ -80,12 +80,12 @@ clsparseDcoo2csr ( const clsparseCooMatrix* coo, csr->num_nonzeros = coo->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); csr_col_indices = coo_col_indices; diff --git a/src/library/transform/clsparse-csr2coo.cpp b/src/library/transform/clsparse-csr2coo.cpp index 4da46ea..a17be04 100644 --- a/src/library/transform/clsparse-csr2coo.cpp +++ b/src/library/transform/clsparse-csr2coo.cpp @@ -40,12 +40,12 @@ clsparseScsr2coo(const clsparseCsrMatrix* csr, coo->num_nonzeros = csr->num_nonzeros; // how to obtain proper type of the matrix indices? 
int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); coo_col_indices = csr_col_indices; @@ -80,12 +80,12 @@ clsparseDcsr2coo(const clsparseCsrMatrix* csr, coo->num_nonzeros = csr->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); coo_col_indices = csr_col_indices; diff --git a/src/library/transform/clsparse-csr2dense.cpp b/src/library/transform/clsparse-csr2dense.cpp index b880f63..38e59b4 100644 --- a/src/library/transform/clsparse-csr2dense.cpp +++ b/src/library/transform/clsparse-csr2dense.cpp @@ -42,7 +42,7 @@ clsparseScsr2dense(const clsparseCsrMatrix* csr, clsparseStatus status; //BUG: For big matrices we might have overflow here; - cl_int dense_size = csr->num_cols * csr->num_rows; + size_t dense_size = csr->num_cols * csr->num_rows; status = validateMemObject(A->values, dense_size * sizeof(cl_float)); @@ -50,8 +50,8 @@ clsparseScsr2dense(const clsparseCsrMatrix* csr, return status; - clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector values (control, csr->values, csr->num_nonzeros); clsparse::vector Avalues (control, A->values, dense_size); @@ -86,7 +86,7 @@ cldenseMatrix* A, clsparseStatus status; //BUG: For big matrices we might have overflow here; - cl_int dense_size = csr->num_cols * csr->num_rows; + size_t dense_size = csr->num_cols * csr->num_rows; status = validateMemObject(A->values, dense_size * sizeof(cl_double)); @@ -94,8 +94,8 @@ cldenseMatrix* A, return status; - clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); 
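// Illustrative sketch (not part of this patch): the dense_size change above widens the
// *result* type, but csr->num_rows and csr->num_cols still carry the matrix index type.
// If that index type is 32 bits on a given build, the multiplication wraps before the
// assignment widens it (e.g. a 70,000 x 70,000 dense result has ~4.9e9 entries, more
// than 32 bits can represent). Casting an operand before multiplying avoids that:
size_t dense_size = static_cast<size_t>(csr->num_rows) * static_cast<size_t>(csr->num_cols);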
clsparse::vector values (control, csr->values, csr->num_nonzeros); clsparse::vector Avalues (control, A->values, dense_size); diff --git a/src/library/transform/clsparse-dense2csr.cpp b/src/library/transform/clsparse-dense2csr.cpp index 08ccd8a..e0b7d9e 100644 --- a/src/library/transform/clsparse-dense2csr.cpp +++ b/src/library/transform/clsparse-dense2csr.cpp @@ -28,7 +28,7 @@ clsparseSdense2csr(const cldenseMatrix* A, clsparseCsrMatrix* csr, const clsparseControl control) { typedef cl_float ValueType; - typedef cl_int IndexType; + typedef clsparseIdx_t IndexType; typedef cl_ulong SizeType; if (!clsparseInitialized) @@ -67,7 +67,6 @@ clsparseSdense2csr(const cldenseMatrix* A, clsparseCsrMatrix* csr, if (status!= clsparseSuccess) return clsparseInvalidKernelExecution; - cl_int cl_status; clsparseCooMatrix coo; clsparseInitCooMatrix(&coo); @@ -100,7 +99,7 @@ clsparseDdense2csr(const cldenseMatrix* A, const clsparseControl control) { typedef cl_double ValueType; - typedef cl_int IndexType; + typedef clsparseIdx_t IndexType; typedef cl_ulong SizeType; if (!clsparseInitialized) @@ -139,7 +138,6 @@ clsparseDdense2csr(const cldenseMatrix* A, if (status!= clsparseSuccess) return clsparseInvalidKernelExecution; - cl_int cl_status; clsparseCooMatrix coo; clsparseInitCooMatrix(&coo); diff --git a/src/library/transform/conversion-utils.hpp b/src/library/transform/conversion-utils.hpp index 12ca3d6..7e2c8eb 100644 --- a/src/library/transform/conversion-utils.hpp +++ b/src/library/transform/conversion-utils.hpp @@ -18,11 +18,13 @@ #ifndef _CLSPARSE_CONVERSION_UTILS_HPP_ #define _CLSPARSE_CONVERSION_UTILS_HPP_ + #include "internal/data-types/clvector.hpp" #include "scan.hpp" #include "reduce-by-key.hpp" #include "blas1/reduce.hpp" + template //index type clsparseStatus indices_to_offsets(clsparse::vector& offsets, @@ -107,8 +109,7 @@ offsets_to_indices(clsparse::vector& indices, typedef typename clsparse::vector::size_type SizeType; assert (num_rows + 1 == offsets.size()); - - + if (!clsparseInitialized) { return clsparseNotInitialized; @@ -139,16 +140,28 @@ offsets_to_indices(clsparse::vector& indices, if (elements_per_row < 8) { subwave_size = 4; } if (elements_per_row < 4) { subwave_size = 2; } - std::string params = std::string () - + " -DINDEX_TYPE=" + OclTypeTraits::type //not used in this kernel but required by program conversion_utils + " -DVALUE_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -171,9 +184,9 @@ offsets_to_indices(clsparse::vector& indices, // subwave takes care of each row in matrix; // predicted number of subwaves to be executed; - cl_uint predicted = subwave_size * num_rows; + SizeType predicted = subwave_size * num_rows; - cl_uint global_work_size = + SizeType global_work_size = group_size* ((predicted + group_size - 1 ) / group_size); cl::NDRange local(group_size); @@ -232,16 +245,28 @@ transform_csr_2_dense(/*csr matrix*/ if (elements_per_row < 16) { subwave_size = 8; } if 
(elements_per_row < 8) { subwave_size = 4; } if (elements_per_row < 4) { subwave_size = 2; } - - - const std::string params = std::string () + + std::string params = std::string () + " -DVALUE_TYPE=" + OclTypeTraits::type - + " -DINDEX_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + cl::Kernel kernel = KernelCache::get(control->queue, "conversion_utils", "transform_csr_to_dense", params); @@ -256,9 +281,9 @@ transform_csr_2_dense(/*csr matrix*/ // subwave takes care of each row in matrix; // predicted number of subwaves to be executed; - cl_uint predicted = subwave_size * num_rows; + SizeType predicted = subwave_size * num_rows; - cl_uint global_work_size = + SizeType global_work_size = group_size* ((predicted + group_size - 1 ) / group_size); cl::NDRange local(group_size); @@ -303,13 +328,26 @@ calculate_num_nonzeros(/*dense matrix*/ if (dense_size < workgroup_size) global_work_size = workgroup_size; - const std::string params = std::string() - + " -DINDEX_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(workgroup_size) + " -DSUBWAVE_SIZE=" + std::to_string(2); //required by program; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + //cl::Kernel kernel = KernelCache::get(control->queue,"dense2csr", "process_scaninput", params); cl::Kernel kernel = KernelCache::get(control->queue,"conversion_utils", "scan_nonzero_locations", params); @@ -396,13 +434,27 @@ dense_to_coo(clsparseCooMatrix* coo, if (dense_size < workgroup_size) global_work_size = workgroup_size; - const std::string params = std::string() - + " -DINDEX_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(workgroup_size) + " -DSUBWAVE_SIZE=" + std::to_string(2); //required by program; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DINDEX_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + + //cl::Kernel kernel = KernelCache::get(control->queue,"dense2csr", "spread_value", params); cl::Kernel kernel = KernelCache::get(control->queue,"conversion_utils", "scatter_coo_locations", params); diff --git a/src/library/transform/reduce-by-key.hpp b/src/library/transform/reduce-by-key.hpp index 6039d7a..27c2a6e 100644 --- a/src/library/transform/reduce-by-key.hpp +++ 
b/src/library/transform/reduce-by-key.hpp @@ -89,11 +89,23 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, // offset calculation { std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + cl::Kernel kernel = KernelCache::get(control->queue, strProgram, "offset_calculation", params); @@ -147,11 +159,23 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, { std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if( typeid(SizeType) == typeid(cl_double) || typeid(ValueType) == typeid(cl_double) || typeid(KeyType) == typeid(cl_double)) @@ -198,11 +222,24 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, // intra block inclusive scan by key { std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + + if( typeid(SizeType) == typeid(cl_double) || typeid(ValueType) == typeid(cl_double) || typeid(KeyType) == typeid(cl_double)) @@ -247,11 +284,23 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, // per block addition by key { std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if( typeid(SizeType) == typeid(cl_double) || typeid(ValueType) == typeid(cl_double) || typeid(KeyType) == typeid(cl_double)) @@ -294,11 +343,23 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, // key value mapping { std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if( 
typeid(SizeType) == typeid(cl_double) || typeid(ValueType) == typeid(cl_double) || typeid(KeyType) == typeid(cl_double)) diff --git a/src/library/transform/scan.hpp b/src/library/transform/scan.hpp index c3c4da4..fe9499e 100644 --- a/src/library/transform/scan.hpp +++ b/src/library/transform/scan.hpp @@ -101,11 +101,23 @@ scan(VectorType& output, const VectorType& input, std::size_t lds = kernel0_WgSize * 2 * sizeof(T); std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel0_WgSize) + " -D" + ElementWiseOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -154,11 +166,23 @@ scan(VectorType& output, const VectorType& input, SizeType workPerThread = sizeScanBuff / kernel1_WgSize; std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel1_WgSize) + " -D" + ElementWiseOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); @@ -200,11 +224,23 @@ scan(VectorType& output, const VectorType& input, std::size_t lds = kernel0_WgSize * sizeof(T); //local mem size std::string params = std::string() - + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel1_WgSize) + " -D" + ElementWiseOperatorTrait::operation; + if (control->addressBits == GPUADDRESS64WORD) + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + else + { + std::string options = std::string() + + " -DSIZE_TYPE=" + OclTypeTraits::type; + params.append(options); + } + if(typeid(T) == typeid(cl_double)) { params.append(" -DDOUBLE"); diff --git a/src/tests/resources/csr_matrix_environment.cpp b/src/tests/resources/csr_matrix_environment.cpp index 957cba2..a0291ec 100644 --- a/src/tests/resources/csr_matrix_environment.cpp +++ b/src/tests/resources/csr_matrix_environment.cpp @@ -21,9 +21,9 @@ CSREnvironment::sMatrixType CSREnvironment::ublasSCsr = CSREnvironment::sMatrixT CSREnvironment::dMatrixType CSREnvironment::ublasDCsr = CSREnvironment::dMatrixType(); -cl_int CSREnvironment::n_rows = 0; -cl_int CSREnvironment::n_cols = 0; -cl_int CSREnvironment::n_vals = 0; +clsparseIdx_t CSREnvironment::n_rows = 0; +clsparseIdx_t CSREnvironment::n_cols = 0; +clsparseIdx_t CSREnvironment::n_vals = 0; clsparseCsrMatrix CSREnvironment::csrSMatrix = clsparseCsrMatrix(); clsparseCsrMatrix CSREnvironment::csrDMatrix = clsparseCsrMatrix(); diff --git a/src/tests/resources/csr_matrix_environment.h b/src/tests/resources/csr_matrix_environment.h index 97a8879..ba81810 100644 --- a/src/tests/resources/csr_matrix_environment.h +++ b/src/tests/resources/csr_matrix_environment.h @@ -39,8 +39,8 @@ class CSREnvironment: public ::testing::Environment // We need this long declaration because index vector need 
to be cl_int. // Also it is more flexible for future use if we will start to play with // row_major / column_major or base indexing which is 0 for now. - using sMatrixType = uBLAS::compressed_matrix >; - using dMatrixType = uBLAS::compressed_matrix >; + using sMatrixType = uBLAS::compressed_matrix >; + using dMatrixType = uBLAS::compressed_matrix >; explicit CSREnvironment( const std::string& path, cl_double alpha, cl_double beta, @@ -68,10 +68,10 @@ class CSREnvironment: public ::testing::Environment csrDMatrix.num_nonzeros * sizeof( cl_double ), NULL, &status ); csrDMatrix.colIndices = ::clCreateBuffer( context, CL_MEM_READ_ONLY, - csrDMatrix.num_nonzeros * sizeof( cl_int ), NULL, &status ); + csrDMatrix.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status); csrDMatrix.rowOffsets = ::clCreateBuffer( context, CL_MEM_READ_ONLY, - ( csrDMatrix.num_rows + 1 ) * sizeof( cl_int ), NULL, &status ); + (csrDMatrix.num_rows + 1) * sizeof( clsparseIdx_t ), NULL, &status); clsparseStatus fileError = clsparseDCsrMatrixfromFile( &csrDMatrix, file_name.c_str( ), CLSE::control, read_explicit_zeroes ); if( fileError != clsparseSuccess ) @@ -83,7 +83,7 @@ class CSREnvironment: public ::testing::Environment clsparseCsrMetaCompute( &csrDMatrix, CLSE::control ); - //reassign the new matrix dimmesnions calculated clsparseCCsrMatrixFromFile to global variables + //reassign the new matrix dimensions calculated clsparseCCsrMatrixFromFile to global variables n_vals = csrDMatrix.num_nonzeros; n_cols = csrDMatrix.num_cols; n_rows = csrDMatrix.num_rows; @@ -104,12 +104,12 @@ class CSREnvironment: public ::testing::Environment 0, NULL, NULL ); copy_status = clEnqueueReadBuffer( queue, csrDMatrix.rowOffsets, CL_TRUE, 0, - ( csrDMatrix.num_rows + 1 ) * sizeof( cl_int ), + ( csrDMatrix.num_rows + 1 ) * sizeof( clsparseIdx_t ), ublasDCsr.index1_data().begin(), 0, NULL, NULL ); copy_status = clEnqueueReadBuffer( queue, csrDMatrix.colIndices, CL_TRUE, 0, - csrDMatrix.num_nonzeros * sizeof( cl_int ), + csrDMatrix.num_nonzeros * sizeof( clsparseIdx_t ), ublasDCsr.index2_data().begin(), 0, NULL, NULL ); @@ -143,12 +143,12 @@ class CSREnvironment: public ::testing::Environment cl_double* dvals = (cl_double*) ::clEnqueueMapBuffer(queue, csrDMatrix.values, CL_TRUE, CL_MAP_READ, 0, csrDMatrix.num_nonzeros * sizeof(cl_double), 0, nullptr, nullptr, &cl_status); // copy the double-precision values over into the single-precision array. 
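// Illustrative sketch (not part of this patch): the clEnqueueReadBuffer calls above copy
// raw bytes sized with sizeof(clsparseIdx_t) straight into ublasDCsr's index arrays, so a
// compile-time guard can keep the host index element type in sync with the device index
// type. dMatrixType and clsparseIdx_t come from the surrounding code; the assert itself is
// only a suggested safety net, not something this patch adds.
static_assert(sizeof(dMatrixType::index_array_type::value_type) == sizeof(clsparseIdx_t),
              "uBLAS index storage must match clsparseIdx_t for the raw buffer reads");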
- for ( int i = 0; i < ublasDCsr.value_data().size(); i++) + for (clsparseIdx_t i = 0; i < ublasDCsr.value_data().size(); i++) ublasSCsr.value_data()[i] = static_cast(ublasDCsr.value_data()[i]); - for ( int i = 0; i < ublasDCsr.index1_data().size(); i++) - ublasSCsr.index1_data()[i] = static_cast(ublasDCsr.index1_data()[i]); - for ( int i = 0; i < ublasDCsr.index2_data().size(); i++) - ublasSCsr.index2_data()[i] = static_cast(ublasDCsr.index2_data()[i]); + for (clsparseIdx_t i = 0; i < ublasDCsr.index1_data().size(); i++) + ublasSCsr.index1_data()[i] = static_cast(ublasDCsr.index1_data()[i]); + for (clsparseIdx_t i = 0; i < ublasDCsr.index2_data().size(); i++) + ublasSCsr.index2_data()[i] = static_cast(ublasDCsr.index2_data()[i]); // copy the values in single precision on host to single precision matrix container on the device copy_status = clEnqueueWriteBuffer( queue, csrSMatrix.values, CL_TRUE, 0, @@ -209,7 +209,7 @@ class CSREnvironment: public ::testing::Environment static sMatrixType ublasSCsr; static dMatrixType ublasDCsr; - static cl_int n_rows, n_cols, n_vals; + static clsparseIdx_t n_rows, n_cols, n_vals; //cl buffers for above matrix definition; diff --git a/src/tests/resources/matrix_utils.h b/src/tests/resources/matrix_utils.h index 10a746d..6afd530 100644 --- a/src/tests/resources/matrix_utils.h +++ b/src/tests/resources/matrix_utils.h @@ -57,7 +57,7 @@ void indicesToOffsets(const std::vector& row_indices, if (row_offsets.size() != (n_rows + 1)) row_offsets.resize(n_rows + 1); - for (int i = 0; i < nnz; i++) + for (clsparseIdx_t i = 0; i < nnz; i++) { if (row_indices[i] != index) { @@ -83,7 +83,7 @@ void offsetsToIndices(const std::vector& row_offsets, row_indices.resize(nnz); for( size_t i = 0; i < n_rows; i++ ) - for (int j = row_offsets[i]; j < row_offsets[i+1]; j++) + for (clsparseIdx_t j = row_offsets[i]; j < row_offsets[i+1]; j++) row_indices[j] = i; } @@ -102,7 +102,7 @@ void sortByRowCol(std::vector& rows, std::vector matrix;//(size); - for (int i = 0; i < size; i++) + for (clsparseIdx_t i = 0; i < size; i++) { matrix.push_back(std::make_tuple(rows[i], cols[i], vals[i])); } @@ -122,14 +122,14 @@ void sortByRowCol(std::vector& rows, ); - for(int i = 0; i < size; i++) + for(clsparseIdx_t i = 0; i < size; i++) std::tie(rows[i], cols[i], vals[i]) = matrix[i]; } //simple spmv for csr matrix to obtain reference results; template -void csrmv(int n_rows, int n_cols, int nnz, +void csrmv(clsparseIdx_t n_rows, clsparseIdx_t n_cols, clsparseIdx_t nnz, const std::vector& row_offsets, const std::vector& col_indices, const std::vector& values, @@ -147,10 +147,10 @@ void csrmv(int n_rows, int n_cols, int nnz, assert(values.size() == nnz); - for (int i = 0; i < n_rows; i++) + for (clsparseIdx_t i = 0; i < n_rows; i++) { VALUE_TYPE sum = (VALUE_TYPE)0; - for(int j = row_offsets[i]; j < row_offsets[i+1]; j++) + for(clsparseIdx_t j = row_offsets[i]; j < row_offsets[i+1]; j++) { sum += alpha * values[j] * x[col_indices[j]]; } @@ -161,7 +161,7 @@ void csrmv(int n_rows, int n_cols, int nnz, //simple spmv for csr matrix to obtain reference results; template -void csrmm( int n_rows, int n_cols, int nnz, +void csrmm(clsparseIdx_t n_rows, clsparseIdx_t n_cols, clsparseIdx_t nnz, const std::vector& row_offsets, const std::vector& col_indices, const std::vector& values, @@ -179,12 +179,12 @@ void csrmm( int n_rows, int n_cols, int nnz, assert( values.size( ) == nnz ); - for( int c = 0; c < n_cols; ++c ) + for (clsparseIdx_t c = 0; c < n_cols; ++c) { - for( int i = 0; i < n_rows; i++ ) + for 
(clsparseIdx_t i = 0; i < n_rows; i++) { VALUE_TYPE sum = (VALUE_TYPE)0; - for( int j = row_offsets[ i ]; j < row_offsets[ i + 1 ]; j++ ) + for (clsparseIdx_t j = row_offsets[i]; j < row_offsets[i + 1]; j++) { sum += alpha * values[ j ] * matB.data[ c + ( col_indices[ j ] * matB.leading_dim ) ]; } @@ -195,7 +195,7 @@ void csrmm( int n_rows, int n_cols, int nnz, //simple spmv for csr matrix to obtain reference results; template -void coomv(int n_rows, int n_cols, int nnz, +void coomv(clsparseIdx_t n_rows, clsparseIdx_t n_cols, clsparseIdx_t nnz, const std::vector& row_indices, const std::vector& col_indices, const std::vector& values, @@ -211,7 +211,7 @@ void coomv(int n_rows, int n_cols, int nnz, assert(col_indices.size() == nnz); assert(values.size() == nnz); - for (int i = 0; i < nnz; i++) + for (clsparseIdx_t i = 0; i < nnz; i++) { y[row_indices[i]] += (alpha * values[i] * x[col_indices[i]]) + beta * y[row_indices[i]]; @@ -224,7 +224,7 @@ void coomv(int n_rows, int n_cols, int nnz, */ template -void csr_transpose(int n_rows, int n_cols, int nnz, +void csr_transpose(clsparseIdx_t n_rows, clsparseIdx_t n_cols, clsparseIdx_t nnz, const std::vector& row_offsets, const std::vector& col_indices, const std::vector& values, @@ -293,13 +293,13 @@ void csr_transpose(int n_rows, int n_cols, int nnz, //This looks like gather / reduce operation. maybe with // help of row_offsets it can be done in parallel mode nicely! //or reduce. but requires atomic due to indirect mem access. - for (int i = 0; i < nnz; i++) + for (clsparseIdx_t i = 0; i < nnz; i++) col_nnz[col_indices[i]] += 1; //calculate col offsets; its easy since we know how many nnz in each col //we have from previous loop row_offsets_t[0] = 0; - for (int i = 1; i <= n_cols; i++) + for (clsparseIdx_t i = 1; i <= n_cols; i++) { row_offsets_t[i] = row_offsets_t[i-1] + col_nnz[i - 1]; col_nnz[i - 1] = 0; @@ -308,9 +308,9 @@ void csr_transpose(int n_rows, int n_cols, int nnz, //calculate row_indices; //this might look similar to the csr multiply algorithm //or offsets to indices on gpu - for (int i = 0; i < n_rows; i++) + for (clsparseIdx_t i = 0; i < n_rows; i++) { - for (int j = row_offsets[i]; j < row_offsets[i+1]; j++) + for (clsparseIdx_t j = row_offsets[i]; j < row_offsets[i + 1]; j++) { VALUE_TYPE v = values[j]; int k = col_indices[j]; @@ -326,7 +326,7 @@ void csr_transpose(int n_rows, int n_cols, int nnz, //simple spmv for csr matrix to obtain reference results; template -void csr2dense(int n_rows, int n_cols, int nnz, +void csr2dense(clsparseIdx_t n_rows, clsparseIdx_t n_cols, clsparseIdx_t nnz, const std::vector& row_offsets, const std::vector& col_indices, const std::vector& values, @@ -339,9 +339,9 @@ void csr2dense(int n_rows, int n_cols, int nnz, assert(values.size() == nnz); - for (int i = 0; i < n_rows; i++) + for (clsparseIdx_t i = 0; i < n_rows; i++) { - for(int j = row_offsets[i]; j < row_offsets[i+1]; j++) + for (clsparseIdx_t j = row_offsets[i]; j < row_offsets[i + 1]; j++) { dense[i * n_cols + col_indices[j]] = values[j]; } @@ -351,7 +351,7 @@ void csr2dense(int n_rows, int n_cols, int nnz, //simple spmv for csr matrix to obtain reference results; template -void csr2coo(int n_rows, int n_cols, int nnz, +void csr2coo(clsparseIdx_t n_rows, clsparseIdx_t n_cols, clsparseIdx_t nnz, const std::vector& csr_row_offsets, const std::vector& csr_col_indices, const std::vector& csr_values, @@ -373,9 +373,9 @@ void csr2coo(int n_rows, int n_cols, int nnz, copy(csr_col_indices.begin(), csr_col_indices.end(), coo_col_indices.begin()); 
copy(csr_values.begin(), csr_values.end(), coo_values.begin()); - for (int i = 0; i < n_rows; i++) + for (clsparseIdx_t i = 0; i < n_rows; i++) { - for(int j = csr_row_offsets[i]; j < csr_row_offsets[i+1]; j++) + for (clsparseIdx_t j = csr_row_offsets[i]; j < csr_row_offsets[i + 1]; j++) { coo_row_indices[j] = i; } diff --git a/src/tests/resources/sparse_matrix_environment.cpp b/src/tests/resources/sparse_matrix_environment.cpp index 2a4789c..8acbfef 100644 --- a/src/tests/resources/sparse_matrix_environment.cpp +++ b/src/tests/resources/sparse_matrix_environment.cpp @@ -22,9 +22,9 @@ CSRSparseEnvironment::sMatrixType CSRSparseEnvironment::ublasSCsrA = CSRSparseEn CSRSparseEnvironment::sMatrixType CSRSparseEnvironment::ublasSCsrB = CSRSparseEnvironment::sMatrixType(); #endif -cl_int CSRSparseEnvironment::n_rows = 0; -cl_int CSRSparseEnvironment::n_cols = 0; -cl_int CSRSparseEnvironment::n_vals = 0; +clsparseIdx_t CSRSparseEnvironment::n_rows = 0; +clsparseIdx_t CSRSparseEnvironment::n_cols = 0; +clsparseIdx_t CSRSparseEnvironment::n_vals = 0; clsparseCsrMatrix CSRSparseEnvironment::csrSMatrix = clsparseCsrMatrix(); diff --git a/src/tests/resources/sparse_matrix_environment.h b/src/tests/resources/sparse_matrix_environment.h index ca0a4fa..0a64dcf 100644 --- a/src/tests/resources/sparse_matrix_environment.h +++ b/src/tests/resources/sparse_matrix_environment.h @@ -40,7 +40,7 @@ namespace uBLAS = boost::numeric::ublas; // Currently only single precision is considered class CSRSparseEnvironment : public ::testing::Environment { public: - using sMatrixType = uBLAS::compressed_matrix >; + using sMatrixType = uBLAS::compressed_matrix >; //using dMatrixType = uBLAS::compressed_matrix >; explicit CSRSparseEnvironment(const std::string& path, cl_command_queue queue, cl_context context, cl_bool explicit_zeroes = true) @@ -288,9 +288,9 @@ class CSRSparseEnvironment : public ::testing::Environment { //static sMatrixType ublasCsrB; //static sMatrixType ublasCsrC; - static cl_int n_rows; - static cl_int n_cols; - static cl_int n_vals; + static clsparseIdx_t n_rows; + static clsparseIdx_t n_cols; + static clsparseIdx_t n_vals; //cl buffers ; static clsparseCsrMatrix csrSMatrix; // input 1 diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp index 090e491..f99fe2a 100644 --- a/src/tests/test-blas2.cpp +++ b/src/tests/test-blas2.cpp @@ -144,17 +144,17 @@ class Blas2 : public ::testing::Test ASSERT_EQ(clsparseSuccess, status); float* vals = (float*)&CSRE::ublasSCsr.value_data()[0]; - int* rows = &CSRE::ublasSCsr.index1_data()[0]; - int* cols = &CSRE::ublasSCsr.index2_data()[0]; - for (int row = 0; row < CSRE::n_rows; row++) + clsparseIdx_t* rows = &CSRE::ublasSCsr.index1_data()[0]; + clsparseIdx_t* cols = &CSRE::ublasSCsr.index2_data()[0]; + for ( clsparseIdx_t row = 0; row < CSRE::n_rows; row++) { // Summation done at a higher precision to decrease // summation errors from rounding. 
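// Illustrative sketch (not part of this patch): the double-precision check further down
// accumulates through two_sum(temp_sum, x, &sumk_err). A generic Knuth-style TwoSum that
// matches that call pattern looks like the following; the project's actual helper may
// differ in its details.
template <typename T>
T two_sum(T a, T b, T* accumulated_error)
{
    T sum = a + b;
    T b_virtual = sum - a;           // portion of b that made it into sum
    T a_virtual = sum - b_virtual;   // portion of a that made it into sum
    *accumulated_error += (a - a_virtual) + (b - b_virtual);  // round-off lost by sum
    return sum;                      // caller folds *accumulated_error back in at the end
}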
hY[row] *= hBeta; - int row_end = rows[row+1]; + clsparseIdx_t row_end = rows[row + 1]; double temp_sum; temp_sum = hY[row]; - for (int i = rows[row]; i < rows[row+1]; i++) + for ( clsparseIdx_t i = rows[row]; i < rows[row + 1]; i++) { // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]]; temp_sum += hAlpha * vals[i] * hX[cols[i]]; @@ -171,7 +171,7 @@ class Blas2 : public ::testing::Test uint64_t max_ulps = 0; uint64_t min_ulps = UINT64_MAX; uint64_t total_ulps = 0; - for (int i = 0; i < hY.size(); i++) + for (size_t i = 0; i < hY.size(); i++) { long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]); intDiff = llabs(intDiff); @@ -196,7 +196,7 @@ class Blas2 : public ::testing::Test } #endif - for (int i = 0; i < hY.size(); i++) + for (size_t i = 0; i < hY.size(); i++) { double compare_val = 0.; if (extended_precision) @@ -229,9 +229,9 @@ class Blas2 : public ::testing::Test ASSERT_EQ(clsparseSuccess, status); double* vals = (double*)&CSRE::ublasDCsr.value_data()[0]; - int* rows = &CSRE::ublasDCsr.index1_data()[0]; - int* cols = &CSRE::ublasDCsr.index2_data()[0]; - for (int row = 0; row < CSRE::n_rows; row++) + clsparseIdx_t* rows = &CSRE::ublasDCsr.index1_data()[0]; + clsparseIdx_t* cols = &CSRE::ublasDCsr.index2_data()[0]; + for ( clsparseIdx_t row = 0; row < CSRE::n_rows; row++) { // Summation done using a compensated summation to decrease // summation errors from rounding. This allows us to get @@ -239,11 +239,11 @@ class Blas2 : public ::testing::Test // This method is like performing summation at quad precision and // casting down to double in the end. hY[row] *= hBeta; - int row_end = rows[row+1]; + clsparseIdx_t row_end = rows[row + 1]; double temp_sum; temp_sum = hY[row]; T sumk_err = 0.; - for (int i = rows[row]; i < rows[row+1]; i++) + for ( clsparseIdx_t i = rows[row]; i < rows[row + 1]; i++) { // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]]; temp_sum = two_sum(temp_sum, hAlpha*vals[i]*hX[cols[i]], &sumk_err); @@ -260,7 +260,7 @@ class Blas2 : public ::testing::Test uint64_t max_ulps = 0; uint64_t min_ulps = ULLONG_MAX; uint64_t total_ulps = 0; - for (int i = 0; i < hY.size(); i++) + for (size_t i = 0; i < hY.size(); i++) { long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]); intDiff = llabs(intDiff); @@ -284,7 +284,7 @@ class Blas2 : public ::testing::Test std::cout << "Double Average ulps: " << (double)total_ulps/(double)hY.size() << " (Size: " << hY.size() << ")" << std::endl; #endif - for (int i = 0; i < hY.size(); i++) + for (size_t i = 0; i < hY.size(); i++) { double compare_val = fabs(hY[i]*1e-14); if (compare_val < 10*DBL_EPSILON) @@ -294,7 +294,7 @@ class Blas2 : public ::testing::Test } else { - for (int i = 0; i < hY.size(); i++) + for (size_t i = 0; i < hY.size(); i++) { double compare_val = 0.; if (boost::math::isnormal(hY[i])) diff --git a/src/tests/test-blas3.cpp b/src/tests/test-blas3.cpp index 8c57ad9..f306ecf 100644 --- a/src/tests/test-blas3.cpp +++ b/src/tests/test-blas3.cpp @@ -119,9 +119,9 @@ class TestCSRSpGeMM : public ::testing::Test { clsparseInitCsrMatrix(&csrMatrixC); }// end - void checkRowOffsets(std::vector& amdRowPtr) + void checkRowOffsets(std::vector& amdRowPtr) { - for (int i = 0; i < amdRowPtr.size(); i++) + for (size_t i = 0; i < amdRowPtr.size(); i++) { //ASSERT_EQ(amdRowPtr[i], this->C.index1_data()[i]); //EXPECT_EQ(amdRowPtr[i], this->C.index1_data()[i]); @@ -133,7 +133,7 @@ class TestCSRSpGeMM : public ::testing::Test { } }// end - void 
checkInDense(std::vector& amdRowPtr, std::vector& amdColIndices, std::vector& amdVals) + void checkInDense(std::vector& amdRowPtr, std::vector& amdColIndices, std::vector& amdVals) { uBLAS::mapped_matrix sparseDense(csrMatrixC.num_rows, csrMatrixC.num_cols, 0); uBLAS::mapped_matrix boostDense(csrMatrixC.num_rows, csrMatrixC.num_cols, 0); @@ -143,25 +143,25 @@ class TestCSRSpGeMM : public ::testing::Test { // Therefore converting to dense and verifying the output in dense format // Convert CSR to Dense - for (int i = 0; i < amdRowPtr.size() - 1; i++) + for (size_t i = 0; i < amdRowPtr.size() - 1; i++) { // i corresponds to row index - for (int j = amdRowPtr[i]; j < amdRowPtr[i + 1]; j++) + for (size_t j = amdRowPtr[i]; j < amdRowPtr[i + 1]; j++) sparseDense(i, amdColIndices[j]) = amdVals[j]; } - for (int i = 0; i < this->C.index1_data().size() - 1; i++) + for (size_t i = 0; i < this->C.index1_data().size() - 1; i++) { - for (int j = this->C.index1_data()[i]; j < this->C.index1_data()[i + 1]; j++) + for (size_t j = this->C.index1_data()[i]; j < this->C.index1_data()[i + 1]; j++) boostDense(i, this->C.index2_data()[j]) = this->C.value_data()[j]; } bool brelativeErrorFlag = false; bool babsErrorFlag = false; - for (int i = 0; i < csrMatrixC.num_rows; i++) + for (size_t i = 0; i < csrMatrixC.num_rows; i++) { - for (int j = 0; j < csrMatrixC.num_cols; j++) + for (size_t j = 0; j < csrMatrixC.num_cols; j++) { //ASSERT_EQ(boostDense(i, j), sparseDense(i, j)); #ifdef _DEBUG_SpMxSpM_ @@ -177,9 +177,9 @@ class TestCSRSpGeMM : public ::testing::Test { } } // Relative Error - for (int i = 0; i < csrMatrixC.num_rows; i++) + for (size_t i = 0; i < csrMatrixC.num_rows; i++) { - for (int j = 0; j < csrMatrixC.num_cols; j++) + for (size_t j = 0; j < csrMatrixC.num_cols; j++) { float diff = fabs(boostDense(i, j) - sparseDense(i, j)); float ratio = diff / boostDense(i, j); @@ -225,7 +225,7 @@ TYPED_TEST(TestCSRSpGeMM, square) { using SPER = CSRSparseEnvironment; using CLSE = ClSparseEnvironment; - typedef typename uBLAS::compressed_matrix > uBlasCSRM; + typedef typename uBLAS::compressed_matrix > uBlasCSRM; cl::Event event; clsparseEnableAsync(CLSE::control, true); @@ -245,8 +245,8 @@ TYPED_TEST(TestCSRSpGeMM, square) //std::cout << "nrows =" << (this->csrMatrixC).num_rows << std::endl; //std::cout << "nnz =" << (this->csrMatrixC).num_nonzeros << std::endl; - std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix - std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices + std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix + std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); @@ -262,14 +262,14 @@ TYPED_TEST(TestCSRSpGeMM, square) cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.colIndices, CL_TRUE, 0, - (this->csrMatrixC).num_nonzeros * sizeof(int), resultColIndices.data(), 0, NULL, NULL); + (this->csrMatrixC).num_nonzeros * sizeof(size_t), resultColIndices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.rowOffsets, CL_TRUE, 0, - ((this->csrMatrixC).num_rows + 1) * sizeof(int), resultRowPtr.data(), 0, NULL, NULL); + ((this->csrMatrixC).num_rows + 1) * sizeof(size_t), resultRowPtr.data(), 0, NULL, NULL); 
EXPECT_EQ(CL_SUCCESS, cl_status); @@ -311,13 +311,13 @@ TYPED_TEST(TestCSRSpGeMM, square) else { /* Check Col Indices */ - for (int i = 0; i < resultColIndices.size(); i++) + for (size_t i = 0; i < resultColIndices.size(); i++) { ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); } /* Check Values */ - for (int i = 0; i < resultVals.size(); i++) + for (size_t i = 0; i < resultVals.size(); i++) { //TODO: how to define the tolerance ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.1); @@ -346,7 +346,7 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) { using SPER = CSRSparseEnvironment; using CLSE = ClSparseEnvironment; - typedef typename uBLAS::compressed_matrix > uBlasCSRM; + typedef typename uBLAS::compressed_matrix > uBlasCSRM; cl::Event event; clsparseEnableAsync(CLSE::control, true); @@ -380,8 +380,8 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) event.wait(); - std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix - std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices + std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix + std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); @@ -397,14 +397,14 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.colIndices, CL_TRUE, 0, - (this->csrMatrixC).num_nonzeros * sizeof(int), resultColIndices.data(), 0, NULL, NULL); + (this->csrMatrixC).num_nonzeros * sizeof(size_t), resultColIndices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.rowOffsets, CL_TRUE, 0, - ((this->csrMatrixC).num_rows + 1) * sizeof(int), resultRowPtr.data(), 0, NULL, NULL); + ((this->csrMatrixC).num_rows + 1) * sizeof(size_t), resultRowPtr.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); @@ -426,13 +426,13 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) else { /* Check Col Indices */ - for (int i = 0; i < resultColIndices.size(); i++) + for (size_t i = 0; i < resultColIndices.size(); i++) { ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); } /* Check Values */ - for (int i = 0; i < resultVals.size(); i++) + for (size_t i = 0; i < resultVals.size(); i++) { //TODO: how to define the tolerance ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.0); @@ -620,15 +620,15 @@ TYPED_TEST(TestCSRMM, multiply) if(typeid(TypeParam) == typeid(float)) - for (int l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) - for( int i = 0; i < this->C.data().size(); i++ ) + for (size_t l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) + for (size_t i = 0; i < this->C.data().size(); i++) { ASSERT_NEAR(this->C.data()[i], result[i], 5e-3); } if(typeid(TypeParam) == typeid(double)) - for (int l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) - for( int i = 0; i < this->C.data().size(); i++ ) + for (size_t l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) + for (size_t i = 0; i < this->C.data().size(); i++) { ASSERT_NEAR(this->C.data()[i], result[i], 5e-10); }; diff --git a/src/tests/test-conversion.cpp b/src/tests/test-conversion.cpp index 2fce8fc..ee523bd 100644 --- a/src/tests/test-conversion.cpp +++ b/src/tests/test-conversion.cpp @@ -304,7 +304,7 @@ class MatrixConversion : public ::testing::Test void test_coo_to_csr() { - cl_int 
nnz, num_rows, num_cols; + clsparseIdx_t nnz, num_rows, num_cols; clsparseStatus status; cl_int cl_status; @@ -368,7 +368,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(CL_SUCCESS, cl_status); // Compare values; - for (int i = 0; i < values.size(); i++) + for (clsparseIdx_t i = 0; i < values.size(); i++) EXPECT_FLOAT_EQ(values[i], CSRE::ublasSCsr.value_data()[i]); cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrSMatrix.colIndices, @@ -377,7 +377,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(CL_SUCCESS, cl_status); // Compare column indices - for (int i = 0; i < col_indices.size(); i++) + for (clsparseIdx_t i = 0; i < col_indices.size(); i++) ASSERT_EQ(col_indices[i], CSRE::ublasSCsr.index2_data()[i]); @@ -387,7 +387,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(CL_SUCCESS, cl_status); // Compare row offsets - for (int i = 0; i < row_offsets.size(); i++) + for (clsparseIdx_t i = 0; i < row_offsets.size(); i++) ASSERT_EQ(row_offsets[i], CSRE::ublasSCsr.index1_data()[i]); } @@ -498,16 +498,16 @@ class MatrixConversion : public ::testing::Test // Generate reference. float* vals = (float*)&CSRE::ublasSCsr.value_data()[0]; - int* rows = &CSRE::ublasSCsr.index1_data()[0]; - int* cols = &CSRE::ublasSCsr.index2_data()[0]; + clsparseIdx_t* rows = &CSRE::ublasSCsr.index1_data()[0]; + clsparseIdx_t* cols = &CSRE::ublasSCsr.index2_data()[0]; - int* coo_rows = new int[CSRE::n_vals]; - int* coo_cols = new int[CSRE::n_vals]; + clsparseIdx_t* coo_rows = new clsparseIdx_t[CSRE::n_vals]; + clsparseIdx_t* coo_cols = new clsparseIdx_t[CSRE::n_vals]; float* coo_vals = new float[CSRE::n_vals]; - int total_vals = 0; - for (int row = 0; row < CSRE::n_rows; row++) + clsparseIdx_t total_vals = 0; + for ( clsparseIdx_t row = 0; row < CSRE::n_rows; row++) { - for (int i = rows[row]; i < rows[row+1]; i++) + for ( clsparseIdx_t i = rows[row]; i < rows[row + 1]; i++) { coo_rows[total_vals] = row; coo_cols[total_vals] = cols[i]; @@ -519,28 +519,28 @@ class MatrixConversion : public ::testing::Test // Compare result // Download results from GPU - std::vector row_indices(cooMatrix.num_nonzeros); - std::vector col_indices(cooMatrix.num_nonzeros); + std::vector row_indices(cooMatrix.num_nonzeros); + std::vector col_indices(cooMatrix.num_nonzeros); std::vector values(cooMatrix.num_nonzeros); // row indices cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.rowIndices, - CL_TRUE, 0, row_indices.size() * sizeof(cl_int), + CL_TRUE, 0, row_indices.size() * sizeof( clsparseIdx_t ), row_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < row_indices.size(); i++) + for (clsparseIdx_t i = 0; i < row_indices.size(); i++) ASSERT_EQ(coo_rows[i], row_indices[i]); // col indices cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.colIndices, - CL_TRUE, 0, col_indices.size() * sizeof(cl_int), + CL_TRUE, 0, col_indices.size() * sizeof( clsparseIdx_t ), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < col_indices.size(); i++) + for (clsparseIdx_t i = 0; i < col_indices.size(); i++) ASSERT_EQ(coo_cols[i], col_indices[i]); @@ -550,7 +550,7 @@ class MatrixConversion : public ::testing::Test values.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < values.size(); i++) + for (clsparseIdx_t i = 0; i < values.size(); i++) EXPECT_FLOAT_EQ(coo_vals[i], values[i]); delete[] coo_rows; @@ -573,16 +573,16 @@ class MatrixConversion : public ::testing::Test // Generate 
reference; double* vals = (double*)&CSRE::ublasDCsr.value_data()[0]; - int* rows = &CSRE::ublasDCsr.index1_data()[0]; - int* cols = &CSRE::ublasDCsr.index2_data()[0]; + clsparseIdx_t* rows = &CSRE::ublasDCsr.index1_data()[0]; + clsparseIdx_t* cols = &CSRE::ublasDCsr.index2_data()[0]; - int* coo_rows = new int[CSRE::n_vals]; - int* coo_cols = new int[CSRE::n_vals]; + clsparseIdx_t* coo_rows = new clsparseIdx_t[CSRE::n_vals]; + clsparseIdx_t* coo_cols = new clsparseIdx_t[CSRE::n_vals]; double* coo_vals = new double[CSRE::n_vals]; - int total_vals = 0; - for (int row = 0; row < CSRE::n_rows; row++) + clsparseIdx_t total_vals = 0; + for ( clsparseIdx_t row = 0; row < CSRE::n_rows; row++) { - for (int i = rows[row]; i < rows[row+1]; i++) + for ( clsparseIdx_t i = rows[row]; i < rows[row + 1]; i++) { coo_rows[total_vals] = row; coo_cols[total_vals] = cols[i]; @@ -594,29 +594,29 @@ class MatrixConversion : public ::testing::Test // Compare result // Download results from GPU - std::vector row_indices(cooMatrix.num_nonzeros); - std::vector col_indices(cooMatrix.num_nonzeros); + std::vector row_indices(cooMatrix.num_nonzeros); + std::vector col_indices(cooMatrix.num_nonzeros); std::vector values(cooMatrix.num_nonzeros); // row indices cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.rowIndices, - CL_TRUE, 0, row_indices.size() * sizeof(cl_int), + CL_TRUE, 0, row_indices.size() * sizeof( clsparseIdx_t ), row_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < row_indices.size(); i++) + for (clsparseIdx_t i = 0; i < row_indices.size(); i++) ASSERT_EQ(coo_rows[i], row_indices[i]); // col indices cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.colIndices, - CL_TRUE, 0, col_indices.size() * sizeof(cl_int), + CL_TRUE, 0, col_indices.size() * sizeof( clsparseIdx_t ), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < col_indices.size(); i++) + for (clsparseIdx_t i = 0; i < col_indices.size(); i++) ASSERT_EQ(coo_cols[i], col_indices[i]); @@ -626,7 +626,7 @@ class MatrixConversion : public ::testing::Test values.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < values.size(); i++) + for (clsparseIdx_t i = 0; i < values.size(); i++) EXPECT_DOUBLE_EQ(coo_vals[i], values[i]); delete[] coo_rows; @@ -679,8 +679,8 @@ int main (int argc, char* argv[]) //pass path to matrix as an argument, We can switch to boost po later std::string path; - double alpha; - double beta; +// double alpha; +// double beta; std::string platform; cl_platform_type pID; cl_uint dID; From c6f161daa613709063edf26032233594b2eb07ab Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 10 Dec 2015 11:23:22 -0600 Subject: [PATCH 05/19] Update the travis build to trusty 14.04 Cleaned up the build file, since trusty already includes a lot of our dependencies --- .travis.yml | 55 ++++++++++++----------------------------------------- 1 file changed, 12 insertions(+), 43 deletions(-) diff --git a/.travis.yml b/.travis.yml index a0e4032..587ff1d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,19 +9,19 @@ # Ubuntu 15.10 (Wily Werewolf) # Ubuntu 16.04 LTS (Xenial Xantus) -# language: instructs travis what compilers && environment to set up in build matrix -language: cpp - -# sudo: false instructs travis to build our project in a docker VM (faster) -# Can not yet install fglrx packages with 'false' -sudo: required # false - # os: expands the build matrix to include multiple os's -# disable linux, as we get sporadic 
failures on building boost, needs investigation os: - linux - osx +# sudo: false instructs travis to build our project in a docker VM (faster), +# but we cannot yet install fglrx packages with 'false' +sudo: required # false +dist: trusty + +# language: instructs travis what compilers && environment to set up in build matrix +language: cpp + # compiler: expands the build matrix to include multiple compilers (per os) compiler: - gcc @@ -35,32 +35,13 @@ addons: apt: sources: # ubuntu-toolchain-r-test contains newer versions of gcc to install - - ubuntu-toolchain-r-test + # - ubuntu-toolchain-r-test + # llvm-toolchain-precise-3.6 contains newer versions of clang to install # - llvm-toolchain-precise-3.6 - # kubuntu-backports contains newer versions of cmake to install - - kubuntu-backports - # boost-latest contains boost v1.55 - # - boost-latest packages: - # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler - - g++-4.8 - # - clang-3.6 - # We require v2.8.12 minimum - - cmake - # I'm finding problems between pre-compiled versions of boost ublas, with gtest - # stl_algobase.h: error: no matching function for call to swap() - # - libboost-program-options1.55-dev - # - libboost-serialization1.55-dev - # - libboost-filesystem1.55-dev - # - libboost-system1.55-dev - # - libboost-regex1.55-dev # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater -# - opencl-headers - # Uncomment one of the following when fglrx modules are added to the apt whitelist -# - fglrx -# - fglrx=2:8.960-0ubuntu1 -# - fglrx=2:13.350.1-0ubuntu0.0.1 + - opencl-headers # env: specifies additional global variables to define per row in build matrix env: @@ -81,10 +62,6 @@ before_install: sudo apt-get update -qq && sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1; fi - - if [ ${TRAVIS_OS_NAME} == "linux" ]; then - export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers"; - export BUILD_BOOST="ON"; - fi - if [ ${TRAVIS_OS_NAME} == "osx" ]; then brew update; brew outdated boost || brew upgrade boost; @@ -97,18 +74,10 @@ before_install: - ${CXX} --version; install: - # 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website - # Remove when the travis VM upgrades to 'trusty' or beyond - - if [ ${TRAVIS_OS_NAME} == "linux" ]; then - mkdir -p ${OPENCL_ROOT}/include/CL; - pushd ${OPENCL_ROOT}/include/CL; - wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/; - popd; - fi # osx image does not contain cl.hpp file; download from Khronos - if [ ${TRAVIS_OS_NAME} == "osx" ]; then pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/; - sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp; + sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp; popd; fi From 18f3f98b81968f66162c3ba42dd791104c4c514b Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 10 Dec 2015 13:48:59 -0600 Subject: [PATCH 06/19] Updates to the main readme to add link to wiki and license --- README.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index c6237a0..1743775 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,7 @@ Two mailing lists have been created for the clMath projects: developers interested in contributing to the library code itself ### API semantic versioning -Good software is typically the result of the loop of feedback and iteration; -software interfaces no less so. 
clSPARSE follows the +Good software is typically the result of iteration and feedback. clSPARSE follows the [semantic versioning](http://semver.org/) guidelines, and while the major version number remains '0', the public API should not be considered stable. We release clSPARSE as beta software (0.y.z) early to the community to elicit feedback and @@ -55,6 +54,11 @@ comment. This comes with the expectation that with feedback, we may incorporate breaking changes to the API that might require early users to recompile, or rewrite portions of their code as we iterate on the design. +## clSPARSE Wiki +The [project wiki](https://github.com/clMathLibraries/clSPARSE/wiki) contains helpful documentation. +A [build primer](https://github.com/clMathLibraries/clSPARSE/wiki/Build) is available, +which describes how to use cmake to generate platforms specific build files + ## Samples clSPARSE contains a directory of simple [OpenCL samples](./samples) that demonstrate the use of the API in both C and C++. The [superbuild](http://www.kitware.com/media/html/BuildingExternalProjectsWithCMake2.8.html) @@ -62,7 +66,7 @@ script for clSPARSE also builds the samples as an external project, to demonstra how an application would find and link to clSPARSE with cmake. ### clSPARSE library documentation -**API documentation** is now available http://clmathlibraries.github.io/clSPARSE/ . The included samples will give an excellent +**API documentation** is available at http://clmathlibraries.github.io/clSPARSE/. The samples give an excellent starting point to basic library operations. ### Contributing code @@ -72,14 +76,8 @@ how to contribute code to this open source project. Code in the when commits are merged into /master. Active development and pull-requests should be made to the **develop** branch. -## Build -clSPARSE is primarily written with C++ using C++11 core features. It does export -a 'C' interface for compatibility with other languages. 
- -### How to build clSPARSE for your platform -A [Build primer](https://github.com/clMathLibraries/clSPARSE/wiki/Build) is available on -the wiki, which describes how to use cmake to generate platforms specific build -files +## License +clSPARSE is licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) ### Compiling for Windows - Windows® 7/8 From 343827dcc5d9dc08974d98b526d818678e219faa Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Mon, 14 Dec 2015 13:36:32 -0600 Subject: [PATCH 07/19] Porting logic to build icd from appveyor.yml, to help us solve problems with travis migration to GCE --- .travis.yml | 34 +++++++++++++++++++++++++++++++--- appveyor.yml | 3 ++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 587ff1d..a8bfcb3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -47,6 +47,8 @@ addons: env: global: - CLSPARSE_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release + - OPENCL_REGISTRY=https://www.khronos.org/registry/cl + - OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl # The following filters our build matrix; we are interested in linux-gcc & osx-clang matrix: @@ -60,7 +62,8 @@ before_install: # Remove the following linux clause when fglrx can be installed with sudo: false - if [ ${TRAVIS_OS_NAME} == "linux" ]; then sudo apt-get update -qq && - sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1; + sudo apt-get install -qq libboost-all-dev; + export BUILD_BOOST="OFF"; fi - if [ ${TRAVIS_OS_NAME} == "osx" ]; then brew update; @@ -68,7 +71,7 @@ before_install: brew outdated cmake || brew upgrade cmake; export BUILD_BOOST="OFF"; fi - - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi +# - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi - cmake --version; - ${CC} --version; - ${CXX} --version; @@ -80,6 +83,30 @@ install: sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp; popd; fi + # The following linux logic is necessary because of Travis's move to the GCE platform, which does not + # currently contain packages for fglrx: https://github.com/travis-ci/travis-ci/issues/5221 + # We build our own linkable .so file + - if [ ${TRAVIS_OS_NAME} == "linux" ]; then + mkdir -p ${OPENCL_ROOT}; + pushd ${OPENCL_ROOT}; + wget ${OPENCL_REGISTRY}/specs/opencl-icd-1.2.11.0.tgz; + tar -xf opencl-icd-1.2.11.0.tgz; + mv ./icd/* .; + mkdir -p inc/CL; + pushd inc/CL; + wget -r -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/; + wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp; + popd; + mkdir -p lib; + pushd lib; + cmake -G "Unix Makefiles" ..; + make; + cp ../bin/libOpenCL.so .; + popd; + mv inc/ include/; + popd; + fi + # Use before_script: to run configure steps before_script: @@ -92,6 +119,7 @@ script: - make clSPARSE-samples - cd clSPARSE-build - make package + - popd deploy: provider: releases @@ -101,8 +129,8 @@ deploy: # This uses a personal OAuth token generated by kknox api_key: secure: 
MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHmDDsdf7yRDJBegNcKfw4+m19MIvLn9fbiNVCtwCAL1T4yWkIEpi4MRMDPtftmkZPbi6UwluOJUTeCeHe4en99Yu2haemNPqXs6rR0LlXGk31GQwzlrNfb+94F5tT2a4Ka4PsruA2NMW/IYCYEE5Gu7PihVDR031Fn9cdCU9kefUgyB07rJD6q/W+ljsU0osyg7VxyfMg8rkw= - file: ${CLSPARSE_ROOT}/clSPARSE-build/*.tar.gz file_glob: true + file: ${CLSPARSE_ROOT}/clSPARSE-build/*.tar.gz on: all_branches: true tags: true diff --git a/appveyor.yml b/appveyor.yml index 2752d67..96f5a61 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -49,6 +49,7 @@ install: # The cmake build files expect a directory called inc - ps: mkdir inc/CL - ps: wget $opencl_registry/api/1.2/ | select -ExpandProperty links | where {$_.href -like "*.h*"} | select -ExpandProperty outerText | foreach{ wget $opencl_registry/api/1.2/$_ -OutFile inc/CL/$_ } + - ps: wget $opencl_registry/api/2.1/cl.hpp -OutFile inc/CL/cl.hpp # - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom } # Create the static import lib in a directory called lib, so findopencl() will find it - ps: mkdir lib @@ -59,7 +60,6 @@ install: # Rename the inc directory to include, so FindOpencl() will find it - ps: ren inc include - ps: popd - - ps: popd # before_build is used to run configure steps before_build: @@ -81,6 +81,7 @@ after_build: - echo after_build step - ps: ls $env:CLSPARSE_ROOT\clSPARSE-build - ps: mv $env:CLSPARSE_ROOT\clSPARSE-build\*.zip $env:APPVEYOR_BUILD_FOLDER + - ps: popd # Appyeyor will save a copy of the package in it's personal storage artifacts: From d4dca1874d3b45b8bc18a89df86b310fe6969f50 Mon Sep 17 00:00:00 2001 From: Kiran Date: Fri, 4 Dec 2015 16:55:02 +0530 Subject: [PATCH 08/19] conversion to clsparseIdx_t changes are done Generalization to support 32/64 bit arguments Now kernel arguments are set based on sizeof(clsparseIdx_t) Kernel arguments for csrmm are made consistent with SIZE_TYPE/INDEX_TYPE.Now CSRMM test case passes changed size_t to clsparseIdx_t Changed GPUADDRESSBITS to Host size and fixed errors in testblas1 In Benchmark functions, changed size_t to clsparseIdx_t Fixed bug in reading MM reader, function MMReadMtxCrdSize Removed commented lines Changed offsets type from cl_ulong to clsparseIdx_t changed offsets and size_t to clsparseIdx_t Fixed a bug for 64-bit container in clsparse-csrmm.hpp Test solvers is fixed --- .../functions/clfunc-xSpMdM.hpp | 8 +- .../functions/clfunc_xBiCGStab.hpp | 6 +- .../clsparse-bench/functions/clfunc_xCG.hpp | 6 +- .../functions/clfunc_xCoo2Csr.hpp | 20 ++--- .../functions/clfunc_xCsr2Coo.hpp | 30 ++++---- .../functions/clfunc_xCsr2Dense.hpp | 6 +- .../functions/clfunc_xDense2Csr.hpp | 20 ++--- .../functions/clfunc_xSpMSpM.hpp | 28 +++---- src/include/clSPARSE-1x.h | 20 ++--- src/library/blas1/atomic-reduce.hpp | 8 +- src/library/blas1/cldense-axpby.hpp | 6 +- src/library/blas1/cldense-axpy.hpp | 6 +- src/library/blas1/cldense-dot.hpp | 40 +++++----- src/library/blas1/cldense-scale.hpp | 6 +- src/library/blas1/elementwise-transform.hpp | 12 +-- src/library/blas1/reduce.hpp | 8 +- src/library/blas2/csrmv-vector.hpp | 6 +- 
src/library/blas3/clsparse-csrmm.hpp | 4 +- src/library/include/clSPARSE-1x.hpp | 20 ++--- .../internal/data-types/clarray-base.hpp | 2 +- src/library/io/mm-reader.cpp | 9 +-- src/library/kernels/csrmm_general.cl | 8 +- .../preconditioners/preconditioner_utils.hpp | 32 ++++---- src/library/transform/clsparse-coo2csr.cpp | 16 ++-- src/library/transform/clsparse-csr2coo.cpp | 16 ++-- src/library/transform/clsparse-csr2dense.cpp | 8 +- src/library/transform/clsparse-dense2csr.cpp | 4 +- src/library/transform/conversion-utils.hpp | 8 +- src/library/transform/reduce-by-key.hpp | 10 +-- src/library/transform/scan.hpp | 6 +- src/tests/resources/matrix_utils.h | 14 ++-- src/tests/test-blas3.cpp | 66 ++++++++--------- src/tests/test-conversion.cpp | 74 +++++++++---------- 33 files changed, 264 insertions(+), 269 deletions(-) diff --git a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp index 411bc91..4da9a74 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp @@ -87,7 +87,7 @@ class xSpMdM: public clsparseFunc // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. - return ( sizeof( size_t )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); + return (sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows)) / time_in_ns(); } std::string bandwidth_formula( ) @@ -119,10 +119,10 @@ class xSpMdM: public clsparseFunc csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( size_t ), NULL, &status ); + csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, ( csrMtx.num_rows + 1 ) * sizeof( size_t ), NULL, &status ); + csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); @@ -204,7 +204,7 @@ class xSpMdM: public clsparseFunc if( gpuTimer && cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof( size_t )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ); + clsparseIdx_t sparseBytes = sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp index a9cabc4..b3a51b4 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp @@ -123,11 +123,11 @@ class 
xBiCGStab : public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); + csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); + (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -204,7 +204,7 @@ class xBiCGStab : public clsparseFunc if(/* gpuTimer && */cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof(size_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); + clsparseIdx_t sparseBytes = sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp index 89adf04..8e78b01 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp @@ -122,11 +122,11 @@ class xCG : public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); + csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); + (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -203,7 +203,7 @@ class xCG : public clsparseFunc if(/* gpuTimer && */cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseBytes = sizeof(size_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); + clsparseIdx_t sparseBytes = sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows); cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseBytes, "GiB/s" ); cpuTimer->Reset( ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp index 0c019d0..167ccc9 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp @@ -109,9 +109,9 @@ class xCoo2Csr: public clsparseFunc cooMatx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, cooMatx.num_nonzeros * sizeof(T), NULL, &status ); cooMatx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - cooMatx.num_nonzeros * sizeof( size_t ), NULL, &status ); + cooMatx.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status ); cooMatx.rowIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - cooMatx.num_nonzeros * sizeof( size_t ), NULL, &status ); + cooMatx.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status ); if (typeid(T) == typeid(float)) fileError = clsparseSCooMatrixfromFile( &cooMatx, 
path.c_str(), control, explicit_zeroes ); @@ -131,9 +131,9 @@ class xCoo2Csr: public clsparseFunc cooMatx.num_nonzeros * sizeof( T ), NULL, &status ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - cooMatx.num_nonzeros * sizeof( size_t ), NULL, &status ); + cooMatx.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - ( cooMatx.num_rows + 1 ) * sizeof( size_t ), NULL, &status ); + ( cooMatx.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &status ); } @@ -149,12 +149,12 @@ class xCoo2Csr: public clsparseFunc void reset_gpu_write_buffer( ) { - size_t scalar_i = 0; + clsparseIdx_t scalar_i = 0; T scalar_f = 0; - CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.rowOffsets, &scalar_i, sizeof( size_t ), 0, - sizeof( size_t ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); - CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( size_t ), 0, - sizeof( size_t ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" ); + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMtx.rowOffsets, &scalar_i, sizeof(clsparseIdx_t), 0, + sizeof( clsparseIdx_t ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); + CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( clsparseIdx_t ), 0, + sizeof( clsparseIdx_t ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.values, &scalar_f, sizeof( T ), 0, sizeof( T ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer values" ); } @@ -168,7 +168,7 @@ class xCoo2Csr: public clsparseFunc if( gpuTimer && cpuTimer ) { std::cout << "clSPARSE matrix: " << sparseFile << std::endl; - size_t sparseElements = n_vals; + clsparseIdx_t sparseElements = n_vals; cpuTimer->pruneOutliers( 3.0 ); cpuTimer->Print( sparseElements, "GiElements/s" ); cpuTimer->Reset( ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp index ddb4655..b6580e5 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp @@ -125,10 +125,10 @@ class xCsr2Coo : public clsparseFunc csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); + csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); + csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); if (typeid(T) == typeid(float)) @@ -159,11 +159,11 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(status, "::clCreateBuffer cooMtx.values"); cooMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, - cooMtx.num_nonzeros * sizeof(size_t), NULL, &status); + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer cooMtx.colIndices"); cooMtx.rowIndices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, - cooMtx.num_nonzeros * 
sizeof(size_t), NULL, &status); + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer cooMtx.rowIndices"); }// end @@ -178,13 +178,13 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.values, &scalarZero, sizeof(T), 0, cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); - size_t scalarIntZero = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(size_t), 0, - cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); + clsparseIdx_t scalarIntZero = 0; + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(size_t), 0, - cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); }// end @@ -194,13 +194,13 @@ class xCsr2Coo : public clsparseFunc CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.values, &scalar, sizeof(T), 0, cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); - size_t scalarIntZero = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(size_t), 0, - cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); + clsparseIdx_t scalarIntZero = 0; + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(size_t), 0, - cooMtx.num_nonzeros * sizeof(size_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); }// end void read_gpu_buffer() @@ -228,7 +228,7 @@ class xCsr2Coo : public clsparseFunc gpuTimer->Reset(); #endif // Calculate Number of Elements transformed per unit time - size_t sparseElements = csrMtx.num_nonzeros; + clsparseIdx_t sparseElements = csrMtx.num_nonzeros; cpuTimer->pruneOutliers(3.0); cpuTimer->Print(sparseElements, "GiElements/s"); cpuTimer->Reset(); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp index 22fc45e..61073e8 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp @@ -124,10 +124,10 @@ class xCsr2Dense : public clsparseFunc csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); + csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer 
csrMtx.colIndices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); + csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); if (typeid(T) == typeid(float)) @@ -199,7 +199,7 @@ class xCsr2Dense : public clsparseFunc gpuTimer->Reset(); #endif // Calculate Number of Elements transformed per unit time - size_t sparseElements = csrMtx.num_nonzeros; + clsparseIdx_t sparseElements = csrMtx.num_nonzeros; cpuTimer->pruneOutliers(3.0); cpuTimer->Print(sparseElements, "GiElements/s"); cpuTimer->Reset(); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp index 9cf6380..775d588 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp @@ -128,11 +128,11 @@ class xDense2Csr: public clsparseFunc CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); + csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, - (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); + (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); if(typeid(T) == typeid(float)) @@ -176,11 +176,11 @@ class xDense2Csr: public clsparseFunc CLSPARSE_V(status, "::clCreateBuffer csrMatx.values"); csrMatx.colIndices = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, - csrMtx.num_nonzeros * sizeof( size_t ), NULL, &status ); + csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMatx.colIndices"); csrMatx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, - (csrMtx.num_rows + 1) * sizeof( size_t ), NULL, &status ); + (csrMtx.num_rows + 1) * sizeof( clsparseIdx_t ), NULL, &status ); CLSPARSE_V(status, "::clCreateBuffer csrMatx.rowOffsets"); }// End of function @@ -194,14 +194,14 @@ class xDense2Csr: public clsparseFunc void reset_gpu_write_buffer( ) { - size_t scalar_i = 0; + clsparseIdx_t scalar_i = 0; T scalar_f = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.rowOffsets, &scalar_i, sizeof(size_t), 0, - sizeof(size_t) * (csrMatx.num_rows + 1), 0, NULL, NULL), "::clEnqueueFillBuffer row"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.rowOffsets, &scalar_i, sizeof(clsparseIdx_t), 0, + sizeof(clsparseIdx_t) * (csrMatx.num_rows + 1), 0, NULL, NULL), "::clEnqueueFillBuffer row"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.colIndices, &scalar_i, sizeof(size_t), 0, - sizeof(size_t) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer col"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.colIndices, &scalar_i, sizeof(clsparseIdx_t), 0, + sizeof(clsparseIdx_t) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer col"); CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.values, &scalar_f, sizeof(T), 0, sizeof(T) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer values"); @@ -227,7 +227,7 @@ class xDense2Csr: public clsparseFunc gpuTimer->Reset( ); #endif // Calculate Number of Elements transformed per unit time - size_t sparseElements = A.num_cols * A.num_rows; + clsparseIdx_t 
sparseElements = A.num_cols * A.num_rows; cpuTimer->pruneOutliers(3.0); cpuTimer->Print(sparseElements, "GiElements/s"); cpuTimer->Reset(); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp index f37ce85..e810fe3 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp @@ -99,7 +99,7 @@ class xSpMSpM : public clsparseFunc { // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. - return (sizeof(size_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows)) / time_in_ns(); + return (sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows) + sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows)) / time_in_ns(); } // end of function std::string bandwidth_formula() @@ -133,11 +133,11 @@ class xSpMSpM : public clsparseFunc { CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, - csrMtx.num_nonzeros * sizeof(size_t), NULL, &status); + csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, - (csrMtx.num_rows + 1) * sizeof(size_t), NULL, &status); + (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); #if 0 csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, @@ -241,22 +241,22 @@ class xSpMSpM : public clsparseFunc { private: void xSpMSpM_Function(bool flush); - size_t xSpMSpM_Getflopcount(void) + clsparseIdx_t xSpMSpM_Getflopcount(void) { // C = A * B // But here C = A* A, the A & B matrices are same - size_t nnzA = csrMtx.num_nonzeros; - size_t Browptrlen = csrMtx.num_rows + 1; // Number of row offsets + clsparseIdx_t nnzA = csrMtx.num_nonzeros; + clsparseIdx_t Browptrlen = csrMtx.num_rows + 1; // Number of row offsets - std::vector colIdxA(nnzA, 0); - std::vector rowptrB (Browptrlen, 0); + std::vector colIdxA(nnzA, 0); + std::vector rowptrB(Browptrlen, 0); cl_int run_status = 0; run_status = clEnqueueReadBuffer(queue, csrMtx.colIndices, CL_TRUE, 0, - nnzA*sizeof(size_t), + nnzA*sizeof(clsparseIdx_t), colIdxA.data(), 0, nullptr, nullptr); CLSPARSE_V(run_status, "Reading colIndices from GPU failed"); @@ -265,15 +265,15 @@ class xSpMSpM : public clsparseFunc { run_status = clEnqueueReadBuffer(queue, csrMtx.rowOffsets, CL_TRUE, 0, - Browptrlen*sizeof(size_t), + Browptrlen*sizeof(clsparseIdx_t), rowptrB.data(), 0, nullptr, nullptr); CLSPARSE_V(run_status, "Reading row offsets from GPU failed"); - size_t flop = 0; - for (size_t i = 0; i < nnzA; i++) + clsparseIdx_t flop = 0; + for (clsparseIdx_t i = 0; i < nnzA; i++) { - size_t colIdx = colIdxA[i]; // Get colIdx of A + clsparseIdx_t colIdx = colIdxA[i]; // Get colIdx of A flop += rowptrB[colIdx + 1] - rowptrB[colIdx]; // nnz in 'colIdx'th row of B } @@ -299,7 +299,7 @@ class xSpMSpM : public clsparseFunc { // host values T alpha; T beta; - size_t flopCnt; // Indicates total number of floating point operations + clsparseIdx_t flopCnt; // Indicates total number of floating point operations cl_bool explicit_zeroes; // OpenCL state //cl_command_queue_properties cqProp; diff --git a/src/include/clSPARSE-1x.h 
b/src/include/clSPARSE-1x.h index 6cc77b7..cbf89b0 100644 --- a/src/include/clSPARSE-1x.h +++ b/src/include/clSPARSE-1x.h @@ -35,7 +35,7 @@ typedef struct clsparseScalar_ /*! Given that cl_mem objects are opaque without pointer arithmetic, this offset is added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - cl_ulong offValue; + clsparseIdx_t offValue; } clsparseScalar; /*! \brief Structure to encapsulate dense vector data to clSPARSE API @@ -49,7 +49,7 @@ typedef struct cldenseVector_ /*! Given that cl_mem objects are opaque without pointer arithmetic, this offset is added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - cl_ulong offValues; + clsparseIdx_t offValues; } cldenseVector; /*! \brief Structure to encapsulate sparse matrix data encoded in CSR @@ -79,10 +79,10 @@ typedef struct clsparseCsrMatrix_ /*! Given that cl_mem objects are opaque without pointer arithmetic, these offsets are added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - cl_ulong offValues; - cl_ulong offColInd; - cl_ulong offRowOff; - cl_ulong offRowBlocks; + clsparseIdx_t offValues; + clsparseIdx_t offColInd; + clsparseIdx_t offRowOff; + clsparseIdx_t offRowBlocks; /**@}*/ size_t rowBlockSize; /*!< Size of array used by the rowBlocks handle */ @@ -114,9 +114,9 @@ typedef struct clsparseCooMatrix_ /*! Given that cl_mem objects are opaque without pointer arithmetic, these offsets are added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - cl_ulong offValues; - cl_ulong offColInd; - cl_ulong offRowInd; + clsparseIdx_t offValues; + clsparseIdx_t offColInd; + clsparseIdx_t offRowInd; /**@}*/ } clsparseCooMatrix; @@ -138,7 +138,7 @@ typedef struct cldenseMatrix_ /*! 
Given that cl_mem objects are opaque without pointer arithmetic, these offsets are added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - cl_ulong offValues; + clsparseIdx_t offValues; } cldenseMatrix; #endif diff --git a/src/library/blas1/atomic-reduce.hpp b/src/library/blas1/atomic-reduce.hpp index a6452a9..70e93b4 100644 --- a/src/library/blas1/atomic-reduce.hpp +++ b/src/library/blas1/atomic-reduce.hpp @@ -37,7 +37,7 @@ template clsparseStatus atomic_reduce(clsparseScalarPrivate* pR, const cldenseVectorPrivate* pX, - const cl_ulong wg_size, + const clsparseIdx_t wg_size, const clsparseControl control) { assert(wg_size == pX->num_values); @@ -47,7 +47,7 @@ atomic_reduce(clsparseScalarPrivate* pR, + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ReduceOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -113,7 +113,7 @@ template clsparseStatus atomic_reduce(clsparse::array_base& pR, const clsparse::array_base& pX, - const cl_ulong wg_size, + const clsparseIdx_t wg_size, const clsparseControl control) { assert(wg_size == pX.size()); @@ -123,7 +123,7 @@ atomic_reduce(clsparse::array_base& pR, + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ReduceOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; diff --git a/src/library/blas1/cldense-axpby.hpp b/src/library/blas1/cldense-axpby.hpp index d5ecc74..ad38031 100644 --- a/src/library/blas1/cldense-axpby.hpp +++ b/src/library/blas1/cldense-axpby.hpp @@ -47,7 +47,7 @@ axpby(clsparse::array_base& pR, + " -DWG_SIZE=" + std::to_string( group_size ) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -77,10 +77,10 @@ axpby(clsparse::array_base& pR, KernelWrap kWrapper(kernel); - cl_ulong size = pR.size(); + clsparseIdx_t size = pR.size(); //clsparse do not support offset; - cl_ulong offset = 0; + clsparseIdx_t offset = 0; kWrapper << size << pR.data() diff --git a/src/library/blas1/cldense-axpy.hpp b/src/library/blas1/cldense-axpy.hpp index ca2c717..413e80c 100644 --- a/src/library/blas1/cldense-axpy.hpp +++ b/src/library/blas1/cldense-axpy.hpp @@ -41,7 +41,7 @@ axpy(clsparse::array_base& pR, + " -DWG_SIZE=" + std::to_string( group_size ) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -71,8 +71,8 @@ axpy(clsparse::array_base& pR, KernelWrap kWrapper(kernel); - cl_ulong size = pR.size(); - cl_ulong offset = 0; + clsparseIdx_t size = pR.size(); + clsparseIdx_t offset = 0; kWrapper << size << pR.data() diff --git a/src/library/blas1/cldense-dot.hpp b/src/library/blas1/cldense-dot.hpp index ff77f51..1dac7b3 100644 --- a/src/library/blas1/cldense-dot.hpp +++ b/src/library/blas1/cldense-dot.hpp @@ -31,13 +31,13 @@ clsparseStatus inner_product (cldenseVectorPrivate* partial, const cldenseVectorPrivate* pX, const cldenseVectorPrivate* pY, - const cl_ulong size, - const cl_ulong REDUCE_BLOCKS_NUMBER, - const cl_ulong REDUCE_BLOCK_SIZE, + const clsparseIdx_t size, + const clsparseIdx_t REDUCE_BLOCKS_NUMBER, + const 
clsparseIdx_t REDUCE_BLOCK_SIZE, const clsparseControl control) { - cl_ulong nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; + clsparseIdx_t nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type @@ -45,7 +45,7 @@ inner_product (cldenseVectorPrivate* partial, + " -DREDUCE_BLOCK_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DN_THREADS=" + std::to_string(nthreads); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -107,21 +107,21 @@ clsparseStatus dot(clsparseScalarPrivate* pR, // with REDUCE_BLOCKS_NUMBER = 256 final reduction can be performed // within one block; - const cl_ulong REDUCE_BLOCKS_NUMBER = 256; + const clsparseIdx_t REDUCE_BLOCKS_NUMBER = 256; /* For future optimisation //workgroups per compute units; const cl_uint WG_PER_CU = 64; const cl_ulong REDUCE_BLOCKS_NUMBER = control->max_compute_units * WG_PER_CU; */ - const cl_ulong REDUCE_BLOCK_SIZE = 256; + const clsparseIdx_t REDUCE_BLOCK_SIZE = 256; - cl_ulong xSize = pX->num_values - pX->offset(); - cl_ulong ySize = pY->num_values - pY->offset(); + clsparseIdx_t xSize = pX->num_values - pX->offset(); + clsparseIdx_t ySize = pY->num_values - pY->offset(); assert (xSize == ySize); - cl_ulong size = xSize; + clsparseIdx_t size = xSize; if (size > 0) @@ -163,13 +163,13 @@ clsparseStatus inner_product (clsparse::array_base& partial, const clsparse::array_base& pX, const clsparse::array_base& pY, - const cl_ulong size, - const cl_ulong REDUCE_BLOCKS_NUMBER, - const cl_ulong REDUCE_BLOCK_SIZE, + const clsparseIdx_t size, + const clsparseIdx_t REDUCE_BLOCKS_NUMBER, + const clsparseIdx_t REDUCE_BLOCK_SIZE, const clsparseControl control) { - cl_ulong nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; + clsparseIdx_t nthreads = REDUCE_BLOCK_SIZE * REDUCE_BLOCKS_NUMBER; std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type @@ -177,7 +177,7 @@ inner_product (clsparse::array_base& partial, + " -DREDUCE_BLOCK_SIZE=" + std::to_string(REDUCE_BLOCK_SIZE) + " -DN_THREADS=" + std::to_string(nthreads); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -240,21 +240,21 @@ clsparseStatus dot(clsparse::array_base& pR, // with REDUCE_BLOCKS_NUMBER = 256 final reduction can be performed // within one block; - const cl_ulong REDUCE_BLOCKS_NUMBER = 256; + const clsparseIdx_t REDUCE_BLOCKS_NUMBER = 256; /* For future optimisation //workgroups per compute units; const cl_uint WG_PER_CU = 64; const cl_ulong REDUCE_BLOCKS_NUMBER = control->max_compute_units * WG_PER_CU; */ - const cl_ulong REDUCE_BLOCK_SIZE = 256; + const clsparseIdx_t REDUCE_BLOCK_SIZE = 256; - cl_ulong xSize = pX.size(); - cl_ulong ySize = pY.size(); + clsparseIdx_t xSize = pX.size(); + clsparseIdx_t ySize = pY.size(); assert (xSize == ySize); - cl_ulong size = xSize; + clsparseIdx_t size = xSize; if (size > 0) { diff --git a/src/library/blas1/cldense-scale.hpp b/src/library/blas1/cldense-scale.hpp index df21e8b..f6ae024 100644 --- a/src/library/blas1/cldense-scale.hpp +++ b/src/library/blas1/cldense-scale.hpp @@ -40,7 +40,7 @@ scale( clsparse::array_base& pResult, + " -DVALUE_TYPE="+ OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = 
std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -70,8 +70,8 @@ scale( clsparse::array_base& pResult, params); KernelWrap kWrapper(kernel); - cl_ulong size = pResult.size(); - cl_ulong offset = 0; + clsparseIdx_t size = pResult.size(); + clsparseIdx_t offset = 0; kWrapper << size << pResult.data() diff --git a/src/library/blas1/elementwise-transform.hpp b/src/library/blas1/elementwise-transform.hpp index 56a349c..06fceb5 100644 --- a/src/library/blas1/elementwise-transform.hpp +++ b/src/library/blas1/elementwise-transform.hpp @@ -56,15 +56,15 @@ elementwise_transform(cldenseVectorPrivate* r, assert(x->num_values == y->num_values); assert(x->num_values == r->num_values); - cl_ulong size = x->num_values; - cl_uint wg_size = 256; + clsparseIdx_t size = x->num_values; + clsparseIdx_t wg_size = 256; std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -135,15 +135,15 @@ elementwise_transform(clsparse::array_base& r, assert(x.size() == y.size()); assert(x.size() == r.size()); - cl_ulong size = x.size(); - cl_uint wg_size = 256; + clsparseIdx_t size = x.size(); + clsparseIdx_t wg_size = 256; std::string params = std::string() + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(wg_size) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; diff --git a/src/library/blas1/reduce.hpp b/src/library/blas1/reduce.hpp index 1987fbd..f08f24a 100644 --- a/src/library/blas1/reduce.hpp +++ b/src/library/blas1/reduce.hpp @@ -48,7 +48,7 @@ global_reduce (cldenseVectorPrivate* partial, + " -DN_THREADS=" + std::to_string(nthreads) + " -D" + ReduceOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -78,7 +78,7 @@ global_reduce (cldenseVectorPrivate* partial, KernelWrap kWrapper(kernel); - kWrapper << (cl_ulong)pX->num_values + kWrapper << pX->num_values << pX->values << partial->values; @@ -188,7 +188,7 @@ global_reduce (clsparse::array_base& partial, + " -DN_THREADS=" + std::to_string(nthreads) + " -D" + ReduceOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -218,7 +218,7 @@ global_reduce (clsparse::array_base& partial, KernelWrap kWrapper(kernel); - cl_ulong size = pX.size(); + clsparseIdx_t size = pX.size(); kWrapper << size << pX.data() diff --git a/src/library/blas2/csrmv-vector.hpp b/src/library/blas2/csrmv-vector.hpp index cdc8ab5..fa26fc8 100644 --- a/src/library/blas2/csrmv-vector.hpp +++ b/src/library/blas2/csrmv-vector.hpp @@ -50,7 +50,7 @@ csrmv_vector(const clsparseScalarPrivate* pAlpha, std::string params = std::string() + + " -DVALUE_TYPE=" + OclTypeTraits::type - + " -DSIZE_TYPE=" + OclTypeTraits::type + + " -DSIZE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(group_size) + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); @@ -155,7 +155,7 @@ csrmv_vector(const clsparse::array_base& pAlpha, + " -DWAVE_SIZE=" + 
std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type @@ -190,7 +190,7 @@ csrmv_vector(const clsparse::array_base& pAlpha, params); KernelWrap kWrapper(kernel); - cl_ulong offset = 0; + clsparseIdx_t offset = 0; kWrapper << pMatx->num_rows << pAlpha.data() << offset diff --git a/src/library/blas3/clsparse-csrmm.hpp b/src/library/blas3/clsparse-csrmm.hpp index c5a0b21..e060454 100644 --- a/src/library/blas3/clsparse-csrmm.hpp +++ b/src/library/blas3/clsparse-csrmm.hpp @@ -108,12 +108,12 @@ const clsparseControl control ) if( nnz_per_row < 4 ) { subwave_size = 2; } std::string params = std::string( ) + - + " -DVALUE_TYPE=" + OclTypeTraits::type + + " -DVALUE_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string( group_size ) + " -DWAVE_SIZE=" + std::to_string( wave_size ) + " -DSUBWAVE_SIZE=" + std::to_string( subwave_size ); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type diff --git a/src/library/include/clSPARSE-1x.hpp b/src/library/include/clSPARSE-1x.hpp index dcfbd38..88e4b59 100644 --- a/src/library/include/clSPARSE-1x.hpp +++ b/src/library/include/clSPARSE-1x.hpp @@ -164,7 +164,7 @@ class clsparseScalarPrivate: public clsparseScalar offValue = 0; } - cl_ulong offset () const + clsparseIdx_t offset() const { return offValue; } @@ -180,7 +180,7 @@ class cldenseVectorPrivate: public cldenseVector offValues = 0; } - cl_ulong offset () const + clsparseIdx_t offset() const { return offValues; } @@ -201,22 +201,22 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix return num_nonzeros / num_rows; } - cl_ulong valOffset () const + clsparseIdx_t valOffset() const { return offValues; } - cl_ulong colIndOffset () const + clsparseIdx_t colIndOffset() const { return offColInd; } - cl_ulong rowOffOffset () const + clsparseIdx_t rowOffOffset() const { return offRowOff; } - cl_ulong rowBlocksOffset( ) const + clsparseIdx_t rowBlocksOffset() const { return offRowBlocks; } @@ -238,17 +238,17 @@ class clsparseCooMatrixPrivate: public clsparseCooMatrix return num_nonzeros / num_rows; } - cl_ulong valOffset( ) const + clsparseIdx_t valOffset() const { return offValues; } - cl_ulong colIndOffset( ) const + clsparseIdx_t colIndOffset() const { return offColInd; } - cl_ulong rowOffOffset( ) const + clsparseIdx_t rowOffOffset() const { return offRowInd; } @@ -265,7 +265,7 @@ class cldenseMatrixPrivate: public cldenseMatrix values = nullptr; } - cl_ulong offset( ) const + clsparseIdx_t offset() const { return offValues; } diff --git a/src/library/internal/data-types/clarray-base.hpp b/src/library/internal/data-types/clarray-base.hpp index 75c0efd..68c9594 100644 --- a/src/library/internal/data-types/clarray-base.hpp +++ b/src/library/internal/data-types/clarray-base.hpp @@ -46,7 +46,7 @@ class array_base public: typedef T value_type; - typedef cl_ulong size_type; + typedef clsparseIdx_t size_type; //typedef typename clContainer::type BUFF_TYPE; typedef typename clContainer::type BUFF_TYPE; diff --git a/src/library/io/mm-reader.cpp b/src/library/io/mm-reader.cpp index 728fd14..7c695a9 100644 --- a/src/library/io/mm-reader.cpp +++ b/src/library/io/mm-reader.cpp @@ -411,17 +411,12 @@ int MatrixMarketReader::MMReadMtxCrdSize( FILE *infile ) } while( line[ 0 ] == '%' ); /* line[] is either blank or has 
M,N, nz */ -#if defined( _WIN32 ) || defined(_WIN64) - // if( sscanf( line, "%Iu %Iu %Iu", &nRows, &nCols, &nNZ ) == 3 ) // Not working I don't know why? std::stringstream s(line); nRows = 0; nCols = 0; nNZ = 0; s >> nRows >> nCols >> nNZ; - if (nRows && nCols && nNZ ) -#else - if( sscanf( line, "%zu %zu %zu", &nRows, &nCols, &nNZ ) == 3 ) -#endif + if (nRows && nCols && nNZ) return 0; else do @@ -822,4 +817,4 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl // return clsparseSuccess; //} -#pragma warning( pop ) \ No newline at end of file +#pragma warning( pop ) diff --git a/src/library/kernels/csrmm_general.cl b/src/library/kernels/csrmm_general.cl index 87f3a65..f21d72e 100644 --- a/src/library/kernels/csrmm_general.cl +++ b/src/library/kernels/csrmm_general.cl @@ -145,14 +145,14 @@ void csrmv_batched( const INDEX_TYPE num_rows, global const INDEX_TYPE * const restrict col, global const VALUE_TYPE * const restrict val, global const VALUE_TYPE * const restrict denseB, - const SIZE_TYPE ldB, + const INDEX_TYPE ldB, const SIZE_TYPE off_B, global const VALUE_TYPE * const beta, const SIZE_TYPE off_beta, global VALUE_TYPE * restrict denseC, - const SIZE_TYPE num_rows_C, - const SIZE_TYPE num_cols_C, - const SIZE_TYPE ldC, + const INDEX_TYPE num_rows_C, + const INDEX_TYPE num_cols_C, + const INDEX_TYPE ldC, const SIZE_TYPE off_C ) { local VALUE_TYPE sdata[ WG_SIZE + SUBWAVE_SIZE / 2 ]; diff --git a/src/library/solvers/preconditioners/preconditioner_utils.hpp b/src/library/solvers/preconditioners/preconditioner_utils.hpp index 901431d..47eb320 100644 --- a/src/library/solvers/preconditioners/preconditioner_utils.hpp +++ b/src/library/solvers/preconditioners/preconditioner_utils.hpp @@ -48,12 +48,12 @@ extract_diagonal(cldenseVectorPrivate* pDiag, assert (pDiag->num_values == std::min(pA->num_rows, pA->num_cols)); - cl_ulong wg_size = 256; - cl_ulong size = pA->num_rows; + clsparseIdx_t wg_size = 256; + clsparseIdx_t size = pA->num_rows; - cl_ulong nnz_per_row = pA->nnz_per_row(); - cl_ulong wave_size = control->wavefront_size; - cl_ulong subwave_size = wave_size; + clsparseIdx_t nnz_per_row = pA->nnz_per_row(); + clsparseIdx_t wave_size = control->wavefront_size; + clsparseIdx_t subwave_size = wave_size; // adjust subwave_size according to nnz_per_row; // each wavefron will be assigned to the row of the csr matrix @@ -74,7 +74,7 @@ extract_diagonal(cldenseVectorPrivate* pDiag, + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type @@ -115,9 +115,9 @@ extract_diagonal(cldenseVectorPrivate* pDiag, << pA->colIndices << pA->values; - cl_uint predicted = subwave_size * size; + clsparseIdx_t predicted = subwave_size * size; - cl_uint global_work_size = + clsparseIdx_t global_work_size = wg_size * ((predicted + wg_size - 1 ) / wg_size); cl::NDRange local(wg_size); //cl::NDRange global(predicted > local[0] ? 
predicted : local[0]); @@ -157,12 +157,12 @@ extract_diagonal(clsparse::vector& pDiag, assert( pDiag.size( ) == std::min( pA->num_cols, pA->num_rows ) ); - cl_ulong wg_size = 256; - cl_ulong size = pA->num_rows; + clsparseIdx_t wg_size = 256; + clsparseIdx_t size = pA->num_rows; - cl_ulong nnz_per_row = pA->nnz_per_row(); - cl_ulong wave_size = control->wavefront_size; - cl_ulong subwave_size = wave_size; + clsparseIdx_t nnz_per_row = pA->nnz_per_row(); + clsparseIdx_t wave_size = control->wavefront_size; + clsparseIdx_t subwave_size = wave_size; // adjust subwave_size according to nnz_per_row; // each wavefron will be assigned to the row of the csr matrix @@ -183,7 +183,7 @@ extract_diagonal(clsparse::vector& pDiag, + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type @@ -224,9 +224,9 @@ extract_diagonal(clsparse::vector& pDiag, << pA->colIndices << pA->values; - size_t predicted = subwave_size * size; + clsparseIdx_t predicted = subwave_size * size; - size_t global_work_size = + clsparseIdx_t global_work_size = wg_size * ((predicted + wg_size - 1 ) / wg_size); cl::NDRange local(wg_size); //cl::NDRange global(predicted > local[0] ? predicted : local[0]); diff --git a/src/library/transform/clsparse-coo2csr.cpp b/src/library/transform/clsparse-coo2csr.cpp index 05aa49f..8c1a7da 100644 --- a/src/library/transform/clsparse-coo2csr.cpp +++ b/src/library/transform/clsparse-coo2csr.cpp @@ -40,12 +40,12 @@ clsparseScoo2csr (const clsparseCooMatrix* coo, csr->num_nonzeros = coo->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); csr_col_indices = coo_col_indices; @@ -80,12 +80,12 @@ clsparseDcoo2csr ( const clsparseCooMatrix* coo, csr->num_nonzeros = coo->num_nonzeros; // how to obtain proper type of the matrix indices? 
int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); csr_col_indices = coo_col_indices; diff --git a/src/library/transform/clsparse-csr2coo.cpp b/src/library/transform/clsparse-csr2coo.cpp index a17be04..f566334 100644 --- a/src/library/transform/clsparse-csr2coo.cpp +++ b/src/library/transform/clsparse-csr2coo.cpp @@ -40,12 +40,12 @@ clsparseScsr2coo(const clsparseCsrMatrix* csr, coo->num_nonzeros = csr->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); coo_col_indices = csr_col_indices; @@ -80,12 +80,12 @@ clsparseDcsr2coo(const clsparseCsrMatrix* csr, coo->num_nonzeros = csr->num_nonzeros; // how to obtain proper type of the matrix indices? 
int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); coo_col_indices = csr_col_indices; diff --git a/src/library/transform/clsparse-csr2dense.cpp b/src/library/transform/clsparse-csr2dense.cpp index 38e59b4..9a1f663 100644 --- a/src/library/transform/clsparse-csr2dense.cpp +++ b/src/library/transform/clsparse-csr2dense.cpp @@ -50,8 +50,8 @@ clsparseScsr2dense(const clsparseCsrMatrix* csr, return status; - clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector values (control, csr->values, csr->num_nonzeros); clsparse::vector Avalues (control, A->values, dense_size); @@ -94,8 +94,8 @@ cldenseMatrix* A, return status; - clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); + clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); clsparse::vector values (control, csr->values, csr->num_nonzeros); clsparse::vector Avalues (control, A->values, dense_size); diff --git a/src/library/transform/clsparse-dense2csr.cpp b/src/library/transform/clsparse-dense2csr.cpp index e0b7d9e..2fbc4c0 100644 --- a/src/library/transform/clsparse-dense2csr.cpp +++ b/src/library/transform/clsparse-dense2csr.cpp @@ -29,7 +29,7 @@ clsparseSdense2csr(const cldenseMatrix* A, clsparseCsrMatrix* csr, { typedef cl_float ValueType; typedef clsparseIdx_t IndexType; - typedef cl_ulong SizeType; + typedef clsparseIdx_t SizeType; if (!clsparseInitialized) { @@ -100,7 +100,7 @@ clsparseDdense2csr(const cldenseMatrix* A, { typedef cl_double ValueType; typedef clsparseIdx_t IndexType; - typedef cl_ulong SizeType; + typedef clsparseIdx_t SizeType; if (!clsparseInitialized) { diff --git a/src/library/transform/conversion-utils.hpp b/src/library/transform/conversion-utils.hpp index 7e2c8eb..869ce38 100644 --- a/src/library/transform/conversion-utils.hpp +++ b/src/library/transform/conversion-utils.hpp @@ -147,7 +147,7 @@ offsets_to_indices(clsparse::vector& indices, + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type @@ -252,7 +252,7 @@ transform_csr_2_dense(/*csr matrix*/ + " -DWAVE_SIZE=" + std::to_string(wave_size) + " -DSUBWAVE_SIZE=" + std::to_string(subwave_size); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { 
std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type @@ -333,7 +333,7 @@ calculate_num_nonzeros(/*dense matrix*/ + " -DWG_SIZE=" + std::to_string(workgroup_size) + " -DSUBWAVE_SIZE=" + std::to_string(2); //required by program; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type @@ -439,7 +439,7 @@ dense_to_coo(clsparseCooMatrix* coo, + " -DWG_SIZE=" + std::to_string(workgroup_size) + " -DSUBWAVE_SIZE=" + std::to_string(2); //required by program; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DINDEX_TYPE=" + OclTypeTraits::type diff --git a/src/library/transform/reduce-by-key.hpp b/src/library/transform/reduce-by-key.hpp index 27c2a6e..b6bdaae 100644 --- a/src/library/transform/reduce-by-key.hpp +++ b/src/library/transform/reduce-by-key.hpp @@ -93,7 +93,7 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -163,7 +163,7 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -226,7 +226,7 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -288,7 +288,7 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -347,7 +347,7 @@ reduce_by_key( KeyVector& keys_output, ValueVector& values_output, + " -DKEY_TYPE=" + OclTypeTraits::type + " -DWG_SIZE=" + std::to_string(kernel_WgSize); - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; diff --git a/src/library/transform/scan.hpp b/src/library/transform/scan.hpp index fe9499e..fd4d956 100644 --- a/src/library/transform/scan.hpp +++ b/src/library/transform/scan.hpp @@ -105,7 +105,7 @@ scan(VectorType& output, const VectorType& input, + " -DWG_SIZE=" + std::to_string(kernel0_WgSize) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -170,7 +170,7 @@ scan(VectorType& output, const VectorType& input, + " -DWG_SIZE=" + std::to_string(kernel1_WgSize) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; @@ -228,7 +228,7 @@ scan(VectorType& output, const VectorType& input, + " 
-DWG_SIZE=" + std::to_string(kernel1_WgSize) + " -D" + ElementWiseOperatorTrait::operation; - if (control->addressBits == GPUADDRESS64WORD) + if (sizeof(clsparseIdx_t) == 8) { std::string options = std::string() + " -DSIZE_TYPE=" + OclTypeTraits::type; diff --git a/src/tests/resources/matrix_utils.h b/src/tests/resources/matrix_utils.h index 6afd530..76b5c6e 100644 --- a/src/tests/resources/matrix_utils.h +++ b/src/tests/resources/matrix_utils.h @@ -32,15 +32,15 @@ class matrix data.clear( ); } - matrix( size_t rows, size_t cols, size_t ld ): num_rows( rows ), num_cols( cols ), leading_dim( ld ) + matrix(clsparseIdx_t rows, clsparseIdx_t cols, clsparseIdx_t ld) : num_rows(rows), num_cols(cols), leading_dim(ld) { data.resize( num_rows * leading_dim ); } std::vector< T > data; - size_t num_rows; - size_t num_cols; - size_t leading_dim; + clsparseIdx_t num_rows; + clsparseIdx_t num_cols; + clsparseIdx_t leading_dim; }; // convert row indices vector to csr row_offsets vector @@ -74,7 +74,7 @@ void indicesToOffsets(const std::vector& row_indices, // lenght of offsets == n_rows+1; template void offsetsToIndices(const std::vector& row_offsets, - const size_t n_rows, + const clsparseIdx_t n_rows, std::vector& row_indices) { INDEX_TYPE nnz = row_offsets[n_rows]; @@ -82,7 +82,7 @@ void offsetsToIndices(const std::vector& row_offsets, if(row_indices.size() != nnz) row_indices.resize(nnz); - for( size_t i = 0; i < n_rows; i++ ) + for (clsparseIdx_t i = 0; i < n_rows; i++) for (clsparseIdx_t j = row_offsets[i]; j < row_offsets[i+1]; j++) row_indices[j] = i; @@ -98,7 +98,7 @@ void sortByRowCol(std::vector& rows, { typedef std::tuple Element; - size_t size = vals.size( ); + clsparseIdx_t size = vals.size(); std::vector matrix;//(size); diff --git a/src/tests/test-blas3.cpp b/src/tests/test-blas3.cpp index f306ecf..cf82e79 100644 --- a/src/tests/test-blas3.cpp +++ b/src/tests/test-blas3.cpp @@ -119,9 +119,9 @@ class TestCSRSpGeMM : public ::testing::Test { clsparseInitCsrMatrix(&csrMatrixC); }// end - void checkRowOffsets(std::vector& amdRowPtr) + void checkRowOffsets(std::vector& amdRowPtr) { - for (size_t i = 0; i < amdRowPtr.size(); i++) + for (clsparseIdx_t i = 0; i < amdRowPtr.size(); i++) { //ASSERT_EQ(amdRowPtr[i], this->C.index1_data()[i]); //EXPECT_EQ(amdRowPtr[i], this->C.index1_data()[i]); @@ -133,7 +133,7 @@ class TestCSRSpGeMM : public ::testing::Test { } }// end - void checkInDense(std::vector& amdRowPtr, std::vector& amdColIndices, std::vector& amdVals) + void checkInDense(std::vector& amdRowPtr, std::vector& amdColIndices, std::vector& amdVals) { uBLAS::mapped_matrix sparseDense(csrMatrixC.num_rows, csrMatrixC.num_cols, 0); uBLAS::mapped_matrix boostDense(csrMatrixC.num_rows, csrMatrixC.num_cols, 0); @@ -143,25 +143,25 @@ class TestCSRSpGeMM : public ::testing::Test { // Therefore converting to dense and verifying the output in dense format // Convert CSR to Dense - for (size_t i = 0; i < amdRowPtr.size() - 1; i++) + for (clsparseIdx_t i = 0; i < amdRowPtr.size() - 1; i++) { // i corresponds to row index - for (size_t j = amdRowPtr[i]; j < amdRowPtr[i + 1]; j++) + for (clsparseIdx_t j = amdRowPtr[i]; j < amdRowPtr[i + 1]; j++) sparseDense(i, amdColIndices[j]) = amdVals[j]; } - for (size_t i = 0; i < this->C.index1_data().size() - 1; i++) + for (clsparseIdx_t i = 0; i < this->C.index1_data().size() - 1; i++) { - for (size_t j = this->C.index1_data()[i]; j < this->C.index1_data()[i + 1]; j++) + for (clsparseIdx_t j = this->C.index1_data()[i]; j < this->C.index1_data()[i + 1]; j++) 
boostDense(i, this->C.index2_data()[j]) = this->C.value_data()[j]; } bool brelativeErrorFlag = false; bool babsErrorFlag = false; - for (size_t i = 0; i < csrMatrixC.num_rows; i++) + for (clsparseIdx_t i = 0; i < csrMatrixC.num_rows; i++) { - for (size_t j = 0; j < csrMatrixC.num_cols; j++) + for (clsparseIdx_t j = 0; j < csrMatrixC.num_cols; j++) { //ASSERT_EQ(boostDense(i, j), sparseDense(i, j)); #ifdef _DEBUG_SpMxSpM_ @@ -177,9 +177,9 @@ class TestCSRSpGeMM : public ::testing::Test { } } // Relative Error - for (size_t i = 0; i < csrMatrixC.num_rows; i++) + for (clsparseIdx_t i = 0; i < csrMatrixC.num_rows; i++) { - for (size_t j = 0; j < csrMatrixC.num_cols; j++) + for (clsparseIdx_t j = 0; j < csrMatrixC.num_cols; j++) { float diff = fabs(boostDense(i, j) - sparseDense(i, j)); float ratio = diff / boostDense(i, j); @@ -225,7 +225,7 @@ TYPED_TEST(TestCSRSpGeMM, square) { using SPER = CSRSparseEnvironment; using CLSE = ClSparseEnvironment; - typedef typename uBLAS::compressed_matrix > uBlasCSRM; + typedef typename uBLAS::compressed_matrix > uBlasCSRM; cl::Event event; clsparseEnableAsync(CLSE::control, true); @@ -245,8 +245,8 @@ TYPED_TEST(TestCSRSpGeMM, square) //std::cout << "nrows =" << (this->csrMatrixC).num_rows << std::endl; //std::cout << "nnz =" << (this->csrMatrixC).num_nonzeros << std::endl; - std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix - std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices + std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix + std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); @@ -262,14 +262,14 @@ TYPED_TEST(TestCSRSpGeMM, square) cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.colIndices, CL_TRUE, 0, - (this->csrMatrixC).num_nonzeros * sizeof(size_t), resultColIndices.data(), 0, NULL, NULL); + (this->csrMatrixC).num_nonzeros * sizeof(clsparseIdx_t), resultColIndices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.rowOffsets, CL_TRUE, 0, - ((this->csrMatrixC).num_rows + 1) * sizeof(size_t), resultRowPtr.data(), 0, NULL, NULL); + ((this->csrMatrixC).num_rows + 1) * sizeof(clsparseIdx_t), resultRowPtr.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); @@ -311,13 +311,13 @@ TYPED_TEST(TestCSRSpGeMM, square) else { /* Check Col Indices */ - for (size_t i = 0; i < resultColIndices.size(); i++) + for (clsparseIdx_t i = 0; i < resultColIndices.size(); i++) { ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); } /* Check Values */ - for (size_t i = 0; i < resultVals.size(); i++) + for (clsparseIdx_t i = 0; i < resultVals.size(); i++) { //TODO: how to define the tolerance ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.1); @@ -346,7 +346,7 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) { using SPER = CSRSparseEnvironment; using CLSE = ClSparseEnvironment; - typedef typename uBLAS::compressed_matrix > uBlasCSRM; + typedef typename uBLAS::compressed_matrix > uBlasCSRM; cl::Event event; clsparseEnableAsync(CLSE::control, true); @@ -356,8 +356,8 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) std::vector tmpArray; tmpArray.resize(SPER::csrSMatrix.num_nonzeros); - objFillVals.fillMtxTwoPowers(tmpArray.data(), tmpArray.size()); - 
//objFillVals.fillMtxOnes(tmpArray.data(), tmpArray.size()); + //objFillVals.fillMtxTwoPowers(tmpArray.data(), tmpArray.size()); + objFillVals.fillMtxOnes(tmpArray.data(), tmpArray.size()); // Fill ublas scr with the same matrix values for (size_t i = 0; i < tmpArray.size(); i++) @@ -380,8 +380,8 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) event.wait(); - std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix - std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices + std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix + std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); @@ -397,14 +397,14 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.colIndices, CL_TRUE, 0, - (this->csrMatrixC).num_nonzeros * sizeof(size_t), resultColIndices.data(), 0, NULL, NULL); + (this->csrMatrixC).num_nonzeros * sizeof(clsparseIdx_t), resultColIndices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, this->csrMatrixC.rowOffsets, CL_TRUE, 0, - ((this->csrMatrixC).num_rows + 1) * sizeof(size_t), resultRowPtr.data(), 0, NULL, NULL); + ((this->csrMatrixC).num_rows + 1) * sizeof(clsparseIdx_t), resultRowPtr.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); @@ -426,13 +426,13 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) else { /* Check Col Indices */ - for (size_t i = 0; i < resultColIndices.size(); i++) + for (clsparseIdx_t i = 0; i < resultColIndices.size(); i++) { ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); } /* Check Values */ - for (size_t i = 0; i < resultVals.size(); i++) + for (clsparseIdx_t i = 0; i < resultVals.size(); i++) { //TODO: how to define the tolerance ASSERT_NEAR(resultVals[i], this->C.value_data()[i], 0.0); @@ -441,13 +441,13 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size()); //Rest of the col_indices should be zero - for (size_t i = resultColIndices.size(); i < this->C.index2_data().size(); i++) + for (clsparseIdx_t i = resultColIndices.size(); i < this->C.index2_data().size(); i++) { ASSERT_EQ(0, this->C.index2_data()[i]); } // Rest of the values should be zero - for (size_t i = resultVals.size(); i < this->C.value_data().size(); i++) + for (clsparseIdx_t i = resultVals.size(); i < this->C.value_data().size(); i++) { ASSERT_EQ(0, this->C.value_data()[i]); } @@ -620,15 +620,15 @@ TYPED_TEST(TestCSRMM, multiply) if(typeid(TypeParam) == typeid(float)) - for (size_t l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) - for (size_t i = 0; i < this->C.data().size(); i++) + for (clsparseIdx_t l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) + for (clsparseIdx_t i = 0; i < this->C.data().size(); i++) { ASSERT_NEAR(this->C.data()[i], result[i], 5e-3); } if(typeid(TypeParam) == typeid(double)) - for (size_t l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) - for (size_t i = 0; i < this->C.data().size(); i++) + for (clsparseIdx_t l = 0; l < std::min(this->C.size1(), this->C.size2()); l++) + for (clsparseIdx_t i = 0; i < this->C.data().size(); i++) { ASSERT_NEAR(this->C.data()[i], result[i], 5e-10); }; diff --git a/src/tests/test-conversion.cpp b/src/tests/test-conversion.cpp index ee523bd..d17e863 100644 --- 
a/src/tests/test-conversion.cpp +++ b/src/tests/test-conversion.cpp @@ -106,7 +106,7 @@ class MatrixConversion : public ::testing::Test result.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for(int i = 0; i < ublas_dense.data().size(); i++) + for (clsparseIdx_t i = 0; i < ublas_dense.data().size(); i++) { // there should be exactly the same data ASSERT_NEAR(ublas_dense.data()[i], result[i], 1e-7); @@ -131,7 +131,7 @@ class MatrixConversion : public ::testing::Test result.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for(int i = 0; i < ublas_dense.data().size(); i++) + for (clsparseIdx_t i = 0; i < ublas_dense.data().size(); i++) { // there should be exactly the same data ASSERT_NEAR(ublas_dense.data()[i], result[i], 1e-14); @@ -164,11 +164,11 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(CL_SUCCESS, cl_status); csrMatx.rowOffsets = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, - CSRE::ublasSCsr.index1_data().size() * sizeof( cl_int ), NULL, &cl_status ); + CSRE::ublasSCsr.index1_data().size() * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); csrMatx.colIndices = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, - CSRE::ublasSCsr.index2_data().size() * sizeof( cl_int ), NULL, &cl_status ); + CSRE::ublasSCsr.index2_data().size() * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -180,8 +180,8 @@ class MatrixConversion : public ::testing::Test //Compare // Download GPU data - std::vector row_offsets(CSRE::ublasSCsr.index1_data().size()); - std::vector col_indices(CSRE::ublasSCsr.index2_data().size()); + std::vector row_offsets(CSRE::ublasSCsr.index1_data().size()); + std::vector col_indices(CSRE::ublasSCsr.index2_data().size()); std::vector values(CSRE::ublasSCsr.value_data().size()); @@ -190,27 +190,27 @@ class MatrixConversion : public ::testing::Test values.size() * sizeof(T), values.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < values.size(); i++) + for (clsparseIdx_t i = 0; i < values.size(); i++) ASSERT_NEAR(CSRE::ublasSCsr.value_data()[i], values[i], 1e-7); // Compare row_offsets cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.rowOffsets, CL_TRUE, 0, - row_offsets.size() * sizeof(cl_int), row_offsets.data(), 0, nullptr, nullptr); + row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < row_offsets.size(); i++) + for (clsparseIdx_t i = 0; i < row_offsets.size(); i++) ASSERT_EQ(CSRE::ublasSCsr.index1_data()[i], row_offsets[i]); // Compare col indices cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.colIndices, CL_TRUE, 0, - col_indices.size() * sizeof(cl_int), col_indices.data(), 0, nullptr, nullptr); + col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < col_indices.size(); i++) + for (clsparseIdx_t i = 0; i < col_indices.size(); i++) ASSERT_EQ(CSRE::ublasSCsr.index2_data()[i], col_indices[i]); // Release csrMatrix data @@ -243,11 +243,11 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(CL_SUCCESS, cl_status); csrMatx.rowOffsets = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, - ( CSRE::csrDMatrix.num_rows + 1 ) * sizeof( cl_int ), NULL, &cl_status ); + ( CSRE::csrDMatrix.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); csrMatx.colIndices = ::clCreateBuffer( CLSE::context, 
CL_MEM_READ_ONLY, - CSRE::csrDMatrix.num_nonzeros * sizeof( cl_int ), NULL, &cl_status ); + CSRE::csrDMatrix.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -259,8 +259,8 @@ class MatrixConversion : public ::testing::Test //Compare // Download GPU data - std::vector row_offsets(CSRE::csrDMatrix.num_rows + 1); - std::vector col_indices(CSRE::csrDMatrix.num_nonzeros); + std::vector row_offsets(CSRE::csrDMatrix.num_rows + 1); + std::vector col_indices(CSRE::csrDMatrix.num_nonzeros); std::vector values (CSRE::csrDMatrix.num_nonzeros); // Compare values @@ -268,29 +268,29 @@ class MatrixConversion : public ::testing::Test values.size() * sizeof(T), values.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < values.size(); i++) -{ + for (clsparseIdx_t i = 0; i < values.size(); i++) + { EXPECT_DOUBLE_EQ(CSRE::ublasDCsr.value_data()[i], values[i]); -} + } // Compare row_offsets cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.rowOffsets, CL_TRUE, 0, - row_offsets.size() * sizeof(cl_int), row_offsets.data(), 0, nullptr, nullptr); + row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < row_offsets.size(); i++) + for (clsparseIdx_t i = 0; i < row_offsets.size(); i++) ASSERT_EQ(CSRE::ublasDCsr.index1_data()[i], row_offsets[i]); // Compare col indices cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.colIndices, CL_TRUE, 0, - col_indices.size() * sizeof(cl_int), col_indices.data(), 0, nullptr, nullptr); + col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); - for (int i = 0; i < col_indices.size(); i++) + for (clsparseIdx_t i = 0; i < col_indices.size(); i++) ASSERT_EQ(CSRE::ublasDCsr.index2_data()[i], col_indices[i]); // Release csrMatrix data @@ -323,13 +323,13 @@ class MatrixConversion : public ::testing::Test cooMatrix.colIndices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_ONLY, - cooMatrix.num_nonzeros * sizeof(cl_int), + cooMatrix.num_nonzeros * sizeof(clsparseIdx_t), NULL, &cl_status); ASSERT_EQ(CL_SUCCESS, cl_status); cooMatrix.rowIndices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_ONLY, - cooMatrix.num_nonzeros * sizeof(cl_int), + cooMatrix.num_nonzeros * sizeof(clsparseIdx_t), NULL, &cl_status); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -357,8 +357,8 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(clsparseSuccess, status); // Compare newly generated results with the uBLAS matrix from CSRE - std::vector row_offsets(CSRE::csrSMatrix.num_rows + 1); - std::vector col_indices(CSRE::csrSMatrix.num_nonzeros); + std::vector row_offsets(CSRE::csrSMatrix.num_rows + 1); + std::vector col_indices(CSRE::csrSMatrix.num_nonzeros); std::vector values (CSRE::csrSMatrix.num_nonzeros); //Download GPU data to vectors; @@ -372,7 +372,7 @@ class MatrixConversion : public ::testing::Test EXPECT_FLOAT_EQ(values[i], CSRE::ublasSCsr.value_data()[i]); cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrSMatrix.colIndices, - CL_TRUE, 0, col_indices.size() * sizeof(cl_int), + CL_TRUE, 0, col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -382,7 +382,7 @@ class MatrixConversion : public ::testing::Test cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrSMatrix.rowOffsets, - CL_TRUE, 0, row_offsets.size() * sizeof(cl_int), + CL_TRUE, 0, row_offsets.size() * sizeof(clsparseIdx_t), 
row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -412,8 +412,8 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(clsparseSuccess, status); // Compare newly generated results with the uBLAS matrix from CSRE - std::vector row_offsets(CSRE::csrDMatrix.num_rows + 1); - std::vector col_indices(CSRE::csrDMatrix.num_nonzeros); + std::vector row_offsets(CSRE::csrDMatrix.num_rows + 1); + std::vector col_indices(CSRE::csrDMatrix.num_nonzeros); std::vector values (CSRE::csrDMatrix.num_nonzeros); //Download GPU data to vectors; @@ -423,27 +423,27 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(CL_SUCCESS, cl_status); // Compare values; - for (int i = 0; i < values.size(); i++) + for (clsparseIdx_t i = 0; i < values.size(); i++) EXPECT_DOUBLE_EQ(values[i], CSRE::ublasDCsr.value_data()[i]); cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrDMatrix.colIndices, - CL_TRUE, 0, col_indices.size() * sizeof(cl_int), + CL_TRUE, 0, col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); // Compare column indices - for (int i = 0; i < col_indices.size(); i++) + for (clsparseIdx_t i = 0; i < col_indices.size(); i++) ASSERT_EQ(col_indices[i], CSRE::ublasDCsr.index2_data()[i]); cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrDMatrix.rowOffsets, - CL_TRUE, 0, row_offsets.size() * sizeof(cl_int), + CL_TRUE, 0, row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); // Compare row offsets - for (int i = 0; i < row_offsets.size(); i++) + for (clsparseIdx_t i = 0; i < row_offsets.size(); i++) ASSERT_EQ(row_offsets[i], CSRE::ublasDCsr.index1_data()[i]); } @@ -471,13 +471,13 @@ class MatrixConversion : public ::testing::Test cooMatrix.colIndices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_WRITE, - CSRE::csrSMatrix.num_nonzeros * sizeof(cl_int), + CSRE::csrSMatrix.num_nonzeros * sizeof(clsparseIdx_t), nullptr, &cl_status); ASSERT_EQ(CL_SUCCESS, cl_status); cooMatrix.rowIndices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_WRITE, - CSRE::csrSMatrix.num_nonzeros * sizeof(cl_int), + CSRE::csrSMatrix.num_nonzeros * sizeof(clsparseIdx_t), nullptr, &cl_status); ASSERT_EQ(CL_SUCCESS, cl_status); From 2bbd48273ec8f3659a3145474f40bd2fcdeb6d96 Mon Sep 17 00:00:00 2001 From: Kiran Date: Thu, 17 Dec 2015 14:29:33 +0530 Subject: [PATCH 09/19] Unnecessary memory allocations removed Removed dense matrix allocations for non dense matrix conversion routines in test-conversion.cpp and fixed issue 177 in atomic-reduce.hpp fixed issue 177 for dense2csr Fixed build errors due to new user defined type clsparseIdx_t --- samples/sample-cg.cpp | 6 ++-- samples/sample-spmv.cpp | 6 ++-- src/library/blas1/atomic-reduce.hpp | 4 +-- src/tests/test-conversion.cpp | 51 ++++++++++++++++++++++++++++- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/samples/sample-cg.cpp b/samples/sample-cg.cpp index 74d18e4..d0c9590 100644 --- a/samples/sample-cg.cpp +++ b/samples/sample-cg.cpp @@ -166,7 +166,7 @@ int main (int argc, char* argv[]) // Read matrix from file. Calculates the rowBlocks strucutres as well. 
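// The hunks above (and the sample programs below) apply one rule everywhere: any
// OpenCL buffer holding row offsets or column indices is sized with
// sizeof(clsparseIdx_t), matching the std::vector<clsparseIdx_t> host containers.
// A compact sketch of that rule; the function and parameter names are assumptions
// made for illustration, only the sizing expressions mirror the patch.
#include <CL/cl.h>

typedef cl_ulong clsparseIdx_t;   // assumed 64-bit index build for illustration

// Allocate the three CSR buffers of an n_rows-row matrix with nnz nonzeros.
// Using sizeof(cl_int) for the index buffers here would under-allocate them by
// half on a 64-bit index build, which is exactly what this patch prevents.
void allocate_csr_buffers(cl_context ctx, clsparseIdx_t n_rows, clsparseIdx_t nnz,
                          cl_mem* values, cl_mem* colIndices, cl_mem* rowOffsets)
{
    cl_int status;
    *values     = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY,
                                   nnz * sizeof(cl_float), NULL, &status);
    *colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY,
                                   nnz * sizeof(clsparseIdx_t), NULL, &status);
    *rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY,
                                   (n_rows + 1) * sizeof(clsparseIdx_t), NULL, &status);
}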
- int nnz, row, col; + clsparseIdx_t nnz, row, col; // read MM header to get the size of the matrix; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, matrix_path.c_str( ) ); @@ -186,10 +186,10 @@ int main (int argc, char* argv[]) A.num_nonzeros * sizeof( float ), NULL, &cl_status ); A.colIndices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, - A.num_nonzeros * sizeof( cl_int ), NULL, &cl_status ); + A.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status ); A.rowOffsets = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, - ( A.num_rows + 1 ) * sizeof( cl_int ), NULL, &cl_status ); + ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); A.rowBlocks = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status ); diff --git a/samples/sample-spmv.cpp b/samples/sample-spmv.cpp index 81e4eb4..8313b1e 100644 --- a/samples/sample-spmv.cpp +++ b/samples/sample-spmv.cpp @@ -198,7 +198,7 @@ int main (int argc, char* argv[]) // Read matrix from file. Calculates the rowBlocks strucutres as well. - int nnz, row, col; + clsparseIdx_t nnz, row, col; // read MM header to get the size of the matrix; clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, matrix_path.c_str( ) ); @@ -218,10 +218,10 @@ int main (int argc, char* argv[]) A.num_nonzeros * sizeof( float ), NULL, &cl_status ); A.colIndices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, - A.num_nonzeros * sizeof( cl_int ), NULL, &cl_status ); + A.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status ); A.rowOffsets = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, - ( A.num_rows + 1 ) * sizeof( cl_int ), NULL, &cl_status ); + ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); A.rowBlocks = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status ); diff --git a/src/library/blas1/atomic-reduce.hpp b/src/library/blas1/atomic-reduce.hpp index 70e93b4..7728c34 100644 --- a/src/library/blas1/atomic-reduce.hpp +++ b/src/library/blas1/atomic-reduce.hpp @@ -70,7 +70,7 @@ atomic_reduce(clsparseScalarPrivate* pR, std::string options = std::string() + " -DATOMIC_DOUBLE"; params.append(options); } - else if (typeid(cl_int) == typeid(T)) + else if (typeid(cl_int) == typeid(T) || typeid(clsparseIdx_t) == typeid(T)) { std::string options = std::string() + " -DATOMIC_INT"; params.append(options); @@ -146,7 +146,7 @@ atomic_reduce(clsparse::array_base& pR, std::string options = std::string() + " -DATOMIC_DOUBLE"; params.append(options); } - else if (typeid(cl_int) == typeid(T)) + else if (typeid(cl_int) == typeid(T) || typeid(clsparseIdx_t) == typeid(T)) { std::string options = std::string() + " -DATOMIC_INT"; params.append(options); diff --git a/src/tests/test-conversion.cpp b/src/tests/test-conversion.cpp index d17e863..910917b 100644 --- a/src/tests/test-conversion.cpp +++ b/src/tests/test-conversion.cpp @@ -56,6 +56,7 @@ class MatrixConversion : public ::testing::Test public: void SetUp() { +#if 0 // by default it is row_major; cldenseInitMatrix(&A); @@ -69,15 +70,17 @@ class MatrixConversion : public ::testing::Test A.num_cols = CSRE::n_cols; A.num_rows = CSRE::n_rows; A.lead_dim = std::min(A.num_cols, A.num_rows); +#endif } void TearDown() { - +#if 0 ::clReleaseMemObject(A.values); cldenseInitMatrix(&A); +#endif } // uBLAS dense matrix format type @@ -89,6 +92,23 @@ class MatrixConversion : public ::testing::Test void test_csr_to_dense() { + +#if 1 + // by default it is row_major; + cldenseInitMatrix(&A); + + 
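// Condensed view of the sequence sample-cg.cpp follows above and sample-spmv.cpp
// repeats below (fragment: clSPARSE/OpenCL setup, full error handling, and the
// value buffer are abbreviated; the calls and fields are the ones visible in the
// hunks, while matrix_path, context and cl_status come from the surrounding
// sample code).
clsparseIdx_t nnz, row, col;
clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, matrix_path.c_str());
if (fileError != clsparseSuccess)
{
    std::cout << "Could not read matrix market header from disk" << std::endl;
    return -1;   // abbreviated error path
}

clsparseCsrMatrix A;
clsparseInitCsrMatrix(&A);
A.num_nonzeros = nnz;
A.num_rows     = row;
A.num_cols     = col;

// Index buffers are sized with sizeof(clsparseIdx_t); mixing sizeof(cl_int)
// back in would truncate or overrun on a 64-bit index build.
A.colIndices = ::clCreateBuffer(context(), CL_MEM_READ_ONLY,
                                A.num_nonzeros * sizeof(clsparseIdx_t), NULL, &cl_status);
A.rowOffsets = ::clCreateBuffer(context(), CL_MEM_READ_ONLY,
                                (A.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &cl_status);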
cl_int status; + + A.values = ::clCreateBuffer(CLSE::context, CL_MEM_READ_WRITE, + CSRE::n_cols * CSRE::n_rows * sizeof(T), + nullptr, &status); + ASSERT_EQ(CL_SUCCESS, status); + + A.num_cols = CSRE::n_cols; + A.num_rows = CSRE::n_rows; + A.lead_dim = std::min(A.num_cols, A.num_rows); +#endif + if (typeid(T) == typeid(cl_float)) { uBLASDenseM ublas_dense(CSRE::ublasSCsr); @@ -137,11 +157,33 @@ class MatrixConversion : public ::testing::Test ASSERT_NEAR(ublas_dense.data()[i], result[i], 1e-14); } } + +#if 1 + ::clReleaseMemObject(A.values); + + cldenseInitMatrix(&A); +#endif } void test_dense_to_csr() { +#if 1 + // by default it is row_major; + cldenseInitMatrix(&A); + + cl_int status; + + A.values = ::clCreateBuffer(CLSE::context, CL_MEM_READ_WRITE, + CSRE::n_cols * CSRE::n_rows * sizeof(T), + nullptr, &status); + ASSERT_EQ(CL_SUCCESS, status); + + A.num_cols = CSRE::n_cols; + A.num_rows = CSRE::n_rows; + A.lead_dim = std::min(A.num_cols, A.num_rows); +#endif + if (typeid(T) == typeid(cl_float)) { //Create dense matrix; @@ -299,6 +341,13 @@ class MatrixConversion : public ::testing::Test cl_status = ::clReleaseMemObject(csrMatx.rowOffsets); } + +#if 0 + ::clReleaseMemObject(A.values); + + cldenseInitMatrix(&A); +#endif + } void test_coo_to_csr() From bd3e6ec58875ff1d7181f325db23119b5577778c Mon Sep 17 00:00:00 2001 From: Kiran Date: Mon, 28 Dec 2015 13:40:38 +0530 Subject: [PATCH 10/19] 64-bit containers for CUDA benchmark functions & changed cl_bool in main.cpp to bool. Changed cuda benchmark functions files to support 32/64-bit containers A new header file (cufunc_sparse-xx.h) is created to define CLSPARSE_INDEX_SIZEOF macro to support 64-bit containers. This header file is included in the cuda benchmark files. Replaced cl_bool with bool in main.cpp, cl_bool is not defined in CUDA --- .../functions/cufunc_xCoo2Csr.hpp | 28 ++--- .../functions/cufunc_xCsr2Coo.hpp | 29 ++--- .../functions/cufunc_xCsr2dense.hpp | 25 +++-- .../functions/cufunc_xDense2Csr.hpp | 39 +++---- .../functions/cufunc_xSpMSpM.hpp | 70 ++++++------ .../functions/cufunc_xSpMSpM_gemm2Timed.hpp | 63 +++++------ .../functions/cufunc_xSpMdV.hpp | 25 +++-- .../cusparse-bench/include/cufunc_sparse-xx.h | 38 +++++++ .../cusparse-bench/include/mm_reader.hpp | 7 +- src/benchmarks/cusparse-bench/src/main.cpp | 2 +- .../cusparse-bench/src/mm_reader.cpp | 103 +++++++++++------- 11 files changed, 248 insertions(+), 181 deletions(-) create mode 100644 src/benchmarks/cusparse-bench/include/cufunc_sparse-xx.h diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xCoo2Csr.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xCoo2Csr.hpp index 3f023c8..86c3017 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xCoo2Csr.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xCoo2Csr.hpp @@ -19,7 +19,7 @@ #include "cufunc_common.hpp" #include "include/io-exception.hpp" - +#include "include/cufunc_sparse-xx.h" template class xCoo2Csr: public cusparseFunc { @@ -81,11 +81,11 @@ class xCoo2Csr: public cusparseFunc } // Input: COO Row Indices - err = cudaMalloc( (void**)&deviceCooRowInd, n_vals * sizeof( int ) ); + err = cudaMalloc( (void**)&deviceCooRowInd, n_vals * sizeof( clsparseIdx_t ) ); CUDA_V_THROW( err, "cudaMalloc deviceCooRowInd" ); // Output: CSR - cudaError_t err = cudaMalloc( (void**)&deviceCSRRowOffsets, ( n_rows + 1 ) * sizeof( int ) ); + cudaError_t err = cudaMalloc( (void**)&deviceCSRRowOffsets, ( n_rows + 1 ) * sizeof( clsparseIdx_t ) ); CUDA_V_THROW( err, "cudaMalloc deviceCSRRowOffsets" ); }// End of 
function @@ -96,17 +96,17 @@ class xCoo2Csr: public cusparseFunc void initialize_gpu_buffer( ) { - cudaError_t err = cudaMemcpy( deviceCooRowInd, &row_indices[ 0 ], row_indices.size( ) * sizeof( int ), cudaMemcpyHostToDevice ); + cudaError_t err = cudaMemcpy( deviceCooRowInd, &row_indices[ 0 ], row_indices.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice ); CUDA_V_THROW( err, "cudaMalloc deviceCSRRowOffsets" ); - err = cudaMemset( deviceCSRRowOffsets, 0x0, ( n_rows + 1 ) * sizeof( int ) ); + err = cudaMemset(deviceCSRRowOffsets, 0x0, (n_rows + 1) * sizeof( clsparseIdx_t )); CUDA_V_THROW( err, "cudaMemset deviceCSRRowOffsets" ); }// end of function void reset_gpu_write_buffer( ) { - err = cudaMemset( deviceCSRRowOffsets, 0x0, ( n_rows + 1 ) * sizeof( int ) ); + err = cudaMemset(deviceCSRRowOffsets, 0x0, (n_rows + 1) * sizeof( clsparseIdx_t )); CUDA_V_THROW( err, "cudaMemset deviceCSRRowOffsets" ); cudaDeviceSynchronize( ); @@ -135,20 +135,20 @@ class xCoo2Csr: public cusparseFunc void xCoo2Csr_Function( bool flush ); //host matrix definition corresponding to CSR Format - std::vector< int > row_indices; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_indices; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; // matrix values - int n_rows; // number of rows - int n_cols; // number of cols - int n_vals; // number of Non-Zero Values (nnz) - int* colIndices; + clsparseIdx_t n_rows; // number of rows + clsparseIdx_t n_cols; // number of cols + clsparseIdx_t n_vals; // number of Non-Zero Values (nnz) + clsparseIdx_t* colIndices; bool explicit_zeroes; // device CUDA pointers - int* deviceCSRRowOffsets; // Input: CSR Row Offsets - int* deviceCooRowInd; // Output: Coordinate format row indices + clsparseIdx_t* deviceCSRRowOffsets; // Input: CSR Row Offsets + clsparseIdx_t* deviceCooRowInd; // Output: Coordinate format row indices }; // class xCoo2Csr template<> diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2Coo.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2Coo.hpp index ec2c4de..23fb79f 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2Coo.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2Coo.hpp @@ -19,6 +19,7 @@ #include "cufunc_common.hpp" #include "include/io-exception.hpp" +#include "include/cufunc_sparse-xx.h" template class xCsr2Coo : public cusparseFunc @@ -65,7 +66,7 @@ class xCsr2Coo : public cusparseFunc // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. 
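// Host-side reference for the COO -> CSR direction benchmarked by xCoo2Csr above:
// sorted COO row indices are compressed into a CSR row-offset array of length
// n_rows + 1 (the counterpart of indicesToOffsets in src/tests/resources/matrix_utils.h).
// Self-contained sketch; the typedef is an assumption made for illustration.
#include <cstdint>
#include <vector>

typedef uint64_t clsparseIdx_t;

std::vector<clsparseIdx_t>
row_indices_to_offsets(clsparseIdx_t n_rows, const std::vector<clsparseIdx_t>& row_indices)
{
    std::vector<clsparseIdx_t> row_offsets(n_rows + 1, 0);
    for (clsparseIdx_t i = 0; i < row_indices.size(); ++i)
        ++row_offsets[row_indices[i] + 1];            // count nonzeros per row
    for (clsparseIdx_t r = 0; r < n_rows; ++r)
        row_offsets[r + 1] += row_offsets[r];         // prefix-sum counts into offsets
    return row_offsets;
}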
- return (sizeof(int)*(n_vals + n_rows) + sizeof(T) * (n_vals + n_cols + n_rows)) / time_in_ns(); + return (sizeof( clsparseIdx_t )*(n_vals + n_rows) + sizeof(T) * (n_vals + n_cols + n_rows)) / time_in_ns(); #endif // Number of Elements converted in unit time return (n_vals / time_in_ns()); @@ -95,11 +96,11 @@ class xCsr2Coo : public cusparseFunc n_vals = values.size(); */ // Input: CSR - cudaError_t err = cudaMalloc((void**)&deviceCSRRowOffsets, (n_rows+1) * sizeof(int)); + cudaError_t err = cudaMalloc((void**)&deviceCSRRowOffsets, (n_rows + 1) * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc deviceCSRRowOffsets"); // Output: COO Row Indices - err = cudaMalloc((void**)&deviceCooRowInd, n_vals * sizeof(int)); + err = cudaMalloc((void**)&deviceCooRowInd, n_vals * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc deviceCooRowInd"); }// End of function @@ -109,17 +110,17 @@ class xCsr2Coo : public cusparseFunc void initialize_gpu_buffer() { - cudaError_t err = cudaMemcpy(deviceCSRRowOffsets, &row_offsets[0], row_offsets.size() * sizeof(int), cudaMemcpyHostToDevice); + cudaError_t err = cudaMemcpy(deviceCSRRowOffsets, &row_offsets[0], row_offsets.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc deviceCSRRowOffsets"); - err = cudaMemset(deviceCooRowInd, 0x0, n_vals * sizeof(int)); + err = cudaMemset(deviceCooRowInd, 0x0, n_vals * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMemset deviceCooRowInd"); }// end of function void reset_gpu_write_buffer() { - err = cudaMemset(deviceCooRowInd, 0x0, n_vals * sizeof(int)); + err = cudaMemset(deviceCooRowInd, 0x0, n_vals * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMemset deviceCooRowInd"); }// end of function @@ -148,20 +149,20 @@ class xCsr2Coo : public cusparseFunc // std::string sparseFile; //host matrix definition corresponding to CSR Format - std::vector< int > row_offsets; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_offsets; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; // matrix values - int n_rows; // number of rows - int n_cols; // number of cols - int n_vals; // number of Non-Zero Values (nnz) - int* colIndices; + clsparseIdx_t n_rows; // number of rows + clsparseIdx_t n_cols; // number of cols + clsparseIdx_t n_vals; // number of Non-Zero Values (nnz) + clsparseIdx_t* colIndices; bool explicit_zeroes; // device CUDA pointers - int* deviceCSRRowOffsets; // Input: CSR Row Offsets - int* deviceCooRowInd; // Output: Coordinate format row indices + clsparseIdx_t* deviceCSRRowOffsets; // Input: CSR Row Offsets + clsparseIdx_t* deviceCooRowInd; // Output: Coordinate format row indices }; // class xCsr2Coo template<> diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2dense.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2dense.hpp index 915a3f2..f63b5c2 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2dense.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xCsr2dense.hpp @@ -19,6 +19,7 @@ #include "cufunc_common.hpp" #include "include/io-exception.hpp" +#include "include/cufunc_sparse-xx.h" template class xCsr2Dense : public cusparseFunc @@ -72,7 +73,7 @@ class xCsr2Dense : public cusparseFunc // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. 
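// Separate from the byte-count estimate in the comment above, a plain host-side
// reference for the CSR -> dense scatter that xCsr2Dense times on the GPU is handy
// for validating device output (the tests earlier in this patch do a similar
// expansion through boost::numeric::ublas). Minimal sketch: row-major storage with
// leading dimension n_cols is assumed, and the typedef exists only to keep it standalone.
#include <cstdint>
#include <vector>

typedef uint64_t clsparseIdx_t;

template <typename T>
std::vector<T> csr_to_dense(clsparseIdx_t n_rows, clsparseIdx_t n_cols,
                            const std::vector<clsparseIdx_t>& row_offsets,
                            const std::vector<clsparseIdx_t>& col_indices,
                            const std::vector<T>& values)
{
    std::vector<T> dense(n_rows * n_cols, T(0));      // zero-initialized dense matrix
    for (clsparseIdx_t i = 0; i < n_rows; ++i)
        for (clsparseIdx_t j = row_offsets[i]; j < row_offsets[i + 1]; ++j)
            dense[i * n_cols + col_indices[j]] = values[j];
    return dense;
}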
- return ( sizeof( int )*( n_vals + n_rows ) + sizeof( T ) * ( n_vals + n_cols + n_rows ) ) / time_in_ns( ); + return (sizeof(clsparseIdx_t)*(n_vals + n_rows) + sizeof(T) * (n_vals + n_cols + n_rows)) / time_in_ns(); #endif // Number of Elements converted in unit time return (n_vals / time_in_ns()); @@ -101,10 +102,10 @@ class xCsr2Dense : public cusparseFunc //n_cols = col_indices.size( ); //n_vals = values.size( ); - cudaError_t err = cudaMalloc( (void**) &device_row_offsets, (n_rows + 1) * sizeof( int ) ); + cudaError_t err = cudaMalloc( (void**) &device_row_offsets, (n_rows + 1) * sizeof( clsparseIdx_t ) ); CUDA_V_THROW( err, "cudaMalloc device_row_offsets" ); - err = cudaMalloc( (void**) &device_col_indices, n_vals * sizeof( int ) ); + err = cudaMalloc( (void**) &device_col_indices, n_vals * sizeof( clsparseIdx_t ) ); CUDA_V_THROW( err, "cudaMalloc device_col_indices" ); err = cudaMalloc( (void**) &device_values, n_vals * sizeof( T ) ); @@ -120,10 +121,10 @@ class xCsr2Dense : public cusparseFunc void initialize_gpu_buffer( ) { - cudaError_t err = cudaMemcpy( device_row_offsets, &row_offsets[ 0 ], row_offsets.size( ) * sizeof( int ), cudaMemcpyHostToDevice ); + cudaError_t err = cudaMemcpy( device_row_offsets, &row_offsets[ 0 ], row_offsets.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice ); CUDA_V_THROW( err, "cudaMalloc device_row_offsets" ); - err = cudaMemcpy( device_col_indices, &col_indices[ 0 ], col_indices.size( ) * sizeof( int ), cudaMemcpyHostToDevice ); + err = cudaMemcpy( device_col_indices, &col_indices[ 0 ], col_indices.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice ); CUDA_V_THROW( err, "cudaMalloc device_col_indices" ); err = cudaMemcpy( device_values, &values[ 0 ], values.size( ) * sizeof( T ), cudaMemcpyHostToDevice ); @@ -167,21 +168,21 @@ class xCsr2Dense : public cusparseFunc void xCsr2Dense_Function( bool flush ); //host matrix definition - std::vector< int > row_offsets; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_offsets; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; - int n_rows; // number of rows - int n_cols; // number of cols - int n_vals; // number of Non-Zero Values (nnz) + clsparseIdx_t n_rows; // number of rows + clsparseIdx_t n_cols; // number of cols + clsparseIdx_t n_vals; // number of Non-Zero Values (nnz) bool explicit_zeroes; cusparseMatDescr_t descrA; // device CUDA pointers - int* device_row_offsets; - int* device_col_indices; + clsparseIdx_t* device_row_offsets; + clsparseIdx_t* device_col_indices; T* device_values; T* device_A; diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xDense2Csr.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xDense2Csr.hpp index bfd6609..d2fa94b 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xDense2Csr.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xDense2Csr.hpp @@ -19,6 +19,7 @@ #include "cufunc_common.hpp" #include "include/io-exception.hpp" +#include "include/cufunc_sparse-xx.h" template class xDense2Csr : public cusparseFunc @@ -99,10 +100,10 @@ class xDense2Csr : public cusparseFunc throw clsparse::io_exception( "Could not read matrix market from disk: " + path); } - cudaError_t err = cudaMalloc((void**)&device_row_offsets, (n_rows + 1) * sizeof(int)); + cudaError_t err = cudaMalloc((void**)&device_row_offsets, (n_rows + 1) * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); - err = cudaMalloc((void**)&device_col_indices, n_vals * sizeof(int)); + err = 
cudaMalloc((void**)&device_col_indices, n_vals * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc device_col_indices"); err = cudaMalloc((void**)&device_values, n_vals * sizeof(T)); @@ -112,17 +113,17 @@ class xDense2Csr : public cusparseFunc CUDA_V_THROW(err, "cudaMalloc device_A"); // Output CSR - err = cudaMalloc((void**)&devRowOffsets, (n_rows + 1) * sizeof(int)); + err = cudaMalloc((void**)&devRowOffsets, (n_rows + 1) * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc devRowOffsets"); - err = cudaMalloc((void**)&devColIndices, n_vals * sizeof(int)); + err = cudaMalloc((void**)&devColIndices, n_vals * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc devColIndices"); err = cudaMalloc((void**)&devValues, n_vals * sizeof(T)); CUDA_V_THROW(err, "cudaMalloc devValues"); // Allocate memory for nnzPerRow - err = cudaMalloc((void**)&nnzPerRow, n_rows * sizeof(int)); + err = cudaMalloc((void**)&nnzPerRow, n_rows * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc nnzPerRow"); }// end @@ -133,10 +134,10 @@ class xDense2Csr : public cusparseFunc void initialize_gpu_buffer() { - cudaError_t err = cudaMemcpy(device_row_offsets, &row_offsets[0], row_offsets.size() * sizeof(int), cudaMemcpyHostToDevice); + cudaError_t err = cudaMemcpy(device_row_offsets, &row_offsets[0], row_offsets.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); - err = cudaMemcpy(device_col_indices, &col_indices[0], col_indices.size() * sizeof(int), cudaMemcpyHostToDevice); + err = cudaMemcpy(device_col_indices, &col_indices[0], col_indices.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc device_col_indices"); err = cudaMemcpy(device_values, &values[0], values.size() * sizeof(T), cudaMemcpyHostToDevice); @@ -195,10 +196,10 @@ class xDense2Csr : public cusparseFunc void reset_gpu_write_buffer() { - cudaError_t err = cudaMemset(devRowOffsets, 0x0, (n_rows + 1) * sizeof(int)); + cudaError_t err = cudaMemset(devRowOffsets, 0x0, (n_rows + 1) * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMemset reset_gpu_write_buffer: devRowOffsets"); - err = cudaMemset(devColIndices, 0x0, n_vals * sizeof(int)); + err = cudaMemset(devColIndices, 0x0, n_vals * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMemset reset_gpu_write_buffer: devColIndices"); err = cudaMemset(devValues, 0x0, n_vals * sizeof(T)); @@ -236,29 +237,29 @@ class xDense2Csr : public cusparseFunc void csr2dense_Function(bool flush); // to get input in dense format //host matrix definition in csr format - std::vector< int > row_offsets; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_offsets; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; - int n_rows; // number of rows - int n_cols; // number of cols - int n_vals; // number of Non-Zero Values (nnz) + clsparseIdx_t n_rows; // number of rows + clsparseIdx_t n_cols; // number of cols + clsparseIdx_t n_vals; // number of Non-Zero Values (nnz) bool explicit_zeroes; cusparseMatDescr_t descrA; // device CUDA pointers - int* device_row_offsets; - int* device_col_indices; + clsparseIdx_t* device_row_offsets; + clsparseIdx_t* device_col_indices; T* device_values; // Dense format: output - >input T* device_A; - int* nnzPerRow; // Number of non-zero elements per row + clsparseIdx_t* nnzPerRow; // Number of non-zero elements per row // Output devie CUDA pointers:csr format - int* devRowOffsets; - int* devColIndices; + clsparseIdx_t* devRowOffsets; + clsparseIdx_t* 
devColIndices; T* devValues; diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM.hpp index 0242e78..0a64a9a 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM.hpp @@ -18,9 +18,11 @@ #define CUSPARSE_BENCHMARK_xSpMSpM_HXX__ #include "cufunc_common.hpp" +#include "include/cufunc_sparse-xx.h" #include "include/mm_reader.hpp" #include "include/io-exception.hpp" + // C = alpha * A * A template class xSpMSpM : public cusparseFunc { @@ -124,10 +126,10 @@ class xSpMSpM : public cusparseFunc { throw clsparse::io_exception("Could not read matrix market header from disk"); } - cudaError_t err = cudaMalloc((void**)&dev_csrRowPtrA, row_offsets.size() * sizeof(int)); + cudaError_t err = cudaMalloc((void**)&dev_csrRowPtrA, row_offsets.size() * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); - err = cudaMalloc((void**)&dev_csrColIndA, col_indices.size() * sizeof(int)); + err = cudaMalloc((void**)&dev_csrColIndA, col_indices.size() * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc device_col_indices"); err = cudaMalloc((void**)&dev_csrValA, values.size() * sizeof(T)); @@ -144,10 +146,10 @@ class xSpMSpM : public cusparseFunc { void initialize_gpu_buffer() { - cudaError_t err = cudaMemcpy(dev_csrRowPtrA, &row_offsets[0], row_offsets.size() * sizeof(int), cudaMemcpyHostToDevice); + cudaError_t err = cudaMemcpy(dev_csrRowPtrA, &row_offsets[0], row_offsets.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); - err = cudaMemcpy(dev_csrColIndA, &col_indices[0], col_indices.size() * sizeof(int), cudaMemcpyHostToDevice); + err = cudaMemcpy(dev_csrColIndA, &col_indices[0], col_indices.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); err = cudaMemcpy(dev_csrValA, &values[0], values.size() * sizeof(T), cudaMemcpyHostToDevice); @@ -225,16 +227,16 @@ class xSpMSpM : public cusparseFunc { void createBuffersNNZ_C(void); void xSpMSpM_Function(bool flush); - size_t xSpMSpM_Getflopcount(void) + clsparseIdx_t xSpMSpM_Getflopcount(void) { // C = A * B // But here C = A* A, the A & B matrices are same - int nnzA = col_indices.size(); + clsparseIdx_t nnzA = col_indices.size(); - size_t flop = 0; - for (int i = 0; i < nnzA; i++) + clsparseIdx_t flop = 0; + for (clsparseIdx_t i = 0; i < nnzA; i++) { - int colIdx = col_indices[i]; // Get colIdx of A + clsparseIdx_t colIdx = col_indices[i]; // Get colIdx of A flop += row_offsets[colIdx + 1] - row_offsets[colIdx]; // nnz in 'colIdx'th row of B } @@ -244,16 +246,16 @@ class xSpMSpM : public cusparseFunc { }// end of function //Input host matrix in csr format : A - std::vector< int > row_offsets; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_offsets; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; T alpha; T beta; - size_t flopCnt; // Indicates total number of floating point operations - int n_rows; - int n_cols; - int n_vals; + clsparseIdx_t flopCnt; // Indicates total number of floating point operations + clsparseIdx_t n_rows; + clsparseIdx_t n_cols; + clsparseIdx_t n_vals; bool explicit_zeroes; @@ -264,16 +266,16 @@ class xSpMSpM : public cusparseFunc { // device CUDA pointers T* dev_csrValA; - int* dev_csrRowPtrA; - int* dev_csrColIndA; + clsparseIdx_t* dev_csrRowPtrA; + clsparseIdx_t* dev_csrColIndA; T* dev_csrValC; - int* 
dev_csrRowPtrC; - int* dev_csrColIndC; + clsparseIdx_t* dev_csrRowPtrC; + clsparseIdx_t* dev_csrColIndC; - int* nnzTotalDevHostPtr; // Points to host memory - int baseC; - int nnzC; + clsparseIdx_t* nnzTotalDevHostPtr; // Points to host memory + clsparseIdx_t baseC; + clsparseIdx_t nnzC; void* buffer; size_t bufferSize; }; @@ -283,7 +285,7 @@ template<> void xSpMSpM ::createBuffersNNZ_C(void) { double betaT = 0.0; - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // Step 2: allocate buffer for csrgemm2Nnzand csrgemm2 cuSparseStatus = cusparseScsrgemm2_bufferSizeExt(handle, n_rows, n_cols, n_cols, &alpha, @@ -299,7 +301,7 @@ xSpMSpM ::createBuffersNNZ_C(void) CUDA_V_THROW(err, "cudaMalloc buffer in createBuffersNNZ_C"); // step 3: compute dev_csrRowPtrC - err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof(int) * (n_rows + 1)); + err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof( clsparseIdx_t ) * (n_rows + 1)); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); cuSparseStatus = cusparseXcsrgemm2Nnz(handle, n_rows, n_cols, n_cols, @@ -318,12 +320,12 @@ xSpMSpM ::createBuffersNNZ_C(void) } else { - cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); + cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); nnzC -= baseC; } - err = cudaMalloc((void**)&dev_csrColIndC, sizeof(int)*nnzC); + err = cudaMalloc((void**)&dev_csrColIndC, sizeof( clsparseIdx_t )*nnzC); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); err = cudaMalloc((void**)&dev_csrValC, sizeof(float)*nnzC); @@ -335,7 +337,7 @@ xSpMSpM ::createBuffersNNZ_C(void) template<> void xSpMSpM ::createBuffersNNZ_C(void) { - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // Step 2: allocate buffer for csrgemm2Nnzand csrgemm2 cuSparseStatus = cusparseDcsrgemm2_bufferSizeExt(handle, n_rows, n_cols, n_cols, &alpha, @@ -351,7 +353,7 @@ xSpMSpM ::createBuffersNNZ_C(void) CUDA_V_THROW(err, "cudaMalloc buffer in createBuffersNNZ_C"); // step 3: compute dev_csrRowPtrC - err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof(int) * (n_rows + 1)); + err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof( clsparseIdx_t ) * (n_rows + 1)); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); cuSparseStatus = cusparseXcsrgemm2Nnz(handle, n_rows, n_cols, n_cols, @@ -370,12 +372,12 @@ xSpMSpM ::createBuffersNNZ_C(void) } else { - cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); + cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); nnzC -= baseC; } - err = cudaMalloc((void**)&dev_csrColIndC, sizeof(int)*nnzC); + err = cudaMalloc((void**)&dev_csrColIndC, sizeof( clsparseIdx_t )*nnzC); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); err = cudaMalloc((void**)&dev_csrValC, sizeof(double)*nnzC); @@ -396,7 +398,7 @@ xSpMSpM ::xSpMSpM_Function(bool flush) // step 2: allocate buffer for csrgemm2Nnz and csrgemm2 createBuffersNNZ_C(); - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // step 4: finish sparsity pattern and value of C // Remark: set csrValC to null if 
only sparsity pattern is required. cuSparseStatus = cusparseScsrgemm2(handle, n_rows, n_cols, n_cols, &alpha, @@ -424,7 +426,7 @@ xSpMSpM ::xSpMSpM_Function(bool flush) // step 2: allocate buffer for csrgemm2Nnz and csrgemm2 createBuffersNNZ_C(); - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // step 4: finish sparsity pattern and value of C // Remark: set csrValC to null if only sparsity pattern is required. cuSparseStatus = cusparseDcsrgemm2(handle, n_rows, n_cols, n_cols, &alpha, diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM_gemm2Timed.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM_gemm2Timed.hpp index 27bb2de..cd87d88 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM_gemm2Timed.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xSpMSpM_gemm2Timed.hpp @@ -18,6 +18,7 @@ #define CUSPARSE_BENCHMARK_xSpMSpM_HXX__ #include "cufunc_common.hpp" +#include "include/cufunc_sparse-xx.h" #include "include/mm_reader.hpp" #include "include/io-exception.hpp" @@ -108,7 +109,7 @@ class xSpMSpM : public cusparseFunc { // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. - return (sizeof(int)*(n_vals + n_rows) + sizeof(T) * (n_vals + n_cols + n_rows)) / time_in_ns(); + return (sizeof( clsparseIdx_t )*(n_vals + n_rows) + sizeof(T) * (n_vals + n_cols + n_rows)) / time_in_ns(); } std::string bandwidth_formula() @@ -130,10 +131,10 @@ class xSpMSpM : public cusparseFunc { throw clsparse::io_exception("Could not read matrix market header from disk"); } - cudaError_t err = cudaMalloc((void**)&dev_csrRowPtrA, row_offsets.size() * sizeof(int)); + cudaError_t err = cudaMalloc((void**)&dev_csrRowPtrA, row_offsets.size() * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); - err = cudaMalloc((void**)&dev_csrColIndA, col_indices.size() * sizeof(int)); + err = cudaMalloc((void**)&dev_csrColIndA, col_indices.size() * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMalloc device_col_indices"); err = cudaMalloc((void**)&dev_csrValA, values.size() * sizeof(T)); @@ -148,10 +149,10 @@ class xSpMSpM : public cusparseFunc { void initialize_gpu_buffer() { - cudaError_t err = cudaMemcpy(dev_csrRowPtrA, &row_offsets[0], row_offsets.size() * sizeof(int), cudaMemcpyHostToDevice); + cudaError_t err = cudaMemcpy(dev_csrRowPtrA, &row_offsets[0], row_offsets.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); - err = cudaMemcpy(dev_csrColIndA, &col_indices[0], col_indices.size() * sizeof(int), cudaMemcpyHostToDevice); + err = cudaMemcpy(dev_csrColIndA, &col_indices[0], col_indices.size() * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice); CUDA_V_THROW(err, "cudaMalloc device_row_offsets"); err = cudaMemcpy(dev_csrValA, &values[0], values.size() * sizeof(T), cudaMemcpyHostToDevice); @@ -172,10 +173,10 @@ class xSpMSpM : public cusparseFunc { cudaError_t err = cudaMemset(dev_csrValC, 0x0, nnzC * sizeof(T)); CUDA_V_THROW(err, "cudaMemset dev_csrValC " + std::to_string(nnzC)); - err = cudaMemset(dev_csrColIndC, 0x0, nnzC * sizeof(int)); + err = cudaMemset(dev_csrColIndC, 0x0, nnzC * sizeof( clsparseIdx_t )); CUDA_V_THROW(err, "cudaMemset dev_csrColIndC " + std::to_string(nnzC)); - err = cudaMemset(dev_csrRowPtrC, 0x0, (n_rows+1) * sizeof(int)); + err = cudaMemset(dev_csrRowPtrC, 0x0, (n_rows+1) * sizeof( clsparseIdx_t )); 
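// Standalone restatement of the FLOP estimate used by the SpGEMM benchmarks
// (xSpMSpM_Getflopcount earlier in this patch): for every nonzero A(i,k) of the
// left matrix, one multiply-add is needed per nonzero in row k of the right
// matrix, so the work is the sum of right-hand row lengths indexed by the column
// indices of A (here B = A). Self-contained sketch with an assumed typedef;
// multiply the result by 2 if multiplies and adds are counted separately.
#include <cstdint>
#include <vector>

typedef uint64_t clsparseIdx_t;

clsparseIdx_t spgemm_flops(const std::vector<clsparseIdx_t>& row_offsets,   // of B
                           const std::vector<clsparseIdx_t>& col_indices)   // of A
{
    clsparseIdx_t flop = 0;
    for (clsparseIdx_t i = 0; i < col_indices.size(); ++i)
    {
        clsparseIdx_t colIdx = col_indices[i];                   // k of A(i,k)
        flop += row_offsets[colIdx + 1] - row_offsets[colIdx];   // nnz in row k of B
    }
    return flop;
}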
CUDA_V_THROW(err, "cudaMemset dev_csrRowPtrC " + std::to_string(nnzC)); } @@ -217,15 +218,15 @@ class xSpMSpM : public cusparseFunc { void xSpMSpM_Function(bool flush); //Input host matrix in csr format : A - std::vector< int > row_offsets; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_offsets; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; T alpha; T beta; - int n_rows; - int n_cols; - int n_vals; + clsparseIdx_t n_rows; + clsparseIdx_t n_cols; + clsparseIdx_t n_vals; bool explicit_zeroes; @@ -236,16 +237,16 @@ class xSpMSpM : public cusparseFunc { // device CUDA pointers T* dev_csrValA; - int* dev_csrRowPtrA; - int* dev_csrColIndA; + clsparseIdx_t* dev_csrRowPtrA; + clsparseIdx_t* dev_csrColIndA; T* dev_csrValC; - int* dev_csrRowPtrC; - int* dev_csrColIndC; + clsparseIdx_t* dev_csrRowPtrC; + clsparseIdx_t* dev_csrColIndC; - int* nnzTotalDevHostPtr; // Points to host memory - int baseC; - int nnzC; + clsparseIdx_t* nnzTotalDevHostPtr; // Points to host memory + clsparseIdx_t baseC; + clsparseIdx_t nnzC; void* buffer; size_t bufferSize; }; @@ -255,7 +256,7 @@ template<> void xSpMSpM ::createBuffersNNZ_C(void) { double betaT = 0.0; - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // Step 2: allocate buffer for csrgemm2Nnzand csrgemm2 cuSparseStatus = cusparseScsrgemm2_bufferSizeExt(handle, n_rows, n_cols, n_cols, &alpha, @@ -271,7 +272,7 @@ xSpMSpM ::createBuffersNNZ_C(void) CUDA_V_THROW(err, "cudaMalloc buffer in createBuffersNNZ_C"); // step 3: compute dev_csrRowPtrC - err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof(int) * (n_rows + 1)); + err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof( clsparseIdx_t ) * (n_rows + 1)); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); cuSparseStatus = cusparseXcsrgemm2Nnz(handle, n_rows, n_cols, n_cols, @@ -290,12 +291,12 @@ xSpMSpM ::createBuffersNNZ_C(void) } else { - cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); + cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); nnzC -= baseC; } - err = cudaMalloc((void**)&dev_csrColIndC, sizeof(int)*nnzC); + err = cudaMalloc((void**)&dev_csrColIndC, sizeof( clsparseIdx_t )*nnzC); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); err = cudaMalloc((void**)&dev_csrValC, sizeof(float)*nnzC); @@ -307,7 +308,7 @@ xSpMSpM ::createBuffersNNZ_C(void) template<> void xSpMSpM ::createBuffersNNZ_C(void) { - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // Step 2: allocate buffer for csrgemm2Nnzand csrgemm2 cuSparseStatus = cusparseDcsrgemm2_bufferSizeExt(handle, n_rows, n_cols, n_cols, &alpha, @@ -323,7 +324,7 @@ xSpMSpM ::createBuffersNNZ_C(void) CUDA_V_THROW(err, "cudaMalloc buffer in createBuffersNNZ_C"); // step 3: compute dev_csrRowPtrC - err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof(int) * (n_rows + 1)); + err = cudaMalloc((void**)&dev_csrRowPtrC, sizeof( clsparseIdx_t ) * (n_rows + 1)); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); cuSparseStatus = cusparseXcsrgemm2Nnz(handle, n_rows, n_cols, n_cols, @@ -342,12 +343,12 @@ xSpMSpM ::createBuffersNNZ_C(void) } else { - cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof(int), 
cudaMemcpyDeviceToHost); + cudaMemcpy(&nnzC, dev_csrRowPtrC + n_rows, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); + cudaMemcpy(&baseC, dev_csrRowPtrC, sizeof( clsparseIdx_t ), cudaMemcpyDeviceToHost); nnzC -= baseC; } - err = cudaMalloc((void**)&dev_csrColIndC, sizeof(int)*nnzC); + err = cudaMalloc((void**)&dev_csrColIndC, sizeof( clsparseIdx_t )*nnzC); CUDA_V_THROW(err, "cudaMalloc dev_csrRowPtrC failed in createBuffersNNZ_C"); err = cudaMalloc((void**)&dev_csrValC, sizeof(double)*nnzC); @@ -359,7 +360,7 @@ xSpMSpM ::createBuffersNNZ_C(void) template<> void xSpMSpM ::xSpMSpM_Function(bool flush) { - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // step 4: finish sparsity pattern and value of C // Remark: set csrValC to null if only sparsity pattern is required. cuSparseStatus = cusparseScsrgemm2(handle, n_rows, n_cols, n_cols, &alpha, @@ -378,7 +379,7 @@ xSpMSpM ::xSpMSpM_Function(bool flush) template<> void xSpMSpM ::xSpMSpM_Function(bool flush) { - size_t nnzA = values.size(); + clsparseIdx_t nnzA = values.size(); // step 4: finish sparsity pattern and value of C // Remark: set csrValC to null if only sparsity pattern is required. cuSparseStatus = cusparseDcsrgemm2(handle, n_rows, n_cols, n_cols, &alpha, diff --git a/src/benchmarks/cusparse-bench/functions/cufunc_xSpMdV.hpp b/src/benchmarks/cusparse-bench/functions/cufunc_xSpMdV.hpp index 82999da..23e4f8b 100644 --- a/src/benchmarks/cusparse-bench/functions/cufunc_xSpMdV.hpp +++ b/src/benchmarks/cusparse-bench/functions/cufunc_xSpMdV.hpp @@ -20,6 +20,7 @@ #include "cufunc_common.hpp" #include "include/mm_reader.hpp" #include "include/io-exception.hpp" +#include "include/cufunc_sparse-xx.h" template class xSpMdV : public cusparseFunc @@ -69,7 +70,7 @@ class xSpMdV : public cusparseFunc // There are NNZ float_types in the vals[ ] array // You read num_cols floats from the vector, afterwards they cache perfectly. // Finally, you write num_rows floats out to DRAM at the end of the kernel. 
- return ( sizeof( int )*( n_vals + n_rows ) + sizeof( T ) * ( n_vals + n_cols + n_rows ) ) / time_in_ns( ); + return ( sizeof( clsparseIdx_t )*( n_vals + n_rows ) + sizeof( T ) * ( n_vals + n_cols + n_rows ) ) / time_in_ns( ); } std::string bandwidth_formula( ) @@ -91,10 +92,10 @@ class xSpMdV : public cusparseFunc throw clsparse::io_exception( "Could not read matrix market from disk: " + path); } - cudaError_t err = cudaMalloc( (void**) &device_row_offsets, row_offsets.size( ) * sizeof( int ) ); + cudaError_t err = cudaMalloc( (void**) &device_row_offsets, row_offsets.size( ) * sizeof( clsparseIdx_t ) ); CUDA_V_THROW( err, "cudaMalloc device_row_offsets" ); - err = cudaMalloc( (void**) &device_col_indices, col_indices.size( ) * sizeof( int ) ); + err = cudaMalloc( (void**) &device_col_indices, col_indices.size( ) * sizeof( clsparseIdx_t ) ); CUDA_V_THROW( err, "cudaMalloc device_col_indices" ); err = cudaMalloc( (void**) &device_values, values.size( ) * sizeof( T ) ); @@ -115,10 +116,10 @@ class xSpMdV : public cusparseFunc void initialize_gpu_buffer( ) { - cudaError_t err = cudaMemcpy( device_row_offsets, &row_offsets[ 0 ], row_offsets.size( ) * sizeof( int ), cudaMemcpyHostToDevice ); + cudaError_t err = cudaMemcpy( device_row_offsets, &row_offsets[ 0 ], row_offsets.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice ); CUDA_V_THROW( err, "cudaMalloc device_row_offsets" ); - err = cudaMemcpy( device_col_indices, &col_indices[ 0 ], col_indices.size( ) * sizeof( int ), cudaMemcpyHostToDevice ); + err = cudaMemcpy( device_col_indices, &col_indices[ 0 ], col_indices.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice ); CUDA_V_THROW( err, "cudaMalloc device_row_offsets" ); err = cudaMemcpy( device_values, &values[ 0 ], values.size( ) * sizeof( T ), cudaMemcpyHostToDevice ); @@ -168,13 +169,13 @@ class xSpMdV : public cusparseFunc void xSpMdV_Function( bool flush ); //host matrix definition - std::vector< int > row_offsets; - std::vector< int > col_indices; + std::vector< clsparseIdx_t > row_offsets; + std::vector< clsparseIdx_t > col_indices; std::vector< T > values; std::vector< T > x; - int n_rows; - int n_cols; - int n_vals; + clsparseIdx_t n_rows; + clsparseIdx_t n_cols; + clsparseIdx_t n_vals; bool explicit_zeroes; @@ -184,8 +185,8 @@ class xSpMdV : public cusparseFunc cusparseMatDescr_t descrA; // device CUDA pointers - int* device_row_offsets; - int* device_col_indices; + clsparseIdx_t* device_row_offsets; + clsparseIdx_t* device_col_indices; T* device_values; T* device_x; T* device_y; diff --git a/src/benchmarks/cusparse-bench/include/cufunc_sparse-xx.h b/src/benchmarks/cusparse-bench/include/cufunc_sparse-xx.h new file mode 100644 index 0000000..8a6eb6d --- /dev/null +++ b/src/benchmarks/cusparse-bench/include/cufunc_sparse-xx.h @@ -0,0 +1,38 @@ +/* ************************************************************************ + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ************************************************************************ */ +/*! \file + * \brief cufunc_sparse-xx.h defines generic type for 32/64 bit indices + */ + +#pragma once +#ifndef _CU_SPARSE_xx_H_ +#define _CU_SPARSE_xx_H_ + +#if( CLSPARSE_INDEX_SIZEOF == 8 ) +#error Wait till clSPARSE implements 64-bit indices + typedef unsigned long long clsparseIdx_t; +#else + typedef int clsparseIdx_t; +#endif + +#if( CLSPARSE_INDEX_SIZEOF == 8 ) +#define SIZET "l" +#else +#define SIZET "" +#endif + + +#endif // ifndef _CU_SPARSE_xx_H_ diff --git a/src/benchmarks/cusparse-bench/include/mm_reader.hpp b/src/benchmarks/cusparse-bench/include/mm_reader.hpp index 8b5b545..c1f0648 100644 --- a/src/benchmarks/cusparse-bench/include/mm_reader.hpp +++ b/src/benchmarks/cusparse-bench/include/mm_reader.hpp @@ -19,15 +19,16 @@ #define CUBLAS_BENCHMARK_MM_READER_HXX__ #include +#include "cufunc_sparse-xx.h" -int sparseHeaderfromFile( int* nnz, int* rows, int* cols, const char* filePath ); +int sparseHeaderfromFile(clsparseIdx_t* nnz, clsparseIdx_t* rows, clsparseIdx_t* cols, const char* filePath); template< class T > int -cooMatrixfromFile( std::vector< int >& row_indices, std::vector< int >& col_indices, +cooMatrixfromFile(std::vector< clsparseIdx_t >& row_indices, std::vector< clsparseIdx_t >& col_indices, std::vector< T >& values, const char* filePath, bool read_explicit_zeroes = true ); template< class T > int -csrMatrixfromFile( std::vector< int >& row_offsets, std::vector< int >& col_indices, +csrMatrixfromFile(std::vector< clsparseIdx_t >& row_offsets, std::vector< clsparseIdx_t >& col_indices, std::vector< T >& values, const char* filePath, bool read_explicit_zeroes = true ); #endif diff --git a/src/benchmarks/cusparse-bench/src/main.cpp b/src/benchmarks/cusparse-bench/src/main.cpp index c3055d2..50350e4 100644 --- a/src/benchmarks/cusparse-bench/src/main.cpp +++ b/src/benchmarks/cusparse-bench/src/main.cpp @@ -155,7 +155,7 @@ int main(int argc, char *argv[]) return false; } - cl_bool explicit_zeroes = true; + bool explicit_zeroes = true; if (vm["no_zeroes"].as()) explicit_zeroes = false; diff --git a/src/benchmarks/cusparse-bench/src/mm_reader.cpp b/src/benchmarks/cusparse-bench/src/mm_reader.cpp index 4c870ca..83c5810 100644 --- a/src/benchmarks/cusparse-bench/src/mm_reader.cpp +++ b/src/benchmarks/cusparse-bench/src/mm_reader.cpp @@ -37,6 +37,7 @@ to copyright protection within the United States. #include #include #include +#include #include #include #include @@ -44,13 +45,14 @@ to copyright protection within the United States. 
#include #include "include/external/mmio.h" +#include "include/cufunc_sparse-xx.h" // Class declarations template struct Coordinate { - int x; - int y; + clsparseIdx_t x; + clsparseIdx_t y; FloatType val; }; @@ -58,9 +60,9 @@ template class MatrixMarketReader { char Typecode[ 4 ]; - int nNZ; - int nRows; - int nCols; + clsparseIdx_t nNZ; + clsparseIdx_t nRows; + clsparseIdx_t nCols; int isSymmetric; int isDoubleMem; Coordinate *unsym_coords; @@ -81,17 +83,17 @@ class MatrixMarketReader int MMReadMtxCrdSize( FILE* infile ); void MMGenerateCOOFromFile( FILE* infile, bool explicit_zeroes ); - int GetNumRows( ) + clsparseIdx_t GetNumRows( ) { return nRows; } - int GetNumCols( ) + clsparseIdx_t GetNumCols( ) { return nCols; } - int GetNumNonZeroes( ) + clsparseIdx_t GetNumNonZeroes( ) { return nNZ; } @@ -210,9 +212,9 @@ bool MatrixMarketReader::MMReadFormat( const std::string &filename, b template void FillCoordData( char Typecode[ ], Coordinate *unsym_coords, - int &unsym_actual_nnz, - int ir, - int ic, + clsparseIdx_t &unsym_actual_nnz, + clsparseIdx_t ir, + clsparseIdx_t ic, FloatType val ) { if( mm_is_symmetric( Typecode ) ) @@ -240,23 +242,27 @@ void FillCoordData( char Typecode[ ], template void MatrixMarketReader::MMGenerateCOOFromFile( FILE *infile, bool explicit_zeroes ) { - int unsym_actual_nnz = 0; + clsparseIdx_t unsym_actual_nnz = 0; FloatType val; - int ir, ic; + clsparseIdx_t ir, ic; const int exp_zeroes = explicit_zeroes; //silence warnings from fscanf (-Wunused-result) - int rv = 0; + clsparseIdx_t rv = 0; - for( int i = 0; i < nNZ; i++ ) + for ( clsparseIdx_t i = 0; i < nNZ; i++) { if( mm_is_real( Typecode ) ) { - if( typeid( FloatType ) == typeid( float ) ) - rv = fscanf( infile, "%d %d %f\n", &ir, &ic, &val ); + fscanf(infile, "%" SIZET "u", &ir); + fscanf(infile, "%" SIZET "u", &ic); + + if (typeid(FloatType) == typeid(float)) + rv = fscanf(infile, "%f\n", (float*)(&val)); + else if( typeid( FloatType ) == typeid( double ) ) - rv = fscanf( infile, "%d %d %lf\n", &ir, &ic, &val ); + rv = fscanf( infile, "%lf\n", (double*)( &val ) ); if( exp_zeroes == 0 && val == 0 ) continue; @@ -265,10 +271,13 @@ void MatrixMarketReader::MMGenerateCOOFromFile( FILE *infile, bool ex } else if( mm_is_integer( Typecode ) ) { - if( typeid( FloatType ) == typeid( float ) ) - rv = fscanf(infile, "%d %d %f\n", &ir, &ic, &val); - else if( typeid( FloatType ) == typeid( double ) ) - rv = fscanf(infile, "%d %d %lf\n", &ir, &ic, &val); + fscanf(infile, "%" SIZET "u", &ir); + fscanf(infile, "%" SIZET "u", &ic); + + if(typeid(FloatType) == typeid(float)) + rv = fscanf(infile, "%f\n", (float*)( &val ) ); + else if(typeid(FloatType) == typeid(double)) + rv = fscanf(infile, "%lf\n", (double*)( &val ) ); if( exp_zeroes == 0 && val == 0 ) continue; @@ -278,7 +287,9 @@ void MatrixMarketReader::MMGenerateCOOFromFile( FILE *infile, bool ex } else if( mm_is_pattern( Typecode ) ) { - rv = fscanf( infile, "%d %d", &ir, &ic ); + rv = fscanf(infile, "%" SIZET "u", &ir); + rv = fscanf(infile, "%" SIZET "u", &ic); + val = static_cast( MAX_RAND_VAL * ( rand( ) / ( RAND_MAX + 1.0 ) ) ); if( exp_zeroes == 0 && val == 0 ) @@ -385,12 +396,22 @@ int MatrixMarketReader::MMReadMtxCrdSize( FILE *infile ) } while( line[ 0 ] == '%' ); /* line[] is either blank or has M,N, nz */ - if( sscanf( line, "%d %d %d", &nRows, &nCols, &nNZ ) == 3 ) + std::stringstream s(line); + nRows = 0; + nCols = 0; + nNZ = 0; + s >> nRows >> nCols >> nNZ; + if (nRows && nCols && nNZ) return 0; else do { - num_items_read = fscanf( infile, "%d %d %d", 
&nRows, &nCols, &nNZ ); + num_items_read = 0; + num_items_read += fscanf( infile, "%" SIZET "u", &nRows ); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + num_items_read += fscanf(infile, "%" SIZET "u", &nCols); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + num_items_read += fscanf(infile, "%" SIZET "u", &nNZ); if( num_items_read == EOF ) return MM_PREMATURE_EOF; } while( num_items_read != 3 ); @@ -409,7 +430,7 @@ bool CoordinateCompare( const Coordinate &c1, const Coordinate int -cooMatrixfromFile( std::vector< int >& row_indices, std::vector< int >& col_indices, +cooMatrixfromFile( std::vector< clsparseIdx_t >& row_indices, std::vector< clsparseIdx_t >& col_indices, std::vector< T >& values, const char* filePath, bool read_explicit_zeroes ) { // Check that the file format is matrix market; the only format we can read right now @@ -429,9 +450,9 @@ cooMatrixfromFile( std::vector< int >& row_indices, std::vector< int >& col_indi if( mm_reader.MMReadFormat( filePath, read_explicit_zeroes ) ) return 2; - int m = mm_reader.GetNumRows( ); - int n = mm_reader.GetNumCols( ); - int nnz = mm_reader.GetNumNonZeroes( ); + clsparseIdx_t m = mm_reader.GetNumRows( ); + clsparseIdx_t n = mm_reader.GetNumCols( ); + clsparseIdx_t nnz = mm_reader.GetNumNonZeroes( ); row_indices.clear( ); col_indices.clear( ); @@ -441,7 +462,7 @@ cooMatrixfromFile( std::vector< int >& row_indices, std::vector< int >& col_indi values.reserve( nnz ); Coordinate< T >* coords = mm_reader.GetUnsymCoordinates( ); - for( int c = 0; c < nnz; ++c ) + for( clsparseIdx_t c = 0; c < nnz; ++c ) { row_indices.push_back( coords[ c ].x ); col_indices.push_back( coords[ c ].y ); @@ -452,15 +473,15 @@ cooMatrixfromFile( std::vector< int >& row_indices, std::vector< int >& col_indi } // Explicit template instantiations for float, double -template int cooMatrixfromFile<>( std::vector< int >& row_indices, std::vector< int >& col_indices, +template int cooMatrixfromFile<>( std::vector< clsparseIdx_t >& row_indices, std::vector< clsparseIdx_t >& col_indices, std::vector< float >& values, const char* filePath, bool read_explicit_zeroes ); -template int cooMatrixfromFile<>( std::vector< int >& row_indices, std::vector< int >& col_indices, +template int cooMatrixfromFile<>( std::vector< clsparseIdx_t >& row_indices, std::vector< clsparseIdx_t >& col_indices, std::vector< double >& values, const char* filePath, bool read_explicit_zeroes ); // This function reads the file at the given filepath, and returns the sparse // matrix in the CSR struct. 
template< typename T > int -csrMatrixfromFile( std::vector< int >& row_offsets, std::vector< int >& col_indices, +csrMatrixfromFile( std::vector< clsparseIdx_t >& row_offsets, std::vector< clsparseIdx_t >& col_indices, std::vector< T >& values, const char* filePath, bool read_explicit_zeroes ) { // Check that the file format is matrix market; the only format we can read right now @@ -480,9 +501,9 @@ std::vector< T >& values, const char* filePath, bool read_explicit_zeroes ) if( mm_reader.MMReadFormat( filePath, read_explicit_zeroes ) ) return 2; - int m = mm_reader.GetNumRows( ); - int n = mm_reader.GetNumCols( ); - int nnz = mm_reader.GetNumNonZeroes( ); + clsparseIdx_t m = mm_reader.GetNumRows( ); + clsparseIdx_t n = mm_reader.GetNumCols( ); + clsparseIdx_t nnz = mm_reader.GetNumNonZeroes( ); row_offsets.clear( ); col_indices.clear( ); @@ -495,9 +516,9 @@ std::vector< T >& values, const char* filePath, bool read_explicit_zeroes ) std::sort( coords, coords + nnz, CoordinateCompare< T > ); - int current_row = 1; + clsparseIdx_t current_row = 1; row_offsets.push_back( 0 ); - for( int i = 0; i < nnz; i++ ) + for( clsparseIdx_t i = 0; i < nnz; i++ ) { col_indices.push_back( coords[ i ].y ); values.push_back( coords[ i ].val ); @@ -514,14 +535,14 @@ std::vector< T >& values, const char* filePath, bool read_explicit_zeroes ) }// end // Explicit template instantiations for float, double -template int csrMatrixfromFile<>( std::vector< int >& row_offsets, std::vector< int >& col_indices, +template int csrMatrixfromFile<>( std::vector< clsparseIdx_t >& row_offsets, std::vector< clsparseIdx_t >& col_indices, std::vector< float >& values, const char* filePath, bool read_explicit_zeroes ); -template int csrMatrixfromFile<>( std::vector< int >& row_offsets, std::vector< int >& col_indices, +template int csrMatrixfromFile<>( std::vector< clsparseIdx_t >& row_offsets, std::vector< clsparseIdx_t >& col_indices, std::vector< double >& values, const char* filePath, bool read_explicit_zeroes ); // This function reads the file header at the given filepath, and gets the // matrix dimensions -int sparseHeaderfromFile(int* nnz, int* rows, int* cols, const char* filePath) +int sparseHeaderfromFile(clsparseIdx_t* nnz, clsparseIdx_t* rows, clsparseIdx_t* cols, const char* filePath) { std::string strPath(filePath); if (strPath.find_last_of(".") != std::string::npos) From 49b047ba1a344ead5a7929f05ceb7c2c01ce3ab8 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 18 Dec 2015 16:58:57 -0600 Subject: [PATCH 11/19] Initial work on refactoring meta information hiding, using a pimpl-like idiom in the C-struct to hide private data --- src/include/clSPARSE-1x.h | 7 ++-- src/include/clSPARSE-2x.h | 6 ++- src/include/clSPARSE.h | 12 ++++++ src/library/blas2/clsparse-csrmv.hpp | 14 ++++--- src/library/blas2/csrmv-adaptive.hpp | 12 ++++-- src/library/include/clSPARSE-1x.hpp | 35 ++++++++++++---- src/library/internal/data-types/csr-meta.cpp | 40 +++++++++++++++---- src/tests/resources/csr_matrix_environment.h | 15 +++---- .../resources/sparse_matrix_environment.h | 3 -- src/tests/test-blas2.cpp | 23 +---------- 10 files changed, 103 insertions(+), 64 deletions(-) diff --git a/src/include/clSPARSE-1x.h b/src/include/clSPARSE-1x.h index cbf89b0..09cc14d 100644 --- a/src/include/clSPARSE-1x.h +++ b/src/include/clSPARSE-1x.h @@ -71,7 +71,6 @@ typedef struct clsparseCsrMatrix_ cl_mem values; /*!< non-zero values in sparse matrix of size num_nonzeros */ cl_mem colIndices; /*!< column index for corresponding value of size num_nonzeros 
*/ cl_mem rowOffsets; /*!< Invariant: rowOffsets[i+1]-rowOffsets[i] = number of values in row i */ - cl_mem rowBlocks; /*!< Meta-data used for csr-adaptive algorithm; can be NULL */ /**@}*/ /** @name Buffer offsets */ @@ -82,10 +81,12 @@ typedef struct clsparseCsrMatrix_ clsparseIdx_t offValues; clsparseIdx_t offColInd; clsparseIdx_t offRowOff; - clsparseIdx_t offRowBlocks; /**@}*/ - size_t rowBlockSize; /*!< Size of array used by the rowBlocks handle */ + /*! Pointer to a private structure that contains meta-information the library keeps on a + csr-encoded sparse matrix + */ + void* meta; } clsparseCsrMatrix; /*! \brief Structure to encapsulate sparse matrix data encoded in COO diff --git a/src/include/clSPARSE-2x.h b/src/include/clSPARSE-2x.h index 5d905bb..b1e77ad 100644 --- a/src/include/clSPARSE-2x.h +++ b/src/include/clSPARSE-2x.h @@ -60,10 +60,12 @@ typedef struct clsparseCsrMatrix_ void* values; /*!< non-zero values in sparse matrix of size num_nonzeros */ void* colIndices; /*!< column index for corresponding value of size num_nonzeros */ void* rowOffsets; /*!< Invariant: rowOffsets[i+1]-rowOffsets[i] = number of values in row i */ - void* rowBlocks; /*!< Meta-data used for csr-adaptive algorithm; can be NULL */ /**@}*/ - size_t rowBlockSize; /*!< Size of array used by the rowBlocks pointer */ + /*! Pointer to a private structure that contains meta-information the library keeps on a + csr-encoded sparse matrix + */ + void* meta; } clsparseCsrMatrix; /*! \brief Structure to encapsulate sparse matrix data encoded in COO diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index b47906d..6c21655 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -630,6 +630,18 @@ extern "C" { */ CLSPARSE_EXPORT clsparseStatus clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ); + + /*! + * \brief Delete meta data associated with a CSR encoded matrix + * \details Meta data for a sparse matrix may occupy device memory, and this informs the library to release it + * \param[in,out] csrMatx The CSR sparse structure that represents the matrix in device memory + * \note This function assumes that the memory for rowBlocks has already been allocated by client program + * + * \ingroup FILE + */ + CLSPARSE_EXPORT clsparseStatus + clsparseCsrMetaDelete( clsparseCsrMatrix* csrMatx ); + /**@}*/ /*! 
diff --git a/src/library/blas2/clsparse-csrmv.hpp b/src/library/blas2/clsparse-csrmv.hpp index 17dbd11..82c810f 100644 --- a/src/library/blas2/clsparse-csrmv.hpp +++ b/src/library/blas2/clsparse-csrmv.hpp @@ -33,15 +33,16 @@ csrmv (const clsparseScalarPrivate *pAlpha, cldenseVectorPrivate *pY, clsparseControl control) { - if( (pCsrMatx->rowBlocks == nullptr) && (pCsrMatx->rowBlockSize == 0) ) + if( pCsrMatx->meta == nullptr ) { return csrmv_vector(pAlpha, pCsrMatx, pX, pBeta, pY, control); } else { - if( ( pCsrMatx->rowBlocks == nullptr ) || ( pCsrMatx->rowBlockSize == 0 ) ) + const matrix_meta* meta_ptr = static_cast< const matrix_meta* >( pCsrMatx->meta ); + if( meta_ptr->rowBlockSize == 0 ) { - // rowBlockSize varible is not zero but no pointer + // rowBlockSize variable is not zero but no pointer return clsparseStructInvalid; } @@ -67,15 +68,16 @@ csrmv (const clsparse::array_base& pAlpha, clsparse::array_base& pY, clsparseControl control) { - if( (pCsrMatx->rowBlocks == nullptr) && (pCsrMatx->rowBlockSize == 0) ) + if( pCsrMatx->meta == nullptr ) { return csrmv_vector(pAlpha, pCsrMatx, pX, pBeta, pY, control); } else { - if( ( pCsrMatx->rowBlocks == nullptr ) || ( pCsrMatx->rowBlockSize == 0 ) ) + const matrix_meta* meta_ptr = static_cast< const matrix_meta* >( pCsrMatx->meta ); + if( meta_ptr->rowBlockSize == 0 ) { - // rowBlockSize varible is not zero but no pointer + // rowBlockSize variable is not zero but no pointer return clsparseStructInvalid; } diff --git a/src/library/blas2/csrmv-adaptive.hpp b/src/library/blas2/csrmv-adaptive.hpp index 636f712..9c234bb 100644 --- a/src/library/blas2/csrmv-adaptive.hpp +++ b/src/library/blas2/csrmv-adaptive.hpp @@ -88,10 +88,12 @@ csrmv_adaptive( const clsparseScalarPrivate* pAlpha, KernelWrap kWrapper( kernel ); + const matrix_meta* meta_ptr = static_cast< const matrix_meta* >( pCsrMatx->meta ); + kWrapper << pCsrMatx->values << pCsrMatx->colIndices << pCsrMatx->rowOffsets << pX->values << pY->values - << pCsrMatx->rowBlocks + << meta_ptr->rowBlocks << pAlpha->value << pBeta->value; //<< h_alpha << h_beta; @@ -101,7 +103,7 @@ csrmv_adaptive( const clsparseScalarPrivate* pAlpha, // Setting global work size to half the row block size because we are only // using half the row blocks buffer for actual work. // The other half is used for the extended precision reduction. - clsparseIdx_t global_work_size = ((pCsrMatx->rowBlockSize / 2) - 1) * group_size; + clsparseIdx_t global_work_size = ( ( meta_ptr->rowBlockSize/2) - 1 ) * group_size; cl::NDRange local( group_size ); cl::NDRange global( global_work_size > local[ 0 ] ? global_work_size : local[ 0 ] ); @@ -182,10 +184,12 @@ csrmv_adaptive( const clsparse::array_base& pAlpha, KernelWrap kWrapper( kernel ); + const matrix_meta* meta_ptr = static_cast< const matrix_meta* >( pCsrMatx->meta ); + kWrapper << pCsrMatx->values << pCsrMatx->colIndices << pCsrMatx->rowOffsets << pX.data() << pY.data() - << pCsrMatx->rowBlocks + << meta_ptr->rowBlocks << pAlpha.data() << pBeta.data(); //<< h_alpha << h_beta; @@ -195,7 +199,7 @@ csrmv_adaptive( const clsparse::array_base& pAlpha, // Setting global work size to half the row block size because we are only // using half the row blocks buffer for actual work. // The other half is used for the extended precision reduction. 
- clsparseIdx_t global_work_size = ((pCsrMatx->rowBlockSize / 2) - 1) * group_size; + clsparseIdx_t global_work_size = ( ( meta_ptr->rowBlockSize/2) - 1 ) * group_size; cl::NDRange local( group_size ); cl::NDRange global( global_work_size > local[ 0 ] ? global_work_size : local[ 0 ] ); diff --git a/src/library/include/clSPARSE-1x.hpp b/src/library/include/clSPARSE-1x.hpp index 88e4b59..50acfef 100644 --- a/src/library/include/clSPARSE-1x.hpp +++ b/src/library/include/clSPARSE-1x.hpp @@ -41,6 +41,31 @@ // return buf; //} +// Structure to encapsulate the meta data for a sparse matrix +struct matrix_meta +{ + matrix_meta( ) : rowBlockSize( 0 ), offRowBlocks( 0 ) + { + } + + ~matrix_meta( ) + { + std::cout << "matrix_meta destructor" << std::endl; + } + + void clear( ) + { + offRowBlocks = rowBlockSize = 0; + rowBlocks = ::cl::Buffer( ); + } + + ::cl::Buffer rowBlocks; /*!< Meta-data used for csr-adaptive algorithm; can be NULL */ + + clsparseIdx_t rowBlockSize; /*!< Size of array used by the rowBlocks handle */ + + clsparseIdx_t offRowBlocks; +}; + template< typename pType > class clMemRAII { @@ -192,8 +217,8 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix void clear( ) { num_rows = num_cols = num_nonzeros = 0; - values = colIndices = rowOffsets = rowBlocks = nullptr; - offValues = offColInd = offRowOff = offRowBlocks = rowBlockSize = 0; + values = colIndices = rowOffsets = nullptr; + offValues = offColInd = offRowOff = 0; } clsparseIdx_t nnz_per_row() const @@ -215,12 +240,6 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix { return offRowOff; } - - clsparseIdx_t rowBlocksOffset() const - { - return offRowBlocks; - } - }; class clsparseCooMatrixPrivate: public clsparseCooMatrix diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index fa3dc10..0ff84ba 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -20,15 +20,15 @@ #include "include/clSPARSE-private.hpp" #include "internal/clsparse-control.hpp" - clsparseStatus clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); + matrix_meta* meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); - pCsrMatx->rowBlockSize = pCsrMatx->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); + meta_ptr->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); return clsparseSuccess; } @@ -37,6 +37,7 @@ clsparseStatus clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); + matrix_meta* meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); // Check to ensure nRows can fit in 32 bits if( static_cast( pCsrMatx->num_rows ) > static_cast( pow( 2, ( 64 - ROW_BITS ) ) ) ) @@ -45,13 +46,38 @@ clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ) return clsparseOutOfResources; } - clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + if( 
meta_ptr == nullptr ) + { + pCsrMatx->meta = new matrix_meta; + clsparseCsrMetaSize( csrMatx, control ); + } + + if( pCsrMatx->meta ) + { + clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + + cl_ulong* ulCsrRowBlocks = static_cast< cl_ulong* >( control->queue.enqueueMapBuffer( meta_ptr->rowBlocks, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, meta_ptr->offRowBlocks, meta_ptr->rowBlockSize ) ); + + ComputeRowBlocks( ulCsrRowBlocks, meta_ptr->rowBlockSize, rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR, true ); + control->queue.enqueueUnmapMemObject( meta_ptr->rowBlocks, ulCsrRowBlocks ); + } + return clsparseSuccess; +} - clMemRAII< cl_ulong > rRowBlocks( control->queue( ), pCsrMatx->rowBlocks ); - cl_ulong* ulCsrRowBlocks = rRowBlocks.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowBlocksOffset( ), pCsrMatx->rowBlockSize ); +clsparseStatus +clsparseCsrMetaDelete( clsparseCsrMatrix* csrMatx ) +{ + clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); + + if( pCsrMatx->meta == nullptr ) + { + return clsparseSuccess; + } - ComputeRowBlocks( ulCsrRowBlocks, pCsrMatx->rowBlockSize, rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR, true ); + matrix_meta* meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); + delete meta_ptr; + pCsrMatx->meta = nullptr; return clsparseSuccess; } diff --git a/src/tests/resources/csr_matrix_environment.h b/src/tests/resources/csr_matrix_environment.h index ba81810..40221c5 100644 --- a/src/tests/resources/csr_matrix_environment.h +++ b/src/tests/resources/csr_matrix_environment.h @@ -77,9 +77,6 @@ class CSREnvironment: public ::testing::Environment if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + file_name ); - clsparseCsrMetaSize( &csrDMatrix, CLSE::control ); - csrDMatrix.rowBlocks = ::clCreateBuffer( context, CL_MEM_READ_WRITE, - csrDMatrix.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); clsparseCsrMetaCompute( &csrDMatrix, CLSE::control ); @@ -124,11 +121,11 @@ class CSREnvironment: public ::testing::Environment csrSMatrix.num_nonzeros = csrDMatrix.num_nonzeros; csrSMatrix.num_cols = csrDMatrix.num_cols; csrSMatrix.num_rows = csrDMatrix.num_rows; - csrSMatrix.rowBlockSize = csrDMatrix.rowBlockSize; // Don't use adaptive kernel in double precision yet. 
- csrSMatrix.rowBlocks = csrDMatrix.rowBlocks; - ::clRetainMemObject( csrSMatrix.rowBlocks ); + //csrSMatrix.meta = csrDMatrix.meta; + //::clRetainMemObject( csrSMatrix.rowBlocks ); + clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); csrSMatrix.colIndices = csrDMatrix.colIndices; ::clRetainMemObject( csrSMatrix.colIndices ); @@ -139,8 +136,8 @@ class CSREnvironment: public ::testing::Environment csrSMatrix.values = ::clCreateBuffer( context, CL_MEM_READ_ONLY, csrSMatrix.num_nonzeros * sizeof( cl_float ), NULL, &status ); - cl_int cl_status; - cl_double* dvals = (cl_double*) ::clEnqueueMapBuffer(queue, csrDMatrix.values, CL_TRUE, CL_MAP_READ, 0, csrDMatrix.num_nonzeros * sizeof(cl_double), 0, nullptr, nullptr, &cl_status); + //cl_int cl_status; + //cl_double* dvals = (cl_double*) ::clEnqueueMapBuffer(queue, csrDMatrix.values, CL_TRUE, CL_MAP_READ, 0, csrDMatrix.num_nonzeros * sizeof(cl_double), 0, nullptr, nullptr, &cl_status); // copy the double-precision values over into the single-precision array. for (clsparseIdx_t i = 0; i < ublasDCsr.value_data().size(); i++) @@ -194,11 +191,9 @@ class CSREnvironment: public ::testing::Environment ::clReleaseMemObject( csrSMatrix.values ); ::clReleaseMemObject( csrSMatrix.colIndices ); ::clReleaseMemObject( csrSMatrix.rowOffsets ); - ::clReleaseMemObject( csrSMatrix.rowBlocks ); ::clReleaseMemObject( csrDMatrix.values ); ::clReleaseMemObject( csrDMatrix.colIndices ); ::clReleaseMemObject( csrDMatrix.rowOffsets ); - ::clReleaseMemObject( csrDMatrix.rowBlocks ); //bring csrSMatrix csrDMatrix to its initial state clsparseInitCsrMatrix( &csrSMatrix ); diff --git a/src/tests/resources/sparse_matrix_environment.h b/src/tests/resources/sparse_matrix_environment.h index 0a64dcf..c0392d4 100644 --- a/src/tests/resources/sparse_matrix_environment.h +++ b/src/tests/resources/sparse_matrix_environment.h @@ -74,8 +74,6 @@ class CSRSparseEnvironment : public ::testing::Environment { throw std::runtime_error("Could not read matrix market data from disk"); clsparseCsrMetaSize(&csrSMatrix, CLSE::control); - csrSMatrix.rowBlocks = ::clCreateBuffer( context, CL_MEM_READ_WRITE, - csrSMatrix.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); // Download sparse matrix data to host @@ -277,7 +275,6 @@ class CSRSparseEnvironment : public ::testing::Environment { ::clReleaseMemObject(csrSMatrix.values); ::clReleaseMemObject(csrSMatrix.colIndices); ::clReleaseMemObject(csrSMatrix.rowOffsets); - ::clReleaseMemObject(csrSMatrix.rowBlocks); //bring csrSMatrix to its initial state clsparseInitCsrMatrix(&csrSMatrix); diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp index f99fe2a..80d387d 100644 --- a/src/tests/test-blas2.cpp +++ b/src/tests/test-blas2.cpp @@ -350,17 +350,8 @@ TYPED_TEST(Blas2, csrmv_vector) // To call csrmv vector we need to artificially get rid of the rowBlocks data using CSRE = CSREnvironment; - cl_int cl_status; - cl_status = clReleaseMemObject(CSRE::csrSMatrix.rowBlocks); - ASSERT_EQ(CL_SUCCESS, cl_status); - CSRE::csrSMatrix.rowBlocks = nullptr; - - cl_status = clReleaseMemObject(CSRE::csrDMatrix.rowBlocks); - ASSERT_EQ(CL_SUCCESS, cl_status); - CSRE::csrDMatrix.rowBlocks = nullptr; - - CSRE::csrSMatrix.rowBlockSize = 0; - CSRE::csrDMatrix.rowBlockSize = 0; + clsparseCsrMetaDelete( &CSRE::csrSMatrix ); + clsparseCsrMetaDelete( &CSRE::csrDMatrix ); this->test_csrmv(); @@ -374,16 +365,6 @@ TYPED_TEST(Blas2, csrmv_vector) status = clsparseCsrMetaSize( &CSRE::csrDMatrix, CLSE::control ); 
ASSERT_EQ(clsparseSuccess, status); - CSRE::csrSMatrix.rowBlocks = - ::clCreateBuffer( CLSE::context, CL_MEM_READ_WRITE, - CSRE::csrSMatrix.rowBlockSize * sizeof( cl_ulong ), - NULL, &cl_status ); - - ASSERT_EQ(CL_SUCCESS, cl_status); - - CSRE::csrDMatrix.rowBlocks = CSRE::csrSMatrix.rowBlocks; - ::clRetainMemObject( CSRE::csrDMatrix.rowBlocks ); - status = clsparseCsrMetaCompute(&CSRE::csrSMatrix, CLSE::control ); ASSERT_EQ (clsparseSuccess, status); status = clsparseCsrMetaCompute(&CSRE::csrDMatrix, CLSE::control ); From 844bf2696808b029d04bbca9aa126fb0b2703192 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 18 Dec 2015 17:23:10 -0600 Subject: [PATCH 12/19] Added an output parameter to clsparseCsrMetaSize() clsparseCsrMetaCompute computes the metasize itself, making the clsparseCsrMetaSize() an optional user call Refactored test and benchmark code to conform. --- .../functions/clfunc-xSpMdM.hpp | 5 +--- .../functions/clfunc_xBiCGStab.hpp | 8 ++---- .../clsparse-bench/functions/clfunc_xCG.hpp | 6 +--- .../functions/clfunc_xCsr2Coo.hpp | 9 ++---- .../functions/clfunc_xCsr2Dense.hpp | 8 ++---- .../functions/clfunc_xSpMdV.hpp | 6 +--- src/include/clSPARSE.h | 2 +- src/library/include/clSPARSE-1x.hpp | 7 +---- src/library/internal/data-types/csr-meta.cpp | 28 +++++++++++++------ src/tests/resources/csr_matrix_environment.h | 8 ++---- .../resources/sparse_matrix_environment.h | 6 ++-- src/tests/test-blas2.cpp | 13 +++++---- 12 files changed, 43 insertions(+), 63 deletions(-) diff --git a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp index 4da9a74..d173e6c 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp @@ -129,9 +129,6 @@ class xSpMdM: public clsparseFunc if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile); - clsparseCsrMetaSize( &csrMtx, control ); - csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, csrMtx.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute( &csrMtx, control ); // Initialize the dense B & C matrices that we multiply against the sparse matrix @@ -216,10 +213,10 @@ class xSpMdM: public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. 
(in client.cpp) //need to do this before we eventually hit the destructor + clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowBlocks ), "clReleaseMemObject csrMtx.rowBlocks" ); CLSPARSE_V( ::clReleaseMemObject( denseB.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( denseC.values ), "clReleaseMemObject y.values" ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp index b3a51b4..2c6bdab 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp @@ -111,7 +111,7 @@ class xBiCGStab : public clsparseFunc if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile ); - // Now initialise a CSR matrix from the COO matrix + // Now initialize a CSR matrix from the COO matrix clsparseInitCsrMatrix( &csrMtx ); csrMtx.num_nonzeros = nnz; csrMtx.num_rows = row; @@ -140,10 +140,6 @@ class xBiCGStab : public clsparseFunc if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile ); - clsparseCsrMetaSize( &csrMtx, control ); - csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - csrMtx.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute( &csrMtx, control ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix @@ -216,10 +212,10 @@ class xBiCGStab : public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. 
(in client.cpp) //need to do this before we eventually hit the destructor + clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowBlocks ), "clReleaseMemObject csrMtx.rowBlocks" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp index 8e78b01..8317142 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp @@ -139,10 +139,6 @@ class xCG : public clsparseFunc if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile ); - clsparseCsrMetaSize( &csrMtx, control ); - csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - csrMtx.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute( &csrMtx, control ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix @@ -215,10 +211,10 @@ class xCG : public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor + clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowBlocks ), "clReleaseMemObject csrMtx.rowBlocks" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp index b6580e5..5a95cc9 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp @@ -141,12 +141,7 @@ class xCsr2Coo : public clsparseFunc if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); -#if 0 // Not needed - clsparseCsrMetaSize(&csrMtx, control); - csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_WRITE, csrMtx.rowBlockSize * sizeof(cl_ulong), NULL, &status ); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute(&csrMtx, control); -#endif // Initialize the output coo matrix clsparseInitCooMatrix(&cooMtx); @@ -240,10 +235,10 @@ class xCsr2Coo : public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. 
(in client.cpp) //need to do this before we eventually hit the destructor - CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); + clsparseCsrMetaDelete( &csrMtx ); + CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); - //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(cooMtx.values), "clReleaseMemObject cooMtx.values"); CLSPARSE_V(::clReleaseMemObject(cooMtx.colIndices), "clReleaseMemObject cooMtx.colIndices"); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp index 61073e8..89e9e1f 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp @@ -139,12 +139,8 @@ class xCsr2Dense : public clsparseFunc if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); -#if 0 // Not Required - clsparseCsrMetaSize(&csrMtx, control); - csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_WRITE, csrMtx.rowBlockSize * sizeof(cl_ulong), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); + clsparseCsrMetaCompute(&csrMtx, control); -#endif // Initialize the output dense matrix cldenseInitMatrix(&denseMtx); @@ -211,10 +207,10 @@ class xCsr2Dense : public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor + clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); - //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(denseMtx.values), "clReleaseMemObject denseMtx.values"); } diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp index cd22058..b6fc548 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp @@ -139,10 +139,6 @@ class xSpMdV: public clsparseFunc if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market data from disk: " + sparseFile ); - clsparseCsrMetaSize( &csrMtx, control ); - csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, - csrMtx.rowBlockSize * sizeof( cl_ulong ), NULL, &status ); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" ); clsparseCsrMetaCompute( &csrMtx, control ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix @@ -222,10 +218,10 @@ class xSpMdV: public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. 
(in client.cpp) //need to do this before we eventually hit the destructor + clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowBlocks ), "clReleaseMemObject csrMtx.rowBlocks" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index 6c21655..0511823 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -614,7 +614,7 @@ extern "C" { * \ingroup FILE */ CLSPARSE_EXPORT clsparseStatus - clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ); + clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, size_t* metaSize ); /*! * \brief Calculate the meta-data for csr-adaptive SpM-dV algorithm diff --git a/src/library/include/clSPARSE-1x.hpp b/src/library/include/clSPARSE-1x.hpp index 50acfef..3ede02b 100644 --- a/src/library/include/clSPARSE-1x.hpp +++ b/src/library/include/clSPARSE-1x.hpp @@ -74,12 +74,7 @@ class clMemRAII pType* clMem; public: - -// clMemRAII() : clQueue(nullptr), clBuff(nullptr), clMem(nullptr) -// { - -// } - + clMemRAII( const cl_command_queue cl_queue, const cl_mem cl_buff, const size_t cl_size = 0, const cl_mem_flags cl_flags = CL_MEM_READ_WRITE) : clMem( nullptr ) diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index 0ff84ba..9ace14a 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -21,14 +21,20 @@ #include "internal/clsparse-control.hpp" clsparseStatus -clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ) +clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, size_t* metaSize ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); - matrix_meta* meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); + + if( csrMatx->meta ) + { + *metaSize = static_cast< matrix_meta* >( pCsrMatx->meta )->rowBlockSize; + + return clsparseSuccess; + } clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); - meta_ptr->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); + *metaSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); return clsparseSuccess; } @@ -37,7 +43,6 @@ clsparseStatus clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); - matrix_meta* meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); // Check to ensure nRows can fit in 32 bits if( static_cast( pCsrMatx->num_rows ) > static_cast( pow( 2, ( 64 - ROW_BITS ) ) ) ) @@ -46,16 +51,21 @@ clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ) return clsparseOutOfResources; } - if( meta_ptr == nullptr ) + clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + cl_int* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, 
CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + + matrix_meta* meta_ptr = nullptr; + if( pCsrMatx->meta == nullptr ) { - pCsrMatx->meta = new matrix_meta; - clsparseCsrMetaSize( csrMatx, control ); + meta_ptr = new matrix_meta; + meta_ptr->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); } + else + meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); if( pCsrMatx->meta ) { - clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + meta_ptr->rowBlocks = ::cl::Buffer( control->getContext( ), CL_MEM_READ_WRITE, meta_ptr->rowBlockSize * sizeof( cl_ulong ) ); cl_ulong* ulCsrRowBlocks = static_cast< cl_ulong* >( control->queue.enqueueMapBuffer( meta_ptr->rowBlocks, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, meta_ptr->offRowBlocks, meta_ptr->rowBlockSize ) ); diff --git a/src/tests/resources/csr_matrix_environment.h b/src/tests/resources/csr_matrix_environment.h index 40221c5..d448194 100644 --- a/src/tests/resources/csr_matrix_environment.h +++ b/src/tests/resources/csr_matrix_environment.h @@ -122,17 +122,15 @@ class CSREnvironment: public ::testing::Environment csrSMatrix.num_cols = csrDMatrix.num_cols; csrSMatrix.num_rows = csrDMatrix.num_rows; - // Don't use adaptive kernel in double precision yet. - //csrSMatrix.meta = csrDMatrix.meta; - //::clRetainMemObject( csrSMatrix.rowBlocks ); - clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); - csrSMatrix.colIndices = csrDMatrix.colIndices; ::clRetainMemObject( csrSMatrix.colIndices ); csrSMatrix.rowOffsets = csrDMatrix.rowOffsets; ::clRetainMemObject( csrSMatrix.rowOffsets ); + // Don't use adaptive kernel in double precision yet. 
+ clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); + csrSMatrix.values = ::clCreateBuffer( context, CL_MEM_READ_ONLY, csrSMatrix.num_nonzeros * sizeof( cl_float ), NULL, &status ); diff --git a/src/tests/resources/sparse_matrix_environment.h b/src/tests/resources/sparse_matrix_environment.h index c0392d4..b177f9f 100644 --- a/src/tests/resources/sparse_matrix_environment.h +++ b/src/tests/resources/sparse_matrix_environment.h @@ -73,7 +73,6 @@ class CSRSparseEnvironment : public ::testing::Environment { if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk"); - clsparseCsrMetaSize(&csrSMatrix, CLSE::control); clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); // Download sparse matrix data to host @@ -183,7 +182,8 @@ class CSRSparseEnvironment : public ::testing::Environment { csrSMatrixA.num_nonzeros = nnzA; csrSMatrixA.num_rows = m; csrSMatrixA.num_cols = k; - clsparseCsrMetaSize(&csrSMatrixA, CLSE::control); + size_t metaSize; + clsparseCsrMetaSize(&csrSMatrixA, CLSE::control, &metaSize ); // Load single precision data from file; this API loads straight into GPU memory csrSMatrixA.values = ::clCreateBuffer(context, CL_MEM_READ_ONLY, @@ -216,7 +216,7 @@ class CSRSparseEnvironment : public ::testing::Environment { csrSMatrixB.num_nonzeros = nnzB; csrSMatrixB.num_rows = k; csrSMatrixB.num_cols = n; - clsparseCsrMetaSize(&csrSMatrixB, CLSE::control); + clsparseCsrMetaSize(&csrSMatrixB, CLSE::control, &metaSize ); // Load single precision data from file; this API loads straight into GPU memory csrSMatrixB.values = ::clCreateBuffer(context, CL_MEM_READ_ONLY, diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp index 80d387d..e13abc6 100644 --- a/src/tests/test-blas2.cpp +++ b/src/tests/test-blas2.cpp @@ -109,17 +109,17 @@ class Blas2 : public ::testing::Test } // Knuth's Two-Sum algorithm, which allows us to add together two floating - // point numbers and exactly tranform the answer into a sum and a + // point numbers and exactly transform the answer into a sum and a // rounding error. - // Inputs: x and y, the two inputs to be aded together. + // Inputs: x and y, the two inputs to be added together. // In/Out: *sumk_err, which is incremented (by reference) -- holds the // error value as a result of the 2sum calculation. // Returns: The non-corrected sum of inputs x and y. T two_sum(T x, T y, T *sumk_err) { // We use this 2Sum algorithm to perform a compensated summation, - // which can reduce the cummulative rounding errors in our SpMV - // summation. Our compensated sumation is based on the SumK algorithm + // which can reduce the cumulative rounding errors in our SpMV + // summation. Our compensated summation is based on the SumK algorithm // (with K==2) from Ogita, Rump, and Oishi, "Accurate Sum and Dot // Product" in SIAM J. on Scientific Computing 26(6) pp 1955-1988, // Jun. 2005. @@ -359,10 +359,11 @@ TYPED_TEST(Blas2, csrmv_vector) // later use. 
clsparseStatus status; - status = clsparseCsrMetaSize( &CSRE::csrSMatrix, CLSE::control ); + size_t metaSize; + status = clsparseCsrMetaSize( &CSRE::csrSMatrix, CLSE::control, &metaSize ); ASSERT_EQ(clsparseSuccess, status); - status = clsparseCsrMetaSize( &CSRE::csrDMatrix, CLSE::control ); + status = clsparseCsrMetaSize( &CSRE::csrDMatrix, CLSE::control, &metaSize ); ASSERT_EQ(clsparseSuccess, status); status = clsparseCsrMetaCompute(&CSRE::csrSMatrix, CLSE::control ); From 67ce57cb6db94bebee50336227613950940a0759 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 18 Dec 2015 18:03:11 -0600 Subject: [PATCH 13/19] Rename clsparseCsrMetaCompute to clsparseCsrMetaCreate to make the name pair with clsparseCsrMetaDelete --- src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp | 2 +- src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp | 2 +- src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp | 2 +- src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp | 2 +- src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp | 2 +- src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp | 2 +- src/include/clSPARSE.h | 2 +- src/library/internal/data-types/csr-meta.cpp | 2 +- src/tests/resources/csr_matrix_environment.h | 4 ++-- src/tests/resources/sparse_matrix_environment.h | 2 +- src/tests/test-blas2.cpp | 4 ++-- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp index d173e6c..6da18bc 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp @@ -129,7 +129,7 @@ class xSpMdM: public clsparseFunc if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile); - clsparseCsrMetaCompute( &csrMtx, control ); + clsparseCsrMetaCreate( &csrMtx, control ); // Initialize the dense B & C matrices that we multiply against the sparse matrix // We are shaping B, such that no matter what shape A is, C will result in a square matrix diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp index 2c6bdab..cdbdf88 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp @@ -140,7 +140,7 @@ class xBiCGStab : public clsparseFunc if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile ); - clsparseCsrMetaCompute( &csrMtx, control ); + clsparseCsrMetaCreate( &csrMtx, control ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix clsparseInitVector( &x ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp index 8317142..c574666 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp @@ -139,7 +139,7 @@ class xCG : public clsparseFunc if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + sparseFile ); - clsparseCsrMetaCompute( &csrMtx, control ); + clsparseCsrMetaCreate( &csrMtx, control ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix clsparseInitVector( &x ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp 
b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp index 5a95cc9..294cee2 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp @@ -141,7 +141,7 @@ class xCsr2Coo : public clsparseFunc if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); - clsparseCsrMetaCompute(&csrMtx, control); + clsparseCsrMetaCreate(&csrMtx, control); // Initialize the output coo matrix clsparseInitCooMatrix(&cooMtx); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp index 89e9e1f..95049f6 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp @@ -140,7 +140,7 @@ class xCsr2Dense : public clsparseFunc if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile); - clsparseCsrMetaCompute(&csrMtx, control); + clsparseCsrMetaCreate(&csrMtx, control); // Initialize the output dense matrix cldenseInitMatrix(&denseMtx); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp index b6fc548..a4b7564 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp @@ -139,7 +139,7 @@ class xSpMdV: public clsparseFunc if( fileError != clsparseSuccess ) throw clsparse::io_exception( "Could not read matrix market data from disk: " + sparseFile ); - clsparseCsrMetaCompute( &csrMtx, control ); + clsparseCsrMetaCreate( &csrMtx, control ); // Initialize the dense X & Y vectors that we multiply against the sparse matrix clsparseInitVector( &x ); diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index 0511823..f3e274b 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -629,7 +629,7 @@ extern "C" { * \ingroup FILE */ CLSPARSE_EXPORT clsparseStatus - clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ); + clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ); /*! 
* \brief Delete meta data associated with a CSR encoded matrix diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index 9ace14a..d04ec0a 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -40,7 +40,7 @@ clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, size_t } clsparseStatus -clsparseCsrMetaCompute( clsparseCsrMatrix* csrMatx, clsparseControl control ) +clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast<clsparseCsrMatrixPrivate*>( csrMatx ); diff --git a/src/tests/resources/csr_matrix_environment.h b/src/tests/resources/csr_matrix_environment.h index d448194..f989e9e 100644 --- a/src/tests/resources/csr_matrix_environment.h +++ b/src/tests/resources/csr_matrix_environment.h @@ -77,7 +77,7 @@ class CSREnvironment: public ::testing::Environment if( fileError != clsparseSuccess ) throw std::runtime_error( "Could not read matrix market data from disk: " + file_name ); - clsparseCsrMetaCompute( &csrDMatrix, CLSE::control ); + clsparseCsrMetaCreate( &csrDMatrix, CLSE::control ); //reassign the new matrix dimensions calculated clsparseCCsrMatrixFromFile to global variables @@ -129,7 +129,7 @@ class CSREnvironment: public ::testing::Environment ::clRetainMemObject( csrSMatrix.rowOffsets ); // Don't use adaptive kernel in double precision yet. - clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); + clsparseCsrMetaCreate( &csrSMatrix, CLSE::control ); csrSMatrix.values = ::clCreateBuffer( context, CL_MEM_READ_ONLY, csrSMatrix.num_nonzeros * sizeof( cl_float ), NULL, &status ); diff --git a/src/tests/resources/sparse_matrix_environment.h b/src/tests/resources/sparse_matrix_environment.h index b177f9f..be8fa80 100644 --- a/src/tests/resources/sparse_matrix_environment.h +++ b/src/tests/resources/sparse_matrix_environment.h @@ -73,7 +73,7 @@ class CSRSparseEnvironment : public ::testing::Environment { if (fileError != clsparseSuccess) throw std::runtime_error("Could not read matrix market data from disk"); - clsparseCsrMetaCompute( &csrSMatrix, CLSE::control ); + clsparseCsrMetaCreate( &csrSMatrix, CLSE::control ); // Download sparse matrix data to host // First, create space on host to hold the data diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp index e13abc6..5e7604e 100644 --- a/src/tests/test-blas2.cpp +++ b/src/tests/test-blas2.cpp @@ -366,9 +366,9 @@ TYPED_TEST(Blas2, csrmv_vector) status = clsparseCsrMetaSize( &CSRE::csrDMatrix, CLSE::control, &metaSize ); ASSERT_EQ(clsparseSuccess, status); - status = clsparseCsrMetaCompute(&CSRE::csrSMatrix, CLSE::control ); + status = clsparseCsrMetaCreate(&CSRE::csrSMatrix, CLSE::control ); ASSERT_EQ (clsparseSuccess, status); - status = clsparseCsrMetaCompute(&CSRE::csrDMatrix, CLSE::control ); + status = clsparseCsrMetaCreate(&CSRE::csrDMatrix, CLSE::control ); ASSERT_EQ (clsparseSuccess, status); } From f334698d6bbcc394c85f64ececcf7b6588125ae1 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Mon, 21 Dec 2015 15:52:18 -0600 Subject: [PATCH 14/19] For functions that have out parameters, follow the CppCoreGuidelines and change their signatures to return values instead of output parameters. This is in accordance with F.20 & F.21 https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md Samples changed to conform to new API. 
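A minimal sketch of the new calling convention, assuming a valid cl_command_queue is already in hand; the helper name setup_control is hypothetical, while clsparseCreateControl, CLSPARSE_V, and clsparseReleaseControl are used as shown in the updated samples below:

#include "clSPARSE.h"
#include "clSPARSE-error.h"

/* Before: clsparseStatus status; clsparseControl control = clsparseCreateControl( queue, &status );
 * After:  clsparseCreateControl returns a clsparseCreateResult (control + status) by value. */
static clsparseControl setup_control( cl_command_queue queue )
{
    clsparseCreateResult createResult = clsparseCreateControl( queue );
    CLSPARSE_V( createResult.status, "Failed to create clsparse control" );
    return createResult.control;  /* caller later calls clsparseReleaseControl( control ) */
}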
--- samples/sample-axpy.cpp | 15 ++--- samples/sample-cg.cpp | 46 +++++--------- samples/sample-norm1-c.c | 20 +++--- samples/sample-spmv.cpp | 30 +++------ .../functions/clfunc_common.hpp | 3 +- .../functions/clfunc_xBiCGStab.hpp | 5 +- .../clsparse-bench/functions/clfunc_xCG.hpp | 3 +- src/include/clSPARSE.h | 56 ++++++++++------- src/library/include/clSPARSE-1x.hpp | 8 +-- src/library/internal/clsparse-control.cpp | 62 +++++++++---------- src/library/internal/data-types/csr-meta.cpp | 17 +++-- src/library/solvers/solver-control.cpp | 27 ++++---- src/tests/resources/clsparse_environment.h | 3 +- src/tests/test-blas3.cpp | 22 +++---- src/tests/test-clsparse-utils.cpp | 9 +-- src/tests/test-solvers.cpp | 6 +- src/tests/test_interface_c.c | 4 +- 17 files changed, 166 insertions(+), 170 deletions(-) diff --git a/samples/sample-axpy.cpp b/samples/sample-axpy.cpp index e5921c6..a046951 100644 --- a/samples/sample-axpy.cpp +++ b/samples/sample-axpy.cpp @@ -28,7 +28,8 @@ #include #endif -#include +#include "clSPARSE.h" +#include "clSPARSE-error.h" /*! * \brief Sample AXPY (C++) @@ -174,15 +175,11 @@ int main(int argc, char* argv[]) // Create clsparseControl object - clsparseControl control = clsparseCreateControl(queue(), &status); - if (status != CL_SUCCESS) - { - std::cout << "Problem with creating clSPARSE control object" - <<" error [" << status << "]" << std::endl; - } + clsparseCreateResult createResult = clsparseCreateControl( queue( ) ); + CLSPARSE_V( createResult.status, "Failed to create clsparse control" ); /** Step 4. Execute AXPY algorithm **/ - status = cldenseSaxpy(&gpuY, &gpuAlpha, &gpuX, &gpuY, control); + status = cldenseSaxpy(&gpuY, &gpuAlpha, &gpuX, &gpuY, createResult.control ); if (status != clsparseSuccess) { @@ -191,7 +188,7 @@ int main(int argc, char* argv[]) } /** Step 5. Shutdown clSPARSE library & OpenCL **/ - status = clsparseReleaseControl(control); + status = clsparseReleaseControl( createResult.control ); status = clsparseTeardown(); if (status != clsparseSuccess) diff --git a/samples/sample-cg.cpp b/samples/sample-cg.cpp index d0c9590..d0cfedc 100644 --- a/samples/sample-cg.cpp +++ b/samples/sample-cg.cpp @@ -28,7 +28,8 @@ #include #endif -#include +#include "clSPARSE.h" +#include "clSPARSE-error.h" /*! * \brief Sample Conjugate Gradients Solver (CG C++) @@ -155,17 +156,11 @@ int main (int argc, char* argv[]) } - // Create clsparseControl object - clsparseControl control = clsparseCreateControl(queue(), &status); - if (status != CL_SUCCESS) - { - std::cout << "Problem with creating clSPARSE control object" - <<" error [" << status << "]" << std::endl; - return -4; - } - + // Create clSPARSE control object it require queue for kernel execution + clsparseCreateResult createResult = clsparseCreateControl( queue( ) ); + CLSPARSE_V( createResult.status, "Failed to create clsparse control" ); - // Read matrix from file. Calculates the rowBlocks strucutres as well. + // Read matrix from file. Calculates the rowBlocks structures as well. clsparseIdx_t nnz, row, col; // read MM header to get the size of the matrix; clsparseStatus fileError @@ -191,20 +186,14 @@ int main (int argc, char* argv[]) A.rowOffsets = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); - A.rowBlocks = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, - A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status ); - // Read matrix market file with explicit zero values included. 
- fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), control, true ); + fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), createResult.control, true ); // This function allocates memory for rowBlocks structure. If not called // the structure will not be calculated and clSPARSE will run the vectorized // version of SpMV instead of adaptive; - clsparseCsrMetaSize( &A, control ); - A.rowBlocks = ::clCreateBuffer( context(), CL_MEM_READ_WRITE, - A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status ); - clsparseCsrMetaCompute( &A, control ); + clsparseCsrMetaCreate( &A, createResult.control ); if (fileError != clsparseSuccess) { @@ -249,28 +238,27 @@ int main (int argc, char* argv[]) // relative tolerance: 1e-2 // absolute tolerance: 1e-5 // max iters: 1000 - clSParseSolverControl solverControl = - clsparseCreateSolverControl(DIAGONAL, 1000, 1e-2, 1e-5); + clsparseCreateSolverResult solverResult = + clsparseCreateSolverControl( DIAGONAL, 1000, 1e-2, 1e-5 ); + CLSPARSE_V( solverResult.status, "Failed to create clsparse solver control" ); // We can set different print modes of the solver status: // QUIET - print no messages (default) // NORMAL - print summary // VERBOSE - per iteration status; - clsparseSolverPrintMode(solverControl, VERBOSE); + clsparseSolverPrintMode( solverResult.control, VERBOSE); /* TODO: provide various solver statuses for different scenarios - * Solver reached max numer of iterations is not a failure. + * Solver reached max number of iterations is not a failure. */ - status = clsparseScsrcg(&x, &A, &b, solverControl, control); + status = clsparseScsrcg(&x, &A, &b, solverResult.control, createResult.control ); //release solver control structure after finishing execution; - clsparseReleaseSolverControl(solverControl); - - + clsparseReleaseSolverControl( solverResult.control ); /** Step 5. Close & release resources */ - status = clsparseReleaseControl(control); + status = clsparseReleaseControl( createResult.control ); if (status != clsparseSuccess) { std::cout << "Problem with releasing control object." @@ -285,10 +273,10 @@ int main (int argc, char* argv[]) << " Error: " << status << std::endl; } //release mem; + clsparseCsrMetaDelete( &A ); clReleaseMemObject ( A.values ); clReleaseMemObject ( A.colIndices ); clReleaseMemObject ( A.rowOffsets ); - clReleaseMemObject ( A.rowBlocks ); clReleaseMemObject ( x.values ); clReleaseMemObject ( b.values ); diff --git a/samples/sample-norm1-c.c b/samples/sample-norm1-c.c index f87ebe9..cf9cb91 100644 --- a/samples/sample-norm1-c.c +++ b/samples/sample-norm1-c.c @@ -17,7 +17,8 @@ #include #include -#include +#include "clSPARSE.h" +#include "clSPARSE-error.h" /*! \file * \brief Simple demonstration code for calculating Norm1 from within 'C' host code @@ -34,7 +35,7 @@ * 5. Shutdown clSPARSE library & OpenCL * * UNIX Hint: Before allocating more than 3GB of VRAM define GPU_FORCE_64BIT_PTR=1 - * in your system environment to enable 64bit addresing; + * in your system environment to enable 64bit addressing; */ int main( int argc, char* argv[ ] ) { @@ -55,7 +56,7 @@ int main( int argc, char* argv[ ] ) if (num_platforms == 0) { - printf ("No OpenCL platforms found. Exitting.\n"); + printf ("No OpenCL platforms found. Exiting.\n"); return 0; } @@ -67,7 +68,7 @@ int main( int argc, char* argv[ ] ) if (cl_status != CL_SUCCESS) { - printf("Poblem with getting platform IDs. Err: %d\n", cl_status); + printf("Problem with getting platform IDs. 
Err: %d\n", cl_status); free(platforms); return -1; } @@ -78,7 +79,7 @@ int main( int argc, char* argv[ ] ) if (num_devices == 0) { - printf("No OpenCL GPU devices found on platform 0. Exitting\n"); + printf("No OpenCL GPU devices found on platform 0. Exiting\n"); free(platforms); return -2; } @@ -91,7 +92,7 @@ int main( int argc, char* argv[ ] ) if (cl_status != CL_SUCCESS) { - printf("Problem with getting device id from platform. Exitting\n"); + printf("Problem with getting device id from platform. Exiting\n"); free(devices); free(platforms); return -3; @@ -134,9 +135,10 @@ int main( int argc, char* argv[ ] ) } // Create clSPARSE control object it require queue for kernel execution - clsparseControl control = clsparseCreateControl(queue, &status); + clsparseCreateResult createResult = clsparseCreateControl( queue ); + CLSPARSE_V( createResult.status, "Failed to create clsparse control" ); - status = cldenseSnrm1(&norm_x, &x, control); + status = cldenseSnrm1(&norm_x, &x, createResult.control ); // Read result float* host_norm_x = @@ -148,7 +150,7 @@ int main( int argc, char* argv[ ] ) cl_status = clEnqueueUnmapMemObject(queue, norm_x.value, host_norm_x, 0, NULL, NULL); - status = clsparseReleaseControl(control); + status = clsparseReleaseControl( createResult.control ); status = clsparseTeardown(); if (status != clsparseSuccess) diff --git a/samples/sample-spmv.cpp b/samples/sample-spmv.cpp index 8313b1e..cc166a5 100644 --- a/samples/sample-spmv.cpp +++ b/samples/sample-spmv.cpp @@ -28,7 +28,8 @@ #include #endif -#include +#include "clSPARSE.h" +#include "clSPARSE-error.h" /** * \brief Sample Sparse Matrix dense Vector multiplication (SPMV C++) @@ -188,16 +189,11 @@ int main (int argc, char* argv[]) // Create clsparseControl object - clsparseControl control = clsparseCreateControl(queue(), &status); - if (status != CL_SUCCESS) - { - std::cout << "Problem with creating clSPARSE control object" - <<" error [" << status << "]" << std::endl; - return -4; - } + clsparseCreateResult createResult = clsparseCreateControl( queue( ) ); + CLSPARSE_V( createResult.status, "Failed to create clsparse control" ); - // Read matrix from file. Calculates the rowBlocks strucutres as well. + // Read matrix from file. Calculates the rowBlocks structures as well. clsparseIdx_t nnz, row, col; // read MM header to get the size of the matrix; clsparseStatus fileError @@ -223,20 +219,14 @@ int main (int argc, char* argv[]) A.rowOffsets = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); - A.rowBlocks = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, - A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status ); - // Read matrix market file with explicit zero values included. - fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), control, true ); + fileError = clsparseSCsrMatrixfromFile( &A, matrix_path.c_str( ), createResult.control, true ); // This function allocates memory for rowBlocks structure. If not called // the structure will not be calculated and clSPARSE will run the vectorized // version of SpMV instead of adaptive; - clsparseCsrMetaSize( &A, control ); - A.rowBlocks = ::clCreateBuffer( context(), CL_MEM_READ_WRITE, - A.rowBlockSize * sizeof( cl_ulong ), NULL, &cl_status ); - clsparseCsrMetaCompute( &A, control ); + clsparseCsrMetaCreate( &A, createResult.control ); if (fileError != clsparseSuccess) { @@ -283,7 +273,7 @@ int main (int argc, char* argv[]) /**Step 4. 
Call the spmv algorithm */ - status = clsparseScsrmv(&alpha, &A, &x, &beta, &y, control); + status = clsparseScsrmv(&alpha, &A, &x, &beta, &y, createResult.control ); if (status != clsparseSuccess) { @@ -293,7 +283,7 @@ int main (int argc, char* argv[]) /** Step 5. Close & release resources */ - status = clsparseReleaseControl(control); + status = clsparseReleaseControl( createResult.control ); if (status != clsparseSuccess) { std::cout << "Problem with releasing control object." @@ -310,10 +300,10 @@ int main (int argc, char* argv[]) //release mem; + clsparseCsrMetaDelete( &A ); clReleaseMemObject ( A.values ); clReleaseMemObject ( A.colIndices ); clReleaseMemObject ( A.rowOffsets ); - clReleaseMemObject ( A.rowBlocks ); clReleaseMemObject ( x.values ); clReleaseMemObject ( y.values ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_common.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_common.hpp index 51cdb9c..41c7993 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_common.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_common.hpp @@ -81,7 +81,8 @@ class clsparseFunc CLSPARSE_V( ::clReleaseContext( ctx ), "releasing context" ); } - control = clsparseCreateControl( queue, NULL ); + clsparseCreateResult createResult = clsparseCreateControl( queue ); + control = ( createResult.status == clsparseSuccess ) ? createResult.control : nullptr; } diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp index cdbdf88..1483c07 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp @@ -50,8 +50,9 @@ class xBiCGStab : public clsparseFunc clsparseEnableAsync( control, false ); explicit_zeroes = keep_explicit_zeroes; - solverControl = clsparseCreateSolverControl(DIAGONAL, 1000, 1e-6, 0); - clsparseSolverPrintMode(solverControl, VERBOSE); + clsparseCreateSolverResult solverResult = clsparseCreateSolverControl(DIAGONAL, 1000, 1e-6, 0); + solverControl = solverResult.control; + clsparseSolverPrintMode( solverControl, VERBOSE); } ~xBiCGStab( ) diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp index c574666..f2b8cbc 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp @@ -48,7 +48,8 @@ class xCG : public clsparseFunc clsparseEnableAsync( control, false ); explicit_zeroes = keep_explicit_zeroes; - solverControl = clsparseCreateSolverControl(NOPRECOND, 10000, 1e-4, 1e-8); + clsparseCreateSolverResult solverResult = clsparseCreateSolverControl( NOPRECOND, 10000, 1e-4, 1e-8 ); + solverControl = solverResult.control; clsparseSolverPrintMode(solverControl, NORMAL); } diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index f3e274b..275dcb0 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -215,11 +215,22 @@ extern "C" { */ /**@{*/ - /*! \brief clsparseControl keeps state relevant for OpenCL operations - * like kernel execution, memory allocation and synchronization behavior + /*! \brief clsparseControl keeps OpenCL state like kernel execution, + * memory allocation and synchronization behavior + * \details Struct implementation hidden to clients using C PIMPL idiom + * to make private to library */ typedef struct _clsparseControl* clsparseControl; + /*! \brief A structure returned by value from the clsparseCreateControl + * function. 
This serves as result/status pair for the creation operation + */ + typedef struct _clsparseCreateResult + { + clsparseControl control; + clsparseStatus status; + } clsparseCreateResult; + /*! * \brief setup the clsparse control object from external OpenCL queue * @@ -230,8 +241,8 @@ extern "C" { * * \ingroup STATE */ - CLSPARSE_EXPORT clsparseControl - clsparseCreateControl( cl_command_queue queue, clsparseStatus *status ); + CLSPARSE_EXPORT clsparseCreateResult + clsparseCreateControl( cl_command_queue queue ); /*! * \brief Enable/Disable asynchronous behavior for clSPARSE @@ -260,22 +271,14 @@ extern "C" { CLSPARSE_EXPORT clsparseStatus clsparseEnableExtendedPrecision( clsparseControl control, cl_bool extPrecision ); - /*! - * \brief Configure the library to use an array of events - * \warning NOT WORKING! NDRange throws Failure - * - * \param[in] control A valid clsparseControl created with clsparseCreateControl - * \param[in] num_events_in_wait_list Size of the event_wait_list array - * \param[out] event_wait_list An array of OpenCL event objects for client to wait on - * - * \returns \b clsparseSuccess - * - * \ingroup STATE + /*! \brief A structure returned by value from the clsparseGetEvent + * function. This serves as result/status pair for the creation operation */ - CLSPARSE_EXPORT clsparseStatus - clsparseSetupEventWaitList( clsparseControl control, - cl_uint num_events_in_wait_list, - cl_event* event_wait_list ); + typedef struct _clsparseEventResult + { + cl_event event; + clsparseStatus status; + } clsparseEventResult; /*! * \brief Return an event from the last kernel execution @@ -288,8 +291,8 @@ extern "C" { * * \ingroup STATE */ - CLSPARSE_EXPORT clsparseStatus - clsparseGetEvent( clsparseControl control, cl_event* event ); + CLSPARSE_EXPORT clsparseEventResult + clsparseGetEvent( clsparseControl control ); /*! * \brief Sets internal control fields to 0 or Null and frees allocated structures @@ -344,6 +347,15 @@ extern "C" { */ typedef struct _solverControl* clSParseSolverControl; + /*! \brief A structure returned by value from the clsparseCreateSolverControl + * function. This serves as result/status pair for the creation operation + */ + typedef struct _clsparseCreateSolverResult + { + clSParseSolverControl control; + clsparseStatus status; + } clsparseCreateSolverResult; + /*! 
* \brief Create a clSParseSolverControl object to control clsparse iterative * solver operations @@ -357,7 +369,7 @@ extern "C" { * * \ingroup SOLVER */ - CLSPARSE_EXPORT clSParseSolverControl + CLSPARSE_EXPORT clsparseCreateSolverResult clsparseCreateSolverControl( PRECONDITIONER precond, cl_int maxIters, cl_double relTol, cl_double absTol ); diff --git a/src/library/include/clSPARSE-1x.hpp b/src/library/include/clSPARSE-1x.hpp index 3ede02b..e769f22 100644 --- a/src/library/include/clSPARSE-1x.hpp +++ b/src/library/include/clSPARSE-1x.hpp @@ -24,7 +24,7 @@ #include "clSPARSE-1x.h" #include "clSPARSE-error.h" -// C++ wrapper classes that inherit from the extenrally visible C classes, +// C++ wrapper classes that inherit from the externally visible C classes, // for the purpose of providing convenience methods to abstract away the // differences between cl1.2 and cl2.0 // Users are responsible for creating and destroying the OpenCL objects @@ -48,11 +48,6 @@ struct matrix_meta { } - ~matrix_meta( ) - { - std::cout << "matrix_meta destructor" << std::endl; - } - void clear( ) { offRowBlocks = rowBlockSize = 0; @@ -214,6 +209,7 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix num_rows = num_cols = num_nonzeros = 0; values = colIndices = rowOffsets = nullptr; offValues = offColInd = offRowOff = 0; + meta = nullptr; } clsparseIdx_t nnz_per_row() const diff --git a/src/library/internal/clsparse-control.cpp b/src/library/internal/clsparse-control.cpp index 13e3e2e..a56956e 100644 --- a/src/library/internal/clsparse-control.cpp +++ b/src/library/internal/clsparse-control.cpp @@ -95,32 +95,29 @@ clsparseStatus collectEnvParams(clsparseControl control) return clsparseSuccess; } -clsparseControl -clsparseCreateControl( cl_command_queue queue, clsparseStatus *status ) +clsparseCreateResult +clsparseCreateControl( cl_command_queue queue ) { - clsparseControl control = new _clsparseControl( queue ); + clsparseCreateResult cPair; + cPair.status = clsparseSuccess; + cPair.control = new _clsparseControl( queue ); - clsparseStatus err = clsparseSuccess; - if( !control ) + if( !cPair.control ) { - control = nullptr; - err = clsparseOutOfHostMemory; + cPair.control = nullptr; + cPair.status = clsparseOutOfHostMemory; + return cPair; } - control->event = nullptr; -// control->off_alpha = 0; -// control->off_beta = 0; -// control->off_x = 0; -// control->off_y = 0; + cPair.control->event = nullptr; + cPair.control->wavefront_size = 0; + cPair.control->max_wg_size = 0; + cPair.control->async = false; + cPair.control->extended_precision = false; + cPair.control->dpfp_support = false; + cPair.control->addressBits = 64; // default 64 bits - control->wavefront_size = 0; - control->max_wg_size = 0; - control->async = false; - control->extended_precision = false; - control->dpfp_support = false; - control->addressBits = 64; // default 64 bits - - collectEnvParams( control ); + collectEnvParams( cPair.control ); // Discover and load the timer module if present void* timerLibHandle = LoadSharedLibrary( "lib", "clsparseTimer", false ); @@ -135,16 +132,11 @@ clsparseCreateControl( cl_command_queue queue, clsparseStatus *status ) // Create and initialize our timer class, if the external timer shared library loaded if( pfclsparseTimer ) { - control->pDeviceTimer = static_cast ( pfclsparseTimer( CLSPARSE_GPU ) ); + cPair.control->pDeviceTimer = static_cast ( pfclsparseTimer( CLSPARSE_GPU ) ); } } - if( status != NULL ) - { - *status = err; - } - - return control; + return cPair; } clsparseStatus @@ -221,21 
+213,25 @@ cl_event *event_wait_list ) return clsparseSuccess; } -clsparseStatus -clsparseGetEvent( clsparseControl control, cl_event *event ) +clsparseEventResult +clsparseGetEvent( clsparseControl control ) { + clsparseEventResult resPair; + resPair.status = clsparseSuccess; + resPair.event = nullptr; + if( control == NULL ) { - return clsparseInvalidControlObject; + resPair.status = clsparseInvalidControlObject; + return resPair; } //keeps the event valid on the user side ::clRetainEvent( control->event( ) ); - *event = control->event( ); - - return clsparseSuccess; + resPair.event = control->event( ); + return resPair; } //clsparseStatus diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index d04ec0a..4fc058f 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -55,15 +55,18 @@ clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ) cl_int* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); matrix_meta* meta_ptr = nullptr; - if( pCsrMatx->meta == nullptr ) + if( pCsrMatx->meta ) { - meta_ptr = new matrix_meta; - meta_ptr->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); - } - else meta_ptr = static_cast< matrix_meta* >( pCsrMatx->meta ); + delete meta_ptr; + meta_ptr = nullptr; + pCsrMatx->meta = nullptr; + } - if( pCsrMatx->meta ) + meta_ptr = new matrix_meta; + meta_ptr->rowBlockSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); + + if( meta_ptr ) { meta_ptr->rowBlocks = ::cl::Buffer( control->getContext( ), CL_MEM_READ_WRITE, meta_ptr->rowBlockSize * sizeof( cl_ulong ) ); @@ -72,6 +75,8 @@ clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ) ComputeRowBlocks( ulCsrRowBlocks, meta_ptr->rowBlockSize, rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR, true ); control->queue.enqueueUnmapMemObject( meta_ptr->rowBlocks, ulCsrRowBlocks ); } + + pCsrMatx->meta = meta_ptr; return clsparseSuccess; } diff --git a/src/library/solvers/solver-control.cpp b/src/library/solvers/solver-control.cpp index dada5ac..bb9151a 100644 --- a/src/library/solvers/solver-control.cpp +++ b/src/library/solvers/solver-control.cpp @@ -23,25 +23,30 @@ #include "solver-control.hpp" -clSParseSolverControl +clsparseCreateSolverResult clsparseCreateSolverControl(PRECONDITIONER precond, cl_int maxIters, cl_double relTol, cl_double absTol) { - clSParseSolverControl solver_control = new _solverControl(); - if(!solver_control) + clsparseCreateSolverResult cPair; + cPair.status = clsparseSuccess; + cPair.control = new _solverControl( ); + + if( !cPair.control ) { - solver_control = nullptr; + cPair.control = nullptr; + cPair.status = clsparseOutOfHostMemory; + return cPair; } - solver_control->absoluteTolerance = absTol; - solver_control->relativeTolerance = relTol; - solver_control->nIters = 0; - solver_control->maxIters = maxIters; - solver_control->initialResidual = 0; - solver_control->preconditioner = precond; + cPair.control->absoluteTolerance = absTol; + cPair.control->relativeTolerance = relTol; + cPair.control->nIters = 0; + cPair.control->maxIters = maxIters; + cPair.control->initialResidual = 0; + cPair.control->preconditioner = precond; - return solver_control; + return cPair; } diff --git a/src/tests/resources/clsparse_environment.h 
b/src/tests/resources/clsparse_environment.h index 5aa084d..46bb7b6 100644 --- a/src/tests/resources/clsparse_environment.h +++ b/src/tests/resources/clsparse_environment.h @@ -40,7 +40,8 @@ class ClSparseEnvironment : public ::testing::Environment clsparseSetup(); - control = clsparseCreateControl(queue, NULL); + clsparseCreateResult createResult = clsparseCreateControl( queue ); + control = ( createResult.status == clsparseSuccess ) ? createResult.control : nullptr; //size of the vector used in blas1 test. //this->N = N; diff --git a/src/tests/test-blas3.cpp b/src/tests/test-blas3.cpp index cf82e79..ae3bdec 100644 --- a/src/tests/test-blas3.cpp +++ b/src/tests/test-blas3.cpp @@ -227,7 +227,6 @@ TYPED_TEST(TestCSRSpGeMM, square) using CLSE = ClSparseEnvironment; typedef typename uBLAS::compressed_matrix > uBlasCSRM; - cl::Event event; clsparseEnableAsync(CLSE::control, true); #ifdef TEST_LONG @@ -238,8 +237,9 @@ TYPED_TEST(TestCSRSpGeMM, square) EXPECT_EQ(clsparseSuccess, status); - status = clsparseGetEvent(CLSE::control, &event()); - EXPECT_EQ(clsparseSuccess, status); + clsparseEventResult sparseEvent = clsparseGetEvent( CLSE::control ); + EXPECT_EQ(clsparseSuccess, sparseEvent.status ); + cl::Event event = sparseEvent.event; event.wait(); //std::cout << "nrows =" << (this->csrMatrixC).num_rows << std::endl; @@ -348,7 +348,6 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) using CLSE = ClSparseEnvironment; typedef typename uBLAS::compressed_matrix > uBlasCSRM; - cl::Event event; clsparseEnableAsync(CLSE::control, true); clsparse_matrix_fill objFillVals(42, -14, 14); @@ -375,9 +374,10 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) EXPECT_EQ(clsparseSuccess, status); - status = clsparseGetEvent(CLSE::control, &event()); - EXPECT_EQ(clsparseSuccess, status); - event.wait(); + clsparseEventResult sparseEvent = clsparseGetEvent( CLSE::control ); + EXPECT_EQ( clsparseSuccess, sparseEvent.status ); + cl::Event event = sparseEvent.event; + event.wait( ); std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix @@ -585,7 +585,6 @@ TYPED_TEST(TestCSRMM, multiply) using CSRE = CSREnvironment; using CLSE = ClSparseEnvironment; - cl::Event event; clsparseEnableAsync(CLSE::control, true); //control object is global and it is updated here; @@ -595,9 +594,10 @@ TYPED_TEST(TestCSRMM, multiply) EXPECT_EQ(clsparseSuccess, status); - status = clsparseGetEvent(CLSE::control, &event()); - EXPECT_EQ(clsparseSuccess, status); - event.wait(); + clsparseEventResult sparseEvent = clsparseGetEvent( CLSE::control ); + EXPECT_EQ( clsparseSuccess, sparseEvent.status ); + cl::Event event = sparseEvent.event; + event.wait( ); std::vector result(this->C.data().size()); diff --git a/src/tests/test-clsparse-utils.cpp b/src/tests/test-clsparse-utils.cpp index 13cafb7..f060781 100644 --- a/src/tests/test-clsparse-utils.cpp +++ b/src/tests/test-clsparse-utils.cpp @@ -85,7 +85,8 @@ TEST( clsparseInit, control ) clsparseSetup( ); - auto control = clsparseCreateControl( queue, NULL ); + clsparseCreateResult createResult = clsparseCreateControl( queue ); + auto control = ( createResult.status == clsparseSuccess ) ? 
createResult.control : nullptr; clsparseReleaseControl( control ); clsparseTeardown( ); @@ -167,8 +168,8 @@ TEST( clsparseInit, cpp_interface ) // Create clsparseControl object - clsparseControl control = clsparseCreateControl( queue( ), &status ); - if( status != CL_SUCCESS ) + clsparseCreateResult createResult = clsparseCreateControl( queue( ) ); + if( createResult.status != CL_SUCCESS ) { std::cout << "Problem with creating clSPARSE control object" << " error [" << status << "]" << std::endl; @@ -176,7 +177,7 @@ TEST( clsparseInit, cpp_interface ) } //cleanup; - status = clsparseReleaseControl( control ); + status = clsparseReleaseControl( createResult.control ); ASSERT_EQ( clsparseSuccess, status ); diff --git a/src/tests/test-solvers.cpp b/src/tests/test-solvers.cpp index 87063a7..a219205 100644 --- a/src/tests/test-solvers.cpp +++ b/src/tests/test-solvers.cpp @@ -73,14 +73,14 @@ class Solver : public ::testing::Test void SetUp() { // Setup solver control - clsparseStatus status; - solverControl = clsparseCreateSolverControl(precond, + clsparseCreateSolverResult solverResult = clsparseCreateSolverControl(precond, maxIterations, relativeTolerance, absoluteTolerance); + solverControl = solverResult.control; ASSERT_NE(nullptr, solverControl); - status = clsparseSolverPrintMode(solverControl, printMode); + clsparseStatus status = clsparseSolverPrintMode(solverControl, printMode); ASSERT_EQ(clsparseSuccess, status); // Setup rhs and vector of unknowns diff --git a/src/tests/test_interface_c.c b/src/tests/test_interface_c.c index 044e94a..a2b8253 100644 --- a/src/tests/test_interface_c.c +++ b/src/tests/test_interface_c.c @@ -55,13 +55,13 @@ int main( int argc, char* argv[ ] ) // Library init code starts here clsparseSetup( ); - clsparseControl control = clsparseCreateControl( queue, NULL ); + clsparseCreateResult createResult = clsparseCreateControl( queue ); clsparseCooMatrix myCooMatx; clsparseInitCooMatrix( &myCooMatx ); // Library termination - clsparseReleaseControl( control ); + clsparseReleaseControl( createResult.control ); clsparseTeardown( ); // OpenCL termination From 70c01344274bb9e83abd74f82061b3b398457e4d Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 7 Jan 2016 12:13:52 -0600 Subject: [PATCH 15/19] Upgrading boost dependency to 1.60 Changed the output of clsparseCsrMetaSize to be clsparseIdx_t --- cmake/ExternalBoost.cmake | 10 +++++----- src/benchmarks/CMakeLists.txt | 2 +- src/include/clSPARSE.h | 2 +- src/library/internal/data-types/csr-meta.cpp | 10 +++++----- src/library/internal/data-types/csr-meta.hpp | 4 ++-- src/tests/CMakeLists.txt | 2 +- src/tests/resources/csr_matrix_environment.h | 2 ++ src/tests/test-blas2.cpp | 2 +- 8 files changed, 18 insertions(+), 16 deletions(-) diff --git a/cmake/ExternalBoost.cmake b/cmake/ExternalBoost.cmake index 912e149..b56d3b8 100644 --- a/cmake/ExternalBoost.cmake +++ b/cmake/ExternalBoost.cmake @@ -24,7 +24,7 @@ include( ExternalProject ) # ExternalProject # Change this one line to upgrade to newer versions of boost -set( ext.Boost_VERSION "1.59.0" CACHE STRING "Boost version to download/use" ) +set( ext.Boost_VERSION "1.60.0" CACHE STRING "Boost version to download/use" ) mark_as_advanced( ext.Boost_VERSION ) string( REPLACE "." 
"_" ext.Boost_Version_Underscore ${ext.Boost_VERSION} ) @@ -105,7 +105,7 @@ elseif( DEFINED ENV{CC} ) list( APPEND Boost.Command toolset=${gccToolset} ) endif( ) -if( WIN32 ) +if( WIN32 AND (ext.Boost_VERSION VERSION_LESS "1.60.0") ) list( APPEND Boost.Command define=BOOST_LOG_USE_WINNT6_API ) endif( ) @@ -151,16 +151,16 @@ if( WIN32 ) if( CMAKE_VERSION VERSION_LESS "3.1.0" ) # .zip file - set( ext.MD5_HASH "08d29a2d85db3ebc8c6fdfa3a1f2b83c" ) + set( ext.MD5_HASH "0cc5b9cf9ccdf26945b225c7338b4288" ) else( ) # .7z file - set( ext.MD5_HASH "0a2e512844f3e30a6240f8139ee983f3" ) + set( ext.MD5_HASH "7ce7f5a4e396484da8da6b60d4ed7661" ) endif( ) else( ) set( Boost.Bootstrap "./bootstrap.sh" ) # .tar.bz2 - set( ext.MD5_HASH "6aa9a5c6a4ca1016edd0ed1178e3cb87" ) + set( ext.MD5_HASH "65a840e1a0b13a558ff19eeb2c4f0cbe" ) if( XCODE_VERSION ) list( APPEND Boost.Bootstrap --with-toolset=clang ) diff --git a/src/benchmarks/CMakeLists.txt b/src/benchmarks/CMakeLists.txt index aace6ad..9c64853 100644 --- a/src/benchmarks/CMakeLists.txt +++ b/src/benchmarks/CMakeLists.txt @@ -27,7 +27,7 @@ endif( ) # set( Boost_DEBUG ON ) set( Boost_USE_MULTITHREADED ON ) set( Boost_DETAILED_FAILURE_MSG ON ) -set( Boost_ADDITIONAL_VERSIONS 1.59.0 1.59 1.58.0 1.58 1.57.0 1.57 ) +set( Boost_ADDITIONAL_VERSIONS 1.60.0 1.60 1.59.0 1.59 1.58.0 1.58 1.57.0 1.57 ) # On windows, Boost prefers to link statically unless the user defines BOOST_ALL_DYN_LINK # Therefore, we prefer linking static libs first to avoid extra #defines diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index 275dcb0..ea5a8ac 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -626,7 +626,7 @@ extern "C" { * \ingroup FILE */ CLSPARSE_EXPORT clsparseStatus - clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, size_t* metaSize ); + clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, clsparseIdx_t* metaSize ); /*! 
* \brief Calculate the meta-data for csr-adaptive SpM-dV algorithm diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index 4fc058f..0341bec 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -21,7 +21,7 @@ #include "internal/clsparse-control.hpp" clsparseStatus -clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, size_t* metaSize ) +clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, clsparseIdx_t* metaSize ) { clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); @@ -51,8 +51,8 @@ clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ) return clsparseOutOfResources; } - clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - cl_int* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); matrix_meta* meta_ptr = nullptr; if( pCsrMatx->meta ) @@ -70,10 +70,10 @@ clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ) { meta_ptr->rowBlocks = ::cl::Buffer( control->getContext( ), CL_MEM_READ_WRITE, meta_ptr->rowBlockSize * sizeof( cl_ulong ) ); - cl_ulong* ulCsrRowBlocks = static_cast< cl_ulong* >( control->queue.enqueueMapBuffer( meta_ptr->rowBlocks, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, meta_ptr->offRowBlocks, meta_ptr->rowBlockSize ) ); + clMemRAII< cl_ulong > rRowBlocks( control->queue( ), meta_ptr->rowBlocks( ) ); + cl_ulong* ulCsrRowBlocks = rRowBlocks.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, meta_ptr->offRowBlocks, meta_ptr->rowBlockSize ); ComputeRowBlocks( ulCsrRowBlocks, meta_ptr->rowBlockSize, rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR, true ); - control->queue.enqueueUnmapMemObject( meta_ptr->rowBlocks, ulCsrRowBlocks ); } pCsrMatx->meta = meta_ptr; diff --git a/src/library/internal/data-types/csr-meta.hpp b/src/library/internal/data-types/csr-meta.hpp index 21d44d2..50deb67 100644 --- a/src/library/internal/data-types/csr-meta.hpp +++ b/src/library/internal/data-types/csr-meta.hpp @@ -91,7 +91,7 @@ static inline rowBlockType numThreadsForReduction(const rowBlockType num_rows) // rowBlockType is currently instantiated as ulong template< typename rowBlockType > -void ComputeRowBlocks( rowBlockType* rowBlocks, size_t& rowBlockSize, const clsparseIdx_t* rowDelimiters, +void ComputeRowBlocks( rowBlockType* rowBlocks, clsparseIdx_t& rowBlockSize, const clsparseIdx_t* rowDelimiters, const clsparseIdx_t nRows, const int blkSize, const int blkMultiplier, const int rows_for_vector, const bool allocate_row_blocks = true ) { rowBlockType* rowBlocksBase; @@ -263,7 +263,7 @@ void ComputeRowBlocks( rowBlockType* rowBlocks, size_t& rowBlockSize, const clsp inline size_t ComputeRowBlocksSize( const clsparseIdx_t* rowDelimiters, const clsparseIdx_t nRows, const unsigned int blkSize, const unsigned int blkMultiplier, const unsigned int rows_for_vector ) { - size_t rowBlockSize; + clsparseIdx_t rowBlockSize; ComputeRowBlocks( (cl_ulong*)NULL, rowBlockSize, rowDelimiters, nRows, blkSize, blkMultiplier, rows_for_vector, false ); return rowBlockSize; } diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 8b5c341..5e2673e 100644 --- 
a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -29,7 +29,7 @@ endif( ) # set( Boost_DEBUG ON ) set( Boost_USE_MULTITHREADED ON ) set( Boost_DETAILED_FAILURE_MSG ON ) -set( Boost_ADDITIONAL_VERSIONS 1.59.0 1.59 1.58.0 1.58 1.57.0 1.57 ) +set( Boost_ADDITIONAL_VERSIONS 1.60.0 1.60 1.59.0 1.59 1.58.0 1.58 1.57.0 1.57 ) # On windows, Boost prefers to link statically unless the user defines BOOST_ALL_DYN_LINK # Therefore, we prefer linking static libs first to avoid extra #defines diff --git a/src/tests/resources/csr_matrix_environment.h b/src/tests/resources/csr_matrix_environment.h index f989e9e..8a09c72 100644 --- a/src/tests/resources/csr_matrix_environment.h +++ b/src/tests/resources/csr_matrix_environment.h @@ -192,6 +192,8 @@ class CSREnvironment: public ::testing::Environment ::clReleaseMemObject( csrDMatrix.values ); ::clReleaseMemObject( csrDMatrix.colIndices ); ::clReleaseMemObject( csrDMatrix.rowOffsets ); + clsparseCsrMetaDelete( &csrSMatrix ); + clsparseCsrMetaDelete( &csrDMatrix ); //bring csrSMatrix csrDMatrix to its initial state clsparseInitCsrMatrix( &csrSMatrix ); diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp index 5e7604e..de9bc2d 100644 --- a/src/tests/test-blas2.cpp +++ b/src/tests/test-blas2.cpp @@ -359,7 +359,7 @@ TYPED_TEST(Blas2, csrmv_vector) // later use. clsparseStatus status; - size_t metaSize; + clsparseIdx_t metaSize; status = clsparseCsrMetaSize( &CSRE::csrSMatrix, CLSE::control, &metaSize ); ASSERT_EQ(clsparseSuccess, status); From 00158e3d963354327f03b636980683f7388896b7 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 8 Jan 2016 16:13:18 -0600 Subject: [PATCH 16/19] Changing clsparseCsrMetaSize to return meta data size as struct return value --- src/include/clSPARSE.h | 14 ++++++++++++-- src/library/internal/data-types/csr-meta.cpp | 14 ++++++++------ src/tests/test-blas2.cpp | 10 +++++----- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index ea5a8ac..d50fdb2 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -613,6 +613,16 @@ extern "C" { CLSPARSE_EXPORT clsparseStatus clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, clsparseControl control, cl_bool read_explicit_zeroes ); + /*! \brief A structure returned by value from the clsparseCsrMetaSize + * function. This serves as a result/status pair for the size of the + * meta data associated with a sparse matrix. + */ + typedef struct _clsparseMetaSizeResult + { + clsparseIdx_t metaSize; + clsparseStatus status; + } clsparseMetaSizeResult; + /*! * \brief Calculate the amount of device memory required to hold meta-data for csr-adaptive SpM-dV algorithm * \details CSR-adaptive is a high performance sparse matrix times dense vector algorithm. It requires a pre-processing @@ -625,8 +635,8 @@ extern "C" { * * \ingroup FILE */ - CLSPARSE_EXPORT clsparseStatus - clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, clsparseIdx_t* metaSize ); + CLSPARSE_EXPORT clsparseMetaSizeResult + clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ); /*! 
* \brief Calculate the meta-data for csr-adaptive SpM-dV algorithm diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index 0341bec..0018ae3 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -20,23 +20,25 @@ #include "include/clSPARSE-private.hpp" #include "internal/clsparse-control.hpp" -clsparseStatus -clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control, clsparseIdx_t* metaSize ) +clsparseMetaSizeResult +clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ) { + clsparseMetaSizeResult sizeResult; + sizeResult.status = clsparseSuccess; clsparseCsrMatrixPrivate* pCsrMatx = static_cast( csrMatx ); if( csrMatx->meta ) { - *metaSize = static_cast< matrix_meta* >( pCsrMatx->meta )->rowBlockSize; + sizeResult.metaSize = static_cast< matrix_meta* >( pCsrMatx->meta )->rowBlockSize; - return clsparseSuccess; + return sizeResult; } clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); - *metaSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); + sizeResult.metaSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); - return clsparseSuccess; + return sizeResult; } clsparseStatus diff --git a/src/tests/test-blas2.cpp b/src/tests/test-blas2.cpp index de9bc2d..84d9d8d 100644 --- a/src/tests/test-blas2.cpp +++ b/src/tests/test-blas2.cpp @@ -359,12 +359,12 @@ TYPED_TEST(Blas2, csrmv_vector) // later use. clsparseStatus status; - clsparseIdx_t metaSize; - status = clsparseCsrMetaSize( &CSRE::csrSMatrix, CLSE::control, &metaSize ); - ASSERT_EQ(clsparseSuccess, status); + clsparseMetaSizeResult sizeResult; + sizeResult = clsparseCsrMetaSize( &CSRE::csrSMatrix, CLSE::control ); + ASSERT_EQ(clsparseSuccess, sizeResult.status ); - status = clsparseCsrMetaSize( &CSRE::csrDMatrix, CLSE::control, &metaSize ); - ASSERT_EQ(clsparseSuccess, status); + sizeResult = clsparseCsrMetaSize( &CSRE::csrDMatrix, CLSE::control ); + ASSERT_EQ(clsparseSuccess, sizeResult.status ); status = clsparseCsrMetaCreate(&CSRE::csrSMatrix, CLSE::control ); ASSERT_EQ (clsparseSuccess, status); From e92f41b2269793e3c26b40429793e93bc3eabd4d Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 8 Jan 2016 16:42:53 -0600 Subject: [PATCH 17/19] Changing a few member variables of our public classes to be more consistent with each other Following a style advocated by cppcoreguidelines: https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rl-name --- samples/sample-cg.cpp | 8 +- samples/sample-spmv.cpp | 8 +- .../functions/clfunc-xSpMdM.hpp | 12 +-- .../functions/clfunc_xBiCGStab.hpp | 12 +-- .../clsparse-bench/functions/clfunc_xCG.hpp | 12 +-- .../functions/clfunc_xCoo2Csr.hpp | 20 ++-- .../functions/clfunc_xCsr2Coo.hpp | 44 ++++----- .../functions/clfunc_xCsr2Dense.hpp | 16 ++-- .../functions/clfunc_xDense2Csr.hpp | 28 +++--- .../functions/clfunc_xSpMSpM.hpp | 30 +++--- .../functions/clfunc_xSpMdV.hpp | 12 +-- src/include/clSPARSE-1x.h | 26 +++--- src/include/clSPARSE-2x.h | 8 +- src/library/blas2/csrmv-adaptive.hpp | 4 +- src/library/blas2/csrmv-vector.hpp | 8 +- src/library/blas3/clsparse-csrmm.hpp | 4 +- src/library/blas3/clsparse-spm-spm.cpp | 18 ++-- src/library/include/clSPARSE-1x.hpp | 32 
+++---- src/library/include/clSPARSE-2x.hpp | 4 +- src/library/internal/data-types/csr-meta.cpp | 8 +- src/library/io/mm-reader.cpp | 92 +++++++++---------- .../preconditioners/preconditioner_utils.hpp | 8 +- src/library/transform/clsparse-coo2csr.cpp | 16 ++-- src/library/transform/clsparse-csr2coo.cpp | 16 ++-- src/library/transform/clsparse-csr2dense.cpp | 8 +- src/library/transform/clsparse-dense2csr.cpp | 8 +- src/library/transform/conversion-utils.hpp | 8 +- src/tests/resources/csr_matrix_environment.h | 24 ++--- .../resources/sparse_matrix_environment.h | 32 +++---- src/tests/test-blas3.cpp | 58 ++++++------ src/tests/test-conversion.cpp | 56 +++++------ 31 files changed, 320 insertions(+), 320 deletions(-) diff --git a/samples/sample-cg.cpp b/samples/sample-cg.cpp index d0cfedc..d073c10 100644 --- a/samples/sample-cg.cpp +++ b/samples/sample-cg.cpp @@ -180,10 +180,10 @@ int main (int argc, char* argv[]) A.values = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, A.num_nonzeros * sizeof( float ), NULL, &cl_status ); - A.colIndices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, + A.col_indices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, A.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status ); - A.rowOffsets = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, + A.row_pointer = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); @@ -275,8 +275,8 @@ int main (int argc, char* argv[]) //release mem; clsparseCsrMetaDelete( &A ); clReleaseMemObject ( A.values ); - clReleaseMemObject ( A.colIndices ); - clReleaseMemObject ( A.rowOffsets ); + clReleaseMemObject ( A.col_indices ); + clReleaseMemObject ( A.row_pointer ); clReleaseMemObject ( x.values ); clReleaseMemObject ( b.values ); diff --git a/samples/sample-spmv.cpp b/samples/sample-spmv.cpp index cc166a5..961210e 100644 --- a/samples/sample-spmv.cpp +++ b/samples/sample-spmv.cpp @@ -213,10 +213,10 @@ int main (int argc, char* argv[]) A.values = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, A.num_nonzeros * sizeof( float ), NULL, &cl_status ); - A.colIndices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, + A.col_indices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, A.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status ); - A.rowOffsets = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, + A.row_pointer = ::clCreateBuffer( context(), CL_MEM_READ_ONLY, ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); @@ -302,8 +302,8 @@ int main (int argc, char* argv[]) //release mem; clsparseCsrMetaDelete( &A ); clReleaseMemObject ( A.values ); - clReleaseMemObject ( A.colIndices ); - clReleaseMemObject ( A.rowOffsets ); + clReleaseMemObject ( A.col_indices ); + clReleaseMemObject ( A.row_pointer ); clReleaseMemObject ( x.values ); clReleaseMemObject ( y.values ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp index 6da18bc..791a504 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc-xSpMdM.hpp @@ -119,11 +119,11 @@ class xSpMdM: public clsparseFunc csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); + 
csrMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.col_indices" ); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); + csrMtx.row_pointer = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.row_pointer" ); fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); if( fileError != clsparseSuccess ) @@ -215,8 +215,8 @@ class xSpMdM: public clsparseFunc //need to do this before we eventually hit the destructor clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.col_indices ), "clReleaseMemObject csrMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.row_pointer ), "clReleaseMemObject csrMtx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( denseB.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( denseC.values ), "clReleaseMemObject y.values" ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp index 1483c07..926de7f 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xBiCGStab.hpp @@ -123,13 +123,13 @@ class xBiCGStab : public clsparseFunc csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.col_indices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.col_indices" ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.row_pointer = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.row_pointer" ); if(typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); @@ -215,8 +215,8 @@ class xBiCGStab : public clsparseFunc //need to do this before we eventually hit the destructor clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.col_indices ), "clReleaseMemObject csrMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.row_pointer ), "clReleaseMemObject csrMtx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); diff --git 
a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp index f2b8cbc..82e59d1 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCG.hpp @@ -122,13 +122,13 @@ class xCG : public clsparseFunc csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.col_indices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.col_indices" ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.row_pointer = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.row_pointer" ); if(typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); @@ -214,8 +214,8 @@ class xCG : public clsparseFunc //need to do this before we eventually hit the destructor clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.col_indices ), "clReleaseMemObject csrMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.row_pointer ), "clReleaseMemObject csrMtx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp index 167ccc9..a520f87 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCoo2Csr.hpp @@ -108,9 +108,9 @@ class xCoo2Csr: public clsparseFunc cooMatx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, cooMatx.num_nonzeros * sizeof(T), NULL, &status ); - cooMatx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + cooMatx.col_indices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, cooMatx.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status ); - cooMatx.rowIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + cooMatx.row_indices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, cooMatx.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status ); if (typeid(T) == typeid(float)) @@ -130,9 +130,9 @@ class xCoo2Csr: public clsparseFunc csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, cooMatx.num_nonzeros * sizeof( T ), NULL, &status ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, + csrMtx.col_indices = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, cooMatx.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, + csrMtx.row_pointer = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, ( cooMatx.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &status ); } @@ -151,9 +151,9 @@ class xCoo2Csr: public clsparseFunc clsparseIdx_t scalar_i = 0; T scalar_f = 0; - 
CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMtx.rowOffsets, &scalar_i, sizeof(clsparseIdx_t), 0, + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMtx.row_pointer, &scalar_i, sizeof(clsparseIdx_t), 0, sizeof( clsparseIdx_t ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); - CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( clsparseIdx_t ), 0, + CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.col_indices, &scalar_i, sizeof( clsparseIdx_t ), 0, sizeof( clsparseIdx_t ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" ); CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.values, &scalar_f, sizeof( T ), 0, sizeof( T ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer values" ); @@ -181,12 +181,12 @@ class xCoo2Csr: public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.col_indices ), "clReleaseMemObject csrMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.row_pointer ), "clReleaseMemObject csrMtx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( cooMatx.values ), "clReleaseMemObject cooMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( cooMatx.colIndices ), "clReleaseMemObject cooMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( cooMatx.rowIndices ), "clReleaseMemObject cooMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( cooMatx.col_indices ), "clReleaseMemObject cooMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( cooMatx.row_indices ), "clReleaseMemObject cooMtx.row_pointer" ); } private: diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp index 294cee2..11b0878 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Coo.hpp @@ -86,7 +86,7 @@ class xCsr2Coo : public clsparseFunc { #if 0 //Check VK - //Host to GPU: CSR-> [rowOffsets(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) + //Host to GPU: CSR-> [row_pointer(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) //GPU to Host: Coo - > row_indices + Col_indices + Values- > [sizeof(T) * num_nonzero] + sizeof(int) size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1) + sizeof(T) * (csrMtx.num_nonzeros) + sizeof(T) * (cooMtx.num_nonzeros) + sizeof(cl_int) * (cooMtx.num_nonzeros * 2); @@ -125,11 +125,11 @@ class xCsr2Coo : public clsparseFunc csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); + csrMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); + CLSPARSE_V(status, "::clCreateBuffer csrMtx.col_indices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * 
sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); + csrMtx.row_pointer = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); + CLSPARSE_V(status, "::clCreateBuffer csrMtx.row_pointer"); if (typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control, explicit_zeroes); @@ -153,13 +153,13 @@ class xCsr2Coo : public clsparseFunc cooMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer cooMtx.values"); - cooMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, + cooMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, cooMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer cooMtx.colIndices"); + CLSPARSE_V(status, "::clCreateBuffer cooMtx.col_indices"); - cooMtx.rowIndices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, + cooMtx.row_indices = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, cooMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer cooMtx.rowIndices"); + CLSPARSE_V(status, "::clCreateBuffer cooMtx.row_indices"); }// end @@ -174,12 +174,12 @@ class xCsr2Coo : public clsparseFunc cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); clsparseIdx_t scalarIntZero = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, - cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.row_indices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.row_indices"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, - cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.col_indices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.col_indices"); }// end @@ -190,12 +190,12 @@ class xCsr2Coo : public clsparseFunc cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values"); clsparseIdx_t scalarIntZero = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, - cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.row_indices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.row_indices"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(clsparseIdx_t), 0, - cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices"); + CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.col_indices, &scalarIntZero, sizeof(clsparseIdx_t), 0, + cooMtx.num_nonzeros * sizeof(clsparseIdx_t), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.col_indices"); }// end void read_gpu_buffer() @@ -210,7 +210,7 @@ class xCsr2Coo : public clsparseFunc #if 0 // Need to verify this calculation VK //size_t sparseBytes = sizeof(cl_int) * (csrMtx.nnz + csrMtx.m) + sizeof(T) * (csrMtx.nnz + csrMtx.n + csrMtx.m); - //Host to GPU: CSR-> [rowOffsets(num_rows + 1) 
+ Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) + //Host to GPU: CSR-> [row_pointer(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) //GPU to Host: Coo - > row_indices + Col_indices + Values- > [sizeof(T) * num_nonzero] + sizeof(int) size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1) + sizeof(T) * (csrMtx.num_nonzeros) + sizeof(T) * (cooMtx.num_nonzeros) + sizeof(cl_int) * (cooMtx.num_nonzeros * 2); @@ -237,12 +237,12 @@ class xCsr2Coo : public clsparseFunc //need to do this before we eventually hit the destructor clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); + CLSPARSE_V(::clReleaseMemObject(csrMtx.col_indices), "clReleaseMemObject csrMtx.col_indices"); + CLSPARSE_V(::clReleaseMemObject(csrMtx.row_pointer), "clReleaseMemObject csrMtx.row_pointer"); CLSPARSE_V(::clReleaseMemObject(cooMtx.values), "clReleaseMemObject cooMtx.values"); - CLSPARSE_V(::clReleaseMemObject(cooMtx.colIndices), "clReleaseMemObject cooMtx.colIndices"); - CLSPARSE_V(::clReleaseMemObject(cooMtx.rowIndices), "clReleaseMemObject cooMtx.rowIndices"); + CLSPARSE_V(::clReleaseMemObject(cooMtx.col_indices), "clReleaseMemObject cooMtx.col_indices"); + CLSPARSE_V(::clReleaseMemObject(cooMtx.row_indices), "clReleaseMemObject cooMtx.row_indices"); } private: diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp index 95049f6..48134c4 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xCsr2Dense.hpp @@ -87,7 +87,7 @@ class xCsr2Dense : public clsparseFunc { #if 0 //Check VK - //Host to GPU: CSR-> [rowOffsets(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) + //Host to GPU: CSR-> [row_pointer(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) //GPU to Host: Dense - > [sizeof(T) * denseMtx.num_rows * denseMTx.num_cols] size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1) + sizeof(T) * (csrMtx.num_nonzeros) + sizeof(T) * (denseMtx.num_rows * denseMtx.num_cols); return (sparseBytes / time_in_ns()); @@ -124,11 +124,11 @@ class xCsr2Dense : public clsparseFunc csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); + csrMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); + CLSPARSE_V(status, "::clCreateBuffer csrMtx.col_indices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); + csrMtx.row_pointer = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); + CLSPARSE_V(status, "::clCreateBuffer csrMtx.row_pointer"); if (typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes ); @@ 
-183,7 +183,7 @@ class xCsr2Dense : public clsparseFunc #if 0 // Need to verify this calculation VK //size_t sparseBytes = sizeof(cl_int) * (csrMtx.nnz + csrMtx.m) + sizeof(T) * (csrMtx.nnz + csrMtx.n + csrMtx.m); - //Host to GPU: CSR-> [rowOffsets(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) + //Host to GPU: CSR-> [row_pointer(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero) //GPU to Host: Dense - > [sizeof(T) * denseMtx.num_rows * denseMTx.num_cols] size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1) + sizeof(T) * (csrMtx.num_nonzeros) + sizeof(T) * (denseMtx.num_rows * denseMtx.num_cols); cpuTimer->pruneOutliers(3.0); @@ -209,8 +209,8 @@ class xCsr2Dense : public clsparseFunc //need to do this before we eventually hit the destructor clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); + CLSPARSE_V(::clReleaseMemObject(csrMtx.col_indices), "clReleaseMemObject csrMtx.col_indices"); + CLSPARSE_V(::clReleaseMemObject(csrMtx.row_pointer), "clReleaseMemObject csrMtx.row_pointer"); CLSPARSE_V(::clReleaseMemObject(denseMtx.values), "clReleaseMemObject denseMtx.values"); } diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp index 775d588..a9a3d90 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xDense2Csr.hpp @@ -127,13 +127,13 @@ class xDense2Csr: public clsparseFunc csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.col_indices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.col_indices" ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.row_pointer = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.row_pointer" ); if(typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); @@ -175,13 +175,13 @@ class xDense2Csr: public clsparseFunc csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V(status, "::clCreateBuffer csrMatx.values"); - csrMatx.colIndices = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, + csrMatx.col_indices = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMatx.colIndices"); + CLSPARSE_V(status, "::clCreateBuffer csrMatx.col_indices"); - csrMatx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, + csrMatx.row_pointer = ::clCreateBuffer( ctx, CL_MEM_WRITE_ONLY, (csrMtx.num_rows + 1) * sizeof( clsparseIdx_t ), NULL, &status ); - CLSPARSE_V(status, "::clCreateBuffer csrMatx.rowOffsets"); + CLSPARSE_V(status, "::clCreateBuffer csrMatx.row_pointer"); }// End of function void initialize_cpu_buffer( ) @@ -197,10 +197,10 @@ class xDense2Csr: public 
clsparseFunc clsparseIdx_t scalar_i = 0; T scalar_f = 0; - CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.rowOffsets, &scalar_i, sizeof(clsparseIdx_t), 0, + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.row_pointer, &scalar_i, sizeof(clsparseIdx_t), 0, sizeof(clsparseIdx_t) * (csrMatx.num_rows + 1), 0, NULL, NULL), "::clEnqueueFillBuffer row"); - CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.colIndices, &scalar_i, sizeof(clsparseIdx_t), 0, + CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.col_indices, &scalar_i, sizeof(clsparseIdx_t), 0, sizeof(clsparseIdx_t) * csrMatx.num_nonzeros, 0, NULL, NULL), "::clEnqueueFillBuffer col"); CLSPARSE_V(::clEnqueueFillBuffer(queue, csrMatx.values, &scalar_f, sizeof(T), 0, @@ -240,12 +240,12 @@ class xDense2Csr: public clsparseFunc //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.col_indices ), "clReleaseMemObject csrMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.row_pointer ), "clReleaseMemObject csrMtx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( csrMatx.values ), "clReleaseMemObject csrMatx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMatx.colIndices ), "clReleaseMemObject csrMatx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMatx.rowOffsets ), "clReleaseMemObject csrMatx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMatx.col_indices ), "clReleaseMemObject csrMatx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMatx.row_pointer ), "clReleaseMemObject csrMatx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( A.values ), "clReleaseMemObject A.values" ); }// End of function diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp index e810fe3..cd73302 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMSpM.hpp @@ -132,13 +132,13 @@ class xSpMSpM : public clsparseFunc { csrMtx.num_nonzeros * sizeof(T), NULL, &status); CLSPARSE_V(status, "::clCreateBuffer csrMtx.values"); - csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, + csrMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices"); + CLSPARSE_V(status, "::clCreateBuffer csrMtx.col_indices"); - csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, + csrMtx.row_pointer = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets"); + CLSPARSE_V(status, "::clCreateBuffer csrMtx.row_pointer"); #if 0 csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.rowBlockSize * sizeof(cl_ulong), NULL, &status); @@ -192,8 +192,8 @@ class xSpMSpM : public clsparseFunc { { // Every call to clsparseScsrSpGemm() allocates memory to csrMtxC, therefore freeing the memory CLSPARSE_V(::clReleaseMemObject(csrMtxC.values), "clReleaseMemObject csrMtxC.values"); - CLSPARSE_V(::clReleaseMemObject(csrMtxC.colIndices), "clReleaseMemObject 
csrMtxC.colIndices"); - CLSPARSE_V(::clReleaseMemObject(csrMtxC.rowOffsets), "clReleaseMemObject csrMtxC.rowOffsets"); + CLSPARSE_V(::clReleaseMemObject(csrMtxC.col_indices), "clReleaseMemObject csrMtxC.col_indices"); + CLSPARSE_V(::clReleaseMemObject(csrMtxC.row_pointer), "clReleaseMemObject csrMtxC.row_pointer"); // Initilize the output CSR Matrix clsparseInitCsrMatrix(&csrMtxC); @@ -221,18 +221,18 @@ class xSpMSpM : public clsparseFunc { //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp) //need to do this before we eventually hit the destructor CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices"); - CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets"); + CLSPARSE_V(::clReleaseMemObject(csrMtx.col_indices), "clReleaseMemObject csrMtx.col_indices"); + CLSPARSE_V(::clReleaseMemObject(csrMtx.row_pointer), "clReleaseMemObject csrMtx.row_pointer"); //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks"); if (csrMtxC.values != nullptr) CLSPARSE_V(::clReleaseMemObject(csrMtxC.values), "clReleaseMemObject csrMtxC.values"); - if (csrMtxC.colIndices != nullptr) - CLSPARSE_V(::clReleaseMemObject(csrMtxC.colIndices), "clReleaseMemObject csrMtxC.colIndices"); + if (csrMtxC.col_indices != nullptr) + CLSPARSE_V(::clReleaseMemObject(csrMtxC.col_indices), "clReleaseMemObject csrMtxC.col_indices"); - if (csrMtxC.rowOffsets != nullptr) - CLSPARSE_V(::clReleaseMemObject(csrMtxC.rowOffsets), "clReleaseMemObject csrMtxC.rowOffsets"); + if (csrMtxC.row_pointer != nullptr) + CLSPARSE_V(::clReleaseMemObject(csrMtxC.row_pointer), "clReleaseMemObject csrMtxC.row_pointer"); //CLSPARSE_V(::clReleaseMemObject(csrMtxC.rowBlocks), "clReleaseMemObject csrMtxC.rowBlocks"); CLSPARSE_V(::clReleaseMemObject(a.value), "clReleaseMemObject alpha.value"); @@ -254,16 +254,16 @@ class xSpMSpM : public clsparseFunc { cl_int run_status = 0; run_status = clEnqueueReadBuffer(queue, - csrMtx.colIndices, + csrMtx.col_indices, CL_TRUE, 0, nnzA*sizeof(clsparseIdx_t), colIdxA.data(), 0, nullptr, nullptr); - CLSPARSE_V(run_status, "Reading colIndices from GPU failed"); + CLSPARSE_V(run_status, "Reading col_indices from GPU failed"); // copy rowptrs run_status = clEnqueueReadBuffer(queue, - csrMtx.rowOffsets, + csrMtx.row_pointer, CL_TRUE, 0, Browptrlen*sizeof(clsparseIdx_t), rowptrB.data(), 0, nullptr, nullptr); diff --git a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp index a4b7564..32914b1 100644 --- a/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp +++ b/src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp @@ -121,13 +121,13 @@ class xSpMdV: public clsparseFunc csrMtx.num_nonzeros * sizeof( T ), NULL, &status ); CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" ); - csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.col_indices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.col_indices" ); - csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, + csrMtx.row_pointer = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status); - CLSPARSE_V( status, 
"::clCreateBuffer csrMtx.rowOffsets" ); + CLSPARSE_V( status, "::clCreateBuffer csrMtx.row_pointer" ); if(typeid(T) == typeid(float)) fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control, explicit_zeroes ); @@ -220,8 +220,8 @@ class xSpMdV: public clsparseFunc //need to do this before we eventually hit the destructor clsparseCsrMetaDelete( &csrMtx ); CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" ); - CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.col_indices ), "clReleaseMemObject csrMtx.col_indices" ); + CLSPARSE_V( ::clReleaseMemObject( csrMtx.row_pointer ), "clReleaseMemObject csrMtx.row_pointer" ); CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" ); CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" ); diff --git a/src/include/clSPARSE-1x.h b/src/include/clSPARSE-1x.h index 09cc14d..c51bf18 100644 --- a/src/include/clSPARSE-1x.h +++ b/src/include/clSPARSE-1x.h @@ -35,7 +35,7 @@ typedef struct clsparseScalar_ /*! Given that cl_mem objects are opaque without pointer arithmetic, this offset is added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - clsparseIdx_t offValue; + clsparseIdx_t off_value; } clsparseScalar; /*! \brief Structure to encapsulate dense vector data to clSPARSE API @@ -49,7 +49,7 @@ typedef struct cldenseVector_ /*! Given that cl_mem objects are opaque without pointer arithmetic, this offset is added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - clsparseIdx_t offValues; + clsparseIdx_t off_values; } cldenseVector; /*! \brief Structure to encapsulate sparse matrix data encoded in CSR @@ -69,8 +69,8 @@ typedef struct clsparseCsrMatrix_ /** @name OpenCL state */ /**@{*/ cl_mem values; /*!< non-zero values in sparse matrix of size num_nonzeros */ - cl_mem colIndices; /*!< column index for corresponding value of size num_nonzeros */ - cl_mem rowOffsets; /*!< Invariant: rowOffsets[i+1]-rowOffsets[i] = number of values in row i */ + cl_mem col_indices; /*!< column index for corresponding value of size num_nonzeros */ + cl_mem row_pointer; /*!< Invariant: row_pointer[i+1]-row_pointer[i] = number of values in row i */ /**@}*/ /** @name Buffer offsets */ @@ -78,9 +78,9 @@ typedef struct clsparseCsrMatrix_ /*! Given that cl_mem objects are opaque without pointer arithmetic, these offsets are added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - clsparseIdx_t offValues; - clsparseIdx_t offColInd; - clsparseIdx_t offRowOff; + clsparseIdx_t off_values; + clsparseIdx_t off_col_indices; + clsparseIdx_t off_row_pointer; /**@}*/ /*! 
Pointer to a private structure that contains meta-information the library keeps on a @@ -106,8 +106,8 @@ typedef struct clsparseCooMatrix_ /** @name OpenCL state */ /**@{*/ cl_mem values; /*!< CSR non-zero values of size num_nonzeros */ - cl_mem colIndices; /*!< column index for corresponding element; array size num_nonzeros */ - cl_mem rowIndices; /*!< row index for corresponding element; array size num_nonzeros */ + cl_mem col_indices; /*!< column index for corresponding element; array size num_nonzeros */ + cl_mem row_indices; /*!< row index for corresponding element; array size num_nonzeros */ /**@}*/ /** @name Buffer offsets */ @@ -115,9 +115,9 @@ typedef struct clsparseCooMatrix_ /*! Given that cl_mem objects are opaque without pointer arithmetic, these offsets are added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - clsparseIdx_t offValues; - clsparseIdx_t offColInd; - clsparseIdx_t offRowInd; + clsparseIdx_t off_values; + clsparseIdx_t off_col_indices; + clsparseIdx_t off_row_indices; /**@}*/ } clsparseCooMatrix; @@ -139,7 +139,7 @@ typedef struct cldenseMatrix_ /*! Given that cl_mem objects are opaque without pointer arithmetic, these offsets are added to * the cl_mem locations on device to define beginning of the data in the cl_mem buffers */ - clsparseIdx_t offValues; + clsparseIdx_t off_values; } cldenseMatrix; #endif diff --git a/src/include/clSPARSE-2x.h b/src/include/clSPARSE-2x.h index b1e77ad..370aa45 100644 --- a/src/include/clSPARSE-2x.h +++ b/src/include/clSPARSE-2x.h @@ -58,8 +58,8 @@ typedef struct clsparseCsrMatrix_ /** @name OpenCL state */ /**@{*/ void* values; /*!< non-zero values in sparse matrix of size num_nonzeros */ - void* colIndices; /*!< column index for corresponding value of size num_nonzeros */ - void* rowOffsets; /*!< Invariant: rowOffsets[i+1]-rowOffsets[i] = number of values in row i */ + void* col_indices; /*!< column index for corresponding value of size num_nonzeros */ + void* row_pointer; /*!< Invariant: row_pointer[i+1]-row_pointer[i] = number of values in row i */ /**@}*/ /*! 
Pointer to a private structure that contains meta-information the library keeps on a @@ -85,8 +85,8 @@ typedef struct clsparseCooMatrix_ /** @name OpenCL state */ /**@{*/ void* values; /*!< CSR non-zero values of size num_nonzeros */ - void* colIndices; /*!< column index for corresponding element; array size num_nonzeros */ - void* rowIndices; /*!< row index for corresponding element; array size num_nonzeros */ + void* col_indices; /*!< column index for corresponding element; array size num_nonzeros */ + void* row_indices; /*!< row index for corresponding element; array size num_nonzeros */ /**@}*/ } clsparseCooMatrix; diff --git a/src/library/blas2/csrmv-adaptive.hpp b/src/library/blas2/csrmv-adaptive.hpp index 9c234bb..b51354b 100644 --- a/src/library/blas2/csrmv-adaptive.hpp +++ b/src/library/blas2/csrmv-adaptive.hpp @@ -91,7 +91,7 @@ csrmv_adaptive( const clsparseScalarPrivate* pAlpha, const matrix_meta* meta_ptr = static_cast< const matrix_meta* >( pCsrMatx->meta ); kWrapper << pCsrMatx->values - << pCsrMatx->colIndices << pCsrMatx->rowOffsets + << pCsrMatx->col_indices << pCsrMatx->row_pointer << pX->values << pY->values << meta_ptr->rowBlocks << pAlpha->value << pBeta->value; @@ -187,7 +187,7 @@ csrmv_adaptive( const clsparse::array_base& pAlpha, const matrix_meta* meta_ptr = static_cast< const matrix_meta* >( pCsrMatx->meta ); kWrapper << pCsrMatx->values - << pCsrMatx->colIndices << pCsrMatx->rowOffsets + << pCsrMatx->col_indices << pCsrMatx->row_pointer << pX.data() << pY.data() << meta_ptr->rowBlocks << pAlpha.data() << pBeta.data(); diff --git a/src/library/blas2/csrmv-vector.hpp b/src/library/blas2/csrmv-vector.hpp index fa26fc8..3e6005a 100644 --- a/src/library/blas2/csrmv-vector.hpp +++ b/src/library/blas2/csrmv-vector.hpp @@ -90,8 +90,8 @@ csrmv_vector(const clsparseScalarPrivate* pAlpha, kWrapper << pMatx->num_rows << pAlpha->value << pAlpha->offset() - << pMatx->rowOffsets - << pMatx->colIndices + << pMatx->row_pointer + << pMatx->col_indices << pMatx->values << pX->values << pX->offset() << pBeta->value << pBeta->offset() @@ -194,8 +194,8 @@ csrmv_vector(const clsparse::array_base& pAlpha, kWrapper << pMatx->num_rows << pAlpha.data() << offset - << pMatx->rowOffsets - << pMatx->colIndices + << pMatx->row_pointer + << pMatx->col_indices << pMatx->values << pX.data() << offset << pBeta.data() << offset diff --git a/src/library/blas3/clsparse-csrmm.hpp b/src/library/blas3/clsparse-csrmm.hpp index e060454..7fb3e3e 100644 --- a/src/library/blas3/clsparse-csrmm.hpp +++ b/src/library/blas3/clsparse-csrmm.hpp @@ -59,7 +59,7 @@ // // KernelWrap kWrapper( kernel ); // -// kWrapper << pSparseCsrA.values << pSparseCsrA.colIndices << pSparseCsrA.rowOffsets << pSparseCsrA.rowBlocks +// kWrapper << pSparseCsrA.values << pSparseCsrA.col_indices << pSparseCsrA.row_pointer << pSparseCsrA.rowBlocks // << pDenseB.values << pDenseB.lead_dim // << pDenseC.values << pDenseC.num_rows << pDenseC.num_cols << pDenseC.lead_dim // << pAlpha.value << pBeta.value; @@ -148,7 +148,7 @@ const clsparseControl control ) kWrapper << pSparseCsrA.num_rows << pAlpha.value << pAlpha.offset( ) - << pSparseCsrA.rowOffsets << pSparseCsrA.colIndices << pSparseCsrA.values + << pSparseCsrA.row_pointer << pSparseCsrA.col_indices << pSparseCsrA.values << pDenseB.values << pDenseB.lead_dim << pDenseB.offset( ) << pBeta.value << pBeta.offset( ) << pDenseC.values << pDenseC.num_rows << pDenseC.num_cols << pDenseC.lead_dim << pDenseC.offset( ); diff --git a/src/library/blas3/clsparse-spm-spm.cpp 
b/src/library/blas3/clsparse-spm-spm.cpp index 494800a..bd1374a 100644 --- a/src/library/blas3/clsparse-spm-spm.cpp +++ b/src/library/blas3/clsparse-spm-spm.cpp @@ -741,21 +741,21 @@ int copy_Ct_to_C_opencl(int *counter_one, cl_mem csrValC, cl_mem csrRowPtrC, cl_ return clsparseInvalidKernelExecution; } - cl_mem csrRowPtrA = matA->rowOffsets; - cl_mem csrColIndA = matA->colIndices; + cl_mem csrRowPtrA = matA->row_pointer; + cl_mem csrColIndA = matA->col_indices; cl_mem csrValA = matA->values; - cl_mem csrRowPtrB = matB->rowOffsets; - cl_mem csrColIndB = matB->colIndices; + cl_mem csrRowPtrB = matB->row_pointer; + cl_mem csrColIndB = matB->col_indices; cl_mem csrValB = matB->values; cl::Context cxt = control->getContext(); - matC->rowOffsets = ::clCreateBuffer( cxt(), CL_MEM_READ_WRITE, (m + 1) * sizeof( cl_int ), NULL, &run_status ); + matC->row_pointer = ::clCreateBuffer( cxt(), CL_MEM_READ_WRITE, (m + 1) * sizeof( cl_int ), NULL, &run_status ); int pattern = 0; - clEnqueueFillBuffer(control->queue(), matC->rowOffsets, &pattern, sizeof(cl_int), 0, (m + 1)*sizeof(cl_int), 0, NULL, NULL); + clEnqueueFillBuffer(control->queue(), matC->row_pointer, &pattern, sizeof(cl_int), 0, (m + 1)*sizeof(cl_int), 0, NULL, NULL); - cl_mem csrRowPtrC = matC->rowOffsets; + cl_mem csrRowPtrC = matC->row_pointer; std::vector csrRowPtrC_h(m + 1, 0); @@ -835,10 +835,10 @@ int copy_Ct_to_C_opencl(int *counter_one, cl_mem csrValC, cl_mem csrRowPtrC, cl_ int nnzC = csrRowPtrC_h[m]; //std::cout << "nnzC = " << nnzC << std::endl; - matC->colIndices = ::clCreateBuffer( cxt(), CL_MEM_READ_WRITE, nnzC * sizeof( cl_int ), NULL, &run_status ); + matC->col_indices = ::clCreateBuffer( cxt(), CL_MEM_READ_WRITE, nnzC * sizeof( cl_int ), NULL, &run_status ); matC->values = ::clCreateBuffer( cxt(), CL_MEM_READ_WRITE, nnzC * sizeof( cl_float ), NULL, &run_status ); - cl_mem csrColIndC = matC->colIndices; + cl_mem csrColIndC = matC->col_indices; cl_mem csrValC = matC->values; run_status = clEnqueueWriteBuffer(control->queue(), diff --git a/src/library/include/clSPARSE-1x.hpp b/src/library/include/clSPARSE-1x.hpp index e769f22..eb89b02 100644 --- a/src/library/include/clSPARSE-1x.hpp +++ b/src/library/include/clSPARSE-1x.hpp @@ -176,12 +176,12 @@ class clsparseScalarPrivate: public clsparseScalar void clear( ) { value = nullptr; - offValue = 0; + off_value = 0; } clsparseIdx_t offset() const { - return offValue; + return off_value; } }; @@ -192,12 +192,12 @@ class cldenseVectorPrivate: public cldenseVector { num_values = 0; values = nullptr; - offValues = 0; + off_values = 0; } clsparseIdx_t offset() const { - return offValues; + return off_values; } }; @@ -207,8 +207,8 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix void clear( ) { num_rows = num_cols = num_nonzeros = 0; - values = colIndices = rowOffsets = nullptr; - offValues = offColInd = offRowOff = 0; + values = col_indices = row_pointer = nullptr; + off_values = off_col_indices = off_row_pointer = 0; meta = nullptr; } @@ -219,17 +219,17 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix clsparseIdx_t valOffset() const { - return offValues; + return off_values; } clsparseIdx_t colIndOffset() const { - return offColInd; + return off_col_indices; } clsparseIdx_t rowOffOffset() const { - return offRowOff; + return off_row_pointer; } }; @@ -239,8 +239,8 @@ class clsparseCooMatrixPrivate: public clsparseCooMatrix void clear( ) { num_rows = num_cols = num_nonzeros = 0; - values = colIndices = rowIndices = nullptr; - offValues = offColInd = offRowInd = 0; + 
values = col_indices = row_indices = nullptr; + off_values = off_col_indices = off_row_indices = 0; } clsparseIdx_t nnz_per_row( ) const @@ -250,17 +250,17 @@ class clsparseCooMatrixPrivate: public clsparseCooMatrix clsparseIdx_t valOffset() const { - return offValues; + return off_values; } clsparseIdx_t colIndOffset() const { - return offColInd; + return off_col_indices; } clsparseIdx_t rowOffOffset() const { - return offRowInd; + return off_row_indices; } }; @@ -270,14 +270,14 @@ class cldenseMatrixPrivate: public cldenseMatrix void clear( ) { num_rows = num_cols = lead_dim = 0; - offValues = 0; + off_values = 0; major = rowMajor; values = nullptr; } clsparseIdx_t offset() const { - return offValues; + return off_values; } }; diff --git a/src/library/include/clSPARSE-2x.hpp b/src/library/include/clSPARSE-2x.hpp index 7a9b47d..7a5f0aa 100644 --- a/src/library/include/clSPARSE-2x.hpp +++ b/src/library/include/clSPARSE-2x.hpp @@ -178,7 +178,7 @@ class clsparseCsrMatrixPrivate: public clsparseCsrMatrix void clear( ) { num_rows = num_cols = num_nonzeros = 0; - values = colIndices = rowOffsets = rowBlocks = nullptr; + values = col_indices = row_pointer = rowBlocks = nullptr; rowBlockSize = 0; } @@ -214,7 +214,7 @@ class clsparseCooMatrixPrivate: public clsparseCooMatrix void clear( ) { num_rows = num_cols = num_nonzeros = 0; - values = colIndices = rowIndices = nullptr; + values = col_indices = row_indices = nullptr; } clsparseIdx_t nnz_per_row( ) const diff --git a/src/library/internal/data-types/csr-meta.cpp b/src/library/internal/data-types/csr-meta.cpp index 0018ae3..4626676 100644 --- a/src/library/internal/data-types/csr-meta.cpp +++ b/src/library/internal/data-types/csr-meta.cpp @@ -34,8 +34,8 @@ clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ) return sizeResult; } - clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clMemRAII< clsparseIdx_t > rCsrrow_pointer( control->queue( ), pCsrMatx->row_pointer ); + clsparseIdx_t* rowDelimiters = rCsrrow_pointer.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); sizeResult.metaSize = ComputeRowBlocksSize( rowDelimiters, pCsrMatx->num_rows, BLKSIZE, BLOCK_MULTIPLIER, ROWS_FOR_VECTOR ); return sizeResult; @@ -53,8 +53,8 @@ clsparseCsrMetaCreate( clsparseCsrMatrix* csrMatx, clsparseControl control ) return clsparseOutOfResources; } - clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); - clsparseIdx_t* rowDelimiters = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clMemRAII< clsparseIdx_t > rCsrrow_pointer( control->queue( ), pCsrMatx->row_pointer ); + clsparseIdx_t* rowDelimiters = rCsrrow_pointer.clMapMem( CL_TRUE, CL_MAP_READ, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); matrix_meta* meta_ptr = nullptr; if( pCsrMatx->meta ) diff --git a/src/library/io/mm-reader.cpp b/src/library/io/mm-reader.cpp index 7c695a9..48f378a 100644 --- a/src/library/io/mm-reader.cpp +++ b/src/library/io/mm-reader.cpp @@ -498,12 +498,12 @@ clsparseSCooMatrixfromFile( clsparseCooMatrix* cooMatx, const char* filePath, cl // Transfers data from CPU buffer to GPU buffers clMemRAII< cl_float > rCooValues( control->queue( ), pCooMatx->values ); - clMemRAII< clsparseIdx_t > rCooColIndices( control->queue( ), pCooMatx->colIndices ); - clMemRAII< clsparseIdx_t 
> rCooRowIndices( control->queue( ), pCooMatx->rowIndices ); + clMemRAII< clsparseIdx_t > rCoocol_indices( control->queue( ), pCooMatx->col_indices ); + clMemRAII< clsparseIdx_t > rCoorow_indices( control->queue( ), pCooMatx->row_indices ); cl_float* fCooValues = rCooValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->valOffset( ), pCooMatx->num_nonzeros ); - clsparseIdx_t* iCooColIndices = rCooColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); - clsparseIdx_t* iCooRowIndices = rCooRowIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCoocol_indices = rCoocol_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCoorow_indices = rCoorow_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); Coordinate< cl_float >* coords = mm_reader.GetUnsymCoordinates( ); //JPA:: Coo matrix is need to be sorted as well because we need to have matrix @@ -512,8 +512,8 @@ clsparseSCooMatrixfromFile( clsparseCooMatrix* cooMatx, const char* filePath, cl for( clsparseIdx_t c = 0; c < pCooMatx->num_nonzeros; ++c ) { - iCooRowIndices[ c ] = coords[ c ].x; - iCooColIndices[ c ] = coords[ c ].y; + iCoorow_indices[ c ] = coords[ c ].x; + iCoocol_indices[ c ] = coords[ c ].y; fCooValues[ c ] = coords[ c ].val; } @@ -548,12 +548,12 @@ clsparseDCooMatrixfromFile( clsparseCooMatrix* cooMatx, const char* filePath, cl // Transfers data from CPU buffer to GPU buffers clMemRAII< cl_double > rCooValues( control->queue( ), pCooMatx->values ); - clMemRAII< clsparseIdx_t > rCooColIndices( control->queue( ), pCooMatx->colIndices ); - clMemRAII< clsparseIdx_t > rCooRowIndices( control->queue( ), pCooMatx->rowIndices ); + clMemRAII< clsparseIdx_t > rCoocol_indices( control->queue( ), pCooMatx->col_indices ); + clMemRAII< clsparseIdx_t > rCoorow_indices( control->queue( ), pCooMatx->row_indices ); cl_double* fCooValues = rCooValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->valOffset( ), pCooMatx->num_nonzeros ); - clsparseIdx_t* iCooColIndices = rCooColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); - clsparseIdx_t* iCooRowIndices = rCooRowIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCoocol_indices = rCoocol_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->colIndOffset( ), pCooMatx->num_nonzeros ); + clsparseIdx_t* iCoorow_indices = rCoorow_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCooMatx->rowOffOffset( ), pCooMatx->num_nonzeros ); Coordinate< cl_double >* coords = mm_reader.GetUnsymCoordinates( ); //JPA:: Coo matrix is need to be sorted as well because we need to have matrix @@ -562,8 +562,8 @@ clsparseDCooMatrixfromFile( clsparseCooMatrix* cooMatx, const char* filePath, cl for( clsparseIdx_t c = 0; c < pCooMatx->num_nonzeros; ++c ) { - iCooRowIndices[ c ] = coords[ c ].x; - iCooColIndices[ c ] = coords[ c ].y; + iCoorow_indices[ c ] = coords[ c ].x; + iCoocol_indices[ c ] = coords[ c ].y; fCooValues[ c ] = coords[ c ].val; } @@ -604,12 +604,12 @@ clsparseSCsrMatrixfromFile(clsparseCsrMatrix* csrMatx, const char* filePath, cls if (validationStatus != clsparseSuccess) return validationStatus; - validationStatus = 
validateMemObject(pCsrMatx->colIndices, + validationStatus = validateMemObject(pCsrMatx->col_indices, mm_reader.GetNumNonZeroes() * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; - validationStatus = validateMemObject(pCsrMatx->rowOffsets, + validationStatus = validateMemObject(pCsrMatx->row_pointer, (mm_reader.GetNumRows() + 1) * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; @@ -625,30 +625,30 @@ clsparseSCsrMatrixfromFile(clsparseCsrMatrix* csrMatx, const char* filePath, cls // Transfers data from CPU buffer to GPU buffers clMemRAII< cl_float > rCsrValues( control->queue( ), pCsrMatx->values ); - clMemRAII< clsparseIdx_t > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); - clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clMemRAII< clsparseIdx_t > rCsrcol_indices( control->queue( ), pCsrMatx->col_indices ); + clMemRAII< clsparseIdx_t > rCsrrow_pointer( control->queue( ), pCsrMatx->row_pointer ); cl_float* fCsrValues = rCsrValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->valOffset( ), pCsrMatx->num_nonzeros ); - clsparseIdx_t* iCsrColIndices = rCsrColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros ); - clsparseIdx_t* iCsrRowOffsets = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); + clsparseIdx_t* iCsrcol_indices = rCsrcol_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros ); + clsparseIdx_t* iCsrrow_pointer = rCsrrow_pointer.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1 ); // The following section of code converts the sparse format from COO to CSR Coordinate< cl_float >* coords = mm_reader.GetUnsymCoordinates( ); std::sort( coords, coords + pCsrMatx->num_nonzeros, CoordinateCompare< cl_float > ); clsparseIdx_t current_row = 1; - iCsrRowOffsets[ 0 ] = 0; + iCsrrow_pointer[ 0 ] = 0; for (clsparseIdx_t i = 0; i < pCsrMatx->num_nonzeros; i++) { - iCsrColIndices[ i ] = coords[ i ].y; + iCsrcol_indices[ i ] = coords[ i ].y; fCsrValues[ i ] = coords[ i ].val; while( coords[ i ].x >= current_row ) - iCsrRowOffsets[ current_row++ ] = i; + iCsrrow_pointer[ current_row++ ] = i; } - iCsrRowOffsets[ current_row ] = pCsrMatx->num_nonzeros; + iCsrrow_pointer[ current_row ] = pCsrMatx->num_nonzeros; while( current_row <= pCsrMatx->num_rows ) - iCsrRowOffsets[ current_row++ ] = pCsrMatx->num_nonzeros; + iCsrrow_pointer[ current_row++ ] = pCsrMatx->num_nonzeros; return clsparseSuccess; } @@ -688,12 +688,12 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl if (validationStatus != clsparseSuccess) return validationStatus; - validationStatus = validateMemObject(pCsrMatx->colIndices, + validationStatus = validateMemObject(pCsrMatx->col_indices, mm_reader.GetNumNonZeroes() * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; - validationStatus = validateMemObject(pCsrMatx->rowOffsets, + validationStatus = validateMemObject(pCsrMatx->row_pointer, (mm_reader.GetNumRows() + 1) * sizeof(clsparseIdx_t)); if (validationStatus != clsparseSuccess) return validationStatus; @@ -708,8 +708,8 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl // Transfers data from CPU buffer to GPU buffers cl_int mapStatus = 0; clMemRAII< 
cl_double > rCsrValues( control->queue( ), pCsrMatx->values); - clMemRAII< clsparseIdx_t > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); - clMemRAII< clsparseIdx_t > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); + clMemRAII< clsparseIdx_t > rCsrcol_indices( control->queue( ), pCsrMatx->col_indices ); + clMemRAII< clsparseIdx_t > rCsrrow_pointer( control->queue( ), pCsrMatx->row_pointer ); cl_double* fCsrValues = rCsrValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, @@ -720,21 +720,21 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl return clsparseInvalidMemObj; } - clsparseIdx_t* iCsrColIndices = - rCsrColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, + clsparseIdx_t* iCsrcol_indices = + rCsrcol_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros, &mapStatus ); if (mapStatus != CL_SUCCESS) { - CLSPARSE_V(mapStatus, "Error: Mapping rCsrColIndices failed"); + CLSPARSE_V(mapStatus, "Error: Mapping rCsrcol_indices failed"); return clsparseInvalidMemObj; } - clsparseIdx_t* iCsrRowOffsets = - rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, + clsparseIdx_t* iCsrrow_pointer = + rCsrrow_pointer.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->num_rows + 1, &mapStatus ); if (mapStatus != CL_SUCCESS) { - CLSPARSE_V(mapStatus, "Error: Mapping rCsrRowOffsets failed"); + CLSPARSE_V(mapStatus, "Error: Mapping rCsrrow_pointer failed"); return clsparseInvalidMemObj; } @@ -743,18 +743,18 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl std::sort( coords, coords + pCsrMatx->num_nonzeros, CoordinateCompare< cl_double > ); clsparseIdx_t current_row = 1; - iCsrRowOffsets[ 0 ] = 0; + iCsrrow_pointer[ 0 ] = 0; for (clsparseIdx_t i = 0; i < pCsrMatx->num_nonzeros; i++) { - iCsrColIndices[ i ] = coords[ i ].y; + iCsrcol_indices[ i ] = coords[ i ].y; fCsrValues[ i ] = coords[ i ].val; while( coords[ i ].x >= current_row ) - iCsrRowOffsets[ current_row++ ] = i; + iCsrrow_pointer[ current_row++ ] = i; } - iCsrRowOffsets[ current_row ] = pCsrMatx->num_nonzeros; + iCsrrow_pointer[ current_row ] = pCsrMatx->num_nonzeros; while( current_row <= pCsrMatx->num_rows ) - iCsrRowOffsets[ current_row++ ] = pCsrMatx->num_nonzeros; + iCsrrow_pointer[ current_row++ ] = pCsrMatx->num_nonzeros; return clsparseSuccess; } @@ -791,28 +791,28 @@ clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, cl // // Transfers data from CPU buffer to GPU buffers // clMemRAII< cl_float > rCsrValues( control->queue( ), pCsrMatx->values ); -// clMemRAII< cl_int > rCsrColIndices( control->queue( ), pCsrMatx->colIndices ); -// clMemRAII< cl_int > rCsrRowOffsets( control->queue( ), pCsrMatx->rowOffsets ); +// clMemRAII< cl_int > rCsrcol_indices( control->queue( ), pCsrMatx->col_indices ); +// clMemRAII< cl_int > rCsrrow_pointer( control->queue( ), pCsrMatx->row_pointer ); // cl_float* fCsrValues = rCsrValues.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->valOffset( ), pCsrMatx->num_nonzeros ); -// cl_int* iCsrColIndices = rCsrColIndices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros ); -// cl_int* iCsrRowOffsets = rCsrRowOffsets.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->m + 1 ); +// cl_int* iCsrcol_indices = rCsrcol_indices.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 
pCsrMatx->colIndOffset( ), pCsrMatx->num_nonzeros ); +// cl_int* iCsrrow_pointer = rCsrrow_pointer.clMapMem( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, pCsrMatx->rowOffOffset( ), pCsrMatx->m + 1 ); // // The following section of code converts the sparse format from COO to CSR // Coordinate< cl_float >* coords = mm_reader.GetUnsymCoordinates( ); // std::sort( coords, coords + pCsrMatx->num_nonzeros, CoordinateCompare< cl_float > ); // int current_row = 1; -// iCsrRowOffsets[ 0 ] = 0; +// iCsrrow_pointer[ 0 ] = 0; // for( int i = 0; i < pCsrMatx->num_nonzeros; i++ ) // { -// iCsrColIndices[ i ] = coords[ i ].y; +// iCsrcol_indices[ i ] = coords[ i ].y; // fCsrValues[ i ] = coords[ i ].val; // if( coords[ i ].x >= current_row ) -// iCsrRowOffsets[ current_row++ ] = i; +// iCsrrow_pointer[ current_row++ ] = i; // } -// iCsrRowOffsets[ current_row ] = pCsrMatx->num_nonzeros; +// iCsrrow_pointer[ current_row ] = pCsrMatx->num_nonzeros; // return clsparseSuccess; //} diff --git a/src/library/solvers/preconditioners/preconditioner_utils.hpp b/src/library/solvers/preconditioners/preconditioner_utils.hpp index 47eb320..0358312 100644 --- a/src/library/solvers/preconditioners/preconditioner_utils.hpp +++ b/src/library/solvers/preconditioners/preconditioner_utils.hpp @@ -111,8 +111,8 @@ extract_diagonal(cldenseVectorPrivate* pDiag, kWrapper << size << pDiag->values - << pA->rowOffsets - << pA->colIndices + << pA->row_pointer + << pA->col_indices << pA->values; clsparseIdx_t predicted = subwave_size * size; @@ -220,8 +220,8 @@ extract_diagonal(clsparse::vector& pDiag, kWrapper << size << pDiag.data() - << pA->rowOffsets - << pA->colIndices + << pA->row_pointer + << pA->col_indices << pA->values; clsparseIdx_t predicted = subwave_size * size; diff --git a/src/library/transform/clsparse-coo2csr.cpp b/src/library/transform/clsparse-coo2csr.cpp index 8c1a7da..89bb8a2 100644 --- a/src/library/transform/clsparse-coo2csr.cpp +++ b/src/library/transform/clsparse-coo2csr.cpp @@ -40,12 +40,12 @@ clsparseScoo2csr (const clsparseCooMatrix* coo, csr->num_nonzeros = coo->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->row_pointer, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->col_indices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->row_indices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->col_indices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); csr_col_indices = coo_col_indices; @@ -80,12 +80,12 @@ clsparseDcoo2csr ( const clsparseCooMatrix* coo, csr->num_nonzeros = coo->num_nonzeros; // how to obtain proper type of the matrix indices? 
int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->row_pointer, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->col_indices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->row_indices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->col_indices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); csr_col_indices = coo_col_indices; diff --git a/src/library/transform/clsparse-csr2coo.cpp b/src/library/transform/clsparse-csr2coo.cpp index f566334..0b3e313 100644 --- a/src/library/transform/clsparse-csr2coo.cpp +++ b/src/library/transform/clsparse-csr2coo.cpp @@ -40,12 +40,12 @@ clsparseScsr2coo(const clsparseCsrMatrix* csr, coo->num_nonzeros = csr->num_nonzeros; // how to obtain proper type of the matrix indices? int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->row_pointer, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->col_indices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->row_indices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->col_indices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); coo_col_indices = csr_col_indices; @@ -80,12 +80,12 @@ clsparseDcsr2coo(const clsparseCsrMatrix* csr, coo->num_nonzeros = csr->num_nonzeros; // how to obtain proper type of the matrix indices? 
int assumed - clsparse::vector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector csr_col_indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector csr_row_offsets (control, csr->row_pointer, csr->num_rows + 1); + clsparse::vector csr_col_indices (control, csr->col_indices, csr->num_nonzeros); clsparse::vector csr_values (control, csr->values, csr->num_nonzeros); - clsparse::vector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros); - clsparse::vector coo_col_indices (control, coo->colIndices, coo->num_nonzeros); + clsparse::vector coo_row_indices (control, coo->row_indices, coo->num_nonzeros); + clsparse::vector coo_col_indices (control, coo->col_indices, coo->num_nonzeros); clsparse::vector coo_values (control, coo->values, coo->num_nonzeros); coo_col_indices = csr_col_indices; diff --git a/src/library/transform/clsparse-csr2dense.cpp b/src/library/transform/clsparse-csr2dense.cpp index 9a1f663..7698bc4 100644 --- a/src/library/transform/clsparse-csr2dense.cpp +++ b/src/library/transform/clsparse-csr2dense.cpp @@ -50,8 +50,8 @@ clsparseScsr2dense(const clsparseCsrMatrix* csr, return status; - clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector offsets (control, csr->row_pointer, csr->num_rows + 1); + clsparse::vector indices (control, csr->col_indices, csr->num_nonzeros); clsparse::vector values (control, csr->values, csr->num_nonzeros); clsparse::vector Avalues (control, A->values, dense_size); @@ -94,8 +94,8 @@ cldenseMatrix* A, return status; - clsparse::vector offsets (control, csr->rowOffsets, csr->num_rows + 1); - clsparse::vector indices (control, csr->colIndices, csr->num_nonzeros); + clsparse::vector offsets (control, csr->row_pointer, csr->num_rows + 1); + clsparse::vector indices (control, csr->col_indices, csr->num_nonzeros); clsparse::vector values (control, csr->values, csr->num_nonzeros); clsparse::vector Avalues (control, A->values, dense_size); diff --git a/src/library/transform/clsparse-dense2csr.cpp b/src/library/transform/clsparse-dense2csr.cpp index 2fbc4c0..b79cdfb 100644 --- a/src/library/transform/clsparse-dense2csr.cpp +++ b/src/library/transform/clsparse-dense2csr.cpp @@ -86,8 +86,8 @@ clsparseSdense2csr(const cldenseMatrix* A, clsparseCsrMatrix* csr, clReleaseMemObject(coo.values); - clReleaseMemObject(coo.colIndices); - clReleaseMemObject(coo.rowIndices); + clReleaseMemObject(coo.col_indices); + clReleaseMemObject(coo.row_indices); return status; } @@ -157,8 +157,8 @@ clsparseDdense2csr(const cldenseMatrix* A, clReleaseMemObject(coo.values); - clReleaseMemObject(coo.colIndices); - clReleaseMemObject(coo.rowIndices); + clReleaseMemObject(coo.col_indices); + clReleaseMemObject(coo.row_indices); return status; diff --git a/src/library/transform/conversion-utils.hpp b/src/library/transform/conversion-utils.hpp index 869ce38..2e64f95 100644 --- a/src/library/transform/conversion-utils.hpp +++ b/src/library/transform/conversion-utils.hpp @@ -414,11 +414,11 @@ dense_to_coo(clsparseCooMatrix* coo, coo->num_nonzeros * sizeof(V), NULL, &cl_status ); CLSPARSE_V(cl_status, "Create coo values buffer"); - coo->colIndices = clCreateBuffer( control->getContext()(), CL_MEM_READ_WRITE, + coo->col_indices = clCreateBuffer( control->getContext()(), CL_MEM_READ_WRITE, coo->num_nonzeros * sizeof(I), NULL, &cl_status ); CLSPARSE_V(cl_status, "Create coo col indices buffer"); - coo->rowIndices = 
clCreateBuffer(control->getContext()(), CL_MEM_READ_WRITE, + coo->row_indices = clCreateBuffer(control->getContext()(), CL_MEM_READ_WRITE, coo->num_nonzeros * sizeof(I), NULL, &cl_status ); CLSPARSE_V(cl_status, "Create coo row indices buffer"); @@ -467,8 +467,8 @@ dense_to_coo(clsparseCooMatrix* coo, << A.data() << nnz_locations.data() << coo_indexes.data() - << coo->rowIndices - << coo->colIndices + << coo->row_indices + << coo->col_indices << coo->values; cl::NDRange local(workgroup_size); diff --git a/src/tests/resources/csr_matrix_environment.h b/src/tests/resources/csr_matrix_environment.h index 8a09c72..1051c1f 100644 --- a/src/tests/resources/csr_matrix_environment.h +++ b/src/tests/resources/csr_matrix_environment.h @@ -67,10 +67,10 @@ class CSREnvironment: public ::testing::Environment csrDMatrix.values = ::clCreateBuffer( context, CL_MEM_READ_ONLY, csrDMatrix.num_nonzeros * sizeof( cl_double ), NULL, &status ); - csrDMatrix.colIndices = ::clCreateBuffer( context, CL_MEM_READ_ONLY, + csrDMatrix.col_indices = ::clCreateBuffer( context, CL_MEM_READ_ONLY, csrDMatrix.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status); - csrDMatrix.rowOffsets = ::clCreateBuffer( context, CL_MEM_READ_ONLY, + csrDMatrix.row_pointer = ::clCreateBuffer( context, CL_MEM_READ_ONLY, (csrDMatrix.num_rows + 1) * sizeof( clsparseIdx_t ), NULL, &status); clsparseStatus fileError = clsparseDCsrMatrixfromFile( &csrDMatrix, file_name.c_str( ), CLSE::control, read_explicit_zeroes ); @@ -100,12 +100,12 @@ class CSREnvironment: public ::testing::Environment ublasDCsr.value_data().begin( ), 0, NULL, NULL ); - copy_status = clEnqueueReadBuffer( queue, csrDMatrix.rowOffsets, CL_TRUE, 0, + copy_status = clEnqueueReadBuffer( queue, csrDMatrix.row_pointer, CL_TRUE, 0, ( csrDMatrix.num_rows + 1 ) * sizeof( clsparseIdx_t ), ublasDCsr.index1_data().begin(), 0, NULL, NULL ); - copy_status = clEnqueueReadBuffer( queue, csrDMatrix.colIndices, CL_TRUE, 0, + copy_status = clEnqueueReadBuffer( queue, csrDMatrix.col_indices, CL_TRUE, 0, csrDMatrix.num_nonzeros * sizeof( clsparseIdx_t ), ublasDCsr.index2_data().begin(), 0, NULL, NULL ); @@ -122,11 +122,11 @@ class CSREnvironment: public ::testing::Environment csrSMatrix.num_cols = csrDMatrix.num_cols; csrSMatrix.num_rows = csrDMatrix.num_rows; - csrSMatrix.colIndices = csrDMatrix.colIndices; - ::clRetainMemObject( csrSMatrix.colIndices ); + csrSMatrix.col_indices = csrDMatrix.col_indices; + ::clRetainMemObject( csrSMatrix.col_indices ); - csrSMatrix.rowOffsets = csrDMatrix.rowOffsets; - ::clRetainMemObject( csrSMatrix.rowOffsets ); + csrSMatrix.row_pointer = csrDMatrix.row_pointer; + ::clRetainMemObject( csrSMatrix.row_pointer ); // Don't use adaptive kernel in double precision yet. 
clsparseCsrMetaCreate( &csrSMatrix, CLSE::control ); @@ -187,11 +187,11 @@ class CSREnvironment: public ::testing::Environment { //release buffers; ::clReleaseMemObject( csrSMatrix.values ); - ::clReleaseMemObject( csrSMatrix.colIndices ); - ::clReleaseMemObject( csrSMatrix.rowOffsets ); + ::clReleaseMemObject( csrSMatrix.col_indices ); + ::clReleaseMemObject( csrSMatrix.row_pointer ); ::clReleaseMemObject( csrDMatrix.values ); - ::clReleaseMemObject( csrDMatrix.colIndices ); - ::clReleaseMemObject( csrDMatrix.rowOffsets ); + ::clReleaseMemObject( csrDMatrix.col_indices ); + ::clReleaseMemObject( csrDMatrix.row_pointer ); clsparseCsrMetaDelete( &csrSMatrix ); clsparseCsrMetaDelete( &csrDMatrix ); diff --git a/src/tests/resources/sparse_matrix_environment.h b/src/tests/resources/sparse_matrix_environment.h index be8fa80..e30f73f 100644 --- a/src/tests/resources/sparse_matrix_environment.h +++ b/src/tests/resources/sparse_matrix_environment.h @@ -63,10 +63,10 @@ class CSRSparseEnvironment : public ::testing::Environment { csrSMatrix.values = ::clCreateBuffer(context, CL_MEM_READ_ONLY, csrSMatrix.num_nonzeros * sizeof(cl_float), NULL, &status); - csrSMatrix.colIndices = ::clCreateBuffer(context, CL_MEM_READ_ONLY, + csrSMatrix.col_indices = ::clCreateBuffer(context, CL_MEM_READ_ONLY, csrSMatrix.num_nonzeros * sizeof(cl_int), NULL, &status); - csrSMatrix.rowOffsets = ::clCreateBuffer(context, CL_MEM_READ_ONLY, + csrSMatrix.row_pointer = ::clCreateBuffer(context, CL_MEM_READ_ONLY, (csrSMatrix.num_rows + 1) * sizeof(cl_int), NULL, &status); clsparseStatus fileError = clsparseSCsrMatrixfromFile(&csrSMatrix, file_name.c_str(), CLSE::control, explicit_zeroes); @@ -90,12 +90,12 @@ class CSRSparseEnvironment : public ::testing::Environment { ublasSCsr.value_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueReadBuffer(queue, csrSMatrix.rowOffsets, CL_TRUE, 0, + copy_status = clEnqueueReadBuffer(queue, csrSMatrix.row_pointer, CL_TRUE, 0, (csrSMatrix.num_rows + 1) * sizeof(cl_int), ublasSCsr.index1_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueReadBuffer(queue, csrSMatrix.colIndices, CL_TRUE, 0, + copy_status = clEnqueueReadBuffer(queue, csrSMatrix.col_indices, CL_TRUE, 0, csrSMatrix.num_nonzeros * sizeof(cl_int), ublasSCsr.index2_data().begin(), 0, NULL, NULL); @@ -121,12 +121,12 @@ class CSRSparseEnvironment : public ::testing::Environment { ublasSCsr.value_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueWriteBuffer(queue, csrSMatrix.rowOffsets, CL_TRUE, 0, + copy_status = clEnqueueWriteBuffer(queue, csrSMatrix.row_pointer, CL_TRUE, 0, (csrSMatrix.num_rows + 1) * sizeof(cl_int), ublasSCsr.index1_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueWriteBuffer(queue, csrSMatrix.colIndices, CL_TRUE, 0, + copy_status = clEnqueueWriteBuffer(queue, csrSMatrix.col_indices, CL_TRUE, 0, csrSMatrix.num_nonzeros * sizeof(cl_int), ublasSCsr.index2_data().begin(), 0, NULL, NULL); @@ -189,10 +189,10 @@ class CSRSparseEnvironment : public ::testing::Environment { csrSMatrixA.values = ::clCreateBuffer(context, CL_MEM_READ_ONLY, csrSMatrixA.num_nonzeros * sizeof(cl_float), NULL, &status); - csrSMatrixA.colIndices = ::clCreateBuffer(context, CL_MEM_READ_ONLY, + csrSMatrixA.col_indices = ::clCreateBuffer(context, CL_MEM_READ_ONLY, csrSMatrixA.num_nonzeros * sizeof(cl_int), NULL, &status); - csrSMatrixA.rowOffsets = ::clCreateBuffer(context, CL_MEM_READ_ONLY, + csrSMatrixA.row_pointer = ::clCreateBuffer(context, CL_MEM_READ_ONLY, (csrSMatrixA.num_rows + 1) * sizeof(cl_int), NULL, &status); 
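As a reading aid for the renamed fields used throughout these hunks, here is a minimal host-side sketch of the CSR layout they describe. It is illustrative only and not part of the patch: the 3x4 matrix and the plain std::vector storage are assumptions, but the sizing matches the buffers created above (num_nonzeros entries for values and col_indices, num_rows + 1 for row_pointer), and the loop checks the invariant documented in clSPARSE-1x.h, row_pointer[i+1] - row_pointer[i] = number of stored values in row i.

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // 3x4 matrix:
    // [ 10  0 20  0 ]
    // [  0  0  0 30 ]
    // [ 40 50  0  0 ]
    std::vector<double>      values      = { 10, 20, 30, 40, 50 };
    std::vector<std::size_t> col_indices = { 0, 2, 3, 0, 1 };
    std::vector<std::size_t> row_pointer = { 0, 2, 3, 5 };   // num_rows + 1 entries

    // Invariant: row_pointer[i+1] - row_pointer[i] is the number of
    // non-zeros stored for row i; col_indices/values hold them contiguously.
    for (std::size_t i = 0; i + 1 < row_pointer.size(); ++i)
    {
        std::cout << "row " << i << " holds "
                  << row_pointer[i + 1] - row_pointer[i] << " non-zeros:";
        for (std::size_t j = row_pointer[i]; j < row_pointer[i + 1]; ++j)
            std::cout << "  (" << i << "," << col_indices[j] << ")=" << values[j];
        std::cout << "\n";
    }
    return 0;
}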
//load data to device @@ -201,12 +201,12 @@ class CSRSparseEnvironment : public ::testing::Environment { ublasSCsrA.value_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueWriteBuffer(queue, csrSMatrixA.rowOffsets, CL_TRUE, 0, + copy_status = clEnqueueWriteBuffer(queue, csrSMatrixA.row_pointer, CL_TRUE, 0, (csrSMatrixA.num_rows + 1) * sizeof(cl_int), ublasSCsrA.index1_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueWriteBuffer(queue, csrSMatrixA.colIndices, CL_TRUE, 0, + copy_status = clEnqueueWriteBuffer(queue, csrSMatrixA.col_indices, CL_TRUE, 0, csrSMatrixA.num_nonzeros * sizeof(cl_int), ublasSCsrA.index2_data().begin(), 0, NULL, NULL); @@ -222,10 +222,10 @@ class CSRSparseEnvironment : public ::testing::Environment { csrSMatrixB.values = ::clCreateBuffer(context, CL_MEM_READ_ONLY, csrSMatrixB.num_nonzeros * sizeof(cl_float), NULL, &status); - csrSMatrixB.colIndices = ::clCreateBuffer(context, CL_MEM_READ_ONLY, + csrSMatrixB.col_indices = ::clCreateBuffer(context, CL_MEM_READ_ONLY, csrSMatrixB.num_nonzeros * sizeof(cl_int), NULL, &status); - csrSMatrixB.rowOffsets = ::clCreateBuffer(context, CL_MEM_READ_ONLY, + csrSMatrixB.row_pointer = ::clCreateBuffer(context, CL_MEM_READ_ONLY, (csrSMatrixB.num_rows + 1) * sizeof(cl_int), NULL, &status); //load data to device @@ -234,12 +234,12 @@ class CSRSparseEnvironment : public ::testing::Environment { ublasSCsrB.value_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueWriteBuffer(queue, csrSMatrixB.rowOffsets, CL_TRUE, 0, + copy_status = clEnqueueWriteBuffer(queue, csrSMatrixB.row_pointer, CL_TRUE, 0, (csrSMatrixB.num_rows + 1) * sizeof(cl_int), ublasSCsrB.index1_data().begin(), 0, NULL, NULL); - copy_status = clEnqueueWriteBuffer(queue, csrSMatrixB.colIndices, CL_TRUE, 0, + copy_status = clEnqueueWriteBuffer(queue, csrSMatrixB.col_indices, CL_TRUE, 0, csrSMatrixB.num_nonzeros * sizeof(cl_int), ublasSCsrB.index2_data().begin(), 0, NULL, NULL); @@ -273,8 +273,8 @@ class CSRSparseEnvironment : public ::testing::Environment { { //release buffers; ::clReleaseMemObject(csrSMatrix.values); - ::clReleaseMemObject(csrSMatrix.colIndices); - ::clReleaseMemObject(csrSMatrix.rowOffsets); + ::clReleaseMemObject(csrSMatrix.col_indices); + ::clReleaseMemObject(csrSMatrix.row_pointer); //bring csrSMatrix to its initial state clsparseInitCsrMatrix(&csrSMatrix); diff --git a/src/tests/test-blas3.cpp b/src/tests/test-blas3.cpp index ae3bdec..a18c4f2 100644 --- a/src/tests/test-blas3.cpp +++ b/src/tests/test-blas3.cpp @@ -113,13 +113,13 @@ class TestCSRSpGeMM : public ::testing::Test { void TearDown() { ::clReleaseMemObject(csrMatrixC.values); - ::clReleaseMemObject(csrMatrixC.colIndices); - ::clReleaseMemObject(csrMatrixC.rowOffsets); + ::clReleaseMemObject(csrMatrixC.col_indices); + ::clReleaseMemObject(csrMatrixC.row_pointer); clsparseInitCsrMatrix(&csrMatrixC); }// end - void checkRowOffsets(std::vector& amdRowPtr) + void checkrow_pointer(std::vector& amdRowPtr) { for (clsparseIdx_t i = 0; i < amdRowPtr.size(); i++) { @@ -127,13 +127,13 @@ class TestCSRSpGeMM : public ::testing::Test { //EXPECT_EQ(amdRowPtr[i], this->C.index1_data()[i]); if (amdRowPtr[i] != this->C.index1_data()[i]) { - this->browOffsetsMisFlag = true; + this->brow_pointerMisFlag = true; break; } } }// end - void checkInDense(std::vector& amdRowPtr, std::vector& amdColIndices, std::vector& amdVals) + void checkInDense(std::vector& amdRowPtr, std::vector& amdcol_indices, std::vector& amdVals) { uBLAS::mapped_matrix sparseDense(csrMatrixC.num_rows, csrMatrixC.num_cols, 0); 
uBLAS::mapped_matrix boostDense(csrMatrixC.num_rows, csrMatrixC.num_cols, 0); @@ -147,7 +147,7 @@ class TestCSRSpGeMM : public ::testing::Test { { // i corresponds to row index for (clsparseIdx_t j = amdRowPtr[i]; j < amdRowPtr[i + 1]; j++) - sparseDense(i, amdColIndices[j]) = amdVals[j]; + sparseDense(i, amdcol_indices[j]) = amdVals[j]; } for (clsparseIdx_t i = 0; i < this->C.index1_data().size() - 1; i++) @@ -212,7 +212,7 @@ class TestCSRSpGeMM : public ::testing::Test { typedef typename uBLAS::compressed_matrix > uBlasCSRM; uBlasCSRM C; - bool browOffsetsMisFlag; + bool brow_pointerMisFlag; clsparseCsrMatrix csrMatrixC; }; // End of class TestCSRSpGeMM @@ -246,7 +246,7 @@ TYPED_TEST(TestCSRSpGeMM, square) //std::cout << "nnz =" << (this->csrMatrixC).num_nonzeros << std::endl; std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix - std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices + std::vector resultcol_indices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); @@ -261,14 +261,14 @@ TYPED_TEST(TestCSRSpGeMM, square) cl_status = clEnqueueReadBuffer(CLSE::queue, - this->csrMatrixC.colIndices, CL_TRUE, 0, - (this->csrMatrixC).num_nonzeros * sizeof(clsparseIdx_t), resultColIndices.data(), 0, NULL, NULL); + this->csrMatrixC.col_indices, CL_TRUE, 0, + (this->csrMatrixC).num_nonzeros * sizeof(clsparseIdx_t), resultcol_indices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, - this->csrMatrixC.rowOffsets, CL_TRUE, 0, + this->csrMatrixC.row_pointer, CL_TRUE, 0, ((this->csrMatrixC).num_rows + 1) * sizeof(clsparseIdx_t), resultRowPtr.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); @@ -300,20 +300,20 @@ TYPED_TEST(TestCSRSpGeMM, square) { ASSERT_EQ(resultRowPtr[i], this->C.index1_data()[i]); }*/ - this->browOffsetsMisFlag = false; - this->checkRowOffsets(resultRowPtr); + this->brow_pointerMisFlag = false; + this->checkrow_pointer(resultRowPtr); //if (::testing::Test::HasFailure()) - if (this->browOffsetsMisFlag == true) + if (this->brow_pointerMisFlag == true) { // Check the values in Dense format - this->checkInDense(resultRowPtr, resultColIndices, resultVals); + this->checkInDense(resultRowPtr, resultcol_indices, resultVals); } else { /* Check Col Indices */ - for (clsparseIdx_t i = 0; i < resultColIndices.size(); i++) + for (clsparseIdx_t i = 0; i < resultcol_indices.size(); i++) { - ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); + ASSERT_EQ(resultcol_indices[i], this->C.index2_data()[i]); } /* Check Values */ @@ -326,7 +326,7 @@ TYPED_TEST(TestCSRSpGeMM, square) ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size()); //Rest of the col_indices should be zero - for (size_t i = resultColIndices.size(); i < this->C.index2_data().size(); i++) + for (size_t i = resultcol_indices.size(); i < this->C.index2_data().size(); i++) { ASSERT_EQ(0, this->C.index2_data()[i]); } @@ -381,7 +381,7 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) std::vector resultRowPtr((this->csrMatrixC).num_rows + 1); // Get row ptr of Output CSR matrix - std::vector resultColIndices((this->csrMatrixC).num_nonzeros); // Col Indices + std::vector resultcol_indices((this->csrMatrixC).num_nonzeros); // Col Indices std::vector resultVals((this->csrMatrixC).num_nonzeros); // Values this->C = 
uBlasCSRM((this->csrMatrixC).num_rows, (this->csrMatrixC).num_cols, (this->csrMatrixC).num_nonzeros); @@ -396,14 +396,14 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) cl_status = clEnqueueReadBuffer(CLSE::queue, - this->csrMatrixC.colIndices, CL_TRUE, 0, - (this->csrMatrixC).num_nonzeros * sizeof(clsparseIdx_t), resultColIndices.data(), 0, NULL, NULL); + this->csrMatrixC.col_indices, CL_TRUE, 0, + (this->csrMatrixC).num_nonzeros * sizeof(clsparseIdx_t), resultcol_indices.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); cl_status = clEnqueueReadBuffer(CLSE::queue, - this->csrMatrixC.rowOffsets, CL_TRUE, 0, + this->csrMatrixC.row_pointer, CL_TRUE, 0, ((this->csrMatrixC).num_rows + 1) * sizeof(clsparseIdx_t), resultRowPtr.data(), 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, cl_status); @@ -415,20 +415,20 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) this->C = uBLAS::sparse_prod(SPER::ublasSCsr, SPER::ublasSCsr, this->C); } - this->browOffsetsMisFlag = false; - this->checkRowOffsets(resultRowPtr); + this->brow_pointerMisFlag = false; + this->checkrow_pointer(resultRowPtr); //if (::testing::Test::HasFailure()) - if (this->browOffsetsMisFlag == true) + if (this->brow_pointerMisFlag == true) { // Check the values in Dense format - this->checkInDense(resultRowPtr, resultColIndices, resultVals); + this->checkInDense(resultRowPtr, resultcol_indices, resultVals); } else { /* Check Col Indices */ - for (clsparseIdx_t i = 0; i < resultColIndices.size(); i++) + for (clsparseIdx_t i = 0; i < resultcol_indices.size(); i++) { - ASSERT_EQ(resultColIndices[i], this->C.index2_data()[i]); + ASSERT_EQ(resultcol_indices[i], this->C.index2_data()[i]); } /* Check Values */ @@ -441,7 +441,7 @@ TYPED_TEST(TestCSRSpGeMM, Powersof2) ASSERT_EQ(resultRowPtr.size(), this->C.index1_data().size()); //Rest of the col_indices should be zero - for (clsparseIdx_t i = resultColIndices.size(); i < this->C.index2_data().size(); i++) + for (clsparseIdx_t i = resultcol_indices.size(); i < this->C.index2_data().size(); i++) { ASSERT_EQ(0, this->C.index2_data()[i]); } diff --git a/src/tests/test-conversion.cpp b/src/tests/test-conversion.cpp index 910917b..55d15cc 100644 --- a/src/tests/test-conversion.cpp +++ b/src/tests/test-conversion.cpp @@ -205,11 +205,11 @@ class MatrixConversion : public ::testing::Test CSRE::ublasSCsr.value_data().size() * sizeof( T ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); - csrMatx.rowOffsets = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, + csrMatx.row_pointer = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, CSRE::ublasSCsr.index1_data().size() * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); - csrMatx.colIndices = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, + csrMatx.col_indices = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, CSRE::ublasSCsr.index2_data().size() * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -237,7 +237,7 @@ class MatrixConversion : public ::testing::Test // Compare row_offsets - cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.rowOffsets, CL_TRUE, 0, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.row_pointer, CL_TRUE, 0, row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -247,7 +247,7 @@ class MatrixConversion : public ::testing::Test // Compare col indices - cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.colIndices, CL_TRUE, 0, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.col_indices, 
CL_TRUE, 0, col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -257,8 +257,8 @@ class MatrixConversion : public ::testing::Test // Release csrMatrix data cl_status = ::clReleaseMemObject(csrMatx.values); - cl_status = ::clReleaseMemObject(csrMatx.colIndices); - cl_status = ::clReleaseMemObject(csrMatx.rowOffsets); + cl_status = ::clReleaseMemObject(csrMatx.col_indices); + cl_status = ::clReleaseMemObject(csrMatx.row_pointer); } @@ -284,11 +284,11 @@ class MatrixConversion : public ::testing::Test CSRE::csrDMatrix.num_nonzeros * sizeof( T ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); - csrMatx.rowOffsets = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, + csrMatx.row_pointer = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, ( CSRE::csrDMatrix.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); - csrMatx.colIndices = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, + csrMatx.col_indices = ::clCreateBuffer( CLSE::context, CL_MEM_READ_ONLY, CSRE::csrDMatrix.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status ); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -317,7 +317,7 @@ class MatrixConversion : public ::testing::Test // Compare row_offsets - cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.rowOffsets, CL_TRUE, 0, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.row_pointer, CL_TRUE, 0, row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -327,7 +327,7 @@ class MatrixConversion : public ::testing::Test // Compare col indices - cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.colIndices, CL_TRUE, 0, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, csrMatx.col_indices, CL_TRUE, 0, col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -337,8 +337,8 @@ class MatrixConversion : public ::testing::Test // Release csrMatrix data cl_status = ::clReleaseMemObject(csrMatx.values); - cl_status = ::clReleaseMemObject(csrMatx.colIndices); - cl_status = ::clReleaseMemObject(csrMatx.rowOffsets); + cl_status = ::clReleaseMemObject(csrMatx.col_indices); + cl_status = ::clReleaseMemObject(csrMatx.row_pointer); } @@ -370,13 +370,13 @@ class MatrixConversion : public ::testing::Test cooMatrix.num_cols = num_cols; cooMatrix.num_rows = num_rows; - cooMatrix.colIndices = + cooMatrix.col_indices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_ONLY, cooMatrix.num_nonzeros * sizeof(clsparseIdx_t), NULL, &cl_status); ASSERT_EQ(CL_SUCCESS, cl_status); - cooMatrix.rowIndices = + cooMatrix.row_indices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_ONLY, cooMatrix.num_nonzeros * sizeof(clsparseIdx_t), NULL, &cl_status); @@ -420,7 +420,7 @@ class MatrixConversion : public ::testing::Test for (clsparseIdx_t i = 0; i < values.size(); i++) EXPECT_FLOAT_EQ(values[i], CSRE::ublasSCsr.value_data()[i]); - cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrSMatrix.colIndices, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrSMatrix.col_indices, CL_TRUE, 0, col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -430,7 +430,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(col_indices[i], CSRE::ublasSCsr.index2_data()[i]); - cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrSMatrix.rowOffsets, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, 
CSRE::csrSMatrix.row_pointer, CL_TRUE, 0, row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -476,7 +476,7 @@ class MatrixConversion : public ::testing::Test EXPECT_DOUBLE_EQ(values[i], CSRE::ublasDCsr.value_data()[i]); - cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrDMatrix.colIndices, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrDMatrix.col_indices, CL_TRUE, 0, col_indices.size() * sizeof(clsparseIdx_t), col_indices.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -486,7 +486,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(col_indices[i], CSRE::ublasDCsr.index2_data()[i]); - cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrDMatrix.rowOffsets, + cl_status = ::clEnqueueReadBuffer(CLSE::queue, CSRE::csrDMatrix.row_pointer, CL_TRUE, 0, row_offsets.size() * sizeof(clsparseIdx_t), row_offsets.data(), 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -497,9 +497,9 @@ class MatrixConversion : public ::testing::Test } - cl_status = ::clReleaseMemObject(cooMatrix.colIndices); + cl_status = ::clReleaseMemObject(cooMatrix.col_indices); ASSERT_EQ(CL_SUCCESS, cl_status); - cl_status = ::clReleaseMemObject(cooMatrix.rowIndices); + cl_status = ::clReleaseMemObject(cooMatrix.row_indices); ASSERT_EQ(CL_SUCCESS, cl_status); cl_status = ::clReleaseMemObject(cooMatrix.values); ASSERT_EQ(CL_SUCCESS, cl_status); @@ -518,13 +518,13 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(clsparseSuccess, status); - cooMatrix.colIndices = + cooMatrix.col_indices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_WRITE, CSRE::csrSMatrix.num_nonzeros * sizeof(clsparseIdx_t), nullptr, &cl_status); ASSERT_EQ(CL_SUCCESS, cl_status); - cooMatrix.rowIndices = + cooMatrix.row_indices = ::clCreateBuffer(CLSE::context, CL_MEM_READ_WRITE, CSRE::csrSMatrix.num_nonzeros * sizeof(clsparseIdx_t), nullptr, &cl_status); @@ -573,7 +573,7 @@ class MatrixConversion : public ::testing::Test std::vector values(cooMatrix.num_nonzeros); // row indices - cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.rowIndices, + cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.row_indices, CL_TRUE, 0, row_indices.size() * sizeof( clsparseIdx_t ), row_indices.data(), 0, nullptr, nullptr); @@ -583,7 +583,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(coo_rows[i], row_indices[i]); // col indices - cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.colIndices, + cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.col_indices, CL_TRUE, 0, col_indices.size() * sizeof( clsparseIdx_t ), col_indices.data(), 0, nullptr, nullptr); @@ -649,7 +649,7 @@ class MatrixConversion : public ::testing::Test // row indices - cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.rowIndices, + cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.row_indices, CL_TRUE, 0, row_indices.size() * sizeof( clsparseIdx_t ), row_indices.data(), 0, nullptr, nullptr); @@ -659,7 +659,7 @@ class MatrixConversion : public ::testing::Test ASSERT_EQ(coo_rows[i], row_indices[i]); // col indices - cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.colIndices, + cl_status = clEnqueueReadBuffer(CLSE::queue, cooMatrix.col_indices, CL_TRUE, 0, col_indices.size() * sizeof( clsparseIdx_t ), col_indices.data(), 0, nullptr, nullptr); @@ -683,9 +683,9 @@ class MatrixConversion : public ::testing::Test delete[] coo_vals; } - cl_status = ::clReleaseMemObject(cooMatrix.colIndices); + cl_status = 
::clReleaseMemObject(cooMatrix.col_indices); ASSERT_EQ(CL_SUCCESS, cl_status); - cl_status = ::clReleaseMemObject(cooMatrix.rowIndices); + cl_status = ::clReleaseMemObject(cooMatrix.row_indices); ASSERT_EQ(CL_SUCCESS, cl_status); cl_status = ::clReleaseMemObject(cooMatrix.values); ASSERT_EQ(CL_SUCCESS, cl_status); From 16adb73338ee43fed63ac30bd09f599e1f16a135 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Mon, 11 Jan 2016 18:13:28 -0600 Subject: [PATCH 18/19] Updated readme for v0.10 Bumped cmake version to v0.10 --- CMakeLists.txt | 4 ++-- README.md | 36 +++++++++++++++++++++++++++--------- src/CMakeLists.txt | 4 ++-- src/include/clSPARSE.h | 8 ++++---- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e35ee49..8958f68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,7 +38,7 @@ endif() if( POLICY CMP0048 ) cmake_policy( SET CMP0048 NEW ) - project( SuperBuild.clSPARSE VERSION 0.9.0.0 ) + project( SuperBuild.clSPARSE VERSION 0.10.0.0 ) else( ) project( SuperBuild.clSPARSE ) @@ -48,7 +48,7 @@ else( ) endif( ) if( NOT DEFINED SuperBuild.clSPARSE_VERSION_MINOR ) - set( SuperBuild.clSPARSE_VERSION_MINOR 9 ) + set( SuperBuild.clSPARSE_VERSION_MINOR 10 ) endif( ) if( NOT DEFINED SuperBuild.clSPARSE_VERSION_PATCH ) diff --git a/README.md b/README.md index 1743775..09b117a 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,26 @@ an OpenCL™ library implementing Sparse linear algebra routines. This proj a collaboration between [AMD Inc.](http://www.amd.com/) and [Vratis Ltd.](http://www.vratis.com/). -### What's new in clSPARSE **v0.8** -- New single precision SpM-SpM (SpGEMM) function -- Optimizations to the sparse matrix conversion routines -- [API documentation](http://clmathlibraries.github.io/clSPARSE/) available -- SpM-dV routines now provide [higher precision accuracy] (https://github.com/clMathLibraries/clSPARSE/wiki/Precision) -- Various bug fixes integrated +### What's new in clSPARSE **v0.10** +**This release introduces breaking API changes from the prior version**. clSPARSE is still in a beta phase, and we may need to change the API at times to increase maintainability or fix design issues. A few changes are introduced to more closely follow the recently published [C++ Core Guidelines](http://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines.html), a strong candidate for the coding guidelines to be adopted in clSPARSE. Changes are noted below. +- The API to create meta data for a CSR-encoded sparse matrix has changed. This is an attempt to hide the implementation details of how meta data is stored from the user. This allows the library freedom to change and iterate meta data without breaking clients. + - `clsparseCsrMetaCompute()` renamed to `clsparseCsrMetaCreate()`, to more intuitively pair with the new API below + - New API `clsparseCsrMetaDelete()` +- A few routines changed pure 'out' parameters to be returned on the stack as structs[1](#return-tuple) + - `clsparseCreateControl()` + - `clsparseGetEvent()` + - `clsparseCreateSolverControl()` + - `clsparseCsrMetaSize()` +- A new index type, `clsparseIdx_t`, has been introduced to abstract the size of an index from the library interface; the only choice currently is 4 bytes. If users adopt this datatype for indices in their code, changing to 8-byte indices in the future should only require a recompile. +- The names of member variables in our public structs have been renamed for consistency. Before, our member variable names were an inconsistent mix of camel case and underscores.
Member variables are now standardized to use underscores, but we keep camel casing for function and struct names[2](#consistent-naming) + - `colIndices` to `col_indices` + - `rowIndices` to `row_indices` + - `rowOffsets` to `row_pointer` (renamed to pointer to remove confusion with buffer offsets for cl1.2) + - `offValues` to `off_values` + - `offColInd` to `off_col_indices` + - `offRowOff` to `off_row_pointer` +- All samples have been changed to compile with the above changes. ## clSPARSE features @@ -34,7 +48,7 @@ projects to build wrappers around clSPARSE in any language they need. A great d of thought and effort went into designing the API’s to make them less ‘cluttered’ compared to the older clMath libraries. OpenCL state is not explicitly passed through the API, which enables the library to be forward compatible when users are -ready to switch from OpenCL 1.2 to OpenCL 2.0 [1](#opencl-2) +ready to switch from OpenCL 1.2 to OpenCL 2.0 [3](#opencl-2) ### Google Groups Two mailing lists have been created for the clMath projects: @@ -105,5 +119,9 @@ clSPARSE is licensed under the [Apache License, Version 2.0](http://www.apache.o - Googletest v1.7 - Boost v1.58 -## Clarifications -[1]: OpenCL 2.0 support is not yet fully implemented; only the interfaces have been designed +## Footnotes +[1]: Changed to reflect CppCoreGuidelines: [F.21](http://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines.html#a-namerf-out-multia-f21-to-return-multiple-out-values-prefer-returning-a-tuple-or-struct) + +[2]: Changed to reflect CppCoreGuidelines: [NL.8](http://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines.html#a-namerl-namea-nl8-use-a-consistent-naming-style) + +[3]: OpenCL 2.0 support is not yet fully implemented; only the interfaces have been designed diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 09ac4e2..08f61dd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,7 +37,7 @@ endif() # clSPARSE becomes the name of the project with a particular version if( POLICY CMP0048 ) cmake_policy( SET CMP0048 NEW ) - project( clSPARSE VERSION 0.9.0.0 LANGUAGES C CXX ) + project( clSPARSE VERSION 0.10.0.0 LANGUAGES C CXX ) else( ) project( clSPARSE C CXX ) # Define a version for the code @@ -46,7 +46,7 @@ else( ) endif( ) if( NOT DEFINED clSPARSE_VERSION_MINOR ) - set( clSPARSE_VERSION_MINOR 9 ) + set( clSPARSE_VERSION_MINOR 10 ) endif( ) if( NOT DEFINED clSPARSE_VERSION_PATCH ) diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index d50fdb2..3227c96 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -227,8 +227,8 @@ extern "C" { */ typedef struct _clsparseCreateResult { - clsparseControl control; clsparseStatus status; + clsparseControl control; } clsparseCreateResult; /*! @@ -276,8 +276,8 @@ extern "C" { */ typedef struct _clsparseEventResult { - cl_event event; clsparseStatus status; + cl_event event; } clsparseEventResult; /*! @@ -352,8 +352,8 @@ extern "C" { */ typedef struct _clsparseCreateSolverResult { - clSParseSolverControl control; clsparseStatus status; + clSParseSolverControl control; } clsparseCreateSolverResult; /*! @@ -619,8 +619,8 @@ extern "C" { */ typedef struct _clsparseMetaSizeResult { - clsparseIdx_t metaSize; clsparseStatus status; + clsparseIdx_t metaSize; } clsparseMetaSizeResult; /*!
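To illustrate the breaking changes summarized in the README above, the following is a minimal sketch of client code written against the v0.10 interface. It is assembled only from calls and struct members that appear in this patch series (`clsparseCreateControl` returning `clsparseCreateResult`, `clsparseInitCsrMatrix`, `clsparseSCsrMatrixfromFile`, the `clsparseCsrMetaCreate`/`clsparseCsrMetaDelete` pair, and the renamed `col_indices`/`row_pointer` members). The function name, the way the matrix dimensions are obtained, and the omitted library setup/teardown and error handling are assumptions made for the example, not part of the documented API.

```c
/* Sketch of the v0.10 calling sequence, mirroring the test environments in this patch
 * series.  Assumes `context` and `queue` are valid OpenCL objects and that the matrix
 * dimensions were obtained beforehand (e.g. from the matrix file header). */
#include <clSPARSE.h>

int load_csr_v010( cl_context context, cl_command_queue queue, const char* file_name,
                   clsparseIdx_t num_rows, clsparseIdx_t num_nonzeros )
{
    /* v0.10: pure 'out' parameters are returned on the stack as a status/result struct. */
    clsparseCreateResult createResult = clsparseCreateControl( queue );
    if( createResult.status != clsparseSuccess )
        return -1;
    clsparseControl control = createResult.control;

    cl_int status;
    clsparseCsrMatrix csrMatx;
    clsparseInitCsrMatrix( &csrMatx );
    csrMatx.num_rows     = num_rows;
    csrMatx.num_nonzeros = num_nonzeros;

    /* v0.10: renamed members col_indices (was colIndices) and row_pointer (was rowOffsets);
     * index buffers are sized with the new clsparseIdx_t typedef. */
    csrMatx.values      = clCreateBuffer( context, CL_MEM_READ_ONLY,
                                          num_nonzeros * sizeof( cl_float ), NULL, &status );
    csrMatx.col_indices = clCreateBuffer( context, CL_MEM_READ_ONLY,
                                          num_nonzeros * sizeof( clsparseIdx_t ), NULL, &status );
    csrMatx.row_pointer = clCreateBuffer( context, CL_MEM_READ_ONLY,
                                          ( num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &status );

    if( clsparseSCsrMatrixfromFile( &csrMatx, file_name, control, CL_TRUE ) != clsparseSuccess )
        return -1;

    /* v0.10: clsparseCsrMetaCompute() is gone; create the csr-adaptive meta data ... */
    clsparseCsrMetaCreate( &csrMatx, control );

    /* ... use the matrix with the SpM-dV / SpGEMM routines here ... */

    /* ... and release the meta data with the new paired API when finished. */
    clsparseCsrMetaDelete( &csrMatx );

    clReleaseMemObject( csrMatx.values );
    clReleaseMemObject( csrMatx.col_indices );
    clReleaseMemObject( csrMatx.row_pointer );
    return 0;
}
```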
From 95f3a4d631165eeb61db8c1ca653123ec6d53ba7 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Wed, 13 Jan 2016 14:55:37 -0600 Subject: [PATCH 19/19] Fixes to API documentation --- docs/Doxyfile | 2 +- src/include/clSPARSE-xx.h | 5 ++++- src/include/clSPARSE.h | 40 +++++++++++++++++---------------------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/docs/Doxyfile b/docs/Doxyfile index ab0069c..9b76fab 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = clSPARSE # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = v0.8.0.0 +PROJECT_NUMBER = v0.10.0.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/src/include/clSPARSE-xx.h b/src/include/clSPARSE-xx.h index badf974..b8b4b1c 100644 --- a/src/include/clSPARSE-xx.h +++ b/src/include/clSPARSE-xx.h @@ -39,8 +39,11 @@ typedef enum _cldenseMajor } cldenseMajor; +/*! \brief An abstraction for the size of indices supported by the library. Clients should use this + * index type when declaring their own indices and using the library. +*/ #if( CLSPARSE_INDEX_SIZEOF == 8 ) -#error clSPARSE does not yet implement 64-bit indices +#error clSPARSE does not yet implement 64-bit indices typedef cl_ulong clsparseIdx_t; #else typedef cl_uint clsparseIdx_t; diff --git a/src/include/clSPARSE.h b/src/include/clSPARSE.h index 3227c96..0af7f5a 100644 --- a/src/include/clSPARSE.h +++ b/src/include/clSPARSE.h @@ -217,7 +217,7 @@ extern "C" { /*! \brief clsparseControl keeps OpenCL state like kernel execution, * memory allocation and synchronization behavior - * \details Struct implementation hidden to clients using C PIMPL idiom + * \details Struct implementation hidden to clients using C PIMPL idiom * to make private to library */ typedef struct _clsparseControl* clsparseControl; @@ -227,17 +227,16 @@ extern "C" { */ typedef struct _clsparseCreateResult { - clsparseStatus status; - clsparseControl control; + clsparseStatus status; /*!< Returned error code */ + clsparseControl control; /*!< Returned control object that abstracts clsparse state */ } clsparseCreateResult; /*! - * \brief setup the clsparse control object from external OpenCL queue + * \brief Setup the clsparse control object from external OpenCL queue * * \param[in] queue cl_command_queue created - * \param[out] status clsparse error return value from function * - * \returns \b On successful completion, a valid clsparseControl object + * \returns \b A clsparseCreateResult which contains a return code and result handle * * \ingroup STATE */ @@ -276,18 +275,16 @@ extern "C" { */ typedef struct _clsparseEventResult { - clsparseStatus status; - cl_event event; + clsparseStatus status; /*!< Returned error code */ + cl_event event; /*!< Returned event object client can synchronize with */ } clsparseEventResult; /*! 
* \brief Return an event from the last kernel execution * * \param[in] control A valid clsparseControl created with clsparseCreateControl - * \param[out] event The returned event for the last kernel queued into the cl_command_queue inside the - * clsparseControl object * - * \returns \b clsparseSuccess + * \returns \b A clsparseEventResult which contains a return code and result handle * * \ingroup STATE */ @@ -352,8 +349,8 @@ extern "C" { */ typedef struct _clsparseCreateSolverResult { - clsparseStatus status; - clSParseSolverControl control; + clsparseStatus status; /*!< Returned error code */ + clSParseSolverControl control; /*!< Returned control object that abstracts clsparse state */ } clsparseCreateSolverResult; /*! @@ -614,22 +611,20 @@ extern "C" { clsparseDCsrMatrixfromFile( clsparseCsrMatrix* csrMatx, const char* filePath, clsparseControl control, cl_bool read_explicit_zeroes ); /*! \brief A structure returned by value from the clsparseCsrMetaSize - * function. This serves as a result/status pair for the size of the + * function. This serves as a result/status pair for the size of the * meta data associated with a sparse matrix. */ typedef struct _clsparseMetaSizeResult { - clsparseStatus status; - clsparseIdx_t metaSize; + clsparseStatus status; /*!< Returned error code */ + clsparseIdx_t metaSize; /*!< Returned size of the memory needed to store meta data */ } clsparseMetaSizeResult; /*! * \brief Calculate the amount of device memory required to hold meta-data for csr-adaptive SpM-dV algorithm * \details CSR-adaptive is a high performance sparse matrix times dense vector algorithm. It requires a pre-processing * step to calculate meta-data on the sparse matrix. This meta-data is stored alongside and carried along - * with the other matrix data. This function initializes the rowBlockSize member variable of the csrMatx - * variable with the appropriate size. The client program is responsible to allocate device memory in rowBlocks - * of this size before calling into the library compute routines. + * with the other matrix data. * \param[in,out] csrMatx The CSR sparse structure that represents the matrix in device memory * \param[in] control A valid clsparseControl created with clsparseCreateControl * @@ -639,11 +634,11 @@ extern "C" { clsparseCsrMetaSize( clsparseCsrMatrix* csrMatx, clsparseControl control ); /*! - * \brief Calculate the meta-data for csr-adaptive SpM-dV algorithm + * \brief Allocate memory and calculate the meta-data for csr-adaptive SpM-dV algorithm * \details CSR-adaptive is a high performance sparse matrix times dense vector algorithm. It requires a pre-processing * step to calculate meta-data on the sparse matrix. This meta-data is stored alongside and carried along - * with the other matrix data. This function calculates the meta data and stores it into the rowBlocks member of - * the clsparseCsrMatrix. + * with the other matrix data. This function allocates memory for the meta-data and initializes it with proper values. 
+ * It is important to remember to deallocate the meta memory with clsparseCsrMetaDelete * \param[in,out] csrMatx The CSR sparse structure that represents the matrix in device memory * \param[in] control A valid clsparseControl created with clsparseCreateControl * \note This function assumes that the memory for rowBlocks has already been allocated by client program @@ -657,7 +652,6 @@ extern "C" { * \brief Delete meta data associated with a CSR encoded matrix * \details Meta data for a sparse matrix may occupy device memory, and this informs the library to release it * \param[in,out] csrMatx The CSR sparse structure that represents the matrix in device memory - * \note This function assumes that the memory for rowBlocks has already been allocated by client program * * \ingroup FILE */
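To round out the documentation fixes above, here is a short, hedged sketch showing how a client consumes the by-value result structs these comments describe: `clsparseEventResult` from `clsparseGetEvent` and `clsparseMetaSizeResult` from `clsparseCsrMetaSize`. The function name is illustrative, the surrounding state (a valid `clsparseControl` and a populated `clsparseCsrMatrix`) is assumed, and event ownership/cleanup is left out because it is not specified in this excerpt.

```c
/* Sketch: consuming the status/result structs documented above.  Assumes `control` is a
 * valid clsparseControl and `csrMatx` points to a populated clsparseCsrMatrix. */
#include <stdio.h>
#include <clSPARSE.h>

void sync_and_query_meta( clsparseControl control, clsparseCsrMatrix* csrMatx )
{
    /* Event of the last kernel the library queued, returned by value in a struct. */
    clsparseEventResult eventResult = clsparseGetEvent( control );
    if( eventResult.status == clsparseSuccess )
    {
        clWaitForEvents( 1, &eventResult.event );   /* standard OpenCL synchronization */
    }

    /* Amount of memory needed for the csr-adaptive meta data, also returned by value. */
    clsparseMetaSizeResult sizeResult = clsparseCsrMetaSize( csrMatx, control );
    if( sizeResult.status == clsparseSuccess )
    {
        printf( "csr-adaptive meta data size: %zu\n", ( size_t )sizeResult.metaSize );
    }
}
```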