From 70ee2e69c9ed4bb9ecb69c8ab8f9eeac66700e36 Mon Sep 17 00:00:00 2001 From: Zhengda Lu Date: Wed, 18 Dec 2024 16:30:50 -0500 Subject: [PATCH 1/3] respect keep quotes config in metadata collection --- cpu.prof | Bin 0 -> 13724 bytes normalizer.go | 4 +--- normalizer_test.go | 16 ++++++++-------- obfuscate_and_normalize_bench_test.go | 22 ++++++++++++---------- 4 files changed, 21 insertions(+), 21 deletions(-) create mode 100644 cpu.prof diff --git a/cpu.prof b/cpu.prof new file mode 100644 index 0000000000000000000000000000000000000000..c0b4cb30ec1a085fb8057ca56425336b640fff04 GIT binary patch literal 13724 zcmV;NHDk&jiwFP!00004|E#=ud{ouhIQ~2LW+o@WB+uL^JOPx;wbe`7CNRq4T3HmS zyI^g#EuBm*$;d2WW+I}k4FUp!f(WuID9DbWAfOH#gROacskycgGFI{2b0e$-JoBkMBg9plxOm4vFakUPYjVXN0Vu!% z{XBC;t`XunIC1&Mf`%Y`EolfsAr|UI<~vL)giE1r@99@rehyPqqB(%e4ojMI;Ux*5 z3xJ>p%>~Rr9>im>?HBks>{X2b&x8J|*p@>GL%L?}QA-=~JSfIu{e1JJVuW}u&Us~@ zSd$OiS_|NOxBxHE8=J4UHbR`^*iGBS`$n)mAl^5ECfGzrb78;;@jN{7*bedjJXoNJ z_vb+gmgr5*DTF^Cw+xr-3t)YocwYd`u$kW69F}JwZ-ghFd{?Y5gd^Mt@FF;tFTe`n zLcCCKZXV#Jjd&4Ugcs>8%su&92!c@e#5>LNc_5g}^YRp9fEVDLjoV%x1V(^sFkXsPgJ0k;^k14| zm=WSdcw%{f@xB<|knfA(61+se)O?xV10L8e-=7c56+!#?V4|sCYCffCg}{JpE_ zy2EnaMX*jTxCq)|JH0(IfV>%gcTB!-0b2>L4ojt0TR;cwptm=-kgRpM0XPJ;h1#eU&3$kw|Wcnvw#M$fI;2KXVwMt0~fW>Tjm%| zxPhy-6H5Vp3B1Y#XH^34C2$>Hrdq}zl}*hz119woX< zam#iBBb`?Zl59s0wkoyB{uOTdQYy6-oR&&$1)Z_89yEVY4dB1V)o+nfb9gCq6=qWk zU9gL;nU@+N{u|siV6)KKuV4wOyb=EuT#wi5H<&Xh+?V0P$M=Z$zlK8vMu7hY_Fo`W z^lRvfUG>K1fr8RT{5NnT-l#V=-?_jD@p4?bNva`3-P`++=Msjjm;S?j1UjwfMwFN+rTPG za~rrBZ`O;<=}Zej0o1+yuWR!8uYUcTvhvn#!p4RCG8{g0!5}blco<5J9DW&Oi~zqJ zUL%?3^Dx|kx9GQ;Mrl633~s~Q^jpnzX(N6)+>W>Ft<4uG%Uq5V-JtXhtEY_1F1LeD zTim_w3qg8Y$M4(19e9U+r@4;ov>ncHO?Ly~2sv)b+qQH4v^}o*pRBCCYh@j9`37gJ zZ4#C3D7=G*>I$4Y^d*s3+QE7$t9H;GyX$4$0nUTENt3R~&%NSGX{;fB z70&pOtT4daJ4VtTdSDN|y}6LW@zuC&2fZgN?IXO)rX;t+RaXl$ZNjg?Ap$1AJHRK> z)l)*X9pEm!OD{7&CRewuJ8+Fy72?<8On2v{UYCe0f#?eOJ^r2? 
z-XD>#{jH7ZIy`PC#Ky3K#N~zi>s*<3#Azd>w628tiokj$+>LkZW#)7;flfGMJuw>K zAhNht1o=?+=C1PmV8>2&=IxA&_DGnk-~fg8mM~2U{wnwb{y{G@_xm~REa(H?1xJqk zMAU*;1NlTgzZ(9Cf7I_W$tAAGT|*BDySfGjNy4syd+}cVPv%5sG~qYk%Hc$K4!;bt zMu1-l3n}n~97XUh*WVxs1l|>ox|xEKXMqEh(^aO48*%wCLPls_Bs4`1MDRw5B*eSn z@Iy}aBp>vNvzsf<-{J5&;wiwdg^f8voY%si@y~jZxt>z+O?dcYiR8DiOUmfC@E81x z{%dn11)rPo@H{EX>tHjL4jo~aWd1tni9Pk!=37)cbcDa+U-j1JdlWHm!6mQL8mdT4 zVT-m+<^Jxbn{O#B0u3CO%WZp=V;#5RqUp3Mz%Pdx^;y2v@$K7i-#A%ic7mPxf}2hd z!HC}6+?;QO`0aS&Jy~UThG!+2ouL9N^ycPJGM+o|#8&yf3%o43?E+DZ>dnn*QNGZ0W8Q9X{|dB7$%oo4}0X6*Fz;%>MhJOdA{6p+wFJU*(j&G5#l{?RlNY@t7Ln$ zb5q@2IK|FBCVW<(-MgH;@_QUINQwkuCOM2(Wd2@MIZgQ8Sl6GdGKb#)O^pD*238VZ z`TPd3(9(OEBy77j%Dvk*z!3if9&{tucETKTbj1JQ(Ab3k5y#nnE;1y=mMZXj&L4&6 zg!nzUYJp5)UEvEZ^wJfouu88sH|824elPwsgs3F?exMi<;(x-iPdL2UXce2)kq~>MaPk5n>qA%gmjAg8yO@+!GhL zQO5Q!yH*SKbo}eDc;=AYyge+U%{?Fdt2A2R5ghG0vJOluaKxAsk&aw}XYA;C5xhl6 zylSPwO*T5)E8*g;j2~OKJ>Hx*o8Z$ndNyX6HpM zYkK2)K18)Px7*P2h!@_u-1nv5B@!y>u;?QlQduUDJ1Y5NE9zykTc zEBwesEPe=AyF&en?5dgL0$MtgRdZ_Z@B2W%2OqMd9Pq#4KvBfy@Wzl9rv7(a=%%wx zUOLl?97Mst{oS_ohjIBkKMMBmfSox)Ja<4}?5jT@o7aEfuE$B80gf_&c{m>ahX_s@ zuVXmUEjFnM^?5=av;7}=e})JDQ|;i@NBx7@TN7w*1kWu79K8WE0;Z*fI zL#@b-l!KUX1>}!19R8o_wUqc;5|d@#e&i=|$Bz@{7{z`MeCc|yEB;30fS|C_lX z$4Am&=IB%7Lx^{$S{X;q4v-2WN{&9y26JsJKkx0IKyd6q^;itjyWsEmcl}{=RM61);|z~2 zmjJ(q0}|l(@DKcl{;;`|0G?p@(?Thgt}vPWQ{y8T?j0^EL{WKbjU9Ow3}=tCC)m9s zm@u>wK9b=Gn`b+`=`fpE_jEgqjdYoRlHoMhlLix<7oneIw*P2+6vHD+9I7Vd*E5w- z>>(Mg42?g<@QB;Di>lYt%Tw;AqZuxELGp7qJWqyM!0(1ShILGDZ4OinjgMitY@7sX z246^d+f@M{&Bid%;%WS8hNDM1U~dwbmztjzK)!KQ%&ss?_*jNJPRkhi2iQh~(?5zS zLun!Z1N3LupXrY<^If&HkpB?|FdV@2fy^9CgpFf3dFW0N4eo*Y)Z!QNdtea5K>}u$ zYG`~s!^tb9wC;sbjewOF@O$A=hL1A6+8o}<(D(#~yIzqXUE%9IU|JK0zrwL>92?Ij zFuVBC_(XIE3k% zd5fX(XBZytFE{*)sGv#<_+MZs!=X&S!5qm9jX%q9^(Z+D=m}dI3$lB{V+w$lvK zE0|{qUNt_O;ThM*zLd^mhw0#K;XH=M=P;b>0@)ts0L_s=$mcSg`JXd{J-H9gl{}X4 
zc?^f_mDKlw6S5_r&V=rJ!6=5Km|kXn;g7-dhZy2S-}SskQkl%$^Ksd@;i_q6MVpO3VSgl6J8d%a<_RFQ95RAX&*iNPsp;4v7(a14QbM%~t%43`{~Aon?AxW54f=KJ7jhEEgVN6Z&cR2xxWLRU{?^=c@>$lrpt+!UF-Y5Ty zW%cTNvXY5j#rI@6lYO$r-(om&fMgoQEY4SLy(Lf^8ehk7$8HJK5+;yUc_sBaApoEL zo@7pxE#d1K{$xi`xB8*kq0g!+*2_rh8y8+DPnb0RHp5kJ==fglYmZEVZ#$u51H(mb zDNkcuL+2YA9@{4gw)gD~Q>dNz9H#fh|H#EML;o-+6NCE7j(#0z+-(g$WRz}k= zjlauq_}THpCN=o3qv>r7ce|mDdU#*KxQ)fBt?)gG**&+$w=-PxsnY-qp}gxAn%nIL zAjEetT<;EY+ls#LJ%;n2At`(DKs4VT4?Eal7ja!{As;}seu8qeyxFL89pmu4iU@; z3=gc9FnwT-w3|LKnc-vsGeYsX<6gFpeLw+kKf{ytr(~X}_A?oLG=6~L6nA*_LVdj* zu!|RsA7r@WH5uE|ut{d(G)!SQg);H0WQB(q?wTb*xhGbQy_oW$%rnRjGu-i?Ml&8Ohh5DbVK~KYolaA>Dd8V6JmF@wcjR7Y zWwj&hBe`3Mf6Q>JD4VDztbd~Tu@ec*wWCwwK#qjV4vPePKGo z=>)R7nW6Eo84g%1LCRop2~bOKvsG|{on&9Kuh`eDb(^pj0uFUYM_rQ7_jsk=DTYUE z*>{C$WEGmTJnxjx_P=4c%f2&FA}@TPxHI9CPv{#b7Jtj|)T?q1_yE+ACJXojFoWR? zrk`h?rvB?Yh9fpgkbdxybj*G*li^GPIYf^6J;NDmB}fh|&j)5`{GSZx&y%Lo6%G=) zEgWZU{4M*Aeb4NX-9LS_|G@C*At|B@-Q$DB))qZkjJ?vS3uM==4q#X5O>Acrb;zmyjcH=TZBIW#*| zaQGL}x-SL0;d7AIDbgJ^-e19K<0X{5r{k;r`YSXq@lm|e%`A^7cwqT%ku&}VpDZtA zRPJm<%nVZS>yPEdm51SEk=XBHc#+|Yq`xCYhQ=RNaM?UZfA6BOpuq}Gb*Bo4S^(3U zIx+~!PJfW{sM30{Lan|}g$sNt9HQXXdIz^!2<+;P4^iA%*H8ubeJKTbDXgon>7lN; zA5(COn~|vs)VynIk14Vz(=KpmJWpPKkmS)gQNbhj0P2r0i}pYVv}l+j$3M%;_KSCNqM(j)}8`(n9_Zn9p#&m{+~QN{i^t0)`8i{t`1kW=1i8T*1>TDVYa( ze+K<2n-|fqml?h+(z$?oLczVKj|>DO$RA-akB$`z>DNMr3z^=+oEtET`3MDv?V?LX zK|X-NTnRLQ!6Jr>2xvPgc%*{EmP)_s3X{mHn>(x(4_6*no=|c|C?gejG}4qmso>YA zH;KImGWd)#$sh)|0I@fa!7B`3p()m62?PQOa-@|?ZtL* z&V5Q+fL7u#1=xG^rA&YxCr z{;Sd>>a$q}KTz)QLKgAyv_ki6G(J|rrEYKjqfD?i_S~_G>}8wraSG0yOz}F%hcH+i zFoJw2gDq0ILm0fq@HGnVQv#)h^kx~uW%TzuWD(;P9PvE$0zv*5gE@2$t&n~#XSke# z!$*ybVm?8^5kn-v2ZLi)9m;Yz|e zG~X!Z&nh_eIN=QPCm4*Ed_Tcp6~k2obSlRv=93jX{HX*Q!C-EVu+0$+-eC9!0nN$r z`MQYR&nV9-la(A_%h7B$TqmXjG(uHy(PWCW zUN0tQGhSwWL7J)WNmKi*rI=4waDqnOW8@WdxFXg`u|RoAx%Xv7IF!a0DtNr!Y{O?(3l(Q6@5&1I=j_T=J@%o#(+RML5E13Z6JD ztzkTaInsf~GuX^SA`i^$(C zOA82KE5ogXp;u`k0lv%dT{7NTlxtQfIN0{KXBix%U|C4NwlUmBCCY)I&pfDbT&}#X 
ztWYS%ucSmkNjAtQGnjs%kmFr{NpdA$*+Q^zagLiFNYT|0;XN)IL!HkPM$ce zR8}c&t@nn46WwZJr8E`W2KdEq$a2Ea_?rrzawmIhXq=$&)e3GLD^0u^9Hmg}InkTS z`Ky()%KB=16kFoZr}7;bK( zEIEzA9)^3EzL%M+DXgzoaOw$J=Dt9crO2f(Fxba%pMY9KP;V=^Y_!~DI#ssl} z1zRK#z_X-l??Y*POyG>2` z76rc-h8g5D7_3%|fOBELkRI%3xS#2Fnr|o?IDKjE7HO|OGm5!pv{k{S?kIBsao|PH zt%^+g8h=;8I+wPiu4oZdY)Tdz3++ym$w?+ZB>n z3E!dMSlfk4SHP zHw%1U!O`w4z`h=D=j!15&YpV|oU}#WAG!k$lGl4&?opfxbyL1q!GU5LD9mK7XC^+Q z+AGb%(D*(D$GInU^bvzkPWu$+6zBs5H@Ok#v-*2~;LD5q69K9ln8c|egf^bkL&;3;w3mcv_$?@l~D98|I{#2Okutl;pujsia*1$qa*hn;fH(D)GrrwPw=Jkoao z?1&V#kBZ~&Fy|u$>qa`Z_9j`m7ehW0diU|Tz`0-&;vXwG&b`8TVZAElW2MHPjFj+C z6x=saqPKlXem>@-ycaYCyur|k_ zns1=&8fP*%!0-Um|7Nbv@$vGhA}`DMDCr^FEuT>@VP5C`nSyg|R{o8L&ukuw`7s5D ze@_et`78$C$Sz0D3Ei`!W6E)1S_bmZ6`Wf~E>0J; zHE~8S4%NM&@wrzwexcw=`|PZlya?lZ?ihQ?1QIBp10M+fxuC7ciFgh(rz=0v2t zSUacG2=S8&?psX@>Ep?`xQk+|A$ZcsG+!z>^J#(!@MiDe z{2K)~xjh51F>I48Uz z{-c7!_tP9Bhc^~N_cZvUV)r&7{x1d3xJg>xH?XZO@Gp_1HU5)=-@C^dYh+xsVe)=* zR;YzBuI9-^ZD z{d`}Dyr_lVU!}JD5fu-*M^j?B>siSos#D1fP;t4NcEpzp9{&SWn}353RB_zK!(zNS zhrzs{5#Vzf^fZEe9)nZVbrsQz4;g;Q^ux@Y6)Y_zh$9S-F#RKD{<*Y>KtE>qG1EU` z<}vD42C4W{e_6@C$Y3jl$0GW5l;KeUwV4~m{81HW)JdrM3^q%s`3yc~_^E(e&kcKy{Fs^Qbyl9nU7XBk2-8M8zZXWhS#=bgqce_UVajw|PTU zFWe4QamWm5n*Vk{H&k_+>c><(a$tj~ZWl0!hz4u{jcD*QrXOQwOPZXkxXf*n#6{d< zXUF`<)M1X;H9lO$apEd+!1(~n)8TMwA3pzR?#wq;i55@e!^XU57A*VWKP zTuhmy;zChDo;3{hgf&r}q{^wi#-CAfil`<6{2F*orbXK<_%o_qu{Gt-syJ=S4x!1H z8B9}+0PhF$XtGj74?bu3xfmf&RejUpXI1w+*3B+W`D7Ijd@WfQy-|=aWbmOZi54>W zg5eib5^bQ8Xo`xH?dwo2<@ah{5peNjb&5*mHeCc(aj6?c?Ax|>6tS7fd+t9tNiSJh z7{&Z~6-NuA1U0+K@VzvwpSP&0TJC6F*w2S zgg8%u(n5N3lHp1Edp}wBbQLFVrp2VUL5+a<%77Qt>3&7II-jB9`e~HCgM2X^_!$A7 zfTyLR7Bl#g;g@vX>z~YM%4e#$TKqSTAYZ~@4>#!Far{eW$t4WFV)zwJ0^jF8dCgF_ zu&r#Sn$z9qv`y`<*eK?+RNU(6!nkH z^AWRcNx=oGJ@k1=#Z4~R^a+`d?3Zk^eUI9D@$+RBXTD0N5a4&g%RzC=#71B6vMN3t zaW5k+RB`=kBE_pxZP4IChmu7qZk<9f0e&|Ol$y1H_#$6glg}hB!=+IR2;lt9uF;LFe@O!!cqobGyIwc)-wZ|=7>t1?!KljQ*(SpYd0Y+ znzyUC>Y%KiUS&WrzL0*LVt7haPkBCjUGA@xURQCqd$=-y2ASTDudDaUi}k)|)xLPS 
zLdD}xQz8rSZqkXpwJTKnGe)C?uT*i-5^0HXc_Z~M7+v2WS4xAlk86tgDisHMab#P4 zYg^^Sk*53&6?X`@z*&ijhUzLV+a+zKC5#{=@I3wv^-USji}`95_X<4;%`78zo>j}N zR_TyH<7-qL;&(D1NT<6$= zfqcD+>z|dp$gf|To_%z?Ue+qULiA$Ua?(9(6_2`cg#LwskMp-}7xr=fydy)6Z%}c8 zT@|~NZ94+xZjj}$kF$$i&YJO!D$f0eC=T-180@B#gN007&MYmYC*Lsqh6@F_=XkDi+bNZyA0oK2RExXB6|zDvmuZ&(wx9_>?T%y9BgJ-K@IT z6Eyygign{<4Cw@W>1fR}&Ue(?<$n<<=37+UXcNAW$(yU51;pBIaY)qoRuzxd@7q1V zts-mx#B#bDP5HYjE;~iCa7|?)gCpc&p7&n-u8r6y&!(g__ljrZs#eqho|`v-=pHmr-*O*2Av90 zkL^8fi{-72weu-*uZqX({{se3uzQ6fn-+5hb#)sC0@l?X90cZftOE=Gf8pFrBvBK$ zGT}fpRh!I))o4v^xKT2aOl7QSDp{Ec7o}^H*;v9VOGGN~k7cazb>*p=Y}gFHkPo;GyO6>DF(J`)@oH|CF8M7_U=@=x0P;? z+!IyNnn=`2(^e(^tq5nUvWiGNl8jp6YZ|~oqAJ=qmF`WLT}!Wuc1cHK$%f)fL?k}L zq148AOC;T!2)5E)tjbt4lC>&5!>x*Ts`3DY;f5RLYE?ob*Hy)`)wLC6(Nv1`FgYBSMDHkEEwcANd%>XGVgC4V1@*IME0%kRoq=}h^9 z)mE~qGEx?=Ew4(6=<~m$E>GFWQbJ#s|L@S=)*X|Tx2DpGNIdqS6~6QTy1CUelC12R zbP$q$yPboQPF6BnP0G8?W+0YI`iP3q-$F5M!himMOkU3l+Pi1g%4CVs5dDZHtIEvM zPOZw)waHt5RvfWcmkXrC5aBGgpGutZNNqA&?U{TwRZ|_wR5#4xA|eJib<8~xsfk3R zRwfhf*(;LCX0qu9g{(xR##vAFo9ED8EYmraNJNsA;cg98j^i9XYqPO))xn?Uk|J6B&h-JFpaq~@9zrLw-Ww`x0fa_D-HyyKm zK77T`TIf>P{f?W%jUrZttfwMX*)M#K=uj5R^oUf%t!_yX0#oU5**Q19nIcxJvZVDu zwo|`w=l`pENwL5FIh%&V|1EST8cBA~ren#f|9*$=(MU2}=r~Vr>wYVl^&B&4WouIL zc(_Xga+|%jXYW3>R;>tu7dqQyvXOMQN40f#Djl!9JC?2PZY3h&3!K+g(e4BwqTOZx zg#q4O9m`r7%F*F=KSx&GDy?KT)+=VEf8ILZer`SB)M3$7va)C2NG$8Chpj{;6Su4y z?@=<9wdA9QRA}*3w0BR!O14zsNG)-DB;DKlGm}l#B)mGYS1eh1XKgY`*6&qynP|0D zSsS-J3#f^td-qIuXcEy#JRWYI5oVR4@Ld~=SIT%()30;2747}ADis@1d4)tn99(2S z^CQ2TLW6C~*PK(Q{xBG+sj-rkJ{7g7O~#_BN~^p!+p9xa7mI%1S(~)FIll69RN%)Y z5mu$bm!5;bSmtIco3+y6Hs=5|onRABrFwU)wj!11bpBYTdn{QMw{DIlElNS;zW;T2 zsj;ly=Y&pMHStK)>JqDpWi#Pc=U^e0xvj=ZM<}$q6>Y*wBvSWV;Z6;RBKBIh@d-KR zUX@Pu&BS9-t3kb+SV_8biOgivltny`ra~e^or_ZkoHIyNT9vgmR%e-qtx7i&pDQTI zb)sD%{kiL0UMa9wkk@-Lk_xMIDwXXd9=mOt^Elktl~cXa-s?uwsoG>^ZB2&i-5VRC z7+Tp=bkJm|R$7!SK0>Isu8M}QZU}C%nACGFQ;uc2q-rbT)*ZE}tmXAH2`dqer*BQ& 
zXhmvnPsNg1E1hXr87L6B*5`(Mv$Q5sWpyMTTZ!1(uvrHoad~a&sH`4x@w!K9;bRSJnQaubqnq_jA7(+1n7q{6n5b-3mbQA) zG^0V5BT&$`ZbLxZO1xCU-H^7daFLU7dZg~Esj<=x%3uzTs#G}S{O*yCCAy13PryI5^#ay6kI#pXEYM9^H?34@d zPDbM8k!&gviDJ)P>+^A;gV?TO;y}yMYI1!!)f<@lzY2a(O4o9zqK||VWrPG zQ*x^~S}NDXqUS8+{==&OqMh~;>1Wt=J(>;Q6-#DovOh1#+4fOx`!?yu&3)tswsU2K zRy-3Uj%@+^vRS6DW!2oAiuR_qA)D^kG1{j#mbMxc(GpcrfsQD0W2AUOQ)IWAtEdvs zyevR9Ry5l&lZj=rozqrj%-7{c;<2jaT{R6f-P#Hf6I_?7m5rTCT)H;drz#z(@T`!Y zWU^Ky?mZHJ(s(x9!WKP+X$qY7DaEapRhhZRN~b(MxEVZ_jAcbGb=`+{pmBXauV9Q) zt9EB?vQOsL)a@R1nc58XOI?3f3baWqx8nfm!mTTu3RHCOZM^5{+GKYpD&LR{U*h06 zE67s#jwQ2~h3&ZQD-S4Xb+@uqA7rd-!>osJWGq3kJ8tL|OU5$Q!d`kNyvP12MA)+| z`?2>RmFX9=;+0;@EGlv@Zqc7K zFV;koxAmg*?g^3nvvVq$vC{WPva$QEa1#d?z3G*XBrH0Qu+p9>(F+-gU5mC)6mIpD zTNRa_B?{s;UXH5w+_s2c+FSlFn(tUy=^99xSsG0x$!)Dfe99qaYP?!ENH-Yb>9J`%0Iy&E+m zl&9U`SQVvXmoBNKml{j=vjVkQtaZHfqdW^yB88ApnFbAv#Ea@|GAzn8J04sO-92W9cgm>Oie7A zq#37ckI6_fMJj7hher#0CR4p4y+zyL3%{9YkX1Cq?=$@p*+>Pc zz;!^gG?t9VlGa_xzOkg3YdRbJoUx$otM&%5q-cEIsy>^JM6HTQwD;|uGYvxPA|TRT zucXPWmCgv!wDG*sejLtASP83-I2GykGnyYKqLo+lqY|a6-0D*siMtaJ*LLmPPt)OZ zApWg( z72Sk8qOda?t)pVhN_VS_Tb{)2+;2|10RR7$l203* Ga{vG>R^c1~ literal 0 HcmV?d00001 diff --git a/normalizer.go b/normalizer.go index 6681e01..b1ba582 100644 --- a/normalizer.go +++ b/normalizer.go @@ -167,10 +167,8 @@ func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMe } else if token.Type == IDENT || token.Type == QUOTED_IDENT || token.Type == FUNCTION { tokenVal := token.Value if token.Type == QUOTED_IDENT { - // We always want to trim the quotes for collected metadata such as table names - // This is because the metadata is used as tags, and we don't want them to be normalized as underscores later on - tokenVal = trimQuotes(tokenVal, tokenVal[0:1], tokenVal[len(tokenVal)-1:]) if !n.config.KeepIdentifierQuotation { + tokenVal = trimQuotes(tokenVal, tokenVal[0:1], tokenVal[len(tokenVal)-1:]) token.Value = tokenVal } } diff --git a/normalizer_test.go b/normalizer_test.go index 
730ae63..b3ca636 100644 --- a/normalizer_test.go +++ b/normalizer_test.go @@ -756,11 +756,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM "users" WHERE id = ?`, expected: `SELECT * FROM "users" WHERE id = ?`, statementMetadata: StatementMetadata{ - Tables: []string{`users`}, + Tables: []string{`"users"`}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 11, + Size: 13, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -791,11 +791,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM "public"."users" WHERE id = ?`, expected: `SELECT * FROM "public"."users" WHERE id = ?`, statementMetadata: StatementMetadata{ - Tables: []string{`public.users`}, + Tables: []string{`"public"."users"`}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 18, + Size: 22, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -829,11 +829,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: "SELECT * FROM `public`.`users` WHERE id = ?", expected: "SELECT * FROM `public`.`users` WHERE id = ?", statementMetadata: StatementMetadata{ - Tables: []string{`public.users`}, + Tables: []string{"`public`.`users`"}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 18, + Size: 22, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -870,11 +870,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM [public].[users] WHERE id = ?`, expected: `SELECT * FROM [public].[users] WHERE id = ?`, statementMetadata: StatementMetadata{ - Tables: []string{`public.users`}, + Tables: []string{`[public].[users]`}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 18, + Size: 22, }, normalizationConfig: &normalizerConfig{ CollectComments: true, diff --git a/obfuscate_and_normalize_bench_test.go b/obfuscate_and_normalize_bench_test.go index 
32e8d44..7848d37 100644 --- a/obfuscate_and_normalize_bench_test.go +++ b/obfuscate_and_normalize_bench_test.go @@ -118,16 +118,17 @@ ORDER BY var backtickQuotedQuery = "SELECT `orders`.`OrderID`, `customers`.`CustomerName`, `products`.`ProductName`, `order_details`.`Quantity`, `order_details`.`UnitPrice`, (`order_details`.`Quantity` * `order_details`.`UnitPrice`) AS `TotalPrice`, `orders`.`OrderDate`, `orders`.`ShippedDate`, CASE WHEN `orders`.`ShippedDate` IS NULL THEN 'Pending' ELSE 'Shipped' END AS `OrderStatus` FROM `orders` INNER JOIN `customers` ON `orders`.`CustomerID` = `customers`.`CustomerID` INNER JOIN `order_details` ON `orders`.`OrderID` = `order_details`.`OrderID` INNER JOIN `products` ON `order_details`.`ProductID` = `products`.`ProductID` WHERE `orders`.`OrderDate` >= '2024-01-01' AND `orders`.`OrderDate` <= '2024-12-31' AND `customers`.`Region` = 'North America' GROUP BY `orders`.`OrderID`, `customers`.`CustomerName`, `products`.`ProductName`, `order_details`.`Quantity`, `order_details`.`UnitPrice`, `orders`.`OrderDate`, `orders`.`ShippedDate` HAVING SUM(`order_details`.`Quantity`) > 10 ORDER BY `orders`.`OrderDate` DESC;" benchmarks := []struct { - name string - query string + name string + query string + lexerOptions []lexerOption }{ - {"Escaping", `INSERT INTO delayed_jobs (attempts, created_at, failed_at, handler, last_error, locked_at, locked_by, priority, queue, run_at, updated_at) VALUES (0, '2016-12-04 17:09:59', NULL, '--- !ruby/object:Delayed::PerformableMethod\nobject: !ruby/object:Item\n store:\n - a simple string\n - an \'escaped \' string\n - another \'escaped\' string\n - 42\n string: a string with many \\\\\'escapes\\\\\'\nmethod_name: :show_store\nargs: []\n', NULL, NULL, NULL, 0, NULL, '2016-12-04 17:09:59', '2016-12-04 17:09:59')`}, - {"Grouping", `INSERT INTO delayed_jobs (created_at, failed_at, handler) VALUES (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 
17:09:59', NULL)`}, - {"Large", LargeQuery}, - {"Complex", ComplexQuery}, - {"SuperLarge", fmt.Sprintf(superLargeQuery, 1)}, - {"BracketQuoted", bracketQuotedQuery}, - {"BacktickQuoted", backtickQuotedQuery}, + {"Escaping", `INSERT INTO delayed_jobs (attempts, created_at, failed_at, handler, last_error, locked_at, locked_by, priority, queue, run_at, updated_at) VALUES (0, '2016-12-04 17:09:59', NULL, '--- !ruby/object:Delayed::PerformableMethod\nobject: !ruby/object:Item\n store:\n - a simple string\n - an \'escaped \' string\n - another \'escaped\' string\n - 42\n string: a string with many \\\\\'escapes\\\\\'\nmethod_name: :show_store\nargs: []\n', NULL, NULL, NULL, 0, NULL, '2016-12-04 17:09:59', '2016-12-04 17:09:59')`, nil}, + {"Grouping", `INSERT INTO delayed_jobs (created_at, failed_at, handler) VALUES (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL), (0, '2016-12-04 17:09:59', NULL)`, nil}, + {"Large", LargeQuery, nil}, + {"Complex", ComplexQuery, nil}, + {"SuperLarge", fmt.Sprintf(superLargeQuery, 1), nil}, + {"BracketQuoted", bracketQuotedQuery, []lexerOption{WithDBMS(DBMSSQLServer)}}, + {"BacktickQuoted", backtickQuotedQuery, []lexerOption{WithDBMS(DBMSMySQL)}}, } obfuscator := NewObfuscator( WithReplaceDigits(true), @@ -141,6 +142,7 @@ ORDER BY WithKeepSQLAlias(false), WithUppercaseKeywords(true), WithRemoveSpaceBetweenParentheses(true), + WithKeepIdentifierQuotation(true), ) for _, bm := range benchmarks { @@ -148,7 +150,7 @@ ORDER BY b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - _, _, err := ObfuscateAndNormalize(bm.query, obfuscator, normalizer) + _, _, err := ObfuscateAndNormalize(bm.query, obfuscator, normalizer, bm.lexerOptions...) 
if err != nil { b.Fatal(err) } From 31d6a30d51296f98a922da744c41ed927212a0d2 Mon Sep 17 00:00:00 2001 From: Zhengda Lu Date: Wed, 18 Dec 2024 16:41:15 -0500 Subject: [PATCH 2/3] add tests --- normalizer_test.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/normalizer_test.go b/normalizer_test.go index b3ca636..e8ec423 100644 --- a/normalizer_test.go +++ b/normalizer_test.go @@ -887,6 +887,27 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { WithDBMS(DBMSSQLServer), }, }, + { + input: `SELECT * FROM [public].[my users] WHERE id = ?`, + expected: `SELECT * FROM [public].[my users] WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`[public].[my users]`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 25, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + KeepIdentifierQuotation: true, + }, + lexerOptions: []lexerOption{ + WithDBMS(DBMSSQLServer), + }, + }, } for _, test := range tests { From 01a778653aa5bdb0f2b82bd543e70b0b0566645f Mon Sep 17 00:00:00 2001 From: Zhengda Lu Date: Wed, 18 Dec 2024 21:42:19 -0500 Subject: [PATCH 3/3] remove quotes from tables in metadata --- normalizer.go | 24 ++++++++++++++---------- normalizer_test.go | 20 ++++++++++---------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/normalizer.go b/normalizer.go index b1ba582..341ca97 100644 --- a/normalizer.go +++ b/normalizer.go @@ -165,27 +165,31 @@ func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMe // Collect comments statementMetadata.Comments = append(statementMetadata.Comments, token.Value) } else if token.Type == IDENT || token.Type == QUOTED_IDENT || token.Type == FUNCTION { - tokenVal := token.Value if token.Type == QUOTED_IDENT { if !n.config.KeepIdentifierQuotation { - tokenVal = trimQuotes(tokenVal, tokenVal[0:1], tokenVal[len(tokenVal)-1:]) - 
token.Value = tokenVal + token.Value = trimQuotes(token.Value, token.Value[0:1], token.Value[len(token.Value)-1:]) } } - if n.config.CollectCommands && isCommand(tokenVal) { + if n.config.CollectCommands && isCommand(token.Value) { // Collect commands - statementMetadata.Commands = append(statementMetadata.Commands, strings.ToUpper(tokenVal)) + statementMetadata.Commands = append(statementMetadata.Commands, strings.ToUpper(token.Value)) } else if isWith(lastToken.Value) && token.Type == IDENT { // Collect CTEs so we can skip them later in table collection - ctes[tokenVal] = true - } else if n.config.CollectTables && isTableIndicator(lastToken.Value) && !isSQLKeyword(tokenVal) { + ctes[token.Value] = true + } else if n.config.CollectTables && isTableIndicator(lastToken.Value) && !isSQLKeyword(token.Value) { // Collect table names the token is not a CTE - if _, ok := ctes[tokenVal]; !ok { - statementMetadata.Tables = append(statementMetadata.Tables, tokenVal) + if _, ok := ctes[token.Value]; !ok { + table := token.Value + // Remove quotes from table name if KeepIdentifierQuotation is true (when it is false, token.Value was already trimmed above) + // Quotes need to be removed from the table name because the table names are used as tags + if token.Type == QUOTED_IDENT && n.config.KeepIdentifierQuotation { + table = trimQuotes(token.Value, token.Value[0:1], token.Value[len(token.Value)-1:]) + } + statementMetadata.Tables = append(statementMetadata.Tables, table) } } else if n.config.CollectProcedure && isProcedure(lastToken) { // Collect procedure names - statementMetadata.Procedures = append(statementMetadata.Procedures, tokenVal) + statementMetadata.Procedures = append(statementMetadata.Procedures, token.Value) } } } diff --git a/normalizer_test.go b/normalizer_test.go index e8ec423..f6753a2 100644 --- a/normalizer_test.go +++ b/normalizer_test.go @@ -756,11 +756,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM "users" WHERE id = ?`, expected: `SELECT * FROM "users" WHERE id = ?`, 
statementMetadata: StatementMetadata{ - Tables: []string{`"users"`}, + Tables: []string{`users`}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 13, + Size: 11, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -791,11 +791,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM "public"."users" WHERE id = ?`, expected: `SELECT * FROM "public"."users" WHERE id = ?`, statementMetadata: StatementMetadata{ - Tables: []string{`"public"."users"`}, + Tables: []string{`public.users`}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 22, + Size: 18, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -829,11 +829,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: "SELECT * FROM `public`.`users` WHERE id = ?", expected: "SELECT * FROM `public`.`users` WHERE id = ?", statementMetadata: StatementMetadata{ - Tables: []string{"`public`.`users`"}, + Tables: []string{"public.users"}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 22, + Size: 18, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -870,11 +870,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM [public].[users] WHERE id = ?`, expected: `SELECT * FROM [public].[users] WHERE id = ?`, statementMetadata: StatementMetadata{ - Tables: []string{`[public].[users]`}, + Tables: []string{`public.users`}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 22, + Size: 18, }, normalizationConfig: &normalizerConfig{ CollectComments: true, @@ -891,11 +891,11 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { input: `SELECT * FROM [public].[my users] WHERE id = ?`, expected: `SELECT * FROM [public].[my users] WHERE id = ?`, statementMetadata: StatementMetadata{ - Tables: []string{`[public].[my users]`}, + Tables: []string{`public.my users`}, Comments: []string{}, Commands: 
[]string{"SELECT"}, Procedures: []string{}, - Size: 25, + Size: 21, }, normalizationConfig: &normalizerConfig{ CollectComments: true,