From 293a4a1152094d18a08eb8e32ef603fb18fccd29 Mon Sep 17 00:00:00 2001 From: John <43506685+Coniferish@users.noreply.github.com> Date: Fri, 10 May 2024 11:55:17 -0500 Subject: [PATCH 01/10] remove unused links param (#3001) The `links` param in `partition_pdf` was never used by the partitioner, but added when that metadata element was created. This removes the unused parameter since `links` are extracted during partitioning. --- CHANGELOG.md | 3 ++- unstructured/__version__.py | 2 +- unstructured/partition/pdf.py | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 301d47d1e4..d46ea896bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.13.8-dev2 +## 0.13.8-dev3 ### Enhancements @@ -11,6 +11,7 @@ * **Add missing starting_page_num param to partition_image** * **Make the filename and file params for partition_image and partition_pdf match the other partitioners** * **Re-apply: skip accuracy calculation feature** Overwritten by mistake +* **Remove links param from partition_pdf** `links` is extracted during partitioning and is not needed as a paramter in partition_pdf. 
## 0.13.7 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index bc81876002..bdb1cf99ab 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.8-dev2" # pragma: no cover +__version__ = "0.13.8-dev3" # pragma: no cover diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py index fbf364936a..0371866a77 100644 --- a/unstructured/partition/pdf.py +++ b/unstructured/partition/pdf.py @@ -6,7 +6,7 @@ import os import re import warnings -from typing import IO, TYPE_CHECKING, Any, Iterator, Optional, Sequence, cast +from typing import IO, TYPE_CHECKING, Any, Iterator, Optional, cast import numpy as np import pdf2image @@ -128,7 +128,6 @@ def partition_pdf( metadata_filename: Optional[str] = None, # used by decorator metadata_last_modified: Optional[str] = None, chunking_strategy: Optional[str] = None, # used by decorator - links: Sequence[Link] = [], hi_res_model_name: Optional[str] = None, extract_images_in_pdf: bool = False, extract_image_block_types: Optional[list[str]] = None, From 593aa47802f286e34cd8de8b84787625e4110bbc Mon Sep 17 00:00:00 2001 From: John <43506685+Coniferish@users.noreply.github.com> Date: Fri, 10 May 2024 12:57:36 -0500 Subject: [PATCH 02/10] fix: ppt parameters include_page_breaks and include_slide_notes (#2996) Pass the parameters `include_slide_notes` and `include_page_breaks` to `partition_pptx` from `partition_ppt`. 
Also update the .ppt example doc we use for testing so it has slide notes and a PageBreak (and second page) --- CHANGELOG.md | 1 + example-docs/language-docs/eng_spa_mult.ppt | Bin 46537 -> 53015 bytes test_unstructured/partition/pptx/test_ppt.py | 14 +++++++++++--- unstructured/partition/ppt.py | 9 +++++++-- unstructured/partition/pptx.py | 6 +++--- 5 files changed, 22 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d46ea896bc..ea2f32fb12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * **Add missing starting_page_num param to partition_image** * **Make the filename and file params for partition_image and partition_pdf match the other partitioners** +* **Fix include_slide_notes and include_page_breaks params in partition_ppt** * **Re-apply: skip accuracy calculation feature** Overwritten by mistake * **Remove links param from partition_pdf** `links` is extracted during partitioning and is not needed as a paramter in partition_pdf. diff --git a/example-docs/language-docs/eng_spa_mult.ppt b/example-docs/language-docs/eng_spa_mult.ppt index 43ebc36573d173b7f347629a55144fde59eb65f9..d19bfc3bf11360a29267447d54d4b0163cc649c8 100644 GIT binary patch literal 53015 zcmeFZWl){#mNklmV8PuT0t9!L;O_43ZovY<-QC^Y-GVy=mj%I{;LcrPclUR??zi_o zxBC7%eW+p;Q2beQJ!8%>#~9BmCkg%z6$Ahr>pB=)InvSoeyxa= zmIN305P0^C*0IAc8VIc zn~^`p!O>=}VfpbMcbA#-(ZNj%u%z8}n0XMaL2aDclI$KAS{FwJ07vDM|2|_>sKoX; zF<3(s%Xhl#r&zW1ie_}yhZVYH6N2;vuSGS-JG}=S9Ae7%O7F4)=09C)I29^vek3WW zBYG^tPIAmEF@jABXzaom>7(~>A9|rtn32?RQjqZNbtW_FG;zAj5*Zz6`cMZ3n}D6U zqAOSikDfL46--VN5(@4;&&CSy9rJ^MfXMx6O^|^9VPtD4=U{8+NM~s4U`*>~ZDp-6 zEYnAi*l~##a~2X;FRVva@~)VIil_lCIIX6rgvkDrN9!|>%uJxl6|3zpoXN$dD>>C3 z_;SUF3323pQV<&K5~q~7IR|v!=x(BgSk;KYbljd8bNeLdJ|V-+an|>rsO)?ZW+I{E z;^1=i<2h@5Q`?5P>M<4~DmdKDBrTfnHp!-#BhSp?hR8%^R=TG?glwZy_9L1%HXefI zXUhq|VdF{X`oDZZvO2NB-;akitvEN^D7^jzf9j{`XWs=7eUvx7zLnh{d~%+Sz@}QO zlmMY*q)e{vuBj0gq4TXGW7kg*=(^BpC+4nfrfu@(gV?ug*#87RXVdu;e(xn>9%g%X 
z+H`e8CQa`;1ojE4oZ#+LM^vV6B9lVCJtzE@I3^!$95fDJt<%*KYX|&3r{-eGO&O~A zf|Jil#oEmg&bu2DxbB7G>Tj96Nw1x_xl6M zh#8duV?YhO>?c3DE|872Wu4U(OGGP4H zI_@MJc3=tl`V!1oFJX4TM&e4H=bHQ@th^cxwrP$KB$V02*`nqix6?HL5OPl-vbEtA zQ@G!JY&1vp-`R?Tk75Efe1AX?S&+{8NKg(cLL2eN=PL?c=NE1f^)(rY?#$hz zweyXH85IR~;r1<*UFyD_aJG~NWMDm8f!}|A!cl?sw6k-fb22lwHvZ$4;df2PRWxnW z=rO$jRn<@H*ig(EEG31`)A&@<8f^p{nBY1Q0*wo_uil-cT3{!&q1yb!H+x%`OORPn zT7ajI9B#P^H4^3eZdm;M5G4xa7c5T?)g5a5+05vay9wEr%|$TlR_oeZ3AAS7zA3an zV5;wNHRD~!*dk2^2iH>;{H|ZfpkxO<7Wb-1k`MQgq#OuRJRP6mJ zc+v*+CCi6Zi0NHy7LdxI<}q!^RnTZ&5pb>Ade=UTFP%h@t!0~zAX33gqx7!0G_jv} z^|%NDxDKK@9Ugb^uU0-;bcsE&SLb!;1W{(buJ%)XgEYKY({3Dtm-xKqzf?(ce8aNUH9}z#!EaSy0rT^uqC@x$s%Kc(d-cm4#Ev zHAmAdUP;-X;@j|Pj20UZG6&{v3jMK7q+E(a8`grkIcUxMx80Z6v~eF93coj$`AalG+-rKd5t_N7tBy{$do_jdLb9mDTjbf11Xg@jm*9o)35pkk*qdMp9!NxRN z4sZ|e?_cRLH`Z!*KFc0ycoydT1pHD=55G5WNuOxLTi&-ynpeZ<<{e8hjJ12cZeaB& zj$FSg=zh2~*i5U1#?A{2lVO#-rVHCP=rveGa=Hn6CoOU%MIMa;_Z;BFrP91zbi-n+8pw>xW`98?}tSXshHb3-4f#*IO^>s9;I`bCu)k$V; zSAveTA;)A89*r;K?g0?E5MW?p!BRoUQzdSoFx}Z)>cdfM(ho<+QP+KYhcT~0E%?EC zR?lW6l(H!?LF1cq&V+dB=G>BmKw;c)y4VH`oBK1@1LG*W@k%VOWT6%Pw3Lr?N!mBn z_aqQAJ9p!L`#Ca*wi6}ZNagb>?%!ZKig;hMoA)5czm37p7%IJDW)SegM(FU9y8R@O zVzqPt^~ZBa`HEc$Iz&AhDIGyXvw_ZEmUfH~0XEy}2Z)IEi5p_&n0<{XBc>Shz zQ0PJLyA<;0cm=Agic&PZNH01Bu^OFRa{5bQ77Bfaomry6XEOicE=3unIbJu^aHW?akMut_g8`Wt{!Nz7r?>C#)_Y&PYJF31=w?*@ZuVX6WmZhEc@A+BZ&{{Hm! z81T{~wr4B(p5W$kY?tD93x*O&pCbXb;036^YQY5mK~Vl!P3#2x(2Q|R|hGYYF5MFh|_ z?S8&XLgvxlMe-(k6Liv&%v)#W5V1w(0%OgPmE=6rorR0XM<4R~k^*0^RHRARk5T#H z3D**+gD*{c*o+DxM1M_Qu#F)DKExdbddm2n2*XW|angT{m1i%3s!e1w+##B`kV#a6 zKPIz8m7)4Jt1-!25yp`*Lzo-v59OWxC>CrCRTJW`7J!HlgLs5UEJ;Ecnr$#2q=HjT zCvva){E0)9qCb~3?yJ}}-0$kvh;UaU0jZz+`@gC#&3{za(aPM&SW5qkt+SIO-5;g? 
zhd(i}{a39cqQ+r*=@I?Uc!d|+#8>=+nSom&e!G~LPqfvn-sNXxVZb`B=J|%bi%FOM z@{2XbMQ?JrE3SnGr~pL=ql{y7RMIojgU!Oa9a4+XjFlkq&LA1QFcL*!8aWQT;7v|GD1ZbaVL^hT4 z!)x?}e1NrC~YnO_Kj53;rj|!20hp01pTQ%fEOT zKK)am_>;r1H18Ljd_ydVpp(zbFV<*XeNPmUYik=x<95!@RR^Fiy0JFB$pvBp%t8m0 zq^OZ$SLZjLLuREHM8Y5YA2<|Jp_PCjs#$nEVR^;-4!6!=D_6|7CrF{ojyqc4GV! z()2HL7XC>{{CiaX-najxKmPBcO$>k182)7-@z2qwKhatKcbx^sKiQ`KWzNDs*&hGh z7=DXQf3ivZ%N&M(t}*<{sQh2n82)`0eiMd2VUPbRXMy@()3XjgzI264uYYu1nKG{?yF4;l&-^U7~24Jf)#1 zDpSZMYmzaW8(i{6A*oEJ^-sP2^}JSW9~^V3gr;QC|L8W3$BXC5$-qMqM%_9O8|5R| zf`78ra$6`GZ0M+?&{PGdYR5X)n2+D+A_5~x;M*FXKWF74E*{W34DkF8P&;kD`0=^9 zDrPY%djCL)-aBV0_g!V^cLyx-H7(30Xv$ID^Mr5eCs#ktn6$~M46aC;M8bv=SzIqi zI9~;4mWe2rM3JdWywsB4+hL2D5a*vCCF6^DTW7nwUu=mCvxvxxw!Kpfgp`wFO@vUW zf7ge|yqSoxA6eze?X)}LfUjb4-=A93(f}Txx#7{#-XMnROKq=r@gaKEWz3?5B|SQ6 z8zhCNE8<2)QPbvT8tfuRnc(wYWnbEHrXX^#^5AiUBWu@}g{*NpfC)Ph7ihqz!{|Jl z1e0rTYNaA|YG-Q7E!~Ds&B8gTq&>);4h8(q1xtRPCHzDL%oILbIwYizanCK&kh}Iu zW@gWEe8G;A+-C5rh(7tmwXb5a{l)tS95f%&MKww*Q^@q8%Y_X-l98lQ`K4|%t-RV* z&CVxg#7k-Op3|o!ipskmZ$_~=oy>ORZX9(^yv(> zwJ>V3Jev`;NnW`|q{AXZY%Scj$>^~m(hB0@)ka8Ni%#N06aQ`f%Y5XSQi@e5Q-C?z zx0i&b%f5CNY}VS*%TW<;h7lZ7(7yl?uO!YL15pHs9cH)KFw{cRiU`gITvG zFYNm}c^2J|jqXNV(Bdo%)SAzH?04Y5?W|=kr|3(dXDSW(SFYaQLFf;Lb)u?oyUdRH z;@#DIA^s>LEePd{Mn03bBCZ@%xrRU=05BU_zX!v!=N*h%5b1tT9zPs|gdzJ#CSHEgPN~{L@?x(nfBI1U2 z)6&EOL)g~AN0^7#AX0APlG;71^4vokZC9VV!0}HKI;j~e_#!5%pRc!nZYgay-CjKZ zBoUF3YVdXQp^%$C4%`!)f&ptP*!-X^rq56GZbwvq#ZgWhKs8_`X|-PZP?%W;eGz5c zx;}d^WPzz!54vu4)*x*0UKK&l*{fPSQl)MXYhqYl&;`SKM5alkuJJ2kDZn-%)iJfkHFi&mNE7p?K0ZJSh%B-? 
zt_c5k5#>|)0gcPXj7*Td4vU#*E8MZC40XH{bLS*%XB2nZr+Py5txRG@IPX z9{lJJIYBtie@LV?NvRC64ye_Pr3Odt?@FY-Ulz1d8ybyuYtHmRwKM=xv(cIOJ7Yy&y_}C0I4`@?Bv1U1dft;HWutp zH{u;V_y$_!xiLwn9V>9YZ%&culs{5yuyGi#?GI`$EHjD&e>YegEcdu@%&{Hrx_(5f zOkX&H>7&|cIS23>Wu3oYN~-svj*@gJ@|AW)aNuW9yDFfbfQgeP7m96 z(0^{*E}Q=jgP$s~S4zkJ^5Irh@^bmmI30-KX7_D7U*hqGvXBP0!(qPP2&N1|>2d%E z0ZBsmpCFjwjRc$|18j)dPzTN)Q4^_8ltZvg?oe2(Ve*$$LTihMd3}Dlbw>>1F9mk1 zc-`WAa910N$GcDqD{p>ti;U{xc$uI}9P1B~rkqa2p{~c?+%AtmQkB{hq!GEhe0H|< ziE&oi0Zv0vw^5oG`j^dIY>gDaRD}-5JLD1g11P$TvOs^e#%;0&)7GCbyD-)gha1zM z#BBCS#FAp2mQV00OXS!qt90r*8CM;4iv1j-LXsDP_j3`$b81j^7EwBF0Np(d;) z#2W$j6TV?tJ8w=T)8?U;T#>LPpbOBL6P!{Xyr9HP_=*kd)YZhM1}V>hjl-}qVJI|P zFXD^XXbRlIbTDS^b{V32GFw8CxqTQna%B_^O+;+Qkh#c`3oO*&IHwp}Ib63p6}5Pt zR&t#i3zvVo+i5yV5g@*GkbfUX0d2n|pjL6~3wu$)p8qMcGXkD1fYNo62n;@V916T$ z^Hmz&^nQRyMYhiH((3-8|Jv&Q@)8DPJDad8tZ;in^F;l!ovn1%Ha%5*~bca;zT zhdgWG-4XxoM}!FiIwc(bmpt*f<>0dBVvPY8F2G2#s%&gkSdBbKe3vVObgXw(Bk^eM zWhUT6l@`7=b7Ehp2{61&Xx5hBu&EofkDLa&gB^_ZbrTA-$Ln~rUt_Ed1 zL8oV$68sa}^_Xgt9oWFdkyPac7RwdeP)3le*kP1(rAF4-#^Mv0_QffoW>1$b^)HGN zB!nr2wW{+I76sKX{B(|^|M*#4Yax04XP}1pl6TqAih<1h%?-F z0lZA{^7Al_i3^|LQEPwf#Db}%z)YKckc7mS}kYh`;Drk z-o&c7@FdrhALOq{6dv5q&7rN{7AN*WlbC8?pO*vSvxmpo!9k0w%>hA`gm2>NqTn#7 z%uO1m{jieG&=Lq>U5go>zBY0s9fs^4q^hNVu#e8m%S_l?e8FN9kzdS14ASj1otN}` zEeBo6nrf<~S{jw20;D1nNlVTxh&D_tYZ{)yjNnhqDgbg7T$2{(q0gx#;VuoK1t$$A zpQ4pKmsWKfKz)E6Ve7!q+w`c+gJJ7#p@iN4r5 zwm?U|*il=NsOtu-+`GG>vYiG2;nNsmFmhR3gT+L3TDCymFYxfRbwzE{eRDtJ5ia#W z*QP9da*P&Uq#IT5M~GBt>XPP{p7zRqS$TKj!?%ZD3gxbB|w6X(f}9YQDK$D(|9wQ%s zx*wD@89SCk`NNYujM~g2#(QVj*jpOz8KN_;Iw-qS)nTa_?{n1?ochHfVcBMyHEE|; z>Q8uNk~#KpuxjP=g!{U{kQ8Z|00BuV5C(4fH5xWRj?vHsYN)A?Xkj)nr;VDjmY-9D zQ9kYPT0lyNQu4u98mYGARnL0XxcrPp9hwQ1M5olHy($@@ekmJ4fO2ugRUE3tsNdIS zUSaQsj||7P%kPfMXH*?z@fBDD%I4x;9Ot>rStq%~4V|WW&5iPGs-`7p{RYHSGn`j2 zDrV)2ZZY@}ePPwW&=58QJ$muH*3w!?s*+$!K~HV2hjxFG&t2;*9ll!jef>QvNfquy zF@Z3NgB%p#9ComCx_@ny|7oj7uim7%1cBC4PXZ63`d_=0S}jM)FWdNEj2*ig(;A< 
z*p#2NuRP$9O9C4_aS4?iF-DzNomAH~H%YGlA=nxucI7ChB!3>BQD&tMrYfDAa-MI} zLtQ5wLYnm}fqQnOxEJ5J^n+5f!mX5Qs0ZuJIsoIq8&7JGAji;Yd;%BD=l{y{|c#8Im zvPhJlG_OD^?vUQP=ACQMb##m!zkl;H{{+-+P|@5Fz^F{jhAEP2JR#)7ubMP4KlT1+ zuY$H6(G!=y(5=jR2fd4ZRx5PK%C*oA4rFQ%9Te5N_*Pk7ny>ZyIum~5M!6|^#xn3% zH*@8l&mt^wvH*ZqiV^&a?L`VrRW@1Rl_hE1eZ0j3f}ZIM-nUlX5u>9U?1%j>IkF;n zMzp6a&F>8=<7e6Q3fhk`6s!{Gt0!}O`}fCEX7#yXn9pcG*8Eyd$-EL^=_l#2EoFp3 zPHRgYuV+|dHwWTm*_n8^$L zS@eZ5r7KGCJ|oXqKGi}z@lKrJI@s*(v9pPLba-EA9l0RKHpXEby)@2C!>hC1A4o5- z{P2D)IOt@*W!NhW3SV*F20Y%wh)$FlWA$iuJN`=QGz5~}^~8D~k|@cCt`{Ru_!AE6 z7?~!31o@VZC1d2)eA<>H;{h3lDTe*i*LrDxKUhLEQGL-Wf1dlxxz$Yej&fBvHv(0J zvkBQ|UYEI=T_K(asNV<{BK+w5KWDpt7^Wwx(>5!tKm@BIVow!uFdM7at}P1B%&++n zG9+Gsn}ycS$>sCUM0V)Ec1niU9(^CKZRD3{c8?w&?duzLjDr4kHC~8pHr_y)fGfJ4 zk3&S8G9uZ6DN0~lm?7XFUcgu+D9#(%xGDr(Skvc8NG@Pom8rvg1XGdlL?tbckOl&2 zz6g(pl(zWS2p)uL>>ol*uylYQN^CT*NZSibVg=k^JMJ!|;?UAGM4vJsKjR`Rqmpf% zp=v63&qbTja+jzlHY^ng#X@>257#r6R#QkR`@$?7MjmM-WJWk4+zLPF3L+CkD;;y- zlofYs&*5#9>oZ*{#Je0dVGy-d4lG?94&Phdzt{&VdPYaD2&dI9 zR7{tjoCO>rXcm}!V|-~t<}f>z@B{y}kye6QOpDsaB1Sbe1RbGiCP~|u@4kfHJ;$<< zD)97$k(~bV>XzyTYIsH@lvF&hWaY$^3jSfDN+84=okAO-+aps0=h3$ere%PACY=Li z8cAChbA?|s7*>*nCg9u>D&#c19a8`JC$BRKa#NaPf-RmId>3Ky-DhlFyfs#8?UTc_lL+6-=n?#wi&bJrC;A2uT~KibBVPa zd$tGqUC|ZCDnh1Uzf9r%oKVqT7u3&hy`hn{ivQ@?qzaf0yVuN_PKD9Cl9js!*^Q(- zDAV%!GBBM&TLRVX1>9G4fcxsI`-Qz}>oT8{$&+>ZJ(>w=KEds28w1hvC+TlE;@Ak_ zKZ9!W;Er1#zPh}V&A244H<@RSB!V8+=sE`Bi*g)h*t_YBa*SgheLo!LtXuy0>1bfE zJk9g=^FoUQz1tzmMt=)mi(?d4)7enh%_Z7tdQutHH)SAvzj_VwZo;)wn}fRICtU$L zK8+W4Y}UthcmR$_P4gG9WZJ~{%0*{*iS`s@3txrWge2@h^M8Ik-cG2l`s&qCU6r9T zU-rH|sx0XCmST0aEmh5ZyPdLVkMs=3W& z^T-jijRVY`PFPD0r6{7ZW-kzrk33`}CwbU8ygMpM zHSCZ4RY=nD>KT599talSLw)V!w8S&m=2nJC(D;S4ZWp8*m4%m<6W7nNwqGC$lCv5e zdEV>6MEbo3qNBrA8v4?4meNih2?a11< z>83+e@Z&?uCbZNi`ot>M)g8?XUBYM3Lg`c#!-SB!k)g<&kQIQlw!+2N&;bKiVYewu zBkuilDubKJ9X=S}e3=y9iWfe4b5%$+#zl zQWcT~73J31n_`PbYFu#D8VfelVzK~F35N`k0bEY?%9M=IOD1`*7 zklNH)_ZK97%e4q;&*I*TNa6O#R4XQv>~y>XXd#@V 
zna;bcjW?kheqv+iT^ZvF>lGDz?A6)&K4D5RlZ%$%gISe-s-wYPpmgj2y^FQUrXrVu z>{@P7Dc>r>IHPuqIlsnNT_8vVjLECcCh z!)lgs69upNW&Nm;JkERm@WaH#toZGTc$>!+o^1$}9jF{MqW6>FgX*O2*mGHSQWTK; z_8rEog{A+ned3(=?2Gscysrxh)xS-}Lj%MfG|c~m{4%~RnJ21iHrO1P9e|>0lL8$Y z2Jk+9vuxseacW`8<$MG^ZhX5fAo>vZEdU)^Joq1J#Ai<=l;<26?)L7+e%-?-a(L8~ zywhMZY>CYy2T@Lp zi!{&ZARXVp>L6D6d*;V+(y@dQrSAd@pX~I9c;r;UT$fL9RsBavD;H$dtKjOSIDhg3 z2j?X!vT|a2ZBl3@)kdlK2Nd5Qv@MNX;^sFHy8R{cDha3lrBn*~6{jiFp^%OBUd>hY zx4L4Skfdl{BLh?hrz))w>Da7Hd*9lu7j?d^s(ilw@${_u@#chFphvteu!kAzGsi*1 z$7oWt533Foi@Ii|`QTrpzoAJ}@K>0*23nVN+E9Awh2GJXH6mq~Qs-PiZQ zqws+{?tFHEU8ImhKQEhNHVc{ODhWioW$LlWqPHu){_In4v=5%2y;|hf?(}lv(e89l z-uyrSbhhT1Bs0t_)f%@EXFRQr@D+PEO z#$$?j1rJ4+SCyCLz*-78&mpTh0zk^3dGEQdGC ztTmg=21k6L(4#Mq5%Lt5phy-KYEekY$L*P71bT0KHKw{?hbwZ=5<6@l^5Gt*`tlrJ z;0X&zd)MI7a|p5}*71y3o)R@n_*CG!{CdTlQ8auPsd`|$7N!`&?v*g>$-yxDC|W&0 zE-8{eDAN*g=NTEWBOx`bDqF(Mvm|bk$3>AcaV3sTyvB{4K^dmraIT5YChum<2H%7*l>WH3*>Nozj_>5_5q7>r$c-W%+NGlm-44OHaKCK45o5KXEiT#2>OTD{1yq1C;B^`%iaKnlM|E#B{bsqXGj1N$ zQ)MSEhlOu!Yf*ZMAPD;vn}0EFX1?BAZDx9OY1sjJtqZ~#T~2dn>%8H(ZtO47x6SlJ zZ-q1voQog+3Y>p87~lNtf1tIvKw2AbHSDmq7_jyUNNZVs)7p^Q7CBOZ|3hnkP}Vwk z)0cY#X{|>T{3H+g{%BL1NHz^qA~i?Pf$%q^xPsDVE$Q>Y$D?&9lB44CV!2r9^Gx+8 zmxr|id`URP1zZpZ6w+JPJ#BTr&d13p7_m(#AB*)e9L}5)e`|{N_oLo3apKQY)AJXk1UnA}7 z^yR~AZ6}_0SCiP;`1V%E{Jx%`ft>C)rM>UdbQ%0+YW)OZ8~s_*vYFC-ri((=Z)xsR z%60A0m*vq|%y{`RDzOD8AijHrkHV+2pa-W~}4{?E+^)SK&`uNFJ&ZXH^3aCTX$ zQMgsv`5hIFHK%k#K!y&SozSiAZgN0)4*exO|4umi<1WWg1M@%SmGIc>^NkC<0oZu{EbS1}uh`fY_1~l$`SyH)bDi--C;Mz%q z^o)$8o~S%QYus5bP=+)e?jc`S9KTeEN&(oPQ?4wCDc~74#>)?mMUbLa|I)y|3{YKt zq#$17iQ)V@JYZ3JmQ)I!3N%^D+>F`0JusIdz8xkwl<+INZQWObNljzTADuJ7qSkBvUzSXjT5L7;Pm2$_ufT1L9yAleuA_MsEW9E!ga2 zI?;ln@3)$!ds=^7H=c=h3)JHq__%Im8huTypczR!c$)I;ygNUB@ceZ&3$tfLN7NL? 
zYJ8@-rM?2dN+_&QRFrYNN!x}1N=APE)rf#k1%%EtmmYrp@1oMD^Wm)fh1K%Wb5`dQ z7{9Z`Z66s)$=w{BX>mtveX=eWi{1X@-1a=u?>#wzyhz*#K=P_=dMV;Pdst4 zI}ehhWXLwzS$PLiakxgypI@GQKP8=;<3SY}(ga-eid>Z1d0gZ1WJx!RP>%>qfBn|x zp?_M9+&Mzl&Gg++TOJ?GRh^~9{YYa}Y;>E+$Rud!_=U^p1ti>l^(d3KkRilFJ@p;- zR=o#0@e33Ca(3lWR}cxkbI-_Bs>UoL3WfdrMJ?RjYxD}A(CNWZbXvapylmt?{d-muNAG4w3YFhzmS;?EWdh-=)Mu)!hM*g|GCNXW|U^cuh{^X z&p{xD6Dh7~lZ`x$P!-_h8^cOJ=XwB_Pg3HSXlt`v^Rx9WQhxI`Xc`O`PtS?Ty(h2O z18p)1r;P!0i3z*KBg5}xisnR3OixGVSl=<#MU5pS$?iWatp04h{yO5vT-IKcz+ zuSenRYjJcJ3^wM#_;C}zi+saq{LZrrEFk3YGPOK%UTrpXwQo%6=)4P>xol}U#AuxB> z!7E0Icg7RvWCI;v$q@h%I!6iOa<_LwgMA~}#Mk+eKEpUcw8y=l5KQ~x+k^Z@Z%)je z6pTgE=OZBUKhJ&myfE)gn$*_r5ZthGoadGJ`ee#IdiE-l92@5Ei8dIIv!*c~c~mDi zEnloGj`f-|5|4ul)_iRN{t*Npbw!;N?c>%C1mwj?-rUqA=a~&g=g?qBx1EGDyjjQ4 zkyiPsR3lTfMdi$LR;%w4;6u9wpV1U1?*YJX+6^=4*8bd@AsN!|7d!kZ=upw-?Lw!fEZfG@dC6U5KCq?~=eWUNo->Cc6V{^3yH?hN1Tq(I zkN>jFl!%1`=4cw<{S|-v+X3+hflIcwUFJj`1e__VrPQnDkrR=2!_4k416L@z))QVI zaopg;#Te_4R?DtkgOT8Qk_0z|Ip&)X?1O`^xUZ84)FTHg(mhkD5(%)pH&1buK_6v> zQZM@#ADtYhvE0PWdlh@%6&cz}CfvGQ{H({NOw@_3P&0prtQ9e|sA-`cNv|>-i!PKA z9P5xg>i;P?P_m`Fgw&N#n;zGx4T)B4f~Tp|bK*fkLAZq+d&C+!W)cZFAt6 zl!;>*sY?^Tl(>-HZ@Rh*XNu(r70M#Zvr}q?a#2afXtH#9G}CpxG704xP19m}z!u(@vV%Ka z{Gz-rCNUmd#W!@(nm!EeKv4H&!4vmUHX=Rl*Llagrengfg~x8n7ad#$^xHELku)jv zHcK4P*C~@@&{oaM2n5puk-Yp=nbh65ovx@#*~C2r>hzy}-^OSUV=HVEOs}uki{18q zgva=MqkbwtPO;HT+Px?%c$ZHr>`;b?u@S?7y-?D??iw~yVA;r8;q+yW^raDVC*Jzr z1H+?}Kr{hO9}}LWZpBs$_X;j;BE%clUpvj`ma{;`C#eSK zvnw@C?W(d8<`Lq-VT(LU%k-}-3=Jnur!0Op8$mYSmrKDgo3PWZefYE? zJj`FZUVs8M_XaBN<}m%!5)9p_YF}5HaW#vu7i8CGJ2!NA&gV|PeSeJEY$h*)te*WR z?sO3bp{=U*%4pn=J__Ofne7G-`Jg%OHBM zi6L8iT(7M$ZN=Kv&}Z$cbzCoKOirOu6k);xHk3H1wrATyD~EYiy)(9jSTxGDCus%& ztzTg4AOGt^$&_TZE1i@ew}h*Bv#$1`=UcY)^A5s}fr}N(`JA2t-0y5@hc7T&3JF_P zL`+GQlKXTAW3Iq)6(})jWRI{$H%u3!wL@ZYAlohT6Sy@e4lAFSf1tU)O#f^jICZg9cSWg! 
zWqoCBxDv~kW=7d{>M;?*SAe_!WdCz+x7~$ym<&yR2M{F{%WGg)3dsrPjoNl}%cXZp88 zjYP4vmWefomW!`%lD~yq5W^{OGAjOM!u0RJ^On*6Pi5omw_(Q?lt?pNi_>N|laGU# zKM?B;LL<+L6G|$!T*Gz@a21z#N#_)3ME@8ng24FYi<{e*QwE~UPQVY`iV{|-P>3(nBA96g z;+6(7`MF>NR3(-MIb|5lKeq9UEh9bFYyvHNg;00PlEze1v8g`Qy!my=$m+`F*#t&4 zrM_iE)A1e(bw(q{3ZZdTcNQvUz{lgr!l*-1m2dORNia^LyCU486}<-KyYeew&@HJ+ zU#&?v`zZF9?1nS7U9W^iuTd3erhiQdy|~fwPW>7ZjiFdYgfEIxY-ZmlNh${Ct7*a^ zyr-Zh2PJSXUv=`OSiUJ$VFCFlN@J}!B{Tb+z3}K-ibRETw;OX))QA3r@YDS7@tBHB z7&cfIGhH}yU3o^5{c$;*g#L2A0%{ACjVvy8zvEJ-D*e}EEx0ZIk zry2iX*DA&o{_&@AAY?^64|m}?Xz8^B&c45nm;N1m-!7{I6Epwm*fIWg?99R9 z(a6mftQF>R8y-Oi?7PbhMH_Rf&0oD1fUGu8`Ibm}HZ@33SC@bw%cJ_|)7~V38e#{m#gwYWP8u30v#RHpV^Bscz0gK zD;zGPE;bolnE~1`V1}$_YK|%0(gAj=pj*6ct%q!r5$3XNaIA`%NlL@O_mmg$852<% zHN93*7=ui2jxZG(!D6?a5l6!Q0bFOeL`4Mc#;25#e^X+OQJeB~`sdAQ=g(tR{#d-| zSY8uDvT3%kQix+YSHs8OS zrRiIT5HzR!%r`MH;L|d<;-sA`upDVxJj=?;EOLWBa>E}Pa7+j9!E@<}+{p+gH=-nt zT5n*!LcvqkwUx!Zluqpw0?|vU0Y_YYx!m#V^rNP_(2;(tZTbWqLAvao?WrjH0GIzN zE%t~`lSW-(7aEm0n}qbcoa2l?y!Ou;Lsdn{KAfso1OE}1QNXdoB3Hf~0I94bs>1&c zh+_a1p`=#^SY-hHbO=bo923kEK z0u}Igc>&502;ml_vVg8X;xczq7#Mcs?;o7W>$^BSs4kQr}w`_%ZuHi4^}|v{z5B3{z@&JugddT$BkQdJ?V&zszya4_lKM77SqZ}9<|-=@zO-I;R+Ou z2s`g+{qJbA1#)#rFHWv=Sb9Lio$R;auHzU42-i(d&WYP$`GdaxHr!X=40n#Ik~hP> ztiGZIXt>AB%s^&Wk{y_+XZ=w6n0_(tcoDG^NiEA0-xl?qH%Le|LGMYhP8d!LE@X!> zwzY-cMo~fM>ANbdNyBUcpI)ns()WExn+Wt6nn{=2gxeQ*ao3>y-$^&=ljmM1pV@Ac zLP40W$d$gKDmW)u+1DqK@lIg8G@)J+YS-<b@2=!UG{3)(NR# zMoV3KP>e=r!|uG)F=m0RUaPvyCuCzinegN%5HWKu{f5=|GDDNV%TmvGtejfPjq78E z@3w`?kls^N^~D`mUL=T?tD8ImL#BOk_g3Eb?0wKnlpl|CX*dq&&E_YX1f9cFN<#9( zyJlj_55i}dxV_Ck`^Y|rqGdlC3K-F zx47~J5d_vV=%Ih9wMqs&&ZW>UfHDWzA}V#N(z*;bf&<_SPkg9Y7wpvNZBbqidXFUX zL$M3hygmI{OOT%c^Gw>S-S){Nw$WM%C>95z-7;HTq;RGmReuYXf!=|CSnryr+nZ_) zKkMuK1hIS}RLe$p7b+j++~f zy2nZOON_DPy^hn(IGeG@wJra<$;D|37tL1|u5U#XZV4hNXV`<=!rng~jKS|-*_*m1 zTY_F&9sQ^w{QwisC#!?$ZMQ=-VE0w)pc@n3b;S(r#tqVLKl){XZoGy@PXhz$a{)Ww z1TAf-j;Wo?mu1SptL>;U;zYO@h{f|bfz!PQ=_7D 
zKDYMQLpR70sb!WZmannlU2l&ORGXxJkOE@!ul>Hi#pWMoeDd49t2e!f!@LT3@k-1G ztx#}fEx#0a@d^@n@k;*f;uY7LE5Hz#g(=WJb3sl;*YUZ$`^6XaCK%lZS@%o@{vjCE z{fA(*v-s%wb?qMnBhiFg{fpgppkTBfouOv=AA*tjn_v_Q6pTXtk6`q@gf!~E3r6Gr zAQ)BsAs8u;9)ZeAA(U#oz~egP%v@@3PwFpU(bPpkqzM=g3;icVDtzSjNpaM$eZ80 z9=B7SkrkMHXCpBFqB^njt|dy{yDddBzDQ(Z*zHdIo2CouZrbM!+^^K|l%qwiBze~4 zVb2W_K3~naZa~&D?gs|zoXX@Cq$#8?hYzDL7AVTf_RuDN$qSfZI1kS^jnq9n=qb?^ zXQVR4P5~e!ON0t>77JRQEGR)saALSWeur=~3LkK`pqW_9b=XO*kfzqwVF~G?72z<} zeCg@WJR6iZ7RC(h8#=zB#(?zdD|E~yfYe_g-|&;6On4C!_8rqZ7Ei?t9tyUyiLX>R zLPgpA$!R*a?q`2xcFY4~h|*L*t=haUBiza)4todjd5NOZ%GHQx>RL}80ZLW{OeRQ3 zvgm8g{QpDVTSe8uE!o0o(BKx_9fCt}cXxMpcY?bHcXxMp3m)9v-8D%54d3~?``poe z&VR>!Y+e|fx3xC4s%qApvj|7Egc4$DHom8@T%kNxov4-DY`r+_siedgIS>wK#k5sF z`0$`(r^dT*Z}j9mp*s#Y`zu0uuSeNT$ffY`8ZR(#V}d$W+oku-b}>bI zr)+{cWSij9b5BS6W#rP1R#&~#XU8lTcs%Xq?c+5|)Uu5x zna+=C!)rPISnR(Ajs9(fe{kLY&vAq7Gy!ypu-=N$9KjTqj~zDB*4L#V($xENE@_bX z#G=0A=D9gi3v=O8f{PIckgZ=%AMQB!H zn`A>f{1?kj_FM$P14{6~ZTL~ME(1&%rbd%VRwaHAMdm0Tl4dJ#0T2iZvE=4&qKe$vi^*j3~XNnKz*YkceEpK znW~sOt>%VeW`k@%iNC^AH(brd=(bW!;|0FVUYaK;;7h^(3KpY+SP`VhBIMF~Da3tF z9HdoREtNJY{9~bkZ6-z2Jxi-#Jn7#b;|926xsd<&al>C;7;DO-)>s56YXEcYgj$D7&Qs1a@(bm#>0)PY zAjU=9Kwz-N8jZUR54~hI>2xOXRx=a&pwT^Bwg=e9OH(;0rh~ym@q63awZRaH!Z3Mh z_eW>fspWwoc1jq^_+75{T{OHq9-Lz@sxYcnMaEzt{$;P_K81r@Zh}N9H=(n}Y0ch6 z)s!&y!&E2=*baz8@tFFx^VqONq@Pc}X>J;}5+y`%CfC69*FkazcVe#?2aHhx9a&H~ zka!*Z3kub<)PO*@g`Cj`@XzbX7%@_u^R?Ib>2&ubCc*M!iH0~JGZMQbH z+#~34HSWn)m?kqeRTCyNH%W+a!9t@OIOJX<9qWDzUR^@HGrTm>!eL;-DUg;A6-j+fd3SQ!IE(&2e0decYb!A{^ zG{i9rl1n(U61CSf8{WLUFjvzdR_qG2!uMCf-FBqvTT#s!xUA zudxee$1y`kO4w8d;E=7*x8S<4xxUeZl7&08 zPDw-q@wWiuYzICV^M)9}Xv?`|e^#khHrJ2?;QwTS1*||?V$)_#@M1!Ny=Gl*W8V#0 zc19H79WOv@qJgK3C@^?tEY*OOKkK(dWLpzNtjLdCbGRnxDsyQ41}!WD6N>2Enm0w9 zs=a?}sa?PDalGfHyifL5n$`X)1w;24+n6-}GR>7U;$@Kc?)DT}SWckL&n!qpR@9jS zCf^j?muic|J8DqfO_sEh-NEP^JW+$Y!FWN+GNQQ3qGwI2I&m0KA}?J_LQLaX;G8)+ z8-Ys?fzkJ8GskLLtA>o9kfy=&0l}k1!j7M?fzH z=7<~s?lA5|ru>CfP#DYbrabDWfnv@luRc=MB=5t%aJme5)bsF}%m8pl0!xYzWj%bL 
zq}v_gQ)Yz3(2qCi0qev&cngYaP9AR9qF3pGJnc&(`P_wz|KKbKy>pfk0G#E}L!bZP zEV~otNBoL`UITM`=q2taa!Lw$zN6BzugMER}ZQ)NI=_y;|lSR+lC68{WaX z$*MKbpBZWBtZ(R}(0vNs4jEYuvk>ikjU+T%URa{uUrg5W>>ipo%nyo8D%yEc>Z5+u zT9+}VM6~U^skO0k{|;*s1ZxH=27}~rCw&bs`nCTi^xh=_J^Tbvmdny-FWxE31CgB{ zUDC9=q$@2T`e6PgW%=KMj~~X`1xj-7{x07ys)q6zJN4K&5oB#=#aF(Fq?GdD?}Fs- z$4Vk0EFXDakS2l3*`?p(Mj76mC9T;!B+k?+QG5*EFgH)DLji*~bpG*Ak6Yho5C4`5 z00WSVUqM;YQX9kDsg-xVMA@>HG5}3%#}V@OVtUdnj(Hzyx>N&ANpMBRkKSprY+4dC zW`HJEKF9W&74Pb=A`0PLgZQ3^*qt3U!g~s!iB%B-1YW*1ZNy4I{9e_gcu%>r`!Re9 zEix+o?@*0AA;ZzsHaDX)ZvwNknD|qo(1clTT1im0&%}&~@{dlE@I+lKvRqtGZe_ce zdBu5JeiVHIRroA*@C%JT;4ya4_#RriePu^B%6Xih_NP2zR#en26(WdNXs$|NSz zn0Pc`0*6#GsSxA(Q__!X@LYcPyHifM6A(ZfE10m65RO`V*Siod){z}H&I}8=Z?(;% z|X?1z>7_l||_Mruya46iY2R zOPP~jD84hi%(ZPs9)+N%xu$Nopx#uBgl>>btQ8c7NPOU0I>+ME<0>>IvMrVps>N*C zeCX_Wa~DbU`tgu5{+|os_<`}6cgwfO0d_q72FKGX!SGUGM7x& z>wBT5DGRMz%qP2b%)RlK(v|k#{{P|bvgLbAmIZJ8M*QLK(x$^zn-P$onw(JahKKzI z{*C~i+?$PF0cLOiZ|*Mi9|834?y^FO^4h86`;zaaE`F+8G@L4FwO|=nO6zXr4b;PS z71je1OPY4S;nKsV)>>b6aV91slg7hl+ky>ufJH34&oEZT5*985W#p%RBm9y9fCn0T6^xk=$vM+YL252jpH5m zLFziFBZ=+wnQ?P&K}7F|z03ILq(t>>4@f~C2tib#;YW>uL{wd7{8Sitb2KBX#(4c} zP(Zm@5kYy1sG#K2+t7N@s$3uv-uC<)HscKvd+PfSwVcL0PB%a;R|inb?YyhyN&sp( zZLf5%u$UydDtsgngJP04v~TY6={q*61`wRT_B!(jt^lt#V08c0`v7;Q?lg`DP5-H9#bnO^e#b z_?Jj-Wfn~{NbD}RqLIpI$l}x0gP|nv{jCe-{dRK}vpfR-r<9d57fRfsiFCfu`mC5r znQo4BfJiRtT_i{TA(A7|(!yNHZ=|55p36t+tpe1IF4yOQHQ^q|qzFwXj0rZHA9?JG z?{AS@1E6l?@44zOhkGoN0Ep!1%wssc{@5GEetYcNW7N>@0me9FP0`#}C_T!G;kdDY zJeHQCnT1od6h%_;Y(WD8i){8`s^!p$sk`jKvI?NM%K!TyH->B0DdS z?HmD5HbKcajtz@xHkh;h$J9-Rti+EbD#hCU%j7C6<2Ah_t#$WH(Rp=j%Ie zU)E&;NM~QZ@>`fo?Ib7nzs`SM3DEi3@-sMn<=G}(qW1frCn-lJztw#h0NDyyz zNkGkoIy7aF%D}%!yq&OId$C+Kra@@Kqi&NN7!IAky4wZA#?}jvcFas{#v5i>#8)i}@C>B=mItT-jUeaC~6&?|N42oaUBlmYRE=p+~Iu2u&*R5RnID zxY2(}EdF;H?!)Hu{~WIoK;cnI23D@*mkL-sfJW@_D*ueK6wsS_d#=QR5Em&s7f3Bk z2;S7h!_&#~Xzu-4wLk*k6gv_O&_PBo1MB;Zlq1~eT^=2m&%wm72pR@R^aS+;9c|tn zF4-|pa&nMiwJ<@F2jIr&Rnrw>>Xsk*B?#gT14zZA1(cz_D$GAq2w8cx?6DhGC}_n9 
zK)O2J$9{{`j|UKZn&uMB#T6jDY0o}YoHl~eWDeC|{c$WLcVs25_X4V;CQH&VHjbE3 zP}G~yu2&ixXaKYFd;lG%LlAl|D*ij!KS5CIYZ;oYvTd?j4@{liPneh`MOiKfBAzks ztPt2tV0-;i)c!tQFw}x1P65Aogl0-uldp0Cr1BUDP`;q4HGRj&;`GKM`gh7oNpp&k zF@ID`!{-!p2gvGLN53vHl$k5Gbfj!C-@buFX&#?y&%eYm%vcyx0eepu{VvO4 zPr6knBH$yU`jF*ZeyGtkl%oM^^fSYH6XzqqJS{+vwW1(or#WkRW?jA}Nwv+LB-R?G z2?yxHm9k)5gH*y0qK}u}TuShhp;M*QXDfUb>Rq*-mKA>(M}xgJ7y~Sq5{s;6wP>Tp zn(Qy0{@E`bla=hHUy_aKEItWN1_lK2MP9#n^4X!s&eUJ9oX1Vx{V2NUMj$PcV7CjRM(2m z4B!>>Q}y~zd9}BC(5-X;oR{GCzkDJq#a{(=QJZUFKf)Zg-5G{RU;4H?b_js!S;sJf z#AaJg2H7NA%}&=a3gtG40Mj*q^U}y)(=~gZR7@=n(uh95bd6^7kDayWP)kPF&MMoP zv&WS|#Y$*jJr0lBLsx^f{ZQ+bj!kQ0&b_;g)WdMg!#7qg!`1%SKj#}N&Gy>Vqd?AT z7ig;|+Ys%(O(f%6BL;DAn9y6?U^{4eJp<CE1k%U%Vb~t-V4mEL5`3uNI4Jd=#iJx`u53emeedLB5ap z1yI9m2npT|CHNNeZP0#7S1g;)z@J%9*Bh_A0U64QeZlupcB%X0^%XX+L1*Q3q7O2u zZ_D=lZesYTH2!0@c4ybP(f`pJ8hgFi@|Zi)*=LseiC4JdRkidL`F|xVJTSDAT5#V& zlqFxt^WuD1zlF53oRB!aZ`gvKFI-NX?VFtt)jL9#O^U5g9cOvBTsmXTTC$;b(R4fJ zd#_L#KPprm^G0&tGQt{7VR=^cK`UaTC#p_)y0VPS-(H2IfvfCHiCy8~hKn4sv^!j(J5Q-klR+ZIWel6|C{<1{Tn@)mG@+}bb*g*yKxceZRm zAgakZ&4FHiqRRYgN)1jnfEva+S|gMT`A!W(2T;Sv-l<_wZc+eh*yxO@4RgcZKd52! 
zAJnk(cWPLv(fvGo+Q#>HYS;y1dJGQV7FxsZp~jvX*9K;Si$&2OpfQwsZw#+_k*K%I zDo~0mnLQju%`V9?q|8mFDyK;qy)%yk%Fu=#{=p4{uRSf+Qu}e>)`VmIJT@?)S3I`c zza>XyBMh{3jdAjVxMfakdu z+Ie6mUmZHd?<%y$Kj%Sp;7RY^r)iT|$xWeuEVu3yiG@%V;_-9{q)L6(WlvJ5t+Ss= zo={`U^d)ItffvO%1*Rt%TFg0D`9@!$)S%DtO*X4mhWODNwh@u?ZngdzruAw8hH0ee ze+|>JcF*CcnJ7`~I0Ltjb{PNShKU2XVT}>9%unBqaNCuVD|p|!?jWS>B zH=u9)1LCAI3ny#*4t0XpoL$(koiUe+nJssps^<>1o#8MK(9!ciRYy2X#y4Dv1bNN* z#IrB`xsIZ{&c|QuE=NTcd>*_$s9~iR$~C_&{$V~iP#3WH@2O$`WN0g;=iq2$|4%E$ z{`%8@ni~(ykD3G4DT7aXb@H3)oc!pw3p=wECzw*`O~JCv|fFc z(YMeYl+mSl{_L_?T;4?kjoHMA{P_KQ)Bt{1JyQZYEMM)e&bf%vz6T~!oz3<}K&COK z2A)||PSdW9o;gt%hPx|u50(pb?T2^27i%|YHxz)pVLa&kqASncz?j@TqsCf-P*izP z_kPWsziR#3R$!ICGQ=5BrEoO*kra)>dJi{h6=Av7jJts#pL6yIw8PNLw&401nz?JW zL=i^1MQoVtMjJlLHC-0c-eaRBr`-(@C^i(`m=6DOsTh~VvcluV^2I*`+ipTFr|f*hn*E>%V_YeTKm+br z{$JklKh<%6|0{o3(f;+}FB3lXHTyI+l+KMJ-R8p#m=L_3dTBDLV{4OXCWWH91%@FO zWM2#d4rdo<;|DM2EGr$oXyO37)1dtEe6z-u?&+*h@--fyw*K zwnm|_fC4potmQ0P;f}>dO5bo41?8BW4$_3z4Y({sIr4{V(|1dhD-xsEp7+-sH$!!X`J2GA8|Y7)hAbbR)b0Diunktyv@()mn&>~ zGiHe~$cfbUmjxNN{0TGWnF&hBvFN9dqjdpwVNwG(0ecNpMTRN?Nz@AKoGu>P+K818A3=5+VUM@+vi!sKh<%CO)MK^Yn z?JYu&CJTH0e@Ks3dZZCV+3jL+G2Sur>HDYa^EYKdDyxmW!5$Z+Gy@NH}G zYWQV5iEC)K0VKALj-aglC_UwkRIXBN_S*o|Q^FLAxipPrWa*YWF|~}_NbKkcB}TsJ z{t~_HPFpcVf`df4#3eNYpIGD!S%x#U(wUl2msswF+pDNaE*=;kHQ9=|KDIdKLMX3w z$crdJdSqU#$~KK&3kjA$TAbkGbW~DN!lW$dk9|L}`I7L--qzTXB?gY^O8u(ip@Cy& zi3KD;_7ElaQc)D}*aQa#8(hJeI+t9=(Ij^GG{R6)HF%R3s5?_0CF`L)BL~r9DFi*D zznw-s!OL-SWSY+nWS={qq)*Y5f9Qqjzxo|_Ifk81g(TkVR~+CLdW9x; z8k5sWX&3u~1|5Zcp!zL^t(hs_73>V=kdx{5WwP;b0ZFpZw4|!OH1z=#^+zQ$f`tR4ibqM0P9AJ%r zlKXYZvalO+d1vshS(7+NPjcBBv&ypUWC{RQ=_E)sHY)_8w-B)QC<^aV?;=fjup8Pqz4 z-%83E?Tx$)ztNtHWlwSY-d`wX7gVpRCHhxKn=5@9xN~t4hWsq6>n!Y#@fmk~qIhao zI2wz_`l!13P8t$dOqzgzTV9aSe(FZ=cV!gf&_k+Zq-sbhT6p&fmOiu3Fn_|N6tyLO z+`yv)5f`5-QNC9{^i=M?%_S!#p37Fr*AxS7M1oKygXo^zF@fieuD8aE`rsQsk%nLc z7{0pg6=%B*1MxP(sa*8c>{)K98R{s%w&}4O|K~ilKc7+~7oGy9Xro@Ae!=(G(2J@5 zP{%XZ&xj?-kqM9=k{yX#ZXcjP`N`@)3AdIta(7qWnr!l77CT}iIwx|)Eblfoo)4Yp 
z(T?->%h9|aO>-=ZxBx_eLMod>4tn4XzEa`PH&gq*jxy~#V@c;vuvRftmV9W4L$VTF zM~Q@rO!5*!0m==b9vOMAQ*JM)(C?V_%U*oDmYv_&)1uR%_Aczqq>MA)BA{r9HL7dS zsbo4+Zwz6Mk{r>bkwg&}Pl9Uj4b12|YaR!YFeG%Tb7}$xk-osh_h6%K*_)Q(VP*Co zRW0kP+n*ob{HDd@(G1DS!5XOtaA1K-pa0si?I9khY7uXUk`34q_p-iD;ZA zzNYa(h)`@V>~YwxFNpa%U1}(^smR&~<=D%I1r{3llB%gmaXGI~*ZJrx$~3kq`Xk~D z^-1}S8>fG+TbIksegiM~*}^Es-{ImeD-e`&{Va*2ps;<7Rmu-A`LY9!Ho1pPw)cps zh(3Fz$!ZUsp5)Bf=|1!r7MB_i0z>yeOY%Y>u9~<9CFDehr;cZ!{~LyhA3 z(ledja7f`bcjn;<_=b1ngSqg{Epd^eQ@*vmU<>n4M^gn~-Tav!C+p@R1}NWzZM3xP z`3=j`yLRS^{fd-t;)K(GeQi!C>SkV{v;&T>&A{?8UQBIHuL8R^Vod-ww7;=(wfS+C zdqxG-zpM^xmHi~IopD!WNKh`wBYp0kDam7voh2E4v4AhIKria9C#h^VL(llCl>Oox zItxAk<@U=ITCB*p(3jbj3uV8E-C$VBDgjoX81xLISWcV8zqEuT5!Do}B|E?8C|G(S zxpk_1Oh;%+q!{l=7L1QBB)=(xd@yI%4mq^?0=yE0jfxAkYJj5`T7o5rg)FqX0Q=W3 z^PHSdgaagTLE!@g5=lr*K^$Q5nTdCkxY_tXZLUp3tr>Pr z{-cH?*|j12R_$!bP>FlJBdQd_P=yFRzIZ`{T(7iy*{wsN)C$GSgWFf9*rjGW*}}&# z_&>uqQAR-|N7fGzQ8vE{tb`M|D{Ju)91^I-xqSz(qFEAcI9+KOBimt#)4H=5)9@dz zRiQ*PUNOC!e1y1eI-gIQ%TfC6YH^{CrKoXAlX$sfhK-W7ue)8Fk@Fmc%wN9 zO6<2`R6#^hV!h7_aSWv7wNGnO*d#uRuc9m;Lrv>tp`W+h4}gFT^_Q z-4<_*zD=`$Lcoj7*V77CdwHS=8kp|u`ugG?mf=J4R^FM=BO$?9)N8CM`S&}SLB2Qe z=8*CGlk(mYGKtb+EG4$D`yT64vYB8a9koLwK#;| z^3E~ln7Fb~c@G??VvLG9^4JbNeymvT2>-YkWMzr&#Sfa(uXAC#+~c19)$SAz*=cnX zGOtgZ7+-v!o_mL*2{R)GWxc%YFPD&Pug8Gx)G|-FtfjNbg)3m1s+a5Q6ZRLIGg-CEX{7hTP-Ur!utGv* z7mkPjn_h(MSI6iVZQDndV;sSscE31;MS{r=f$repyqi|~M|W6_G#iD7=j&rc3PmBS07lQ_PUKS~jKE za=&zf@if5P#gEOK2}|`$?Stb~H%1ODbT7%_^Bv9WR!0*4K3Yhrh4fNKF2|M{ zPJeY_;4;mEGX@kHQRe0>d6_wDf%q+X@m%JY3Rj7~Lr!#Qfb#qozTY~MB*IP24Dzp8 z?lBIVW{$?I?vbt(t5Oqv9LDImI45sP#42%w9mCAsI_j$n>oA1@Dte!V_AhYlb#dt0 zt>Tn`O%uysiH``9)Hxqt+R{kI{RpP@Hu*9k>{$}~nr~{_A$L8AO~_-=4Zf|y3wOal zJG6TW-OBobOCtqvM&nAvY#SyL&H}G!-Bwb|-1OYdOzZV+82q*WJYrfp+87uJXaj)O z|Igpk-(9q|jiZsn-yPmx`_KQ&pZ>Fp{--6!VrBS*QXYsoDkbm;(^l%1-$io|SK21S z!Qr*hi~*%$|UEY?5(ie>Iq`PM1IpQMzW`2+xXk8pvS0j4IKfC_axQlIG1>hctK)2ODDlX zN#24XlG?=CVV~;2CX&V(9@1U$i_=sS%060LrrolbBEY{PGT;*a=L`6|digJ>b@=*U 
zKK#|{p@^CsKXyTnkt<$cH7^Iy=reP#hhLR3`GU2qRCc*M+bzw6LRWm|FAKQj#5$yt}x{GJZD$&TfOjF2Uhk_X9d0TR_?*vygJ_}>; z&#$vp^n2Q@oo+*q!bn2RgP2I`V1#%egd?&0yNbK4%(@Nuv2-w{l*`3!@qUj~?Wz0n zNUnVe zL)}+lUWi37xLV>^(qvA$MsUJIcBXHO4?(NOC^>yva&FS7M0Mwg=Kp{|K>AvDD zg+rT#602ESiy=Mk!4H);9H{l&MNi9<0|j)pWvB?+4w#D}@?NVL^k6N!kl%d@s8)X< z*nJwj(d6}csTgi~zsu49Nl0w*o={@PR(GUhVUX$Mr`c));YcmMxK%(Gn0fy6MM&`v zdf`msmiq|2UlR$|0$L$<4p)f`#$S8cTA4p<+dU83IH+jtyMLz+3xvY#`&u9GwK*~o z?HJ@dxcDXcAOWcHK;8LKgjGGhezaJq_9*SEg_sZSN)IfS7g77e6ZGvFj3nDVH*}TxS`}Hpsa_O4_+3NKUgA`}Pa*d+4OS#$rqE5us=cikZX&Vz^WY;a zJYKH7rrbj6+3I7E)2MvV_#vTFR{0t9+M4>IJX}?TiE0fnJ<{cAvKw~$9AjUs^`#&4 z$LKA#rrL*VsUtMj`*q2V*zJ0|*=g*yi6w)!tPw?cd{}I8z&u8thG6_>%i#pwfHew7 ztvnroo4~aw&YJP)X-;_{3V-;Sk1}~+`7d+bP6a&km@g&TRs&n&Yu@GultDk4cG5JR zco2u@Qe!u-Q5*#iM_Eb3?=8-k|5lpzma@nm1#c0`5{!Ie@6x7!Z6(A|o-GUg{%Xlu zJ+5RqUapH=w@qfGoWth_(-$FvOdSxXFZOSO80Eno>O(`%3N?(#d| zh)J8`%IY$ABY4Ao2S2On=)Sm%ydiXXXcfUZ$+2X!m`N=pisTfif zkJWkWWJD8U>V~$CvyQSja5U)G>vfXle2OE5KfX?O3NE#=mQH`JMKs@52RtRL#g!LH zUVo`MvLE8%1b5WUnE`A$ ze6ar2yM_2)60EGfjjaQ%tCeMBOb1*q9c+M0X6^_a1uUdey%qwIz7IsTdXlht`VgGA zO*935&)}|V(zD}%lh=)aGanjyG1HBjHA6mWeUx;;H{8aVgkfUf20F0b=~X#NEvfmM zkO;qc{1S&>DfXh!tQk@0 zf2RPLQi4?BYt18ZzkIOIeCkh$d>dwI6Ze7WK7%R1P`r=^1&hd1viy*GWWO=}22a({ z+F-0d=p1m3Ja(>P5H1MnlZGdS$Bev0Y1j5TSi7yE6fDI9g%q{c}* zS~n9PR@b9zi<^69boD~)s6NpzXJ_x|iAghrHuUaEU@fPrxM;s`H#swxaWh=c2xb1^ zNkAr_a}cwE!wmI%Xw7K<`>Padnyz9WIs>IQh00ddo-R5`!v$^WI+XlHS{;8LL`dCknraqLuhx`xPyAI z^D~&&KUBWR=^NHd(4f7rVE@b6CCXn03;;v}J3OHM>$ z2N{+>NsdTOM}t(R9m+y<$J4A2tiV*?5qhY_x%x)GeGGzYjMOwJ0F@!o zzr_FX|2fwFstZ3N?t^0W-z+t!BLAoeH^q)Y0}NL_rQyXYp01=Bz~FLDOq{Yx&Uuar zu4CjnCklwc;F}gGEW=@Eu332p6|O76rwfk!U^B-Pd#m};mmJ$ghqf0YEO^Yhe0ocZ z@uLJfhBt|QUa$)3e@CK?)ztm2XujkPEO zRj<3>clyo?nKGKm1!&Y`$=LSwgJedYIxAQKI2hT#d}jXVqknLt|92k!f7(C!zu;T{ z9~O{`{VzEBe~zmEx%>NHaPjz%Y+h;qL{*nZjLhbI1L;XX{$fD2InQEeo=Vm^QzWio^QuSlgHQD4`B|f~DNT8t@Bv#Jwk|Ofqvo+u zR2MgwW(2)`rnz*&6yZ;ft;>}o9hdQbiYR>5HmGsCJugOr{u=SwS*=uIQw*v|fSkTk 
z3(5NE6R(9(kblqTl9Jixa2su;4b`oH`&oh91Kv{!UP(1^(l?g!3ScT<)f~d&GQ*vF z3Zpy|*9OCRHih33#>a4BOvXf(28M#8pNgD}7P>$gIDt!+B%sSmv>L|2by;L32@-DG zRBf8S){s{_%MZ}R;GThqq%2geCBNGq_DYYrd(o}_ zIN02*eX(Oz2Bi_;rLJjkp`pE8^oC^mSm*u}E{32kx7wCr)* z&XX1d8Y4G?OM;P&wfvo$Ggo^n6e=z~YbRV=S=pO;{2HwAls|S1=E)0$o;eEUw?;oV z>{G|rrKjEHHn2Jtc=-~x&}ma-Hm>)rqQh;pPF-FmkF;#4moU_&GRu39ELeQBL%1$4 zc13LGvg4GX-mV+t#wThgQWRX2k@qmdsMJG`IW1XZ=#Pcg@C`3X7Gh84vfS?YVEBftr zhuIN#HYfu3gp3w^@(L5CNT$ENi**h4R>MSBhi-8u|IJN|M592`6@ z^IO%FblA16qE9VILF0_r)z;SiW;+mGbHZx2Qk;H)=hgLb@Tm1^Bv`8DYqCrjX@K=u zGJ2$W3s1ka8fMv41E=hQREnb~Uv`C_NZ!;Jiqb5Ofqz&)+= z{I7oc^GM2{>_&)XHx4JNC=$qIF4bwU-OKxOx*LP_acJXDWXbC7>#y1NOoyPMkh5`EW8>LFzbG!AO}~uN;``xH3Iq(JkV)3 z)&9KiIrZ|}DL|r8fLxefR=U+_Jm^ih_@<7pgrhI;_vc7OIC1%mJL?Yb%n_*L&tW3) zXkvNfOjw!_wktBnshsi8mL6Uk{v?A~TT!FeMRnL-YMHn=Sp$$w@fox>RClGQzGKx zXhv&M-57WCl2hWmnVBGQWKJgYgne?Vk|8nG!$}{7)jX*vKj^AS$2~*3jm?O2ROvJ% z+n$VD-s>5JAmITkFll?UB2eAW2@?CC`CNkfg41K8#l>aT z5=diT!kW&sAN@5mi#`dwgs7I1X5`SA~4;e!U))JE^7P`PoG_?o6M4RxDf@ zNg_@w{KV3XG2R2RS&!=2hD>k3n?YhFSOnPaG3OS%O{?bUm9tb90JmE6HRoaBTcLh| z`^1`GAip!Q(b>Pjz7+_^#G8E}x+)hgIU`{dtUh2QFW3lgng;N`=SCc&<4p;xGHsTI;fz&*CuyAZF7S5lC zLCmwQLiJ$Ix6{(2!mN0y;p-fB%AXF3-pbvc4T3!Ekz~hm;lCXvGM_v{wzP4C-Z~vcUv85rEo{Q>^ZwGhUEJpwkpH6=cFb& z$t#0ZSn2zS_N=RR&W~%mFi~0k<@jSj&aMe}eNoG+&Gi~z4Mq!eSpxOG28oI6II?TFXQMh%&4Kv04q zPlhJ4@LZRes`?qjWYxcq9w1u~S!7CG4ieS$2YtUi@C%}#+;%u2nmSohby9swkuvl= z7Kz&|qmw=q0{qBc@{@hYjG>3eIZj*uaiQFznp__o&DE?}X=^0kuO+cC*1c~jl!2*A z63nGwiZ;>3$i)oOd6<+?)L{6#mGPzm`gPoSO+g9Zpmgh zIA$GRuFs`BG;Y2)-QLv2xm+2&l?DHzDSe&7$}i&h`jx|Wm#tKj=q-y7JcKp(yr$=2 zv8|1}lWUV_p^m3Da}HH~hW!P58Tfk|34ZV|uHhtd`)2>xH}&P(D@E`vrqA0whd>eu z5%7_|ZS=ok_k?=ay83O&WSW*Xi|-k^*1e$e3I(0YwT^w7N`)lGyDWDb<$*CnpWAQ zFx%HiT01+gZc$U?=GjpsZ};!5m=*l#;hIF%`89OU}r%4`;f{8XO~+)$e~Ko~ed&6X^X zEJ`#g&QNvLMGIragS^DyW%SAx`L7;N&gzf}xsS#-Ts0XabJJ|fmsuExU+&5ZjXK8|X%4WHL+TMRTOb};{RAPX&f@ma$y|FbDd zAZdpMd}SN{%5DiKY0=3u95p>Rw2J~t*ZSUR__njF13Dw8oV6!bS=s2DvEu4y;bDL5 
zALTb&8FVyZ@FdQD;Mix5H|xQt%WZ;nEHd&X^&!)ySj=3EaqoqM2-oR4W@ofC>Wg34 zvCNqS!0XEg#=gkkmc3XSLa}bJ8wRg+PLo3eLSZyYPgwdWUt^VOd@JzciK3&L`r=bs zoezs5$?zwC?P!0IhBqLuuSOWA;DnAcfKGx*GVlj8_kw)HiI?bmA%P`ZEWbs8iB;ya zu<{4gB1%sPj9T1NsbBbH7usAbO~_r`utP3Zglh)B5$LK|bAL=lARUck@#c_;*qJr5 zcRdV~vc8{@tmGKlU%NK09}w~x6oIKu?;-E_A0f{UWeEuR3uYyJx^4TCoHOFQxY1AQ z$nMtt+yju~Im34K2|0YyvNn$PhLf%-0W#8`ZX(tCTmpLw!RiX4JixB|;-Ml#Rzk>! zNHSoDW{d5IS26mkresS00M9ATv=Gbpm~t0IefN}`P{~5uP@l3^!#Q?~sjP(5Q5o2c zSNeYSeJl+g*G6dV`P0Vp`o_Zx9Nfkl@%LkQIThE`xvi7RjJ`j{Fhf^Z;@86g*znHz zJSPp73<`W~QvR+??A^*FI|c@u-J#t49zSfeY3Z>YXAtSxN;@JBbcYb&ShQ1UOqC-@ zJwNL#>+NQu^RV?^`XqN}70aU1kC1Lca9>I?4Yf z)m7HA#$p9%-6-@~m!TXzKqR`mDV>=y5ug zXNy!(-Kx*@G*^UR9Hjkqfu_Yg%*v+8Wsw%9KvUyTF1Nsf2(d*u8NP_kqR+agkI9wHjH-_{{LrB`7jj%DcLPXbc#K{cLJ_kPcy+E?WQ~5KWhDZ!0yw()W}qvP!{|N}r@R#^90Sa5l5jCkNo7-4tqgJC;9I*x?@vMi9Rp2kt4s4i%Bi{Rj%O)8g_ z{*ZB^_d0zk1*UWvL|w|$@byZG9z-W*1>guNdj_Le7w8N69{Ub6pA<$X`o_|G#iE;4 zQu}-_UKQMp)z&|=7y(xli&J31{`Ry)RC-4H4QL`Jp<^c_5tJNEKk|KLf$4$)Aolfd zKsEo4{hc$Op46mjLXfZGp7QUpj|7N)HB1r{KYCX`zrMT0Bh)TGflB zNXb@cBSqGCX6E(g-rK!n>h??1<@|m3JKtIEIp6udCBAjNF4nfI+9Y%Z*s32CG=|jK z< zUbQ`DkHUbsSaeeDwSlR2w&}0VlNqfA5yU4pORvxWz+(7NB)Q`psfUN+cI+u^mFal5dIhF-G1#tIv4T_mk(K-&KHieuCFQjT@s)()ze+2H;|oX%#&JV+O~+*C znI!I$X76WRZu&Cf(!G9{bcgjG?}$$0#gW1f)@(cS`Cg1)gXe^!@$<_ZWqluqM5_n& z3U&>Sco~b4vjsFWMR0aR@dEZodG{a4?_rA_d*$){vG_C7({ZQb-JbQC!n!7svOnwg4!-hFL*H`%;&$BLny&XL(p+LO5BHw7)NH&_*Rw!}?vnejZk4y$pWr9CN9Y*d$$p zzp-;qSaYeA>iaGeEjjzBsVK3>_je|9tt16+KC99(M=W6^dUM(nQf0KA$W`-B{gkE# zSB)1^I_WZ*8?&y2Rb185Ub{M;(8s2~%Bf@X-3y1l#Jsu1!>7LMZQtr#@4yJXv2(d9 zo>de6<_Q&dNu56XIA5PkMh`o|l(NYH4NEqq(l z96R!+PTW@HFiy%x`3=#pyF;bbQ6;Nz<>ztF$@-kk!p^X^qHgz6A6-$s0uleYhfORy zmiKF#*SYWI+AG_))Z#XWh3K?VZ(Ws?TqJ*pkEvnI-1P0IJFA{Qa0zc}lwMMp9AY@k zo!Dd3t2B(O-4flUch`d7YspO)oWOOxG`rA0d+c=%n&t&|9C&aNZHKCk_U2tyrmy&OBf+$IusDDVelRKIrq z7$89=Q1mMXwpCU$N8qqjfA*|K3q8 z!ig=oS~m9#mU5S_G6^D)tvqqp=9mgugF9|lZx}@+#R=WfRT7q0Y1NfiYtrM={3}IW zRDNrW_$l}N^A;&h9EBp1Ha_`f*#V7HJDgYf;WxkTzslE=OA_7$i>5wS 
zZ%91!Dch@SzpC+;fXwgsKZY|6=bzACmuGbT`$u)hUUOj+UWfiGc+S9(@brwQDf!h` z#`Yh6nLIkP?~>%FndD0FP7{UP;R!SOb|SO zBqs|CW%#9EC&6=32sHg8-Pcj;BLQFPQ49!j0H{nSflpt-5(ouGb(vZ377-X0`eVie zxur%8r|^sDNw8zX1e)%rgH!>w)N_DUFoCA~!64DkdrcfK{HG8lm_oRLz0Qut78**1 ztD0Yk8L}FEddWy=IC=pXeP$P$7Iz-C7NJtZ^&tk#cXl+krm-tg0xYS|PNrq}{6(Rc zi!g(x`wB7AG+Z4@AYs+)LQ|7-pvsJ`4z-2MkZH-ne-W*_h#9mRw$#|>VYrwXG&R@I zFABX5I2X^ZX?5)8f~{)nCCs4J=nhrP+lT#9X3%thB_tVJXeV)I$ZFWC#;!zHFu=^N z>fadyJS3PQt5XwVpxOsiXn3FJC&>(2?RQiiB*hf^clwWTX=c#amWOR0(K5`C)qh9m zI9X=U*e$Vbb!hSzFdL!-O$4fiT=8-!Waz|$YRSS`AgnStK<6hNd zjH7kw$j4Qk*j^I|#G_=ffj@4PE@K|8w?*>mdldDR0dF3d+QJ`KZw+G}>daS$_g0Aw zz)=!;z{RUJWQ?N?j!5yKna1Jb;Y}FxsBHy_gXTPji#Ke>7)P7RAbHR{w{Y<;TQTOL z+8#8sCR{w64PzXwYe0$zO*RM@@3S3a9(7KL#6gpv!EwqCjB%(v0W={K9QWRdF^)Rf zK}rYBAp=+5IRaxGZElU^K{Hmsc_!Ot;i2{f&?E(Lo~*|#Jk+KJjl~Y<#d^)cLv0(- z=+bcBpwBEkRBM4o+l5!J?@q=%>I)jOI-sFJ;o=nqGRDyY_9A)EScP!$tV0;{sB^f4jADk2Jyn1Pbb+fB zw-<#BkruT#goXlu<8Y%41kqGE1 zC<%Ib@Cla+L!ch|VxGpAhDF0()xef*-bpMP_RJfb#-5Hv!+u4;rp;i|u$wycGzPpD z#xt;F!!}Z=H#giYc(Y(Ms&$~YL8!YX9I-g>X9Vi72lfAgBccm_Mxc&#P=69QA^=2m z`2UsRKl?@<+@L$saD*FJnnWT{2Q=vJts<<6xL>3p=)1JA&IC`{+QzOX=$02)8-QF9 sWj}N2zXP=ohVC#>cFE{hb|vbbnVmHUSoCILu?2q)gH^G}YRdoq1)d`9cQop&uI6Hy{v|_rZYzBz6;QLanEm?h;bu{Wj{&a@K8#Mw=4%+ zRqc_seW56$S0*z3y91N_69%~ZByDD#3I#b-8hM-i*c>HM{CqOXq}H7D%4ek(m4IpV z9p&-^dTV22pcfbtV{ybiz${P?7R>A6*}4vBQ{>(jr;Gh#cX3I?atXR5v1u9Z`t<96Lr zS;csZYd}K{-l1h2Hf}jjRBBzS_rqf^PiAN%bnycjsXE-Mx1SfFnEQlnQ(1Q|n6e(D zwRLw^`&~S|!dEJEv!Op0f^QOxc^dhI<#hU0vbAP*Xm`KBz7D{6xxc|7_ zy9>#u4ppg-pBg~l@v(DZ%_ro|^K{;yS9$7Uxa0ltyLQv-Z3bbE?DALohk!c~@G1?+kZe}u4&lID{?b8g53VsYx`Xy0+HEUjwww!J5~>t!7mlqw@HtdBr+}JeOYAUXJHvLYW>ZE=6Hf%9WlgKZ z)tQXEsF9iea-^8hxF*EcLF8@0ywg-nRK2gQBBuS7T+A^7p|Qdb6vsMdj>EusW?a*b zY2P&pS;4<Y}k}^`b_imJP7$l3kN6R7(xNmB3LU zs9+P}A(P4Sk%%vwSdE$rt>a*(#j>1oS)?h(%FkBAyQ0GC61nTGMcT%xITmuWRuhzd z%cl@-6;Mw^nui%aK!O}w40@p>eYU9g3>L2D;jbY_HKm3oD!pSjB$jkp-k~1n%z5=Z z|JlD=;M6`+(ZKU?Rwh`CYJYVMN|E-vHWLY%r)GYbhUuF3z#Px 
zC2nLA4zM=@TKL)Br?)KUV5eg!Qeqiy+`Dzqlf1tPYK@1;Mf}hMu%6yve}u8UF;Uv` zN>`M#FKd>Q4U{oC%sJ7$IBxO-ORr8qAxJSu%5N3bNc#i&R;9CHLA5o*CD64Sj1yIE z7O^ytW5o;HRr|q}-c9bs1LTQyLNC*?79I;S`n?6ZO zGRMByx&#DS5Iry)t<%&{=fiM%da}mGtd28Yl?JLW1nuvuzte>je50#vH|*>1IoxeV zcp8(P)XaM^Pn%&EQFwJ;OrWvf_E>?sSw%hFLCK>LS&fR}M4t+rNr3=`e(|D%y%mr} zPZbX_tSn;E9Nx~gM&k}6!{~R578aF1a^Oq~I6a=3P+_<^KDcMsSG-ScZlR*>2;PgS z^8e+=#`uEwWwJ%=cL;7HF7u+YX0OR5I)O%7W&ITm7|UeM6H8>FwFP+crexr(MORQ$ z%lb}Br_&)|!%U&cF#8md*@_CZfjtX9geQbI?cxRsBG5kdL#RDZDY6E&omp+93M&egP@=G%y}a2`mx|=1xq~mR?4%?nBMUPYDv15r(#Ok3w0CwuzCG7PP}Tilj98 z98n}&`ZfA3q*uS6mDH%{qz&ibnLaxNLg|gzWo?xZ7ngtLp6Y4;sf_Iv+GogB zgS_*T@gWfP92!$IKcPF-x~=gW6SYyjIDmq3Y7DiU@)qDYBmW?N)peu3^j+1(-`f+R zbIn3Y4F|o0x1SBSLsi{ck;X$m3=x;tinfwg{ZS`O+wQS&)^zFlFgrkplRf4zN4`sByUi!89cx}5D*-uk1e`F8=TjlB7 zbjcg0{{T9^8kNH(LrVW*&Ws)-AhrA5dbm$u6Q3UhugTHC{Stq$l%Lb5F$j?+uoap> z{1oV%qy&6TKjIhDd7Y`q=qJF(wab_qBUyRjCNXC#uOR#F-9Z70A}Z?Xm*&y zVI)(NOF&;CmpvnBJI}Xcn7W@))&_O(^ok@9Y-L(P8*EE=80{#FcPyWbF7Q>2`})05 z@s>rE(00x007QDtSLnFnDV!`pl;2`oWy$DBTZ!@25HRG0IE4dz%b}pRbDMibl3KWA zS%{679)>9DV>NbsVVhyGm}1i{s$#pC?T;f&_4N4AK@)S4PMLly`Yog74pJe+(7`?M zMC|RGy;P``uS4hlgljG%L8?8m6?{gTuIX1F=j^}zaoC~rhv`P0S^?>2GdK|C@@rCLc+S@+k5Rjo^ zkMN+Q%{t!1aOlQI>*>OjnnPfqDQkN%>CS_=@SDze(bBRjq*=Qt%)|RU!wWi=D2FJA zferhF*Bdr>{FGPL;$=Z~07!nC`dk0z9o=td0OG9bvEIb-h zdYN@g_?b0FHSu+dW91$*8CpS;Lc(Tg+iZQ(kh0x}3bq`D@VUKy0S!;;MY%iA#Lkuo z(FTrPIa8VpH^&pO;)+mfHm-p`7%{@jB&TA~Z;goe)vmbAS#0&f*I-RbTx73lIs2*N z__#mQ*=2Xer99A>s@OKQzfzakQ=ihLJrxE6-~(^V(%4_GvpvGpI%jm80DPNlww4lX z(89xG07><%Q!@dY7_yjV_#{QQz5&As9K|(P8y3d>ic*FPu_4_%YwD?b`+x}aqfBmOsx3YFHw3pJgcQCZGr~j81 z{}AwMnpV54NWAV z0|c2x0^~AUaA?*Vt-ahdWhmlRJ@^Im&V5j*$S53PDNjNVLw|Llp&b4i_%d)1p(5Ht zt?j}g(wN;wq;>T2qf&s>G5zWF+s(3vmeC{VXcAiC48?W|AvBPn+(M68S3Q=vYNTXk{*L0TY$Ez;zq}DD-G^>geiMABf%d3lPUs^Mippi$0~Tu2a9D_+J84!X z>NtK!QKbuE_9-)asg3m@b>Q#P*~f%lVR1Gh&{V9>wMLam5XthfE)1~t@-yiX$OG$a zRbOinzkY}Y$5_Kr2I{d#N>VWs9hkG($y(K40;pTKm+nD5Gc z?pyNH5%G6taS>8?;aZxjrPDc3;}Z1aQ&5R{wXe6KwIbbjR6%{oL5$Pc!jbGHk$%lW 
z4SIJuiq-?QLJHL1+zf=`RfTziW|Fec5Hn6{VOsR{1X+cc5JEsNtYCKgni)?0mG*ZR zr$-6GWhL86FgWgB%=1X@YLem%n)>#IllXeB7EeE=zu&4709wqhyj7D_|8m?=}q4}A#_uC zpThJa32q>X#6QBG|3Y+pS`%^Lqo>9Nw9y{fNg^ZqG3>;S>)W?VI?0N+u)70PWfXs+MOB_eAXpHR@V+N zz0hRX;REDlfI7E-uZuu9Ic9|4wmr!6%>^V7e1lJJuMY%mHi={$MyBmFx~vDLtO0PK zF5|7X!$`mS_ugKg9rP&*Z0?iPOPvw;Mjzo|V0c=>zynvrz+ic+I`9Ns2xc>2_}%GR zd>F(+jT&%fUN{O<>zo5@?`BFiRp`{mXLzQ!*zs@I{t#X$=qF_OIVX#dd46$UpHZqolX5M_R?AW zzP#@P(_?aFnKneLVVLt9bZ2L|iICB#gm6Jc!y$!#*o$9s$Z7tyq(1eXb>>2glNPI{ zpQ$4pB?-N?!wbXwTs#im9aiKy8%*BYtK{inxKzD$SM4Q&QfOxG<(EBuXuD2_bH$M& zLIR1Hmovd4cXse6RlhasYEHa)%ArA&J+IZ}-;6T+yh(%)>Q~Z-o-o(dV9g({xi>63 ze?R^aFLekz{`albZ$ES%%%d7_tl6#K!T%tByEiF4;S=!_#Q#pW)BNucuWe^&@xOwe z?$6oC$#Iw-1~|w|@=HAN^M-UtWI$s$RO^U`&y10#?OS&F&c;R$9RjxtG|Bb{tZ7SQ z<}j>2?Fjq$Hm}A5IUyx%QQs+^ohk$W;&~d*1W~eNnxtqxA)RfC9fI*d zT*g$GpyziL*1Txty&2(gzZk@vSiI8Xp`AwHvjLL!9VB-pP^=`Mk0meM$iDDonQ*!f z_HMgjJz$}^6b{)I=>@gf9bj{uhNtqk&zS`h3c6d_tLF2U5(EFcYeDd@*FwR<)WGnc z7vb;m-)5Jod}g=8iR9_m;V<%mx(r5<#{JE3_uFX>=4nF9S zaSe`XL2ZR+g@;q^=4n*NgVl>{=KPAAuex*gQtIKCJtJ!GSLWSA!!6^XKVhKjqVVH4)~xyGoC&_RcM-v zy;z$X9reh&oC9r&19BgOG;Y4tsQ4)*tbe^Pocyqpz30meJ5ZCM1?~W%e}hp zCKNQ5kB~X?ye9Y^0h0DaxPyG5HWM_F+JQ0>J;pce6&7n*kf)D{fYDxHb2@}E> zz*xAxF?J8iEbSChn)rHtU&FAphE+RpKzzA;f>7=jBHtPeNeqdME@{?5Sg zs(bEdIG_daVe%|8p(c6HGsPs zp&(ukYIAxIeZyUC2^bJ_ODWj5kEW;Xd~n%y0$p34OU8&lDWNt$ag#Oc2cV3+Fpu!E ztXpUw$VGb1krH;jgC?+V@rfj1@b$yg-?`JTtp*f0%S&8V)rFs9{`8$`^m+CJNU z32(zfu$sMXQryzBTnAO?W{qeuh|L2R-_q)4@#lpJv*EBq#@N&5Ra;DhKGk37nCI(+ zpp!6pvQhF7PH+F+9$2~@4r6=1$8O`CI9j0+K5uJ@_dZ+wZPN9pZN3Hoy~xAYo%l;+ ztHt)cqz!c}lK|E&tl<=OLZ4+PmG+n8M9Bqlqnxny4|K^q6q&9|2(O|rEO>H#KCCZH zV>R1*k$QEZxFj1uzCI>BSn=4ZW=%}iCNm1!%GEbny!OTMqH0t(&deGz8wmM}r^M&1Fapqr?RfNL%Z$4)0R0E_E@pr636 z^j17$xr6O(L>cr;Px#+gX~D?6H__b;bkay2aql&cn4%Zd>Z}W6%lTxl+3R`pF9lq? 
zi@)d=I=8gtdrc*oxpOghnIstiE}UM22-s9zkOMfOJ3lH1g72l8U5TEX5!i`tAFXx8 zUt|Owc$&6^tDU|78WB(cc6S$0AfRLDe^T1RF)!;GJvJ`r6!?*Vvyf~yU5 zFFnFugHpZy)2xq^VVc6>*PkueV|R&1d`_n?lv6Tr4}q#5AvU;)dl9(_u;H$RcRFXr z-t~&BD8Ln#Ysc&N@PC+AOx^d;`1xa5{}W96YtdvXuUf6LBfa^(LHb=llMXuJHHybN zHS+t`eOp*Q^eZ%xfU=gDsd;y@)N zgt~B4tsLfc@}{v2T6jW1Nj?XS{xXW|_31oX#;COJ5P`qRPC;#J#GQ$>uR2WPxT1a= zW*})r{CIn|Le%s(giZmWKMfA5n0er;i@R8x7M=OP$D9Ri^EGsz9Y0V0dFe_Mf>e#f z7D+w5xqW{f^C#4V&YY1Q5NE%*SkBk=H^s5kuKX@Q{H7qbTo;>b&zdZ0O5+^kHum@h z9%KJ4Ojud&gv-Z{bQ4%^!NIWP^!sgd%`8!!6oze5F|U}8HjveAkb0hZeI+c!<_{@v z(+7^dUN+emnu@Ra^H>}Pz$UIvb+Tjx?T`jGU;<_9>hl?brYA5+s33A{VQTFRv~_{& zfZyYY$J~vz$6E8>6&Eft(x+@8agxS`u+)sRXb-#P$NgFCnQCnxjhSkT>0r6Bb@Ph@ z)_E(7x1l=3PNtvZzu>`N!x+z}I<#o2VX0AoTIy@HUXtZRTl46Z#N)XE?i_lOmAo@A z$=1R0*-Rvsxwm;_27!-~WNmC6&jVuI{KkpI;G~ zbnYy~P+fk_LMVNG-|CaeZi#M~NNNxNfPc<|+x63G!H*oCsX>`hWZ5+gg{*|i3~+Gt zF#AyZP5c*e4WWiuQXYKm(ZO>sIZy0AWE*N+-UtUz_tH!lwBF>WY#TuTBim*Zniveg ztNeEP)1s;f>jf>_I7qc<(OC?&(KKPmG{L?_phA|cmM%2>K)wCK&ObIP|5J{X`TLl; zR;v`ydJi*)8a9XNw+TV1$QrY4k1;_cPE)Xpj5|sNvtF8fd`4cuUNP6s+*swlCWLH? 
zelEhWw1l7#1VKD%E;@l#2V{}Z<9M)p=q)fSygp1Ip(r}Q7J9-#H(JGcUB zncJG^K1l`cum!cF4?b}B-$my|2V26mm zWHOy`Rp|E4V9|S+-^J^Wsn;|!xfK?X!SOLG#LxXxwypD)g&0A5>@MWp{grKK(#xp^ zHCmY4>J-F);5OFxNKXRw)_?)&mXm_{U0`iBPp+-VR%jb*d2KDp^Y5AfmBmbXQe!cS zi(QHrRfK_E=5{Jk@q@|VHx&3*tECU0lUfB3(hOF1U$@=f(DhC*g`7z6L(dz@sje{H zz_Jjw1YRWl*FaAz;otKVQmbk7LH=dT>y^0005#`9#VYtby3|9HD3t#IToZ0{$S2@h z8Y`GkQwC|?hCd!rip#ekd@^^9H7unrXcc?26hrJQl_4KQnor#wRe2UCw*LTb>?MSF zf23wlIe5=X|qUeas_GEHoF$95%;^YgnO8-IU-E7#v&9^q6 z!+IaO;N9se)G3D1pj|eQ88Z1e7tmV}Y#0e?Y{K$_nKoT184p1Jfcx`Ujx>>J!5D^d z4@^IOWt+kKj}=@^!`^OEsM-%*mNzZXN1hBt1wGO1-di5!pspvw51LTl)q%U9*D?8Z zx=gh>%dBs)gqSBrR=og4PnW0q1n{+vuRI% znFB8J+t}P%ZA4yIrk}2X-*eQY^>eq;>ajL+{D*sHuc)&sb3oek+mvWFGWT4Zn-r|P zP~MinaFMNi=yN1qU`bleHEdpRod01wGMo*=2K-;)+uySOZ@w8AGXIlrcB@o>2h*Sr z=(yeDDD{H)`wb(`c-Ab=2WLoP8q*#X;L-b4XW-!^)WzNO=?K;CbKM6UAkWu2WNy>c z>c}|o5juqg1~H}Oq0;XkJf7_-*?pWu5S|2TJxlUf{J0)zFv&r`fR$~y#b)YhbIH@7?lg*`v>PQKb)BG1L zH9q{Sg1b(UHd{Vme97a{@L{|X>Wh;WmC_v==zY(j8^n1NMzPuSxO{^v>bJzmo!<@A zFWE+$$Jj_;eSM9VslM%64H(o zVKa48<+Q1-InOH5!4cN1p_l?8t(w$X?QVvFu?7E}QyYG~>1bM$F+>BN@;JE6)QgjNN{|k^N?Zt05}dd$sYnZW11#MMe@0t4A@Km;|O>hS1L! 
z!5-o`zGgfO2Hw2WY?D~s0G^1Z<)97Q1JFy?-mQThORoYVDz{uqvQMr^2p(Xtjh9`n z#ranRrBSSXbiNDfj*G$ma@0susry|*U$*8Z^OIvvjHEpAX-85u_i?`|%l0Hh;ftj60ByDw z)iGMg-aL>$b2TEzXW?f-_Hfg#KP4suOl|9Qv_$?4(6l5UIFfLNvq z{j}QjoKq;lt%VMd;35St&74(;pe;`&yq7m6)3qpxAVlDm3Y7S<07T7>U+^33|8J1n z!^}mV=YEB>_EeI5WGnh_tT|++e*;OZ+aRZwFW3Vm6E$ zPJgA=ib*_bjY8%*OE~vb;%NTz>e5ym{50d*a@lve92fv@oL&Q{tcpGl5zcY_FSWQA z(as~|&bb7uKEyqEm!uA+-hqOP?Xc%wdsZKRE%i>V%9N1*D~$VF+W*Zs14Gt-Fz$c2 zalW5!oQN&%|8(Pi+w&{bkpwi7nl8V;Ac%bmxyo$_kUnG%dv<#l(c|5s$Stwa2+IT- zS*O(80I|{$VP){=r`J0@Ka`UY8fcX`G?P)9XNQMHk^d1f$Y-zojTq9P_2~c1s(&Mf zS;Z!11NhHbRW2>6nlMsOvyFpXi#8eF(3`piMY07-50e~0zR@wyGMem&!^STqIk6*K z+{8zfxy^PJsKwtDwvN$mMOPyv2-|Re<|(Cw&|w8;gX_e;a4rZS%46UW5mbBaB}7~v zYRmH-@EI{Az?Z-w^Mr6JIcWq`{3e!0VIhG&W$LO|W6Dz^c6RLEaxccBJEIGx@dm9Q z-9ZuH%-r_;Gpj;=&Z;L5J`o7tMcX~g(())Y{~0lC4EK z&$!NYdS^K3J#UZ`ltPnTLQ zDSI z-p5c#viS(Qbs7Bu+X?(9a&SV@66e3X+y}UZcisR|4})C$2VB!KmR-o(2#+W&b)5paCmZ0- ze0Kk|<0!v+-G=>Nq1@l%{;xr~|5zKc{}Z7~>bhDxI@r_y%ZtBB>OA1nkYoL8$aTF* z-|7x0t#b;k*7F0?!ylXin}jt4$dic7M6K(6*EV<0;2jn{o((OhTvl6x(39J1Pd?Pp zj3d7uRawno$DuJ8IQsIKjBmprL^FJ32IJM~?u`s8LM4hSirKGF)r->B^8B0hr)b>R zR#bUn-Va$0UxgT|e)0-xxiEVQdaOakXxh}FoS8~Hz5M= z1`Q*qkDe*cK}9rvFfv)wN5=7{U%C6mGEKN)GUi^ssuPYOLc)yowfrXx>6EIloRs<+ zdsrxvH3f{k@wmCeWaNq8E|^F$RLzdiuX31gd)^2mDFOwQfa8?jBap-(R6q`tkQRhm zU7+?0u8$-Yxx`x5iP?=GRBe)`(~|K$q|H{M8wR$5qXp(E=;|c^##a6UA~)^rko|8* z9Q=OmgbeFE6>0u*;#y>ar)KmtqQZP%g;Ri{jH2av+(7Nyg)bO|s))ssj*9Z?et)5u z1+TcR5weeCud8~oVRJX;BJD)QOh;eWfaN`G>{;2cunLSPRbYT9nB)$`<lt>Uw6mYj$T<6QG;Ut9Fj%Oe%~}zD?dPrv-l|8wvnfVJHyxa zBD!r!9w$(jIqcpZS;Uv^Ry@iV1N@+H{EKu`%rSQD+P6qbBuq-3=WQc0C0(^tRnZzS z158pb{pKkZDP;Ks#w%s$^tl}?81ycHN4C6*4*>+Sjp&Q8-7W8Ix3O%gIz_fv4Dky3 z1IY1nlJ~D1hWaIJMka-JEA2`&btBRlkT5JF`SJNEoz)GbO>Qtx+NxNzdX7eZ<`6a) zK!a7H18WR3cs1LzTM6gs$aKHkdo%vR6TO60b;mShwk4U%Q+g0FAKR`~WDOYZoX;C7 z1TM1=B+J%T05BqUmoB`EO^;76gpDF7f()k391#A3PaY}+w*^qNSy%7pff>*s%hohB zXcSJ@F1C$FkIx`_RjYT{<&H(g1WzLh1m1NQ9}VK``!%72g)=4Up4%0Hfk2Lrp9|76 
zf3h@Yq<5R7EIBto#(JK+RZ#_{Xl21r3gS^peXP3MAZkFpcf2G^(i4A`4%t27YfeVkLzBMxnH+WrI zWFpn7h#1GEaW*!ydiD2E((IXo3+*a>t)!`6Nlm>f^et{!swLFC^6BQu<}ga5`Bh2< zso3fHRX;T!h8F`AfrpZ}H!IhV!qlVo?f@h*j;UAWAk*rp`T535@3+^plT9Nc!=l-n zC0ZXcBe8kP-C!q9}Ea+=ie?u{>PTgzd+2M{vWCGznuEZwSOYM{c}qO>F<_I&ly>{e+cs#k)MC~ z5h(6(J$!@MkkE7^TOK&%;LGz{bD{fqyohSLlzYKa1T*m{(r~gE!AwMRf~hR>I3A>Ydg)5(t>yPt zhW=qQQF3+vrVipa{ooGgAcSsiqw6P;fSq9gC$X`3U_Z0$0^z>1ceUau3UI0A+|K$v z=$}woQ1*no{{I*%|DOQ=H+tGvz4!z?lJ{rg(&Z^lF&tOk*H46ejnOi>e{fw21 z1?@=A`e-3;>l+4t$DfM6)hejpfWR?S0mV{mtVefp#SP|Zce+x+TK6YRfd9l&}JJ=#>J)}kx35lc;Jq%_zF!XI8&FB}& zv!dt#KPyHl2?XyvNKqI!hy!42>LIv^ghQmhowHAC?2yCo0!%Y0A{|Ds+7C0kvR305 zqC7viD^bu>YXp@_92%*%ZD@SOw}GJyT8)ag;+ynH231b~nrgbO*pZ78dSnf^foMqz z#BtPB1p{v13jotpIKdQXM(7D7gqa*xXaqv=wpmk3;LVK24YVGM#DoBduAf*zey{Bg zHh>;_{9vvD*Y60jIH{j9W$pydnFfgVvXM+2;!5Is4=qw$xwi%jmz-HAji6t5SP9YL z&k~9N0elx3rkIU5l>4Iuxz?7ph}Z<*IiGtlQ(?aEketEo5=BDCSK?2{*W0`6XVwH2 zbQ_wUcJj0YU-TRLRTjmi_uK7+8_V4H_bKy3fz5s~5v4+IHZ2t2*&3cH(6U;hMU$hwwiB5kL~h zejDs{ri%g|+IdEXB~dWWf?8mVgPO1Nm2g!?nQcX}G%_qN-&|lJ)4`I(_!8m+swh=9 zS-3ewSsPUl{#W6$NCo*3O-K)>YyUxT(yb@LC7WQndu~W%bP+2HKAP`-g}3(Sm-kwP zoG#%uT=ma<8qfCSL#lE)<8!}Y4_3;Es_cO-!ZfS5(AcQKQLLBStk!YP%0;KkgK2;F zuiYNa&n(+L7oZ^qdGV0f?$c%jSESZFk!R<1I@qn%{DKaq-QEg;4#wN&mtg1C>+z4W znM1+H{kC09jN2HHM@)w69Ps&;#}H^luAaQ4%gYexXwDun2^WXHfcU6OfCM{-UXO7U z!v?RAfg>jyIf2!&U)C|G znR4AeZsGId^73*cu{421^-vZd#L>R=}tcUV7v4){0(u_0+ zl*AknQk0Wb0V2G$dQlhO9h21Vb+**8r0r_cCY@7die^cOjG5I8NRkUsq~&RVJ5tCc z=T>S=FdVTCeqx*w=3;@OHHxJTIp!#q0iwpiCq)vY{Aohj(HyAeGvtEurWqy7bsI+G zB2a;+VEZ(UmY^S!u<|$#YLt;CYw)Rdz$V!z%t&u3SN0`}t?nYo;pbUV>YQaKj_|f4 z$eItp{XHdTz3Q<+?FfozC=hhT_l=ePLF#h42g3D+PV)LTWc?0Lg0*H#3U&$<1JK8a zxTUTqk)yuro>V548&=;x&DFL1 zojNbYttRDKc>aA-HBu^s%iOVEkpLh@OvRN~$d;zs zb(k>GUD=IG;Ig&P2VN&f?5sQm^u5{T464kAHa7vEBE~ z5}WAu)R(Uq%FVajV3p=7#P;hX^f=r+W_pI;jN+RM2Xf7qav>PO+@8FpK6BC%5AdUH z?lc82Dp%d9YiQNffwMu4i1Dv>z!|X(1YqI3B-tK*fo(mw)Vr8%w6=ranwlSVeL2A3 zma*lj534+*G2*1-VN9Wg;0pw_Mjge&?iq!S!Mikd6*Q}^p;ML1sVO5!rEw6KFnO{@ 
z$oER8SNrdx-Ifp_6QkGZ=m?+C##1CaX*JbBORJW#aaIs5oczb9mrmch4w#oTBFC2z zEv)>PA$E@b)hpa2}IH^AzJAATNYObzW7?I>eRA=ext5c z&*c2YS;-r#*?WG9eV(Nm-*i+_VQ53LW8B)U09de%y?FmQYY{hSBaM{~8D)-` zs8rep+fVlM`}Y7wgG&$qW_{c3mkEv_&eMqTmrZXLXR#r8*e@<=;H3Z`$2APyZQh=p zWNGx(lGgM<&D!tMKCg@USE~e>%nh|3Z15tpP-hGt^NrJI3ajA!Z19bRx~pH?BHrvv z&sK8MgNjVSDsC9LUR|Z@HH{o*FU_&TdQYsbtcq4DL|aV3(t<9)D_i;rY)j8}3ets( zYas;A%dDEqckx%i6ZIImun?5r{FCXVMh^RIxdzdmVP;Mv%xu( zFgd&?S2z$2KNK&RddeR;E?hW5T{v={J6fDMW}P}toj8J=I5NAPrxrBTfz0{)p<`<# z;jw4#Z(V=LC6wBzgX7P$NbM52y)qZSBgC9FsV6^w9Fj4Tbb;yl%XcT*Ry~gg?d=OE zi#%w*zdpZ@YMXrp&_JGi`AQf)pEN_JuPHZS)&Z`*FY@^v1+O?`bjjeSaQ9@}MM4+~ zKMN(Mh5O9jL>+F~{^-@cJo}j4&jwrLLU2{!jxEgT6dly9Gko2$(LM6a_L_uit`m>ec8>NN17wI$ zY;f?VvxCb-^-*$Vl3Xzj=A<8qlafPm8BBP4AL33)nOB>K^H#AqnaSxaLuhJQg=%33 z=li)K=a7K6sLHSgMK;j{#GI0`Kt8C{Xd70ziBn2GO;EL14>8qNk_^kwSsYosK%gd!GQc271Xv|2Tg#+qJ$gaAl`lyw=Ly3;7W#_=noA%E`x=z_2e*$#fQ-K`t% zy9T1Z2)*a28?RqL3Y#bEQ;y9UwrnW;;?k6rPA@!b-awEuX!^7cF$=3cTU9JXIzXSe z)py*Y3c$i@&sGJ1XKfuYI)h+s?LL@WI$i(MfTGMSCaiS0MygFueA#Jh)Dt=Ds46$& zHEo@p`d5t?yCz#lX{ed{8c6^Fl9H#9H0HS7+KGnnel*wK(zX1jz0MdaJPa|G;$;c0 z7%;ND0X|TMA@5)S%-8`5neEnSqE4XZ@dWoP;O{~ zS40o5=26JTCyT-xE8eCZe>fb#q}Wj-A3k9n0Mr4M(M$lrDZ|)P01v-W*re9xh)j@5 z0F&#|$7y+=wJg#~z({I{Y+$x8po6Xvkn4b2LnRP5eghF0tYKAK@te$GF@R>)Ya#nG zd(;yB$9g~pC+t_jEEWpxBfAe}a6<Uj zuQXTfkf$9%&n+|Nmb2x^v2g0d(7UqnvRy0s!Oi+@31j3*gj#W(GBKxHXqvy*6prqL zpuY^IF=;neGk$6^ZxEyXmFxQwAr8U($E1`2ln-(pr1+1X$*BxYQQ6t>n9HCt!GV^HfRS_?xNBlg{EP18n|GE-l zQm%c6`|)=zpHN1hmihCxD;$#ld`J4%?T`P`9qDgGJg&ML^=T-*`Mvvh@m?J3tuKVK ziq04yciqt)pZ)j=la@>K6=gWCZc$o}uyfu+x`X7$y0bnFE-e$q}HQ(88t($V~#41K>w;%^XwM`d-O z?m1j?ibb_COZC_cWmQc(eym$V4Q18dytSp0K13kR#D+um9L;UN9H%yJL+;5MzbS3@ zW#xq55P~x(=Gm3k0>udBL`7Xns$OehuszhkNGG{8w>c1Jx=zq5;;h!+=t})WD_jc4 zQ(OawJya4iYuOFOzm|X;v+U~YvWGszGsNLatOMt#n}GWrqfP$l!20VRd&YF^zJOn6E1-{Q^Dk}18h=pb0h@KmIAWF1JeDp z{qkq)ZQhVj4@X!IfPPB6O`1Feaz_%pL0X{`y@An^k8Vr~wXl^+Xi3vJU<1#U{A*-R zFY+Y#rN5QWIys#07@zcU^9^lJDOtfNvF?y8CJ)S 
zT6FokmmQ?IH;ro;on?*BP{B+Obt)RFr`sDu^-25__Zme!#NN+C8rP+CqRCjUvF$`0 z0y8UcCfJbL)iz-1J-+s6 zjo8!Zn6E)FxdAJUsekWM^sCU=r17(|<{(%tdb+I&}k9Uf#tHZFIKUo?+T z_^0#Pz`9wzyV}4)Sm{pEvLjk;chMd<2Of6QVmI~kG#+s`^@BE+WvP4MuT_{?-R}z0 zq#Z9LBD9v3O)4%Pxj05jS5^p_jcGmjLm&{0THf_KvfdLW9rq?}$g&U(>lSRe2;J8XIfjmi3;48cPPq;2Wk zs6ZHw3okBe^L@PBEKX{V3ASV*9F!;kNsK2Q{~(Jp(pd(<&-G|eMayc`-$}yu zI9_?LFEf5TKZU02k-;UASfnRaC0C@_FE_I^9A;tq zVN|Y8As(#h{lHIOVbLjbaus_-{lL2pH1H5iTI#vm?6<-*+ctFWmk#; z&nDeRO*6J}1Hhv$tf=xv89O>{lJPC}UPCb)#JJfQAuom7gln0ma71H*<%AhUGP1}6 zE)zCt)Wc5GbQn2q^C(>8W}b1`9zNIHt;95wdMlTH14WYK&oiu;4kxjt5dD-PicWeB% zM1a|~Ut!YIH&e7OXu=4#^$;%yP%qoVSZ4>DA{m})ROtzf(i2Fj8s z^8SC=d&lrhqc%%4PQ|uu+fK!{ZQHggwq3DpI~CiuD%MHW*FF99In#aS^vtg_^XI*u zz4PYByRT=jd)-?1HMbj1;H~7uAd88bOibUR=Q;!Q;iRv+9$1WE1e>6v0P#)fJ!p60kvl4#evl2cezGMWX2Eadi{lQ zdTlBEW0b+z2_GQnH^a3m)?bu~qJau{m=Xmd=B(t=d3ns5i7J7!*q9RKBIa!5m3es$ znu#)jv$&WNB_b!R2@^PI%h7QtWPgTUKRnhpvnD)nS&C3{E(5Fyxp%*Lcp0- zb%CIrcOfwTe0eCts_e|}9~F-qCd1=-d*(V#x7ldqn}58UvE-$fU6g-*YYLTRqc}b? zocD5?TgWm*eA%n!32J*5;v=Xh*vH6bGmAVc#(WQ|NRQQbi(xfOI5Q!64>}KtwM_ed z58)|YmC^GX$-&}_%G+o4@g|%da=r(xhr}ATe|Lub^H`8lcAl@g$S?W);(Yz9hV$%u8T0gIRZ#yIB!U0A&gXw?tHSoDn(^Y_lEAs% zGNs*3=PDGlHZVYiG4iH4;G+4|d5EKN4Ti4hzm`^UOJrw>?x$s}dZFlh@KEtl2ispa zu44AHPMWV+mCY-|V?hgQFLXGYM9lhT185Cjn?rw~#aW97n@32Bid)%+H*9IY|4x=Q zPTSm}(WHRFFU{Oto=VzP3^IyOfEzz;}N1N2Tq(`qtPc7W6h9!x>$7E>Dp+^r>tYGld9nc!?v4UoZ# ziYyZYNuX9-HPP5HYO5MkNz2)dha&WE)Au5WA-nNO9wHtVYy>!|^8o!~?L|GxP<aZ10nDbUSbQNd(uJ=T=Af-AdI%&s>d1^iQ%Z-#$myX<}It71mNjc zZxZ6(Fv_2m{Wi2Zz^as1`628PW0jaRpCHbcvHu9}l<>9$yjtq)=aPAPZQ%gEd%M;171bl1AZ-l*rW0Wx%#TGOj#2}NSrmuiTy;y zlD7|Ox1V`5?I|~C)a)H~Ej&Q_-c8cjN_M>m%DQ~rJC6HV4d4wY*&+sX@ws-OYAB(n zruqPniaJ-Y6esRMdZ$CD8hJb-#8O%`QsO77g(f1CPD+&yu$zU82PAhlnL1NCm3gK8 zifsVTiE}a7Z_JZDiM>l&1z!)B>Y_N~H?6f%9?1zY#!fcW zWFaK0bT=I~S=O3@)^oUK8@)S=IGLMmkxH`tsu^ve{URn!WgpH+KLTpeg5);_zY}da z4!>G+L+9JC4Zer-y>y4ONxu!+a1@o`vSFIp4@ZM>PT3dugW0W{<>KP8VUqv&+A3T4 z+S;{WKfYFH#d}GeMYd@%p<^wFMMJhX1ruCAYSB_y{)*biS-27Pf0s431D##ilP!5} 
zx8vb+dzkt7Tu==GBJ^L)B?yv%UF3^|*T8dfzJFia;`9A{_~B9*#dU2c0U&)Jcd9T1 zr_3?0+au>K$+S?8oI~HOi?wQpr6lY{uc)>4VX>eCdhrJ9c>g!+n~;&mh42Nplm81y z{9ly$KQ%YbPV9f+_TX>O9{o?3bFOwte5r1`a|Ek&B*34N8|PlJ2Aafp_6V0f*nf)I zzYr5q#MR-bC#{jz)_xV#s^LYAX=o|W*rJ+8LIAt#v``@;R&3bV%`gZwv zBd3Xu>Sgn5?VB0V%A51@ee7UhB(rmbjvGut;}X(^+8Iru6A6t|?qQ7SJs0AW9~mbE zn8e;layBAD@?HPJX!5fLR{@(_uUMPoy@(toj-Dwe%r+%ARfdnCH^TPekQD!!Z)~4q zhf#QSv`Ks)-9c?a;ibT2!&4dnzhM|%qo|8k2oP^yb zyuJw2X=m$X$Q_vY6JV72H3D87vINBn~AlOH+7tk&Ib zu7}?Ei?QkviR^laOg19j>TXKv9DoR5w>Z;N>QlhzgX%s6k2*#&I z<)zXyO=Zip8rEGYWkgt8GG-)GnUxltG%p&Bhys$^Uo=d+luR}bdn?A08*VPE3Tp~{ zdX;abxvKt_J1FEjwh;zn?FzG=r(m+9{7hCwO9M6}htC9Prl8LwfFGPf+LW=cF-I>C zoIdQt4Mw(daTtUQj1+iwiu?V8?xSU8Wf3E^P?@CFictr_au}Sw^J@IONDRQv*NqzN zXu}=wOUkt)wa6rlM$L4QMii7A*GyLosl)XF+4Pj2oo$$D412d79OL709Cq9?3q9p% zB;mRx*e4v+D=39;xVx!f+Lbr{g1kawx`(m1jUkoD$^g0(>I~wRvna^n-A0B@17Z)< zao`MP?tM!-5b$(0+;^Hitx|F-#{Ap|u7Y16>qa+ID5G6lcgo0uX<0-^T2Y{*?ZMQ& z?K>z*E7EO=63LW?5k<#H zq)1CxjMzkeUSJ3R^sjF(VwbXcUmwL`Cgx zu3Ki-y`c6p*lEc_;wFve`?M*lJ*i(cZ%G#g$v>9=UZAa*kzAG)Qep-G5Bh)`VKK+k zICsqw{mT*0B;<;;OuArRhDeSXBy>1y_acAwx9|nCpfu5S|M#q2P2om!-&J$rm#Q81 z%>tPln*0|Tnc=VGevIi=8%0s2On5@1dsF>NIJb@cn^-yy-8Zf6pfj7{$9%brEPt%G z?oJtNYYp$46c^7nEbA4~6PpHO-~9HOWbn^RQO)eqb}Vamr3;&e1B{xl&Cv2o(F;d) zTo(^-A6>2^SDhSLJ3`eTx)(23BY;)S_Hv)yEL1{cY1fScLAm*kxlfxg2MfZc!#RBK zcY8;&pRBeA&sQTPxznER<!d{Dt%GPCSRCvoc zpWuHI-wBz0(T87YhGMe+`9RoT)A4_7sIwEtUmL2r_NQ{{A87`%wzWFfBoWpNFsK$8 zxe_>UO7w`jpd_*6LY#IH|M$v`=@q(@*fvrI16w~{7XAtZjw8D%JNDwOuC;}v`XYs- z%%Ty#$525}nP3Jd>;AN;;)DJElbsQO?<5$J%88py5G}*mjjBwL> z;v0=3B)W&rVTT(nv7%~vf(5?{DC$`Q z4Lw_Dxaro^GV~^tihfI6QCVDAA7oULtAj-|iI@t+N=a$x^@TubdjV2JmDugl*bk}% zpaT=G>@i33(iKk9iIBgoehaO!x4ZqUI&BJ*r9iX<;~C)tf&`OW4h$g5wF400A?F5)0Fzq}%qPlq1CaKR z^8#VU;Ge@~<^W2E%`F6%{^1u7$b2R+ity`vTfYgf0PLZ{*`L6f$*-Jcy`SX8)aCTh+`dA>h)>O@^k79KPubH>GKmhD-VR^7Z!9d^Jwqk5s>kuz!KUPukZ8C+EKL$ zfptotvlWzH8i#`S*gziZ5iA>0b8FA?rShn5P0Pw^%}#RR&yG5wzV!&qrQSiDciz^g zzfmBas;A5QuU@YM=>K_0{fi|3J3uPozsT+VQyav;@AabpgQ=71<2LK8$bT_4Us5xF 
zlPMf~z{UW?4x3135%l|RRM2s6eCCZ*0MLr-jCTE`Z9~o;9fdfLDfQ2$NK8h(aKx)l z&d|0t?tO0G+DQfLKnlqSZh-4$0L-a=LWx^t0A3$A?|tBC`^4|$G!EE)I40YJqj1xs znME_8t0${!8x}HP?v8`M9)G)O4;{q#st~Z%d;I3MWl6!G8 zxiGWYc_%qg)-VXeV?eH(E1RW9fl`OjH(fNH)MD`u3hLPaAn6)7!x_(T#+&GDQYso< zQz)t4@F%(cGv?Xbx}(Wwx)i*v>@JUL5A>qlS#)& zRp|%=tu!<3rX+w{pCa@rU>vLm8rT^g5|rRH`KX{-%P*&g&|z1peiq7AyF-nbT3Osl z2;(|?hP(zY)tHSyQD1@U7tK#Fq>`=4haC-sS_GLq@9gsahW|3?h zP(9GAKv~Hybm$PeWQ0$k25*q^g+qZZG&bajty4bQM3iMH6BV z<&l#sW700B7_cf$exuuh;y!mA=C`#|iJb&v%w#sI54K{7ofH>p(im|8UP)&*Y7ahV zj=c~Ua?u#E0bWUCHfjz&XNlbq7kbbb@c~}RU^eOw=C;ItO$gyujJ8-V(W$us9>FuBwG)0GE2oCgZfGc6t$J^%kv0U|UraDZ~u9H_JedbuFP>152 zIBh0I=`EJxzEZw3*aaCrq7wc6mBj?KkWk}|JoHo3{lzHatWl+qj|#4NiT9|Ekhn89 z#h=i?>aOHPFBs30Rv>v_z3KF^&hHYEzBS3WFO7+hdNjt11`$Ow2b`WYnYY%{4LG_d z6l=~jNFJ{qW3}GZ$%=~;4mldqqtvM}*=75R)r_Cb*75cw{?QR%AC&=LAA>I~b3G33 z>*jjBoR`csM%b>HY7Nm`FmxGhgQqcOs4!#gp3a}DX=Qe2kUu&arHGVyyU-W55SVFR zx8*)R7ga)p=@A+QXuch6<3B5+PmRN6<*fU>pACOi3pG;--(V{P-HQ1rOvc{RV%)pT zY16qLD&I?-XbNvyBcDroWu#{R)D*saN-3@k-+aEf*vI?B;oWb8?GV2>eD8mncA)w1 zA?*LN_v|mu{vV6u?8NXdLd}+cL{0xpC9v})YcP6Vp%j_j02Ar7sX4**%OjWkrYkuc z6P?dr-RPP5mB*eRmx*2u1B~3a>$1h&kNSQ2u5xYGA61z+A?L-xkC32rkfddNd{vD{ zS4>_PGm>dXq8{Qhk^iV_yLr-j@EmYHzbe8{aFJLfGG(9~cQuq;`W7St*DC5%Nq z33(L44cNHNjsZQS7AuhtW;@keo?lA-sy)-FH@iUmC5+YMt*NrwWEGGt^d*d?b@oRX zD`T14Gs;?9*plF)sNu~)I3?K^ zduFH-u)}mdRF%t>xJn&ssZxPzDj$Ax&FMj9SR5^XSj5J&phqjJP3*l?a{FetP@2VQ z;6Njn6-Ec<3afdTi9wo7oHIQHeeT9Bbs(X?B5L-;=obiACd_bPf7qxN)|pKjVqA{V z2>oenO2TAcn?|ED1ntTY0iIgdT{;}SV4?7=LK@su(EegMv^==#`A$LuM*5&RqaebT zu9`HT3V%2L;s%0{S21CI^^MWDb%)pKbo6618{6@;9r-{Cl{}C;WRzx#Do?F1cS{YXG0*{Pi2e43+=sy3pC}i8G0#gR)=SN(P5B zOFbyuzR~X*#j7P%58P%KhAMp}t9~Zrx{D9{TnFtEw9};d4h!73qtp-gbZxj<;I2I^ zLI34^=A-*DD=$DSm(2}wFG1+-5hcMnuf05n@8kXA=*MRV0osIr)-NEVOUW|HG}!yK zMVcxs?8`Owsx_};o%J{I-zm~|SUOZOX>bCC@0Nus7s}V0;2R5{f52|C-=Zw~i(RAs zr3-&f8M=73<!hl-78xIv=W7hh(KREJio?e_ZSN{j9O2Y# zEo?Tl6HJi+>d=W*#pm1Q?Zi@JKp%4uyX1_TZoZ(e%gv-aFQwfRCMq&QbedQs0`?g~ 
z(ens>1il~yb+!?Q#7d{RD5BVkDkGC1&=%7i1nWd^4+|oCiN`ZH$NsYwXEg9{Z`28- zLC?y%hsxyATxct;50=DRoQk@(EC)B*NlN&N=Kdh|2L_!tQ+Q*rFrijm?( zJ|+o0dJSSVilb1cG$bYZrQl&{dhGp2uqXi#2xYK7^;=NVKJ=+?QeVPUOEn}A2O>PR zNTr-&72bX~+@3$gvIossXkG3kbAKvoJ_DT7C?=uFE)yMaGpXp%Tk6$_D#2#bF&I=~ z_|G_aE;4IS3=c{_fq6nAV3#xtgfQjn)*PU(Z_7#Q_br+ska38bvS*9@ornj-gk%kf!ITQL(ZbHQ*Z_TBktc=n%oPA{TNIf>+&%iNeK?pg z$TG8QSHb0RGARw>Sje$n+M;WLb*nx@&ss5|gcNgGOuVBS65ztNxbo1jp97YX$5jTZ zfL>83@LMs^2LAn5u5~GFltzh{-E8ZHe5-{LUA)sVmooDN3Z_bM$ci#^1q!B1aL9r(^EnEpOmN7CGIJ&hrcH3joHFw|3Z_nQ$eJ>9 z2MVT7aLAG}^BW3gKO{_dxf!(MLRGLqE407zwOQ$}b{>F7OklgKYXEN8;1=is_n;@= zT{{5mxd#`}-DDW8_8$YTqPzB(`H8)DCM$=rz^WmK8bG5cT$bvLic>_p-k4dA77{cFPV-aB4OgJMWMN+Z-qZ`~nlD z_1y+x7pP&r)k0o;JvJtglRfE>YI^QHU!$Tv-(ElBHYPaJ;~FWAyyqTe;a2XgcYInp3D$+>oIYSO;6$Id8?I4Vlq0(g49)wbd3|*!=ACx5^y_{``2l)109^)TAh7ZTSHVZDD~bb`jvRhCQYR_k zEQBZP(#D(#kHditJLeb4_-c`9jP3n-r1buJjOhLI2yErz9eNqrj{wVZFvys|(qN3j*6#rtc`uM(UnpOG^-ZK@ap?0Ga zO+1i&TrFrMe$_T(#+)hvS1TKF00`1@_JwBEWHAkfJ%l?-KxN62b0~|p4}D? zlYsQnW`X6RQx;E~iuBS(f#s4@Cr_J<^wL(r*`iaZr&A{O7JaZ|AfQMZ4!Y?oM6LqhVVjh-$T}!2J^r}Tr|NZ^&M)Y(rmD>v7Yg=Vf#%Wg=Eh`Vm6Z3 z9{*Uj{asSY{U*+VAGHR}&E7{MY_05BOz;vUZbqU1`%iWJ9XgYvgR(l_<|R3GKkUaI zin3s1*Hc_L`5kmuPXI>0_JN5X|9hLNMOhjQWO&BRf`xP!zZD-CWLCNw<^bbgXJx>} zE+x5e@LTCFoc#9xyPIVViq(hc_rBT2^B*a1k^hjo>jYNxs+ynO_mi526<($ z2dlTbn{PiXbnl2g7E7Zf>aeE{`;D?KH<=Yk-PZSSLjl-wEcCvnxhcF z)(NB!lC_4)csIsd&in_sm|Pn5n!+0W->i-3 zRW3U*KMX8z1ASIrtobaQoY6Dmv5Gju=$0$VS}T9Z=(T~i%i}o*Ri&4AeLN7$dYeW$ zG+7C7REq&;wQ|}{_g!d$WK?DeWr-kF_702dlgaJHYN5>jw}2!`^AM^lX^AjHl$7}_ zi++@4NNbY&ng)_l8gF1ZiV-_{cCz?INY|T(R5xv8mOVaZO^GTmD1+8q+_3gC)~fFb z=jiO>7iY7X^fWO1)E4ru&LA5-%osWvNiOJQbtWoY+E4Ha5@WNX5t}R63A1XKejY|% zrO@ARd{qAoukiHiNYwQkvDDC@^n`y;JAZ(0%lq-vAlEJu@Qko6+HG&?RLR=b^OQ7u zj(ZUz#P|ZxsBaHRu)#y5`K6Z!{Qx^gyjSf9{m5_D_~B*=oA1;HG(h4{ZX;5ulZLvw z3&Bj75zZYoH9>QvU!4Bk(!|7~;JGzTL+JoxS$C(8!sr;JTiBBG z5@u}_l@Nq)`N|kj{&XZS{i@`v&*aY( z?2r$%qranxjhxbI-H%Ppr9LfDU(l=#jOnZ|opul8g>+Gg`Ex 
zM-QRxy&Gu7ga+OTkU6lU!}S}eQc6cVf5cga_YwCeGyUl2wa(78)%Hvg8+8r}$oInG zMxzf9E>kPI=xnYnyP)icibCy99KU`vV#YQkc0rq}3B~NmDZNv)FeitvYsa-s?z49O zOr96qdc{#3smO)0P0>Mf9&RW?l_Wg{nF z=0?Z>D%Y_m!j|C?ib>h@Xguwq`6T?-=|27fBJKe;!MY^g`i@cGSMhFE7~2h!yZO<6 zJZRBYyP1~w!-wqi@308F!P3PWAOL`_|Mbq_|NA@hPmuI)Zd7HQfXo0rw1{lapD{SA z>i9vVSulc_<|_2q_!8@^`Of*vO!>`ZR=G(gy#Ynt-CLjWkD=4<3aZrDNa|+Jz>uRQ z@a8qwK0d9!AcMQ)EffeEYOy=NHZFAS9TaTcqSBOu;Wg1kZW9>Jzh~l{CB~f=%Nn~V z3^S&lzlCD`I!{0BO=JOXgHgl@40eE~DZPuop*x>{oY`}{O~DYi5k}88=tA z0vrrjwDdN#aE=CWY+t$L%aBLhZ@$`>uGMw7e*S69s(ak^SJ_F8--dho>!(kCfy)0k zK>4dAa-q6mgGKSx|LhGu;N2HxSrWb`o@Bx{BeAkrRuAXrf0Ik!17tYt{le1|<*Pwe zm#=d!aWi=B`t;Q0xb4U1<6(oM0dZ6`gF-tE6%}L>BH3ASz4gP@H03Mq&QdaFCchJX z{WN!Pi+3?k6{A=8ps2A!3c^J)P%qjrpMyok9JMgi$jm=S1L5iflUqqxkl%t&lZ5$r zw?T0ytu0CF!&YG#42v-UAp#8l7~SB*Z@EHYN8>D;#ba392v8d<5h4w=(N{sa&cSoR zFr_h?c~cB)Kh;oV)n>7vXCGc>N1{({Rz@&Y!9pW;w|bz4Nyr93uHV# zYRNAOj^D8I)?pcw|GSOw3tH6sD1PRo$`9>WvWHn!xlwm|I|&Y?>uhjwH8Z7zbXS9Ds zYS℞xQOc=ZjHG&5xpe~%j3X-fr2Jd{d=RO105~Y z<7A!_?=sk2zCvjk0@d!kq#PWt?nE!1{W)7cagc3>kr6ygseY(sZ9Cr5vAoYNAJx}F zccozZSQIzb7>PCiRTH%X*mdyzA%UpjG z^hX^sSL$C0<#!~;=Y{kNY0xZ5NocNM^-?A-tx5Ndk;#8l$mf8bbgI}VPI+oCXLpqP1MfOar7zH0$W*kx`h-X zK~Y2^kzQwNdG`U9=SW&Nt9^38u5SugedI@b3@7u(%jvzr$Rz^*t!DVSPLR=~n9vGaVD%@fB|(h5gU2|9^~)|0X9?8P_W@ zKo1|1)8qQhk?lkUK2G|PurGj28xNm&j@60_iBY}Q)|NYr=Das>`1vU3>Qn#W4>sbZ zk}#?-pMW99AN|Efk1lTjl0kVpBqR+rx$R#Y=U>=Zv^j#FPzp&QASm2zy3?$_*W%k zK}FjJi|tQzo&P#dwo#R8Jch~?huy{+#15&z1&&_Qp)shHSjfScs`&)NBS@=4$U&7a zTn}9oFCEAtLnm7%x+WCk~GC_R>N zd%1ioX?D3pA!|{479z~t5u?+3rMDMDeIBTV z3$Jherdl@@a%+V8ub|tD+>l=wY1wUe%Q9<)cabi{@d|4(xeK2HZLeabhOfb~!a=Ty z+YM&a1`Kp@`dYdY$@qjqAOodEk{SQvk710>Gc0W68Zlhm{3sK)JUeb_dUakYIfs|h z93!SPlRzZF?36m_N~0!FqYI51=pCn8B6!=amM+}z&Dz7eCF2Q!td zNrJYozD*X~QlAqsE9EH&6mv}f?NF!{XF9Qrj~?>IZn+soT0u^(y5+jDo%rf{Ik}0_ zyTEJ2hsIR#aAN62O!K(};SY$%4k?fYg(5nYgN%7Q0ysa7?Z(1cFtO`SvyesWYa<8?Bu1`! 
zG-yx z^9OgoUuxD$SOy)cIk3AY=8?$wv2kKBb`YfvMDheaFaCD(1QxnHM8JLoY28hPs0;%a z0_Ydu#0WDNyO_1JgytjW^SzX2Z)aO~%fCJX-Ly#Keqb)k4Uc!mMi!=xb+UqK;IOMB z#7@!U_NaRawnM@f)Nf@He!9N5Wkwn3O{xAgYY^EDZw6^VVF4Pfv(W-L0xbg;V27dP zg%IAII}0jS90sWzzT(3Kw7vgE)aNxsdziBl{$?5VvF=MQ^u!3YYg(;*Ljsi zTk_t+G(!3(-EFj*)#bav9kSloJK)0{jNHJCdAET6We{qGae0NYFFA|q!!+zJOM17bY0a}^zGUilm36i=t>GHWk?LgrE zDx-t)nv{~9qhWF%{P5Qhl>HVTG9?*`=qhP{ev@Z~n1W49!? zrA+D2&uRFniIMcP`d{j)pj>8V-=m{d8cn$=hMF;1Qe;$Wu#m4g#x%&d4t+lzQPN@Fb!_dDUEe*DjNg8Np|II873TM5~u@wla>6`0!?ppUi!R@2RA; zMu5ta{K_X<+wbc0@8=YM*k_LT_x5IIH<9WsZ6{aC8A%Zey*QRX!x`r$^DyL%vyLFpxhXjC9vfFJmm zX8M_;EXbVel;449g#+}v9gdc8e~ZG zwoK`_Qeo3Mje9mKe(vD{sxC9;A>0_i2UEwv(k{=N?Zme*gy`mf{`_eEIphRO@l%nt z-~9&SIT>qIZr#^5IHh2BOJ$!!@N=CVr~Fdz-o~O zEkyp!HAR@!=gDZH6H?rl*X9H05AF0x&*m%tqMaT8MJW6?$(KJa`ClmP!KqkTl7SNZ zwr~QbTs8nX1}OSKTq`z|JS?*RkP1o|*QVF|7YZfo`MhKbUUI1k>Vhk6_z~KVfge#f zr-mni$tgx~hewR`N&SU9>wWVOBs=2>h~-iG4D`uKcrb}yUO zuKq${=f6OqJJj?3zd)hk66kl?8Ew*C2a>)KbZ(wKY`P)?hDRl8v(9fL;ah-}fH0CP2(uKY@gHGwR@pipJ`1pdY3+D)-kD6YmAtcArAa8ax?5(;gL zXmV%fk{x;PD%XXYcBW{$-f9|XCgSZl*NVX%Zshn|5X;(C{UNago~mEV=r2jX_?ILs zW4}!Ug4@atLUB)p>y9l*LO_6U`P4P^Pg=Kzr>MHGe;--^OH} zV`U!vGOm|~u{MjtUr9v1in4w((U-B}-Z78AGIhVZ52CU4;nmNA!7>8R2)RaVMY4|$ zZNZIJrxY-7!z$XW@4$Dsv-%F@EV2K186>0}HpDA2= zN+61ppQwMf03yCopLRW-c&0q-}L)edy9AYEHhSpDoMJyyMCWQ){i)L6LdCocI<7e3q`;msI z2=WL8`lQ(w5*qRBIbMu@@Yy&f4*Bls%*nbTe*d9Y_db|i^hmeXeY^6pa|G#Q{BTCO zyKNhUW`|c|^<*tp?3qCQ(YE#Rmj>!-MB-$8Vc!eXf6e9oH$j&F8}>!=|Nj8{UKN|n z|7+OyS5n~1<#zZ|zxr~y{d$FREE1IV`8b@0>@8(Y*9VYV!PWe8!x+Bd1CUF$N>BeH z+BZ-KDD**${=Ro2bcertwFb9Ei>lV?-E2joZg&=>LP8VcDA8ijhG?NP+l^=YV~WBa zR|!;sN@o_u_4mdCo$chnXhd$U0}{FUUT2z7njwsBFOFqDO4Y(@)3&|Yk-G5?hPY;0 z^z9q7r2zTr+M3Y0vo_R_?^1uvae0|ahxyC^zaP2)Ly8|t~DX-ujOjKV|^vJZ;* z5_4p{i;A>IOo0l0c~+HoO2|Rre;y7#s?@gU8MAj0&Pq3AwIcEW$Xg+9Mct1 z&^naoDS@=zeHP*Z97Hn}K9fnE}Lo(ypoHx3Esm@cy09Ri%l>zki_HhyBW2R zThtRbMP$^{b(7SVyqCRohd+3Zt(NiER+(E;1}<*S46e^)&RX}0+g#mU?9C&hBYR_> 
z4m_jRP@FEz&7=dl?bF?&Y{gC6G-e3E028dQ&L2W|q~$XYM!WObF`}nsT!UZ9W+v%2@%Nb(dF#?wmlaZw+q$c_QR)3vI}Ta+t-7|Fl$3Dkx8*d!JQtSA1HzK>JxS>>Y zRa?$bNh|clJNcZdeu>FW3V9*4WxX)n{Ip3RxulBei6sbpX(=Bj+v8>lkW& z8wC^7`?d$V?Ux00SE7Orw^w#p#5xa3k8@0hvtr#vDUrCxXRb4w##;tc3%yjfB^5vH zMP9u>5ExA0{mT&|ugxJT_TzMEI_n|jN84`uAD)D95^^N0A??^9@!*KFK~$AT?qp9Iu@LXNC#MV& zKdm6r34eb^&(S*Jlr3>3SZ_{KC#6{_O4Uny8_#}#9_|s|>NO5^w1+E_Djk7r@Yg-T zJ)dIzAvt@`7Kp`PFsT1ugu#DP5czlBu&M*%w}0;quM+*k8;(@Ksyogff4)0)w8c58=#!j#(^}T3Ftg!O8$Uok&*g}j4zxJW9ul{fqR7`e$yUf6WXiyx38|NN&^*>R- ztSSUO8@9C%Q>K{U{d+EV<#SaXjR(HQiBqVHzg;$SFQp9s?=bkK3;Yw-fbeD@(!2^eVa?CB%X}WcGxqS{VZ&%?GaFrCaY)d-uq5l~ z&$}Ex9c!N+;!48m8C2?w5^8xBgP+#gHPzFPEAb?}-!vfuRZuDvv#l3psZzKUbXM>M zm3-t7ITnxd^`yVi9<7gy`d>RbjSXo8gAfi7Ep!mq{_vvriYuh;Jk@;@yYei3;-5b%gPW3K||PDJ^qE0M^sF+9AmY~WRgpgH%y7m%<(O) zSubSVW-Q@oWg|jA{>qnOOjHgXz!(M&UxLO0k)HorXRUAJ6U#Cgb6Qse%DZAQ#JevYT{Sd2Jgw-}-`n)9&Z%IY;pxagS)IBP4Gp z6|~~yqQteS-5?J7w=7;Or}H{cH5dls9v$m9&nb9=w>FOr;s%U`iERp1dZU}GO!cb! z+j@4#>hTbKSQHp~2FRoq>v=jo_o%7SAOM{kl#Eu!x*JZmEKje@7C;)k??0(vnmqfY zh^j+H5#?5#Ix?g8+*is}JdCtI?JY z3MjIY9K?vCUC2=kcF}$|80$&_A$y~G-aAw6u2sd2liKXCy zHP&@dvhy%RpX0;}KA+?=TDwMW-fr8?pB=62dHc*XaKd-PFJSb8Dn2&bARij=|c2hy)A-Uo~%2-d41tW>hdI$qDA^UfFTCGMx^ge{mh-Ku3HWx;iQ74k0!w_ zeLwqi%3gMbpKiuArz&}pBEktpE&~IpHQ?iF4Kng`_>vPq)Ft)^`?ZjnMZn8`r99jI zIH*Wenh`XFNkc6~wg^#dm}|>iX(HrGH@wG}(D1V*GKyi*8mr6_e}kHLwDF8VTH!DO zwYWWCc{Zrm5As1N9NT&Ljv(%?9lBbp&}hNYDoQ(i+ek?!yiX!lfnoY07wN+IimY6h z%7U4Yi9nIKY?i<@&X9KDxCxfBwC=#(TFaoN{20m*kCWVnGSc)+M$8z}jCfQT)-wNq zAb2-tqJ=$gCsxxX*n!+#N)1ZY*46WzHUmB5!qU>4rldLuT*y3~wxJ4WQRN6!=F~qO z=0-;7=F0ACA*!6-6(wvSgqoCzQY~yp5Eo2ElbxsN)-(@A)D|ETfnD!OqD7mgMMYq8Bn!GwAZ zGLp@=YIqX-OG&%#o=~5NF&U_@ zbqnWZ!$+`RROJelqmbMQn9qqnwxh`G6wH5*IpCgN3Yk5hSSpkA^qPRO087jMJ+SK} ziPw(PcVOt@mXN0Fh{eur&AS>+0S_kCcBgYXF8pAyjMyYk;#RPsaTtD&Q^pbam3cSZ zW>u{+rq+5n+_iQFZP%iDV^LkT_Q>YBn+{`oae=8CO2wf)#2AUK!xgQrYT~1g`FsV2 z^cq83?EmWPT)?49`v5-cE)uf&a>-a?EtTX_ZtE6nT(+oPlBFKkFPAiIlSCnx`rI!W 
zvWY^nW&5nYOiHLo`e;-x$z<)9O<@&n+tQvnv-_R*J?A~Wo@btMIluYMd**-MkTo@k-8P&;I}TYtF7uA8Fd}6r($AH*eWFr8(pOm%>C5$dh&ii+7!Pn zZpHhOb{h**eb1zAMpZ6K#t}W+4}BwmuM+PYo$S(`CT%7PV;iIH zy&O{H@fSF97nSDn$FFA$g_aM@Sr3gqG_{U6CF~)e&PfZPZ>-BiX>qBhq#HGVho5Ck!qoX%`EbUwB zuekOde{q4OHV|>Wxmcsu=d^B$?y(~kM%xU6dIXX0V|h}$HBYRLE^?A`X>90;HorKq z@7bdP=P&XRpQ2Hib3(WbUPQHp$C?AM-4|y1G<#`l@s3tr+K?1JdIuo|g<) zmiTiSgIZ?a@V~EPsr*)ZEJxuiuR$^{an0`zQU8pU_iR69oRzS$w>vGC)ow3%c6{q% zYfDv*L&nWQnaJL3#X~9%Mm0_YUlsm#_j|`z|J^7jy&+%Gko&s$RR*j3(X}yxfPM-bCRLl zS%oZbX0eis*sa30Jhqq44%eg0CT4?X`tB5VhjDVB>n)x2_nkWMhiQc`x1KMyyT&kZ zL3u`Ncv<_InehiBm2RJVCak_>RCPm_ncCs#gzfj++-WjX{lD|;%J5q?hPzenCG3r? z`QQke<=9{NIYvjtVxOphZ3p%Sv;FZYza_)_l7+qI71OQbO*;0&JBF27CZ5*SU`eme z)=GEnuAr(1m!lbx=TaV+gp{fWzAYHZ31%OD%Hf7S5mTv7PCqj#-66gzbK}qa+^Zf_ zR^nr)L##u%0>%7~nJaTV(+Y|tUb}~8YjeZbTr{{DCbh{i;0uAXYw8`Yd3C8*SLJ?l z$sxI9+boGkt#K}@ww+-dIoYz%A>O$u)eDYoXYSt52yfqzO_tBD73MY-qsh1Z zjLXe3E;0>=1x7`jX3f@GA197EcWbzO(h}`_-7MycK5GmAgma9ZD-qv?+N!F}m~HSp zU%Srz7yV-O%iCBU*MsZ*-z?1O@WlG#mQLI?tN3n@o$7^;%KaqTK4}&A%e)e$Tlb5P zMMUvdKRwk~LH{lpG8xo}iR3n$>)&~}Wvz*%f`h+hXAs-4w>VtDsS%j64aTjxe|$ZsRQiog{(O1=W-iAt?cOv%xk+nWt4PIU2Mmq_sT~HVsFc{j9KqD4$LPAt@Y%u%p(YL@i zNFmu9UnL4Z;KFzoZZbub6IcaMaH4=FL-7D7i^}+JJjlVW%FksP4B@qxC{@^2SN|t_ z3wKCmi=^@ANX=TqjyIPvf;p!789~CHg{`2%080XilvMlSlzk>9DT7VPf+hE>Qn~p1 z#)-&NLoexpoo@0wCQA~dZOAog{IW|f6)Cw>gUTf&@jhiTj9zjLlL{kC65Z5fuK1Uu zX2P0WLa9k>>|;uP^MoYc+e3!YODeCX!pM?D2NIbJ?${2WZf!j(mypEMvSb*&h)G8f!56ut~it`l+DAKLnui~~2_ zgfBxarU;zK6LXlff02VA0e-1Q`&;*HlB=4BbZc124zW}1)Lddoj?8}3N6i+}1K#0D51v{vO z-B3hXTOy)Cy@BKi13Ne1!)v&r!=XMc^5I|}6;5x|l@YHFW=+WiFf$1M>ymg%L=aU2 zGW$w~fw?zfGLa}#6;NuCd?1*@5vJydGI$!9sN3QuiKQ|b3`^{50;{<0D&cp30!>pX AivR!s diff --git a/test_unstructured/partition/pptx/test_ppt.py b/test_unstructured/partition/pptx/test_ppt.py index a29909d481..61c172ab49 100644 --- a/test_unstructured/partition/pptx/test_ppt.py +++ b/test_unstructured/partition/pptx/test_ppt.py @@ -7,7 +7,7 @@ from test_unstructured.unit_utils import 
assert_round_trips_through_JSON, example_doc_path from unstructured.chunking.title import chunk_by_title -from unstructured.documents.elements import ListItem, NarrativeText, Title +from unstructured.documents.elements import ListItem, NarrativeText, PageBreak, Title from unstructured.partition.ppt import partition_ppt from unstructured.partition.utils.constants import UNSTRUCTURED_INCLUDE_DEBUG_METADATA @@ -181,9 +181,17 @@ def test_add_chunking_strategy_by_title_on_partition_ppt(): assert chunk_elements == chunks -def test_partition_ppt_element_metadata_has_languages(): - elements = partition_ppt(example_doc_path("fake-power-point.ppt")) +def test_partition_ppt_params(): + """Integration test of params: languages, include_page_break, and include_slide_notes.""" + elements = partition_ppt( + example_doc_path("language-docs/eng_spa_mult.ppt"), + include_page_breaks=True, + include_slide_notes=True, + ) assert elements[0].metadata.languages == ["eng"] + assert any(isinstance(element, PageBreak) for element in elements) + # The example doc contains a slide note with the text "This is a slide note." + assert any(element.text == "This is a slide note." for element in elements) def test_partition_ppt_respects_detect_language_per_element(): diff --git a/unstructured/partition/ppt.py b/unstructured/partition/ppt.py index e9ada569cf..c2428a7f9a 100644 --- a/unstructured/partition/ppt.py +++ b/unstructured/partition/ppt.py @@ -24,6 +24,7 @@ def partition_ppt( file: Optional[IO[bytes]] = None, include_page_breaks: bool = False, include_metadata: bool = True, + include_slide_notes: Optional[bool] = None, infer_table_structure: bool = True, metadata_filename: Optional[str] = None, metadata_last_modified: Optional[str] = None, @@ -44,6 +45,8 @@ def partition_ppt( A file-like object using "rb" mode --> open(filename, "rb"). 
include_page_breaks If True, includes a PageBreak element between slides + include_slide_notes + If True, includes the slide notes as element infer_table_structure If True, any Table elements that are extracted will also have a metadata field named "text_as_html" where the table's text content is rendered into an html string. @@ -102,11 +105,13 @@ def partition_ppt( pptx_filename = os.path.join(tmpdir, f"{base_filename}.pptx") elements = partition_pptx( filename=pptx_filename, + detect_language_per_element=detect_language_per_element, + include_page_breaks=include_page_breaks, + include_slide_notes=include_slide_notes, infer_table_structure=infer_table_structure, + languages=languages, metadata_filename=metadata_filename, metadata_last_modified=metadata_last_modified or last_modification_date, - languages=languages, - detect_language_per_element=detect_language_per_element, starting_page_number=starting_page_number, ) diff --git a/unstructured/partition/pptx.py b/unstructured/partition/pptx.py index 58020ea07e..7cc1924fd1 100644 --- a/unstructured/partition/pptx.py +++ b/unstructured/partition/pptx.py @@ -92,7 +92,7 @@ def partition_pptx( date_from_file_object: bool = False, detect_language_per_element: bool = False, include_page_breaks: bool = True, - include_slide_notes: bool = False, + include_slide_notes: Optional[bool] = None, infer_table_structure: bool = True, languages: Optional[list[str]] = ["auto"], metadata_filename: Optional[str] = None, @@ -376,7 +376,7 @@ def __init__( file: Optional[IO[bytes]], file_path: Optional[str], include_page_breaks: bool, - include_slide_notes: bool, + include_slide_notes: Optional[bool], infer_table_structure: bool, metadata_file_path: Optional[str], metadata_last_modified: Optional[str], @@ -413,7 +413,7 @@ def include_page_breaks(self) -> bool: @lazyproperty def include_slide_notes(self) -> bool: """When True, also partition any text found in slide notes as part of each slide.""" - return self._include_slide_notes + return 
False if self._include_slide_notes is None else self._include_slide_notes def increment_page_number(self) -> Iterator[PageBreak]: """Increment page-number by 1 and generate a PageBreak element if enabled.""" From 8eee14d5892e696c83524ba2df9d60bb138f6819 Mon Sep 17 00:00:00 2001 From: John <43506685+Coniferish@users.noreply.github.com> Date: Fri, 10 May 2024 14:37:07 -0500 Subject: [PATCH 03/10] paragraph grouper type hint (#3002) Fix type hint for paragraph_grouper param. `paragraph_grouper` can be set to `False`, but the type hint did not reflect this previously. --- CHANGELOG.md | 1 + unstructured/partition/auto.py | 4 ++-- unstructured/partition/text.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea2f32fb12..3e75f7df0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * **Make the filename and file params for partition_image and partition_pdf match the other partitioners** * **Fix include_slide_notes and include_page_breaks params in partition_ppt** * **Re-apply: skip accuracy calculation feature** Overwritten by mistake +* **Fix type hint for paragraph_grouper param** `paragraph_grouper` can be set to `False`, but the type hint did not not reflect this previously. * **Remove links param from partition_pdf** `links` is extracted during partitioning and is not needed as a paramter in partition_pdf. 
## 0.13.7 diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py index 69dd809779..4d5b4da8c0 100644 --- a/unstructured/partition/auto.py +++ b/unstructured/partition/auto.py @@ -3,7 +3,7 @@ from __future__ import annotations import io -from typing import IO, Any, Callable, Optional +from typing import IO, Any, Callable, Literal, Optional import requests @@ -139,7 +139,7 @@ def partition( include_page_breaks: bool = False, strategy: str = PartitionStrategy.AUTO, encoding: Optional[str] = None, - paragraph_grouper: Optional[Callable[[str], str]] = None, + paragraph_grouper: Optional[Callable[[str], str]] | Literal[False] = None, headers: dict[str, str] = {}, skip_infer_table_types: list[str] = [], ssl_verify: bool = True, diff --git a/unstructured/partition/text.py b/unstructured/partition/text.py index 78d2b63318..96cd105250 100644 --- a/unstructured/partition/text.py +++ b/unstructured/partition/text.py @@ -3,7 +3,7 @@ import copy import re import textwrap -from typing import IO, Any, Callable, Optional +from typing import IO, Any, Callable, Literal, Optional from unstructured.chunking import add_chunking_strategy from unstructured.cleaners.core import ( @@ -49,7 +49,7 @@ def partition_text( file: Optional[IO[bytes]] = None, text: Optional[str] = None, encoding: Optional[str] = None, - paragraph_grouper: Optional[Callable[[str], str]] = None, + paragraph_grouper: Optional[Callable[[str], str]] | Literal[False] = None, metadata_filename: Optional[str] = None, include_metadata: bool = True, languages: Optional[list[str]] = ["auto"], @@ -126,7 +126,7 @@ def _partition_text( file: Optional[IO[bytes]] = None, text: Optional[str] = None, encoding: Optional[str] = None, - paragraph_grouper: Optional[Callable[[str], str]] = None, + paragraph_grouper: Optional[Callable[[str], str]] | Literal[False] = None, metadata_filename: Optional[str] = None, include_metadata: bool = True, languages: Optional[list[str]] = ["auto"], From 
e4c895923d6f8d1bbfe7baa8abc47dbe833aaacc Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Fri, 10 May 2024 14:19:31 -0700 Subject: [PATCH 04/10] fix(csv): partition_csv() raises on long lines (#2998) **Summary** The CSV delimiter-sniffer requires whole lines to properly detect the delimiter character. Limiting bytes read produced partial lines when lines were very long. Limit bytes but read whole lines. Fixes #2643. --- CHANGELOG.md | 3 ++- example-docs/csv-with-long-lines.csv | 11 +++++++++++ test_unstructured/partition/csv/test_csv.py | 7 ++++++- unstructured/__version__.py | 2 +- unstructured/partition/csv.py | 22 ++++++++++++--------- 5 files changed, 33 insertions(+), 12 deletions(-) create mode 100644 example-docs/csv-with-long-lines.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e75f7df0b..8baf4122b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.13.8-dev3 +## 0.13.8-dev4 ### Enhancements @@ -14,6 +14,7 @@ * **Re-apply: skip accuracy calculation feature** Overwritten by mistake * **Fix type hint for paragraph_grouper param** `paragraph_grouper` can be set to `False`, but the type hint did not not reflect this previously. * **Remove links param from partition_pdf** `links` is extracted during partitioning and is not needed as a paramter in partition_pdf. +* **Improve CSV delimeter detection.** `partition_csv()` would raise on CSV files with very long lines. 
## 0.13.7 diff --git a/example-docs/csv-with-long-lines.csv b/example-docs/csv-with-long-lines.csv new file mode 100644 index 0000000000..421f2533f6 --- /dev/null +++ b/example-docs/csv-with-long-lines.csv @@ -0,0 +1,11 @@ +SpearmanCorrelationCoefficient,1Freq CD161CD8TfrequencyUnstim,1Freq Ki67CD8frequencyUnstim,1Freq pDCfrequencyUnstim,1Freq GranulocytesfrequencyUnstim,2Unstim BasophilsCREBUnstim,2Unstim BasophilsSTAT5Unstim,2Unstim BasophilsS6Unstim,2Unstim BasophilsP38Unstim,2Unstim BasophilsZap70_SykUnstim,2Unstim Basophils4EBP1Unstim,2Unstim CD4TeffSTAT3Unstim,2Unstim CD4Teff4EBP1Unstim,2Unstim CD4TmemCD38Ki67Ki67Unstim,2Unstim CD4TnaiveCD38Ki67CD38Unstim,2Unstim CD4TnaiveCD38Ki67STAT1Unstim,2Unstim CD4TnaiveCD38Ki67STAT4Unstim,2Unstim CD4TnaiveCD38Ki67HLADRUnstim,2Unstim CD8TeffCD38Unstim,2Unstim CD8TeffHLADRUnstim,2Unstim CD8TmemCD38Ki67CREBUnstim,2Unstim CD8TnaiveCD38Ki67CD38Unstim,2Unstim CD8TnaiveCD38Ki67CREBUnstim,2Unstim CD8TnaiveCD38Ki67STAT5Unstim,2Unstim CD8TnaiveCD38Ki67IkBaUnstim,2Unstim CD8TnaiveCD38Ki67Ki67Unstim,2Unstim CD8TnaiveCD38Ki674EBP1Unstim,2Unstim CD8TnaiveCD38Ki67STAT6Unstim,2Unstim CD38BcellIgMUnstim,2Unstim CD38BcellHLADRUnstim,2Unstim CD45RACD14negCD123neg4EBP1Unstim,2Unstim CD56brightCD16negNKS6Unstim,2Unstim CD56dimCD16posNKPLCg2Unstim,2Unstim CD161CD8TSTAT1Unstim,2Unstim CD161CD8TTBK1Unstim,2Unstim GranulocytesCD38Unstim,2Unstim GranulocytesSTAT5Unstim,2Unstim GranulocytesErk1_2Unstim,2Unstim GranulocytesIkBaUnstim,2Unstim Granulocytes4EBP1Unstim,2Unstim GranulocytesHLADRUnstim,2Unstim IgMnBcellIgMUnstim,2Unstim IgMnBcellHLADRUnstim,2Unstim intMCSTAT3Unstim,2Unstim intMCCREBUnstim,2Unstim intMCP38Unstim,2Unstim intMCHLADRUnstim,2Unstim intMCSTAT6Unstim,2Unstim Ki67CD8STAT3Unstim,2Unstim Ki67CD8CREBUnstim,2Unstim Ki67CD8S6Unstim,2Unstim Ki67CD8IkBaUnstim,2Unstim Ki67CD38CD8TCD38Unstim,2Unstim Ki67CD38CD8TSTAT1Unstim,2Unstim Ki67CD38CD8TZap70_SykUnstim,2Unstim mDCBDCA3CD38Unstim,2Unstim mDCBDCA3S6Unstim,2Unstim 
mDCBDCA3HLADRUnstim,2Unstim MDSC4EBP1Unstim,2Unstim MDSCHLADRUnstim,2Unstim ncMCSTAT3Unstim,2Unstim ncMCSTAT5Unstim,2Unstim ncMCIkBaUnstim,2Unstim NKTKi67STAT3Unstim,2Unstim pDCCD38Unstim,2Unstim pDCSTAT3Unstim,2Unstim pDCS6Unstim,2Unstim pDCP38Unstim,2Unstim pDCHLADRUnstim,2Unstim PlasmablastSTAT6Unstim,2Unstim TregSTAT4Unstim,2Unstim TregHLADRUnstim,2Unstim BcellIgMUnstim,2Unstim mDCIkBaUnstim,2Unstim mDC4EBP1Unstim,2Unstim NKcellHLADRUnstim,2Unstim CD4TnaiveTBK1Unstim,2Unstim CD4Tnaive4EBP1Unstim,2Unstim CD8TmemIkBaUnstim,2Unstim CD8TmemHLADRUnstim,2Unstim NKTSTAT3Unstim,2Unstim NKTCREBUnstim,3LPS CD38BcellCD38LPSCI,3LPS CD38BcellHLADRLPSCI,3LPS CD45RACD14negCD123negCD38LPSCI,3LPS CD45RACD14negCD123negS6LPSCI,3LPS CD45RACD14negCD123neg4EBP1LPSCI,3LPS CD56dimCD16posNKCD38LPSCI,3LPS GranulocytesCD38LPSCI,3LPS GranulocytesTBK1LPSCI,3LPS mDCCD1c4EBP1LPSCI,3LPS mDCCD1cHLADRLPSCI,3LPS MDSCCD38LPSCI,3LPS MDSCIkBaLPSCI,3LPS ncMCCD38LPSCI,3LPS ncMCErk1_2LPSCI,3LPS ncMCIkBaLPSCI,3LPS ncMCHLADRLPSCI,3LPS NKTKi67CD38LPSCI,3LPS pDCS6LPSCI,3LPS pDCKi67LPSCI,3LPS PlasmablastZap70_SykLPSCI,3LPS PlasmablastHLADRLPSCI,3LPS TregHLADRLPSCI,3LPS DCCD38LPSCI,3LPS mDCHLADRLPSCI,3LPS NKcellZap70_SykLPSCI,4IFN BasophilsTBK1IFNIL,4IFN BasophilsIkBaIFNIL,4IFN CD4nCD8nTCD38IFNIL,4IFN CD4nCD8nTZap70_SykIFNIL,4IFN CD4nCD8nTHLADRIFNIL,4IFN CD4TmemCD38Ki67CD38IFNIL,4IFN CD4TmemCD38Ki67PLCg2IFNIL,4IFN CD4TmemCD38Ki67Ki67IFNIL,4IFN CD4TmemCD38Ki67HLADRIFNIL,4IFN CD4TnaiveCD38Ki67Zap70_SykIFNIL,4IFN CD4TnaiveCD38Ki67Ki67IFNIL,4IFN CD4TnaiveCD38Ki674EBP1IFNIL,4IFN CD4TnaiveCD38Ki67HLADRIFNIL,4IFN CD8TeffSTAT5IFNIL,4IFN CD8TmemCD38Ki67CD38IFNIL,4IFN CD8TmemCD38Ki67CREBIFNIL,4IFN CD8TmemCD38Ki67STAT5IFNIL,4IFN CD8TmemCD38Ki67IkBaIFNIL,4IFN CD8TmemCD38Ki674EBP1IFNIL,4IFN CD8TmemCD38Ki67HLADRIFNIL,4IFN CD8TnaiveCD38Ki67STAT3IFNIL,4IFN CD8TnaiveCD38Ki67CREBIFNIL,4IFN CD8TnaiveCD38Ki67S6IFNIL,4IFN CD8TnaiveCD38Ki67Zap70_SykIFNIL,4IFN CD8TnaiveCD38Ki67IkBaIFNIL,4IFN CD8TnaiveCD38Ki67Ki67IFNIL,4IFN 
CD8TnaiveCD38Ki674EBP1IFNIL,4IFN CD8TnaiveCD38Ki67HLADRIFNIL,4IFN CD56brightCD16negNKS6IFNIL,4IFN GranulocytesCD38IFNIL,4IFN GranulocytesSTAT5IFNIL,4IFN GranulocytesS6IFNIL,4IFN GranulocytesIkBaIFNIL,4IFN GranulocytesKi67IFNIL,4IFN GranulocytesHLADRIFNIL,4IFN GranulocytesSTAT6IFNIL,4IFN IgMpBcellCD38IFNIL,4IFN Ki67CD8STAT5IFNIL,4IFN Ki67CD8STAT6IFNIL,4IFN Ki67CD38CD4TKi67IFNIL,4IFN Ki67CD38CD4THLADRIFNIL,4IFN Ki67CD38CD8TCD38IFNIL,4IFN Ki67CD38CD8TSTAT3IFNIL,4IFN Ki67CD38CD8TSTAT5IFNIL,4IFN Ki67CD38CD8TIkBaIFNIL,4IFN Ki67CD38CD8TSTAT4IFNIL,4IFN Ki67CD38CD8T4EBP1IFNIL,4IFN Ki67CD38CD8TSTAT6IFNIL,4IFN mDCCD1cIkBaIFNIL,4IFN MDSC4EBP1IFNIL,4IFN MDSCHLADRIFNIL,4IFN ncMCHLADRIFNIL,4IFN ncMCSTAT6IFNIL,4IFN pDCSTAT1IFNIL,4IFN pDCS6IFNIL,4IFN pDCKi67IFNIL,4IFN pDCSTAT6IFNIL,4IFN PlasmablastCD38IFNIL,4IFN PlasmablastS6IFNIL,4IFN TregCREBIFNIL,4IFN TregHLADRIFNIL,4IFN cMC4EBP1IFNIL,4IFN DCCD38IFNIL,4IFN DCCREBIFNIL,4IFN mDCSTAT1IFNIL,4IFN TCD38IFNIL,4IFN CD4TCD38IFNIL,4IFN CD4TmemCD38IFNIL,4IFN CD8TmemHLADRIFNIL,5PI CD4nCD8nTCD38PI,5PI CD4TeffHLADRPI,5PI CD4TmemCD38Ki67HLADRPI,5PI CD8TeffZap70_SykPI,5PI CD38BcellCD38PI,5PI CD56brightCD16negNKCD38PI,5PI CD56brightCD16negNKP38PI,5PI CD56dimCD16posNKZap70_SykPI,5PI CD161CD8TMAPKAPK2PI,5PI GranulocytesSTAT3PI,5PI GranulocytesS6PI,5PI GranulocytesErk1_2PI,5PI GranulocytesP38PI,5PI GranulocytesIkBaPI,5PI Granulocytes4EBP1PI,5PI IgMpBcellHLADRPI,5PI Ki67CD4THLADRPI,5PI Ki67CD8STAT1PI,5PI Ki67CD8TBK1PI,5PI Ki67CD38CD4TKi67PI,5PI Ki67CD38CD4THLADRPI,5PI Ki67CD38CD8TCD38PI,5PI Ki67CD38CD8TSTAT3PI,5PI Ki67CD38CD8THLADRPI,5PI mDCCD1cHLADRPI,5PI NKTKi67STAT1PI,5PI NKTKi67HLADRPI,5PI PlasmablastHLADRPI,5PI PlateletsSTAT3PI,5PI PlateletsErk1_2PI,5PI PlateletsIkBaPI,5PI PlateletsHLADRPI,5PI TregSTAT3PI,5PI TregHLADRPI,5PI cMCHLADRPI,5PI mDCSTAT3PI,5PI mDCPLCg2PI,5PI mDCErk1_2PI,5PI mDCKi67PI,5PI mDCHLADRPI,5PI NKcellS6PI,5PI TCD38PI,5PI CD4TnaiveTBK1PI,5PI CD4Tnaive4EBP1PI,5PI CD8TnaiveIkBaPI,5PI NKTS6PI,5PI NKTIkBaPI,6Prot ACE2,6Prot 
ACP5,6Prot ACP6,6Prot ACTN4,6Prot ADA,6Prot ADAMTS13,6Prot ADAMTS8,6Prot ADGRG1,6Prot ALDH3A1,6Prot AMBN,6Prot ANGPTL3,6Prot ANXA5,6Prot AOC1,6Prot AZU1,6Prot BAIAP2,6Prot CALB2,6Prot CBLN4,6Prot CCL15,6Prot CCL27,6Prot CD209,6Prot CD276,6Prot CDCP1,6Prot CDHR1,6Prot CDHR2,6Prot CEACAM5,6Prot CLEC10A,6Prot CLEC4G,6Prot CLSPN,6Prot CNTN4,6Prot CNTN5,6Prot COL1A1,6Prot CPVL,6Prot CRH,6Prot CRHR1,6Prot CRLF1,6Prot CRNN,6Prot CTSB,6Prot CTSH,6Prot CX3CL1,6Prot DCBLD2,6Prot DDAH1,6Prot DEFB4A_DEFB4B,6Prot DPEP2,6Prot DSG4,6Prot ECE1,6Prot EPHA10,6Prot FABP9,6Prot FCGR2B,6Prot FCN2,6Prot FCRLB,6Prot FKBP7,6Prot FLT3LG,6Prot FLT4,6Prot FOLR1,6Prot FUCA1,6Prot FUT8,6Prot GALNT10,6Prot GALNT7,6Prot GCG,6Prot GCNT1,6Prot GFOD2,6Prot GGA1,6Prot GGH,6Prot GH2,6Prot GHRHR,6Prot GZMH,6Prot HPGDS,6Prot HSD11B1,6Prot HYAL1,6Prot IDS,6Prot IDUA,6Prot IFNG,6Prot IFNGR2,6Prot IL13,6Prot IL15,6Prot IL15RA,6Prot IL17F,6Prot IL17RA,6Prot IL19,6Prot IL1B,6Prot IL1RAP,6Prot IL22RA1,6Prot IL24,6Prot IL33,6Prot IL4,6Prot IL5,6Prot ITGB6,6Prot ITIH3,6Prot KCNIP4,6Prot KDR,6Prot KIR3DL1,6Prot L1CAM,6Prot LAMP3,6Prot LEP,6Prot LGALS1,6Prot LGALS9,6Prot LHB,6Prot LRP1,6Prot LRPAP1,6Prot LY75,6Prot LY9,6Prot MDK,6Prot MERTK,6Prot MLN,6Prot MME,6Prot MSLN,6Prot NGF,6Prot NID1,6Prot NINJ1,6Prot NTRK3,6Prot OPTC,6Prot PADI2,6Prot PDCD1,6Prot PDCD1LG2,6Prot PDGFC,6Prot PKLR,6Prot PLIN1,6Prot PON2,6Prot PRELP,6Prot PRL,6Prot PRSS27,6Prot PRSS8,6Prot PTPRF,6Prot RANGAP1,6Prot S100A16,6Prot SCGB1A1,6Prot SCLY,6Prot SCP2,6Prot SDC1,6Prot SFRP1,6Prot SFTPA2,6Prot SIGLEC9,6Prot SIRT5,6Prot SLAMF1,6Prot SLC39A5,6Prot SOST,6Prot SPOCK1,6Prot TDGF1,6Prot TIGAR,6Prot TNFRSF11A,6Prot TNR,6Prot TP53,6Prot TPP1,6Prot VAT1,6Prot WARS,6Prot WFDC2,6Prot XRCC4 +1Freq 
CD161CD8TfrequencyUnstim,1,-0.285021097,0.312877946,-0.296319737,0.215627978,0.17301315,0.194282447,0.429845626,0.326592318,0.168591576,-0.030716119,0.084662173,-0.282135586,0.033216783,0.187762238,-0.223208042,0.011101399,0.143489422,-0.182867765,0.375699301,0.074959447,0.224622215,0.084265346,0.149150517,-0.142235123,-0.001536754,0.159566294,0.059111415,0.170580998,0.362384451,0.087596017,0.052358168,0.076948855,-0.173611199,0.326684533,0.181122675,0.156868448,0.277373094,0.237016148,0.163397701,0.258715968,0.120348006,-0.178661733,0.282886216,0.196916435,-0.062843047,0.138822078,0.045865527,0.361903919,0.262787845,0.217010837,0.090460323,0.34154017,0.402783354,0.092508711,0.278222997,0.143031359,0.092437864,-0.289582232,0.106657183,0.241194879,-0.015248933,0.009724453,0.088466222,-0.039134809,0.387290409,0.052079142,0.078973843,-0.089644381,-0.10370642,-0.059244553,0.117768308,0.25767903,0.290405797,-0.307730244,0.042198308,-0.109801591,0.267492002,-0.477234647,0.054464523,0.40580938,0.022146275,-0.051332878,0.137984148,0.271701982,-0.174231032,0.143882433,-0.248773903,0.077170198,0.020851371,0.005699856,-0.047498238,-0.027202255,0.268562111,0.242218997,0.127560071,-0.21053056,0.176760082,-0.002049005,-0.016818919,0.080451128,0.269924812,0.011825017,-0.024709923,-0.383321941,0.097711854,-0.219499341,-0.279841897,-0.192423721,-0.241989904,-0.134051882,-0.121958349,-0.135732554,0.320168067,-0.049141396,-0.432157249,0.06432065,0.01553416,-0.124550125,0.284765431,-0.193939394,-0.278787879,0.324747475,-0.00966811,0.022222222,0.353174603,0.184831051,-0.018556033,0.088566567,-0.212705615,0.020014586,-0.123085649,-0.012235637,0.156470302,0.328963574,-0.299190754,0.15951772,-0.071830471,-0.070917062,-0.096930946,-0.059810011,0.250968213,-0.327543236,0.261052247,0.214724151,0.305334308,-0.136755572,-0.137201692,0.220009578,0.295873573,-0.036475377,0.374850347,0.076941496,0.308324687,-0.348790805,0.205047573,0.349459765,-0.224033286,0.184563118,0.009695047,-0.033869709,-0.2
61514442,-0.011004508,-0.244752175,-0.033202969,-0.302046036,-0.348666423,-0.017647059,-0.253456449,-0.185202777,0.040811107,-0.36815057,-0.28038892,-0.098536177,-0.096382901,0.391078225,-0.010288936,-0.131836966,-0.033121917,-0.097533474,0.151011779,0.050135911,0.095620113,0.160676533,-0.019591261,0.036081748,-0.057223397,-0.033967583,-0.016208598,-0.131642001,0.006624383,0.018604651,-0.077801268,-0.044256519,0.055673009,-0.194221283,0.08989547,0.189721254,0.033623693,0.189692894,-0.016490486,0.254686399,0.309059233,0.033262861,0.07230444,-0.140803383,-0.053136011,0.114164905,0.006624383,0.185623679,0.120084567,0.12769556,0.080902044,-0.259901339,-0.232135307,0.221987315,0.113248002,-0.162649753,-0.164059197,-0.228188865,0.310782241,-0.125863284,-0.398921707,-0.42721519,-0.212072199,-0.25676428,-0.254078762,0.216080638,-0.078024367,-0.224660103,-0.128364559,0.305133615,-0.216549461,-0.429864041,-0.071659634,-0.343084857,-0.263595874,-0.332817628,-0.096530708,-0.218039252,-0.308251289,-0.184857009,-0.163994374,-0.363877168,0.030661041,-0.333169245,-0.245981282,-0.061978434,-0.283053698,-0.279947726,0.115166432,0.209329583,-0.032770745,-0.196952649,0.273699015,0.065283638,-0.113197375,0.088818565,-0.146155649,-0.429254571,-0.39172527,-0.302320675,-0.380543835,-0.185185185,-0.249296765,0.309188936,-0.197328864,-0.190214429,-0.007032349,-0.097304266,-0.279254571,-0.366315049,-0.213548992,-0.018682607,-0.329910924,-0.322362869,-0.254946085,-0.153211439,-0.369259079,-0.111650912,-0.145874355,-0.323935325,0.054758556,-0.058954524,-0.356633849,0.231879981,0.019104548,-0.20100797,0.140154712,-0.083052039,-0.285818097,-0.259827938,-0.120651664,-0.290765886,-0.33588842,-0.047491796,-0.430707923,-0.306469761,-0.050820441,-0.107712143,-0.110384435,-0.304969526,0.108907642,-0.059399906,-0.258931083,-0.380956288,0.162634787,0.248499766,-0.227449602,-0.29798406,-0.043834974,0.018495077,-0.050797,0.055625879,-0.490881388,-0.366268167,-0.390271917,-0.543369335,-0.123956868,-0.293506
798,-0.360806376,0.053750586,-0.193459916,-0.45140647,-0.467314037,-0.187810595,-0.291608064,-0.399132677,-0.308358582,-0.395593061,-0.237132191,0.211157993,-0.045616503,-0.236099391,-0.28870136,-0.337412096,-0.319081106,-0.113173933,-0.253070792,-0.253656821,-0.167909048,-0.324941397,-0.057853807,-0.436099391,-0.44092827,0.150961088,-0.32620722,-0.171801288,-0.310337553,-0.062423816,-0.486615096,-0.498218472,-0.260103141,-0.310548523,-0.142429339,-0.298101266,-0.212353493,-0.170979841,-0.488888889,-0.078527895,-0.231387717,-0.53185654,-0.138654477,-0.176793249,-0.346507267,-0.291912799,-0.452531646,-0.462001875,-0.261720581 +1Freq Ki67CD8frequencyUnstim,-0.285021097,1,-0.326082932,0.117750678,-0.076882028,-0.292243187,-0.099447303,-0.298646846,-0.497615425,-0.258395273,-0.045258539,-0.137116049,0.138987595,0.174475524,-0.175174825,0.01097028,0.280638112,0.229378217,0.086938378,-0.223164336,0.001195253,-0.345855033,-0.341330146,-0.33911039,0.290873389,-0.419704602,-0.371723726,-0.243937116,-0.199781271,-0.097930867,-0.258463727,-0.27548767,-0.353686527,-0.108461159,0.137730826,-0.089946446,-0.120399221,-0.217553258,-0.053383642,-0.071498996,-0.115189873,-0.150316456,-0.063459759,-0.241813136,-0.195066297,0.279432624,-0.190379278,-0.174001947,-0.222906524,-0.166966894,-0.075219085,0.003794891,-0.369770455,-0.435122047,0.308710801,-0.190243902,0.122473868,-0.159527587,0.203747073,-0.12745377,-0.228961593,-0.256216216,-0.091893866,0.063910959,-0.206036217,0.072266935,-0.349698189,0.032226693,-0.162048364,0.034980526,0.057132425,-0.14751704,-0.159871956,-0.238753651,0.03882668,-0.063023369,-0.068743914,-0.186295034,0.049854247,-0.06587147,-0.327458617,0.110799727,-0.247983595,0.165963302,0.188038278,-0.0784689,0.12809296,0.279704028,-0.148803828,0.170707071,0.050721501,0.236786469,0.144890768,0.2823398,0.085470085,0.008466376,-0.101193356,0.095557075,-0.138393238,-0.035345343,-0.297402597,0.06937799,-0.248462064,0.176763103,0.036910458,0.011791042,0.020948617,-0.0121212
12,0.079188244,0.190673121,-0.316368286,0.123931312,-0.388819876,-0.033284618,-0.156046767,0.025439109,-0.072072349,0.008151589,-0.117782768,-0.158529603,0.184776335,0.169480519,0.026479076,-0.199206349,0.113131313,-0.112265512,0.007535856,0.064095292,0.099586743,0.035248359,-0.006563488,-0.034762175,0.117737623,-0.144315696,-0.23970358,0.224232977,-0.178407015,0.069784436,0.072524662,0.015052978,-0.13737669,-0.274899525,0.063141989,-0.066898064,-0.09810011,-0.045780051,-0.24238217,0.259438104,-0.03396121,-0.075744273,-0.212068002,-0.313353021,0.043020193,-0.241000878,0.035916673,8.06E-05,-0.204644412,0.019620567,-0.021261977,0.116869381,-0.133237642,0.024552391,0.004759387,0.153324288,-0.045893527,0.026123493,-0.156046767,-0.143660943,0.175044047,0.030398246,-0.290464012,0.114912131,0.165144674,0.038637963,-0.144208988,0.134963338,-0.145313601,0.044809983,-0.041296688,0.076250881,0.125037753,-0.062971912,0.003889632,-0.189711064,-0.388724454,-0.128118393,-0.235658915,-0.350951374,0.065539112,-0.41465821,-0.264129669,-0.03030303,-0.31205074,0.124735729,0.217758985,-0.041014799,0.333623693,-0.274216028,0.18902439,-0.100559112,-0.174066244,-0.08893587,0.035714286,-0.151374207,-0.07751938,0.131078224,0.275828048,-0.394221283,-0.134460888,0.072727273,-0.222410148,-0.013812544,-0.021282593,-0.084143763,0.337420719,-0.126990839,0.345518037,0.052854123,-0.316701903,0.13615222,-0.244679352,0.094009866,0.266418248,0.209101174,0.17195122,0.213065792,0.345460705,0.033017164,0.067536515,-0.028726287,0.146613294,-0.166937669,0.059665763,0.210840108,0.049616079,0.181436314,0.197877145,0.224480578,-0.102303523,0.241916452,0.134823848,0.091779584,0.186924119,0.222560976,-0.087488708,0.085004517,0.272370553,0.024616079,0.111461786,0.253558342,-0.060365854,-0.196906052,0.047922313,0.132746161,-0.029132791,0.073915989,0.165763324,0.006278229,0.210817525,0.092276423,0.374887082,0.308536585,0.29098916,0.097244806,0.074480578,-0.217457091,0.305726658,-0.047866124,-0.112240289,0.179358627
,0.072764228,0.204065041,0.329561879,0.213753388,0.159146341,0.366644083,-0.069534779,0.256504065,0.358335356,0.108966288,0.299751581,0.248172133,0.087330623,0.190311653,0.169715447,-0.028229449,-0.017479675,0.017344173,-0.119173442,0.058107498,0.07366757,0.288285643,0.036630533,0.219050469,0.098893406,0.204968383,0.313233966,0.294308943,0.033288166,0.251806685,0.046589883,0.271657633,-0.022109304,0.124345077,0.161246612,0.113731446,-0.020121951,-0.022086721,0.284439928,0.271047877,-0.049277326,-0.104832882,0.008468835,-0.063075881,0.388211382,0.180555556,0.332339657,0.226751506,0.298396567,0.380894309,0.387895212,0.147922313,0.244873532,0.383242999,0.268068361,0.220799458,0.118495935,0.183152665,0.147042384,0.350045167,0.324335616,-0.163121048,0.120415537,0.208152665,0.34200542,0.145889792,0.209981933,0.071183379,0.238030714,0.278635953,0.206775068,0.098870822,0.124855322,0.270934959,0.187646793,0.113098464,0.257791328,0.205601883,0.200790425,0.214905149,0.408175248,0.259778681,0.304652213,0.137240289,0.239477413,0.120822042,0.277551942,0.133852755,0.14498645,-0.053748871,0.266147245,0.323712737,0.284191509,0.299277326,0.339476061,0.260998193,0.371386631,0.308288166,0.309439928 +1Freq 
pDCfrequencyUnstim,0.312877946,-0.326082932,1,0.037054177,-0.132941176,-0.049321267,-0.098280543,0.103529412,0.071006475,0.176289593,0.0898055,0.265861838,0.187726358,-0.216973886,0.216781874,-0.169546851,-0.015793011,-0.184875922,0.121294433,0.270174511,0.030316742,0.371312217,0.346968326,0.333755656,-0.157104072,0.290226244,0.295837104,0.16898055,0.063078471,0.221395037,0.013445378,0.062619193,0.181943175,0.09704896,0.01863437,0.053890007,0.122904091,0.27123646,0.3120389,0.28365429,0.198759222,0.150100604,-0.023819591,0.1863284,0.182804792,-0.062156448,0.126568006,0.089738431,0.338732394,0.149329309,0.215291751,-0.104744818,0.389701111,0.275706734,-0.223827392,0.07260788,-0.163414634,0.206971975,-0.233082707,0.242096055,0.249514478,-0.033295425,0.019114688,-0.411418389,-0.078571429,-0.146881288,0.12082495,-0.322199866,0.191076896,0.078906774,-0.085177733,0.149061033,0.126324614,0.046143528,-0.114755198,0.132930919,0.193628437,0.257813548,-0.224884514,0.069181757,0.320892019,-0.152717304,0.209482342,-0.024433652,0.056119981,0.072327044,-0.0890179,-0.022779043,0.296161909,0.054023545,-0.034833091,-0.143554007,0.120034843,-0.061913783,0.35270108,0.056230492,0.091860744,-0.069585551,0.227951848,0.084948348,0.232946299,-0.014271892,0.319787131,-0.349749027,-0.032978552,0.211865261,-0.115826034,0.070220477,-0.14857258,-0.197379313,0.051144689,-0.13003663,-0.015659341,0.073443223,0.060805861,-0.26886181,0.052955665,-0.185377236,-0.22485092,0.274542125,-0.046956373,-0.003500384,0.35430718,0.03987023,-0.032698711,-0.033893964,0.330200146,0.002511952,0.18013127,-0.125030387,-0.034762175,-0.069929503,0.003160198,0.081273803,0.327092934,-0.218294567,0.130311355,0.077289377,0.03264652,-0.116437729,-0.085805861,0.346749084,-0.221492889,0.298397436,0.229441392,0.18992674,-0.033928571,-0.141993088,0.113623272,0.348982335,0.045362903,0.415370584,0.063556068,0.305059524,-0.075045788,0.119795918,0.268265306,-0.056822344,0.233287546,-0.082219033,-0.037445544,-0.271738309,-0.230792476
,-0.302515361,0.170506912,-0.181135531,-0.031501832,-0.038278388,-0.354869997,-0.261355311,0.054532967,-0.16167798,-0.020184003,-0.006253659,-0.152106227,0.188235595,-0.049509764,-0.125140713,-0.052102747,-0.121465035,0.10380034,0.163763066,-0.152446175,0.199254517,-0.023741998,-0.000729276,0.083704724,0.170083462,-0.200388947,0.24284904,-0.071225995,-0.128757799,0.034275991,-0.067660643,-0.018393971,-0.040920509,0.198785425,0.061538462,-0.326315789,-0.181010453,0.11401021,-0.115792886,0.178137652,0.082246171,0.155497934,0.243497285,0.055668098,0.058747265,0.170083462,-0.288226238,-0.067822705,0.18240013,0.124544202,-0.02050077,-0.087432137,0.066040029,0.043690587,0.07641196,-0.074791346,0.120654728,0.004456689,-0.120330605,-0.31358756,-0.442243613,-0.42049241,-0.331762401,-0.44921634,0.068554856,0.038797658,-0.2488893,-0.090723272,0.208811551,-0.072195483,-0.16191534,0.05923732,-0.362797729,-0.326113785,-0.182432432,-0.180087622,-0.365176209,-0.266475379,0.109990127,-0.179563125,-0.360422066,0.064420585,-0.207639146,-0.145795892,0.079415031,-0.410854088,-0.175245089,0.325249907,0.352246082,-0.010644206,-0.161452548,0.224669875,0.113106257,-0.230377638,0.114000987,0.135289399,-0.468746143,-0.387387387,-0.274065161,-0.412532395,-0.16870295,-0.443385166,0.302388004,-0.222511898,-0.191010976,0.009564359,-0.166203875,-0.236116253,-0.492595335,-0.259533506,-0.070621992,-0.494569912,-0.360823152,-0.331266198,-0.346908552,-0.515276096,-0.278664373,-0.263050722,-0.343656236,-0.109681599,-0.163396273,-0.455417747,0.094255214,0.139361965,-0.426292731,0.197365173,-0.179717389,-0.20045045,-0.38499275,-0.274311983,-0.295631248,-0.361656177,0.064173763,-0.331327903,-0.322812539,-0.140441812,-0.165062323,-0.152073306,-0.411884487,0.158830063,-0.212853264,-0.248642478,-0.335325923,0.125293101,-0.106472911,-0.105146242,-0.264994447,-0.056614834,-0.409138591,0.009811181,0.233617179,-0.532210292,-0.315222757,-0.449401456,-0.502595509,-0.30099963,-0.213161792,-0.430797236,-0.196840676,
-0.510891028,-0.537609527,-0.437187131,-0.232660743,-0.280760212,-0.38963964,-0.234128055,-0.501789461,-0.341110863,0.287270147,-0.077748982,-0.259996298,-0.448691843,-0.40133284,-0.469702579,-0.083950389,-0.098420338,-0.390596076,-0.173022337,-0.304516846,-0.154034833,-0.336017524,-0.408274713,-0.189065778,-0.181537702,-0.102647168,-0.347834136,-0.237813156,-0.54587807,-0.523448106,-0.286961619,-0.389948167,-0.152691539,-0.252992719,-0.138343823,-0.155405405,-0.288257436,-0.099654449,-0.405220289,-0.435332593,-0.174194743,-0.299950636,-0.472325065,-0.287671233,-0.508947303,-0.375971862,-0.421016907 +1Freq GranulocytesfrequencyUnstim,-0.296319737,0.117750678,0.037054177,1,-0.27890223,-0.020011435,-0.262435678,-0.408576329,-0.340329114,-0.107985516,0.305504622,0.16255896,0.208007183,-0.126835664,0.105944056,0.368138112,0.336363636,-0.035091491,0.300193477,-0.173208042,0.118415436,-0.112183044,-0.041065483,-0.116878682,0.319559464,-0.037650474,0.115939554,-0.097744361,-0.172467532,-0.007035054,-0.374736842,-0.30756086,0.207110656,0.201709683,-0.287327807,0.380598832,0.549391431,-0.431746808,-0.041942551,-0.064086179,-0.173953262,-0.08395813,0.234289238,-0.360962072,-0.260807894,0.226888683,-0.12599445,0.229771178,-0.294109056,-0.403164557,-0.278383642,-0.068092065,0.109311366,-0.099731541,0.071428571,-0.218292683,-0.263937282,0.048374506,0.221223339,0.113001422,-0.38398293,-0.175049787,0.288777994,0.021393353,0.383467471,-0.187894031,-0.070053655,0.101576123,0.413997155,-0.091382668,0.480111977,-0.052312561,-0.033179365,-0.090311587,0.519912366,0.2719815,0.38909445,-0.081864654,0.412648568,0.221713729,-0.242210321,-0.481339713,0.09924812,-0.107154944,-0.079289132,-0.140396446,-0.489063568,-0.018865668,-0.118318524,-0.173448773,-0.274603175,-0.003100775,-0.027906977,-0.137530122,0.116110305,0.088372843,0.055313659,-0.435680109,-0.208144796,0.326304107,-0.177306904,-0.024128503,-0.161107314,-0.164049283,0.086466165,-0.240092961,0.031488801,-0.034782609,-0.070786678,-0.0
07205564,-0.220094995,0.20189989,-0.047570332,-0.219802704,-0.190464012,-0.044203144,-0.051462672,-0.183426128,-0.079208834,-0.34175669,0.092279942,0.123953824,-0.224747475,-0.212626263,-0.107503608,-0.165223665,-0.212543554,-0.288226238,0.020824893,-0.367312211,0.040434325,-0.090997488,-0.217567458,-0.150798152,-0.065817764,0.000512282,-0.319729631,0.122981366,-0.120935331,0.093423456,-0.235732554,-0.198684691,-0.015995473,-0.286189258,-0.312056997,-0.185933504,-0.169419072,0.158831511,-0.185250219,-0.182017719,-0.087636683,-0.061537234,-0.109226594,-0.253252454,0.146460212,-0.290920819,-0.402838252,0.016872161,-0.144978433,-0.076276095,0.123164866,-0.046259223,-0.097580016,0.10204326,0.163380956,-0.113737669,0.044976251,-0.07194008,0.041350719,-0.087248813,0.199013518,0.061150856,0.081181601,0.066431591,-0.262842528,0.103332638,-0.279633545,-0.146908678,-0.409302326,-0.117829457,0.042887345,-0.344306856,-0.250314007,-0.399718111,-0.43551797,-0.359548978,-0.466525722,-0.335588443,0.069203665,-0.141649049,-0.402959831,-0.194080338,-0.412403101,-0.328118393,-0.190979563,-0.027202255,-0.025261324,-0.261672474,0.110452962,-0.031845069,-0.075264271,-0.1602537,-0.038501742,-0.41987315,-0.261028894,-0.251585624,-0.06483439,-0.436504581,0.277237491,0.107399577,-0.457787174,-0.353911205,-0.360958421,-0.406624383,0.167019027,-0.313742072,0.292319961,-0.294573643,-0.321212121,0.183509514,-0.422269204,0.084143763,0.349728997,0.098396567,0.262240289,0.243000469,0.299209575,-0.294738031,-0.055341324,0.328139115,0.013188873,-0.054539295,0.091824752,0.48934056,0.093812105,0.414927733,0.235162602,0.412759711,-0.248261066,0.294942948,0.28134598,0.077484192,0.020189702,0.483062331,-0.007407407,0.1967028,0.407748463,-0.049345077,0.06181155,0.455151621,-0.187240289,-0.288888889,-0.029494128,0.168518519,-0.32068654,0.124480578,0.16104336,-0.094963866,0.152122855,0.327551942,0.216892502,0.222831978,0.476738934,0.161088528,0.440808491,-0.424887082,0.151988211,-0.021736798,-0.048961156,0.1
14611563,0.198622403,0.273825655,0.371544715,0.088053297,0.193563686,0.253161698,0.037285456,0.116892502,0.267492477,-0.085580881,0.111901536,0.286361147,-0.098735321,0.348419151,0.195189702,-0.075112918,0.15070009,0.268224932,-0.388911472,-0.060749774,0.177416441,0.093339958,0.143450768,0.438450985,0.242795845,0.051671183,0.316327913,0.267886179,0.053161698,0.232655827,0.047402891,0.383897922,-0.078952123,0.288527552,0.251806685,0.365020128,0.066034327,-0.07265131,0.062962963,0.514250226,-0.029200542,-0.110749774,-0.018179765,-0.028026197,0.419715447,0.175722674,0.481233062,0.461746058,0.188346883,0.240831075,0.425835592,-0.175248419,0.126490515,0.409101174,0.372901834,0.269241192,0.172538392,0.361653117,0.136032837,0.34397019,0.265358318,-0.138075881,-0.016282746,0.461969286,0.128974706,0.227190605,0.366418248,0.055103884,0.361495032,0.309236676,0.15203252,0.141169828,-0.068440635,0.239182475,0.190605239,0.081052394,-0.057746161,0.175610748,0.339679313,0.255420054,0.295167118,0.395799458,0.420189702,0.341237579,0.284328615,0.309214092,0.223690154,0.313482385,0.255984643,0.150564589,0.436224029,0.359914182,0.090198735,0.303884372,0.323554652,0.393744354,0.426693767,0.4217028,0.405691057 +2Unstim 
BasophilsCREBUnstim,0.215627978,-0.076882028,-0.132941176,-0.27890223,1,0.494110921,0.457137412,0.304898037,0.417636748,0.051648561,-0.214331999,0.014979989,-0.172327044,0.2386679,0.457909343,0.351295097,0.058163737,0.21113017,-0.151591386,0.49433395,-0.022128852,0.559383754,0.483193277,0.388795518,-0.379831933,0.262464986,0.418487395,0.088856636,0.46186099,0.249704593,0.270821422,0.297350867,0.481160304,0.321821994,0.176876794,0.355212502,-0.059805603,0.621926583,0.044330093,-0.148694492,0.372670097,0.327768249,-0.22456446,0.778571429,0.422648084,-0.007491289,0.62630662,-0.103792643,0.535658472,0.379988565,0.495635601,0.258103532,0.421786809,0.470825446,0.256451613,0.145967742,0.404032258,-0.316337286,-0.141238472,-0.043541364,0.650540235,0.521528786,0.001867734,0.222414617,-0.171040724,0.17800905,0.441176471,0.272760181,0.189646831,0.327310844,-0.04936154,0.387383267,0.212197446,-0.055993901,-0.096779112,0.465751858,0.100209644,0.395082905,0.104783686,-0.158604917,0.572936916,0.36535726,-0.430134588,0.144897395,0.005142795,-0.366232629,0.508042455,0.418777699,-0.531239742,-0.090710143,-0.058102637,0.302785924,0.086143695,0.154564803,0.171025276,-0.569099464,-0.120253857,0.214356056,0.141312741,-0.2996139,-0.106904475,-0.121566911,-0.212605318,0.324232643,0.014990699,0.242805559,-0.479051383,-0.353096179,-0.439501885,-0.603517537,-0.525727312,-0.112353452,-0.167281806,0.141663048,-0.128962223,-0.458876914,0.026497042,-0.021473138,-0.096831699,0.170755536,-0.309109312,-0.351214575,0.353441296,-0.02854251,-0.191497976,-0.044736842,-0.261904762,-0.044566545,0.200854701,-0.127594628,-0.117826618,0.211233211,-0.01037851,-0.025641026,-0.009878419,-0.098226214,0.294398611,-0.281914894,-0.465371255,-0.125488493,-0.378093791,0.128310899,-0.083143012,0.235453756,0.208098133,0.127116804,-0.196157186,-0.187907078,-0.330655667,0.273447677,-0.317303517,0.113004776,-0.29038211,0.098784195,-0.460811984,-0.002814259,-0.051782364,-0.459075119,0.302214503,0.387351779,0.007641634,-0.3
35573123,0.240316206,-0.437472861,-0.244789405,-0.239687364,-0.177377334,-0.276921407,-0.074095017,-0.268345636,-0.068280504,-0.156445001,-0.135728761,0.127487746,-0.302865827,0.08594416,0.01953602,-0.021538462,-0.065323565,0.120879121,0.59035409,0.010989011,0.064615385,0.123931624,-0.061660562,-0.062271062,0.152625153,0.110500611,-0.365689866,-0.115995116,-0.123321123,-0.332722833,-0.068986569,-0.520757021,-0.144078144,-0.285714286,0.099487179,-0.217777778,0.02017094,-0.12957265,-0.094017094,-0.349206349,-0.103478261,-0.116605617,0.091575092,-0.431013431,-0.242979243,-0.016483516,0.047008547,0.053724054,-0.215506716,0.137362637,-0.092796093,0.074481074,-0.166666667,-0.150793651,0.218580174,-0.028083028,-0.308302808,-0.26007326,-0.063492063,-0.155067155,-0.24833238,-0.017876882,0.044711264,-0.027292306,-0.055231561,0.06163522,-0.126281685,-0.026033924,0.114963121,-0.013074138,0.002248904,-0.262740614,0.059653135,0.062854965,-0.204497808,-0.156165428,-0.023518201,-0.384226877,0.129788451,-0.209376787,-0.072994092,-0.083056985,-0.076958262,-0.20434534,-0.356553393,0.048827902,-0.086755989,-0.203354298,-0.06918239,-0.058662093,0.062245092,-0.19245283,0.076119687,-0.11637126,-0.063312369,0.084200496,-0.243682104,-0.115227749,-0.159214789,-0.189022298,-0.154183343,0.039908519,-0.199923766,0.109281494,-0.114277002,0.263851036,0.056070135,0.217228893,-0.046540881,-0.152506194,0.030226796,0.034343434,0.094187155,-0.106537069,0.06445588,0.125824281,-0.027101717,0.049362481,0.070478369,-0.110120069,0.082294645,-0.064379646,0.093043644,0.071316943,0.015437393,0.106613303,0.088850772,-0.290642272,-0.042271774,-0.259467495,0.102801601,-0.126360327,-0.209910425,-0.101200686,-0.249170955,-0.093882218,0.0739089,-0.165237278,-0.001943968,-0.012235563,0.121174004,0.054011816,-0.019401563,-0.312108102,0.036249285,0.297045931,-0.232170764,-0.072841624,0.191385554,0.22302268,-0.062778731,-0.462473795,-0.121021536,0.153649705,-0.043415285,-0.062779927,0.136878216,0.006365542,-0.003468649
,-0.09731275,0.035410711,-0.159138555,-0.249861823,0.092052601,-0.268763103,-0.121402706,-0.05370688,-0.067047837,-0.118124643,-0.055917667,-0.135582237,-0.049818944,-0.098989899,-0.082523347,-0.047379455,-0.000266819,-0.128035068,0.107299409,-0.152734896,-0.020087669,-0.095029351,-0.034343434,0.060034305,-0.058128454,-0.059958071,-0.044787498,-0.09220507,0.014675052,-0.116599962,-0.14129979,-0.003392415,0.087402325,-0.094951305,0.107909281,-0.0378502,-0.039832285,-0.419020393,-0.49731275,-0.00933867,-0.212502382,0.178120831,0.087173623,0.01726701,-0.135048599,-0.057366114,-0.23095102,-0.044939966 +2Unstim BasophilsSTAT5Unstim,0.17301315,-0.292243187,-0.049321267,-0.020011435,0.494110921,1,0.452639604,0.453554412,0.478002333,0.590165809,0.085953878,0.417800648,-0.05767105,-0.121415356,0.437442183,0.633209991,-0.133556892,0.011473223,-0.000495521,0.365633673,-0.320448179,0.589355742,0.531652661,0.582633053,-0.34929972,0.482633053,0.607563025,0.002418965,0.086357039,0.468267581,0.216923957,0.569658853,0.515313804,0.579645512,-0.255446118,0.58452449,0.157690109,0.431174476,0.328301887,-0.123765962,0.249857061,0.14488279,-0.036236934,0.48815331,0.508710801,0.090766551,0.623867596,0.195578426,0.4810749,0.273260911,0.417571946,-0.112239961,0.399209805,0.385701044,-0.007258065,0.415725806,0.459677419,-0.13715415,-0.019104084,0.325270118,0.596839219,0.569424286,0.272955975,-0.198821063,0.147239819,-0.05438914,0.708144796,0.207692308,0.579342042,0.381360778,0.070859539,0.196569468,0.553344768,0.264875167,0.344920907,0.657861635,0.61113017,0.681646655,0.182008767,0.126891557,0.58376215,0.466243572,-0.073859284,0.307651062,0.169712222,-0.383521173,0.458146406,0.132516277,-0.182186235,-0.056351898,-0.107779845,0.190249267,-0.060117302,0.073358571,0.111062479,-0.643943539,-0.0791115,0.505197505,0.093951094,-0.223423423,0.103184156,0.100995733,-0.027683554,0.319636702,-0.252434621,0.339533866,-0.267457181,-0.180500659,-0.237811654,-0.390077084,-0.264871906,0.077507599,-0.06372123
3,0.088580113,-0.16337386,-0.182886314,0.046916781,-0.165221619,0.052750993,0.262158055,-0.220647773,-0.22145749,0.184615385,-0.063562753,-0.523279352,-0.118825911,-0.106837607,-0.080586081,-0.027472527,0.008547009,-0.105006105,0.135531136,-0.037240537,-0.068986569,0.005970473,-0.135604332,0.229483283,-0.328267477,-0.230568823,-0.291901867,-0.25054277,0.305579679,-0.198294132,0.306990881,0.21819366,0.042336083,-0.200607903,-0.12592271,-0.171841077,0.278549718,-0.266283109,0.247611811,-0.367998263,0.265740339,-0.267151541,-0.087617261,-0.133020638,-0.158271819,0.302757273,0.127799736,-0.146772069,-0.44743083,0.095783926,-0.192683456,0.175531915,-0.168150239,-0.020625271,-0.393182805,-0.038928508,-0.261940947,0.165653495,0.093587634,-0.061105221,0.010432423,-0.095636127,0.262585151,0.052503053,0.007863248,0.01037851,0.316239316,0.527472527,0.084249084,0.002393162,0.344322344,0.198412698,0.133699634,0.31990232,0.184371184,-0.399267399,0.082417582,0.125763126,-0.071428571,0.203907204,-0.222222222,-0.299145299,-0.098290598,0.151452991,0.054358974,0.007863248,-0.075555556,0.057387057,-0.161782662,0.056521739,0.036019536,0.308913309,-0.207570208,-0.296092796,0.158730159,-0.032356532,0.014652015,0.010989011,0.386446886,0.142857143,-0.042124542,-0.382783883,0.054945055,0.103255665,-0.033577534,-0.054945055,-0.351037851,0.185592186,-0.250915751,-0.148618258,-0.018715456,-0.046388412,-0.022908765,-0.294682676,-0.212426148,-0.141604727,0.035258243,-0.230154949,-0.206022489,0.146255003,-0.06559939,0.039374881,-0.041585668,0.026033924,-0.024433009,-0.273413379,-0.137605062,0.063998475,-0.027406137,-0.241928721,-0.020392605,-0.175605108,-0.010024776,-0.164401837,-0.104326282,-0.03297185,-0.139470173,-0.368553459,-0.254278635,-0.042805413,-0.214865638,-0.065828092,0.099371069,-0.036630455,-0.201677149,-0.15242996,0.047150753,-0.191842958,-0.321593291,-0.113245664,-0.205260149,-0.143434343,-0.203430532,-0.288360746,0.321751892,-0.107833047,-0.010634648,0.071012007,-0.029388222,-0.07
2307986,-0.098761197,0.103106537,-0.078559177,0.013074138,-0.156622832,-0.159522766,-0.288436982,0.126586621,-0.146712407,0.051038689,-0.101200686,0.008271393,0.05294454,-0.066361731,0.208157042,-0.062854965,-0.159138555,-0.148770726,-0.359031047,0.032285115,-0.160704416,-0.345683248,-0.182466171,-0.264036592,-0.170040023,0.041356966,-0.184295788,-0.106003431,-0.181398895,0.0450162,0.07787307,-0.050581285,-0.048485773,-0.21982085,0.110653707,-0.34232895,0.016809605,0.028473413,0.10089575,-0.063007433,-0.472536688,-0.094949495,0.027253669,0.009872308,0.04311117,-0.062473795,-0.197026872,-0.029083286,-0.170954831,-0.10920526,-0.097541452,-0.216508796,0.184295788,-0.106003431,0.116981132,0.214865638,-0.053173242,-0.169277682,-0.068267581,-0.051038689,-0.223861254,-0.176748618,-0.209071851,-0.161654279,-0.231637126,-0.077415666,0.036554221,-0.011473223,0.016123499,-0.350956774,0.021459882,-0.006670478,-0.027787307,-0.068724986,-0.009109968,-0.117591004,-0.109281494,-0.106689537,-0.142595769,-0.093043644,-0.085115304,-0.141988603,0.189479703,-0.074290071,0.064303411,-0.269296741,-0.255727082,-0.027711073,-0.196340766,-0.014293882,0.035715647,0.008652563,-0.186735277,0.007661521,0.023060797,-0.093196112 +2Unstim 
BasophilsS6Unstim,0.194282447,-0.099447303,-0.098280543,-0.262435678,0.457137412,0.452639604,1,0.544044216,0.41946832,0.3260911,0.114846579,0.214255765,-0.12391843,0.109967623,0.182354302,0.122109158,-0.030758557,0.091442729,0.050657519,0.527983349,-0.152661064,0.342577031,0.083193277,0.014565826,-0.114285714,0.020728291,0.072829132,-0.018384132,0.014916949,0.175223937,0.49693158,0.486335049,0.382016048,0.205183915,-0.019508188,0.387459501,0.214636935,0.209164031,0.015056223,0.030836669,0.170954831,-0.002401372,0.201916376,0.578397213,0.548780488,-0.020209059,0.417073171,0.251229274,0.515608919,0.627063084,0.321364589,-0.04039671,0.289388808,0.15286907,-0.004032258,0.2125,0.020967742,-0.082081686,-0.053359684,0.238509918,0.48056765,0.417674569,0.174766533,-0.008711466,0.197918552,-0.047873303,0.394208145,-0.103076923,0.166747299,0.346750524,-0.10165809,0.098913665,0.14129979,-0.109052792,-0.093805984,0.295063846,0.163407662,0.365504098,-0.116523728,0.159900896,0.563331427,0.02352555,0.014771857,0.212228052,-0.114126272,-0.321807638,0.42094321,0.187120425,-0.394025605,-0.407593829,0.111062479,0.348240469,-0.030058651,0.174127089,-0.022650181,-0.662545136,-0.137761243,0.327278696,0.171171171,-0.308365508,-0.074734654,0.096618886,0.068825911,0.237675768,-0.148046832,0.189845716,-0.434782609,-0.201185771,-0.174562305,-0.221691457,-0.200499349,-0.032240556,-0.08662614,0.155775076,0.09411637,0.003160198,-0.074467223,0.002025768,0.013370067,0.294181502,-0.02145749,-0.370040486,0.17854251,-0.014979757,-0.213157895,0.147165992,-0.197802198,0.103785104,0.147130647,0.326007326,0.075091575,0.180708181,0.238095238,0.043345543,-0.23957881,0.215902089,-0.029960921,-0.271710812,-0.366261398,0.022145028,-0.053842814,0.110616587,0.242402866,0.177377334,0.241858446,0.053082935,0.096396005,0.15881459,-0.321320017,0.219062093,-0.063178463,0.077833261,-0.085323491,0.256838906,-0.392748589,0.114634146,0.029268293,-0.003690838,0.309921841,-0.007641634,0.140843215,-0.134123847,0.037417655,-
0.190729483,0.032132002,-0.134715588,0.192574902,-0.008901433,0.217977834,-0.095527573,-0.235670864,0.162159307,0.270608838,0.193265057,-0.09813287,0.274070776,-0.184371184,0.220512821,-0.179487179,0.023199023,0.183150183,0.401098901,-0.301196581,0.486568987,0.209401709,-0.169108669,0.033577534,0.134920635,-0.174603175,0.176434676,0.034188034,-0.036019536,0.136141636,-0.147741148,-0.175213675,0.199023199,0.062564103,0.272478632,-0.044102564,-0.177435897,0.246031746,-0.114774115,0.074782609,0.012820513,0.266788767,0.032967033,-0.298534799,0.235042735,0.26007326,-0.236263736,0.169108669,0.380952381,0.283882784,0.1001221,-0.262515263,-0.033577534,-0.069731098,0.104395604,0.228327228,-0.410866911,0.277777778,-0.498778999,-0.298418144,-0.108976558,-0.284772251,-0.1613143,-0.327692014,0.022908329,-0.083133219,-0.152506194,-0.09891555,0.099599771,-0.036325519,-0.290337336,0.128797408,-0.13657328,-0.110196303,-0.129483514,-0.307261292,-0.164935486,0.06559939,-0.159214789,-0.278139889,-0.297960739,-0.163712598,-0.071393177,-0.159751472,-0.03670669,-0.03956622,-0.122393749,-0.212578616,-0.280198209,0.009186202,-0.368400991,0.114236707,0.171031065,-0.268229464,-0.014903755,-0.169887555,-0.120564132,-0.311987803,-0.372365161,-0.382275586,-0.174080427,-0.355517439,-0.078787879,-0.142331663,0.152890278,-0.113703068,0.096397942,-0.071926815,-0.105698494,-0.205260149,-0.150905279,-0.041966838,-0.347665333,0.012159329,-0.237278445,-0.347100193,-0.206750653,-0.10127692,-0.388755479,0.148923194,-0.24718887,-0.063236135,0.162187917,0.026491328,0.088317134,-0.063617305,-0.228206594,-0.382504288,-0.533762793,0.002477606,-0.305399379,-0.221040595,-0.106384601,-0.341719078,-0.260377358,0.103792643,-0.329597865,-0.245587955,-0.248408614,0.004307223,0.024280541,0.037621498,-0.202595819,-0.018867925,0.026948733,-0.162492853,-0.178502001,0.230798552,0.026033924,0.012235563,-0.454240518,-0.289422527,-0.162035449,-0.243072232,-0.3432503,-0.17941681,-0.254278635,-0.214789403,-0.288888889,-0.32342
2908,-0.303754526,-0.483142427,-0.056222603,-0.148694492,-0.119039451,-0.120259196,-0.257861635,-0.393177054,-0.029616924,-0.126357919,-0.253211359,-0.304516867,-0.392262245,-0.11751477,-0.13222794,-0.173318087,-0.245816657,-0.401105394,-0.263579188,-0.301478997,-0.194434915,-0.017953116,0.072079283,0.192605298,-0.135124833,-0.185591767,-0.158299981,-0.192147894,-0.312826377,-0.231484658,-0.277987421,-0.312413044,0.034419668,-0.226376977,0.035639413,-0.315494568,-0.069792262,-0.070478369,-0.488012197,0.099676005,-0.070402135,-0.161501811,-0.283018868,-0.139165237,-0.347360396,-0.132380408 +2Unstim BasophilsP38Unstim,0.429845626,-0.298646846,0.103529412,-0.408576329,0.304898037,0.453554412,0.544044216,1,0.407486782,0.669677911,0.150295407,0.372441395,-0.270821422,0.139454209,0.113205365,0.165703053,-0.252312673,0.210444063,-0.286296932,0.562095282,-0.092717087,0.53697479,0.377310924,0.222128852,-0.222408964,0.234453782,0.27394958,-0.069746815,0.025560393,0.470173432,0.268076996,0.56432247,0.278755074,0.236668573,0.156333791,0.210977702,0.044939966,0.399839897,0.362454736,0.18848866,0.008271393,0.059043263,0.165853659,0.579965157,0.690766551,0.016550523,0.447735192,0.284391081,0.59679817,0.524985706,0.500971984,0.099983874,0.326560232,0.316706325,0.135483871,0.217741935,0.323790323,0.033465086,-0.051646904,0.261570714,0.522173843,0.312207708,0.262359443,-0.001088933,0.107420814,0.023167421,0.473122172,-0.192217195,0.1963393,0.250009529,-0.119649323,-0.000190585,0.284467315,0.165770917,-0.058890795,0.387688203,0.310691824,0.531694302,-0.183685916,0.174385363,0.569658853,0.190502243,-0.014553015,0.35234861,0.229018492,-0.484845169,0.515701937,-0.052415604,-0.230769231,-0.024619761,0.002735529,0.130865103,-0.193548387,0.232245275,0.041689463,-0.467118941,-0.239522924,0.513951198,0.323294723,-0.177091377,-0.0263705,0.230112704,0.065105591,0.252448433,-0.250246198,0.276069592,-0.386561265,-0.167720685,-0.066198036,-0.328737381,-0.172166739,0.091836735,-0.102692141,0.235019
54,0.007815892,-0.157118548,-0.030872701,-0.123571834,0.049671826,0.223404255,-0.027327935,-0.334817814,0.233805668,0.011336032,-0.342105263,0.008502024,-0.136752137,-0.186202686,-0.153846154,0.141025641,-0.134920635,0.19047619,-0.055555556,0.105006105,-0.202887538,-0.091054832,-0.037451151,-0.349544073,-0.164025185,-0.216999566,-0.182262267,0.169995658,0.055317148,0.224381242,0.227746418,0.094550586,-0.139600521,0.075445072,-0.243269648,0.155123752,-0.106708641,0.129722102,-0.231980026,0.2967868,-0.367564047,0.029643527,0.100375235,-0.167933131,0.28582284,-0.146640316,0.022002635,-0.311462451,-0.131488801,-0.025075988,0.152192792,-0.157837603,0.074576639,-0.17032132,-0.086635012,-0.370929223,-0.094224924,0.097651141,0.058013588,0.206880253,-0.062092922,0.269318104,-0.004884005,0.247863248,-0.083638584,0.30952381,0.324786325,0.383394383,-0.032478632,0.293040293,0.108058608,-0.16971917,0.047619048,0.000610501,-0.263125763,-0.178266178,0.285714286,0.119047619,0.178876679,-0.054334554,-0.322344322,0.035409035,0.158290598,-0.020854701,-0.074871795,-0.269059829,-0.057997558,0.148962149,0.293913043,0.015262515,0.256410256,-0.137973138,-0.401098901,0.108058608,-0.092185592,-0.342490842,0.086691087,0.260683761,0.247863248,0.015262515,-0.453601954,0.084859585,0.066378642,0.152625153,0.075091575,-0.384004884,0.262515263,-0.548229548,-0.299409186,-0.169506385,-0.294453974,-0.179267758,-0.274023251,0.038307604,-0.175223937,0.000114351,-0.014827803,0.01841052,-0.195349724,-0.273947017,0.13733562,-0.307718696,-0.127272727,-0.062626263,-0.250543168,-0.153728869,-0.04898037,-0.131694302,-0.077568134,-0.263198018,-0.179721746,-0.034190966,-0.099792258,-0.051267391,-0.101812499,-0.187497618,-0.088317134,-0.141757195,-0.037392796,-0.4089575,0.066209262,0.112940728,-0.217991233,0.117972175,-0.115456451,-0.226300743,-0.237430913,-0.336611397,-0.249475891,-0.173851725,-0.404078521,0.054164284,-0.030379843,0.076731022,-0.076882028,-0.067810177,-0.066056794,-0.191233086,-0.174537831,-0.251
15304,-0.247265104,-0.164398704,-0.075509815,-0.194129979,-0.232556367,-0.135508586,-0.131999238,-0.136878216,-0.134819897,-0.187802554,-0.232323232,0.018639222,0.040747094,-0.003621117,0.108290452,-0.030760435,-0.154716981,-0.377861213,-0.045549838,-0.485734434,-0.311911569,-0.144806556,-0.263579188,-0.328378121,0.088545836,-0.157766343,0.044863732,-0.272651039,0.056680008,0.104326282,-0.058585859,-0.233547428,-0.12109777,-0.082065942,-0.053859348,-0.369620736,0.09334858,-0.071316943,-0.135201067,-0.173927959,-0.280503145,-0.158376215,-0.210977702,-0.312870457,-0.313131313,-0.197103107,-0.136115876,0.134819897,-0.333638269,-0.235067658,-0.44605386,-0.048294263,-0.269220507,-0.043034115,0.091214027,-0.146636173,-0.30268725,0.062245092,-0.125290642,-0.309243377,-0.370154374,-0.329292929,-0.30268725,-0.11278826,0.02199352,-0.084505432,-0.047455689,-0.33912712,-0.062056873,-0.101505622,-0.107070707,0.037240328,-0.012388031,0.139851344,-0.160129598,-0.199466362,-0.225538403,-0.131618067,-0.345835716,-0.396302649,-0.020812289,-0.094568325,-0.151438917,0.138479131,-0.370688012,-0.159062321,-0.147246045,-0.386544692,0.073832666,-0.103182771,-0.106308367,-0.184753192,-0.2396417,-0.318772632,-0.211206404 +2Unstim 
BasophilsZap70_SykUnstim,0.326592318,-0.497615425,0.071006475,-0.340329114,0.417636748,0.478002333,0.41946832,0.407486782,1,0.324531799,0.053764292,0.217461108,-0.274850388,-0.161644793,0.447631735,0.204904294,-0.348273683,0.028045958,-0.234174209,0.431519161,-0.143697479,0.645098039,0.475630252,0.496638655,-0.265266106,0.281512605,0.580392157,0.359138105,0.259364066,0.339871221,0.307666066,0.525241648,0.580886741,0.290647703,0.116713843,0.333308085,0.112069358,0.418584446,0.177090192,0.075514219,0.24508733,0.220895307,0.105125531,0.495006029,0.393631866,-0.07973837,0.644799007,0.182355963,0.478612858,0.355668536,0.391994729,-0.076009137,0.575676419,0.586526314,-0.386290323,0.218951613,0.24233871,0.146647558,-0.468189817,0.25508573,0.600743038,0.512916052,0.165871808,-0.092587299,0.073406567,-0.074131123,0.563070481,0.088893948,0.129641671,0.229290015,-0.044530113,0.395810506,0.238218932,0.175182303,0.043843273,0.335826498,0.145800823,0.41504202,-0.090243116,0.063837942,0.497615425,0.25868578,0.160857909,0.270759122,0.131750287,-0.110302566,0.221042841,0.094769096,-0.016304645,0.060294359,-0.136674509,0.015763908,-0.204380901,0.09002556,0.001094271,-0.413744051,-0.164031296,0.369097774,0.41559946,-0.101164811,0.296109865,0.103846364,0.033703562,0.277303568,-0.010176725,0.158997648,-0.255486011,-0.090807254,-0.419531146,-0.445253138,-0.217117443,-0.157814729,0.169110484,0.400564847,0.04018682,-0.145149528,-0.061188104,0.085014994,-0.251641141,0.3626589,-0.100304003,-0.385005263,0.086930136,0.304356792,-0.145896731,-0.089159114,-0.065934066,-0.022588523,0.212454212,0.002442002,0.07020757,0.236263736,0.208791209,0.045787546,-0.027370483,-0.305927783,0.292386271,-0.187900538,-0.092429687,0.10079289,-0.145324231,0.303247574,-0.232212737,0.295427436,0.327251053,0.36624313,-0.188660829,-0.105789089,0.056804614,0.261757397,0.032583908,0.491365338,0.050505058,0.525578442,-0.253176968,0.07492961,0.293145669,-0.211578178,0.449223483,0.005601318,-0.117298194,-0.19261945,0.15321
2529,-0.244379313,-0.005539264,-0.311393551,-0.175518653,-0.200282423,-0.214823162,-0.134571542,0.14097971,-0.163136769,-0.195970454,0.031491187,-0.107201058,-0.017033009,0.104411541,0.171311338,0.066860022,0.437185168,0.21309724,0.492749204,0.173704909,0.597466043,0.293695622,0.010990689,0.368493364,0.402381321,-0.099832088,0.344069612,-0.208823083,0.203938332,0.393833007,0.160586172,-0.036635629,0.029003206,0.040348778,0.249957261,-0.078303985,-0.066678066,0.163944438,-0.253701728,-0.262608696,0.315066406,0.428331557,0.23477332,-0.235994507,0.234468023,-0.163639141,-0.223172037,0.339184861,0.597160746,0.540070225,0.325751797,-0.222561444,0.296137998,-0.045600371,0.29094795,0.226225006,-0.274767214,0.57701115,-0.321172344,-0.262334641,-0.073682646,-0.310260794,-0.157060358,-0.486244411,0.214561118,0.023009133,-0.112374621,-0.08787901,0.12229564,0.001488153,-0.482047057,0.081237884,-0.296829261,-0.171366527,-0.236082098,-0.018888094,-0.181710744,-0.215476904,-0.111153572,-0.196703283,-0.293700324,-0.079177364,-0.019956512,-0.393833808,-0.019193356,-0.090435629,-0.280192475,-0.135803489,-0.129545615,-0.018582832,-0.098103616,0.149006076,-0.085664184,-0.149998178,0.069714238,-0.315679197,-0.023009133,-0.274850388,-0.448620854,-0.447934014,-0.095661519,-0.376960569,0.062464263,-0.282563642,0.205826912,0.089785223,0.017514415,-0.12519563,-0.147632396,-0.307895013,-0.116495659,-0.004693405,-0.352921177,0.18121123,-0.274239863,-0.298132307,-0.04456912,-0.074522117,-0.277445116,0.133971916,-0.395505243,0.090624694,-0.060785321,-0.026214385,-0.215705851,0.22784002,-0.271110927,-0.041935385,-0.415164405,0.073377384,-0.199988663,-0.21776637,-0.262029378,-0.290952965,-0.288205606,-0.09940098,-0.358644842,0.035219618,-0.423360413,-0.073301068,-0.147708712,-0.134124547,-0.295155944,-0.099019402,0.103369388,-0.27446881,-0.312321314,-0.066203724,0.065287937,-0.007135502,-0.307895013,-0.339489643,0.012248643,-0.343381735,-0.314540459,-0.117487761,-0.378028987,-0.318502872,-0.113366
722,-0.352234337,-0.4409893,-0.547383584,-0.187774366,-0.305529232,-0.267905674,-0.014461793,-0.261571485,-0.400771015,-0.123669319,-0.120006174,-0.241195238,-0.198229594,-0.255160981,-0.356508007,-0.351929075,-0.302705557,-0.193040138,-0.354142225,-0.012859167,-0.078722472,-0.13526928,-0.185942793,-0.059030064,0.172587575,-0.274392495,-0.270805665,-0.195405919,-0.275918805,-0.409547301,-0.198534856,-0.269737247,-0.328124883,-0.190674356,-0.069027399,-0.257374131,-0.282024048,-0.066890564,-0.332926507,-0.431678806,0.036135404,-0.206929564,-0.230892641,-0.400771015,-0.353531701,-0.330713357,-0.333002823 +2Unstim Basophils4EBP1Unstim,0.168591576,-0.258395273,0.176289593,-0.107985516,0.051648561,0.590165809,0.3260911,0.669677911,0.324531799,1,0.314884696,0.502877835,-0.170802363,-0.011910268,0.289777983,0.531567993,-0.215425532,0.023594435,-0.077339432,0.296831637,-0.099159664,0.562184874,0.41092437,0.469747899,-0.261344538,0.462745098,0.515966387,-0.098451863,-0.141428802,0.427024967,0.146712407,0.559291023,0.229087652,0.448904136,-0.190176089,0.198475319,0.095635601,0.308733276,0.414141414,0.094797027,-0.12544311,0.038078902,0.244076655,0.304878049,0.510278746,-0.061672474,0.444947735,0.417038308,0.388374309,0.252601487,0.365656566,-0.006611837,0.326318336,0.213395706,0.050806452,0.349193548,0.278225806,0.225032938,-0.053359684,0.460328979,0.299064667,0.326318336,0.391881075,-0.226316628,0.292036199,-0.091402715,0.529773756,-0.268506787,0.404934688,0.182313703,-0.09769392,-0.172708214,0.391499905,0.257175529,0.315951973,0.446845817,0.55715647,0.654964742,0.063998475,0.343624929,0.383495331,0.186781924,0.06401138,0.332558856,0.272786957,-0.4312288,0.1732137,-0.058981233,-0.152423679,-0.088302878,-0.266659372,-0.132331378,-0.204545455,0.147627016,0.034686508,-0.407374986,-0.204070467,0.379800853,0.295238095,0.037065637,0.12331765,0.139730824,0.058540322,0.291623352,-0.301455301,0.151329467,-0.297760211,-0.098418972,0.148908723,-0.233850832,-0.044615719,0.133630048,-0.0
85649153,0.128962223,-0.146113765,-0.14139859,-0.095211085,-0.226967021,0.049185641,0.119192358,0.067813765,-0.127530364,0.003643725,0.00242915,-0.422064777,-0.129352227,-0.035409035,-0.25030525,-0.092185592,0.027472527,-0.252136752,0.307081807,-0.128205128,-0.114163614,-0.060030395,-0.136038961,-0.081524099,-0.271168042,-0.062961355,-0.215805471,-0.25,0.140251845,-0.210840865,0.136235345,0.108445506,-0.00293096,-0.197568389,0.071971342,-0.176291793,0.017585758,-0.145028224,0.100412505,-0.323925315,0.233825445,-0.126682588,-0.127016886,-0.058724203,-0.02485888,0.142422927,-0.348221344,-0.143083004,-0.401581028,-0.265744401,0.141337386,0.419344333,-0.134607034,0.034520191,-0.327616153,-0.18139376,-0.204732957,0.060138949,0.224762704,0.044252789,0.049509804,0.068606166,0.216642651,-0.117826618,0.335384615,-0.047619048,0.380952381,0.399267399,0.271062271,-0.275213675,0.333333333,0.095238095,0.169108669,0.201465201,0.024420024,-0.344322344,-0.052503053,0.031135531,0.228937729,0.111721612,0.030525031,-0.473137973,0.227106227,0.126837607,0.04957265,-0.077606838,-0.284786325,-0.079365079,0.158119658,0.104347826,-0.001221001,0.476190476,-0.036019536,-0.370573871,0.107448107,0.03968254,-0.167887668,0.0995116,0.405982906,0.346153846,-0.114774115,-0.492063492,0.121489621,0.081129451,-0.017094017,0.070818071,-0.536630037,0.247252747,-0.423076923,-0.293615399,-0.320983419,-0.251229274,-0.168442318,-0.221879169,-0.074290071,-0.066056794,0.04936154,-0.333606511,-0.029464456,0.050047646,-0.008042691,0.2360587,-0.347360396,-0.06994473,0.022908329,-0.237202211,-0.013150622,-0.046083476,-0.004307223,-0.14053745,-0.280274443,-0.050657519,-0.006975415,-0.017229221,-0.119801791,-0.040061751,-0.080922432,-0.12902611,-0.0810749,0.09731275,-0.237659615,-0.100743282,0.182313703,-0.099142367,0.04219554,0.089918048,-0.146712407,-0.129712217,-0.286220698,-0.14884696,-0.190242043,-0.146864875,-0.007204117,-0.000914826,0.261640207,0.000419287,-0.027101201,0.101810558,-0.221040595,-0.108900324,-0.
24757004,-0.085572708,-0.044177625,-0.036249285,-0.326624738,-0.222417046,-0.15826488,-0.010939585,-0.133523918,0.063846007,-0.223098914,-0.302839718,-0.027024967,0.00293501,-0.050047646,0.013988946,0.103030303,-0.163026491,-0.164554308,-0.058280922,-0.316720349,-0.398894606,-0.184372022,-0.164170002,-0.281570421,-0.043720221,-0.096397942,0.025271584,-0.277987421,0.103640175,0.088088431,-0.090299219,-0.038117746,-0.163407662,-0.097998856,-0.052792072,-0.076272156,-0.060644178,-0.095711835,0.00102916,-0.143053173,-0.146941109,-0.164093768,-0.111111111,-0.164821133,-0.315951973,-0.197103107,-0.102191729,0.118810749,-0.259996188,-0.072231751,-0.243877337,0.044025157,-0.186963979,-0.015361159,0.212121212,-0.070478369,-0.299409186,0.099218601,0.002248904,-0.183076043,-0.228206594,-0.169201448,-0.199923766,-0.132151706,-0.067200305,-0.080083857,0.114999047,-0.165465981,-0.104139666,-0.064684582,-0.14808462,0.07314656,0.027558605,0.20038117,-0.156241662,-0.183990852,-0.074290071,-0.112330856,-0.272422337,-0.282180294,-0.096437897,0.078406709,-0.047684391,0.079473985,-0.221879169,0.066133028,-0.165465981,-0.226529445,0.028549647,-0.156699066,-0.097617686,-0.136420812,-0.183457214,-0.033733562,-0.226758148 diff --git a/test_unstructured/partition/csv/test_csv.py b/test_unstructured/partition/csv/test_csv.py index 4572a00ccc..466d8f0a85 100644 --- a/test_unstructured/partition/csv/test_csv.py +++ b/test_unstructured/partition/csv/test_csv.py @@ -15,7 +15,7 @@ from unstructured.chunking.title import chunk_by_title from unstructured.cleaners.core import clean_extra_whitespace from unstructured.documents.elements import Table -from unstructured.partition.csv import partition_csv +from unstructured.partition.csv import get_delimiter, partition_csv from unstructured.partition.utils.constants import UNSTRUCTURED_INCLUDE_DEBUG_METADATA EXPECTED_FILETYPE = "text/csv" @@ -270,3 +270,8 @@ def test_partition_csv_header(): == "Stanley Cups Unnamed: 1 Unnamed: 2 " + EXPECTED_TEXT_XLSX ) 
assert "" in elements[0].metadata.text_as_html + + +def test_partition_csv_detects_the_right_csv_delimiter(): + # -- Issue #2643: previously raised `_csv.Error: Could not determine delimiter` on this file -- + assert get_delimiter("example-docs/csv-with-long-lines.csv") == "," diff --git a/unstructured/__version__.py b/unstructured/__version__.py index bdb1cf99ab..e6c019fbac 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.8-dev3" # pragma: no cover +__version__ = "0.13.8-dev4" # pragma: no cover diff --git a/unstructured/partition/csv.py b/unstructured/partition/csv.py index bb47428c06..78ed29ea5b 100644 --- a/unstructured/partition/csv.py +++ b/unstructured/partition/csv.py @@ -111,19 +111,23 @@ def partition_csv( return list(elements) -def get_delimiter(file_path=None, file=None): - """ - Use the standard csv sniffer to determine the delimiter. - Read just a small portion in case the file is large. +def get_delimiter(file_path: str | None = None, file: IO[bytes] | None = None): + """Use the standard csv sniffer to determine the delimiter. + + Reads just a small portion in case the file is large. 
""" sniffer = csv.Sniffer() + num_bytes = 65536 - num_bytes = 8192 + # -- read whole lines, sniffer can be confused by a trailing partial line -- if file: - data = file.read(num_bytes).decode("utf-8") + lines = file.readlines(num_bytes) file.seek(0) - else: + data = "\n".join(ln.decode("utf-8") for ln in lines) + elif file_path is not None: with open(file_path) as f: - data = f.read(num_bytes) + data = "\n".join(f.readlines(num_bytes)) + else: + raise ValueError("either `file_path` or `file` argument must be provided") - return sniffer.sniff(data, delimiters=[",", ";"]).delimiter + return sniffer.sniff(data, delimiters=",;").delimiter From 45d7bcb3996f5bf9ac97f94c8dbfdff07e6ce468 Mon Sep 17 00:00:00 2001 From: John <43506685+Coniferish@users.noreply.github.com> Date: Mon, 13 May 2024 08:56:55 -0500 Subject: [PATCH 05/10] get params with defaults (#3004) Extract repeated logic into `get_call_args_with_defaults` function --- CHANGELOG.md | 2 +- unstructured/__version__.py | 2 +- unstructured/chunking/dispatch.py | 13 ++----------- unstructured/documents/elements.py | 13 ++++--------- unstructured/file_utils/filetype.py | 27 ++++++++------------------- unstructured/utils.py | 17 ++++++++++++++++- 6 files changed, 32 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8baf4122b4..40d0576bb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.13.8-dev4 +## 0.13.8-dev5 ### Enhancements diff --git a/unstructured/__version__.py b/unstructured/__version__.py index e6c019fbac..1e8fd23481 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.8-dev4" # pragma: no cover +__version__ = "0.13.8-dev5" # pragma: no cover diff --git a/unstructured/chunking/dispatch.py b/unstructured/chunking/dispatch.py index 69a416bdf3..a229d59432 100644 --- a/unstructured/chunking/dispatch.py +++ b/unstructured/chunking/dispatch.py @@ -16,7 +16,7 @@ from unstructured.chunking.basic import chunk_elements 
from unstructured.chunking.title import chunk_by_title from unstructured.documents.elements import Element -from unstructured.utils import lazyproperty +from unstructured.utils import get_call_args_applying_defaults, lazyproperty _P = ParamSpec("_P") @@ -70,20 +70,11 @@ def add_chunking_strategy(func: Callable[_P, list[Element]]) -> Callable[_P, lis def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]: """The decorated function is replaced with this one.""" - def get_call_args_applying_defaults() -> dict[str, Any]: - """Map both explicit and default arguments of decorated func call by param name.""" - sig = inspect.signature(func) - call_args: dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs) - for param in sig.parameters.values(): - if param.name not in call_args and param.default is not param.empty: - call_args[param.name] = param.default - return call_args - # -- call the partitioning function to get the elements -- elements = func(*args, **kwargs) # -- look for a chunking-strategy argument -- - call_args = get_call_args_applying_defaults() + call_args = get_call_args_applying_defaults(func, *args, **kwargs) chunking_strategy = call_args.pop("chunking_strategy", None) # -- no chunking-strategy means no chunking -- diff --git a/unstructured/documents/elements.py b/unstructured/documents/elements.py index 976ab1271b..b2cc258219 100644 --- a/unstructured/documents/elements.py +++ b/unstructured/documents/elements.py @@ -6,7 +6,6 @@ import enum import functools import hashlib -import inspect import os import pathlib import re @@ -23,7 +22,7 @@ RelativeCoordinateSystem, ) from unstructured.partition.utils.constants import UNSTRUCTURED_INCLUDE_DEBUG_METADATA -from unstructured.utils import lazyproperty +from unstructured.utils import get_call_args_applying_defaults, lazyproperty Point: TypeAlias = "tuple[float, float]" Points: TypeAlias = "tuple[Point, ...]" @@ -568,20 +567,16 @@ def decorator(func: Callable[_P, list[Element]]) -> 
Callable[_P, list[Element]]: @functools.wraps(func) def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]: elements = func(*args, **kwargs) - sig = inspect.signature(func) - params: dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs) - for param in sig.parameters.values(): - if param.name not in params and param.default is not param.empty: - params[param.name] = param.default + call_args = get_call_args_applying_defaults(func, *args, **kwargs) - regex_metadata: dict["str", "str"] = params.get("regex_metadata", {}) + regex_metadata: dict["str", "str"] = call_args.get("regex_metadata", {}) # -- don't write an empty `{}` to metadata.regex_metadata when no regex-metadata was # -- requested, otherwise it will serialize (because it's not None) when it has no # -- meaning or is even misleading. Also it complicates tests that don't use regex-meta. if regex_metadata: elements = _add_regex_metadata(elements, regex_metadata) - unique_element_ids: bool = params.get("unique_element_ids", False) + unique_element_ids: bool = call_args.get("unique_element_ids", False) if unique_element_ids is False: elements = assign_and_map_hash_ids(elements) diff --git a/unstructured/file_utils/filetype.py b/unstructured/file_utils/filetype.py index 244323d35e..be6efd451b 100644 --- a/unstructured/file_utils/filetype.py +++ b/unstructured/file_utils/filetype.py @@ -2,12 +2,11 @@ import enum import functools -import inspect import json import os import re import zipfile -from typing import IO, Any, Callable, Dict, List, Optional +from typing import IO, Callable, List, Optional from typing_extensions import ParamSpec @@ -20,6 +19,7 @@ remove_element_metadata, set_element_hierarchy, ) +from unstructured.utils import get_call_args_applying_defaults try: import magic @@ -580,18 +580,14 @@ def add_metadata(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element @functools.wraps(func) def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]: elements = 
func(*args, **kwargs) - sig = inspect.signature(func) - params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs) - for param in sig.parameters.values(): - if param.name not in params and param.default is not param.empty: - params[param.name] = param.default - include_metadata = params.get("include_metadata", True) + call_args = get_call_args_applying_defaults(func, *args, **kwargs) + include_metadata = call_args.get("include_metadata", True) if include_metadata: - if params.get("metadata_filename"): - params["filename"] = params.get("metadata_filename") + if call_args.get("metadata_filename"): + call_args["filename"] = call_args.get("metadata_filename") metadata_kwargs = { - kwarg: params.get(kwarg) for kwarg in ("filename", "url", "text_as_html") + kwarg: call_args.get(kwarg) for kwarg in ("filename", "url", "text_as_html") } # NOTE (yao): do not use cast here as cast(None) still is None if not str(kwargs.get("model_name", "")).startswith("chipper"): @@ -620,16 +616,9 @@ def decorator(func: Callable[_P, List[Element]]) -> Callable[_P, List[Element]]: @functools.wraps(func) def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]: elements = func(*args, **kwargs) - sig = inspect.signature(func) - params: Dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs) - for param in sig.parameters.values(): - if param.name not in params and param.default is not param.empty: - params[param.name] = param.default + params = get_call_args_applying_defaults(func, *args, **kwargs) include_metadata = params.get("include_metadata", True) if include_metadata: - if params.get("metadata_filename"): - params["filename"] = params.get("metadata_filename") - for element in elements: # NOTE(robinson) - Attached files have already run through this logic # in their own partitioning function diff --git a/unstructured/utils.py b/unstructured/utils.py index 8ebeb2bca5..84f1c52100 100644 --- a/unstructured/utils.py +++ b/unstructured/utils.py @@ -3,6 +3,7 @@ 
import functools import html import importlib +import inspect import json import os import platform @@ -33,7 +34,7 @@ from unstructured.__version__ import __version__ if TYPE_CHECKING: - from unstructured.documents.elements import Text + from unstructured.documents.elements import Element, Text # Box format: [x_bottom_left, y_bottom_left, x_top_right, y_top_right] Box: TypeAlias = Tuple[float, float, float, float] @@ -46,6 +47,20 @@ _P = ParamSpec("_P") +def get_call_args_applying_defaults( + func: Callable[_P, List[Element]], + *args: _P.args, + **kwargs: _P.kwargs, +) -> dict[str, Any]: + """Map both explicit and default arguments of decorated func call by param name.""" + sig = inspect.signature(func) + call_args: dict[str, Any] = dict(**dict(zip(sig.parameters, args)), **kwargs) + for arg in sig.parameters.values(): + if arg.name not in call_args and arg.default is not arg.empty: + call_args[arg.name] = arg.default + return call_args + + def htmlify_matrix_of_cell_texts(matrix: Sequence[Sequence[str]]) -> str: """Form an HTML table from "rows" and "columns" of `matrix`. From f4b01a4aad4a37ff275ea319b83f4cf31df348b2 Mon Sep 17 00:00:00 2001 From: Matt Robinson Date: Mon, 13 May 2024 11:30:09 -0400 Subject: [PATCH 06/10] build(deps): bump versions for security hygiene (#3008) ### Summary Version bumps to keep on top of security scans. 
--- docs/requirements.txt | 2 +- requirements/base.txt | 6 +++--- requirements/build.txt | 2 +- requirements/dev.txt | 2 +- requirements/extra-docx.txt | 2 +- requirements/extra-odt.txt | 2 +- requirements/extra-paddleocr.txt | 6 +++--- requirements/extra-pdf-image.txt | 6 +++--- requirements/extra-pptx.txt | 2 +- requirements/huggingface.txt | 2 +- requirements/ingest/azure.txt | 2 +- requirements/ingest/delta-table.txt | 2 +- requirements/ingest/embed-aws-bedrock.txt | 6 +++--- requirements/ingest/embed-huggingface.txt | 8 ++++---- requirements/ingest/embed-octoai.txt | 4 ++-- requirements/ingest/embed-openai.txt | 10 +++++----- requirements/ingest/embed-vertexai.txt | 14 ++++++-------- requirements/ingest/google-drive.txt | 2 +- requirements/ingest/salesforce.txt | 2 +- requirements/ingest/weaviate.txt | 2 +- requirements/test.txt | 6 +++--- 21 files changed, 44 insertions(+), 46 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 51ada53f22..43b2b2232a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -48,7 +48,7 @@ markdown-it-py==3.0.0 # myst-parser markupsafe==2.1.5 # via jinja2 -mdit-py-plugins==0.4.0 +mdit-py-plugins==0.4.1 # via myst-parser mdurl==0.1.2 # via markdown-it-py diff --git a/requirements/base.txt b/requirements/base.txt index 8fa2c493b0..98e2c29f1d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -21,7 +21,7 @@ charset-normalizer==3.3.2 # unstructured-client click==8.1.7 # via nltk -dataclasses-json==0.6.5 +dataclasses-json==0.6.6 # via -r ./base.in dataclasses-json-speakeasy==0.5.11 # via unstructured-client @@ -39,7 +39,7 @@ jsonpath-python==1.0.6 # via unstructured-client langdetect==1.0.9 # via -r ./base.in -lxml==5.2.1 +lxml==5.2.2 # via -r ./base.in marshmallow==3.21.2 # via @@ -67,7 +67,7 @@ python-magic==0.4.27 # via -r ./base.in rapidfuzz==3.9.0 # via -r ./base.in -regex==2024.4.28 +regex==2024.5.10 # via nltk requests==2.31.0 # via diff --git a/requirements/build.txt 
b/requirements/build.txt index 51ada53f22..43b2b2232a 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -48,7 +48,7 @@ markdown-it-py==3.0.0 # myst-parser markupsafe==2.1.5 # via jinja2 -mdit-py-plugins==0.4.0 +mdit-py-plugins==0.4.1 # via myst-parser mdurl==0.1.2 # via markdown-it-py diff --git a/requirements/dev.txt b/requirements/dev.txt index 8def7400cb..c9bee6f6ee 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -253,7 +253,7 @@ platformdirs==3.10.0 # -c ./test.txt # jupyter-core # virtualenv -pre-commit==3.7.0 +pre-commit==3.7.1 # via -r ./dev.in prometheus-client==0.20.0 # via jupyter-server diff --git a/requirements/extra-docx.txt b/requirements/extra-docx.txt index 651e20874a..6b1f08bec9 100644 --- a/requirements/extra-docx.txt +++ b/requirements/extra-docx.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-docx.in # -lxml==5.2.1 +lxml==5.2.2 # via # -c ./base.txt # python-docx diff --git a/requirements/extra-odt.txt b/requirements/extra-odt.txt index 3240561f00..913651f5e6 100644 --- a/requirements/extra-odt.txt +++ b/requirements/extra-odt.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-odt.in # -lxml==5.2.1 +lxml==5.2.2 # via # -c ./base.txt # python-docx diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index 153b30a366..8149077ded 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -8,7 +8,7 @@ attrdict==2.0.1 # via unstructured-paddleocr babel==2.15.0 # via flask-babel -bce-python-sdk==0.9.7 +bce-python-sdk==0.9.9 # via visualdl blinker==1.8.2 # via flask @@ -77,7 +77,7 @@ lazy-loader==0.4 # via scikit-image lmdb==1.4.1 # via unstructured-paddleocr -lxml==5.2.1 +lxml==5.2.2 # via # -c ./base.txt # premailer @@ -199,7 +199,7 @@ six==1.16.0 # imgaug # python-dateutil # visualdl -tifffile==2024.5.3 +tifffile==2024.5.10 # via scikit-image tqdm==4.66.4 # via diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index b2ce938e86..58a58d11dd 
100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -85,7 +85,7 @@ kiwisolver==1.4.5 # via matplotlib layoutparser[layoutmodels,tesseract]==0.3.4 # via unstructured-inference -lxml==5.2.1 +lxml==5.2.2 # via # -c ./base.txt # pikepdf @@ -198,7 +198,7 @@ pyparsing==3.0.9 # matplotlib pypdf==4.2.0 # via -r ./extra-pdf-image.in -pypdfium2==4.29.0 +pypdfium2==4.30.0 # via pdfplumber pytesseract==0.3.10 # via layoutparser @@ -222,7 +222,7 @@ rapidfuzz==3.9.0 # via # -c ./base.txt # unstructured-inference -regex==2024.4.28 +regex==2024.5.10 # via # -c ./base.txt # transformers diff --git a/requirements/extra-pptx.txt b/requirements/extra-pptx.txt index 2657ec992c..f532344908 100644 --- a/requirements/extra-pptx.txt +++ b/requirements/extra-pptx.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-pptx.in # -lxml==5.2.1 +lxml==5.2.2 # via python-pptx pillow==10.3.0 # via python-pptx diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index a425660958..e2865dd155 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -64,7 +64,7 @@ pyyaml==6.0.1 # via # huggingface-hub # transformers -regex==2024.4.28 +regex==2024.5.10 # via # -c ./base.txt # sacremoses diff --git a/requirements/ingest/azure.txt b/requirements/ingest/azure.txt index d7523906bd..8a855d7438 100644 --- a/requirements/ingest/azure.txt +++ b/requirements/ingest/azure.txt @@ -23,7 +23,7 @@ azure-datalake-store==0.0.53 # via adlfs azure-identity==1.16.0 # via adlfs -azure-storage-blob==12.19.1 +azure-storage-blob==12.20.0 # via adlfs certifi==2024.2.2 # via diff --git a/requirements/ingest/delta-table.txt b/requirements/ingest/delta-table.txt index 434d1a3b98..9c5e3d2fde 100644 --- a/requirements/ingest/delta-table.txt +++ b/requirements/ingest/delta-table.txt @@ -4,7 +4,7 @@ # # pip-compile ./ingest/delta-table.in # -deltalake==0.17.3 +deltalake==0.17.4 # via -r ./ingest/delta-table.in fsspec==2024.3.1 # via -r ./ingest/delta-table.in diff 
--git a/requirements/ingest/embed-aws-bedrock.txt b/requirements/ingest/embed-aws-bedrock.txt index ed497d1168..bef1c51f31 100644 --- a/requirements/ingest/embed-aws-bedrock.txt +++ b/requirements/ingest/embed-aws-bedrock.txt @@ -30,7 +30,7 @@ charset-normalizer==3.3.2 # via # -c ./ingest/../base.txt # requests -dataclasses-json==0.6.5 +dataclasses-json==0.6.6 # via # -c ./ingest/../base.txt # langchain-community @@ -51,11 +51,11 @@ jsonpatch==1.33 # via langchain-core jsonpointer==2.4 # via jsonpatch -langchain-community==0.0.37 +langchain-community==0.0.38 # via -r ./ingest/embed-aws-bedrock.in langchain-core==0.1.52 # via langchain-community -langsmith==0.1.54 +langsmith==0.1.57 # via # langchain-community # langchain-core diff --git a/requirements/ingest/embed-huggingface.txt b/requirements/ingest/embed-huggingface.txt index 8732773aff..d176e4bbd7 100644 --- a/requirements/ingest/embed-huggingface.txt +++ b/requirements/ingest/embed-huggingface.txt @@ -23,7 +23,7 @@ charset-normalizer==3.3.2 # via # -c ./ingest/../base.txt # requests -dataclasses-json==0.6.5 +dataclasses-json==0.6.6 # via # -c ./ingest/../base.txt # langchain-community @@ -62,11 +62,11 @@ jsonpatch==1.33 # via langchain-core jsonpointer==2.4 # via jsonpatch -langchain-community==0.0.37 +langchain-community==0.0.38 # via -r ./ingest/embed-huggingface.in langchain-core==0.1.52 # via langchain-community -langsmith==0.1.54 +langsmith==0.1.57 # via # langchain-community # langchain-core @@ -120,7 +120,7 @@ pyyaml==6.0.1 # langchain-community # langchain-core # transformers -regex==2024.4.28 +regex==2024.5.10 # via # -c ./ingest/../base.txt # transformers diff --git a/requirements/ingest/embed-octoai.txt b/requirements/ingest/embed-octoai.txt index 65866ca487..4ed74d29c2 100644 --- a/requirements/ingest/embed-octoai.txt +++ b/requirements/ingest/embed-octoai.txt @@ -38,13 +38,13 @@ idna==3.7 # anyio # httpx # requests -openai==1.26.0 +openai==1.28.1 # via -r ./ingest/embed-octoai.in pydantic==2.7.1 # 
via openai pydantic-core==2.18.2 # via pydantic -regex==2024.4.28 +regex==2024.5.10 # via # -c ./ingest/../base.txt # tiktoken diff --git a/requirements/ingest/embed-openai.txt b/requirements/ingest/embed-openai.txt index 8684d9beb4..c2d9488e80 100644 --- a/requirements/ingest/embed-openai.txt +++ b/requirements/ingest/embed-openai.txt @@ -30,7 +30,7 @@ charset-normalizer==3.3.2 # via # -c ./ingest/../base.txt # requests -dataclasses-json==0.6.5 +dataclasses-json==0.6.6 # via # -c ./ingest/../base.txt # langchain-community @@ -59,11 +59,11 @@ jsonpatch==1.33 # via langchain-core jsonpointer==2.4 # via jsonpatch -langchain-community==0.0.37 +langchain-community==0.0.38 # via -r ./ingest/embed-openai.in langchain-core==0.1.52 # via langchain-community -langsmith==0.1.54 +langsmith==0.1.57 # via # langchain-community # langchain-core @@ -83,7 +83,7 @@ numpy==1.26.4 # via # -c ./ingest/../base.txt # langchain-community -openai==1.26.0 +openai==1.28.1 # via -r ./ingest/embed-openai.in orjson==3.10.3 # via langsmith @@ -104,7 +104,7 @@ pyyaml==6.0.1 # via # langchain-community # langchain-core -regex==2024.4.28 +regex==2024.5.10 # via # -c ./ingest/../base.txt # tiktoken diff --git a/requirements/ingest/embed-vertexai.txt b/requirements/ingest/embed-vertexai.txt index 8197b28ca1..1d2be4dabf 100644 --- a/requirements/ingest/embed-vertexai.txt +++ b/requirements/ingest/embed-vertexai.txt @@ -29,7 +29,7 @@ charset-normalizer==3.3.2 # via # -c ./ingest/../base.txt # requests -dataclasses-json==0.6.5 +dataclasses-json==0.6.6 # via # -c ./ingest/../base.txt # langchain @@ -55,7 +55,7 @@ google-auth==2.29.0 # google-cloud-core # google-cloud-resource-manager # google-cloud-storage -google-cloud-aiplatform==1.50.0 +google-cloud-aiplatform==1.51.0 # via langchain-google-vertexai google-cloud-bigquery==3.22.0 # via google-cloud-aiplatform @@ -98,14 +98,12 @@ idna==3.7 # requests # yarl jsonpatch==1.33 - # via - # langchain - # langchain-core + # via langchain-core jsonpointer==2.4 
# via jsonpatch -langchain==0.1.17 +langchain==0.1.20 # via -r ./ingest/embed-vertexai.in -langchain-community==0.0.37 +langchain-community==0.0.38 # via # -r ./ingest/embed-vertexai.in # langchain @@ -119,7 +117,7 @@ langchain-google-vertexai==1.0.3 # via -r ./ingest/embed-vertexai.in langchain-text-splitters==0.0.1 # via langchain -langsmith==0.1.54 +langsmith==0.1.57 # via # langchain # langchain-community diff --git a/requirements/ingest/google-drive.txt b/requirements/ingest/google-drive.txt index 6c600c3c31..c0bc61e783 100644 --- a/requirements/ingest/google-drive.txt +++ b/requirements/ingest/google-drive.txt @@ -17,7 +17,7 @@ charset-normalizer==3.3.2 # requests google-api-core==2.19.0 # via google-api-python-client -google-api-python-client==2.128.0 +google-api-python-client==2.129.0 # via -r ./ingest/google-drive.in google-auth==2.29.0 # via diff --git a/requirements/ingest/salesforce.txt b/requirements/ingest/salesforce.txt index 2ce352a8c9..66881b4c3a 100644 --- a/requirements/ingest/salesforce.txt +++ b/requirements/ingest/salesforce.txt @@ -25,7 +25,7 @@ idna==3.7 # requests isodate==0.6.1 # via zeep -lxml==5.2.1 +lxml==5.2.2 # via # -c ./ingest/../base.txt # zeep diff --git a/requirements/ingest/weaviate.txt b/requirements/ingest/weaviate.txt index 7c72e31f37..18209177d3 100644 --- a/requirements/ingest/weaviate.txt +++ b/requirements/ingest/weaviate.txt @@ -81,7 +81,7 @@ urllib3==1.26.18 # requests validators==0.28.1 # via weaviate-client -weaviate-client==4.5.7 +weaviate-client==4.6.0 # via # -c ./ingest/../deps/constraints.txt # -r ./ingest/weaviate.in diff --git a/requirements/test.txt b/requirements/test.txt index 0f1fa949cd..f8e829eb59 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -37,7 +37,7 @@ flake8==7.0.0 # flake8-print flake8-print==5.0.0 # via -r ./test.in -freezegun==1.5.0 +freezegun==1.5.1 # via -r ./test.in grpcio==1.63.0 # via -r ./test.in @@ -52,7 +52,7 @@ label-studio-sdk==0.0.32 # via -r ./test.in 
label-studio-tools==0.0.4 # via label-studio-sdk -lxml==5.2.1 +lxml==5.2.2 # via # -c ./base.txt # label-studio-sdk @@ -114,7 +114,7 @@ requests==2.31.0 # via # -c ./base.txt # label-studio-sdk -ruff==0.4.3 +ruff==0.4.4 # via -r ./test.in six==1.16.0 # via From 3f8e6b79c5ee24d9b2ddf2622ff24ed210b74d5d Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Mon, 13 May 2024 15:05:12 -0700 Subject: [PATCH 07/10] rfctr(docx): move docx unit tests to bottom (#3011) No code changes, strictly this single block move. Move `Describe_DocxPartitioner` unit-test class to bottom so `DescribeDocxPartitionerOptions` unit-test to follow in subsequent commit will be together with it. Integration tests first, then unit tests, for consistency with other test modules e.g. test_pptx. I added `Describe_DocxPartitioner` soon after I arrived, before we adopted the convention of placing unit-tests after integration tests. Move this so we can maintain that consistency with the block of tests to follow in a closely subsequent PR. 
--- test_unstructured/partition/docx/test_docx.py | 771 +++++++++--------- 1 file changed, 389 insertions(+), 382 deletions(-) diff --git a/test_unstructured/partition/docx/test_docx.py b/test_unstructured/partition/docx/test_docx.py index 52ed3361e8..f1391f8c6f 100644 --- a/test_unstructured/partition/docx/test_docx.py +++ b/test_unstructured/partition/docx/test_docx.py @@ -29,322 +29,6 @@ from unstructured.partition.docx import _DocxPartitioner, partition_docx from unstructured.partition.utils.constants import UNSTRUCTURED_INCLUDE_DEBUG_METADATA - -class Describe_DocxPartitioner: - """Unit-test suite for `unstructured.partition.docx._DocxPartitioner`.""" - - # -- table behaviors ------------------------------------------------------------------------- - - def it_can_convert_a_table_to_html(self): - table = docx.Document(example_doc_path("docx-tables.docx")).tables[0] - assert _DocxPartitioner()._convert_table_to_html(table) == ( - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "
Header Col 1 Header Col 2
Lorem ipsum A link example
" - ) - - def and_it_can_convert_a_nested_table_to_html(self): - """ - Fixture table is: - - +---+-------------+---+ - | a | >b< | c | - +---+-------------+---+ - | | +-----+---+ | | - | | | e | f | | | - | d | +-----+---+ | i | - | | | g&t | h | | | - | | +-----+---+ | | - +---+-------------+---+ - | j | k | l | - +---+-------------+---+ - """ - table = docx.Document(example_doc_path("docx-tables.docx")).tables[1] - - # -- re.sub() strips out the extra padding inserted by tabulate -- - html = re.sub(r" +<", "<", _DocxPartitioner()._convert_table_to_html(table)) - - expected_lines = [ - "", - "", - "", - "", - "", - "", - "", - "", - "
a>b<c
d", - "", - "", - "", - "", - "
ef
g&th
i
jkl
", - ] - actual_lines = html.splitlines() - for expected, actual in zip(expected_lines, actual_lines): - assert actual == expected, f"\nexpected: {repr(expected)}\nactual: {repr(actual)}" - - def it_can_convert_a_table_to_plain_text(self): - table = docx.Document(example_doc_path("docx-tables.docx")).tables[0] - assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == ( - "Header Col 1 Header Col 2 Lorem ipsum A link example" - ) - - def and_it_can_convert_a_nested_table_to_plain_text(self): - """ - Fixture table is: - - +---+-------------+---+ - | a | >b< | c | - +---+-------------+---+ - | | +-----+---+ | | - | | | e | f | | | - | d | +-----+---+ | i | - | | | g&t | h | | | - | | +-----+---+ | | - +---+-------------+---+ - | j | k | l | - +---+-------------+---+ - """ - table = docx.Document(example_doc_path("docx-tables.docx")).tables[1] - assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == ( - "a >b< c d e f g&t h i j k l" - ) - - def but_the_text_of_a_merged_cell_appears_only_once(self): - """ - Fixture table is: - - +---+-------+ - | a | b | - | +---+---+ - | | c | d | - +---+---+ | - | e | | - +-------+---+ - """ - table = docx.Document(example_doc_path("docx-tables.docx")).tables[2] - assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == "a b c d e" - - def it_can_partition_tables_with_incomplete_rows(self): - """DOCX permits table rows to start late and end early. - - It is relatively rare in the wild, but DOCX tables are unique (as far as I know) in that - they allow rows to start late, like in column 3, and end early, like the last cell is in - column 5 of a 7 column table. 
- - A practical example might look like this: - - +------+------+ - | East | West | - +----------+------+------+ - | Started | 25 | 32 | - +----------+------+------+ - | Finished | 17 | 21 | - +----------+------+------+ - """ - elements = iter(partition_docx(example_doc_path("tables-with-incomplete-rows.docx"))) - - e = next(elements) - assert e.text.startswith("Example of DOCX table ") - # -- - # ┌───┬───┐ - # │ a │ b │ - # ├───┼───┤ - # │ c │ d │ - # └───┴───┘ - e = next(elements) - assert type(e).__name__ == "Table" - assert e.text == "a b c d" - assert e.metadata.text_as_html == ( - "\n" - "\n\n\n" - "\n\n\n" - "
a b
c d
" - ) - # -- - # ┌───┐ - # │ a │ - # ├───┼───┐ - # │ b │ c │ - # └───┴───┘ - e = next(elements) - assert type(e).__name__ == "Table" - assert e.text == "a b c", f"actual {e.text=}" - assert e.metadata.text_as_html == ( - "\n" - "\n\n\n" - "\n\n\n" - "
a
b c
" - ), f"actual {e.metadata.text_as_html=}" - # -- - # ┌───────┐ - # │ a │ - # ├───┬───┼───┐ - # │ b │ c │ d │ - # └───┴───┴───┘ - e = next(elements) - assert type(e).__name__ == "Table" - assert e.text == "a b c d", f"actual {e.text=}" - assert e.metadata.text_as_html == ( - "\n" - "\n\n\n" - "\n\n\n" - "
a a
b c d
" - ), f"actual {e.metadata.text_as_html=}" - # -- - # ┌───┬───┐ - # │ │ b │ - # │ a ├───┼───┐ - # │ │ c │ d │ - # └───┴───┴───┘ - e = next(elements) - assert type(e).__name__ == "Table" - assert e.text == "a b c d", f"actual {e.text=}" - assert e.metadata.text_as_html == ( - "\n" - "\n\n\n" - "\n\n\n" - "
a b
a c d
" - ), f"actual {e.metadata.text_as_html=}" - # -- late-start, early-end, and >2 rows vertical span -- - # ┌───────┬───┬───┐ - # │ a │ b │ c │ - # └───┬───┴───┼───┘ - # │ d │ - # ┌───┤ ├───┐ - # │ e │ │ f │ - # └───┤ ├───┘ - # │ │ - # └───────┘ - e = next(elements) - assert type(e).__name__ == "Table" - assert e.text == "a b c d e f", f"actual {e.text=}" - assert e.metadata.text_as_html == ( - "\n" - "\n" - "\n" - "\n\n" - "\n" - "\n" - "\n" - "\n" - "
a a b c
d d
e d d f
d d
" - ), f"actual {e.metadata.text_as_html=}" - # -- - # -- The table from the specimen file we received with the bug report. -- - e = next(elements) - assert type(e).__name__ == "Table" - assert e.text == "Data More Dato WTF? Strange Format", f"actual {e.text=}" - assert e.metadata.text_as_html == ( - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "
Data Data
Data Data
Data Data
More
Dato
WTF? WTF?
StrangeStrange
Format Format
" - ), f"actual {e.metadata.text_as_html=}" - - # -- page-break behaviors -------------------------------------------------------------------- - - def it_places_page_breaks_precisely_where_they_occur(self): - """Page-break behavior has some subtleties. - - * A hard page-break does not generate a PageBreak element (because that would double-count - it). Word inserts a rendered page-break for the hard break at the effective location. - * A (rendered) page-break mid-paragraph produces two elements, like `Text, PageBreak, Text`, - so each Text (subclass) element gets the right page-number. - * A rendered page-break mid-hyperlink produces two text elements, but the hyperlink itself - is not split; the entire hyperlink goes on the page where the hyperlink starts, even - though some of its text appears on the following page. The rest of the paragraph, after - the hyperlink, appears on the following page. - * Odd and even-page section starts can lead to two page-breaks, like an odd-page section - start could go from page 3 to page 5 because 5 is the next odd page. 
- """ - - def str_repr(e: Element) -> str: - """A more detailed `repr()` to aid debugging when assertion fails.""" - return f"{e.__class__.__name__}('{e}')" - - expected = [ - # NOTE(scanny) - -- page 1 -- - NarrativeText( - "First page, tab here:\t" - "followed by line-break here:\n" - "here:\n" - "and here:\n" - "no-break hyphen here:-" - "and hard page-break here>>" - ), - PageBreak(""), - # NOTE(scanny) - -- page 2 -- - NarrativeText( - "<> <>"), - NarrativeText("<>"), - PageBreak(""), - # NOTE(scanny) - -- page 4 -- - PageBreak(""), - # NOTE(scanny) - -- page 5 -- - NarrativeText("<> ' - ), - PageBreak(""), - # NOTE(scanny) - -- page 6 -- - Title("< str: return str(pathlib.Path(__file__).parent.parent.parent.parent / "example-docs" / filename) -@pytest.fixture() -def expected_elements() -> List[Text]: - return [ - Title("These are a few of my favorite things:"), - ListItem("Parrots"), - ListItem("Hockey"), - Title("Analysis"), - NarrativeText("This is my first thought. This is my second thought."), - NarrativeText("This is my third thought."), - Text("2023"), - Address("DOYLESTOWN, PA 18901"), - ] +@pytest.fixture() +def expected_elements() -> List[Text]: + return [ + Title("These are a few of my favorite things:"), + ListItem("Parrots"), + ListItem("Hockey"), + Title("Analysis"), + NarrativeText("This is my first thought. 
This is my second thought."), + NarrativeText("This is my third thought."), + Text("2023"), + Address("DOYLESTOWN, PA 18901"), + ] + + +@pytest.fixture() +def expected_emphasized_text_contents() -> List[str]: + return ["bold", "italic", "bold-italic", "bold-italic"] + + +@pytest.fixture() +def expected_emphasized_text_tags() -> List[str]: + return ["b", "i", "b", "i"] + + +@pytest.fixture() +def expected_emphasized_texts(): + return [ + {"text": "bold", "tag": "b"}, + {"text": "italic", "tag": "i"}, + {"text": "bold-italic", "tag": "b"}, + {"text": "bold-italic", "tag": "i"}, + ] + + +@pytest.fixture() +def mock_document(): + document = docx.Document() + + document.add_paragraph("These are a few of my favorite things:", style="Heading 1") + # NOTE(robinson) - this should get picked up as a list item due to the • + document.add_paragraph("• Parrots", style="Normal") + # NOTE(robinson) - this should get dropped because it's empty + document.add_paragraph("• ", style="Normal") + document.add_paragraph("Hockey", style="List Bullet") + # NOTE(robinson) - this should get dropped because it's empty + document.add_paragraph("", style="List Bullet") + # NOTE(robinson) - this should get picked up as a title + document.add_paragraph("Analysis", style="Normal") + # NOTE(robinson) - this should get dropped because it is empty + document.add_paragraph("", style="Normal") + # NOTE(robinson) - this should get picked up as a narrative text + document.add_paragraph("This is my first thought. 
This is my second thought.", style="Normal") + document.add_paragraph("This is my third thought.", style="Body Text") + # NOTE(robinson) - this should just be regular text + document.add_paragraph("2023") + # NOTE(robinson) - this should be an address + document.add_paragraph("DOYLESTOWN, PA 18901") + + return document + + +@pytest.fixture() +def mock_document_file_path(mock_document: Document, tmp_path: pathlib.Path) -> str: + filename = str(tmp_path / "mock_document.docx") + mock_document.save(filename) + return filename + + +def test_ids_are_unique_and_deterministic(): + elements = partition_docx("example-docs/duplicate-paragraphs.docx") + + ids = [e.id for e in elements] + assert ids == [ + "2f22d82eea1faf5f40dac60cef52700e", + "ca9e1f448e531a5152d960e14eefc360", + "9ddeacb172ac17fb45e6f3f15f3c703d", + "a4fd85d3f4141acae38c8f9c936ed2f3", + "44ebaaf66640719c918246d4ccba1c45", + "f36e8ebcb3b6a051940a168fe73cbc44", + "532b395177652c7d61e1e4d855f1dc1d", + ], "IDs are not deterministic" + + +# ================================================================================================ +# ISOLATED UNIT TESTS +# ================================================================================================ +# These test components used by `partition_docx()` in isolation such that all edge cases can be +# exercised. +# ================================================================================================ + + +class Describe_DocxPartitioner: + """Unit-test suite for `unstructured.partition.docx._DocxPartitioner`.""" + + # -- table behaviors ------------------------------------------------------------------------- + + def it_can_convert_a_table_to_html(self): + table = docx.Document(example_doc_path("docx-tables.docx")).tables[0] + assert _DocxPartitioner()._convert_table_to_html(table) == ( + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "
Header Col 1 Header Col 2
Lorem ipsum A link example
" + ) + + def and_it_can_convert_a_nested_table_to_html(self): + """ + Fixture table is: + + +---+-------------+---+ + | a | >b< | c | + +---+-------------+---+ + | | +-----+---+ | | + | | | e | f | | | + | d | +-----+---+ | i | + | | | g&t | h | | | + | | +-----+---+ | | + +---+-------------+---+ + | j | k | l | + +---+-------------+---+ + """ + table = docx.Document(example_doc_path("docx-tables.docx")).tables[1] + + # -- re.sub() strips out the extra padding inserted by tabulate -- + html = re.sub(r" +<", "<", _DocxPartitioner()._convert_table_to_html(table)) + + expected_lines = [ + "", + "", + "", + "", + "", + "", + "", + "", + "
a>b<c
d", + "", + "", + "", + "", + "
ef
g&th
i
jkl
", + ] + actual_lines = html.splitlines() + for expected, actual in zip(expected_lines, actual_lines): + assert actual == expected, f"\nexpected: {repr(expected)}\nactual: {repr(actual)}" + + def it_can_convert_a_table_to_plain_text(self): + table = docx.Document(example_doc_path("docx-tables.docx")).tables[0] + assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == ( + "Header Col 1 Header Col 2 Lorem ipsum A link example" + ) + + def and_it_can_convert_a_nested_table_to_plain_text(self): + """ + Fixture table is: + + +---+-------------+---+ + | a | >b< | c | + +---+-------------+---+ + | | +-----+---+ | | + | | | e | f | | | + | d | +-----+---+ | i | + | | | g&t | h | | | + | | +-----+---+ | | + +---+-------------+---+ + | j | k | l | + +---+-------------+---+ + """ + table = docx.Document(example_doc_path("docx-tables.docx")).tables[1] + assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == ( + "a >b< c d e f g&t h i j k l" + ) + + def but_the_text_of_a_merged_cell_appears_only_once(self): + """ + Fixture table is: + + +---+-------+ + | a | b | + | +---+---+ + | | c | d | + +---+---+ | + | e | | + +-------+---+ + """ + table = docx.Document(example_doc_path("docx-tables.docx")).tables[2] + assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == "a b c d e" + + def it_can_partition_tables_with_incomplete_rows(self): + """DOCX permits table rows to start late and end early. + + It is relatively rare in the wild, but DOCX tables are unique (as far as I know) in that + they allow rows to start late, like in column 3, and end early, like the last cell is in + column 5 of a 7 column table. 
+ + A practical example might look like this: + + +------+------+ + | East | West | + +----------+------+------+ + | Started | 25 | 32 | + +----------+------+------+ + | Finished | 17 | 21 | + +----------+------+------+ + """ + elements = iter(partition_docx(example_doc_path("tables-with-incomplete-rows.docx"))) + e = next(elements) + assert e.text.startswith("Example of DOCX table ") + # -- + # ┌───┬───┐ + # │ a │ b │ + # ├───┼───┤ + # │ c │ d │ + # └───┴───┘ + e = next(elements) + assert type(e).__name__ == "Table" + assert e.text == "a b c d" + assert e.metadata.text_as_html == ( + "\n" + "\n\n\n" + "\n\n\n" + "
a b
c d
" + ) + # -- + # ┌───┐ + # │ a │ + # ├───┼───┐ + # │ b │ c │ + # └───┴───┘ + e = next(elements) + assert type(e).__name__ == "Table" + assert e.text == "a b c", f"actual {e.text=}" + assert e.metadata.text_as_html == ( + "\n" + "\n\n\n" + "\n\n\n" + "
a
b c
" + ), f"actual {e.metadata.text_as_html=}" + # -- + # ┌───────┐ + # │ a │ + # ├───┬───┼───┐ + # │ b │ c │ d │ + # └───┴───┴───┘ + e = next(elements) + assert type(e).__name__ == "Table" + assert e.text == "a b c d", f"actual {e.text=}" + assert e.metadata.text_as_html == ( + "\n" + "\n\n\n" + "\n\n\n" + "
a a
b c d
" + ), f"actual {e.metadata.text_as_html=}" + # -- + # ┌───┬───┐ + # │ │ b │ + # │ a ├───┼───┐ + # │ │ c │ d │ + # └───┴───┴───┘ + e = next(elements) + assert type(e).__name__ == "Table" + assert e.text == "a b c d", f"actual {e.text=}" + assert e.metadata.text_as_html == ( + "\n" + "\n\n\n" + "\n\n\n" + "
a b
a c d
" + ), f"actual {e.metadata.text_as_html=}" + # -- late-start, early-end, and >2 rows vertical span -- + # ┌───────┬───┬───┐ + # │ a │ b │ c │ + # └───┬───┴───┼───┘ + # │ d │ + # ┌───┤ ├───┐ + # │ e │ │ f │ + # └───┤ ├───┘ + # │ │ + # └───────┘ + e = next(elements) + assert type(e).__name__ == "Table" + assert e.text == "a b c d e f", f"actual {e.text=}" + assert e.metadata.text_as_html == ( + "\n" + "\n" + "\n" + "\n\n" + "\n" + "\n" + "\n" + "\n" + "
a a b c
d d
e d d f
d d
" + ), f"actual {e.metadata.text_as_html=}" + # -- + # -- The table from the specimen file we received with the bug report. -- + e = next(elements) + assert type(e).__name__ == "Table" + assert e.text == "Data More Dato WTF? Strange Format", f"actual {e.text=}" + assert e.metadata.text_as_html == ( + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "
Data Data
Data Data
Data Data
More
Dato
WTF? WTF?
StrangeStrange
Format Format
" + ), f"actual {e.metadata.text_as_html=}" -@pytest.fixture() -def expected_emphasized_text_contents() -> List[str]: - return ["bold", "italic", "bold-italic", "bold-italic"] + # -- page-break behaviors -------------------------------------------------------------------- + def it_places_page_breaks_precisely_where_they_occur(self): + """Page-break behavior has some subtleties. -@pytest.fixture() -def expected_emphasized_text_tags() -> List[str]: - return ["b", "i", "b", "i"] + * A hard page-break does not generate a PageBreak element (because that would double-count + it). Word inserts a rendered page-break for the hard break at the effective location. + * A (rendered) page-break mid-paragraph produces two elements, like `Text, PageBreak, Text`, + so each Text (subclass) element gets the right page-number. + * A rendered page-break mid-hyperlink produces two text elements, but the hyperlink itself + is not split; the entire hyperlink goes on the page where the hyperlink starts, even + though some of its text appears on the following page. The rest of the paragraph, after + the hyperlink, appears on the following page. + * Odd and even-page section starts can lead to two page-breaks, like an odd-page section + start could go from page 3 to page 5 because 5 is the next odd page. 
+ """ + def str_repr(e: Element) -> str: + """A more detailed `repr()` to aid debugging when assertion fails.""" + return f"{e.__class__.__name__}('{e}')" -@pytest.fixture() -def expected_emphasized_texts(): - return [ - {"text": "bold", "tag": "b"}, - {"text": "italic", "tag": "i"}, - {"text": "bold-italic", "tag": "b"}, - {"text": "bold-italic", "tag": "i"}, - ] + expected = [ + # NOTE(scanny) - -- page 1 -- + NarrativeText( + "First page, tab here:\t" + "followed by line-break here:\n" + "here:\n" + "and here:\n" + "no-break hyphen here:-" + "and hard page-break here>>" + ), + PageBreak(""), + # NOTE(scanny) - -- page 2 -- + NarrativeText( + "<> <>"), + NarrativeText("<>"), + PageBreak(""), + # NOTE(scanny) - -- page 4 -- + PageBreak(""), + # NOTE(scanny) - -- page 5 -- + NarrativeText("<> ' + ), + PageBreak(""), + # NOTE(scanny) - -- page 6 -- + Title("< str: - filename = str(tmp_path / "mock_document.docx") - mock_document.save(filename) - return filename + element = next(header_iter) + assert element.text == "First header para\nTable cell1 Table cell2\nLast header para" + def it_includes_table_cell_text_in_Footer_text(self): + """This case also verifies nested-table and merged-cell behaviors.""" + partitioner = _DocxPartitioner(example_doc_path("docx-hdrftr.docx")) + section = partitioner._document.sections[0] -def test_ids_are_unique_and_deterministic(): - elements = partition_docx("example-docs/duplicate-paragraphs.docx") + footer_iter = partitioner._iter_section_footers(section) - ids = [e.id for e in elements] - assert ids == [ - "2f22d82eea1faf5f40dac60cef52700e", - "ca9e1f448e531a5152d960e14eefc360", - "9ddeacb172ac17fb45e6f3f15f3c703d", - "a4fd85d3f4141acae38c8f9c936ed2f3", - "44ebaaf66640719c918246d4ccba1c45", - "f36e8ebcb3b6a051940a168fe73cbc44", - "532b395177652c7d61e1e4d855f1dc1d", - ], "IDs are not deterministic" + element = next(footer_iter) + assert element.text == "para1\ncell1 a b c d e f\npara2" From b4a6009c09c1da8bd310c4245a305d0b6a885561 
Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 14 May 2024 12:32:17 -0700 Subject: [PATCH 08/10] rfctr(docx): improve typing etc. in prep for docx image extraction (#3015) **Summary** Noisy but trivial changes to `partition_docx()` environs and tests in preparation for DOCX image extraction. These changes are extracted here so they don't distract on the changes of substance to follow in the next PR. --- CHANGELOG.md | 2 +- test_unstructured/partition/docx/test_docx.py | 88 +++++++++---------- unstructured/__version__.py | 2 +- unstructured/partition/docx.py | 54 +++++------- 4 files changed, 64 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40d0576bb9..d504a2a1ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.13.8-dev5 +## 0.13.8-dev6 ### Enhancements diff --git a/test_unstructured/partition/docx/test_docx.py b/test_unstructured/partition/docx/test_docx.py index f1391f8c6f..59e90540a3 100644 --- a/test_unstructured/partition/docx/test_docx.py +++ b/test_unstructured/partition/docx/test_docx.py @@ -1,16 +1,19 @@ # pyright: reportPrivateUsage=false +"""Test suite for `unstructured.partition.docx` module.""" + +from __future__ import annotations + import pathlib import re -from tempfile import SpooledTemporaryFile -from typing import Dict, List +import tempfile import docx import pytest from docx.document import Document from pytest_mock import MockFixture -from test_unstructured.unit_utils import assert_round_trips_through_JSON +from test_unstructured.unit_utils import assert_round_trips_through_JSON, example_doc_path from unstructured.chunking.title import chunk_by_title from unstructured.documents.elements import ( Address, @@ -33,8 +36,7 @@ def test_partition_docx_from_filename( - mock_document_file_path: str, - expected_elements: List[Element], + mock_document_file_path: str, expected_elements: list[Element] ): elements = partition_docx(mock_document_file_path) @@ -52,7 +54,7 @@ def 
test_partition_docx_from_filename_with_metadata_filename(mock_document_file_ def test_partition_docx_with_spooled_file( - mock_document_file_path: str, expected_elements: List[Text] + mock_document_file_path: str, expected_elements: list[Text] ): """`partition_docx()` accepts a SpooledTemporaryFile as its `file` argument. @@ -60,7 +62,7 @@ def test_partition_docx_with_spooled_file( to ensure the source file is appropriately converted in this case. """ with open(mock_document_file_path, "rb") as test_file: - spooled_temp_file = SpooledTemporaryFile() + spooled_temp_file = tempfile.SpooledTemporaryFile() spooled_temp_file.write(test_file.read()) spooled_temp_file.seek(0) elements = partition_docx(file=spooled_temp_file) @@ -69,7 +71,7 @@ def test_partition_docx_with_spooled_file( assert element.metadata.filename is None -def test_partition_docx_from_file(mock_document_file_path: str, expected_elements: List[Text]): +def test_partition_docx_from_file(mock_document_file_path: str, expected_elements: list[Text]): with open(mock_document_file_path, "rb") as f: elements = partition_docx(file=f) assert elements == expected_elements @@ -78,7 +80,7 @@ def test_partition_docx_from_file(mock_document_file_path: str, expected_element def test_partition_docx_from_file_with_metadata_filename( - mock_document_file_path: str, expected_elements: List[Text] + mock_document_file_path: str, expected_elements: list[Text] ): with open(mock_document_file_path, "rb") as f: elements = partition_docx(file=f, metadata_filename="test") @@ -282,7 +284,7 @@ def test_partition_docx_from_file_metadata_date_with_custom_metadata(mocker: Moc def test_partition_docx_from_file_without_metadata_date(): """Test partition_docx() with file that are not possible to get last modified date""" with open(example_doc_path("fake.docx"), "rb") as f: - sf = SpooledTemporaryFile() + sf = tempfile.SpooledTemporaryFile() sf.write(f.read()) sf.seek(0) elements = partition_docx(file=sf, date_from_file_object=True) @@ 
-290,9 +292,9 @@ def test_partition_docx_from_file_without_metadata_date(): assert elements[0].metadata.last_modified is None -def test_get_emphasized_texts_from_paragraph(expected_emphasized_texts: List[Dict[str, str]]): +def test_get_emphasized_texts_from_paragraph(expected_emphasized_texts: list[dict[str, str]]): partitioner = _DocxPartitioner( - "example-docs/fake-doc-emphasized-text.docx", + example_doc_path("fake-doc-emphasized-text.docx"), None, None, False, @@ -315,9 +317,9 @@ def test_get_emphasized_texts_from_paragraph(expected_emphasized_texts: List[Dic assert emphasized_texts == [] -def test_iter_table_emphasis(expected_emphasized_texts: List[Dict[str, str]]): +def test_iter_table_emphasis(expected_emphasized_texts: list[dict[str, str]]): partitioner = _DocxPartitioner( - "example-docs/fake-doc-emphasized-text.docx", + example_doc_path("fake-doc-emphasized-text.docx"), None, None, False, @@ -330,11 +332,11 @@ def test_iter_table_emphasis(expected_emphasized_texts: List[Dict[str, str]]): def test_table_emphasis( - expected_emphasized_text_contents: List[str], - expected_emphasized_text_tags: List[str], + expected_emphasized_text_contents: list[str], + expected_emphasized_text_tags: list[str], ): partitioner = _DocxPartitioner( - "example-docs/fake-doc-emphasized-text.docx", + example_doc_path("fake-doc-emphasized-text.docx"), None, None, False, @@ -348,8 +350,8 @@ def test_table_emphasis( def test_partition_docx_grabs_emphasized_texts( - expected_emphasized_text_contents: List[str], - expected_emphasized_text_tags: List[str], + expected_emphasized_text_contents: list[str], + expected_emphasized_text_tags: list[str], ): elements = partition_docx(example_doc_path("fake-doc-emphasized-text.docx")) @@ -373,7 +375,7 @@ def test_partition_docx_with_json(mock_document_file_path: str): def test_parse_category_depth_by_style(): partitioner = _DocxPartitioner( - "example-docs/category-level.docx", + example_doc_path("category-level.docx"), None, None, False, @@ 
-470,27 +472,27 @@ def test_add_chunking_strategy_on_partition_docx(): def test_partition_docx_element_metadata_has_languages(): - filename = "example-docs/handbook-1p.docx" + filename = example_doc_path("handbook-1p.docx") elements = partition_docx(filename=filename) assert elements[0].metadata.languages == ["eng"] def test_partition_docx_respects_detect_language_per_element(): - filename = "example-docs/language-docs/eng_spa_mult.docx" + filename = example_doc_path("language-docs/eng_spa_mult.docx") elements = partition_docx(filename=filename, detect_language_per_element=True) langs = [element.metadata.languages for element in elements] assert langs == [["eng"], ["spa", "eng"], ["eng"], ["eng"], ["spa"]] def test_partition_docx_respects_languages_arg(): - filename = "example-docs/handbook-1p.docx" + filename = example_doc_path("handbook-1p.docx") elements = partition_docx(filename=filename, languages=["deu"]) assert elements[0].metadata.languages == ["deu"] def test_partition_docx_raises_TypeError_for_invalid_languages(): with pytest.raises(TypeError): - filename = "example-docs/handbook-1p.docx" + filename = example_doc_path("handbook-1p.docx") partition_docx( filename=filename, languages="eng", # pyright: ignore[reportArgumentType] @@ -584,6 +586,18 @@ def test_partition_docx_includes_hyperlink_metadata(): assert metadata.link_urls is None +def test_partition_docx_assigns_deterministic_and_unique_element_ids(): + document_path = example_doc_path("duplicate-paragraphs.docx") + + ids = [element.id for element in partition_docx(document_path)] + ids_2 = [element.id for element in partition_docx(document_path)] + + # -- ids match even though partitioned separately (deterministic on content) -- + assert ids == ids_2 + # -- ids are unique -- + assert len(ids) == len(set(ids)) + + # -- shape behaviors ----------------------------------------------------------------------------- @@ -601,13 +615,8 @@ def test_it_considers_text_inside_shapes(): # -- module-level fixtures 
----------------------------------------------------------------------- -def example_doc_path(filename: str) -> str: - """String path to a file in the example-docs/ directory.""" - return str(pathlib.Path(__file__).parent.parent.parent.parent / "example-docs" / filename) - - @pytest.fixture() -def expected_elements() -> List[Text]: +def expected_elements() -> list[Text]: return [ Title("These are a few of my favorite things:"), ListItem("Parrots"), @@ -621,12 +630,12 @@ def expected_elements() -> List[Text]: @pytest.fixture() -def expected_emphasized_text_contents() -> List[str]: +def expected_emphasized_text_contents() -> list[str]: return ["bold", "italic", "bold-italic", "bold-italic"] @pytest.fixture() -def expected_emphasized_text_tags() -> List[str]: +def expected_emphasized_text_tags() -> list[str]: return ["b", "i", "b", "i"] @@ -674,21 +683,6 @@ def mock_document_file_path(mock_document: Document, tmp_path: pathlib.Path) -> return filename -def test_ids_are_unique_and_deterministic(): - elements = partition_docx("example-docs/duplicate-paragraphs.docx") - - ids = [e.id for e in elements] - assert ids == [ - "2f22d82eea1faf5f40dac60cef52700e", - "ca9e1f448e531a5152d960e14eefc360", - "9ddeacb172ac17fb45e6f3f15f3c703d", - "a4fd85d3f4141acae38c8f9c936ed2f3", - "44ebaaf66640719c918246d4ccba1c45", - "f36e8ebcb3b6a051940a168fe73cbc44", - "532b395177652c7d61e1e4d855f1dc1d", - ], "IDs are not deterministic" - - # ================================================================================================ # ISOLATED UNIT TESTS # ================================================================================================ diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 1e8fd23481..0e859a38f0 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.8-dev5" # pragma: no cover +__version__ = "0.13.8-dev6" # pragma: no cover diff --git a/unstructured/partition/docx.py 
b/unstructured/partition/docx.py index 377045b886..97aa88e7ed 100644 --- a/unstructured/partition/docx.py +++ b/unstructured/partition/docx.py @@ -7,19 +7,7 @@ import itertools import os import tempfile -from tempfile import SpooledTemporaryFile -from typing import ( - IO, - Any, - Dict, - Iterator, - List, - Optional, - Tuple, - Type, - Union, - cast, -) +from typing import IO, Any, Iterator, Optional, Type, cast # -- CT_* stands for "complex-type", an XML element type in docx parlance -- import docx @@ -80,8 +68,8 @@ import pypandoc DETECTION_ORIGIN: str = "docx" -BlockElement: TypeAlias = Union[CT_P, CT_Tbl] -BlockItem: TypeAlias = Union[Paragraph, DocxTable] +BlockElement: TypeAlias = "CT_P | CT_Tbl" +BlockItem: TypeAlias = "Paragraph | DocxTable" @requires_dependencies("pypandoc") @@ -93,10 +81,10 @@ def convert_and_partition_docx( infer_table_structure: bool = True, metadata_filename: Optional[str] = None, metadata_last_modified: Optional[str] = None, - languages: Optional[List[str]] = ["auto"], + languages: Optional[list[str]] = ["auto"], detect_language_per_element: bool = False, starting_page_number: int = 1, -) -> List[Element]: +) -> list[Element]: """Converts a document to DOCX and then partitions it using partition_docx. Works with any file format support by pandoc. 
@@ -183,18 +171,16 @@ def extract_docx_filename(file_path: str) -> str: def partition_docx( filename: Optional[str] = None, file: Optional[IO[bytes]] = None, - metadata_filename: Optional[str] = None, include_page_breaks: bool = True, - include_metadata: bool = True, # used by decorator infer_table_structure: bool = True, + metadata_filename: Optional[str] = None, metadata_last_modified: Optional[str] = None, - chunking_strategy: Optional[str] = None, # used by decorator - languages: Optional[List[str]] = ["auto"], + languages: Optional[list[str]] = ["auto"], detect_language_per_element: bool = False, date_from_file_object: bool = False, starting_page_number: int = 1, - **kwargs: Any, # used by decorator -) -> List[Element]: + **kwargs: Any, +) -> list[Element]: """Partitions Microsoft Word Documents in .docx format into its document elements. Parameters @@ -203,6 +189,9 @@ def partition_docx( A string defining the target filename path. file A file-like object using "rb" mode --> open(filename, "rb"). + include_page_breaks + When True, add a `PageBreak` element to the element-stream when a page-break is detected in + the document. Note that not all DOCX files include page-break information. infer_table_structure If True, any Table elements that are extracted will also have a metadata field named "text_as_html" where the table's text content is rendered into an html string. @@ -225,9 +214,8 @@ def partition_docx( Applies only when providing file via `file` parameter. If this option is True, attempt infer last_modified metadata from bytes, otherwise set it to None. starting_page_number - Indicates what page number should be assigned to the first page in the document. - This information will be reflected in elements' metadata and can be be especially - useful when partitioning a document that is part of a larger document. + Assign this number to the first page of this document and increment the page number from + there. 
""" # -- verify that only one file-specifier argument was provided -- exactly_one(filename=filename, file=file) @@ -315,7 +303,7 @@ def _iter_document_elements(self) -> Iterator[Element]: # -- This implementation composes a collection of iterators into a "combined" iterator # -- return value using `yield from`. You can think of the return value as an Element # -- stream and each `yield from` as "add elements found by this function to the stream". - # -- This is functionally analogous to declaring `elements: List[Element] = []` at the top + # -- This is functionally analogous to declaring `elements: list[Element] = []` at the top # -- and using `elements.extend()` for the results of each of the function calls, but is # -- more perfomant, uses less memory (avoids producing and then garbage-collecting all # -- those small lists), is more flexible for later iterator operations like filter, @@ -470,7 +458,7 @@ def _document(self) -> Document: return docx.Document(filename) assert file is not None - if isinstance(file, SpooledTemporaryFile): + if isinstance(file, tempfile.SpooledTemporaryFile): file.seek(0) file = io.BytesIO(file.read()) return docx.Document(file) @@ -595,7 +583,7 @@ def iter_paragraph_items(paragraph: Paragraph) -> Iterator[Paragraph | RenderedP else: yield from self._increment_page_number() - def _iter_paragraph_emphasis(self, paragraph: Paragraph) -> Iterator[Dict[str, str]]: + def _iter_paragraph_emphasis(self, paragraph: Paragraph) -> Iterator[dict[str, str]]: """Generate e.g. {"text": "MUST", "tag": "b"} for each emphasis in `paragraph`.""" for run in paragraph.runs: text = run.text.strip() if run.text else "" @@ -728,7 +716,7 @@ def _iter_table_element(self, table: DocxTable) -> Iterator[Table]: ), ) - def _iter_table_emphasis(self, table: DocxTable) -> Iterator[Dict[str, str]]: + def _iter_table_emphasis(self, table: DocxTable) -> Iterator[dict[str, str]]: """Generate e.g. 
{"text": "word", "tag": "b"} for each emphasis in `table`.""" for row in table.rows: for cell in row.cells: @@ -800,12 +788,12 @@ def _page_number(self) -> Optional[int]: """ return self._page_counter if self._document_contains_pagebreaks else None - def _paragraph_emphasis(self, paragraph: Paragraph) -> Tuple[List[str], List[str]]: + def _paragraph_emphasis(self, paragraph: Paragraph) -> tuple[list[str], list[str]]: """[contents, tags] pair describing emphasized text in `paragraph`.""" iter_p_emph, iter_p_emph_2 = itertools.tee(self._iter_paragraph_emphasis(paragraph)) return ([e["text"] for e in iter_p_emph], [e["tag"] for e in iter_p_emph_2]) - def _paragraph_link_meta(self, paragraph: Paragraph) -> Tuple[List[str], List[str], List[Link]]: + def _paragraph_link_meta(self, paragraph: Paragraph) -> tuple[list[str], list[str], list[Link]]: """Describes hyperlinks in `paragraph`, if any.""" if not paragraph.hyperlinks: return [], [], [] @@ -977,7 +965,7 @@ def _style_based_element_type(self, paragraph: Paragraph) -> Optional[Type[Text] # in the mapping. Unknown style names will also return None. return STYLE_TO_ELEMENT_MAPPING.get(style_name) - def _table_emphasis(self, table: DocxTable) -> Tuple[List[str], List[str]]: + def _table_emphasis(self, table: DocxTable) -> tuple[list[str], list[str]]: """[contents, tags] pair describing emphasized text in `table`.""" iter_tbl_emph, iter_tbl_emph_2 = itertools.tee(self._iter_table_emphasis(table)) return ([e["text"] for e in iter_tbl_emph], [e["tag"] for e in iter_tbl_emph_2]) From db186dc23ba04ccdfc68d0128e97ea21cc88a84f Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 14 May 2024 13:57:31 -0700 Subject: [PATCH 09/10] rfctr(doc): organize test_doc.py (#3017) **Summary** Organize DOC tests into related groups with markers. This makes it easier to assess coverage and find tests related to particular behaviors. This is in preparation for adding tests related to DOC image extraction. 
No code changes, purely line-block moves. - Move module-level fixtures to the bottom. - Organize tests into related groups with markers. --- CHANGELOG.md | 2 +- test_unstructured/partition/docx/test_doc.py | 255 ++++++++++--------- unstructured/__version__.py | 2 +- 3 files changed, 140 insertions(+), 119 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d504a2a1ce..89caeaa410 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.13.8-dev6 +## 0.13.8-dev7 ### Enhancements diff --git a/test_unstructured/partition/docx/test_doc.py b/test_unstructured/partition/docx/test_doc.py index 2d80e18a47..a87722a968 100644 --- a/test_unstructured/partition/docx/test_doc.py +++ b/test_unstructured/partition/docx/test_doc.py @@ -20,59 +20,15 @@ from unstructured.partition.docx import partition_docx -def test_partition_doc_for_deterministic_and_unique_ids(): - ids = [element.id for element in partition_doc("example-docs/duplicate-paragraphs.doc")] - - assert ids == [ - "ade273c622c48d67a7be7b3816d5b4d8", - "7d0b32fdf169f9578723486cb4bc1235", - "1feb6e8e9c1662cfaef75907aeeb0900", - "aa2a8ac10143b12f0fe2087837ea11d2", - "da31ba7ed3919067d2c6572dc1617271", - "1914359c179a160df921b769acf8c353", - "f9d0d379fc791bae487b7a45f65caa50", - ] - - -@pytest.fixture() -def mock_document(): - document = docx.Document() - - document.add_paragraph("These are a few of my favorite things:", style="Heading 1") - # NOTE(robinson) - this should get picked up as a list item due to the • - document.add_paragraph("• Parrots", style="Normal") - # NOTE(robinson) - this should get dropped because it's empty - document.add_paragraph("• ", style="Normal") - document.add_paragraph("Hockey", style="List Bullet") - # NOTE(robinson) - this should get dropped because it's empty - document.add_paragraph("", style="List Bullet") - # NOTE(robinson) - this should get picked up as a title - document.add_paragraph("Analysis", style="Normal") - # NOTE(robinson) - this should get dropped because it 
is empty - document.add_paragraph("", style="Normal") - # NOTE(robinson) - this should get picked up as a narrative text - document.add_paragraph("This is my first thought. This is my second thought.", style="Normal") - document.add_paragraph("This is my third thought.", style="Body Text") - # NOTE(robinson) - this should just be regular text - document.add_paragraph("2023") - # NOTE(robinson) - this should be an address - document.add_paragraph("DOYLESTOWN, PA 18901") +def test_partition_doc_matches_partition_docx(mock_document, expected_elements, tmpdir): + docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") + doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") + mock_document.save(docx_filename) + convert_office_doc(docx_filename, tmpdir.dirname, "doc") + assert partition_doc(filename=doc_filename) == partition_docx(filename=docx_filename) - return document - -@pytest.fixture() -def expected_elements(): - return [ - Title("These are a few of my favorite things:"), - ListItem("Parrots"), - ListItem("Hockey"), - Title("Analysis"), - NarrativeText("This is my first thought. 
This is my second thought."), - NarrativeText("This is my third thought."), - Text("2023"), - Address("DOYLESTOWN, PA 18901"), - ] +# -- document-source (file or filename) ---------------------------------------------------------- def test_partition_doc_from_filename(mock_document, expected_elements, tmpdir, capsys): @@ -88,36 +44,6 @@ def test_partition_doc_from_filename(mock_document, expected_elements, tmpdir, c assert capsys.readouterr().err == "" -def test_partition_doc_from_filename_with_metadata_filename( - mock_document, - expected_elements, - tmpdir, -): - docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") - doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") - mock_document.save(docx_filename) - convert_office_doc(docx_filename, tmpdir.dirname, "doc") - - elements = partition_doc(filename=doc_filename, metadata_filename="test") - assert elements == expected_elements - assert all(element.metadata.filename == "test" for element in elements) - - -def test_partition_doc_matches_partition_docx(mock_document, expected_elements, tmpdir): - docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") - doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") - mock_document.save(docx_filename) - convert_office_doc(docx_filename, tmpdir.dirname, "doc") - assert partition_doc(filename=doc_filename) == partition_docx(filename=docx_filename) - - -def test_partition_raises_with_missing_doc(mock_document, expected_elements, tmpdir): - doc_filename = os.path.join(tmpdir.dirname, "asdf.doc") - - with pytest.raises(ValueError): - partition_doc(filename=doc_filename) - - def test_partition_doc_from_file_with_filter(mock_document, expected_elements, tmpdir, capsys): docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") @@ -148,18 +74,6 @@ def test_partition_doc_from_file_with_no_filter(mock_document, expected_elements assert element.metadata.filename is 
None -def test_partition_doc_from_file_with_metadata_filename(mock_document, tmpdir): - docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") - doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") - mock_document.save(docx_filename) - convert_office_doc(docx_filename, tmpdir.dirname, "doc") - - with open(doc_filename, "rb") as f: - elements = partition_doc(file=f, metadata_filename="test") - for element in elements: - assert element.metadata.filename == "test" - - def test_partition_doc_raises_with_both_specified(mock_document, tmpdir): docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") @@ -175,33 +89,76 @@ def test_partition_doc_raises_with_neither(): partition_doc() -def test_partition_doc_from_file_exclude_metadata(mock_document, tmpdir): +def test_partition_raises_with_missing_doc(mock_document, expected_elements, tmpdir): + doc_filename = os.path.join(tmpdir.dirname, "asdf.doc") + + with pytest.raises(ValueError): + partition_doc(filename=doc_filename) + + +# -- `include_metadata` arg ---------------------------------------------------------------------- + + +def test_partition_doc_from_filename_exclude_metadata(mock_document, tmpdir): docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") mock_document.save(docx_filename) convert_office_doc(docx_filename, tmpdir.dirname, "doc") - with open(doc_filename, "rb") as f: - elements = partition_doc(file=f, include_metadata=False) + elements = partition_doc(filename=doc_filename, include_metadata=False) assert elements[0].metadata.filetype is None assert elements[0].metadata.page_name is None assert elements[0].metadata.filename is None -def test_partition_doc_from_filename_exclude_metadata(mock_document, tmpdir): +def test_partition_doc_from_file_exclude_metadata(mock_document, tmpdir): docx_filename = os.path.join(tmpdir.dirname, 
"mock_document.docx") doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") mock_document.save(docx_filename) convert_office_doc(docx_filename, tmpdir.dirname, "doc") - elements = partition_doc(filename=doc_filename, include_metadata=False) + with open(doc_filename, "rb") as f: + elements = partition_doc(file=f, include_metadata=False) assert elements[0].metadata.filetype is None assert elements[0].metadata.page_name is None assert elements[0].metadata.filename is None +# -- .metadata.filename -------------------------------------------------------------------------- + + +def test_partition_doc_from_filename_with_metadata_filename( + mock_document, + expected_elements, + tmpdir, +): + docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") + doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") + mock_document.save(docx_filename) + convert_office_doc(docx_filename, tmpdir.dirname, "doc") + + elements = partition_doc(filename=doc_filename, metadata_filename="test") + assert elements == expected_elements + assert all(element.metadata.filename == "test" for element in elements) + + +def test_partition_doc_from_file_with_metadata_filename(mock_document, tmpdir): + docx_filename = os.path.join(tmpdir.dirname, "mock_document.docx") + doc_filename = os.path.join(tmpdir.dirname, "mock_document.doc") + mock_document.save(docx_filename) + convert_office_doc(docx_filename, tmpdir.dirname, "doc") + + with open(doc_filename, "rb") as f: + elements = partition_doc(file=f, metadata_filename="test") + for element in elements: + assert element.metadata.filename == "test" + + +# -- .metadata.last_modified --------------------------------------------------------------------- + + def test_partition_doc_metadata_date( mocker, filename="example-docs/fake.doc", @@ -283,6 +240,19 @@ def test_partition_doc_from_file_explicit_get_metadata_date( assert elements[0].metadata.last_modified == mocked_last_modification_date +def 
test_partition_doc_from_file_without_metadata_date( + filename="example-docs/fake.doc", +): + """Test partition_doc() with file that are not possible to get last modified date""" + with open(filename, "rb") as f: + sf = SpooledTemporaryFile() + sf.write(f.read()) + sf.seek(0) + elements = partition_doc(file=sf, date_from_file_object=True) + + assert elements[0].metadata.last_modified is None + + def test_partition_doc_from_file_metadata_date_with_custom_metadata( mocker, filename="example-docs/fake.doc", @@ -302,17 +272,23 @@ def test_partition_doc_from_file_metadata_date_with_custom_metadata( assert elements[0].metadata.last_modified == expected_last_modified_date -def test_partition_doc_from_file_without_metadata_date( - filename="example-docs/fake.doc", -): - """Test partition_doc() with file that are not possible to get last modified date""" - with open(filename, "rb") as f: - sf = SpooledTemporaryFile() - sf.write(f.read()) - sf.seek(0) - elements = partition_doc(file=sf, date_from_file_object=True) +# -- language-recognition metadata --------------------------------------------------------------- - assert elements[0].metadata.last_modified is None + +def test_partition_doc_element_metadata_has_languages(): + filename = "example-docs/fake-doc-emphasized-text.doc" + elements = partition_doc(filename=filename) + assert elements[0].metadata.languages == ["eng"] + + +def test_partition_doc_respects_detect_language_per_element(): + filename = "example-docs/language-docs/eng_spa_mult.doc" + elements = partition_doc(filename=filename, detect_language_per_element=True) + langs = [element.metadata.languages for element in elements] + assert langs == [["eng"], ["spa", "eng"], ["eng"], ["eng"], ["spa"]] + + +# -- miscellaneous ------------------------------------------------------------------------------- def test_partition_doc_grabs_emphasized_texts(): @@ -352,14 +328,59 @@ def test_add_chunking_strategy_on_partition_doc(filename="example-docs/fake.doc" assert 
chunk_elements == chunks -def test_partition_doc_element_metadata_has_languages(): - filename = "example-docs/fake-doc-emphasized-text.doc" - elements = partition_doc(filename=filename) - assert elements[0].metadata.languages == ["eng"] +def test_partition_doc_for_deterministic_and_unique_ids(): + ids = [element.id for element in partition_doc("example-docs/duplicate-paragraphs.doc")] + assert ids == [ + "ade273c622c48d67a7be7b3816d5b4d8", + "7d0b32fdf169f9578723486cb4bc1235", + "1feb6e8e9c1662cfaef75907aeeb0900", + "aa2a8ac10143b12f0fe2087837ea11d2", + "da31ba7ed3919067d2c6572dc1617271", + "1914359c179a160df921b769acf8c353", + "f9d0d379fc791bae487b7a45f65caa50", + ] -def test_partition_doc_respects_detect_language_per_element(): - filename = "example-docs/language-docs/eng_spa_mult.doc" - elements = partition_doc(filename=filename, detect_language_per_element=True) - langs = [element.metadata.languages for element in elements] - assert langs == [["eng"], ["spa", "eng"], ["eng"], ["eng"], ["spa"]] + +# == module-level fixtures ======================================================================= + + +@pytest.fixture() +def expected_elements(): + return [ + Title("These are a few of my favorite things:"), + ListItem("Parrots"), + ListItem("Hockey"), + Title("Analysis"), + NarrativeText("This is my first thought. 
This is my second thought."), + NarrativeText("This is my third thought."), + Text("2023"), + Address("DOYLESTOWN, PA 18901"), + ] + + +@pytest.fixture() +def mock_document(): + document = docx.Document() + + document.add_paragraph("These are a few of my favorite things:", style="Heading 1") + # NOTE(robinson) - this should get picked up as a list item due to the • + document.add_paragraph("• Parrots", style="Normal") + # NOTE(robinson) - this should get dropped because it's empty + document.add_paragraph("• ", style="Normal") + document.add_paragraph("Hockey", style="List Bullet") + # NOTE(robinson) - this should get dropped because it's empty + document.add_paragraph("", style="List Bullet") + # NOTE(robinson) - this should get picked up as a title + document.add_paragraph("Analysis", style="Normal") + # NOTE(robinson) - this should get dropped because it is empty + document.add_paragraph("", style="Normal") + # NOTE(robinson) - this should get picked up as a narrative text + document.add_paragraph("This is my first thought. 
This is my second thought.", style="Normal") + document.add_paragraph("This is my third thought.", style="Body Text") + # NOTE(robinson) - this should just be regular text + document.add_paragraph("2023") + # NOTE(robinson) - this should be an address + document.add_paragraph("DOYLESTOWN, PA 18901") + + return document diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 0e859a38f0..4c908fc3d0 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.8-dev6" # pragma: no cover +__version__ = "0.13.8-dev7" # pragma: no cover From 12b30d28109434de825445a860cd4bf2f2436a58 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Tue, 14 May 2024 17:50:31 -0700 Subject: [PATCH 10/10] rfctr(docx): extract DocxPartitionerOptions (#3018) **Reviewers:** Probably easier to review first and second commits separately as the first one adds all the new code and tests (without installing it), and the second one installs it into the partitioner along with the required changes to code and tests. **Summary** Enable communication of partitioning options to sub-partitioners, in particular to the pluggable `PicturePartitioner` coming in a closely subsequent PR to implement image-extraction and OCR for DOCX, DOC, and ODT formats. **Additional Context** In general, validation of partitioning options as well as assigning default values and computing derived partitioning settings can be extracted from partitioners into a neatly encapsulated separate object. This simplifies the core partitioning code by removing the noise associated with computing metadata values and deciding how to access the source document, etc. However, better factoring aside, having the partition-time "settings" available in a single object allows partitioning of certain document features, for example images, to be readily _delegated_ to a sub-partitioner while still giving it access to all the relevant partitioning settings for the current document. 
This is particularly important when a sub-partitioner is "pluggable" at runtime and must rely on a clearly-defined (and simple as possible) interface to operate smoothly. --- CHANGELOG.md | 4 +- test_unstructured/partition/docx/test_docx.py | 425 +++++++++++++++--- unstructured/__version__.py | 2 +- unstructured/partition/docx.py | 312 +++++++------ 4 files changed, 543 insertions(+), 200 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89caeaa410..0496479af1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ -## 0.13.8-dev7 +## 0.13.8-dev8 ### Enhancements -**Faster evaluation** Support for concurrent processing of documents during evaluation +* **Faster evaluation** Support for concurrent processing of documents during evaluation ### Features diff --git a/test_unstructured/partition/docx/test_docx.py b/test_unstructured/partition/docx/test_docx.py index 59e90540a3..9e89d99737 100644 --- a/test_unstructured/partition/docx/test_docx.py +++ b/test_unstructured/partition/docx/test_docx.py @@ -4,16 +4,26 @@ from __future__ import annotations +import io import pathlib import re import tempfile +from typing import Any import docx import pytest from docx.document import Document from pytest_mock import MockFixture -from test_unstructured.unit_utils import assert_round_trips_through_JSON, example_doc_path +from test_unstructured.unit_utils import ( + FixtureRequest, + Mock, + assert_round_trips_through_JSON, + example_doc_path, + function_mock, + instance_mock, + property_mock, +) from unstructured.chunking.title import chunk_by_title from unstructured.documents.elements import ( Address, @@ -29,7 +39,7 @@ Text, Title, ) -from unstructured.partition.docx import _DocxPartitioner, partition_docx +from unstructured.partition.docx import DocxPartitionerOptions, _DocxPartitioner, partition_docx from unstructured.partition.utils.constants import UNSTRUCTURED_INCLUDE_DEBUG_METADATA # -- docx-file loading behaviors 
----------------------------------------------------------------- @@ -89,14 +99,16 @@ def test_partition_docx_from_file_with_metadata_filename( assert element.metadata.filename == "test" -def test_partition_docx_raises_with_both_specified(mock_document_file_path: str): - with open(mock_document_file_path, "rb") as f: - with pytest.raises(ValueError, match="Exactly one of filename and file must be specified"): - partition_docx(filename=mock_document_file_path, file=f) +def test_partition_docx_uses_file_path_when_both_are_specified( + mock_document_file_path: str, expected_elements: list[Text] +): + f = io.BytesIO(b"abcde") + elements = partition_docx(filename=mock_document_file_path, file=f) + assert elements == expected_elements def test_partition_docx_raises_with_neither(): - with pytest.raises(ValueError, match="Exactly one of filename and file must be specified"): + with pytest.raises(ValueError, match="either `filename` or `file` argument must be provided"): partition_docx() @@ -292,15 +304,13 @@ def test_partition_docx_from_file_without_metadata_date(): assert elements[0].metadata.last_modified is None -def test_get_emphasized_texts_from_paragraph(expected_emphasized_texts: list[dict[str, str]]): - partitioner = _DocxPartitioner( - example_doc_path("fake-doc-emphasized-text.docx"), - None, - None, - False, - True, - None, - ) +def test_get_emphasized_texts_from_paragraph( + opts_args: dict[str, Any], expected_emphasized_texts: list[dict[str, str]] +): + opts_args["file_path"] = example_doc_path("fake-doc-emphasized-text.docx") + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) + paragraph = partitioner._document.paragraphs[1] emphasized_texts = list(partitioner._iter_paragraph_emphasis(paragraph)) assert paragraph.text == "I am a bold italic bold-italic text." 
@@ -317,34 +327,31 @@ def test_get_emphasized_texts_from_paragraph(expected_emphasized_texts: list[dic assert emphasized_texts == [] -def test_iter_table_emphasis(expected_emphasized_texts: list[dict[str, str]]): - partitioner = _DocxPartitioner( - example_doc_path("fake-doc-emphasized-text.docx"), - None, - None, - False, - True, - None, - ) +def test_iter_table_emphasis( + opts_args: dict[str, Any], expected_emphasized_texts: list[dict[str, str]] +): + opts_args["file_path"] = example_doc_path("fake-doc-emphasized-text.docx") + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) table = partitioner._document.tables[0] + emphasized_texts = list(partitioner._iter_table_emphasis(table)) + assert emphasized_texts == expected_emphasized_texts def test_table_emphasis( + opts_args: dict[str, Any], expected_emphasized_text_contents: list[str], expected_emphasized_text_tags: list[str], ): - partitioner = _DocxPartitioner( - example_doc_path("fake-doc-emphasized-text.docx"), - None, - None, - False, - True, - None, - ) + opts_args["file_path"] = example_doc_path("fake-doc-emphasized-text.docx") + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) table = partitioner._document.tables[0] + emphasized_text_contents, emphasized_text_tags = partitioner._table_emphasis(table) + assert emphasized_text_contents == expected_emphasized_text_contents assert emphasized_text_tags == expected_emphasized_text_tags @@ -373,15 +380,10 @@ def test_partition_docx_with_json(mock_document_file_path: str): assert_round_trips_through_JSON(elements) -def test_parse_category_depth_by_style(): - partitioner = _DocxPartitioner( - example_doc_path("category-level.docx"), - None, - None, - False, - True, - None, - ) +def test_parse_category_depth_by_style(opts_args: dict[str, Any]): + opts_args["file_path"] = example_doc_path("category-level.docx") + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) # Category 
depths are 0-indexed and relative to the category type # Title, list item, bullet, narrative text, etc. @@ -411,9 +413,9 @@ def test_parse_category_depth_by_style(): ), f"expected paragraph[{idx}] to have depth=={depth}, got {actual_depth}" -def test_parse_category_depth_by_style_name(): - partitioner = _DocxPartitioner(None, None, None, False, True, None) - +def test_parse_category_depth_by_style_name(opts_args: dict[str, Any]): + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) test_cases = [ (0, "Heading 1"), (1, "Heading 2"), @@ -436,8 +438,9 @@ def test_parse_category_depth_by_style_name(): ), f"test case {test_cases[idx]} failed" -def test_parse_category_depth_by_style_ilvl(): - partitioner = _DocxPartitioner(None, None, None, False, True, None) +def test_parse_category_depth_by_style_ilvl(opts_args: dict[str, Any]): + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) assert partitioner._parse_category_depth_by_style_ilvl() == 0 @@ -683,6 +686,24 @@ def mock_document_file_path(mock_document: Document, tmp_path: pathlib.Path) -> return filename +@pytest.fixture() +def opts_args() -> dict[str, Any]: + """All default arguments for `DocxPartitionerOptions`. + + Individual argument values can be changed to suit each test. Makes construction of opts more + compact for testing purposes. 
+ """ + return { + "date_from_file_object": False, + "file": None, + "file_path": None, + "include_page_breaks": True, + "infer_table_structure": True, + "metadata_file_path": None, + "metadata_last_modified": None, + } + + # ================================================================================================ # ISOLATED UNIT TESTS # ================================================================================================ @@ -691,14 +712,280 @@ def mock_document_file_path(mock_document: Document, tmp_path: pathlib.Path) -> # ================================================================================================ +class DescribeDocxPartitionerOptions: + """Unit-test suite for `unstructured.partition.docx.DocxPartitionerOptions` objects.""" + + # -- .document ------------------------------- + + def it_loads_the_docx_document( + self, + request: FixtureRequest, + opts_args: dict[str, Any], + ): + document_ = instance_mock(request, Document) + docx_Document_ = function_mock( + request, "unstructured.partition.docx.docx.Document", return_value=document_ + ) + _docx_file_prop_ = property_mock( + request, DocxPartitionerOptions, "_docx_file", return_value="abcde.docx" + ) + opts = DocxPartitionerOptions(**opts_args) + + document = opts.document + + _docx_file_prop_.assert_called_once_with() + docx_Document_.assert_called_once_with("abcde.docx") + assert document is document_ + + # -- .include_page_breaks -------------------- + + @pytest.mark.parametrize("arg_value", [True, False]) + def it_knows_whether_to_emit_PageBreak_elements_as_part_of_the_output_element_stream( + self, arg_value: bool, opts_args: dict[str, Any] + ): + opts_args["include_page_breaks"] = arg_value + opts = DocxPartitionerOptions(**opts_args) + + assert opts.include_page_breaks is arg_value + + # -- .infer_table_structure ------------------ + + @pytest.mark.parametrize("arg_value", [True, False]) + def it_knows_whether_to_include_text_as_html_in_Table_metadata( + self, 
arg_value: bool, opts_args: dict[str, Any] + ): + opts_args["infer_table_structure"] = arg_value + opts = DocxPartitionerOptions(**opts_args) + + assert opts.infer_table_structure is arg_value + + # -- .increment_page_number() ---------------- + + def it_generates_a_PageBreak_element_when_the_page_number_is_incremented( + self, opts_args: dict[str, Any] + ): + opts = DocxPartitionerOptions(**opts_args) + + page_break_iter = opts.increment_page_number() + + assert isinstance(next(page_break_iter, None), PageBreak) + assert opts.page_number == 2 + with pytest.raises(StopIteration): + next(page_break_iter) + + def but_it_does_not_generate_a_PageBreak_element_when_include_page_breaks_option_is_off( + self, opts_args: dict[str, Any] + ): + opts_args["include_page_breaks"] = False + opts = DocxPartitionerOptions(**opts_args) + + page_break_iter = opts.increment_page_number() + + with pytest.raises(StopIteration): + next(page_break_iter) + assert opts.page_number == 2 + + # -- .last_modified -------------------------- + + def it_gets_the_last_modified_date_of_the_document_from_the_caller_when_provided( + self, opts_args: dict[str, Any] + ): + opts_args["metadata_last_modified"] = "2024-03-05T17:02:53" + opts = DocxPartitionerOptions(**opts_args) + + assert opts.last_modified == "2024-03-05T17:02:53" + + def and_it_falls_back_to_the_last_modified_date_of_the_file_when_a_path_is_provided( + self, opts_args: dict[str, Any], get_last_modified_date_: Mock + ): + opts_args["file_path"] = "a/b/document.docx" + get_last_modified_date_.return_value = "2024-04-02T20:32:35" + opts = DocxPartitionerOptions(**opts_args) + + last_modified = opts.last_modified + + get_last_modified_date_.assert_called_once_with("a/b/document.docx") + assert last_modified == "2024-04-02T20:32:35" + + def and_it_falls_back_to_the_last_modified_date_of_the_file_when_a_file_like_object_is_provided( + self, opts_args: dict[str, Any], get_last_modified_date_from_file_: Mock + ): + file = 
io.BytesIO(b"abcdefg") + opts_args["file"] = file + opts_args["date_from_file_object"] = True + get_last_modified_date_from_file_.return_value = "2024-04-02T20:42:07" + opts = DocxPartitionerOptions(**opts_args) + + last_modified = opts.last_modified + + get_last_modified_date_from_file_.assert_called_once_with(file) + assert last_modified == "2024-04-02T20:42:07" + + def but_it_falls_back_to_None_for_the_last_modified_date_when_date_from_file_object_is_False( + self, opts_args: dict[str, Any], get_last_modified_date_from_file_: Mock + ): + file = io.BytesIO(b"abcdefg") + opts_args["file"] = file + opts_args["date_from_file_object"] = False + get_last_modified_date_from_file_.return_value = "2024-04-02T20:42:07" + opts = DocxPartitionerOptions(**opts_args) + + last_modified = opts.last_modified + + get_last_modified_date_from_file_.assert_not_called() + assert last_modified is None + + # -- .metadata_file_path --------------------- + + def it_uses_the_user_provided_file_path_in_the_metadata_when_provided( + self, opts_args: dict[str, Any] + ): + opts_args["file_path"] = "x/y/z.docx" + opts_args["metadata_file_path"] = "a/b/c.docx" + opts = DocxPartitionerOptions(**opts_args) + + assert opts.metadata_file_path == "a/b/c.docx" + + @pytest.mark.parametrize("file_path", ["u/v/w.docx", None]) + def and_it_falls_back_to_the_document_file_path_otherwise( + self, file_path: str | None, opts_args: dict[str, Any] + ): + opts_args["file_path"] = file_path + opts_args["metadata_file_path"] = None + opts = DocxPartitionerOptions(**opts_args) + + assert opts.metadata_file_path == file_path + + # -- ._metadata_page_number ------------------ + + @pytest.mark.parametrize( + ("page_count", "document_contains_pagebreaks", "expected_value"), + [(7, True, 7), (1, False, None)], + ) + def it_reports_None_when_no_rendered_page_breaks_are_found_in_document( + self, + request: FixtureRequest, + opts_args: dict[str, Any], + page_count: int, + document_contains_pagebreaks: bool, + 
expected_value: int | None, + ): + _document_contains_pagebreaks_prop_ = property_mock( + request, + DocxPartitionerOptions, + "_document_contains_pagebreaks", + return_value=document_contains_pagebreaks, + ) + opts = DocxPartitionerOptions(**opts_args) + opts._page_counter = page_count + + metadata_page_number = opts.metadata_page_number + + _document_contains_pagebreaks_prop_.assert_called_once_with() + assert metadata_page_number is expected_value + + # -- .page_number ---------------------------- + + def it_keeps_track_of_the_page_number(self, opts_args: dict[str, Any]): + """In DOCX, page-number is the slide number.""" + opts = DocxPartitionerOptions(**opts_args) + + assert opts.page_number == 1 + list(opts.increment_page_number()) + assert opts.page_number == 2 + list(opts.increment_page_number()) + assert opts.page_number == 3 + + def it_assigns_the_correct_page_number_when_starting_page_number_is_given( + self, opts_args: dict[str, Any] + ): + opts = DocxPartitionerOptions(**opts_args, starting_page_number=3) + + assert opts.page_number == 3 + list(opts.increment_page_number()) + assert opts.page_number == 4 + + # -- ._document_contains_pagebreaks ---------- + + @pytest.mark.parametrize( + ("file_name", "expected_value"), [("page-breaks.docx", True), ("teams_chat.docx", False)] + ) + def it_knows_whether_the_document_contains_page_breaks( + self, opts_args: dict[str, Any], file_name: str, expected_value: bool + ): + opts_args["file_path"] = example_doc_path(file_name) + opts = DocxPartitionerOptions(**opts_args) + + assert opts._document_contains_pagebreaks is expected_value + + # -- ._docx_file ----------------------------- + + def it_uses_the_path_to_open_the_presentation_when_file_path_is_provided( + self, opts_args: dict[str, Any] + ): + opts_args["file_path"] = "l/m/n.docx" + opts = DocxPartitionerOptions(**opts_args) + + assert opts._docx_file == "l/m/n.docx" + + def and_it_uses_a_BytesIO_file_to_replaces_a_SpooledTemporaryFile_provided( + self, 
opts_args: dict[str, Any] + ): + spooled_temp_file = tempfile.SpooledTemporaryFile() + spooled_temp_file.write(b"abcdefg") + opts_args["file"] = spooled_temp_file + opts = DocxPartitionerOptions(**opts_args) + + docx_file = opts._docx_file + + assert docx_file is not spooled_temp_file + assert isinstance(docx_file, io.BytesIO) + assert docx_file.getvalue() == b"abcdefg" + + def and_it_uses_the_provided_file_directly_when_not_a_SpooledTemporaryFile( + self, opts_args: dict[str, Any] + ): + file = io.BytesIO(b"abcdefg") + opts_args["file"] = file + opts = DocxPartitionerOptions(**opts_args) + + docx_file = opts._docx_file + + assert docx_file is file + assert isinstance(docx_file, io.BytesIO) + assert docx_file.getvalue() == b"abcdefg" + + def but_it_raises_ValueError_when_neither_a_file_path_or_file_is_provided( + self, opts_args: dict[str, Any] + ): + opts = DocxPartitionerOptions(**opts_args) + + with pytest.raises(ValueError, match="No DOCX document specified, either `filename` or "): + opts._docx_file + + # -- fixtures -------------------------------------------------------------------------------- + + @pytest.fixture() + def get_last_modified_date_(self, request: FixtureRequest) -> Mock: + return function_mock(request, "unstructured.partition.docx.get_last_modified_date") + + @pytest.fixture() + def get_last_modified_date_from_file_(self, request: FixtureRequest): + return function_mock( + request, "unstructured.partition.docx.get_last_modified_date_from_file" + ) + + class Describe_DocxPartitioner: """Unit-test suite for `unstructured.partition.docx._DocxPartitioner`.""" # -- table behaviors ------------------------------------------------------------------------- - def it_can_convert_a_table_to_html(self): + def it_can_convert_a_table_to_html(self, opts_args: dict[str, Any]): + opts = DocxPartitionerOptions(**opts_args) table = docx.Document(example_doc_path("docx-tables.docx")).tables[0] - assert _DocxPartitioner()._convert_table_to_html(table) == ( + + 
assert _DocxPartitioner(opts)._convert_table_to_html(table) == ( "\n" "\n" "\n" @@ -709,7 +996,7 @@ def it_can_convert_a_table_to_html(self): "
Header Col 1 Header Col 2
" ) - def and_it_can_convert_a_nested_table_to_html(self): + def and_it_can_convert_a_nested_table_to_html(self, opts_args: dict[str, Any]): """ Fixture table is: @@ -725,10 +1012,11 @@ def and_it_can_convert_a_nested_table_to_html(self): | j | k | l | +---+-------------+---+ """ + opts = DocxPartitionerOptions(**opts_args) table = docx.Document(example_doc_path("docx-tables.docx")).tables[1] # -- re.sub() strips out the extra padding inserted by tabulate -- - html = re.sub(r" +<", "<", _DocxPartitioner()._convert_table_to_html(table)) + html = re.sub(r" +<", "<", _DocxPartitioner(opts)._convert_table_to_html(table)) expected_lines = [ "", @@ -750,13 +1038,15 @@ def and_it_can_convert_a_nested_table_to_html(self): for expected, actual in zip(expected_lines, actual_lines): assert actual == expected, f"\nexpected: {repr(expected)}\nactual: {repr(actual)}" - def it_can_convert_a_table_to_plain_text(self): + def it_can_convert_a_table_to_plain_text(self, opts_args: dict[str, Any]): + opts = DocxPartitionerOptions(**opts_args) table = docx.Document(example_doc_path("docx-tables.docx")).tables[0] - assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == ( + + assert " ".join(_DocxPartitioner(opts)._iter_table_texts(table)) == ( "Header Col 1 Header Col 2 Lorem ipsum A link example" ) - def and_it_can_convert_a_nested_table_to_plain_text(self): + def and_it_can_convert_a_nested_table_to_plain_text(self, opts_args: dict[str, Any]): """ Fixture table is: @@ -772,12 +1062,14 @@ def and_it_can_convert_a_nested_table_to_plain_text(self): | j | k | l | +---+-------------+---+ """ + opts = DocxPartitionerOptions(**opts_args) table = docx.Document(example_doc_path("docx-tables.docx")).tables[1] - assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == ( + + assert " ".join(_DocxPartitioner(opts)._iter_table_texts(table)) == ( "a >b< c d e f g&t h i j k l" ) - def but_the_text_of_a_merged_cell_appears_only_once(self): + def 
but_the_text_of_a_merged_cell_appears_only_once(self, opts_args: dict[str, Any]): """ Fixture table is: @@ -789,8 +1081,9 @@ def but_the_text_of_a_merged_cell_appears_only_once(self): | e | | +-------+---+ """ + opts = DocxPartitionerOptions(**opts_args) table = docx.Document(example_doc_path("docx-tables.docx")).tables[2] - assert " ".join(_DocxPartitioner()._iter_table_texts(table)) == "a b c d e" + assert " ".join(_DocxPartitioner(opts)._iter_table_texts(table)) == "a b c d e" def it_can_partition_tables_with_incomplete_rows(self): """DOCX permits table rows to start late and end early. @@ -921,7 +1214,7 @@ def it_can_partition_tables_with_incomplete_rows(self): # -- page-break behaviors -------------------------------------------------------------------- - def it_places_page_breaks_precisely_where_they_occur(self): + def it_places_page_breaks_precisely_where_they_occur(self, opts_args: dict[str, Any]): """Page-break behavior has some subtleties. * A hard page-break does not generate a PageBreak element (because that would double-count @@ -940,6 +1233,8 @@ def str_repr(e: Element) -> str: """A more detailed `repr()` to aid debugging when assertion fails.""" return f"{e.__class__.__name__}('{e}')" + opts_args["file_path"] = example_doc_path("page-breaks.docx") + opts = DocxPartitionerOptions(**opts_args) expected = [ # NOTE(scanny) - -- page 1 -- NarrativeText( @@ -975,7 +1270,7 @@ def str_repr(e: Element) -> str: Title("< str: # -- header/footer behaviors ----------------------------------------------------------------- - def it_includes_table_cell_text_in_Header_text(self): - partitioner = _DocxPartitioner(example_doc_path("docx-hdrftr.docx")) + def it_includes_table_cell_text_in_Header_text(self, opts_args: dict[str, Any]): + opts_args["file_path"] = example_doc_path("docx-hdrftr.docx") + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) section = partitioner._document.sections[0] header_iter = 
partitioner._iter_section_headers(section) @@ -995,9 +1292,11 @@ def it_includes_table_cell_text_in_Header_text(self): element = next(header_iter) assert element.text == "First header para\nTable cell1 Table cell2\nLast header para" - def it_includes_table_cell_text_in_Footer_text(self): + def it_includes_table_cell_text_in_Footer_text(self, opts_args: dict[str, Any]): """This case also verifies nested-table and merged-cell behaviors.""" - partitioner = _DocxPartitioner(example_doc_path("docx-hdrftr.docx")) + opts_args["file_path"] = example_doc_path("docx-hdrftr.docx") + opts = DocxPartitionerOptions(**opts_args) + partitioner = _DocxPartitioner(opts) section = partitioner._document.sections[0] footer_iter = partitioner._iter_section_footers(section) diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 4c908fc3d0..83c1597480 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.8-dev7" # pragma: no cover +__version__ = "0.13.8-dev8" # pragma: no cover diff --git a/unstructured/partition/docx.py b/unstructured/partition/docx.py index 97aa88e7ed..430f87a301 100644 --- a/unstructured/partition/docx.py +++ b/unstructured/partition/docx.py @@ -217,19 +217,19 @@ def partition_docx( Assign this number to the first page of this document and increment the page number from there. 
""" - # -- verify that only one file-specifier argument was provided -- - exactly_one(filename=filename, file=file) - - elements = _DocxPartitioner.iter_document_elements( - filename, - file, - metadata_filename, - include_page_breaks, - infer_table_structure, - metadata_last_modified, - date_from_file_object, + opts = DocxPartitionerOptions( + date_from_file_object=date_from_file_object, + file=file, + file_path=filename, + include_page_breaks=include_page_breaks, + infer_table_structure=infer_table_structure, + metadata_file_path=metadata_filename, + metadata_last_modified=metadata_last_modified, starting_page_number=starting_page_number, ) + + elements = _DocxPartitioner.iter_document_elements(opts) + elements = apply_lang_metadata( elements=elements, languages=languages, @@ -238,56 +238,169 @@ def partition_docx( return list(elements) -class _DocxPartitioner: - """Provides `.partition()` for MS-Word 2007+ (.docx) files.""" +class DocxPartitionerOptions: + """Encapsulates partitioning option validation, computation, and application of defaults.""" def __init__( self, - # -- NOTE(scanny): default values here are unnecessary for production use because - # -- `.iter_document_elements()` is the only interface method and always calls with all - # -- args. However, providing defaults eases unit-testing and decouples unit-tests from - # -- future changes to args. 
- filename: Optional[str] = None, - file: Optional[IO[bytes]] = None, - metadata_filename: Optional[str] = None, - include_page_breaks: bool = True, - infer_table_structure: bool = True, - metadata_last_modified: Optional[str] = None, - date_from_file_object: bool = False, + *, + date_from_file_object: bool, + file: IO[bytes] | None, + file_path: str | None, + include_page_breaks: bool, + infer_table_structure: bool, + metadata_file_path: Optional[str], + metadata_last_modified: Optional[str], starting_page_number: int = 1, - ) -> None: - self._filename = filename + ): + self._date_from_file_object = date_from_file_object self._file = file - self._metadata_filename = metadata_filename + self._file_path = file_path self._include_page_breaks = include_page_breaks self._infer_table_structure = infer_table_structure + self._metadata_file_path = metadata_file_path self._metadata_last_modified = metadata_last_modified + # -- options object maintains page-number state -- self._page_counter = starting_page_number - self._date_from_file_object = date_from_file_object + + @lazyproperty + def document(self) -> Document: + """The python-docx `Document` object loaded from file or filename.""" + return docx.Document(self._docx_file) + + @lazyproperty + def include_page_breaks(self) -> bool: + """When True, include `PageBreak` elements in element-stream. + + Note that regardless of this setting, page-breaks are detected, and page-number is tracked + and included in element metadata. Only the presence of distinct `PageBreak` elements (which + contain no text) in the element stream is affected. 
+ """ + return self._include_page_breaks + + def increment_page_number(self) -> Iterator[PageBreak]: + """Increment page-number by 1 and generate a PageBreak element if enabled.""" + self._page_counter += 1 + # -- only emit page-breaks when enabled -- + if self._include_page_breaks: + yield PageBreak("", detection_origin=DETECTION_ORIGIN) + + @lazyproperty + def infer_table_structure(self) -> bool: + """True when partitioner should compute and apply `text_as_html` metadata for tables.""" + return self._infer_table_structure + + @lazyproperty + def last_modified(self) -> Optional[str]: + """The best last-modified date available, None if no sources are available.""" + # -- Value explicitly specified by caller takes precedence. This is used for example when + # -- this file was converted from another format, and any last-modified date for the file + # -- would be just now. + if self._metadata_last_modified: + return self._metadata_last_modified + + if self._file_path: + return ( + None + if is_temp_file_path(self._file_path) + else get_last_modified_date(self._file_path) + ) + + if self._file: + return ( + get_last_modified_date_from_file(self._file) + if self._date_from_file_object + else None + ) + + return None + + @lazyproperty + def metadata_file_path(self) -> str | None: + """The best available file-path for this document or `None` if unavailable.""" + return self._metadata_file_path or self._file_path + + @property + def metadata_page_number(self) -> Optional[int]: + """The current page number to report in metadata, or None if we can't really tell. + + Page numbers are not added to element metadata if we can't find any page-breaks in the + document (which may be a common case). + + In the DOCX format, determining page numbers is strictly a best-efforts attempt since + actual page-breaks are determined at rendering time (e.g. printing) based on the + font-metrics of the target device. 
Explicit (hard) page-breaks are always recorded in the + docx file but the rendered page-breaks are only added optionally. + """ + return self._page_counter if self._document_contains_pagebreaks else None + + @property + def page_number(self) -> int: + """The current page number. + + Note this value may not represent the actual rendered page number when rendered page-break + indicators are not present in the document (not uncommon). Use `.metadata_page_number` for + metadata purposes, which is `None` when rendered page-breaks are not present in this + document. + """ + return self._page_counter + + @lazyproperty + def _document_contains_pagebreaks(self) -> bool: + """True when there is at least one page-break detected in the document. + + Only `w:lastRenderedPageBreak` elements reliably indicate a page-break. These are reliably + inserted by Microsoft Word, but probably don't appear in documents converted into .docx + format from for example .odt format. + """ + xpath = ( + # NOTE(scanny) - w:lastRenderedPageBreak (lrpb) is run (w:r) inner content. `w:r` can + # appear in a paragraph (w:p). w:r can also appear in a hyperlink (w:hyperlink), which + # is w:p inner-content and both of these can occur inside a table-cell as well as the + # document body + "./w:body/w:p/w:r/w:lastRenderedPageBreak" + " | ./w:body/w:p/w:hyperlink/w:r/w:lastRenderedPageBreak" + " | ./w:body/w:tbl/w:tr/w:tc/w:p/w:r/w:lastRenderedPageBreak" + " | ./w:body/w:tbl/w:tr/w:tc/w:p/w:hyperlink/w:r/w:lastRenderedPageBreak" + ) + + return bool(self.document.element.xpath(xpath)) + + @lazyproperty + def _docx_file(self) -> str | IO[bytes]: + """The Word 2007+ document file to be partitioned. + + This is either a `str` path or a file-like object. `python-docx` accepts either for opening + a document file. + """ + if self._file_path: + return self._file_path + + # -- In Python <3.11 SpooledTemporaryFile does not implement ".seekable" which triggers an + # -- exception when Zipfile tries to open it. 
The docx format is a zip archive so we need + # -- to work around that bug here. + if isinstance(self._file, tempfile.SpooledTemporaryFile): + self._file.seek(0) + return io.BytesIO(self._file.read()) + + if self._file: + return self._file + + raise ValueError( + "No DOCX document specified, either `filename` or `file` argument must be provided" + ) + + +class _DocxPartitioner: + """Provides `.partition()` for MS-Word 2007+ (.docx) files.""" + + def __init__(self, opts: DocxPartitionerOptions) -> None: + self._opts = opts @classmethod - def iter_document_elements( - cls, - filename: Optional[str] = None, - file: Optional[IO[bytes]] = None, - metadata_filename: Optional[str] = None, - include_page_breaks: bool = True, - infer_table_structure: bool = True, - metadata_last_modified: Optional[str] = None, - date_from_file_object: bool = False, - starting_page_number: int = 1, - ) -> Iterator[Element]: + def iter_document_elements(cls, opts: DocxPartitionerOptions) -> Iterator[Element]: """Partition MS Word documents (.docx format) into its document elements.""" - self = cls( - filename=filename, - file=file, - metadata_filename=metadata_filename, - include_page_breaks=include_page_breaks, - infer_table_structure=infer_table_structure, - metadata_last_modified=metadata_last_modified, - date_from_file_object=date_from_file_object, - starting_page_number=starting_page_number, - ) + self = cls(opts) # NOTE(scanny): It's possible for a Word document to have no sections. In particular, a # Microsoft Teams chat transcript exported to DOCX contains no sections. 
Such a # "section-less" document has to be iterated differently and has no headers or footers and @@ -452,37 +565,7 @@ def iter_row_cells_as_text(row: _Row) -> Iterator[str]: @lazyproperty def _document(self) -> Document: """The python-docx `Document` object loaded from file or filename.""" - filename, file = self._filename, self._file - - if filename is not None: - return docx.Document(filename) - - assert file is not None - if isinstance(file, tempfile.SpooledTemporaryFile): - file.seek(0) - file = io.BytesIO(file.read()) - return docx.Document(file) - - @lazyproperty - def _document_contains_pagebreaks(self) -> bool: - """True when there is at least one page-break detected in the document. - - Only `w:lastRenderedPageBreak` elements reliably indicate a page-break. These are reliably - inserted by Microsoft Word, but probably don't appear in documents converted into .docx - format from for example .odt format. - """ - xpath = ( - # NOTE(scanny) - w:lastRenderedPageBreak (lrpb) is run (w:r) inner content. `w:r` can - # appear in a paragraph (w:p).
w:r can also appear in a hyperlink (w:hyperlink), which - # is w:p inner-content and both of these can occur inside a table-cell as well as the - # document body - "./w:body/w:p/w:r/w:lastRenderedPageBreak" - " | ./w:body/w:p/w:hyperlink/w:r/w:lastRenderedPageBreak" - " | ./w:body/w:tbl/w:tr/w:tc/w:p/w:r/w:lastRenderedPageBreak" - " | ./w:body/w:tbl/w:tr/w:tc/w:p/w:hyperlink/w:r/w:lastRenderedPageBreak" - ) - - return bool(self._document.element.xpath(xpath)) + return self._opts.document @lazyproperty def _document_contains_sections(self) -> bool: @@ -524,12 +607,6 @@ def iter_hdrftr_texts(hdrftr: _Header | _Footer) -> Iterator[str]: return "\n".join(text for text in iter_hdrftr_texts(hdrftr) if text) - def _increment_page_number(self) -> Iterator[PageBreak]: - """Increment page-number by 1 and generate a PageBreak element if enabled.""" - self._page_counter += 1 - if self._include_page_breaks: - yield PageBreak("", detection_origin=DETECTION_ORIGIN) - def _is_list_item(self, paragraph: Paragraph) -> bool: """True when `paragraph` can be identified as a list-item.""" if is_bulleted_text(paragraph.text): @@ -581,7 +658,7 @@ def iter_paragraph_items(paragraph: Paragraph) -> Iterator[Paragraph | RenderedP if isinstance(item, Paragraph): yield from self._classify_paragraph_to_element(item) else: - yield from self._increment_page_number() + yield from self._opts.increment_page_number() def _iter_paragraph_emphasis(self, paragraph: Paragraph) -> Iterator[dict[str, str]]: """Generate e.g. 
{"text": "MUST", "tag": "b"} for each emphasis in `paragraph`.""" @@ -616,7 +693,7 @@ def iter_footer(footer: _Footer, header_footer_type: str) -> Iterator[Footer]: text=text, detection_origin=DETECTION_ORIGIN, metadata=ElementMetadata( - filename=self._metadata_filename, + filename=self._opts.metadata_file_path, header_footer_type=header_footer_type, category_depth=0, ), @@ -645,7 +722,7 @@ def maybe_iter_header(header: _Header, header_footer_type: str) -> Iterator[Head text=text, detection_origin=DETECTION_ORIGIN, metadata=ElementMetadata( - filename=self._metadata_filename, + filename=self._opts.metadata_file_path, header_footer_type=header_footer_type, category_depth=0, # -- headers are always at the root level} ), @@ -668,7 +745,7 @@ def _iter_section_page_breaks(self, section_idx: int, section: Section) -> Itera """ def page_is_odd() -> bool: - return self._page_counter % 2 == 1 + return self._opts.page_number % 2 == 1 start_type = section.start_type @@ -682,14 +759,14 @@ def page_is_odd() -> bool: # -- on an even page we need two total, add one to supplement the rendered page break # -- to follow. There is no "first-document-page" special case because 1 is odd. if not page_is_odd(): - yield from self._increment_page_number() + yield from self._opts.increment_page_number() elif start_type == WD_SECTION_START.ODD_PAGE: # -- the first page of the document is an implicit "new" odd-page, so no page-break -- if section_idx == 0: return if page_is_odd(): - yield from self._increment_page_number() + yield from self._opts.increment_page_number() # -- otherwise, start-type is one of "continuous", "new-column", or "next-page", none of # -- which need our help to get the page-breaks right. @@ -699,7 +776,9 @@ def _iter_table_element(self, table: DocxTable) -> Iterator[Table]: """Generate zero-or-one Table element for a DOCX `w:tbl` XML element.""" # -- at present, we always generate exactly one Table element, but we might want # -- to skip, for example, an empty table. 
- html_table = self._convert_table_to_html(table) if self._infer_table_structure else None + html_table = ( + self._convert_table_to_html(table) if self._opts.infer_table_structure else None + ) text_table = " ".join(self._iter_table_texts(table)) emphasized_text_contents, emphasized_text_tags = self._table_emphasis(table) @@ -708,9 +787,9 @@ def _iter_table_element(self, table: DocxTable) -> Iterator[Table]: detection_origin=DETECTION_ORIGIN, metadata=ElementMetadata( text_as_html=html_table, - filename=self._metadata_filename, - page_number=self._page_number, - last_modified=self._last_modified, + filename=self._opts.metadata_file_path, + page_number=self._opts.metadata_page_number, + last_modified=self._opts.last_modified, emphasized_text_contents=emphasized_text_contents or None, emphasized_text_tags=emphasized_text_tags or None, ), @@ -753,41 +832,6 @@ def iter_cell_texts(cell: _Cell) -> Iterator[str]: # -- do not generate empty strings -- yield from (text for text in iter_cell_texts(_Cell(tc, table)) if text) - @lazyproperty - def _last_modified(self) -> Optional[str]: - """Last-modified date suitable for use in element metadata.""" - # -- if this file was converted from another format, any last-modified date for the file - # -- will be today, so we get it from the conversion step in `._metadata_last_modified`. - if self._metadata_last_modified: - return self._metadata_last_modified - - file_path, file = self._filename, self._file - - # -- if the file is on the filesystem, get its date from there -- - if file_path is not None: - return None if is_temp_file_path(file_path) else get_last_modified_date(file_path) - - # -- otherwise, as long as user explicitly requested it, try getting it from the file-like - # -- object (unlikely since BytesIO and its brethren have no such metadata). 
- assert file is not None - if self._date_from_file_object: - return get_last_modified_date_from_file(file) - return None - - @property - def _page_number(self) -> Optional[int]: - """The current page number, or None if we can't really tell. - - Page numbers are not added to element metadata if we can't find any page-breaks in the - document (which may be a common case). - - In the DOCX format, determining page numbers is strictly a best-efforts attempt since actual - page-breaks are determined at rendering time (e.g. printing) based on the fontmetrics of the - target device. Explicit (hard) page-breaks are always recorded in the docx file but the - rendered page-breaks are only added optionally. - """ - return self._page_counter if self._document_contains_pagebreaks else None - def _paragraph_emphasis(self, paragraph: Paragraph) -> tuple[list[str], list[str]]: """[contents, tags] pair describing emphasized text in `paragraph`.""" iter_p_emph, iter_p_emph_2 = itertools.tee(self._iter_paragraph_emphasis(paragraph)) @@ -842,12 +886,12 @@ def _paragraph_metadata(self, paragraph: Paragraph) -> ElementMetadata: category_depth=category_depth, emphasized_text_contents=emphasized_text_contents or None, emphasized_text_tags=emphasized_text_tags or None, - filename=self._metadata_filename, - last_modified=self._last_modified, + filename=self._opts.metadata_file_path, + last_modified=self._opts.last_modified, link_texts=link_texts or None, link_urls=link_urls or None, links=links or None, - page_number=self._page_number, + page_number=self._opts.metadata_page_number, ) element_metadata.detection_origin = "docx" return element_metadata