From ef4d7b7a132e718e43a3a71d4af1e1e6ac877a21 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Thu, 17 Aug 2023 15:36:42 -0400 Subject: [PATCH 1/3] Run pre-commit hook --- apis/python/src/tiledbsoma/_dataframe.py | 11 +++++++---- apis/python/src/tiledbsoma/io/conversions.py | 2 +- apis/python/src/tiledbsoma/io/ingest.py | 7 ++++--- apis/python/tests/test_type_system.py | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 083ba7f626..5ac943546f 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -6,9 +6,10 @@ """ Implementation of a SOMA DataFrame """ -from typing import Any, Optional, Sequence, Tuple, Type, Union, cast +from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union, cast import numpy as np +import pandas as pd import pyarrow as pa import somacore import tiledb @@ -133,7 +134,9 @@ def create( platform_config: Optional[options.PlatformConfig] = None, context: Optional[SOMATileDBContext] = None, tiledb_timestamp: Optional[OpenTimestamp] = None, - enumerations: Optional[dict[str, Sequence[Any]]] = None, + enumerations: Optional[ + Dict[str, Union[Sequence[Any], np.ndarray[Any, Any]]] + ] = None, ordered_enumerations: Optional[Sequence[str]] = None, column_to_enumerations: Optional[dict[str, str]] = None, ) -> "DataFrame": @@ -400,8 +403,8 @@ def write( _util.check_type("values", values, (pa.Table,)) del platform_config # unused - dim_cols_map = {} - attr_cols_map = {} + dim_cols_map: Dict[str, pd.DataFrame] = {} + attr_cols_map: Dict[str, pd.DataFrame] = {} dim_names_set = self.index_column_names n = None diff --git a/apis/python/src/tiledbsoma/io/conversions.py b/apis/python/src/tiledbsoma/io/conversions.py index 00e0a62804..13359e1369 100644 --- a/apis/python/src/tiledbsoma/io/conversions.py +++ b/apis/python/src/tiledbsoma/io/conversions.py @@ -89,4 +89,4 @@ def csr_from_tiledb_df(df: pd.DataFrame, num_rows: int, num_cols: int) -> sp.csr return sp.csr_matrix( (df["soma_data"], (df["soma_dim_0"], df["soma_dim_1"])), shape=(num_rows, num_cols), - ) \ No newline at end of file + ) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 306f6710f4..093c5babf5 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -13,6 +13,7 @@ import time from typing import ( Any, + Dict, List, Mapping, Optional, @@ -739,7 +740,7 @@ def _write_dataframe_impl( try: soma_df = _factory.open(df_uri, "w", soma_type=DataFrame, context=context) except DoesNotExistError: - enums = {} + enums: Dict[str, Union[Sequence[Any], np.ndarray[Any, Any]]] = {} col_to_enums = {} for att in arrow_table.schema: if pa.types.is_dictionary(att.type): @@ -749,7 +750,7 @@ def _write_dataframe_impl( else: enums[att.name] = cat col_to_enums[att.name] = att.name - + soma_df = DataFrame.create( df_uri, schema=arrow_table.schema, @@ -1948,4 +1949,4 @@ def to_anndata( logging.log_io(None, _util.format_elapsed(s, "FINISH Experiment.to_anndata")) - return anndata \ No newline at end of file + return anndata diff --git a/apis/python/tests/test_type_system.py b/apis/python/tests/test_type_system.py index d6bc34f946..951911a60a 100644 --- a/apis/python/tests/test_type_system.py +++ b/apis/python/tests/test_type_system.py @@ -31,7 +31,7 @@ (pa.binary(), pa.large_binary()), (pa.large_string(),) * 2, (pa.large_binary(),) * 2, - (pa.dictionary(pa.int32(), pa.string()), pa.int32()) + (pa.dictionary(pa.int32(), pa.string()), pa.int32()), ] From e0846eb1bc1d5dd2250c2529a2c190a2133deec2 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Aug 2023 18:34:53 +0000 Subject: [PATCH 2/3] [python] Expand unit-testing for enumerated types --- apis/python/devtools/outgestor | 2 +- apis/python/tests/test_basic_anndata_io.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/apis/python/devtools/outgestor b/apis/python/devtools/outgestor index df29fd9a74..d5961a57dc 100755 --- a/apis/python/devtools/outgestor +++ b/apis/python/devtools/outgestor @@ -62,7 +62,7 @@ def main(): "--var-id-name", help="Which var column name to use as index for outgested andata", type=str, - default="obs_id", + default="var_id", ) parser.add_argument( "paths", diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 2af4e340f8..fc0eb32c83 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -1,3 +1,4 @@ +import math import pathlib import tempfile from pathlib import Path @@ -476,3 +477,18 @@ def test_null_obs(adata, tmp_path: Path): # of the Pandas data frame for k in adata.obs: assert obs.attr(k).isnullable == adata.obs[k].isnull().any() + + +# There exist in the wild AnnData files with categorical-int columns where the "not in the category" +# is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest +# this. +def test_obs_with_categorical_int_nan_enumeration(tmp_path, adata): + output_path = tmp_path.as_uri() + + # Currently getting float not int here, failing to repro the problem + s = pd.Series(list(range(len(adata.obs)))) + s[0] = math.nan + adata.obs["categ_int_nan"] = s + + output_path = tmp_path.as_posix() + tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") From 22368cf5729630d0c49277f69acaf028bf0f51ab Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Aug 2023 19:13:14 +0000 Subject: [PATCH 3/3] used pre-prepared input for categorical-int-nan data --- apis/python/testdata/categorical_int_nan.h5ad | Bin 0 -> 60864 bytes apis/python/tests/test_basic_anndata_io.py | 33 +++++++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 apis/python/testdata/categorical_int_nan.h5ad diff --git a/apis/python/testdata/categorical_int_nan.h5ad b/apis/python/testdata/categorical_int_nan.h5ad new file mode 100644 index 0000000000000000000000000000000000000000..f8b5af115fe497c68e8ff5cffa756f1c0a5bbe34 GIT binary patch literal 60864 zcmeGl2|$!Z_ggMeQ8AD3C^1d(Lck;M@8c3wL_ky&e`SRgP*`wT6ps?Lyi5PGJj&AY zEK4*s%@Qh8D=qIU54`aNyhYUiy>DjLWpG{5UA4b&VD`s+B+ z&2j=dM@PXM6G92HFZpUPzaORGT`SwE)96!0&x0PBAdw7jkq z2GVhvnIT54S7SI{8xtR`NkN#b*Rehz2&Embdj|LL!#RX@6(NVt8*ILDv?DpYQ9UGr z3R_Q+0OQN2xbs;OkNpyz$7BK-Uj)U|rjmG90)(xbi}S_pCwM-Mr)>W?5a8P*NG>}y z2(yDcK0#^JTCH5AbUGL0@de^N+&$er0MhCZ$O1wTqEmDO6V-ZMd>SzYt)3F;n4AiP zO-Rt_^%|Xfe2hk`Pk}hrtQd^dC8Z_NF|to{)~U6Go3#>ou_h zp?2O*$E0OVhVqlbfFZ}!X0RiEdGh{)^|8faBY91aSlR8n zrZ8`EbfP(y9SQ*X*W~Ds)H@usLmhm9UW=n6F#yVs3ZVUK^K{4^>kmzY>0jaLU|lF8 z`!9K5X zbktjJfS|oEq_4-(WxcU#&<>EkK1T=n@P`&4^w&8$YK4|(_1A!-LrSQ7xsgK=1fd~E zN2|-N8w&#Zx$$%;0Ok)eWcxPa=s*D~e9Em*5E^rIxnam{5Lzn8Z^F^Rcu+D>63AFT zO*uMdj0MC}1VH^X#re}9gSgAdT41ppt=JM(mG8<;f;0R7-!c1(Y)c5L6+g0cPr zIXdfKa`l1yE*u>)ht$}{P=+9M<>)9Kc(9v5KoGi>prZxgAJoV88N|`itSA}>VU*vU zqZ8Sv0geEI5X{j*Ih=seIABCU=)uvken5Nd!XQeZyGNt`4RmZ8SP0ZVluHlhCFa8# z0(uxnhh*?CHvteM2t7GE8V@)?Ni3!yfC{o>^&?llY++JAy?8oikJf?II6n2}=#Z&A z{=g^&T!PStr_0SxwjU@W2oW3|iiHCl3I{a>p)W^A5|{vuBh&kFbUB&K1BHb2ksMv_ z8Cd^vKoI(KbWjSK;Dh@44@UZ)vxmEv zy#9{j;#mSU7tiLzwK?SbB*X zf}Q)@0v%Z^yNZVVT9y}D-lqnzhghWB*b=dhRGzJkO*Y-j;S~c>Rn~zPkm~ci0Xf&Q zH7FQI%wbY3R$uurS3JrwF&?agKZ^PPfQIMRs)yYhu>plEWBzM2g3{SN6l2S03w;{F z=td2Ru_{opfMNl~0*VC`3n&)&hgcw~(!pE0OvUGN6=SD&^xTovd2XGPS;zKg{aJ12%Zn@io%P}Im;P9?et%Z* zscBV??LUxpYii3`-?lrHmGf?X<7o-|vW~`X|H#vEZ`Pfzzc7CoAF9Hpe69bFy4Bviu_c-jl`a*Ese_J-b5} z#i!M8^4L$+%?oWLOY3*~e7O(n&o-^!v^vtxF|*4^-z@a@J9zts_^j>6*KdcigTC}R zdsVb{{RW;`ad5+dJED(gYPZ;FS4C6x%j@@|wLe9_hxgyhp~-zWY!D=K{r9g^Emn=s zy!_Pt*IMI?Qoj$c-^OqBjvT%HuE^K#k>B4N+O1thiJw2t>{@j2`1vd1i`svit7;sX z`)h%i5k6sA^;Lx>#=o%>4}SOct5+;GK40zAG=1W$#{nL!)9ZK5odF!WbA3e5d!)v8(e|G2V zY;1kWR}9P5$=dVp*BIsJ@ek{ee1ck zKf5o}CeGED>>q6X7dm-JPKD+74Ep7c?^Cr~V|(d&x4%!O+E)AQqk>YmKWnd_S^YLU zu<6e$<*$e?XTEg1cKw>ce&*Kq2Q%HgPUg`KDK_=vft%<%hq)%xGJvGc5d|Hg`r=T3J~ zC5=wrwE1n%`{L+N`>j5hzUzf)f4kqA(mVM5n_^;P)xZxb6pF`Ncdh&G#$zvxe(rzT zCEriGD{kLnmk_-6uA%*zTEC1=YWsPOH_usYeK2k1kLUd23rei7*#38k@N?1C3iri( zP6c^YHP^&Z@8!>{b@TEIW4}eyyZhYH-xtS6HVAgDW8Qw28lSUP?j2CQs!B39zZI@r zmytEsMoL(`_L#QUeX;vt{W~*)FC{>$%=KB?C$N>eG%V)|L?e2v+Ut*=at zf0n9W{`~J|^vM$$t}1Cr#MgZfSF%z#!ClO z(%B2c`yXm-dVG0a_3Lz@#gG0fTPfqhzMIn{9V~9Y-mB`hnK`sbEbab$fcE&kIr?kj zyZy`kFrcy^_1CXUc51J(xPGSQ4|C&lwcvtn<(d?VM>VRdXNCzS`Y%6!G+C0+dC#4T z;%_xFmd*;W-ueCEoK`^A%~*SyXQ9 z){A1putualspsG6 zBOBjZv-gV0{Tu7Q>$jvibxJrZy0lmSwJh<)*&hwj?Hci)Id=@}=jtaC8_ui!;?(c# z)^GiCxOToc{jbw`&gHI%TYJ5KQW#Ql{#ek}zf!1=gUR}R()EkE{=w>ZW}bFP!0hv4 zqb29UqN|@3XKcx@z3siThW2Oa`6FiIo!DB#?~CU2JN?bMuaE7@5!bISxEWb1U#vAU zv&=i{lVZE>YsXita@wHZnaJ4Rg11}Gel=^1xqi9ubcykARm;upzx$sNPjCJ;=d=8L zvFp-bH#KTAm@Ip76}|W9^Ua-)8Q1FHK|L$jIVYf^rqNh+gp9|GR~H$UtYf|y>2wB zoOMGy@0xS*=Q(%9=nE~+?!Q)EGS@%WPt^U?Ztc}lkI(()d>687>Uopnb5ZL9t;XIx zW2oOXzd219yX1zrHhRSO?O`8v<-bb~1+=VURzGWxFC)LU8Q0bMn5c^$-Za+rns~ED zj-Py9+_wA3p1yw;h|Oxh zb2aa3f!NyS{>r)^`&D`J)3$4B<@eSY`nZ2x;nJgXhm=)QRU&guN?%?ri3 zQIX62n_duuY7Lq;CI9j7f6et@vFOJ`+kd(%miGD1&jSBe7i_&EUj4={^^3LF#kGZl z7S7GOARbKr`nqrS_2=F{`0;s#?z7e-Dqj_YU;gCtYQNnO&rO-OqkV{j$@ceguTzET zlk!A!_A__C@UcVAx*=7Mml$7|e!~X!uQ~bjW%2m@8?Cp@HGTiPEV)7JLBs5%a@Y0x zgA?tg3)+gW4$XFuzBm~6?ZUbC(&AY+s-1tuUh2E+r3CMuWu&pO?eBDGUtW60$?@iu zxen5q)lb_O!(?&JHzw4C+3U@n?Bo4YJVy$;_I3=5^H~p4qr?7VWyMx z)6$;5*C;Aq;`uAqKbZa4`oP@zP*_OcA^(|YC*3&T_(+pD2dVM$)0ejoc9e!Jh&|DJ zqn*@d(tgkHo0pN69OIdwA0S4{YtCf zE1}oEuH=UyW+RQa?cjqNei}TZ`9kTixHIcP_G%c6Hp{bIdXaspD&J zMh}0}-k={lzi++q$hRIR9i@c8kS-dVveKp4Id7eqSIN+Snf=O^$sWG;l|qZzZ~2ti zxvvejlT^Dacx5iMmGZn6%yzqEC!NZ#I<)ABN?OtTV28W=${P9y>%aB7E%<0kcZU-C zTJHzHt zUFC1OJmUMu?6)lNE6wa{*^p+&3;7Kh^8ML|{O+TP@6UR$AF80GVc**}D(DMYPDLrj z0*VC`3n&&)ETCB6=`0}s?h1&&pJKszAAe&8f4-#*c>J9V{0$)d{Zsr6A^e@53gGc~ z!SFXs@pnurgMSG;`%O_ro?<_xy(s-d84r~4RGB}N`BGUQ;G1xi^_8-oRMyYReu2C@ zMHvs2{iU*>R?ZJ#SwMdGR#_h?=S|QCl=C&^JW)A6RnAY9^V4T@eroRdkx-nV!9DmL z`%*tYBGGp+peUoqD*jo|kHAYCAL^3Ri$^ni85OiRgMuj*P%NNWK(T;g0mTCU7Z$*G z0=a?5cP55_j{%Qso@wCcgI^7PEBM3U^TFG}TEYc9$AF&!ej)gE;J1O# z0e=m=BOI942JZph4}1jp1n}wLXM$e}{s-_oz@Gwt3%nCN(1P!KX$d|Me1GuC;NJ#6 z8~k$c+2Hqp&jnuu-We7y^})9R9|S%MydL}{@SlLs1iuOV0q_^WvH&O9A1yZ>vQ<)9NvJ#8*(^~v&Q9Z z#NmxOya|UlXF*$P0l_-f)dBB|b?j6XC<0 zT$p|hWkHoyaO;41;<$p2y+1L3D#1OdLioKJv;;c`Vvumby5v0)ilPy>mXQFKPjUbM z76|nBmK$g$E$So&KOq5YwBJ||&Dv}FI+9N|0nlHz${`gztz8vbcXU!hYNEDTk}gK0 zLxRbELIwub^$ZZtqZ#4^L1UcVV>3EFFOKU5h9fUNGtO}AgYq!ph>;|0OvmShjB#Xh z5+wK}{T0WPf$`cHP1+;MA#)ec2W6@tt=y$)zvkatN=BIn@84$cEj48?!}kVlh*-mW zOI52gs0#9sL&Hx!E~4$39r3Gstp3xclN=%lfKOLL=urXk-coyNUD&sL{3veNr%|%Y z;z#q*suf({eKa3bq3qf$A&^#!5JmBHI*vvWvJmlUalUkl&!0~jo)fS}|HgNOtDvW# zO`NG=%(quF_Y*cYvLvH+I)Ru>Vy2hTr=7p$Mo$|-6iBp&ebgZLN&BLa>`P$} z0&`*slxTc<7lLO;P(CU*o65_HqV%3LT_l0?bp%F@r1%g5a}o&5A44G1J6TU~fzoFT zC%7|(*;>lqpTPN4&nODhqe*;v45bexFlP{rr*_Fsp*ZD_QWIRDdZ+slTpC2<`w^H< z)2H{Lcszm3E*>KY?vX|ylgsFtqe;B;C`unophU|pQF}4I*^>y}bv%JN!ziS7lco~f zZ32OT6A4VG@$x{fkKJJQ@f|76FCws-+XGP>_H^nHBrL-BJT0Lz_$U9 z`#TTtxbJO7VIzRJukHvQ!!^Mpttt4rG~5Ir(!9ZU1&=atuh|`ZWAL58V;(`^L%_EO zk9$qb59K01>f#F?X&8^P@cahpn3pSf8+FUEy}$2X^h!}3HX<9L<`7Q= z!-Q)7lgTm9NzMdpfYF>c*5grO%oAljjx*+C`Sa;mUPxzQ9%33s=bVDG%N-P15>ox zfjUi!yN7!lAwi?pYji|H3M7fuC8Z`qz?cu$2hnQ1CKfW)57er)fV0MAPJ8ROZ($ZG zsrBkF4l=Mt`;M3GNcN#Fq!5O7;7z_I&ZdJizinSr5(?`SY4<6G>jU>v`ZF zl;g?nTkKI3?UX|x$n16C3z7$`FS1)AagXy!&KGS!9&sKEADxPg3+W3U&G&DkT4P)v z3Pxn}9zKtQjz3>yYhMPR{5;%m63M}p0Q&i~?bZq9&_;vvy*2EV1ajFuCx71jwECAf z(m&Zs`VRxSte&lRjx5c_q6qR6T_l8`Pir6JaY-l3BjcVW?}2fiVfJxBrNOcBPx^}B zaRiuM->h3s7}+>RlPTa|VgXa@oB7L%8^F@*8#2$@(WfNf>~&u8{CLcjM&!@nDaCOQ zibl*W9?xUWG-Ahep7PJaS-;Qzj_93D)qgOK{ZrP7>2rwcWW1!su;_S3yQkmR z3MzX!+S4uZUo48d>F(#Hgru0kniU(h<9=5+zF8H%rMC2B|P&( zhxc^oS`Z-L<6#i*{m`{0K)&O{U>l0JrLY}^cvpxH?+MX$BtX7z#GpuVi9&A*eJJ#$ z(2qiY3OiF6Kw%(-ct41)D*^HyBL>-BAl^Ho3noD5K_RxQxGe<1HiGSo;n;SJacooA z-WVO*oH34V58ErFV;E&J9P=`!qik&FjE-&En2xeB4cnKE}Z6Sr?eS4NsU2VCD;1kQO?BJuad!Lr*- z`SuVPR81o3Rg|xQ^6jJWqA1IovzNl1H2q;p-$mh{v^iZ7i9A~>Z-+Q<%c&m=7Q7}! zrF>a_N=8?yT|$%npH$UKf1tYkQ=}}-WV`a!Bq`%^=GXmGPKf%6iKplGED(PvP|bS1 zFhRQ5p+VleJwl{DJNJ(adaJp#SU94NTz^CSaOISZf41K~;_V7A z=M~%&KU}!w)r7i|lzZcyPwXZ}OGR!ys~4P#lgtU-~X})d^9I%&7J8%|bDA#iZ2pi%*Kt?-mUh<99?n(6))M%k8?-;=yZ<)vnq` ziur7@v}Db3(Y8)x(vqes>F4`XM}6A0Q0)Ik&`&cL-4aL5+&XOG$y;LMelrSc7v2`j z=cz_a7$QhVSM+kt*>+3p5IXd)ow>Hs&rnJ-=F-c?#rhN#Zk2~r_64ATlDOH zW{tW*p*TKZ>{7RHio{tPy}n)0vrt?(Xm$HHyojf6jt zCR4z_%L42iD3cZ&`y9G9n+UXI`(^7MT-&q#jpJsLPc{M2UuMgpC;dKvyxsx{49Z=R z_Q!OHm-j0a9}Bp=@1XcwfXn*~ijU#=-;{9)K1u(3(&zg#anHx*`_#1JloF+KR=o;kdobgc{HD;sn+PPxR*EFJK#c4#$5=c-@{?;ltI>BOaJ`3 z+qbe{VLk!$^J(X091mD~#kOM&JK-6}v$NAytJ?|BYOG-=Jo|Zec8XgK*@2rcf3got zwkK@!*02-KP3F!sS$|{j$@`t-8e*II1kg`Q+6mA0*gUax1IeJY^8{0lJ+C)BMem-d zksqOxNr(?u>*CegXbnaf18+r`uB zkNYq--s73EHT16qa#e7GhxJ8f!w2nq(m{v>mo0z7cP!}lZdrS$OH9F;o{dsASr&$i z*CwNu9tQaK2$I__PI};2JI47CU2i{cUp$M(vqyAz9*Hg>IH)f|`Fs-~o=c+h?iS{W z2^b%PzJ8v*NM~~KTozqeh^J>WueKhYy*%78A0~&T3kmU&i;8En=vaOsomGMR(8L^(xvEi_-g@bolZRxC9!)Cmd?Z7%iROwsre1|VF}p12+z;%NmxAN$GH<7 zi)ZrL_`%{CKh7cOSUlrr_be=)@w0mu7SH%u`)2WszXlOsLO#2vVf7TF)~g5W)QK9( zk9`LnlgIL5_c^S-W8$Or$j9z?9`Jk0c4hZHJbxWb1diu-m17^YUnajUjpzAaGr?ca z1b=-K{I8qfZ(xGIp$UE(EqJg7$RZS|SU|CWVgbbhiUkx4C>BsGpjbe$z`x!Cth z_r3r1hEWV4TflJswx1Xze=~tkd;W&|ad!TOdwOe}zu}pHsq;6-BcvSJ1kmrpDE)w% zLjjxxOr6hW9xl$v&QIk-%i^Rb!dU-HK17nwp!r#{|KS;#HToZ(v6}Up}2LoN0cR?0>FM1Z(s^JhL{{{~S*N5nQ(X$$sxbng6Ic z6u?=)RR7C7S)9>Y{V(}A$!7-5&yxKQ?{KWq|L_jTRR5cFmXs%(0A>E8c2K~>Enuqu zdC+#mQdz72<*64qDr)sMI373e)vI$W7AGL!59&Q0s{V(%;aehnp zKPH_$7nC$8MUH}J*klNJA_f4LA>hdrmtlf>B)nKc5K^@y9n&ugnXKU0@34DN*<0zl zndQr`qXQ{lU@i$TzGRBep!Q|Ymq_c2^YPDxxiNWkzig1l?VsTh9@zyRC(yu0akkE& ZY|k7R_Whh4EIIDtvnAFTck$Vj{{ctiQY!!e literal 0 HcmV?d00001 diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index fc0eb32c83..93f64ecfdd 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -1,4 +1,3 @@ -import math import pathlib import tempfile from pathlib import Path @@ -39,6 +38,24 @@ def h5ad_file_uns_string_array(request): return input_path +@pytest.fixture +def h5ad_file_categorical_int_nan(request): + # This has obs["categ_int_nan"] as a categorical int but with math.nan as a + # "not-in-the-category" indicator. Such H5AD files do arise in the wild. + # + # Reference: + # import anndata as ad + # import pandas as pd + # import math + # adata = adata.read_h5ad("whatever.h5ad") + # s = pd.Series(list(range(80)), dtype="category") + # s[0] = math.nan + # adata.obs["categ_int_nan"] = s + # adata.write_h5ad("categorical_int_nan.h5ad") + input_path = HERE.parent / "testdata/categorical_int_nan.h5ad" + return input_path + + @pytest.fixture def adata(h5ad_file): return anndata.read_h5ad(h5ad_file) @@ -482,13 +499,11 @@ def test_null_obs(adata, tmp_path: Path): # There exist in the wild AnnData files with categorical-int columns where the "not in the category" # is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest # this. -def test_obs_with_categorical_int_nan_enumeration(tmp_path, adata): +def test_obs_with_categorical_int_nan_enumeration( + tmp_path, h5ad_file_categorical_int_nan +): output_path = tmp_path.as_uri() - # Currently getting float not int here, failing to repro the problem - s = pd.Series(list(range(len(adata.obs)))) - s[0] = math.nan - adata.obs["categ_int_nan"] = s - - output_path = tmp_path.as_posix() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") + tiledbsoma.io.from_h5ad( + output_path, h5ad_file_categorical_int_nan, measurement_name="RNA" + )