From 74bab7c49becc408c181602580434e6079441f2e Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:58:18 +0100 Subject: [PATCH 01/29] coxph_forestplot --- .../docstring_previews/coxph_forestplot.png | Bin 0 -> 18530 bytes ehrapy/plot/__init__.py | 2 +- ehrapy/plot/_survival_analysis.py | 102 ++++++++++++++++++ .../coxph_forestplot_create_expected.ipynb | 86 +++++++++++++++ tests/conftest.py | 4 + .../_images/coxph_forestplot_expected.png | Bin 0 -> 17755 bytes tests/plot/test_catplot.py | 15 ++- 7 files changed, 206 insertions(+), 3 deletions(-) create mode 100644 docs/_static/docstring_previews/coxph_forestplot.png create mode 100644 tests/_scripts/coxph_forestplot_create_expected.ipynb create mode 100644 tests/plot/_images/coxph_forestplot_expected.png diff --git a/docs/_static/docstring_previews/coxph_forestplot.png b/docs/_static/docstring_previews/coxph_forestplot.png new file mode 100644 index 0000000000000000000000000000000000000000..82e72b892d14bad910ff79f1737b80796dc196b5 GIT binary patch literal 18530 zcmdVC2{hL2zc+eIMaWPYQW8pKRvJ(gA{3cr9vZl%GK9!flnhNmk}~)k_WZeB zV$xy~BAacToi8}ah>P3*`y0g0JDw4jT7IMeAF}GgkrPf73X=x;L*wllEI^^CG^#2c z(sg?{^7)eCz5WIHv8f7)U7G_QGX(hbRVJ({I(_X*pif}9Wm-UF;C*#V>M23dz~uhi zvIG~$V=Fq?SGY>a^7l;2iuHvT^#n391XRq=Rs{|2TV3z7%V*Nd^u0-4acrZfe5{At zP{Z{z#itbLY54zs9gs{NULLUYHPl*L8T$9zi9r8tY)fCm?2MX!-`On85u>#9m1rQc z|HjfAk7OIVtyy~C9kblAEq~uwYSpd!_l@o~{}VsZUG;OZ6P@SGgs;E+1r7iK5z zcW4NPTGj@y*PC9R9`QVR?PhYn64E23odj`^pi}kX@m}*Y7cO4R9KL79FgrP-cx%&v zu{W}#n`LCU4YEvL490rj+;!%SMPtl?+b>U2$IBOHg!k{?pExBbC`kQORaK>xqQUdn zxR5#T^lJq-w|!oV^RBkD_jc&8u358;mzOv5XNl|Rre`iAfq7>dts3r2-j}pq85|sZ zVQ$)?y0(^ISeQ{gT;!6kX57rbo~FC%sgoz;aAx$YR;3N=nhAAX{8*fF*>YZ6>mr5qKkYRfdv^(aeAG82Bt*a0wA7t_e0&^>{6Z`CBJ3n(3 z%H;TG*hYbr@BU&bjjTettQBkoa+niTGIryN9 zOTp9E&CP4Fva$j~LTXAqrX85f?T_`HtVOHTR2Q`@!fQ$JQy=$V-O3Jdop zJbKiWVIXcB%pq!IQ6DZUA>rndmFhV=Y12_8$sukQd`Cbf0e_X)yH~}_%PTU%S9|!u z9{a$jPo?q;CMKNe`C0SC z-`=zQ8f?6d(rwE&ztz!Ua`e6g4OXqMNtWlwi@(3>Q4T&*4sn)v*M9ZP-!A36W$#{2 zI+jgJZf-@o&T{7$7v?1-CGRjC{qk{Z-u&!TU9NTA*z~w{Uw^+PUZY>V`mlq8Bm*O( zmZ2g2h7B8@+B7Y@&dihkW=HAW+M7HBbcHRYYrbpFw?K2-2n<%8XJ7k>7sR@c<1IxH+K6gl*++_`h-?XWQaH_@^Jf`TjU z+n@TDm&@M`4ejpkrm?fLV_Ls+`7K`gtQ)rPxBsi-$~ca89$3f8S@q&Ye8WgVK>_!a z^&XqI6b1T@a&O*`A3tsv5~{4evDWhB$;3%DP0hOg`iKl`q3oxJuCBCfPE>6!jUb1K zjpcp*{JF)MGY03n&V7FN0?T7~c$hkZdyB_ilxYNc8`LhqU6P| zx)0^OH?3d4e)>nr=&Oz8eIG8hdcAqx5>6(-5fu>;5eo|oAG`NiVa4+(Fjj?B_Sn7W zI(&`_DwewcrchR{TzOx@av9crisZG($VfBC#cexI%sT$;ds*Z*&cAWvMpDtj!otBk zvijd7U#($h_m{M;t@!wHebnCb*R<1=Y;3kuHj9WbsYi$%K6>gMRsP{6|ctaW##|33GL z)Y?h(HmkauJncub^}E;qouxc83$vq5#ZG6YXJ)Lvf5^=#D5zbW^{nz*6zQ~>veGOO>|S?d2=RiDf+gywtrr|s_ZQF=pG#0 z7JcARy0r5UtG2fGm;QeL%1VXrI8FKaiM0a_kymR&HmEyXUAg*7P!Qwg%a<)Mz)*V) z52e?dnVGqbmQG34ii(I-f5^3#^q4$rdm;?eP&Gnh_Rk;7&gYksCOz@rUt<_RmG9qk zTGs|2iV!nJ&l>MqT;N1Ks-Qp;`_0X`uPm(!x`EeG2Jd#XaHC;Wc6FJzr0J+&9tL84 z1YxN$^7$2cOl3`Dnfd4E=O5p`t_P*Vxna-Qw_Lr`#1DaPK`Qa*b&{ zzecQ_XbqY!y}5Z8^+9V!FU~YU@j7#5L&H(74I6@Rx;M~bZL5#P%9*;kyEE+CwTmyJ z-XFtxx-V3-8g;JDGgZgyy$<80$)@Y6ZKzFi;*C_}!YsGN`6<%3g&W4ld+YcG1(n?{ zU0Stv?b>)MN2Fh8&SV+pF8lH0$MH1X3$Ctpufl{| z3t|s=a9G#f#Bois2RiL;o))r{{1+e6pek3eF4fH7+i1j?F_Jj>3)B0t@0Qxz5eZewgFYH zG2^R4p8IbqnMgNp-UJ{ExOuZJp!ni9j+x03#%@MN&5Qw9#L1<2{L}}CfU)^ zaa>ndxBt>T|9>HeoBQ3yC>V+Vym?cLVIkrA!-OC>K+mk4Q8Z09oO8?p4}NLs^)Ft$ z;F7Xo-E+QEqy}As7Qmn?$FeHtR6R$-gWZg15-Ohx>>Yj$9P`x3tc~7(X$|RkXj1x? zX90IGuS3E`jmakgvXh?dweV+0V8qt9$&MI_3(-M~&%#ft}K${gPcm?_` zTefW8vxmdh)^==aij452!AbtQ*Er+E1U<2c2=35`2s#Y#%Tu3Sk7#JrURl9Np3(H| ztbl-kVjv?UqvE-9V&UTEX2a6DM_37G7_LA|q00Dk?{ykD8jtCNgC`ZBh*wd1#=5Ke zCoeiVtp#S#v-{SX5ivF~aj&zh%+nZiFr&ugnX~FYCwEHXCwUs;Cb{>Vy-!b1|5j*^ zgv9ab>FHEYee@`{@bK`2xVX3mulHG|YaNSC8lq(ls2+3Exi)X(e7O#|KW*9LJak;E z+{^P$RFrT&kF=u_g;joW&goS0RzJ7g-sIHOBQu5CY3r~WTB)(p($WfPx`El{3r3mq z_2T90M2()lmAfQ59=t@B?%dq6b?b}9MivZcZR63-QcEf|5VPdoNAX>|)?$E?M$_6( z$GkBhbepDHM6;2xv7%sTT8$L7g?`VTJw+}f517wrmjH+G@$u21V`WQBz6{;=q10pV z%+zT1L-8jm=ot*REY- zU)cC+=Fcj=#9YvaKwwWSD0RsQadTzRnc`l9Q{{T=vqiMDv?O-!ymIv_ZBI|n?R)q9 z-kW(G6IB)rjn~W2=VsaDt|hYHd>J^OUtXTmv6Mq(7UVZzb`gR>P`t$M-I>GA6N2DW z_+0LY^_(0W>80uQm|G4%I;52jAMVCX-sdqXf>94NbG%>OJfjL9;4t)-?`-?iRX{h& z7cNNL+;kwF?To#B$m7Q%(LYKiy@ZsMmVfb{O8?$jTB4@;N`OD%tF@)2@9fm5-P5iT zR~9V#{z=-nrouiZV_VNN4flPEod&JOdumX)+3wBazQ8ow!_@K%$mCqVKCRT}iG2OT zha1&FdldjgKvI&8nVEU|Yeu$IkFHRd^NHN7oUu7NHU-pQ9GQ)3hG>5Pd!(5L| z-ms^dRMIlUrLvE@>ADr_{Gsd@$$JVjq-~dL0G+##B)V0 zZEbG=%ctgl{PaoSzyZ^d6%CwG^Xk*^Ogm1h${5fhFG3G z8@#vEjTX)Ix>}O9@%@Wvu|pqn1%NdzaNBcoBsz+nRN7-ABUeQpy@sYKASA>9WR{bk zUxO}&#*$G`PTFW!7jJEC?FDD&>Ra*)hE%f?Cr)tfJV}4n-X6X0ZSt`VU=Avx$xefB zhMFGH_4M|3qu%@#;@NZ_@F`x4yYy=Sbx**34`sz;AjKT^5Hx^bh)X?adgPO0;q z8&|DbMICW)a2OjK^9cY@XjjjX(YR0iUew1{Pkqd+qBORu~nY z%bZ*mbKpxIIwa=OHCBbdTR}lrLF8o6ga^EyxsEb}z~;v)LPLROp!>B^Gf6xCa1aM6 zX|N?AV_%?4wZ`o9y4OKyH3!|@Wq=PK=Z-*!v1xyrcFr~R*9s#uKI4M3Y!o2K`fd_7%MWpZjpMQ9Lbb?s>{kOD7m;!KzCFZgyyq~@K{VqdsO13Y7(rNz zj2K_ze+FaK9WGtERN2&|C3UXD@Uc<8+GF!_GfWzKIy$=UzP>FrSa+HL69KsF8(0NF z_MZxFfEI$1Z2{NVTJvXSCJ+xJ2!#}c7IFiCGO(ecfnktm(Q6hQiuPJzQzxS|ZBJy^p1+C?)!SbCa8m8Tj&LGj9IoyLaza z13ev{pZR_G$dLe}4lsOwj0U4mUt2R)P$Z!Q+_`&~Ur^)Wc1i#`wYnr4F*}35g7eVZ zLy5`B>7{RC<;{6?9WA*CZ4#M560CajXvdh+>05AW1xsAJ+ z!V0q7KQIdflBSbV4HQED_+f>(0e>V-ZN*dA$}b>r`10lFA4*)a4fCmCLoI2cXz6Rp z7Z*y}-LNdz?TkG7*f2MtY4h$sQ?x1GiwiyZ@6B{J7z>8pAhX=Q@O&2s5#jvLI6FT@>t_ez2yG7x z3i6MLh#2ZBFK-$6_D#6YvA76VnlwP3IVAu%Rn;E2V3|)ljbUWvyEkt%?0r82uQE`m z<*aDvoRn&;d*LwgJ7r**0ces`@78Sd@`lQHwYAC0j;~(77Ip2m=bt%a-TkK_a@QH9 zg9m+}Z4lrC%}k-MuP?Rc-sP^D5-4~#a8FkGEAU01bn66~t)E-G78n>65secB7QE6n zuBxh9Szk}YEwZ3bH^yYL_2L`IovrN*3=GwT?Q%=mY({%W1AMg4dFZxLLzu9E5~!e` zzrX)+p|AuT#|q4syI0e7(!ePvbd#*?CkwK36!wHzvJlK?NF7E)zF=Rjbu^;5N zBFs#GX@hGfg@c;rJckb-2K1)4va)Jv@47U|gTAtig8Ohih*R7OT$ZZYL~Tk~fyPNj zWxl!=?XqPpwT`Z?2F4b9F!IlxJBPM(yZv!yW;brSX(;o4;}2~>vA|{2*^%~qVbIW& z0u~mQTiTkj2GtmA;K0qZ$;YA~%b%tcr;wlCP1&FeCC%crC#INFmtS}||Yeh_->5jA|t^rJ{{qo{! z{dZ0to=lhe_wp|T*aV2UwSL!Wo8NsODk=^xU%4`TLuh1VTGQ#1C;jf<=L(VeatSlC zd#GJ9QNo^_=T8PQ;^`+@^yc8CjdZN99=J)R~wf5;2jdXjcu+%Zb#>MiJ~X?dZ|YBucHJ5qyC8)ooUGII z3f&b&6})zv#?a-nULM=8hJ>s}h19Oi#K;y9U1$8$rWeqqCuDEuTdu11_Gj9&PcaGN z6|YORpYMF0HrWXQ3rs~6IC<+rS=$}4G+m*Pl)kTyzQzgdJ%3c58l<3fiBiDr{gH{0 zu?iYz_pwS)H2Fa*2B#Xl~-2=Ft0WUI^|@C9daNGL1P6vdwC4^2kb9Rnt3vZBhL;zZyd0yA(~8Y_n1c z`*taiVM5-xBhr))ALf&j+jPG4HwPf% z)gKb9cFIWC_J{Dwwr-st1ac@_g~{*u$T{z}vbL6_wxmALxEes^Gou>c1m?8BI2!x) z>uPbaw8q1IzP9g@U)0sn;V$WukxITMv@N<%Fat51JbfA;vkY7<=jl@=3yaO@y-Dv= zYQE}dX|cVMefj#-8lsa09C>ee>RwfK^>QekS;JD!uUcE#(M^S%W}(Can3#C3B4^9i zBz((%Q2wZu6@SeBOWFf29v-vQ21ptBT3BW1wonC)Iw4u-uc744C6=PFbUsS|GYz=^ zO^Va-%RqMFF96A5YAak_T_2kiKdF?1pv|@CEEmwgO2A~I?zE(86*&#wRQ{f2TJpBc z(*x`LLwmaw+Lhbb7hkz#ZR^U)N(d71dfy@Lei<2Ajn4zj#XMEX@Oty+4U`@3Z0LjP zk|<#XNDvs0%dM@g@hSWg60DF!^d1;oN_Xs_R1lcFe*G$__)t6fp;&Q>nQY&^2W%d2 z&>tjSL{zkUbo3VRob44t3(*a97Jm)XL{2D6Du#w!?vvkd0L76~cpG=R>!eX5p8kC1ZesE%2EtO zHDdPPXw5L_&1;m)kRU@6>s=Z6ku+c^V#Gs%?vbZI`xW+`wvJA#8K!9SB$zwKOK-_& znWCfPP7V=6A5>Tsm<&*hUQBP7-tQtB9~#<~7FE|)-g)o8}bwZ3!{ckCc)6C@3&g4$8h(LB$bhp#|wI%1DL zd<`=mW`Uul9NM&Dv15|PK;2&}{zx5^^wkt<1a+AYMioVOrn#ZZd(qT-Ke!iAM*cpu zFAR`}G8faErQh!%zL|ywGYFEQ<#&HZ&f~|Ak152uDkv-q4p}0{0A}j3f4bN?IUnWZ zu)fbWuY<_W1S*V4FM3)xms9#2ju`S6TpfQj`fHwf<31m-AoBb#I67W25mU*~&%FKc z;bX}G^zIwvdZir`UpWcA__tFA52B-_8f3>9m4Gf%Hp|Kyf6K*aV}n(spJ}9eMv5j5 z?Uz2DJb9vB;>_XGe#X?$Fg4V3 ze(qNzJ@i<}gkzl^qn7Ascn*~Y=AKDxvR-p;8{wWq8dCuC0v`)TPt&J*&+I=fqz#uC zD4-i)=`gw!2>VT_wec_+?@K!+8$HI8gyYu|<6^8QzR0G^KJz`VaSU&^giH)0+aEsh zwbdK+2l{End-`665(RoC6PFK`x&UbG;B4H3@rhxbj^~$j&P++5BbN#xjzNd@INbmB zt59_OGw;PRqq8s9i$dhgtlhMmx~|~t$1CV}mG9o^*nfiRCjq8>^aRxD_Ol@e1@?9Mtl zJ1gNHyv5(Q2ntrhcWqggzdwm*SO06psNP!Wu8+)By9Q^&zCVKSq8+6We-QUwy;R9K zNt;hxT(jgxnBqdE!12roh!)`ge495Pq-Gcwib1JP{pC52{zvg)6Htyr?m&h&Hr@Qt zA)2z8uflWinu?Ioc>MS=Zh-I5kj=3CBxd>INjW(=>WV<^G(TA5K4-eBV#K-XpIy(E zGQT{&Vqt#NTL6d+vOM=E^{nkt`!DSjGkZ?73AAsb0fTCKHrclB!<`G+tvWI?qAt49 z)YLTo`1ad4LHG-5BVm7@tN)tH%k&2PrxzC&&;Ko#uhlL|J15?lzHu+c!G{9-4Pdmb zZ`@+#JdHAAuV1G_yd$y6oIbwbq1uhRqu%02w>8!#aXXEDInF`l+|udo24-pxv_XY{CybhSw36t_xnQK@ z{SoF2fc@krG99J87oJZ;e_lb!^_q9T^WcF{rsD6ZN+%50Wj;$*D2l2EgG|80#Kh38 zZ6~fwB!6i6=94=kF^)}+r9l45r)Si>2-eF%sZLZ4C3?>NeGUl|2Zk>}K(`ut8{a$& z4GV%N8Pvi{cmcHeP7g90)tHaf08-VknyN7hM9>eb(U&spZ_c2-Nz)umOiZ+4_x@5r z1An%Lj`#9rIM&rbTmB|uoK6D`T0u@1FS27PFCF6s>TfqOvvc2aS*9so>iHtl^ z?1|D#+U8yg@NvP#MQnE#@&llVRp8&_y|?7mX5HaY)vkWsI)a5EQ=&Sr2NHC1XW=To z9H&8zef#$5S}W&AnTQSc*C{D&xdhghd-mhbqp@;4xPQKI_~EnN1OrX4!PH1QU~(Gp zkX3jPvBdGftk9|5W=9Ht%N2edYq!gOQD4sp zS*V#-xc*(dBHh`}=j%zo(a$njB`Yh702wW>_gt<~M;GrvZ%@w^I87FF)8nLpLE3zq zYpsG*Spq1!_QQw>PArOR`F8JnURS`rL3a*^mudJ^Qv=k`I z(_pZYryIBcZ+kNfJ1c+o*MmZv?x?M(pg`Y!krL|_2)kZDT$}~jmKSJYU%I>F_n2vG zuYsf)7iv6V=vLPEqpSSZ`LaJN5iO_y{%s9RhO8DbbEN+#Y+KFVc~}d`zcD?v|D?~k zqZevtk=MKoanjlrkh^Pf>n{E)`Lxtp7 zznjELmQ5z-f>{+t=HWI&TWDseoBa8+5^|o|mQthh)d6e)H*UN@bv$*~J`YN`jFz@O z99V(-pT{nD z>;J^+0zjK`%Q$a84Qpw7+Kq`z0tQdhE5KK_O`EbJ=q~A}+})LgtV#1rx4H0%HJH09 z;40+pnsR@XsG3KQKK^+hTuR%+8(uZ#?XW4lp|XcWE{w2=!zQuX85q9l&qfM^|f(81Y;* zHWJK%OidLQ5m7?B3#)_PBT7WG{3!rJJ+I{#Z84DP;rRoLYmGE_;(CYA@+QhcUIxKn zCya-YQ!Gw3R4^boSP7ock|M)DL+9pIs+umRc#WxP*bPpg70gN!aKuH@pap>a^6zjQsk_>ytm-hS!2C{&a4=F94Clp$)8|w{ph{PeOnT^CA=3d zBg?{olok+S`mi$rpm+b17NWkp2ff(v$;nk{arEH9iAhO(2(n?(ooZSN{!Rp1cB>k} z%oDm;8n-6wgc`}gUT5L0Enl2JPvQQY0X;o(u)u$S0>+5;t!oLNoH3Jj*E^e)JaWW9g*^ofXGFe57wuX#1HcHO$= zAV@?F&%f17;+}Zv3gnXSBO?UeB!B>Al*unl6U-8)o%W@-_bNQHRD1F5+bfWJgv`%~ zNHAzCgg0vNuT}%|ct{3%F^a{@m(~C?9MSA#om4$oC`wrVt?ll=zivg2xqc)fJY0|x za4c$%sy)anC&6to2aKs$y>}OQ*x2|UK78mL>Lz-zr+O7Sa<#elyq@KEj4*%T4tf?A zPLIj&Y?6|aBmzoASN2JsU8jSI1B7y9m395G4oD&anFW$TN~+C9$HJUDNMXg#gvCU# zDwsTX(}Fx}0K0X`6a4dQFbD~afYj7fhS>!q3jeNM zc@1SqW3Zzcl6#z4N`Nm6)1=8i+t$SZFRZ1Zp)smKgp&axxR!~DNPfurUCM?=MuC`p zJ7+Q4j}cQ3xG`-6(>MWK&}w3+iF+LKPXc1uR)}JX>$)i`D>IA>Hq8zd|!F>;=PfRGU%#qqGGM!V{e2^P;^&&Bm+ zeyXbuvIqj)oAc!tPu$xkCAAJHs_4?M)9k0U_xxMV2Ijy4%0a@I+Y!B_P;fp-*LalQ zT{?T7{cfzdiE!Gi{c|BC97OaYEXWHvS?G?`iBg2bp&KO@z&`o1xG=gn=Dj%O9TpjQ z4Rf2MqXfE8zZK2RPbx3_HZN9;Lsy!EULW=*1yB-sVFficwWQhb>wjZtO^mGnE;F<` zt7Sig8U~>h8Suo)4^INUKuEZ_x%G>OI2DdqGx9;?ywP2C{Sin&yJq5=KjGgC6E3e_ zy&8$9^d^?=i!2*Y<~*`d5LNfVyvYB-Ns)2= z0qx#h_gNPSsm|LSfrZ+><02VN?!`p6$D?hoZ*A3^>4EX;ng8J~e-*B)VH~cx)J)if zXm(eI$iKyGg5=D{>}Ho6nvwzaCgwm8v2fcbPej8Fyb~6d^iHy0h9~m@8WZyL`O2F% z@NC$?NPG3w`)>fR)u=B?_X(T)Ni1Ey?c0wS46`+<_aFgJV%-i1d!u$Kyk66Mn7mp^ zi)E~ATCabOj~{Y(e>P#3rmJ=)4oYB4Szh=>+rD4uH96>kB(FBVSP3LwKQblsA4CYh zpr9b5DAt6A1o^)rgr3;Ush#U_RbxX<0!JT6tpe8eul4%*>z4zr8?|odsd?&UKcQ*AP@%OcOfAXfM6THLdtDbGI0b>_**(3z+;Xphdbzso0Pjppxdt7?aFOs+K^=Wte_Y2fs7?S|K%R ze86)?3b7KB|G2oRk?d=MW|H1gC=w>LA1XsI))GcfJWvUVVYg30Pe)^-1@yQM1qGR( zoXdY4Z9Sm7z(^(uhmc$-fY<~P*SdQ=$&@PWSMgBViCk=w_9|mzW0by%JGah>h^G)L zHWJ`BDe$(arQl=-_yf@X3fTWPZI9O@E|z#7fgKAXZ#xYzLFNOqe%e?*sJF8aCjq!j zLA!+w z#uSG=OI${f1qz0GWzm|hN5BH~APek(!oM7GC@wB0)z!Zh~5sTh?lMOVq*6=cN1UeQvZU^)Exc?!bK(`L{5 zq>*RBOKG8M(!JpY*5IL&!n|GIZv}}LDiz=_`9~#3$0r~1?IP|&7$+|xZqvLE&37GE z6el=XSUapaeyqW2PV>w0dSW|v>{#Xl)ibOeITU|@0A$e~l5h_^9!`J~{Hr4U_}K$n zlC;w#^&w=C?I{JkSFwpm&saY74RlV_AfkR@ zb4FRqbG3m1#F&VJh|Mg_k`@&dnuC{ThEquR9s*{dx!0e=Bqk5JQPmTLtAvN>S6W&M zcHxT&bDe89^&VO>Ihavn)618tBnAb!<^(ZBAXgE(KaQ;xsWtKU@&gyj#E!q@)`xgP zhCSKc&;!|mgQBFXdv9!foQ+%}c1u;Zw;TES`PoEoYpO>I@F2EgsAU6}&{Iy8B_t%Y z^kB``o1PC2NZr#d6Y~K2gCKl6LF{ZoAw?RNT`pE;qJ)e-B3T{mX<~(eQVA&rhg5uc zz(Zd&xz9u2HhZgj_>hBs>nOSiZn&yylh6Wm6oQ&qE-vHiOoTYwz7yE!9%paF9PD#*rO8xyUCG8cJr| zq^UVQA3nw}BO}A;6Sj^Ji-?HeAM(!JBB&4}i(gdqm?)NkDqdEe5!&S_PdZ)cHRvv~yq+z*N zX$uGT-r;*BiLU4u&y|ir&X@OxblKF@bZWBcO!umTB;`gefm%*>R=~UqZAZrRFxHq* zL&nK-M6ziK4-FN7_KK7uJH(gpb~ts4-H3ygnm=<+lYp&>hpO{Q1!*wQ+UPbXXRJ6E z7Z?7$dkv^jSaOujS!Sg|`FhD?-`}O&TBieO2&#s-BI{D}seFFY7@^k9sELG&SxWh) zR#pj^F!#)`?Q%!q*jb=Fqs!Q>bOc(!uq`W_E#mOQh>-ucqe#d7XN5;f#u1lBUd?-9 zj$t+>Dd`6C4h&&xTS;n34zcbKdAZq{83si2L&}JBNMz{v<3#L+4{Gw&M1A(h_>_?i z77D?x34q|04}}#ot}{YL>;C7Cz>QzdRGwHXxt)s2I*{=e5{qz=dP9$g&y`!Mbzj#4I0!KgH z8e_Hh?%wTRT6(~!;4ImFl2!^!i=Y^6AVdl!0EDLk6c5y&s`(oeeuLbfzO_3vHV~?W zJ~2^t^7A9`zls+xy8Q0hTtnCSt+W)_GLYgo zV?&JgpBsZ>-$WrbGr;)HjOl%g^x+)B-2Jr>WeZy-kxW^K@QiJ-r+So~Ee50n0-w?H zW5+X^&UKZAz)T78*R?-;Hqj5wnQS8@8%Ie$=}Z%!yq)t$|WsKJFg$j&(hCuOn`5&duBWlS=&YxS}F1 zRMiSf$Fqx!M9}wmScsjC%C|O!Ai1Oeb>U_6Bum^Yr)C#9@h2<^?f?Aq4?QBg&Gj7} z8zCoqrfEL&fH(?;&LH0n!Ut${>%jrAtNePX8GNBHA+VT3kFv)uLZY;{Xl!ad4P>H> zHY2w%Z5@3pvo*E}7mAHjyB8F^5mO$fSr2SqftXV6>DSyulMLKpe;^ux6{ua-FYj*s z2c}h0Qj+szo)8c$pWi`*$s^D7>I$*L8`-@{-8Ag5o}Dmk9q8N^!_7%a=N4yqna%ED z+uWf;zDA!iCN8ZR#*6#DzR$je#8od0gl0z}B%3paTOgHCMG?ryi0(oBad&iJ8=9~_ zf>3Gzb`X)X9GZ`CJ{8&zXx|Oa4QbM<(r=aaEZ&^tJvcCcG6qOc!1yLO6Q)ISxfEP$3+t9RH_gz_i9nmK5P=wa5oHr(GR4nm2Gx_083Qa z4kFtCJZslExwu?|?2czer>WV<8S@V&q@-P&2uWgqbygySh<&Iluy!boVv2~73phGD zjzK-N#)hc-=<#pF*JMFO_U>nWwKrAk80M1vJ*U0t>OFOMUns zwZ?d=xAGzw_BeJOV&mOPgzC8@EDl~wIY<-@C8gHXcE(2Rk|2H**=DT&*f2hqNfn58 zBC(?xbT#}Lldssl$>p7Eko5hwTf#2SyK>`_dXVbUG?E@?MDq6p44(7je$xkD%LrWJnUiBP;vuawMBQq7@)i&)TPqqZd{=E&Zm1g!Ytn@%*hz zHzmQF<6p9X(*NPVUE#kWV*c;DJ-U=Bf0bqI9MA z0AQ&yEBMn-&y{X-mmYHs3s9@N&bU;u(47CZB@CO~{%^lOLpK9`fcgCM@s(urJCW|8 z(4VS}+2^nfFI+>LBBBB88EV5loLW@ZIN)*|fFbrbVt)V~Jn;ZTKqwU?9*AHOkPG&@ z_I!5jB0K!kJ-C&YTObYsQ}G&@AOtx(86N)hQ-*HZnH-1{fS`RrmEM=KU4uQf+Ef#S zUl7L}!_SiqA{dEEFnZGWp5RZHviGWkDn%><^vYga9%%UE{&B?U+I_~jCGtN80);aA{19niPf|r2` zCz;Jv6y$UKLPCz^52!1JJsKIECR?@9+h{2mcHrx`vBQRfSeC?|Ju4~LtkjLm17&I5 zyNdtr^WSa8Hjt=^QIfxoQ8PUshgb&A0&$W;cA`Uxx&YP}@&Om4n5LTmm3s{490U&% zo%^a086XtDv%GgJN%vsNk=M12u~^7{dF;J{B-)MBY4iAjHlQmlvVS;?IHo}DmSmr| z%Xcp9Y=S6u0=t>+Fdw))%}N?9lf%JD+bo#C{O1i5sON|z9 z4)*oMLAk~6=-A=p;O?*B3+6)s1dlTN1&1f$m36mj{ws2=2;Fa#_u7bGOmGEZ95hTC ziece79_S1KWPjTub8NK-K^Y(GR-_z+lWLSt)yphitJP05$KSFG(52A#Xb-qgR79A2 zr4@`oZ20H(>lGA?Rcm<1a2!wlKI#Kp@la?KCGl6*zK-XCH iTkHM*Vf*vUq5>PQ?<$4`2mHPtit1qvr7Q)DEB^~+15qjf literal 0 HcmV?d00001 diff --git a/ehrapy/plot/__init__.py b/ehrapy/plot/__init__.py index 5ae52ab1..170b9fe9 100644 --- a/ehrapy/plot/__init__.py +++ b/ehrapy/plot/__init__.py @@ -2,6 +2,6 @@ from ehrapy.plot._colormaps import * # noqa: F403 from ehrapy.plot._missingno_pl_api import * # noqa: F403 from ehrapy.plot._scanpy_pl_api import * # noqa: F403 -from ehrapy.plot._survival_analysis import kmf, ols +from ehrapy.plot._survival_analysis import kmf, ols, coxph_forestplot from ehrapy.plot.causal_inference._dowhy import causal_effect from ehrapy.plot.feature_ranking._feature_importances import rank_features_supervised diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index bf74df85..91d78d81 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -2,9 +2,13 @@ from typing import TYPE_CHECKING +from lifelines import CoxPHFitter +from matplotlib import gridspec import matplotlib.pyplot as plt +import matplotlib.ticker as ticker import numpy as np from numpy import ndarray +import pandas as pd from ehrapy.plot import scatter @@ -251,3 +255,101 @@ def kmf( if not show: return ax + + +def coxph_forestplot(coxph: CoxPHFitter, + labels: list[str] | None = None, + fig_size: tuple = (10, 10), + t_adjuster: float = 0.1, + ecolor: str = 'dimgray', + size: int = 3, + marker: str = 'o', + decimal: int = 2, + text_size: int = 12, + color: str = 'k'): + """Plots a forest plot of the Cox Proportional Hazard model. + Inspired by the forest plot in the zEpid package in Python. + Link: https://zepid.readthedocs.io/en/latest/Graphics.html#effect-measure-plots + + Args: + coxph: Fitted CoxPHFitter object. + labels: List of labels for each coefficient, default uses the index of the coxph.summary. + fig_size: Width, height in inches. + t_adjuster: Adjust the table to the right. + ecolor: Color of the error bars. + size: Size of the markers. + marker: Marker style. + decimal: Number of decimal places to display. + text_size: Font size of the text. + color: Color of the markers. + + Examples: + >>> import ehrapy as ep + >>> adata = ep.dt.mimic_2(encoded=False) + >>> adata_subset = adata[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] + >>> coxph = ep.tl.coxph(adata_subset, event_col="censor_flg", duration_col="mort_day_censored") + >>> ep.pl.coxph_forestplot(coxph) + + .. image:: /_static/docstring_previews/coxph_forestplot.png + + """ + + data = coxph.summary + auc_col = 'coef' + + if labels is None: + labels = data.index + tval = [] + ytick = [] + for i in range(len(data)): + if not np.isnan(data[auc_col][i]): + if ((isinstance(data[auc_col][i], float)) & (isinstance(data['coef lower 95%'][i], float)) & + (isinstance(data['coef upper 95%'][i], float))): + tval.append([round(data[auc_col][i], decimal), ( + '(' + str(round(data['coef lower 95%'][i], decimal)) + ', ' + + str(round(data['coef upper 95%'][i], decimal)) + ')')]) + else: + tval.append([data[auc_col][i], ('(' + str(data['coef lower 95%'][i]) + ', ' + str(data['coef upper 95%'][i]) + ')')]) + ytick.append(i) + else: + tval.append([' ', ' ']) + ytick.append(i) + + maxi = round(((pd.to_numeric(data['coef upper 95%'])).max() + 0.1),2) # setting x-axis maximum + + mini = round(((pd.to_numeric(data['coef lower 95%'])).min() - 0.1), 1) # setting x-axis minimum + + fig = plt.figure(figsize=fig_size) + gspec = gridspec.GridSpec(1, 6) # sets up grid + plot = plt.subplot(gspec[0, 0:4]) # plot of data + tabl = plt.subplot(gspec[0, 4:]) # table + plot.set_ylim(-1, (len(data))) # spacing out y-axis properly + + plot.axvline(1, color='gray', zorder=1) + lower_diff = data[auc_col] - data['coef lower 95%'] + upper_diff = data['coef upper 95%'] - data[auc_col] + plot.errorbar(data[auc_col], data.index, xerr=[lower_diff, upper_diff], marker='None', zorder=2, ecolor=ecolor, linewidth=0, elinewidth=1) + plot.scatter(data[auc_col], data.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors='None') + plot.xaxis.set_ticks_position('bottom') + plot.yaxis.set_ticks_position('left') + plot.get_xaxis().set_major_formatter(ticker.ScalarFormatter()) + plot.get_xaxis().set_minor_formatter(ticker.NullFormatter()) + plot.set_yticks(ytick) + plot.set_xlim([mini, maxi]) + plot.set_xticks([mini, 1, maxi]) + plot.set_xticklabels([mini, 1, maxi]) + plot.set_yticklabels(labels) + plot.tick_params(axis='y', labelsize=text_size) + plot.yaxis.set_ticks_position('none') + plot.invert_yaxis() # invert y-axis to align values properly with table + tb = tabl.table(cellText=tval, cellLoc='center', loc='right', colLabels=[auc_col, '95% CI'], bbox=[0, t_adjuster, 1, 1]) + tabl.axis('off') + tb.auto_set_font_size(False) + tb.set_fontsize(text_size) + for _ , cell in tb.get_celld().items(): + cell.set_linewidth(0) + plot.spines["top"].set_visible(False) + plot.spines["right"].set_visible(False) + plot.spines["left"].set_visible(False) + return fig, plot + diff --git a/tests/_scripts/coxph_forestplot_create_expected.ipynb b/tests/_scripts/coxph_forestplot_create_expected.ipynb new file mode 100644 index 00000000..bcd174c1 --- /dev/null +++ b/tests/_scripts/coxph_forestplot_create_expected.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import ehrapy as ep\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "current_notebook_dir = %pwd\n", + "_TEST_IMAGE_PATH = f\"{current_notebook_dir}/../plot/_images\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adata = ep.dt.mimic_2(encoded=False)\n", + "adata_subset = adata[:, [\"mort_day_censored\", \"censor_flg\", \"gender_num\", \"afib_flg\", \"day_icu_intime_num\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "genderafib_coxph = ep.tl.cox_ph(adata_subset, duration_col=\"mort_day_censored\", event_col=\"censor_flg\")\n", + "\n", + "fig, ax = ep.pl.coxph_forestplot(genderafib_coxph, fig_size=(12,3), t_adjuster=0.15, marker=\"o\", size=2, text_size=14)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig.savefig(f\"{_TEST_IMAGE_PATH}/coxph_forestplot_expected.png\", dpi=80)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/conftest.py b/tests/conftest.py index 0983e7d0..56e1be71 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,6 +28,10 @@ def root_dir(): def rng(): return np.random.default_rng(seed=42) +@pytest.fixture +def mimic_2(): + adata = ep.dt.mimic_2() + return adata @pytest.fixture def mimic_2_encoded(): diff --git a/tests/plot/_images/coxph_forestplot_expected.png b/tests/plot/_images/coxph_forestplot_expected.png new file mode 100644 index 0000000000000000000000000000000000000000..9eba19ab7857dde6e1a51acd022e501afcb4d415 GIT binary patch literal 17755 zcmdtKc{G-7*e`q&87f1@42ejYl_D9+P^J{2LdHZSDf3(rl0;EGk&q0LAwy*>Q<_kc zDPw~~hRoyso%Ou?{nmc>`u=+NxAyw1)$=Ia_jO(8aURESI)xw9-?xU5hmk^|tkKre zFr-jcRO0hd`jz-^gT|VN_}@-9%_DAyTu!)oTA#I}?6-D1bIQf-)JdBS9(HHXopd?9 zO=_opLLRqa({!j&aUfD>YXuQS%+Z2NzAWkXYC%n}e4xm(j6vP;o?F)|R;CV021bqN$o&*QU{7 zqRHIg9GjV3Q0)cUA%`~{tdT08)UvER@bzOdux8D##xOg%N5)!*-nq5W@tL7#qQuThHs+{F3RR1tPY z+Lo-NDI?b`c;}}__H`6FCz(97c5;enO4-+sKQW4-O#Bzrk{a8Tl$3-gADKU;mX?;j zb>~jXx$p0^baY~d4(#6_lASH7e^=?~(9G6t+hhZm{N!A^_)eTSAtNh$J~WiBe5P+z zXGNf;*@2{kR$Zmusa|vg`)%%zQ2yMqSo^~4Z`XlF!K`B?oP~}bFFbzyxLn;wl265t zTiIt~cSXRWj8lt5=M&cziGfQyrOclMU8G}HOFF1ExIFRUz!^`^PvtwdZ;u^fW@6fl zpU?IFeL~_$F6H_2=QE4b-G%e^Z*HBhtlVAT(3E^+EHyPX$?4OhCthPhwNV05-oL*+ zZ*ApHJCL~d$PsQsL&IF3$-%9qD<&RT|6LoG(&}mrxs&f{&YwR&$Mm&utNlAZFE6hR zLPBbHhD^=OQYY@bpJv7Nan6h_G z!O0IR*R5M8vweF_;PSks#@@X`va)%tW0Ab_Oy3(4>jIa43*fn#n{_=sXMcr%7iV5x zUMlzYvU&yDf5?NH_|}?dJ32WT5)wlD=FOYNd^=qYO-(AVo%|lvVXWy#A8%@Q-2RWe z%cIZJnf-N9DZbOcWaQ;TZ{J==y<&yPAtw$-T94uOkd6DUI;4Fw9ILLYOED`x9W$MU zZ=?h{cNE2@=ioY4Q1tZlgqQCeR+@UoZ#9?A^hc@oeEk}NJGTA$Dz^2RyZD6*7tSosxqtccrLv)c`Hs9x?I+8? zNJ+Dz%BCh(|M{t!tNgnP{P|P^_*^O$cU-x0B{yKvXJP!c^b?O^9zQ?7Yq7Dkl(2{h zbyQ_j*3oU%H8p}#Qu>za0mIxpJZc$6Ip6wa>WJ)1_Pf<fnA?fSTIgNgwEEYm{9lzmZq@mE~RdHaUD916-e z$G`XWU6`5i(mtxDMjgx&^QGm*3+n5e_FpJ2md66K2na+qHXg)H_g06q9n3JI1jULU zP7YjNI#TYpQ$kWQ&+awn@#Dv>UftMoq|9ggl`EGM?{=1Yvr!J_Sd^XR-cnDS^QY$+ zq`rRnvNF%Mj)jjeYkz>H`x!?@(9#OZ z%X780wY_}zZtvTFwl_U-wZ%P`ZVjE-`;Uw@H4~pg&`9|_6Z3kd%8Wy4ENje@hC2P6otf#uv(eJi)6mgjEG;Wz z<>9$(l5P6Du8vMtRu)@1G%|9{r8T^luptL+TPIf9{@sPKQj(Huo<4n=TU;!CrXo8# z``zNRLZ?rcSg4r=R5TnN#m=5RJKAQSh`oFyUqo2AXKwOmtn3Z>_MYb#$ubU?Jw;il z-IO}k^W#UjqWd6wdl8C1H$PuZP3^*c^HjqmgOt5^#^`7-NSUB<^e-*W(=#%@cyoK3 z>+HCl@TN_DcU;Q1z zb!<+jqR7a|RNmNfsIek2FhS+b+qdGv9y{yJc6rWC{J`5?C@WKzkdUx)c8+9V+r(ZH zbwMv)ffXA#G$mzYkxMsItWL5}8e%p}u7 zM~CimWh_T%oTS;{sWrSivfc=mHQ@rA?rXChF*CEB9Q^1tHMCA;rstws$57j2x{Qnr zhtJR4%P}dsqg0m{*gibi{~^xK&Q4HNl&)(sS2+# zYV;V%ccWT+|Gm(OzLJshi>UL`(oz}pnby|Up1|elz$7Cv*0pOdv}BuYR8ZhfN=iC& z=1gUS>hcxzQ{6bVIc&w=;o-<~|9P9yp69uFd3#VtO#F($QpcV-$UcRlPL-ByHa(zB+QB=x{Gp5i%zXun)@m3RiVsW zJ9g|q6T5uk?LXn!X2pXOu5%M7S-H74V{eD(q^_O(`H8FQ(i%22A3Mq;s}qoXXA0kf%S zl;?}qQ?OS}FEo9)qa~;z_?wYTr1VbAB5TZuIi`zYS{`1^|EH0QkkEfK6#Bpa1>Z-Y zwyL_j1puc|@6T_^+Kx{A_+V3btRg_Uv$OM_$-~}n-_SC5XiZs`da<%?(hCCQ!u;sL zKHPJ0Wp8_-6Nl^#F}s)71i8068s!cC>MG;K$O<*hzI6RM8>h5|d6tK?MQJ5o*4o~l z?$Dt_ZiCGXq6h9aXLyc((RLed=lGautRBg;eH7cIZ)&(hgs0)5&iw5*w3E5%5oS6% zy3Op2Z~ob?oA$2Ece=6I#S(M#)~#EWEiIe@0RdrAQ8$~-3LQDjj~$D?e*Ms(>$Ro7 zAUIiPdZ1~e+mH9OXf3S;C)wg8j$XjE*$y-&X%>CXw_9~-^|}j~CPJGwt)!x&;#TsK z!6tf?{n&HFf&j#gd#_ZYh2>d2r;bFZoB`xg(x&Zn%9rkWV$R^J{Aw;}2o1 z8-sIlT=E9zW@p3E-?X~Y#`^14iHnOz$Hp34m0;R2Y}~jp?|A)M=QdoJ=5Tvq%@f!D z0y}|#nWS{%hjA}ozHE$3Q1UjUu&6GKs%u{o3Etdcze!Yd-@Hq#jtCyl(Jb2sX=$s- zUmEU8ZdnFW)9kN}j1W*VURU}bgIZCM=h!p1^O(^^S)LZK`4n+Rp)ly*R%zFSH~WqpF8`-SlE@S-dr z{M@3VTEKE0g=g;rPf#i=@41~k0HScuFk`f@W|g-0!GkRC;&)yjDR9Yb|4Y*hQg;O| zDWP0N?Dp>6d&bo@T2KCM4nHNK==AB+l!Fc(6-#rgrhk3@aB}{Eg2xc|`wt(QlGHLY-QW_EIN(iZyX(-gMg%QtUmD1TAwV8P)GBL&Z2=Fzg>JUid}ozM~c z3%8#5&+kx~{lbRLL_4UzuYkNQwtD2e&x~&NTHsAnT66V`^mZoB1d=>i81iyZWmbUi! z!otFL%_BoYr|M0PZqmCIqPx4pAW1jW40_0!*Xo2S3P08pg``((W4ygOhNh` zr4O&p7ADBMtS9}%_@M~^X4=G_!>-u47j;r&ZitAARuQO3MOzW|n9y_nE%zB!M5~0aEVTJae{5IIpZQo&vSILrKKf2SN-t1!|e@v+B+2#?%|aQ3Gf(d zrNSffoBViKakAO5(BqkWnaVB7@%^x36En7{MjY=@WIjt30qsa-ge6AaH?|a9%*_nOc4SceZwRA^W1% zYrh&A8oGAv+J-htCD@HlYJTfT|3g&k*mQRwfvt+pzgrsL{vN9n=zo5ZZe}EKIRg7% zaO>8!H>EA5TKr$l@)ye`-p&VI3QkKCm9i|)dMpiu-O|#+an?Lj;cE7&3xWF$O)P8Go^`AYuLjT`K%d3UTXDk?ho@T0@WOb({?N}DtK0q@o; z`+#3lW2+q$i&bG&g4Z&QA6~)?V4pKm5!#7e$p!q$!od-O#@mNZ zdP~t=3}Zs_=?KtGCR1QwV0CS+PE0!gE{~r2>!S4Z^qX@NFf)0!I|idWX?uHlDRh4R zoYdq{V8<(eT0={V{(*kN<#0C9xaAF*w=k83wr}T%5>Rc4+Y+mTS~kq5k`~bT2uw*v z6L#O?=;)avrl!f5IJE222xXU(yRonaOpKL-BX*=)Kiee5wZE3^XUyGDG|($N+cT^C z#fR-5NaW||qY8q+b$$-%f=NU~=!Wp!6gznIlR^8_b7_mzy{mJ24MIFwu;}< z?Duw`+qZAOc>a7(bB?9u!rads8$9S_%?QpHU0oKK_|9e6gTU9qJUdQp28I{)80nIP z2oMZJj(!(kbob`Xn`G2DG^a<;On$Pg0g%@Kb&Kw2Vq)Tqi;H{l`SUTzKql4I)nS13 z^xL*=BQ>o%xBT7o)RrwjWR4inlaB-1FHHaps zq|{*1h=ibFWp(d+D~r&NO{Yo0olH8b?Mfrdipw{wDeC=)}f;TH1K zeSLkx9baGH6t@kF{q;UompUjcGE&lH5zL*J~Slp z%^jDjcvldAKR# zkSMykM8NW*r*`9x6B^OwZG6mI`Na;&(`<{!&wAMI>Bnp?1Ma~ss}LjC$f4j-S` zKRn-rCPlbkf&%U<1g*+zX4E=b_RvQgqqv84J9q9>@E#XdQ&$)Hx;QtfjwVCtL3t6W z0zFIQK!Q`}6WLSF&aU8b_3YM`mX_L<=$$bk`ThOX!Jp?!HmknXp)cSP!-y)<~5svF>SdVl=L@E`8$Bccve*OY31s>Y+6m0WZ*QG)KT zPIr|a&9r4PzMD)YX>{e{KFrpNXN%J}Yt4Y>C}YL#ix~jY#BV71V9W++1E>?&F>R zfQ346X;u_?=GMs4&U4=eBpJ4E75flq0~bJN2y~;m)cZF!<9ReS%kEzG*U=mIL6y7m z%>C!!t{XN_p-Q7)pZWG?oqwoW>cIz=_E2AgAy&R)>shblbzCA44V!d%}`o47tdD8_~@9BrgQVV+Gh>pnuogL zMn&k@Ne2>D6rFhI=H`+iU>o~n?*R}Qzi*g9B&N=1?z-Jc>_qaJPv+>y0i z2`xSxXxygp-kv9Cda2{2j*#NfwOP)tHhvuowM{Zh91djYQk8B}(-R76IpE^z}c?}JuC+Ts@g z?_Zry?f8B2pr7`pB?hQrqY?#tuz3sg-%P`+`-MfpFY7QEC;3+r@M>&rS1z{}x0o0( z$FT1z-yTe5OyeLZn?mF{9P+|Hkg@;sQ;sd@JMc$k=hJfrWj>Q@RsD3bO>9TH${HQy zxN*J2%s?al(b;c*@?=Y!ts=k_tJeO`2nw2a2 zMtiG{K%`FPPb8vFSeS(&{n)ElpT0uzTnWsp;5NX5&he_m7j9JYCsX(&7)L21GGSrI z)7~7qC!p*Tyl&fxd%*L#xw)0Mx7o{p+rVw1hN|!3U9Cl^F39c)JN=-@$jE5$KxnOk z07fPzCfMm!7id?BP5gMpu>~Sx$T9Bk?>GJZiJZi(_GceFc+d*`>0NDTr?6{RKjtj$ zuLHn3DJiyJY5#tohFz`h?OEI#-Cfl{i=Kd8yGnGfT>5)2uU4@2_{p8_Jkl*lW+3Rv zPVwJc|GgxQoE87A#@hc~YSM%wrp1|a=Pp9$xpN0K-Foo&o#0@aE8Mcw7=6KzisR$s zWlp?-s4}`c?H77wQ<1Y7xMFL2yUoNvlh?$6f#&LvuuO%&PwBqAS<$KIkf9sV+yMeM zY}h~n_OC^Su5#wWKwrfrWkq&Kc=+=x>$c_E)-kyawX$L_e`wj|HzVUSF+df$_adAo zYwVO6h?R|?>-2(RK3=ho|F7!Q)vH$_5-4^6SQog1fP?}!s--klR#(%2KA?uw;TC1o z8sB;!uS69DW5Z6}QDunmF%91iDLNap{7_d*~UJwPrU^Llp7lNWGyB#u2R zcv%E!K_nii#o9%0P3#D?(N=`_N=8$)!Lkb}Dw^VEO##ZsX2xu_%R*LhG1}VNj?I2gjKG>* z2BQhva_E6p*Z1Gw8&=^fuF%NO^70z<{ax$WD-5H$+S=NHT|fZ$m0)*LSnBKRMFyoe zZd9kRfWp0x1O45v2iw_S8)u-RALt&{Fm-lVy_4VzqpU0|5zO!VVX^@2N&tA6Sal2xIKU6zxM9O?Gm|uZez#wp>%d8y;q zA>)(Bvh^-3oKvm2Nu9O(I70tA{692JO?fEyATGq5)z#Id?1q$+H~$J&9aNX5f|G~b zM!Jrf{p#yWJ~wjmk3%RjSRsb``Y&peo#GGZlMHNZY=mHUbeQ?}^(F#wN5jv+v+Tcq zK;^tz(r$PuvU`mR^5d0u5^>Ai+{(cr{lnBp55lV$F_A> zwf66?!Hm~c-jvCw7qnafJX;A36z)N&T2dwEEOF=2HU4m+JxK?9(24C-x?Y5=GRUz| zhU7(@K+3&?n`Kj#I`XUapx2eU@kO`Ebj7o`0^QTf*bL;RI6fGR z#1qAo@9FQq?y^?NwNHbam$#~`Ydv(-u&Y=1UR=56vTCm3l@tZh1MlCzcWeJz=wJ9b zVVCC_@P;?>JK4JZey;%`BsT%k!_0naH?&R==YxXGbIId&h%aPb#&hZgKO@RDwBH^G z@hjKxRCae)bVd>VS?~5%T96b?T+Q!A>a`d@W!l7SMkP8~I?3?_hlEg3)~oo1VMsep ze-)9(M64O~g`?8?^qeS=U{7zaV4J+xs8#$fPo`}r-c*8tT01y|p`ob3v&7xn@tee6 zi;1Cz)L4Jx1O;d}u5`y%vH1hb&~^8WE6T{Q5J&p3rGE%OxAqhm2?K6j4ZfL4){*l; zK@>iPGZaYf(oMpv3+&MT44P=V zLCU2sKN^j;%Tg^ZW6u#%iAaFs>&(MBQnOL6-m^b*Dkv1B4K&b?UY7;FPsBa}%Uln_ zxfiyij7>E?OvazPBmgVQoB$@*u3cM@V=Tr2&5%;Mz(3{KXWBT?vfwvX!!}0kb;*wpX&bj}VVIhl0_F$;E`*|Zu*KS#0n_GxDtz_z+J>8g8!ZI=XGW8b zTl)AYqTVk1zZZ-O4`(2D0Q~g9p>t=>1YNwifV)r@y91wc1aQ{}!3Omb6boBF zco7erY6U`5@d*h;SA$1L{WJ8^CCC1$$;lcl|K(HabFz4Z2rX;??|~io0s)go{GtAI zV&*7go?q5%;R9s@`9}m?HNca!4ds26{D?6S$~5KcAV~ zFOQ@PR6w$K;JRF5R++i?razMreR~u}jrQ$?1b!@&Fz@L2bedsC^V5Ar!lL}+zZ)Jl zEFX7ppoe*AUhel)WAR&pLVX8xQNi%Xa{Xw|H8l}j@GTyacJu4kuh}9lHLaF^-d&H2 zt6e3KLv>WR65LW9;{O<8B$!@2=$dulAF{qvr%>~BuCA^Cx}($6`^?SxipXRO6ciLp zH!GGZI@NXoM9mgD6&WOXHn#zK%a`Vyz;gqlA|kFUo&RNw=`qt6Ax#{FQyoQXF#W3M zM=C7ypC3vkXax>))}er(Es}oY^`fx?fB{WTHn0R*OTD*2PPv4gV-02ycKP!AC5u2;}dWJZh4e=|9()^1tEZFJj5_3%K|EJnOq<^jLRFn(*H#i ztY;whJGGbh*D?TTV$cV(w9lbOVGxPHsm6iFMnwr}4flq{X>@XQGDTVVwA^p?gH8Am2)ukEr`GE3vm2KPDAoq*# z2pFpz)jQ9`$oPKAjQ%`gI70Y}Zk)vP_?_n(|AN5gtfOpzYLfXCD08d&7!gVkkQa%B z%_J_*3Xs+YvmvC&-NU1$YoQzQoZ*k(0$Cw+^w2*9w9zs zeIf7sMb1tQDsNb3=jSK0$tkU=@wI?#ZEY=)$-^>(Q1n)?re>x(T=KleY!r6xv_{Jr z=qS#)Z=tV$^?2=P)a8cYO0Xz*DGSo4AD?(jJ-U_k%H_*1@XloBLE*K6?ME~#F~3u> zsTg7`YCTxwW)<^78B703j;?@t=hXr#d|22TELYyCwylVa?an`Lgx+`$axQ}vmY1Y2 zpaC~$n{gGN{`>-z>5pbGR1GxMhqgaDKCXsqb)BDbmar`616-QxafH#g?kc~o5>uvmzE+R`CJ96b zD{P}Ds6=5`U+75T0%{+jFC~0}skZ`+IznmQ*ql&aa8Y8HX?r7Jm#INrYTvNF{^+)+ z5fKsMXP_TRq3|D+HH2H-^&W0}oK{}7K`2?w2EHt}qMJjKyG5BP1I4K&Ym3SIFF$@9 zwRY+-;ea_51UmskGa{B!?DUBV3tY!#>31G<gyRie|0{; zuYLVmT--{uDz||~D(q5+4-X=X?&3ut9Y=Nt%&&=BudD_yuQAhDTv%B69tLi^53~zf zN;ANhQNPr1JzJ06R#m`YV9fRC{#AsWBmbhk{>lj}s~`+?oAEF8#8QH&-h`LoQ}Q}! z@8jm?CI>QkSg?UPd0ZrdX67QZN@Z_VsH|=6Y7+P3E)?b78lLf!sQ1?nzyh+i(1->d(sf6EO3 zo{7M{SNXyLUf$f1&pGmX*X-=<^M;0;MnypCm`~Vdkd;qG`+LG_2W^+kw?LP>p(9co z$$R+t=Lp1x;R{XcW*QbZ*#<`9k#fYjn9;E_2KcwQYj| zxZ?M>w^isonUW19=gZ}=b49)Vr+*!z?8d{?`TSk=>!kbg(xO^awlwx${EN9iEoyfB zD=gtl@5;emh=|K<0LT(;?l;pLinUvhL2YrzzLt1UL>WtV;++2xOy8Te?fV&pqY8_$mr1 zR04_+(Hw4fLk(2|4P90e*Cs&GL@bsORo3L4?`pgQvtR)+!#SpuHxT9X@CYpUeD;@) z9S)L|kZ6Qzji<{@1T$nFA^MmO*+q>1CMit#XtLDQ)T4C*fmUt#cDGdgeWIBw#TX!P z>Lsf1qd$-Y>%zjqnW0wM;p@LHZz~KDy9uL*=vzR?y}-Y@k019J*y9mo%H=#dtQ%|J znrl7BbY5pS=h5bziY1b}@tbJcM|LuzRUzm$rOk`kNAjr2UfVTNdJ*vfJ%BGFHP?k% z@@zY?8pRa>Py=+y3t5Z=P=juUD;w`e?IGx9Kdwvk4WpXP8Udamhhhu;X3v6FuSCgc zN--V4my8enKed?ek%`ociY1T`O>!(E(X& zxk;Ewvqeu6^$FA3}<#ir_L)^hqZ@~ zWWSd->xBP&DkSv(XBFo3e^FtwJPzDd+5>eEd*15AiIo^J=1)UJR9FOtdw2NHySC5pYTfq+vrq}TE6 zWsHz;42M+|Avw8%>i)HAVfXI~Lw0rV9T|}(-yR1Wod`0U^y7~l?||4~K@2Sz7~w=* z0VigJC#WBQDSXJqBb- z^?#$f&=%pw9uRvFeKlqr{y4ph%ZOjcOF^pjvNpj8g-enN- zRYqG>;{G!Zw#8c1MpK+JVtyE%W>MR5IppGGfdz&%8%85mPzOkjtXBdcD#SNahcX`Zp~0 zvOAEy{r(Q*r4JRw9r&Dnam>o_dsuq#)pVdh6<%`eQJBXktN zH5f+@;+T2l=n)K4@R@Kp?+iyK0_l;``CldNTM<%&piM_~mJC(s57;a=kPt=qqKz=v zQN1e)O|vC^mlvm@YpntlPpk9*hk&sr*yTH-?0ozDc`EYBP)9K{9#Z@G;qHhRB=`*K zfjuEvqorl}HwqNp1-L~;fnX&_0yi`igpWAerxJ(9+c{f)?FYw^y`Pqr0wk%~Qg-$$ zBTRW6|MBPT?L0tniq1L5N-jZga&E&&9RQ~p+`Lew(9dzbQ=glc_V-j_eAms^WU=|ne1xx$L;N%it!cy(23gcs3^qvYRz2A zjDZvMRiHyvH8$opiwb~6l!vg;GcY_SAq=zx&cF>KVe%tSG;5qd)L^V9c?%2V%%VRaPmg_g7fP3LUeC1frTDFPkB$$@O zje>|lyb^@}C;2%s($pYFA_;NNKie>);P4lqToGM%4mrC?<42DkwU+tHLkttC*o8=z zr9+8)8~i(Dzl7~DNza@;8*_s}kWp#&+TFXrF7bwmevq0$zN*pmWDp%E@`GZXH*(Op z0Qyy{B5bM2d^t%;Dd5jbPz=g*cb@tu~kv9)DoVL52spZ@B<@N^1H6ZH(E!OmS~o!6NI zmlx%KbywtkF!?K-RE-h?$k0t@|-lajd9Gr7Y`SMlEjsMXq(#<c2`Kota4ksP!Olau{ftH~pBbjW*{Y^hm;mNB*<~LM+u_>>7zG z{=qn)Gu4U&8cEW@z6pEsM1hryi^R|3UCiN+PzM24vV)(;ww8G2c%gaIQi#EU5bZ{y zB?T;u?X$MA5mH&2v32f11fvOSjQ~cLwarlxO*%?Dx;ZNn&r8hZfpn8Lgi;M9@ z)@*>9PrG56}z(?Y7#VhY#ym*mY{&YNf$C)qJ571C#u^Y%BCHWo{iy%-M z!a!De{lY%{$L;LGA;%0)p)yxdXivj-zuDaI^5tf?GBu&(%;2AtpQeDw4p92BMUt(n zF4>?i72PQV3+w$CX7UKDCvzF7DH*(?50XNRqIbpP@eW}2W9L&c{otOjrD)(i^9Et8 z8WC6l5pFlez-7p zzz&5}R4fdo;V7MIFVLPhQ<9L-BcFkeYYh^>MPUiV!XzQDav_k@=s5N2-9VzoW>ZLI z;ygzUvCbt|;iXX1%|CMP(agY)Xo^eyn@yRUmp`napg<4Yb^7zORLNz; z(?U>iNLhO4*@|Qik^;hEp8_-JMU4A{78e&qCe_u{szFyKlvfKS+u*<-NBxHnhoEew zwoc;g1PLr*6-lK+@EL4wa*%t0=#pVvY^(?d>Lz44CDHI9iCk0woQ$q}JyJY|;X+Hv z^P3$P>Zq%$ivSIyFEo`T->R~_a4z9`58^|3#esrjxmM>PY2p9~IdZ|u#&&(=$nW{# zu?BMo=w->c31k4$ULul}o*Qb#QT^%D;dvMA;nt20%tqeSUn3tgn8=}k$B)&KyFn;2 zQ_|Db^*I0-hSCa9TYzsafG!;4xTZv#gBaEy_%?Yt1(pQPjWm_NGm{_2OOZoWAG~0Y z@yIz)5EyT5Ytyd1e&c?Qc_JD^{$4z()_DefGX) z%gc*EP{GS765R(+H*g@4y02y`*!S=jpEefFMs8Qd=AA)WUw5*{@_nMtqEm*En07vW zX{l)M$DIozv7Q?jA1*GQbC!W94>PG02aYIbM!J;am3>XdxdVV#NSB9FvbkxoaF$Ix`_6xuKUy1(}mKF#K5FX2DZ4I0VguPrIY#s0PGCJ zY5!2eQ|R^|J?TYa09K%-{4g&~&O9JW<#FcBZhd_cqfS3oJAiGX7^RT2*{)2;oDb2f zI*eU|hbEHP9!3LsK{rQEcJ+VxO)&9`DBX!?POZ6uXpA8^anJ*pf&`^&#?38Y=`q*w zgFkl#%bTKkPw-R6#_TaNV#krm2$FuUeCk1@6ZbwI;LU(jb`ZgH9h!s)9*3~sE^ew} zRDgdIl99PqFHN9lZ1{7GOr+DP@k}YwX>=|S^1*M3%A1Rp2nDvZGmVz(_{?Z8IkpF_ z=L+0#Yn*QW8n7CLfyQrL8ANsD$r(Wu^H|e9fnFR3gN7?M-M=3y5LR8?!C~`CCO$Ng zaOkKv=3W1&A7SZMjM3e;t>iE1FL3?;dE{y4&1sYcK4L-)CI6WnY=63Z|KR4!WtUDH zZwi(DE4b8ahv!g4MMZyRCC(25Hk-4$nuzVZ68-13wNC5!(YC&O$Hc(EK>G{l0_+&m z#jaIx|2;MS7TwkFj+Yw#%l1jUp@El zs%mlmdr&_X0Hk; zd9k3MlGqR8>0>w;OAho>uHCrt=jh{y4{V6DARtVE@h#@dtwCdj#2&C0NOvWi)k%IP z*61u5ssMF}M2xU-tKr^}W3 z=O~my(>0i@=*}xB$Z(*=;}=it+)0i(vomUbU2`FmONGJUTQ@=uUagLb*_czpCApnU6w*N{By&ySl zj|k?0v||9ijrcVJ01N8KJVGXfKY7G1^niv4js+eZ8o^a6B9DdNi%?1ZiQ8B#7H52! zM_)@<7&&xe`815zkoRd!QYX1&a`=P97%^B;6aCdDuOIM7#~!Klu$2MZBo?2L}>SGaf|TL=?3Wp@%vEaBFAh z>>NSX`W7f$xlW&?U?S4N0*BhxF#S9hm$metzq;cLqa2w Date: Wed, 11 Dec 2024 08:17:30 +0000 Subject: [PATCH 02/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/pull_request_template.md | 8 +- CODE_OF_CONDUCT.md | 28 ++--- README.md | 8 +- docs/contributing.md | 18 +-- docs/index.md | 6 +- docs/installation.md | 8 +- ehrapy/plot/__init__.py | 2 +- ehrapy/plot/_survival_analysis.py | 116 +++++++++++------- .../coxph_forestplot_create_expected.ipynb | 6 +- tests/conftest.py | 2 + tests/plot/test_catplot.py | 3 +- 11 files changed, 119 insertions(+), 86 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 4214c3b8..0bafff61 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -4,10 +4,10 @@ -- [ ] This comment contains a description of changes (with reason) -- [ ] Referenced issue is linked -- [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] Documentation in `docs` is updated +- [ ] This comment contains a description of changes (with reason) +- [ ] Referenced issue is linked +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] Documentation in `docs` is updated **Description of changes** diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index d6209cca..39816a93 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -14,23 +14,23 @@ religion, or sexual identity and orientation. Examples of behavior that contributes to creating a positive environment include: -- Using welcoming and inclusive language -- Being respectful of differing viewpoints and experiences -- Gracefully accepting constructive criticism -- Focusing on what is best for the community -- Showing empathy towards other community members +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members Examples of unacceptable behavior by participants include: -- The use of sexualized language or imagery and unwelcome sexual - attention or advances -- Trolling, insulting/derogatory comments, and personal or political - attacks -- Public or private harassment -- Publishing others’ private information, such as a physical or - electronic address, without explicit permission -- Other conduct which could reasonably be considered inappropriate in a - professional setting +- The use of sexualized language or imagery and unwelcome sexual + attention or advances +- Trolling, insulting/derogatory comments, and personal or political + attacks +- Public or private harassment +- Publishing others’ private information, such as a physical or + electronic address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting ## Our Responsibilities diff --git a/README.md b/README.md index 32e66dec..6c4533b4 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,10 @@ ## Features -- Exploratory and targeted analysis of Electronic Health Records -- Quality control & preprocessing -- Visualization & Exploration -- Clustering & trajectory inference +- Exploratory and targeted analysis of Electronic Health Records +- Quality control & preprocessing +- Visualization & Exploration +- Clustering & trajectory inference ## Installation diff --git a/docs/contributing.md b/docs/contributing.md index ce5858eb..0a5b318e 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -126,11 +126,11 @@ in the cookiecutter-scverse template. Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features: -- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text -- Google-style docstrings -- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) -- [Sphinx autodoc typehints][], to automatically reference annotated input and output types -- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/) +- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text +- Google-style docstrings +- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) +- [Sphinx autodoc typehints][], to automatically reference annotated input and output types +- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/) See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information on how to write documentation. @@ -144,10 +144,10 @@ These notebooks come from [pert-tutorials](https://github.com/theislab/ehrapy-tu #### Hints -- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only - if you do so can sphinx automatically create a link to the external documentation. -- If building the documentation fails because of a missing link that is outside your control, you can add an entry to - the `nitpick_ignore` list in `docs/conf.py` +- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only + if you do so can sphinx automatically create a link to the external documentation. +- If building the documentation fails because of a missing link that is outside your control, you can add an entry to + the `nitpick_ignore` list in `docs/conf.py` #### Building the docs locally diff --git a/docs/index.md b/docs/index.md index 56cc3037..03a0987d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -61,8 +61,8 @@ medRxiv 2023.12.11.23299816; doi: https://doi.org/10.1101/2023.12.11.23299816 ]( # Indices and tables -- {ref}`genindex` -- {ref}`modindex` -- {ref}`search` +- {ref}`genindex` +- {ref}`modindex` +- {ref}`search` [scanpy genome biology (2018)]: https://doi.org/10.1186/s13059-017-1382-0 diff --git a/docs/installation.md b/docs/installation.md index ba7010a9..b349394e 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -51,10 +51,10 @@ pip install ehrapy[medcat] Available language models are -- en_core_web_md (python -m spacy download en_core_web_md) -- en-core-sci-sm (pip install ) -- en-core-sci-md (pip install ) -- en-core-sci-lg (pip install ) +- en_core_web_md (python -m spacy download en_core_web_md) +- en-core-sci-sm (pip install ) +- en-core-sci-md (pip install ) +- en-core-sci-lg (pip install ) [github repo]: https://github.com/theislab/ehrapy [pip]: https://pip.pypa.io diff --git a/ehrapy/plot/__init__.py b/ehrapy/plot/__init__.py index 70ef2e16..de0b75f7 100644 --- a/ehrapy/plot/__init__.py +++ b/ehrapy/plot/__init__.py @@ -2,6 +2,6 @@ from ehrapy.plot._colormaps import * # noqa: F403 from ehrapy.plot._missingno_pl_api import * # noqa: F403 from ehrapy.plot._scanpy_pl_api import * # noqa: F403 -from ehrapy.plot._survival_analysis import kaplan_meier, ols, coxph_forestplot +from ehrapy.plot._survival_analysis import coxph_forestplot, kaplan_meier, ols from ehrapy.plot.causal_inference._dowhy import causal_effect from ehrapy.plot.feature_ranking._feature_importances import rank_features_supervised diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index e4533477..8b9e10d3 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -3,13 +3,12 @@ import warnings from typing import TYPE_CHECKING -from lifelines import CoxPHFitter -from matplotlib import gridspec import matplotlib.pyplot as plt import matplotlib.ticker as ticker import numpy as np -from numpy import ndarray import pandas as pd +from matplotlib import gridspec +from numpy import ndarray from ehrapy.plot import scatter @@ -18,7 +17,7 @@ from xmlrpc.client import Boolean from anndata import AnnData - from lifelines import KaplanMeierFitter + from lifelines import CoxPHFitter, KaplanMeierFitter from matplotlib.axes import Axes from statsmodels.regression.linear_model import RegressionResults @@ -297,29 +296,32 @@ def kaplan_meier( if not show: return ax - + else: return None -def coxph_forestplot(coxph: CoxPHFitter, - labels: list[str] | None = None, - fig_size: tuple = (10, 10), - t_adjuster: float = 0.1, - ecolor: str = 'dimgray', - size: int = 3, - marker: str = 'o', - decimal: int = 2, - text_size: int = 12, - color: str = 'k'): + +def coxph_forestplot( + coxph: CoxPHFitter, + labels: list[str] | None = None, + fig_size: tuple = (10, 10), + t_adjuster: float = 0.1, + ecolor: str = "dimgray", + size: int = 3, + marker: str = "o", + decimal: int = 2, + text_size: int = 12, + color: str = "k", +): """Plots a forest plot of the Cox Proportional Hazard model. - Inspired by the forest plot in the zEpid package in Python. + Inspired by the forest plot in the zEpid package in Python. Link: https://zepid.readthedocs.io/en/latest/Graphics.html#effect-measure-plots Args: coxph: Fitted CoxPHFitter object. labels: List of labels for each coefficient, default uses the index of the coxph.summary. fig_size: Width, height in inches. - t_adjuster: Adjust the table to the right. + t_adjuster: Adjust the table to the right. ecolor: Color of the error bars. size: Size of the markers. marker: Marker style. @@ -339,7 +341,7 @@ def coxph_forestplot(coxph: CoxPHFitter, """ data = coxph.summary - auc_col = 'coef' + auc_col = "coef" if labels is None: labels = data.index @@ -347,52 +349,80 @@ def coxph_forestplot(coxph: CoxPHFitter, ytick = [] for i in range(len(data)): if not np.isnan(data[auc_col][i]): - if ((isinstance(data[auc_col][i], float)) & (isinstance(data['coef lower 95%'][i], float)) & - (isinstance(data['coef upper 95%'][i], float))): - tval.append([round(data[auc_col][i], decimal), ( - '(' + str(round(data['coef lower 95%'][i], decimal)) + ', ' + - str(round(data['coef upper 95%'][i], decimal)) + ')')]) + if ( + (isinstance(data[auc_col][i], float)) + & (isinstance(data["coef lower 95%"][i], float)) + & (isinstance(data["coef upper 95%"][i], float)) + ): + tval.append( + [ + round(data[auc_col][i], decimal), + ( + "(" + + str(round(data["coef lower 95%"][i], decimal)) + + ", " + + str(round(data["coef upper 95%"][i], decimal)) + + ")" + ), + ] + ) else: - tval.append([data[auc_col][i], ('(' + str(data['coef lower 95%'][i]) + ', ' + str(data['coef upper 95%'][i]) + ')')]) + tval.append( + [ + data[auc_col][i], + ("(" + str(data["coef lower 95%"][i]) + ", " + str(data["coef upper 95%"][i]) + ")"), + ] + ) ytick.append(i) else: - tval.append([' ', ' ']) + tval.append([" ", " "]) ytick.append(i) - maxi = round(((pd.to_numeric(data['coef upper 95%'])).max() + 0.1),2) # setting x-axis maximum + maxi = round(((pd.to_numeric(data["coef upper 95%"])).max() + 0.1), 2) # setting x-axis maximum + + mini = round(((pd.to_numeric(data["coef lower 95%"])).min() - 0.1), 1) # setting x-axis minimum - mini = round(((pd.to_numeric(data['coef lower 95%'])).min() - 0.1), 1) # setting x-axis minimum - fig = plt.figure(figsize=fig_size) gspec = gridspec.GridSpec(1, 6) # sets up grid plot = plt.subplot(gspec[0, 0:4]) # plot of data tabl = plt.subplot(gspec[0, 4:]) # table plot.set_ylim(-1, (len(data))) # spacing out y-axis properly - - plot.axvline(1, color='gray', zorder=1) - lower_diff = data[auc_col] - data['coef lower 95%'] - upper_diff = data['coef upper 95%'] - data[auc_col] - plot.errorbar(data[auc_col], data.index, xerr=[lower_diff, upper_diff], marker='None', zorder=2, ecolor=ecolor, linewidth=0, elinewidth=1) - plot.scatter(data[auc_col], data.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors='None') - plot.xaxis.set_ticks_position('bottom') - plot.yaxis.set_ticks_position('left') + + plot.axvline(1, color="gray", zorder=1) + lower_diff = data[auc_col] - data["coef lower 95%"] + upper_diff = data["coef upper 95%"] - data[auc_col] + plot.errorbar( + data[auc_col], + data.index, + xerr=[lower_diff, upper_diff], + marker="None", + zorder=2, + ecolor=ecolor, + linewidth=0, + elinewidth=1, + ) + plot.scatter(data[auc_col], data.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors="None") + plot.xaxis.set_ticks_position("bottom") + plot.yaxis.set_ticks_position("left") plot.get_xaxis().set_major_formatter(ticker.ScalarFormatter()) plot.get_xaxis().set_minor_formatter(ticker.NullFormatter()) plot.set_yticks(ytick) plot.set_xlim([mini, maxi]) plot.set_xticks([mini, 1, maxi]) plot.set_xticklabels([mini, 1, maxi]) - plot.set_yticklabels(labels) - plot.tick_params(axis='y', labelsize=text_size) - plot.yaxis.set_ticks_position('none') + plot.set_yticklabels(labels) + plot.tick_params(axis="y", labelsize=text_size) + plot.yaxis.set_ticks_position("none") plot.invert_yaxis() # invert y-axis to align values properly with table - tb = tabl.table(cellText=tval, cellLoc='center', loc='right', colLabels=[auc_col, '95% CI'], bbox=[0, t_adjuster, 1, 1]) - tabl.axis('off') + tb = tabl.table( + cellText=tval, cellLoc="center", loc="right", colLabels=[auc_col, "95% CI"], bbox=[0, t_adjuster, 1, 1] + ) + tabl.axis("off") tb.auto_set_font_size(False) tb.set_fontsize(text_size) - for _ , cell in tb.get_celld().items(): + for _, cell in tb.get_celld().items(): cell.set_linewidth(0) plot.spines["top"].set_visible(False) plot.spines["right"].set_visible(False) plot.spines["left"].set_visible(False) - return fig, plot + return fig, plot diff --git a/tests/_scripts/coxph_forestplot_create_expected.ipynb b/tests/_scripts/coxph_forestplot_create_expected.ipynb index bcd174c1..36d6dea2 100644 --- a/tests/_scripts/coxph_forestplot_create_expected.ipynb +++ b/tests/_scripts/coxph_forestplot_create_expected.ipynb @@ -16,9 +16,9 @@ "metadata": {}, "outputs": [], "source": [ + "import matplotlib.pyplot as plt\n", "\n", - "import ehrapy as ep\n", - "import matplotlib.pyplot as plt" + "import ehrapy as ep" ] }, { @@ -49,7 +49,7 @@ "source": [ "genderafib_coxph = ep.tl.cox_ph(adata_subset, duration_col=\"mort_day_censored\", event_col=\"censor_flg\")\n", "\n", - "fig, ax = ep.pl.coxph_forestplot(genderafib_coxph, fig_size=(12,3), t_adjuster=0.15, marker=\"o\", size=2, text_size=14)" + "fig, ax = ep.pl.coxph_forestplot(genderafib_coxph, fig_size=(12, 3), t_adjuster=0.15, marker=\"o\", size=2, text_size=14)" ] }, { diff --git a/tests/conftest.py b/tests/conftest.py index 56e1be71..6c42f8a7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,11 +28,13 @@ def root_dir(): def rng(): return np.random.default_rng(seed=42) + @pytest.fixture def mimic_2(): adata = ep.dt.mimic_2() return adata + @pytest.fixture def mimic_2_encoded(): adata = ep.dt.mimic_2(encoded=True) diff --git a/tests/plot/test_catplot.py b/tests/plot/test_catplot.py index a49ed7eb..986de59f 100644 --- a/tests/plot/test_catplot.py +++ b/tests/plot/test_catplot.py @@ -15,10 +15,11 @@ def test_catplot_vanilla(adata_mini, check_same_image): tol=2e-1, ) + def test_coxph_forestplot(mimic_2, check_same_image): adata_subset = mimic_2[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] coxph = ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") - fig, ax = ep.pl.coxph_forestplot(coxph, fig_size=(12,3), t_adjuster=0.15, marker="o", size=2, text_size=14) + fig, ax = ep.pl.coxph_forestplot(coxph, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) check_same_image( fig=fig, From 225b60675b0bb81b76cffddf7a489c4c2dd98487 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:25:16 +0100 Subject: [PATCH 03/29] changed-notebook --- docs/tutorials/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks index 99b17e70..ac088bca 160000 --- a/docs/tutorials/notebooks +++ b/docs/tutorials/notebooks @@ -1 +1 @@ -Subproject commit 99b17e7039699548a908433fa3ee6b5cbac5e29f +Subproject commit ac088bcabae5de8516ca9a5aa036b4e3cdf67df6 From 47a5b123e01f177551d458689a34f378874975d3 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:33:09 +0100 Subject: [PATCH 04/29] Update ehrapy/plot/_survival_analysis.py Co-authored-by: Eljas Roellin <65244425+eroell@users.noreply.github.com> --- ehrapy/plot/_survival_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 8b9e10d3..63d626e0 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -314,7 +314,7 @@ def coxph_forestplot( color: str = "k", ): """Plots a forest plot of the Cox Proportional Hazard model. - Inspired by the forest plot in the zEpid package in Python. + Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Link: https://zepid.readthedocs.io/en/latest/Graphics.html#effect-measure-plots Args: From a32c121497f067eefd1e22a97f94fd4cc59d5991 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:41:42 +0100 Subject: [PATCH 05/29] Update ehrapy/plot/_survival_analysis.py Co-authored-by: Eljas Roellin <65244425+eroell@users.noreply.github.com> --- ehrapy/plot/_survival_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 63d626e0..9541882f 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -315,7 +315,6 @@ def coxph_forestplot( ): """Plots a forest plot of the Cox Proportional Hazard model. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). - Link: https://zepid.readthedocs.io/en/latest/Graphics.html#effect-measure-plots Args: coxph: Fitted CoxPHFitter object. From 0c7cd45b7799a2d0ba671778d9079122112e0908 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Wed, 11 Dec 2024 15:11:32 +0100 Subject: [PATCH 06/29] Remove useless empty line --- ehrapy/plot/_survival_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 9541882f..c6c638b1 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -338,7 +338,6 @@ def coxph_forestplot( .. image:: /_static/docstring_previews/coxph_forestplot.png """ - data = coxph.summary auc_col = "coef" From 785a2cf7a08eb6ba9486c35732157dbe26e4a2ba Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Wed, 11 Dec 2024 15:11:50 +0100 Subject: [PATCH 07/29] Remove useless comment --- ehrapy/plot/_survival_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index c6c638b1..9302d119 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -381,7 +381,7 @@ def coxph_forestplot( mini = round(((pd.to_numeric(data["coef lower 95%"])).min() - 0.1), 1) # setting x-axis minimum fig = plt.figure(figsize=fig_size) - gspec = gridspec.GridSpec(1, 6) # sets up grid + gspec = gridspec.GridSpec(1, 6) plot = plt.subplot(gspec[0, 0:4]) # plot of data tabl = plt.subplot(gspec[0, 4:]) # table plot.set_ylim(-1, (len(data))) # spacing out y-axis properly From 75ee4ba7256f5787d3067b17323cd4d0ca072be0 Mon Sep 17 00:00:00 2001 From: eroell Date: Wed, 11 Dec 2024 15:37:03 +0100 Subject: [PATCH 08/29] undo again; check rtd build --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 78931dcc..7bc35d29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,7 +99,7 @@ docs = [ "nbsphinx-link", "ipykernel", "ipython", - "ehrapy[dask,medcat]", + "ehrapy[dask]", ] test = [ "ehrapy[dask]", From 541e505c9e7c59624f755a3463ad881897ffe611 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:36:27 +0100 Subject: [PATCH 09/29] renamed function and updated documentation to mention, that it is a lifelines object --- ehrapy/plot/__init__.py | 2 +- ehrapy/plot/_survival_analysis.py | 19 ++++++++++++------- tests/plot/test_catplot.py | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/ehrapy/plot/__init__.py b/ehrapy/plot/__init__.py index de0b75f7..4a57b84e 100644 --- a/ehrapy/plot/__init__.py +++ b/ehrapy/plot/__init__.py @@ -2,6 +2,6 @@ from ehrapy.plot._colormaps import * # noqa: F403 from ehrapy.plot._missingno_pl_api import * # noqa: F403 from ehrapy.plot._scanpy_pl_api import * # noqa: F403 -from ehrapy.plot._survival_analysis import coxph_forestplot, kaplan_meier, ols +from ehrapy.plot._survival_analysis import cox_ph_forestplot, kaplan_meier, ols from ehrapy.plot.causal_inference._dowhy import causal_effect from ehrapy.plot.feature_ranking._feature_importances import rank_features_supervised diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 9302d119..8a05ef6c 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -301,8 +301,8 @@ def kaplan_meier( return None -def coxph_forestplot( - coxph: CoxPHFitter, +def cox_ph_forestplot( + cox_ph: CoxPHFitter, labels: list[str] | None = None, fig_size: tuple = (10, 10), t_adjuster: float = 0.1, @@ -313,11 +313,12 @@ def coxph_forestplot( text_size: int = 12, color: str = "k", ): - """Plots a forest plot of the Cox Proportional Hazard model. + """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. + The method requires a fitted CoxPHFitter object from the lifelines library. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Args: - coxph: Fitted CoxPHFitter object. + coxph: Fitted CoxPHFitter object from the lifelines library. labels: List of labels for each coefficient, default uses the index of the coxph.summary. fig_size: Width, height in inches. t_adjuster: Adjust the table to the right. @@ -332,13 +333,17 @@ def coxph_forestplot( >>> import ehrapy as ep >>> adata = ep.dt.mimic_2(encoded=False) >>> adata_subset = adata[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] - >>> coxph = ep.tl.coxph(adata_subset, event_col="censor_flg", duration_col="mort_day_censored") - >>> ep.pl.coxph_forestplot(coxph) + >>> coxph = ep.tl.cox_ph(adata_subset, event_col="censor_flg", duration_col="mort_day_censored") + >>> ep.pl.cox_ph_forestplot(coxph) .. image:: /_static/docstring_previews/coxph_forestplot.png """ - data = coxph.summary + # check that the coxph object is fitted + if not cox_ph._fitted: + raise ValueError("The CoxPHFitter object must be fitted") + + data = cox_ph.summary auc_col = "coef" if labels is None: diff --git a/tests/plot/test_catplot.py b/tests/plot/test_catplot.py index 986de59f..90b2b5f3 100644 --- a/tests/plot/test_catplot.py +++ b/tests/plot/test_catplot.py @@ -19,7 +19,7 @@ def test_catplot_vanilla(adata_mini, check_same_image): def test_coxph_forestplot(mimic_2, check_same_image): adata_subset = mimic_2[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] coxph = ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") - fig, ax = ep.pl.coxph_forestplot(coxph, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) + fig, ax = ep.pl.cox_ph_forestplot(coxph, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) check_same_image( fig=fig, From ca4530ac9167499e1ca5c073be4e9a17cc2c5ebe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 08:37:46 +0000 Subject: [PATCH 10/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ehrapy/plot/_survival_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 8a05ef6c..6ecc9af4 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -313,7 +313,7 @@ def cox_ph_forestplot( text_size: int = 12, color: str = "k", ): - """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. + """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. The method requires a fitted CoxPHFitter object from the lifelines library. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). @@ -342,7 +342,7 @@ def cox_ph_forestplot( # check that the coxph object is fitted if not cox_ph._fitted: raise ValueError("The CoxPHFitter object must be fitted") - + data = cox_ph.summary auc_col = "coef" From f77f4681e26f87ea76337a5b2a7e45f6af985e6f Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:40:52 +0100 Subject: [PATCH 11/29] removed fitted check and updated name in notebook --- ehrapy/plot/_survival_analysis.py | 4 ---- tests/_scripts/coxph_forestplot_create_expected.ipynb | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 6ecc9af4..ca99ebe5 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -339,10 +339,6 @@ def cox_ph_forestplot( .. image:: /_static/docstring_previews/coxph_forestplot.png """ - # check that the coxph object is fitted - if not cox_ph._fitted: - raise ValueError("The CoxPHFitter object must be fitted") - data = cox_ph.summary auc_col = "coef" diff --git a/tests/_scripts/coxph_forestplot_create_expected.ipynb b/tests/_scripts/coxph_forestplot_create_expected.ipynb index 36d6dea2..d75d8d2d 100644 --- a/tests/_scripts/coxph_forestplot_create_expected.ipynb +++ b/tests/_scripts/coxph_forestplot_create_expected.ipynb @@ -49,7 +49,7 @@ "source": [ "genderafib_coxph = ep.tl.cox_ph(adata_subset, duration_col=\"mort_day_censored\", event_col=\"censor_flg\")\n", "\n", - "fig, ax = ep.pl.coxph_forestplot(genderafib_coxph, fig_size=(12, 3), t_adjuster=0.15, marker=\"o\", size=2, text_size=14)" + "fig, ax = ep.pl.cox_ph_forestplot(genderafib_coxph, fig_size=(12, 3), t_adjuster=0.15, marker=\"o\", size=2, text_size=14)" ] }, { From 66815c4f0adb17ef1a68feccb9a9b4cc07202c51 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:43:02 +0100 Subject: [PATCH 12/29] Update ehrapy/plot/_survival_analysis.py Co-authored-by: Lukas Heumos --- ehrapy/plot/_survival_analysis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index ca99ebe5..285b57ac 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -315,6 +315,7 @@ def cox_ph_forestplot( ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. The method requires a fitted CoxPHFitter object from the lifelines library. + Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Args: From 76f0f0cb487fb2790adc9aea1b7a093f608ad775 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:01:03 +0100 Subject: [PATCH 13/29] updated variable names and moved test to better file --- ehrapy/plot/_survival_analysis.py | 46 ++++++++++++++-------------- tests/plot/test_catplot.py | 11 ------- tests/plot/test_survival_analysis.py | 17 ++++++++++ 3 files changed, 40 insertions(+), 34 deletions(-) create mode 100644 tests/plot/test_survival_analysis.py diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 285b57ac..6dacc124 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -340,28 +340,28 @@ def cox_ph_forestplot( .. image:: /_static/docstring_previews/coxph_forestplot.png """ - data = cox_ph.summary + coxph_summary = cox_ph.summary auc_col = "coef" if labels is None: - labels = data.index + labels = coxph_summary.index tval = [] ytick = [] - for i in range(len(data)): - if not np.isnan(data[auc_col][i]): + for i in range(len(coxph_summary)): + if not np.isnan(coxph_summary[auc_col][i]): if ( - (isinstance(data[auc_col][i], float)) - & (isinstance(data["coef lower 95%"][i], float)) - & (isinstance(data["coef upper 95%"][i], float)) + (isinstance(coxph_summary[auc_col][i], float)) + & (isinstance(coxph_summary["coef lower 95%"][i], float)) + & (isinstance(coxph_summary["coef upper 95%"][i], float)) ): tval.append( [ - round(data[auc_col][i], decimal), + round(coxph_summary[auc_col][i], decimal), ( "(" - + str(round(data["coef lower 95%"][i], decimal)) + + str(round(coxph_summary["coef lower 95%"][i], decimal)) + ", " - + str(round(data["coef upper 95%"][i], decimal)) + + str(round(coxph_summary["coef upper 95%"][i], decimal)) + ")" ), ] @@ -369,8 +369,8 @@ def cox_ph_forestplot( else: tval.append( [ - data[auc_col][i], - ("(" + str(data["coef lower 95%"][i]) + ", " + str(data["coef upper 95%"][i]) + ")"), + coxph_summary[auc_col][i], + ("(" + str(coxph_summary["coef lower 95%"][i]) + ", " + str(coxph_summary["coef upper 95%"][i]) + ")"), ] ) ytick.append(i) @@ -378,22 +378,22 @@ def cox_ph_forestplot( tval.append([" ", " "]) ytick.append(i) - maxi = round(((pd.to_numeric(data["coef upper 95%"])).max() + 0.1), 2) # setting x-axis maximum + x_axis_upper_bound = round(((pd.to_numeric(coxph_summary["coef upper 95%"])).max() + 0.1), 2) - mini = round(((pd.to_numeric(data["coef lower 95%"])).min() - 0.1), 1) # setting x-axis minimum + x_axis_lower_bound = round(((pd.to_numeric(coxph_summary["coef lower 95%"])).min() - 0.1), 1) fig = plt.figure(figsize=fig_size) gspec = gridspec.GridSpec(1, 6) plot = plt.subplot(gspec[0, 0:4]) # plot of data tabl = plt.subplot(gspec[0, 4:]) # table - plot.set_ylim(-1, (len(data))) # spacing out y-axis properly + plot.set_ylim(-1, (len(coxph_summary))) # spacing out y-axis properly plot.axvline(1, color="gray", zorder=1) - lower_diff = data[auc_col] - data["coef lower 95%"] - upper_diff = data["coef upper 95%"] - data[auc_col] + lower_diff = coxph_summary[auc_col] - coxph_summary["coef lower 95%"] + upper_diff = coxph_summary["coef upper 95%"] - coxph_summary[auc_col] plot.errorbar( - data[auc_col], - data.index, + coxph_summary[auc_col], + coxph_summary.index, xerr=[lower_diff, upper_diff], marker="None", zorder=2, @@ -401,15 +401,15 @@ def cox_ph_forestplot( linewidth=0, elinewidth=1, ) - plot.scatter(data[auc_col], data.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors="None") + plot.scatter(coxph_summary[auc_col], coxph_summary.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors="None") plot.xaxis.set_ticks_position("bottom") plot.yaxis.set_ticks_position("left") plot.get_xaxis().set_major_formatter(ticker.ScalarFormatter()) plot.get_xaxis().set_minor_formatter(ticker.NullFormatter()) plot.set_yticks(ytick) - plot.set_xlim([mini, maxi]) - plot.set_xticks([mini, 1, maxi]) - plot.set_xticklabels([mini, 1, maxi]) + plot.set_xlim([x_axis_lower_bound, x_axis_upper_bound]) + plot.set_xticks([x_axis_lower_bound, 1, x_axis_upper_bound]) + plot.set_xticklabels([x_axis_lower_bound, 1, x_axis_upper_bound]) plot.set_yticklabels(labels) plot.tick_params(axis="y", labelsize=text_size) plot.yaxis.set_ticks_position("none") diff --git a/tests/plot/test_catplot.py b/tests/plot/test_catplot.py index 90b2b5f3..8e569928 100644 --- a/tests/plot/test_catplot.py +++ b/tests/plot/test_catplot.py @@ -15,14 +15,3 @@ def test_catplot_vanilla(adata_mini, check_same_image): tol=2e-1, ) - -def test_coxph_forestplot(mimic_2, check_same_image): - adata_subset = mimic_2[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] - coxph = ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") - fig, ax = ep.pl.cox_ph_forestplot(coxph, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) - - check_same_image( - fig=fig, - base_path=f"{_TEST_IMAGE_PATH}/coxph_forestplot", - tol=2e-1, - ) diff --git a/tests/plot/test_survival_analysis.py b/tests/plot/test_survival_analysis.py new file mode 100644 index 00000000..982bc7a8 --- /dev/null +++ b/tests/plot/test_survival_analysis.py @@ -0,0 +1,17 @@ +from pathlib import Path + +import ehrapy as ep + +CURRENT_DIR = Path(__file__).parent +_TEST_IMAGE_PATH = f"{CURRENT_DIR}/_images" + +def test_coxph_forestplot(mimic_2, check_same_image): + adata_subset = mimic_2[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] + coxph = ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") + fig, ax = ep.pl.cox_ph_forestplot(coxph, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) + + check_same_image( + fig=fig, + base_path=f"{_TEST_IMAGE_PATH}/coxph_forestplot", + tol=2e-1, + ) \ No newline at end of file From 21086812150e4dbe5596d8f7252e6002a59147b3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:01:21 +0000 Subject: [PATCH 14/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ehrapy/plot/_survival_analysis.py | 12 ++++++++++-- tests/plot/test_catplot.py | 1 - tests/plot/test_survival_analysis.py | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 6dacc124..3e7c245f 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -370,7 +370,13 @@ def cox_ph_forestplot( tval.append( [ coxph_summary[auc_col][i], - ("(" + str(coxph_summary["coef lower 95%"][i]) + ", " + str(coxph_summary["coef upper 95%"][i]) + ")"), + ( + "(" + + str(coxph_summary["coef lower 95%"][i]) + + ", " + + str(coxph_summary["coef upper 95%"][i]) + + ")" + ), ] ) ytick.append(i) @@ -401,7 +407,9 @@ def cox_ph_forestplot( linewidth=0, elinewidth=1, ) - plot.scatter(coxph_summary[auc_col], coxph_summary.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors="None") + plot.scatter( + coxph_summary[auc_col], coxph_summary.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors="None" + ) plot.xaxis.set_ticks_position("bottom") plot.yaxis.set_ticks_position("left") plot.get_xaxis().set_major_formatter(ticker.ScalarFormatter()) diff --git a/tests/plot/test_catplot.py b/tests/plot/test_catplot.py index 8e569928..e3591132 100644 --- a/tests/plot/test_catplot.py +++ b/tests/plot/test_catplot.py @@ -14,4 +14,3 @@ def test_catplot_vanilla(adata_mini, check_same_image): base_path=f"{_TEST_IMAGE_PATH}/catplot_vanilla", tol=2e-1, ) - diff --git a/tests/plot/test_survival_analysis.py b/tests/plot/test_survival_analysis.py index 982bc7a8..5196ddad 100644 --- a/tests/plot/test_survival_analysis.py +++ b/tests/plot/test_survival_analysis.py @@ -5,6 +5,7 @@ CURRENT_DIR = Path(__file__).parent _TEST_IMAGE_PATH = f"{CURRENT_DIR}/_images" + def test_coxph_forestplot(mimic_2, check_same_image): adata_subset = mimic_2[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] coxph = ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") @@ -14,4 +15,4 @@ def test_coxph_forestplot(mimic_2, check_same_image): fig=fig, base_path=f"{_TEST_IMAGE_PATH}/coxph_forestplot", tol=2e-1, - ) \ No newline at end of file + ) From 88e18a7da617f795ce2ba1478044e4db77eaad1d Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:33:27 +0100 Subject: [PATCH 15/29] made anything after coxphfitter keyword only --- ehrapy/plot/_survival_analysis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 3e7c245f..fdb9c9c2 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -303,6 +303,7 @@ def kaplan_meier( def cox_ph_forestplot( cox_ph: CoxPHFitter, + *, labels: list[str] | None = None, fig_size: tuple = (10, 10), t_adjuster: float = 0.1, From 613be89b6ce3449f21255182b437447942c3d0ee Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:37:42 +0100 Subject: [PATCH 16/29] changed type to iterable --- ehrapy/plot/_survival_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index fdb9c9c2..10af4537 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -13,7 +13,7 @@ from ehrapy.plot import scatter if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Iterable, Sequence from xmlrpc.client import Boolean from anndata import AnnData @@ -304,7 +304,7 @@ def kaplan_meier( def cox_ph_forestplot( cox_ph: CoxPHFitter, *, - labels: list[str] | None = None, + labels: Iterable[str] | None = None, fig_size: tuple = (10, 10), t_adjuster: float = 0.1, ecolor: str = "dimgray", From 8c3d17054544db0e31d7746f44209495917ca7b2 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:41:53 +0100 Subject: [PATCH 17/29] added title and show args --- ehrapy/plot/_survival_analysis.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 10af4537..fc513cd6 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -313,6 +313,8 @@ def cox_ph_forestplot( decimal: int = 2, text_size: int = 12, color: str = "k", + show: bool = True, + title: str | None = None, ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. The method requires a fitted CoxPHFitter object from the lifelines library. @@ -330,6 +332,8 @@ def cox_ph_forestplot( decimal: Number of decimal places to display. text_size: Font size of the text. color: Color of the markers. + show: Show the plot, do not return axis. + title: Set the title of the plot. Examples: >>> import ehrapy as ep @@ -389,7 +393,7 @@ def cox_ph_forestplot( x_axis_lower_bound = round(((pd.to_numeric(coxph_summary["coef lower 95%"])).min() - 0.1), 1) - fig = plt.figure(figsize=fig_size) + plt.figure(figsize=fig_size) gspec = gridspec.GridSpec(1, 6) plot = plt.subplot(gspec[0, 0:4]) # plot of data tabl = plt.subplot(gspec[0, 4:]) # table @@ -434,4 +438,12 @@ def cox_ph_forestplot( plot.spines["top"].set_visible(False) plot.spines["right"].set_visible(False) plot.spines["left"].set_visible(False) - return fig, plot + + if title: + plt.title(title) + + if not show: + return plot + + else: + return None From b3a7c2193462d2fa9926bfc01e77ba34665b0931 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:57:51 +0100 Subject: [PATCH 18/29] less ambiguous loop index --- ehrapy/plot/_survival_analysis.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index fc513cd6..3dd3c343 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -352,21 +352,21 @@ def cox_ph_forestplot( labels = coxph_summary.index tval = [] ytick = [] - for i in range(len(coxph_summary)): - if not np.isnan(coxph_summary[auc_col][i]): + for row_index in range(len(coxph_summary)): + if not np.isnan(coxph_summary[auc_col][row_index]): if ( - (isinstance(coxph_summary[auc_col][i], float)) - & (isinstance(coxph_summary["coef lower 95%"][i], float)) - & (isinstance(coxph_summary["coef upper 95%"][i], float)) + (isinstance(coxph_summary[auc_col][row_index], float)) + & (isinstance(coxph_summary["coef lower 95%"][row_index], float)) + & (isinstance(coxph_summary["coef upper 95%"][row_index], float)) ): tval.append( [ - round(coxph_summary[auc_col][i], decimal), + round(coxph_summary[auc_col][row_index], decimal), ( "(" - + str(round(coxph_summary["coef lower 95%"][i], decimal)) + + str(round(coxph_summary["coef lower 95%"][row_index], decimal)) + ", " - + str(round(coxph_summary["coef upper 95%"][i], decimal)) + + str(round(coxph_summary["coef upper 95%"][row_index], decimal)) + ")" ), ] @@ -374,20 +374,20 @@ def cox_ph_forestplot( else: tval.append( [ - coxph_summary[auc_col][i], + coxph_summary[auc_col][row_index], ( "(" - + str(coxph_summary["coef lower 95%"][i]) + + str(coxph_summary["coef lower 95%"][row_index]) + ", " - + str(coxph_summary["coef upper 95%"][i]) + + str(coxph_summary["coef upper 95%"][row_index]) + ")" ), ] ) - ytick.append(i) + ytick.append(row_index) else: tval.append([" ", " "]) - ytick.append(i) + ytick.append(row_index) x_axis_upper_bound = round(((pd.to_numeric(coxph_summary["coef upper 95%"])).max() + 0.1), 2) From cfdb1cc35edb43e527700bf17a46a2204be3402b Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:13:16 +0100 Subject: [PATCH 19/29] fixed test. had to return figure and axis, so the test can save the image --- ehrapy/plot/_survival_analysis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 3dd3c343..5230ed1a 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -313,7 +313,7 @@ def cox_ph_forestplot( decimal: int = 2, text_size: int = 12, color: str = "k", - show: bool = True, + show: bool = None, title: str | None = None, ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. @@ -332,7 +332,7 @@ def cox_ph_forestplot( decimal: Number of decimal places to display. text_size: Font size of the text. color: Color of the markers. - show: Show the plot, do not return axis. + show: Show the plot, do not return figure and axis. title: Set the title of the plot. Examples: @@ -393,7 +393,7 @@ def cox_ph_forestplot( x_axis_lower_bound = round(((pd.to_numeric(coxph_summary["coef lower 95%"])).min() - 0.1), 1) - plt.figure(figsize=fig_size) + fig = plt.figure(figsize=fig_size) gspec = gridspec.GridSpec(1, 6) plot = plt.subplot(gspec[0, 0:4]) # plot of data tabl = plt.subplot(gspec[0, 4:]) # table @@ -443,7 +443,7 @@ def cox_ph_forestplot( plt.title(title) if not show: - return plot + return fig, plot else: return None From 0d5d6e8303fc77cc49eb244f39924522ad6065c9 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 08:56:53 +0100 Subject: [PATCH 20/29] get summary form adata --- ehrapy/plot/_survival_analysis.py | 12 +++++++++--- tests/plot/test_survival_analysis.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 5230ed1a..d2a18f99 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -302,8 +302,9 @@ def kaplan_meier( def cox_ph_forestplot( - cox_ph: CoxPHFitter, + adata: AnnData, *, + uns_key: str = "cox_ph", labels: Iterable[str] | None = None, fig_size: tuple = (10, 10), t_adjuster: float = 0.1, @@ -322,7 +323,8 @@ def cox_ph_forestplot( Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Args: - coxph: Fitted CoxPHFitter object from the lifelines library. + adata: :class:`~anndata.AnnData` object containing all observations in `.uns`. + uns_key: Key in `.uns` where the CoxPHFitter object is stored. labels: List of labels for each coefficient, default uses the index of the coxph.summary. fig_size: Width, height in inches. t_adjuster: Adjust the table to the right. @@ -345,7 +347,11 @@ def cox_ph_forestplot( .. image:: /_static/docstring_previews/coxph_forestplot.png """ - coxph_summary = cox_ph.summary + # check if the key exists in the uns + if uns_key not in adata.uns: + raise ValueError(f"Key {uns_key} not found in adata.uns. Please provide a valid key.") + + coxph_summary = adata.uns[uns_key] auc_col = "coef" if labels is None: diff --git a/tests/plot/test_survival_analysis.py b/tests/plot/test_survival_analysis.py index 5196ddad..2345102a 100644 --- a/tests/plot/test_survival_analysis.py +++ b/tests/plot/test_survival_analysis.py @@ -8,8 +8,8 @@ def test_coxph_forestplot(mimic_2, check_same_image): adata_subset = mimic_2[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] - coxph = ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") - fig, ax = ep.pl.cox_ph_forestplot(coxph, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) + ep.tl.cox_ph(adata_subset, duration_col="mort_day_censored", event_col="censor_flg") + fig, ax = ep.pl.cox_ph_forestplot(adata_subset, fig_size=(12, 3), t_adjuster=0.15, marker="o", size=2, text_size=14) check_same_image( fig=fig, From 503a97d4dd885f57b910a4665be560b78a45f478 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 09:12:02 +0100 Subject: [PATCH 21/29] updated docs --- docs/usage/usage.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/usage/usage.md b/docs/usage/usage.md index 6f3f2366..541e657b 100644 --- a/docs/usage/usage.md +++ b/docs/usage/usage.md @@ -369,6 +369,7 @@ Methods that extract and visualize tool-specific annotation in an AnnData object plot.ols plot.kaplan_meier + plot.cox_ph_forestplot ``` ### Causal Inference From bb83a5a5713058a8420d6ea7daaa6aebe630efc3 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 09:48:01 +0100 Subject: [PATCH 22/29] updated exampel --- ehrapy/plot/_survival_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index d2a18f99..17d4a130 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -342,7 +342,7 @@ def cox_ph_forestplot( >>> adata = ep.dt.mimic_2(encoded=False) >>> adata_subset = adata[:, ["mort_day_censored", "censor_flg", "gender_num", "afib_flg", "day_icu_intime_num"]] >>> coxph = ep.tl.cox_ph(adata_subset, event_col="censor_flg", duration_col="mort_day_censored") - >>> ep.pl.cox_ph_forestplot(coxph) + >>> ep.pl.cox_ph_forestplot(adata_subset) .. image:: /_static/docstring_previews/coxph_forestplot.png From b560748c877e33f7ad272e4526294915241e08a0 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 09:48:42 +0100 Subject: [PATCH 23/29] removed fitter object from docu --- ehrapy/plot/_survival_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 17d4a130..029b0a59 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -318,7 +318,6 @@ def cox_ph_forestplot( title: str | None = None, ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. - The method requires a fitted CoxPHFitter object from the lifelines library. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). From ad6e8e745e0b649057c23ff897432f3203ae1864 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:45:25 +0100 Subject: [PATCH 24/29] Update ehrapy/plot/_survival_analysis.py Co-authored-by: Eljas Roellin <65244425+eroell@users.noreply.github.com> --- ehrapy/plot/_survival_analysis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 029b0a59..685dbcd3 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -346,7 +346,6 @@ def cox_ph_forestplot( .. image:: /_static/docstring_previews/coxph_forestplot.png """ - # check if the key exists in the uns if uns_key not in adata.uns: raise ValueError(f"Key {uns_key} not found in adata.uns. Please provide a valid key.") From 3386d875e9200a5a4aec87b4066dc27d9817a22e Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:19:26 +0100 Subject: [PATCH 25/29] docu updates, for understandability --- ehrapy/plot/_survival_analysis.py | 55 ++++++++++++++++++------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 685dbcd3..4faaeda7 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -17,7 +17,7 @@ from xmlrpc.client import Boolean from anndata import AnnData - from lifelines import CoxPHFitter, KaplanMeierFitter + from lifelines import KaplanMeierFitter from matplotlib.axes import Axes from statsmodels.regression.linear_model import RegressionResults @@ -318,11 +318,12 @@ def cox_ph_forestplot( title: str | None = None, ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. + The adata object must be populated via the :func:`~ehrapy.tools._sa.cox_ph` function beforehand. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Args: - adata: :class:`~anndata.AnnData` object containing all observations in `.uns`. + adata: :class:`~anndata.AnnData` object containing the summary table from the CoxPHFitter. This object is populated using the :func:`~ehrapy.tools._sa.cox_ph` function. uns_key: Key in `.uns` where the CoxPHFitter object is stored. labels: List of labels for each coefficient, default uses the index of the coxph.summary. fig_size: Width, height in inches. @@ -349,28 +350,30 @@ def cox_ph_forestplot( if uns_key not in adata.uns: raise ValueError(f"Key {uns_key} not found in adata.uns. Please provide a valid key.") - coxph_summary = adata.uns[uns_key] + coxph_fitting_summary = adata.uns[ + uns_key + ] # pd.Dataframe with columns: coef, exp(coef), se(coef), z, p, lower 0.95, upper 0.95 auc_col = "coef" if labels is None: - labels = coxph_summary.index + labels = coxph_fitting_summary.index tval = [] ytick = [] - for row_index in range(len(coxph_summary)): - if not np.isnan(coxph_summary[auc_col][row_index]): + for row_index in range(len(coxph_fitting_summary)): + if not np.isnan(coxph_fitting_summary[auc_col][row_index]): if ( - (isinstance(coxph_summary[auc_col][row_index], float)) - & (isinstance(coxph_summary["coef lower 95%"][row_index], float)) - & (isinstance(coxph_summary["coef upper 95%"][row_index], float)) + (isinstance(coxph_fitting_summary[auc_col][row_index], float)) + & (isinstance(coxph_fitting_summary["coef lower 95%"][row_index], float)) + & (isinstance(coxph_fitting_summary["coef upper 95%"][row_index], float)) ): tval.append( [ - round(coxph_summary[auc_col][row_index], decimal), + round(coxph_fitting_summary[auc_col][row_index], decimal), ( "(" - + str(round(coxph_summary["coef lower 95%"][row_index], decimal)) + + str(round(coxph_fitting_summary["coef lower 95%"][row_index], decimal)) + ", " - + str(round(coxph_summary["coef upper 95%"][row_index], decimal)) + + str(round(coxph_fitting_summary["coef upper 95%"][row_index], decimal)) + ")" ), ] @@ -378,12 +381,12 @@ def cox_ph_forestplot( else: tval.append( [ - coxph_summary[auc_col][row_index], + coxph_fitting_summary[auc_col][row_index], ( "(" - + str(coxph_summary["coef lower 95%"][row_index]) + + str(coxph_fitting_summary["coef lower 95%"][row_index]) + ", " - + str(coxph_summary["coef upper 95%"][row_index]) + + str(coxph_fitting_summary["coef upper 95%"][row_index]) + ")" ), ] @@ -393,22 +396,22 @@ def cox_ph_forestplot( tval.append([" ", " "]) ytick.append(row_index) - x_axis_upper_bound = round(((pd.to_numeric(coxph_summary["coef upper 95%"])).max() + 0.1), 2) + x_axis_upper_bound = round(((pd.to_numeric(coxph_fitting_summary["coef upper 95%"])).max() + 0.1), 2) - x_axis_lower_bound = round(((pd.to_numeric(coxph_summary["coef lower 95%"])).min() - 0.1), 1) + x_axis_lower_bound = round(((pd.to_numeric(coxph_fitting_summary["coef lower 95%"])).min() - 0.1), 1) fig = plt.figure(figsize=fig_size) gspec = gridspec.GridSpec(1, 6) plot = plt.subplot(gspec[0, 0:4]) # plot of data tabl = plt.subplot(gspec[0, 4:]) # table - plot.set_ylim(-1, (len(coxph_summary))) # spacing out y-axis properly + plot.set_ylim(-1, (len(coxph_fitting_summary))) # spacing out y-axis properly plot.axvline(1, color="gray", zorder=1) - lower_diff = coxph_summary[auc_col] - coxph_summary["coef lower 95%"] - upper_diff = coxph_summary["coef upper 95%"] - coxph_summary[auc_col] + lower_diff = coxph_fitting_summary[auc_col] - coxph_fitting_summary["coef lower 95%"] + upper_diff = coxph_fitting_summary["coef upper 95%"] - coxph_fitting_summary[auc_col] plot.errorbar( - coxph_summary[auc_col], - coxph_summary.index, + coxph_fitting_summary[auc_col], + coxph_fitting_summary.index, xerr=[lower_diff, upper_diff], marker="None", zorder=2, @@ -417,7 +420,13 @@ def cox_ph_forestplot( elinewidth=1, ) plot.scatter( - coxph_summary[auc_col], coxph_summary.index, c=color, s=(size * 25), marker=marker, zorder=3, edgecolors="None" + coxph_fitting_summary[auc_col], + coxph_fitting_summary.index, + c=color, + s=(size * 25), + marker=marker, + zorder=3, + edgecolors="None", ) plot.xaxis.set_ticks_position("bottom") plot.yaxis.set_ticks_position("left") From 1364d97a9fbf6bcc91b3127dc69249efcef6e4f3 Mon Sep 17 00:00:00 2001 From: eroell Date: Wed, 15 Jan 2025 14:27:35 +0100 Subject: [PATCH 26/29] link between functions --- ehrapy/plot/_survival_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 4faaeda7..6c21b781 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -318,12 +318,12 @@ def cox_ph_forestplot( title: str | None = None, ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. - The adata object must be populated via the :func:`~ehrapy.tools._sa.cox_ph` function beforehand. + The adata object must be populated via the :func:`~ehrapy.tools.cox_ph` function beforehand. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Args: - adata: :class:`~anndata.AnnData` object containing the summary table from the CoxPHFitter. This object is populated using the :func:`~ehrapy.tools._sa.cox_ph` function. + adata: :class:`~anndata.AnnData` object containing the summary table from the CoxPHFitter. This object is populated using the :func:`~ehrapy.tools.cox_ph` function. uns_key: Key in `.uns` where the CoxPHFitter object is stored. labels: List of labels for each coefficient, default uses the index of the coxph.summary. fig_size: Width, height in inches. From c0e9b49816c8212dcacaadcc9b425f5897235897 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:49:41 +0100 Subject: [PATCH 27/29] Enhance documentation for cox_ph_forestplot function to clarify usage of adata and summary table --- ehrapy/plot/_survival_analysis.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index 6c21b781..febefa4f 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -318,14 +318,17 @@ def cox_ph_forestplot( title: str | None = None, ): """Generates a forest plot to visualize the coefficients and confidence intervals of a Cox Proportional Hazards model. - The adata object must be populated via the :func:`~ehrapy.tools.cox_ph` function beforehand. + + The `adata` object must first be populated using the :func:`~ehrapy.tools.cox_ph` function. This function stores the summary table of the `CoxPHFitter` in the `.uns` attribute of `adata`. + The summary table is created when the model is fitted using the :func:`ehrapy.tl.cox_ph` function. + For more information on the `CoxPHFitter`, see the `Lifelines documentation `_. Inspired by `zepid.graphics.EffectMeasurePlot `_ (zEpid Package, https://pypi.org/project/zepid/). Args: - adata: :class:`~anndata.AnnData` object containing the summary table from the CoxPHFitter. This object is populated using the :func:`~ehrapy.tools.cox_ph` function. - uns_key: Key in `.uns` where the CoxPHFitter object is stored. - labels: List of labels for each coefficient, default uses the index of the coxph.summary. + adata: :class:`~anndata.AnnData` object containing the summary table from the CoxPHFitter. This is stored in the `.uns` attribute, after fitting the model using :func:`~ehrapy.tl.cox_ph`. + uns_key: Key in `.uns` where :func:`~ehrapy.tools.cox_ph` function stores the summary table. + labels: List of labels for each coefficient, default uses the index of the summary ta fig_size: Width, height in inches. t_adjuster: Adjust the table to the right. ecolor: Color of the error bars. From 6724d74b4af3fefca0f46f4c645db0a69716ff3d Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:53:14 +0100 Subject: [PATCH 28/29] Update documentation for cox_ph_forestplot function to clarify uns_key argument usage --- ehrapy/plot/_survival_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index febefa4f..ba912519 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -327,7 +327,7 @@ def cox_ph_forestplot( Args: adata: :class:`~anndata.AnnData` object containing the summary table from the CoxPHFitter. This is stored in the `.uns` attribute, after fitting the model using :func:`~ehrapy.tl.cox_ph`. - uns_key: Key in `.uns` where :func:`~ehrapy.tools.cox_ph` function stores the summary table. + uns_key: Key in `.uns` where :func:`~ehrapy.tools.cox_ph` function stored the summary table. See argument `uns_key` in :func:`~ehrapy.tools.cox_ph`. labels: List of labels for each coefficient, default uses the index of the summary ta fig_size: Width, height in inches. t_adjuster: Adjust the table to the right. From 39aca12a92c4603e1aac295de9af781a26cdaa8e Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:56:21 +0100 Subject: [PATCH 29/29] Refactor subplot variable names for clarity in cox_ph_forestplot function --- ehrapy/plot/_survival_analysis.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ehrapy/plot/_survival_analysis.py b/ehrapy/plot/_survival_analysis.py index ba912519..37fcd90c 100644 --- a/ehrapy/plot/_survival_analysis.py +++ b/ehrapy/plot/_survival_analysis.py @@ -405,8 +405,8 @@ def cox_ph_forestplot( fig = plt.figure(figsize=fig_size) gspec = gridspec.GridSpec(1, 6) - plot = plt.subplot(gspec[0, 0:4]) # plot of data - tabl = plt.subplot(gspec[0, 4:]) # table + plot = plt.subplot(gspec[0, 0:4]) + table = plt.subplot(gspec[0, 4:]) plot.set_ylim(-1, (len(coxph_fitting_summary))) # spacing out y-axis properly plot.axvline(1, color="gray", zorder=1) @@ -422,6 +422,7 @@ def cox_ph_forestplot( linewidth=0, elinewidth=1, ) + # plot markers plot.scatter( coxph_fitting_summary[auc_col], coxph_fitting_summary.index, @@ -431,6 +432,7 @@ def cox_ph_forestplot( zorder=3, edgecolors="None", ) + # plot settings plot.xaxis.set_ticks_position("bottom") plot.yaxis.set_ticks_position("left") plot.get_xaxis().set_major_formatter(ticker.ScalarFormatter()) @@ -443,14 +445,16 @@ def cox_ph_forestplot( plot.tick_params(axis="y", labelsize=text_size) plot.yaxis.set_ticks_position("none") plot.invert_yaxis() # invert y-axis to align values properly with table - tb = tabl.table( + tb = table.table( cellText=tval, cellLoc="center", loc="right", colLabels=[auc_col, "95% CI"], bbox=[0, t_adjuster, 1, 1] ) - tabl.axis("off") + table.axis("off") tb.auto_set_font_size(False) tb.set_fontsize(text_size) for _, cell in tb.get_celld().items(): cell.set_linewidth(0) + + # remove spines plot.spines["top"].set_visible(False) plot.spines["right"].set_visible(False) plot.spines["left"].set_visible(False)