From 1c246bec3b141d5ca50b4b0d5807bcdce8988e42 Mon Sep 17 00:00:00 2001 From: Alex Reichenbach <8087473+Reichenbachian@users.noreply.github.com> Date: Tue, 14 May 2024 18:36:09 +0000 Subject: [PATCH] fixed --- .readthedocs | 30 +++ README.md | 4 +- docs/Makefile | 20 ++ docs/Model.md | 33 +++ docs/make.bat | 35 +++ docs/requirements.txt | 1 + docs/source/_static/favicon.png | Bin 0 -> 669 bytes docs/source/_static/logo.png | Bin 0 -> 5649 bytes docs/source/_static/webclip.png | Bin 0 -> 4487 bytes docs/source/_templates/sidebar.html | 56 +++++ docs/source/coming_soon/analysis.rst | 63 ++++++ docs/source/coming_soon/search.rst | 51 +++++ docs/source/coming_soon/sharing.rst | 22 ++ docs/source/conf.py | 54 +++++ docs/source/examples/example0.rst | 197 +++++++++++++++++ docs/source/examples/example1.rst | 105 +++++++++ docs/source/examples/example2.rst | 118 ++++++++++ docs/source/examples/examplehome.rst | 9 + docs/source/get_started/intro.rst | 59 +++++ docs/source/get_started/overview.rst | 23 ++ docs/source/get_started/quickstart.rst | 135 ++++++++++++ docs/source/index.rst | 99 +++++++++ docs/source/more/changelog.rst | 16 ++ docs/source/more/faq.rst | 76 +++++++ docs/source/python_client/backsourcing.rst | 49 +++++ docs/source/python_client/datasets.rst | 243 +++++++++++++++++++++ docs/source/python_client/documents.rst | 67 ++++++ scripts/docs | 7 + scripts/lint | 1 + 29 files changed, 1570 insertions(+), 3 deletions(-) create mode 100644 .readthedocs create mode 100644 docs/Makefile create mode 100644 docs/Model.md create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/source/_static/favicon.png create mode 100644 docs/source/_static/logo.png create mode 100644 docs/source/_static/webclip.png create mode 100644 docs/source/_templates/sidebar.html create mode 100644 docs/source/coming_soon/analysis.rst create mode 100644 docs/source/coming_soon/search.rst create mode 100644 docs/source/coming_soon/sharing.rst create 
mode 100644 docs/source/conf.py create mode 100644 docs/source/examples/example0.rst create mode 100644 docs/source/examples/example1.rst create mode 100644 docs/source/examples/example2.rst create mode 100644 docs/source/examples/examplehome.rst create mode 100644 docs/source/get_started/intro.rst create mode 100644 docs/source/get_started/overview.rst create mode 100644 docs/source/get_started/quickstart.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/more/changelog.rst create mode 100644 docs/source/more/faq.rst create mode 100644 docs/source/python_client/backsourcing.rst create mode 100644 docs/source/python_client/datasets.rst create mode 100644 docs/source/python_client/documents.rst create mode 100755 scripts/docs diff --git a/.readthedocs b/.readthedocs new file mode 100644 index 00000000..d626c2d4 --- /dev/null +++ b/.readthedocs @@ -0,0 +1,30 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: doc-requirements.txt \ No newline at end of file diff --git a/README.md b/README.md index e7152c5c..08cf0304 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,9 @@ The Structify Python 
library provides convenient access to the Structify REST AP application. The library includes type definitions for all request params and response fields, and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). -It is generated with [Stainless](https://www.stainlessapi.com/). - ## Documentation -The REST API documentation can be found [on api.structify.ai](https://api.structify.ai/). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found on [api.structify.ai](https://api.structify.ai/). The full API of this library can be found in [api.md](api.md). ## Installation diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/Model.md b/docs/Model.md new file mode 100644 index 00000000..5629c36d --- /dev/null +++ b/docs/Model.md @@ -0,0 +1,33 @@ +# Model + + +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**description** | **str** | | +**name** | **str** | | +**public** | **bool** | | +**schema_box_id** | **int** | | +**user_id** | **int** | | +**version** | **int** | | + +## Example + +```python +from structify.models.model import Model + +# TODO update the JSON string below +json = "{}" +# create an instance of Model from a JSON string +model_instance = Model.from_json(json) +# print the JSON string representation of the object +print Model.to_json() + +# convert the object into a dict +model_dict = model_instance.to_dict() +# create an instance of Model from a dict +model_form_dict = model.from_dict(model_dict) +``` +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..747ffb7b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..ed7ea77b --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +sphinxawesome_theme \ No newline at end of file diff --git a/docs/source/_static/favicon.png b/docs/source/_static/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..45b185d883de416e505eb81d41c7ee0a27f611da GIT binary patch literal 669 zcmV;O0%HA%P)q(ZL zWuh9!nADEaj>>!%r5%~Ko7Bd0`zT0>*Xu>k=aX)?+xy*kJa%I~pS!05MUo^N7|*oS z&*!sA)6`f42E0Q9-|Y8$&5|kAA&KICzq>_aM!7MaPTkxHN|KCUuUFAyi3HXNpd!ih z{}|UqB1&MnT(S!;77Mf8ZvQL_FL%40GsexyvaBBgEY2;oTCI!(e8bJHs!C4)9<~?2 zb_hx307Kl?old9Dx8D>1YP}5`da@xV3D5x>agYuh;C}4~?&o7o4Y1J}X8v zC_>E2vSbqpd#hAA6{2MrQ$Lg&N(=JLW%nQikny3qCPx2}Hy|_^fxaZ#(b1HwNWde{ z^WH>|FGW!dL%;)a3-3^pS`tX;!3Z>g@ literal 0 HcmV?d00001 diff --git a/docs/source/_static/logo.png b/docs/source/_static/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..16d5c09c5b56850ef91e3c79106c431f944bd388 GIT binary patch literal 5649 zcmeI0_d6SI_s2u2)xDLf3R=6Rq$xqsn59+`tF6%rl2#BaqBGUndzGqHBT21j5R^|_ zRitKWM5+xz5VcYx@p!)f!1sse*UxpX`_kCUWxz0V#EB23;Fk;s5|(zqy%- z9RMJZ^xq-6ch?iss?!Ys9RFc%a?K%3V1dp{2Y#aLy}bb8V&_GNO>Q53v>+NF_dXWz zV%KyYKNCG62iUckc)*L>i_INMx6t2Bhloujh@O?aq9^;cfaOh%jEcF0<)>HkUdMq6;wVBOiA%a@Thin}-xp<~N(e{`{JRoW*@s+ECs3 z`^7*;-=_W9M!;1=I|)DS0vgfNV0#Cnq#9nLm|~PR`a)1>;I()nhUh`bk(ML37;G?8 z30z%88jotQ)9mGuE1q<$Sfh1&x$tWqHet{x=c$>;&IT1L16OFv+MN=`orvUw@B$Fy z2K>KX&Z^_pB`Gkwa-Q^eO!Evo(R7$w}&;=D4mR$ zw#83fqG=c4HoKAO0x3qR`oqWG^tHn}NFP({f_mar^D=ytStqG2LCI#$AF0SowDAP%{Fj|<*9{&tB zyXD))olx#?e>H9X_@K(sotn#EMJFG+J0TwxlVOrffHC;erkdJlMI& 
znZzbqEIE9x>9FsE?byc$dqt8SVx0CTt)de`4^9=jJub2fvuisl?-!AHVqK%gg9gLrU^(ZfQM2H3mdD zAxl1&80@Kb=`4_H18uBJ0IHr6x1H(yMui*s=z>i#{81n6iW|uRvV0LR}vw&?d*2 z)qJKYjiEXN3bDx{9c7#Y9(;q!XGE~^sRpR|sCeleW1q!AO#@(hmo%A9S%RW+Q~ zn(PE?33r+OsTq_v5J*>f5+&2{q3pC|+`x1Leq`>}Z;0ZJR)qt;IXIh#dK=nKkckv9 z$sVHkr6r`$w?VJ+vwEdFo01;mP&K_Ti6o3BsyV0<)(`QhSpLd)YA)^)u}<&l7eBE6 zNI*dB(v?+x9c|Z%PE`DQfWLYXo>r1i@0o=QEYn)~Au}x0;ik!+-=AKc)aktn1SO6S zYPd}IA2}@d%>v)TEevk=>ScQuAIZy@J)vmfp=OwXcMq|$o^Il_G<NY*74yoISlZ zte{7zzgl<5<(AZ8no7~o^u^Oqm)}SU!qwylt0NuS@MjH*yv78^L;oKz*>iVqcLYJF z(|ST=G{>8qlxeOrEiWtgnoS8fsxQsE6AohC1f3Vw>%i?^2@67YE%l_#OPVL|5fD|$ zvZMWP@+%YC;T?m@*S=>wqB6{|cNG2HBRX9tG*q@_Qds*a?$;$@Vd_?_oEY?C>EJEL zKn^!8<3ahNCdl9G z-$@)DH0I`+PRQ3$B5^mZK*NK%n`WYcaAz((J>H6`%WWjO=vggCtj!I{rN%(rf^1fk zDeuvyimr4J+r}E!6}=V?2~{~Nrjlx;mHi&wx{&rIt*4KBO6g1H=uym^NxMC?%RWTY zH*OqVY7M0@Bi%=bxO>cN60H`&3iXsh<>mK~j;AZl#Y6XlMYe6>cgrVBj}MVJ5XBz8 zYkt|q(Rx}alTG(5J;UzPruV1#M3XtIr1$+5x&7|?nwxMHs(y}RfM2eJe{6KC*Q2|l z>|6|+AOyzFZ3Tc~P0&kVhWR+4XimHRCvk)vFMSwxwz>ikd->16 zdF)qt>!f1GG3U&yMSB_%TZ_va0>JZmbT%sS9J*+#ffi4c?4PK^sR**ZDu@LdbJN zVXo+F)^1Quw~AaPVde7%2e}RJWqMxVRg)hm0a0$#GUrl{hmi@%$0Npmy`*rT=Dyyl zX1RP`Ueu8Ttz8x?3s4)&a9Awgm;!$^bL!x$`)=d`>>UB5EU}x!R(`}fdWDXv z*2D6*=c)N)l=tQzxa%v^D6Z<;a0KW>UQ=8tJDC$T_sgom2L*o@gxFi&y(-=0zdu|? zNd(*iQOh-+y50M`l}PV2&6isB0F^Zcni7VhGACq`cfSN^UcjR1MihkQ6{34=0%Dsz zj$*WjWzE#7SheV`cO`fgscQ=acpDAO2)&n~a3C!23bJBwLRzYYszMAS(%A4WaB7PDb7J0u|SsG@kD02QH|nUJ%72! 
zNGC0sy^yz3HfGzcd>1FH^`7tP)V#pTXRYc-y09;(2?QCr*4zy?Ln3rYKc9^a>1M(S zCpuxKequ=n(!erhw!4@y#@=PRjCNh!%vyv9y zNb$i+XV|543nf?TsNFEpB!QyZo8&XVlkZVGmsdl*&bam#H>8lKzqVH9$LeeTgGc`w zJ643ndJikJb%JDsI|q`)eW;oj@Rjh^eQSh|!=W~Am5w522bTF(llGb`AgS%DSU`F9 zie$SR^=*XjcTl57!Sh>w3t-0V=TXkbnKPLB_5G!{=+Crkr2kQXV~RjJlePo3=tv^D z>(hOgLGkd@5y+)PV_$X=F~?D)bEY%1#6P;I-bKI;WcsZTAE|>A>g?ReL!CmyE2;Sq zc7z|#?G|T$^bXhzNp3c2*mkHqkR;uf{3>)-@+S}^ETzNi>m=vZJiD)j4nH+(-PxOo zs(%YzPrJFOUWuQM?2Q&id&hF?U3nL(`U-0;huSooqL|1r)bB*7p!Zw=zYo(J85i&< zhsyu;=@s1psK)LSKe;l4cdoOrMP3h<*sU&yn16QfdELkz6yJlhJk(=n%pvKS2Qu9f zR3)Ql(6t@RacE}0wTd^~#Y!}Oz8nHdm7`jMZrPB{ENdE!lDgVXz)~v@CQYOcFoh^9 zy}Gq)*I3>xPP~VirM5vsv%&g|a9?P#baHNByH_mkM^R*US$DCKJw>dnpc&oJ?EJRB zzX{!IpApn&PqJ5xG)*TwoT5Kj)RAS+arO@Lk;M7yH|YkzR!~jmM?Sly^y;by_Ze{u zdCw+h-u&jn1)d>vqBTgyvn6VT;?z+{E9V*JB=clPRaodc>1Zw;ZM^jlQ)?~bRa8bY zmC;1}bgqGPw)4_Z4Q5{3hf!cTY7)b|ZB4$!^v${v9b}syS+G4mc%#%V3mUiA5n+aF z`9_xNtluMF!MfC%`FnM4s1nD?N2NRQv4{uuU8a$1LS}IRc0(RwPx7U_d++f1Gu*G$ zv+3M-(z(Wlm-V)!|3C;C3) z_xSY>&jHyhdh5}N69o(Vlg3pmd=y(V+box+qL`(5hE{Api$e*+&ZaQR7BQ4wr->#^ ztvCqES3ndJJ(VO}tIO2_E1&W0h;BF2OYb3MQJClU^!@&#Nnn3Cg;m65MegxlbynwX z&J-msLVF~cWw3P{CBtduqpq_pFK?)oX5Gji*3%31l*#3ftk*xY{bhxg} zHCr&Vs4P_Vs$8&GyMpmto^*!;tfEzy_NSWLBW15*`hU_MHC7Ahka;}t)fH>XDf`Im zO5c`siK|~!Sr5w1k@kqHBIVrpo;wL_S+#dy0`OZ|^CiuZ)#`d;b7ZID?sHm5y|Fdw zRe~Czvadxmr0G)W~NK9t?okn89Dwa^Kw=njA3K5?(>W}nxW`^+W} zJLnO|RwOip?xc8tPcMPqd7Xj+g{b%arlpxzLo}}1ef^1l1~SExb%84n~D{~t4`!`23!Z4SKmg>?(O3C zaJw}xv*2|L)uf>tW9YAJnkN?X7fqfSz1~cs>R#`)m9ic7SZE6T(?o*E+8uC}9M`}tY!%Pf zV`K?D-YB?}h2d?kadz|zE(OV`p2a#V9{{@dq4X^&#kmI$#aMoO8%r4zD{g1JHUm|2!MVg2@6vgq};0svWPj0zj^wiBC+yMm*df|Y?m!|U(BrGYaU!bch!%?CwyH}vxl^CPck vCjaz*Vc5O@33>d>&;I3x|36M;?f|Yy;4FRTg%oyCFTnh|wF$xK_T&Em+wTcV literal 0 HcmV?d00001 diff --git a/docs/source/_static/webclip.png b/docs/source/_static/webclip.png new file mode 100644 index 0000000000000000000000000000000000000000..5dbaa71f9019300c4dafe6bc70542bb64519040d GIT binary patch literal 4487 
zcmZvgc|26#|Hse0W*Ac#Wh61SqDaV4_93EdQ+A3eWyzMUv750++AYyAlqHfq8QY{! zNeg4IWUUx{ELoEMJD+B(&g0zoN?@gkGF&|nK4FKRfV`6w7 z0Ejb$0E(N_T?i<4<8-{&O&kIN*tPe+27#={BL7VZJa23OUUY~}aSVjJzPUaCm3MY+ zUP1yOD0aqB-#P@E9h!AGV7%|$@Akl74Ky?gi;9G`r=GtZdVu$0Cfuh zSmyuCe^=v(IXOAgo#ieq{fZlFEh8<_E_I&K+iUJy^|iHWqg>EvoMyS^YKD@FbN06P z`s5VE}-6Y=fb=B6l?SfN0#%OB#_3Sm=$loZd(=j+SWd7iOZima^vhpv(!xp)^k>g}M z50;r6BsCRtzFt7f8o4|1&AOm;vkb7MH#FzyW&E8GPk)j;J3cNwb^s>veqV_^r*@r8 zZu?HS%s*<*7~~vX&zgAGy!FzFEN1F$U{5e52j_Vl=WQ-7>A6lwyLe?cG{K6`YYShn z+#4HqLBwAIhDYTXs*h>3KED~M+r>*Tf+-XnmAZOl%{5sB{se8xS?=yy`|)9K^F7?s z9<r30G~@Al%9WXcBrULFRG%KvQqxW`Iu|4bdBy`))HR>S#Xi>%>H!x;v;hwjePv zF(|6>%s|o$Y`Fn9C1~6K6Q+%zMQ=nc9ysx*t_C%CZb-%{$8i$go{-s5eywR~|lo`OvSrC-n z?)|D4@%Ju2KYv(@2o|x1T1KUmEjM|ckgAq1s_~H7@8nT+Halp2eWZ8^WvYg|Q#y9t ze&?cMWL@dY_Bhhjw2y!lUlc2?c+v1<&6#!wadGiSjibw2T_Z!?t4-ckOov~`f%dUg znwdH_vd$%aK(fLS`D!QdF?Z-PtP10q1ngbmz1Z)kmdx z2uVpynOa`%HL7npy5+Ru_i|Y)s}H|i^UemC;j|txNN%+$DJk;uHhx*`n&piw-}MHk zUA}foTn@UQWR&D@Zg#|2IKOPHkX1^q`scHOWaL`(=5OPwq@HWhS3*x&D1PXk??#`% zQfzY^o@(jQc8t@xRFv+#Y&8eBPCvOKNQ+i4m7;g}O*uY=+4qJI_I%XLR7|AQ+W9Ir zag9lX=jM$`vW+LBHhs|4_8#h$n+4hTQb5Ix)d3@)EuF<j{}md1y!_WK_{T&v>zch>QKm#@lLDA+t( z`|o`cySaI1s?Q_szp8dc0DIA$@Gqwse(U^Y+M2c30|QieNJ#RIo5x|&>z*DfdGS+} z+dqfGDm4`J&mvh>jdo4QpzR5JUaJY<=r-k}lD?^Kni6}~7|A+Lvv);8tfMq4QB$6h zp4#@~iji*euFvn=9;baWi0601tT!=lyA zOhN#+^<|!I4Lxi=%pPhWKf8h;7%!7+Usjt>34wm~T}Svh`?1feFcd4~Vs?x7bvvab z#R7KHTb;Qv3C<%TL^res;+0|7c_UQCL4n@oM}ES_$CSx^%o9Bj$tQI7 z*RMk<8AKtD@%#I$($Z4I!`98;qoUmQjM8i4=gU>9}96MD2@ zG?c)_j!ns+K9^wVqv%S{Yc#sV$u3PuY;}5dd$Zc(HpZMr%z6qeJp<(96q`1vVQDJ+h^;=c- z-}ZcJy;(WKXl{Oyh_ps{+%H8F9#Eza3JUiZOB0tC5Qr^*8_44vYii0fnSiDC>m6S3 z_4O@AR|wl-DeO6km4nWF)Ly*QhbS)lga&|V@+&v?yeG(QpYRbR;rCVS=EE*VK$e2a zpRWRx4bvM9kTE4(NVrem+n0-Aja1fnnK}H_TC!LW_4^e+@M$R6=Ut%A^lJ4&A zE4T=I0qSV>26*q}=YP^&6@_s5&O_lS+dS`rH9Zs&<0AzVYPu3`j2kte5KbI%V^>Nh z1_YT>>4j~#zrB7GjJnE;1qP^P1v^xzIJ}NM1`JEbmtns3g0a^@B1G{Zr7Hj>MZ~2a zB`bLYlo#Sd+8av7qiE+#c@XwBj2$*qBh^i?NID?Y69=!3}=o0AI_ax 
zdjx?__a@T=Ut(%8f&TC_(wPDd$0eXj(AMG@?yJAWU<84RH_ObCV|A{suCCnGPt}NX z`^Dd!lmOupVFX1VP343t!;yr`+yBk^Urv5DIt5PBF_|wg6*+^0gJI)Jdtl~%J^_3# zz1=8s59*`U&7Fk>rh(+&JOXEu{oR^O{)3@jy3D<>WM-q81`lj07WMWlA?{w{t}NnH z&A+ElL9=U^F_YskC=Uo`%D+8ni9~@Yl938tUZS_BFHrjdl*~B6zyR$dexq8OvkAp2s3ygR%mj6hUm;lC5QgB^I*b^ zHRNGKTe)~c$Ck5(rfX!W&mD+~0W>QN`v}#jv$OL>4mU@%0@D>BzjCv^fE{l@QWiPr zRYQ)ELx8X+H1cyD>8CId&9AKK=e0NBFmt!5db)E>R|%6tS3i*uW!NDbvuusT05HJ&RX|8cCQB@ByWhUSzp0=O5brx? zn{^qaRaaNXeRfdTQY9bAu+_{A_E5SD;wDZL9vS^NwNAIe!I^j?@uO^ zd;0oplFbgaH1jA8mV`du47b6yPG?aJ)fUd~IQz92*K#2CL+NFBbZ)Mr*QnAv+iVX* z08ZQB;Onu8fGA%yzB8E@{0>=N5JTi>Mfp0*?NsH%OfRhRkWfIsQVFNtX?Qg1)T@1L zYs=vrB$-o6nJR23EQx71bcmEwq-i$I{h1a@Z=N*IT5V`qu99I%e~bD!?vmQQYye?GC7K&u?`OgK`&_g0=&{75=FMyCb5H!D z@>9<~FF$|r^gW24Vj0doZj6qJsQ9q#z8$^vv*URKBWv3!clzJ8&IOu$B}bry^zT8E zjxKup17+!6@>@EyTP-Gn3Xr>oh6eAFDM3RwYExvIq3{yETf8@hZT!#dNJ<$n+)Mfv z$-{0P8oHQV$Od;)Qg&rHspK8)I=l0nEk$sfT^9M0f57aYh87_gDCvIaNikWvXIHu6 zUP$t+Cm#<*b}BUbXOX!ZOu$POSBkO_L&Qe$mD;Kg^w%U}uc-_^+$TWh8F6%w@=+S$ zF{7@zLzc~fKjp8?gdL}^{L8Gu$6dL(MlPHHg;Pc%It>4&OERKHleu{Tlrb<{cKWMc zb_&33#VB69TQet0i`G2>NyY&mhUZ`{2gon!RzILDd2Ez$ir{oUo}#xpjz9NqYwR5O zj-)Ki5xgDMN;>8jl{*t2MWQQAqRO-FZvN<+*3}WuUs7tjic~hw*18t1QUZ+}UK{vP z1!vv}3bGy%T;|QSp3>3QW~~jJL+0-vGvRRx@zXRsU3BJBWF9Q}!#{7fur2&SWmNp< z_}!pOpQf4hu{+*!<7IJM>$z*TaeS0pk`-OjV%5Wm3(#FbM9^l^cRlV4X}EpUm3cUc ze2K7x@*;IH{vKVJ#E+!%%U&CVN%`u?J)^*JMC<0(IH~93yCu0Dpf`CBc+z^2P;`IM-?(ob- XUC-YM9{1(^HvwmiEDT>5kYfG^8?_CZ literal 0 HcmV?d00001 diff --git a/docs/source/_templates/sidebar.html b/docs/source/_templates/sidebar.html new file mode 100644 index 00000000..e39e8244 --- /dev/null +++ b/docs/source/_templates/sidebar.html @@ -0,0 +1,56 @@ + + + diff --git a/docs/source/coming_soon/analysis.rst b/docs/source/coming_soon/analysis.rst new file mode 100644 index 00000000..2fe3e623 --- /dev/null +++ b/docs/source/coming_soon/analysis.rst @@ -0,0 +1,63 @@ +.. 
_Analyzing Datasets: + +Analyzing Your Datasets +======================= + +Overview +-------- + +One of the advantages of using Structify as your data infrastructure is the automatic powering of advanced analytics on top of your custom datasets. In our pipeline, Structify is developing the ability to power the following: + +#. :ref:`Creating Custom Tags for Data <tagging>` +#. :ref:`Sorting Data along Any Axis <sorting>` +#. :ref:`Getting Confidence Scores <confidence>` + + +.. _tagging: + +Tagging +------- +We will allow you to tag data either via LLM-generated tags or custom tags. This lets you easily filter your data based on the tags you have created. + +A common practice is to sort datasets by industry. For example, if you are hiring a GTM specialist, you would want them to have deep knowledge and contacts within your vertical, so tagging your network by industry would allow you to easily filter for the right candidates. You can see a great example of this in `our tutorial `. + +.. code-block:: python + + industry_tags = ['healthcare', 'retail', 'finance', 'technology', 'education', 'government', 'non-profit', 'other'] + structify.analysis.filter( + dataset=candidates, + tags=industry_tags, + tag_description="a list of possible industries that the candidate has experience in" + ) + + +.. _sorting: + +Sorting +------- +We allow you to sort your data along any axis (subjective or objective). For example, you can sort news about clients by sentiment to see how sentiment has changed over time, or you could cluster based on topic and sentiment to determine why audiences are reacting the way they are. + +.. code-block:: python + + structify.analysis.sort( + dataset=news, + axis=['sentiment', 'topic'], + sort_description="sorts the news by sentiment in order of positive association with our client George Washington University" + ) + +.. _confidence: + +Confidence Scores +----------------- +We allow you to get confidence scores for any given datapoint. 
This is useful for understanding the quality of the data, and for understanding how strongly our agents feel about the certainty of a given datapoint. + +If we wanted to get the confidence score for a datapoint, we would call the following: + +.. code-block:: python + + structify.source.get_confidence(id = [123456]) + + +Note that you first have to use the ``structify.dataset.view`` endpoint to retrieve the id(s) of the relevant entities. + +Now, you have the tools to be able to more deeply understand your datasets and derive insights from them. \ No newline at end of file diff --git a/docs/source/coming_soon/search.rst b/docs/source/coming_soon/search.rst new file mode 100644 index 00000000..207aa811 --- /dev/null +++ b/docs/source/coming_soon/search.rst @@ -0,0 +1,51 @@ +Searching through Datasets +========================== + +Overview +-------- +When you have a large dataset, it can be difficult to find the specific piece of data you are looking for. And often, you will create the datasets as a reference backend for users or AI tools to reference in answering certain questions, which means you won't know immediately what to search for. In those cases, it will be crucial to set up a method to search through the datasets. This can be done via a couple of different methods depending on how much specificity you want to allow in the search: + +#. :ref:`If you know the keywords to search <string-search>` +#. :ref:`If you just have a question <natural-language-search>` + + +.. _string-search: + +Searching for Specific Strings within Datasets +----------------------------------------------- +The simplest method is to allow users to search for a specific string within the dataset. This can be done by creating a function that takes in a string and returns all the rows that contain that string. This endpoint works best if you've used enums in your dataset. + +If we wanted to power a search for employees who attended a certain school, we could create the following function: + +.. 
code-block:: python + + def search_schools(dataset_name, school_name): + + # We need to specify the table and columns the keyword search applies to + search_target = { + "table": { + "name": "education", + "columns": ["name"] + } + } + return client.dataset.query(name = dataset_name, search = search_target, keyword = school_name.lower()) + +This will return to us a subset of the dataset that contains just the entities whose education table contains the school name we are looking for. + +.. tip:: + You can bulk search for multiple keywords by passing a list of keywords to the "keyword" parameter. You can also conduct a search across multiple tables by passing a list of search targets to the "search" parameter. + +.. _natural-language-search: + +Natural Language Search +----------------------- +The most powerful method is to allow users to ask questions in natural language and have the system return the relevant data. This endpoint is powered by Structify's LLM agents. While it is the most complex method, it allows for the most flexible and user-friendly experience. + +If we wanted to power users to search for employees by describing the type of school they attended (e.g. "Ivy League tier schools" or "liberal arts colleges in California"), we could create the following function: + +.. code-block:: python + + def plaintext_school_search(dataset_name, query): + return client.analysis.query(dataset = dataset_name, query = query) + +Using the ``client.analysis.query`` endpoint powers a more conversational experience for users, and typically, we see this endpoint powering chatbots or other conversational interfaces. diff --git a/docs/source/coming_soon/sharing.rst b/docs/source/coming_soon/sharing.rst new file mode 100644 index 00000000..e31f6b27 --- /dev/null +++ b/docs/source/coming_soon/sharing.rst @@ -0,0 +1,22 @@ +Sharing Datasets +================ +Oftentimes, you will want to share your dataset with others. 
You can use the ``structify.dataset.share`` API call to share your dataset with others. This API call takes the following parameters: + +* **name:** The name of the dataset you want to share +* **share_method:** The method of sharing the dataset. This can be "public" or "private". +* **restrictions:** (optional) A list of restrictions that you want to place on the dataset. This can be "view-only", "refresh-only", "edit", "no-delete", or "admin". Each successive option has more privileges. The default is "view-only". +* **users:** (optional) A list of user ids that you want to share the dataset with. +* **emails:** (optional) A list of emails that you want to share the dataset with. + +.. note:: + If you want to share the dataset with specific users, you can use the "private" method and pass a list of ``user_ids`` to the "users" parameter. If the target recipients are not users, you can pass a list of emails to the "emails" parameter, which will send them an email link to create an account and view the dataset. + +Here's an example that walks through sharing the employees dataset with various co-workers who do not have Structify accounts: + +.. code-block:: python + + structify.dataset.share( + name = "employees", + share_method = "private", + restrictions = "no-delete", + emails = ["ellie@structify.ai", "sami@structify.ai", "maya@structify.ai"]) diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..0ee3d8f0 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,54 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +import sys + +sys.path.insert(0, "/home/dev/src/prospero/client/structify/") + +project = "Structify" +copyright = "2024, YMTM Inc." +author = "Alex Reichenbach & Ronak Gandhi" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + # "autoapi.extension", + # "sphinx.ext.autosectionlabel", + # "sphinx.ext.napoleon", +] + +templates_path = ["/docs/_templates"] +exclude_patterns = [] +pygments_style = "sphinx" + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinxawesome_theme" +extensions += ["sphinxawesome_theme.highlighting", "sphinxcontrib.details.directive"] +html_title = "Structify" + +autoapi_dirs = ["../../../"] +autoapi_options = [ + "members", + "show-inheritance", + "show-module-summary", + "imported-members", # This is the problematic option + "undoc-members", +] + + +html_baseurl = "/docs/" +html_static_path = ["_static/"] +html_logo = "_static/webclip.png" +html_favicon = "_static/favicon.png" + +html_js_files = ["/docs"] diff --git a/docs/source/examples/example0.rst b/docs/source/examples/example0.rst new file mode 100644 index 00000000..8f58b4fe --- /dev/null +++ b/docs/source/examples/example0.rst @@ -0,0 +1,197 @@ +Making the Internet Your Database +================================= + +The central feature of Structify is powering individuals like you to structure unstructured data on the web. 
It's a powerful tool that can transform the web into a database that's always up-to-date. + +Grabbing Relevant Press & News about Clients +-------------------------------------------- + +In this example, let's say we have an ever-updating list of clients, but we want to keep track of the latest press and news about them. We can use Structify to grab the latest press and news about our clients and keep it up-to-date. + +Step 1: Define a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~ +First things first. We need a Structify dataset to store all this information. We create one by defining the schema. + +.. code-block:: python + + from structify import Structify, Source, Table, Property, Relationship + + client = Structify(headers = {"apiKey": "your-api-key-here"}) + + # Define the schema for the dataset using our Python Objects + schema = [ + Table( + name = "client", + description = "a list of clients who we cover", + properties = [ + Property(name = "name", description = "The name of the client"), + Property(name = "description", description = "A brief description of the client") + ], + relationships = [] + ), + Table( + name = "press", + description = "news articles covering our clients", + properties = [ + Property(name = "title", description = "The title of the article"), + Property(name = "outlet", description = "The outlet that published the article") + ], + relationships = [ + Relationship(name = "covers", description = "The client covered in the article") + ] + ), + Table( + name = "social_media_noise", + description = "social media posts about our clients", + properties = [ + Property(name = "app", description = "The social media app"), + Property(name = "handle", description = "The handle of the post"), + Property(name = "content", description = "The content of the post") + ], + relationships = [ + Relationship(name = "mentions", description = "The client mentioned in the post") + ] + ) + ] + + client.dataset.create( + name = "client_press", + description = "a dataset 
that stores all the information about press and social media noise relevant to them.", + schema = schema + ) + +Step 2: Grab Current Press & News +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Now, we are going to use the Structify API to grab the latest press and news about our clients. We will use the `client.structure.run_async` endpoint to do this. + +.. code-block:: python + + # In creating agents to populate the dataset, we have to specify the dataset name and the source + james = client.structure.run_async( + name = "client_press", + source = Source.Web( + prompt = "find me all relevant news about LeBron James from the past 48 hours", + website = ["newyorktimes.com", "cnn.com"]) + ) + + musk = client.structure.run_async( + name = "client_press", + source = Source.Web( + prompt = "find me all relevant news about Elon Musk from the past 48 hours", + website = ["newyorktimes.com", "cnn.com"]) + ) + + swift = client.structure.run_async( + name = "client_press", + source = Source.Web( + prompt = "find me all relevant news about Taylor Swift from the past 48 hours", + website = ["newyorktimes.com", "cnn.com"]) + ) + + client.structure.wait([james, musk, swift]) + + print(client.dataset.view(name = "client_press", table = "press")) + +Step 3: Refresh the Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In order to ensure our database stays current, we can use the `schedule` library to refresh the dataset every day at 9:00 AM. + +.. code-block:: python + + from schedule import every, run_pending + import time + + every().day.at("09:00").do( + structify.structure.run_async, + name = "employees", + sources = Source.Web( + prompt = "find me social media posts about MacKenzie Scott", + websites = ["instagram.com", "twitter.com"] + ) + ) + + while True: + run_pending() + time.sleep(1) + + + +Finding contacts in your network +-------------------------------------------- + +In this tutorial, we will walk you through the steps of finding people in your network based on certain domain expertise. 
+For example, you might be curious to know who you know that has experience in the field of "AI Infrastructure" or "Beauty and Apparel". +Or you could want to know who in your network has experience in "Python" or "Sales". +With Structify, getting this information has never been easier. + +Step 1: Create a Network Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +First, you are going to want to initialize a dataset to represent your network. You do this by defining the schema for the dataset. +The schema is a JSON object that defines the structure of the dataset. Remember that you are going to need to include a description for each entity, table, and column. + +.. code-block:: python + + from structify import Structify, Source, Table, Property, Relationship + + client = Structify(headers = {"apiKey": "your-api-key-here"}) + + # Define the schema for the dataset using our Python Objects + schema = [ + Table( + name = "person", + description = "A person in my network", + properties = [ + Property(name = "name", description = "The name of the person"), + Property(name = "photo", description = "A photo of the person"), + Property(name = "linkedin_url", description = "The LinkedIn URL of the person") + ], + relationships = [ + Relationship(name = "worked_at", description = "The jobs the person has held"), + Relationship(name = "educated_at", description = "The schools the person has attended") + ] + ), + Table( + name = "job", + description = "A job a person has held", + properties = [ + Property(name = "title", description = "The title of the job"), + Property(name = "company", description = "The company the person worked for"), + Property(name = "industry", description = "The industry the company is in") + ], + relationships = [] + ), + Table( + name = "school", + description = "A school a person has attended", + properties = [ + Property(name = "name", description = "The name of the school"), + Property(name = "degree", description = "The degree the person received"), 
+ Property(name = "gradyear", description = "The year the person graduated") + ], + relationships = [] + ) + ] + + # Create a network dataset + client.dataset.create( + name = "my_network", + description = "A dataset representing the job and educational experience of people in my network", + schema = schema + ) + +Step 2: Populate the Network Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Next, you are going to use the structure endpoint to add data to the dataset. Here, we're doing it synchronously to grab the data from the Web. +Since information about your network can easily be found via LinkedIn, we are going to limit the sources to LinkedIn. + +.. code-block:: python + + # Populate the network dataset + network = client.structure.run( + name = "my_network", + source = Source.Web( + prompt = "use LinkedIn to get details about my first degree connections", + websites = "linkedin.com") + ) + + print(network) diff --git a/docs/source/examples/example1.rst b/docs/source/examples/example1.rst new file mode 100644 index 00000000..df48ffea --- /dev/null +++ b/docs/source/examples/example1.rst @@ -0,0 +1,105 @@ +Monitoring Changes in Datasets +============================== +Using the Structify API, you can easily track changes in datasets over time and keep a database up to date when changes occur. This is helpful to keep up to date on information that changes frequently at large scale, such as company board members, executive team, or other personnel changes. + +Tracking Private Company Board Members +-------------------------------------- + +In this tutorial, imagine you are interested in keeping tabs on who is on the board of various private companies. +Furthermore, let's say you are only interested in companies that are in the technology sector. +You want to know who is on the board of any given company, and you want to know when that information changes. 
+ +This information is not readily available, but you can determine it by periodically checking company websites, press releases, and SEC filings. +The goal being to regularly check if there have been any changes. Of course, since all the websites "Team" or "About Us" pages are all formatted differently, this is a near impossible scraping task to execute with high accuracy. + +Structify disrupts the manual processes in the status quo and allows you to easily collect this information to track any changes. + +Step 1: Upload Your Existing Board Members Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, we want to update the existing dataset that you may have. We start that process from the Structify document endpoint, using the upload call. + +.. code-block:: python + + from structify import Structify, Source, Table, Property, Relationship + client = Structify("your_api_key_here") + + # Here, we suppose that you have a dataset of board members in a CSV file + # We will use the Structify API to upload this dataset to the platform + csv_path = "path/to/your/board_members.csv" + with open(csv_path, 'rb') as f: + client.documents.upload(f, path = '/structify/board_members.csv', doctype = 'Text') + +Step 2: Create a Structify Dataset for Board Members +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Next, we will need to create a dataset to store the board members information. We can do this by defining the schema in tables, properties, and relationships, making sure to include descriptions for each. + +.. 
code-block:: python + + # We will define the schema we want to use for the dataset + schema = [ + Table( + name = "board_member", + description = "information about board members of private companies in the technology sector", + properties = [ + Property(name = "name", description = "name of the board member"), + Property(name = "title", description = "title of the board member"), + Property(name = "start_date", description = "start date of the board member's tenure"), + Property(name = "end_date", description = "end date of the board member's tenure") + ], + relationships = [ + Relationship(name = "company", description = "Company the board member is associated with") + ] + + ), + Table( + name = "company", + description = "information about private companies in the technology sector", + properties = [ + Property(name = "name", description = "name of the company"), + Property(name = "website", description = "the url of the company's website") + ] + ) + ] + + + # Now, we will create a dataset with the schema + board_members = client.datasets.create( + name = "private_tech_company_board_members", + description = "Dataset containing information about board members of private companies in the technology sector.", + schema = schema + ) + + # Here, we're populating the dataset with the existing information + client.structure.run_async( + name = "private_tech_company_board_members", + source = Source.Document( + prompt = "Please structure the CSV containing board member data according to the new schema.", + path = "/structify/board_members.csv") + ) + + +Step 3: Set Up Regular Refreshes of the Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Now that we have a dataset to store the board members information, we want to set up regular refreshes of the dataset to keep the information up to date. + +.. code-block:: python + + # After getting the data from the uploaded CSV, we want to get the most recent information from the Internet sources.
+ # Simultaneously, we will set up a refresh schedule to run every day at 9:30am + + every().day.at("09:30").do( + client.structure.run_async, + name = "private_tech_company_board_members", + sources = Source.Web( + prompt = "find me details about the board members and the companies they are associated with in the technology sector.", + websites = ["linkedin.com", "techcrunch.com", "prnewswire.com"] + ) + ) + + while True: + run_pending() + time.sleep(1) + + +With this setup, you will be able to keep track of the board members of various private companies in the technology sector. diff --git a/docs/source/examples/example2.rst b/docs/source/examples/example2.rst new file mode 100644 index 00000000..78982210 --- /dev/null +++ b/docs/source/examples/example2.rst @@ -0,0 +1,118 @@ +Structifying Documents +======================= +In this tutorial, we'll cover how you can use the Structify API to structure information from documents into datasets. +In the end, we'll show you how to implement this into an alternative to using RAG to query documents. + +.. _document-example: + +Extracting Company Information from Pitch Decks +----------------------------------------------- +This example will walk through the process of uploading pitch decks and extracting the company name, industry, founders, investors, and funding amount from each deck. + +Step 1: Upload the Relevant Documents +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Structify allows you to upload PDF documents. +We associate the documents with your account (or your user account), such that multiple datasets can be created from the same document +(or sets of documents involving some of the same documents and different ones). + +..
code-block:: python + + from structify import Structify, Source, Table, Property, Relationship + import os + + client = Structify(headers = {"apiKey": "your_api_key_here"}) + + # You can upload multiple documents at once by specifying a folder that contains them + folder_path = '/path/to/your/folder/of/pitchdecks' + + for filename in os.listdir(folder_path): + file_path = os.path.join(folder_path, filename) + try: + with open(file_path, "rb") as file: + client.documents.upload(file, path = "path/to/your/structify/folder/" + filename, doctype = "Pdf") + except FileNotFoundError: + print("File not found at path:", file_path) + except Exception as e: + print("An error occurred:", e) + + +Step 2: Create a Relevant Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Next, we have to blueprint the schema of the dataset that we are interested in creating from these documents. + +.. code-block:: python + + # Create the dataset schema + schema = [ + Table( + name = "Company", + properties = [ + Property(name = "name", description = "The name of the company"), + Property(name = "industry", description = "The industry of the company"), + Property(name = "founders", description = "The founders of the company"), + Property(name = "investors", description = "The investors of the company"), + Property(name = "funding_amount", description = "The funding amount of the company") + ], + relationships = [ + Relationship(name = "investors", description = "The investors of the company"), + ] + ), + Table( + name = "Investor", + properties = [ + Property(name = "name", description = "The name of the investor"), + Property(name = "description", description = "The description of the investor") + ], + relationships = [] + ) + ] + + + client.dataset.create( + name = "pitchdecks", + description = "A dataset of company information extracted from pitch decks.", + schema = schema + ) + +..
note:: + Remember you can always view the schema of any dataset later by using ``client.dataset.info(name = "dataset_name")``. + +Step 3: Populate the Dataset using the Documents +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Now that we have the dataset schema, we can populate the dataset with the information from the pitch decks. + +.. code-block:: python + + import glob + + # Get a list of all the file paths in the folder + folder_path = '/path/to/your/structify/folder/' + file_paths = glob.glob(folder_path + '*') + + # Iterate over the file paths and make the API call for each file + for file_path in file_paths: + agent = client.structure.run_async( + name = "pitchdecks", + sources = Source.Document( + prompt = "Extract company information from the uploaded pitch decks.", + path = file_path + ) + ) + client.structure.wait(agent) + + +Step 4: Query the Documents +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Let's assume you have a user that wants to search through the documents. +Once you've used the populate method to create the dataset, you can use the query method to search through the documents. + +.. code-block:: python + + def query_pitchdecks(query): + response = client.analysis.query(dataset = "pitchdecks", query = query) + print(response) + + query_pitchdecks("Who are the investors in ABC Corp?") + query_pitchdecks("What is the industry of XYZ Inc?") + + diff --git a/docs/source/examples/examplehome.rst b/docs/source/examples/examplehome.rst new file mode 100644 index 00000000..d86e3777 --- /dev/null +++ b/docs/source/examples/examplehome.rst @@ -0,0 +1,9 @@ +.. _examples-home: + +Learn from Examples +=================== +The best way to learn is by doing. The examples in this section are to help you get started with the library and to give you an idea of the capabilities of the Structify API.
+ +* :doc:`example0` +* :doc:`example1` +* :doc:`example2` \ No newline at end of file diff --git a/docs/source/get_started/intro.rst b/docs/source/get_started/intro.rst new file mode 100644 index 00000000..54707074 --- /dev/null +++ b/docs/source/get_started/intro.rst @@ -0,0 +1,59 @@ +Introduction +============= +Structify powers you and your AI tools to collect custom datasets on demand. + +There are countless use cases for Structify. Just imagine what you could do with human-quality data delivered at superhuman speeds. + +Key Features +------------ +* **Custom Data, At Your Fingertips**: Our AI sources your datasets using a variety of sources from the Internet to documents. +* **Personalized Schemas**: Define whatever schema you want for your datasets, or let our LLM handle that part. +* **Automated Entity Resolution**: Structify ensures that your data is clean, consistent, and free of duplicates. +* **Backsourcing**: Each piece of data is connected to the source or sources we used to collect it. +* **Confidence Scores**: Our reasoning engine bakes confidence metrics into each data point. +* **Notifications**: Get realtime alerts when data you care about changes. + + +How It Works +------------ +Structify relies on knowledge graphs and AI agents to populate and maintain your datasets. + +Intro to Knowledge Graphs +~~~~~~~~~~~~~~~~~~~~~~~~~ +Structify uses knowledge graphs to understand the world and the data it contains. +A knowledge graph is a structured way to represent knowledge in a graph format. It consists of nodes (entities) and edges (relationships) that connect these nodes. Each entity represents a piece of information, such as a person, place, event, or concept, and each relationship represents how entities are connected to each other. 
+ +Core Components of Knowledge Graphs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Entities (Nodes)**: In programming terms, you can think of entities as objects or instances of classes that contain properties or attributes. For example, in a knowledge graph about books, each book could be an entity with attributes like title, author, and publication year. +* **Relationships (Edges)**: These are akin to the relationships in relational databases or associations between objects in object-oriented programming. They define how entities are related to each other. For example, a "written by" relationship might connect a book entity to an author entity. +* **Properties**: Both entities and relationships can have properties. For a programmer, these can be thought of as metadata providing more context or attributes to nodes and edges. For instance, a "published on" property could be associated with the "written by" relationship. + +Imagine a simple knowledge graph representing a book database. In this graph: + +* Nodes represent entities such as books, authors, and publishers. +* Edges represent relationships like "written by," "published by," and "belongs to genre." +* Properties on nodes and edges could include "publication date," "genre name," or "author birthdate." + +Creating Knowledge Graphs +~~~~~~~~~~~~~~~~~~~~~~~~~ +Our knowledge graphs are built by AI agents that index information from a variety of sources, including the web, documents, and other datasets. + +*Since we acknowledge most of the world isn't familiar with the power of knowledge graphs, you'll notice that our docs are written in tabular (or close to tabular) language.* + +Getting Started +--------------- +The best part of Structify is that it's easy to start seeing the power of our API. You can start by creating a dataset and then adding data to it in these simple steps: + +#. Get your API Key by `emailing us `_. +#. Define your first dataset. See :ref:`define-schema`. +#. Add data to your dataset. 
See :ref:`populating-datasets`. + +And just like that, you've gathered your first dataset using Structify. + +Resources +--------- +* View the `current API Status `_. +* `Email us `_ with any questions or comments. +* Join our developer community on `Discord `_. +* Read our `blog `_ for the latest. diff --git a/docs/source/get_started/overview.rst b/docs/source/get_started/overview.rst new file mode 100644 index 00000000..0060cc0e --- /dev/null +++ b/docs/source/get_started/overview.rst @@ -0,0 +1,23 @@ +Start Structifying +================== + +Welcome to the first step in your journey towards data independence! + +Jump Right In +------------- +If you're ready to start structifying, you can get started by visiting the following pages: + +.. toctree:: + :caption: Get Started + :maxdepth: 1 + + Intro + Quickstart + Examples Home <../examples/examplehome> + +Explore the Key Endpoints +------------------------- +By using the Structify API, you can automatically create datasets and structure information from various sources. + +* :ref:`Populating-Datasets` +* :ref:`Structuring Documents` diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst new file mode 100644 index 00000000..988bca8f --- /dev/null +++ b/docs/source/get_started/quickstart.rst @@ -0,0 +1,135 @@ +.. _quickstart: + +Quickstart Guide +================ +Datasets on demand for you or your AI tool in three easy steps. + +#. :ref:`Installation` +#. :ref:`Getting-An-API-Key` +#. :ref:`create-your-first-dataset` + +Our documentation will guide you through the process of using the Structify API to access, create, and manipulate your data. +When you create a dataset, Structify spins up AI agents to populate your custom schema by indexing information from the sources you specify. Soon, you will see how much fun it is to "structify" your data. +We have a python client library, and we are working on releasing a Rest API. + +.. 
_Installation: + +Installation +------------ + +Let's get started! + +First, install the python client library using pip: + +.. code-block:: bash + + pip install structifyai + + +Running ``pip list`` after it completes will show you the Python libraries you've got, which will let you know if the Structify Python library was successfully installed. + +.. note:: + We constantly push new updates so make sure to check for the latest version of the Structify Python library by running ``pip install structifyai --upgrade``. + +Currently we also have a dependency on Azure, so you will want to pip install azure-core: + +.. code-block:: bash + + pip install azure-core + + +Anytime you want to use the Structify Python library, you'll need to import it: + +.. code-block:: python + + from structify import Structify + + +.. _Getting-An-API-Key: + +Getting an API Key +------------------ +We are early, so it is important to us to develop a relationship with all our users. That said, the quickest way to secure an API key is to `email us `_ with your name, email, and a brief description of your use case. We will send you back an API key and your account details. + +Alternatively, you can book a time for a detailed guided tour of our API and get an API key at the end of the session. Please find a time to meet via `our Calendly `_. + +Once you have your API key, you can use it to authenticate your requests to the Structify API. You can do this by setting the ``apiKey`` attribute of the client object: + +.. code-block:: python + + structify = Structify(headers = {"apiKey": "your-api-key-here"}) + +Our API recognizes two types of users: business and personal. Both have organizations and users underneath, for the case that you are letting users of your program make API calls through us. Every one of the endpoints is done through an authenticated personnel. + +..
_create-your-first-dataset: + +Create Your First Dataset +------------------------- +You can create and fill a dataset with two quick successive API calls: + +#. Define a schema using ``structify.dataset.create``. +#. Specify the source to populate the dataset from with ``structify.structure.run`` (or ``structify.structure.run_async``). + +Here's an example of how you would make an API call to create a dataset: + +.. code-block:: python + + from structify import Structify, Source, Table, Property, Relationship + + structify = Structify(headers = {"apiKey": "your-api-key-here"}) + + # Define a schema as a JSON object, make sure to include descriptions for each of your tables, properties, and relationships + + tables = [ + Table( + name = "author", + description = "an individual who wrote a book", + properties = [ + Property(name = "name", description = "The name of the author"), + Property(name = "genre", description = "The genre that the author typically writes in") + ], + relationships = [] + ), + Table( + name = "publisher", + description = "a company that publishes books", + properties = [ + Property(name = "name", description = "The name of the publisher"), + Property(name = "location", description = "The location of the publisher") + ], + relationships = [] + ), + Table( + name = "book", + description = "a book that has been written", + properties = [ + Property(name = "title", description = "The title of the book"), + Property(name = "copies_sold", description = "The number of copies sold of the book") + ], + relationships = [ + Relationship(name = "authored_by", description = "Connects the book to the list of authors who wrote it"), + Relationship(name = "published_by", description = "Connects the book to the list of publishers of the book") + ] + ) + ] + + # Use the schema to create the dataset + structify.dataset.create( + name = "books", + description = "Create a dataset named 'books' that tells me about the authors and publishers of books.", + schema = 
tables + ) + + # Specify the source to populate the dataset from the Source object and then populate the dataset with structify.structure.run + source = Source.Web( + prompt = "What are details about the books appearing here?", + websites = ["https://www.goodreads.com/"] + ) + + books_dataset = structify.structure.run(name = "books", source = source) + + +With that, you are on your way to structifying your data. + +.. note:: + We also allow users to asynchronously run agents to populate datasets. This is useful for large datasets that may take a long time to populate. You can use the ``structify.structure.run_async`` method to run an agent asynchronously. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..fe2fe391 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,99 @@ +Welcome to Structify! +===================== +We power you to collect, enrich, and update your own custom datasets using generative AI. Structify allows you to transform any information from document to web page into structured data with as little as two API calls. + +..
code-block:: python + + # First, define the schema of your dataset using our Python Objects + schema = Table( + name = "person", + description = "an individual in my professional network", + properties = [ + Property(name = "name", description = "The name of the person"), + Property(name = "job_title", description = "The title the person holds") + ] + ) + + # Next, create a dataset with that schema + structify.dataset.create( + name = "my_network", + description = "A dataset of people in my professional network", + schema = schema + ) + + # Then, create an agent to index information from defined sources for your dataset + structify.structure.run( + name = "my_network", + source = Source.Web( + prompt = "find details about my first degree connections on LinkedIn", + websites = ["linkedin.com"]) + ) + + + +After reading our API documentation, you will be able to use our Python client to do things like: + +* Create a personalized dataset representing the job history of everyone in your network +* Monitor changes in a continuously updating dataset of real estate listings +* Extract structured data about startup financing events from a collection of SEC filings and pitch decks +* Automate notifications when a new job listing is posted that matches your criteria + +Keep reading to learn more about how to use Structify to supercharge your team or an AI tool. + +Get Started with Structify +-------------------------- +.. toctree:: + :caption: Get Started + :maxdepth: 1 + + Overview + Intro + Quickstart + + +Check Out Our Capabilities +-------------------------- +.. toctree:: + :caption: Guide + :maxdepth: 1 + + Creating Datasets + Using Documents + Getting Sources + + +Learn from Examples +------------------- +.. toctree:: + :caption: Tutorials + :maxdepth: 2 + + Making the Internet Your Database + Monitoring Changes in Datasets + Structifying Documents + + +.. Indices and tables +.. ------------------- +.. * :ref:`genindex` +.. * :ref:`modindex` +.. 
* :ref:`search` + +See What's Coming Soon +------------------------- +.. toctree:: + :caption: In Beta + :maxdepth: 1 + + Analytical Capabilities + Searching through Datasets + Sharing Datasets + +Read More +--------- +.. toctree:: + :caption: More + :maxdepth: 1 + + FAQ + Changelog diff --git a/docs/source/more/changelog.rst b/docs/source/more/changelog.rst new file mode 100644 index 00000000..52b7205b --- /dev/null +++ b/docs/source/more/changelog.rst @@ -0,0 +1,16 @@ +Changelog +========= + +At Structify, we're iterating fast to release new features, develop improvements, and fix bugs. +This changelog will keep you updated on the changes we make to our product. + +Latest Changes +-------------- + +.. details:: **April 20, 2024 - Structify Version 0.30.0**: + + We released a new version with our updated model as part of our efforts to continue increasing accuracy. We also updated some syntax for existing endpoints. + +.. details:: **March 5, 2024 - API Launched**: + + Read about our launch of version 0.0.1 on `our blog `_. \ No newline at end of file diff --git a/docs/source/more/faq.rst b/docs/source/more/faq.rst new file mode 100644 index 00000000..fd7d991c --- /dev/null +++ b/docs/source/more/faq.rst @@ -0,0 +1,76 @@ +Frequently Asked Questions +========================== + +.. details:: How can I get more credits on my account? + + You can upgrade your account to a higher plan to get more credits. To do so, please email us at team@structify.ai. + +.. details:: What are some examples of datasets that Structify's API can power me to collect on demand? + + Here is a list of possible prompts that you can use to collect data on demand via our API: + + - Keep track of M&A activity in the EU + - How have earnings changed for all public biotech companies in the last N years? + - What countries are PE funds deploying the most capital to? + - Track the performance of ESG portfolio managers. + - Board members of all series A companies in the U.S. by industry. 
+ - Venture capital firms active in AI Infrastructure + - What are recent private market transactions in New York City? + - Private Equity funds investing in sports + - Email me all new fintech startups. + - Summarize the sales strategies in comparable transactions to my active deals. + - What are all the British companies with 11-100 employees that work remote? + - People in my network that are connected to a CMO. + - Who are the founders of startups that are actively recruiting for a HR manager (buying intent) + - What beauty companies have hired a Social Media Manager in the last 5 months? + - What is the chain of command in each of my accounts? + - List all finance conferences and which companies are represented there. + - New positions (promotions, firing, etc) of any contacts in my accounts + - Give me a list of all Digital Marketing Managers at Series A companies. + - Find me all public customers of my competitors. + - Who in my CRM has liked content about accounting on social media? + - What are my competitor's branding and messaging? + - Who are new competitors emerging into media monitoring space? + - Rank all online reviews of grocery delivery services by convenience. + - What companies are operating in the content creator tech space? + - What is the branding of AI Infrastructure companies' websites? + - What are the prices of all productivity tools with over $10M ARR? + - How did the employee headcount change for consumer goods companies? + - Which companies engage with content about AI on Twitter? + - On what topics does our ICP most often comment? + - What users of our product have changed companies? + - Send me a daily report summarizing the social media buzz on the top trending movies. + - Tweets about energy companies with regards to climate change. + - What reporters have covered manufacturing companies outside the U.S.? + - Press mentions of the top followed influencers in fitness? + - Who are the top thought leaders on marketing strategy? 
+ - What are the top word associations on social media for the top 100 billboard artists? + - Give me all recent relevant news about my clients. + - Email me new musical festival announcements in Asia. + - Send me any sufficiently negative news about any AI company. + - Group negative social media sentiment about the Fortune 100 by topic. + - What universities did recent startup (Series A or earlier) hires go to? + - What Ivy League engineers with C++ or Python experience are on the job market? + - How are competitors describing their company culture? + - Which of my employees have more than 1k Gen Z followers on social media? + - What universities and companies are my competitors pulling talent from? + - Who are UI/UX designers with 5+ years of experience in my employees' networks? + - Send me a notification anytime a senior engineer+ leaves a F100 tech company. + - Find me all the PhDs behind machine learning papers with over 100 citations. + - How many locations of each fast food chain in F100 are next to risk factors like bars? + - How many fires were in each major U.S. city last year? + - Employment history of Directors and Officers we insure. + - Employee "churn" at companies we insure + - Housing prices of coastal properties + - Get me EV ownership metrics in Europe + - Keep track of all medical papers relevant to diabetes. + - How much experience does management have? + - How many Japanese factories are close to volcanic or nuclear sites. + - What in this company's supply chain is within 10 miles of the coast. + + +.. details:: Where can I get help with using the API? + + We recommend you first check out `our developer community on Discord `_. + + If you can't find an answer there, we're happy to help you out if you send a note to team@structify.ai. 
\ No newline at end of file diff --git a/docs/source/python_client/backsourcing.rst b/docs/source/python_client/backsourcing.rst new file mode 100644 index 00000000..3b3105ca --- /dev/null +++ b/docs/source/python_client/backsourcing.rst @@ -0,0 +1,49 @@ +.. _backsourcing: + +Backsourcing +============ +For all our users, knowing that you have accurate data is of paramount importance, so we allow you to see the sources that were used to validate and create any given datapoint. This is useful for understanding the provenance of a given datapoint and for understanding the context in which it was created. + +To use this endpoint, you need to know the ids of a given datapoint. In order to find that information, you would need to call the ``structify.dataset.view`` endpoint. + +.. code-block:: python + + from pprint import pprint + + pprint(structify.dataset.view(name = "startups", table = "company")) + +This call would result a JSON object including the id, as follows: + +.. code-block:: python + + [{ + 'id': 232997, + 'label': 'company', + 'properties': { + 'description': 'Dropbox is building the world’s first smart ' + 'workspace. Back in 2007, making work better ' + 'for people meant designing a simpler way to ' + 'keep files in sync. Today, it means designing ' + 'products that reduce busywork so you can ' + 'focus on the work that matters.', + 'name': 'Dropbox', + 'website': 'http://dropbox.com' + } + } + ] + +Once you have the ids, you can call the ``structify.source.get_sources`` endpoint. + +.. code-block:: python + + structify.source.get_sources(id = 232997) + +The output will then be a JSON object containing information about the source and where on the source the relevant information lies. Here is an example output: + +.. 
code-block:: python + + [{'location': {'Visual': {'x': 0, 'y': 0}}, + 'link': {'Web': {'url': 'https://www.ycombinator.com/companies/dropbox'}}, + 'extra_properties': {}}] + + diff --git a/docs/source/python_client/datasets.rst b/docs/source/python_client/datasets.rst new file mode 100644 index 00000000..e54c1465 --- /dev/null +++ b/docs/source/python_client/datasets.rst @@ -0,0 +1,243 @@ +Creating Datasets +================= + +Overview +-------- +Structify's API, at its core, is designed to let developers collect datasets on demand. You can create a dataset in just a few lines of code. This guide will walk you through the four main steps in getting your first custom dataset: + +#. :ref:`define-schema` +#. :ref:`Populating-Datasets` +#. :ref:`view-dataset` +#. :ref:`Refreshing-Dataset` + +.. _define-schema: + +Defining Your Schema +--------------------- +The basis of creating datasets is defining the schema, much like creating a blueprint for a database. The schema of a Structify dataset is comprised of entity tables, which are in turn made up of columns (which we call Properties), and the relationships between them. Check out the example code below for more clarity. + +Before you can create researchers to automatically fill up your datasets, we need to define the schema of the dataset. Note that each entity table, column, and relationship in the dataset needs a name and description. + +If you have a schema you want your dataset to follow, you can easily pre-define your schema using our Python objects. + +..
code-block:: python + + from structify import Structify, Table, Property, Relationship + from pydantic import BaseModel + from typing import List + + structify = Structify(headers = {"apiKey": "your-api-key-here"}) + + entities = [ + Table( + name = "jobs", + description = "the job history of the employee", + properties = [ + Property(name = "title", description = "The title of the job"), + Property(name = "company", description = "The company the employee worked for") + ], + relationships = [] + ), + Table( + name = "education", + description = "the educational history of an employee", + properties = [ + Property(name = "school_name", description = "The name of the school"), + Property(name = "school_gradyear", description = "The year the employee graduated") + ], + relationships = [] + ), + Table( + name = "employees", + description = "details about employees at a certain company.", + properties = [ + Property(name = "name", description = "the full name of the employee") + ], + relationships = [ + Relationship(name = "job", description = "connects the employee to their job history"), + Relationship(name = "education", description = "connects the employee to their education history") + ] + ) + ] + + structify.dataset.create( + name="employees", + description="A dataset named 'employees' that tells me about their job and education history.", + schema = entities + ) + + structify.dataset.info(name = "employees") + +And the output will echo back a JSON representation of the schema you just created. + +.. note:: + Coming soon: a ``structify.dataset.llm_create`` method to create a dataset with a schema that is automatically generated from just a description. + This will allow users to, instead of writing out an entire schema, simply input plain text to allow the LLM to create your schema. + +.. tip:: + Currently, if you need to edit the schema, you will need to either delete the dataset and recreate it with the edited schema or create a dataset with a new name.
+ + We are working on ``structify.dataset.modify`` to allow users to adjust the schema without deleting an existing dataset. + +.. warning:: + As of now, every property in the schema defaults to the String type. We're working quickly to allow users to specify types for each property, such as integers or lists. + +.. _populating-datasets: + +Populating Your Datasets +------------------------ +Once you have blueprinted your dataset by creating a schema, you can now use Structify's research agents to collect data to fill your dataset. + +You can run our scraper agents either through ``structify.structure.run`` or ``structify.structure.run_async`` to populate a dataset with an initial batch of data. The structure API call requires the following: + +- **name:** The name of the dataset you want to populate +- **sources:** The sources or types of sources you want the agent to use (e.g. “LinkedIn” or “news articles”). These will be a Python enum of the sources available to the agent. Make sure to import Source. If not specified, the API call will error out. + +Here's an example of an API call to populate that employees dataset with data from LinkedIn using ``structify.structure.run``: + +.. code-block:: python + + from structify import Structify, Source + + structify.structure.run( + name = "employees", + sources = Source.Web(prompt = "find me details about the employees of ACME", websites = ["linkedin.com"]) + ) + + + +.. note:: + The output of ``structify.structure.run`` will be a JSON representation of the dataset. + +If you want to run the populate request asynchronously, you can use ``structify.structure.run_async``: + +.. code-block:: python + + from structify import Structify, Source + + dataset = structify.structure.run_async( + name = "employees", + sources = Source.Web(prompt = "find me details about the employees of ACME", websites = ["linkedin.com"]) + ) + + structify.structure.wait([dataset]) + +.. 
note:: + The output of ``structify.structure.run_async`` will be a key that you can use to access the run via ``structify.structure.wait``. We are working on adding an endpoint that will allow you to check the status of an asynchronous run. + + + +Populating Datasets from Documents +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sometimes, you will want to collect data from documents, such as PDFs or PNGs. You can use the ``structify.structure.run`` and ``structify.structure.run_async`` endpoints on documents as well. + +We'll walk you through the process of uploading documents in the :doc:`documents` section. Or you can check out the tutorials at :ref:`document-example`. + + +Additional Source Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We allow for two other sources besides the Web and Documents: SEC filings and plain text. + +If you'd like to use Structify to just structure plain text, you can simply pass the text to the API call as such: + +.. code-block:: python + + structify.structure.run( + name = "employees", + sources = Source.Text(text = "John Doe is the CEO of ACME. Previously he was the Senior VP at EMCA.") + ) + + +If you'd like to use Structify to get datasets from SEC filings, you can use the following: + +.. code-block:: python + + structify.structure.run( + name = "employees", + sources = Source.SECFiling( + year = 2021, # Optional + quarter = 3, # Optional + accession_number = "0000320193-21-000056" # Optional + ) + ) + +.. _view-dataset: + +Viewing Your Datasets +--------------------------------------- +Through this endpoint, we allow users to view specific parts of the dataset that they are interested in. For example, if we want to allow users to see the names of the schools that each person attended and their graduation date in their employees dataset, we could create the following view: + +.. 
code-block:: python + + from pprint import pprint + + pprint(structify.dataset.view(name = "employees", table = "education")) + +The output will be a JSON object containing the properties and relationships of the entities in the education table (along with their ids). + +.. note:: + + We are in the process of adding the ability to view multiple tables at once, or to limit the view of a dataset to a certain set of columns. In addition, we are working on methods to export your datasets. + +Helpful Dataset functionality +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We also have a few other helpful functions to help you manage your datasets: ``structify.dataset.delete`` to delete a dataset, ``structify.dataset.list`` to list all your datasets, and ``structify.dataset.info`` to get info on a certain dataset, including the name. + +Here are some examples of how you can use these functions: + +.. code-block:: python + + # Requires no parameters and will return a list of all your datasets in a JSON object + structify.dataset.list() + + # Requires the name of the dataset and will return the schema as a JSON object + structify.dataset.info(name = "employees") + + # Requires the name of the dataset and will delete the dataset + structify.dataset.delete(name = "employees") + + +.. _Refreshing-Dataset: + +Refreshing Your Dataset +----------------------- +Of course, the data in your dataset will become outdated over time. Currently, to refresh your dataset, you will want to set a recurring schedule or refresh the dataset continuously. We are developing an endpoint that will streamline this functionality, but in the meantime, we recommend you use the following: + +.. 
code-block:: python + + while True: + run = structify.structure.run_async( + name = "employees", + sources = Source.Web(prompt = "find me details about the employees of ACME", websites = ["linkedin.com"]) + ) + structify.structure.wait(run) + +If you want to run the refresh on a regular schedule, you can use the ``schedule`` library to do so. Here's an example of how you can run the refresh every day at 3:00 PM: + +.. code-block:: python + + from schedule import every, run_pending + import time + + every().day.at("15:00").do( + structify.structure.run_async, + name = "employees", + sources = Source.Web( + prompt = "find me details about the employees of ACME", + websites = ["linkedin.com"] + ) + ) + + while True: + run_pending() + time.sleep(1) + + + +.. note:: + Keep your eye out for the ``structify.dataset.refresh`` API call to update the data in your dataset. + +For one-time refreshes, we recommend just running ``structify.structure.run`` again to update the dataset. + + + diff --git a/docs/source/python_client/documents.rst b/docs/source/python_client/documents.rst new file mode 100644 index 00000000..f3a27172 --- /dev/null +++ b/docs/source/python_client/documents.rst @@ -0,0 +1,67 @@ +Using Documents in Structify +============================ +In many cases, a wealth of unstructured data lies within documents without set formats. Structify allows you to upload these documents and extract the data you need from them. + +Uploading Documents +--------------------- +You can upload documents to Structify using ``structify.documents.upload`` by passing the file in binary mode. You will also need to specify the path where you want to store the document in your Structify account, along with the type of document. + +.. 
code-block:: python + + from structify import Structify + structify = Structify(headers = {"apiKey": "your-api-key-here"}) + + with open('/path/to/your/local/file.pdf', 'rb') as f: + structify.documents.upload(f, path = '/path/on/your/Structify/remote.pdf', doctype = 'Pdf') + +Currently, we support the following document formats: + +- PDFs +- Text files (TXT, CSV, etc.) + +We are working to support more formats in the future, such as: + +- Images (JPG, PNG, etc.) +- Word documents (DOCX) +- Excel spreadsheets (XLSX) +- PowerPoint presentations (PPTX) + +In the meantime, we recommend converting all your documents to either PDFs or text files before uploading them to Structify. + +Once you've uploaded them, you can use our other document endpoints to list, download, and delete the documents. + +Here are examples of how you would use those endpoints: + +.. code-block:: python + + # Listing all documents will return a JSON object of all your uploaded documents + structify.documents.list() + + # Download and view a document. Note that you will need to convert the download from a vector of bytes to text. + import io + print(io.BytesIO(bytes(structify.documents.download(path = '/path/on/your/Structify/remote.pdf'))).read().decode()) + + # Delete a document by specifying the path of the document. This will remove the document from your Structify account. + structify.documents.delete(path = '/path/on/your/Structify/remote.pdf') + + +.. _Structuring Documents: + +Extracting Data from Documents +------------------------------- +Creating datasets from documents is quite simple. You just use the ``structify.structure.run`` or ``structify.structure.run_async`` method and specify the document file path or paths you want to include in the dataset through the Source Python object. + +.. 
code-block:: python + + from structify import Structify, Source + structify = Structify(headers = {"apiKey": "your-api-key-here"}) + + structify.structure.run( + name = "startups", + source = Source.Document( + prompt = "structure data from this startup's pitch deck", + path = "/pitchdecks/structify_deck.pdf" + ) + ) + +And just like that you've created a dataset from your documents. \ No newline at end of file diff --git a/scripts/docs b/scripts/docs new file mode 100755 index 00000000..aa8f8ecd --- /dev/null +++ b/scripts/docs @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/../docs" || exit 1 + +make html diff --git a/scripts/lint b/scripts/lint index 3419a96a..de7e4be3 100755 --- a/scripts/lint +++ b/scripts/lint @@ -10,3 +10,4 @@ rye run lint echo "==> Making sure it imports" rye run python -c 'import structify' +sphinx-lint docs/**/*.rst