From 90811bc9a8408b1fdf8c481fbc80c12b655e998e Mon Sep 17 00:00:00 2001 From: Dan King Date: Fri, 20 Sep 2024 18:20:35 -0400 Subject: [PATCH] feat: teach PyArray to tree_display & teach CI to doctest (#900) --- .github/workflows/ci.yml | 5 +++++ pyvortex/src/array.rs | 42 ++++++++++++++++++++++++++++++++++++++++ pyvortex/src/compress.rs | 6 +++--- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1980c78fb..dc853c46ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,11 @@ jobs: rye run pytest --benchmark-disable test/ working-directory: pyvortex/ + - name: Doctest - PyVortex + run: | + source ../.venv/bin/activate && make clean && make doctest + working-directory: docs/ + - name: License Check run: cargo install --locked cargo-deny && cargo deny check - uses: rustsec/audit-check@v1.4.1 diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 3802edfcd0..59c901b78a 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -182,4 +182,46 @@ impl PyArray { .map_err(PyVortexError::map_err) .and_then(|arr| Bound::new(py, PyArray { inner: arr })) } + + /// Internal technical details about the encoding of this Array. + /// + /// Warnings + /// -------- + /// The format of the returned string may change without notice. + /// + /// Returns + /// ------- + /// :class:`.str` + /// + /// Examples + /// -------- + /// + /// Uncompressed arrays have straightforward encodings: + /// + /// >>> arr = vortex.encoding.array([1, 2, None, 3]) + /// >>> print(arr.tree_display()) + /// root: vortex.primitive(0x03)(i64?, len=4) nbytes=33 B (100.00%) + /// metadata: PrimitiveMetadata { validity: Array } + /// buffer: 32 B + /// validity: vortex.bool(0x02)(bool, len=4) nbytes=1 B (3.03%) + /// metadata: BoolMetadata { validity: NonNullable, length: 4, bit_offset: 0 } + /// buffer: 1 B + /// + /// + /// Compressed arrays use more complex encodings: + /// + /// >>> print(vortex.encoding.compress(arr).tree_display()) + /// root: fastlanes.for(0x17)(i64?, len=4) nbytes=1 B (100.00%) + /// metadata: FoRMetadata { reference: Scalar { dtype: Primitive(I64, Nullable), value: Primitive(I64(1)) }, shift: 0 } + /// encoded: fastlanes.bitpacked(0x15)(u64?, len=4) nbytes=1 B (100.00%) + /// metadata: BitPackedMetadata { validity: Array, bit_width: 2, offset: 0, length: 4, has_patches: false } + /// buffer: 256 B + /// validity: vortex.bool(0x02)(bool, len=4) nbytes=1 B (100.00%) + /// metadata: BoolMetadata { validity: NonNullable, length: 4, bit_offset: 0 } + /// buffer: 1 B + /// + /// + fn tree_display(&self) -> String { + self.inner.tree_display().to_string() + } } diff --git a/pyvortex/src/compress.rs b/pyvortex/src/compress.rs index 009f1ef3a2..52d7a6da8e 100644 --- a/pyvortex/src/compress.rs +++ b/pyvortex/src/compress.rs @@ -19,13 +19,13 @@ use crate::error::PyVortexError; /// /// >>> a = vortex.encoding.array([42 for _ in range(1000)]) /// >>> str(vortex.encoding.compress(a)) -/// 'vortex.constant(0x0a)(i64, len=1000)' +/// 'vortex.constant(0x09)(i64, len=1000)' /// /// Compress an array of increasing integers: /// /// >>> a = vortex.encoding.array(list(range(1000))) /// >>> str(vortex.encoding.compress(a)) -/// 'fastlanes.for(0x0f)(i64, len=1000)' +/// 'fastlanes.for(0x17)(i64, len=1000)' /// /// Compress an array of increasing floating-point numbers and a few nulls: /// @@ -34,7 +34,7 @@ use crate::error::PyVortexError; /// ... for x in range(1000) /// ... ]) /// >>> str(vortex.encoding.compress(a)) -/// 'vortex.alp(0x0d)(f64?, len=1000)' +/// 'vortex.alp(0x11)(f64?, len=1000)' pub fn compress<'py>(array: &Bound<'py, PyArray>) -> PyResult> { let compressor = SamplingCompressor::default(); let inner = compressor