Skip to content

Commit

Permalink
Pybytes specialization slices (#4442)
Browse files Browse the repository at this point in the history
* specialize collections holding bytes to turn into `PyBytes` rather than `PyList`

* specialize `Cow<'_, T>`

* add tests

* add newsfragment

* hide `iter_into_pyobject`

* add migration entry

* remove redundant generic

* add benchmark for bytes into pyobject

* bytes sequences conversions using `PyBytes::new`

* restore `IntoPyObject for &u8`

* update newsfragment PR number

* bless ui test

---------

Co-authored-by: Icxolu <[email protected]>
  • Loading branch information
davidhewitt and Icxolu authored Aug 15, 2024
1 parent 9267330 commit 7b2cf24
Show file tree
Hide file tree
Showing 12 changed files with 479 additions and 66 deletions.
40 changes: 40 additions & 0 deletions guide/src/migration.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,46 @@ where
```
</details>

### Macro conversion changed for byte collections (`Vec<u8>`, `[u8; N]` and `SmallVec<[u8; N]>`).
<details open>
<summary><small>Click to expand</small></summary>

PyO3 0.23 introduced the new fallible conversion trait `IntoPyObject`. The `#[pyfunction]` and
`#[pymethods]` macros prefer `IntoPyObject` implementations over `IntoPy<PyObject>`.

This change has an effect on functions and methods returning _byte_ collections like
- `Vec<u8>`
- `[u8; N]`
- `SmallVec<[u8; N]>`

In their new `IntoPyObject` implementation these will now turn into `PyBytes` rather than a
`PyList`. All other `T`s are unaffected and still convert into a `PyList`.

```rust
# #![allow(dead_code)]
# use pyo3::prelude::*;
#[pyfunction]
fn foo() -> Vec<u8> { // would previously turn into a `PyList`, now `PyBytes`
vec![0, 1, 2, 3]
}

#[pyfunction]
fn bar() -> Vec<u16> { // unaffected, returns `PyList`
vec![0, 1, 2, 3]
}
```

If this conversion is _not_ desired, consider building a list manually using `PyList::new`.

The following types were previously _only_ implemented for `u8` and now allow other `T`s turn into
`PyList`
- `&[T]`
- `Cow<[T]>`

This is purely additional and should just extend the possible return types.

</details>

## from 0.21.* to 0.22

### Deprecation of `gil-refs` feature continues
Expand Down
2 changes: 2 additions & 0 deletions newsfragments/4442.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
`IntoPyObject` impls for `Vec<u8>`, `&[u8]`, `[u8; N]`, `Cow<[u8]>` and `SmallVec<[u8; N]>` now
convert into `PyBytes` rather than `PyList`.
4 changes: 4 additions & 0 deletions pyo3-benches/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ harness = false
name = "bench_gil"
harness = false

[[bench]]
name = "bench_intopyobject"
harness = false

[[bench]]
name = "bench_list"
harness = false
Expand Down
93 changes: 93 additions & 0 deletions pyo3-benches/benches/bench_intopyobject.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use std::hint::black_box;

use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion};

use pyo3::conversion::IntoPyObject;
use pyo3::prelude::*;
use pyo3::types::PyBytes;

fn bench_bytes_new(b: &mut Bencher<'_>, data: &[u8]) {
Python::with_gil(|py| {
b.iter_with_large_drop(|| PyBytes::new(py, black_box(data)));
});
}

fn bytes_new_small(b: &mut Bencher<'_>) {
bench_bytes_new(b, &[]);
}

fn bytes_new_medium(b: &mut Bencher<'_>) {
let data = (0..u8::MAX).into_iter().collect::<Vec<u8>>();
bench_bytes_new(b, &data);
}

fn bytes_new_large(b: &mut Bencher<'_>) {
let data = vec![10u8; 100_000];
bench_bytes_new(b, &data);
}

fn bench_bytes_into_pyobject(b: &mut Bencher<'_>, data: &[u8]) {
Python::with_gil(|py| {
b.iter_with_large_drop(|| black_box(data).into_pyobject(py));
});
}

fn byte_slice_into_pyobject_small(b: &mut Bencher<'_>) {
bench_bytes_into_pyobject(b, &[]);
}

fn byte_slice_into_pyobject_medium(b: &mut Bencher<'_>) {
let data = (0..u8::MAX).into_iter().collect::<Vec<u8>>();
bench_bytes_into_pyobject(b, &data);
}

fn byte_slice_into_pyobject_large(b: &mut Bencher<'_>) {
let data = vec![10u8; 100_000];
bench_bytes_into_pyobject(b, &data);
}

fn byte_slice_into_py(b: &mut Bencher<'_>) {
Python::with_gil(|py| {
let data = (0..u8::MAX).into_iter().collect::<Vec<u8>>();
let bytes = data.as_slice();
b.iter_with_large_drop(|| black_box(bytes).into_py(py));
});
}

fn vec_into_pyobject(b: &mut Bencher<'_>) {
Python::with_gil(|py| {
let bytes = (0..u8::MAX).into_iter().collect::<Vec<u8>>();
b.iter_with_large_drop(|| black_box(&bytes).clone().into_pyobject(py));
});
}

fn vec_into_py(b: &mut Bencher<'_>) {
Python::with_gil(|py| {
let bytes = (0..u8::MAX).into_iter().collect::<Vec<u8>>();
b.iter_with_large_drop(|| black_box(&bytes).clone().into_py(py));
});
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("bytes_new_small", bytes_new_small);
c.bench_function("bytes_new_medium", bytes_new_medium);
c.bench_function("bytes_new_large", bytes_new_large);
c.bench_function(
"byte_slice_into_pyobject_small",
byte_slice_into_pyobject_small,
);
c.bench_function(
"byte_slice_into_pyobject_medium",
byte_slice_into_pyobject_medium,
);
c.bench_function(
"byte_slice_into_pyobject_large",
byte_slice_into_pyobject_large,
);
c.bench_function("byte_slice_into_py", byte_slice_into_py);
c.bench_function("vec_into_pyobject", vec_into_pyobject);
c.bench_function("vec_into_py", vec_into_py);
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
61 changes: 60 additions & 1 deletion src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::pyclass::boolean_struct::False;
use crate::types::any::PyAnyMethods;
use crate::types::PyTuple;
use crate::{
ffi, Borrowed, Bound, BoundObject, Py, PyAny, PyClass, PyObject, PyRef, PyRefMut, Python,
ffi, Borrowed, Bound, BoundObject, Py, PyAny, PyClass, PyErr, PyObject, PyRef, PyRefMut, Python,
};
use std::convert::Infallible;

Expand Down Expand Up @@ -200,6 +200,65 @@ pub trait IntoPyObject<'py>: Sized {

/// Performs the conversion.
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error>;

/// Converts sequence of Self into a Python object. Used to specialize `Vec<u8>`, `[u8; N]`
/// and `SmallVec<[u8; N]>` as a sequence of bytes into a `bytes` object.
#[doc(hidden)]
fn owned_sequence_into_pyobject<I>(
iter: I,
py: Python<'py>,
_: private::Token,
) -> Result<Bound<'py, PyAny>, PyErr>
where
I: IntoIterator<Item = Self> + AsRef<[Self]>,
I::IntoIter: ExactSizeIterator<Item = Self>,
PyErr: From<Self::Error>,
{
let mut iter = iter.into_iter().map(|e| {
e.into_pyobject(py)
.map(BoundObject::into_any)
.map(BoundObject::unbind)
.map_err(Into::into)
});
let list = crate::types::list::try_new_from_iter(py, &mut iter);
list.map(Bound::into_any)
}

/// Converts sequence of Self into a Python object. Used to specialize `&[u8]` and `Cow<[u8]>`
/// as a sequence of bytes into a `bytes` object.
#[doc(hidden)]
fn borrowed_sequence_into_pyobject<I>(
iter: I,
py: Python<'py>,
_: private::Token,
) -> Result<Bound<'py, PyAny>, PyErr>
where
Self: private::Reference,
I: IntoIterator<Item = Self> + AsRef<[<Self as private::Reference>::BaseType]>,
I::IntoIter: ExactSizeIterator<Item = Self>,
PyErr: From<Self::Error>,
{
let mut iter = iter.into_iter().map(|e| {
e.into_pyobject(py)
.map(BoundObject::into_any)
.map(BoundObject::unbind)
.map_err(Into::into)
});
let list = crate::types::list::try_new_from_iter(py, &mut iter);
list.map(Bound::into_any)
}
}

pub(crate) mod private {
pub struct Token;

pub trait Reference {
type BaseType;
}

impl<T> Reference for &'_ T {
type BaseType = T;
}
}

impl<'py, T> IntoPyObject<'py> for Bound<'py, T> {
Expand Down
40 changes: 27 additions & 13 deletions src/conversions/smallvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ use crate::exceptions::PyTypeError;
#[cfg(feature = "experimental-inspect")]
use crate::inspect::types::TypeInfo;
use crate::types::any::PyAnyMethods;
use crate::types::list::{new_from_iter, try_new_from_iter};
use crate::types::{PyList, PySequence, PyString};
use crate::types::list::new_from_iter;
use crate::types::{PySequence, PyString};
use crate::PyErr;
use crate::{
err::DowncastError, ffi, Bound, BoundObject, FromPyObject, IntoPy, PyAny, PyObject, PyResult,
Python, ToPyObject,
err::DowncastError, ffi, Bound, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python,
ToPyObject,
};
use smallvec::{Array, SmallVec};

Expand Down Expand Up @@ -62,18 +62,17 @@ where
A::Item: IntoPyObject<'py>,
PyErr: From<<A::Item as IntoPyObject<'py>>::Error>,
{
type Target = PyList;
type Target = PyAny;
type Output = Bound<'py, Self::Target>;
type Error = PyErr;

/// Turns [`SmallVec<u8>`] into [`PyBytes`], all other `T`s will be turned into a [`PyList`]
///
/// [`PyBytes`]: crate::types::PyBytes
/// [`PyList`]: crate::types::PyList
#[inline]
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
let mut iter = self.into_iter().map(|e| {
e.into_pyobject(py)
.map(BoundObject::into_any)
.map(BoundObject::unbind)
.map_err(Into::into)
});
try_new_from_iter(py, &mut iter)
<A::Item>::owned_sequence_into_pyobject(self, py, crate::conversion::private::Token)
}
}

Expand Down Expand Up @@ -120,7 +119,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::types::PyDict;
use crate::types::{PyBytes, PyBytesMethods, PyDict, PyList};

#[test]
fn test_smallvec_into_py() {
Expand Down Expand Up @@ -162,4 +161,19 @@ mod tests {
assert!(l.eq(hso).unwrap());
});
}

#[test]
fn test_smallvec_intopyobject_impl() {
Python::with_gil(|py| {
let bytes: SmallVec<[u8; 8]> = [1, 2, 3, 4, 5].iter().cloned().collect();
let obj = bytes.clone().into_pyobject(py).unwrap();
assert!(obj.is_instance_of::<PyBytes>());
let obj = obj.downcast_into::<PyBytes>().unwrap();
assert_eq!(obj.as_bytes(), &*bytes);

let nums: SmallVec<[u16; 8]> = [1, 2, 3, 4, 5].iter().cloned().collect();
let obj = nums.into_pyobject(py).unwrap();
assert!(obj.is_instance_of::<PyList>());
});
}
}
57 changes: 30 additions & 27 deletions src/conversions/std/array.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::conversion::IntoPyObject;
use crate::ffi_ptr_ext::FfiPtrExt;
use crate::instance::Bound;
use crate::types::any::PyAnyMethods;
use crate::types::{PyList, PySequence};
use crate::types::PySequence;
use crate::{
err::DowncastError, ffi, FromPyObject, IntoPy, Py, PyAny, PyObject, PyResult, Python,
ToPyObject,
Expand Down Expand Up @@ -41,34 +40,19 @@ where
impl<'py, T, const N: usize> IntoPyObject<'py> for [T; N]
where
T: IntoPyObject<'py>,
PyErr: From<T::Error>,
{
type Target = PyList;
type Target = PyAny;
type Output = Bound<'py, Self::Target>;
type Error = T::Error;
type Error = PyErr;

/// Turns [`[u8; N]`](std::array) into [`PyBytes`], all other `T`s will be turned into a [`PyList`]
///
/// [`PyBytes`]: crate::types::PyBytes
/// [`PyList`]: crate::types::PyList
#[inline]
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
use crate::BoundObject;
unsafe {
let len = N as ffi::Py_ssize_t;

let ptr = ffi::PyList_New(len);

// We create the `Bound` pointer here for two reasons:
// - panics if the ptr is null
// - its Drop cleans up the list if user code errors or panics.
let list = ptr.assume_owned(py).downcast_into_unchecked::<PyList>();

for (i, obj) in (0..len).zip(self) {
let obj = obj.into_pyobject(py)?.into_ptr();

#[cfg(not(Py_LIMITED_API))]
ffi::PyList_SET_ITEM(ptr, i, obj);
#[cfg(Py_LIMITED_API)]
ffi::PyList_SetItem(ptr, i, obj);
}

Ok(list)
}
T::owned_sequence_into_pyobject(self, py, crate::conversion::private::Token)
}
}

Expand Down Expand Up @@ -166,7 +150,11 @@ mod tests {
sync::atomic::{AtomicUsize, Ordering},
};

use crate::{ffi, types::any::PyAnyMethods};
use crate::{
conversion::IntoPyObject,
ffi,
types::{any::PyAnyMethods, PyBytes, PyBytesMethods},
};
use crate::{types::PyList, IntoPy, PyResult, Python, ToPyObject};

#[test]
Expand Down Expand Up @@ -257,6 +245,21 @@ mod tests {
});
}

#[test]
fn test_array_intopyobject_impl() {
Python::with_gil(|py| {
let bytes: [u8; 6] = *b"foobar";
let obj = bytes.into_pyobject(py).unwrap();
assert!(obj.is_instance_of::<PyBytes>());
let obj = obj.downcast_into::<PyBytes>().unwrap();
assert_eq!(obj.as_bytes(), &bytes);

let nums: [u16; 4] = [0, 1, 2, 3];
let obj = nums.into_pyobject(py).unwrap();
assert!(obj.is_instance_of::<PyList>());
});
}

#[test]
fn test_extract_non_iterable_to_array() {
Python::with_gil(|py| {
Expand Down
Loading

0 comments on commit 7b2cf24

Please sign in to comment.