Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(python): Start at using new Bound<> API from PyO3 #15752

Merged
merged 14 commits into from
Apr 19, 2024
1 change: 1 addition & 0 deletions crates/polars-core/src/datatypes/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ impl<'a> AnyValue<'a> {
NumCast::from((*v).parse::<f64>().ok()?)
}
},
StringOwned(v) => String(v).extract(),
_ => None,
}
}
Expand Down
140 changes: 79 additions & 61 deletions py-polars/src/conversion/any_value.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::borrow::Cow;

#[cfg(feature = "object")]
use polars::chunked_array::object::PolarsObjectSafe;
use polars::datatypes::{DataType, Field, OwnedObject, PlHashMap, TimeUnit};
Expand All @@ -6,9 +8,7 @@ use polars_core::utils::any_values_to_supertype_and_n_dtypes;
use pyo3::exceptions::{PyOverflowError, PyTypeError};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{
PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple, PyType,
};
use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple};

use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap};
use crate::error::PyPolarsErr;
Expand All @@ -27,14 +27,14 @@ impl ToPyObject for Wrap<AnyValue<'_>> {
}
}

impl<'s> FromPyObject<'s> for Wrap<AnyValue<'s>> {
fn extract(ob: &'s PyAny) -> PyResult<Self> {
impl<'py> FromPyObject<'py> for Wrap<AnyValue<'py>> {
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
py_object_to_any_value(ob, true).map(Wrap)
}
}

pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject {
let utils = UTILS.as_ref(py);
let utils = UTILS.bind(py);
match av {
AnyValue::UInt8(v) => v.into_py(py),
AnyValue::UInt16(v) => v.into_py(py),
Expand Down Expand Up @@ -105,7 +105,7 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject {
N * std::mem::size_of::<u128>(),
)
};
let digits = PyTuple::new(py, buf.iter().take(n_digits));
let digits = PyTuple::new_bound(py, buf.iter().take(n_digits));
convert
.call1((v.is_negative() as u8, digits, n_digits, -(scale as i32)))
.unwrap()
Expand All @@ -115,22 +115,25 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject {
}

type TypeObjectPtr = usize;
type InitFn = fn(&PyAny, bool) -> PyResult<AnyValue>;
type InitFn = for<'py> fn(&Bound<'py, PyAny>, bool) -> PyResult<AnyValue<'py>>;
pub(crate) static LUT: crate::gil_once_cell::GILOnceCell<PlHashMap<TypeObjectPtr, InitFn>> =
crate::gil_once_cell::GILOnceCell::new();

pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyValue> {
pub(crate) fn py_object_to_any_value<'py>(
ob: &Bound<'py, PyAny>,
strict: bool,
) -> PyResult<AnyValue<'py>> {
// Conversion functions.
fn get_null(_ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Ok(AnyValue::Null)
}

fn get_bool(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
let b = ob.extract::<bool>().unwrap();
Ok(AnyValue::Boolean(b))
}

fn get_int(ob: &PyAny, strict: bool) -> PyResult<AnyValue> {
fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
if let Ok(v) = ob.extract::<i64>() {
Ok(AnyValue::Int64(v))
} else if let Ok(v) = ob.extract::<u64>() {
Expand All @@ -145,24 +148,36 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
}
}

fn get_float(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Ok(AnyValue::Float64(ob.extract::<f64>().unwrap()))
}

fn get_str(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
let value = ob.extract::<&str>().unwrap();
Ok(AnyValue::String(value))
fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
// Ideally we'd be returning an AnyValue::String(&str) instead, as was
// the case in previous versions of this function. However, if compiling
// with abi3 for versions older than Python 3.10, the APIs that purport
// to return &str actually just encode to UTF-8 as a newly allocated
// PyBytes object, and then return a reference to that. So what we're
// doing here isn't fundamentally any different, and the APIs for
// converting to &str are deprecated in PyO3 0.21.
//
// Once Python 3.10 is the minimum supported version, converting to &str
// will be cheaper, and we should do that. Python 3.9 security updates
// end-of-life is Oct 31, 2025.
Ok(AnyValue::StringOwned(
ob.extract::<String>().unwrap().into(),
))
}

fn get_bytes(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
let value = ob.extract::<&[u8]>().unwrap();
fn get_bytes<'py>(ob: &Bound<'py, PyAny>, _strict: bool) -> PyResult<AnyValue<'py>> {
let value = ob.extract::<&'py [u8]>().unwrap();
Ok(AnyValue::Binary(value))
}

fn get_date(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let date = UTILS
.as_ref(py)
.bind(py)
.getattr(intern!(py, "date_to_int"))
.unwrap()
.call1((ob,))
Expand All @@ -172,10 +187,10 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
})
}

fn get_datetime(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let date = UTILS
.as_ref(py)
.bind(py)
.getattr(intern!(py, "datetime_to_int"))
.unwrap()
.call1((ob, intern!(py, "us")))
Expand All @@ -185,10 +200,10 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
})
}

fn get_timedelta(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let td = UTILS
.as_ref(py)
.bind(py)
.getattr(intern!(py, "timedelta_to_int"))
.unwrap()
.call1((ob, intern!(py, "us")))
Expand All @@ -198,10 +213,10 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
})
}

fn get_time(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Python::with_gil(|py| {
let time = UTILS
.as_ref(py)
.bind(py)
.getattr(intern!(py, "time_to_int"))
.unwrap()
.call1((ob,))
Expand All @@ -211,7 +226,7 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
})
}

fn get_decimal(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
fn abs_decimal_from_digits(
digits: impl IntoIterator<Item = u8>,
exp: i32,
Expand Down Expand Up @@ -256,15 +271,14 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
Ok(AnyValue::Decimal(v, scale))
}

fn get_list(ob: &PyAny, strict: bool) -> PyResult<AnyValue> {
fn get_list_with_constructor(ob: &PyAny) -> PyResult<AnyValue> {
fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult<AnyValue<'static>> {
fn get_list_with_constructor(ob: &Bound<'_, PyAny>) -> PyResult<AnyValue<'static>> {
// Use the dedicated constructor.
// This constructor is able to go via dedicated type constructors
// so it can be much faster.
Python::with_gil(|py| {
let s = SERIES.call1(py, (ob,))?;
get_list_from_series(s.as_ref(py), true)
})
let py = ob.py();
let s = SERIES.call1(py, (ob,))?;
get_list_from_series(s.bind(py), true)
}

if ob.is_empty()? {
Expand All @@ -276,12 +290,12 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa

let mut avs = Vec::with_capacity(INFER_SCHEMA_LENGTH);
let mut iter = list.iter()?;

let mut items = Vec::with_capacity(INFER_SCHEMA_LENGTH);
for item in (&mut iter).take(INFER_SCHEMA_LENGTH) {
let av = py_object_to_any_value(item?, strict)?;
items.push(item?);
let av = py_object_to_any_value(items.last().unwrap(), strict)?;
avs.push(av)
}

let (dtype, n_dtypes) = any_values_to_supertype_and_n_dtypes(&avs)
.map_err(|e| PyTypeError::new_err(e.to_string()))?;

Expand All @@ -290,9 +304,12 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
get_list_with_constructor(ob)
} else {
// Push the rest.
avs.reserve(list.len()?);
let length = list.len()?;
avs.reserve(length);
let mut rest = Vec::with_capacity(length);
for item in iter {
let av = py_object_to_any_value(item?, strict)?;
rest.push(item?);
let av = py_object_to_any_value(rest.last().unwrap(), strict)?;
avs.push(av)
}

Expand All @@ -310,31 +327,33 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
}
}

fn get_list_from_series(ob: &PyAny, _strict: bool) -> PyResult<AnyValue<'static>> {
fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
let s = super::get_series(ob)?;
Ok(AnyValue::List(s))
}

fn get_struct(ob: &PyAny, strict: bool) -> PyResult<AnyValue<'_>> {
fn get_struct<'py>(ob: &Bound<'py, PyAny>, strict: bool) -> PyResult<AnyValue<'py>> {
let dict = ob.downcast::<PyDict>().unwrap();
let len = dict.len();
let mut keys = Vec::with_capacity(len);
let mut vals = Vec::with_capacity(len);
for (k, v) in dict.into_iter() {
let key = k.extract::<&str>()?;
let val = py_object_to_any_value(v, strict)?;
let key = k.extract::<Cow<str>>()?;
let val = py_object_to_any_value(&v, strict)?;
let dtype = val.dtype();
keys.push(Field::new(key, dtype));
keys.push(Field::new(&key, dtype));
vals.push(val)
}
Ok(AnyValue::StructOwned(Box::new((vals, keys))))
}

fn get_object(ob: &PyAny, _strict: bool) -> PyResult<AnyValue> {
fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
#[cfg(feature = "object")]
{
// This is slow, but hey don't use objects.
let v = &ObjectValue { inner: ob.into() };
let v = &ObjectValue {
inner: ob.clone().unbind(),
};
Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed())))
}
#[cfg(not(feature = "object"))]
Expand All @@ -345,7 +364,7 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
///
/// Note: This function is only ran if the object's type is not already in the
/// lookup table.
fn get_conversion_function(ob: &PyAny, py: Python) -> InitFn {
fn get_conversion_function(ob: &Bound<'_, PyAny>, py: Python<'_>) -> InitFn {
if ob.is_none() {
get_null
}
Expand All @@ -371,50 +390,49 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult<AnyVa
match &*type_name {
// Can't use pyo3::types::PyDateTime with abi3-py37 feature,
// so need this workaround instead of `isinstance(ob, datetime)`.
"date" => get_date,
"time" => get_time,
"datetime" => get_datetime,
"timedelta" => get_timedelta,
"Decimal" => get_decimal,
"range" => get_list,
"date" => get_date as InitFn,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lacking the `as InitFn`, I got errors about "expected fn pointer, got fn item" (or vice versa?).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Unresolving temporarily, just in case someone else reviews)

"time" => get_time as InitFn,
"datetime" => get_datetime as InitFn,
"timedelta" => get_timedelta as InitFn,
"Decimal" => get_decimal as InitFn,
"range" => get_list as InitFn,
_ => {
// Support NumPy scalars.
if ob.extract::<i64>().is_ok() || ob.extract::<u64>().is_ok() {
return get_int;
return get_int as InitFn;
} else if ob.extract::<f64>().is_ok() {
return get_float;
return get_float as InitFn;
}

// Support custom subclasses of datetime/date.
let ancestors = ob.get_type().getattr(intern!(py, "__mro__")).unwrap();
let ancestors_str_iter = ancestors
.iter()
.unwrap()
.map(|b| b.unwrap().str().unwrap().to_str().unwrap());
.map(|b| b.unwrap().str().unwrap().to_string());
for c in ancestors_str_iter {
match c {
match &*c {
// datetime must be checked before date because
// Python datetime is an instance of date.
"<class 'datetime.datetime'>" => return get_datetime,
"<class 'datetime.date'>" => return get_date,
"<class 'datetime.datetime'>" => return get_datetime as InitFn,
"<class 'datetime.date'>" => return get_date as InitFn,
_ => (),
}
}

get_object
get_object as InitFn
},
}
}
}

let type_object_ptr = PyType::as_type_ptr(ob.get_type()) as usize;
let type_object_ptr = ob.get_type().as_type_ptr() as usize;

Python::with_gil(|py| {
LUT.with_gil(py, |lut| {
let convert_fn = lut
.entry(type_object_ptr)
.or_insert_with(|| get_conversion_function(ob, py));

convert_fn(ob, strict)
})
})
Expand Down
10 changes: 5 additions & 5 deletions py-polars/src/conversion/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ pub(crate) fn get_lf(obj: &PyAny) -> PyResult<LazyFrame> {
Ok(pydf.extract::<PyLazyFrame>()?.ldf)
}

pub(crate) fn get_series(obj: &PyAny) -> PyResult<Series> {
pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
let pydf = obj.getattr(intern!(obj.py(), "_s"))?;
Ok(pydf.extract::<PySeries>()?.series)
}

pub(crate) fn to_series(py: Python, s: PySeries) -> PyObject {
let series = SERIES.as_ref(py);
let series = SERIES.bind(py);
let constructor = series
.getattr(intern!(series.py(), "_from_pyseries"))
.unwrap();
Expand All @@ -85,7 +85,7 @@ pub(crate) fn to_series(py: Python, s: PySeries) -> PyObject {

#[cfg(feature = "csv")]
impl<'a> FromPyObject<'a> for Wrap<NullValues> {
fn extract(ob: &'a PyAny) -> PyResult<Self> {
fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult<Self> {
if let Ok(s) = ob.extract::<String>() {
Ok(Wrap(NullValues::AllColumnsSingle(s)))
} else if let Ok(s) = ob.extract::<Vec<String>>() {
Expand All @@ -106,7 +106,7 @@ fn struct_dict<'a>(
vals: impl Iterator<Item = AnyValue<'a>>,
flds: &[Field],
) -> PyObject {
let dict = PyDict::new(py);
let dict = PyDict::new_bound(py);
for (fld, val) in flds.iter().zip(vals) {
dict.set_item(fld.name().as_str(), Wrap(val)).unwrap()
}
Expand Down Expand Up @@ -344,7 +344,7 @@ impl FromPyObject<'_> for Wrap<DataType> {
},
"Enum" => {
let categories = ob.getattr(intern!(py, "categories")).unwrap();
let s = get_series(categories)?;
let s = get_series(&categories.as_borrowed())?;
let ca = s.str().map_err(PyPolarsErr::from)?;
let categories = ca.downcast_iter().next().unwrap().clone();
create_enum_data_type(categories)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe/construction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ fn dicts_to_rows<'a>(data: &'a PyAny, names: &'a [String], strict: bool) -> PyRe
for k in names.iter() {
let val = match d.get_item(k)? {
None => AnyValue::Null,
Some(val) => py_object_to_any_value(val, strict)?,
Some(val) => py_object_to_any_value(&val.as_borrowed(), strict)?,
};
row.push(val)
}
Expand Down
Loading
Loading