diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 251db06b0044..52eeb9214a21 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -429,6 +429,7 @@ impl<'a> AnyValue<'a> { NumCast::from((*v).parse::().ok()?) } }, + StringOwned(v) => String(v).extract(), _ => None, } } diff --git a/py-polars/src/conversion/any_value.rs b/py-polars/src/conversion/any_value.rs index 14b2df63b744..f90262b81150 100644 --- a/py-polars/src/conversion/any_value.rs +++ b/py-polars/src/conversion/any_value.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + #[cfg(feature = "object")] use polars::chunked_array::object::PolarsObjectSafe; use polars::datatypes::{DataType, Field, OwnedObject, PlHashMap, TimeUnit}; @@ -6,9 +8,7 @@ use polars_core::utils::any_values_to_supertype_and_n_dtypes; use pyo3::exceptions::{PyOverflowError, PyTypeError}; use pyo3::intern; use pyo3::prelude::*; -use pyo3::types::{ - PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple, PyType, -}; +use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple}; use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap}; use crate::error::PyPolarsErr; @@ -27,14 +27,14 @@ impl ToPyObject for Wrap> { } } -impl<'s> FromPyObject<'s> for Wrap> { - fn extract(ob: &'s PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap> { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { py_object_to_any_value(ob, true).map(Wrap) } } pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject { - let utils = UTILS.as_ref(py); + let utils = UTILS.bind(py); match av { AnyValue::UInt8(v) => v.into_py(py), AnyValue::UInt16(v) => v.into_py(py), @@ -105,7 +105,7 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject { N * std::mem::size_of::(), ) }; - let digits = PyTuple::new(py, buf.iter().take(n_digits)); + let digits = 
PyTuple::new_bound(py, buf.iter().take(n_digits)); convert .call1((v.is_negative() as u8, digits, n_digits, -(scale as i32))) .unwrap() @@ -115,22 +115,25 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject { } type TypeObjectPtr = usize; -type InitFn = fn(&PyAny, bool) -> PyResult; +type InitFn = for<'py> fn(&Bound<'py, PyAny>, bool) -> PyResult>; pub(crate) static LUT: crate::gil_once_cell::GILOnceCell> = crate::gil_once_cell::GILOnceCell::new(); -pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult { +pub(crate) fn py_object_to_any_value<'py>( + ob: &Bound<'py, PyAny>, + strict: bool, +) -> PyResult> { // Conversion functions. - fn get_null(_ob: &PyAny, _strict: bool) -> PyResult { + fn get_null(_ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { Ok(AnyValue::Null) } - fn get_bool(ob: &PyAny, _strict: bool) -> PyResult { + fn get_bool(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { let b = ob.extract::().unwrap(); Ok(AnyValue::Boolean(b)) } - fn get_int(ob: &PyAny, strict: bool) -> PyResult { + fn get_int(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult> { if let Ok(v) = ob.extract::() { Ok(AnyValue::Int64(v)) } else if let Ok(v) = ob.extract::() { @@ -145,24 +148,36 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult { + fn get_float(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { Ok(AnyValue::Float64(ob.extract::().unwrap())) } - fn get_str(ob: &PyAny, _strict: bool) -> PyResult { - let value = ob.extract::<&str>().unwrap(); - Ok(AnyValue::String(value)) + fn get_str(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { + // Ideally we'd be returning an AnyValue::String(&str) instead, as was + // the case in previous versions of this function. However, if compiling + // with abi3 for versions older than Python 3.10, the APIs that purport + // to return &str actually just encode to UTF-8 as a newly allocated + // PyBytes object, and then return reference to that. 
So what we're + // doing here isn't any different fundamentally, and the APIs for + // converting to &str are deprecated in PyO3 0.21. + // + // Once Python 3.10 is the minimum supported version, converting to &str + // will be cheaper, and we should do that. Python 3.9 security updates + // end-of-life is Oct 31, 2025. + Ok(AnyValue::StringOwned( + ob.extract::().unwrap().into(), + )) } - fn get_bytes(ob: &PyAny, _strict: bool) -> PyResult { - let value = ob.extract::<&[u8]>().unwrap(); + fn get_bytes<'py>(ob: &Bound<'py, PyAny>, _strict: bool) -> PyResult> { + let value = ob.extract::<&'py [u8]>().unwrap(); Ok(AnyValue::Binary(value)) } - fn get_date(ob: &PyAny, _strict: bool) -> PyResult { + fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { Python::with_gil(|py| { let date = UTILS - .as_ref(py) + .bind(py) .getattr(intern!(py, "date_to_int")) .unwrap() .call1((ob,)) @@ -172,10 +187,10 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult { + fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { Python::with_gil(|py| { let date = UTILS - .as_ref(py) + .bind(py) .getattr(intern!(py, "datetime_to_int")) .unwrap() .call1((ob, intern!(py, "us"))) @@ -185,10 +200,10 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult { + fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { Python::with_gil(|py| { let td = UTILS - .as_ref(py) + .bind(py) .getattr(intern!(py, "timedelta_to_int")) .unwrap() .call1((ob, intern!(py, "us"))) @@ -198,10 +213,10 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult { + fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { Python::with_gil(|py| { let time = UTILS - .as_ref(py) + .bind(py) .getattr(intern!(py, "time_to_int")) .unwrap() .call1((ob,)) @@ -211,7 +226,7 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult { + fn get_decimal(ob: &Bound<'_, 
PyAny>, _strict: bool) -> PyResult> { fn abs_decimal_from_digits( digits: impl IntoIterator, exp: i32, @@ -256,15 +271,14 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult { - fn get_list_with_constructor(ob: &PyAny) -> PyResult { + fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult> { + fn get_list_with_constructor(ob: &Bound<'_, PyAny>) -> PyResult> { // Use the dedicated constructor. // This constructor is able to go via dedicated type constructors // so it can be much faster. - Python::with_gil(|py| { - let s = SERIES.call1(py, (ob,))?; - get_list_from_series(s.as_ref(py), true) - }) + let py = ob.py(); + let s = SERIES.call1(py, (ob,))?; + get_list_from_series(s.bind(py), true) } if ob.is_empty()? { @@ -276,12 +290,12 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult PyResult PyResult PyResult> { + fn get_list_from_series(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { let s = super::get_series(ob)?; Ok(AnyValue::List(s)) } - fn get_struct(ob: &PyAny, strict: bool) -> PyResult> { + fn get_struct<'py>(ob: &Bound<'py, PyAny>, strict: bool) -> PyResult> { let dict = ob.downcast::().unwrap(); let len = dict.len(); let mut keys = Vec::with_capacity(len); let mut vals = Vec::with_capacity(len); for (k, v) in dict.into_iter() { - let key = k.extract::<&str>()?; - let val = py_object_to_any_value(v, strict)?; + let key = k.extract::>()?; + let val = py_object_to_any_value(&v, strict)?; let dtype = val.dtype(); - keys.push(Field::new(key, dtype)); + keys.push(Field::new(&key, dtype)); vals.push(val) } Ok(AnyValue::StructOwned(Box::new((vals, keys)))) } - fn get_object(ob: &PyAny, _strict: bool) -> PyResult { + fn get_object(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { #[cfg(feature = "object")] { // This is slow, but hey don't use objects. 
- let v = &ObjectValue { inner: ob.into() }; + let v = &ObjectValue { + inner: ob.clone().unbind(), + }; Ok(AnyValue::ObjectOwned(OwnedObject(v.to_boxed()))) } #[cfg(not(feature = "object"))] @@ -345,7 +364,7 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult InitFn { + fn get_conversion_function(ob: &Bound<'_, PyAny>, py: Python<'_>) -> InitFn { if ob.is_none() { get_null } @@ -371,18 +390,18 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult get_date, - "time" => get_time, - "datetime" => get_datetime, - "timedelta" => get_timedelta, - "Decimal" => get_decimal, - "range" => get_list, + "date" => get_date as InitFn, + "time" => get_time as InitFn, + "datetime" => get_datetime as InitFn, + "timedelta" => get_timedelta as InitFn, + "Decimal" => get_decimal as InitFn, + "range" => get_list as InitFn, _ => { // Support NumPy scalars. if ob.extract::().is_ok() || ob.extract::().is_ok() { - return get_int; + return get_int as InitFn; } else if ob.extract::().is_ok() { - return get_float; + return get_float as InitFn; } // Support custom subclasses of datetime/date. 
@@ -390,31 +409,30 @@ pub(crate) fn py_object_to_any_value(ob: &PyAny, strict: bool) -> PyResult" => return get_datetime, - "" => return get_date, + "" => return get_datetime as InitFn, + "" => return get_date as InitFn, _ => (), } } - get_object + get_object as InitFn }, } } } - let type_object_ptr = PyType::as_type_ptr(ob.get_type()) as usize; + let type_object_ptr = ob.get_type().as_type_ptr() as usize; Python::with_gil(|py| { LUT.with_gil(py, |lut| { let convert_fn = lut .entry(type_object_ptr) .or_insert_with(|| get_conversion_function(ob, py)); - convert_fn(ob, strict) }) }) diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index d281576843bc..3ce887d2b1db 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -70,13 +70,13 @@ pub(crate) fn get_lf(obj: &PyAny) -> PyResult { Ok(pydf.extract::()?.ldf) } -pub(crate) fn get_series(obj: &PyAny) -> PyResult { +pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult { let pydf = obj.getattr(intern!(obj.py(), "_s"))?; Ok(pydf.extract::()?.series) } pub(crate) fn to_series(py: Python, s: PySeries) -> PyObject { - let series = SERIES.as_ref(py); + let series = SERIES.bind(py); let constructor = series .getattr(intern!(series.py(), "_from_pyseries")) .unwrap(); @@ -85,7 +85,7 @@ pub(crate) fn to_series(py: Python, s: PySeries) -> PyObject { #[cfg(feature = "csv")] impl<'a> FromPyObject<'a> for Wrap { - fn extract(ob: &'a PyAny) -> PyResult { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { if let Ok(s) = ob.extract::() { Ok(Wrap(NullValues::AllColumnsSingle(s))) } else if let Ok(s) = ob.extract::>() { @@ -106,7 +106,7 @@ fn struct_dict<'a>( vals: impl Iterator>, flds: &[Field], ) -> PyObject { - let dict = PyDict::new(py); + let dict = PyDict::new_bound(py); for (fld, val) in flds.iter().zip(vals) { dict.set_item(fld.name().as_str(), Wrap(val)).unwrap() } @@ -344,7 +344,7 @@ impl FromPyObject<'_> for Wrap { }, "Enum" => { let categories = 
ob.getattr(intern!(py, "categories")).unwrap(); - let s = get_series(categories)?; + let s = get_series(&categories.as_borrowed())?; let ca = s.str().map_err(PyPolarsErr::from)?; let categories = ca.downcast_iter().next().unwrap().clone(); create_enum_data_type(categories) diff --git a/py-polars/src/dataframe/construction.rs b/py-polars/src/dataframe/construction.rs index 41ec8521a198..9dc77aeee81c 100644 --- a/py-polars/src/dataframe/construction.rs +++ b/py-polars/src/dataframe/construction.rs @@ -145,7 +145,7 @@ fn dicts_to_rows<'a>(data: &'a PyAny, names: &'a [String], strict: bool) -> PyRe for k in names.iter() { let val = match d.get_item(k)? { None => AnyValue::Null, - Some(val) => py_object_to_any_value(val, strict)?, + Some(val) => py_object_to_any_value(&val.as_borrowed(), strict)?, }; row.push(val) } diff --git a/py-polars/src/file.rs b/py-polars/src/file.rs index e3e8e7363ef8..3a8ecf1223ed 100644 --- a/py-polars/src/file.rs +++ b/py-polars/src/file.rs @@ -34,11 +34,11 @@ impl PyFileLikeObject { let buf = Python::with_gil(|py| { let bytes = self .inner - .call_method(py, "read", (), None) + .call_method_bound(py, "read", (), None) .expect("no read method found"); - let bytes: &PyBytes = bytes - .downcast(py) + let bytes: &Bound<'_, PyBytes> = bytes + .downcast_bound(py) .expect("Expecting to be able to downcast into bytes from read result."); bytes.as_bytes().to_vec() @@ -85,7 +85,7 @@ fn pyerr_to_io_err(e: PyErr) -> io::Error { Python::with_gil(|py| { let e_as_object: PyObject = e.into_py(py); - match e_as_object.call_method(py, "__str__", (), None) { + match e_as_object.call_method_bound(py, "__str__", (), None) { Ok(repr) => match repr.extract::(py) { Ok(s) => io::Error::new(io::ErrorKind::Other, s), Err(_e) => io::Error::new(io::ErrorKind::Other, "An unknown error has occurred"), @@ -100,11 +100,11 @@ impl Read for PyFileLikeObject { Python::with_gil(|py| { let bytes = self .inner - .call_method(py, "read", (buf.len(),), None) + 
.call_method_bound(py, "read", (buf.len(),), None) .map_err(pyerr_to_io_err)?; - let bytes: &PyBytes = bytes - .downcast(py) + let bytes: &Bound<'_, PyBytes> = bytes + .downcast_bound(py) .expect("Expecting to be able to downcast into bytes from read result."); buf.write_all(bytes.as_bytes())?; @@ -117,11 +117,11 @@ impl Read for PyFileLikeObject { impl Write for PyFileLikeObject { fn write(&mut self, buf: &[u8]) -> Result { Python::with_gil(|py| { - let pybytes = PyBytes::new(py, buf); + let pybytes = PyBytes::new_bound(py, buf); let number_bytes_written = self .inner - .call_method(py, "write", (pybytes,), None) + .call_method_bound(py, "write", (pybytes,), None) .map_err(pyerr_to_io_err)?; number_bytes_written.extract(py).map_err(pyerr_to_io_err) @@ -131,7 +131,7 @@ impl Write for PyFileLikeObject { fn flush(&mut self) -> Result<(), io::Error> { Python::with_gil(|py| { self.inner - .call_method(py, "flush", (), None) + .call_method_bound(py, "flush", (), None) .map_err(pyerr_to_io_err)?; Ok(()) @@ -150,7 +150,7 @@ impl Seek for PyFileLikeObject { let new_position = self .inner - .call_method(py, "seek", (offset, whence), None) + .call_method_bound(py, "seek", (offset, whence), None) .map_err(pyerr_to_io_err)?; new_position.extract(py).map_err(pyerr_to_io_err) @@ -174,9 +174,9 @@ pub enum EitherRustPythonFile { /// * `truncate` - open or create a new file. pub fn get_either_file(py_f: PyObject, truncate: bool) -> PyResult { Python::with_gil(|py| { - if let Ok(pstring) = py_f.downcast::(py) { - let s = pstring.to_str()?; - let file_path = std::path::Path::new(&s); + if let Ok(pstring) = py_f.downcast_bound::(py) { + let s = pstring.to_cow()?; + let file_path = std::path::Path::new(&*s); let file_path = resolve_homedir(file_path); let f = if truncate { File::create(file_path)? 
diff --git a/py-polars/src/functions/eager.rs b/py-polars/src/functions/eager.rs index c648b069e152..ac703eb97eb2 100644 --- a/py-polars/src/functions/eager.rs +++ b/py-polars/src/functions/eager.rs @@ -48,15 +48,15 @@ pub fn concat_df(dfs: &PyAny, py: Python) -> PyResult { } #[pyfunction] -pub fn concat_series(series: &PyAny) -> PyResult { +pub fn concat_series(series: &Bound<'_, PyAny>) -> PyResult { let mut iter = series.iter()?; let first = iter.next().unwrap()?; - let mut s = get_series(first)?; + let mut s = get_series(&first)?; for res in iter { let item = res?; - let item = get_series(item)?; + let item = get_series(&item)?; s.append(&item).map_err(PyPolarsErr::from)?; } Ok(s.into()) diff --git a/py-polars/src/functions/io.rs b/py-polars/src/functions/io.rs index 212d16b19210..c2b580147ed4 100644 --- a/py-polars/src/functions/io.rs +++ b/py-polars/src/functions/io.rs @@ -21,8 +21,8 @@ pub fn read_ipc_schema(py: Python, py_f: PyObject) -> PyResult { EitherRustPythonFile::Py(mut r) => read_file_metadata(&mut r).map_err(PyPolarsErr::from)?, }; - let dict = PyDict::new(py); - fields_to_pydict(&metadata.schema.fields, dict, py)?; + let dict = PyDict::new_bound(py); + fields_to_pydict(&metadata.schema.fields, &dict, py)?; Ok(dict.to_object(py)) } @@ -37,13 +37,13 @@ pub fn read_parquet_schema(py: Python, py_f: PyObject) -> PyResult { }; let arrow_schema = infer_schema(&metadata).map_err(PyPolarsErr::from)?; - let dict = PyDict::new(py); - fields_to_pydict(&arrow_schema.fields, dict, py)?; + let dict = PyDict::new_bound(py); + fields_to_pydict(&arrow_schema.fields, &dict, py)?; Ok(dict.to_object(py)) } #[cfg(any(feature = "ipc", feature = "parquet"))] -fn fields_to_pydict(fields: &Vec, dict: &PyDict, py: Python) -> PyResult<()> { +fn fields_to_pydict(fields: &Vec, dict: &Bound<'_, PyDict>, py: Python) -> PyResult<()> { for field in fields { let dt = if field.metadata.get(DTYPE_ENUM_KEY) == Some(&DTYPE_ENUM_VALUE.into()) { 
Wrap(create_enum_data_type(Utf8ViewArray::new_empty( diff --git a/py-polars/src/series/buffers.rs b/py-polars/src/series/buffers.rs index 968b194d84a5..02595018df0d 100644 --- a/py-polars/src/series/buffers.rs +++ b/py-polars/src/series/buffers.rs @@ -33,7 +33,7 @@ impl IntoPy for BufferInfo { } } impl<'a> FromPyObject<'a> for BufferInfo { - fn extract(ob: &'a PyAny) -> PyResult { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { let (pointer, offset, length) = ob.extract()?; Ok(Self { pointer, @@ -170,7 +170,7 @@ impl PySeries { py: Python, dtype: Wrap, buffer_info: BufferInfo, - owner: &PyAny, + owner: &Bound<'_, PyAny>, ) -> PyResult { let dtype = dtype.0; let BufferInfo { diff --git a/py-polars/src/series/construction.rs b/py-polars/src/series/construction.rs index 6f61420dbdfa..8ef4bf716a13 100644 --- a/py-polars/src/series/construction.rs +++ b/py-polars/src/series/construction.rs @@ -179,7 +179,7 @@ impl PySeries { fn new_from_any_values(name: &str, values: &PyAny, strict: bool) -> PyResult { let any_values_result = values .iter()? - .map(|v| py_object_to_any_value(v?, strict)) + .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict)) .collect::>>(); let result = any_values_result.and_then(|avs| { let s = Series::from_any_values(name, avs.as_slice(), strict).map_err(|e| { @@ -218,7 +218,7 @@ impl PySeries { ) -> PyResult { let any_values = values .iter()? - .map(|v| py_object_to_any_value(v?, strict)) + .map(|v| py_object_to_any_value(&(v?).as_borrowed(), strict)) .collect::>>()?; let s = Series::from_any_values_and_dtype(name, any_values.as_slice(), &dtype.0, strict) .map_err(|e| {