Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adopt jiter 0.2.0 #1250

Merged
merged 6 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ base64 = "0.21.7"
num-bigint = "0.4.4"
python3-dll-a = "0.2.7"
uuid = "1.7.0"
jiter = { version = "0.1.1", features = ["python"] }
jiter = { version = "0.2.1", features = ["python"] }

[lib]
name = "_pydantic_core"
Expand Down
10 changes: 2 additions & 8 deletions src/input/return_enums.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use serde::{ser::Error, Serialize, Serializer};
use crate::errors::{
py_err_string, ErrorType, ErrorTypeDefaults, InputValue, ToErrorValue, ValError, ValLineError, ValResult,
};
use crate::tools::{extract_i64, py_err};
use crate::tools::{extract_i64, new_py_string, py_err};
use crate::validators::{CombinedValidator, Exactness, ValidationState, Validator};

use super::{py_error_on_minusone, BorrowInput, Input};
Expand Down Expand Up @@ -437,13 +437,7 @@ impl<'a> EitherString<'a> {

pub fn as_py_string(&'a self, py: Python<'a>, cache_str: StringCacheMode) -> Bound<'a, PyString> {
match self {
Self::Cow(cow) => {
if matches!(cache_str, StringCacheMode::All) {
jiter::cached_py_string(py, cow.as_ref())
} else {
PyString::new_bound(py, cow.as_ref())
}
}
Self::Cow(cow) => new_py_string(py, cow.as_ref(), cache_str),
Self::Py(py_string) => py_string.clone(),
}
}
Expand Down
55 changes: 26 additions & 29 deletions src/input/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::{intern, Py, PyAny, Python};

use num_bigint::BigInt;
use jiter::{JsonErrorType, NumberInt};

use crate::errors::{ErrorTypeDefaults, ValError, ValResult};

Expand Down Expand Up @@ -68,29 +68,24 @@ fn strip_underscores(s: &str) -> Option<String> {
}

/// parse a string as an int
///
/// max length of the input is 4300, see
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
let str = str.trim();
let len = str.len();
if len > 4300 {
Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input))
} else if let Some(int) = _parse_str(input, str, len) {
Ok(int)
} else if let Some(str_stripped) = strip_decimal_zeros(str) {
if let Some(int) = _parse_str(input, str_stripped, len) {
Ok(int)
} else {
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))

// we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
match NumberInt::try_from(str.as_bytes()) {
Ok(NumberInt::Int(i)) => return Ok(EitherInt::I64(i)),
Ok(NumberInt::BigInt(i)) => return Ok(EitherInt::BigInt(i)),
Err(e) => {
if e.error_type == JsonErrorType::NumberOutOfRange {
return Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input));
}
}
}

if let Some(str_stripped) = strip_decimal_zeros(str) {
_parse_str(input, str_stripped)
} else if let Some(str_stripped) = strip_underscores(str) {
if let Some(int) = _parse_str(input, &str_stripped, len) {
Ok(int)
} else {
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
}
_parse_str(input, &str_stripped)
} else {
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
}
Expand All @@ -108,16 +103,18 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
}

/// parse a string as an int, `input` is required here to get lifetimes to match up
///
fn _parse_str<'py>(_input: &(impl Input<'py> + ?Sized), str: &str, len: usize) -> Option<EitherInt<'py>> {
if len < 19 {
if let Ok(i) = str.parse::<i64>() {
return Some(EitherInt::I64(i));
}
} else if let Ok(i) = str.parse::<BigInt>() {
return Some(EitherInt::BigInt(i));
/// max length of the input is 4300 which is checked by jiter, see
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
fn _parse_str<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
match NumberInt::try_from(str.as_bytes()) {
Ok(jiter::NumberInt::Int(i)) => Ok(EitherInt::I64(i)),
Ok(jiter::NumberInt::BigInt(i)) => Ok(EitherInt::BigInt(i)),
Err(e) => match e.error_type {
JsonErrorType::NumberOutOfRange => Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input)),
_ => Err(ValError::new(ErrorTypeDefaults::IntParsing, input)),
},
}
None
}

/// we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore
Expand Down
12 changes: 12 additions & 0 deletions src/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use pyo3::prelude::*;
use pyo3::types::{PyDict, PyString};
use pyo3::{ffi, intern, FromPyObject};

use jiter::{cached_py_string, pystring_fast_new, StringCacheMode};

pub trait SchemaDict<'py> {
fn get_as<T>(&self, key: &Bound<'_, PyString>) -> PyResult<Option<T>>
where
Expand Down Expand Up @@ -143,3 +145,13 @@ pub fn extract_i64(v: &Bound<'_, PyAny>) -> Option<i64> {
None
}
}

/// Create a Python `str` from `s`, honouring the requested string cache mode.
///
/// When `cache_str` is `StringCacheMode::All` the string is obtained via jiter's
/// `cached_py_string`; for every other mode it is built with `pystring_fast_new`.
/// Both calls are always given `false` for their ascii-only flag.
pub(crate) fn new_py_string<'py>(py: Python<'py>, s: &str, cache_str: StringCacheMode) -> Bound<'py, PyString> {
    // we could use `bytecount::num_chars(s.as_bytes()) == s.len()` as orjson does, but it doesn't appear to be faster
    const ASCII_ONLY: bool = false;

    match cache_str {
        StringCacheMode::All => cached_py_string(py, s, ASCII_ONLY),
        _ => pystring_fast_new(py, s, ASCII_ONLY),
    }
}
7 changes: 2 additions & 5 deletions src/validators/validation_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use pyo3::types::PyString;
use jiter::StringCacheMode;

use crate::recursion_guard::{ContainsRecursionState, RecursionState};
use crate::tools::new_py_string;

use super::Extra;

Expand Down Expand Up @@ -72,11 +73,7 @@ impl<'a, 'py> ValidationState<'a, 'py> {
}

pub fn maybe_cached_str(&self, py: Python<'py>, s: &str) -> Bound<'py, PyString> {
if matches!(self.extra.cache_str, StringCacheMode::All) {
jiter::cached_py_string(py, s)
} else {
PyString::new_bound(py, s)
}
new_py_string(py, s, self.extra.cache_str)
}
}

Expand Down
Loading