Skip to content

Commit

Permalink
test: make miri happy in lazyvalue
Browse files Browse the repository at this point in the history
  • Loading branch information
liuq19 committed Nov 2, 2024
1 parent be587a8 commit 565b650
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 49 deletions.
59 changes: 23 additions & 36 deletions src/lazyvalue/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use faststr::FastStr;

use crate::{
error::Result,
input::{JsonInput, JsonSlice},
input::JsonInput,
lazyvalue::LazyValue,
parser::{Parser, DEFAULT_KEY_BUF_CAPACITY},
reader::{Read, Reader},
Expand Down Expand Up @@ -35,8 +35,7 @@ use crate::{
/// }
/// ```
pub struct ObjectJsonIter<'de> {
json: JsonSlice<'de>,
parser: Option<Parser<Read<'de>>>,
parser: Parser<Read<'de>>,
strbuf: Vec<u8>,
first: bool,
ending: bool,
Expand Down Expand Up @@ -71,18 +70,16 @@ pub struct ObjectJsonIter<'de> {
/// }
/// ```
pub struct ArrayJsonIter<'de> {
json: JsonSlice<'de>,
parser: Option<Parser<Read<'de>>>,
parser: Parser<Read<'de>>,
first: bool,
ending: bool,
check: bool,
}

impl<'de> ObjectJsonIter<'de> {
fn new(json: JsonSlice<'de>, check: bool) -> Self {
fn new<I: JsonInput<'de>>(json: I, check: bool) -> Self {
Self {
json,
parser: None,
parser: Parser::new(Read::new_in(json, check)),
strbuf: Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY),
first: true,
ending: false,
Expand All @@ -95,24 +92,21 @@ impl<'de> ObjectJsonIter<'de> {
return None;
}

if self.parser.is_none() {
let slice = self.json.as_ref();
let slice = unsafe { std::slice::from_raw_parts(slice.as_ptr(), slice.len()) };
let parser = Parser::new(Read::new(slice, check));
if self.first {
// check invalid utf8
if let Err(err) = parser.read.check_utf8_final() {
if let Err(err) = self.parser.read.check_utf8_final() {
self.ending = true;
return Some(Err(err));
}
self.parser = Some(parser);
}

let parser = unsafe { self.parser.as_mut().unwrap_unchecked() };
unsafe { parser.read.update_slice(self.json.as_ref().as_ptr()) };
match parser.parse_entry_lazy(&mut self.strbuf, &mut self.first, check) {
match self
.parser
.parse_entry_lazy(&mut self.strbuf, &mut self.first, check)
{
Ok(ret) => {
if let Some((key, val, has_escaped)) = ret {
let val = self.json.slice_ref(val);
let val = self.parser.read.as_json_slice().slice_ref(val);
Some(LazyValue::new(val, has_escaped).map(|v| (key, v)))
} else {
self.ending = true;
Expand All @@ -128,10 +122,9 @@ impl<'de> ObjectJsonIter<'de> {
}

impl<'de> ArrayJsonIter<'de> {
fn new(json: JsonSlice<'de>, check: bool) -> Self {
fn new<I: JsonInput<'de>>(input: I, check: bool) -> Self {
Self {
json,
parser: None,
parser: Parser::new(Read::new_in(input, check)),
first: true,
ending: false,
check,
Expand All @@ -143,24 +136,18 @@ impl<'de> ArrayJsonIter<'de> {
return None;
}

if self.parser.is_none() {
let slice = self.json.as_ref();
let slice = unsafe { std::slice::from_raw_parts(slice.as_ptr(), slice.len()) };
let parser = Parser::new(Read::new(slice, check));
if self.first {
// check invalid utf8
if let Err(err) = parser.read.check_utf8_final() {
if let Err(err) = self.parser.read.check_utf8_final() {
self.ending = true;
return Some(Err(err));
}
self.parser = Some(parser);
}

let parser = self.parser.as_mut().unwrap();
unsafe { parser.read.update_slice(self.json.as_ref().as_ptr()) };
match parser.parse_array_elem_lazy(&mut self.first, check) {
match self.parser.parse_array_elem_lazy(&mut self.first, check) {
Ok(ret) => {
if let Some((ret, has_escaped)) = ret {
let val = self.json.slice_ref(ret);
if let Some((val, has_escaped)) = ret {
let val = self.parser.read.as_json_slice().slice_ref(val);
Some(LazyValue::new(val, has_escaped))
} else {
self.ending = true;
Expand Down Expand Up @@ -215,7 +202,7 @@ impl<'de> ArrayJsonIter<'de> {
/// }
/// ```
pub fn to_object_iter<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
ObjectJsonIter::new(json.to_json_slice(), true)
ObjectJsonIter::new(json, true)
}

/// Traverse the JSON array text through a lazy iterator. The JSON parsing will be done when
Expand Down Expand Up @@ -252,7 +239,7 @@ pub fn to_object_iter<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
/// }
/// ```
pub fn to_array_iter<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
ArrayJsonIter::new(json.to_json_slice(), true)
ArrayJsonIter::new(json, true)
}

/// Traverse the JSON text through a lazy object iterator. The JSON parsing will be done when
Expand Down Expand Up @@ -289,7 +276,7 @@ pub fn to_array_iter<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
/// }
/// ```
pub unsafe fn to_object_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> ObjectJsonIter<'de> {
ObjectJsonIter::new(json.to_json_slice(), false)
ObjectJsonIter::new(json, false)
}

/// Traverse the JSON text through a lazy object iterator. The JSON parsing will be done when
Expand Down Expand Up @@ -323,7 +310,7 @@ pub unsafe fn to_object_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> Objec
/// }
/// ```
pub unsafe fn to_array_iter_unchecked<'de, I: JsonInput<'de>>(json: I) -> ArrayJsonIter<'de> {
ArrayJsonIter::new(json.to_json_slice(), false)
ArrayJsonIter::new(json, false)
}

impl<'de> Iterator for ObjectJsonIter<'de> {
Expand Down
39 changes: 26 additions & 13 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{marker::PhantomData, ops::Deref, ptr::NonNull};

use crate::{
error::invalid_utf8,
input::JsonSlice,
util::{private::Sealed, utf8::from_utf8},
JsonInput, Result,
};
Expand Down Expand Up @@ -75,7 +76,6 @@ pub trait Reader<'de>: Sealed {
}
fn cur_ptr(&mut self) -> *mut u8;

unsafe fn update_slice(&mut self, _start: *const u8) {}
/// # Safety
/// cur must be a valid pointer in the slice
unsafe fn set_ptr(&mut self, cur: *mut u8);
Expand All @@ -92,6 +92,8 @@ pub trait Reader<'de>: Sealed {
fn check_utf8_final(&self) -> Result<()> {
Ok(())
}

fn as_json_slice(&self) -> JsonSlice<'de>;
}

/// JSON input source that reads from a string/bytes-like JSON input.
Expand Down Expand Up @@ -119,6 +121,7 @@ pub trait Reader<'de>: Sealed {
/// assert_eq!(num, 123);
/// ```
pub struct Read<'a> {
input: JsonSlice<'a>,
slice: &'a [u8],
pub(crate) index: usize,
// next invalid utf8 position, if not found, will be usize::MAX
Expand All @@ -128,21 +131,25 @@ pub struct Read<'a> {
impl<'a> Read<'a> {
/// Make a `Read` from string/bytes-like JSON input.
pub fn from<I: JsonInput<'a>>(input: I) -> Self {
Self::new(input.to_u8_slice(), input.need_utf8_valid())
let need = input.need_utf8_valid();
Self::new_in(input, need)
}

pub(crate) fn new(slice: &'a [u8], need_validate: bool) -> Self {
Self::new_in(slice, need_validate)
}

pub(crate) fn new_in<I: JsonInput<'a>>(input: I, need_validate: bool) -> Self {
let slice = input.to_u8_slice();
let input = input.to_json_slice();
// validate the utf-8 at first for slice
let next_invalid_utf8 = if need_validate {
match from_utf8(slice) {
Ok(_) => usize::MAX,
Err(e) => e.offset(),
}
} else {
usize::MAX
let next_invalid_utf8 = match from_utf8(slice) {
Err(e) if need_validate => e.offset(),
_ => usize::MAX,
};

Self {
input,
slice,
index: 0,
next_invalid_utf8,
Expand All @@ -156,6 +163,11 @@ impl<'a> Reader<'a> for Read<'a> {
self.slice.len() - self.index
}

#[inline(always)]
fn as_json_slice(&self) -> JsonSlice<'a> {
self.input.clone()
}

#[inline(always)]
fn peek_n(&mut self, n: usize) -> Option<&'a [u8]> {
let end = self.index + n;
Expand Down Expand Up @@ -196,10 +208,6 @@ impl<'a> Reader<'a> for Read<'a> {
}
}

unsafe fn update_slice(&mut self, start: *const u8) {
self.slice = std::slice::from_raw_parts(start, self.slice.len());
}

#[inline(always)]
fn cur_ptr(&mut self) -> *mut u8 {
panic!("should only used in PaddedSliceRead");
Expand Down Expand Up @@ -286,6 +294,11 @@ impl<'a> Reader<'a> for PaddedSliceRead<'a> {
unsafe { std::slice::from_raw_parts(self.base.as_ptr(), self.len) }
}

/// Converts the bytes returned by `as_u8_slice` into a `JsonSlice`.
#[inline(always)]
fn as_json_slice(&self) -> JsonSlice<'a> {
    let bytes = self.as_u8_slice();
    bytes.into()
}

#[inline(always)]
fn remain(&self) -> usize {
let remain = self.len as isize - self.index() as isize;
Expand Down
15 changes: 15 additions & 0 deletions src/util/utf8.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::error::{Error, ErrorCode, Result};

// simdutf8 triggers `out-of-bounds pointer arithmetic` errors when run under Miri
#[cfg(not(miri))]
#[inline(always)]
pub(crate) fn from_utf8(data: &[u8]) -> Result<&str> {
match simdutf8::basic::from_utf8(data) {
Expand All @@ -11,6 +13,19 @@ pub(crate) fn from_utf8(data: &[u8]) -> Result<&str> {
}
}

/// Validates `data` as UTF-8 with the standard library.
///
/// This variant is compiled only under Miri (`#[cfg(miri)]`). On failure it
/// returns an `ErrorCode::InvalidUTF8` syntax error built from `data` and the
/// length of the valid prefix reported by the standard library.
#[cfg(miri)]
pub(crate) fn from_utf8(data: &[u8]) -> Result<&str> {
    std::str::from_utf8(data)
        .map_err(|err| Error::syntax(ErrorCode::InvalidUTF8, data, err.valid_up_to()))
}

#[cfg(not(miri))]
#[cold]
fn from_utf8_compat(data: &[u8]) -> Result<&str> {
// compat::from_utf8 is slower than basic::from_utf8
Expand Down

0 comments on commit 565b650

Please sign in to comment.