Skip to content

Commit

Permalink
Merge pull request #115 from kornelski/no_pos
Browse files Browse the repository at this point in the history
Keep old_pos only where necessary
  • Loading branch information
hsivonen authored Nov 13, 2024
2 parents 0fed272 + 494cb55 commit 5d9e4e4
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 48 deletions.
76 changes: 34 additions & 42 deletions src/handles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1160,17 +1160,12 @@ impl<'a> Utf8Destination<'a> {
pub struct Utf16Source<'a> {
slice: &'a [u16],
pos: usize,
old_pos: usize,
}

impl<'a> Utf16Source<'a> {
#[inline(always)]
pub fn new(src: &[u16]) -> Utf16Source {
Utf16Source {
slice: src,
pos: 0,
old_pos: 0,
}
Utf16Source { slice: src, pos: 0 }
}
#[inline(always)]
pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> {
Expand All @@ -1183,7 +1178,6 @@ impl<'a> Utf16Source<'a> {
#[allow(clippy::collapsible_if)]
#[inline(always)]
fn read(&mut self) -> char {
self.old_pos = self.pos;
let unit = self.slice[self.pos];
self.pos += 1;
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
Expand Down Expand Up @@ -1217,7 +1211,6 @@ impl<'a> Utf16Source<'a> {
#[allow(clippy::collapsible_if)]
#[inline(always)]
fn read_enum(&mut self) -> Unicode {
self.old_pos = self.pos;
let unit = self.slice[self.pos];
self.pos += 1;
if unit < 0x80 {
Expand Down Expand Up @@ -1252,11 +1245,6 @@ impl<'a> Utf16Source<'a> {
Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16))
}
#[inline(always)]
fn unread(&mut self) -> usize {
self.pos = self.old_pos;
self.pos
}
#[inline(always)]
/// Returns the current value of `pos`, i.e. the index into `slice` at which
/// the next `read()` / `read_enum()` call will start.
pub fn consumed(&self) -> usize {
self.pos
}
Expand Down Expand Up @@ -1417,15 +1405,11 @@ where
}
#[inline(always)]
pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) {
let character = self.source.read();
let handle = Utf16UnreadHandle::new(self.source);
(character, handle)
Utf16UnreadHandle::new_char(self.source)
}
#[inline(always)]
pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) {
let character = self.source.read_enum();
let handle = Utf16UnreadHandle::new(self.source);
(character, handle)
Utf16UnreadHandle::new_enum(self.source)
}
#[inline(always)]
pub fn consumed(&self) -> usize {
Expand All @@ -1438,19 +1422,30 @@ where
'b: 'a,
{
source: &'a mut Utf16Source<'b>,
old_pos: usize,
}

impl<'a, 'b> Utf16UnreadHandle<'a, 'b>
where
'b: 'a,
{
#[inline(always)]
fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> {
Utf16UnreadHandle { source: src }
fn new_char(source: &'a mut Utf16Source<'b>) -> (char, Self) {
let old_pos = source.pos;
let character = source.read();
(character, Self { source, old_pos })
}
/// Calls `source.read_enum()` and returns its result together with a new
/// handle that holds `source` and the position `source` had before the call.
///
/// The position is stored in the handle's `old_pos` field rather than in the
/// source itself.
#[inline(always)]
fn new_enum(source: &'a mut Utf16Source<'b>) -> (Unicode, Self) {
// Snapshot the position first: `read_enum()` advances `source.pos`.
let old_pos = source.pos;
let character = source.read_enum();
(character, Self { source, old_pos })
}

#[inline(always)]
pub fn unread(self) -> usize {
self.source.unread()
self.source.pos = self.old_pos;
self.old_pos
}
#[inline(always)]
pub fn consumed(&self) -> usize {
Expand All @@ -1467,7 +1462,6 @@ where
pub struct Utf8Source<'a> {
slice: &'a [u8],
pos: usize,
old_pos: usize,
}

impl<'a> Utf8Source<'a> {
Expand All @@ -1476,7 +1470,6 @@ impl<'a> Utf8Source<'a> {
Utf8Source {
slice: src.as_bytes(),
pos: 0,
old_pos: 0,
}
}
#[inline(always)]
Expand All @@ -1489,7 +1482,6 @@ impl<'a> Utf8Source<'a> {
}
#[inline(always)]
fn read(&mut self) -> char {
self.old_pos = self.pos;
let unit = self.slice[self.pos];
if unit < 0x80 {
self.pos += 1;
Expand Down Expand Up @@ -1517,7 +1509,6 @@ impl<'a> Utf8Source<'a> {
}
#[inline(always)]
fn read_enum(&mut self) -> Unicode {
self.old_pos = self.pos;
let unit = self.slice[self.pos];
if unit < 0x80 {
self.pos += 1;
Expand Down Expand Up @@ -1546,11 +1537,6 @@ impl<'a> Utf8Source<'a> {
}))
}
#[inline(always)]
fn unread(&mut self) -> usize {
self.pos = self.old_pos;
self.pos
}
#[inline(always)]
/// Returns the current value of `pos`, i.e. the index into `slice` at which
/// the next `read()` / `read_enum()` call will start.
pub fn consumed(&self) -> usize {
self.pos
}
Expand Down Expand Up @@ -1730,20 +1716,16 @@ where
'b: 'a,
{
#[inline(always)]
fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
Utf8ReadHandle { source: src }
fn new(source: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> {
Utf8ReadHandle { source }
}
#[inline(always)]
pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) {
let character = self.source.read();
let handle = Utf8UnreadHandle::new(self.source);
(character, handle)
Utf8UnreadHandle::new_char(self.source)
}
#[inline(always)]
pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) {
let character = self.source.read_enum();
let handle = Utf8UnreadHandle::new(self.source);
(character, handle)
Utf8UnreadHandle::new_enum(self.source)
}
#[inline(always)]
pub fn consumed(&self) -> usize {
Expand All @@ -1756,19 +1738,29 @@ where
'b: 'a,
{
source: &'a mut Utf8Source<'b>,
old_pos: usize,
}

impl<'a, 'b> Utf8UnreadHandle<'a, 'b>
where
'b: 'a,
{
#[inline(always)]
fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> {
Utf8UnreadHandle { source: src }
fn new_char(source: &'a mut Utf8Source<'b>) -> (char, Self) {
let old_pos = source.pos;
let character = source.read();
(character, Self { source, old_pos })
}
/// Calls `source.read_enum()` and returns its result together with a new
/// handle that holds `source` and the position `source` had before the call.
///
/// The position is stored in the handle's `old_pos` field rather than in the
/// source itself.
#[inline(always)]
fn new_enum(source: &'a mut Utf8Source<'b>) -> (Unicode, Self) {
// Snapshot the position first: `read_enum()` advances `source.pos`.
let old_pos = source.pos;
let character = source.read_enum();
(character, Self { source, old_pos })
}
#[inline(always)]
pub fn unread(self) -> usize {
self.source.unread()
self.source.pos = self.old_pos;
self.old_pos
}
#[inline(always)]
pub fn consumed(&self) -> usize {
Expand Down
12 changes: 8 additions & 4 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,9 @@ macro_rules! ascii_compatible_two_byte_decoder_function {
}
Space::Available(destination_handle_again) => {
{
let (b_again, _unread_handle_again) =
let (b_again, unread_handle_again) =
source_handle_again.read();
unread_handle_again.commit();
b = b_again;
destination_handle = destination_handle_again;
continue 'innermost;
Expand Down Expand Up @@ -570,7 +571,8 @@ macro_rules! gb18030_decoder_function {
dst_written);
}
Space::Available(destination_handle) => {
let (b, _) = source_handle.read();
let (b, unread_handle) = source_handle.read();
unread_handle.commit();
loop {
if b > 127 {
$non_ascii = b;
Expand Down Expand Up @@ -874,7 +876,8 @@ macro_rules! euc_jp_decoder_function {
dst_written);
}
Space::Available(destination_handle) => {
let (b, _) = source_handle.read();
let (b, unread_handle) = source_handle.read();
unread_handle.commit();
loop {
if b > 127 {
$non_ascii = b;
Expand Down Expand Up @@ -1139,8 +1142,9 @@ macro_rules! ascii_compatible_encoder_function {
}
Space::Available(destination_handle_again) => {
{
let (c_again, _unread_handle_again) =
let (c_again, unread_handle_again) =
source_handle_again.read_enum();
unread_handle_again.commit();
c = c_again;
destination_handle = destination_handle_again;
continue 'innermost;
Expand Down
3 changes: 2 additions & 1 deletion src/single_byte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,9 @@ impl SingleByteDecoder {
Space::Available(
destination_handle_again,
) => {
let (b_again, _unread_handle_again) =
let (b_again, unread_handle_again) =
source_handle_again.read();
unread_handle_again.commit();
b = b_again;
destination_handle =
destination_handle_again;
Expand Down
4 changes: 3 additions & 1 deletion src/x_user_defined.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ impl UserDefinedDecoder {
{},
{},
{
unread_handle.commit();

if b < 0x80 {
// ASCII run not optimized, because binary data expected
destination_handle.write_ascii(b);
Expand All @@ -63,7 +65,7 @@ impl UserDefinedDecoder {
source,
b,
destination_handle,
_unread_handle,
unread_handle,
check_space_bmp,
decode_to_utf8_raw,
u8,
Expand Down

0 comments on commit 5d9e4e4

Please sign in to comment.