From 9e36fd926c80dc096d580b762658f07cb976da86 Mon Sep 17 00:00:00 2001 From: Lukas Markeffsky <@> Date: Wed, 26 Oct 2022 11:58:33 +0200 Subject: [PATCH 01/18] stabilize `int_log` --- library/core/benches/lib.rs | 1 - library/core/src/num/int_macros.rs | 29 +++++++++++++----------- library/core/src/num/nonzero.rs | 8 +++---- library/core/src/num/uint_macros.rs | 29 +++++++++++++----------- library/core/tests/lib.rs | 1 - src/tools/miri/src/lib.rs | 1 - src/tools/miri/tests/pass/integer-ops.rs | 1 - 7 files changed, 36 insertions(+), 34 deletions(-) diff --git a/library/core/benches/lib.rs b/library/core/benches/lib.rs index 1e462e3fc3f8c..5beb9a7759008 100644 --- a/library/core/benches/lib.rs +++ b/library/core/benches/lib.rs @@ -1,7 +1,6 @@ // wasm32 does not support benches (no time). #![cfg(not(target_arch = "wasm32"))] #![feature(flt2dec)] -#![feature(int_log)] #![feature(test)] #![feature(trusted_random_access)] #![feature(iter_array_chunks)] diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 81f050cb283d4..404ddff4f9dab 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -2271,15 +2271,16 @@ macro_rules! int_impl { /// # Panics /// /// This function will panic if `self` is less than or equal to zero, - /// or if `base` is less then 2. + /// or if `base` is less than 2. /// /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(5", stringify!($SelfT), ".ilog(5), 1);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -2298,10 +2299,11 @@ macro_rules! int_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".ilog2(), 1);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -2319,10 +2321,11 @@ macro_rules! int_impl { /// # Example /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(10", stringify!($SelfT), ".ilog10(), 1);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -2343,10 +2346,10 @@ macro_rules! int_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(5", stringify!($SelfT), ".checked_ilog(5), Some(1));")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -2379,10 +2382,10 @@ macro_rules! int_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".checked_ilog2(), Some(1));")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -2403,10 +2406,10 @@ macro_rules! int_impl { /// # Example /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(10", stringify!($SelfT), ".checked_ilog10(), Some(1));")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] diff --git a/library/core/src/num/nonzero.rs b/library/core/src/num/nonzero.rs index da402d66502a6..03e78329deb41 100644 --- a/library/core/src/num/nonzero.rs +++ b/library/core/src/num/nonzero.rs @@ -457,14 +457,14 @@ macro_rules! nonzero_unsigned_operations { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("# use std::num::", stringify!($Ty), ";")] /// #[doc = concat!("assert_eq!(", stringify!($Ty), "::new(7).unwrap().ilog2(), 2);")] #[doc = concat!("assert_eq!(", stringify!($Ty), "::new(8).unwrap().ilog2(), 3);")] #[doc = concat!("assert_eq!(", stringify!($Ty), "::new(9).unwrap().ilog2(), 3);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -482,14 +482,14 @@ macro_rules! nonzero_unsigned_operations { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("# use std::num::", stringify!($Ty), ";")] /// #[doc = concat!("assert_eq!(", stringify!($Ty), "::new(99).unwrap().ilog10(), 1);")] #[doc = concat!("assert_eq!(", stringify!($Ty), "::new(100).unwrap().ilog10(), 2);")] #[doc = concat!("assert_eq!(", stringify!($Ty), "::new(101).unwrap().ilog10(), 2);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index 93f65c5c7aaf3..0563f28278d36 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -692,15 +692,16 @@ macro_rules! uint_impl { /// /// # Panics /// - /// This function will panic if `self` is zero, or if `base` is less then 2. + /// This function will panic if `self` is zero, or if `base` is less than 2. /// /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(5", stringify!($SelfT), ".ilog(5), 1);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -719,10 +720,11 @@ macro_rules! uint_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".ilog2(), 1);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -740,10 +742,11 @@ macro_rules! uint_impl { /// # Example /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(10", stringify!($SelfT), ".ilog10(), 1);")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_const_fn_unstable(const_option)] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -764,10 +767,10 @@ macro_rules! uint_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(5", stringify!($SelfT), ".checked_ilog(5), Some(1));")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -800,10 +803,10 @@ macro_rules! uint_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(2", stringify!($SelfT), ".checked_ilog2(), Some(1));")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] @@ -822,10 +825,10 @@ macro_rules! uint_impl { /// # Examples /// /// ``` - /// #![feature(int_log)] #[doc = concat!("assert_eq!(10", stringify!($SelfT), ".checked_ilog10(), Some(1));")] /// ``` - #[unstable(feature = "int_log", issue = "70887")] + #[stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] + #[rustc_const_stable(feature = "int_log", since = "CURRENT_RUSTC_VERSION")] #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs index b1f492381b136..cbc71d0ed1ae1 100644 --- a/library/core/tests/lib.rs +++ b/library/core/tests/lib.rs @@ -62,7 +62,6 @@ #![feature(try_trait_v2)] #![feature(slice_internals)] #![feature(slice_partition_dedup)] -#![feature(int_log)] #![feature(iter_advance_by)] #![feature(iter_array_chunks)] #![feature(iter_collect_into)] diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 8d754bad1b3cd..e9c5c67e7f58b 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -3,7 +3,6 @@ #![feature(never_type)] #![feature(try_blocks)] #![feature(io_error_more)] -#![feature(int_log)] #![feature(variant_count)] #![feature(yeet_expr)] #![feature(is_some_and)] diff --git a/src/tools/miri/tests/pass/integer-ops.rs b/src/tools/miri/tests/pass/integer-ops.rs index 724be9efc9f82..0ec1f8e9c6935 100644 --- a/src/tools/miri/tests/pass/integer-ops.rs +++ b/src/tools/miri/tests/pass/integer-ops.rs @@ -1,5 +1,4 @@ //@compile-flags: -Coverflow-checks=off -#![feature(int_log)] #![allow(arithmetic_overflow)] pub fn main() { From 8498e3a9bbee8048a9cf81790a627400d3d87569 Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Fri, 28 Oct 2022 19:48:38 +0400 Subject: [PATCH 02/18] Add examples for `pointer::mask` --- library/core/src/ptr/const_ptr.rs | 25 +++++++++++++++++++++++++ library/core/src/ptr/mut_ptr.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index ed16c5f051f7b..298a26ef58804 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -568,6 +568,31 @@ impl *const T { /// /// For non-`Sized` pointees this operation changes only the data pointer, /// leaving the metadata untouched. + /// + /// ## Examples + /// + /// ``` + /// #![feature(ptr_mask, strict_provenance)] + /// let v = 17_u32; + /// let ptr: *const u32 = &v; + /// + /// // `u32` is 4 bytes aligned, + /// // which means that lower 2 bits are always 0. + /// let tag_mask = 0b11; + /// let ptr_mask = !tag_mask; + /// + /// // We can store something in these lower bits + /// let tagged_ptr = ptr.map_addr(|a| a | 0b10); + /// + /// // Get the "tag" back + /// let tag = tagged_ptr.addr() & tag_mask; + /// assert_eq!(tag, 0b10); + /// + /// // Note that `tagged_ptr` is unaligned, it's UB to read from it. + /// // To get original pointer `mask` can be used: + /// let masked_ptr = tagged_ptr.mask(ptr_mask); + /// assert_eq!(unsafe { *masked_ptr }, 17); + /// ``` #[unstable(feature = "ptr_mask", issue = "98290")] #[must_use = "returns a new pointer rather than modifying its argument"] #[inline(always)] diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 6764002bcd434..f71696e9ca0fa 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -588,6 +588,34 @@ impl *mut T { /// /// For non-`Sized` pointees this operation changes only the data pointer, /// leaving the metadata untouched. + /// + /// ## Examples + /// + /// ``` + /// #![feature(ptr_mask, strict_provenance)] + /// let mut v = 17_u32; + /// let ptr: *mut u32 = &mut v; + /// + /// // `u32` is 4 bytes aligned, + /// // which means that lower 2 bits are always 0. + /// let tag_mask = 0b11; + /// let ptr_mask = !tag_mask; + /// + /// // We can store something in these lower bits + /// let tagged_ptr = ptr.map_addr(|a| a | 0b10); + /// + /// // Get the "tag" back + /// let tag = tagged_ptr.addr() & tag_mask; + /// assert_eq!(tag, 0b10); + /// + /// // Note that `tagged_ptr` is unaligned, it's UB to read from/write to it. + /// // To get original pointer `mask` can be used: + /// let masked_ptr = tagged_ptr.mask(ptr_mask); + /// assert_eq!(unsafe { *masked_ptr }, 17); + /// + /// unsafe { *masked_ptr = 0 }; + /// assert_eq!(v, 0); + /// ``` #[unstable(feature = "ptr_mask", issue = "98290")] #[must_use = "returns a new pointer rather than modifying its argument"] #[inline(always)] From f32e6781b2932aed55342ad8a4a7f1023acb30b4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 11:31:07 +1100 Subject: [PATCH 03/18] Rename some variables. These have been bugging me for a while. - `literal_text`: `src` is also used and is shorter and better. - `first_char`: used even when "first" doesn't make sense; `c` is shorter and better. - `curr`: `c` is shorter and better. - `unescaped_char`: `result` is also used and is shorter and better. - `second_char`: these have a single use and can be elided. --- compiler/rustc_lexer/src/unescape.rs | 70 +++++++++++++--------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 8f64b5f5158e4..a6752c82bd3c5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -78,54 +78,52 @@ impl EscapeError { /// Takes a contents of a literal (without quotes) and produces a /// sequence of escaped characters or errors. /// Values are returned through invoking of the provided callback. -pub fn unescape_literal(literal_text: &str, mode: Mode, callback: &mut F) +pub fn unescape_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { match mode { Mode::Char | Mode::Byte => { - let mut chars = literal_text.chars(); + let mut chars = src.chars(); let result = unescape_char_or_byte(&mut chars, mode); // The Chars iterator moved forward. - callback(0..(literal_text.len() - chars.as_str().len()), result); + callback(0..(src.len() - chars.as_str().len()), result); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(literal_text, mode, callback), + Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode, callback), // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. - Mode::RawStr | Mode::RawByteStr => { - unescape_raw_str_or_raw_byte_str(literal_text, mode, callback) - } + Mode::RawStr | Mode::RawByteStr => unescape_raw_str_or_raw_byte_str(src, mode, callback), } } /// Takes a contents of a byte, byte string or raw byte string (without quotes) /// and produces a sequence of bytes or errors. /// Values are returned through invoking of the provided callback. -pub fn unescape_byte_literal(literal_text: &str, mode: Mode, callback: &mut F) +pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { debug_assert!(mode.is_bytes()); - unescape_literal(literal_text, mode, &mut |range, result| { + unescape_literal(src, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) } /// Takes a contents of a char literal (without quotes), and returns an /// unescaped char or an error -pub fn unescape_char(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); +pub fn unescape_char(src: &str) -> Result { + let mut chars = src.chars(); unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) + .map_err(|err| (src.len() - chars.as_str().len(), err)) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. -pub fn unescape_byte(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); +pub fn unescape_byte(src: &str) -> Result { + let mut chars = src.chars(); unescape_char_or_byte(&mut chars, Mode::Byte) .map(byte_from_char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) + .map_err(|err| (src.len() - chars.as_str().len(), err)) } /// What kind of literal do we parse. @@ -157,10 +155,7 @@ impl Mode { fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { // Previous character was '\\', unescape what follows. - - let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; - - let res = match second_char { + let res = match chars.next().ok_or(EscapeError::LoneSlash)? { '"' => '"', 'n' => '\n', 'r' => '\r', @@ -249,23 +244,23 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { } #[inline] -fn ascii_check(first_char: char, mode: Mode) -> Result { - if mode.is_bytes() && !first_char.is_ascii() { +fn ascii_check(c: char, mode: Mode) -> Result { + if mode.is_bytes() && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { - Ok(first_char) + Ok(c) } } fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { debug_assert!(mode == Mode::Char || mode == Mode::Byte); - let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match first_char { + let c = chars.next().ok_or(EscapeError::ZeroChars)?; + let res = match c { '\\' => scan_escape(chars, mode), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(first_char, mode), + _ => ascii_check(c, mode), }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); @@ -282,13 +277,12 @@ where debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let initial_len = src.len(); let mut chars = src.chars(); - while let Some(first_char) = chars.next() { - let start = initial_len - chars.as_str().len() - first_char.len_utf8(); + while let Some(c) = chars.next() { + let start = initial_len - chars.as_str().len() - c.len_utf8(); - let unescaped_char = match first_char { + let result = match c { '\\' => { - let second_char = chars.clone().next(); - match second_char { + match chars.clone().next() { Some('\n') => { // Rust language specification requires us to skip whitespaces // if unescaped '\' character is followed by '\n'. @@ -304,10 +298,10 @@ where '\t' => Ok('\t'), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(first_char, mode), + _ => ascii_check(c, mode), }; let end = initial_len - chars.as_str().len(); - callback(start..end, unescaped_char); + callback(start..end, result); } fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) @@ -341,18 +335,18 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_raw_byte_str(literal_text: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); - let initial_len = literal_text.len(); + let initial_len = src.len(); - let mut chars = literal_text.chars(); - while let Some(curr) = chars.next() { - let start = initial_len - chars.as_str().len() - curr.len_utf8(); + let mut chars = src.chars(); + while let Some(c) = chars.next() { + let start = initial_len - chars.as_str().len() - c.len_utf8(); - let result = match curr { + let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), From 84ca2c3bab370ee58ebd23050e9286e1d9e664b9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 12:15:55 +1100 Subject: [PATCH 04/18] Clarify range calculations. There is some subtlety here. --- compiler/rustc_lexer/src/unescape.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index a6752c82bd3c5..dc2fd359e278e 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -275,11 +275,13 @@ where F: FnMut(Range, Result), { debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); - let initial_len = src.len(); let mut chars = src.chars(); - while let Some(c) = chars.next() { - let start = initial_len - chars.as_str().len() - c.len_utf8(); + // The `start` and `end` computation here is complicated because + // `skip_ascii_whitespace` makes us to skip over chars without counting + // them in the range computation. + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\\' => { match chars.clone().next() { @@ -300,7 +302,7 @@ where '\r' => Err(EscapeError::BareCarriageReturn), _ => ascii_check(c, mode), }; - let end = initial_len - chars.as_str().len(); + let end = src.len() - chars.as_str().len(); callback(start..end, result); } @@ -340,19 +342,19 @@ where F: FnMut(Range, Result), { debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); - let initial_len = src.len(); - let mut chars = src.chars(); - while let Some(c) = chars.next() { - let start = initial_len - chars.as_str().len() - c.len_utf8(); + // The `start` and `end` computation here matches the one in + // `unescape_str_or_byte_str` for consistency, even though this function + // doesn't have to worry about skipping any chars. + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), }; - let end = initial_len - chars.as_str().len(); - + let end = src.len() - chars.as_str().len(); callback(start..end, result); } } From 34b32b0dac9da3fad7861bdc2bad89d771172bb3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 13:35:49 +1100 Subject: [PATCH 05/18] Use `Mode` less. It's passed to numerous places where we just need an `is_byte` bool. Passing the bool avoids the need for some assertions. Also rename `is_bytes()` as `is_byte()`, to better match `Mode::Byte`, `Mode::ByteStr`, and `Mode::RawByteStr`. --- compiler/rustc_lexer/src/unescape.rs | 46 +++++++++---------- .../src/lexer/unescape_error_reporting.rs | 14 +++--- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index dc2fd359e278e..f0042a397c2c5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -85,14 +85,16 @@ where match mode { Mode::Char | Mode::Byte => { let mut chars = src.chars(); - let result = unescape_char_or_byte(&mut chars, mode); + let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); // The Chars iterator moved forward. callback(0..(src.len() - chars.as_str().len()), result); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode, callback), + Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. - Mode::RawStr | Mode::RawByteStr => unescape_raw_str_or_raw_byte_str(src, mode, callback), + Mode::RawStr | Mode::RawByteStr => { + unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) + } } } @@ -103,7 +105,7 @@ pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode.is_bytes()); + debug_assert!(mode.is_byte()); unescape_literal(src, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) @@ -113,15 +115,14 @@ where /// unescaped char or an error pub fn unescape_char(src: &str) -> Result { let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (src.len() - chars.as_str().len(), err)) + unescape_char_or_byte(&mut chars, false).map_err(|err| (src.len() - chars.as_str().len(), err)) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. pub fn unescape_byte(src: &str) -> Result { let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, Mode::Byte) + unescape_char_or_byte(&mut chars, true) .map(byte_from_char) .map_err(|err| (src.len() - chars.as_str().len(), err)) } @@ -145,7 +146,7 @@ impl Mode { } } - pub fn is_bytes(self) -> bool { + pub fn is_byte(self) -> bool { match self { Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, Mode::Char | Mode::Str | Mode::RawStr => false, @@ -153,7 +154,7 @@ impl Mode { } } -fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { +fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result { // Previous character was '\\', unescape what follows. let res = match chars.next().ok_or(EscapeError::LoneSlash)? { '"' => '"', @@ -176,7 +177,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { let value = hi * 16 + lo; // For a non-byte literal verify that it is within ASCII range. - if !mode.is_bytes() && !is_ascii(value) { + if !is_byte && !is_ascii(value) { return Err(EscapeError::OutOfRangeHexEscape); } let value = value as u8; @@ -212,7 +213,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { // Incorrect syntax has higher priority for error reporting // than unallowed value for a literal. - if mode.is_bytes() { + if is_byte { return Err(EscapeError::UnicodeEscapeInByte); } @@ -244,8 +245,8 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { } #[inline] -fn ascii_check(c: char, mode: Mode) -> Result { - if mode.is_bytes() && !c.is_ascii() { +fn ascii_check(c: char, is_byte: bool) -> Result { + if is_byte && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { @@ -253,14 +254,13 @@ fn ascii_check(c: char, mode: Mode) -> Result { } } -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - debug_assert!(mode == Mode::Char || mode == Mode::Byte); +fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result { let c = chars.next().ok_or(EscapeError::ZeroChars)?; let res = match c { - '\\' => scan_escape(chars, mode), + '\\' => scan_escape(chars, is_byte), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode), + _ => ascii_check(c, is_byte), }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); @@ -270,11 +270,10 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result(src: &str, mode: Mode, callback: &mut F) +fn unescape_str_or_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let mut chars = src.chars(); // The `start` and `end` computation here is complicated because @@ -293,14 +292,14 @@ where skip_ascii_whitespace(&mut chars, start, callback); continue; } - _ => scan_escape(&mut chars, mode), + _ => scan_escape(&mut chars, is_byte), } } '\n' => Ok('\n'), '\t' => Ok('\t'), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode), + _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); callback(start..end, result); @@ -337,11 +336,10 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_raw_byte_str(src: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); let mut chars = src.chars(); // The `start` and `end` computation here matches the one in @@ -351,7 +349,7 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), + c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), }; let end = src.len() - chars.as_str().len(); diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index f075de7142676..055ee98a00aa3 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -108,7 +108,7 @@ pub(crate) fn emit_unescape_error( } if !has_help { - let (prefix, msg) = if mode.is_bytes() { + let (prefix, msg) = if mode.is_byte() { ("b", "if you meant to write a byte string literal, use double quotes") } else { ("", "if you meant to write a `str` literal, use double quotes") @@ -142,7 +142,7 @@ pub(crate) fn emit_unescape_error( EscapeError::EscapeOnlyChar => { let (c, char_span) = last_char(); - let msg = if mode.is_bytes() { + let msg = if mode.is_byte() { "byte constant must be escaped" } else { "character constant must be escaped" @@ -182,11 +182,11 @@ pub(crate) fn emit_unescape_error( let (c, span) = last_char(); let label = - if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; + if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; let ec = escaped_char(c); let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_bytes() { + if c == '{' || c == '}' && !mode.is_byte() { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -196,7 +196,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_bytes() { + if !mode.is_byte() { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", @@ -231,7 +231,7 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_bytes()); + assert!(mode.is_byte()); let (c, span) = last_char(); let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { @@ -271,7 +271,7 @@ pub(crate) fn emit_unescape_error( err.emit(); } EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_bytes()); + assert!(mode.is_byte()); let (c, span) = last_char(); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) From 7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 15:17:37 +1100 Subject: [PATCH 06/18] Make non-ASCII errors more consistent. There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them. Two `EscapeError` variants exist for when that constraint is violated. - `NonAsciiCharInByte`: used for byte literals and byte string literals. - `NonAsciiCharInByteString`: used for raw byte string literals. As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages. This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them. Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference. --- compiler/rustc_lexer/src/unescape.rs | 7 ++-- compiler/rustc_lexer/src/unescape/tests.rs | 7 ++-- .../src/lexer/unescape_error_reporting.rs | 32 ++++++++----------- src/test/ui/attributes/key-value-non-ascii.rs | 2 +- .../ui/attributes/key-value-non-ascii.stderr | 4 +-- src/test/ui/parser/byte-literals.rs | 2 +- src/test/ui/parser/byte-literals.stderr | 4 +-- src/test/ui/parser/byte-string-literals.rs | 4 +-- .../ui/parser/byte-string-literals.stderr | 6 ++-- .../ui/parser/raw/raw-byte-string-literals.rs | 2 +- .../raw/raw-byte-string-literals.stderr | 2 +- .../ui/parser/unicode-control-codepoints.rs | 16 +++++----- .../parser/unicode-control-codepoints.stderr | 24 +++++++------- src/test/ui/suggestions/multibyte-escapes.rs | 12 +++---- .../ui/suggestions/multibyte-escapes.stderr | 12 +++---- 15 files changed, 62 insertions(+), 74 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index f0042a397c2c5..9c9cce7cbd48e 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -52,10 +52,8 @@ pub enum EscapeError { /// Unicode escape code in byte literal. UnicodeEscapeInByte, - /// Non-ascii character in byte literal. + /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, - /// Non-ascii character in byte string literal. - NonAsciiCharInByteString, /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. @@ -349,8 +347,7 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), - c => Ok(c), + _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); callback(start..end, result); diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index fa61554afde6c..008edef5a6385 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -289,9 +289,6 @@ fn test_unescape_raw_byte_str() { } check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]); - check( - "🦀a", - &[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))], - ); + check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); + check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]); } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 055ee98a00aa3..6373f5b4fd6ff 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_byte()); let (c, span) = last_char(); - let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); + let desc = match mode { + Mode::Byte => "byte literal", + Mode::ByteStr => "byte string literal", + Mode::RawByteStr => "raw byte string literal", + _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"), + }; + let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc)); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) } else { String::new() }; - err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); - if (c as u32) <= 0xFF { + err.span_label(span, &format!("must be ASCII{}", postfix)); + // Note: the \\xHH suggestions are not given for raw byte string + // literals, because they are araw and so cannot use any escapes. + if (c as u32) <= 0xFF && mode != Mode::RawByteStr { err.span_suggestion( span, &format!( @@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error( format!("\\x{:X}", c as u32), Applicability::MaybeIncorrect, ); - } else if matches!(mode, Mode::Byte) { + } else if mode == Mode::Byte { err.span_label(span, "this multibyte character does not fit into a single byte"); - } else if matches!(mode, Mode::ByteStr) { + } else if mode != Mode::RawByteStr { let mut utf8 = String::new(); utf8.push(c); err.span_suggestion( @@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error( } err.emit(); } - EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_byte()); - let (c, span) = last_char(); - let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { - format!(" but is {:?}", c) - } else { - String::new() - }; - handler - .struct_span_err(span, "raw byte string must be ASCII") - .span_label(span, &format!("must be ASCII{}", postfix)) - .emit(); - } EscapeError::OutOfRangeHexEscape => { handler .struct_span_err(span, "out of range hex escape") diff --git a/src/test/ui/attributes/key-value-non-ascii.rs b/src/test/ui/attributes/key-value-non-ascii.rs index 12942eabdf7b5..e14e2fc05ad39 100644 --- a/src/test/ui/attributes/key-value-non-ascii.rs +++ b/src/test/ui/attributes/key-value-non-ascii.rs @@ -1,4 +1,4 @@ #![feature(rustc_attrs)] -#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant +#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte string literal fn main() {} diff --git a/src/test/ui/attributes/key-value-non-ascii.stderr b/src/test/ui/attributes/key-value-non-ascii.stderr index 422107867f7f9..23d482de6a868 100644 --- a/src/test/ui/attributes/key-value-non-ascii.stderr +++ b/src/test/ui/attributes/key-value-non-ascii.stderr @@ -1,8 +1,8 @@ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/key-value-non-ascii.rs:3:19 | LL | #[rustc_dummy = b"ffi.rs"] - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the UTF-8 encoding of 'ffi', use \xHH escapes | diff --git a/src/test/ui/parser/byte-literals.rs b/src/test/ui/parser/byte-literals.rs index 05a510b24a7ab..896dc1a1a5fba 100644 --- a/src/test/ui/parser/byte-literals.rs +++ b/src/test/ui/parser/byte-literals.rs @@ -7,6 +7,6 @@ pub fn main() { b'\x0Z'; //~ ERROR invalid character in numeric character escape: `Z` b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped - b'é'; //~ ERROR non-ASCII character in byte constant + b'é'; //~ ERROR non-ASCII character in byte literal b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/src/test/ui/parser/byte-literals.stderr b/src/test/ui/parser/byte-literals.stderr index c3d0006163005..efa55ae05bd37 100644 --- a/src/test/ui/parser/byte-literals.stderr +++ b/src/test/ui/parser/byte-literals.stderr @@ -32,11 +32,11 @@ error: byte constant must be escaped: `'` LL | b'''; | ^ help: escape the character: `\'` -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/byte-literals.rs:10:7 | LL | b'é'; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'é', use a \xHH escape | diff --git a/src/test/ui/parser/byte-string-literals.rs b/src/test/ui/parser/byte-string-literals.rs index b1f11024a7bb6..30a4f50c4e40b 100644 --- a/src/test/ui/parser/byte-string-literals.rs +++ b/src/test/ui/parser/byte-string-literals.rs @@ -3,7 +3,7 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape pub fn main() { b"\f"; //~ ERROR unknown byte escape b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` - b"é"; //~ ERROR non-ASCII character in byte constant - br##"é"##; //~ ERROR raw byte string must be ASCII + b"é"; //~ ERROR non-ASCII character in byte string literal + br##"é"##; //~ ERROR non-ASCII character in raw byte string literal b"a //~ ERROR unterminated double quote byte string } diff --git a/src/test/ui/parser/byte-string-literals.stderr b/src/test/ui/parser/byte-string-literals.stderr index 3b8b3692e053f..5b96cc3d18abc 100644 --- a/src/test/ui/parser/byte-string-literals.stderr +++ b/src/test/ui/parser/byte-string-literals.stderr @@ -20,18 +20,18 @@ error: invalid character in numeric character escape: `Z` LL | b"\x0Z"; | ^ invalid character in numeric character escape -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/byte-string-literals.rs:6:7 | LL | b"é"; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'é', use a \xHH escape | LL | b"\xE9"; | ~~~~ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/byte-string-literals.rs:7:10 | LL | br##"é"##; diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.rs b/src/test/ui/parser/raw/raw-byte-string-literals.rs index 163c8ac66b022..1b859fee596ad 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.rs +++ b/src/test/ui/parser/raw/raw-byte-string-literals.rs @@ -2,6 +2,6 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string - br"é"; //~ ERROR raw byte string must be ASCII + br"é"; //~ ERROR non-ASCII character in raw byte string literal br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.stderr b/src/test/ui/parser/raw/raw-byte-string-literals.stderr index cfc877104bd9f..a2f27d1ed70ae 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-literals.stderr @@ -4,7 +4,7 @@ error: bare CR not allowed in raw string LL | br"a "; | ^ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/raw-byte-string-literals.rs:5:8 | LL | br"é"; diff --git a/src/test/ui/parser/unicode-control-codepoints.rs b/src/test/ui/parser/unicode-control-codepoints.rs index 5af0b585a1275..df099bb62ad1e 100644 --- a/src/test/ui/parser/unicode-control-codepoints.rs +++ b/src/test/ui/parser/unicode-control-codepoints.rs @@ -14,15 +14,15 @@ fn main() { println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); //~^ ERROR unicode codepoint changing visible direction of text present in literal println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "); - //~^ ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant + //~^ ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); - //~^ ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII + //~^ ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal println!("{:?}", '‮'); //~^ ERROR unicode codepoint changing visible direction of text present in literal } diff --git a/src/test/ui/parser/unicode-control-codepoints.stderr b/src/test/ui/parser/unicode-control-codepoints.stderr index 44548c72ff5d0..fc071a9419142 100644 --- a/src/test/ui/parser/unicode-control-codepoints.stderr +++ b/src/test/ui/parser/unicode-control-codepoints.stderr @@ -14,69 +14,69 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | = help: unicode escape sequences cannot be used as a byte or in a byte string -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:26 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{202e}' + | ^ must be ASCII but is '\u{202e}' | help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes | LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:30 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2066}' + | ^ must be ASCII but is '\u{2066}' | help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes | LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:41 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2069}' + | ^ must be ASCII but is '\u{2069}' | help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes | LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:43 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2066}' + | ^ must be ASCII but is '\u{2066}' | help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes | LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); | ~~~~~~~~~~~~ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:29 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{202e}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:33 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{2066}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:44 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{2069}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:46 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); diff --git a/src/test/ui/suggestions/multibyte-escapes.rs b/src/test/ui/suggestions/multibyte-escapes.rs index fd5d46a4e923e..c4105186244db 100644 --- a/src/test/ui/suggestions/multibyte-escapes.rs +++ b/src/test/ui/suggestions/multibyte-escapes.rs @@ -2,17 +2,17 @@ fn main() { b'µ'; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte literal //~| HELP: if you meant to use the unicode code point for 'µ', use a \xHH escape - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII b'字'; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte literal //~| NOTE: this multibyte character does not fit into a single byte - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII b"字"; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte string literal //~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII } diff --git a/src/test/ui/suggestions/multibyte-escapes.stderr b/src/test/ui/suggestions/multibyte-escapes.stderr index 6e26bc1f01cef..1e7c43e6538f6 100644 --- a/src/test/ui/suggestions/multibyte-escapes.stderr +++ b/src/test/ui/suggestions/multibyte-escapes.stderr @@ -1,28 +1,28 @@ -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:4:7 | LL | b'µ'; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'µ', use a \xHH escape | LL | b'\xB5'; | ~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:9:7 | LL | b'字'; | ^^ | | - | byte constant must be ASCII + | must be ASCII | this multibyte character does not fit into a single byte -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/multibyte-escapes.rs:14:7 | LL | b"字"; - | ^^ byte constant must be ASCII + | ^^ must be ASCII | help: if you meant to use the UTF-8 encoding of '字', use \xHH escapes | From a21c0458979d786d821c2d75a1b109fe38914da0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 16:26:27 +1100 Subject: [PATCH 07/18] Improve comments. Remove a low-value comment, remove a duplicate comment, and correct a third comment. --- compiler/rustc_lexer/src/unescape.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 9c9cce7cbd48e..db7bf02e71adc 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -84,12 +84,9 @@ where Mode::Char | Mode::Byte => { let mut chars = src.chars(); let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); - // The Chars iterator moved forward. callback(0..(src.len() - chars.as_str().len()), result); } Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), - // NOTE: Raw strings do not perform any explicit character escaping, here we - // only translate CRLF to LF and produce errors on bare CR. Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } @@ -333,7 +330,7 @@ where /// Takes a contents of a string literal (without quotes) and produces a /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. +/// only produce errors on bare CR. fn unescape_raw_str_or_raw_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), From d963686f5a87b9eaa2ac2bdc29ddb796e0e83f1f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 09:19:34 +1100 Subject: [PATCH 08/18] Refactor `cook_lexer_literal`. It deals with eight cases: ints, floats, and the six quoted types (char/byte/strings). For ints and floats we have an early return, and the other six types fall through to the code at the end, which makes the function hard to read. This commit rearranges things to avoid the early returns. --- compiler/rustc_parse/src/lexer/mod.rs | 78 +++++++++++++-------------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 462bce16ad717..61b5be4240414 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -363,55 +363,55 @@ impl<'a> StringReader<'a> { fn cook_lexer_literal( &self, start: BytePos, - suffix_start: BytePos, + end: BytePos, kind: rustc_lexer::LiteralKind, ) -> (token::LitKind, Symbol) { - // prefix means `"` or `br"` or `r###"`, ... - let (lit_kind, mode, prefix_len, postfix_len) = match kind { + match kind { rustc_lexer::LiteralKind::Char { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated character literal", error_code!(E0762), ) } - (token::Char, Mode::Char, 1, 1) // ' ' + self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated byte constant", error_code!(E0763), ) } - (token::Byte, Mode::Byte, 2, 1) // b' ' + self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated double quote string", error_code!(E0765), ) } - (token::Str, Mode::Str, 1, 1) // " " + self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated double quote byte string", error_code!(E0766), ) } - (token::ByteStr, Mode::ByteStr, 2, 1) // b" " + self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## + let kind = token::StrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -419,56 +419,47 @@ impl<'a> StringReader<'a> { rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## + let kind = token::ByteStrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } } rustc_lexer::LiteralKind::Int { base, empty_int } => { - return if empty_int { + if empty_int { self.sess .span_diagnostic .struct_span_err_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "no valid digits found for number", error_code!(E0768), ) .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, suffix_start); - (token::Integer, self.symbol_from_to(start, suffix_start)) - }; + self.validate_int_literal(base, start, end); + (token::Integer, self.symbol_from_to(start, end)) + } } rustc_lexer::LiteralKind::Float { base, empty_exponent } => { if empty_exponent { self.err_span_(start, self.pos, "expected at least one digit in exponent"); } - match base { - Base::Hexadecimal => self.err_span_( - start, - suffix_start, - "hexadecimal float literal is not supported", - ), + Base::Hexadecimal => { + self.err_span_(start, end, "hexadecimal float literal is not supported") + } Base::Octal => { - self.err_span_(start, suffix_start, "octal float literal is not supported") + self.err_span_(start, end, "octal float literal is not supported") } Base::Binary => { - self.err_span_(start, suffix_start, "binary float literal is not supported") + self.err_span_(start, end, "binary float literal is not supported") } - _ => (), + _ => {} } - - let id = self.symbol_from_to(start, suffix_start); - return (token::Float, id); + (token::Float, self.symbol_from_to(start, end)) } - }; - let content_start = start + BytePos(prefix_len); - let content_end = suffix_start - BytePos(postfix_len); - let id = self.symbol_from_to(content_start, content_end); - self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len); - (lit_kind, id) + } } #[inline] @@ -659,20 +650,22 @@ impl<'a> StringReader<'a> { ) } - fn validate_literal_escape( + fn cook_quoted( &self, + kind: token::LitKind, mode: Mode, - content_start: BytePos, - content_end: BytePos, + start: BytePos, + end: BytePos, prefix_len: u32, postfix_len: u32, - ) { + ) -> (token::LitKind, Symbol) { + let content_start = start + BytePos(prefix_len); + let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); unescape::unescape_literal(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { - let span_with_quotes = self - .mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len)); + let span_with_quotes = self.mk_sp(start, end); let (start, end) = (range.start as u32, range.end as u32); let lo = content_start + BytePos(start); let hi = lo + BytePos(end - start); @@ -688,6 +681,7 @@ impl<'a> StringReader<'a> { ); } }); + (kind, Symbol::intern(lit_content)) } fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { From a203482d2a20cba0c86298334ebd74438bd477ba Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 10:02:29 +1100 Subject: [PATCH 09/18] Inline and remove `validate_int_literal`. It has a single callsite, and is fairly small. The `Float` match arm already has base-specific checking inline, so this makes things more consistent. --- compiler/rustc_lexer/src/lib.rs | 10 ++++----- compiler/rustc_parse/src/lexer/mod.rs | 31 +++++++++++---------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 51515976e4ee9..0d29d7b1e3d9b 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -203,13 +203,13 @@ pub enum RawStrError { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Base { /// Literal starts with "0b". - Binary, + Binary = 2, /// Literal starts with "0o". - Octal, - /// Literal starts with "0x". - Hexadecimal, + Octal = 8, /// Literal doesn't contain a prefix. - Decimal, + Decimal = 10, + /// Literal starts with "0x". + Hexadecimal = 16, } /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun", diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 61b5be4240414..9de0c74f4b1d2 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -437,7 +437,19 @@ impl<'a> StringReader<'a> { .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, end); + if matches!(base, Base::Binary | Base::Octal) { + let base = base as u32; + let s = self.str_from_to(start + BytePos(2), end); + for (idx, c) in s.char_indices() { + if c != '_' && c.to_digit(base).is_none() { + self.err_span_( + start + BytePos::from_usize(2 + idx), + start + BytePos::from_usize(2 + idx + c.len_utf8()), + &format!("invalid digit for a base {} literal", base), + ); + } + } + } (token::Integer, self.symbol_from_to(start, end)) } } @@ -683,23 +695,6 @@ impl<'a> StringReader<'a> { }); (kind, Symbol::intern(lit_content)) } - - fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { - let base = match base { - Base::Binary => 2, - Base::Octal => 8, - _ => return, - }; - let s = self.str_from_to(content_start + BytePos(2), content_end); - for (idx, c) in s.char_indices() { - let idx = idx as u32; - if c != '_' && c.to_digit(base).is_none() { - let lo = content_start + BytePos(2 + idx); - let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); - self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); - } - } - } } pub fn nfc_normalize(string: &str) -> Symbol { From f8e2cef5faa7ff77e91ea2d0416006f0b7aa52bf Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 3 Nov 2022 22:01:58 -0700 Subject: [PATCH 10/18] Move intra-doc link checks to a separate function. --- src/tools/linkchecker/main.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 7842611bd4ffa..4092722501d0e 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -365,6 +365,23 @@ impl Checker { } }); + self.check_intra_doc_links(file, &pretty_path, &source, report); + + // we don't need the source anymore, + // so drop to reduce memory-usage + match self.cache.get_mut(&pretty_path).unwrap() { + FileEntry::HtmlFile { source, .. } => *source = Rc::new(String::new()), + _ => unreachable!("must be html file"), + } + } + + fn check_intra_doc_links( + &mut self, + file: &Path, + pretty_path: &str, + source: &str, + report: &mut Report, + ) { // Search for intra-doc links that rustdoc didn't warn about // FIXME(#77199, 77200) Rustdoc should just warn about these directly. // NOTE: only looks at one line at a time; in practice this should find most links @@ -379,12 +396,6 @@ impl Checker { } } } - // we don't need the source anymore, - // so drop to reduce memory-usage - match self.cache.get_mut(&pretty_path).unwrap() { - FileEntry::HtmlFile { source, .. } => *source = Rc::new(String::new()), - _ => unreachable!("must be html file"), - } } /// Load a file from disk, or from the cache if available. From 57b229086e274c21e5544e6719271ae79a952194 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 3 Nov 2022 22:02:39 -0700 Subject: [PATCH 11/18] Remove reference from the intra-doc link checker. --- src/tools/linkchecker/main.rs | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 4092722501d0e..4170c32f1fe25 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -55,30 +55,6 @@ const LINKCHECK_EXCEPTIONS: &[(&str, &[&str])] = &[ #[rustfmt::skip] const INTRA_DOC_LINK_EXCEPTIONS: &[(&str, &[&str])] = &[ - // This will never have links that are not in other pages. - // To avoid repeating the exceptions twice, an empty list means all broken links are allowed. - ("reference/print.html", &[]), - // All the reference 'links' are actually ENBF highlighted as code - ("reference/comments.html", &[ - "/ !", - "* !", - ]), - ("reference/identifiers.html", &[ - "a-z A-Z", - "a-z A-Z 0-9 _", - "a-z A-Z] [a-z A-Z 0-9 _", - ]), - ("reference/tokens.html", &[ - "0-1", - "0-7", - "0-9", - "0-9", - "0-9 a-f A-F", - ]), - ("reference/notation.html", &[ - "b B", - "a-z", - ]), // This is being used in the sense of 'inclusive range', not a markdown link ("core/ops/struct.RangeInclusive.html", &["begin, end"]), ("std/ops/struct.RangeInclusive.html", &["begin, end"]), @@ -382,6 +358,16 @@ impl Checker { source: &str, report: &mut Report, ) { + let relative = file.strip_prefix(&self.root).expect("should always be relative to root"); + // Don't check the reference. It has several legitimate things that + // look like []. The reference has its own broken link + // checker in its CI which handles this using pulldown_cmark. + // + // This checks both the end of the root (when checking just the + // reference directory) or the beginning (when checking all docs). + if self.root.ends_with("reference") || relative.starts_with("reference") { + return; + } // Search for intra-doc links that rustdoc didn't warn about // FIXME(#77199, 77200) Rustdoc should just warn about these directly. // NOTE: only looks at one line at a time; in practice this should find most links From a838952239493d9dafe2d5f2ca1204f326841ae9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 11:09:23 +1100 Subject: [PATCH 12/18] Remove `unescape_byte_literal`. It's easy to just use `unescape_literal` + `byte_from_char`. --- compiler/rustc_ast/src/util/literal.rs | 29 +++++++------------ compiler/rustc_lexer/src/unescape.rs | 16 ++-------- compiler/rustc_lexer/src/unescape/tests.rs | 12 ++++---- .../crates/syntax/src/validation.rs | 6 ++-- 4 files changed, 20 insertions(+), 43 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 536b385606c69..8f342175f7d37 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,12 +2,9 @@ use crate::ast::{self, Lit, LitKind}; use crate::token::{self, Token}; - -use rustc_lexer::unescape::{unescape_byte, unescape_char}; -use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode}; +use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; - use std::ascii; pub enum LitError { @@ -109,13 +106,11 @@ impl LitKind { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + unescape_literal(&s, Mode::ByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } }); @@ -127,13 +122,11 @@ impl LitKind { let bytes = if s.contains('\r') { let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + unescape_literal(&s, Mode::RawByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } }); diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index db7bf02e71adc..8d5eac29452e7 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -93,19 +93,6 @@ where } } -/// Takes a contents of a byte, byte string or raw byte string (without quotes) -/// and produces a sequence of bytes or errors. -/// Values are returned through invoking of the provided callback. -pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - debug_assert!(mode.is_byte()); - unescape_literal(src, mode, &mut |range, result| { - callback(range, result.map(byte_from_char)); - }) -} - /// Takes a contents of a char literal (without quotes), and returns an /// unescaped char or an error pub fn unescape_char(src: &str) -> Result { @@ -351,7 +338,8 @@ where } } -fn byte_from_char(c: char) -> u8 { +#[inline] +pub fn byte_from_char(c: char) -> u8 { let res = c as u32; debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr"); res as u8 diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 008edef5a6385..00c8401efdfe4 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -246,10 +246,10 @@ fn test_unescape_byte_good() { fn test_unescape_byte_str_good() { fn check(literal_text: &str, expected: &[u8]) { let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_byte_literal(literal_text, Mode::ByteStr, &mut |range, c| { + unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| { if let Ok(b) = &mut buf { match c { - Ok(c) => b.push(c), + Ok(c) => b.push(byte_from_char(c)), Err(e) => buf = Err((range, e)), } } @@ -280,15 +280,13 @@ fn test_unescape_raw_str() { #[test] fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { + fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_byte_literal(literal, Mode::RawByteStr, &mut |range, res| { - unescaped.push((range, res)) - }); + unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); - check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]); + check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]); } diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index b9f2b5132353c..1eea2346451dd 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -5,9 +5,7 @@ mod block; use rowan::Direction; -use rustc_lexer::unescape::{ - self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode, -}; +use rustc_lexer::unescape::{self, unescape_byte, unescape_char, unescape_literal, Mode}; use crate::{ algo, @@ -143,7 +141,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec) { ast::LiteralKind::ByteString(s) => { if !s.is_raw() { if let Some(without_quotes) = unquote(text, 2, '"') { - unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| { + unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| { if let Err(err) = char { push_err(2, (range.start, err)); } From 43d21b535f003c81a55331c31e16313a90050b18 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 13:52:44 +1100 Subject: [PATCH 13/18] Rename some `result` variables as `res`, for consistency. --- compiler/rustc_lexer/src/unescape.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 8d5eac29452e7..674bbff0878ce 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -83,8 +83,8 @@ where match mode { Mode::Char | Mode::Byte => { let mut chars = src.chars(); - let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); - callback(0..(src.len() - chars.as_str().len()), result); + let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte); + callback(0..(src.len() - chars.as_str().len()), res); } Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), Mode::RawStr | Mode::RawByteStr => { @@ -263,7 +263,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let result = match c { + let res = match c { '\\' => { match chars.clone().next() { Some('\n') => { @@ -284,7 +284,7 @@ where _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); - callback(start..end, result); + callback(start..end, res); } fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) @@ -329,12 +329,12 @@ where // doesn't have to worry about skipping any chars. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let result = match c { + let res = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); - callback(start..end, result); + callback(start..end, res); } } From 8af12ebb9d9a00877f1cf618014aa4d6ae088d67 Mon Sep 17 00:00:00 2001 From: Florian Bartels Date: Mon, 7 Nov 2022 09:55:19 +0100 Subject: [PATCH 14/18] Add test for impl of `available_parallelism()` --- src/test/ui/thread-available_parallelism.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/test/ui/thread-available_parallelism.rs diff --git a/src/test/ui/thread-available_parallelism.rs b/src/test/ui/thread-available_parallelism.rs new file mode 100644 index 0000000000000..0633bb4afe18c --- /dev/null +++ b/src/test/ui/thread-available_parallelism.rs @@ -0,0 +1,12 @@ +// run-pass +// this is an ignore list on purpose so that new platforms don't miss it: +//ignore-vxworks +//ignore-redox +//ignore-l4re + +// check that std::thread::available_parallelism returns a valid value + +fn main() { + std::thread::available_parallelism().expect( + "should return a value on all platforms that are not ignored"); +} From f67ee43fe3675eed1a0d885a1ca196c9f73a8737 Mon Sep 17 00:00:00 2001 From: yancy Date: Mon, 7 Nov 2022 17:02:48 +0100 Subject: [PATCH 15/18] rustdoc: Add mutable to the description --- library/core/src/slice/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 4f1bb17344b29..c51e99b6627c2 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -3667,7 +3667,8 @@ impl [T] { unsafe { self.align_to() } } - /// Split a slice into a prefix, a middle of aligned SIMD types, and a suffix. + /// Split a mutable slice into a mutable prefix, a middle of aligned SIMD types, + /// and a mutable suffix. /// /// This is a safe wrapper around [`slice::align_to_mut`], so has the same weak /// postconditions as that method. You're only assured that From d6c97a32b4f5e38c0e85010df4438dc7205c44f4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 8 Nov 2022 15:59:19 +1100 Subject: [PATCH 16/18] Simplify `unescape_{char,byte}`. The `usize` isn't needed in the error case. --- compiler/rustc_lexer/src/unescape.rs | 14 +++++--------- compiler/rustc_lexer/src/unescape/tests.rs | 12 ++++-------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 674bbff0878ce..e405013dcabf8 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -94,19 +94,15 @@ where } /// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error -pub fn unescape_char(src: &str) -> Result { - let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, false).map_err(|err| (src.len() - chars.as_str().len(), err)) +/// unescaped char or an error. +pub fn unescape_char(src: &str) -> Result { + unescape_char_or_byte(&mut src.chars(), false) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. -pub fn unescape_byte(src: &str) -> Result { - let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, true) - .map(byte_from_char) - .map_err(|err| (src.len() - chars.as_str().len(), err)) +pub fn unescape_byte(src: &str) -> Result { + unescape_char_or_byte(&mut src.chars(), true).map(byte_from_char) } /// What kind of literal do we parse. diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 00c8401efdfe4..c7ca8fd16ae47 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -3,8 +3,7 @@ use super::*; #[test] fn test_unescape_char_bad() { fn check(literal_text: &str, expected_error: EscapeError) { - let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err); - assert_eq!(actual_result, Err(expected_error)); + assert_eq!(unescape_char(literal_text), Err(expected_error)); } check("", EscapeError::ZeroChars); @@ -68,8 +67,7 @@ fn test_unescape_char_bad() { #[test] fn test_unescape_char_good() { fn check(literal_text: &str, expected_char: char) { - let actual_result = unescape_char(literal_text); - assert_eq!(actual_result, Ok(expected_char)); + assert_eq!(unescape_char(literal_text), Ok(expected_char)); } check("a", 'a'); @@ -149,8 +147,7 @@ fn test_unescape_str_good() { #[test] fn test_unescape_byte_bad() { fn check(literal_text: &str, expected_error: EscapeError) { - let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err); - assert_eq!(actual_result, Err(expected_error)); + assert_eq!(unescape_byte(literal_text), Err(expected_error)); } check("", EscapeError::ZeroChars); @@ -219,8 +216,7 @@ fn test_unescape_byte_bad() { #[test] fn test_unescape_byte_good() { fn check(literal_text: &str, expected_byte: u8) { - let actual_result = unescape_byte(literal_text); - assert_eq!(actual_result, Ok(expected_byte)); + assert_eq!(unescape_byte(literal_text), Ok(expected_byte)); } check("a", b'a'); From f6658479a83bae99c097031ea0e1e74e6e836c1e Mon Sep 17 00:00:00 2001 From: onestacked Date: Mon, 7 Nov 2022 21:47:46 +0100 Subject: [PATCH 17/18] const Compare Tuples --- library/core/src/tuple.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/library/core/src/tuple.rs b/library/core/src/tuple.rs index fc91fe468cc29..28275798f751e 100644 --- a/library/core/src/tuple.rs +++ b/library/core/src/tuple.rs @@ -22,7 +22,8 @@ macro_rules! tuple_impls { maybe_tuple_doc! { $($T)+ @ #[stable(feature = "rust1", since = "1.0.0")] - impl<$($T:PartialEq),+> PartialEq for ($($T,)+) + #[rustc_const_unstable(feature = "const_cmp", issue = "92391")] + impl<$($T: ~const PartialEq),+> const PartialEq for ($($T,)+) where last_type!($($T,)+): ?Sized { @@ -40,7 +41,7 @@ macro_rules! tuple_impls { maybe_tuple_doc! { $($T)+ @ #[stable(feature = "rust1", since = "1.0.0")] - impl<$($T:Eq),+> Eq for ($($T,)+) + impl<$($T: Eq),+> Eq for ($($T,)+) where last_type!($($T,)+): ?Sized {} @@ -49,7 +50,8 @@ macro_rules! tuple_impls { maybe_tuple_doc! { $($T)+ @ #[stable(feature = "rust1", since = "1.0.0")] - impl<$($T:PartialOrd + PartialEq),+> PartialOrd for ($($T,)+) + #[rustc_const_unstable(feature = "const_cmp", issue = "92391")] + impl<$($T: ~const PartialOrd + ~const PartialEq),+> const PartialOrd for ($($T,)+) where last_type!($($T,)+): ?Sized { @@ -79,7 +81,8 @@ macro_rules! tuple_impls { maybe_tuple_doc! { $($T)+ @ #[stable(feature = "rust1", since = "1.0.0")] - impl<$($T:Ord),+> Ord for ($($T,)+) + #[rustc_const_unstable(feature = "const_cmp", issue = "92391")] + impl<$($T: ~const Ord),+> const Ord for ($($T,)+) where last_type!($($T,)+): ?Sized { From b6c05eb7d3f59b43b2e9ada520167f87bb4084aa Mon Sep 17 00:00:00 2001 From: onestacked Date: Wed, 9 Nov 2022 11:35:28 +0100 Subject: [PATCH 18/18] Cleanup fn trait ref test --- src/test/ui/consts/fn_trait_refs.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/test/ui/consts/fn_trait_refs.rs b/src/test/ui/consts/fn_trait_refs.rs index bc8766c74c60f..b507492970ab3 100644 --- a/src/test/ui/consts/fn_trait_refs.rs +++ b/src/test/ui/consts/fn_trait_refs.rs @@ -1,4 +1,4 @@ -// build-pass +// check-pass #![feature(const_fn_trait_ref_impls)] #![feature(fn_traits)] @@ -60,21 +60,18 @@ const fn test(i: i32) -> i32 { i + 1 } -const fn main() { +fn main() { const fn one() -> i32 { 1 }; const fn two() -> i32 { 2 }; + const _: () = { + let test_one = test_fn(one); + assert!(test_one == (1, 1, 1)); - // FIXME(const_cmp_tuple) - let test_one = test_fn(one); - assert!(test_one.0 == 1); - assert!(test_one.1 == 1); - assert!(test_one.2 == 1); - - let test_two = test_fn_mut(two); - assert!(test_two.0 == 1); - assert!(test_two.1 == 1); + let test_two = test_fn_mut(two); + assert!(test_two == (2, 2)); + }; }