From 6d455296fde6ff0d5154d39513259a60eb73f302 Mon Sep 17 00:00:00 2001 From: yukang Date: Tue, 25 Oct 2022 20:47:19 +0800 Subject: [PATCH 01/20] Fix #103451, find_width_of_character_at_span return width with 1 when reaching end --- compiler/rustc_span/src/source_map.rs | 8 ++---- compiler/rustc_span/src/source_map/tests.rs | 11 +++---- src/test/ui/parser/issue-103451.rs | 5 ++++ src/test/ui/parser/issue-103451.stderr | 32 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 src/test/ui/parser/issue-103451.rs create mode 100644 src/test/ui/parser/issue-103451.stderr diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index f9566eeee9465..3baa2e03cbad7 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -855,7 +855,8 @@ impl SourceMap { /// Returns a new span representing the next character after the end-point of this span. /// Special cases: /// - if span is a dummy one, returns the same span - /// - if next_point reached the end of source, return span with lo = hi + /// - if next_point reached the end of source, return a span exceeding the end of source, + /// which means sm.span_to_snippet(next_point) will get `Err` /// - respect multi-byte characters pub fn next_point(&self, sp: Span) -> Span { if sp.is_dummy() { @@ -864,9 +865,6 @@ impl SourceMap { let start_of_next_point = sp.hi().0; let width = self.find_width_of_character_at_span(sp, true); - if width == 0 { - return Span::new(sp.hi(), sp.hi(), sp.ctxt(), None); - } // If the width is 1, then the next span should only contain the next char besides current ending. // However, in the case of a multibyte character, where the width != 1, the next span should // span multiple bytes to include the whole character. @@ -938,7 +936,7 @@ impl SourceMap { // Ensure indexes are also not malformed. if start_index > end_index || end_index > source_len - 1 { debug!("find_width_of_character_at_span: source indexes are malformed"); - return 0; + return 1; } let src = local_begin.sf.external_src.borrow(); diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 1fd81018fa05c..3cab59e8dbe6c 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -511,16 +511,17 @@ fn test_next_point() { assert_eq!(span.lo().0, 4); assert_eq!(span.hi().0, 5); - // A non-empty span at the last byte should advance to create an empty - // span pointing at the end of the file. + // Reaching to the end of file, return a span that will get error with `span_to_snippet` let span = Span::with_root_ctxt(BytePos(4), BytePos(5)); let span = sm.next_point(span); assert_eq!(span.lo().0, 5); - assert_eq!(span.hi().0, 5); + assert_eq!(span.hi().0, 6); + assert!(sm.span_to_snippet(span).is_err()); - // Empty span pointing just past the last byte. + // Reaching to the end of file, return a span that will get error with `span_to_snippet` let span = Span::with_root_ctxt(BytePos(5), BytePos(5)); let span = sm.next_point(span); assert_eq!(span.lo().0, 5); - assert_eq!(span.hi().0, 5); + assert_eq!(span.hi().0, 6); + assert!(sm.span_to_snippet(span).is_err()); } diff --git a/src/test/ui/parser/issue-103451.rs b/src/test/ui/parser/issue-103451.rs new file mode 100644 index 0000000000000..1fdb001488155 --- /dev/null +++ b/src/test/ui/parser/issue-103451.rs @@ -0,0 +1,5 @@ +// error-pattern: this file contains an unclosed delimiter +// error-pattern: expected value, found struct `R` +struct R { } +struct S { + x: [u8; R diff --git a/src/test/ui/parser/issue-103451.stderr b/src/test/ui/parser/issue-103451.stderr new file mode 100644 index 0000000000000..eb3c92fb43d9b --- /dev/null +++ b/src/test/ui/parser/issue-103451.stderr @@ -0,0 +1,32 @@ +error: this file contains an unclosed delimiter + --> $DIR/issue-103451.rs:5:15 + | +LL | struct S { + | - unclosed delimiter +LL | x: [u8; R + | - ^ + | | + | unclosed delimiter + +error: this file contains an unclosed delimiter + --> $DIR/issue-103451.rs:5:15 + | +LL | struct S { + | - unclosed delimiter +LL | x: [u8; R + | - ^ + | | + | unclosed delimiter + +error[E0423]: expected value, found struct `R` + --> $DIR/issue-103451.rs:5:13 + | +LL | struct R { } + | ------------ `R` defined here +LL | struct S { +LL | x: [u8; R + | ^ help: use struct literal syntax instead: `R {}` + +error: aborting due to 3 previous errors + +For more information about this error, try `rustc --explain E0423`. From f5e390e8631a759579674b0899087a51bb073dd3 Mon Sep 17 00:00:00 2001 From: Alex Macleod Date: Thu, 27 Oct 2022 18:20:56 +0000 Subject: [PATCH 02/20] Fix rustc_parse_format spans following escaped utf-8 multibyte chars --- compiler/rustc_parse_format/src/lib.rs | 47 +++++++++------ src/test/ui/fmt/unicode-escape-spans.rs | 19 +++++++ src/test/ui/fmt/unicode-escape-spans.stderr | 63 +++++++++++++++++++++ 3 files changed, 111 insertions(+), 18 deletions(-) create mode 100644 src/test/ui/fmt/unicode-escape-spans.rs create mode 100644 src/test/ui/fmt/unicode-escape-spans.stderr diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 1394993abade9..54bf4d1d6b738 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -819,19 +819,19 @@ fn find_skips_from_snippet( }; fn find_skips(snippet: &str, is_raw: bool) -> Vec { - let mut s = snippet.char_indices().peekable(); + let mut s = snippet.char_indices(); let mut skips = vec![]; while let Some((pos, c)) = s.next() { - match (c, s.peek()) { + match (c, s.clone().next()) { // skip whitespace and empty lines ending in '\\' ('\\', Some((next_pos, '\n'))) if !is_raw => { skips.push(pos); - skips.push(*next_pos); + skips.push(next_pos); let _ = s.next(); - while let Some((pos, c)) = s.peek() { + while let Some((pos, c)) = s.clone().next() { if matches!(c, ' ' | '\n' | '\t') { - skips.push(*pos); + skips.push(pos); let _ = s.next(); } else { break; @@ -839,7 +839,7 @@ fn find_skips_from_snippet( } } ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { - skips.push(*next_pos); + skips.push(next_pos); let _ = s.next(); } ('\\', Some((_, 'x'))) if !is_raw => { @@ -858,19 +858,30 @@ fn find_skips_from_snippet( } if let Some((next_pos, next_c)) = s.next() { if next_c == '{' { - skips.push(next_pos); - let mut i = 0; // consume up to 6 hexanumeric chars + closing `}` - while let (Some((next_pos, c)), true) = (s.next(), i < 7) { - if c.is_digit(16) { - skips.push(next_pos); - } else if c == '}' { - skips.push(next_pos); - break; - } else { - break; - } - i += 1; + // consume up to 6 hexanumeric chars + let digits_len = + s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count(); + + let len_utf8 = s + .as_str() + .get(..digits_len) + .and_then(|digits| u32::from_str_radix(digits, 16).ok()) + .and_then(char::from_u32) + .map_or(1, char::len_utf8); + + // Skip the digits, for chars that encode to more than 1 utf-8 byte + // exclude as many digits as it is greater than 1 byte + // + // So for a 3 byte character, exclude 2 digits + let required_skips = + digits_len.saturating_sub(len_utf8.saturating_sub(1)); + + // skip '{' and '}' also + for pos in (next_pos..).take(required_skips + 2) { + skips.push(pos) } + + s.nth(digits_len); } else if next_c.is_digit(16) { skips.push(next_pos); // We suggest adding `{` and `}` when appropriate, accept it here as if diff --git a/src/test/ui/fmt/unicode-escape-spans.rs b/src/test/ui/fmt/unicode-escape-spans.rs new file mode 100644 index 0000000000000..753d91ce58e66 --- /dev/null +++ b/src/test/ui/fmt/unicode-escape-spans.rs @@ -0,0 +1,19 @@ +fn main() { + // 1 byte in UTF-8 + format!("\u{000041}{a}"); //~ ERROR cannot find value + format!("\u{0041}{a}"); //~ ERROR cannot find value + format!("\u{41}{a}"); //~ ERROR cannot find value + format!("\u{0}{a}"); //~ ERROR cannot find value + + // 2 bytes + format!("\u{0df}{a}"); //~ ERROR cannot find value + format!("\u{df}{a}"); //~ ERROR cannot find value + + // 3 bytes + format!("\u{00211d}{a}"); //~ ERROR cannot find value + format!("\u{211d}{a}"); //~ ERROR cannot find value + + // 4 bytes + format!("\u{1f4a3}{a}"); //~ ERROR cannot find value + format!("\u{10ffff}{a}"); //~ ERROR cannot find value +} diff --git a/src/test/ui/fmt/unicode-escape-spans.stderr b/src/test/ui/fmt/unicode-escape-spans.stderr new file mode 100644 index 0000000000000..1d8473f01b822 --- /dev/null +++ b/src/test/ui/fmt/unicode-escape-spans.stderr @@ -0,0 +1,63 @@ +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:3:25 + | +LL | format!("\u{000041}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:4:23 + | +LL | format!("\u{0041}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:5:21 + | +LL | format!("\u{41}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:6:20 + | +LL | format!("\u{0}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:9:22 + | +LL | format!("\u{0df}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:10:21 + | +LL | format!("\u{df}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:13:25 + | +LL | format!("\u{00211d}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:14:23 + | +LL | format!("\u{211d}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:17:24 + | +LL | format!("\u{1f4a3}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:18:25 + | +LL | format!("\u{10ffff}{a}"); + | ^ not found in this scope + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0425`. From 8dbd817af13f1248e307502a331d6bb1a091b177 Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Sat, 29 Oct 2022 22:46:19 +0100 Subject: [PATCH 03/20] Upgrade cc for working is_flag_supported on cross-compiles --- library/unwind/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/unwind/Cargo.toml b/library/unwind/Cargo.toml index 69fce8d7795c1..dbd360065c844 100644 --- a/library/unwind/Cargo.toml +++ b/library/unwind/Cargo.toml @@ -20,7 +20,7 @@ compiler_builtins = "0.1.0" cfg-if = "0.1.8" [build-dependencies] -cc = "1.0.69" +cc = "1.0.74" [features] From a9d7cfc480e5298def35fc0112ebe6d706934941 Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Sat, 29 Oct 2022 23:21:12 +0100 Subject: [PATCH 04/20] Update cc in Cargo.lock --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dab693419a95d..dcf40977bdcb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,9 +489,9 @@ version = "0.1.0" [[package]] name = "cc" -version = "1.0.73" +version = "1.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "581f5dba903aac52ea3feb5ec4810848460ee833876f1f9b0fdeab1f19091574" dependencies = [ "jobserver", ] From f32e6781b2932aed55342ad8a4a7f1023acb30b4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 11:31:07 +1100 Subject: [PATCH 05/20] Rename some variables. These have been bugging me for a while. - `literal_text`: `src` is also used and is shorter and better. - `first_char`: used even when "first" doesn't make sense; `c` is shorter and better. - `curr`: `c` is shorter and better. - `unescaped_char`: `result` is also used and is shorter and better. - `second_char`: these have a single use and can be elided. --- compiler/rustc_lexer/src/unescape.rs | 70 +++++++++++++--------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 8f64b5f5158e4..a6752c82bd3c5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -78,54 +78,52 @@ impl EscapeError { /// Takes a contents of a literal (without quotes) and produces a /// sequence of escaped characters or errors. /// Values are returned through invoking of the provided callback. -pub fn unescape_literal(literal_text: &str, mode: Mode, callback: &mut F) +pub fn unescape_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { match mode { Mode::Char | Mode::Byte => { - let mut chars = literal_text.chars(); + let mut chars = src.chars(); let result = unescape_char_or_byte(&mut chars, mode); // The Chars iterator moved forward. - callback(0..(literal_text.len() - chars.as_str().len()), result); + callback(0..(src.len() - chars.as_str().len()), result); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(literal_text, mode, callback), + Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode, callback), // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. - Mode::RawStr | Mode::RawByteStr => { - unescape_raw_str_or_raw_byte_str(literal_text, mode, callback) - } + Mode::RawStr | Mode::RawByteStr => unescape_raw_str_or_raw_byte_str(src, mode, callback), } } /// Takes a contents of a byte, byte string or raw byte string (without quotes) /// and produces a sequence of bytes or errors. /// Values are returned through invoking of the provided callback. -pub fn unescape_byte_literal(literal_text: &str, mode: Mode, callback: &mut F) +pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { debug_assert!(mode.is_bytes()); - unescape_literal(literal_text, mode, &mut |range, result| { + unescape_literal(src, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) } /// Takes a contents of a char literal (without quotes), and returns an /// unescaped char or an error -pub fn unescape_char(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); +pub fn unescape_char(src: &str) -> Result { + let mut chars = src.chars(); unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) + .map_err(|err| (src.len() - chars.as_str().len(), err)) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. -pub fn unescape_byte(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); +pub fn unescape_byte(src: &str) -> Result { + let mut chars = src.chars(); unescape_char_or_byte(&mut chars, Mode::Byte) .map(byte_from_char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) + .map_err(|err| (src.len() - chars.as_str().len(), err)) } /// What kind of literal do we parse. @@ -157,10 +155,7 @@ impl Mode { fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { // Previous character was '\\', unescape what follows. - - let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; - - let res = match second_char { + let res = match chars.next().ok_or(EscapeError::LoneSlash)? { '"' => '"', 'n' => '\n', 'r' => '\r', @@ -249,23 +244,23 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { } #[inline] -fn ascii_check(first_char: char, mode: Mode) -> Result { - if mode.is_bytes() && !first_char.is_ascii() { +fn ascii_check(c: char, mode: Mode) -> Result { + if mode.is_bytes() && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { - Ok(first_char) + Ok(c) } } fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { debug_assert!(mode == Mode::Char || mode == Mode::Byte); - let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match first_char { + let c = chars.next().ok_or(EscapeError::ZeroChars)?; + let res = match c { '\\' => scan_escape(chars, mode), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(first_char, mode), + _ => ascii_check(c, mode), }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); @@ -282,13 +277,12 @@ where debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let initial_len = src.len(); let mut chars = src.chars(); - while let Some(first_char) = chars.next() { - let start = initial_len - chars.as_str().len() - first_char.len_utf8(); + while let Some(c) = chars.next() { + let start = initial_len - chars.as_str().len() - c.len_utf8(); - let unescaped_char = match first_char { + let result = match c { '\\' => { - let second_char = chars.clone().next(); - match second_char { + match chars.clone().next() { Some('\n') => { // Rust language specification requires us to skip whitespaces // if unescaped '\' character is followed by '\n'. @@ -304,10 +298,10 @@ where '\t' => Ok('\t'), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(first_char, mode), + _ => ascii_check(c, mode), }; let end = initial_len - chars.as_str().len(); - callback(start..end, unescaped_char); + callback(start..end, result); } fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) @@ -341,18 +335,18 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_raw_byte_str(literal_text: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); - let initial_len = literal_text.len(); + let initial_len = src.len(); - let mut chars = literal_text.chars(); - while let Some(curr) = chars.next() { - let start = initial_len - chars.as_str().len() - curr.len_utf8(); + let mut chars = src.chars(); + while let Some(c) = chars.next() { + let start = initial_len - chars.as_str().len() - c.len_utf8(); - let result = match curr { + let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), From 84ca2c3bab370ee58ebd23050e9286e1d9e664b9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 12:15:55 +1100 Subject: [PATCH 06/20] Clarify range calculations. There is some subtlety here. --- compiler/rustc_lexer/src/unescape.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index a6752c82bd3c5..dc2fd359e278e 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -275,11 +275,13 @@ where F: FnMut(Range, Result), { debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); - let initial_len = src.len(); let mut chars = src.chars(); - while let Some(c) = chars.next() { - let start = initial_len - chars.as_str().len() - c.len_utf8(); + // The `start` and `end` computation here is complicated because + // `skip_ascii_whitespace` makes us to skip over chars without counting + // them in the range computation. + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\\' => { match chars.clone().next() { @@ -300,7 +302,7 @@ where '\r' => Err(EscapeError::BareCarriageReturn), _ => ascii_check(c, mode), }; - let end = initial_len - chars.as_str().len(); + let end = src.len() - chars.as_str().len(); callback(start..end, result); } @@ -340,19 +342,19 @@ where F: FnMut(Range, Result), { debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); - let initial_len = src.len(); - let mut chars = src.chars(); - while let Some(c) = chars.next() { - let start = initial_len - chars.as_str().len() - c.len_utf8(); + // The `start` and `end` computation here matches the one in + // `unescape_str_or_byte_str` for consistency, even though this function + // doesn't have to worry about skipping any chars. + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), }; - let end = initial_len - chars.as_str().len(); - + let end = src.len() - chars.as_str().len(); callback(start..end, result); } } From 34b32b0dac9da3fad7861bdc2bad89d771172bb3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 13:35:49 +1100 Subject: [PATCH 07/20] Use `Mode` less. It's passed to numerous places where we just need an `is_byte` bool. Passing the bool avoids the need for some assertions. Also rename `is_bytes()` as `is_byte()`, to better match `Mode::Byte`, `Mode::ByteStr`, and `Mode::RawByteStr`. --- compiler/rustc_lexer/src/unescape.rs | 46 +++++++++---------- .../src/lexer/unescape_error_reporting.rs | 14 +++--- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index dc2fd359e278e..f0042a397c2c5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -85,14 +85,16 @@ where match mode { Mode::Char | Mode::Byte => { let mut chars = src.chars(); - let result = unescape_char_or_byte(&mut chars, mode); + let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); // The Chars iterator moved forward. callback(0..(src.len() - chars.as_str().len()), result); } - Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode, callback), + Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. - Mode::RawStr | Mode::RawByteStr => unescape_raw_str_or_raw_byte_str(src, mode, callback), + Mode::RawStr | Mode::RawByteStr => { + unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) + } } } @@ -103,7 +105,7 @@ pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode.is_bytes()); + debug_assert!(mode.is_byte()); unescape_literal(src, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) @@ -113,15 +115,14 @@ where /// unescaped char or an error pub fn unescape_char(src: &str) -> Result { let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (src.len() - chars.as_str().len(), err)) + unescape_char_or_byte(&mut chars, false).map_err(|err| (src.len() - chars.as_str().len(), err)) } /// Takes a contents of a byte literal (without quotes), and returns an /// unescaped byte or an error. pub fn unescape_byte(src: &str) -> Result { let mut chars = src.chars(); - unescape_char_or_byte(&mut chars, Mode::Byte) + unescape_char_or_byte(&mut chars, true) .map(byte_from_char) .map_err(|err| (src.len() - chars.as_str().len(), err)) } @@ -145,7 +146,7 @@ impl Mode { } } - pub fn is_bytes(self) -> bool { + pub fn is_byte(self) -> bool { match self { Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, Mode::Char | Mode::Str | Mode::RawStr => false, @@ -153,7 +154,7 @@ impl Mode { } } -fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { +fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result { // Previous character was '\\', unescape what follows. let res = match chars.next().ok_or(EscapeError::LoneSlash)? { '"' => '"', @@ -176,7 +177,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { let value = hi * 16 + lo; // For a non-byte literal verify that it is within ASCII range. - if !mode.is_bytes() && !is_ascii(value) { + if !is_byte && !is_ascii(value) { return Err(EscapeError::OutOfRangeHexEscape); } let value = value as u8; @@ -212,7 +213,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { // Incorrect syntax has higher priority for error reporting // than unallowed value for a literal. - if mode.is_bytes() { + if is_byte { return Err(EscapeError::UnicodeEscapeInByte); } @@ -244,8 +245,8 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { } #[inline] -fn ascii_check(c: char, mode: Mode) -> Result { - if mode.is_bytes() && !c.is_ascii() { +fn ascii_check(c: char, is_byte: bool) -> Result { + if is_byte && !c.is_ascii() { // Byte literal can't be a non-ascii character. Err(EscapeError::NonAsciiCharInByte) } else { @@ -253,14 +254,13 @@ fn ascii_check(c: char, mode: Mode) -> Result { } } -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - debug_assert!(mode == Mode::Char || mode == Mode::Byte); +fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result { let c = chars.next().ok_or(EscapeError::ZeroChars)?; let res = match c { - '\\' => scan_escape(chars, mode), + '\\' => scan_escape(chars, is_byte), '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode), + _ => ascii_check(c, is_byte), }?; if chars.next().is_some() { return Err(EscapeError::MoreThanOneChar); @@ -270,11 +270,10 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result(src: &str, mode: Mode, callback: &mut F) +fn unescape_str_or_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let mut chars = src.chars(); // The `start` and `end` computation here is complicated because @@ -293,14 +292,14 @@ where skip_ascii_whitespace(&mut chars, start, callback); continue; } - _ => scan_escape(&mut chars, mode), + _ => scan_escape(&mut chars, is_byte), } } '\n' => Ok('\n'), '\t' => Ok('\t'), '"' => Err(EscapeError::EscapeOnlyChar), '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode), + _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); callback(start..end, result); @@ -337,11 +336,10 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_raw_byte_str(src: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), { - debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); let mut chars = src.chars(); // The `start` and `end` computation here matches the one in @@ -351,7 +349,7 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), + c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), c => Ok(c), }; let end = src.len() - chars.as_str().len(); diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index f075de7142676..055ee98a00aa3 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -108,7 +108,7 @@ pub(crate) fn emit_unescape_error( } if !has_help { - let (prefix, msg) = if mode.is_bytes() { + let (prefix, msg) = if mode.is_byte() { ("b", "if you meant to write a byte string literal, use double quotes") } else { ("", "if you meant to write a `str` literal, use double quotes") @@ -142,7 +142,7 @@ pub(crate) fn emit_unescape_error( EscapeError::EscapeOnlyChar => { let (c, char_span) = last_char(); - let msg = if mode.is_bytes() { + let msg = if mode.is_byte() { "byte constant must be escaped" } else { "character constant must be escaped" @@ -182,11 +182,11 @@ pub(crate) fn emit_unescape_error( let (c, span) = last_char(); let label = - if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; + if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; let ec = escaped_char(c); let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); diag.span_label(span, label); - if c == '{' || c == '}' && !mode.is_bytes() { + if c == '{' || c == '}' && !mode.is_byte() { diag.help( "if used in a formatting string, curly braces are escaped with `{{` and `}}`", ); @@ -196,7 +196,7 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else { - if !mode.is_bytes() { + if !mode.is_byte() { diag.span_suggestion( span_with_quotes, "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", @@ -231,7 +231,7 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_bytes()); + assert!(mode.is_byte()); let (c, span) = last_char(); let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { @@ -271,7 +271,7 @@ pub(crate) fn emit_unescape_error( err.emit(); } EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_bytes()); + assert!(mode.is_byte()); let (c, span) = last_char(); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) From 7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 15:17:37 +1100 Subject: [PATCH 08/20] Make non-ASCII errors more consistent. There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them. Two `EscapeError` variants exist for when that constraint is violated. - `NonAsciiCharInByte`: used for byte literals and byte string literals. - `NonAsciiCharInByteString`: used for raw byte string literals. As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages. This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them. Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference. --- compiler/rustc_lexer/src/unescape.rs | 7 ++-- compiler/rustc_lexer/src/unescape/tests.rs | 7 ++-- .../src/lexer/unescape_error_reporting.rs | 32 ++++++++----------- src/test/ui/attributes/key-value-non-ascii.rs | 2 +- .../ui/attributes/key-value-non-ascii.stderr | 4 +-- src/test/ui/parser/byte-literals.rs | 2 +- src/test/ui/parser/byte-literals.stderr | 4 +-- src/test/ui/parser/byte-string-literals.rs | 4 +-- .../ui/parser/byte-string-literals.stderr | 6 ++-- .../ui/parser/raw/raw-byte-string-literals.rs | 2 +- .../raw/raw-byte-string-literals.stderr | 2 +- .../ui/parser/unicode-control-codepoints.rs | 16 +++++----- .../parser/unicode-control-codepoints.stderr | 24 +++++++------- src/test/ui/suggestions/multibyte-escapes.rs | 12 +++---- .../ui/suggestions/multibyte-escapes.stderr | 12 +++---- 15 files changed, 62 insertions(+), 74 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index f0042a397c2c5..9c9cce7cbd48e 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -52,10 +52,8 @@ pub enum EscapeError { /// Unicode escape code in byte literal. UnicodeEscapeInByte, - /// Non-ascii character in byte literal. + /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, - /// Non-ascii character in byte string literal. - NonAsciiCharInByteString, /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. @@ -349,8 +347,7 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let result = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString), - c => Ok(c), + _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); callback(start..end, result); diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index fa61554afde6c..008edef5a6385 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -289,9 +289,6 @@ fn test_unescape_raw_byte_str() { } check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]); - check( - "🦀a", - &[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))], - ); + check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); + check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]); } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 055ee98a00aa3..6373f5b4fd6ff 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error( .emit(); } EscapeError::NonAsciiCharInByte => { - assert!(mode.is_byte()); let (c, span) = last_char(); - let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); + let desc = match mode { + Mode::Byte => "byte literal", + Mode::ByteStr => "byte string literal", + Mode::RawByteStr => "raw byte string literal", + _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"), + }; + let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc)); let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { format!(" but is {:?}", c) } else { String::new() }; - err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); - if (c as u32) <= 0xFF { + err.span_label(span, &format!("must be ASCII{}", postfix)); + // Note: the \\xHH suggestions are not given for raw byte string + // literals, because they are araw and so cannot use any escapes. + if (c as u32) <= 0xFF && mode != Mode::RawByteStr { err.span_suggestion( span, &format!( @@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error( format!("\\x{:X}", c as u32), Applicability::MaybeIncorrect, ); - } else if matches!(mode, Mode::Byte) { + } else if mode == Mode::Byte { err.span_label(span, "this multibyte character does not fit into a single byte"); - } else if matches!(mode, Mode::ByteStr) { + } else if mode != Mode::RawByteStr { let mut utf8 = String::new(); utf8.push(c); err.span_suggestion( @@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error( } err.emit(); } - EscapeError::NonAsciiCharInByteString => { - assert!(mode.is_byte()); - let (c, span) = last_char(); - let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { - format!(" but is {:?}", c) - } else { - String::new() - }; - handler - .struct_span_err(span, "raw byte string must be ASCII") - .span_label(span, &format!("must be ASCII{}", postfix)) - .emit(); - } EscapeError::OutOfRangeHexEscape => { handler .struct_span_err(span, "out of range hex escape") diff --git a/src/test/ui/attributes/key-value-non-ascii.rs b/src/test/ui/attributes/key-value-non-ascii.rs index 12942eabdf7b5..e14e2fc05ad39 100644 --- a/src/test/ui/attributes/key-value-non-ascii.rs +++ b/src/test/ui/attributes/key-value-non-ascii.rs @@ -1,4 +1,4 @@ #![feature(rustc_attrs)] -#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant +#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte string literal fn main() {} diff --git a/src/test/ui/attributes/key-value-non-ascii.stderr b/src/test/ui/attributes/key-value-non-ascii.stderr index 422107867f7f9..23d482de6a868 100644 --- a/src/test/ui/attributes/key-value-non-ascii.stderr +++ b/src/test/ui/attributes/key-value-non-ascii.stderr @@ -1,8 +1,8 @@ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/key-value-non-ascii.rs:3:19 | LL | #[rustc_dummy = b"ffi.rs"] - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the UTF-8 encoding of 'ffi', use \xHH escapes | diff --git a/src/test/ui/parser/byte-literals.rs b/src/test/ui/parser/byte-literals.rs index 05a510b24a7ab..896dc1a1a5fba 100644 --- a/src/test/ui/parser/byte-literals.rs +++ b/src/test/ui/parser/byte-literals.rs @@ -7,6 +7,6 @@ pub fn main() { b'\x0Z'; //~ ERROR invalid character in numeric character escape: `Z` b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped - b'é'; //~ ERROR non-ASCII character in byte constant + b'é'; //~ ERROR non-ASCII character in byte literal b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/src/test/ui/parser/byte-literals.stderr b/src/test/ui/parser/byte-literals.stderr index c3d0006163005..efa55ae05bd37 100644 --- a/src/test/ui/parser/byte-literals.stderr +++ b/src/test/ui/parser/byte-literals.stderr @@ -32,11 +32,11 @@ error: byte constant must be escaped: `'` LL | b'''; | ^ help: escape the character: `\'` -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/byte-literals.rs:10:7 | LL | b'é'; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'é', use a \xHH escape | diff --git a/src/test/ui/parser/byte-string-literals.rs b/src/test/ui/parser/byte-string-literals.rs index b1f11024a7bb6..30a4f50c4e40b 100644 --- a/src/test/ui/parser/byte-string-literals.rs +++ b/src/test/ui/parser/byte-string-literals.rs @@ -3,7 +3,7 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape pub fn main() { b"\f"; //~ ERROR unknown byte escape b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` - b"é"; //~ ERROR non-ASCII character in byte constant - br##"é"##; //~ ERROR raw byte string must be ASCII + b"é"; //~ ERROR non-ASCII character in byte string literal + br##"é"##; //~ ERROR non-ASCII character in raw byte string literal b"a //~ ERROR unterminated double quote byte string } diff --git a/src/test/ui/parser/byte-string-literals.stderr b/src/test/ui/parser/byte-string-literals.stderr index 3b8b3692e053f..5b96cc3d18abc 100644 --- a/src/test/ui/parser/byte-string-literals.stderr +++ b/src/test/ui/parser/byte-string-literals.stderr @@ -20,18 +20,18 @@ error: invalid character in numeric character escape: `Z` LL | b"\x0Z"; | ^ invalid character in numeric character escape -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/byte-string-literals.rs:6:7 | LL | b"é"; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'é', use a \xHH escape | LL | b"\xE9"; | ~~~~ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/byte-string-literals.rs:7:10 | LL | br##"é"##; diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.rs b/src/test/ui/parser/raw/raw-byte-string-literals.rs index 163c8ac66b022..1b859fee596ad 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.rs +++ b/src/test/ui/parser/raw/raw-byte-string-literals.rs @@ -2,6 +2,6 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string - br"é"; //~ ERROR raw byte string must be ASCII + br"é"; //~ ERROR non-ASCII character in raw byte string literal br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.stderr b/src/test/ui/parser/raw/raw-byte-string-literals.stderr index cfc877104bd9f..a2f27d1ed70ae 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-literals.stderr @@ -4,7 +4,7 @@ error: bare CR not allowed in raw string LL | br"a "; | ^ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/raw-byte-string-literals.rs:5:8 | LL | br"é"; diff --git a/src/test/ui/parser/unicode-control-codepoints.rs b/src/test/ui/parser/unicode-control-codepoints.rs index 5af0b585a1275..df099bb62ad1e 100644 --- a/src/test/ui/parser/unicode-control-codepoints.rs +++ b/src/test/ui/parser/unicode-control-codepoints.rs @@ -14,15 +14,15 @@ fn main() { println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); //~^ ERROR unicode codepoint changing visible direction of text present in literal println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "); - //~^ ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant - //~| ERROR non-ASCII character in byte constant + //~^ ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal + //~| ERROR non-ASCII character in byte string literal println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); - //~^ ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII - //~| ERROR raw byte string must be ASCII + //~^ ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal + //~| ERROR non-ASCII character in raw byte string literal println!("{:?}", '‮'); //~^ ERROR unicode codepoint changing visible direction of text present in literal } diff --git a/src/test/ui/parser/unicode-control-codepoints.stderr b/src/test/ui/parser/unicode-control-codepoints.stderr index 44548c72ff5d0..fc071a9419142 100644 --- a/src/test/ui/parser/unicode-control-codepoints.stderr +++ b/src/test/ui/parser/unicode-control-codepoints.stderr @@ -14,69 +14,69 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | = help: unicode escape sequences cannot be used as a byte or in a byte string -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:26 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{202e}' + | ^ must be ASCII but is '\u{202e}' | help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes | LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:30 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2066}' + | ^ must be ASCII but is '\u{2066}' | help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes | LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:41 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2069}' + | ^ must be ASCII but is '\u{2069}' | help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes | LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); | ~~~~~~~~~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/unicode-control-codepoints.rs:16:43 | LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ byte constant must be ASCII but is '\u{2066}' + | ^ must be ASCII but is '\u{2066}' | help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes | LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); | ~~~~~~~~~~~~ -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:29 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{202e}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:33 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{2066}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:44 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); | ^ must be ASCII but is '\u{2069}' -error: raw byte string must be ASCII +error: non-ASCII character in raw byte string literal --> $DIR/unicode-control-codepoints.rs:21:46 | LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); diff --git a/src/test/ui/suggestions/multibyte-escapes.rs b/src/test/ui/suggestions/multibyte-escapes.rs index fd5d46a4e923e..c4105186244db 100644 --- a/src/test/ui/suggestions/multibyte-escapes.rs +++ b/src/test/ui/suggestions/multibyte-escapes.rs @@ -2,17 +2,17 @@ fn main() { b'µ'; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte literal //~| HELP: if you meant to use the unicode code point for 'µ', use a \xHH escape - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII b'字'; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte literal //~| NOTE: this multibyte character does not fit into a single byte - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII b"字"; - //~^ ERROR: non-ASCII character in byte constant + //~^ ERROR: non-ASCII character in byte string literal //~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes - //~| NOTE: byte constant must be ASCII + //~| NOTE: must be ASCII } diff --git a/src/test/ui/suggestions/multibyte-escapes.stderr b/src/test/ui/suggestions/multibyte-escapes.stderr index 6e26bc1f01cef..1e7c43e6538f6 100644 --- a/src/test/ui/suggestions/multibyte-escapes.stderr +++ b/src/test/ui/suggestions/multibyte-escapes.stderr @@ -1,28 +1,28 @@ -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:4:7 | LL | b'µ'; - | ^ byte constant must be ASCII + | ^ must be ASCII | help: if you meant to use the unicode code point for 'µ', use a \xHH escape | LL | b'\xB5'; | ~~~~ -error: non-ASCII character in byte constant +error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:9:7 | LL | b'字'; | ^^ | | - | byte constant must be ASCII + | must be ASCII | this multibyte character does not fit into a single byte -error: non-ASCII character in byte constant +error: non-ASCII character in byte string literal --> $DIR/multibyte-escapes.rs:14:7 | LL | b"字"; - | ^^ byte constant must be ASCII + | ^^ must be ASCII | help: if you meant to use the UTF-8 encoding of '字', use \xHH escapes | From a21c0458979d786d821c2d75a1b109fe38914da0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 3 Nov 2022 16:26:27 +1100 Subject: [PATCH 09/20] Improve comments. Remove a low-value comment, remove a duplicate comment, and correct a third comment. --- compiler/rustc_lexer/src/unescape.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 9c9cce7cbd48e..db7bf02e71adc 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -84,12 +84,9 @@ where Mode::Char | Mode::Byte => { let mut chars = src.chars(); let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); - // The Chars iterator moved forward. callback(0..(src.len() - chars.as_str().len()), result); } Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), - // NOTE: Raw strings do not perform any explicit character escaping, here we - // only translate CRLF to LF and produce errors on bare CR. Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } @@ -333,7 +330,7 @@ where /// Takes a contents of a string literal (without quotes) and produces a /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. +/// only produce errors on bare CR. fn unescape_raw_str_or_raw_byte_str(src: &str, is_byte: bool, callback: &mut F) where F: FnMut(Range, Result), From d963686f5a87b9eaa2ac2bdc29ddb796e0e83f1f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 09:19:34 +1100 Subject: [PATCH 10/20] Refactor `cook_lexer_literal`. It deals with eight cases: ints, floats, and the six quoted types (char/byte/strings). For ints and floats we have an early return, and the other six types fall through to the code at the end, which makes the function hard to read. This commit rearranges things to avoid the early returns. --- compiler/rustc_parse/src/lexer/mod.rs | 78 +++++++++++++-------------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 462bce16ad717..61b5be4240414 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -363,55 +363,55 @@ impl<'a> StringReader<'a> { fn cook_lexer_literal( &self, start: BytePos, - suffix_start: BytePos, + end: BytePos, kind: rustc_lexer::LiteralKind, ) -> (token::LitKind, Symbol) { - // prefix means `"` or `br"` or `r###"`, ... - let (lit_kind, mode, prefix_len, postfix_len) = match kind { + match kind { rustc_lexer::LiteralKind::Char { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated character literal", error_code!(E0762), ) } - (token::Char, Mode::Char, 1, 1) // ' ' + self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated byte constant", error_code!(E0763), ) } - (token::Byte, Mode::Byte, 2, 1) // b' ' + self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "unterminated double quote string", error_code!(E0765), ) } - (token::Str, Mode::Str, 1, 1) // " " + self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { self.sess.span_diagnostic.span_fatal_with_code( - self.mk_sp(start + BytePos(1), suffix_start), + self.mk_sp(start + BytePos(1), end), "unterminated double quote byte string", error_code!(E0766), ) } - (token::ByteStr, Mode::ByteStr, 2, 1) // b" " + self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## + let kind = token::StrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -419,56 +419,47 @@ impl<'a> StringReader<'a> { rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); - (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## + let kind = token::ByteStrRaw(n_hashes); + self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } } rustc_lexer::LiteralKind::Int { base, empty_int } => { - return if empty_int { + if empty_int { self.sess .span_diagnostic .struct_span_err_with_code( - self.mk_sp(start, suffix_start), + self.mk_sp(start, end), "no valid digits found for number", error_code!(E0768), ) .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, suffix_start); - (token::Integer, self.symbol_from_to(start, suffix_start)) - }; + self.validate_int_literal(base, start, end); + (token::Integer, self.symbol_from_to(start, end)) + } } rustc_lexer::LiteralKind::Float { base, empty_exponent } => { if empty_exponent { self.err_span_(start, self.pos, "expected at least one digit in exponent"); } - match base { - Base::Hexadecimal => self.err_span_( - start, - suffix_start, - "hexadecimal float literal is not supported", - ), + Base::Hexadecimal => { + self.err_span_(start, end, "hexadecimal float literal is not supported") + } Base::Octal => { - self.err_span_(start, suffix_start, "octal float literal is not supported") + self.err_span_(start, end, "octal float literal is not supported") } Base::Binary => { - self.err_span_(start, suffix_start, "binary float literal is not supported") + self.err_span_(start, end, "binary float literal is not supported") } - _ => (), + _ => {} } - - let id = self.symbol_from_to(start, suffix_start); - return (token::Float, id); + (token::Float, self.symbol_from_to(start, end)) } - }; - let content_start = start + BytePos(prefix_len); - let content_end = suffix_start - BytePos(postfix_len); - let id = self.symbol_from_to(content_start, content_end); - self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len); - (lit_kind, id) + } } #[inline] @@ -659,20 +650,22 @@ impl<'a> StringReader<'a> { ) } - fn validate_literal_escape( + fn cook_quoted( &self, + kind: token::LitKind, mode: Mode, - content_start: BytePos, - content_end: BytePos, + start: BytePos, + end: BytePos, prefix_len: u32, postfix_len: u32, - ) { + ) -> (token::LitKind, Symbol) { + let content_start = start + BytePos(prefix_len); + let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); unescape::unescape_literal(lit_content, mode, &mut |range, result| { // Here we only check for errors. The actual unescaping is done later. if let Err(err) = result { - let span_with_quotes = self - .mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len)); + let span_with_quotes = self.mk_sp(start, end); let (start, end) = (range.start as u32, range.end as u32); let lo = content_start + BytePos(start); let hi = lo + BytePos(end - start); @@ -688,6 +681,7 @@ impl<'a> StringReader<'a> { ); } }); + (kind, Symbol::intern(lit_content)) } fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { From a203482d2a20cba0c86298334ebd74438bd477ba Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 10:02:29 +1100 Subject: [PATCH 11/20] Inline and remove `validate_int_literal`. It has a single callsite, and is fairly small. The `Float` match arm already has base-specific checking inline, so this makes things more consistent. --- compiler/rustc_lexer/src/lib.rs | 10 ++++----- compiler/rustc_parse/src/lexer/mod.rs | 31 +++++++++++---------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 51515976e4ee9..0d29d7b1e3d9b 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -203,13 +203,13 @@ pub enum RawStrError { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Base { /// Literal starts with "0b". - Binary, + Binary = 2, /// Literal starts with "0o". - Octal, - /// Literal starts with "0x". - Hexadecimal, + Octal = 8, /// Literal doesn't contain a prefix. - Decimal, + Decimal = 10, + /// Literal starts with "0x". + Hexadecimal = 16, } /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun", diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 61b5be4240414..9de0c74f4b1d2 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -437,7 +437,19 @@ impl<'a> StringReader<'a> { .emit(); (token::Integer, sym::integer(0)) } else { - self.validate_int_literal(base, start, end); + if matches!(base, Base::Binary | Base::Octal) { + let base = base as u32; + let s = self.str_from_to(start + BytePos(2), end); + for (idx, c) in s.char_indices() { + if c != '_' && c.to_digit(base).is_none() { + self.err_span_( + start + BytePos::from_usize(2 + idx), + start + BytePos::from_usize(2 + idx + c.len_utf8()), + &format!("invalid digit for a base {} literal", base), + ); + } + } + } (token::Integer, self.symbol_from_to(start, end)) } } @@ -683,23 +695,6 @@ impl<'a> StringReader<'a> { }); (kind, Symbol::intern(lit_content)) } - - fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { - let base = match base { - Base::Binary => 2, - Base::Octal => 8, - _ => return, - }; - let s = self.str_from_to(content_start + BytePos(2), content_end); - for (idx, c) in s.char_indices() { - let idx = idx as u32; - if c != '_' && c.to_digit(base).is_none() { - let lo = content_start + BytePos(2 + idx); - let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); - self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); - } - } - } } pub fn nfc_normalize(string: &str) -> Symbol { From f8e2cef5faa7ff77e91ea2d0416006f0b7aa52bf Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 3 Nov 2022 22:01:58 -0700 Subject: [PATCH 12/20] Move intra-doc link checks to a separate function. --- src/tools/linkchecker/main.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 7842611bd4ffa..4092722501d0e 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -365,6 +365,23 @@ impl Checker { } }); + self.check_intra_doc_links(file, &pretty_path, &source, report); + + // we don't need the source anymore, + // so drop to reduce memory-usage + match self.cache.get_mut(&pretty_path).unwrap() { + FileEntry::HtmlFile { source, .. } => *source = Rc::new(String::new()), + _ => unreachable!("must be html file"), + } + } + + fn check_intra_doc_links( + &mut self, + file: &Path, + pretty_path: &str, + source: &str, + report: &mut Report, + ) { // Search for intra-doc links that rustdoc didn't warn about // FIXME(#77199, 77200) Rustdoc should just warn about these directly. // NOTE: only looks at one line at a time; in practice this should find most links @@ -379,12 +396,6 @@ impl Checker { } } } - // we don't need the source anymore, - // so drop to reduce memory-usage - match self.cache.get_mut(&pretty_path).unwrap() { - FileEntry::HtmlFile { source, .. } => *source = Rc::new(String::new()), - _ => unreachable!("must be html file"), - } } /// Load a file from disk, or from the cache if available. From 57b229086e274c21e5544e6719271ae79a952194 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 3 Nov 2022 22:02:39 -0700 Subject: [PATCH 13/20] Remove reference from the intra-doc link checker. --- src/tools/linkchecker/main.rs | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 4092722501d0e..4170c32f1fe25 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -55,30 +55,6 @@ const LINKCHECK_EXCEPTIONS: &[(&str, &[&str])] = &[ #[rustfmt::skip] const INTRA_DOC_LINK_EXCEPTIONS: &[(&str, &[&str])] = &[ - // This will never have links that are not in other pages. - // To avoid repeating the exceptions twice, an empty list means all broken links are allowed. - ("reference/print.html", &[]), - // All the reference 'links' are actually ENBF highlighted as code - ("reference/comments.html", &[ - "/ !", - "* !", - ]), - ("reference/identifiers.html", &[ - "a-z A-Z", - "a-z A-Z 0-9 _", - "a-z A-Z] [a-z A-Z 0-9 _", - ]), - ("reference/tokens.html", &[ - "0-1", - "0-7", - "0-9", - "0-9", - "0-9 a-f A-F", - ]), - ("reference/notation.html", &[ - "b B", - "a-z", - ]), // This is being used in the sense of 'inclusive range', not a markdown link ("core/ops/struct.RangeInclusive.html", &["begin, end"]), ("std/ops/struct.RangeInclusive.html", &["begin, end"]), @@ -382,6 +358,16 @@ impl Checker { source: &str, report: &mut Report, ) { + let relative = file.strip_prefix(&self.root).expect("should always be relative to root"); + // Don't check the reference. It has several legitimate things that + // look like []. The reference has its own broken link + // checker in its CI which handles this using pulldown_cmark. + // + // This checks both the end of the root (when checking just the + // reference directory) or the beginning (when checking all docs). + if self.root.ends_with("reference") || relative.starts_with("reference") { + return; + } // Search for intra-doc links that rustdoc didn't warn about // FIXME(#77199, 77200) Rustdoc should just warn about these directly. // NOTE: only looks at one line at a time; in practice this should find most links From ee7c58bc1615963079edfed0e43d8d3578a55bf3 Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Fri, 4 Nov 2022 06:09:35 +0000 Subject: [PATCH 14/20] Update linker-plugin-lto.md to contain up to Rust 1.65 The table rows were obtained via the script embedded in the page. --- src/doc/rustc/src/linker-plugin-lto.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/doc/rustc/src/linker-plugin-lto.md b/src/doc/rustc/src/linker-plugin-lto.md index b1854b22a7cd6..5730f95ad2b95 100644 --- a/src/doc/rustc/src/linker-plugin-lto.md +++ b/src/doc/rustc/src/linker-plugin-lto.md @@ -136,7 +136,7 @@ able to get around this problem by setting `-Clinker=lld-link` in RUSTFLAGS ```sh rustup toolchain install --profile minimal nightly MINOR_VERSION=$(rustc +nightly --version | cut -d . -f 2) -LOWER_BOUND=61 +LOWER_BOUND=66 llvm_version() { toolchain="$1" @@ -193,5 +193,10 @@ The following table shows known good combinations of toolchain versions. | Rust 1.58 | Clang 13 | | Rust 1.59 | Clang 13 | | Rust 1.60 | Clang 14 | +| Rust 1.61 | Clang 14 | +| Rust 1.62 | Clang 14 | +| Rust 1.63 | Clang 14 | +| Rust 1.64 | Clang 14 | +| Rust 1.65 | Clang 15 | Note that the compatibility policy for this feature might change in the future. From 971a146e5547b0b4963480a880e22d138f1240de Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Thu, 3 Nov 2022 12:49:49 -0400 Subject: [PATCH 15/20] Promote {aarch64,i686,x86_64}-unknown-uefi to Tier 2 MCP: https://github.com/rust-lang/compiler-team/issues/555 --- .../host-x86_64/dist-various-2/Dockerfile | 3 ++ src/doc/rustc/src/platform-support.md | 6 +-- .../src/platform-support/unknown-uefi.md | 38 ++++--------------- src/tools/build-manifest/src/main.rs | 3 ++ 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/src/ci/docker/host-x86_64/dist-various-2/Dockerfile b/src/ci/docker/host-x86_64/dist-various-2/Dockerfile index 126c292b38ea1..e1adabaac9b56 100644 --- a/src/ci/docker/host-x86_64/dist-various-2/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-various-2/Dockerfile @@ -118,6 +118,9 @@ ENV TARGETS=$TARGETS,armv7-unknown-linux-gnueabi ENV TARGETS=$TARGETS,armv7-unknown-linux-musleabi ENV TARGETS=$TARGETS,i686-unknown-freebsd ENV TARGETS=$TARGETS,x86_64-unknown-none +ENV TARGETS=$TARGETS,aarch64-unknown-uefi +ENV TARGETS=$TARGETS,i686-unknown-uefi +ENV TARGETS=$TARGETS,x86_64-unknown-uefi # As per https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/1300211 # we need asm in the search path for gcc-8 (for gnux32) but not in the search path of the diff --git a/src/doc/rustc/src/platform-support.md b/src/doc/rustc/src/platform-support.md index 3ae9872cf62d4..5b4e436da7d57 100644 --- a/src/doc/rustc/src/platform-support.md +++ b/src/doc/rustc/src/platform-support.md @@ -128,6 +128,7 @@ target | std | notes [`aarch64-linux-android`](platform-support/android.md) | ✓ | ARM64 Android `aarch64-unknown-none-softfloat` | * | Bare ARM64, softfloat `aarch64-unknown-none` | * | Bare ARM64, hardfloat +[`aarch64-unknown-uefi`](platform-support/unknown-uefi.md) | * | ARM64 UEFI [`arm-linux-androideabi`](platform-support/android.md) | ✓ | ARMv7 Android `arm-unknown-linux-musleabi` | ✓ | ARMv6 Linux with MUSL `arm-unknown-linux-musleabihf` | ✓ | ARMv6 Linux with MUSL, hardfloat @@ -149,6 +150,7 @@ target | std | notes [`i686-linux-android`](platform-support/android.md) | ✓ | 32-bit x86 Android `i686-unknown-freebsd` | ✓ | 32-bit FreeBSD `i686-unknown-linux-musl` | ✓ | 32-bit Linux with MUSL +[`i686-unknown-uefi`](platform-support/unknown-uefi.md) | * | 32-bit UEFI `mips-unknown-linux-musl` | ✓ | MIPS Linux with MUSL `mips64-unknown-linux-muslabi64` | ✓ | MIPS64 Linux, n64 ABI, MUSL `mips64el-unknown-linux-muslabi64` | ✓ | MIPS64 (LE) Linux, n64 ABI, MUSL @@ -181,6 +183,7 @@ target | std | notes `x86_64-unknown-linux-gnux32` | ✓ | 64-bit Linux (x32 ABI) (kernel 4.15, glibc 2.27) [`x86_64-unknown-none`](platform-support/x86_64-unknown-none.md) | * | Freestanding/bare-metal x86_64, softfloat `x86_64-unknown-redox` | ✓ | Redox OS +[`x86_64-unknown-uefi`](platform-support/unknown-uefi.md) | * | 64-bit UEFI [Fortanix ABI]: https://edp.fortanix.com/ @@ -213,7 +216,6 @@ target | std | host | notes [`aarch64-pc-windows-gnullvm`](platform-support/pc-windows-gnullvm.md) | ✓ | ✓ | `aarch64-unknown-freebsd` | ✓ | ✓ | ARM64 FreeBSD `aarch64-unknown-hermit` | ✓ | | ARM64 HermitCore -[`aarch64-unknown-uefi`](platform-support/unknown-uefi.md) | * | | ARM64 UEFI `aarch64-unknown-linux-gnu_ilp32` | ✓ | ✓ | ARM64 Linux (ILP32 ABI) `aarch64-unknown-netbsd` | ✓ | ✓ | [`aarch64-unknown-openbsd`](platform-support/openbsd.md) | ✓ | ✓ | ARM64 OpenBSD @@ -252,7 +254,6 @@ target | std | host | notes `i686-unknown-haiku` | ✓ | ✓ | 32-bit Haiku `i686-unknown-netbsd` | ✓ | ✓ | NetBSD/i386 with SSE2 [`i686-unknown-openbsd`](platform-support/openbsd.md) | ✓ | ✓ | 32-bit OpenBSD -[`i686-unknown-uefi`](platform-support/unknown-uefi.md) | * | | 32-bit UEFI `i686-uwp-windows-gnu` | ? | | `i686-uwp-windows-msvc` | ? | | `i686-wrs-vxworks` | ? | | @@ -311,7 +312,6 @@ target | std | host | notes `x86_64-unknown-l4re-uclibc` | ? | | `x86_64-unknown-none-linuxkernel` | * | | Linux kernel modules [`x86_64-unknown-openbsd`](platform-support/openbsd.md) | ✓ | ✓ | 64-bit OpenBSD -[`x86_64-unknown-uefi`](platform-support/unknown-uefi.md) | * | | 64-bit UEFI `x86_64-uwp-windows-gnu` | ✓ | | `x86_64-uwp-windows-msvc` | ✓ | | `x86_64-wrs-vxworks` | ? | | diff --git a/src/doc/rustc/src/platform-support/unknown-uefi.md b/src/doc/rustc/src/platform-support/unknown-uefi.md index 295dec0f0e488..e2bdf73a92990 100644 --- a/src/doc/rustc/src/platform-support/unknown-uefi.md +++ b/src/doc/rustc/src/platform-support/unknown-uefi.md @@ -1,6 +1,6 @@ # `*-unknown-uefi` -**Tier: 3** +**Tier: 2** Unified Extensible Firmware Interface (UEFI) targets for application, driver, and core UEFI binaries. @@ -72,28 +72,14 @@ target = ["x86_64-unknown-uefi"] ## Building Rust programs -Rust does not yet ship pre-compiled artifacts for this target. To compile for -this target, you will either need to build Rust with the target enabled (see -"Building rust for UEFI targets" above), or build your own copy of `core` by -using `build-std`, `cargo-buildx`, or similar. - -A native build with the unstable `build-std`-feature can be achieved via: - -```sh -cargo +nightly build \ - -Zbuild-std=core,compiler_builtins \ - -Zbuild-std-features=compiler-builtins-mem \ - --target x86_64-unknown-uefi -``` - -Alternatively, you can install `cargo-xbuild` via -`cargo install --force cargo-xbuild` and build for the UEFI targets via: +Starting with Rust 1.67, precompiled artifacts are provided via +`rustup`. For example, to use `x86_64-unknown-uefi`: ```sh -cargo \ - +nightly \ - xbuild \ - --target x86_64-unknown-uefi +# install cross-compile toolchain +rustup target add x86_64-unknown-uefi +# target flag may be used with any cargo or rustc command +cargo build --target x86_64-unknown-uefi ``` ## Testing @@ -167,18 +153,10 @@ The following code is a valid UEFI application returning immediately upon execution with an exit code of 0. A panic handler is provided. This is executed by rust on panic. For simplicity, we simply end up in an infinite loop. -Note that as of rust-1.31.0, all features used here are stabilized. No unstable -features are required, nor do we rely on nightly compilers. However, if you do -not compile rustc for the UEFI targets, you need a nightly compiler to support -the `-Z build-std` flag. - This example can be compiled as binary crate via `cargo`: ```sh -cargo +nightly build \ - -Zbuild-std=core,compiler_builtins \ - -Zbuild-std-features=compiler-builtins-mem \ - --target x86_64-unknown-uefi +cargo build --target x86_64-unknown-uefi ``` ```rust,ignore (platform-specific,eh-personality-is-unstable) diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs index b0006cb90bdd6..8d081c9986535 100644 --- a/src/tools/build-manifest/src/main.rs +++ b/src/tools/build-manifest/src/main.rs @@ -64,6 +64,7 @@ static TARGETS: &[&str] = &[ "aarch64-unknown-none", "aarch64-unknown-none-softfloat", "aarch64-unknown-redox", + "aarch64-unknown-uefi", "arm-linux-androideabi", "arm-unknown-linux-gnueabi", "arm-unknown-linux-gnueabihf", @@ -99,6 +100,7 @@ static TARGETS: &[&str] = &[ "i686-unknown-freebsd", "i686-unknown-linux-gnu", "i686-unknown-linux-musl", + "i686-unknown-uefi", "m68k-unknown-linux-gnu", "mips-unknown-linux-gnu", "mips-unknown-linux-musl", @@ -155,6 +157,7 @@ static TARGETS: &[&str] = &[ "x86_64-unknown-none", "x86_64-unknown-redox", "x86_64-unknown-hermit", + "x86_64-unknown-uefi", ]; /// This allows the manifest to contain rust-docs for hosts that don't build From a838952239493d9dafe2d5f2ca1204f326841ae9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 11:09:23 +1100 Subject: [PATCH 16/20] Remove `unescape_byte_literal`. It's easy to just use `unescape_literal` + `byte_from_char`. --- compiler/rustc_ast/src/util/literal.rs | 29 +++++++------------ compiler/rustc_lexer/src/unescape.rs | 16 ++-------- compiler/rustc_lexer/src/unescape/tests.rs | 12 ++++---- .../crates/syntax/src/validation.rs | 6 ++-- 4 files changed, 20 insertions(+), 43 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 536b385606c69..8f342175f7d37 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,12 +2,9 @@ use crate::ast::{self, Lit, LitKind}; use crate::token::{self, Token}; - -use rustc_lexer::unescape::{unescape_byte, unescape_char}; -use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode}; +use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; - use std::ascii; pub enum LitError { @@ -109,13 +106,11 @@ impl LitKind { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + unescape_literal(&s, Mode::ByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } }); @@ -127,13 +122,11 @@ impl LitKind { let bytes = if s.contains('\r') { let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| { - match unescaped_byte { - Ok(c) => buf.push(c), - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + unescape_literal(&s, Mode::RawByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); } } }); diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index db7bf02e71adc..8d5eac29452e7 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -93,19 +93,6 @@ where } } -/// Takes a contents of a byte, byte string or raw byte string (without quotes) -/// and produces a sequence of bytes or errors. -/// Values are returned through invoking of the provided callback. -pub fn unescape_byte_literal(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - debug_assert!(mode.is_byte()); - unescape_literal(src, mode, &mut |range, result| { - callback(range, result.map(byte_from_char)); - }) -} - /// Takes a contents of a char literal (without quotes), and returns an /// unescaped char or an error pub fn unescape_char(src: &str) -> Result { @@ -351,7 +338,8 @@ where } } -fn byte_from_char(c: char) -> u8 { +#[inline] +pub fn byte_from_char(c: char) -> u8 { let res = c as u32; debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr"); res as u8 diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 008edef5a6385..00c8401efdfe4 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -246,10 +246,10 @@ fn test_unescape_byte_good() { fn test_unescape_byte_str_good() { fn check(literal_text: &str, expected: &[u8]) { let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_byte_literal(literal_text, Mode::ByteStr, &mut |range, c| { + unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| { if let Ok(b) = &mut buf { match c { - Ok(c) => b.push(c), + Ok(c) => b.push(byte_from_char(c)), Err(e) => buf = Err((range, e)), } } @@ -280,15 +280,13 @@ fn test_unescape_raw_str() { #[test] fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { + fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_byte_literal(literal, Mode::RawByteStr, &mut |range, res| { - unescaped.push((range, res)) - }); + unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); - check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]); + check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]); } diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index b9f2b5132353c..1eea2346451dd 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -5,9 +5,7 @@ mod block; use rowan::Direction; -use rustc_lexer::unescape::{ - self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode, -}; +use rustc_lexer::unescape::{self, unescape_byte, unescape_char, unescape_literal, Mode}; use crate::{ algo, @@ -143,7 +141,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec) { ast::LiteralKind::ByteString(s) => { if !s.is_raw() { if let Some(without_quotes) = unquote(text, 2, '"') { - unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| { + unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| { if let Err(err) = char { push_err(2, (range.start, err)); } From 43d21b535f003c81a55331c31e16313a90050b18 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 4 Nov 2022 13:52:44 +1100 Subject: [PATCH 17/20] Rename some `result` variables as `res`, for consistency. --- compiler/rustc_lexer/src/unescape.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 8d5eac29452e7..674bbff0878ce 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -83,8 +83,8 @@ where match mode { Mode::Char | Mode::Byte => { let mut chars = src.chars(); - let result = unescape_char_or_byte(&mut chars, mode == Mode::Byte); - callback(0..(src.len() - chars.as_str().len()), result); + let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte); + callback(0..(src.len() - chars.as_str().len()), res); } Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback), Mode::RawStr | Mode::RawByteStr => { @@ -263,7 +263,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let result = match c { + let res = match c { '\\' => { match chars.clone().next() { Some('\n') => { @@ -284,7 +284,7 @@ where _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); - callback(start..end, result); + callback(start..end, res); } fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) @@ -329,12 +329,12 @@ where // doesn't have to worry about skipping any chars. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let result = match c { + let res = match c { '\r' => Err(EscapeError::BareCarriageReturnInRawString), _ => ascii_check(c, is_byte), }; let end = src.len() - chars.as_str().len(); - callback(start..end, result); + callback(start..end, res); } } From 6994651b6c52e010dc93efbe4f90aa3816d79f5c Mon Sep 17 00:00:00 2001 From: jeremyd2019 Date: Sun, 6 Nov 2022 11:51:12 -0800 Subject: [PATCH 18/20] fix debuginfo for windows_gnullvm_base.rs These lines (including the FIXME comment) were added to windows_gnu_base.rs in cf2c492ef8c87c049b4e3a62f43c841aafc88cba but windows_gnullvm_base.rs was not updated. This resulted in an error `LLVM ERROR: dwo only supported with ELF and Wasm` attempting to build on aarch64-pc-windows-gnullvm. Signed-off-by: Jeremy Drake --- compiler/rustc_target/src/spec/windows_gnullvm_base.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_target/src/spec/windows_gnullvm_base.rs b/compiler/rustc_target/src/spec/windows_gnullvm_base.rs index 58210c75a3da2..cada28652f98a 100644 --- a/compiler/rustc_target/src/spec/windows_gnullvm_base.rs +++ b/compiler/rustc_target/src/spec/windows_gnullvm_base.rs @@ -1,4 +1,5 @@ -use crate::spec::{cvs, Cc, LinkerFlavor, Lld, TargetOptions}; +use crate::spec::{cvs, Cc, DebuginfoKind, LinkerFlavor, Lld, SplitDebuginfo, TargetOptions}; +use std::borrow::Cow; pub fn opts() -> TargetOptions { // We cannot use `-nodefaultlibs` because compiler-rt has to be passed @@ -36,7 +37,10 @@ pub fn opts() -> TargetOptions { eh_frame_header: false, no_default_libraries: false, has_thread_local: true, - + // FIXME(davidtwco): Support Split DWARF on Windows GNU - may require LLVM changes to + // output DWO, despite using DWARF, doesn't use ELF.. + debuginfo_kind: DebuginfoKind::Pdb, + supported_split_debuginfo: Cow::Borrowed(&[SplitDebuginfo::Off]), ..Default::default() } } From ee7a80211a066b8345119ce258361a8ad22986b4 Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Mon, 7 Nov 2022 03:25:54 +0000 Subject: [PATCH 19/20] Migrate linker-plugin-lto.md compatibility table to show Rust ranges The helper shell script has been rewritten as a helper Python script that generates the range-based table. --- src/doc/rustc/src/linker-plugin-lto.md | 99 +++++++++++++------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/src/doc/rustc/src/linker-plugin-lto.md b/src/doc/rustc/src/linker-plugin-lto.md index 5730f95ad2b95..9272b9ac9b2d4 100644 --- a/src/doc/rustc/src/linker-plugin-lto.md +++ b/src/doc/rustc/src/linker-plugin-lto.md @@ -131,24 +131,47 @@ able to get around this problem by setting `-Clinker=lld-link` in RUSTFLAGS ## Toolchain Compatibility - @@ -166,37 +189,13 @@ The following table shows known good combinations of toolchain versions. | Rust Version | Clang Version | |--------------|---------------| -| Rust 1.34 | Clang 8 | -| Rust 1.35 | Clang 8 | -| Rust 1.36 | Clang 8 | -| Rust 1.37 | Clang 8 | -| Rust 1.38 | Clang 9 | -| Rust 1.39 | Clang 9 | -| Rust 1.40 | Clang 9 | -| Rust 1.41 | Clang 9 | -| Rust 1.42 | Clang 9 | -| Rust 1.43 | Clang 9 | -| Rust 1.44 | Clang 9 | -| Rust 1.45 | Clang 10 | -| Rust 1.46 | Clang 10 | -| Rust 1.47 | Clang 11 | -| Rust 1.48 | Clang 11 | -| Rust 1.49 | Clang 11 | -| Rust 1.50 | Clang 11 | -| Rust 1.51 | Clang 11 | -| Rust 1.52 | Clang 12 | -| Rust 1.53 | Clang 12 | -| Rust 1.54 | Clang 12 | -| Rust 1.55 | Clang 12 | -| Rust 1.56 | Clang 13 | -| Rust 1.57 | Clang 13 | -| Rust 1.58 | Clang 13 | -| Rust 1.59 | Clang 13 | -| Rust 1.60 | Clang 14 | -| Rust 1.61 | Clang 14 | -| Rust 1.62 | Clang 14 | -| Rust 1.63 | Clang 14 | -| Rust 1.64 | Clang 14 | -| Rust 1.65 | Clang 15 | +| 1.34 - 1.37 | 8 | +| 1.38 - 1.44 | 9 | +| 1.45 - 1.46 | 10 | +| 1.47 - 1.51 | 11 | +| 1.52 - 1.55 | 12 | +| 1.56 - 1.59 | 13 | +| 1.60 - 1.64 | 14 | +| 1.65 | 15 | Note that the compatibility policy for this feature might change in the future. From d97fa2536f6fb583e2e7a49b90a33632da33443c Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 7 Nov 2022 17:46:46 +0100 Subject: [PATCH 20/20] Fix invalid background-image file name --- src/librustdoc/html/static/css/rustdoc.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustdoc/html/static/css/rustdoc.css b/src/librustdoc/html/static/css/rustdoc.css index a38c0e42ab455..f58d2c609426d 100644 --- a/src/librustdoc/html/static/css/rustdoc.css +++ b/src/librustdoc/html/static/css/rustdoc.css @@ -854,7 +854,7 @@ so that we can apply CSS-filters to change the arrow color in themes */ background-size: 20px; background-position: calc(100% - 2px) 56%; /* image is black color, themes should apply a "filter" property to change the color */ - background-image: url("down-arrow-2d685a4bae708e15.svg"); + background-image: url("down-arrow-927217e04c7463ac.svg"); } #crate-search > option { font-size: 1rem;