Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rollup of 9 pull requests #104124

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6d45529
Fix #103451, find_width_of_character_at_span return width with 1 when…
chenyukang Oct 25, 2022
f5e390e
Fix rustc_parse_format spans following escaped utf-8 multibyte chars
Alexendoo Oct 27, 2022
8dbd817
Upgrade cc for working is_flag_supported on cross-compiles
palfrey Oct 29, 2022
a9d7cfc
Update cc in Cargo.lock
palfrey Oct 29, 2022
f32e678
Rename some variables.
nnethercote Nov 3, 2022
84ca2c3
Clarify range calculations.
nnethercote Nov 3, 2022
34b32b0
Use `Mode` less.
nnethercote Nov 3, 2022
7dbf2c0
Make non-ASCII errors more consistent.
nnethercote Nov 3, 2022
a21c045
Improve comments.
nnethercote Nov 3, 2022
d963686
Refactor `cook_lexer_literal`.
nnethercote Nov 3, 2022
a203482
Inline and remove `validate_int_literal`.
nnethercote Nov 3, 2022
f8e2cef
Move intra-doc link checks to a separate function.
ehuss Nov 4, 2022
57b2290
Remove reference from the intra-doc link checker.
ehuss Nov 4, 2022
ee7c58b
Update linker-plugin-lto.md to contain up to Rust 1.65
str4d Nov 4, 2022
971a146
Promote {aarch64,i686,x86_64}-unknown-uefi to Tier 2
nicholasbishop Nov 3, 2022
a838952
Remove `unescape_byte_literal`.
nnethercote Nov 4, 2022
43d21b5
Rename some `result` variables as `res`, for consistency.
nnethercote Nov 4, 2022
6994651
fix debuginfo for windows_gnullvm_base.rs
jeremyd2019 Nov 6, 2022
ee7a802
Migrate linker-plugin-lto.md compatibility table to show Rust ranges
str4d Nov 7, 2022
d97fa25
Fix invalid background-image file name
GuillaumeGomez Nov 7, 2022
948e7e7
Rollup merge of #103521 - chenyukang:yukang/fix-103451-avoid-hang, r=…
GuillaumeGomez Nov 7, 2022
591c3d6
Rollup merge of #103651 - Alexendoo:parse-format-unicode-escapes, r=w…
GuillaumeGomez Nov 7, 2022
070107c
Rollup merge of #103744 - palfrey:unwind-upgrade-cc, r=Mark-Simulacrum
GuillaumeGomez Nov 7, 2022
36fae54
Rollup merge of #103919 - nnethercote:unescaping-cleanups, r=matklad
GuillaumeGomez Nov 7, 2022
e451151
Rollup merge of #103933 - nicholasbishop:bishop-uefi-tier-2, r=JohnTitor
GuillaumeGomez Nov 7, 2022
73b5d94
Rollup merge of #103952 - ehuss:dont-intra-linkcheck-reference, r=Mar…
GuillaumeGomez Nov 7, 2022
29d35fe
Rollup merge of #103955 - str4d:update-lto-doc-1.65, r=ehuss
GuillaumeGomez Nov 7, 2022
73afd5f
Rollup merge of #104067 - jeremyd2019:patch-1, r=davidtwco
GuillaumeGomez Nov 7, 2022
9c5dd2b
Rollup merge of #104114 - GuillaumeGomez:background-image-path, r=not…
GuillaumeGomez Nov 7, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -488,9 +488,9 @@ version = "0.1.0"

[[package]]
name = "cc"
version = "1.0.73"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
checksum = "581f5dba903aac52ea3feb5ec4810848460ee833876f1f9b0fdeab1f19091574"
dependencies = [
"jobserver",
]
Expand Down
29 changes: 11 additions & 18 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@

use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token};

use rustc_lexer::unescape::{unescape_byte, unescape_char};
use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;

use std::ascii;

pub enum LitError {
Expand Down Expand Up @@ -109,13 +106,11 @@ impl LitKind {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
unescape_literal(&s, Mode::ByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
Expand All @@ -127,13 +122,11 @@ impl LitKind {
let bytes = if s.contains('\r') {
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
unescape_literal(&s, Mode::RawByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
Expand Down
10 changes: 5 additions & 5 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,13 @@ pub enum RawStrError {
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
/// Literal starts with "0b".
Binary,
Binary = 2,
/// Literal starts with "0o".
Octal,
/// Literal starts with "0x".
Hexadecimal,
Octal = 8,
/// Literal doesn't contain a prefix.
Decimal,
Decimal = 10,
/// Literal starts with "0x".
Hexadecimal = 16,
}

/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
Expand Down
130 changes: 53 additions & 77 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,8 @@ pub enum EscapeError {

/// Unicode escape code in byte literal.
UnicodeEscapeInByte,
/// Non-ascii character in byte literal.
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
NonAsciiCharInByte,
/// Non-ascii character in byte string literal.
NonAsciiCharInByteString,

/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
Expand All @@ -78,54 +76,37 @@ impl EscapeError {
/// Takes the contents of a literal (without quotes) and produces a
/// sequence of escaped characters or errors.
/// Values are returned by invoking the provided callback.
pub fn unescape_literal<F>(literal_text: &str, mode: Mode, callback: &mut F)
pub fn unescape_literal<F>(src: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
match mode {
Mode::Char | Mode::Byte => {
let mut chars = literal_text.chars();
let result = unescape_char_or_byte(&mut chars, mode);
// The Chars iterator moved forward.
callback(0..(literal_text.len() - chars.as_str().len()), result);
let mut chars = src.chars();
let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
callback(0..(src.len() - chars.as_str().len()), res);
}
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(literal_text, mode, callback),
// NOTE: Raw strings do not perform any explicit character escaping, here we
// only translate CRLF to LF and produce errors on bare CR.
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback),
Mode::RawStr | Mode::RawByteStr => {
unescape_raw_str_or_raw_byte_str(literal_text, mode, callback)
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
}
}
}

/// Takes a contents of a byte, byte string or raw byte string (without quotes)
/// and produces a sequence of bytes or errors.
/// Values are returned through invoking of the provided callback.
pub fn unescape_byte_literal<F>(literal_text: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<u8, EscapeError>),
{
debug_assert!(mode.is_bytes());
unescape_literal(literal_text, mode, &mut |range, result| {
callback(range, result.map(byte_from_char));
})
}

/// Takes the contents of a char literal (without quotes), and returns an
/// unescaped char or an error.
pub fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
let mut chars = literal_text.chars();
unescape_char_or_byte(&mut chars, Mode::Char)
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
pub fn unescape_char(src: &str) -> Result<char, (usize, EscapeError)> {
let mut chars = src.chars();
unescape_char_or_byte(&mut chars, false).map_err(|err| (src.len() - chars.as_str().len(), err))
}

/// Takes the contents of a byte literal (without quotes), and returns an
/// unescaped byte or an error.
pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
let mut chars = literal_text.chars();
unescape_char_or_byte(&mut chars, Mode::Byte)
pub fn unescape_byte(src: &str) -> Result<u8, (usize, EscapeError)> {
let mut chars = src.chars();
unescape_char_or_byte(&mut chars, true)
.map(byte_from_char)
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
.map_err(|err| (src.len() - chars.as_str().len(), err))
}

/// What kind of literal do we parse.
Expand All @@ -147,20 +128,17 @@ impl Mode {
}
}

pub fn is_bytes(self) -> bool {
pub fn is_byte(self) -> bool {
match self {
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
Mode::Char | Mode::Str | Mode::RawStr => false,
}
}
}

fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
// Previous character was '\\', unescape what follows.

let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;

let res = match second_char {
let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
'"' => '"',
'n' => '\n',
'r' => '\r',
Expand All @@ -181,7 +159,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
let value = hi * 16 + lo;

// For a non-byte literal verify that it is within ASCII range.
if !mode.is_bytes() && !is_ascii(value) {
if !is_byte && !is_ascii(value) {
return Err(EscapeError::OutOfRangeHexEscape);
}
let value = value as u8;
Expand Down Expand Up @@ -217,7 +195,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {

// Incorrect syntax has higher priority for error reporting
// than unallowed value for a literal.
if mode.is_bytes() {
if is_byte {
return Err(EscapeError::UnicodeEscapeInByte);
}

Expand Down Expand Up @@ -249,23 +227,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
}

#[inline]
fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
if mode.is_bytes() && !first_char.is_ascii() {
fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
if is_byte && !c.is_ascii() {
// Byte literal can't be a non-ascii character.
Err(EscapeError::NonAsciiCharInByte)
} else {
Ok(first_char)
Ok(c)
}
}

fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
debug_assert!(mode == Mode::Char || mode == Mode::Byte);
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match first_char {
'\\' => scan_escape(chars, mode),
fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
let c = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match c {
'\\' => scan_escape(chars, is_byte),
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(first_char, mode),
_ => ascii_check(c, is_byte),
}?;
if chars.next().is_some() {
return Err(EscapeError::MoreThanOneChar);
Expand All @@ -275,20 +252,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca

/// Takes the contents of a string literal (without quotes) and produces a
/// sequence of escaped characters or errors.
fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
debug_assert!(mode == Mode::Str || mode == Mode::ByteStr);
let initial_len = src.len();
let mut chars = src.chars();
while let Some(first_char) = chars.next() {
let start = initial_len - chars.as_str().len() - first_char.len_utf8();

let unescaped_char = match first_char {
// The `start` and `end` computation here is complicated because
// `skip_ascii_whitespace` makes us skip over chars without counting
// them in the range computation.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\\' => {
let second_char = chars.clone().next();
match second_char {
match chars.clone().next() {
Some('\n') => {
// Rust language specification requires us to skip whitespaces
// if unescaped '\' character is followed by '\n'.
Expand All @@ -297,17 +274,17 @@ where
skip_ascii_whitespace(&mut chars, start, callback);
continue;
}
_ => scan_escape(&mut chars, mode),
_ => scan_escape(&mut chars, is_byte),
}
}
'\n' => Ok('\n'),
'\t' => Ok('\t'),
'"' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(first_char, mode),
_ => ascii_check(c, is_byte),
};
let end = initial_len - chars.as_str().len();
callback(start..end, unescaped_char);
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}

fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
Expand Down Expand Up @@ -340,30 +317,29 @@ where
/// Takes the contents of a string literal (without quotes) and produces a
/// sequence of characters or errors.
/// NOTE: Raw strings do not perform any explicit character escaping, here we
/// only translate CRLF to LF and produce errors on bare CR.
fn unescape_raw_str_or_raw_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
/// only produce errors on bare CR.
fn unescape_raw_str_or_raw_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr);
let initial_len = literal_text.len();

let mut chars = literal_text.chars();
while let Some(curr) = chars.next() {
let start = initial_len - chars.as_str().len() - curr.len_utf8();
let mut chars = src.chars();

let result = match curr {
// The `start` and `end` computation here matches the one in
// `unescape_str_or_byte_str` for consistency, even though this function
// doesn't have to worry about skipping any chars.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString),
c => Ok(c),
_ => ascii_check(c, is_byte),
};
let end = initial_len - chars.as_str().len();

callback(start..end, result);
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}
}

fn byte_from_char(c: char) -> u8 {
#[inline]
pub fn byte_from_char(c: char) -> u8 {
let res = c as u32;
debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
res as u8
Expand Down
17 changes: 6 additions & 11 deletions compiler/rustc_lexer/src/unescape/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,10 @@ fn test_unescape_byte_good() {
fn test_unescape_byte_str_good() {
fn check(literal_text: &str, expected: &[u8]) {
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
unescape_byte_literal(literal_text, Mode::ByteStr, &mut |range, c| {
unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| {
if let Ok(b) = &mut buf {
match c {
Ok(c) => b.push(c),
Ok(c) => b.push(byte_from_char(c)),
Err(e) => buf = Err((range, e)),
}
}
Expand Down Expand Up @@ -280,18 +280,13 @@ fn test_unescape_raw_str() {

#[test]
fn test_unescape_raw_byte_str() {
fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
let mut unescaped = Vec::with_capacity(literal.len());
unescape_byte_literal(literal, Mode::RawByteStr, &mut |range, res| {
unescaped.push((range, res))
});
unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
assert_eq!(unescaped, expected);
}

check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
check(
"🦀a",
&[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))],
);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]);
}
Loading