From c34aac7b1703edc9ef4ebaa17c16b96a18ca1443 Mon Sep 17 00:00:00 2001 From: Patrick McCarter Date: Sat, 16 Feb 2019 19:56:30 -0500 Subject: [PATCH 1/2] help suggestion when trying to delimit string literals with directed unicode quotes #58436 --- src/libsyntax/parse/lexer/mod.rs | 22 +++++++++++++++ src/libsyntax/parse/lexer/unicode_chars.rs | 31 +++++++++++++++------- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index babe0eef20f8c..a7cde5fbb92cd 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -125,6 +125,28 @@ impl<'a> StringReader<'a> { Ok(ret_val) } + /// Immutably extract string if found at current position with given delimiters + pub fn peek_delimited(&self, from_ch: char, to_ch: char) -> Option<String> { + let mut pos = self.pos; + let mut idx = self.src_index(pos); + let mut ch = char_at(&self.src, idx); + if ch != from_ch { + return None; + } + pos = pos + Pos::from_usize(ch.len_utf8()); + let start_pos = pos; + idx = self.src_index(pos); + while idx < self.end_src_index { + ch = char_at(&self.src, idx); + if ch == to_ch { + return Some(self.src[self.src_index(start_pos)..self.src_index(pos)].to_string()); + } + pos = pos + Pos::from_usize(ch.len_utf8()); + idx = self.src_index(pos); + } + return None; + } + fn try_real_token(&mut self) -> Result { let mut t = self.try_next_token()?; loop { diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 7da4284c0e4aa..94ce6297fbefb 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -1,7 +1,7 @@ // Characters and their corresponding confusables were collected from // http://www.unicode.org/Public/security/10.0.0/confusables.txt -use syntax_pos::{Span, NO_EXPANSION}; +use syntax_pos::{Span, Pos, NO_EXPANSION}; use errors::{Applicability, DiagnosticBuilder}; use super::StringReader; @@ -333,14
+333,27 @@ crate fn check_for_substitution<'a>(reader: &StringReader<'a>, let span = Span::new(reader.pos, reader.next_pos, NO_EXPANSION); match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) { Some(&(ascii_char, ascii_name)) => { - let msg = - format!("Unicode character '{}' ({}) looks like '{}' ({}), but it is not", - ch, u_name, ascii_char, ascii_name); - err.span_suggestion( - span, - &msg, - ascii_char.to_string(), - Applicability::MaybeIncorrect); + // special help suggestion for "directed" double quotes + if let Some(s) = reader.peek_delimited('“', '”') { + let msg = format!("Unicode characters '“' (Left Double Quotation Mark) and \ + '”' (Right Double Quotation Mark) look like '{}' ({}), but are not", + ascii_char, ascii_name); + err.span_suggestion( + Span::new(reader.pos, reader.next_pos + Pos::from_usize(s.len()) + + Pos::from_usize('”'.len_utf8()), NO_EXPANSION), + &msg, + format!("\"{}\"", s), + Applicability::MaybeIncorrect); + } else { + let msg = + format!("Unicode character '{}' ({}) looks like '{}' ({}), but it is not", + ch, u_name, ascii_char, ascii_name); + err.span_suggestion( + span, + &msg, + ascii_char.to_string(), + Applicability::MaybeIncorrect); + } true }, None => { From 71cd4c8e4af4a05991fef5a61110510c78d90131 Mon Sep 17 00:00:00 2001 From: Patrick McCarter Date: Sat, 16 Feb 2019 20:56:12 -0500 Subject: [PATCH 2/2] ui test for directed quote help suggestion #58436 --- src/test/ui/parser/unicode-quote-chars.rs | 7 +++++++ src/test/ui/parser/unicode-quote-chars.stderr | 12 ++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 src/test/ui/parser/unicode-quote-chars.rs create mode 100644 src/test/ui/parser/unicode-quote-chars.stderr diff --git a/src/test/ui/parser/unicode-quote-chars.rs b/src/test/ui/parser/unicode-quote-chars.rs new file mode 100644 index 0000000000000..69644211b8a11 --- /dev/null +++ b/src/test/ui/parser/unicode-quote-chars.rs @@ -0,0 +1,7 @@ +// ignore-tidy-linelength + +fn main() { + println!(“hello 
world”); + //~^ ERROR unknown start of token: \u{201c} + //~^^ HELP Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '"' (Quotation Mark), but are not +} diff --git a/src/test/ui/parser/unicode-quote-chars.stderr b/src/test/ui/parser/unicode-quote-chars.stderr new file mode 100644 index 0000000000000..315e20cf854cd --- /dev/null +++ b/src/test/ui/parser/unicode-quote-chars.stderr @@ -0,0 +1,12 @@ +error: unknown start of token: /u{201c} + --> $DIR/unicode-quote-chars.rs:4:14 + | +LL | println!(“hello world”); + | ^ +help: Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '"' (Quotation Mark), but are not + | +LL | println!("hello world"); + | ^^^^^^^^^^^^^ + +error: aborting due to previous error +