From c34aac7b1703edc9ef4ebaa17c16b96a18ca1443 Mon Sep 17 00:00:00 2001
From: Patrick McCarter
Date: Sat, 16 Feb 2019 19:56:30 -0500
Subject: [PATCH 1/2] help suggestion when trying to delimit string literals
with directed unicode quotes #58436
---
src/libsyntax/parse/lexer/mod.rs | 22 +++++++++++++++
src/libsyntax/parse/lexer/unicode_chars.rs | 31 +++++++++++++++-------
2 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index babe0eef20f8c..a7cde5fbb92cd 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -125,6 +125,28 @@ impl<'a> StringReader<'a> {
Ok(ret_val)
}
+    /// Immutably peeks at a delimited string starting at the current position.
+    ///
+    /// Returns the text strictly between `from_ch` (which must be the character at
+    /// `self.pos`) and the first following `to_ch`, or `None` if the current character
+    /// is not `from_ch` or no `to_ch` occurs before `self.end_src_index`.
+    pub fn peek_delimited(&self, from_ch: char, to_ch: char) -> Option<String> {
+        if char_at(&self.src, self.src_index(self.pos)) != from_ch {
+            return None;
+        }
+        // The extracted text begins right after the opening delimiter.
+        let start_pos = self.pos + Pos::from_usize(from_ch.len_utf8());
+        let mut pos = start_pos;
+        while self.src_index(pos) < self.end_src_index {
+            let ch = char_at(&self.src, self.src_index(pos));
+            if ch == to_ch {
+                return Some(self.src[self.src_index(start_pos)..self.src_index(pos)].to_string());
+            }
+            pos = pos + Pos::from_usize(ch.len_utf8());
+        }
+        None
+    }
+
fn try_real_token(&mut self) -> Result {
let mut t = self.try_next_token()?;
loop {
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index 7da4284c0e4aa..94ce6297fbefb 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -1,7 +1,7 @@
// Characters and their corresponding confusables were collected from
// http://www.unicode.org/Public/security/10.0.0/confusables.txt
-use syntax_pos::{Span, NO_EXPANSION};
+use syntax_pos::{Span, Pos, NO_EXPANSION};
use errors::{Applicability, DiagnosticBuilder};
use super::StringReader;
@@ -333,14 +333,27 @@ crate fn check_for_substitution<'a>(reader: &StringReader<'a>,
let span = Span::new(reader.pos, reader.next_pos, NO_EXPANSION);
match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) {
Some(&(ascii_char, ascii_name)) => {
- let msg =
- format!("Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
- ch, u_name, ascii_char, ascii_name);
- err.span_suggestion(
- span,
- &msg,
- ascii_char.to_string(),
- Applicability::MaybeIncorrect);
+ // special help suggestion for "directed" double quotes
+ if let Some(s) = reader.peek_delimited('“', '”') {
+ let msg = format!("Unicode characters '“' (Left Double Quotation Mark) and \
+ '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
+ ascii_char, ascii_name);
+ err.span_suggestion(
+ Span::new(reader.pos, reader.next_pos + Pos::from_usize(s.len()) +
+ Pos::from_usize('”'.len_utf8()), NO_EXPANSION),
+ &msg,
+ format!("\"{}\"", s),
+ Applicability::MaybeIncorrect);
+ } else {
+ let msg =
+ format!("Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
+ ch, u_name, ascii_char, ascii_name);
+ err.span_suggestion(
+ span,
+ &msg,
+ ascii_char.to_string(),
+ Applicability::MaybeIncorrect);
+ }
true
},
None => {
From 71cd4c8e4af4a05991fef5a61110510c78d90131 Mon Sep 17 00:00:00 2001
From: Patrick McCarter
Date: Sat, 16 Feb 2019 20:56:12 -0500
Subject: [PATCH 2/2] ui test for directed quote help suggestion #58436
---
src/test/ui/parser/unicode-quote-chars.rs | 7 +++++++
src/test/ui/parser/unicode-quote-chars.stderr | 12 ++++++++++++
2 files changed, 19 insertions(+)
create mode 100644 src/test/ui/parser/unicode-quote-chars.rs
create mode 100644 src/test/ui/parser/unicode-quote-chars.stderr
diff --git a/src/test/ui/parser/unicode-quote-chars.rs b/src/test/ui/parser/unicode-quote-chars.rs
new file mode 100644
index 0000000000000..69644211b8a11
--- /dev/null
+++ b/src/test/ui/parser/unicode-quote-chars.rs
@@ -0,0 +1,7 @@
+// ignore-tidy-linelength
+
+fn main() { // The directed quotes on the next line are intentional: they trigger the diagnostic under test.
+ println!(“hello world”);
+ //~^ ERROR unknown start of token: \u{201c}
+ //~^^ HELP Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '"' (Quotation Mark), but are not
+}
diff --git a/src/test/ui/parser/unicode-quote-chars.stderr b/src/test/ui/parser/unicode-quote-chars.stderr
new file mode 100644
index 0000000000000..315e20cf854cd
--- /dev/null
+++ b/src/test/ui/parser/unicode-quote-chars.stderr
@@ -0,0 +1,12 @@
+error: unknown start of token: \u{201c}
+ --> $DIR/unicode-quote-chars.rs:4:14
+ |
+LL | println!(“hello world”);
+ | ^
+help: Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '"' (Quotation Mark), but are not
+ |
+LL | println!("hello world");
+ | ^^^^^^^^^^^^^
+
+error: aborting due to previous error
+