Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue3961 fix whitespace detection #7114

Merged
merged 4 commits into from
Jun 16, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions src/libsyntax/parse/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,26 +198,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
}
}

// FIXME #3961: This is not the right way to convert string byte
// offsets to characters.
fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
let mut i: uint = begin;
while i != end {
if !is_whitespace(s[i] as char) { return false; } i += 1u;
// Scans at most the first `col` chars of `s`, stopping early if the end of
// `s` is reached first.
// Returns None if any scanned char is not whitespace.
// Otherwise returns Some(k), where k is the byte offset in `s` (not a char
// count) just past the whitespace that was scanned. Note k may be outside
// the bounds of s (e.g. when s is shorter than `col` chars), so callers must
// compare k against the length of s before slicing with it.
fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
let len = s.len();
let mut col = col.to_uint();
let mut cursor: uint = 0;
// `col` counts chars still to be checked; `cursor` is the position in `s`
// handed to char_range_at, so a multi-byte whitespace char uses up one
// unit of `col` regardless of how far it advances `cursor`.
while col > 0 && cursor < len {
let r: str::CharRange = s.char_range_at(cursor);
if !r.ch.is_whitespace() {
return None;
}
// Continue from the position reported for the following char.
cursor = r.next;
col -= 1;
}
return true;
return Some(cursor);
}

// Pushes one line `s` onto `lines`. If the first `col` chars of `s` are
// all whitespace (as decided by all_whitespace), that scanned prefix is
// removed before pushing; otherwise `s` is pushed unchanged.
fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
s: ~str, col: CharPos) {
let len = s.len();
// FIXME #3961: Doing bytewise comparison and slicing with CharPos
let col = col.to_uint();
let s1 = if all_whitespace(s, 0, uint::min(len, col)) {
if col < len {
s.slice(col, len).to_owned()
} else { ~"" }
} else { s };
let s1 = match all_whitespace(s, col) {
// Note: this `col` shadows the CharPos parameter; it is the offset
// returned by all_whitespace and is used directly as the slice start.
Some(col) => {
// The returned offset can reach or pass the end of `s`; in that case
// nothing remains after the prefix, so push an empty line.
if col < len {
s.slice(col, len).to_owned()
} else { ~"" }
}
// A non-whitespace char was found within the first `col` chars.
None => s,
};
debug!("pushing line: %s", s1);
lines.push(s1);
}
Expand Down
116 changes: 116 additions & 0 deletions src/test/pretty/block-comment-wchar.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// This is meant as a test case for Issue 3961.
//
// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
//
// pp-exact:block-comment-wchar.pp
fn f() {
fn nested() {
/*
Spaced2
*/
/*
Spaced10
*/
/*
Tabbed8+2
*/
/*
CR8+2
*/
}
/*
Spaced2: (prefixed so start of space aligns with comment)
*/
/*
Tabbed2: (more indented b/c *start* of space will align with comment)
*/
/*
Spaced6: (Alignment removed and realigning spaces inserted)
*/
/*
Tabbed4+2: (Alignment removed and realigning spaces inserted)
*/

/*
VT4+2: (should align)
*/
/*
FF4+2: (should align)
*/
/*
CR4+2: (should align)
*/
/*
// (NEL deliberately omitted)
*/
/*
Ogham Space Mark 4+2: (should align)
*/
/*
Mongolian Vowel Separator 4+2: (should align)
*/
/*
Four-per-em space 4+2: (should align)
*/

/*
Mongolian Vowel Sep count 1: (should align)
Mongolian Vowel Sep count 2: (should align)
Mongolian Vowel Sep count 3: (should align)
Mongolian Vowel Sep count 4: (should align)
Mongolian Vowel Sep count 5: (should align)
Mongolian Vowel Sep count 6: (should align)
Mongolian Vowel Sep count 7: (should align)
Mongolian Vowel Sep count 8: (should align)
Mongolian Vowel Sep count 9: (should align)
Mongolian Vowel Sep count A: (should align)
Mongolian Vowel Sep count B: (should align)
Mongolian Vowel Sep count C: (should align)
Mongolian Vowel Sep count D: (should align)
Mongolian Vowel Sep count E: (should align)
Mongolian Vowel Sep count F: (should align)
*/



/* */

/*
Hello from offset 6
Space 6+2: compare A
Mongolian Vowel Separator 6+2: compare B
*/

/*᠎*/

/*
Hello from another offset 6 with wchars establishing column offset
Space 6+2: compare C
Mongolian Vowel Separator 6+2: compare D
*/
}

fn main() {
// Taken from http://en.wikipedia.org/wiki/Whitespace_character
let chars =
['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
// '\x85', // for some reason Rust thinks NEL isn't whitespace
'\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
'\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
'\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
// <= bugs in pretty-printer?
for chars.each |c| {
let ws = c.is_whitespace();
println(fmt!("%? %?" , c , ws));
}
}
109 changes: 109 additions & 0 deletions src/test/pretty/block-comment-wchar.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// This is meant as a test case for Issue 3961.
//
// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
//
// pp-exact:block-comment-wchar.pp
fn f() {
fn nested() {
/*
Spaced2
*/
/*
Spaced10
*/
/*
Tabbed8+2
*/
/*
CR8+2
*/
}
/*
Spaced2: (prefixed so start of space aligns with comment)
*/
/*
Tabbed2: (more indented b/c *start* of space will align with comment)
*/
/*
Spaced6: (Alignment removed and realigning spaces inserted)
*/
/*
Tabbed4+2: (Alignment removed and realigning spaces inserted)
*/

/*
VT4+2: (should align)
*/
/*
FF4+2: (should align)
*/
/*
CR4+2: (should align)
*/
/*
// (NEL deliberately omitted)
*/
/*
     Ogham Space Mark 4+2: (should align)
*/
/*
᠎᠎᠎᠎ Mongolian Vowel Separator 4+2: (should align)
*/
/*
     Four-per-em space 4+2: (should align)
*/

/*
᠎ Mongolian Vowel Sep count 1: (should align)
᠎ Mongolian Vowel Sep count 2: (should align)
᠎᠎ Mongolian Vowel Sep count 3: (should align)
᠎ Mongolian Vowel Sep count 4: (should align)
᠎ ᠎ Mongolian Vowel Sep count 5: (should align)
᠎᠎ Mongolian Vowel Sep count 6: (should align)
᠎᠎᠎ Mongolian Vowel Sep count 7: (should align)
᠎ Mongolian Vowel Sep count 8: (should align)
᠎ ᠎ Mongolian Vowel Sep count 9: (should align)
᠎ ᠎ Mongolian Vowel Sep count A: (should align)
᠎ ᠎᠎ Mongolian Vowel Sep count B: (should align)
᠎᠎ Mongolian Vowel Sep count C: (should align)
᠎᠎ ᠎ Mongolian Vowel Sep count D: (should align)
᠎᠎᠎ Mongolian Vowel Sep count E: (should align)
᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align)
*/


/* */ /*
Hello from offset 6
Space 6+2: compare A
᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare B
*/
/*᠎*/ /*
Hello from another offset 6 with wchars establishing column offset
Space 6+2: compare C
᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare D
*/
}

fn main() {
// Taken from http://en.wikipedia.org/wiki/Whitespace_character
let chars =
['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
// '\x85', // for some reason Rust thinks NEL isn't whitespace
'\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
'\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
'\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
for chars.each |c| {
let ws = c.is_whitespace();
println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer?
}
}
Expand Down