From 2fa7c48133001d86da39feda04d870ff67e88400 Mon Sep 17 00:00:00 2001 From: kwantam Date: Wed, 18 Jun 2014 01:05:49 -0400 Subject: [PATCH] ParagraphStream iterator complete --- fmt/fmt.rs | 15 +++-- fmt/fmtoptions.rs | 4 +- fmt/parasplit.rs | 141 +++++++++++++++++++++++++++++++++++++++------- fmt/test.rs | 6 ++ 4 files changed, 140 insertions(+), 26 deletions(-) diff --git a/fmt/fmt.rs b/fmt/fmt.rs index 2a5a884c278..9aabb7f0095 100644 --- a/fmt/fmt.rs +++ b/fmt/fmt.rs @@ -81,8 +81,11 @@ fn uumain(args: Vec) -> int { , mail : false , uniform : false , split_only : false + , use_prefix : false , prefix : String::new() , xprefix : false + , prefix_len : 0 + , use_anti_prefix : false , anti_prefix : String::new() , xanti_prefix: false , width : 78 @@ -90,8 +93,8 @@ fn uumain(args: Vec) -> int { , tabwidth : 8 }; - if matches.opt_present("c") { fmt_opts.crown = true; } - if matches.opt_present("t") { fmt_opts.tagged = true; fmt_opts.crown = false; } + if matches.opt_present("t") { fmt_opts.tagged = true; } + if matches.opt_present("c") { fmt_opts.crown = true; fmt_opts.tagged = false; } if matches.opt_present("m") { fmt_opts.mail = true; } if matches.opt_present("u") { fmt_opts.uniform = true; } if matches.opt_present("s") { fmt_opts.split_only = true; fmt_opts.crown = false; fmt_opts.tagged = false; } @@ -99,12 +102,12 @@ fn uumain(args: Vec) -> int { if matches.opt_present("X") { fmt_opts.xanti_prefix = true; } match matches.opt_str("p") { - Some(s) => { fmt_opts.prefix = s; }, + Some(s) => { fmt_opts.prefix = s; fmt_opts.use_prefix = true; fmt_opts.prefix_len = fmt_opts.prefix.as_slice().char_len() }, None => () }; match matches.opt_str("P") { - Some(s) => { fmt_opts.anti_prefix = s; }, + Some(s) => { fmt_opts.anti_prefix = s; fmt_opts.use_anti_prefix = true; }, None => () }; @@ -156,8 +159,8 @@ fn uumain(args: Vec) -> int { } Ok(f) => f }; - let mut fPara = parasplit::ParagraphStream::new(&fmt_opts, &mut fp); - fPara.count(); + let fPara = parasplit::ParagraphStream::new(&fmt_opts, &mut fp); + fPara.map(|x| println!("{}",x)).count(); } 0 diff --git a/fmt/fmtoptions.rs b/fmt/fmtoptions.rs index 7acc8b5faab..d2c29c2cfd1 100644 --- a/fmt/fmtoptions.rs +++ b/fmt/fmtoptions.rs @@ -13,8 +13,11 @@ pub struct FmtOptions { pub tagged : bool, pub mail : bool, pub split_only : bool, + pub use_prefix : bool, pub prefix : String, pub xprefix : bool, + pub prefix_len : uint, + pub use_anti_prefix : bool, pub anti_prefix : String, pub xanti_prefix: bool, pub uniform : bool, @@ -22,4 +25,3 @@ pub struct FmtOptions { pub goal : uint, pub tabwidth : uint, } - diff --git a/fmt/parasplit.rs b/fmt/parasplit.rs index 31262d2bf7a..c2417fa6d36 100644 --- a/fmt/parasplit.rs +++ b/fmt/parasplit.rs @@ -29,21 +29,28 @@ pub fn open_file(filename : &str) -> IoResult { } } -pub struct FileLines<'a> { +struct FileLines<'a> { opts : &'a FmtOptions, lines : Lines<'a, FileOrStdReader>, - use_prefix : bool, - use_anti_prefix : bool, } #[deriving(Show)] -pub enum Line { +enum Line { FormatLine(FileLine), NoFormatLine(String, bool) } +impl Line { + fn get_fileline(self) -> FileLine { + match self { + FormatLine(fl) => fl, + NoFormatLine(..) => fail!("Found NoFormatLine when expecting FormatLine") + } + } +} + #[deriving(Show)] -pub struct FileLine { +struct FileLine { line : String, indent_end : uint, prefix_end : uint, @@ -53,20 +60,20 @@ pub struct FileLine { } impl<'a> FileLines<'a> { - pub fn new<'a>(opts : &'a FmtOptions, lines : Lines<'a, FileOrStdReader>) -> FileLines<'a> { - FileLines { opts : opts, lines : lines, use_prefix : opts.prefix.len() > 0, use_anti_prefix : opts.anti_prefix.len() > 0 } + fn new<'a>(opts : &'a FmtOptions, lines : Lines<'a, FileOrStdReader>) -> FileLines<'a> { + FileLines { opts : opts, lines : lines } } // returns true if this line should be formatted fn match_prefix(&self, line : &str) -> (bool, uint) { - if ! self.use_prefix { return (true, 0u); } + if ! self.opts.use_prefix { return (true, 0u); } FileLines::match_prefix_generic(self.opts.prefix.as_slice(), line, self.opts.xprefix) } // returns true if this line should be formatted fn match_anti_prefix(&self, line : &str) -> bool { - if ! self.use_anti_prefix { return true; } + if ! self.opts.use_anti_prefix { return true; } match FileLines::match_prefix_generic(self.opts.anti_prefix.as_slice(), line, self.opts.xanti_prefix) { (true, _) => false, @@ -96,7 +103,7 @@ impl<'a> FileLines<'a> { impl<'a> Iterator for FileLines<'a> { fn next(&mut self) -> Option { - let n = + let mut n = match self.lines.next() { Some(t) => match t { Ok(tt) => tt, @@ -126,8 +133,25 @@ impl<'a> Iterator for FileLines<'a> { return Some(NoFormatLine(n, false)); } + let nLen = n.len(); + // replace trailing newline, if any, with space + { + let CharRange {ch, next : i} = n.as_slice().char_range_at_reverse(nLen); + if ch == '\n' { + unsafe { + let nmut = n.as_mut_bytes(); + nmut[i] = 0x20; + } + let CharRange {ch, next : _} = n.as_slice().char_range_at_reverse(i); + if ch == '.' { + n.push_char(' '); + } + } + } + + // figure out the indent, prefix, and prefixindent ending points let (indEnd, pfxEnd, pfxIndEnd) = - if self.use_prefix { + if self.opts.use_prefix { let pfxEnd = poffset + self.opts.prefix.len(); let nSlice = n.as_slice().slice_from(pfxEnd); let nSlice2 = nSlice.trim_left(); @@ -136,9 +160,10 @@ impl<'a> Iterator for FileLines<'a> { } else { let nSlice = n.as_slice().trim_left(); - (n.len() - nSlice.len(), 0, 0) + (nLen - nSlice.len(), 0, 0) }; + // indent length let indLen = if indEnd > 0 { let nSlice = n.as_slice().slice(pfxEnd, indEnd); @@ -147,6 +172,7 @@ impl<'a> Iterator for FileLines<'a> { 0 }; + // prefix indent length let pfxIndLen = if pfxIndEnd > 0 { let nSlice = n.as_slice().slice_to(pfxIndEnd); @@ -161,9 +187,9 @@ impl<'a> Iterator for FileLines<'a> { } } -#[allow(dead_code)] +#[deriving(Show)] pub struct Paragraph { - pub words : Vec, + pub lines : Vec, pub init_str : String, pub init_len : uint, pub indent_str : String, @@ -172,14 +198,12 @@ pub struct Paragraph { pub pfxind_len : uint } -#[allow(dead_code)] pub struct ParagraphStream<'a> { lines : Peekable>, next_mail : bool, // next line should be considered for a mail header opts : &'a FmtOptions } -#[allow(dead_code)] impl<'a> ParagraphStream<'a> { pub fn new<'a>(opts : &'a FmtOptions, reader : &'a mut FileOrStdReader) -> ParagraphStream<'a> { let lines = FileLines::new(opts, reader.lines()).peekable(); @@ -204,6 +228,9 @@ impl<'a> ParagraphStream<'a> { None => return false }; + // header field must be nonzero length + if colonPosn == 0 { return false; } + return lSlice.slice_to(colonPosn).chars() .all(|x| match x as uint { y if y < 33 || y > 126 => false, @@ -243,11 +270,18 @@ impl<'a> Iterator for ParagraphStream<'a> { } // now build a paragraph - let mut words : Vec = vec!(); + let mut init_str = String::new(); + let mut init_len = 0; + let mut indent_str = String::new(); + let mut indent_len = 0; + let mut pfxind_str = String::new(); + let mut pfxind_len = 0; + + let mut pLines : Vec = Vec::new(); let mut in_mail = false; let mut second_done = false; // for when we use crown or tagged mode loop { - // scope for fl - need to unborrow before we can take next() + // need to explicitly force fl out of scope before we can call self.lines.next() { // peek ahead let fl = match self.lines.peek() { @@ -258,9 +292,78 @@ impl<'a> Iterator for ParagraphStream<'a> { &NoFormatLine(..) => break } }; + + if pLines.len() == 0 { + // first time through the loop, get things set up + // detect mail header + if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { + in_mail = true; + // there can't be any indent or pfxind because otherwise is_mail_header would fail + // since there cannot be any whitespace before the colon in a valid header field + indent_str.push_str(" "); + indent_len = 2; + } else { + if self.opts.crown || self.opts.tagged { + init_str.push_str(fl.line.as_slice().slice_to(fl.indent_end)); + init_len = fl.indent_len + fl.pfxind_len + self.opts.prefix_len; + } + + indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end)); + indent_len = fl.indent_len; + + if self.opts.tagged { + indent_str.push_str(" "); + indent_len += 4; + } + + if self.opts.use_prefix { + pfxind_str.push_str(fl.line.as_slice().slice_to(fl.pfxind_end)); + pfxind_len = fl.pfxind_len; + } + } + } else if in_mail { + // lines following mail headers must begin with spaces + if (self.opts.use_prefix && fl.pfxind_end == 0) || (! self.opts.use_prefix && fl.indent_end == 0) { + break; // this line does not begin with spaces + } + second_done = true; + } else if ! second_done { + // now we have enough info to handle crown margin and tagged mode + if self.opts.crown { + // even in crown mode we require that pfxind is the same + if pfxind_len != fl.pfxind_len { + break; + } else { + // matching pfxind, so get indent_len from this line + indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end)); + indent_len = fl.indent_len; + } + } else if self.opts.tagged { + // in tagged mode, pfxind has to be the same and indent has to be *different* + if (pfxind_len != fl.pfxind_len) || (indent_len - 4 == fl.indent_len) { + break; + } else { + indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end)); + indent_len = fl.indent_len; + } + } else { + // detect mismatch + if (indent_len != fl.indent_len) || (pfxind_len != fl.pfxind_len) { + break; + } + } + second_done = true; + } else { + // detect mismatch + if (indent_len != fl.indent_len) || (pfxind_len != fl.pfxind_len) { + break; + } + } } + + pLines.push(self.lines.next().unwrap().get_fileline()); } - None + Some(Paragraph { lines : pLines, init_str : init_str, init_len : init_len, indent_str : indent_str, indent_len : indent_len, pfxind_str : pfxind_str, pfxind_len : pfxind_len }) } } diff --git a/fmt/test.rs b/fmt/test.rs index d2dc3d680ec..d9edc974825 100644 --- a/fmt/test.rs +++ b/fmt/test.rs @@ -1,5 +1,6 @@ use std::io::{stdin}; +use std::str::CharRange; fn main() { let mut s = stdin(); @@ -7,6 +8,7 @@ fn main() { //let mut q = s.lines().scan(0u, |s, _| { *s = *s + 1; Some(*s) }); //q.count(); + /* let z = true ^ false; let foo = s.lines().filter(|x| z && x.is_ok()).map(|x| x.unwrap()); @@ -16,4 +18,8 @@ fn main() { println!("{}",bar); println!("{} : {}","asdf".slice_to(2),"asdf".slice_from(2)); + */ + + let CharRange { ch, next } = "asdf\n".char_range_at_reverse("asdf\n".len()); + println!("{} {}", ch, next); }