Skip to content

Commit

Permalink
remove bogusname???
Browse files Browse the repository at this point in the history
  • Loading branch information
untitaker committed Mar 19, 2022
1 parent 4bf9b3e commit 575b077
Showing 1 changed file with 2 additions and 50 deletions.
52 changes: 2 additions & 50 deletions html5ever/src/tokenizer/char_ref/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ enum State {
Numeric(u32), // base
NumericSemicolon,
Named,
BogusName,
}

pub struct CharRefTokenizer {
Expand Down Expand Up @@ -128,7 +127,6 @@ impl CharRefTokenizer {
Numeric(base) => self.do_numeric(tokenizer, input, base),
NumericSemicolon => self.do_numeric_semicolon(tokenizer, input),
Named => self.do_named(tokenizer, input),
BogusName => self.do_bogus_name(tokenizer, input),
}
}

Expand Down Expand Up @@ -289,20 +287,10 @@ impl CharRefTokenizer {
},

// Can't continue the match.
None => self.finish_named(tokenizer, input, Some(c)),
None => self.finish_named(tokenizer, input),
}
}

fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &mut Tokenizer<Sink>) {
let msg = format_if!(
tokenizer.opts.exact_errors,
"Invalid character reference",
"Invalid character reference &{}",
self.name_buf()
);
tokenizer.emit_error(msg);
}

fn unconsume_name(&mut self, input: &mut BufferQueue) {
input.push_front(self.name_buf_opt.take().unwrap());
}
Expand All @@ -311,23 +299,9 @@ impl CharRefTokenizer {
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
end_char: Option<char>,
) -> Status {
match self.name_match {
None => {
match end_char {
Some(c) if c.is_ascii_alphanumeric() => {
// Keep looking for a semicolon, to determine whether
// we emit a parse error.
self.state = BogusName;
return Progress;
},

// Check length because &; is not a parse error.
Some(';') if self.name_buf().len() > 1 => self.emit_name_error(tokenizer),

_ => (),
}
self.unconsume_name(input);
self.finish_none()
},
Expand Down Expand Up @@ -358,7 +332,6 @@ impl CharRefTokenizer {
// character is either a U+003D EQUALS SIGN character (=) or an ASCII alphanumeric,
// then, for historical reasons, flush code points consumed as a character
// reference and switch to the return state.

let unconsume_all = match (self.is_consumed_in_attribute, last_matched, next_after) {
(_, ';', _) => false,
(true, _, Some('=')) => true,
Expand Down Expand Up @@ -389,22 +362,6 @@ impl CharRefTokenizer {
}
}

fn do_bogus_name<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
input: &mut BufferQueue,
) -> Status {
let c = unwrap_or_return!(tokenizer.get_char(input), Stuck);
self.name_buf_mut().push_char(c);
match c {
_ if c.is_ascii_alphanumeric() => return Progress,
';' => self.emit_name_error(tokenizer),
_ => (),
}
self.unconsume_name(input);
self.finish_none()
}

pub fn end_of_file<Sink: TokenSink>(
&mut self,
tokenizer: &mut Tokenizer<Sink>,
Expand All @@ -421,12 +378,7 @@ impl CharRefTokenizer {
self.finish_numeric(tokenizer);
},

Named => drop(self.finish_named(tokenizer, input, None)),

BogusName => {
self.unconsume_name(input);
self.finish_none();
},
Named => drop(self.finish_named(tokenizer, input)),

Octothorpe => {
input.push_front(StrTendril::from_slice("#"));
Expand Down

0 comments on commit 575b077

Please sign in to comment.