Skip to content

Commit

Permalink
fix: decode html entities correctly (#8047)
Browse files Browse the repository at this point in the history
fixes #8026

1. replace the big entities list with entities that have a ; at the end where valid (there are some exceptions)
2. construct two regexes from that entities list: one for HTML where it's strictly matched, one for attributes where it tries to match browser behavior by also allowing = / digit / character afterwards
3. decode character references with one of these regexes, depending on whether this is an attribute value or not

---------

Co-authored-by: Yuichiro Yamashita <[email protected]>
  • Loading branch information
xxkl1 and baseballyama authored Feb 28, 2023
1 parent aa15a64 commit f34abc5
Show file tree
Hide file tree
Showing 8 changed files with 2,218 additions and 1,936 deletions.
2 changes: 1 addition & 1 deletion src/compiler/parse/state/tag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ function read_sequence(parser: Parser, done: () => boolean, location: string): T

function flush(end: number) {
if (current_chunk.raw) {
current_chunk.data = decode_character_references(current_chunk.raw);
current_chunk.data = decode_character_references(current_chunk.raw, true);
current_chunk.end = end;
chunks.push(current_chunk);
}
Expand Down
2 changes: 1 addition & 1 deletion src/compiler/parse/state/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export default function text(parser: Parser) {
end: parser.index,
type: 'Text',
raw: data,
data: decode_character_references(data)
data: decode_character_references(data, false)
};

parser.current().children.push(node);
Expand Down
Loading

0 comments on commit f34abc5

Please sign in to comment.