Skip to content

Commit

Permalink
Fix handling of multiple attributes with the same name
Browse files Browse the repository at this point in the history
Closes #8.
  • Loading branch information
openandclose authored and domenic committed Feb 23, 2020
1 parent 9fd7d44 commit cb04b18
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lib/html-encoding-sniffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,16 @@ function prescanMetaCharset(buffer) {
(isSpaceCharacter(c5) || c5 === 0x2F)) {
// "meta" + space or /
i += 6;
const attributeList = new Set();
let gotPragma = false;
let needPragma = null;
let charset = null;

let attrRes;
do {
attrRes = getAttribute(buffer, i, l);
if (attrRes.attr) {
if (attrRes.attr && !attributeList.has(attrRes.attr.name)) {
attributeList.add(attrRes.attr.name);
if (attrRes.attr.name === "http-equiv") {
gotPragma = attrRes.attr.value === "content-type";
} else if (attrRes.attr.name === "content" && !charset) {
Expand Down
7 changes: 7 additions & 0 deletions test/no-bom-charset-http-equiv-refresh.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test fixture: the meta element below repeats the http-equiv attribute on purpose
     (first "refresh", then "Content-Type"). The accompanying tests expect the
     iso-8859-2 charset in its content attribute NOT to be picked up by the sniffer. -->
<meta http-equiv=refresh http-equiv=Content-Type content="text/html;charset=iso-8859-2">
</head>
<body></body>
</html>
28 changes: 28 additions & 0 deletions test/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,34 @@ describe("A file with no BOM and a <meta http-equiv> with 'charsetcharset'", ()
});
});

// Regression suite for a <meta> element that repeats the http-equiv attribute
// (first "refresh", then "Content-Type"). In every case below the sniffer is
// expected NOT to report the iso-8859-2 charset carried by the content
// attribute, and to fall back to the transport-layer / default encoding instead.
describe("A file with no BOM and a <meta http-equiv=refresh> with another http-equiv", () => {
  const fixture = read("no-bom-charset-http-equiv-refresh.html");

  // One entry per test: the title, the arguments passed after the buffer
  // (empty when the sniffer is called with the buffer alone), and the
  // encoding name the sniffer must return.
  const cases = [
    {
      title: "should sniff as windows-1252, given no options",
      extraArgs: [],
      expected: "windows-1252"
    },
    {
      title: "should sniff as the transport layer encoding, given that",
      extraArgs: [{
        transportLayerEncodingLabel: "windows-1251",
        defaultEncoding: "ISO-8859-16"
      }],
      expected: "windows-1251"
    },
    {
      title: "should sniff as the default encoding, given that",
      extraArgs: [{
        defaultEncoding: "ISO-8859-16"
      }],
      expected: "ISO-8859-16"
    }
  ];

  for (const { title, extraArgs, expected } of cases) {
    it(title, () => {
      const actual = htmlEncodingSniffer(fixture, ...extraArgs);

      assert.strictEqual(actual, expected);
    });
  }
});

for (const utf16Encoding of ["utf-16be", "utf-16", "utf-16le"]) {
describe(`A file with a BOM and a <meta charset> of ${utf16Encoding}`, () => {
const buffer = read(`no-bom-charset-${utf16Encoding}.html`);
Expand Down

0 comments on commit cb04b18

Please sign in to comment.