Skip to content

Commit

Permalink
fix(fetch): Correctly decode multipart/form-data names and filenames (
Browse files Browse the repository at this point in the history
#19145)

Currently the `multipart/form-data` parser in
`Request.prototype.formData` and `Response.prototype.formData` decodes
non-ASCII filenames incorrectly, as if they were encoded in Latin-1
rather than UTF-8. This happens because the header section of each
`multipart/form-data` entry is decoded as Latin-1 in order to be parsed
with `Headers`, which only allows `ByteString`s, but the names and
filenames are never decoded correctly. This PR fixes this as a
post-processing step.

Note that the `multipart/form-data` parsing for this APIs in the Fetch
spec is very much underspecified, and it does not specify that names and
filenames must be decoded as UTF-8. However, it does require that the
bodies of non-`File` entries are decoded as UTF-8, and in browsers,
names and filenames always use the same encoding as the body.

Closes #19142.
  • Loading branch information
andreubotella authored and levex committed May 18, 2023
1 parent d7361ed commit 5c117a9
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 5 deletions.
53 changes: 53 additions & 0 deletions cli/tests/unit/body_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,59 @@ Deno.test(
},
);

// FormData: non-ASCII names and filenames
Deno.test(
{ permissions: { net: true } },
async function bodyMultipartFormDataNonAsciiNames() {
const boundary = "----01230123";
const payload = [
`--${boundary}`,
`Content-Disposition: form-data; name="文字"`,
"",
"文字",
`--${boundary}`,
`Content-Disposition: form-data; name="file"; filename="文字"`,
"Content-Type: application/octet-stream",
"",
"",
`--${boundary}--`,
].join("\r\n");

const body = buildBody(
new TextEncoder().encode(payload),
new Headers({
"Content-Type": `multipart/form-data; boundary=${boundary}`,
}),
);

const formData = await body.formData();
assert(formData.has("文字"));
assertEquals(formData.get("文字"), "文字");
assert(formData.has("file"));
assert(formData.get("file") instanceof File);
assertEquals((formData.get("file") as File).name, "文字");
},
);

// FormData: non-ASCII names and filenames roundtrip
Deno.test(
{ permissions: { net: true } },
async function bodyMultipartFormDataNonAsciiRoundtrip() {
const inFormData = new FormData();
inFormData.append("文字", "文字");
inFormData.append("file", new File([], "文字"));

const body = buildBody(inFormData);

const formData = await body.formData();
assert(formData.has("文字"));
assertEquals(formData.get("文字"), "文字");
assert(formData.has("file"));
assert(formData.get("file") instanceof File);
assertEquals((formData.get("file") as File).name, "文字");
},
);

Deno.test(
{ permissions: { net: true } },
async function bodyURLEncodedFormData() {
Expand Down
33 changes: 28 additions & 5 deletions ext/fetch/21_formdata.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ const {
StringPrototypeReplaceAll,
TypeError,
TypedArrayPrototypeSubarray,
Uint8Array,
} = primordials;

const entryList = Symbol("entry list");
Expand Down Expand Up @@ -358,6 +359,20 @@ function parseContentDisposition(value) {
return params;
}

/**
* Decodes a string containing UTF-8 mistakenly decoded as Latin-1 and
* decodes it correctly.
* @param {string} latin1String
* @returns {string}
*/
function decodeLatin1StringAsUtf8(latin1String) {
const buffer = new Uint8Array(latin1String.length);
for (let i = 0; i < latin1String.length; i++) {
buffer[i] = latin1String.charCodeAt(i);
}
return core.decode(buffer);
}

const CRLF = "\r\n";
const LF = StringPrototypeCodePointAt(CRLF, 1);
const CR = StringPrototypeCodePointAt(CRLF, 0);
Expand Down Expand Up @@ -465,23 +480,31 @@ class MultipartParser {
i - boundaryIndex - 1,
);
// https://fetch.spec.whatwg.org/#ref-for-dom-body-formdata
const filename = MapPrototypeGet(disposition, "filename");
const name = MapPrototypeGet(disposition, "name");
// These are UTF-8 decoded as if it was Latin-1.
// TODO(@andreubotella): Maybe we shouldn't be parsing entry headers
// as Latin-1.
const latin1Filename = MapPrototypeGet(disposition, "filename");
const latin1Name = MapPrototypeGet(disposition, "name");

state = 5;
// Reset
boundaryIndex = 0;
headerText = "";

if (!name) {
if (!latin1Name) {
continue; // Skip, unknown name
}

if (filename) {
const name = decodeLatin1StringAsUtf8(latin1Name);
if (latin1Filename) {
const blob = new Blob([content], {
type: headers.get("Content-Type") || "application/octet-stream",
});
formData.append(name, blob, filename);
formData.append(
name,
blob,
decodeLatin1StringAsUtf8(latin1Filename),
);
} else {
formData.append(name, core.decode(content));
}
Expand Down

0 comments on commit 5c117a9

Please sign in to comment.