Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support query encoding in http, https, ftp and fix path decode issues #73849

Merged
merged 9 commits into from
Jun 3, 2019
134 changes: 119 additions & 15 deletions src/vs/base/common/uri.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ const _empty = '';
const _slash = '/';
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;

/**
 * Tells whether `scheme` is one of the schemes (`http`, `https`, `ftp`) whose
 * query component gets query-string specific encoding and decoding.
 *
 * @param scheme The URI scheme, compared case-insensitively. May be
 * `undefined` or empty: the scheme group of `_regexp` is optional, so a caller
 * that forwards the raw capture passes `undefined` for scheme-less input.
 * @returns `true` for `http`, `https` and `ftp`, `false` for anything else,
 * including a missing scheme.
 */
function _isQueryStringScheme(scheme: string | undefined): boolean {
	// a missing scheme is never a query-string scheme; bail out before
	// calling a string method on it
	if (!scheme) {
		return false;
	}
	switch (scheme.toLowerCase()) {
		case 'http':
		case 'https':
		case 'ftp':
			return true;
		default:
			return false;
	}
}

/**
* Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
* This class is a simple parser which creates the basic component parts
Expand Down Expand Up @@ -282,14 +292,14 @@ export class URI implements UriComponents {
static parse(value: string, _strict: boolean = false): URI {
const match = _regexp.exec(value);
if (!match) {
return new _URI(_empty, _empty, _empty, _empty, _empty);
return new _URI(_empty, _empty, _empty, _empty, _empty, _strict);
}
return new _URI(
match[2] || _empty,
decodeURIComponent(match[4] || _empty),
decodeURIComponent(match[5] || _empty),
decodeURIComponent(match[7] || _empty),
decodeURIComponent(match[9] || _empty),
decodeURIComponentFast(match[4] || _empty, false, false),
decodeURIComponentFast(match[5] || _empty, true, false),
decodeURIComponentFast(match[7] || _empty, false, _isQueryStringScheme(match[2])),
decodeURIComponentFast(match[9] || _empty, false, false),
_strict
);
}
Expand Down Expand Up @@ -465,6 +475,84 @@ class _URI extends URI {
}
}

/**
 * Checks whether the character at `pos` in `value` is a hexadecimal digit
 * (`0-9`, `a-f` or `A-F`).
 *
 * @param value The string to inspect.
 * @param pos The index of the character to test.
 * @returns `false` when `pos` is at or beyond the end of `value` or when the
 * character is not a hex digit, `true` otherwise.
 */
function isHex(value: string, pos: number): boolean {
	if (pos >= value.length) {
		return false;
	}
	const ch = value.charCodeAt(pos);
	if (ch >= CharCode.Digit0 && ch <= CharCode.Digit9) {
		// 0-9
		return true;
	}
	if (ch >= CharCode.a && ch <= CharCode.f) {
		// a-f
		return true;
	}
	// A-F
	return ch >= CharCode.A && ch <= CharCode.F;
}


/**
 * Decodes a run of consecutive `%XX` escapes without ever throwing.
 *
 * The native `decodeURIComponent` throws a `URIError` when the escapes do not
 * form valid UTF-8 (for instance `%ff`). In that case escapes are dropped from
 * the front of the run, one at a time, and kept verbatim until the remainder
 * decodes; if nothing decodes the run is returned unchanged.
 *
 * @param run A string made only of `%XX` escapes.
 * @returns The decoded text, with undecodable leading escapes left as-is.
 */
function _decodeEscapeRun(run: string): string {
	let verbatim = '';
	let rest = run;
	while (rest.length > 0) {
		try {
			return verbatim + decodeURIComponent(rest);
		} catch (_err) {
			// malformed sequence: keep the leading escape as written and retry
			verbatim += rest.substring(0, 3);
			rest = rest.substring(3);
		}
	}
	return verbatim;
}

/**
 * Percent-decodes a URI component while keeping selected escapes encoded.
 *
 * Only a `%` that is followed by two hex digits is treated as an escape; any
 * other `%` is copied through as-is. Consecutive escapes are collected and
 * handed to the native decoder as one run.
 *
 * @param uriComponent The raw (encoded) component.
 * @param isPath When `true`, `%2f`/`%2F` (forward slash) stays encoded.
 * @param isQueryString When `true`, `%26` (ampersand), `%3B`/`%3b` (semicolon)
 * and `%3D`/`%3d` (equals) stay encoded.
 * @returns The decoded component, or `uriComponent` itself when no escape had
 * to be decoded. Escapes that are not valid UTF-8 are left verbatim instead of
 * raising a `URIError`.
 */
function decodeURIComponentFast(uriComponent: string, isPath: boolean, isQueryString: boolean): string {

	// stays undefined for as long as the output equals the input, so that the
	// parameter itself can be returned when nothing needed decoding
	let res: string | undefined;
	// start of the pending run of escapes that still needs decoding, -1 if none
	let nativeDecodePos = -1;

	for (let pos = 0; pos < uriComponent.length; pos++) {
		const code = uriComponent.charCodeAt(pos);

		// decoding needed
		if (code === CharCode.PercentSign && isHex(uriComponent, pos + 1) && isHex(uriComponent, pos + 2)) {

			const chA = uriComponent.charCodeAt(pos + 1);
			const chB = uriComponent.charCodeAt(pos + 2);

			// when in a path -> check and accept %2f and %2F (fwd slash)
			// when in a query string -> check and accept %3D, %26, and %3B (equals, ampersand, semi-colon)
			if (
				(isPath && chA === CharCode.Digit2 && (chB === CharCode.F || chB === CharCode.f))
				||
				(isQueryString && (
					(chA === CharCode.Digit2 && chB === CharCode.Digit6) // %26
					||
					(chA === CharCode.Digit3 && (chB === CharCode.B || chB === CharCode.b || chB === CharCode.D || chB === CharCode.d)) // %3B, %3D
				))
			) {
				// a preserved escape ends the pending run: decode that run first
				if (nativeDecodePos !== -1) {
					res += _decodeEscapeRun(uriComponent.substring(nativeDecodePos, pos));
					nativeDecodePos = -1;
				}

				// copy the escape verbatim (only needed once we build a new string)
				if (res !== undefined) {
					res += uriComponent.substring(pos, pos + 3);
				}

				pos += 2;
				continue;
			}

			// first escape that must be decoded: start a new string with the
			// unchanged prefix
			if (res === undefined) {
				res = uriComponent.substring(0, pos);
			}
			// open a run, or extend the one that is already pending
			if (nativeDecodePos === -1) {
				nativeDecodePos = pos;
			}

			pos += 2;

		} else {

			// a plain character ends the pending run: decode that run first
			if (nativeDecodePos !== -1) {
				res += _decodeEscapeRun(uriComponent.substring(nativeDecodePos, pos));
				nativeDecodePos = -1;
			}

			if (res !== undefined) {
				res += String.fromCharCode(code);
			}
		}
	}

	// the component ended while a run was still pending
	if (nativeDecodePos !== -1) {
		res += _decodeEscapeRun(uriComponent.substring(nativeDecodePos));
	}

	return res !== undefined ? res : uriComponent;
}

// reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2
const encodeTable: { [ch: number]: string } = {
[CharCode.Colon]: '%3A', // gen-delims
Expand All @@ -490,7 +578,7 @@ const encodeTable: { [ch: number]: string } = {
[CharCode.Space]: '%20',
};

function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): string {
function encodeURIComponentFast(uriComponent: string, isPath: boolean, isQueryString: boolean): string {
let res: string | undefined = undefined;
let nativeEncodePos = -1;

Expand All @@ -506,7 +594,8 @@ function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): stri
|| code === CharCode.Period
|| code === CharCode.Underline
|| code === CharCode.Tilde
|| (allowSlash && code === CharCode.Slash)
|| (isPath && code === CharCode.Slash) // path => allow slash AS-IS
|| (isQueryString && (code === CharCode.Equals || code === CharCode.Ampersand || code === CharCode.Semicolon)) // query string => allow &=;
) {
// check if we are delaying native encode
if (nativeEncodePos !== -1) {
Expand All @@ -518,6 +607,20 @@ function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): stri
res += uriComponent.charAt(pos);
}

} else if (code === CharCode.PercentSign && isHex(uriComponent, pos + 1) && isHex(uriComponent, pos + 2)) {
// at percentage encoded value

// check if we are delaying native encode
if (nativeEncodePos !== -1) {
res += encodeURIComponent(uriComponent.substring(nativeEncodePos, pos));
nativeEncodePos = -1;
}
// check if we write into a new string (by default we try to return the param)
if (res !== undefined) {
res += uriComponent.substr(pos, 3);
}
pos += 2;

} else {
// encoding needed, we need to allocate a new string
if (res === undefined) {
Expand Down Expand Up @@ -606,6 +709,7 @@ function _asFormatted(uri: URI, skipEncoding: boolean): string {

let res = '';
let { scheme, authority, path, query, fragment } = uri;

if (scheme) {
res += scheme;
res += ':';
Expand All @@ -622,22 +726,22 @@ function _asFormatted(uri: URI, skipEncoding: boolean): string {
authority = authority.substr(idx + 1);
idx = userinfo.indexOf(':');
if (idx === -1) {
res += encoder(userinfo, false);
res += encoder(userinfo, false, false);
} else {
// <user>:<pass>@<auth>
res += encoder(userinfo.substr(0, idx), false);
res += encoder(userinfo.substr(0, idx), false, false);
res += ':';
res += encoder(userinfo.substr(idx + 1), false);
res += encoder(userinfo.substr(idx + 1), false, false);
}
res += '@';
}
authority = authority.toLowerCase();
idx = authority.indexOf(':');
if (idx === -1) {
res += encoder(authority, false);
res += encoder(authority, false, false);
} else {
// <auth>:<port>
res += encoder(authority.substr(0, idx), false);
res += encoder(authority.substr(0, idx), false, false);
res += authority.substr(idx);
}
}
Expand All @@ -655,15 +759,15 @@ function _asFormatted(uri: URI, skipEncoding: boolean): string {
}
}
// encode the rest of the path
res += encoder(path, true);
res += encoder(path, true, false);
}
if (query) {
res += '?';
res += encoder(query, false);
res += encoder(query, false, _isQueryStringScheme(scheme));
}
if (fragment) {
res += '#';
res += !skipEncoding ? encodeURIComponentFast(fragment, false) : fragment;
res += !skipEncoding ? encodeURIComponentFast(fragment, false, false) : fragment;
}
return res;
}
71 changes: 61 additions & 10 deletions src/vs/base/test/common/uri.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ suite('URI', () => {
assert.equal(URI.from({ scheme: 'http', authority: '', path: 'my/path' }).toString(), 'http:/my/path');
assert.equal(URI.from({ scheme: 'http', authority: '', path: '/my/path' }).toString(), 'http:/my/path');
//http://a-test-site.com/#test=true
assert.equal(URI.from({ scheme: 'http', authority: 'a-test-site.com', path: '/', query: 'test=true' }).toString(), 'http://a-test-site.com/?test%3Dtrue');
assert.equal(URI.from({ scheme: 'http', authority: 'a-test-site.com', path: '/', query: 'test=true' }).toString(), 'http://a-test-site.com/?test=true');
assert.equal(URI.from({ scheme: 'http', authority: 'a-test-site.com', path: '/', query: '', fragment: 'test=true' }).toString(), 'http://a-test-site.com/#test%3Dtrue');
});

Expand Down Expand Up @@ -102,11 +102,11 @@ suite('URI', () => {

test('with, changes', () => {
assert.equal(URI.parse('before:some/file/path').with({ scheme: 'after' }).toString(), 'after:some/file/path');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', path: '/api/files/test.me', query: 't=1234' }).toString(), 'http:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'http:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'https', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'https:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTP', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTP:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTPS', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTPS:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', path: '/api/files/test.me', query: 't=1234' }).toString(), 'http:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'http:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'https', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'https:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTP', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTP:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTPS', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTPS:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'boo', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'boo:/api/files/test.me?t%3D1234');
});

Expand Down Expand Up @@ -262,11 +262,11 @@ suite('URI', () => {

value = URI.file('c:\\test with %25\\path');
assert.equal(value.path, '/c:/test with %25/path');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%2525/path');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%25/path');

value = URI.file('c:\\test with %25\\c#code');
assert.equal(value.path, '/c:/test with %25/c#code');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%2525/c%23code');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%25/c%23code');

value = URI.file('\\\\shares');
assert.equal(value.scheme, 'file');
Expand Down Expand Up @@ -376,7 +376,7 @@ suite('URI', () => {
let uri = URI.parse('https://go.microsoft.com/fwlink/?LinkId=518008');
assert.equal(uri.query, 'LinkId=518008');
assert.equal(uri.toString(true), 'https://go.microsoft.com/fwlink/?LinkId=518008');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId%3D518008');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId=518008');

let uri2 = URI.parse(uri.toString());
assert.equal(uri2.query, 'LinkId=518008');
Expand All @@ -385,7 +385,7 @@ suite('URI', () => {
uri = URI.parse('https://go.microsoft.com/fwlink/?LinkId=518008&foö&ké¥=üü');
assert.equal(uri.query, 'LinkId=518008&foö&ké¥=üü');
assert.equal(uri.toString(true), 'https://go.microsoft.com/fwlink/?LinkId=518008&foö&ké¥=üü');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId%3D518008%26fo%C3%B6%26k%C3%A9%C2%A5%3D%C3%BC%C3%BC');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId=518008&fo%C3%B6&k%C3%A9%C2%A5=%C3%BC%C3%BC');

uri2 = URI.parse(uri.toString());
assert.equal(uri2.query, 'LinkId=518008&foö&ké¥=üü');
Expand Down Expand Up @@ -426,6 +426,57 @@ suite('URI', () => {
assert.equal(uri.toString(true), input);
});

test('Support URL specific encodings (query component) #25852', function () {
	// assert.equal takes (actual, expected): keep the parsed value first so
	// that a failure reports the two sides under the right labels

	// http is a query-string scheme: `=` and `&` round-trip unencoded
	let input = 'http://example.com/over/there?name=ferret';
	assert.equal(URI.parse(input).toString(), input);

	input = 'http://example.com/over/there?name=ferret&foo=bar';
	assert.equal(URI.parse(input).toString(), input);

	// any other scheme still gets `=` percent-encoded in the query
	input = 'attp://example.com/over/there?name=ferret';
	assert.equal(URI.parse(input).toString(), 'attp://example.com/over/there?name%3Dferret');
});

// Checks that an already percent-encoded forward slash (%2f / %2F) inside a
// path is kept as written by both `toString()` and `URI.parse()`.
test('Uri#parse can break path-component #45515', function () {
let uri: URI;
// URI.from + toString: %2f is kept, `ü` is encoded, a trailing lone `%` becomes %25
uri = URI.from({ scheme: 's', authority: 'a', path: '/o%2f' });
assert.equal(uri.toString(), 's://a/o%2f');
uri = URI.from({ scheme: 's', authority: 'a', path: '/o%2fü' });
assert.equal(uri.toString(), 's://a/o%2f%C3%BC');
uri = URI.from({ scheme: 's', authority: 'a', path: '/o%2f%' });
assert.equal(uri.toString(), 's://a/o%2f%25');

// a `%25` that is already part of a file path is not encoded a second time
uri = URI.file('/test with %25/c#code');
assert.equal(uri.path, '/test with %25/c#code');
assert.equal(uri.toString(), 'file:///test%20with%20%25/c%23code');

// path with upper-case %2F escapes: unchanged by URI.from, toString and a re-parse
uri = URI.from({
scheme: 'http',
authority: 'a',
path: '/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg'
});
assert.equal(uri.path, '/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg');
assert.equal(uri.toString(), 'http://a/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg');

assert.equal(URI.parse(uri.toString()).path, '/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg');
assert.equal(uri.toString(), URI.parse(uri.toString()).toString()); // identity

// URI.parse decodes the %c3%bc / %c3%b6 escapes but leaves %2f encoded,
// whether the %2f comes after, before or between them
uri = URI.parse('s://a/p%2ft%c3%bc');
assert.equal(uri.path, '/p%2ftü');

uri = URI.parse('s://a/%c3%bcp%2f-REST');
assert.equal(uri.path, '/üp%2f-REST');

uri = URI.parse('s://a/%c3%bcp%2fd%c3%b6wn');
assert.equal(uri.path, '/üp%2fdöwn');

//https://github.com/microsoft/vscode/issues/25852
// query of an http URI: an encoded `=` (%3D) stays encoded in `query` and in toString
uri = URI.parse('http://www.test.com/path/service?authId=CN%3DQ10');
assert.equal(uri.query, 'authId=CN%3DQ10');
assert.equal(uri.toString(), 'http://www.test.com/path/service?authId=CN%3DQ10');
});


test('URI - (de)serialize', function () {

const values = [
Expand Down