Skip to content

Commit

Permalink
Merge pull request #73849 from microsoft/joh/uri-query
Browse files Browse the repository at this point in the history
support query encoding in http, https, ftp and fix path decode issues
  • Loading branch information
jrieken authored Jun 3, 2019
2 parents 217d993 + bbc2fab commit 8fc6542
Show file tree
Hide file tree
Showing 2 changed files with 180 additions and 25 deletions.
134 changes: 119 additions & 15 deletions src/vs/base/common/uri.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ const _empty = '';
const _slash = '/';
const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;

/**
 * Tells whether the query component of a URI with the given scheme is treated
 * as a query string, i.e. whether `=`, `&` and `;` are kept as-is by the
 * encoder/decoder instead of being percent-encoded/decoded.
 *
 * The comparison is case-insensitive. A missing or empty scheme is never a
 * query-string scheme; this matters because the scheme capture group of the
 * parse regexp is optional and is `undefined` for scheme-less input.
 *
 * @param scheme The scheme of the URI, may be `undefined` or empty.
 * @returns `true` for `http`, `https` and `ftp`, `false` otherwise.
 */
function _isQueryStringScheme(scheme: string | undefined): boolean {
	if (!scheme) {
		// no scheme => nothing to lower-case, and never a query-string scheme
		return false;
	}
	switch (scheme.toLowerCase()) {
		case 'http':
		case 'https':
		case 'ftp':
			return true;
		default:
			return false;
	}
}

/**
* Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
* This class is a simple parser which creates the basic component parts
Expand Down Expand Up @@ -282,14 +292,14 @@ export class URI implements UriComponents {
static parse(value: string, _strict: boolean = false): URI {
const match = _regexp.exec(value);
if (!match) {
return new _URI(_empty, _empty, _empty, _empty, _empty);
return new _URI(_empty, _empty, _empty, _empty, _empty, _strict);
}
return new _URI(
match[2] || _empty,
decodeURIComponent(match[4] || _empty),
decodeURIComponent(match[5] || _empty),
decodeURIComponent(match[7] || _empty),
decodeURIComponent(match[9] || _empty),
decodeURIComponentFast(match[4] || _empty, false, false),
decodeURIComponentFast(match[5] || _empty, true, false),
decodeURIComponentFast(match[7] || _empty, false, _isQueryStringScheme(match[2])),
decodeURIComponentFast(match[9] || _empty, false, false),
_strict
);
}
Expand Down Expand Up @@ -465,6 +475,84 @@ class _URI extends URI {
}
}

/**
 * Checks whether the character at `pos` in `value` is a hexadecimal digit
 * (`0-9`, `a-f` or `A-F`).
 *
 * @param value The string to inspect.
 * @param pos The index of the character to test.
 * @returns `false` when `pos` is at or beyond the end of `value` or when the
 * character is not a hex digit, `true` otherwise.
 */
function isHex(value: string, pos: number): boolean {
	if (pos >= value.length) {
		return false;
	}
	const ch = value.charCodeAt(pos);
	if (ch >= CharCode.Digit0 && ch <= CharCode.Digit9) {
		return true; // 0-9
	}
	if (ch >= CharCode.a && ch <= CharCode.f) {
		return true; // a-f
	}
	return ch >= CharCode.A && ch <= CharCode.F; // A-F
}


/**
 * Percent-decodes a single URI component while leaving selected escapes
 * untouched so that the component keeps its structure:
 *
 * - with `isPath`, `%2F`/`%2f` (forward slash) stays encoded
 * - with `isQueryString`, `%26` (ampersand), `%3B`/`%3b` (semi-colon) and
 *   `%3D`/`%3d` (equals) stay encoded
 *
 * Every other `%XX` escape (two hex digits) is decoded. Consecutive escapes
 * are collected into a run and decoded together with the native
 * `decodeURIComponent` so that multi-byte UTF-8 sequences are handled. A `%`
 * that is not followed by two hex digits is copied as-is.
 *
 * Escapes that are valid hex but do not form valid UTF-8 (e.g. `%FF`) make the
 * native decoder throw; those escapes are kept verbatim instead of letting the
 * error escape.
 *
 * @param uriComponent The encoded component.
 * @param isPath Whether the component is a path.
 * @param isQueryString Whether the component is a query string.
 * @returns The decoded component; the very same string when nothing changed.
 */
function decodeURIComponentFast(uriComponent: string, isPath: boolean, isQueryString: boolean): string {

	// stays undefined for as long as the output equals the input so that the
	// common "nothing to decode" case returns the parameter without copying
	let res: string | undefined;
	// start of the pending run of escapes awaiting native decoding, -1 if none
	let nativeDecodePos = -1;

	// Decodes a run that consists of `%XX` triplets only. When the native
	// decoder rejects the run, the leading triplet is kept verbatim and the
	// remainder is retried.
	const decodeRun = (run: string): string => {
		let kept = '';
		while (run.length > 0) {
			try {
				return kept + decodeURIComponent(run);
			} catch (err) {
				kept += run.substring(0, 3);
				run = run.substring(3);
			}
		}
		return kept;
	};

	// Appends the decoded pending run `[nativeDecodePos, end)`, if there is one.
	// `res` is always defined at this point because it is allocated before a
	// run is started.
	const flushNativeDecode = (end: number): void => {
		if (nativeDecodePos !== -1) {
			res += decodeRun(uriComponent.substring(nativeDecodePos, end));
			nativeDecodePos = -1;
		}
	};

	for (let pos = 0; pos < uriComponent.length; pos++) {
		const code = uriComponent.charCodeAt(pos);
		const isEscape = code === CharCode.PercentSign && isHex(uriComponent, pos + 1) && isHex(uriComponent, pos + 2);

		if (!isEscape) {
			// plain character: ends a pending run and is copied literally
			flushNativeDecode(pos);
			if (res !== undefined) {
				res += uriComponent.charAt(pos);
			}
			continue;
		}

		const chA = uriComponent.charCodeAt(pos + 1);
		const chB = uriComponent.charCodeAt(pos + 2);

		// when in a path -> check and accept %2f and %2F (fwd slash)
		// when in a query string -> check and accept %26, %3B, and %3D (ampersand, semi-colon, equals)
		const keepEncoded =
			(isPath && chA === CharCode.Digit2 && (chB === CharCode.F || chB === CharCode.f))
			||
			(isQueryString && (
				(chA === CharCode.Digit2 && chB === CharCode.Digit6) // %26
				||
				(chA === CharCode.Digit3 && (chB === CharCode.B || chB === CharCode.b || chB === CharCode.D || chB === CharCode.d)) // %3B, %3D
			));

		if (keepEncoded) {
			// ends a pending run and is copied in its encoded form
			flushNativeDecode(pos);
			if (res !== undefined) {
				res += uriComponent.substring(pos, pos + 3);
			}
		} else {
			// decoding needed, we need to allocate a new string
			if (res === undefined) {
				res = uriComponent.substring(0, pos);
			}
			// start a run or extend the current one
			if (nativeDecodePos === -1) {
				nativeDecodePos = pos;
			}
		}

		// skip the two hex digits of the escape
		pos += 2;
	}

	flushNativeDecode(uriComponent.length);

	return res !== undefined ? res : uriComponent;
}

// reserved characters: https://tools.ietf.org/html/rfc3986#section-2.2
const encodeTable: { [ch: number]: string } = {
[CharCode.Colon]: '%3A', // gen-delims
Expand All @@ -490,7 +578,7 @@ const encodeTable: { [ch: number]: string } = {
[CharCode.Space]: '%20',
};

function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): string {
function encodeURIComponentFast(uriComponent: string, isPath: boolean, isQueryString: boolean): string {
let res: string | undefined = undefined;
let nativeEncodePos = -1;

Expand All @@ -506,7 +594,8 @@ function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): stri
|| code === CharCode.Period
|| code === CharCode.Underline
|| code === CharCode.Tilde
|| (allowSlash && code === CharCode.Slash)
|| (isPath && code === CharCode.Slash) // path => allow slash AS-IS
|| (isQueryString && (code === CharCode.Equals || code === CharCode.Ampersand || code === CharCode.Semicolon)) // query string => allow &=;
) {
// check if we are delaying native encode
if (nativeEncodePos !== -1) {
Expand All @@ -518,6 +607,20 @@ function encodeURIComponentFast(uriComponent: string, allowSlash: boolean): stri
res += uriComponent.charAt(pos);
}

} else if (code === CharCode.PercentSign && isHex(uriComponent, pos + 1) && isHex(uriComponent, pos + 2)) {
// at percentage encoded value

// check if we are delaying native encode
if (nativeEncodePos !== -1) {
res += encodeURIComponent(uriComponent.substring(nativeEncodePos, pos));
nativeEncodePos = -1;
}
// check if we write into a new string (by default we try to return the param)
if (res !== undefined) {
res += uriComponent.substr(pos, 3);
}
pos += 2;

} else {
// encoding needed, we need to allocate a new string
if (res === undefined) {
Expand Down Expand Up @@ -606,6 +709,7 @@ function _asFormatted(uri: URI, skipEncoding: boolean): string {

let res = '';
let { scheme, authority, path, query, fragment } = uri;

if (scheme) {
res += scheme;
res += ':';
Expand All @@ -622,22 +726,22 @@ function _asFormatted(uri: URI, skipEncoding: boolean): string {
authority = authority.substr(idx + 1);
idx = userinfo.indexOf(':');
if (idx === -1) {
res += encoder(userinfo, false);
res += encoder(userinfo, false, false);
} else {
// <user>:<pass>@<auth>
res += encoder(userinfo.substr(0, idx), false);
res += encoder(userinfo.substr(0, idx), false, false);
res += ':';
res += encoder(userinfo.substr(idx + 1), false);
res += encoder(userinfo.substr(idx + 1), false, false);
}
res += '@';
}
authority = authority.toLowerCase();
idx = authority.indexOf(':');
if (idx === -1) {
res += encoder(authority, false);
res += encoder(authority, false, false);
} else {
// <auth>:<port>
res += encoder(authority.substr(0, idx), false);
res += encoder(authority.substr(0, idx), false, false);
res += authority.substr(idx);
}
}
Expand All @@ -655,15 +759,15 @@ function _asFormatted(uri: URI, skipEncoding: boolean): string {
}
}
// encode the rest of the path
res += encoder(path, true);
res += encoder(path, true, false);
}
if (query) {
res += '?';
res += encoder(query, false);
res += encoder(query, false, _isQueryStringScheme(scheme));
}
if (fragment) {
res += '#';
res += !skipEncoding ? encodeURIComponentFast(fragment, false) : fragment;
res += !skipEncoding ? encodeURIComponentFast(fragment, false, false) : fragment;
}
return res;
}
71 changes: 61 additions & 10 deletions src/vs/base/test/common/uri.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ suite('URI', () => {
assert.equal(URI.from({ scheme: 'http', authority: '', path: 'my/path' }).toString(), 'http:/my/path');
assert.equal(URI.from({ scheme: 'http', authority: '', path: '/my/path' }).toString(), 'http:/my/path');
//http://a-test-site.com/#test=true
assert.equal(URI.from({ scheme: 'http', authority: 'a-test-site.com', path: '/', query: 'test=true' }).toString(), 'http://a-test-site.com/?test%3Dtrue');
assert.equal(URI.from({ scheme: 'http', authority: 'a-test-site.com', path: '/', query: 'test=true' }).toString(), 'http://a-test-site.com/?test=true');
assert.equal(URI.from({ scheme: 'http', authority: 'a-test-site.com', path: '/', query: '', fragment: 'test=true' }).toString(), 'http://a-test-site.com/#test%3Dtrue');
});

Expand Down Expand Up @@ -102,11 +102,11 @@ suite('URI', () => {

test('with, changes', () => {
assert.equal(URI.parse('before:some/file/path').with({ scheme: 'after' }).toString(), 'after:some/file/path');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', path: '/api/files/test.me', query: 't=1234' }).toString(), 'http:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'http:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'https', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'https:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTP', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTP:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTPS', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTPS:/api/files/test.me?t%3D1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', path: '/api/files/test.me', query: 't=1234' }).toString(), 'http:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'http', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'http:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'https', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'https:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTP', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTP:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'HTTPS', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'HTTPS:/api/files/test.me?t=1234');
assert.equal(URI.from({ scheme: 's' }).with({ scheme: 'boo', authority: '', path: '/api/files/test.me', query: 't=1234', fragment: '' }).toString(), 'boo:/api/files/test.me?t%3D1234');
});

Expand Down Expand Up @@ -262,11 +262,11 @@ suite('URI', () => {

value = URI.file('c:\\test with %25\\path');
assert.equal(value.path, '/c:/test with %25/path');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%2525/path');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%25/path');

value = URI.file('c:\\test with %25\\c#code');
assert.equal(value.path, '/c:/test with %25/c#code');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%2525/c%23code');
assert.equal(value.toString(), 'file:///c%3A/test%20with%20%25/c%23code');

value = URI.file('\\\\shares');
assert.equal(value.scheme, 'file');
Expand Down Expand Up @@ -376,7 +376,7 @@ suite('URI', () => {
let uri = URI.parse('https://go.microsoft.com/fwlink/?LinkId=518008');
assert.equal(uri.query, 'LinkId=518008');
assert.equal(uri.toString(true), 'https://go.microsoft.com/fwlink/?LinkId=518008');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId%3D518008');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId=518008');

let uri2 = URI.parse(uri.toString());
assert.equal(uri2.query, 'LinkId=518008');
Expand All @@ -385,7 +385,7 @@ suite('URI', () => {
uri = URI.parse('https://go.microsoft.com/fwlink/?LinkId=518008&foö&ké¥=üü');
assert.equal(uri.query, 'LinkId=518008&foö&ké¥=üü');
assert.equal(uri.toString(true), 'https://go.microsoft.com/fwlink/?LinkId=518008&foö&ké¥=üü');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId%3D518008%26fo%C3%B6%26k%C3%A9%C2%A5%3D%C3%BC%C3%BC');
assert.equal(uri.toString(), 'https://go.microsoft.com/fwlink/?LinkId=518008&fo%C3%B6&k%C3%A9%C2%A5=%C3%BC%C3%BC');

uri2 = URI.parse(uri.toString());
assert.equal(uri2.query, 'LinkId=518008&foö&ké¥=üü');
Expand Down Expand Up @@ -426,6 +426,57 @@ suite('URI', () => {
assert.equal(uri.toString(true), input);
});

test('Support URL specific encodings (query component) #25852', function () {
	// http is a query-string scheme: `=` and `&` in the query survive a
	// parse/toString round trip unchanged
	let input = 'http://example.com/over/there?name=ferret';
	assert.equal(URI.parse(input).toString(), input);

	input = 'http://example.com/over/there?name=ferret&foo=bar';
	assert.equal(URI.parse(input).toString(), input);

	// any other scheme: `=` in the query is percent-encoded by toString
	input = 'attp://example.com/over/there?name=ferret';
	assert.equal(URI.parse(input).toString(), 'attp://example.com/over/there?name%3Dferret');
});

test('Uri#parse can break path-component #45515', function () {
	// URI.from -> toString: an encoded slash in the path is emitted unchanged,
	// other characters (non-ASCII, a lone percent sign) are still encoded
	const formatted: [string, string][] = [
		['/o%2f', 's://a/o%2f'],
		['/o%2fü', 's://a/o%2f%C3%BC'],
		['/o%2f%', 's://a/o%2f%25'],
	];
	for (const [path, expected] of formatted) {
		assert.equal(URI.from({ scheme: 's', authority: 'a', path }).toString(), expected);
	}

	// file path that contains a percent-encoded value
	const fileUri = URI.file('/test with %25/c#code');
	assert.equal(fileUri.path, '/test with %25/c#code');
	assert.equal(fileUri.toString(), 'file:///test%20with%20%25/c%23code');

	// encoded slashes inside a path segment survive from -> toString -> parse
	const imageUri = URI.from({
		scheme: 'http',
		authority: 'a',
		path: '/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg'
	});
	assert.equal(imageUri.path, '/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg');
	assert.equal(imageUri.toString(), 'http://a/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg');

	assert.equal(URI.parse(imageUri.toString()).path, '/o/products%2FzVNZkudXJyq8bPGTXUxx%2FBetterave-Sesame.jpg');
	assert.equal(imageUri.toString(), URI.parse(imageUri.toString()).toString()); // identity

	// URI.parse: multi-byte escapes are decoded, the encoded slash is kept
	const parsed: [string, string][] = [
		['s://a/p%2ft%c3%bc', '/p%2ftü'],
		['s://a/%c3%bcp%2f-REST', '/üp%2f-REST'],
		['s://a/%c3%bcp%2fd%c3%b6wn', '/üp%2fdöwn'],
	];
	for (const [value, expectedPath] of parsed) {
		assert.equal(URI.parse(value).path, expectedPath);
	}

	//https://github.com/microsoft/vscode/issues/25852
	const serviceUri = URI.parse('http://www.test.com/path/service?authId=CN%3DQ10');
	assert.equal(serviceUri.query, 'authId=CN%3DQ10');
	assert.equal(serviceUri.toString(), 'http://www.test.com/path/service?authId=CN%3DQ10');
});


test('URI - (de)serialize', function () {

const values = [
Expand Down

0 comments on commit 8fc6542

Please sign in to comment.