From 8891d8945d837f0da16a3a5aa2dc9783e39b0acd Mon Sep 17 00:00:00 2001 From: Christian Schmidt Date: Wed, 2 Aug 2023 19:32:29 +0200 Subject: [PATCH] Fix request URL normalisation for bare domain and 8-bit characters (#26285) --- app/lib/request.rb | 10 ++++-- spec/lib/request_spec.rb | 67 +++++++++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/app/lib/request.rb b/app/lib/request.rb index e3597b052f6ecc..adc9a48f3dd357 100644 --- a/app/lib/request.rb +++ b/app/lib/request.rb @@ -76,8 +76,8 @@ class Request HTTP::URI.new( scheme: uri.normalized_scheme, authority: uri.normalized_authority, - path: Addressable::URI.normalize_path(uri.path), - query: uri.query + path: Addressable::URI.normalize_path(encode_non_ascii(uri.path)).presence || '/', + query: encode_non_ascii(uri.query) ) end @@ -151,6 +151,12 @@ def valid_url?(url) %w(http https).include?(parsed_url.scheme) && parsed_url.host.present? end + NON_ASCII_PATTERN = /[^\x00-\x7F]+/ + + def encode_non_ascii(str) + str&.gsub(NON_ASCII_PATTERN) { |substr| CGI.escape(substr.encode(Encoding::UTF_8)) } + end + def http_client HTTP.use(:auto_inflate).use(normalize_uri: { normalizer: URI_NORMALIZER }).follow(max_hops: 3) end diff --git a/spec/lib/request_spec.rb b/spec/lib/request_spec.rb index 1e16f60fbd5d34..8ccfcacef24bf3 100644 --- a/spec/lib/request_spec.rb +++ b/spec/lib/request_spec.rb @@ -95,6 +95,33 @@ end end + context 'with bare domain URL' do + let(:url) { 'http://example.com' } + + before do + stub_request(:get, 'http://example.com') + end + + it 'normalizes path' do + subject.perform do |response| + expect(response.request.uri.path).to eq '/' + end + end + + it 'normalizes path used for request signing' do + subject.perform + + headers = subject.instance_variable_get(:@headers) + expect(headers[Request::REQUEST_TARGET]).to eq 'get /' + end + + it 'normalizes path used in request line' do + subject.perform do |response| + expect(response.request.headline).to eq 'GET / HTTP/1.1' + end + end + end + context 'with unnormalized URL' do let(:url) { 'HTTP://EXAMPLE.com:80/foo%41%3A?bar=%41%3A#baz' } @@ -114,18 +141,31 @@ end end - it 'does modify path' do + it 'does not modify path' do subject.perform do |response| expect(response.request.uri.path).to eq '/foo%41%3A' end end - it 'does modify query string' do + it 'does not modify query string' do subject.perform do |response| expect(response.request.uri.query).to eq 'bar=%41%3A' end end + it 'does not modify path used for request signing' do + subject.perform + + headers = subject.instance_variable_get(:@headers) + expect(headers[Request::REQUEST_TARGET]).to eq 'get /foo%41%3A' + end + + it 'does not modify path used in request line' do + subject.perform do |response| + expect(response.request.headline).to eq 'GET /foo%41%3A?bar=%41%3A HTTP/1.1' + end + end + it 'strips fragment' do subject.perform do |response| expect(response.request.uri.fragment).to be_nil @@ -134,22 +174,35 @@ end context 'with non-ASCII URL' do - let(:url) { 'http://éxample.com/föo?bär=1' } + let(:url) { 'http://éxample.com:81/föo?bär=1' } before do - stub_request(:get, 'http://xn--xample-9ua.com/f%C3%B6o?b%C3%A4r=1') + stub_request(:get, 'http://xn--xample-9ua.com:81/f%C3%B6o?b%C3%A4r=1') end it 'IDN-encodes host' do subject.perform do |response| - expect(response.request.uri.authority).to eq 'xn--xample-9ua.com' + expect(response.request.uri.authority).to eq 'xn--xample-9ua.com:81' end end - it 'percent-escapes path and query string' do + it 'IDN-encodes host in Host header' do + subject.perform do |response| + expect(response.request.headers['Host']).to eq 'xn--xample-9ua.com' + end + end + + it 'percent-escapes path used for request signing' do subject.perform - expect(a_request(:get, 'http://xn--xample-9ua.com/f%C3%B6o?b%C3%A4r=1')).to have_been_made + headers = subject.instance_variable_get(:@headers) + expect(headers[Request::REQUEST_TARGET]).to eq 'get /f%C3%B6o' + end + + it 'normalizes path used in request line' do + subject.perform do |response| + expect(response.request.headline).to eq 'GET /f%C3%B6o?b%C3%A4r=1 HTTP/1.1' + end end end