Skip to content

Commit

Permalink
Try to preserve the original URI as much as possible
Browse files Browse the repository at this point in the history
Closes #2631
  • Loading branch information
mshibuya committed Apr 8, 2023
1 parent 436f51e commit 2f3afaf
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 29 deletions.
21 changes: 11 additions & 10 deletions lib/carrierwave/downloader/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
module CarrierWave
module Downloader
class Base
include CarrierWave::Utilities::Uri

attr_reader :uploader

def initialize(uploader)
Expand Down Expand Up @@ -61,17 +63,16 @@ def download(url, remote_headers = {})
#
# [url (String)] The URL where the remote file is stored
#
def process_uri(uri)
uri_parts = uri.split('?')
encoded_uri = Addressable::URI.parse(uri_parts.shift).normalize.to_s
query = uri_parts.any? ? "?#{uri_parts.join('?')}" : ''
begin
URI.parse("#{encoded_uri}#{query}")
rescue URI::InvalidURIError
URI.parse("#{encoded_uri}#{URI::DEFAULT_PARSER.escape(query)}")
end
def process_uri(source)
uri = Addressable::URI.parse(source)
uri.host = uri.normalized_host
# Perform decode first, as the path is likely to be already encoded
uri.path = encode_path(decode_uri(uri.path)) if uri.path =~ CarrierWave::Utilities::Uri::PATH_UNSAFE
uri.query = encode_non_ascii(uri.query) if uri.query
uri.fragment = encode_non_ascii(uri.fragment) if uri.fragment
URI.parse(uri.to_s)
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError
raise CarrierWave::DownloadError, "couldn't parse URL: #{uri}"
raise CarrierWave::DownloadError, "couldn't parse URL: #{source}"
end

##
Expand Down
22 changes: 12 additions & 10 deletions lib/carrierwave/utilities/uri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,22 @@ module CarrierWave
module Utilities
module Uri
# based on Ruby < 2.0's URI.encode
SAFE_STRING = URI::REGEXP::PATTERN::UNRESERVED + '\/'
UNSAFE = Regexp.new("[^#{SAFE_STRING}]", false)
PATH_SAFE = URI::REGEXP::PATTERN::UNRESERVED + '\/'
PATH_UNSAFE = Regexp.new("[^#{PATH_SAFE}]", false)
NON_ASCII = /[^[:ascii:]]/.freeze

private

def encode_path(path)
path.to_s.gsub(UNSAFE) do
us = $&
tmp = ''
us.each_byte do |uc|
tmp << sprintf('%%%02X', uc)
end
tmp
end
URI::DEFAULT_PARSER.escape(path, PATH_UNSAFE)
end

def encode_non_ascii(str)
URI::DEFAULT_PARSER.escape(str, NON_ASCII)
end

def decode_uri(str)
URI::DEFAULT_PARSER.unescape(str)
end
end # Uri
end # Utilities
Expand Down
50 changes: 41 additions & 9 deletions spec/downloader/base_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
let(:uploader) { uploader_class.new }
let(:file) { File.read(file_path("test.jpg")) }
let(:filename) { "test.jpg" }
let(:uri) { "http://www.example.com/#{CGI.escape(filename)}" }
let(:uri) { "http://www.example.com/#{URI::DEFAULT_PARSER.escape(filename)}" }

subject { CarrierWave::Downloader::Base.new(uploader) }

Expand Down Expand Up @@ -42,6 +42,29 @@
end
end

context "with internationalized domain name" do
let(:uri) { "https://ドメイン名例.jp/test.jpg" }
before do
stub_request(:get, 'https://xn--eckwd4c7cu47r2wf.jp/test.jpg').to_return(body: file)
allow(Resolv).to receive(:getaddresses).with('xn--eckwd4c7cu47r2wf.jp').and_return(['1.2.3.4'])
end

it "downloads a file" do
expect(subject.download(uri).file.read).to eq file
end
end

context "with non-ascii characters in the query and the fragment" do
let(:uri) { "http://example.com/test.jpg?q=А#あああ" }
before do
stub_request(:get, "http://example.com/test.jpg?q=%D0%90").to_return(body: file)
end

it "downloads a file" do
expect(subject.download(uri).file.read).to eq file
end
end

context 'with request headers' do
let(:authentication_headers) do
{
Expand Down Expand Up @@ -172,62 +195,71 @@
end

describe '#process_uri' do
it "returns an URI instance" do
uri = "http://example.com/"
expect(subject.process_uri(uri)).to be_an_instance_of(URI::HTTP)
end

it "converts a URL with internationalized domain name to Punycode URI" do
uri = "http://ドメイン名例.jp/#{CGI.escape(filename)}"
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq 'http://xn--eckwd4c7cu47r2wf.jp/test.jpg'
end

it "parses but not escape already escaped uris" do
uri = 'http://example.com/%5B.jpg'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq(uri)
end

it "does not perform normalization on path when not necessary" do
uri = 'http://example.com/o%CC%88.png'
processed = subject.process_uri(uri)
expect(processed.to_s).to eq(uri)
end

it "parses but not escape uris with query-string-only characters not needing escaping" do
uri = 'http://example.com/?foo[]=bar'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq(uri)
end

it "escapes and parse unescaped uris" do
uri = 'http://example.com/ %[].jpg'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq('http://example.com/%20%25%5B%5D.jpg')
end

it "parses but not escape uris with query-string characters representing urls not needing escaping " do
uri = 'http://example.com/?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F1234_1280x720.jpg'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq(uri)
end

it "escapes and parse brackets in uri paths without harming the query string" do
uri = 'http://example.com/].jpg?test[]'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq('http://example.com/%5D.jpg?test[]')
end

it "escapes and parse unescaped characters in path" do
uri = 'http://example.com/あああ.jpg'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq('http://example.com/%E3%81%82%E3%81%82%E3%81%82.jpg')
end

it "escapes and parse unescaped characters in query string" do
uri = 'http://example.com/?q=あああ'
processed = subject.process_uri(uri)
expect(processed.class).to eq(URI::HTTP)
expect(processed.to_s).to eq('http://example.com/?q=%E3%81%82%E3%81%82%E3%81%82')
end

it "escapes and parse unescaped characters in the fragment" do
uri = 'http://example.com/#あああ'
processed = subject.process_uri(uri)
expect(processed.to_s).to eq('http://example.com/#%E3%81%82%E3%81%82%E3%81%82')
end

it "throws an exception on bad uris" do
uri = '~http:'
expect { subject.process_uri(uri) }.to raise_error(CarrierWave::DownloadError)
Expand Down

0 comments on commit 2f3afaf

Please sign in to comment.