From 631eb5f1fac8c8e32713c5a6cb8cac13b5f7f349 Mon Sep 17 00:00:00 2001 From: kastakhov <16296930+kastakhov@users.noreply.github.com> Date: Sat, 17 Aug 2024 22:26:17 +0000 Subject: [PATCH 1/4] fix userinfo in case of some special characters add some UT for this case --- lib/URI.pm | 2 +- lib/URI/_server.pm | 26 ++++++++++++++---------- t/http.t | 50 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 12 deletions(-) diff --git a/lib/URI.pm b/lib/URI.pm index 30c8e4e0..a12d0cf3 100644 --- a/lib/URI.pm +++ b/lib/URI.pm @@ -123,7 +123,7 @@ sub _fix_uric_escape_for_host_part { return; } - if ($_[0] =~ m{^((?:$URI::scheme_re:)?)//([^/?\#]+)(.*)$}os) { + if ($_[0] =~ m{^((?:$URI::scheme_re:)?)//((.*:.*@)?[^/?\#]+)(.*)$}os) { my $orig = $2; my ($user, $host) = $orig =~ /^(.*@)?([^@]*)$/; $user ||= ''; diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm index 15d972f7..784ae54e 100644 --- a/lib/URI/_server.pm +++ b/lib/URI/_server.pm @@ -5,19 +5,25 @@ use warnings; use parent 'URI::_generic'; -use URI::Escape qw(uri_unescape); +use URI::Escape qw(uri_unescape uri_escape); our $VERSION = '5.29'; sub _uric_escape { my($class, $str) = @_; - if ($str =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { - my($scheme, $host, $rest) = ($1, $2, $3); - my $ui = $host =~ s/(.*@)// ? $1 : ""; - my $port = $host =~ s/(:\d+)\z// ? $1 : ""; - if (_host_escape($host)) { - $str = "$scheme//$ui$host$port$rest"; - } + if ($str =~ m,^((?:$URI::scheme_re:)?)//(.*:.*@)?([^/?\#]*)(.*)$,os) { + my $scheme = $1; + my $ui = $2 || ''; + my $host = $3; + my $rest = $4; + my $port = $host =~ s/(:\d+)\z// ? $1 : ""; + if ($ui) { + # escape /?# symbols as they are used + # in subsequent regex for path parsing + $ui = uri_escape($ui, '/?#'); + } + _host_escape($host); + $str = "$scheme//$ui$host$port$rest"; } return $class->SUPER::_uric_escape($str); } @@ -26,8 +32,8 @@ sub _host_escape { return if URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric]/; return if !URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric4host]/; eval { - require URI::_idna; - $_[0] = URI::_idna::encode($_[0]); + require URI::_idna; + $_[0] = URI::_idna::encode($_[0]); }; return 0 if $@; return 1; diff --git a/t/http.t b/t/http.t index aef92737..eccd0fa1 100644 --- a/t/http.t +++ b/t/http.t @@ -1,7 +1,7 @@ use strict; use warnings; -use Test::More tests => 16; +use Test::More tests => 76; use URI (); @@ -48,3 +48,51 @@ $u = URI->new("http://%65%78%61%6d%70%6c%65%2e%63%6f%6d/%70%75%62/%61/%32%30%30% is($u->canonical, "http://example.com/pub/a/2001/08/27/bjornstad.html"); ok($u->has_recognized_scheme); + +my $username = 'u1!"#$%&\'()*+,-./;<=>?@[\]^_`{|}~'; +my $exp_username = 'u1!%22%23$%&\'()*+,-.%2F;%3C=%3E%3F@%5B%5C%5D%5E_%60%7B%7C%7D~'; +my $password = 'p1!"#$%&\'()*+,-./;<=>?@[\]^_`{|}~'; +my $exp_password = 'p1!%22%23$%&\'()*+,-.%2F;%3C=%3E%3F@%5B%5C%5D%5E_%60%7B%7C%7D~'; +my $path = 'path/to/page'; +my $query = 'a=b&c=d'; +my %host = ( + '[::1]' => { + host => '::1', + port => 80, + }, + '[::1]:8080' => { + host => '::1', + port => 8080, + }, + '127.0.0.1' => { + host => '127.0.0.1', + port => 80, + }, + '127.0.0.1:8080' => { + host => '127.0.0.1', + port => 8080, + }, + 'localhost' => { + host => 'localhost', + port => 80, + }, + 'localhost:8080' => { + host => 'localhost', + port => 8080, + }, +); + +foreach my $host (keys %host) { + my $uri = URI->new("http://${username}:${password}\@${host}/${path}?${query}"); + is($uri->scheme, 'http'); + is($uri->userinfo, "${exp_username}:${exp_password}"); + is($uri->host, $host{$host}->{host}); + is($uri->port, $host{$host}->{port}); + is($uri->path, "/${path}"); + is($uri->query, $query); + is($uri->authority, "${exp_username}:${exp_password}\@${host}"); + is($uri->as_string, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); + is($uri->as_iri, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); + is($uri->canonical, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); +} + From ab3ff484d4100bb986a79de5a31560753bf88bc9 Mon Sep 17 00:00:00 2001 From: kastakhov <16296930+kastakhov@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:43:32 +0000 Subject: [PATCH 2/4] rename $ui variable to $userinfo --- lib/URI/_server.pm | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm index 784ae54e..4f29e6c0 100644 --- a/lib/URI/_server.pm +++ b/lib/URI/_server.pm @@ -13,17 +13,17 @@ sub _uric_escape { my($class, $str) = @_; if ($str =~ m,^((?:$URI::scheme_re:)?)//(.*:.*@)?([^/?\#]*)(.*)$,os) { my $scheme = $1; - my $ui = $2 || ''; + my $userinfo = $2 || ''; my $host = $3; my $rest = $4; my $port = $host =~ s/(:\d+)\z// ? $1 : ""; - if ($ui) { + if ($userinfo) { # escape /?# symbols as they are used # in subsequent regex for path parsing - $ui = uri_escape($ui, '/?#'); + $userinfo = uri_escape($userinfo, '/?#'); } _host_escape($host); - $str = "$scheme//$ui$host$port$rest"; + $str = "$scheme//$userinfo$host$port$rest"; } return $class->SUPER::_uric_escape($str); } @@ -45,11 +45,11 @@ sub as_iri { if ($str =~ /\bxn--/) { if ($str =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) { my($scheme, $host, $rest) = ($1, $2, $3); - my $ui = $host =~ s/(.*@)// ? $1 : ""; + my $userinfo = $host =~ s/(.*@)// ? $1 : ""; my $port = $host =~ s/(:\d+)\z// ? $1 : ""; require URI::_idna; $host = URI::_idna::decode($host); - $str = "$scheme//$ui$host$port$rest"; + $str = "$scheme//$userinfo$host$port$rest"; } } return $str; @@ -64,10 +64,10 @@ sub userinfo my $new = $old; $new = "" unless defined $new; $new =~ s/.*@//; # remove old stuff - my $ui = shift; - if (defined $ui) { - $ui =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego; - $new = "$ui\@$new"; + my $userinfo = shift; + if (defined $userinfo) { + $userinfo =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego; + $new = "$userinfo\@$new"; } $self->authority($new); } @@ -82,7 +82,7 @@ sub host if (@_) { my $tmp = $old; $tmp = "" unless defined $tmp; - my $ui = ($tmp =~ /(.*@)/) ? $1 : ""; + my $userinfo = ($tmp =~ /(.*@)/) ? $1 : ""; my $port = ($tmp =~ /(:\d+)$/) ? $1 : ""; my $new = shift; $new = "" unless defined $new; @@ -95,7 +95,7 @@ sub host $new = "[$new]" if $new =~ /:/ && $new !~ /^\[/; # IPv6 address _host_escape($new); } - $self->authority("$ui$new$port"); + $self->authority("$userinfo$new$port"); } return undef unless defined $old; $old =~ s/.*@//; From 6113394bb7ae798fffec6a1942dafd8bd295fed3 Mon Sep 17 00:00:00 2001 From: kastakhov <16296930+kastakhov@users.noreply.github.com> Date: Tue, 20 Aug 2024 22:11:51 +0000 Subject: [PATCH 3/4] make regex in _uric_escape more specific --- lib/URI/_server.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm index 4f29e6c0..d61ff6c8 100644 --- a/lib/URI/_server.pm +++ b/lib/URI/_server.pm @@ -11,7 +11,7 @@ our $VERSION = '5.29'; sub _uric_escape { my($class, $str) = @_; - if ($str =~ m,^((?:$URI::scheme_re:)?)//(.*:.*@)?([^/?\#]*)(.*)$,os) { + if ($str =~ m,^((?:$URI::scheme_re:)?)//([^:]+:.*@)?([^/?\#]*)(.*)$,os) { my $scheme = $1; my $userinfo = $2 || ''; my $host = $3; From 837315ca30a17673a0b2b603e6a090fecec55729 Mon Sep 17 00:00:00 2001 From: kastakhov <16296930+kastakhov@users.noreply.github.com> Date: Mon, 26 Aug 2024 21:59:48 +0000 Subject: [PATCH 4/4] revert changes in URI.pm make _uric_escape regex more specific for userinfo part update UT --- lib/URI.pm | 2 +- lib/URI/_server.pm | 2 +- t/http.t | 34 +++++++++++++++++++++------------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/lib/URI.pm b/lib/URI.pm index a12d0cf3..30c8e4e0 100644 --- a/lib/URI.pm +++ b/lib/URI.pm @@ -123,7 +123,7 @@ sub _fix_uric_escape_for_host_part { return; } - if ($_[0] =~ m{^((?:$URI::scheme_re:)?)//((.*:.*@)?[^/?\#]+)(.*)$}os) { + if ($_[0] =~ m{^((?:$URI::scheme_re:)?)//([^/?\#]+)(.*)$}os) { my $orig = $2; my ($user, $host) = $orig =~ /^(.*@)?([^@]*)$/; $user ||= ''; diff --git a/lib/URI/_server.pm b/lib/URI/_server.pm index d61ff6c8..5c8899ae 100644 --- a/lib/URI/_server.pm +++ b/lib/URI/_server.pm @@ -11,7 +11,7 @@ our $VERSION = '5.29'; sub _uric_escape { my($class, $str) = @_; - if ($str =~ m,^((?:$URI::scheme_re:)?)//([^:]+:.*@)?([^/?\#]*)(.*)$,os) { + if ($str =~ m,^((?:$URI::scheme_re:)?)//([^:]+:[^@]*@)?([^/?\#]*)(.*)$,os) { my $scheme = $1; my $userinfo = $2 || ''; my $host = $3; diff --git a/t/http.t b/t/http.t index eccd0fa1..69791abc 100644 --- a/t/http.t +++ b/t/http.t @@ -1,7 +1,7 @@ use strict; use warnings; -use Test::More tests => 76; +use Test::More tests => 256; use URI (); @@ -53,7 +53,12 @@ my $username = 'u1!"#$%&\'()*+,-./;<=>?@[\]^_`{|}~'; my $exp_username = 'u1!%22%23$%&\'()*+,-.%2F;%3C=%3E%3F@%5B%5C%5D%5E_%60%7B%7C%7D~'; my $password = 'p1!"#$%&\'()*+,-./;<=>?@[\]^_`{|}~'; my $exp_password = 'p1!%22%23$%&\'()*+,-.%2F;%3C=%3E%3F@%5B%5C%5D%5E_%60%7B%7C%7D~'; -my $path = 'path/to/page'; +my @path = qw( + path/to/page + path@to/page + path:@to/page + path:to@page/with@at +); my $query = 'a=b&c=d'; my %host = ( '[::1]' => { @@ -83,16 +88,19 @@ my %host = ( ); foreach my $host (keys %host) { - my $uri = URI->new("http://${username}:${password}\@${host}/${path}?${query}"); - is($uri->scheme, 'http'); - is($uri->userinfo, "${exp_username}:${exp_password}"); - is($uri->host, $host{$host}->{host}); - is($uri->port, $host{$host}->{port}); - is($uri->path, "/${path}"); - is($uri->query, $query); - is($uri->authority, "${exp_username}:${exp_password}\@${host}"); - is($uri->as_string, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); - is($uri->as_iri, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); - is($uri->canonical, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); + foreach my $path (@path) { + my $uri = URI->new("http://${username}:${password}\@${host}/${path}?${query}"); + is($uri->scheme, 'http'); + is($uri->userinfo, "${exp_username}:${exp_password}"); + is($uri->host, $host{$host}->{host}); + is($uri->port, $host{$host}->{port}); + is($uri->path, "/${path}"); + is($uri->query, $query); + is($uri->authority, "${exp_username}:${exp_password}\@${host}"); + is($uri->as_string, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); + is($uri->as_iri, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); + is($uri->canonical, "http://${exp_username}:${exp_password}\@${host}/${path}?${query}"); + } } +