-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add new basic00 implementation #1040
Changes from 23 commits
c307996
94ce900
ede669a
73e8c8e
cb40bd8
ff4a134
c2a2791
9a2f429
84adfdd
f7bf3c1
b00402d
a1331cf
784514f
a4208c6
91832c0
1dadd10
0179944
9f144f8
acecfa1
d8a8162
35f9c4b
0d54295
61f5d9b
a354e4c
f9095e1
9b248a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
package Zonemaster::Engine::Normalization; | ||
|
||
use 5.014002; | ||
|
||
use utf8; | ||
use strict; | ||
use warnings; | ||
|
||
use parent 'Exporter'; | ||
|
||
use Carp; | ||
use Encode; | ||
use Readonly; | ||
use Try::Tiny; | ||
use Zonemaster::LDNS; | ||
use Data::Dumper; | ||
|
||
use Zonemaster::Engine::Normalization::Error; | ||
|
||
|
||
=head1 NAME | ||
|
||
Zonemaster::Engine::Normalization - utility functions for names normalization | ||
|
||
|
||
=head1 SYNOPSIS | ||
|
||
use Zonemaster::Engine::Normalization; | ||
|
||
my ($errors, $final_domain) = normalize_name($domain); | ||
|
||
=head1 EXPORTED FUNCTIONS | ||
|
||
=over | ||
=cut | ||
|
||
|
||
our @EXPORT = qw[ normalize_name ]; | ||
our @EXPORT_OK = qw[ normalize_name normalize_label ]; | ||
|
||
Readonly my $ASCII => qr/^[[:ascii:]]+$/; | ||
Readonly my $VALID_ASCII => qr(^[A-Za-z0-9/_-]+$); | ||
|
||
Readonly my $ASCII_FULL_STOP => "\x{002E}"; | ||
Readonly my $ASCII_FULL_STOP_RE => qr/\x{002E}/; | ||
Readonly my %FULL_STOPS => ( | ||
FULLWIDTH_FULL_STOP => q/\x{FF0E}/, | ||
IDEOGRAPHIC_FULL_STOP => q/\x{3002}/, | ||
HALFWIDTH_IDEOGRAPHIC_FULL_STOP => q/\x{FF61}/ | ||
); | ||
Readonly my $FULL_STOPS_RE => (sub { | ||
my $re = '[' . (join '', values %FULL_STOPS) . ']'; | ||
return qr/$re/; | ||
})->(); | ||
Readonly my %WHITE_SPACES => ( | ||
SPACE => q/\x{0020}/, | ||
CHARACTER_TABULATION => q/\x{0009}/, | ||
NO_BREAK_SPACE => q/\x{00A0}/, | ||
EN_QUAD => q/\x{2000}/, | ||
EM_QUAD => q/\x{2001}/, | ||
EN_SPACE => q/\x{2002}/, | ||
EM_SPACE => q/\x{2003}/, | ||
THREE_PER_EM_SPACE => q/\x{2004}/, | ||
FOUR_PER_EM_SPACE => q/\x{2005}/, | ||
SIX_PER_EM_SPACE => q/\x{2006}/, | ||
FIGURE_SPACE => q/\x{2007}/, | ||
PUNCTUATION_SPACE => q/\x{2008}/, | ||
THIN_SPACE => q/\x{2009}/, | ||
HAIR_SPACE => q/\x{200A}/, | ||
MEDIUM_MATHEMATICAL_SPACE => q/\x{205F}/, | ||
IDEOGRAPHIC_SPACE => q/\x{3000}/, | ||
OGHAM_SPACE_MARK => q/\x{1680}/, | ||
); | ||
Readonly my $WHITE_SPACES_RE => (sub { | ||
my $re = '[' . (join '', values %WHITE_SPACES) . ']'; | ||
return qr/$re/; | ||
})->(); | ||
Readonly my %AMBIGUOUS_CHARACTERS => ( | ||
"LATIN CAPITAL LETTER I WITH DOT ABOVE" => q/\x{0130}/, | ||
); | ||
|
||
|
||
|
||
=item normalize_label($label) | ||
|
||
Normalize a single label from a domain name. | ||
|
||
If the label is ASCII only, it is down cased, else it is converted according | ||
to IDNA2008. | ||
|
||
Downcasing of upper case non-ASCII characters, normalization to the Unicode | ||
NFC format and conversion from U-label to A-label is performed by libidn2 | ||
using L<Zonemaster::LDNS/to_idn($name, ...)>. | ||
|
||
Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $alabel: String)>. | ||
|
||
In case of errors, the returned label will be undefined. If the method | ||
succeeded an empty error array is returned. | ||
|
||
=cut | ||
|
||
sub normalize_label { | ||
my ( $label ) = @_; | ||
my @messages; | ||
|
||
my $alabel = ""; | ||
|
||
if ( $label =~ $VALID_ASCII ) { | ||
$alabel = lc $label; | ||
} elsif ( $label =~ $ASCII ) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(INVALID_ASCII => {label => $label}); | ||
|
||
return \@messages, undef; | ||
} elsif (Zonemaster::LDNS::has_idn) { | ||
try { | ||
$alabel = Zonemaster::LDNS::to_idn($label); | ||
} catch { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(INVALID_U_LABEL => {label => $label}); | ||
|
||
return \@messages, undef; | ||
} | ||
} else { | ||
croak 'The domain name contains at least one non-ASCII character and this installation of Zonemaster has no support for IDNA.'; | ||
} | ||
|
||
if ( length($alabel) > 63) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(LABEL_TOO_LONG => {label => $label}); | ||
return \@messages, undef; | ||
} | ||
|
||
return \@messages, $alabel; | ||
} | ||
|
||
=item normalize_name($name) | ||
|
||
Normalize a domain name. | ||
|
||
|
||
The normalization process is detailed in the L<normalization document|https://github.com/zonemaster/zonemaster/blob/master/docs/specifications/tests/RequirementsAndNormalizationOfDomainNames.md>. | ||
|
||
Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $name: String)>. | ||
|
||
In case of errors, the returned name will be undefined. If the method succeeded | ||
an empty error array is returned. | ||
|
||
=cut | ||
|
||
sub normalize_name { | ||
my ( $uname ) = @_; | ||
my @messages; | ||
|
||
$uname =~ s/^${$WHITE_SPACES_RE}+//; | ||
$uname =~ s/${WHITE_SPACES_RE}+$//; | ||
|
||
if (length($uname) == 0) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(EMPTY_DOMAIN_NAME => {}); | ||
return \@messages, undef; | ||
} | ||
|
||
foreach my $char_name (keys %AMBIGUOUS_CHARACTERS) { | ||
my $char = $AMBIGUOUS_CHARACTERS{$char_name}; | ||
if ($uname =~ m/${char}/) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(AMBIGUOUS_DOWNCASING => { unicode_name => $char_name }); | ||
} | ||
} | ||
|
||
if ( @messages ) { | ||
return \@messages, undef; | ||
} | ||
|
||
$uname =~ s/${FULL_STOPS_RE}/${ASCII_FULL_STOP}/g; | ||
|
||
if ( $uname eq $ASCII_FULL_STOP ) { | ||
return \@messages, $uname; | ||
} | ||
|
||
if ($uname =~ m/^${ASCII_FULL_STOP_RE}/) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(INITIAL_DOT => {}); | ||
return \@messages, undef; | ||
} | ||
|
||
if ($uname =~ m/${ASCII_FULL_STOP_RE}{2,}/ ) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(REPEATED_DOTS => {}); | ||
return \@messages, undef; | ||
} | ||
|
||
$uname =~ s/${ASCII_FULL_STOP_RE}$//g; | ||
|
||
my @labels = split $ASCII_FULL_STOP_RE, $uname; | ||
my @label_results = map { [ normalize_label($_) ] } @labels; | ||
my @label_errors = map { @{$_->[0]} } @label_results; | ||
|
||
push @messages, @label_errors; | ||
|
||
if ( @messages ) { | ||
return \@messages, undef; | ||
} | ||
|
||
my @label_ok = map { $_->[1] } @label_results; | ||
|
||
my $final_name = join '.', @label_ok; | ||
|
||
if (length($final_name) > 253) { | ||
push @messages, Zonemaster::Engine::Normalization::Error->new(DOMAIN_NAME_TOO_LONG => {}); | ||
return \@messages, undef; | ||
} | ||
|
||
return \@messages, $final_name; | ||
} | ||
|
||
|
||
=back | ||
=cut | ||
|
||
1; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
package Zonemaster::Engine::Normalization::Error; | ||
|
||
use strict; | ||
use warnings; | ||
|
||
use Carp; | ||
use Readonly; | ||
use Locale::TextDomain qw[Zonemaster-Engine]; | ||
|
||
use overload '""' => \&string; | ||
|
||
|
||
=head1 NAME | ||
|
||
Zonemaster::Engine::Normalization::Error - normalization error class | ||
|
||
|
||
=head1 SYNOPSIS | ||
|
||
use Zonemaster::Engine::Normalization::Error; | ||
|
||
my $error = Zonemaster::Engine::Normalization::Error->new(LABEL_TOO_LONG => {label => $label}); | ||
|
||
=cut | ||
|
||
|
||
Readonly my %ERRORS => ( | ||
AMBIGUOUS_DOWNCASING => { | ||
message => N__ 'Ambiguous downcaseing of character "{unicode_name}" in the domain name. Use all lower case instead.', | ||
arguments => [ 'unicode_name' ] | ||
}, | ||
DOMAIN_NAME_TOO_LONG => { | ||
message => N__ 'Domain name is too long (more than 253 characters with no final dot).', | ||
}, | ||
EMPTY_DOMAIN_NAME => { | ||
message => N__ 'Domain name is empty.' | ||
}, | ||
INITIAL_DOT => { | ||
message => N__ 'Domain name starts with dot.' | ||
}, | ||
INVALID_ASCII => { | ||
message => N__ 'Domain name has an ASCII label ("{label}") with a character not permitted.', | ||
arguments => [ 'label' ] | ||
}, | ||
INVALID_U_LABEL => { | ||
message => N__ 'Domain name has a non-ASCII label ("{label}") which is not a valid U-label.', | ||
arguments => [ 'label' ] | ||
}, | ||
REPEATED_DOTS => { | ||
message => N__ 'Domain name has repeated dots.' | ||
}, | ||
LABEL_TOO_LONG => { | ||
message => N__ 'Domain name has a label that is too long (more than 63 characters), "{label}".', | ||
arguments => [ 'label' ] | ||
}, | ||
); | ||
|
||
=head1 ATTRIBUTES | ||
|
||
=over | ||
|
||
=item tag | ||
|
||
The message tag asscociated to the error. | ||
|
||
=item params | ||
|
||
The error message parameters to use in the message string. | ||
|
||
=back | ||
|
||
=head1 METHODS | ||
|
||
=over | ||
|
||
=item new($tag, $params) | ||
|
||
Creates and returns a new error object. | ||
This function will croak if there is a missing parameter for the given tag. | ||
|
||
=cut | ||
|
||
sub new { | ||
my ( $proto, $tag, $params ) = @_; | ||
my $class = ref $proto || $proto; | ||
|
||
if (!exists $ERRORS{$tag}) { | ||
croak 'Unknown error tag.'; | ||
} | ||
|
||
my $obj = { tag => $tag, params => {} }; | ||
|
||
if (exists $ERRORS{$tag}->{arguments}) { | ||
foreach my $arg ( @{$ERRORS{$tag}->{arguments}} ) { | ||
if (!exists $params->{$arg} ) { | ||
croak "Missing arguments $arg."; | ||
} | ||
$obj->{params}->{$arg} = $params->{$arg}; | ||
} | ||
} | ||
|
||
return bless $obj, $class; | ||
} | ||
|
||
|
||
=item message | ||
|
||
Returns the translated error message using the parameters given when creating the object. | ||
|
||
=cut | ||
|
||
sub message { | ||
my ( $self ) = @_; | ||
return __x $ERRORS{$self->{tag}}->{message}, %{$self->{params}}; | ||
} | ||
|
||
|
||
=item tag | ||
|
||
Returns the message tag asscociated to the error. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. asscociated -> associated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
||
=cut | ||
|
||
sub tag { | ||
my ( $self ) = @_; | ||
|
||
return $self->{tag}; | ||
} | ||
|
||
=item string | ||
|
||
Returns a string representation of the error object, equivalent to message. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. error object, equivalent to message. -> error object. Equivalent to message(). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
||
=cut | ||
|
||
sub string { | ||
my ( $self ) = @_; | ||
|
||
return $self->message; | ||
} | ||
|
||
|
||
=back | ||
|
||
=cut | ||
|
||
1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be safely removed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed