Skip to content

Commit

Permalink
Merge pull request #1040 from blacksponge/implement-new-basic00
Browse files Browse the repository at this point in the history
add new basic00 implementation
  • Loading branch information
hannaeko authored Nov 28, 2022
2 parents afc7d27 + 9b248a6 commit e224292
Show file tree
Hide file tree
Showing 4 changed files with 489 additions and 0 deletions.
3 changes: 3 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ lib/Zonemaster/Engine/Logger.pm
lib/Zonemaster/Engine/Logger/Entry.pm
lib/Zonemaster/Engine/Nameserver.pm
lib/Zonemaster/Engine/Nameserver/Cache.pm
lib/Zonemaster/Engine/Normalization.pm
lib/Zonemaster/Engine/Normalization/Error.pm
lib/Zonemaster/Engine/Net/IP.pm
lib/Zonemaster/Engine/NSArray.pm
lib/Zonemaster/Engine/Overview.pod
Expand Down Expand Up @@ -75,6 +77,7 @@ t/nameserver-axfr.data
t/nameserver-axfr.t
t/nameserver.data
t/nameserver.t
t/normalization.t
t/old-bugs.data
t/old-bugs.t
t/pod-coverage.t
Expand Down
214 changes: 214 additions & 0 deletions lib/Zonemaster/Engine/Normalization.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
package Zonemaster::Engine::Normalization;

use 5.014002;

use utf8;
use strict;
use warnings;

use parent 'Exporter';

use Carp;
use Encode;
use Readonly;
use Try::Tiny;
use Zonemaster::LDNS;

use Zonemaster::Engine::Normalization::Error;


=head1 NAME
Zonemaster::Engine::Normalization - utility functions for names normalization
=head1 SYNOPSIS
use Zonemaster::Engine::Normalization;
my ($errors, $final_domain) = normalize_name($domain);
=head1 EXPORTED FUNCTIONS
=over
=cut


our @EXPORT = qw[ normalize_name ];
our @EXPORT_OK = qw[ normalize_name normalize_label ];

Readonly my $ASCII => qr/^[[:ascii:]]+$/;
Readonly my $VALID_ASCII => qr(^[A-Za-z0-9/_-]+$);

Readonly my $ASCII_FULL_STOP => "\x{002E}";
Readonly my $ASCII_FULL_STOP_RE => qr/\x{002E}/;
Readonly my %FULL_STOPS => (
FULLWIDTH_FULL_STOP => q/\x{FF0E}/,
IDEOGRAPHIC_FULL_STOP => q/\x{3002}/,
HALFWIDTH_IDEOGRAPHIC_FULL_STOP => q/\x{FF61}/
);
Readonly my $FULL_STOPS_RE => (sub {
my $re = '[' . (join '', values %FULL_STOPS) . ']';
return qr/$re/;
})->();
Readonly my %WHITE_SPACES => (
SPACE => q/\x{0020}/,
CHARACTER_TABULATION => q/\x{0009}/,
NO_BREAK_SPACE => q/\x{00A0}/,
EN_QUAD => q/\x{2000}/,
EM_QUAD => q/\x{2001}/,
EN_SPACE => q/\x{2002}/,
EM_SPACE => q/\x{2003}/,
THREE_PER_EM_SPACE => q/\x{2004}/,
FOUR_PER_EM_SPACE => q/\x{2005}/,
SIX_PER_EM_SPACE => q/\x{2006}/,
FIGURE_SPACE => q/\x{2007}/,
PUNCTUATION_SPACE => q/\x{2008}/,
THIN_SPACE => q/\x{2009}/,
HAIR_SPACE => q/\x{200A}/,
MEDIUM_MATHEMATICAL_SPACE => q/\x{205F}/,
IDEOGRAPHIC_SPACE => q/\x{3000}/,
OGHAM_SPACE_MARK => q/\x{1680}/,
);
Readonly my $WHITE_SPACES_RE => (sub {
my $re = '[' . (join '', values %WHITE_SPACES) . ']';
return qr/$re/;
})->();
Readonly my %AMBIGUOUS_CHARACTERS => (
"LATIN CAPITAL LETTER I WITH DOT ABOVE" => q/\x{0130}/,
);



=item normalize_label($label)
Normalize a single label from a domain name.
If the label is ASCII only, it is down cased, else it is converted according
to IDNA2008.
Downcasing of upper case non-ASCII characters, normalization to the Unicode
NFC format and conversion from U-label to A-label is performed by libidn2
using L<Zonemaster::LDNS/to_idn($name, ...)>.
Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $alabel: String)>.
In case of errors, the returned label will be undefined. If the method
succeeded an empty error array is returned.
=cut

sub normalize_label {
my ( $label ) = @_;
my @messages;

my $alabel = "";

if ( $label =~ $VALID_ASCII ) {
$alabel = lc $label;
} elsif ( $label =~ $ASCII ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(INVALID_ASCII => {label => $label});

return \@messages, undef;
} elsif ( Zonemaster::LDNS::has_idn ) {
try {
$alabel = Zonemaster::LDNS::to_idn($label);
} catch {
push @messages, Zonemaster::Engine::Normalization::Error->new(INVALID_U_LABEL => {label => $label});

return \@messages, undef;
}
} else {
croak 'The domain name contains at least one non-ASCII character and this installation of Zonemaster has no support for IDNA.';
}

if ( length($alabel) > 63 ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(LABEL_TOO_LONG => {label => $label});
return \@messages, undef;
}

return \@messages, $alabel;
}

=item normalize_name($name)
Normalize a domain name.
The normalization process is detailed in the L<normalization document|https://github.com/zonemaster/zonemaster/blob/master/docs/specifications/tests/RequirementsAndNormalizationOfDomainNames.md>.
Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $name: String)>.
In case of errors, the returned name will be undefined. If the method succeeded
an empty error array is returned.
=cut

sub normalize_name {
my ( $uname ) = @_;
my @messages;

$uname =~ s/^${$WHITE_SPACES_RE}+//;
$uname =~ s/${WHITE_SPACES_RE}+$//;

if ( length($uname) == 0 ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(EMPTY_DOMAIN_NAME => {});
return \@messages, undef;
}

foreach my $char_name ( keys %AMBIGUOUS_CHARACTERS ) {
my $char = $AMBIGUOUS_CHARACTERS{$char_name};
if ( $uname =~ m/${char}/) {
push @messages, Zonemaster::Engine::Normalization::Error->new(AMBIGUOUS_DOWNCASING => { unicode_name => $char_name });
}
}

if ( @messages ) {
return \@messages, undef;
}

$uname =~ s/${FULL_STOPS_RE}/${ASCII_FULL_STOP}/g;

if ( $uname eq $ASCII_FULL_STOP ) {
return \@messages, $uname;
}

if ( $uname =~ m/^${ASCII_FULL_STOP_RE}/ ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(INITIAL_DOT => {});
return \@messages, undef;
}

if ( $uname =~ m/${ASCII_FULL_STOP_RE}{2,}/ ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(REPEATED_DOTS => {});
return \@messages, undef;
}

$uname =~ s/${ASCII_FULL_STOP_RE}$//g;

my @labels = split $ASCII_FULL_STOP_RE, $uname;
my @label_results = map { [ normalize_label($_) ] } @labels;
my @label_errors = map { @{$_->[0]} } @label_results;

push @messages, @label_errors;

if ( @messages ) {
return \@messages, undef;
}

my @label_ok = map { $_->[1] } @label_results;

my $final_name = join '.', @label_ok;

if ( length($final_name) > 253 ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(DOMAIN_NAME_TOO_LONG => {});
return \@messages, undef;
}

return \@messages, $final_name;
}


=back
=cut

1;
147 changes: 147 additions & 0 deletions lib/Zonemaster/Engine/Normalization/Error.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package Zonemaster::Engine::Normalization::Error;

use strict;
use warnings;

use Carp;
use Readonly;
use Locale::TextDomain qw[Zonemaster-Engine];

use overload '""' => \&string;


=head1 NAME
Zonemaster::Engine::Normalization::Error - normalization error class
=head1 SYNOPSIS
use Zonemaster::Engine::Normalization::Error;
my $error = Zonemaster::Engine::Normalization::Error->new(LABEL_TOO_LONG => {label => $label});
=cut


Readonly my %ERRORS => (
AMBIGUOUS_DOWNCASING => {
message => N__ 'Ambiguous downcaseing of character "{unicode_name}" in the domain name. Use all lower case instead.',
arguments => [ 'unicode_name' ]
},
DOMAIN_NAME_TOO_LONG => {
message => N__ 'Domain name is too long (more than 253 characters with no final dot).',
},
EMPTY_DOMAIN_NAME => {
message => N__ 'Domain name is empty.'
},
INITIAL_DOT => {
message => N__ 'Domain name starts with dot.'
},
INVALID_ASCII => {
message => N__ 'Domain name has an ASCII label ("{label}") with a character not permitted.',
arguments => [ 'label' ]
},
INVALID_U_LABEL => {
message => N__ 'Domain name has a non-ASCII label ("{label}") which is not a valid U-label.',
arguments => [ 'label' ]
},
REPEATED_DOTS => {
message => N__ 'Domain name has repeated dots.'
},
LABEL_TOO_LONG => {
message => N__ 'Domain name has a label that is too long (more than 63 characters), "{label}".',
arguments => [ 'label' ]
},
);

=head1 ATTRIBUTES
=over
=item tag
The message tag associated to the error.
=item params
The error message parameters to use in the message string.
=back
=head1 METHODS
=over
=item new($tag, $params)
Creates and returns a new error object.
This function will croak if there is a missing parameter for the given tag.
=cut

sub new {
my ( $proto, $tag, $params ) = @_;
my $class = ref $proto || $proto;

if ( !exists $ERRORS{$tag} ) {
croak 'Unknown error tag.';
}

my $obj = { tag => $tag, params => {} };

if ( exists $ERRORS{$tag}->{arguments} ) {
foreach my $arg ( @{$ERRORS{$tag}->{arguments}} ) {
if ( !exists $params->{$arg} ) {
croak "Missing arguments $arg.";
}
$obj->{params}->{$arg} = $params->{$arg};
}
}

return bless $obj, $class;
}


=item message
Returns the translated error message using the parameters given when creating the object.
=cut

sub message {
my ( $self ) = @_;
return __x $ERRORS{$self->{tag}}->{message}, %{$self->{params}};
}


=item tag
Returns the message tag asscociated to the error.
=cut

sub tag {
my ( $self ) = @_;

return $self->{tag};
}

=item string
Returns a string representation of the error object. Equivalent to message().
=cut

sub string {
my ( $self ) = @_;

return $self->message;
}


=back
=cut

1;
Loading

0 comments on commit e224292

Please sign in to comment.