Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new basic00 implementation #1040

Merged
merged 26 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ lib/Zonemaster/Engine/Logger.pm
lib/Zonemaster/Engine/Logger/Entry.pm
lib/Zonemaster/Engine/Nameserver.pm
lib/Zonemaster/Engine/Nameserver/Cache.pm
lib/Zonemaster/Engine/Normalization.pm
lib/Zonemaster/Engine/Normalization/Error.pm
lib/Zonemaster/Engine/Net/IP.pm
lib/Zonemaster/Engine/NSArray.pm
lib/Zonemaster/Engine/Overview.pod
Expand Down Expand Up @@ -74,6 +76,7 @@ t/nameserver-axfr.data
t/nameserver-axfr.t
t/nameserver.data
t/nameserver.t
t/normalization.t
t/old-bugs.data
t/old-bugs.t
t/pod-coverage.t
Expand Down
215 changes: 215 additions & 0 deletions lib/Zonemaster/Engine/Normalization.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
package Zonemaster::Engine::Normalization;

use 5.014002;

use utf8;
use strict;
use warnings;

use parent 'Exporter';

use Carp;
use Encode;
use Readonly;
use Try::Tiny;
use Zonemaster::LDNS;
use Data::Dumper;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be safely removed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed


use Zonemaster::Engine::Normalization::Error;


=head1 NAME

Zonemaster::Engine::Normalization - utility functions for names normalization


=head1 SYNOPSIS

use Zonemaster::Engine::Normalization;

my ($errors, $final_domain) = normalize_name($domain);

=head1 EXPORTED FUNCTIONS

=over
=cut


our @EXPORT = qw[ normalize_name ];
our @EXPORT_OK = qw[ normalize_name normalize_label ];

Readonly my $ASCII => qr/^[[:ascii:]]+$/;
Readonly my $VALID_ASCII => qr(^[A-Za-z0-9/_-]+$);

Readonly my $ASCII_FULL_STOP => "\x{002E}";
Readonly my $ASCII_FULL_STOP_RE => qr/\x{002E}/;
Readonly my %FULL_STOPS => (
FULLWIDTH_FULL_STOP => q/\x{FF0E}/,
IDEOGRAPHIC_FULL_STOP => q/\x{3002}/,
HALFWIDTH_IDEOGRAPHIC_FULL_STOP => q/\x{FF61}/
);
Readonly my $FULL_STOPS_RE => (sub {
my $re = '[' . (join '', values %FULL_STOPS) . ']';
return qr/$re/;
})->();
Readonly my %WHITE_SPACES => (
SPACE => q/\x{0020}/,
CHARACTER_TABULATION => q/\x{0009}/,
NO_BREAK_SPACE => q/\x{00A0}/,
EN_QUAD => q/\x{2000}/,
EM_QUAD => q/\x{2001}/,
EN_SPACE => q/\x{2002}/,
EM_SPACE => q/\x{2003}/,
THREE_PER_EM_SPACE => q/\x{2004}/,
FOUR_PER_EM_SPACE => q/\x{2005}/,
SIX_PER_EM_SPACE => q/\x{2006}/,
FIGURE_SPACE => q/\x{2007}/,
PUNCTUATION_SPACE => q/\x{2008}/,
THIN_SPACE => q/\x{2009}/,
HAIR_SPACE => q/\x{200A}/,
MEDIUM_MATHEMATICAL_SPACE => q/\x{205F}/,
IDEOGRAPHIC_SPACE => q/\x{3000}/,
OGHAM_SPACE_MARK => q/\x{1680}/,
);
Readonly my $WHITE_SPACES_RE => (sub {
my $re = '[' . (join '', values %WHITE_SPACES) . ']';
return qr/$re/;
})->();
Readonly my %AMBIGUOUS_CHARACTERS => (
"LATIN CAPITAL LETTER I WITH DOT ABOVE" => q/\x{0130}/,
);



=item normalize_label($label)

Normalize a single label from a domain name.

If the label is ASCII only, it is down cased, else it is converted according
to IDNA2008.

Downcasing of upper case non-ASCII characters, normalization to the Unicode
NFC format and conversion from U-label to A-label is performed by libidn2
using L<Zonemaster::LDNS/to_idn($name, ...)>.

Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $alabel: String)>.

In case of errors, the returned label will be undefined. If the method
succeeded an empty error array is returned.

=cut

sub normalize_label {
my ( $label ) = @_;
my @messages;

my $alabel = "";

if ( $label =~ $VALID_ASCII ) {
$alabel = lc $label;
} elsif ( $label =~ $ASCII ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(INVALID_ASCII => {label => $label});

return \@messages, undef;
} elsif (Zonemaster::LDNS::has_idn) {
try {
$alabel = Zonemaster::LDNS::to_idn($label);
} catch {
push @messages, Zonemaster::Engine::Normalization::Error->new(INVALID_U_LABEL => {label => $label});

return \@messages, undef;
}
} else {
croak 'The domain name contains at least one non-ASCII character and this installation of Zonemaster has no support for IDNA.';
}

if ( length($alabel) > 63) {
push @messages, Zonemaster::Engine::Normalization::Error->new(LABEL_TOO_LONG => {label => $label});
return \@messages, undef;
}

return \@messages, $alabel;
}

=item normalize_name($name)

Normalize a domain name.


The normalization process is detailed in the L<normalization document|https://github.com/zonemaster/zonemaster/blob/master/docs/specifications/tests/RequirementsAndNormalizationOfDomainNames.md>.

Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $name: String)>.

In case of errors, the returned name will be undefined. If the method succeeded
an empty error array is returned.

=cut

sub normalize_name {
my ( $uname ) = @_;
my @messages;

$uname =~ s/^${$WHITE_SPACES_RE}+//;
$uname =~ s/${WHITE_SPACES_RE}+$//;

if (length($uname) == 0) {
push @messages, Zonemaster::Engine::Normalization::Error->new(EMPTY_DOMAIN_NAME => {});
return \@messages, undef;
}

foreach my $char_name (keys %AMBIGUOUS_CHARACTERS) {
my $char = $AMBIGUOUS_CHARACTERS{$char_name};
if ($uname =~ m/${char}/) {
push @messages, Zonemaster::Engine::Normalization::Error->new(AMBIGUOUS_DOWNCASING => { unicode_name => $char_name });
}
}

if ( @messages ) {
return \@messages, undef;
}

$uname =~ s/${FULL_STOPS_RE}/${ASCII_FULL_STOP}/g;

if ( $uname eq $ASCII_FULL_STOP ) {
return \@messages, $uname;
}

if ($uname =~ m/^${ASCII_FULL_STOP_RE}/) {
push @messages, Zonemaster::Engine::Normalization::Error->new(INITIAL_DOT => {});
return \@messages, undef;
}

if ($uname =~ m/${ASCII_FULL_STOP_RE}{2,}/ ) {
push @messages, Zonemaster::Engine::Normalization::Error->new(REPEATED_DOTS => {});
return \@messages, undef;
}

$uname =~ s/${ASCII_FULL_STOP_RE}$//g;

my @labels = split $ASCII_FULL_STOP_RE, $uname;
my @label_results = map { [ normalize_label($_) ] } @labels;
my @label_errors = map { @{$_->[0]} } @label_results;

push @messages, @label_errors;

if ( @messages ) {
return \@messages, undef;
}

my @label_ok = map { $_->[1] } @label_results;

my $final_name = join '.', @label_ok;

if (length($final_name) > 253) {
push @messages, Zonemaster::Engine::Normalization::Error->new(DOMAIN_NAME_TOO_LONG => {});
return \@messages, undef;
}

return \@messages, $final_name;
}


=back
=cut

1;
147 changes: 147 additions & 0 deletions lib/Zonemaster/Engine/Normalization/Error.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package Zonemaster::Engine::Normalization::Error;

use strict;
use warnings;

use Carp;
use Readonly;
use Locale::TextDomain qw[Zonemaster-Engine];

use overload '""' => \&string;


=head1 NAME

Zonemaster::Engine::Normalization::Error - normalization error class


=head1 SYNOPSIS

use Zonemaster::Engine::Normalization::Error;

my $error = Zonemaster::Engine::Normalization::Error->new(LABEL_TOO_LONG => {label => $label});

=cut


Readonly my %ERRORS => (
AMBIGUOUS_DOWNCASING => {
message => N__ 'Ambiguous downcaseing of character "{unicode_name}" in the domain name. Use all lower case instead.',
arguments => [ 'unicode_name' ]
},
DOMAIN_NAME_TOO_LONG => {
message => N__ 'Domain name is too long (more than 253 characters with no final dot).',
},
EMPTY_DOMAIN_NAME => {
message => N__ 'Domain name is empty.'
},
INITIAL_DOT => {
message => N__ 'Domain name starts with dot.'
},
INVALID_ASCII => {
message => N__ 'Domain name has an ASCII label ("{label}") with a character not permitted.',
arguments => [ 'label' ]
},
INVALID_U_LABEL => {
message => N__ 'Domain name has a non-ASCII label ("{label}") which is not a valid U-label.',
arguments => [ 'label' ]
},
REPEATED_DOTS => {
message => N__ 'Domain name has repeated dots.'
},
LABEL_TOO_LONG => {
message => N__ 'Domain name has a label that is too long (more than 63 characters), "{label}".',
arguments => [ 'label' ]
},
);

=head1 ATTRIBUTES

=over

=item tag

The message tag asscociated to the error.

=item params

The error message parameters to use in the message string.

=back

=head1 METHODS

=over

=item new($tag, $params)

Creates and returns a new error object.
This function will croak if there is a missing parameter for the given tag.

=cut

sub new {
my ( $proto, $tag, $params ) = @_;
my $class = ref $proto || $proto;

if (!exists $ERRORS{$tag}) {
croak 'Unknown error tag.';
}

my $obj = { tag => $tag, params => {} };

if (exists $ERRORS{$tag}->{arguments}) {
foreach my $arg ( @{$ERRORS{$tag}->{arguments}} ) {
if (!exists $params->{$arg} ) {
croak "Missing arguments $arg.";
}
$obj->{params}->{$arg} = $params->{$arg};
}
}

return bless $obj, $class;
}


=item message

Returns the translated error message using the parameters given when creating the object.

=cut

sub message {
my ( $self ) = @_;
return __x $ERRORS{$self->{tag}}->{message}, %{$self->{params}};
}


=item tag

Returns the message tag asscociated to the error.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

asscociated -> associated

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed


=cut

sub tag {
my ( $self ) = @_;

return $self->{tag};
}

=item string

Returns a string representation of the error object, equivalent to message.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

error object, equivalent to message. -> error object. Equivalent to message().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed


=cut

sub string {
my ( $self ) = @_;

return $self->message;
}


=back

=cut

1;
Loading