forked from duckduckgo/zeroclickinfo-spice
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
211 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
package DDG::SpiceRole::NumberStyle; | ||
# ABSTRACT: An object representing a particular numerical notation. | ||
|
||
use strict; | ||
use warnings; | ||
|
||
use Moo; | ||
|
||
has [qw(id decimal thousands)] => ( | ||
is => 'ro', | ||
); | ||
|
||
has exponential => ( | ||
is => 'ro', | ||
default => sub { 'e' }, | ||
); | ||
|
||
has number_regex => ( | ||
is => 'lazy', | ||
); | ||
|
||
sub _build_number_regex { | ||
my $self = shift; | ||
my ($decimal, $thousands, $exponential) = ($self->decimal, $self->thousands, $self->exponential); | ||
|
||
return qr/-?[\d\Q$decimal\E\Q$thousands\E]+(?:\Q$exponential\E-?\d+)?/; | ||
} | ||
|
||
sub understands { | ||
my ($self, $number) = @_; | ||
my ($decimal, $thousands) = ($self->decimal, $self->thousands); | ||
|
||
# How do we know if a number is reasonable for this style? | ||
# This assumes the exponentials are not included to give better answers. | ||
return ( | ||
# The number must contain only things we understand: numerals and separators for this style. | ||
$number =~ /^-?(|\d|\Q$thousands\E|\Q$decimal\E)+$/ | ||
&& ( | ||
# The number is not required to contain thousands separators | ||
$number !~ /\Q$thousands\E/ | ||
|| ( | ||
# But if the number does contain thousands separators, they must delimit exactly 3 numerals. | ||
$number !~ /\Q$thousands\E\d{1,2}\b/ | ||
&& $number !~ /\Q$thousands\E\d{4,}/ | ||
# And cannot follow a leading zero | ||
&& $number !~ /^0\Q$thousands\E/ | ||
)) | ||
&& ( | ||
# The number is not required to include decimal separators | ||
$number !~ /\Q$decimal\E/ | ||
# But if one is included, it cannot be followed by another separator, whether decimal or thousands. | ||
|| $number !~ /\Q$decimal\E(?:.*)?(?:\Q$decimal\E|\Q$thousands\E)/ | ||
)) ? 1 : 0; | ||
} | ||
|
||
sub precision_of { | ||
my ($self, $number_text) = @_; | ||
my $decimal = $self->decimal; | ||
|
||
return ($number_text =~ /\Q$decimal\E(\d+)/) ? length($1) : 0; | ||
} | ||
|
||
sub for_computation { | ||
my ($self, $number_text) = @_; | ||
my ($decimal, $thousands, $exponential) = ($self->decimal, $self->thousands, $self->exponential); | ||
|
||
$number_text =~ s/\Q$thousands\E//g; # Remove thousands seps, since they are just visual. | ||
$number_text =~ s/\Q$decimal\E/./g; # Make sure decimal mark is something perl knows how to use. | ||
if ($number_text =~ s/^([\d$decimal$thousands]+)\Q$exponential\E(-?[\d$decimal$thousands]+)$/$1e$2/ig) { | ||
# Convert to perl style exponentials and then make into human-style floats. | ||
$number_text = sprintf('%f', $number_text); | ||
} | ||
|
||
return $number_text; | ||
} | ||
|
||
sub for_display { | ||
my ($self, $number_text) = @_; | ||
my ($decimal, $thousands, $exponential) = ($self->decimal, $self->thousands, $self->exponential); | ||
|
||
if ($number_text =~ /(.*)\Q$exponential\E([+-]?\d+)/i) { | ||
$number_text = $self->for_display($1) . ' * 10^' . $self->for_display(int $2); | ||
} else { | ||
$number_text = reverse $number_text; | ||
$number_text =~ s/\./$decimal/g; # Perl decimal mark to whatever we need. | ||
$number_text =~ s/(\d{3})(?=\d)(?!\d*\Q$decimal\E)/$1$thousands/g; | ||
$number_text = reverse $number_text; | ||
} | ||
|
||
return $number_text; | ||
} | ||
|
||
# The display version with HTML added: | ||
# - superscripted exponents | ||
sub with_html { | ||
my ($self, $number_text) = @_; | ||
|
||
return $self->_add_html_exponents($self->for_display($number_text)); | ||
} | ||
|
||
sub _add_html_exponents { | ||
|
||
my ($self, $string) = @_; | ||
|
||
return $string if ($string !~ /\^/ or $string =~ /^\^|\^$/); # Give back the same thing if we won't deal with it properly. | ||
|
||
my @chars = split //, $string; | ||
my $number_re = $self->number_regex; | ||
my ($start_tag, $end_tag) = ('<sup>', '</sup>'); | ||
my ($newly_up, $in_exp_number, $in_exp_parens, %power_parens); | ||
my ($parens_count, $number_up) = (0, 0); | ||
|
||
# because of associativity and power-to-power, we need to scan nearly the whole thing | ||
for my $index (1 .. $#chars - 1) { | ||
my $this_char = $chars[$index]; | ||
if ($this_char =~ $number_re or ($newly_up && $this_char eq '-')) { | ||
if ($newly_up) { | ||
$in_exp_number = 1; | ||
$newly_up = 0; | ||
} | ||
} elsif ($this_char eq '(') { | ||
$parens_count += 1; | ||
$in_exp_number = 0; | ||
if ($newly_up) { | ||
$in_exp_parens += 1; | ||
$power_parens{$parens_count} = 1; | ||
$newly_up = 0; | ||
} | ||
} elsif ($this_char eq '^') { | ||
$chars[$index - 1] =~ s/$end_tag$//; # Added too soon! | ||
$number_up += 1; | ||
$newly_up = 1; | ||
$chars[$index] = $start_tag; # Replace ^ with the tag. | ||
} elsif ($in_exp_number) { | ||
$in_exp_number = 0; | ||
$number_up -= 1; | ||
$chars[$index] = $end_tag . $chars[$index]; | ||
} elsif ($number_up && !$in_exp_parens) { | ||
# Must have ended another term or more | ||
$chars[$index] = ($end_tag x ($number_up - 1)) . $chars[$index]; | ||
$number_up = 0; | ||
} elsif ($this_char eq ')') { | ||
# We just closed a set of parens, see if it closes one of our things | ||
if ($in_exp_parens && $power_parens{$parens_count}) { | ||
$chars[$index] .= $end_tag; | ||
delete $power_parens{$parens_count}; | ||
$in_exp_parens -= 1; | ||
} | ||
$parens_count -= 1; | ||
} | ||
} | ||
$chars[-1] .= $end_tag x $number_up if ($number_up); | ||
|
||
return join('', @chars); | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package DDG::SpiceRole::NumberStyler; | ||
# ABSTRACT: A role to allow Goodies to recognize and work with numbers in different notations. | ||
|
||
use strict; | ||
use warnings; | ||
|
||
use Moo::Role; | ||
use DDG::SpiceRole::NumberStyle; | ||
|
||
use List::Util qw( all first ); | ||
|
||
# If it could fit more than one the first in order gets preference. | ||
my @known_styles = ( | ||
DDG::SpiceRole::NumberStyle->new({ | ||
id => 'perl', | ||
decimal => '.', | ||
thousands => ',', | ||
} | ||
), | ||
DDG::SpiceRole::NumberStyle->new({ | ||
id => 'euro', | ||
decimal => ',', | ||
thousands => '.', | ||
} | ||
), | ||
); | ||
|
||
sub number_style_regex { | ||
my $return_regex = join '|', map { $_->number_regex } @known_styles; | ||
return qr/$return_regex/; | ||
} | ||
|
||
# Takes an array of numbers and returns which style to use for parse and display | ||
# If there are conflicting answers among the array, will return undef. | ||
sub number_style_for { | ||
my @numbers = @_; | ||
|
||
my $style; # By default, assume we don't understand the numbers. | ||
|
||
STYLE: | ||
foreach my $test_style (@known_styles) { | ||
my $exponential = lc $test_style->exponential; # Allow for arbitrary casing. | ||
if (all { $test_style->understands($_) } map { split /$exponential/, lc $_ } @numbers) { | ||
# All of our numbers fit this style. Since we have them in preference order | ||
# we can pick it and move on. | ||
$style = $test_style; | ||
last STYLE; | ||
} | ||
} | ||
|
||
return $style; | ||
} | ||
|
||
1; |