Skip to content

Commit

Permalink
Revert 3a1bbc7 in PR #1885 to postpone making Unicode::UTF8 mandatory.
Browse files Browse the repository at this point in the history
  • Loading branch information
ikedas committed Nov 3, 2024
1 parent 68aa5b7 commit d4dd2f7
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/make-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
--verbose --no-interactive
--with-develop
--with-feature=Data::Password --with-feature=ldap
--with-feature=smime
--with-feature=safe-unicode --with-feature=smime
--with-feature=soap --with-feature=sqlite
${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }}
- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- . ~/bashrc
- coverage-install
- coverage-setup
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- autoreconf -i
- ./configure
- cd src; make; cd ..
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ before_install:

install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .

before_script:
- coverage-setup
Expand Down
22 changes: 11 additions & 11 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,6 @@ requires 'Time::HiRes', '>= 1.29';
# Used to get Unix time from local time
requires 'Time::Local', '>= 1.23';

# Normalizes file names represented by Unicode.
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';

# Sanitizes inputs with Unicode text.
requires 'Unicode::UTF8', '>= 0.58';

# Used to create URI containing non URI-canonical characters.
# Note: '3.28' is the version included in URI-1.35.
requires 'URI::Escape', '>= 3.28';
Expand Down Expand Up @@ -198,6 +190,13 @@ recommends 'Net::DNS', '>= 0.65';
# This is required if you set "list_check_smtp" sympa.conf parameter, used to check existing aliases before mailing list creation.
recommends 'Net::SMTP';

# Normalizes file names represented by Unicode.
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
recommends 'Unicode::Normalize', '>= 1.03';

recommends 'Unicode::UTF8', '>= 0.60';

### Features
##

Expand Down Expand Up @@ -325,9 +324,10 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides
};

feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub {
# Note: These became required (>=6.2.73b).
#requires 'Unicode::Normalize', '>= 1.03';
#requires 'Unicode::UTF8', '>= 0.58';
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';
requires 'Unicode::UTF8', '>= 0.60';
};

on 'test' => sub {
Expand Down
18 changes: 16 additions & 2 deletions src/cgi/wwsympa.fcgi.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ use IO::File qw();
use MIME::EncWords;
use MIME::Lite::HTML;
use POSIX qw();
use Unicode::UTF8;
use URI;
use Data::Dumper; # tentative

Expand All @@ -53,6 +52,10 @@ BEGIN {
and $Archive::Zip::SimpleZip::VERSION;
}

# Unicode::UTF8 is optional: try to load it at compile time without importing
# anything, and ignore a failure to load.  Code that depends on it checks
# $Unicode::UTF8::VERSION first and falls back to Encode otherwise.
BEGIN {
eval 'use Unicode::UTF8 qw()';
}

use Sympa;
use Sympa::Archive;
use Conf;
Expand Down Expand Up @@ -1060,7 +1063,18 @@ while ($query = Sympa::WWW::FastCGI->new) {
while (my ($k, $v) = each %in) {
next if ref $v;
next if Encode::is_utf8($v);
unless (Unicode::UTF8::valid_utf8($v)) {

my $valid_utf8;
if ($Unicode::UTF8::VERSION) {
$valid_utf8 = Unicode::UTF8::valid_utf8($v);
} else {
eval {
my $u = $v;
Encode::decode('UTF-8', $u, Encode::FB_CROAK());
$valid_utf8 = 1;
};
}
unless ($valid_utf8) {
$log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored',
$k, sprintf("\\x%*v02X", "\\x", $v));
delete $in{$k};
Expand Down
24 changes: 17 additions & 7 deletions src/lib/Sympa/Tools/Text.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ use MIME::EncWords;
use Text::LineFold;
use Unicode::GCString;
use URI::Escape qw();
use Unicode::Normalize qw();
use Unicode::UTF8;
# Unicode::Normalize and Unicode::UTF8 are optional: each is loaded at compile
# time if available, without importing anything, and a failure to load is
# ignored.  Availability is tested later through $Unicode::Normalize::VERSION
# and $Unicode::UTF8::VERSION.
BEGIN { eval 'use Unicode::Normalize qw()'; }
BEGIN { eval 'use Unicode::UTF8 qw()'; }

use Sympa::Language;
use Sympa::Regexps;
Expand Down Expand Up @@ -141,11 +141,15 @@ sub canonic_text {
my $utext;
if (Encode::is_utf8($text)) {
$utext = $text;
} else {
} elsif ($Unicode::UTF8::VERSION) {
no warnings 'utf8';
$utext = Unicode::UTF8::decode_utf8($text);
} else {
$utext = Encode::decode_utf8($text);
}
if ($Unicode::Normalize::VERSION) {
$utext = Unicode::Normalize::normalize('NFC', $utext);
}
$utext = Unicode::Normalize::normalize('NFC', $utext);

# Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
# and EIMS:
Expand Down Expand Up @@ -309,8 +313,13 @@ sub guessed_to_utf8 {
and length $text
and $text =~ /[^\x00-\x7F]/;

my $utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
my $utf8;
if ($Unicode::UTF8::VERSION) {
$utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
} else {
$utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) };
}
unless (defined $utf8) {
foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
$utf8 =
Expand All @@ -323,7 +332,8 @@ sub guessed_to_utf8 {
}

# Apply NFC: e.g. for modified-NFD by Mac OS X.
$utf8 = Unicode::Normalize::normalize('NFC', $utf8);
$utf8 = Unicode::Normalize::normalize('NFC', $utf8)
if $Unicode::Normalize::VERSION;

return Encode::encode_utf8($utf8);
}
Expand Down
21 changes: 13 additions & 8 deletions t/Tools_Text.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,18 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';
# ToDo: foldcase()
# ToDo: wrap_text()

# Noncharacters: U+D800, U+10FFFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';
SKIP: {
    # The expectation below is only checked when the optional modules
    # Unicode::Normalize and Unicode::UTF8 are both loaded.  The number of
    # skipped tests is passed explicitly, as Test::More's skip() expects, so
    # the skip count stays correct whatever kind of plan is in effect.
    skip 'Unicode::Normalize and Unicode::UTF8 required.', 1
        unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION;

    # Ill-formed input, one sequence per line: U+D800 (surrogate),
    # U+10FFFE (noncharacter), U+110000 and U+200000 (beyond U+10FFFF).
    # Each line is expected to come back as U+FFFD replacement characters
    # (three, one, four and five of them respectively).
    is Sympa::Tools::Text::canonic_text(
        "\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
    ),
        Encode::encode_utf8(
        "\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
        ),
        'canonic_text';
}

done_testing();

0 comments on commit d4dd2f7

Please sign in to comment.