Skip to content

Commit

Permalink
Merge pull request #43 from schwern/no-encoding
Browse files Browse the repository at this point in the history
Choose a utf-8 encoding that will not break threads.
  • Loading branch information
HayoBaan authored Aug 9, 2016
2 parents 244c50b + 83bbd04 commit ab1b17a
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 10 deletions.
33 changes: 32 additions & 1 deletion lib/utf8/all.pm
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,21 @@ L<Cwd::utf8> for fully utf-8 aware Cwd functions.
use Import::Into;
use parent qw(Encode charnames utf8 open warnings feature);
use Symbol qw(qualify_to_ref);
use Config;

# Holds the pointers to the original version of redefined functions
state %_orig_functions;

sub import {
my $class = shift;

# Enable features/pragmas in calling package
my $target = caller;

my $utf8_encoding = $class->_choose_utf8_encoding;

'utf8'->import::into($target);
'open'->import::into($target, qw{:encoding(UTF-8) :std});
'open'->import::into($target => $utf8_encoding, ':std');
'charnames'->import::into($target, qw{:full :short});
'warnings'->import::into($target, qw{FATAL utf8});
'feature'->import::into($target, qw{unicode_strings}) if $^V >= v5.11.0;
Expand Down Expand Up @@ -208,6 +214,31 @@ sub _utf8_glob {
}
}

sub _choose_utf8_encoding {
# No threads? No problem.
return ':encoding(UTF-8)' if !$Config{usethreads} && !$Config{useithreads};

# 5.24.0 seems to have fixed the major utf8 issues.
return ':encoding(UTF-8)' if $^V ge 5.24.0;

# A safe default.
return ':utf8';
}

=head1 WHICH UTF-8 ENCODING?
I<TL;DR>. Perl's unicode has bugs. utf8::all will try to work around them.
As of this writing, Perl has several ways to do utf-8. It has to do
with whether "unassigned" code points are considered errors or
not. The details are in L<perlunicode: Noncharacter code points|http://perldoc.perl.org/perlunicode.html#Noncharacter-code-points>. Perl also has lots of Unicode bugs, particularly with threads and
strict utf-8 encoding (ie. C<:encoding(UTF-8)>).
utf8::all will prefer the strictest encoding available, but it may
choose a less strict utf-8 encoding if it detects your Perl is
vulnerable to Unicode bugs. This should have no effect on how valid
utf-8 is handled.
=head1 INTERACTION WITH AUTODIE
If you use L<autodie>, which is a great idea, you need to use at least version
Expand Down
5 changes: 2 additions & 3 deletions t/lexical-again.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# no utf8::all should disable its effects lexically
# Note: Changes to @ARGV, STDIN, STDOU, and STDERR are always global!

use Test::More tests => 17;
use Test::More;
use PerlIO;

my $expected_unicode = "\x{30c6}\x{30b9}\x{30c8}"; # Unicode characters
Expand Down Expand Up @@ -52,7 +52,6 @@ for my $fh (keys %handles) {
my @layers = PerlIO::get_layers($handles{$fh});
ok(grep(m/utf8/, @layers), "$fh: utf8 does appear in the perlio layers")
or diag explain { $fh => \@layers };
ok(grep(m/utf-8-strict/, @layers), "$fh: utf-8-strict does appear in the perlio layers")
or diag explain { $fh => \@layers };
}

done_testing;
6 changes: 3 additions & 3 deletions t/open.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
# Test opening an actual file
use utf8::all;
use PerlIO;
use Test::More tests => 4;
use Test::More;

ok open my $in, '<', 'corpus/testfile';
my @layers = PerlIO::get_layers($in);
ok(grep(m/utf8/, @layers), 'utf8 appears in the perlio layers')
or diag explain { $fh => \@layers };
ok(grep(m/utf-8-strict/, @layers), 'utf-8-strict appears in the perlio layers')
or diag explain { $fh => \@layers };

my $contents = do { local $/; <$in>};
is $contents, "\x{30c6}\x{30b9}\x{30c8}\n", 'unicode retrieved OK';

done_testing;
33 changes: 33 additions & 0 deletions t/threads.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!perl

# Test that utf8::all is choosing the right encoding to not
# tickle thread bugs.

use strict;
use warnings;

# This is loaded before threads. It will not be aware of tests run in
# a thread.
use Test::More 0.96;
use Config;

BEGIN {
plan skip_all => "Requires threads"
if !$Config{usethreads};
}

# Deliberately before loading threads so we don't cheat and check
# if threads are loaded, that would be brittle.
use utf8::all;

use threads;
use threads::shared;

note "basic utf8 + threads bug"; {
my $ok :shared = 0;
my $t = threads->create(sub { $ok = 1; });
$t->join();
ok $ok, "threads ok with utf8::all";
}

done_testing;
4 changes: 1 addition & 3 deletions t/utf8.t
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ use Test::More;
my @layers = PerlIO::get_layers($fh);
ok(grep(m/utf8/, @layers), 'utf8 appears in the perlio layers')
or diag explain { $fh => \@layers };
ok(grep(m/utf-8-strict/, @layers), 'utf-8-strict appears in the perlio layers')
or diag explain { $fh => \@layers };
}
}

Expand All @@ -33,7 +31,7 @@ use Test::More;
END { unlink "perlio_test2" }

my @layers = PerlIO::get_layers($test_fh);
SKIP: {
SKIP: {
# If we have the Perl Unicode flag set that adds the UTF-8 layer,
# we need to skip this test.
skip 'Perl Unicode flag set that always adds UTF-8 layer to output', 1 if (${^UNICODE} & 16);
Expand Down

0 comments on commit ab1b17a

Please sign in to comment.