Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/treepublisher handle empty set #287

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
Upcoming

- Bugfix: handling of --exclude CLI arguments by npg_publish_tree.pl.
- Make TreePublisher create an empty target directory, even when
there are no files to publish.

Release 2.22.0

- New script, npg_publish_tree.pl, to publish arbitrary directory trees of
Expand Down
61 changes: 48 additions & 13 deletions bin/npg_publish_tree.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
use FindBin qw[$Bin];
use lib (-d "$Bin/../lib/perl5" ? "$Bin/../lib/perl5" : "$Bin/../lib");

use Carp;
use Data::Dump qw[pp];
use File::Slurp;
use List::AllUtils qw[any];
use Log::Log4perl qw[:levels];
use Getopt::Long;
use Pod::Usage;
use Try::Tiny;

use WTSI::NPG::iRODS;
use WTSI::NPG::iRODS::Collection;
Expand Down Expand Up @@ -77,18 +79,49 @@
$log->level($ALL);

sub _make_filter_fn {
my @include_re = map { qr{$_}msx } @include;
my @exclude_re = map { qr{$_}msx } @exclude;

my @include_re;
my @exclude_re;
my $nerr = 0;

foreach my $re (@include) {
try {
push @include_re, qr{$re}msx;
} catch {
$log->error("in include regex '$re': $_");
$nerr++;
};
}

foreach my $re (@exclude) {
try {
push @exclude_re, qr{$re}msx;
} catch {
$log->error("in exclude regex '$re': $_");
$nerr++;
};
}

if ($nerr > 0) {
$log->error("$nerr errors in include / exclude filters");
exit 1;
}

return sub {
my ($path) = @_;

if (not @include_re) {
return -f $path;
(defined $path and $path ne q[]) or
croak 'Path argument is required in callback';

my $include = -f $path;
if (@include_re) {
$include = any {$path =~ $_} @include_re;
}
if ($include and @exclude_re) {
$include = not any {$path =~ $_} @exclude_re;
}

return (any {$path =~ $_} @include_re and
not any {$path =~ $_} @exclude_re);
return $include;
};
}

Expand Down Expand Up @@ -161,13 +194,14 @@ sub handler {
});

# Define any file filters required
if (@include) {
if (@include or @exclude) {
push @publish_args, filter => _make_filter_fn();
}

my ($num_files, $num_published, $num_errors) =
$publisher->publish_tree(@publish_args);


# Set any permissions requested
if (@groups) {
$coll->set_content_permissions($WTSI::NPG::iRODS::READ_PERMISSION, @groups);
Expand All @@ -176,7 +210,7 @@ sub handler {
# Add any metadata provided
if ($metadata_file) {
my $metadata = _read_metadata_file();
$log->debug('Adding to ', $coll->str, ' metadata: ', $metadata);
$log->debug('Adding to ', $coll->str, ' metadata: ', pp($metadata));
foreach my $avu (@{$metadata}) {
$coll->add_avu($avu->{attribute}, $avu->{value}, $avu->{units});
}
Expand All @@ -199,7 +233,7 @@ sub handler {

=head1 NAME

npg_publish_illumina_run
npg_publish_tree

=head1 SYNOPSIS

Expand All @@ -212,7 +246,7 @@ =head1 SYNOPSIS
--collection The destination collection in iRODS.
--debug Enable debug level logging. Optional, defaults to
false.
--exclude Specifiy one or more regexes to ignore paths under
--exclude Specify one or more regexes to ignore paths under
the target collection. Matching paths will not be
published. If more than one regex is supplied, they
are all applied. Exclude regexes are applied after
Expand All @@ -223,7 +257,7 @@ =head1 SYNOPSIS
to none. May be used multiple times to add read
permissions for multiple groups.
--help Display help.
--include Specifiy one or more regexes to select paths under
--include Specify one or more regexes to select paths under
the target collection. Only matching paths will be
published, all others will be ignored. If more than
one regex is supplied, the matches for all of them
Expand All @@ -247,12 +281,13 @@ =head1 SYNOPSIS
or even check these files in iRODS. Optional. The
default restart file is "<archive dir>/published.json".
--source-directory
--source_directory The instrument runfolder path to load.
--source_directory The local path to load.
--verbose Print messages while processing. Optional.

=head1 DESCRIPTION


Publish an arbitrary directory hierarchy to iRODS, set permissions and
add metadata to the root collection.

=head1 AUTHOR

Expand Down
2 changes: 2 additions & 0 deletions lib/WTSI/NPG/HTS/TreePublisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ has 'require_checksum_cache' =>
$files = [grep { $fnlog->($_) } @{$files}];
}

$self->_ensure_coll_exists($self->dest_collection);

my $collated_by_dest = $self->_collate_by_dest_coll($files);

my ($num_files, $num_processed, $num_errors) = (0, 0, 0);
Expand Down