From dc99ba8d023239ab3c1a24334bf70c10d1c94e72 Mon Sep 17 00:00:00 2001 From: Keith James Date: Tue, 1 Sep 2020 19:00:48 +0100 Subject: [PATCH 1/2] Create an empty target directory with TreePublisher, even when there are no files to publish. --- Changes | 5 +++++ lib/WTSI/NPG/HTS/TreePublisher.pm | 2 ++ 2 files changed, 7 insertions(+) diff --git a/Changes b/Changes index e6de0e27..2b60d103 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,8 @@ +Upcoming + + - Make TreePublisher create an empty target directory, even when + there are no files to publish. + Release 2.22.0 - New script, npg_publish_tree.pl, to publish arbitrary directory trees of diff --git a/lib/WTSI/NPG/HTS/TreePublisher.pm b/lib/WTSI/NPG/HTS/TreePublisher.pm index f038e58e..145aa5c9 100644 --- a/lib/WTSI/NPG/HTS/TreePublisher.pm +++ b/lib/WTSI/NPG/HTS/TreePublisher.pm @@ -126,6 +126,8 @@ has 'require_checksum_cache' => $files = [grep { $fnlog->($_) } @{$files}]; } + $self->_ensure_coll_exists($self->dest_collection); + my $collated_by_dest = $self->_collate_by_dest_coll($files); my ($num_files, $num_processed, $num_errors) = (0, 0, 0); From af60e8cbd77cf54b4a8ba67c771e85d030aa9486 Mon Sep 17 00:00:00 2001 From: Keith James Date: Tue, 1 Sep 2020 19:02:35 +0100 Subject: [PATCH 2/2] Bugfix: handle include/exclude and empty file sets correctly Handle --exclude arguments, rather than ignore them. Handle errors in regex arguments nicely, reporting any errors and explanations to the user. Fix omissions and typos in POD. --- Changes | 1 + bin/npg_publish_tree.pl | 61 ++++++++++++++++++++++++++++++++--------- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/Changes b/Changes index 2b60d103..d47bd02c 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,6 @@ Upcoming + - Bugfix: handling of --exclude CLI arguments by npg_publish_tree.pl. - Make TreePublisher create an empty target directory, even when there are no files to publish. 
diff --git a/bin/npg_publish_tree.pl b/bin/npg_publish_tree.pl index e49605f1..eb892791 100755 --- a/bin/npg_publish_tree.pl +++ b/bin/npg_publish_tree.pl @@ -5,12 +5,14 @@ use FindBin qw[$Bin]; use lib (-d "$Bin/../lib/perl5" ? "$Bin/../lib/perl5" : "$Bin/../lib"); +use Carp; use Data::Dump qw[pp]; use File::Slurp; use List::AllUtils qw[any]; use Log::Log4perl qw[:levels]; use Getopt::Long; use Pod::Usage; +use Try::Tiny; use WTSI::NPG::iRODS; use WTSI::NPG::iRODS::Collection; @@ -77,18 +79,49 @@ $log->level($ALL); sub _make_filter_fn { - my @include_re = map { qr{$_}msx } @include; - my @exclude_re = map { qr{$_}msx } @exclude; + + my @include_re; + my @exclude_re; + my $nerr = 0; + + foreach my $re (@include) { + try { + push @include_re, qr{$re}msx; + } catch { + $log->error("in include regex '$re': $_"); + $nerr++; + }; + } + + foreach my $re (@exclude) { + try { + push @exclude_re, qr{$re}msx; + } catch { + $log->error("in exclude regex '$re': $_"); + $nerr++; + }; + } + + if ($nerr > 0) { + $log->error("$nerr errors in include / exclude filters"); + exit 1; + } return sub { my ($path) = @_; - if (not @include_re) { - return -f $path; + (defined $path and $path ne q[]) or + croak 'Path argument is required in callback'; + + my $include = -f $path; + if (@include_re) { + $include = any {$path =~ $_} @include_re; + } + if ($include and @exclude_re) { + $include = not any {$path =~ $_} @exclude_re; } - return (any {$path =~ $_} @include_re and - not any {$path =~ $_} @exclude_re); + return $include; }; } @@ -161,13 +194,14 @@ sub handler { }); # Define any file filters required -if (@include) { +if (@include or @exclude) { push @publish_args, filter => _make_filter_fn(); } my ($num_files, $num_published, $num_errors) = $publisher->publish_tree(@publish_args); + # Set any permissions requested if (@groups) { $coll->set_content_permissions($WTSI::NPG::iRODS::READ_PERMISSION, @groups); @@ -176,7 +210,7 @@ sub handler { # Add any metadata provided if 
($metadata_file) { my $metadata = _read_metadata_file(); - $log->debug('Adding to ', $coll->str, ' metadata: ', $metadata); + $log->debug('Adding to ', $coll->str, ' metadata: ', pp($metadata)); foreach my $avu (@{$metadata}) { $coll->add_avu($avu->{attribute}, $avu->{value}, $avu->{units}); } @@ -199,7 +233,7 @@ sub handler { =head1 NAME -npg_publish_illumina_run +npg_publish_tree =head1 SYNOPSIS @@ -212,7 +246,7 @@ =head1 SYNOPSIS --collection The destination collection in iRODS. --debug Enable debug level logging. Optional, defaults to false. - --exclude Specifiy one or more regexes to ignore paths under + --exclude Specify one or more regexes to ignore paths under the target collection. Matching paths will be not be published. If more than one regex is supplied, they are all applied. Exclude regexes are applied after @@ -223,7 +257,7 @@ =head1 SYNOPSIS to none. May be used multiple times to add read permissions for multiple groups. --help Display help. - --include Specifiy one or more regexes to select paths under + --include Specify one or more regexes to select paths under the target collection. Only matching paths will be published, all others will be ignored. If more than one regex is supplied, the matches for all of them @@ -247,12 +281,13 @@ =head1 SYNOPSIS or even check these files in iRODS. Optional. The default restart file is "/published.json". --source-directory - --source_directory The instrument runfolder path to load. + --source_directory The local path to load. --verbose Print messages while processing. Optional. =head1 DESCRIPTION - +Publish an arbitrary directory hierarchy to iRODS, set permissions and +add metadata to the root collection. =head1 AUTHOR