From cdae6fcf45c9cfbddcdad2ddc9c8de2914ad3527 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 14 Feb 2019 12:30:12 +0000 Subject: [PATCH 1/4] Allow publishing from read-only directories by reducing the severity of failure to create MD5 cache files from a hard error to a warning. Reduced log level of creating missing MD5 cache files from warning to debug. --- lib/WTSI/NPG/iRODS/Publisher.pm | 37 ++++++---- t/data/publisher/publish_file/d.txt | 1 + t/lib/WTSI/NPG/iRODS/PublisherTest.pm | 98 +++++++++++++++++++-------- 3 files changed, 93 insertions(+), 43 deletions(-) create mode 100644 t/data/publisher/publish_file/d.txt diff --git a/lib/WTSI/NPG/iRODS/Publisher.pm b/lib/WTSI/NPG/iRODS/Publisher.pm index 6b8022a8..59403fe8 100644 --- a/lib/WTSI/NPG/iRODS/Publisher.pm +++ b/lib/WTSI/NPG/iRODS/Publisher.pm @@ -5,6 +5,7 @@ use Carp; use Data::Dump qw[pp]; use DateTime; use English qw[-no_match_vars]; +use File::Basename; use File::Spec::Functions qw[catdir catfile splitdir splitpath]; use File::stat; use List::AllUtils qw[any]; @@ -552,21 +553,27 @@ sub _read_md5_cache_file { sub _make_md5_cache_file { my ($self, $cache_file, $md5) = @_; - $self->warn("Adding missing MD5 cache file '$cache_file'"); - - try { - my $out; - open $out, '>', $cache_file or - croak "Failed to open '$cache_file' for writing: $ERRNO"; - print $out "$md5\n" or - croak "Failed to write MD5 to '$cache_file': $ERRNO"; - close $out or - $self->warn("Failed to close '$cache_file' cleanly: $ERRNO"); - } catch { - # Failure to create a cache should not be a hard error. Here we - # just forward the message from croak above. - $self->warn($_); - }; + $self->debug("Adding missing MD5 cache file '$cache_file'"); + + my ($filename, $cache_dir) = fileparse($cache_file); + if (-w $cache_dir) { + try { + my $out; + open $out, '>', $cache_file or + croak "Failed to open '$cache_file' for writing: $ERRNO"; + print $out "$md5\n" or + croak "Failed to write MD5 to '$cache_file': $ERRNO"; + close $out or + $self->warn("Failed to close '$cache_file' cleanly: $ERRNO"); + } catch { + # Failure to create a cache should not be a hard error. Here we + # just forward the message from croak above. + $self->error($_); + }; + } + else { + $self->warn("Cache directory '$cache_dir' is not writable"); + } return $cache_file; } diff --git a/t/data/publisher/publish_file/d.txt b/t/data/publisher/publish_file/d.txt new file mode 100644 index 00000000..d0995e82 --- /dev/null +++ b/t/data/publisher/publish_file/d.txt @@ -0,0 +1 @@ +test file d diff --git a/t/lib/WTSI/NPG/iRODS/PublisherTest.pm b/t/lib/WTSI/NPG/iRODS/PublisherTest.pm index b9448b26..d364ae73 100644 --- a/t/lib/WTSI/NPG/iRODS/PublisherTest.pm +++ b/t/lib/WTSI/NPG/iRODS/PublisherTest.pm @@ -6,12 +6,14 @@ use warnings; use Carp; use Data::Dump qw[pp]; use English qw[-no_match_vars]; +use File::Basename; use File::Copy::Recursive qw[dircopy]; use File::Spec::Functions; use File::Temp; use Log::Log4perl; use Test::Exception; use Test::More; +use Try::Tiny; use URI; use base qw[WTSI::NPG::iRODS::Test]; @@ -36,7 +38,8 @@ sub setup_test : Test(setup) { $cwc = $irods->working_collection; # Prepare a copy of the test data because the tests will modify it - $tmp_data_path = File::Temp->newdir; + $tmp_data_path = File::Temp->newdir(TEMPLATE => 'publishertest.XXXXXX', + CLEANUP => 1); dircopy($data_path, $tmp_data_path) or croak "Failed to copy test data from $data_path to $tmp_data_path"; @@ -94,38 +97,42 @@ sub publish : Test(8) { } 'publish, cram no MD5 fails'; } -sub publish_file : Test(41) { +sub publish_file : Test(43) { my $irods = WTSI::NPG::iRODS->new(environment => \%ENV, strict_baton_version => 0); # publish_file with new full path, no metadata, no timestamp - pf_new_full_path_no_meta_no_stamp($irods, $data_path, $irods_tmp_coll); + pf_new_full_path_no_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with new full path, some metadata, no timestamp - pf_new_full_path_meta_no_stamp($irods, $data_path, $irods_tmp_coll); + pf_new_full_path_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with new full path, no metadata, with timestamp - pf_new_full_path_no_meta_stamp($irods, $data_path, $irods_tmp_coll); + pf_new_full_path_no_meta_stamp($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with existing full path, no metadata, no timestamp, # matching MD5 - pf_exist_full_path_no_meta_no_stamp_match($irods, $data_path, + pf_exist_full_path_no_meta_no_stamp_match($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with existing full path, some metadata, no timestamp, # matching MD5 - pf_exist_full_path_meta_no_stamp_match($irods, $data_path, + pf_exist_full_path_meta_no_stamp_match($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with existing full path, no metadata, no timestamp, # non-matching MD5 - pf_exist_full_path_no_meta_no_stamp_no_match($irods, $data_path, + pf_exist_full_path_no_meta_no_stamp_no_match($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with existing full path, some metadata, no timestamp, # non-matching MD5 - pf_exist_full_path_meta_no_stamp_no_match($irods, $data_path, + pf_exist_full_path_meta_no_stamp_no_match($irods, $tmp_data_path, $irods_tmp_coll); # publish file where the cached md5 file is stale and must be # regenerated - pf_stale_md5_cache($irods, $data_path, $irods_tmp_coll); + pf_stale_md5_cache($irods, $tmp_data_path, $irods_tmp_coll); + + # publish filewhere the MD5 file is absent, yet where the source + # directory is read-only + pf_md5_cache_ro($irods, $tmp_data_path, $irods_tmp_coll); } sub publish_directory : Test(11) { @@ -133,10 +140,10 @@ sub publish_directory : Test(11) { strict_baton_version => 0); # publish_directory with new full path, no metadata, no timestamp - pd_new_full_path_no_meta_no_stamp($irods, $data_path, $irods_tmp_coll); + pd_new_full_path_no_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll); # publish_file with new full path, some metadata, no timestamp - pd_new_full_path_meta_no_stamp($irods, $data_path, $irods_tmp_coll); + pd_new_full_path_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll); } sub pf_new_full_path_no_meta_no_stamp { @@ -146,7 +153,7 @@ sub pf_new_full_path_no_meta_no_stamp { # publish_file with new full path, no metadata, no timestamp my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'; - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$coll_path/pf_new_full_path_no_meta_no_stamp.txt"; is($publisher->publish_file($local_path_a, $remote_path)->str(), $remote_path, @@ -172,7 +179,7 @@ sub pf_new_full_path_meta_no_stamp { my $publisher = WTSI::NPG::iRODS::Publisher->new(irods => $irods); # publish_file with new full path, some metadata, no timestamp - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$coll_path/pf_new_full_path_meta_no_stamp.txt"; my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890'); my $extra_avu2 = $irods->make_avu($ANALYSIS_UUID, @@ -218,7 +225,7 @@ sub pf_new_full_path_no_meta_stamp { # publish_file with new full path, no metadata, no timestamp my $timestamp = DateTime->now; - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$coll_path/pf_new_full_path_no_meta_stamp.txt"; is($publisher->publish_file($local_path_a, @@ -249,7 +256,7 @@ sub pf_exist_full_path_no_meta_no_stamp_match { # publish_file with existing full path, no metadata, no timestamp, # matching MD5 - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$coll_path/pf_exist_full_path_no_meta_no_stamp_match.txt"; $publisher->publish_file($local_path_a, $remote_path) or fail; @@ -272,7 +279,7 @@ sub pf_exist_full_path_meta_no_stamp_match { # publish_file with existing full path, some metadata, no timestamp, # matching MD5 - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$coll_path/pf_exist_full_path_meta_no_stamp_match.txt"; my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890'); my $extra_avu2 = $irods->make_avu($ANALYSIS_UUID, @@ -320,7 +327,7 @@ sub pf_exist_full_path_no_meta_no_stamp_no_match { # publish_file with existing full path, no metadata, no timestamp, # non-matching MD5 my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'; - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$irods_tmp_coll/pf_exist_full_path_no_meta_no_stamp_no_match"; $publisher->publish_file($local_path_a, $remote_path) or fail; @@ -345,7 +352,7 @@ sub pf_exist_full_path_meta_no_stamp_no_match { # publish_file with existing full path, some metadata, no timestamp, # non-matching MD5 my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'; - my $local_path_a = "$tmp_data_path/publish_file/a.txt"; + my $local_path_a = "$data_path/publish_file/a.txt"; my $remote_path = "$irods_tmp_coll/pf_exist_full_path_meta_no_stamp_no_match.txt"; my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890'); @@ -381,7 +388,7 @@ sub pd_new_full_path_no_meta_no_stamp { # publish_directory with new full path, no metadata, no timestamp my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'; - my $local_path = "$tmp_data_path/publish_directory"; + my $local_path = "$data_path/publish_directory"; my $remote_path = "$coll_path/pd_new_full_path_no_meta_no_stamp"; my $sub_coll = "$remote_path/publish_directory"; @@ -407,7 +414,7 @@ sub pd_new_full_path_meta_no_stamp { # publish_directory with new full path, no metadata, no timestamp my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'; - my $local_path = "$tmp_data_path/publish_directory"; + my $local_path = "$data_path/publish_directory"; my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890'); my $extra_avu2 = $irods->make_avu($ANALYSIS_UUID, 'abcdefg-01234567890-wxyz'); @@ -447,7 +454,7 @@ sub pf_stale_md5_cache { (irods => $irods, checksum_cache_time_delta => $cache_timeout); - my $local_path_c = "$tmp_data_path/publish_file/c.txt"; + my $local_path_c = "$data_path/publish_file/c.txt"; my $remote_path = "$coll_path/pf_stale_md5_cache.txt"; open my $md5_out, '>>', "$local_path_c.md5" @@ -455,12 +462,7 @@ sub pf_stale_md5_cache { print $md5_out "fake_md5_string\n"; close $md5_out or warn "Failed to close $local_path_c.md5"; - sleep $cache_timeout + 5; - - open my $data_out, '>>', $local_path_c - or die "Failed to open $local_path_c for writing"; - print $data_out "extra data\n"; - close $data_out or warn "Failed to close $local_path_c"; + _expire_cache($local_path_c, $cache_timeout); is($publisher->publish_file($local_path_c, $remote_path)->str(), $remote_path, @@ -471,4 +473,44 @@ sub pf_stale_md5_cache { 'Stale MD5 was regenerated') or diag explain $obj->metadata; } +sub pf_md5_cache_ro { + my ($irods, $data_path, $coll_path) = @_; + + my $cache_threshold = 1; # Create MD5 cache for files of 1 byte or + # more + my $publisher = WTSI::NPG::iRODS::Publisher->new + (irods => $irods, + checksum_cache_threshold => $cache_threshold); + + my $local_path_d = "$data_path/publish_file/d.txt"; + my $remote_path = "$coll_path/pf_md5_cache_ro.txt"; + + try { + chmod 0555, "$data_path/publish_file/"; + + is($publisher->publish_file($local_path_d, $remote_path)->str(), + $remote_path, + 'publish_file, ro MD5 cache dir'); + + my $obj = WTSI::NPG::iRODS::DataObject->new($irods, $remote_path); + is($obj->get_avu($FILE_MD5)->{value}, 'd429d9fcd9b12418aa725c32bd6fbc3f', + 'MD5 was generated') or diag explain $obj->metadata; + } catch { + die "$_\n"; + } finally { + chmod 0755, "$data_path/publish_file/"; + }; +} + +sub _expire_cache { + my ($local_path, $cache_timeout) = @_; + + sleep $cache_timeout + 5; + + open my $data_out, '>>', $local_path + or die "Failed to open $local_path for writing"; + print $data_out "extra data\n"; + close $data_out or warn "Failed to close $local_path"; +} + 1; From 18519557580fca923e22dc87d5461e6b3bf52562 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 14 Feb 2019 13:37:03 +0000 Subject: [PATCH 2/4] Check that we can enter the cache directory. --- lib/WTSI/NPG/iRODS/Publisher.pm | 37 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/lib/WTSI/NPG/iRODS/Publisher.pm b/lib/WTSI/NPG/iRODS/Publisher.pm index 59403fe8..5db5b3ba 100644 --- a/lib/WTSI/NPG/iRODS/Publisher.pm +++ b/lib/WTSI/NPG/iRODS/Publisher.pm @@ -556,25 +556,30 @@ sub _make_md5_cache_file { $self->debug("Adding missing MD5 cache file '$cache_file'"); my ($filename, $cache_dir) = fileparse($cache_file); - if (-w $cache_dir) { - try { - my $out; - open $out, '>', $cache_file or - croak "Failed to open '$cache_file' for writing: $ERRNO"; - print $out "$md5\n" or - croak "Failed to write MD5 to '$cache_file': $ERRNO"; - close $out or - $self->warn("Failed to close '$cache_file' cleanly: $ERRNO"); - } catch { - # Failure to create a cache should not be a hard error. Here we - # just forward the message from croak above. - $self->error($_); - }; - } - else { + + if (not -w $cache_dir) { $self->warn("Cache directory '$cache_dir' is not writable"); + return $cache_file; + } + if (not -x $cache_dir) { + $self->warn("Cache directory '$cache_dir' is not executable"); + return $cache_file; } + try { + my $out; + open $out, '>', $cache_file or + croak "Failed to open '$cache_file' for writing: $ERRNO"; + print $out "$md5\n" or + croak "Failed to write MD5 to '$cache_file': $ERRNO"; + close $out or + $self->warn("Failed to close '$cache_file' cleanly: $ERRNO"); + } catch { + # Failure to create a cache should not be a hard error. Here we + # just forward the message from croak above. + $self->error($_); + }; + return $cache_file; } From 0b202951430a46e074e7ee6d49ecb66545478c83 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 14 Feb 2019 13:41:21 +0000 Subject: [PATCH 3/4] Ensure test fails if the source directory cannot be made read-only. --- t/lib/WTSI/NPG/iRODS/PublisherTest.pm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/lib/WTSI/NPG/iRODS/PublisherTest.pm b/t/lib/WTSI/NPG/iRODS/PublisherTest.pm index d364ae73..843fed88 100644 --- a/t/lib/WTSI/NPG/iRODS/PublisherTest.pm +++ b/t/lib/WTSI/NPG/iRODS/PublisherTest.pm @@ -488,6 +488,9 @@ sub pf_md5_cache_ro { try { chmod 0555, "$data_path/publish_file/"; + -w "$data_path/publish_file/" and + fail "Failed to make $data_path/publish_file/ non-writable"; + is($publisher->publish_file($local_path_d, $remote_path)->str(), $remote_path, 'publish_file, ro MD5 cache dir'); From f91b331e05f7c9f2e06c85768f0ea42534340905 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 14 Feb 2019 14:39:49 +0000 Subject: [PATCH 4/4] Updated changelog. --- Changes | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/Changes b/Changes index b8637b63..9bbcfdd3 100644 --- a/Changes +++ b/Changes @@ -1,22 +1,26 @@ +Release 3.4.0 + - Allow publishing from read-only directories + - Reduced the log level of creating missing MD5 cache files from + warning to debug Release 3.3.0 - - add composition to metadata - - added COMPONENT to standard metadata - - add bcfstats as a standard file type - - add the public group members to sequencing studies on ONT platforms + - Add composition to metadata + - Added COMPONENT to standard metadata + - Add bcfstats as a standard file type + - Add the public group members to sequencing studies on ONT platforms Release 3.2.0 - - added hts genotype suffixes to Annotator - - switched to disposable-irods 1.3 (uses WSI S3 for iRODS packages). - - added gbs_plex to Metadata.pm and hts genotype to non_compress_suffixes + - Added hts genotype suffixes to Annotator + - Switched to disposable-irods 1.3 (uses WSI S3 for iRODS packages). + - Added gbs_plex to Metadata.pm and hts genotype to non_compress_suffixes in Annotator Release 3.1.0 - - add "hops" to HTS ancillary suffixes list + - Added "hops" to HTS ancillary suffixes list Release 3.0.2 - - support for single-server mode - - add "quant" and "tab" to HTS ancillary suffixes list + - Support for single-server mode + - Added "quant" and "tab" to HTS ancillary suffixes list Release 3.0.1 - Support baton versions >=1.0.0 and <=1.1.0 @@ -34,7 +38,7 @@ Release 3.0.0 Net::AMQP::RabbitMQ is not installed Release 2.8.2 - - make internal iRODS.pm method calls private + - Make internal iRODS.pm method calls private - RabbitMQ: add UUID to message header; change routing key format use API for baton calls @@ -45,15 +49,15 @@ Release 2.8.1 little benefit in practice. Release 2.8.0 - - (un)staging new data objects + - (Un)staging new data objects error results in deletion rather than tagging for inspection post-failure "staging=1" AVU ignored rather than error - - new PacBio legacy metadata - - switch to baton-do as the single baton client - - support for RabbitMQ messaging to report method calls - - stop using imv, ichksum and md5sum executables - - add fasta type - - use baton version 1.0.0 + - New PacBio legacy metadata + - Switch to baton-do as the single baton client + - Support for RabbitMQ messaging to report method calls + - Stop using imv, ichksum and md5sum executables + - Add fasta type + - Use baton version 1.0.0 Release 2.7.1 @@ -62,23 +66,19 @@ Release 2.7.1 Release 2.7.0 - Bugfix; clean up any iRODS groups created by the test suite - - Added methods describing know filename suffixes - - Added metadata for BioNano - - Added metadata for 10X - - Added metadata for PacBio pbi files Release 2.6.1 - - use ml_warehouse - - added metadata for PacBio - - fix for staging iput where the target path is a collection + - Use ml_warehouse + - Added metadata for PacBio + - Fix for staging iput where the target path is a collection - check for cases where a user's gidNumber doesn't have a group - Travis CI: build package under perl v.5.22 - - improved handling of file suffix metadata + - Improved handling of file suffix metadata - Support baton versions >=0.16.4 and <=0.17.1 Release 2.6.0