Skip to content

Commit

Permalink
Merge pull request #180 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
merge from devel to master to create release 3.4.0
  • Loading branch information
mgcam authored Feb 15, 2019
2 parents acd95d3 + f4215d9 commit e3f4fa0
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 58 deletions.
52 changes: 26 additions & 26 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
Release 3.4.0
- Allow publishing from read-only directories
- Reduced the log level of creating missing MD5 cache files from
warning to debug

Release 3.3.0
- add composition to metadata
- added COMPONENT to standard metadata
- add bcfstats as a standard file type
- add the public group members to sequencing studies on ONT platforms
- Add composition to metadata
- Added COMPONENT to standard metadata
- Add bcfstats as a standard file type
- Add the public group members to sequencing studies on ONT platforms

Release 3.2.0
- added hts genotype suffixes to Annotator
- switched to disposable-irods 1.3 (uses WSI S3 for iRODS packages).
- added gbs_plex to Metadata.pm and hts genotype to non_compress_suffixes
- Added hts genotype suffixes to Annotator
- Switched to disposable-irods 1.3 (uses WSI S3 for iRODS packages).
- Added gbs_plex to Metadata.pm and hts genotype to non_compress_suffixes
in Annotator

Release 3.1.0
- add "hops" to HTS ancillary suffixes list
- Added "hops" to HTS ancillary suffixes list

Release 3.0.2
- support for single-server mode
- add "quant" and "tab" to HTS ancillary suffixes list
- Support for single-server mode
- Added "quant" and "tab" to HTS ancillary suffixes list

Release 3.0.1
- Support baton versions >=1.0.0 and <=1.1.0
Expand All @@ -34,7 +38,7 @@ Release 3.0.0
Net::AMQP::RabbitMQ is not installed

Release 2.8.2
- make internal iRODS.pm method calls private
- Make internal iRODS.pm method calls private
- RabbitMQ: add UUID to message header; change routing key format
use API for baton calls

Expand All @@ -45,15 +49,15 @@ Release 2.8.1
little benefit in practice.

Release 2.8.0
- (un)staging new data objects
- (Un)staging new data objects
error results in deletion rather than tagging for inspection
post-failure "staging=1" AVU ignored rather than error
- new PacBio legacy metadata
- switch to baton-do as the single baton client
- support for RabbitMQ messaging to report method calls
- stop using imv, ichksum and md5sum executables
- add fasta type
- use baton version 1.0.0
- New PacBio legacy metadata
- Switch to baton-do as the single baton client
- Support for RabbitMQ messaging to report method calls
- Stop using imv, ichksum and md5sum executables
- Add fasta type
- Use baton version 1.0.0

Release 2.7.1

Expand All @@ -62,23 +66,19 @@ Release 2.7.1
Release 2.7.0

- Bugfix; clean up any iRODS groups created by the test suite

- Added methods describing known filename suffixes

- Added metadata for BioNano

- Added metadata for 10X

- Added metadata for PacBio pbi files

Release 2.6.1

- use ml_warehouse
- added metadata for PacBio
- fix for staging iput where the target path is a collection
- Use ml_warehouse
- Added metadata for PacBio
- Fix for staging iput where the target path is a collection
- check for cases where a user's gidNumber doesn't have a group
- Travis CI: build package under perl v.5.22
- improved handling of file suffix metadata
- Improved handling of file suffix metadata
- Support baton versions >=0.16.4 and <=0.17.1

Release 2.6.0
Expand Down
20 changes: 16 additions & 4 deletions lib/WTSI/NPG/iRODS/Publisher.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use Carp;
use Data::Dump qw[pp];
use DateTime;
use English qw[-no_match_vars];
use File::Basename;
use File::Spec::Functions qw[catdir catfile splitdir splitpath];
use File::stat;
use List::AllUtils qw[any];
Expand Down Expand Up @@ -552,20 +553,31 @@ sub _read_md5_cache_file {
sub _make_md5_cache_file {
my ($self, $cache_file, $md5) = @_;

$self->warn("Adding missing MD5 cache file '$cache_file'");
$self->debug("Adding missing MD5 cache file '$cache_file'");

my ($filename, $cache_dir) = fileparse($cache_file);

if (not -w $cache_dir) {
$self->warn("Cache directory '$cache_dir' is not writable");
return $cache_file;
}
if (not -x $cache_dir) {
$self->warn("Cache directory '$cache_dir' is not executable");
return $cache_file;
}

try {
my $out;
open $out, '>', $cache_file or
croak "Failed to open '$cache_file' for writing: $ERRNO";
print $out "$md5\n" or
croak "Failed to write MD5 to '$cache_file': $ERRNO";
print $out "$md5\n" or
croak "Failed to write MD5 to '$cache_file': $ERRNO";
close $out or
$self->warn("Failed to close '$cache_file' cleanly: $ERRNO");
} catch {
# Failure to create a cache should not be a hard error. Here we
# just forward the message from croak above.
$self->warn($_);
$self->error($_);
};

return $cache_file;
Expand Down
1 change: 1 addition & 0 deletions t/data/publisher/publish_file/d.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test file d
101 changes: 73 additions & 28 deletions t/lib/WTSI/NPG/iRODS/PublisherTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ use warnings;
use Carp;
use Data::Dump qw[pp];
use English qw[-no_match_vars];
use File::Basename;
use File::Copy::Recursive qw[dircopy];
use File::Spec::Functions;
use File::Temp;
use Log::Log4perl;
use Test::Exception;
use Test::More;
use Try::Tiny;
use URI;

use base qw[WTSI::NPG::iRODS::Test];
Expand All @@ -36,7 +38,8 @@ sub setup_test : Test(setup) {
$cwc = $irods->working_collection;

# Prepare a copy of the test data because the tests will modify it
$tmp_data_path = File::Temp->newdir;
$tmp_data_path = File::Temp->newdir(TEMPLATE => 'publishertest.XXXXXX',
CLEANUP => 1);
dircopy($data_path, $tmp_data_path) or
croak "Failed to copy test data from $data_path to $tmp_data_path";

Expand Down Expand Up @@ -94,49 +97,53 @@ sub publish : Test(8) {
} 'publish, cram no MD5 fails';
}

sub publish_file : Test(41) {
sub publish_file : Test(43) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);

# publish_file with new full path, no metadata, no timestamp
pf_new_full_path_no_meta_no_stamp($irods, $data_path, $irods_tmp_coll);
pf_new_full_path_no_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll);
# publish_file with new full path, some metadata, no timestamp
pf_new_full_path_meta_no_stamp($irods, $data_path, $irods_tmp_coll);
pf_new_full_path_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll);
# publish_file with new full path, no metadata, with timestamp
pf_new_full_path_no_meta_stamp($irods, $data_path, $irods_tmp_coll);
pf_new_full_path_no_meta_stamp($irods, $tmp_data_path, $irods_tmp_coll);

# publish_file with existing full path, no metadata, no timestamp,
# matching MD5
pf_exist_full_path_no_meta_no_stamp_match($irods, $data_path,
pf_exist_full_path_no_meta_no_stamp_match($irods, $tmp_data_path,
$irods_tmp_coll);
# publish_file with existing full path, some metadata, no timestamp,
# matching MD5
pf_exist_full_path_meta_no_stamp_match($irods, $data_path,
pf_exist_full_path_meta_no_stamp_match($irods, $tmp_data_path,
$irods_tmp_coll);

# publish_file with existing full path, no metadata, no timestamp,
# non-matching MD5
pf_exist_full_path_no_meta_no_stamp_no_match($irods, $data_path,
pf_exist_full_path_no_meta_no_stamp_no_match($irods, $tmp_data_path,
$irods_tmp_coll);
# publish_file with existing full path, some metadata, no timestamp,
# non-matching MD5
pf_exist_full_path_meta_no_stamp_no_match($irods, $data_path,
pf_exist_full_path_meta_no_stamp_no_match($irods, $tmp_data_path,
$irods_tmp_coll);

# publish file where the cached md5 file is stale and must be
# regenerated
pf_stale_md5_cache($irods, $data_path, $irods_tmp_coll);
pf_stale_md5_cache($irods, $tmp_data_path, $irods_tmp_coll);

# publish file where the MD5 cache file is absent and the source
# directory is read-only
pf_md5_cache_ro($irods, $tmp_data_path, $irods_tmp_coll);
}

sub publish_directory : Test(11) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);

# publish_directory with new full path, no metadata, no timestamp
pd_new_full_path_no_meta_no_stamp($irods, $data_path, $irods_tmp_coll);
pd_new_full_path_no_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll);

# publish_directory with new full path, some metadata, no timestamp
pd_new_full_path_meta_no_stamp($irods, $data_path, $irods_tmp_coll);
pd_new_full_path_meta_no_stamp($irods, $tmp_data_path, $irods_tmp_coll);
}

sub pf_new_full_path_no_meta_no_stamp {
Expand All @@ -146,7 +153,7 @@ sub pf_new_full_path_no_meta_no_stamp {

# publish_file with new full path, no metadata, no timestamp
my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}';
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path = "$coll_path/pf_new_full_path_no_meta_no_stamp.txt";
is($publisher->publish_file($local_path_a, $remote_path)->str(),
$remote_path,
Expand All @@ -172,7 +179,7 @@ sub pf_new_full_path_meta_no_stamp {
my $publisher = WTSI::NPG::iRODS::Publisher->new(irods => $irods);

# publish_file with new full path, some metadata, no timestamp
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path = "$coll_path/pf_new_full_path_meta_no_stamp.txt";
my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890');
my $extra_avu2 = $irods->make_avu($ANALYSIS_UUID,
Expand Down Expand Up @@ -218,7 +225,7 @@ sub pf_new_full_path_no_meta_stamp {

# publish_file with new full path, no metadata, with timestamp
my $timestamp = DateTime->now;
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path = "$coll_path/pf_new_full_path_no_meta_stamp.txt";

is($publisher->publish_file($local_path_a,
Expand Down Expand Up @@ -249,7 +256,7 @@ sub pf_exist_full_path_no_meta_no_stamp_match {

# publish_file with existing full path, no metadata, no timestamp,
# matching MD5
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path = "$coll_path/pf_exist_full_path_no_meta_no_stamp_match.txt";
$publisher->publish_file($local_path_a, $remote_path) or fail;

Expand All @@ -272,7 +279,7 @@ sub pf_exist_full_path_meta_no_stamp_match {

# publish_file with existing full path, some metadata, no timestamp,
# matching MD5
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path = "$coll_path/pf_exist_full_path_meta_no_stamp_match.txt";
my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890');
my $extra_avu2 = $irods->make_avu($ANALYSIS_UUID,
Expand Down Expand Up @@ -320,7 +327,7 @@ sub pf_exist_full_path_no_meta_no_stamp_no_match {
# publish_file with existing full path, no metadata, no timestamp,
# non-matching MD5
my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}';
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path =
"$irods_tmp_coll/pf_exist_full_path_no_meta_no_stamp_no_match";
$publisher->publish_file($local_path_a, $remote_path) or fail;
Expand All @@ -345,7 +352,7 @@ sub pf_exist_full_path_meta_no_stamp_no_match {
# publish_file with existing full path, some metadata, no timestamp,
# non-matching MD5
my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}';
my $local_path_a = "$tmp_data_path/publish_file/a.txt";
my $local_path_a = "$data_path/publish_file/a.txt";
my $remote_path =
"$irods_tmp_coll/pf_exist_full_path_meta_no_stamp_no_match.txt";
my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890');
Expand Down Expand Up @@ -381,7 +388,7 @@ sub pd_new_full_path_no_meta_no_stamp {

# publish_directory with new full path, no metadata, no timestamp
my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}';
my $local_path = "$tmp_data_path/publish_directory";
my $local_path = "$data_path/publish_directory";

my $remote_path = "$coll_path/pd_new_full_path_no_meta_no_stamp";
my $sub_coll = "$remote_path/publish_directory";
Expand All @@ -407,7 +414,7 @@ sub pd_new_full_path_meta_no_stamp {

# publish_directory with new full path, some metadata, no timestamp
my $timestamp_regex = '\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}';
my $local_path = "$tmp_data_path/publish_directory";
my $local_path = "$data_path/publish_directory";
my $extra_avu1 = $irods->make_avu($RT_TICKET, '1234567890');
my $extra_avu2 = $irods->make_avu($ANALYSIS_UUID,
'abcdefg-01234567890-wxyz');
Expand Down Expand Up @@ -447,20 +454,15 @@ sub pf_stale_md5_cache {
(irods => $irods,
checksum_cache_time_delta => $cache_timeout);

my $local_path_c = "$tmp_data_path/publish_file/c.txt";
my $local_path_c = "$data_path/publish_file/c.txt";
my $remote_path = "$coll_path/pf_stale_md5_cache.txt";

open my $md5_out, '>>', "$local_path_c.md5"
or die "Failed to open $local_path_c.md5 for writing";
print $md5_out "fake_md5_string\n";
close $md5_out or warn "Failed to close $local_path_c.md5";

sleep $cache_timeout + 5;

open my $data_out, '>>', $local_path_c
or die "Failed to open $local_path_c for writing";
print $data_out "extra data\n";
close $data_out or warn "Failed to close $local_path_c";
_expire_cache($local_path_c, $cache_timeout);

is($publisher->publish_file($local_path_c, $remote_path)->str(),
$remote_path,
Expand All @@ -471,4 +473,47 @@ sub pf_stale_md5_cache {
'Stale MD5 was regenerated') or diag explain $obj->metadata;
}

# Test helper: publish a file from a local directory that has been made
# read-only and check that publishing still succeeds and that the expected
# MD5 is recorded in the data object's metadata.
#
# Arguments:
#   $irods     - iRODS handle passed to the Publisher and DataObject
#   $data_path - local root directory containing publish_file/d.txt
#   $coll_path - iRODS collection to publish into
#
# The directory permissions are restored in the finally block so that the
# temporary test data can be modified and removed afterwards.
sub pf_md5_cache_ro {
  my ($irods, $data_path, $coll_path) = @_;

  my $cache_threshold = 1; # Create MD5 cache for files of 1 byte or
                           # more
  my $publisher = WTSI::NPG::iRODS::Publisher->new
    (irods                    => $irods,
     checksum_cache_threshold => $cache_threshold);

  my $local_dir    = "$data_path/publish_file/";
  my $local_path_d = "$data_path/publish_file/d.txt";
  my $remote_path  = "$coll_path/pf_md5_cache_ro.txt";

  try {
    # Report a failed chmod directly rather than relying only on the -w
    # probe below to notice it.
    chmod 0555, $local_dir or
      croak "Failed to chmod 0555 '$local_dir': $ERRNO";

    # NOTE(review): -w reflects the effective user's access, so this probe
    # will presumably still report writable when run as root -- confirm.
    -w $local_dir and
      fail "Failed to make $local_dir non-writable";

    is($publisher->publish_file($local_path_d, $remote_path)->str(),
       $remote_path,
       'publish_file, ro MD5 cache dir');

    my $obj = WTSI::NPG::iRODS::DataObject->new($irods, $remote_path);
    is($obj->get_avu($FILE_MD5)->{value}, 'd429d9fcd9b12418aa725c32bd6fbc3f',
       'MD5 was generated') or diag explain $obj->metadata;
  } catch {
    die "$_\n";
  } finally {
    # Always restore write permission; warn (rather than die) on failure
    # because this runs during cleanup.
    chmod 0755, $local_dir or
      carp "Failed to restore permissions on '$local_dir': $ERRNO";
  };
}

# Test helper: wait for longer than the cache timeout and then append a
# line to the given local file, changing its content and modification time.
# Called by pf_stale_md5_cache after it has written a fake .md5 file, so
# that the cached checksum no longer matches the data.
#
# Arguments:
#   $local_path    - path of the local file to append to
#   $cache_timeout - cache timeout in seconds; the helper sleeps for this
#                    long plus 5 seconds before modifying the file
#
# Dies if the file cannot be opened or written; warns if it cannot be
# closed cleanly.
sub _expire_cache {
  my ($local_path, $cache_timeout) = @_;

  sleep $cache_timeout + 5;

  open my $data_out, '>>', $local_path
    or die "Failed to open $local_path for writing: $ERRNO";
  # A failed write would leave the file unchanged and make the calling
  # test fail for a misleading reason, so check it explicitly.
  print {$data_out} "extra data\n"
    or die "Failed to write to $local_path: $ERRNO";
  close $data_out or warn "Failed to close $local_path: $ERRNO";
}

1;

0 comments on commit e3f4fa0

Please sign in to comment.