From 79f012a1c830e8b091490b6a452c23b220d34ce1 Mon Sep 17 00:00:00 2001 From: Keith James Date: Tue, 8 Nov 2016 17:49:55 +0000 Subject: [PATCH 01/15] Use LDAP to get group membership rather than LDAP --- bin/populate_wtsi_irods_groups.pl | 127 ++++++++++++++++-------------- 1 file changed, 69 insertions(+), 58 deletions(-) diff --git a/bin/populate_wtsi_irods_groups.pl b/bin/populate_wtsi_irods_groups.pl index 02c9f765..349240e8 100755 --- a/bin/populate_wtsi_irods_groups.pl +++ b/bin/populate_wtsi_irods_groups.pl @@ -8,8 +8,8 @@ use autodie; use Getopt::Long; use List::MoreUtils qw(uniq); -use Log::Log4perl; -use Log::Log4perl::Level; +use Log::Log4perl qw(:levels); +use Net::LDAP; use Readonly; use npg_warehouse::Schema; @@ -51,38 +51,27 @@ --debug Enable debug level logging. Optional, defaults to false. --dry-run Report proposed changes, do not perform them. Optional. --dry_run - --group-min Minumum number of "getent group" records to expect [200] - --group_min --help Display help. --logconf A log4perl configuration file. Optional. - --passwd-min Minumum number of "getent passwd" records to expect [5000] - --passwd_min --study Restrict updates to a study. May be used multiple times to select more than one study. Optional. --verbose Print messages while processing. Optional. WOE -Readonly::Scalar my $GETENT_GROUP_ALERT_THRESH => 200; -Readonly::Scalar my $GETENT_PASSWD_ALERT_THRESH => 5000; - my $debug; my $dry_run; -my $group_min_record_count = $GETENT_GROUP_ALERT_THRESH; my $log4perl_config; -my $passwd_min_record_count = $GETENT_PASSWD_ALERT_THRESH; my $verbose; my @studies; GetOptions('debug' => \$debug, 'dry-run|dry_run' => \$dry_run, - 'group-min|group_min=i' => \$group_min_record_count, 'help' => sub { print $what_on_earth; exit 0; }, 'logconf=s' => \$log4perl_config, - 'passwd-min|passwd_min=i' => \$passwd_min_record_count, 'study=s' => \@studies, 'verbose' => \$verbose) or die "\n$what_on_earth\n"; @@ -117,51 +106,27 @@ sub _uid_to_irods_uid { return grep {/^\Q$u\E#/smx} @public; } -Readonly::Scalar my $GROUP_SECONDARY_MEMBERS_FIELD_INDEX => 3; -my%ug2id; #cache of group to users - populate here -my%gid2group; -my $num_group_lines = 0; -open my$gfh, q(-|), q(getent group) or - $log->logcroak("Opening pipe to getent group failed: $ERRNO"); -while(<$gfh>){ - $num_group_lines++; - chomp; - $log->debug("getent group: ", $_); - my@F=split /:/smx; - my$users=$ug2id{$F[0]}||=[]; - push @{$users}, split /,/smx, $F[$GROUP_SECONDARY_MEMBERS_FIELD_INDEX]||q(); #fill with secondary groups for users - $gid2group{$F[2]}=$F[0]; -} -close $gfh or - $log->logcroak("Closing pipe to getent group failed: $ERRNO"); - -if ($group_min_record_count and $num_group_lines < $group_min_record_count) { - $log->logcroak("Output of 'getent group' appears truncated ", - "($num_group_lines lines)"); -} - -Readonly::Scalar my $PASSWD_PRIMARY_GID_FIELD_INDEX => 3; - -my $num_passwd_lines = 0; -open my$pfh, q(-|), q(getent passwd) or - $log->logcroak("Opening pipe to getent passwd failed: $ERRNO"); -while(<$pfh>){ - $num_passwd_lines++; - chomp; - $log->debug("getent passwd: ", $_); - my@F=split /:/smx; - push @{$ug2id{$gid2group{$F[$PASSWD_PRIMARY_GID_FIELD_INDEX]}||=q()}},$F[0]; #fill with primary group for users - empty strong used if no group found for gid +my $host = 'ldap.internal.sanger.ac.uk'; +my $ldap = Net::LDAP->new($host); + +$ldap->bind or $log->logcroak("LDAP failed to bind to '$host': ", $!); +# Get group, gid and member uids from LDAP +my ($group2uids, $gid2group) = find_group_ids($ldap); +# Get uids and their primary gid from LDAP +my $uid2gid = find_primary_gid($ldap); +$ldap->unbind or $log->logwarn("LDAP failed to unbind '$host': ", $!); + +# For each uid, merge primary gid with secondary gids +foreach my $uid (keys %{$uid2gid}) { + my $gid = $uid2gid->{$uid}; + my $primary_group = $gid2group->{$gid}; + push @{$group2uids->{$primary_group}}, $uid; } -close $pfh or - $log->logcroak("Closing pipe to getent passwd failed: $ERRNO"); -if ($passwd_min_record_count and $num_passwd_lines < $passwd_min_record_count) { - $log->logcroak("Output of 'getent passwd' appears truncated ", - "($num_passwd_lines lines)"); -} - -foreach my $users (values%ug2id){ - $users = [uniq @{$users}]; +foreach my $group (keys %{$group2uids}){ + my @uids = uniq @{$group2uids->{$group}}; + $group2uids->{$group} = \@uids; + $log->debug("Group '$group' membership ", join q(, ), @uids); } my $schema = npg_warehouse::Schema->connect; @@ -187,8 +152,8 @@ sub _uid_to_irods_uid { if (@dags) { # if strings from data access group don't match any group name try # treating as usernames - @members = map { _uid_to_irods_uid($_) } - map { @{ $ug2id{$_} || [$_] } } @dags; + @members = map { _uid_to_irods_uid($_) } + map { @{ $group2uids->{$_} || [$_] } } @dags; } elsif ($is_seq) { @members = @public; @@ -212,3 +177,49 @@ sub _uid_to_irods_uid { $log->info("When considering $group_count Sequencescape studies, ", "$altered_count iRODS groups were created or their ", 'membership altered (by ', $iga->_user, ')'); + +# Find both gid and member uids for each group +sub find_group_ids { + my ($ld) = @_; + + my $query_base = 'ou=group,dc=sanger,dc=ac,dc=uk'; + my $query_filter = '(cn=*)'; + my $search = $ld->search(base => $query_base, + filter => $query_filter); + if ($search->code) { + $log->logcroak("LDAP query base: '$query_base', filter: '$query_filter' ", + 'failed: ', $search->error); + } + + my %group2uids; + my %gid2group; + foreach my $entry ($search->entries) { + my $group = $entry->get_value('cn'); + my $gid = $entry->get_value('gidNumber'); + my @uids = $entry->get_value('memberUid'); + $group2uids{$group} = \@uids; + $gid2group{$gid} = $group; + } + + return (\%group2uids, \%gid2group); +} + +sub find_primary_gid { + my ($ld) = @_; + + my $query_base = 'ou=group,dc=sanger,dc=ac,dc=uk'; + my $query_filter = '&(sangerActiveAccount=TRUE)(sangerRealPerson=TRUE)'; + my $search = $ld->search(base => $query_base, + filter => $query_filter); + if ($search->code) { + $log->logcroak("LDAP query base: '$query_base', filter: '$query_filter' ", + 'failed: ', $search->error); + } + + my %uid2gid; + foreach my $entry ($search->entries) { + $uid2gid{$entry->get_value('uid')} = $entry->get_value('gidNumber'); + } + + return \%uid2gid; +} From 2bb57c589a5d34e2cab1cfabeaf24c4ee1c7f8cc Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 9 Nov 2016 12:40:11 +0000 Subject: [PATCH 02/15] Use ML warehouse to get study information. --- Build.PL | 5 +++-- bin/populate_wtsi_irods_groups.pl | 30 +++++++++++++----------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Build.PL b/Build.PL index 021bb0e6..fd6db991 100644 --- a/Build.PL +++ b/Build.PL @@ -43,10 +43,11 @@ my $build = WTSI::DNAP::Utilities::Build->new 'Set::Scalar' => '>= 1.29', 'Try::Tiny' => '>= 0.22', 'URI' => '>= 1.67', - 'WTSI::DNAP::Utilities' => '>= 0.5.2' + 'WTSI::DNAP::Utilities' => '>= 0.5.2', }, recommends => { - 'Net::LDAP' => '0' + 'Net::LDAP' => '0', + 'WTSI::DNAP::Warehouse::Schema' => '0', }); $build->create_build_script; diff --git a/bin/populate_wtsi_irods_groups.pl b/bin/populate_wtsi_irods_groups.pl index 349240e8..3a898aab 100755 --- a/bin/populate_wtsi_irods_groups.pl +++ b/bin/populate_wtsi_irods_groups.pl @@ -12,7 +12,7 @@ use Net::LDAP; use Readonly; -use npg_warehouse::Schema; +use WTSI::DNAP::Warehouse::Schema; use WTSI::NPG::iRODS::GroupAdmin; our $VERSION = ''; @@ -53,8 +53,8 @@ --dry_run --help Display help. --logconf A log4perl configuration file. Optional. - --study Restrict updates to a study. May be used multiple times - to select more than one study. Optional. + --study Restrict updates to a study identifier. May be used multiple + times to select more than one study. Optional. --verbose Print messages while processing. Optional. WOE @@ -63,7 +63,7 @@ my $dry_run; my $log4perl_config; my $verbose; -my @studies; +my @study_ids; GetOptions('debug' => \$debug, 'dry-run|dry_run' => \$dry_run, @@ -72,7 +72,7 @@ exit 0; }, 'logconf=s' => \$log4perl_config, - 'study=s' => \@studies, + 'study=i' => \@study_ids, 'verbose' => \$verbose) or die "\n$what_on_earth\n"; if ($log4perl_config) { @@ -129,21 +129,17 @@ sub _uid_to_irods_uid { $log->debug("Group '$group' membership ", join q(, ), @uids); } -my $schema = npg_warehouse::Schema->connect; -my $rs; -if (@studies) { - $rs = $schema->resultset(q(CurrentStudy))->search({internal_id => \@studies}); -} -else { - $rs = $schema->resultset(q(CurrentStudy)); -} +my $mlwh = WTSI::DNAP::Warehouse::Schema->connect; +my $query = @study_ids ? {id_study_lims => \@study_ids} : {}; +my $studies = $mlwh->resultset('Study')->search($query, + {order_by => 'id_study_lims'}); my ($group_count, $altered_count) = (0, 0); -while (my $study = $rs->next){ - my $study_id = $study->internal_id; +while (my $study = $studies->next){ + my $study_id = $study->id_study_lims; my $dag_str = $study->data_access_group || q(); - my $is_seq = $study->npg_information->count || - $study->npg_plex_information->count; + my $is_seq = $study->iseq_flowcells->count || + $study->pac_bio_runs->count; $log->debug("Working on study $study_id, SScape data access: '$dag_str'"); From 0a18813d1a8365bbaba8fff0f6ff0a645f4368e1 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 17 Nov 2016 15:03:56 +0000 Subject: [PATCH 03/15] Added metadat for PacBio --- lib/WTSI/NPG/iRODS/Metadata.pm | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/WTSI/NPG/iRODS/Metadata.pm b/lib/WTSI/NPG/iRODS/Metadata.pm index e6cdbb34..01c1c94e 100644 --- a/lib/WTSI/NPG/iRODS/Metadata.pm +++ b/lib/WTSI/NPG/iRODS/Metadata.pm @@ -51,11 +51,21 @@ our @EXPORT = qw( $SEQCHKSUM $TAG $TAG_INDEX + $TAG_SEQUENCE $TARGET $TOTAL_READS $XAHUMAN $YHUMAN + $PACBIO_CELL_INDEX + $PACBIO_COLLECTION_NUMBER + $PACBIO_INSTRUMENT_NAME + $PACBIO_RUN + $PACBIO_SAMPLE_LOAD_NAME + $PACBIO_SET_NUMBER + $PACBIO_SOURCE + $PACBIO_WELL + $ANALYSIS_UUID $INFINIUM_PROJECT_TITLE $INFINIUM_BEADCHIP @@ -118,7 +128,7 @@ our $STUDY_ID = 'study_id'; our $STUDY_NAME = 'study'; our $STUDY_TITLE = 'study_title'; -# Nucleotide sequencing +# Nucleotide sequencing (Illumina) our $ALIGNMENT_FILTER = 'alignment_filter'; our $ALIGNMENT = 'alignment'; our $ALT_PROCESS = 'alt_process'; @@ -135,11 +145,22 @@ our $REFERENCE = 'reference'; our $SEQCHKSUM = 'seqchksum'; our $TAG = 'tag'; our $TAG_INDEX = 'tag_index'; +our $TAG_SEQUENCE = 'tag_sequence'; our $TARGET = 'target'; our $TOTAL_READS = 'total_reads'; our $XAHUMAN = 'xahuman'; our $YHUMAN = 'yhuman'; +# PacBio +our $PACBIO_CELL_INDEX = 'cell_index'; +our $PACBIO_COLLECTION_NUMBER = 'collection_number'; +our $PACBIO_INSTRUMENT_NAME = 'instrument_name'; +our $PACBIO_RUN = 'run'; +our $PACBIO_SAMPLE_LOAD_NAME = 'sample_load_name'; +our $PACBIO_SET_NUMBER = 'set_number'; +our $PACBIO_SOURCE = 'source'; +our $PACBIO_WELL = 'well'; + # Genotyping our $ANALYSIS_UUID = 'analysis_uuid'; our $INFINIUM_PROJECT_TITLE = 'dcterms:title'; From 0ee614597d349e4563e23fe683d8b965ffcb8620 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 17 Nov 2016 15:07:13 +0000 Subject: [PATCH 04/15] Test with Perl 5.22 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 4a843a7a..171c2579 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ language: perl perl: - "5.16" + - "5.22" addons: postgresql: "9.3" From 28c6b4dd782aacc66ea2d678f57bc63477b91238 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 28 Nov 2016 14:48:01 +0000 Subject: [PATCH 05/15] Travis CI: build package under perl v.5.22 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 4a843a7a..01521253 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ language: perl perl: - "5.16" + - "5.22-shrplib" addons: postgresql: "9.3" From fb998a23fcb8413ef5830dbdc81448b2a85c4466 Mon Sep 17 00:00:00 2001 From: Keith James Date: Tue, 8 Nov 2016 17:37:25 +0000 Subject: [PATCH 06/15] Run tests against iRODS 4.1.10 with baton 0.17.0 --- .travis.irodsenv | 3 --- .travis.irodsenv.json | 12 ------------ .travis.server_config | 1 - .travis.setup_irods | 21 --------------------- .travis.yml | 7 +++---- 5 files changed, 3 insertions(+), 41 deletions(-) delete mode 100644 .travis.irodsenv delete mode 100644 .travis.irodsenv.json delete mode 100644 .travis.server_config delete mode 100644 .travis.setup_irods diff --git a/.travis.irodsenv b/.travis.irodsenv deleted file mode 100644 index eef50958..00000000 --- a/.travis.irodsenv +++ /dev/null @@ -1,3 +0,0 @@ -irodsHost=localhost -irodsPort=1247 -irodsZone=tempZone diff --git a/.travis.irodsenv.json b/.travis.irodsenv.json deleted file mode 100644 index 80645a42..00000000 --- a/.travis.irodsenv.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "irods_zone_name": "tempZone", - "irods_host": "localhost", - "irods_port": 1247, - "irods_user_name": "__USER__", - "irods_authentication_file": "__HOME__/.irods/auth_token", - "irods_home": "/tempZone/home/__USER__", - "irods_default_resource": "testResc", - "irods_default_hash_scheme": "MD5", - "irods_match_hash_policy": "strict", - "irods_transfer_buffer_size_for_parallel_transfer_in_megabytes": 4 -} diff --git a/.travis.server_config b/.travis.server_config deleted file mode 100644 index 8d66c4ca..00000000 --- a/.travis.server_config +++ /dev/null @@ -1 +0,0 @@ -. + {default_hash_scheme: "MD5"} diff --git a/.travis.setup_irods b/.travis.setup_irods deleted file mode 100644 index db73a002..00000000 --- a/.travis.setup_irods +++ /dev/null @@ -1,21 +0,0 @@ -irods -irods -tempZone -1247 -20000 -20199 -/var/lib/irods/iRODS/Vault -TEMPORARY_zone_key -TEMPORARY_32byte_negotiation_key -1248 -TEMPORARY__32byte_ctrl_plane_key -off -irods -irods -yes -localhost -5432 -ICAT -irods -irods -yes diff --git a/.travis.yml b/.travis.yml index 4a843a7a..8d34fc03 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,14 +11,14 @@ env: - secure: ZYRGAGHl/9mtiuNtSPhRR34RAqQTX5qMthUO07dytNtle7EPJ+K9tNwT6RvTL6qsNxE0gtvNiAGIZP8aKo/wzEdHKMeJT7E3HaVw/7OQpd/qHegxJlLrkTbo1DlZISM0UgM1u6505ioxzKFed+YaPq+EveHT5V713qkH626GUOw= - PGVERSION="9.3" - JANSSON_VERSION="2.7" - - BATON_VERSION="0.16.4" - - DISPOSABLE_IRODS_VERSION="1.1" + - BATON_VERSION="0.17.0" + - DISPOSABLE_IRODS_VERSION="1.2" - RENCI_FTP_URL=ftp://ftp.renci.org - WTSI_NPG_GITHUB_URL=https://github.com/wtsi-npg matrix: - IRODS_VERSION=3.3.1 IRODS_RIP_DIR=/usr/local/irods - - IRODS_VERSION=4.1.9 PG_PLUGIN_VERSION=1.9 PLATFORM=ubuntu12 + - IRODS_VERSION=4.1.10 PG_PLUGIN_VERSION=1.10 PLATFORM=ubuntu12 before_install: - ./scripts/travis_before_install.sh @@ -33,7 +33,6 @@ script: - ilsresc -l - ./scripts/travis_script.sh - after_success: - ./Build dist - export DIST_FILE=$(ls WTSI-NPG-iRODS-*.tar.gz) From 45a6613cecdbff07fde311e00e1a4c7cc9f8f85e Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 30 Nov 2016 13:58:08 +0000 Subject: [PATCH 07/15] Use temporary staging for iput operations --- lib/WTSI/NPG/iRODS.pm | 161 +++++++++++++++++++++++++++++---- lib/WTSI/NPG/iRODS/Metadata.pm | 2 + 2 files changed, 146 insertions(+), 17 deletions(-) diff --git a/lib/WTSI/NPG/iRODS.pm b/lib/WTSI/NPG/iRODS.pm index 15ff1226..b2c543c2 100644 --- a/lib/WTSI/NPG/iRODS.pm +++ b/lib/WTSI/NPG/iRODS.pm @@ -17,7 +17,7 @@ use Try::Tiny; use WTSI::DNAP::Utilities::Runnable; -use WTSI::NPG::iRODS::Metadata qw($FILE_MD5); +use WTSI::NPG::iRODS::Metadata qw($FILE_MD5 $STAGING); use WTSI::NPG::iRODS::ACLModifier; use WTSI::NPG::iRODS::DataObjectReader; use WTSI::NPG::iRODS::Lister; @@ -58,6 +58,9 @@ our $DEFAULT_CACHE_SIZE = 128; our $OBJECT_PATH = 'OBJECT'; our $COLLECTION_PATH = 'COLLECTION'; +our $STAGING_RAND_MAX = 1024 * 1024 * 1024; +our $STAGING_MAX_TRIES = 2; + our $CALC_CHECKSUM = 1; our $SKIP_CHECKSUM = 0; @@ -1436,19 +1439,23 @@ sub add_object { $checksum_action = $CALC_CHECKSUM; } + $target = $self->_ensure_absolute_path($target); + $self->debug("Adding '$file' as new object '$target'"); + + my $staging_path = $self->_find_staging_path($target); + if (not $staging_path) { + $self->logcroak("Failed to obtain a clear staging path for '$file' ", + " at '$target'"); + } + my @arguments; if ($checksum_action) { push @arguments , '-K'; } - $target = $self->_ensure_absolute_path($target); - $self->debug("Adding '$file' as new object '$target'"); - push @arguments, $file, $target; + $staging_path = $self->_stage_object($file, $staging_path, @arguments); - WTSI::DNAP::Utilities::Runnable->new(executable => $IPUT, - arguments => \@arguments, - environment => $self->environment)->run; - return $target; + return $self->_unstage_object($staging_path, $target); } =head2 replace_object @@ -1489,19 +1496,23 @@ sub replace_object { $checksum_action = $CALC_CHECKSUM; } + $target = $self->_ensure_object_path($target); + $self->debug("Replacing object '$target' with '$file'"); + + my $staging_path = $self->_find_staging_path($target); + if (not $staging_path) { + $self->logcroak("Failed to obtain a clear staging path for '$file' ", + " at '$target'"); + } + my @arguments = ('-f'); if ($checksum_action) { - push @arguments , '-K'; + push @arguments, '-K'; } - $target = $self->_ensure_object_path($target); - $self->debug("Replacing object '$target' with '$file'"); - push @arguments, $file, $target; + $staging_path = $self->_stage_object($file, $staging_path, @arguments); - WTSI::DNAP::Utilities::Runnable->new(executable => $IPUT, - arguments => \@arguments, - environment => $self->environment)->run; - return $target; + return $self->_unstage_object($staging_path, $target); } =head2 copy_object @@ -2517,7 +2528,7 @@ sub _cache_permissions { sub _clear_caches { my ($self, $path) = @_; - $self->debug("Clearing cached path, AVUs abd ACL for '$path'"); + $self->debug("Clearing cached path, AVUs and ACL for '$path'"); $self->_path_cache->remove($path); $self->_permissions_cache->remove($path); $self->_metadata_cache->remove($path); @@ -2525,6 +2536,122 @@ sub _clear_caches { return; } +sub _stage_object { + my ($self, $file, $staging_path, @arguments) = @_; + + defined $file or + $self->logconfess('A defined file argument is required'); + defined $staging_path or + $self->logconfess('A defined staging_path argument is required'); + + my $num_errors = 0; + my $stage_error = q[]; + + try { + $self->debug("Staging '$file' to '$staging_path' with $IPUT"); + WTSI::DNAP::Utilities::Runnable->new + (executable => $IPUT, + arguments => [@arguments, $file, $staging_path], + environment => $self->environment)->run; + } catch { + $num_errors++; + my @stack = split /\n/msx; # Chop up the stack trace + $stage_error = pop @stack; + } finally { + try { + # A failed iput may still leave orphaned replicates + if ($self->is_object($staging_path)) { + # Tag staging file for easy location with a query + $self->add_object_avu($staging_path, $STAGING, 1); + } + } catch { + my @stack = split /\n/msx; + $self->error("Failed to tag staging path '$staging_path': ", pop @stack); + }; + }; + + if ($num_errors > 0) { + $self->logconfess("Failed to stage '$file' to '$staging_path': ", + $stage_error); + } + + return $staging_path; +} + +sub _unstage_object { + my ($self, $staging_path, $target) = @_; + + defined $staging_path or + $self->logconfess('A defined staging_path argument is required'); + defined $target or + $self->logconfess('A defined target argument is required'); + + if (not $self->is_object($staging_path)) { + $self->logconfess("Staging path '$staging_path' is not a data object"); + } + + my $num_errors = 0; + my $unstage_error = q[]; + + try { + # imv will not overwrite an existing data object so we must copy + # all its metadata to the staged file and then remove it + if ($self->is_object($target)) { + my @target_meta = $self->get_object_meta($target); + + # This includes target=1 so we are accepting a race condition + # between customer queries and the file move below + foreach my $avu (@target_meta) { + $self->add_object_avu($staging_path, $avu->{attribute}, + $avu->{value}, $avu->{units}); + } + $self->remove_object($target); + } + + $self->debug("Unstaging '$staging_path' to '$target' with $IMV"); + WTSI::DNAP::Utilities::Runnable->new + (executable => $IMV, + arguments => [$staging_path, $target], + environment => $self->environment)->run; + + # Untag the unstaged file + $self->remove_object_avu($target, $STAGING, 1); + } catch { + $num_errors++; + my @stack = split /\n/msx; + $self->error("Failed to move '$staging_path' to '$target': ", pop @stack); + }; + + if ($num_errors > 0) { + $self->logconfess("Failed to unstage '$staging_path' to '$target': ", + $unstage_error); + } + + return $target; +} + +sub _find_staging_path { + my ($self, $target) = @_; + + defined $target or + $self->logconfess('A defined target argument is required'); + + my $staging_path; + + foreach my $try (1 .. $STAGING_MAX_TRIES) { + my $path = sprintf "%s.%d", $target, int rand $STAGING_RAND_MAX; + if ($self->is_object($path)) { + $self->warn("Path $try '$path' for '$target' is not free"); + } + else { + $self->debug("Path $try available '$path' for '$target'"); + $staging_path = $path; + } + } + + return $staging_path; +} + sub _build_irods_major_version { my ($self) = @_; diff --git a/lib/WTSI/NPG/iRODS/Metadata.pm b/lib/WTSI/NPG/iRODS/Metadata.pm index e6cdbb34..64ba55c4 100644 --- a/lib/WTSI/NPG/iRODS/Metadata.pm +++ b/lib/WTSI/NPG/iRODS/Metadata.pm @@ -18,6 +18,7 @@ our @EXPORT = qw( $FILE_TYPE $QC_STATE $RT_TICKET + $STAGING $SAMPLE_ACCESSION_NUMBER $SAMPLE_COHORT @@ -100,6 +101,7 @@ our $FILE_MD5 = 'md5'; our $FILE_TYPE = 'type'; our $QC_STATE = 'manual_qc'; our $RT_TICKET = 'rt_ticket'; +our $STAGING = 'staging'; # LIMS metadata our $SAMPLE_ACCESSION_NUMBER = 'sample_accession_number'; From 31b549e6234d359f317acd7581c99e5ad292637a Mon Sep 17 00:00:00 2001 From: Keith James Date: Fri, 2 Dec 2016 12:49:14 +0000 Subject: [PATCH 08/15] Use ou=people,dc=sanger,dc=ac,dc=uk to query users Do not restrict the query to real people --- bin/populate_wtsi_irods_groups.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/populate_wtsi_irods_groups.pl b/bin/populate_wtsi_irods_groups.pl index 3a898aab..be6b3699 100755 --- a/bin/populate_wtsi_irods_groups.pl +++ b/bin/populate_wtsi_irods_groups.pl @@ -203,8 +203,8 @@ sub find_group_ids { sub find_primary_gid { my ($ld) = @_; - my $query_base = 'ou=group,dc=sanger,dc=ac,dc=uk'; - my $query_filter = '&(sangerActiveAccount=TRUE)(sangerRealPerson=TRUE)'; + my $query_base = 'ou=people,dc=sanger,dc=ac,dc=uk'; + my $query_filter = '(sangerActiveAccount=TRUE)'; my $search = $ld->search(base => $query_base, filter => $query_filter); if ($search->code) { From 938655d7dacc443e506300324449b8bda76e22ce Mon Sep 17 00:00:00 2001 From: Keith James Date: Mon, 5 Dec 2016 16:25:31 +0000 Subject: [PATCH 09/15] Run scripts under -u --- scripts/travis_before_install.sh | 2 +- scripts/travis_install.sh | 4 +++- scripts/travis_script.sh | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/travis_before_install.sh b/scripts/travis_before_install.sh index 11322209..7adaec73 100755 --- a/scripts/travis_before_install.sh +++ b/scripts/travis_before_install.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -e -x +set -e -u -x sudo apt-get update -qq diff --git a/scripts/travis_install.sh b/scripts/travis_install.sh index 5a92ddd0..479bcd04 100755 --- a/scripts/travis_install.sh +++ b/scripts/travis_install.sh @@ -1,10 +1,11 @@ #!/bin/bash -set -e -x +set -e -u -x # The default build branch for all repositories. This defaults to # TRAVIS_BRANCH unless set in the Travis build environment. WTSI_NPG_BUILD_BRANCH=${WTSI_NPG_BUILD_BRANCH:=$TRAVIS_BRANCH} +IRODS_RIP_DIR=${IRODS_RIP_DIR:+$IRODS_RIP_DIR} sudo apt-get install -qq odbc-postgresql unixodbc-dev @@ -43,6 +44,7 @@ sudo ldconfig # WTSI NPG Perl repo dependencies, only one at the moment +repos="" for repo in perl-dnap-utilities; do cd /tmp # Always clone master when using depth 1 to get current tag diff --git a/scripts/travis_script.sh b/scripts/travis_script.sh index 659e620a..aac6d6ba 100755 --- a/scripts/travis_script.sh +++ b/scripts/travis_script.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -e -x +set -e -u -x export TEST_AUTHOR=1 export WTSI_NPG_iRODS_Test_irodsEnvFile=$HOME/.irods/.irodsEnv From 38a7b617300648bb5a19f0e98f5aeb1ffe517bbf Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 6 Dec 2016 16:09:31 +0000 Subject: [PATCH 10/15] sudo required in Travis config --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index bdacffd3..98b3e26e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: perl +sudo: required perl: - "5.16" From 4d2a1e2da8d19821472db243915f0fe02fc32436 Mon Sep 17 00:00:00 2001 From: David K Jackson Date: Tue, 6 Dec 2016 21:49:22 +0000 Subject: [PATCH 11/15] Add File::Copy::Recursive dependency for tests --- Build.PL | 1 + 1 file changed, 1 insertion(+) diff --git a/Build.PL b/Build.PL index fd6db991..acaaafa5 100644 --- a/Build.PL +++ b/Build.PL @@ -19,6 +19,7 @@ my $build = WTSI::DNAP::Utilities::Build->new 'Module::Build' => '>= 0.42' }, build_requires => { + 'File::Copy::Recursive' => '>= 0.38', 'Test::Perl::Critic' => '0', 'TAP::Harness' => '>= 3.30', 'Test::Class' => '>= 0.41', From 04d5a44cd13008b6ad29fad8eae6d8e351c342bd Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 7 Dec 2016 12:46:51 +0000 Subject: [PATCH 12/15] Fix for staging iput where the target path is a collection --- lib/WTSI/NPG/iRODS.pm | 31 +++++++++++++++++++------------ t/lib/WTSI/NPG/iRODSTest.pm | 22 ++++++++++++++-------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/lib/WTSI/NPG/iRODS.pm b/lib/WTSI/NPG/iRODS.pm index b2c543c2..c2f2ea4b 100644 --- a/lib/WTSI/NPG/iRODS.pm +++ b/lib/WTSI/NPG/iRODS.pm @@ -7,8 +7,8 @@ use DateTime; use Data::Dump qw(pp); use Encode qw(decode); use English qw(-no_match_vars); -use File::Basename qw(basename); -use File::Spec; +use File::Basename qw(basename fileparse); +use File::Spec::Functions qw(canonpath catfile splitdir); use List::AllUtils qw(any uniq); use Log::Log4perl::Level; use Moose; @@ -409,7 +409,7 @@ sub absolute_path { defined $path or $self->logconfess('A defined path argument is required'); $path or $self->logconfess('A non-empty path argument is required'); - $path = File::Spec->canonpath($path); + $path = canonpath($path); return $self->_ensure_absolute_path($path); } @@ -533,7 +533,7 @@ sub find_zone_name { defined $path or $self->logconfess('A defined path argument is required'); - $path = File::Spec->canonpath($path); + $path = canonpath($path); my $abs_path = $self->_ensure_absolute_path($path); $abs_path =~ s/^\///msx; @@ -545,7 +545,7 @@ sub find_zone_name { $abs_path = $self->working_collection; } - my @path = grep { $_ ne q{} } File::Spec->splitdir($abs_path); + my @path = grep { $_ ne q{} } splitdir($abs_path); unless (@path) { $self->logconfess("Failed to parse iRODS zone from path '$path'"); } @@ -736,7 +736,7 @@ sub is_collection { $path eq q{} and $self->logconfess('A non-empty path argument is required'); - $path = File::Spec->canonpath($path); + $path = canonpath($path); $path = $self->_ensure_absolute_path($path); return $self->lister->is_collection($path); @@ -764,7 +764,7 @@ sub list_collection { $collection eq q{} and $self->logconfess('A non-empty collection argument is required'); - $collection = File::Spec->canonpath($collection); + $collection = canonpath($collection); $collection = $self->_ensure_absolute_path($collection); # TODO: We could check that the collection exists here. However, @@ -796,7 +796,7 @@ sub add_collection { $collection eq q{} and $self->logconfess('A non-empty collection argument is required'); - $collection = File::Spec->canonpath($collection); + $collection = canonpath($collection); $collection = $self->_ensure_absolute_path($collection); $self->debug("Adding collection '$collection'"); @@ -831,7 +831,7 @@ sub put_collection { $self->logconfess('A non-empty target (collection) argument is required'); # iput does not accept trailing slashes on directories - $dir = File::Spec->canonpath($dir); + $dir = canonpath($dir); $target = $self->_ensure_collection_path($target); $self->debug("Putting directory '$dir' into collection '$target'"); @@ -868,7 +868,7 @@ sub move_collection { $self->logconfess('A non-empty target (collection) argument is required'); $source = $self->_ensure_collection_path($source); - $target = File::Spec->canonpath($target); + $target = canonpath($target); $target = $self->_ensure_absolute_path($target); $self->debug("Moving collection from '$source' to '$target'"); @@ -904,7 +904,7 @@ sub get_collection { $self->logconfess('A non-empty target (directory) argument is required'); $source = $self->_ensure_collection_path($source); - $target = File::Spec->canonpath($target); + $target = canonpath($target); $self->debug("Getting from '$source' to '$target'"); my @args = ('-r', '-f', $source, $target); @@ -1319,7 +1319,7 @@ sub is_object { $path eq q{} and $self->logconfess('A non-empty path argument is required'); - $path = File::Spec->canonpath($path); + $path = canonpath($path); $path = $self->_ensure_absolute_path($path); my $is_object = 0; @@ -1440,6 +1440,13 @@ sub add_object { } $target = $self->_ensure_absolute_path($target); + + # Account for the target being a collection + if ($self->is_collection($target)) { + my ($file_name, $directories, $suffix) = fileparse($file); + $target = catfile($target, $file_name); + } + $self->debug("Adding '$file' as new object '$target'"); my $staging_path = $self->_find_staging_path($target); diff --git a/t/lib/WTSI/NPG/iRODSTest.pm b/t/lib/WTSI/NPG/iRODSTest.pm index 543a34b4..44ea4513 100644 --- a/t/lib/WTSI/NPG/iRODSTest.pm +++ b/t/lib/WTSI/NPG/iRODSTest.pm @@ -869,23 +869,29 @@ sub read_object : Test(2) { 'Read expected object contents') or diag explain $content; } -sub add_object : Test(7) { +sub add_object : Test(9) { my $irods = WTSI::NPG::iRODS->new(environment => \%ENV, strict_baton_version => 0); my $lorem_file = "$data_path/lorem.txt"; - my $lorem_object = "$irods_tmp_coll/lorem_added.txt"; - is($irods->add_object($lorem_file, $lorem_object), $lorem_object, - 'Added a data object'); - is($irods->list_object($lorem_object), $lorem_object, - 'Found the new data object'); + my $implicit_path = "$irods_tmp_coll/lorem.txt"; + is($irods->add_object($lorem_file, $irods_tmp_coll), $implicit_path, + 'Added a data object with a collection target path'); + is($irods->list_object($implicit_path), $implicit_path, + 'Found the new data object with an implicit path'); + + my $explicit_path = "$irods_tmp_coll/lorem_added.txt"; + is($irods->add_object($lorem_file, $explicit_path), $explicit_path, + 'Added a data object with an object target path'); + is($irods->list_object($explicit_path), $explicit_path, + 'Found the new data object with an explicit path'); TODO: { local $TODO = 'Testing for a checksum will create a checksum if ' . 'it does not exist. Requires a change in baton to test effectively'; - is($irods->checksum($lorem_object), '39a4aa291ca849d601e4e5b8ed627a04', + is($irods->checksum($explicit_path), '39a4aa291ca849d601e4e5b8ed627a04', 'Checksum created by default'); } @@ -905,7 +911,7 @@ sub add_object : Test(7) { dies_ok { $irods->add_object } 'Failed to add an undefined object'; - dies_ok { $irods->add_object($lorem_file, $lorem_object, + dies_ok { $irods->add_object($lorem_file, $explicit_path, 'invalid checksum action') } 'Failed on invalid checksum option'; } From 2efe51d37a30ab61ded0365b1f768b27852238bd Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 7 Dec 2016 18:38:12 +0000 Subject: [PATCH 13/15] Added an undef check for cases where a user's gidNumber doesn't have a corresponding Unix group. --- bin/populate_wtsi_irods_groups.pl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/populate_wtsi_irods_groups.pl b/bin/populate_wtsi_irods_groups.pl index be6b3699..52ab8f1b 100755 --- a/bin/populate_wtsi_irods_groups.pl +++ b/bin/populate_wtsi_irods_groups.pl @@ -120,7 +120,12 @@ sub _uid_to_irods_uid { foreach my $uid (keys %{$uid2gid}) { my $gid = $uid2gid->{$uid}; my $primary_group = $gid2group->{$gid}; - push @{$group2uids->{$primary_group}}, $uid; + + # Some users in LDAP have a gidNumber that does not correspond to a + # Unix group + if (defined $primary_group) { + push @{$group2uids->{$primary_group}}, $uid; + } } foreach my $group (keys %{$group2uids}){ @@ -193,6 +198,7 @@ sub find_group_ids { my $group = $entry->get_value('cn'); my $gid = $entry->get_value('gidNumber'); my @uids = $entry->get_value('memberUid'); + $group2uids{$group} = \@uids; $gid2group{$gid} = $group; } From fe7e38b0fc57bee190c603debe5eba2b9c6599c0 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 8 Dec 2016 09:30:25 +0000 Subject: [PATCH 14/15] Improved the handling of file suffix metadata. Archive suffixes are recognised. Custom suffixes no longer replace defaults. --- lib/WTSI/NPG/iRODS/Annotator.pm | 29 ++++++++++++--------- t/lib/WTSI/NPG/iRODS/AnnotatorTest.pm | 37 ++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/lib/WTSI/NPG/iRODS/Annotator.pm b/lib/WTSI/NPG/iRODS/Annotator.pm index c163a843..4eca22bc 100644 --- a/lib/WTSI/NPG/iRODS/Annotator.pm +++ b/lib/WTSI/NPG/iRODS/Annotator.pm @@ -1,14 +1,16 @@ package WTSI::NPG::iRODS::Annotator; -use Data::Dump qw[pp]; use DateTime; +use List::AllUtils qw[uniq]; use Moose::Role; use WTSI::NPG::iRODS::Metadata; our $VERSION = ''; -our @GENERAL_PURPOSE_SUFFIXES = qw[bin csv h5 tgz tif tsv txt xls xlsx xml]; +our @COMPRESSION_SUFFIXES = qw[bz2 gz xz zip]; + +our @GENERAL_PURPOSE_SUFFIXES = qw[bin csv h5 tar tgz tif tsv txt xls xlsx xml]; our @GENO_DATA_SUFFIXES = qw[gtc idat]; our @HTS_DATA_SUFFIXES = qw[bam cram bai crai]; our @HTS_ANCILLARY_SUFFIXES = qw[bamcheck bed flagstat json seqchksum @@ -18,14 +20,14 @@ our @DEFAULT_FILE_SUFFIXES = (@GENERAL_PURPOSE_SUFFIXES, @GENO_DATA_SUFFIXES, @HTS_DATA_SUFFIXES, @HTS_ANCILLARY_SUFFIXES); -our $SUFFIX_PATTERN = join q[|], @DEFAULT_FILE_SUFFIXES; -our $SUFFIX_REGEX = qr{[.]($SUFFIX_PATTERN)$}msx; with qw[ WTSI::DNAP::Utilities::Loggable WTSI::NPG::iRODS::Utilities ]; +my $COMPRESSION_PATTERN = join q[|], @COMPRESSION_SUFFIXES; + # See http://dublincore.org/documents/dcmi-terms/ =head2 make_creation_metadata @@ -82,7 +84,7 @@ sub make_modification_metadata { Arg [1] : File name, Str. Arg [2] : Array of valid file suffix strings, Str. Optional - Example : my @avus = $ann->make_type_metadata($sample, '.txt', '.csv') + Example : my @avus = $ann->make_type_metadata($sample, 'txt', 'csv') Description: Return an array of metadata AVUs describing the file 'type' (represented by its suffix). Returntype : Array[HashRef] @@ -95,17 +97,20 @@ sub make_type_metadata { defined $file or $self->logconfess('A defined file argument is required'); $file eq q[] and $self->logconfess('A non-empty file argument is required'); - if (not @suffixes) { - @suffixes = @DEFAULT_FILE_SUFFIXES; - } + my @valid_suffixes = uniq (@DEFAULT_FILE_SUFFIXES, @suffixes); - my ($suffix) = $file =~ $SUFFIX_REGEX; + my $suffix_pattern = join q[|], @valid_suffixes; + my $suffix_regex = qr{[.] # Don't capture the suffix dot + ( + ($suffix_pattern) + ([.]($COMPRESSION_PATTERN))* + )$}msx; + my ($suffix) = $file =~ $suffix_regex; my @avus; if ($suffix) { - my ($base_suffix) = $suffix =~ m{^[.]?(.*)}msx; - $self->debug("Parsed base suffix of '$file' as '$base_suffix'"); - push @avus, $self->make_avu($FILE_TYPE, $base_suffix); + $self->debug("Parsed base suffix of '$file' as '$suffix'"); + push @avus, $self->make_avu($FILE_TYPE, $suffix); } else { $self->debug("Did not parse a suffix from '$file'"); diff --git a/t/lib/WTSI/NPG/iRODS/AnnotatorTest.pm b/t/lib/WTSI/NPG/iRODS/AnnotatorTest.pm index ea8f01ef..342cc015 100644 --- a/t/lib/WTSI/NPG/iRODS/AnnotatorTest.pm +++ b/t/lib/WTSI/NPG/iRODS/AnnotatorTest.pm @@ -67,13 +67,36 @@ sub make_modification_metadata : Test(2) { 'Dies on undefined modification time'; } -sub make_type_metadata : Test(2) { - - my @expected_type = ({attribute => $FILE_TYPE, - value => 'cram'}); - my @observed_type = TestAnnotator->new->make_type_metadata('test.cram'); - - is_deeply(\@observed_type, \@expected_type) or diag explain \@observed_type; +sub make_type_metadata : Test(12) { + + # Some of these don't make pactical sense, but should be allowed if + # encountered + my %expected = + ( + 'test.txt' => 'txt', + 'test.txt.gz' => 'txt.gz', + 'test.txt.gz.bz2' => 'txt.gz.bz2', + 'test.txt.gz.bz2.tar' => 'tar', + 'test.foo.txt' => 'txt', + 'test.gz.txt' => 'txt', + 'gz.txt' => 'txt', + 'test.custom1' => 'custom1', + 'test.custom2' => 'custom2', + 'test.custom1.gz' => 'custom1.gz', + 'test.custom2.gz' => 'custom2.gz', + ); + + while (my ($file, $suffix) = each %expected) { + my @expected_type = ({attribute => $FILE_TYPE, + value => $suffix}); + my @observed_type = TestAnnotator->new->make_type_metadata($file, + 'custom1', + 'custom2'); + + is_deeply(\@observed_type, \@expected_type, + "File '$file' has type suffix '$suffix'") + or diag explain \@observed_type; + } my @no_type = TestAnnotator->new->make_type_metadata('test.z', '.a', '.b'); is_deeply(\@no_type, [], 'No type metadata if type not recognised') From 9700e076edbe84fcc00e45ea91efb9b0398ac26c Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Fri, 9 Dec 2016 11:15:20 +0000 Subject: [PATCH 15/15] max version of baton incremented to 0.17.1 --- .travis.yml | 2 +- Changes | 3 +++ README | 2 +- lib/WTSI/NPG/iRODS.pm | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index fa44d594..aa28f384 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ env: - secure: ZYRGAGHl/9mtiuNtSPhRR34RAqQTX5qMthUO07dytNtle7EPJ+K9tNwT6RvTL6qsNxE0gtvNiAGIZP8aKo/wzEdHKMeJT7E3HaVw/7OQpd/qHegxJlLrkTbo1DlZISM0UgM1u6505ioxzKFed+YaPq+EveHT5V713qkH626GUOw= - PGVERSION="9.3" - JANSSON_VERSION="2.7" - - BATON_VERSION="0.17.0" + - BATON_VERSION="0.17.1" - DISPOSABLE_IRODS_VERSION="1.2" - RENCI_FTP_URL=ftp://ftp.renci.org - WTSI_NPG_GITHUB_URL=https://github.com/wtsi-npg diff --git a/Changes b/Changes index 6f96d416..033adec7 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,6 @@ +Release 2.6.1 + - Support baton versions >=0.16.4 and <=0.17.1 + Release 2.6.0 - Bugfix; correctly report multiple AVUs found for a given attribute diff --git a/README b/README index 1c15b73f..4230f2b7 100644 --- a/README +++ b/README @@ -24,4 +24,4 @@ iRODS https://github.com/irods/irods-legacy baton https://github.com/wtsi-npg/baton - Version 0.16.4 - 0.17.0 + Version 0.16.4 - 0.17.1 diff --git a/lib/WTSI/NPG/iRODS.pm b/lib/WTSI/NPG/iRODS.pm index c2f2ea4b..6fd1db2a 100644 --- a/lib/WTSI/NPG/iRODS.pm +++ b/lib/WTSI/NPG/iRODS.pm @@ -30,7 +30,7 @@ with 'WTSI::DNAP::Utilities::Loggable', 'WTSI::NPG::iRODS::Utilities'; our $VERSION = ''; -our $MAX_BATON_VERSION = '0.17.0'; +our $MAX_BATON_VERSION = '0.17.1'; our $MIN_BATON_VERSION = '0.16.4'; our $IADMIN = 'iadmin';