Skip to content

Commit

Permalink
Add support for custom root directory to resolve relative paths (
Browse files Browse the repository at this point in the history
nextflow-io#3942)

This PR introduces the environment variable `NXF_FILE_ROOT`, which can be used
to define the root path against which relative paths are resolved.

For example, having defined the variable `NXF_FILE_ROOT=s3://my-bucket/data`,
the use of `file('foo.txt')` will result in the path `s3://my-bucket/data/foo.txt`.

This behaviour applies to file paths resolved via the Nextflow built-in functions `file`, `files`,
`channel.fromPath`, `channel.fromFilePairs` and the `publishDir` directive.
  • Loading branch information
pditommaso authored and abhi18av committed Oct 28, 2023
1 parent 899bb50 commit aee83ff
Show file tree
Hide file tree
Showing 13 changed files with 380 additions and 41 deletions.
5 changes: 5 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,11 @@ The following environment variables control the configuration of the Nextflow ru
`NXF_WORK`
: Directory where working files are stored (usually your *scratch* directory)

`NXF_FILE_ROOT`
: The file storage path against which relative file paths are resolved. For example, having defined the variable `NXF_FILE_ROOT=/some/root/path`,
the use of `file('foo')` will be resolved to the absolute path `/some/root/path/foo`. A remote root path can be specified using the
usual protocol prefix e.g. `NXF_FILE_ROOT=s3://my-bucket/data`. Files defined using an absolute path are not affected by this setting.

`JAVA_HOME`
: Defines the path location of the Java VM installation used to run Nextflow.

Expand Down
41 changes: 23 additions & 18 deletions modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

package nextflow

import java.nio.file.FileSystem
import static nextflow.file.FileHelper.*

import java.nio.file.Files
import java.nio.file.NoSuchFileException
import java.nio.file.Path
Expand All @@ -39,7 +40,6 @@ import nextflow.util.ArrayTuple
import nextflow.util.CacheHelper
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import static nextflow.file.FileHelper.isGlobAllowed
/**
* Defines the main methods imported by default in the script scope
*
Expand All @@ -57,21 +57,18 @@ class Nextflow {
private static final Random random = new Random()


static private fileNamePattern( FilePatternSplitter splitter, Map opts, FileSystem fs ) {
static private fileNamePattern( FilePatternSplitter splitter, Map opts ) {

final scheme = splitter.scheme
final folder = splitter.parent
final folder = toCanonicalPath(splitter.parent)
final pattern = splitter.fileName

if( !fs )
fs = FileHelper.fileSystemForScheme(scheme)

if( opts == null ) opts = [:]
if( !opts.type ) opts.type = 'file'

def result = new LinkedList()
try {
FileHelper.visitFiles(opts, fs.getPath(folder), pattern) { Path it -> result.add(it) }
FileHelper.visitFiles(opts, folder, pattern) { Path it -> result.add(it) }
}
catch (NoSuchFileException e) {
log.debug "No such file or directory: $folder -- Skipping visit"
Expand All @@ -80,6 +77,18 @@ class Nextflow {

}

/**
 * Convert a path-like value to its string form.
 *
 * @param value A {@code CharSequence}, {@code File} or {@code Path} object; {@code null} is accepted
 * @return {@code null} when the value is {@code null}; the {@code toString()} result for character
 *      sequences and {@code File} objects; the {@code toUriString()} result for {@code Path} objects
 * @throws IllegalArgumentException when the value is of any other type
 */
static private String str0(value) {
    if( value == null )
        return null
    // character sequences and `File` objects are both rendered via their plain string form
    if( value instanceof CharSequence || value instanceof File )
        return value.toString()
    if( value instanceof Path )
        return value.toUriString()
    throw new IllegalArgumentException("Invalid file path type - offending value: $value [${value.getClass().getName()}]")
}

/**
* Get one or more file object given the specified path or glob pattern.
*
Expand All @@ -101,23 +110,19 @@ class Nextflow {
final path = filePattern as Path
final glob = options?.containsKey('glob') ? options.glob as boolean : isGlobAllowed(path)
if( !glob ) {
return FileHelper.checkIfExists(path, options)
return checkIfExists(path, options)
}

// if it isn't a glob pattern simply return it as a normalized absolute Path object
def splitter = FilePatternSplitter.glob().parse(path.toString())
final strPattern = str0(filePattern)
final splitter = FilePatternSplitter.glob().parse(strPattern)
if( !splitter.isPattern() ) {
def normalised = splitter.strip(path.toString())
if( path instanceof Path ) {
return FileHelper.checkIfExists(path.fileSystem.getPath(normalised), options)
}
else {
return FileHelper.checkIfExists(FileHelper.asPath(normalised), options)
}
final normalised = splitter.strip(strPattern)
return checkIfExists(asPath(normalised), options)
}

// resolve the glob pattern returning all matches
return fileNamePattern(splitter, options, path.getFileSystem())
return fileNamePattern(splitter, options)
}

static files( Map options=null, def path ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class PathVisitor {
final path = filePattern.toString()
final splitter = FilePatternSplitter.glob().parse(path)

if( !splitter.isPattern() ) {
if( !splitter.isPattern() ) {
final result = fs.getPath( splitter.strip(path) )
emit0(checkIfExists(result, opts))
close0()
Expand Down Expand Up @@ -146,7 +146,7 @@ class PathVisitor {
log.debug "files for syntax: $syntax; folder: $folder; pattern: $pattern; options: ${opts}"

// now apply glob file search
final path = fs.getPath(folder).complete()
final path = toCanonicalPath(fs.getPath(folder))

if( opts == null )
opts = [:]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,11 @@ class PublishDir {
@Lazy
private ExecutorService threadPool = { def sess = Global.session as Session; sess.publishDirExecutorService() }()

void setPath( Closure obj ) {
setPath( obj.call() as Path )
}

void setPath( String str ) {
nullPathWarn = checkNull(str)
setPath(str as Path)
}

void setPath( Path obj ) {
this.path = obj.complete()
/**
 * Set the publish target path.
 *
 * @param value
 *      The target path, or a {@code Closure} whose invocation result is used as the target path.
 *      When the target is a {@code String} or {@code GString}, it is also passed to {@code checkNull}
 *      and the outcome is stored in {@code nullPathWarn}.
 */
void setPath( def value ) {
    def target = value
    // a closure is evaluated and its result is used in place of the closure itself
    if( value instanceof Closure )
        target = value.call()
    final isText = target instanceof String || target instanceof GString
    if( isText )
        nullPathWarn = checkNull(target.toString())
    this.path = FileHelper.toCanonicalPath(target)
}

void setMode( String str ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package nextflow.util
import groovy.transform.Canonical
import groovy.transform.CompileStatic
import groovy.transform.EqualsAndHashCode
import nextflow.file.FileHelper

/**
* Split a path into two paths, the first component which may include the host name if it's a remote
Expand All @@ -35,7 +36,7 @@ class PathSplitter {
List<String> tail

static PathSplitter parse(String path) {
final baseUrl = StringUtils.baseUrl(path)
final baseUrl = FileHelper.baseUrl(path)
if( !baseUrl )
return split0(path, 0)

Expand Down
78 changes: 78 additions & 0 deletions modules/nextflow/src/test/groovy/nextflow/ChannelTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,59 @@ class ChannelTest extends Specification {

}

/*
 * Checks `Channel.fromPath` with a relative glob pattern while `NXF_FILE_ROOT`
 * is set (via `SysEnv.push`) to the temporary folder holding the test files
 */
def testFromRelativePathWithGlob() {
    given:
    def folder = tempDir.root
    def file1 = Files.createFile(folder.resolve('file1.txt'))
    def file2 = Files.createFile(folder.resolve('file2.txt'))
    // decoy file: it does not match `*.txt` and is not expected in the results
    Files.createFile(folder.resolve('file3.log'))
    and:
    SysEnv.push(NXF_FILE_ROOT: folder.toString())

    when:
    // emitted items are converted with `toString()`, hence the list holds strings
    List<String> result = Channel
            .fromPath( '*.txt' )
            .toSortedList().getVal().collect { it.toString() }
    then:
    result == [ file1.toString(), file2.toString() ]

    when:
    result = Channel
            .fromPath( '*.txt', relative: true )
            .toSortedList().getVal().collect { it.toString() }
    then:
    // with `relative: true` only the file names are expected
    result == [ file1.name, file2.name ]

    cleanup:
    // undo the `SysEnv.push` made in the `given` block
    SysEnv.pop()
}

/*
 * Checks `Channel.fromPath` with a relative plain file name while `NXF_FILE_ROOT`
 * is set (via `SysEnv.push`) to the temporary folder holding the test files
 */
def testFromRelativePathWithFileName() {
    given:
    def folder = tempDir.root
    // sibling files that are not expected in the results
    Files.createFile(folder.resolve('file1.txt'))
    Files.createFile(folder.resolve('file2.txt'))
    def file3 = Files.createFile(folder.resolve('file3.log'))
    and:
    SysEnv.push(NXF_FILE_ROOT: folder.toString())

    when:
    // emitted items are converted with `toString()`, hence the list holds strings
    List<String> result = Channel
            .fromPath( 'file3.log' )
            .toSortedList().getVal().collect { it.toString() }
    then:
    result == [ file3.toString() ]

    when:
    result = Channel
            .fromPath( 'file3.log', relative: true )
            .toSortedList().getVal().collect { it.toString() }
    then:
    // with `relative: true` only the file name is expected
    result == [ file3.name ]

    cleanup:
    // undo the `SysEnv.push` made in the `given` block
    SysEnv.pop()
}

def testFromPathWithLinks() {

Expand Down Expand Up @@ -704,6 +757,31 @@ class ChannelTest extends Specification {
pairs.val == Channel.STOP
}

// Checks `Channel.fromFilePairs` with a relative glob pattern while `NXF_FILE_ROOT`
// is set (via `SysEnv.push`) to the temporary folder holding the test files
def 'should group files with the same prefix and root path' () {

setup:
def folder = tempDir.root.toAbsolutePath()
// three pairs of files sharing the `aa`, `xx` and `zz` prefixes
def a1 = Files.createFile(folder.resolve('aa_1.fa'))
def a2 = Files.createFile(folder.resolve('aa_2.fa'))
def x1 = Files.createFile(folder.resolve('xx_1.fa'))
def x2 = Files.createFile(folder.resolve('xx_2.fa'))
def z1 = Files.createFile(folder.resolve('zz_1.fa'))
def z2 = Files.createFile(folder.resolve('zz_2.fa'))
and:
SysEnv.push(NXF_FILE_ROOT: folder.toString())

when:
// results are sorted by their first element to make the comparison order stable
def pairs = Channel .fromFilePairs("*_{1,2}.*") .toList(). getVal() .sort { it[0] }
then:
// each entry is expected to hold the shared prefix followed by the two matching files
pairs == [
['aa', [a1, a2]],
['xx', [x1, x2]],
['zz', [z1, z2]] ]

cleanup:
// undo the `SysEnv.push` made in the `setup` block
SysEnv.pop()
}

def 'should group files with the same prefix using a custom grouping' () {

setup:
Expand Down
16 changes: 14 additions & 2 deletions modules/nextflow/src/test/groovy/nextflow/NextflowTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

package nextflow

import java.nio.file.Files
import java.nio.file.NoSuchFileException
import java.nio.file.Paths
Expand All @@ -35,12 +36,10 @@ class NextflowTest extends Specification {
}

// Checks the `java.io.File` obtained from `Nextflow.file` for a bare file name,
// a nested relative path and an absolute path
def testFile() {

expect:
// a bare file name is expected to equal the canonical file of the same name
Nextflow.file('file.log').toFile() == new File('file.log').canonicalFile
// a nested relative path is expected to sit under the canonical current directory
Nextflow.file('relative/file.test').toFile() == new File( new File('.').canonicalFile, 'relative/file.test')
// an absolute path is expected to be returned as given
Nextflow.file('/user/home/file.log').toFile() == new File('/user/home/file.log')

}

def testFile2() {
Expand All @@ -56,6 +55,19 @@ class NextflowTest extends Specification {

}

// Checks `Nextflow.file` while `NXF_FILE_ROOT` is set to `/some/base/dir` via `SysEnv.push`
def 'should resolve rel paths against env base' () {
given:
SysEnv.push(NXF_FILE_ROOT: '/some/base/dir')

expect:
// an absolute path is expected to be returned unchanged
Nextflow.file( '/abs/path/file.txt' ) == Paths.get('/abs/path/file.txt')
and:
// a relative path is expected to be resolved against the pushed root
Nextflow.file( 'file.txt' ) == Paths.get('/some/base/dir/file.txt')

cleanup:
// undo the `SysEnv.push` made in the `given` block
SysEnv.pop()
}

def testFile3() {
Exception e
when:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ class PathSplitterTest extends Specification {
'/foo/bar/baz' | new PathSplitter('/foo', ['bar','baz'])
'foo/bar/baz/' | new PathSplitter('foo', ['bar','baz'])
'/foo/bar/baz/' | new PathSplitter('/foo', ['bar','baz'])
'/foo/x/y/z' | new PathSplitter('/foo', ['x','y','z'])
and:
'file:/foo' | new PathSplitter('file:/foo', null)
'file:/foo/x/y/z' | new PathSplitter('file:/foo', ['x','y','z'])
'file:///foo' | new PathSplitter('file:///foo', null)
'file:///foo/x/y/z' | new PathSplitter('file:///foo', ['x','y','z'])
and:
's3://my-bucket' | new PathSplitter('s3://my-bucket')
's3://my-bucket/' | new PathSplitter('s3://my-bucket/')
Expand Down
Loading

0 comments on commit aee83ff

Please sign in to comment.