diff --git a/plugins/nf-ga4gh/build.gradle b/plugins/nf-ga4gh/build.gradle index 0a70f75..2309280 100644 --- a/plugins/nf-ga4gh/build.gradle +++ b/plugins/nf-ga4gh/build.gradle @@ -66,6 +66,10 @@ dependencies { api 'com.google.code.gson:gson:2.10.1' api 'io.gsonfire:gson-fire:1.8.3' api 'org.threeten:threetenbp:1.3.5' + implementation 'com.joyent.util:fast-md5:2.7.1' + implementation "io.nextflow:nf-amazon:2.1.4" + implementation "org.apache.httpcomponents:httpmime:4.5.14" + implementation "org.apache.httpcomponents:httpclient:4.5.14" // test configuration testImplementation "org.apache.groovy:groovy:4.0.21" diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/client/DrsClient.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/client/DrsClient.groovy new file mode 100644 index 0000000..84cb1cc --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/client/DrsClient.groovy @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.client + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +import java.time.Instant +import java.lang.Long +import java.io.File +import java.io.IOException +import java.io.OutputStream +import java.net.URI +import java.net.http.HttpClient +import java.net.http.HttpRequest +import java.net.http.HttpRequest.BodyPublishers +import java.net.http.HttpResponse +import java.net.http.HttpResponse.BodyHandlers +import java.nio.file.Path +import java.nio.channels.Channels +import java.nio.channels.Pipe +import java.nio.charset.StandardCharsets + +import org.apache.http.HttpEntity +import org.apache.http.entity.ContentType +import org.apache.http.entity.mime.MultipartEntityBuilder +import org.apache.http.entity.mime.content.StringBody + +import nextflow.ga4gh.drs.exceptions.DrsObjectPublishingException +import nextflow.ga4gh.drs.exceptions.DrsAuthenticationException +import nextflow.ga4gh.drs.config.DrsConfig +import nextflow.ga4gh.drs.utils.DrsUtils + +/** + * Define the DRS client + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class DrsClient { + + String authHeader + long authExpiration + + String user + String password + String endpoint + + /** + * Create a DRS client based on a DrsConfig object + * + * @param config The DrsConfig object. + */ + DrsClient(DrsConfig config) { + this.user = config.user + this.password = config.password + this.endpoint = config.endpoint + refreshToken() + } + + /** + * Check if the token is expired or expires soon. If so, the token should be refreshed + * + */ + private void checkToken() { + long epoch = Instant.now().toEpochMilli() + // Refresh the token if it expires in less than one minute + if(authExpiration - epoch < 1*60*100) { + refreshToken() + log.debug("Refreshed DRS token") + } + } + + /** + * Refresh the DRS token + * + */ + private void refreshToken() { + // Create the form with the username and password + HttpEntity httpEntity = MultipartEntityBuilder.create() + .addPart( + "username", + new StringBody( + this.user, + ContentType.create("application/x-www-form-urlencoded", StandardCharsets.UTF_8) + ) + ) + .addPart( + "password", + new StringBody( + this.password, + ContentType.create("application/x-www-form-urlencoded", StandardCharsets.UTF_8) + ) + ) + .build() + + // Efficiently stream the form + Pipe pipe = Pipe.open() + new Thread(() -> { + try (OutputStream outputStream = Channels.newOutputStream(pipe.sink())) { + httpEntity.writeTo(outputStream) + } + }).start() + + // Do a POST request to get the bearer token and add it the a header value + HttpClient httpClient = HttpClient.newHttpClient() + + HttpRequest request = HttpRequest.newBuilder(new URI("${this.endpoint}/token".toString())) + .header("Content-Type", httpEntity.getContentType().getValue()) + .version(HttpClient.Version.HTTP_1_1) + .POST(BodyPublishers.ofInputStream(() -> Channels.newInputStream(pipe.source()))).build() + + HttpResponse responseBody = httpClient.send(request, BodyHandlers.ofString(StandardCharsets.UTF_8)) + + Map responseMap = (Map) new JsonSlurper().parseText(responseBody.body()) + if(responseMap.containsKey("status_code") && responseMap.status_code != 200) { + throw new DrsAuthenticationException(responseMap.msg.toString()) + } + this.authHeader = "Bearer ${responseMap.access_token}" + this.authExpiration = Instant.now().toEpochMilli() + Long.parseLong(responseMap.expires_in.toString()) * 60 * 100 + } + + /** + * Upload a DRS object + * + * @param obj A Groovy map with the structure of a DRS object + */ + public String uploadObject(Map obj) { + checkToken() + + // Upload the DRS Object using a POST request + HttpClient client = HttpClient.newHttpClient() + String objBody = new JsonBuilder(obj).toPrettyString() + HttpRequest request = HttpRequest.newBuilder() + .uri(URI.create("${this.endpoint}/ga4gh/drs/v1/objects")) + .header("Authorization", this.authHeader) + .header("Content-Type", "application/json") + .version(HttpClient.Version.HTTP_1_1) + .POST(HttpRequest.BodyPublishers.ofString(objBody)) + .build() + + HttpResponse responseBody = client.send(request, BodyHandlers.ofString(StandardCharsets.UTF_8)) + + Map responseMap = (Map) new JsonSlurper().parseText(responseBody.body()) + Integer statusCode = responseBody.statusCode() + + // Return the DRS id if the object has been created or when the object exists + switch(statusCode) { + case 201: + return responseMap.object_id + case 409: + List accessMethods = obj.access_methods as List + String url = accessMethods[0]['access_url']['url'] + log.debug("DRS object for '${url}' already exists. Skipping the upload of this object") + return getIdFromUrl(url) + default: + throw new DrsObjectPublishingException("Received an error when publishing a DRS object (Status code: ${statusCode}): ${responseBody.body()}") + } + } + + /** + * Get a DRS id based on a publish URL + * + * @param url The URL to get the DRS id from + */ + public String getIdFromUrl(String url) { + checkToken() + String sample = new DrsUtils().getSampleName(url as Path) + + HttpClient client = HttpClient.newHttpClient() + HttpRequest request = HttpRequest.newBuilder() + .uri(URI.create("${this.endpoint}/ga4gh/drs/v1/objects?alias=${sample}")) + .header("Authorization", this.authHeader) + .version(HttpClient.Version.HTTP_1_1) + .GET() + .build() + + HttpResponse responseBody = client.send(request, BodyHandlers.ofString(StandardCharsets.UTF_8)) + + List responseObjects = (List) new JsonSlurper().parseText(responseBody.body()) + Integer statusCode = responseBody.statusCode() + + Map correctObj = (Map) responseObjects.find { obj -> + List urls = obj['access_methods']['access_url']['url'] as List + return urls.contains(url) + } + if(correctObj) { + return correctObj.id + } + throw new DrsObjectPublishingException("Creating a DRS object for ${url} returned a 'object already exists' response, but the object couldn't be found with an alias search on '${sample}'") + } + +} diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/config/DrsConfig.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/config/DrsConfig.groovy new file mode 100644 index 0000000..ed6fb56 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/config/DrsConfig.groovy @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.config + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import java.util.regex.Matcher +import java.util.regex.Pattern + +import com.amazonaws.services.s3.AmazonS3 +import com.amazonaws.ClientConfiguration + +import nextflow.ga4gh.drs.exceptions.DrsConfigException +import nextflow.cloud.aws.AwsClientFactory +import nextflow.cloud.aws.config.AwsConfig + +/** + * Define the plugin configuration values. + * + * The configuration values can be extracted from the map and will be stored as + * on the instance. + * + * + * TODO: Describe the configuration of your actual implementation. + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class DrsConfig { + + Boolean enabled + String endpoint + String endpointNoProtocol + String user + String password + AmazonS3 s3Client + List allowedExtensions + String run + String summary + + /** + * Construct a configuration instance. + * + * @param map A nextflow plugin wrapper instance. + */ + DrsConfig(Map map = [:]) { + this.enabled = map.navigate("drs.enabled") ?: false + this.endpoint = map.navigate("drs.endpoint") ?: System.getenv("DRS_URL") ?: "" + this.endpointNoProtocol = this.endpoint.split("://")[-1] + this.user = map.navigate("drs.user") ?: System.getenv("DRS_USERNAME") ?: "" + this.password = map.navigate("drs.password") ?: System.getenv("DRS_PASSWORD") ?: "" + Map awsConfig = (Map) map.navigate("aws") ?: [:] + awsConfig.region = awsConfig.region ?: "uz" + this.s3Client = new AwsClientFactory(new AwsConfig(awsConfig)).getS3Client() + this.allowedExtensions = map.navigate("drs.allowedExtensions") as List ?: [] + this.run = map.navigate("drs.run") ?: "" // TODO implement a more dynamic way for pipeline runs with more than one sequencer run + this.summary = map.navigate("drs.summary") ?: "" + + // Some failsafe options to prevent weird errors + if(!this.enabled) { return } + + if(!this.endpoint) { + throw new DrsConfigException("Please provide a DRS endpoint with the drs.endpoint configuration option or with the DRS_URL environment variable") + } + + if(!this.user) { + throw new DrsConfigException("Unable to get the DRS username. Make sure the drs.user configuration option or the DRS_USERNAME environment variable is set") + } + + if(!this.password) { + throw new DrsConfigException("Unable to get the DRS password. Make sure the drs.password configuration option or the DRS_PASSWORD environment variable is set") + } + + if(this.summary) { + String summaryExtension = this.summary.tokenize(".").last() ?: "" + List allowedSummaryExtensions = ["csv", "tsv", "json", "yaml", "yml"] + if(!allowedSummaryExtensions.contains(summaryExtension)) { + throw new DrsConfigException("Unrecognized extension used for the DRS summary file. The extension (${summaryExtension}) should be on of these: ${allowedSummaryExtensions.join(',')}") + } + } + + if(!this.run) { + throw new DrsConfigException("Please provide a run with the `drs.run` configuration option") + } + + } + +} diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsAuthenticationException.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsAuthenticationException.groovy new file mode 100644 index 0000000..537275f --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsAuthenticationException.groovy @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.exceptions + +import groovy.transform.CompileStatic +import nextflow.exception.AbortOperationException +/** + * Exception thrown for errors while creating a DrsConfig object + * + * @author Nicolas Vannieuwkerke + */ +@CompileStatic +class DrsAuthenticationException extends AbortOperationException { + + DrsAuthenticationException(String message) { + super("Something went wrong during the DRS authentication: " + message) + } +} \ No newline at end of file diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsConfigException.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsConfigException.groovy new file mode 100644 index 0000000..7abec9e --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsConfigException.groovy @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.exceptions + +import groovy.transform.CompileStatic +import nextflow.exception.AbortOperationException +/** + * Exception thrown for errors while creating a DrsConfig object + * + * @author Nicolas Vannieuwkerke + */ +@CompileStatic +class DrsConfigException extends AbortOperationException { + + DrsConfigException(String message) { + super(message) + } +} \ No newline at end of file diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsObjectCreationException.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsObjectCreationException.groovy new file mode 100644 index 0000000..464c945 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsObjectCreationException.groovy @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.exceptions + +import groovy.transform.CompileStatic +import nextflow.exception.AbortOperationException +/** + * Exception thrown for errors while creating a DRS object + * + * @author Nicolas Vannieuwkerke + */ +@CompileStatic +class DrsObjectCreationException extends AbortOperationException { + + DrsObjectCreationException(String message) { + super(message) + } +} \ No newline at end of file diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsObjectPublishingException.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsObjectPublishingException.groovy new file mode 100644 index 0000000..0fa1c87 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/exceptions/DrsObjectPublishingException.groovy @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.exceptions + +import groovy.transform.CompileStatic +import nextflow.exception.AbortOperationException +/** + * Exception thrown for errors when publishing a DRS object + * + * @author Nicolas Vannieuwkerke + */ +@CompileStatic +class DrsObjectPublishingException extends AbortOperationException { + + DrsObjectPublishingException(String message) { + super(message) + } +} \ No newline at end of file diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/AccessMethod.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/AccessMethod.groovy new file mode 100644 index 0000000..afcf980 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/AccessMethod.groovy @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.model + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import java.nio.file.Path +import java.util.UUID + +import nextflow.ga4gh.drs.config.DrsConfig + +/** + * Define the AccessMethod object + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class AccessMethod { + + String type + AccessUrl accessUrl + String accessId + String region = "" + + /** + * Construct a DRS object. + * + * @param destPath The URL or path to the destination of the published file + * @param config The DrsConfig object + */ + AccessMethod(Path destPath, DrsConfig config) { + def String protocol = destPath.toUri().toString().split("://")[0] + if(["s3", "gs", "ftp", "https"].contains(protocol)) { + // No support for 'gsiftp', 'globus' and 'htsget', implement this later if needed + this.type = protocol + } else { + this.type = "file" + } + this.accessUrl = new AccessUrl(destPath, this.type, config) + this.accessId = UUID.randomUUID().toString() + } + + /** + * Transform the object to a map + * + */ + public Map toMap() { + return [ + type: this.type, + access_url: this.accessUrl.toMap(), + access_id: this.accessId, + region: this.region + ] + } + +} + + diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/AccessUrl.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/AccessUrl.groovy new file mode 100644 index 0000000..4a9322a --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/AccessUrl.groovy @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.model + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import java.nio.file.Path +import java.util.regex.Matcher +import java.util.regex.Pattern +import java.lang.reflect.Method + +import nextflow.cloud.aws.nio.S3Path + +import nextflow.ga4gh.drs.exceptions.DrsObjectCreationException +import nextflow.ga4gh.drs.config.DrsConfig + +/** + * Define the AccessUrl object + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +class AccessUrl { + + String url + List headers = [] + + /** + * Construct a DRS object. + * + * @param destPath The URL or path to the destination of the published file + * @param type The type of the destination path (e.g s3, az, gc...) + * @param config The DrsConfig object + */ + AccessUrl(Path destPath, String type, DrsConfig config) { + switch(type) { + case "s3": + this.url = config.s3Client.getResourceUrl(destPath.bucket, destPath.key) + break + default: + this.url = destPath.toUri().toString() + break + } + } + + /** + * Transform the object to a map + * + */ + public Map toMap() { + return [ + url: this.url, + headers: this.headers + ] + } + +} + + diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/Checksum.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/Checksum.groovy new file mode 100644 index 0000000..f8a2757 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/Checksum.groovy @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.model + +import java.io.File +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j + +import com.twmacinta.util.MD5 + +/** + * Define the Checksum object + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class Checksum { + + String checksum + String type = "md5" + + /** + * Construct a DRS object. + * + * @param sourceFile The path to the source of the published file + */ + Checksum(File sourceFile) { + this.checksum = MD5.asHex(MD5.getHash(sourceFile)) + } + + /** + * Transform the object to a map + * + */ + public Map toMap() { + return [ + checksum: this.checksum, + type: this.type + ] + } + +} + + diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/Content.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/Content.groovy new file mode 100644 index 0000000..9aefdbb --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/Content.groovy @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.model + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j + +/** + * Define the Content object + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class Content { + + String name + String id + List drsUri + List contents + + /** + * Construct a Content object. + * + */ + Content() { + // This has not been implemented yet + } + + /** + * Transform the object to a map + * + */ + public Map toMap() { + return null + } + +} + + diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/DrsObject.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/DrsObject.groovy new file mode 100644 index 0000000..24f4313 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/model/DrsObject.groovy @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.model + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import java.util.UUID +import java.nio.file.Path +import java.nio.file.Files +import java.util.regex.Matcher +import java.util.regex.Pattern +import java.lang.IllegalStateException + +import nextflow.Nextflow + +import nextflow.ga4gh.drs.exceptions.DrsObjectCreationException +import nextflow.ga4gh.drs.config.DrsConfig +import nextflow.ga4gh.drs.utils.DrsUtils + +/** + * Define the DRS object + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class DrsObject { + + String id + String name + String selfUri + long size + String createdTime + String updatedTime + String version = "1" + String mimeType = "application/json" + List checksums + List accessMethods + List contents + String description = null + List aliases + + /** + * Construct a DRS object. + * + * @param destPath The URL or path to the destination of the published file + * @param sourcePath The path to the source of the published file + * @param config The DrsConfig object + */ + DrsObject(Path destPath, Path sourcePath, DrsConfig config) { + def File sourceFile = sourcePath.toFile() + this.id = UUID.randomUUID().toString() + log.debug("Creating DRS object '${this.id}' for file '${destPath.toUri().toString()}'") + + this.name = new DrsUtils().getSampleName(sourcePath) + this.selfUri = "drs://${config.endpointNoProtocol}/${this.id}".toString() + this.size = sourceFile.length() + this.createdTime = Files.getAttribute(sourcePath, "creationTime") as String + this.updatedTime = Files.getAttribute(sourcePath, "lastModifiedTime") as String + this.checksums = [new Checksum(sourceFile)] + Content content = new Content() + this.contents = content.toMap() ? [content] : null + AccessMethod accessMethod = new AccessMethod(destPath, config) + this.accessMethods = accessMethod.toMap() ? [accessMethod] : null + + // TODO improve alias handling + this.aliases = [ + "${config.run}/${this.name}" as String + ] + } + + /** + * Transform the object to a map + * + */ + public Map toMap() { + return [ + id: this.id, + name: this.name, + self_uri: this.selfUri, + size: this.size, + created_time: this.createdTime, + updated_time: this.updatedTime, + version: this.version, + mime_type: this.mimeType, + checksums: this.checksums.collect { it.toMap() }, + access_methods: this.accessMethods?.collect { it.toMap() }, + contents: this.contents?.collect { it.toMap() }, + description: this.description, + aliases: this.aliases + ] + } + +} + + diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/observer/DrsObserver.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/observer/DrsObserver.groovy new file mode 100644 index 0000000..725a7d5 --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/observer/DrsObserver.groovy @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.observer + +import java.nio.file.Path +import java.nio.file.Files +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import groovy.json.JsonBuilder +import groovy.json.JsonSlurper +import groovy.yaml.YamlBuilder +import org.yaml.snakeyaml.Yaml + +import nextflow.Session +import nextflow.trace.TraceObserver +import nextflow.Nextflow + +import nextflow.ga4gh.drs.config.DrsConfig +import nextflow.ga4gh.drs.client.DrsClient +import nextflow.ga4gh.drs.model.DrsObject + +/** + * Implement the trace observer functions. + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class DrsObserver implements TraceObserver { + + private DrsConfig config + private DrsClient client + private List idFileList = [] + + /** + * Initialize the configuration and DRS client on flow creation + * + * @param session The current Nextflow session. + */ + @Override + void onFlowCreate(Session session) { + this.config = new DrsConfig(session.config) + this.client = new DrsClient(this.config) + } + + /** + * Create a DRS object of the published file and publish the object + * + * @param destination The destination of the published file. + * @param source The source of the published file. + */ + @Override + void onFilePublish(Path destination, Path source) { + if(new File(source.toString()).isDirectory()) { + publishFilesFromDir(destination, source) + } + else { + createAndPublishObject(destination, source) + } + } + + public void publishFilesFromDir(Path destination, Path directory) { + new File(directory.toString()).listFiles().each { + Path file = Nextflow.file(it) as Path + Path newDestination = destination.resolve(file.name) + if(it.isDirectory()) { + publishFilesFromDir(newDestination, file) + } else { + createAndPublishObject(newDestination, file) + } + } + } + + void createAndPublishObject(Path destination, Path source) { + if(!checkIfFileHasExtension(destination, this.config.allowedExtensions)) { + return + } + DrsObject obj = new DrsObject(destination, source, this.config) + String id = this.client.uploadObject(obj.toMap()) + this.idFileList.add(["drs_id": id, "file": destination.toUri().toString().replaceFirst("///", "//")]) + } + + /** + * Create a summary file of all published DRS objects + * + */ + @Override + void onFlowComplete() { + // Don't create a summary file if no path has been given + if(!this.config.summary) { + return + } + def Path summaryFile = (Path) Nextflow.file(this.config.summary) + def String summaryExtension = this.config.summary.tokenize('.').last() + def String fileText + switch(summaryExtension) { + case "csv": + fileText = "drs_id,file\n" + this.idFileList.each { entry -> + fileText += "${entry.drs_id},${entry.file}\n".toString() + } + break + case "tsv": + fileText = "drs_id\tfile\n" + this.idFileList.each { entry -> + fileText += "${entry.drs_id}\t${entry.file}\n".toString() + } + break + case "json": + fileText = new JsonBuilder(this.idFileList).toPrettyString() + break + case ["yml", "yaml"]: + fileText = new YamlBuilder()(this.idFileList).toString() + break + } + summaryFile.text = fileText + log.debug("Created DRS summary file at ${this.config.summary}") + } + + private static Boolean checkIfFileHasExtension(Path file, List extensions) { + if(extensions) { + def String destString = file.toString() + for(ext : extensions) { + if(destString.endsWith(ext)) { return true } + } + return false + } + return true + } + +} diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/observer/DrsObserverFactory.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/observer/DrsObserverFactory.groovy new file mode 100644 index 0000000..3a31a0e --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/observer/DrsObserverFactory.groovy @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.observer + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j + +import nextflow.Session +import nextflow.trace.TraceObserver +import nextflow.trace.TraceObserverFactory + +/** + * Create a new TraceObserverFactory. + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class DrsObserverFactory implements TraceObserverFactory { + + /** + * Create the TraceObserverFactory + * + * @param session A nextflow session instance. + */ + @Override + Collection create(Session session) { + final enabled = session.config.navigate("drs.enabled") + log.debug(enabled ? "DRS publishing enabled" : "DRS publishing disabled") + return enabled ? [ new DrsObserver() ] as Collection : [] as Collection + } + +} diff --git a/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/utils/DrsUtils.groovy b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/utils/DrsUtils.groovy new file mode 100644 index 0000000..b2f2aaf --- /dev/null +++ b/plugins/nf-ga4gh/src/main/nextflow/ga4gh/drs/utils/DrsUtils.groovy @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024 Nicolas Vannieuwkerke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.ga4gh.drs.utils + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import java.util.UUID +import java.nio.file.Path +import java.nio.file.Files +import java.util.regex.Matcher +import java.util.regex.Pattern +import java.lang.IllegalStateException + +import nextflow.Nextflow +import nextflow.ga4gh.drs.exceptions.DrsObjectCreationException + +/** + * Some common DRS utils + * + * @author Nicolas Vannieuwkerke + */ +@Slf4j +@CompileStatic +class DrsUtils { + + /** + * Get the sample name from the file name + * + * @param file the file from which to derive the samplename + */ + public String getSampleName(Path file) { + def String baseName = file.toString().split("/")[-1] + + // TODO This should be improved to work for every occasion (not only ours) + def Pattern familyPattern = Pattern.compile(/^(Proband_\d+[_\.]\d+).*$/, Pattern.MULTILINE) + def Matcher familyMatcher = familyPattern.matcher(baseName) + if(familyMatcher.find()) { + return familyMatcher.group(1) + } + + def Pattern samplePattern = Pattern.compile(/^((K|FD|D|I|DNA)\d+[A-Z]?).*$/, Pattern.MULTILINE) + def Matcher sampleMatcher = samplePattern.matcher(baseName) + if(sampleMatcher.find()) { + return sampleMatcher.group(1) + } + + def Pattern otherPattern = Pattern.compile(/^([^-_\.]+).*$/, Pattern.MULTILINE) + def Matcher otherMatcher = otherPattern.matcher(baseName) + if(otherMatcher.find()) { + return otherMatcher.group(1) + } + + throw new DrsObjectCreationException("Unable to parse the sample name from ${baseName}") + } + +} + + diff --git a/plugins/nf-ga4gh/src/resources/META-INF/MANIFEST.MF b/plugins/nf-ga4gh/src/resources/META-INF/MANIFEST.MF index e7fe6df..4956489 100644 --- a/plugins/nf-ga4gh/src/resources/META-INF/MANIFEST.MF +++ b/plugins/nf-ga4gh/src/resources/META-INF/MANIFEST.MF @@ -1,6 +1,6 @@ Manifest-Version: 1.0 Plugin-Class: nextflow.ga4gh.Ga4ghPlugin Plugin-Id: nf-ga4gh -Plugin-Version: 1.3.0 +Plugin-Version: 1.4.0 Plugin-Provider: Seqera Labs Plugin-Requires: >=24.01.0-edge diff --git a/plugins/nf-ga4gh/src/resources/META-INF/extensions.idx b/plugins/nf-ga4gh/src/resources/META-INF/extensions.idx index 45231d7..dc6b9bb 100644 --- a/plugins/nf-ga4gh/src/resources/META-INF/extensions.idx +++ b/plugins/nf-ga4gh/src/resources/META-INF/extensions.idx @@ -15,3 +15,4 @@ # nextflow.ga4gh.tes.executor.TesExecutor +nextflow.ga4gh.drs.observer.DrsObserverFactory \ No newline at end of file