From 973358d7ef485a8f2cb7f2b7d8c733e20602af70 Mon Sep 17 00:00:00 2001 From: Karl Lum Date: Sun, 30 Jun 2024 18:11:47 -0700 Subject: [PATCH] Update schema for donor metadata (#655) --- resources/queries/cds/MAbMixMAbMeta.sql | 10 +- .../cds/MAbMixMAbMetaForLearnExport.sql | 10 +- .../queries/cds/alignment_mab_metadata.sql | 4 +- .../cds/antibody_sequence_header_source.sql | 11 - resources/queries/cds/donor_mab.sql | 4 + .../cds/donor_mab_sequence_alignment.sql | 3 + .../cds/donor_mab_sequence_germline.sql | 12 + .../cds/donor_mab_sequence_header_source.sql | 15 ++ resources/queries/cds/ds_mabmetasummary.sql | 5 +- resources/queries/cds/lineage_alignment.sql | 98 -------- .../queries/cds/lineage_sequence_germline.sql | 14 -- .../cds/sequence_germline_mab_metadata.sql | 4 +- resources/schemas/cds.xml | 74 ++++-- .../postgresql/cds-24.000-24.001.sql | 86 +++++++ src/org/labkey/cds/CDSManager.java | 6 +- src/org/labkey/cds/CDSModule.java | 2 +- .../labkey/cds/data/CDSImportCopyConfig.java | 14 +- src/org/labkey/cds/data/CSVCopyConfig.java | 2 +- src/org/labkey/cds/data/TSVCopyConfig.java | 18 +- .../labkey/cds/data/steps/BCRImportTask.java | 45 ---- .../labkey/cds/data/steps/CDSImportTask.java | 39 +-- .../dataspace/cdsimport/MAbMetadata.txt | 227 +++++++++++++++++- .../dataspace/cdsimport/antibody_sequence.csv | 4 - .../cdsimport/antibody_structure.csv | 4 + .../cdsimport/donor_mab_sequence.csv | 4 + .../dataspace/cdsimport/donor_metadata.txt | 26 ++ .../dataspace/cdsimport/pab_sequence.csv | 7 + .../cdsimport/pab_sequence_study.csv | 4 + 28 files changed, 519 insertions(+), 233 deletions(-) delete mode 100644 resources/queries/cds/antibody_sequence_header_source.sql create mode 100644 resources/queries/cds/donor_mab.sql create mode 100644 resources/queries/cds/donor_mab_sequence_alignment.sql create mode 100644 resources/queries/cds/donor_mab_sequence_germline.sql create mode 100644 resources/queries/cds/donor_mab_sequence_header_source.sql delete mode 100644 
resources/queries/cds/lineage_alignment.sql delete mode 100644 resources/queries/cds/lineage_sequence_germline.sql create mode 100644 resources/schemas/dbscripts/postgresql/cds-24.000-24.001.sql delete mode 100644 src/org/labkey/cds/data/steps/BCRImportTask.java delete mode 100644 test/sampledata/dataspace/cdsimport/antibody_sequence.csv create mode 100644 test/sampledata/dataspace/cdsimport/antibody_structure.csv create mode 100644 test/sampledata/dataspace/cdsimport/donor_mab_sequence.csv create mode 100644 test/sampledata/dataspace/cdsimport/donor_metadata.txt create mode 100644 test/sampledata/dataspace/cdsimport/pab_sequence.csv create mode 100644 test/sampledata/dataspace/cdsimport/pab_sequence_study.csv diff --git a/resources/queries/cds/MAbMixMAbMeta.sql b/resources/queries/cds/MAbMixMAbMeta.sql index 61e4acc5a..e2890cfd4 100644 --- a/resources/queries/cds/MAbMixMAbMeta.sql +++ b/resources/queries/cds/MAbMixMAbMeta.sql @@ -29,11 +29,13 @@ SELECT mab_hxb2_location, mab_ab_binding_type, mab_isotype, - mab_donorid, - mab_donor_species, - mab_donor_clade + mabmeta.donor_id AS mab_donorid, +-- donor meta + donormeta.donor_species AS mab_donor_species, + donormeta.donor_clade AS mab_donor_clade FROM MAbMix mix LEFT JOIN MAbMixMetadata mixmeta ON mix.mab_mix_id = mixmeta.mab_mix_id -LEFT JOIN MAbMetadata mabmeta ON mix.mab_id = mabmeta.mab_id \ No newline at end of file +LEFT JOIN MAbMetadata mabmeta ON mix.mab_id = mabmeta.mab_id +LEFT JOIN donor_metadata donormeta ON mabmeta.donor_id = donormeta.donor_id \ No newline at end of file diff --git a/resources/queries/cds/MAbMixMAbMetaForLearnExport.sql b/resources/queries/cds/MAbMixMAbMetaForLearnExport.sql index f1db8c700..af2ac9999 100644 --- a/resources/queries/cds/MAbMixMAbMetaForLearnExport.sql +++ b/resources/queries/cds/MAbMixMAbMetaForLearnExport.sql @@ -32,11 +32,13 @@ SELECT mab_hxb2_location, mab_ab_binding_type, mab_isotype, - mab_donorid, - mab_donor_species, - mab_donor_clade + mabmeta.donor_id AS 
mab_donorid, +-- donor meta + donormeta.donor_species AS mab_donor_species, + donormeta.donor_clade AS mab_donor_clade FROM MAbMix mix LEFT JOIN MAbMixMetadata mixmeta ON mix.mab_mix_id = mixmeta.mab_mix_id -LEFT JOIN MAbMetadata mabmeta ON mix.mab_id = mabmeta.mab_id \ No newline at end of file +LEFT JOIN MAbMetadata mabmeta ON mix.mab_id = mabmeta.mab_id +LEFT JOIN donor_metadata donormeta ON mabmeta.donor_id = donormeta.donor_id \ No newline at end of file diff --git a/resources/queries/cds/alignment_mab_metadata.sql b/resources/queries/cds/alignment_mab_metadata.sql index f1cca731e..de1810d3d 100644 --- a/resources/queries/cds/alignment_mab_metadata.sql +++ b/resources/queries/cds/alignment_mab_metadata.sql @@ -1,4 +1,4 @@ SELECT * FROM cds.alignment a - INNER JOIN cds.antibody_sequence aseq ON a.sequence_id = aseq.sequence_id - INNER JOIN cds.MAbMetadata mm ON aseq.mab_id = mm.mab_id \ No newline at end of file + INNER JOIN cds.donor_mab_sequence dseq ON a.sequence_id = dseq.sequence_id + INNER JOIN cds.MAbMetadata mm ON dseq.mab_id = mm.mab_id \ No newline at end of file diff --git a/resources/queries/cds/antibody_sequence_header_source.sql b/resources/queries/cds/antibody_sequence_header_source.sql deleted file mode 100644 index cf41e8bdd..000000000 --- a/resources/queries/cds/antibody_sequence_header_source.sql +++ /dev/null @@ -1,11 +0,0 @@ -SELECT - aseq.mab_id, - seqs.sequence_id, - shed.header, - hsrc.source, - seqs.sequence_nt, - aseq.lineage -FROM sequence seqs - JOIN antibody_sequence AS aseq ON aseq.sequence_id = seqs.sequence_id - JOIN sequence_header AS shed ON shed.sequence_id = seqs.sequence_id - JOIN header_source AS hsrc ON hsrc.source_id = shed.source_id \ No newline at end of file diff --git a/resources/queries/cds/donor_mab.sql b/resources/queries/cds/donor_mab.sql new file mode 100644 index 000000000..42a4dc421 --- /dev/null +++ b/resources/queries/cds/donor_mab.sql @@ -0,0 +1,4 @@ +SELECT DISTINCT dms.mab_id, + dms.donor_id +FROM 
donor_mab_sequence AS dms +WHERE dms.donor_id IS NOT NULL \ No newline at end of file diff --git a/resources/queries/cds/donor_mab_sequence_alignment.sql b/resources/queries/cds/donor_mab_sequence_alignment.sql new file mode 100644 index 000000000..495faa2e3 --- /dev/null +++ b/resources/queries/cds/donor_mab_sequence_alignment.sql @@ -0,0 +1,3 @@ +SELECT * +FROM alignment ali + JOIN donor_mab_sequence AS dms ON dms.sequence_id = ali.sequence_id \ No newline at end of file diff --git a/resources/queries/cds/donor_mab_sequence_germline.sql b/resources/queries/cds/donor_mab_sequence_germline.sql new file mode 100644 index 000000000..211f4d235 --- /dev/null +++ b/resources/queries/cds/donor_mab_sequence_germline.sql @@ -0,0 +1,12 @@ +SELECT dms.mab_id, + dms.donor_id, + sgl.sequence_id, + sgl.allele, + sgl.percent_identity, + sgl.matches, + sgl.alignment_length, + sgl.score, + sgl.container, + sgl.run_application +FROM sequence_germline AS sgl + JOIN donor_mab_sequence AS dms ON dms.sequence_id = sgl.sequence_id \ No newline at end of file diff --git a/resources/queries/cds/donor_mab_sequence_header_source.sql b/resources/queries/cds/donor_mab_sequence_header_source.sql new file mode 100644 index 000000000..d09287f84 --- /dev/null +++ b/resources/queries/cds/donor_mab_sequence_header_source.sql @@ -0,0 +1,15 @@ +SELECT seq.sequence_id, + dms.mab_id, + dms.donor_id, + mab.mab_name_std, + don.donor_code, + sqh.header, + src.source, + seq.sequence_aa, + seq.sequence_nt +FROM sequence seq + JOIN donor_mab_sequence AS dms ON dms.sequence_id = seq.sequence_id + JOIN sequence_header AS sqh ON sqh.sequence_id = seq.sequence_id + JOIN header_source AS src ON src.source_id = sqh.source_id + LEFT JOIN mabMetadata AS mab ON mab.mab_id = dms.mab_id + LEFT JOIN donor_metadata AS don ON don.donor_id = dms.donor_id \ No newline at end of file diff --git a/resources/queries/cds/ds_mabmetasummary.sql b/resources/queries/cds/ds_mabmetasummary.sql index 8a7dccc4f..77edd95cc 100644 --- 
a/resources/queries/cds/ds_mabmetasummary.sql +++ b/resources/queries/cds/ds_mabmetasummary.sql @@ -22,8 +22,9 @@ SELECT mabmeta.mab_hxb2_location, mabmeta.mab_ab_binding_type, mabmeta.mab_isotype, - mabmeta.mab_donor_species + donormeta.donor_species AS mab_donor_species FROM cds.MAbMix mix JOIN cds.MAbMixMetadata mixmeta ON (mixmeta.container = mix.container AND mixmeta.mab_mix_id = mix.mab_mix_id) -JOIN cds.MAbMetadata mabmeta ON (mabmeta.container = mix.container AND mabmeta.mab_id = mix.mab_id) \ No newline at end of file +JOIN cds.MAbMetadata mabmeta ON (mabmeta.container = mix.container AND mabmeta.mab_id = mix.mab_id) +LEFT JOIN cds.donor_metadata donormeta ON (donormeta.container = mix.container AND donormeta.donor_id = mabmeta.donor_id) \ No newline at end of file diff --git a/resources/queries/cds/lineage_alignment.sql b/resources/queries/cds/lineage_alignment.sql deleted file mode 100644 index b72def629..000000000 --- a/resources/queries/cds/lineage_alignment.sql +++ /dev/null @@ -1,98 +0,0 @@ -SELECT -aseq.mab_id, -algn.sequence_id, -algn.locus, -algn.stop_codon, -algn.vj_in_frame, -algn.productive, -algn.rev_comp, -algn.complete_vdj, -algn.v_call, -algn.d_call, -algn.j_call, -algn.sequence_alignment, -algn.germline_alignment, -algn.sequence_alignment_aa, -algn.germline_alignment_aa, -algn.v_alignment_start, -algn.v_alignment_end, -algn.d_alignment_start, -algn.d_alignment_end, -algn.j_alignment_start, -algn.j_alignment_end, -algn.v_sequence_alignment, -algn.v_sequence_alignment_aa, -algn.v_germline_alignment, -algn.v_germline_alignment_aa, -algn.d_sequence_alignment, -algn.d_sequence_alignment_aa, -algn.d_germline_alignment, -algn.d_germline_alignment_aa, -algn.j_sequence_alignment, -algn.j_sequence_alignment_aa, -algn.j_germline_alignment, -algn.j_germline_alignment_aa, -algn.fwr1, -algn.fwr1_aa, -algn.cdr1, -algn.cdr1_aa, -algn.fwr2, -algn.fwr2_aa, -algn.cdr2, -algn.cdr2_aa, -algn.fwr3, -algn.fwr3_aa, -algn.fwr4, -algn.fwr4_aa, -algn.cdr3, 
-algn.cdr3_aa, -algn.junction, -algn.junction_length, -algn.junction_aa, -algn.junction_aa_length, -algn.v_score, -algn.d_score, -algn.j_score, -algn.v_cigar, -algn.d_cigar, -algn.j_cigar, -algn.v_support, -algn.d_support, -algn.j_support, -algn.v_identity, -algn.d_identity, -algn.j_identity, -algn.v_sequence_start, -algn.v_sequence_end, -algn.v_germline_start, -algn.v_germline_end, -algn.d_sequence_start, -algn.d_sequence_end, -algn.d_germline_start, -algn.d_germline_end, -algn.j_sequence_start, -algn.j_sequence_end, -algn.j_germline_start, -algn.j_germline_end, -algn.fwr1_start, -algn.fwr1_end, -algn.cdr1_start, -algn.cdr1_end, -algn.fwr2_start, -algn.fwr2_end, -algn.cdr2_start, -algn.cdr2_end, -algn.fwr3_start, -algn.fwr3_end, -algn.fwr4_start, -algn.fwr4_end, -algn.cdr3_start, -algn.cdr3_end, -algn.np1, -algn.np1_length, -algn.np2, -algn.np2_length, -algn.run_application, -aseq.lineage -FROM alignment AS algn -JOIN antibody_sequence AS aseq ON algn.sequence_id = aseq.sequence_id \ No newline at end of file diff --git a/resources/queries/cds/lineage_sequence_germline.sql b/resources/queries/cds/lineage_sequence_germline.sql deleted file mode 100644 index 8769ae8bf..000000000 --- a/resources/queries/cds/lineage_sequence_germline.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT - aseq.mab_id, - sgrm.allele, - sgrm.sequence_id, - sgrm.percent_identity, - sgrm.matches, - sgrm.alignment_length, - sgrm.score, - sgrm.run_application, - pref.status AS preferred_status, - aseq.lineage -FROM sequence_germline AS sgrm - JOIN antibody_sequence AS aseq ON sgrm.sequence_id = aseq.sequence_id - LEFT JOIN preferred_allele AS pref ON pref.sequence_id = sgrm.sequence_id AND pref.allele = sgrm.allele \ No newline at end of file diff --git a/resources/queries/cds/sequence_germline_mab_metadata.sql b/resources/queries/cds/sequence_germline_mab_metadata.sql index 195abd9a8..ed9fa1876 100644 --- a/resources/queries/cds/sequence_germline_mab_metadata.sql +++ 
b/resources/queries/cds/sequence_germline_mab_metadata.sql @@ -1,4 +1,4 @@ SELECT * FROM cds.sequence_germline sg - INNER JOIN cds.antibody_sequence aseq ON sg.sequence_id = aseq.sequence_id - INNER JOIN cds.MAbMetadata mm ON aseq.mab_id = mm.mab_id \ No newline at end of file + INNER JOIN cds.donor_mab_sequence dseq ON sg.sequence_id = dseq.sequence_id + INNER JOIN cds.MAbMetadata mm ON dseq.mab_id = mm.mab_id \ No newline at end of file diff --git a/resources/schemas/cds.xml b/resources/schemas/cds.xml index e6a9e728e..297ce86b8 100644 --- a/resources/schemas/cds.xml +++ b/resources/schemas/cds.xml @@ -764,14 +764,13 @@ A classification indicating the type of the antibody. - + Unique ID assigned to the donor of the mab. - - - The species of the donor of the mab. The species in which the immune response was stimulated. - - - The clade of the virus strain (subtype) that infected the donor. + + cds + donor_metadata + donor_id + A reference ID to the `antibody_class` table, which indicates the class of the antibody. @@ -1801,6 +1800,8 @@ The sequence as nucleotides. + + @@ -1871,8 +1872,10 @@ - +
+ + The CDS mAb ID. @@ -1883,11 +1886,19 @@ The sequence ID associated with a given mAB ID and chain. + + cds + sequence + sequence_id + - - True if the sequence is a lineage sequence. + + + cds + donor_metadata + donor_id + -
@@ -1896,9 +1907,6 @@ The CDS sequence ID. - - Gene locus (chain type). Note that this field uses a controlled vocabulary that is meant to provide a generic classification of the locus, not necessarily the correct designation according to a specific nomenclature. - True if the aligned sequence contains a stop codon. @@ -2221,4 +2229,42 @@
+ + + + + + + + + +
+ + + + + + +
+ + + + + + + + + + + + + +
+ + + + + + +
\ No newline at end of file diff --git a/resources/schemas/dbscripts/postgresql/cds-24.000-24.001.sql b/resources/schemas/dbscripts/postgresql/cds-24.000-24.001.sql new file mode 100644 index 000000000..4cca1d003 --- /dev/null +++ b/resources/schemas/dbscripts/postgresql/cds-24.000-24.001.sql @@ -0,0 +1,86 @@ +CREATE TABLE cds.donor_metadata +( + donor_id VARCHAR(250) NOT NULL, + donor_lanl_id VARCHAR(250), + donor_code VARCHAR(250), + donor_species VARCHAR(250), + donor_clade VARCHAR(250), + container ENTITYID NOT NULL, + + CONSTRAINT PK_donor_metadata PRIMARY KEY (donor_id, container) +); + +-- update the MAbMetadata table: the per-mAb donor columns are dropped (their values are not copied by this script) and replaced by a donor_id reference to cds.donor_metadata +ALTER TABLE cds.MAbMetadata DROP COLUMN mab_donorid; +ALTER TABLE cds.MAbMetadata DROP COLUMN mab_donor_species; +ALTER TABLE cds.MAbMetadata DROP COLUMN mab_donor_clade; +ALTER TABLE cds.MAbMetadata ADD COLUMN donor_id VARCHAR(250); +ALTER TABLE cds.MAbMetadata ADD CONSTRAINT FK_MAbMetadata_donor_id + FOREIGN KEY (donor_id, container) REFERENCES cds.donor_metadata(donor_id, container); +CREATE INDEX IX_MAbMetadata_donor_id ON cds.MAbMetadata(donor_id); + +CREATE TABLE cds.donor_mab_sequence +( + row_id SERIAL NOT NULL, + mab_id VARCHAR(250), + sequence_id VARCHAR(100), + donor_id VARCHAR(250), + container ENTITYID NOT NULL, + + CONSTRAINT PK_donor_mab_sequence PRIMARY KEY (row_id), + CONSTRAINT FK_donor_mab_sequence_mab_id FOREIGN KEY (mab_id, container) REFERENCES cds.MabMetadata (mab_id, container), + CONSTRAINT FK_donor_mab_sequence_sequence_id FOREIGN KEY (sequence_id) REFERENCES cds.sequence (sequence_id), + CONSTRAINT FK_donor_mab_sequence_donor_id FOREIGN KEY (donor_id, container) REFERENCES cds.donor_metadata (donor_id, container) +); +CREATE INDEX IX_donor_mab_sequence_mab_id ON cds.donor_mab_sequence(mab_id); +CREATE INDEX IX_donor_mab_sequence_sequence_id ON cds.donor_mab_sequence(sequence_id); +CREATE INDEX IX_donor_mab_sequence_donor_id ON cds.donor_mab_sequence(donor_id); + +DROP TABLE cds.antibody_sequence; + +-- drop the alignment.locus column and add new fields to cds.sequence 
+ALTER TABLE cds.alignment DROP COLUMN locus; +ALTER TABLE cds.sequence ADD COLUMN sequence_aa TEXT; +ALTER TABLE cds.sequence ADD COLUMN chain VARCHAR(20); + +-- pAb tables +CREATE TABLE cds.pab_sequence +( + pab_id VARCHAR(250) NOT NULL, + sequence_id VARCHAR(100) NOT NULL, + container ENTITYID NOT NULL, + + CONSTRAINT PK_pab_sequence PRIMARY KEY (pab_id, sequence_id, container), + CONSTRAINT FK_pab_sequence_sequence_id FOREIGN KEY (sequence_id) REFERENCES cds.sequence (sequence_id) +); + +CREATE INDEX IX_pab_sequence_sequence_id ON cds.pab_sequence(sequence_id); + +CREATE TABLE cds.pab_sequence_study +( + row_id SERIAL NOT NULL, + prot VARCHAR(250) NOT NULL, + subject_id VARCHAR(100) NOT NULL, + study_day VARCHAR(100) NOT NULL, + specimen_type VARCHAR(100) NOT NULL, + pab_id VARCHAR(250) NOT NULL, + lab_code VARCHAR(100), + seq_method VARCHAR(100), + assay_identifier VARCHAR(100), + container ENTITYID NOT NULL, + + CONSTRAINT PK_pab_sequence_study PRIMARY KEY (row_id), + CONSTRAINT UQ_prot_subject_study_specimen_pab UNIQUE(prot, subject_id, study_day, specimen_type, pab_id) +); + +CREATE TABLE cds.antibody_structure +( + structure_id VARCHAR(250) NOT NULL, + mab_id VARCHAR(250), + container ENTITYID NOT NULL, + + CONSTRAINT PK_antibody_structure PRIMARY KEY (structure_id), + CONSTRAINT FK_antibody_structure_mab_id FOREIGN KEY (mab_id, container) REFERENCES cds.MabMetadata (mab_id, container) +); +CREATE INDEX IX_antibody_structure_mab_id ON cds.antibody_structure(mab_id); + diff --git a/src/org/labkey/cds/CDSManager.java b/src/org/labkey/cds/CDSManager.java index ebcfea0f0..f09734326 100644 --- a/src/org/labkey/cds/CDSManager.java +++ b/src/org/labkey/cds/CDSManager.java @@ -219,10 +219,14 @@ public void cleanContainer(Container c) "import_virus_metadata_all", "import_assay_combined_antigen_metadata", + "pab_sequence", + "pab_sequence_study", + "antibody_structure", "sequence_header", "sequence_germline", - "antibody_sequence", + "donor_mab_sequence", 
"MAbMetadata", + "donor_metadata", "alignment", "preferred_allele", "sequence", diff --git a/src/org/labkey/cds/CDSModule.java b/src/org/labkey/cds/CDSModule.java index 69b6368b1..cd7e7d768 100644 --- a/src/org/labkey/cds/CDSModule.java +++ b/src/org/labkey/cds/CDSModule.java @@ -184,7 +184,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 24.000; + return 24.001; } @Override diff --git a/src/org/labkey/cds/data/CDSImportCopyConfig.java b/src/org/labkey/cds/data/CDSImportCopyConfig.java index bb196cb17..135037879 100644 --- a/src/org/labkey/cds/data/CDSImportCopyConfig.java +++ b/src/org/labkey/cds/data/CDSImportCopyConfig.java @@ -48,9 +48,19 @@ public abstract class CDSImportCopyConfig extends CopyConfig private final String _fileName; QueryUpdateService.InsertOption _option = QueryUpdateService.InsertOption.IMPORT; - CDSImportCopyConfig(String sourceSchema, String source, String targetSchema, String target, String fileName) + /** + * + * @param sourceSchema Describes whether the source is TSV or CSV delimited + * @param target The target table name + * @param targetSchema The target schema name + * @param fileName The filename containing the data to be imported + * @param copyToImportTable Copy into an intermediate table with an import_ prefix, this can be useful if further transformation + * needs to be performed before copying to the final destination. Otherwise, set false to + * copy directly into the destination table. + */ + CDSImportCopyConfig(String sourceSchema, String target, String targetSchema, String fileName, boolean copyToImportTable) { - super(sourceSchema, source, targetSchema, target); + super(sourceSchema, target, targetSchema, copyToImportTable ? 
"import_" + target : target); _fileName = fileName; } diff --git a/src/org/labkey/cds/data/CSVCopyConfig.java b/src/org/labkey/cds/data/CSVCopyConfig.java index 265c2e6dc..d8b8a14d6 100644 --- a/src/org/labkey/cds/data/CSVCopyConfig.java +++ b/src/org/labkey/cds/data/CSVCopyConfig.java @@ -26,7 +26,7 @@ public class CSVCopyConfig extends CDSImportCopyConfig { public CSVCopyConfig(String table) { - super("#CSV#", table, "cds", table, table); + super("#CSV#", table, "cds", table, false); } @Override diff --git a/src/org/labkey/cds/data/TSVCopyConfig.java b/src/org/labkey/cds/data/TSVCopyConfig.java index 003ec5f98..ed5fbe0af 100644 --- a/src/org/labkey/cds/data/TSVCopyConfig.java +++ b/src/org/labkey/cds/data/TSVCopyConfig.java @@ -24,21 +24,19 @@ public class TSVCopyConfig extends CDSImportCopyConfig { - public TSVCopyConfig(String table, String fileName) + public TSVCopyConfig(String table, String fileName, boolean copyToImportTable) { - // With the current setup, there are 2 steps: - // Step 1: ETL data from a tab separated .txt or a .csv file to an "import_table". - // Step 2: Run another ETL to copy data from an "import_table" to its corresponding actual table. - // This causes an issue when importing BCR data (via Step 1) since a BCR table (cds.antibody_sequence) has a FK - // to the mabmetadata table, but the mabmetadata table is not populated during Step 1 and it errors in FK not found. - // To get around this, we are going to ETL data from mabmetadata.txt directly into the mabmetadata table during Step 1 - // (and drop import_mabmetadata table altogether since it is not referenced in any queries or reports). - super("#TSV#", table, "cds", (table.equalsIgnoreCase("mabmetadata") ? 
table : ("import_" + table)), fileName); + super("#TSV#", table, "cds", fileName, copyToImportTable); /* sourceSchema tag: keeps the "#TSV#" form used before this change and mirrors "#CSV#" in CSVCopyConfig */ } public TSVCopyConfig(String table) { - this(table, table); + this(table, true); + } + + public TSVCopyConfig(String table, boolean copyToImportTable) + { + super("#TSV#", table, "cds", table, copyToImportTable); } @Override diff --git a/src/org/labkey/cds/data/steps/BCRImportTask.java b/src/org/labkey/cds/data/steps/BCRImportTask.java deleted file mode 100644 index 5bab129b1..000000000 --- a/src/org/labkey/cds/data/steps/BCRImportTask.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2023 LabKey Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.labkey.cds.data.steps; - -import org.labkey.cds.data.CDSImportCopyConfig; -import org.labkey.cds.data.CSVCopyConfig; - -public class BCRImportTask extends ImportTask -{ - private static CDSImportCopyConfig[] bcrTables = new CDSImportCopyConfig[] - { - // .csv source file names which have the same target table names - // order matters due to FKs - new CSVCopyConfig("sequence"), - new CSVCopyConfig("alignment_run"), - new CSVCopyConfig("allele_sequence"), - new CSVCopyConfig("sequence_header"), - new CSVCopyConfig("sequence_germline"), - new CSVCopyConfig("antibody_sequence"), - new CSVCopyConfig("alignment"), - new CSVCopyConfig("antibody_class"), - new CSVCopyConfig("preferred_allele"), - new CSVCopyConfig("header_source") - }; - - @Override - protected CDSImportCopyConfig[] getImportCopyConfig() - { - return bcrTables; - } -} diff --git a/src/org/labkey/cds/data/steps/CDSImportTask.java b/src/org/labkey/cds/data/steps/CDSImportTask.java index 5e1f71f7d..c4cde564d 100644 --- a/src/org/labkey/cds/data/steps/CDSImportTask.java +++ b/src/org/labkey/cds/data/steps/CDSImportTask.java @@ -33,6 +33,8 @@ public class CDSImportTask extends ImportTask new CSVCopyConfig("sequence_germline"), new CSVCopyConfig("preferred_allele"), new CSVCopyConfig("antibody_class"), + new CSVCopyConfig("pab_sequence"), + new CSVCopyConfig("pab_sequence_study"), // Core Tables new TSVCopyConfig("Study"), @@ -43,10 +45,13 @@ public class CDSImportTask extends ImportTask new TSVCopyConfig("StudySubject"), // a.k.a Demographics, SubjectCharacteristics new TSVCopyConfig("Document"), new TSVCopyConfig("Publication"), - new TSVCopyConfig("MAbMetadata"), + new TSVCopyConfig("donor_metadata", false), + new TSVCopyConfig("MAbMetadata", false), new TSVCopyConfig("MAbMixMetadata"), - new CSVCopyConfig("antibody_sequence"), // bcr data, order matters due to FKs + // BCR data + new CSVCopyConfig("donor_mab_sequence"), + new CSVCopyConfig("antibody_structure"), // Dependent Tables new 
TSVCopyConfig("StudyPartGroupArm"), @@ -54,7 +59,7 @@ public class CDSImportTask extends ImportTask new TSVCopyConfig("StudyPartGroupArmVisit"), new TSVCopyConfig("StudyPartGroupArmVisitProduct"), new TSVCopyConfig("StudyPartGroupArmVisitTime"), - new TSVCopyConfig("ProductInsert", "ProductInsertClade"), + new TSVCopyConfig("ProductInsert", "ProductInsertClade", true), new TSVCopyConfig("StudyRelationshipOrder"), new TSVCopyConfig("StudyRelationship"), new TSVCopyConfig("MAbMix"), @@ -74,22 +79,22 @@ public class CDSImportTask extends ImportTask new TSVCopyConfig("AssayReport"), //AntigenMetadata - new TSVCopyConfig("ICSAntigen", "AssayICSAntigen_Metadata"), - new TSVCopyConfig("ELISpotAntigen", "AssayELSAntigen_Metadata"), - new TSVCopyConfig("NAbAntigen", "AssayNABAntigen_Metadata"), - new TSVCopyConfig("BAMAAntigen", "AssayBAMAAntigen_Metadata"), - new TSVCopyConfig("antigenPanelMeta", "AntigenPanel_Metadata"), - new TSVCopyConfig("antigenPanel", "AntigenPanel"), - new TSVCopyConfig("virusPanel", "VirusPanel"), - new TSVCopyConfig("assay_combined_antigen_metadata", "AssayCombinedAntigenMetadata"), + new TSVCopyConfig("ICSAntigen", "AssayICSAntigen_Metadata", true), + new TSVCopyConfig("ELISpotAntigen", "AssayELSAntigen_Metadata", true), + new TSVCopyConfig("NAbAntigen", "AssayNABAntigen_Metadata", true), + new TSVCopyConfig("BAMAAntigen", "AssayBAMAAntigen_Metadata", true), + new TSVCopyConfig("antigenPanelMeta", "AntigenPanel_Metadata", true), + new TSVCopyConfig("antigenPanel", "AntigenPanel", true), + new TSVCopyConfig("virusPanel", "VirusPanel", true), + new TSVCopyConfig("assay_combined_antigen_metadata", "AssayCombinedAntigenMetadata", true), // Datasets - new TSVCopyConfig("ICS", "AssayICS"), - new TSVCopyConfig("ELS_IFNg", "AssayELS_IFNg"), - new TSVCopyConfig("NAB", "AssayNAB"), - new TSVCopyConfig("BAMA", "AssayBAMA"), - new TSVCopyConfig("NABMAb", "AssayNABMAb"), - new TSVCopyConfig("PKMAb", "AssayPKMAb"), + new TSVCopyConfig("ICS", "AssayICS", true), 
+ new TSVCopyConfig("ELS_IFNg", "AssayELS_IFNg", true), + new TSVCopyConfig("NAB", "AssayNAB", true), + new TSVCopyConfig("BAMA", "AssayBAMA", true), + new TSVCopyConfig("NABMAb", "AssayNABMAb", true), + new TSVCopyConfig("PKMAb", "AssayPKMAb", true), // Virus data new TSVCopyConfig("Virus_Metadata_All"), diff --git a/test/sampledata/dataspace/cdsimport/MAbMetadata.txt b/test/sampledata/dataspace/cdsimport/MAbMetadata.txt index a6f138113..cde33042d 100644 --- a/test/sampledata/dataspace/cdsimport/MAbMetadata.txt +++ b/test/sampledata/dataspace/cdsimport/MAbMetadata.txt @@ -1 +1,226 @@ -mab_id mab_class_id mab_name_std mab_lanlid mab_hxb2_location mab_ab_binding_type mab_isotype mab_donorid mab_donor_species mab_donor_clade cds_mab_1 MAbClassId_1 PGT128 2642 Env gp120 V3 IgG1 Donor 36 human CRF02_AG cds_mab_2 MAbClassId_1 PGDM1400 3201 Env gp120 V2 IgG Donor 84 human C cds_mab_3 MAbClassId_2 3BNC117 2586 gp160 gp120 CD4BS IgG Patient 3 human B cds_mab_4 MAbClassId_2 PGT151 3107 Env gp41-gp120 (quartenary interface) IgG Donor 31 human cds_mab_5 MAbClassId_1 10E8.2 cds_mab_6 10E8.4 cds_mab_7 10E8.5 cds_mab_8 iMab 3410 cds_mab_9 2F5 815 gp160 gp41 MPER IgG3? human cds_mab_10 4.00E+10 846 gp160 gp41 MPER IgG3? human cds_mab_11 b12 633 Env gp120 CD4BS IgG1? 
Donor b human cds_mab_12 Ab530039K gp120 CD4BS IgG1 3065 human cds_mab_13 Ab530039L gp120 CD4BS IgG1 3065 human cds_mab_14 Ab530057 gp120 CD4BS IgG1 3065 human cds_mab_15 Ab530139K1 gp120 CD4BS IgG1 3065 human cds_mab_16 Ab530139K2 gp120 CD4BS IgG1 3065 human cds_mab_17 Ab530168 gp120 CD4BS IgG1 3065 human cds_mab_18 Ab530204 gp120 CD4BS IgG1 3065 human cds_mab_19 Ab530212 gp120 CD4BS IgG3 3065 human cds_mab_20 Ab530238K gp120 CD4BS IgG1 3065 human cds_mab_21 Ab530239 gp120 CD4BS IgG1 3065 human cds_mab_22 Ab530402.1 gp120 CD4BS IgG1 3065 human cds_mab_23 Ab530402.2 gp120 CD4BS IgG1 3065 human cds_mab_24 sCD4 658 Env gp120 CD4i CoRBS (Cluster C) N70 human cds_mab_25 PG9 2124 gp160 gp120 V2 (quaternary structure) IgG1 Donor 24 human A1 cds_mab_26 PGT121 2635 Env gp120 V3 IgG Donor 17 human A cds_mab_27 PGT123 2637 Env gp120 V3 IgG Donor 17 human A cds_mab_28 PGT125 2639 Env gp120 V3 IgG Donor 36 human CRF02_AG cds_mab_29 PGT126 2640 Env gp120 V3 IgG Donor 36 human CRF02_AG cds_mab_30 PGT127 2641 Env gp120 V3 IgG Donor 36 human CRF02_AG cds_mab_31 PGT130 2643 Env gp120 V3 IgG Donor 36 human CRF02_AG cds_mab_32 PGT135 2645 Env gp120 V3 IgG Donor 39 human C cds_mab_33 PGT143 2650 Env gp120 V2 (quaternary structure) IgG Donor 84 human C cds_mab_34 PGT145 2651 Env gp160 IgG Donor 84 human C cds_mab_35 A12 1860 Env gp120 CD4BS llama cds_mab_36 VRC01 2163 Env gp120 CD4BS IgG1 NIH45 human B cds_mab_37 17b 658 Env gp120 CD4i CoRBS (Cluster C) N70 human cds_mab_38 19e 1837 Env gp120 CD4i cds_mab_39 L9-i3 2976 Env gp120 CD4i cluster C.1 IgG L9 human cds_mab_40 L9-i4 3004 Env gp120 CD4i cluster C.4 IgG L9 human cds_mab_41 1361 952 Env gp120 V2 IgG1? donor_uncoded_1 human B cds_mab_42 1393A 953 Env gp120 V2 donor_uncoded_1 human B cds_mab_43 2158 1347 Env gp120 V2 IgG1? 
human cds_mab_44 2297 3215 gp160 gp120 V2 human cds_mab_45 30D 1132 Env cds_mab_46 J3 355 gp160 IgG1 mouse cds_mab_47 1H9 3251 Env gp120 V2-CD4BS llama cds_mab_48 3.00E+03 3221 gp160 gp120 CD4BS llama cds_mab_49 A14 3218 gp160 gp120 CD4BS llama cds_mab_50 B21 3220 gp160 gp120 CD4BS llama cds_mab_51 B9 3219 gp160 gp120 CD4BS llama cds_mab_52 1NC9 2900 Env gp120 CD4BS IgG Patient 1 human B cds_mab_53 3BNC60 2582 Env gp120 CD4BS IgG Patient 3 human B cds_mab_54 CH103 2861 Env gp120 CD4BS IgG Donor CH505 human C cds_mab_55 1.00E+09 2708 gp160 gp41 MPER IgG3 Donor N152 human B cds_mab_56 VRC13 3284 Env gp120 CD4BS 44 human B cds_mab_57 HIVIG 1118 Gag human cds_mab_58 M785-U1 3634 gp160 gp41 cds_mab_59 VRC07-523-LS 3156 Env gp120 CD4BS IgG NIH45 human B cds_mab_60 VRC26.25 3285 Env gp120 V2(quaternary structure) CAP256 human C cds_mab_61 VRC13 3284 Env gp120 CD4BS 44 human B cds_mab_62 AB-000402-1 cds_mab_63 AB-000403-1 cds_mab_64 AB-000404-1 cds_mab_65 AB-000405-1 cds_mab_66 AB-000406-1 cds_mab_67 AB-000407-1 cds_mab_68 AB-000408-1 cds_mab_69 AB-000409-1 cds_mab_70 AB-000410-1 cds_mab_71 AB-000411-1 cds_mab_72 AB-000412-1 cds_mab_73 AB-000413-1 cds_mab_74 AB-000414-1 cds_mab_75 AB-000415-1 cds_mab_76 AB-000416-1 cds_mab_77 AB-000417-1 cds_mab_78 AB-000418-1 cds_mab_79 AB-000419-1 cds_mab_80 AB-000420-1 cds_mab_81 AB-000421-1 cds_mab_82 AB-000422-1 cds_mab_83 AB-000423-1 cds_mab_84 AB-000424-1 cds_mab_85 AB-000425-1 cds_mab_86 AB-000426-1 cds_mab_87 AB-000427-1 cds_mab_88 AB-000428-1 cds_mab_89 AB-000429-1 cds_mab_90 AB-000430-1 cds_mab_91 AB-000431-1 cds_mab_92 AB-000432-1 cds_mab_93 AB-000433-1 cds_mab_94 AB-000434-1 cds_mab_95 AB-000435-1 cds_mab_96 AB-000436-1 cds_mab_97 AB-000437-1 cds_mab_98 AB-000438-1 cds_mab_99 AB-000439-1 cds_mab_100 AB-000440-1 cds_mab_101 AB-000441-1 cds_mab_102 AB-000442-1 cds_mab_103 AB-000443-1 cds_mab_104 AB-000444-1 cds_mab_105 AB-000445-1 cds_mab_106 AB-000446-1 cds_mab_107 AB-000447-1 cds_mab_108 AB-000448-1 cds_mab_109 AB-000449-1 
cds_mab_110 AB-000450-1 cds_mab_111 AB-000451-1 cds_mab_112 AB-000452-1 cds_mab_113 AB-000453-1 cds_mab_114 AB-000454-1 cds_mab_115 AB-000455-1 cds_mab_116 AB-000456-1 cds_mab_117 AB-000457-1 cds_mab_118 AB-000458-1 cds_mab_119 AB-000459-1 cds_mab_120 AB-000460-1 cds_mab_121 AB-000461-1 cds_mab_122 AB-000462-1 cds_mab_123 AB-000463-1 cds_mab_124 AB-000464-1 cds_mab_125 AB-000465-1 cds_mab_126 AB-000466-1 cds_mab_127 AB-000467-1 cds_mab_128 AB-000468-1 cds_mab_129 AB-000469-1 cds_mab_130 AB-000470-1 cds_mab_131 AB-000471-1 cds_mab_132 AB-000472-1 cds_mab_133 AB-000473-1 cds_mab_134 AB-000474-1 cds_mab_135 AB-000475-1 cds_mab_136 AB-000476-1 cds_mab_137 AB-000477-1 cds_mab_138 AB-000478-1 cds_mab_139 AB-000479-1 cds_mab_140 AB-000480-1 cds_mab_141 AB-000481-1 cds_mab_142 AB-000482-1 cds_mab_143 AB-000483-1 cds_mab_144 AB-000484-1 cds_mab_145 AB-000485-1 cds_mab_146 mAb 1.1 cds_mab_147 mAb 10.1 cds_mab_148 mAb 11.1 cds_mab_149 mAb 2.1 cds_mab_150 mAb 28.1 cds_mab_151 mAb 3.1 cds_mab_152 mAb 31.1 cds_mab_153 mAb 33.1 cds_mab_154 mAb 34.1 cds_mab_155 mAb 37.1 cds_mab_156 mAb 4.1 cds_mab_157 mAb 5.1 cds_mab_158 mAb 6.1 cds_mab_159 mAb 7.1 cds_mab_160 mAb 8.1 cds_mab_161 mAb 9.1 cds_mab_162 mAb 93 cds_mab_163 mAb 94 cds_mab_164 mAb 95 cds_mab_165 mAb 96 cds_mab_166 mAb 97 cds_mab_167 mAb 98 cds_mab_168 mAb 99 cds_mab_169 mAb 100 cds_mab_170 mAb 101 cds_mab_171 mAb 102 cds_mab_172 mAb 103 cds_mab_173 mAb 104 cds_mab_174 mAb 105 cds_mab_175 mAb 106 cds_mab_176 mAb 107 cds_mab_177 mAb 108 cds_mab_178 mAb 109 cds_mab_179 mAb 110 cds_mab_180 mAb 111 cds_mab_181 mAb 112 cds_mab_182 mAb 113 cds_mab_183 mAb 114 cds_mab_184 mAb 115 cds_mab_185 mAb 116 cds_mab_186 mAb 117 cds_mab_187 mAb 118 cds_mab_188 mAb 119 cds_mab_189 mAb 120 cds_mab_190 mAb 121 cds_mab_191 mAb 122 cds_mab_192 mAb 123 cds_mab_193 mAb 124 cds_mab_194 mAb 125 cds_mab_195 mAb 126 cds_mab_196 mAb 127 cds_mab_197 mAb 128 cds_mab_198 AB-000796-1 cds_mab_199 AB-000797-1 cds_mab_200 AB-000798-1 cds_mab_201 AB-000799-1 
cds_mab_202 AB-000800-1 cds_mab_203 AB-000801-1 cds_mab_204 AB-000802-1 cds_mab_205 AB-000803-1 cds_mab_206 AB-000804-1 cds_mab_207 AB-000805-1 cds_mab_208 AB-000806-1 cds_mab_209 AB-000807-1 cds_mab_210 AB-000808-1 cds_mab_211 AB-000809-1 cds_mab_212 CCFV cds_mab_213 SCFV cds_mab_214 MVN cds_mab_215 LM52 cds_mab_216 P16i cds_mab_217 Pi cds_mab_218 RhiMab cds_mab_219 10E8 V2.0/iMab 3472 gp160 Donore N152 human B cds_mab_220 CH38 2880 Env gp120 CD4i C1 region IgA1 347759 human cds_mab_221 19B 457 gp160 gp120 V3 IgG1? N70 human cds_mab_222 CH27 3242 gp160 gp120 CD4BS IgA2 CH0457 human C cds_mab_223 CH28 3243 gp160 gp120 CD4BS IgA2 CH0457 human C cds_mab_224 CH31 2574 Env gp120 CD4BS IgG1 CH0219 human A cds_mab_225 DH511 Env gp41 MPER IgG CH0210 human C \ No newline at end of file +mab_id mab_class_id mab_name_std mab_lanlid mab_hxb2_location mab_ab_binding_type mab_isotype donor_id +cds_mab_1 MAbClassId_1 PGT128 2642 Env gp120 V3 IgG1 Donor 36 +cds_mab_2 MAbClassId_1 PGDM1400 3201 Env gp120 V2 IgG Donor 84 +cds_mab_3 MAbClassId_2 3BNC117 2586 gp160 gp120 CD4BS IgG Patient 3 +cds_mab_4 MAbClassId_2 PGT151 3107 Env gp41-gp120 (quartenary interface) IgG Donor 31 +cds_mab_5 MAbClassId_1 10E8.2 +cds_mab_6 10E8.4 +cds_mab_7 10E8.5 +cds_mab_8 iMab 3410 +cds_mab_9 2F5 815 gp160 gp41 MPER IgG3? Donor 31 +cds_mab_10 4.00E+10 846 gp160 gp41 MPER IgG3? Donor 31 +cds_mab_11 b12 633 Env gp120 CD4BS IgG1? 
Donor b +cds_mab_12 Ab530039K gp120 CD4BS IgG1 3065 +cds_mab_13 Ab530039L gp120 CD4BS IgG1 3065 +cds_mab_14 Ab530057 gp120 CD4BS IgG1 3065 +cds_mab_15 Ab530139K1 gp120 CD4BS IgG1 3065 +cds_mab_16 Ab530139K2 gp120 CD4BS IgG1 3065 +cds_mab_17 Ab530168 gp120 CD4BS IgG1 3065 +cds_mab_18 Ab530204 gp120 CD4BS IgG1 3065 +cds_mab_19 Ab530212 gp120 CD4BS IgG3 3065 +cds_mab_20 Ab530238K gp120 CD4BS IgG1 3065 +cds_mab_21 Ab530239 gp120 CD4BS IgG1 3065 +cds_mab_22 Ab530402.1 gp120 CD4BS IgG1 3065 +cds_mab_23 Ab530402.2 gp120 CD4BS IgG1 3065 +cds_mab_24 sCD4 658 Env gp120 CD4i CoRBS (Cluster C) N70 +cds_mab_25 PG9 2124 gp160 gp120 V2 (quaternary structure) IgG1 Donor 24 +cds_mab_26 PGT121 2635 Env gp120 V3 IgG Donor 17 +cds_mab_27 PGT123 2637 Env gp120 V3 IgG Donor 17 +cds_mab_28 PGT125 2639 Env gp120 V3 IgG Donor 36 +cds_mab_29 PGT126 2640 Env gp120 V3 IgG Donor 36 +cds_mab_30 PGT127 2641 Env gp120 V3 IgG Donor 36 +cds_mab_31 PGT130 2643 Env gp120 V3 IgG Donor 36 +cds_mab_32 PGT135 2645 Env gp120 V3 IgG Donor 39 +cds_mab_33 PGT143 2650 Env gp120 V2 (quaternary structure) IgG Donor 84 +cds_mab_34 PGT145 2651 Env gp160 IgG Donor 84 +cds_mab_35 A12 1860 Env gp120 CD4BS Donor L1 +cds_mab_36 VRC01 2163 Env gp120 CD4BS IgG1 NIH45 +cds_mab_37 17b 658 Env gp120 CD4i CoRBS (Cluster C) N70 +cds_mab_38 19e 1837 Env gp120 CD4i +cds_mab_39 L9-i3 2976 Env gp120 CD4i cluster C.1 IgG L9 +cds_mab_40 L9-i4 3004 Env gp120 CD4i cluster C.4 IgG L9 +cds_mab_41 1361 952 Env gp120 V2 IgG1? donor_uncoded_1 +cds_mab_42 1393A 953 Env gp120 V2 donor_uncoded_1 +cds_mab_43 2158 1347 Env gp120 V2 IgG1? 
Donor 31 +cds_mab_44 2297 3215 gp160 gp120 V2 Donor 31 +cds_mab_45 30D 1132 Env +cds_mab_46 J3 355 gp160 IgG1 Donor M1 +cds_mab_47 1H9 3251 Env gp120 V2-CD4BS Donor L1 +cds_mab_48 3.00E+03 3221 gp160 gp120 CD4BS Donor L1 +cds_mab_49 A14 3218 gp160 gp120 CD4BS Donor L1 +cds_mab_50 B21 3220 gp160 gp120 CD4BS Donor L1 +cds_mab_51 B9 3219 gp160 gp120 CD4BS Donor L1 +cds_mab_52 1NC9 2900 Env gp120 CD4BS IgG Patient 1 +cds_mab_53 3BNC60 2582 Env gp120 CD4BS IgG Patient 3 +cds_mab_54 CH103 2861 Env gp120 CD4BS IgG Donor CH505 +cds_mab_55 1.00E+09 2708 gp160 gp41 MPER IgG3 Donor N152 +cds_mab_56 VRC13 3284 Env gp120 CD4BS 44 +cds_mab_57 HIVIG 1118 Gag Donor 31 +cds_mab_58 M785-U1 3634 gp160 gp41 +cds_mab_59 VRC07-523-LS 3156 Env gp120 CD4BS IgG NIH45 +cds_mab_60 VRC26.25 3285 Env gp120 V2(quaternary structure) CAP256 +cds_mab_61 VRC13 3284 Env gp120 CD4BS 44 +cds_mab_62 AB-000402-1 +cds_mab_63 AB-000403-1 +cds_mab_64 AB-000404-1 +cds_mab_65 AB-000405-1 +cds_mab_66 AB-000406-1 +cds_mab_67 AB-000407-1 +cds_mab_68 AB-000408-1 +cds_mab_69 AB-000409-1 +cds_mab_70 AB-000410-1 +cds_mab_71 AB-000411-1 +cds_mab_72 AB-000412-1 +cds_mab_73 AB-000413-1 +cds_mab_74 AB-000414-1 +cds_mab_75 AB-000415-1 +cds_mab_76 AB-000416-1 +cds_mab_77 AB-000417-1 +cds_mab_78 AB-000418-1 +cds_mab_79 AB-000419-1 +cds_mab_80 AB-000420-1 +cds_mab_81 AB-000421-1 +cds_mab_82 AB-000422-1 +cds_mab_83 AB-000423-1 +cds_mab_84 AB-000424-1 +cds_mab_85 AB-000425-1 +cds_mab_86 AB-000426-1 +cds_mab_87 AB-000427-1 +cds_mab_88 AB-000428-1 +cds_mab_89 AB-000429-1 +cds_mab_90 AB-000430-1 +cds_mab_91 AB-000431-1 +cds_mab_92 AB-000432-1 +cds_mab_93 AB-000433-1 +cds_mab_94 AB-000434-1 +cds_mab_95 AB-000435-1 +cds_mab_96 AB-000436-1 +cds_mab_97 AB-000437-1 +cds_mab_98 AB-000438-1 +cds_mab_99 AB-000439-1 +cds_mab_100 AB-000440-1 +cds_mab_101 AB-000441-1 +cds_mab_102 AB-000442-1 +cds_mab_103 AB-000443-1 +cds_mab_104 AB-000444-1 +cds_mab_105 AB-000445-1 +cds_mab_106 AB-000446-1 +cds_mab_107 AB-000447-1 +cds_mab_108 AB-000448-1 
+cds_mab_109 AB-000449-1 +cds_mab_110 AB-000450-1 +cds_mab_111 AB-000451-1 +cds_mab_112 AB-000452-1 +cds_mab_113 AB-000453-1 +cds_mab_114 AB-000454-1 +cds_mab_115 AB-000455-1 +cds_mab_116 AB-000456-1 +cds_mab_117 AB-000457-1 +cds_mab_118 AB-000458-1 +cds_mab_119 AB-000459-1 +cds_mab_120 AB-000460-1 +cds_mab_121 AB-000461-1 +cds_mab_122 AB-000462-1 +cds_mab_123 AB-000463-1 +cds_mab_124 AB-000464-1 +cds_mab_125 AB-000465-1 +cds_mab_126 AB-000466-1 +cds_mab_127 AB-000467-1 +cds_mab_128 AB-000468-1 +cds_mab_129 AB-000469-1 +cds_mab_130 AB-000470-1 +cds_mab_131 AB-000471-1 +cds_mab_132 AB-000472-1 +cds_mab_133 AB-000473-1 +cds_mab_134 AB-000474-1 +cds_mab_135 AB-000475-1 +cds_mab_136 AB-000476-1 +cds_mab_137 AB-000477-1 +cds_mab_138 AB-000478-1 +cds_mab_139 AB-000479-1 +cds_mab_140 AB-000480-1 +cds_mab_141 AB-000481-1 +cds_mab_142 AB-000482-1 +cds_mab_143 AB-000483-1 +cds_mab_144 AB-000484-1 +cds_mab_145 AB-000485-1 +cds_mab_146 mAb 1.1 +cds_mab_147 mAb 10.1 +cds_mab_148 mAb 11.1 +cds_mab_149 mAb 2.1 +cds_mab_150 mAb 28.1 +cds_mab_151 mAb 3.1 +cds_mab_152 mAb 31.1 +cds_mab_153 mAb 33.1 +cds_mab_154 mAb 34.1 +cds_mab_155 mAb 37.1 +cds_mab_156 mAb 4.1 +cds_mab_157 mAb 5.1 +cds_mab_158 mAb 6.1 +cds_mab_159 mAb 7.1 +cds_mab_160 mAb 8.1 +cds_mab_161 mAb 9.1 +cds_mab_162 mAb 93 +cds_mab_163 mAb 94 +cds_mab_164 mAb 95 +cds_mab_165 mAb 96 +cds_mab_166 mAb 97 +cds_mab_167 mAb 98 +cds_mab_168 mAb 99 +cds_mab_169 mAb 100 +cds_mab_170 mAb 101 +cds_mab_171 mAb 102 +cds_mab_172 mAb 103 +cds_mab_173 mAb 104 +cds_mab_174 mAb 105 +cds_mab_175 mAb 106 +cds_mab_176 mAb 107 +cds_mab_177 mAb 108 +cds_mab_178 mAb 109 +cds_mab_179 mAb 110 +cds_mab_180 mAb 111 +cds_mab_181 mAb 112 +cds_mab_182 mAb 113 +cds_mab_183 mAb 114 +cds_mab_184 mAb 115 +cds_mab_185 mAb 116 +cds_mab_186 mAb 117 +cds_mab_187 mAb 118 +cds_mab_188 mAb 119 +cds_mab_189 mAb 120 +cds_mab_190 mAb 121 +cds_mab_191 mAb 122 +cds_mab_192 mAb 123 +cds_mab_193 mAb 124 +cds_mab_194 mAb 125 +cds_mab_195 mAb 126 +cds_mab_196 mAb 127 
+cds_mab_197 mAb 128 +cds_mab_198 AB-000796-1 +cds_mab_199 AB-000797-1 +cds_mab_200 AB-000798-1 +cds_mab_201 AB-000799-1 +cds_mab_202 AB-000800-1 +cds_mab_203 AB-000801-1 +cds_mab_204 AB-000802-1 +cds_mab_205 AB-000803-1 +cds_mab_206 AB-000804-1 +cds_mab_207 AB-000805-1 +cds_mab_208 AB-000806-1 +cds_mab_209 AB-000807-1 +cds_mab_210 AB-000808-1 +cds_mab_211 AB-000809-1 +cds_mab_212 CCFV +cds_mab_213 SCFV +cds_mab_214 MVN +cds_mab_215 LM52 +cds_mab_216 P16i +cds_mab_217 Pi +cds_mab_218 RhiMab +cds_mab_219 10E8 V2.0/iMab 3472 gp160 Donore N152 +cds_mab_220 CH38 2880 Env gp120 CD4i C1 region IgA1 347759 +cds_mab_221 19B 457 gp160 gp120 V3 IgG1? N70 +cds_mab_222 CH27 3242 gp160 gp120 CD4BS IgA2 CH0457 +cds_mab_223 CH28 3243 gp160 gp120 CD4BS IgA2 CH0457 +cds_mab_224 CH31 2574 Env gp120 CD4BS IgG1 CH0219 +cds_mab_225 DH511 Env gp41 MPER IgG CH0210 \ No newline at end of file diff --git a/test/sampledata/dataspace/cdsimport/antibody_sequence.csv b/test/sampledata/dataspace/cdsimport/antibody_sequence.csv deleted file mode 100644 index 64d50eba0..000000000 --- a/test/sampledata/dataspace/cdsimport/antibody_sequence.csv +++ /dev/null @@ -1,4 +0,0 @@ -mab_id,sequence_id,lineage -cds_mab_1,cds_seq_1,true -cds_mab_2,cds_seq_10,false -cds_mab_3,cds_seq_100,true \ No newline at end of file diff --git a/test/sampledata/dataspace/cdsimport/antibody_structure.csv b/test/sampledata/dataspace/cdsimport/antibody_structure.csv new file mode 100644 index 000000000..8fba62a62 --- /dev/null +++ b/test/sampledata/dataspace/cdsimport/antibody_structure.csv @@ -0,0 +1,4 @@ +mab_id,structure_id +cds_mab_1,cds_struct_1 +cds_mab_2,cds_struct_2 +cds_mab_3,cds_struct_3 \ No newline at end of file diff --git a/test/sampledata/dataspace/cdsimport/donor_mab_sequence.csv b/test/sampledata/dataspace/cdsimport/donor_mab_sequence.csv new file mode 100644 index 000000000..e2fd82670 --- /dev/null +++ b/test/sampledata/dataspace/cdsimport/donor_mab_sequence.csv @@ -0,0 +1,4 @@ +mab_id,sequence_id,donor_id 
+cds_mab_1,cds_seq_1,Donor 36 +cds_mab_2,cds_seq_10,Donor 84 +cds_mab_3,cds_seq_100,Patient 3 \ No newline at end of file diff --git a/test/sampledata/dataspace/cdsimport/donor_metadata.txt b/test/sampledata/dataspace/cdsimport/donor_metadata.txt new file mode 100644 index 000000000..3bc7cbc81 --- /dev/null +++ b/test/sampledata/dataspace/cdsimport/donor_metadata.txt @@ -0,0 +1,26 @@ +donor_id donor_species donor_clade +Donor 36 human CRF02_AG +Donor 31 human +Donor b human +3065 human +N70 human +Donor 24 human A1 +Donor 17 human A +Donor 39 human C +Donor 84 human C +Donor L1 llama +NIH45 human B +L9 human +donor_uncoded_1 human B +Donor M1 mouse +Patient 1 human B +Patient 3 human B +Donor CH505 human C +Donor N152 human B +44 human B +CAP256 human C +Donore N152 human B +347759 human +CH0457 human C +CH0219 human A +CH0210 human C \ No newline at end of file diff --git a/test/sampledata/dataspace/cdsimport/pab_sequence.csv b/test/sampledata/dataspace/cdsimport/pab_sequence.csv new file mode 100644 index 000000000..08affc7b4 --- /dev/null +++ b/test/sampledata/dataspace/cdsimport/pab_sequence.csv @@ -0,0 +1,7 @@ +sequence_id,pab_id +cds_seq_1,pab_id-1 +cds_seq_1,pab_id-2 +cds_seq_10,pab_id-1 +cds_seq_10,pab_id-2 +cds_seq_100,pab_id-1 +cds_seq_100,pab_id-2 \ No newline at end of file diff --git a/test/sampledata/dataspace/cdsimport/pab_sequence_study.csv b/test/sampledata/dataspace/cdsimport/pab_sequence_study.csv new file mode 100644 index 000000000..277f59ee4 --- /dev/null +++ b/test/sampledata/dataspace/cdsimport/pab_sequence_study.csv @@ -0,0 +1,4 @@ +prot,subject_id,study_day,specimen_type,pab_id,lab_code,seq_method,assay_identifier +q2,q2-044,1,human,pab_id_1,LC1,NA,AI-1 +r1,r1-032,2,human,pab_id_2,LC2,NA,AI-1 +z101,z101-041,3,human,pab_id_3,LC3,NA,AI-1 \ No newline at end of file