diff --git a/cicd/Dockerfile b/cicd/Dockerfile index a897ad9..6e0d0ff 100644 --- a/cicd/Dockerfile +++ b/cicd/Dockerfile @@ -6,10 +6,10 @@ VOLUME ["named_volume"] RUN git clone https://github.com/iquasere/MOSCA.git -b development \ && bash MOSCA/cicd/install.bash --mosca_env "$(conda info --base)" \ && conda clean --all \ -# all instructions ahead are only for MP spectra conversion with docker +# all commands ahead are only for MP spectra conversion with docker && apt-get update \ && apt-get install -y curl \ && curl -fsSL https://get.docker.com -o get-docker.sh \ && sh get-docker.sh -CMD [ "python", "bin/mosca.py" ] \ No newline at end of file +CMD [ "mosca" ] \ No newline at end of file diff --git a/resources/default_config.json b/resources/default_config.json index a1da8b8..f000b5d 100644 --- a/resources/default_config.json +++ b/resources/default_config.json @@ -76,7 +76,255 @@ "keggcharter_taxa_level": "SPECIES", "keggcharter_number_of_taxa": 10, "keggcharter_maps": [ - "00680" + "00010", + "00020", + "00030", + "00040", + "00051", + "00052", + "00053", + "00190", + "00195", + "00196", + "00500", + "00520", + "00562", + "00620", + "00630", + "00640", + "00650", + "00660", + "00680", + "00710", + "00720", + "00910", + "00920", + "00061", + "00062", + "00071", + "00073", + "00100", + "00120", + "00121", + "00140", + "00561", + "00564", + "00565", + "00600", + "00590", + "00591", + "00592", + "01040", + "00230", + "00240", + "00250", + "00260", + "00270", + "00280", + "00290", + "00310", + "00220", + "00330", + "00340", + "00350", + "00360", + "00380", + "00400", + "00410", + "00430", + "00440", + "00450", + "00460", + "00470", + "00480", + "00510", + "00513", + "00512", + "00515", + "00514", + "00532", + "00534", + "00533", + "00531", + "00563", + "00601", + "00603", + "00604", + "00540", + "00550", + "00511", + "00571", + "00572", + "00730", + "00740", + "00750", + "00760", + "00770", + "00780", + "00785", + "00790", + "00670", + "00830", + "00860", + "00130", 
+ "00900", + "00902", + "00909", + "00904", + "00906", + "00905", + "00981", + "00908", + "00903", + "00907", + "01052", + "00522", + "01051", + "01059", + "01056", + "01057", + "00253", + "00523", + "01054", + "01053", + "01055", + "00940", + "00945", + "00941", + "00944", + "00942", + "00943", + "00901", + "00403", + "00950", + "00960", + "00996", + "00232", + "00965", + "00966", + "00402", + "00311", + "00332", + "00261", + "00331", + "00521", + "00524", + "00525", + "00401", + "00404", + "00405", + "00333", + "00254", + "00998", + "00999", + "00362", + "00627", + "00364", + "00625", + "00361", + "00623", + "00622", + "00633", + "00642", + "00643", + "00791", + "00930", + "00363", + "00621", + "00626", + "00624", + "00365", + "00984", + "00980", + "03020", + "03022", + "03040", + "03010", + "00970", + "03013", + "03015", + "03008", + "03060", + "04141", + "04130", + "04120", + "04122", + "03050", + "03018", + "03030", + "03410", + "03420", + "03430", + "03440", + "03450", + "03460", + "02010", + "02060", + "03070", + "02020", + "04014", + "04015", + "04010", + "04011", + "04012", + "04310", + "04330", + "04340", + "04350", + "04390", + "04392", + "04370", + "04371", + "04630", + "04064", + "04668", + "04066", + "04068", + "04020", + "04070", + "04072", + "04071", + "04024", + "04022", + "04151", + "04152", + "04150", + "04075", + "04080", + "04060", + "04061", + "04512", + "04514", + "04144", + "04145", + "04142", + "04146", + "04138", + "04136", + "04139", + "04110", + "04111", + "04112", + "04113", + "04114", + "04210", + "04215", + "04216", + "04217", + "04115", + "04218", + "04510", + "04520", + "04530", + "04540", + "04550", + "02024", + "05111", + "02025", + "02026", + "02030", + "02040", + "04810" ], "inside_container": true } diff --git a/workflow/mosca.py b/workflow/mosca.py index 4381423..9de3fc5 100644 --- a/workflow/mosca.py +++ b/workflow/mosca.py @@ -9,7 +9,7 @@ import pandas as pd import re -__version__ = '2.3.0' +__version__ = '2.3.1' parser = 
argparse.ArgumentParser(description="MOSCA's main script") parser.add_argument("-s", "--snakefile", default=f'{sys.path[0]}/Snakefile', help="Path to Snakefile") @@ -53,14 +53,17 @@ def validate_exps(exps_data): reserved_words = [ 'if', 'else', 'repeat', 'while', 'function', 'for', 'in', 'next', 'break', 'TRUE', 'FALSE', 'NULL', 'Inf', 'NaN', 'NA', 'NA_integer_', 'NA_real_', 'NA_complex_', 'NA_character_'] - good_pattern = re.compile(r'^(?!^\d)(?!^\.\d)([\w.]+)$') # don't start with a number, nor a decimal (dot followed by number) + good_pattern = re.compile(r'^(?!^\d)(?!^\.\d)([\w.]+)$') # don't start with a number, nor a decimal (dot followed by number), and have no special characters (-, +, *, /, etc.) + if exps['Name'].duplicated().any(): + sys.exit(f'ERROR: Multiple rows with same "Name" value: {",".join(exps.loc[exps["Name"].duplicated(), "Name"].astype(str).unique())}.') for name in exps['Name']: if not name: # if name is None, or empty string, MOSCA should be able to build one that is fine continue if name in reserved_words: sys.exit(f'INVALID "NAME" in "experiments": {name} is a reserved R word.') if not bool(good_pattern.match(name)): - sys.exit(f'INVALID "NAME" in "experiments": {name} starts with a number or has a special character.') + sys.exit(f'INVALID "NAME" in "experiments": {name} starts with a number or has a special character.\n' + f'Please use only letters, numbers, dots (.) 
and underscores (_).') def validate_config(config_data): diff --git a/workflow/scripts/binning.py b/workflow/scripts/binning.py index f394d97..61b59e1 100644 --- a/workflow/scripts/binning.py +++ b/workflow/scripts/binning.py @@ -12,6 +12,7 @@ import pandas as pd import shutil import pathlib +import snakemake class Binner: @@ -74,7 +75,7 @@ def better_bin(self, table1, table2): return True if lq_bins1 < lq_bins2: return False - return True + return False def iterative_binning(self, contigs, output, threads=8, reads=None, reads2=None, markerset='40'): best_bin = 10 diff --git a/workflow/scripts/normalization.R b/workflow/scripts/normalization.R index 89292fb..e882987 100644 --- a/workflow/scripts/normalization.R +++ b/workflow/scripts/normalization.R @@ -28,13 +28,14 @@ normalize_gene_expression <- function(df, output_file, norm_method, imput_method imputed <- llsImpute(t(norm), correlation = "pearson", allVariables = allVariables) df <- t(completeObs(imputed)) } else if (imput_method == "MIN") { + print("Imputing missing values with the minimum value.") df[is.na(df)] <- min(norm, na.rm=TRUE) } } else { stop("Error: normalization method must be either TMM, RLE, or VSN") } - print('Writing normalized results.') - write.table(df, file = output_file, sep = "\t", row.names = TRUE, col.names = TRUE) + print('Writing normalized results.') # but first convert back to counts (normalized) + write.table(2^df, file = output_file, sep = "\t", row.names = TRUE, col.names = TRUE, quote=FALSE) } print("Reading data to normalize.")