fix: Replace outdated df append (#544)
* replaced all df.append with pd.concat

* replaced df.append with pd.concat

* replace df.append with pd.concat

* replaced df.append with pd.concat

* replaced df.append with pd.concat

* replaced df.append with pd.concat

Co-authored-by: lenakinzel <[email protected]>
Co-authored-by: Alexander Thomas <[email protected]>
3 people authored Jun 28, 2022
1 parent db6eb17 commit 4a40003
Showing 10 changed files with 72 additions and 41 deletions.
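For context, every hunk in this commit applies the same migration: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and pd.concat is its replacement. A minimal sketch of the pattern, using hypothetical df and row_df names that are not taken from this repository:

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
row_df = pd.DataFrame({"a": [3]})

# old API, deprecated since pandas 1.4 and removed in pandas 2.0:
# df = df.append(row_df, ignore_index=True)

# replacement used throughout this commit:
df = pd.concat([df, row_df], ignore_index=True)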
2 changes: 1 addition & 1 deletion workflow/scripts/collect_lineage_calls.py
@@ -57,7 +57,7 @@ def collect_calls(sm_input, sm_output, states, lineage, number, length):
]

# bring them together
call = pangolin_calls.append(call)
call = pd.concat([pangolin_calls, call])

call.to_csv(sm_output, sep="\t", index=False)

2 changes: 1 addition & 1 deletion workflow/scripts/evaluate-strain-call-error.py
@@ -62,7 +62,7 @@ def eval_error(paths, sm_output, max_reads, prefix, separator, percentage, load_

df = df.merge(org_mix_df, how="outer").fillna(0)

results_df = results_df.append(df)
results_df = pd.concat([results_df, df])

for sample in results_df["mix"].unique():
sample_rmse = rmse(
4 changes: 3 additions & 1 deletion workflow/scripts/extract-strains-from-gisaid-provision.py
@@ -21,7 +21,9 @@ def extract_strains_from_provision(
chunks = pd.read_json(path_to_provision, lines=True, chunksize=9000)
for i, chunk in enumerate(chunks):
print(f"Parsing chunk {i}", file=sys.stderr)
provision = provision.append(select_oldest_strains(chunk), ignore_index=True)
provision = pd.concat(
[provision, select_oldest_strains(chunk)], ignore_index=True
)
provision = select_oldest_strains(provision)

# save strain genomes
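Note that pd.concat is called once per chunk here, which mirrors the original df.append loop one-to-one but copies the growing provision frame on every iteration. A common alternative (a sketch only, not part of this commit, and dropping the per-chunk progress print) collects the per-chunk results in a list and concatenates a single time, reusing the names from the script above:

chunks = pd.read_json(path_to_provision, lines=True, chunksize=9000)
# select the oldest strains per chunk, then concatenate once at the end
oldest_per_chunk = [select_oldest_strains(chunk) for chunk in chunks]
provision = pd.concat(oldest_per_chunk, ignore_index=True)
provision = select_oldest_strains(provision)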
41 changes: 28 additions & 13 deletions workflow/scripts/generate-lineage-variant-table.py
@@ -75,21 +75,36 @@ def rename_enumeration(list_length):
lineages = record.info["LINEAGES"]
for signature in signatures:
# generate df with all signatures + VAF and Prob_not_present from calculation
variants_df = variants_df.append(
{
"Mutations": signature,
"Frequency": vaf,
"ReadDepth": dp,
"Prob_not_present": prob_not_present,
},
variants_df = pd.concat(
[
variants_df,
pd.DataFrame(
{
"Frequency": vaf,
"Mutations": signature,
"Prob_not_present": prob_not_present,
"ReadDepth": dp,
},
index=[0],
),
],
ignore_index=True,
)
# generate df with lineage matrix for all signatures
lineage_df = lineage_df.append(
{
"Mutations": signature,
**{lineage.replace(".", " "): "x" for lineage in lineages},
},

lineage_df = pd.concat(
[
lineage_df,
pd.DataFrame(
{
"Mutations": [signature],
**{
lineage.replace(".", " "): "x"
for lineage in lineages
},
},
index=[0],
),
],
ignore_index=True,
)
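Unlike df.append, pd.concat only accepts DataFrame or Series objects, which is why the single-row dicts above are wrapped in pd.DataFrame(..., index=[0]); wrapping each value in a list would work as well. A minimal illustration of that wrapping, with a hypothetical row dict:

row = {"Mutations": "S:N501Y", "Frequency": 0.42}  # hypothetical values, not from this commit
variants_df = pd.concat([variants_df, pd.DataFrame(row, index=[0])], ignore_index=True)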

2 changes: 1 addition & 1 deletion workflow/scripts/generate-overview-table.py
@@ -65,7 +65,7 @@ def is_patient_report():
columns=[eukaryota, bacteria, viruses, sars_cov2, unclassified]
).fillna(0)
kraken_results["sample"] = sample
species_columns = species_columns.append(kraken_results, ignore_index=True)
species_columns = pd.concat([species_columns, kraken_results], ignore_index=True)

data = data.join(species_columns.set_index("sample"))

2 changes: 1 addition & 1 deletion workflow/scripts/plot-all-coverage.py
@@ -23,7 +23,7 @@ def plot_coverage(sm_input, sm_output, min_coverage):

sample_df["Sample"] = sample_df["#CHROM"].apply(lambda x: str(x).split(".")[0])

coverage = coverage.append(sample_df, ignore_index=True)
coverage = pd.concat([coverage, sample_df], ignore_index=True)

coverage["# Coverage"] = coverage.Coverage.apply(
lambda x: f"< {min_coverage}"
52 changes: 33 additions & 19 deletions workflow/scripts/plot-assembly-comparison.py
@@ -18,29 +18,43 @@ def register_lengths(sample, file_list, state, amplicon_state, data):
for file, assembler in zip(file_list, snakemake.params.assembler):
if state in ("initial", "scaffolded"):
with pysam.FastxFile(file) as infile:
data = data.append(
{
"Sample": sample,
"Assembler": assembler,
"Amplicon": amplicon_state,
"length (bp)": max(len(contig.sequence) for contig in infile),
"State": state,
},
data = pd.concat(
[
data,
pd.DataFrame(
{
"Sample": sample,
"Assembler": assembler,
"Amplicon": amplicon_state,
"length (bp)": max(
len(contig.sequence) for contig in infile
),
"State": state,
},
index=[0],
),
],
ignore_index=True,
)
else:
quastDf = pd.read_csv(file, sep="\t")
data = data.append(
{
"Sample": sample,
"Assembler": assembler,
"Amplicon": amplicon_state,
"length (bp)": quastDf.loc[0, "N50"],
"State": "N50",
"Genome fraction (%)": quastDf.loc[0, "Genome fraction (%)"]
if "Genome fraction (%)" in quastDf.columns
else float("nan"),
},
data = pd.concat(
[
data,
pd.DataFrame(
{
"Sample": sample,
"Assembler": assembler,
"Amplicon": amplicon_state,
"length (bp)": quastDf.loc[0, "N50"],
"State": "N50",
"Genome fraction (%)": quastDf.loc[0, "Genome fraction (%)"]
if "Genome fraction (%)" in quastDf.columns
else float("nan"),
},
index=[0],
),
],
ignore_index=True,
)
return data
2 changes: 1 addition & 1 deletion workflow/scripts/plot-dependency-of-pangolin-call.py
@@ -35,7 +35,7 @@ def plot_dependency_of_pangolin_call(sm_input, sm_output):
pangolin_output["mixture_content"] = input.split(MIXTURE_PREFIX, 1)[-1].split(
"."
)[0]
all_sampes = all_sampes.append(pangolin_output, ignore_index=True)
all_sampes = pd.concat([all_sampes, pangolin_output], ignore_index=True)

all_sampes["mixture_content"] = all_sampes["mixture_content"].str.replace("-", ".")

2 changes: 1 addition & 1 deletion workflow/scripts/plot-pangolin-conflict.py
@@ -35,7 +35,7 @@ def plot_pangolin_conflict(sm_input, sm_output):
pangolin_output = pd.read_csv(input)
pangolin_output["true_lineage"] = true_lineage
pangolin_output["true_lineage_percent"] = percent
all_sampes = all_sampes.append(pangolin_output, ignore_index=True)
all_sampes = pd.concat([all_sampes, pangolin_output], ignore_index=True)

all_sampes["correct_lineage_assignment"] = (
all_sampes["lineage"] == all_sampes["true_lineage"]
4 changes: 2 additions & 2 deletions workflow/scripts/plot-primer-clipping.py
@@ -147,13 +147,13 @@ def plot_classes(counters):
counts_before = count_intervals(file)
counts_before["sample"] = sample
counts_before["state"] = "before"
all_df = all_df.append(counts_before, ignore_index=True)
all_df = pd.concat([all_df, counts_before], ignore_index=True)

for sample, file in iter_with_samples(snakemake.input.clipped):
counts_after = count_intervals(file)
counts_after["sample"] = sample
counts_after["state"] = "after"
all_df = all_df.append(counts_after, ignore_index=True)
all_df = pd.concat([all_df, counts_after], ignore_index=True)

bars, text = plot_classes(all_df)

