Skip to content

Commit

Permalink
release 0.4.27
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Sep 13, 2024
1 parent 6c29654 commit c572f42
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 55 deletions.
6 changes: 6 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
0.4.27

* fix random order in Variant field [#83](https://github.com/miRTop/mirtop/issues/83)
* fix possible duplication of lines [#80](https://github.com/miRTop/mirtop/issues/80)
* accept `--prefix` option to set the output file name prefix for gff output (default `mirtop`) [#84](https://github.com/miRTop/mirtop/issues/84)

0.4.26

* Support spaces and special characters in bam files
Expand Down
2 changes: 1 addition & 1 deletion mirtop/gff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def reader(args):
if args.low_memory:
return None
merged = merge.merge(out_dts, samples)
fn_merged_out = op.join(args.out, "mirtop.%s" % args.out_format)
fn_merged_out = op.join(args.out, "%s.%s" % (args.prefix, args.out_format))
_write(merged, header.create(samples, database, header.make_tools([args.format])), fn_merged_out, args)


Expand Down
101 changes: 51 additions & 50 deletions mirtop/gff/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import print_function

import os.path as op
import pandas as pd

from mirtop.mirna import fasta, mapper
from mirtop.mirna.realign import read_id
Expand All @@ -25,69 +26,69 @@ def convert_gff_counts(args):
UID miRNA Variant Sample1 Sample2 ... Sample N
"""
sep = "\t"
variant_header = sep.join(['iso_5p', 'iso_3p',
'iso_add3p', 'iso_snp'])
variant_header = ['iso_5p', 'iso_3p',
'iso_add3p', 'iso_snp']
if args.add_extra:
precursors = fasta.read_precursor(args.hairpin, args.sps)
matures = mapper.read_gtf_to_precursor(args.gtf)
variant_header = sep.join([variant_header,
'iso_5p_nt', 'iso_3p_nt',
'iso_add3p_nt', 'iso_snp_nt'])
variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt']

logger.info("INFO Reading GFF file %s", args.gff)
logger.info("INFO Writing TSV file to directory %s", args.out)

gff_file = open(args.gff, 'r')
out_file = op.join(args.out, "%s.tsv" % op.splitext(op.basename(args.gff))[0])
all_lines = []
missing_parent = 0
missing_mirna = 0
unvalid_uid = 0
with open(out_file, 'w') as outh:

for samples_line in gff_file:
if samples_line.startswith("## COLDATA:"):
samples = sep.join(samples_line.strip().split("COLDATA:")[1].strip().split(","))
header = sep.join(['UID', 'Read', 'miRNA', 'Variant',
variant_header, samples])
print(header, file=outh)
break

for mirna_line in gff_file:
gff = feature(mirna_line)
attr = gff.attributes
UID = attr["UID"]
Read = attr["Read"]
mirna = attr["Name"]
parent = attr["Parent"]
variant = attr["Variant"]
try:
read_id(UID)
except KeyError:
unvalid_uid += 1
#with open(out_file, 'w') as outh:

for samples_line in gff_file:
if samples_line.startswith("## COLDATA:"):
samples = [sep.join(samples_line.strip().split("COLDATA:")[1].strip().split(","))]
#header = sep.join(['UID', 'Read', 'miRNA', 'Variant',
# variant_header, samples])
#print(header, file=outh)
break

for mirna_line in gff_file:
gff = feature(mirna_line)
attr = gff.attributes
UID = attr["UID"]
Read = attr["Read"]
mirna = attr["Name"]
parent = attr["Parent"]
variant = attr["Variant"]
try:
read_id(UID)
except KeyError:
unvalid_uid += 1
continue

expression = [sep.join(attr["Expression"].strip().split(","))]
cols_variants = _expand(variant)
logger.debug("COUNTS::Read:%s" % Read)
logger.debug("COUNTS::EXTRA:%s" % variant)
if args.add_extra:
if parent not in precursors:
missing_parent += 1
continue

expression = sep.join(attr["Expression"].strip().split(","))
cols_variants = sep.join(_expand(variant))
logger.debug("COUNTS::Read:%s" % Read)
logger.debug("COUNTS::EXTRA:%s" % variant)
if args.add_extra:
if parent not in precursors:
missing_parent += 1
continue
if mirna not in matures[parent]:
missing_mirna += 1
continue
extra = variant_with_nt(mirna_line, precursors, matures)
if extra == "Invalid":
continue
logger.debug("COUNTS::EXTRA:%s" % extra)
cols_variants = sep.join([cols_variants] + _expand(extra, True))
summary = sep.join([UID, Read, mirna, variant,
cols_variants, expression])
logger.debug(summary)
print(summary, file=outh)

gff_file.close()
if mirna not in matures[parent]:
missing_mirna += 1
continue
extra = variant_with_nt(mirna_line, precursors, matures)
if extra == "Invalid":
continue
logger.debug("COUNTS::EXTRA:%s" % extra)
cols_variants = [cols_variants] + _expand(extra, True)
#import pdb; pdb.set_trace()
summary = [UID, Read, mirna, variant] + cols_variants + expression
logger.debug(summary)
all_lines.append(summary)
df = pd.DataFrame(all_lines, columns = ['UID', 'Read', 'miRNA', 'Variant'] + variant_header + samples)
df = df.drop_duplicates()
df.to_csv(out_file, sep="\t", index=False)
logger.info("Missing Parents in hairpin file: %s" % missing_parent)
logger.info("Missing MiRNAs in GFF file: %s" % missing_mirna)
logger.info("Non valid UID: %s" % unvalid_uid)
Expand Down
2 changes: 2 additions & 0 deletions mirtop/libs/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def _add_subparser_gff(subparsers):
parser.add_argument("files", nargs="*", help="Bam files.")
parser.add_argument("-o", "--out", dest="out", required=1,
help="dir of output files")
parser.add_argument("--prefix", dest="prefix", required=0,
default="mirtop", help="prefix for output file")
parser.add_argument("--sps",
help="species")
parser.add_argument("--keep-name", action="store_true",
Expand Down
4 changes: 2 additions & 2 deletions mirtop/mirna/realign.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from Bio import pairwise2
from Bio.Align import PairwiseAligner
from Bio.Seq import Seq
from collections import defaultdict

Expand Down Expand Up @@ -94,7 +94,7 @@ def formatGFF(self):
value.append("iso_3p:%s%s" % (direction, size))
if not value:
value = ["NA"]
return ",".join(list(set(value)))
return ",".join(sorted(list(set(value))))

def format(self, sep="\t"):
"""Create tabular line from variant fields."""
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import os
from setuptools import setup, find_packages

version = '0.4.26'

version = '0.4.27'
url = 'http://github.com/mirtop/mirtop'


Expand Down

0 comments on commit c572f42

Please sign in to comment.