Skip to content

Commit

Permalink
bump to 1.2.2
Browse files: browse the repository at this point in the history
  • Loading branch information
lmdu committed Jul 13, 2020
1 parent 098c1c5 commit c53c3b2
Show file tree
Hide file tree
Showing 11 changed files with 5,628 additions and 5,054 deletions.
2 changes: 1 addition & 1 deletion setup/win.iss
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!

#define MyAppName "Krait"
#define MyAppVersion "1.2.1"
#define MyAppVersion "1.2.2"
#define MyAppPublisher "Lianming Du"
#define MyAppURL "https://github.com/lmdu/krait"
#define MyAppExeName "Krait.exe"
Expand Down
4 changes: 2 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import os
import appdirs

VERSION = "1.2.1"
VERSION = "1.2.2"

BUILD = '20200521'
BUILD = '20200713'

ROOT_PATH = os.path.abspath(os.path.dirname(__file__))

Expand Down
85 changes: 60 additions & 25 deletions src/gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,26 +56,28 @@ def parse(self):
if line[0] == '#': continue

cols = line.strip().split('\t')

record = Data(
seqid = cols[0],
feature = cols[2].upper(),
start = int(cols[3]),
end = int(cols[4]),
attrs = Data()
)

for item in cols[-1].split(';'):
if not item:
continue

#if _format == 'GFF':
# name, value = item.split('=')
#else:
# name, value = item.strip().strip('"').split('"')
try:
name, value = self.split_val(item)
except ValueError:
continue

name, value = self.split_val(item)

record.attrs[name.strip().upper()] = value

yield record
Expand Down Expand Up @@ -104,10 +106,10 @@ def create_interval_tree(self):

if feature[0] != prev_chrom:
if starts:
starts = numpy.array(starts, dtype=numpy.int32)
ends = numpy.array(ends, dtype=numpy.int32)
indexes = numpy.array(indexes, dtype=numpy.int32)
self.interval_forest[prev_chrom] = ncls.NCLS32(starts, ends, indexes)
starts = numpy.array(starts, dtype=numpy.int64)
ends = numpy.array(ends, dtype=numpy.int64)
indexes = numpy.array(indexes, dtype=numpy.int64)
self.interval_forest[prev_chrom] = ncls.NCLS64(starts, ends, indexes)

prev_chrom = feature[0]
starts = []
Expand All @@ -119,10 +121,10 @@ def create_interval_tree(self):
indexes.append(feat_id)

if starts:
starts = numpy.array(starts, dtype=numpy.int32)
ends = numpy.array(ends, dtype=numpy.int32)
indexes = numpy.array(indexes, dtype=numpy.int32)
self.interval_forest[prev_chrom] = ncls.NCLS32(starts, ends, indexes)
starts = numpy.array(starts, dtype=numpy.int64)
ends = numpy.array(ends, dtype=numpy.int64)
indexes = numpy.array(indexes, dtype=numpy.int64)
self.interval_forest[prev_chrom] = ncls.NCLS64(starts, ends, indexes)

def mapping(self, chrom, start, end):
if chrom not in self.interval_forest:
Expand All @@ -138,7 +140,7 @@ def mapping(self, chrom, start, end):
for candidate in ['CDS', 'exon', 'UTR', 'intron']:
for feat, gid in feats:
if candidate in feat:
return (self.featid_mapping[feat], self.gene_mapping[gid])
return (self.featid_mapping[feat], self.gene_mapping[gid])

return None

Expand Down Expand Up @@ -192,12 +194,29 @@ def get_gene_mapping(self):
self.gene_info.append((gene_num, row.seqid, row.start, row.end, gene_id, gene_name, biotype))

def get_features(self):
chrom = None
father = None
exons = []

parents = {}

for r in self.parse():
if r.seqid != chrom:
if exons:
exons = sorted(exons, key=lambda x: x[2])

for idx, exon in enumerate(exons):
yield exon

if idx < len(exons)-1:
start = exon[2] + 1
end = exons[idx+1][1] - 1
yield (exons[0][0], start, end, 'intron', exons[0][4])

chrom = r.seqid
exons = []
father = None

if r.feature == 'REGION':
continue

Expand Down Expand Up @@ -258,15 +277,15 @@ def get_features(self):
except:
parents[r.attrs.ID] = r.attrs.ID

exons = sorted(exons, key=lambda x: x[2])

for idx, exon in enumerate(exons):
yield exon
if exons:
exons = sorted(exons, key=lambda x: x[2])
for idx, exon in enumerate(exons):
yield exon

if idx < len(exons)-1:
start = exon[2] + 1
end = exons[idx+1][1] - 1
yield (exons[0][0], start, end, 'intron', exons[0][4])
if idx < len(exons)-1:
start = exon[2] + 1
end = exons[idx+1][1] - 1
yield (exons[0][0], start, end, 'intron', exons[0][4])

class GTFParser(AnnotParser):
def split_val(self, item):
Expand All @@ -288,12 +307,28 @@ def get_gene_mapping(self):
self.gene_info.append((gene_num, row.seqid, row.start, row.end, gene_id, gene_name, biotype))

def get_features(self):
chrom = None
father = None
exons = []
for row in self.parse():
parent = row.attrs.GENE_ID
for r in self.parse():
if r.seqid != chrom:
if exons:
exons = sorted(exons, key=lambda x: x[1])

for idx, exon in enumerate(exons):
yield exon

if idx < len(exons)-1:
start = exon[2] + 1
end = exons[idx+1][1] - 1
yield (exons[0][0], start, end, 'intron', exons[0][4])
exons = []
chrom = None
father = None

parent = r.attrs.GENE_ID

if row.feature == 'CDS':
if r.feature == 'CDS':
yield (r.seqid, r.start, r.end, 'CDS', parent)

elif r.feature == 'FIVE_PRIME_UTR':
Expand Down
2 changes: 1 addition & 1 deletion src/libs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
#from . import intersection
#from . import primerdesign
#from . import fasta
from . import ncls32 as ncls
from . import ncls
from . import issr
4 changes: 2 additions & 2 deletions src/libs/src/ncls/src/intervaldb32.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ extern int find_intervals(IntervalIterator *it0, int32_t start, int32_t end,Inte
}

#define HAS_OVERLAP_POSITIVE(IM,START,END) (((IM).start>=0) ? \
((IM).start<=(START) && (END)<=(IM).end) \
((IM).start<(END) && (START)<(IM).end) \
: (-((IM).end)<(END) && (START) < -((IM).start)))
/* ????? MERGE_INTERVAL_ORIENTATIONS ??????? */

#else
/* STANDARD MACROS */
#define START_POSITIVE(IM) ((IM).start)
#define END_POSITIVE(IM) ((IM).end)
#define HAS_OVERLAP_POSITIVE(IM,START,END) ((IM).start<=(START) && (END)<=(IM).end)
#define HAS_OVERLAP_POSITIVE(IM,START,END) ((IM).start<(END) && (START)<(IM).end)

#endif

Expand Down
Loading

0 comments on commit c53c3b2

Please sign in to comment.