Skip to content

Commit

Permalink
fix traversal
Browse files Browse the repository at this point in the history
  • Loading branch information
masakistan committed Dec 18, 2018
1 parent 3b7f1bf commit 94cf059
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 46 deletions.
17 changes: 16 additions & 1 deletion inc/Kcontainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ inline void free_kcontainer( Kcontainer* kd )
free(kd);
}

/**
 * Look up the label of the idx-th child of a vertex.
 *
 * v->pref_pres is scanned from its lowest bit upward. The position of the
 * (idx + 1)-th set bit is stored in a single byte and passed to
 * deserialize_kmer(4, 1, ...), whose result is returned unchanged.
 *
 * @param v    vertex whose pref_pres bitmap is scanned
 * @param idx  zero-based rank of the wanted child among the set bits
 * @return the string produced by deserialize_kmer for that bit position, or
 *         nullptr when idx is negative or there are not more than idx set
 *         bits within the first 256 positions
 */
inline char* kcontainer_get_child_suffix(Vertex* v, int idx) {
    if(idx < 0)
        return nullptr;

    uint256_t verts = v->pref_pres;
    int seen = 0;

    // The counters are plain ints on purpose: a uint8_t position can never
    // reach 256, so a loop bounded by "< 256" would wrap around and spin
    // forever whenever idx is past the last set bit.
    for(int pos = 0; pos < 256; pos++) {
        if(verts & 0x1) {
            if(seen == idx) {
                // pos is below 256 here, so it fits the one-byte prefix
                uint8_t prefix = static_cast<uint8_t>(pos);
                return deserialize_kmer(4, 1, &prefix);
            }
            seen++;
        }
        verts >>= 1;
    }

    // fewer than idx + 1 children are present
    return nullptr;
}

inline bool kcontainer_contains( Kcontainer* kd, const char* kmer )
{
uint8_t* bseq = ( uint8_t* ) calloc( kd->k, sizeof( uint8_t ) );
Expand All @@ -66,7 +81,7 @@ inline void kcontainer_add( Kcontainer* kd, const char* kmer, int count )
{
uint8_t* bseq = ( uint8_t* ) calloc( kd->k, sizeof( uint8_t ) );
serialize_kmer( kmer, kd->k, bseq );
//char* dseq = deserialize_kmer(kd->k, calc_bk(kd->k), bseq);
//std::cout << deserialize_kmer(kd->k, calc_bk(kd->k), bseq) << std::endl;
#if KDICT
vertex_insert( &( kd->v ), bseq, kd->k, 0, obj );
#elif KSET
Expand Down
14 changes: 5 additions & 9 deletions inc/Kcounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class Kcounter
UC* uc = &v->uc;
int bk = calc_bk( k );
int suffix_idx = bk * idx;
//std::cout << bk << "\t" << suffix_idx << "\t" << idx << std::endl;
//for(int i = 0; i < bk; i++) {
// std::cout << (unsigned) uc->suffixes[0] << std::endl;
//}
return deserialize_kmer( k, bk, &uc->suffixes[ suffix_idx ] );
}

Expand All @@ -55,14 +59,6 @@ class Kcounter
}
// Label of the idx-th child of v. The bitmap scan is delegated to
// kcontainer_get_child_suffix; the previous inline loop never advanced its
// counter and returned before this call could run.
char* get_child_suffix( Vertex* v, int idx )
{
    return kcontainer_get_child_suffix(v, idx);
}
};
10 changes: 1 addition & 9 deletions inc/Kdict.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,7 @@ class Kdict
}
// Label of the idx-th child of v. The bitmap scan is delegated to
// kcontainer_get_child_suffix; the previous inline loop never advanced its
// counter and returned before this call could run.
char* get_child_suffix( Vertex* v, int idx )
{
    return kcontainer_get_child_suffix(v, idx);
}
Vertex* get_root() { return &kc->v; }
};
10 changes: 1 addition & 9 deletions inc/Kset.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,7 @@ class Kset
}
// Label of the idx-th child of v. The bitmap scan is delegated to
// kcontainer_get_child_suffix; the previous inline loop never advanced its
// counter and returned before this call could run.
char* get_child_suffix( Vertex* v, int idx )
{
    return kcontainer_get_child_suffix(v, idx);
}
void add_seq(const char* seq, uint32_t length);

Expand Down
5 changes: 3 additions & 2 deletions inc/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,13 @@ static char* deserialize_kmer( int k, int bk, uint8_t* bseq )
else
{
bases_in_byte = bases_to_process;
tbkmer >>= 8 - (bases_in_byte * 2);
//tbkmer >>= 8 - (bases_in_byte * 2);
}

tbkmer = bseq[ i ];
for( j = 0; j < bases_in_byte; j++ )
{
pos = ( i * 4 ) + (bases_in_byte - j - 1);
pos = ( i * 4 ) + j;
kmer[ pos ] = COMP_TO_ASCII[ tbkmer & 0x3 ];
tbkmer >>= 2;
}
Expand Down
27 changes: 12 additions & 15 deletions kcollections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@ def _get_kmers( self, v, k, prefix = '' ):
for i in range( self.get_uc_size( v ) ):
yield prefix + self.get_uc_kmer( v, k, i )

for i in range( self.get_cc_size( v ) ):
for j in range( self.get_cc_child_size( v, i ) ):
child_prefix = prefix + self.get_cc_child_suffix( v, i, j )
for kmer in self._get_kmers( self.get_cc_child_vertex( v, i, j ), k - 4, child_prefix ):
yield kmer
for i in range( self.get_vs_size( v ) ):
child_prefix = prefix + self.get_child_suffix( v, i )
for kmer in self._get_kmers( self.get_child_vertex( v, i ), k - 4, child_prefix ):
yield kmer

def __str__( self ):
res = []
Expand Down Expand Up @@ -105,11 +104,10 @@ def _get_kmers( self, v, k, prefix = '' ):
for i in range( self.get_uc_size( v ) ):
yield prefix + self.get_uc_kmer( v, k, i )

for i in range( self.get_cc_size( v ) ):
for j in range( self.get_cc_child_size( v, i ) ):
child_prefix = prefix + self.get_cc_child_suffix( v, i, j )
for kmer in self._get_kmers( self.get_cc_child_vertex( v, i, j ), k - 4, child_prefix ):
yield kmer
for i in range( self.get_vs_size( v ) ):
child_prefix = prefix + self.get_child_suffix( v, i )
for kmer in self._get_kmers( self.get_child_vertex( v, i ), k - 4, child_prefix ):
yield kmer

def __str__( self ):
return '{' + ','.join( self ) + '}'
Expand Down Expand Up @@ -239,11 +237,10 @@ def _get_kmers( self, v, k, prefix = '' ):
for i in range( self.get_uc_size( v ) ):
yield prefix + self.get_uc_kmer( v, k, i )

for i in range( self.get_cc_size( v ) ):
for j in range( self.get_cc_child_size( v, i ) ):
child_prefix = prefix + self.get_cc_child_suffix( v, i, j )
for kmer in self._get_kmers( self.get_cc_child_vertex( v, i, j ), k - 4, child_prefix ):
yield kmer
for i in range( self.get_vs_size( v ) ):
child_prefix = prefix + self.get_child_suffix( v, i )
for kmer in self._get_kmers( self.get_child_vertex( v, i ), k - 4, child_prefix ):
yield kmer

def __str__( self ):
res = []
Expand Down
4 changes: 3 additions & 1 deletion kcollections/src/Vertex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ void burst_uc( Vertex* v, int k, int depth )
uint8_t* suffix = &bseq[ 1 ];
uint8_t bits_to_shift = (unsigned) prefix;
int vidx = calc_vidx(v->pref_pres, prefix);
//std::cout << "burst: " << deserialize_kmer(k, calc_bk(k), bseq) << "\t" << (unsigned) bseq[0] << std::endl;

// check if there is already a vertex that represents this prefix
if(!((v->pref_pres >> (unsigned) bits_to_shift) & 0x1)) {
Expand Down Expand Up @@ -245,8 +246,9 @@ void vertex_insert( Vertex* v, uint8_t* bseq, int k, int depth, int count )
uc_insert( &( v->uc ), bseq, k, depth, uc_idx, count );
#endif

if( v->uc.size > CAPACITY - 1 )
if(v->uc.size == CAPACITY)
{
//std::cout << "bursting" << std::endl;
burst_uc( v, k, depth );
}
}
Expand Down

0 comments on commit 94cf059

Please sign in to comment.