Skip to content

Commit

Permalink
Merge pull request #41 from will-rowe/babygroot
Browse files: browse the repository at this point in the history
memory improvements
  • Loading branch information
Will Rowe authored May 7, 2020
2 parents 7a0977c + ae27e07 commit 4526946
Show file tree
Hide file tree
Showing 22 changed files with 238 additions and 1,132 deletions.
8 changes: 4 additions & 4 deletions cmd/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (

"github.com/pkg/profile"
"github.com/spf13/cobra"
"github.com/will-rowe/groot/src/graph"
"github.com/will-rowe/groot/src/lshe"
"github.com/will-rowe/groot/src/misc"
"github.com/will-rowe/groot/src/pipeline"
"github.com/will-rowe/groot/src/version"
Expand Down Expand Up @@ -100,9 +100,9 @@ func runSketch() {
log.Print("loading the graphs...")
log.Printf("\tnumber of variation graphs: %d\n", len(info.Store))
log.Print("rebuilding the LSH Ensemble...")
lshe := &graph.ContainmentIndex{}
misc.ErrorCheck(lshe.Load(*indexDir + "/groot.lshe"))
info.AttachDB(lshe)
index := &lshe.ContainmentIndex{}
misc.ErrorCheck(index.Load(*indexDir + "/groot.lshe"))
info.AttachDB(index)
if *profiling {
log.Printf("\tloaded lshe file -> current memory usage %v", misc.PrintMemUsage())
runtime.GC()
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ require (
github.com/biogo/hts v1.1.0
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076 // indirect
github.com/ekzhu/lshensemble v1.1.0
github.com/golang/protobuf v1.4.0
github.com/orcaman/concurrent-map v0.0.0-20190826125027-8c72a8bb44f6 // indirect
github.com/pkg/profile v1.4.0
github.com/spf13/cobra v1.0.0
Expand Down
16 changes: 0 additions & 16 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,8 @@ github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4er
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0 h1:oOuy+ugB+P/kBdUnG5QaMXSIyJ1q38wWSojYCb3z5VQ=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
Expand Down Expand Up @@ -164,17 +155,10 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0 h1:qdOKuR/EIArgaWNjetjgTzgVTAZ+S/WXVrq9HW9zimw=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
4 changes: 2 additions & 2 deletions src/graph/alignment.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import (
"sync"

"github.com/biogo/hts/sam"
"github.com/will-rowe/groot/src/lshforest"
"github.com/will-rowe/groot/src/lshe"
"github.com/will-rowe/groot/src/seqio"
)

// AlignRead is a method that runs a hierarchical read-to-graph alignment
func (GrootGraph *GrootGraph) AlignRead(read *seqio.FASTQread, mapping *lshforest.Key, references []*sam.Reference) ([]*sam.Record, error) {
func (GrootGraph *GrootGraph) AlignRead(read *seqio.FASTQread, mapping *lshe.Key, references []*sam.Reference) ([]*sam.Record, error) {

// TODO: move this hardcoded value to CLI options
MaxClip := 1
Expand Down
14 changes: 7 additions & 7 deletions src/graph/alignment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ import (
"testing"

"github.com/biogo/hts/sam"
"github.com/will-rowe/groot/src/lshforest"
"github.com/will-rowe/groot/src/lshe"
"github.com/will-rowe/groot/src/seqio"
)

func setupRead() (*seqio.FASTQread, *lshforest.Key, error) {
func setupRead() (*seqio.FASTQread, *lshe.Key, error) {
testRead, err := seqio.NewFASTQread([]byte("@read-derived-from-path-B7"), []byte("ATGAAAGGATTAAAAGGG"), []byte("+"), []byte("++++++++++++++++++"))
if err != nil {
return nil, nil, err
}
seed := &lshforest.Key{
seed := &lshe.Key{
GraphID: 1,
Node: 2,
OffSet: 0,
Expand All @@ -23,12 +23,12 @@ func setupRead() (*seqio.FASTQread, *lshforest.Key, error) {
return testRead, seed, nil
}

func setupMultimapRead() (*seqio.FASTQread, *lshforest.Key, error) {
func setupMultimapRead() (*seqio.FASTQread, *lshe.Key, error) {
testRead, err := seqio.NewFASTQread([]byte("@read-derived-from-segment-26"), []byte("CCTGATATTAAAATTGAAAAATTAAAAGATAATTTATACGTCTATACAAC"), []byte("+"), []byte("=================================================="))
if err != nil {
return nil, nil, err
}
seed := &lshforest.Key{
seed := &lshe.Key{
GraphID: 1,
Node: 26,
OffSet: 0,
Expand All @@ -37,12 +37,12 @@ func setupMultimapRead() (*seqio.FASTQread, *lshforest.Key, error) {
return testRead, seed, nil
}

func setupUniqmapRead() (*seqio.FASTQread, *lshforest.Key, error) {
func setupUniqmapRead() (*seqio.FASTQread, *lshe.Key, error) {
testRead, err := seqio.NewFASTQread([]byte("@read-derived-from-path-B10"), []byte("ATGAAAGGATTAAAAGGGCTATTGGTTCTGGCTTTAGGCTTTACAGGACTACAGGTTTTTGGGCAACAGAACCCTGATATTAAAATTGAAAAATTAAAAGATAATTTATACGTCTATACAACCTATAATACCTTCAAAGGAACTAAATATGCGGCTAATGCGGTATATATGGTAACCGATAAAGGAGTAGTGGTTATAGACTCTCCATGGGGAGAAGATAAATTTAAAAGTTTTACAGACGAGATTTATAAAAAGCACGGAAAGAAAGTTATCATGAACATTGCAACCCACTCTCATGATGATAGAGCCGGAGGTCTTGAATATTTTGGTAAACTAGGTGCAAAAACTTATTCTACTAAAATGACAGATTCTATTTTAGCAAAAGAGAATAAGCCAAGAGCAAAGTACACTTTTGATAATAATAAATCTTTTAAAGTAGGAAAGACTGAGTTTCAGGTTTATTATCCGGGAAAAGGTCATACAGCAGATAATGTGGTTGTGTGGTTTCCTAAAGACAAAGTATTAGTAGGAGGCTGCATTGTAAAAAGTGGTGATTCGAAAGACCTTGGGTTTATTGGGGAAGCTTATGTAAACGACTGGACACAGTCCATACACAACATTCAGCAGAAATTTCCCTATGTTCAGTATGTCGTTGCAGGTCATGACGACTGGAAAGATCAAACATCAATACAACATACACTGGATTTAATCAGTGAATATCAACAAAAACAAAAGGCTTCAAATTAA"), []byte("+"), []byte("ATGAAAGGATTAAAAGGGCTATTGGTTCTGGCTTTAGGCTTTACAGGACTACAGGTTTTTGGGCAACAGAACCCTGATATTAAAATTGAAAAATTAAAAGATAATTTATACGTCTATACAACCTATAATACCTTCAAAGGAACTAAATATGCGGCTAATGCGGTATATATGGTAACCGATAAAGGAGTAGTGGTTATAGACTCTCCATGGGGAGAAGATAAATTTAAAAGTTTTACAGACGAGATTTATAAAAAGCACGGAAAGAAAGTTATCATGAACATTGCAACCCACTCTCATGATGATAGAGCCGGAGGTCTTGAATATTTTGGTAAACTAGGTGCAAAAACTTATTCTACTAAAATGACAGATTCTATTTTAGCAAAAGAGAATAAGCCAAGAGCAAAGTACACTTTTGATAATAATAAATCTTTTAAAGTAGGAAAGACTGAGTTTCAGGTTTATTATCCGGGAAAAGGTCATACAGCAGATAATGTGGTTGTGTGGTTTCCTAAAGACAAAGTATTAGTAGGAGGCTGCATTGTAAAAAGTGGTGATTCGAAAGACCTTGGGTTTATTGGGGAAGCTTATGTAAACGACTGGACACAGTCCATACACAACATTCAGCAGAAATTTCCCTATGTTCAGTATGTCGTTGCAGGTCATGACGACTGGAAAGATCAAACATCAATACAACATACACTGGATTTAATCAGTGAATATCAACAAAAACAAAAGGCTTCAAATTAA"))
if err != nil {
return nil, nil, err
}
seed := &lshforest.Key{
seed := &lshe.Key{
GraphID: 1,
Node: 2,
OffSet: 0,
Expand Down
16 changes: 8 additions & 8 deletions src/graph/graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"sync"

"github.com/will-rowe/gfa"
"github.com/will-rowe/groot/src/lshforest"
"github.com/will-rowe/groot/src/lshe"
"github.com/will-rowe/groot/src/misc"
"github.com/will-rowe/groot/src/seqio"
)
Expand Down Expand Up @@ -226,7 +226,7 @@ func (GrootGraph *GrootGraph) GetSketchStats() (int, int, int, error) {
}

// WindowGraph is a method to slide a window over each path through the graph, sketching the paths and getting window information
func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int) (map[string][]lshforest.Key, error) {
func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int) (map[string]lshe.Keys, error) {

// get the linear sequences for this graph
pathSeqs, err := GrootGraph.Graph2Seqs()
Expand All @@ -241,7 +241,7 @@ func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int)
}

// window each path
pathWindows := make(chan lshforest.Key, 100)
pathWindows := make(chan lshe.Key, 100)
var pathWG sync.WaitGroup
pathWG.Add(len(GrootGraph.Paths))
for pathID := range GrootGraph.Paths {
Expand Down Expand Up @@ -281,7 +281,7 @@ func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int)
}

// hold a window until a new sketch is encountered
var windowHolder lshforest.Key
var windowHolder lshe.Key
sketchSent := false

// start windowing the path sequence
Expand Down Expand Up @@ -310,7 +310,7 @@ func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int)

// if this is the first window, or we have just sent a window on, init a new windowHolder
if !merge {
windowHolder = lshforest.Key{
windowHolder = lshe.Key{
GraphID: GrootGraph.GraphID,
Node: segs[i],
OffSet: offSets[i],
Expand Down Expand Up @@ -347,7 +347,7 @@ func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int)
}()

// collect sketched windows from all paths and merge identical windows from different paths if they share the same start node and offset
windowLookup := make(map[string][]lshforest.Key)
windowLookup := make(map[string]lshe.Keys)
for window := range pathWindows {

// convert the graph window data to a key that links the sketch to the graphID, start node and offset
Expand All @@ -361,7 +361,7 @@ func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int)
for _, existingWindow := range existingWindowLocation {

// if the sketches match, merge the window into the existing one
if misc.Uint64SliceEqual(existingWindow.GetSketch(), window.Sketch) {
if misc.Uint64SliceEqual(existingWindow.Sketch, window.Sketch) {
for node, freq := range window.ContainedNodes {
existingWindow.ContainedNodes[node] += freq
}
Expand All @@ -382,7 +382,7 @@ func (GrootGraph *GrootGraph) WindowGraph(windowSize, kmerSize, sketchSize int)
GrootGraph.numDistinctSketches++
}
} else {
windowLookup[key] = []lshforest.Key{window}
windowLookup[key] = lshe.Keys{window}
GrootGraph.numDistinctSketches++
}
}
Expand Down
191 changes: 0 additions & 191 deletions src/graph/index.go

This file was deleted.

Loading

0 comments on commit 4526946

Please sign in to comment.