;;; smoothxg.scm --- GNU Guix package definition for smoothxg.
;;;
;;; Forked from ekg/guix-genomics.

(define-module (smoothxg)
#:use-module (guix utils)
#:use-module (guix packages)
#:use-module (guix git-download)
#:use-module (guix build-system cmake)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (gnu packages compression)
#:use-module (gnu packages gcc)
#:use-module (gnu packages jemalloc)
#:use-module (gnu packages python)
#:use-module (gnu packages python-xyz)
#:use-module (gnu packages version-control))
;; Package definition for smoothxg, built from a pinned upstream git commit
;; with the CMake build system.
;;
;; The outer `let' holds the three values that change on every package bump:
;;   version           base upstream version
;;   commit            full hash of the pinned upstream commit
;;   package-revision  revision counter appended to the version string
(define-public smoothxg
  (let ((version "0.4.0")
        (commit "410e72dc6c7b1d01f5ebb6eb39779d3ba20b4ecc")
        (package-revision "40"))
    (package
      (name "smoothxg")
      ;; Resulting form: <version>+<first 7 chars of commit>-<package-revision>.
      (version (string-append version "+" (string-take commit 7) "-" package-revision))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/smoothxg.git")
                      (commit commit)
                      ;; Fetch git submodules along with the main tree.
                      (recursive? #t)))
                (file-name (git-file-name name version))
                ;; Content hash of the checkout; must be updated whenever
                ;; `commit' changes.
                (sha256
                 (base32
                  "1xql93amlwz0acn3dbim834a1vpq7ddfy7893fv9k3cwi15z48l3"))))
      (build-system cmake-build-system)
      (arguments
       ;; Tests are disabled for this package.
       ;; NOTE(review): the reason is not recorded here -- confirm whether
       ;; upstream provides a usable test target.
       `(#:tests? #f
         ;; Pass the target C compiler name to make.
         #:make-flags (list (string-append "CC=" ,(cc-for-target)))))
      (native-inputs
       `(("pybind11" ,pybind11)
         ("python" ,python)))
      (inputs
       `(("gcc" ,gcc-10)
         ("jemalloc" ,jemalloc)
         ("zlib" ,zlib)
         ;; Only the "lib" output of zstd is used.
         ("zstd" ,zstd "lib")))
      ;; Synopsis starts with a capital letter, as `guix lint' requires.
      (synopsis "Linearize and simplify variation graphs using blocked partial order alignment")
      ;; The description is Texinfo: blank lines separate paragraphs.  The
      ;; text is kept flush-left so no leading whitespace enters the string.
      (description
       "Pangenome graphs built from raw sets of alignments may have complex
local structures generated by common patterns of genome
variation. These local nonlinearities can introduce difficulty in
downstream analyses, visualization, and interpretation of variation
graphs.

smoothxg finds blocks of paths that are collinear within a variation
graph. It applies partial order alignment to each block, yielding an
acyclic variation graph. Then, to yield a smoothed graph, it walks
the original paths to lace these subgraphs together. The resulting
graph only contains cyclic or inverting structures larger than the
chosen block size, and is otherwise manifold linear. In addition to
providing a linear structure to the graph, smoothxg can be used to
extract the consensus pangenome graph by applying the heaviest bundle
algorithm to each chain.

To find blocks, smoothxg applies a greedy algorithm that assumes that
the graph nodes are sorted according to their occurrence in the graph's
embedded paths. The path-guided stochastic gradient descent based 1D
sort implemented in odgi sort -Y is designed to provide this kind of
sort.")
      (home-page "https://github.com/ekg/smoothxg")
      (license license:expat))))