From a6247b102252c19717e1a2b7c491090685a4a34a Mon Sep 17 00:00:00 2001 From: Karl Dyrhage Date: Thu, 14 Nov 2024 12:29:06 +0100 Subject: [PATCH] Fixes #17 --- src/GFF/writer.jl | 8 +++---- src/GenomicAnnotations.jl | 1 + src/record.jl | 46 +++++++++++++++++++++++++++++++-------- test/example.gbk | 2 ++ test/runtests.jl | 19 ++++++++++++++-- 5 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/GFF/writer.jl b/src/GFF/writer.jl index bbd1d2d..c3ec3b0 100644 --- a/src/GFF/writer.jl +++ b/src/GFF/writer.jl @@ -58,14 +58,14 @@ function gffstring(gene::Gene) if ismultilocus(gene) s = String(take!(buf)) res = IOBuffer() - for pos in (locus(gene) isa Complement ? locus(gene).loc.loc : locus(gene).loc) + for loc in locus(gene) println(res, join([parent(gene).name, get(gene, :source, "."), feature(gene), - pos.start, - pos.stop, + loc.start, + loc.stop, get(gene, :score, "."), - locus(gene).strand, + loc.strand, get(gene, :phase, "."), s], '\t')) end diff --git a/src/GenomicAnnotations.jl b/src/GenomicAnnotations.jl index 6a988f1..89b7e49 100644 --- a/src/GenomicAnnotations.jl +++ b/src/GenomicAnnotations.jl @@ -15,6 +15,7 @@ export SpanLocus, ClosedSpan, OpenSpan, OpenRightSpan, OpenLeftSpan export PointLocus, SingleNucleotide, BetweenNucleotides export Join, Order, Complement export Locus +export eachposition export relative_position diff --git a/src/record.jl b/src/record.jl index 335bf5e..af8fc35 100644 --- a/src/record.jl +++ b/src/record.jl @@ -67,6 +67,8 @@ OpenSpan(p) = SpanLocus(p, OpenSpan) SingleNucleotide(p) = PointLocus(p, SingleNucleotide) BetweenNucleotides(p) = PointLocus(p, BetweenNucleotides) +Complement(loc::Complement) = loc.loc + Base.convert(::Type{ClosedSpan}, p::UnitRange{Int}) = ClosedSpan(p) Base.convert(::Type{Complement{L}}, p::UnitRange{Int}) where L <: AbstractLocus = Complement(L(p)) function Base.convert(::Type{ClosedSpan}, p::StepRange{Int, Int}) @@ -441,24 +443,50 @@ Base.isless(g1::AbstractGene, g2::AbstractGene) = ((locus(g1) == locus(g2)) && ( Base.:(==)(loc1::SpanLocus{ClosedSpan}, loc2::SpanLocus{ClosedSpan}) = loc1.position == loc2.position Base.:(==)(loc1::PointLocus{SingleNucleotide}, loc2::PointLocus{SingleNucleotide}) = loc1.position == loc2.position +Base.:(==)(loc1::PointLocus{BetweenNucleotides}, loc2::PointLocus{BetweenNucleotides}) = loc1.position == loc2.position Base.:(==)(loc1::Join{T}, loc2::Join{T}) where {T <: AbstractLocus} = (length(loc1.loc) == length(loc2.loc)) && all(pair -> pair[1] == pair[2], zip(loc1.loc, loc2.loc)) Base.:(==)(loc1::Order{T}, loc2::Order{T}) where {T <: AbstractLocus} = (length(loc1.loc) == length(loc2.loc)) && all(pair -> pair[1] == pair[2], zip(loc1.loc, loc2.loc)) Base.:(==)(loc1::Complement{T}, loc2::Complement{T}) where {T <: AbstractLocus} = loc1.loc == loc2.loc Base.:(==)(loc1::AbstractLocus, loc2::AbstractLocus) = false -Base.in(loc::PointLocus{SingleNucleotide}, r::UnitRange) = loc.position in r -Base.in(loc::PointLocus{BetweenNucleotides}, r::UnitRange) = loc.position in r[1:end-1] -Base.in(loc::SpanLocus, r::UnitRange) = loc.position in r -Base.in(loc::Join, r::UnitRange) = all(in.(loc.loc, r)) -Base.in(loc::Order, r::UnitRange) = all(in.(loc.loc, r)) -Base.in(loc::Complement, r::UnitRange) = all(in.(loc.loc, r)) +# Base.in(loc::PointLocus{SingleNucleotide}, r::UnitRange) = loc.position in r +# Base.in(loc::PointLocus{BetweenNucleotides}, r::UnitRange) = loc.position in r[1:end-1] +# Base.in(loc::SpanLocus, r::UnitRange) = loc.position in r +# Base.in(loc::Join, r::UnitRange) = all(in.(loc.loc, Ref(r))) +# Base.in(loc::Order, r::UnitRange) = all(in.(loc.loc, Ref(r))) +# Base.in(loc::Complement, r::UnitRange) = all(in.(loc.loc, Ref(r))) # Base.intersect(loc1::PointLocus{SingleNucleotide}, loc2::A) # Base.intersect(loc1::Locus, loc2::Locus) = intersect(loc1.position, loc2.position) -Base.iterate(loc::PointLocus{SingleNucleotide}) = iterate(loc.position) -Base.iterate(loc::SpanLocus{T}) where T = iterate(loc.position) -Base.iterate(loc::AbstractLocus) = iterate(union(x.loc for x in loc.loc)) +# Base.iterate(loc::PointLocus{SingleNucleotide}) = iterate(loc.position) +# Base.iterate(loc::SpanLocus{T}) where T = iterate(loc.position) +# Base.iterate(loc::AbstractLocus) = iterate(union(x.loc for x in loc.loc)) + +Base.iterate(loc::Union{PointLocus, SpanLocus}) = (loc, nothing) +Base.iterate(loc::Union{PointLocus, SpanLocus}, ::Any) = nothing +Base.iterate(loc::Union{Join, Order}, i = 1) = i > length(loc.loc) ? nothing : (loc.loc[i], i+1) +Base.iterate(loc::Complement{T}) where {T <: Union{PointLocus, SpanLocus}} = (loc, 1) +Base.iterate(loc::Complement{T}, ::Any) where {T <: Union{PointLocus, SpanLocus}} = nothing +Base.iterate(loc::Complement{T}, i = 1) where {T <: Union{Join, Order}} = i > length(loc.loc.loc) ? nothing : (Complement(loc.loc.loc[end-i+1]), i+1) + +Base.IteratorSize(loc::AbstractLocus) = Base.SizeUnknown() + +""" + eachposition(loc::AbstractLocus) + +Returns an object that iterates over each position in the locus in the specified order. Returns `nothing` for `PointLocus{BetweenNucleotides}`. + +```julia +julia> eachposition(Locus("join(1..3,complement(7..9))")) +[1,2,3,9,8,7] +""" +eachposition(loc::PointLocus{BetweenNucleotides}) = nothing +eachposition(loc::PointLocus{SingleNucleotide}) = loc.position +eachposition(loc::SpanLocus) = loc.position +eachposition(loc::Union{Join, Order}) = Iterators.flatten(map(eachposition, loc.loc)) +eachposition(loc::Complement{T}) where {T <: Union{PointLocus, SpanLocus}} = Iterators.reverse(loc.position) +eachposition(loc::Complement) = Iterators.reverse(eachposition(loc.loc)) index(g::Gene) = getfield(g, :index) locus(g::Gene) = getfield(g, :locus) diff --git a/test/example.gbk b/test/example.gbk index 2aed77c..8878f03 100644 --- a/test/example.gbk +++ b/test/example.gbk @@ -82,6 +82,8 @@ FEATURES Location/Qualifiers LISGDDKILNGVYSQYEEGESIFGSLF" regulatory complement(4047..4052) /locus_tag="reg01" + gene join(complement(4053..4060),4070..4080) + /locus_tag="joined_gene" ORIGIN 1 aaatgtccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct diff --git a/test/runtests.jl b/test/runtests.jl index 5a2732d..6857b7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -63,14 +63,29 @@ using Test end @testset "Iteration" begin - @test length([g.locus_tag for g in chr.genes]) == 7 + @test length([g.locus_tag for g in chr.genes]) == 8 + @test [loc for loc in Locus("complement(join(1..3,7..9))")] == + [loc for loc in Locus("complement(order(1..3,7..9))")] == [Complement(ClosedSpan(7:9)), Complement(ClosedSpan(1:3))] + @test [loc for loc in Locus("complement(1..3)")] == [Complement(ClosedSpan(1:3))] + @test [loc for loc in Locus("join(complement(1..3),complement(7..9))")] == + [loc for loc in Locus("order(complement(1..3),complement(7..9))")] == [Complement(ClosedSpan(1:3)), Complement(ClosedSpan(7:9))] + @test [loc for loc in Locus("1^2")] == [Locus("1^2")] + @test [loc for loc in Locus("1..3")] == [Locus("1..3")] + @test [loc for loc in Locus("1")] == [Locus("1")] + end + + @testset "eachposition" begin + @test collect(eachposition(Locus("1..3"))) == [1, 2, 3] + @test collect(eachposition(Locus("join(1..3,7..9)"))) == [1, 2, 3, 7, 8, 9] + @test collect(eachposition(Locus("complement(join(1..3,7..9))"))) == reverse([1, 2, 3, 7, 8, 9]) + @test collect(eachposition(Locus("order(complement(1..3),7..9)"))) == [3, 2, 1, 7, 8, 9] end @testset "Adding/removing genes" begin addgene!(chr, :CDS, ClosedSpan(300:390), locus_tag = "tag04") @test chr.genes[end].locus_tag == "tag04" delete!(chr.genes[end]) - @test chr.genes[end].locus_tag == "reg01" + @test chr.genes[end-1].locus_tag == "reg01" end @testset "@genes" begin