Skip to content

Commit

Permalink
Fixing discrepancies between number of sequences and resulting geneti…
Browse files Browse the repository at this point in the history
…c data structure size
  • Loading branch information
jhellewell14 committed Nov 15, 2024
1 parent 09d046f commit 1071f17
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 98 deletions.
37 changes: 23 additions & 14 deletions src/genetic_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ pub fn char_to_likelihood(e: &char) -> [f64; 4] {
}
}

pub fn count_sequences(filename: &str) -> usize {
let mut reader = parse_fastx_file(filename).expect("Error parsing file");
let mut n_seqs: usize = 0;
while let Some(_record) = reader.next() {
n_seqs += 1;
};
n_seqs
}


pub fn create_genetic_data(filename: &str, topology: &Topology, rate_matrix: &na::Matrix4<f64>) -> ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> {
// Count number of sequences and their length
Expand All @@ -60,7 +69,7 @@ pub fn create_genetic_data(filename: &str, topology: &Topology, rate_matrix: &na
}
// Create pre-filled array
let mut gen_data: ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> =
ndarray::Array3::from_elem(((2 * n_seqs) + 1, n_bases, 4), -99.0);
ndarray::Array3::from_elem(((2 * n_seqs) - 1, n_bases, 4), -99.0);
// println!("Assigning data for {} leaves and {} total nodes", n_seqs, (2 * n_seqs) + 1);

let mut reader2 = parse_fastx_file(filename).expect("Error parsing file");
Expand All @@ -82,7 +91,7 @@ pub fn create_genetic_data(filename: &str, topology: &Topology, rate_matrix: &na
}


pub fn create_internal_data(mut gen_data: ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>>,
pub fn create_internal_data(mut data: ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>>,
topology: &Topology, rate_matrix: &na::Matrix4<f64>) ->
ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> {
// Iterate over internal nodes postorder
Expand All @@ -93,17 +102,17 @@ pub fn create_internal_data(mut gen_data: ndarray::ArrayBase<ndarray::OwnedRepr<
// Calculate node likelihood
let lchild = node.get_lchild().unwrap();
let rchild = node.get_rchild().unwrap();
let node_ll = node_likelihood(slice_gen_data(lchild, &gen_data),
slice_gen_data(rchild, &gen_data),
let node_ll = node_likelihood(slice_data(lchild, &data),
slice_data(rchild, &data),
&matrix_exp(rate_matrix, topology.nodes[lchild].get_branchlen()),
&matrix_exp(rate_matrix, topology.nodes[lchild].get_branchlen()));
// let node_ll = node_likelihood(node.get_lchild().unwrap(), node.get_rchild().unwrap(), &gen_data, topology, rate_matrix);
// let node_ll = node_likelihood(i, &gen_data, topology, rate_matrix);
// Add to genetic data array
gen_data.slice_mut(s![i, .., ..]).assign(&node_ll);
data.slice_mut(s![i, .., ..]).assign(&node_ll);
}

gen_data
data
}

pub fn create_dummy_gendata(n_bases: usize, topology: &Topology, rate_matrix: &na::Matrix4<f64>) -> ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> {
Expand All @@ -121,7 +130,7 @@ pub fn create_dummy_gendata(n_bases: usize, topology: &Topology, rate_matrix: &n
gen_data[[i, j, k]] = NEGINF;
}
}

create_internal_data(gen_data, topology, rate_matrix)
}

Expand All @@ -139,10 +148,10 @@ pub fn matrix_exp(rate_matrix: &na::Matrix4<f64>, branch_len: f64) -> na::Matrix
na::Matrix::exp(&(rate_matrix * branch_len))
}

pub fn slice_gen_data(index: usize, gen_data: &ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>>) ->
pub fn slice_data(index: usize, data: &ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>>) ->
ndarray::ArrayBase<ndarray::ViewRepr<&f64>, ndarray::Dim<[usize; 2]>> {
gen_data.slice(s![index, .., ..])
}
data.slice(s![index, .., ..])
}

pub fn node_likelihood(seql: ndarray::ArrayBase<ndarray::ViewRepr<&f64>, ndarray::Dim<[usize; 2]>>,
seqr: ndarray::ArrayBase<ndarray::ViewRepr<&f64>, ndarray::Dim<[usize; 2]>>,
Expand Down Expand Up @@ -232,15 +241,15 @@ impl Topology {
},
(true, false) => {
seql = temp_likelihoods.get(&lchild).unwrap().slice(s![.., ..]);
seqr = slice_gen_data(rchild, &gen_data);
seqr = slice_data(rchild, &gen_data);
},
(false, true) => {
seql = slice_gen_data(lchild, &gen_data);
seql = slice_data(lchild, &gen_data);
seqr = temp_likelihoods.get(&rchild).unwrap().slice(s![.., ..]);
},
(false, false) => {
seql = slice_gen_data(lchild, &gen_data);
seqr = slice_gen_data(rchild, &gen_data);
seql = slice_data(lchild, &gen_data);
seqr = slice_data(rchild, &gen_data);
},
};

Expand Down
12 changes: 9 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ mod rate_matrix;
mod topology;
mod genetic_data;
mod moves;
mod state_data;

use rate_matrix::RateMatrix;
use state_data::create_dummy_statedata;
use topology::Topology;

use crate::newick_to_vec::*;
Expand All @@ -24,19 +26,23 @@ pub fn main() {

// let mut tr = vector_to_tree(&random_vector(4));
// tr.add_genetic_data(&String::from("/Users/joel/Downloads/listeria0.aln"));
let v = random_vector(27);
// let n_seqs = count_sequences(&args.alignment);

// let mut tr = vector_to_tree(&v, &GTR::default());
// tr.add_genetic_data(&args.alignment);
let v = random_vector(28);

let mut t: Topology = Topology::from_vec(&v);

// println!("{:?}", t.count_leaves());

let p = &rate_matrix::GTR::default();
let mut gen_data = create_genetic_data(&args.alignment, &t, &p.get_matrix());

println!("{:?}", likelihood(&t, &gen_data));
println!("{:?}", t.get_newick());
println!("{:?}", t.tree_vec);

// let mge_mat = na::Matrix2::new(0.4, 0.6, 0.6, 0.4);
// let mut st = create_dummy_statedata(1, &t, &mge_mat);

if !args.no_optimise {
let start = Instant::now();
Expand Down
2 changes: 1 addition & 1 deletion src/moves.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,6 @@ pub fn hillclimb_accept(old_ll: &f64, new_ll: &f64) -> bool {
new_ll.gt(old_ll)
}

pub fn always_accept(old_ll: &f64, new_ll: &f64) -> bool {
pub fn always_accept(_old_ll: &f64, _new_ll: &f64) -> bool {
true
}
4 changes: 2 additions & 2 deletions src/newick_to_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ pub mod ffi {
////////////////////////////////////////////////
// Create a random vector with a given length //
////////////////////////////////////////////////
pub fn random_vector(k: usize) -> Vec<usize> {
pub fn random_vector(n_seqs: usize) -> Vec<usize> {
let mut rng = rand::thread_rng();

vec![0; k + 1]
vec![0; n_seqs]
.iter()
.enumerate()
.map(|(i, _el)| {
Expand Down
27 changes: 27 additions & 0 deletions src/rate_matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,31 @@ impl Default for JC69 {
out.update_matrix();
out
}
}

#[derive(Debug, Clone, Copy)]
pub struct MGE {
matrix: na::Matrix2<f64>,
gain_rate: f64,
}

impl MGE {
fn get_matrix(&self) -> na::Matrix2<f64> {
self.matrix
}

fn get_params(&self) -> Vec<f64> {
vec![self.gain_rate]
}

fn update_matrix(&mut self) {
self.matrix = na::Matrix2::new(-self.gain_rate,
self.gain_rate,
self.gain_rate,
-self.gain_rate);
}

fn update_params(&mut self, params: Vec<f64>) {
self.gain_rate = params[0];
}
}
77 changes: 77 additions & 0 deletions src/state_data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use crate::topology::Topology;
use rand::{thread_rng, Rng};
use crate::genetic_data::{node_likelihood, slice_data, matrix_exp};
use ndarray::s;
use logaddexp::LogAddExp;

const NEGINF: f64 = -f64::INFINITY;

pub fn create_dummy_statedata(n_mges: usize, topology: &Topology, rate_matrix: &na::Matrix2<f64>) -> ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> {
let n_seqs = topology.count_leaves();

let mut state_data: ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> =
ndarray::Array3::from_elem(((2 * n_seqs) - 1, n_mges, 2), 0.0);

let mut rng = thread_rng();

for i in 0..n_seqs {
for j in 0..n_mges {
let k = rng.gen_range(0..2);
state_data[[i, j, k]] = NEGINF;
}
}

// create internal state_data
create_internal_statedata(state_data, &topology, rate_matrix)
}

pub fn create_internal_statedata(mut data: ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>>,
topology: &Topology, rate_matrix: &na::Matrix2<f64>) ->
ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 3]>> {
// Iterate over internal nodes postorder
let mut nodes = topology.postorder_notips(topology.get_root());

while let Some(node) = nodes.next() {
let i = node.get_id();
// Calculate node likelihood
let lchild = node.get_lchild().unwrap();
let rchild = node.get_rchild().unwrap();
let node_ll = state_likelihood(slice_data(lchild, &data),
slice_data(rchild, &data),
&matrix_exp2(rate_matrix, topology.nodes[lchild].get_branchlen()),
&matrix_exp2(rate_matrix, topology.nodes[lchild].get_branchlen()));

// Add to genetic data array
data.slice_mut(s![i, .., ..]).assign(&node_ll);
}

data
}


pub fn matrix_exp2(rate_matrix: &na::Matrix2<f64>, branch_len: f64) -> na::Matrix2<f64> {
na::Matrix::exp(&(rate_matrix * branch_len))
}

pub fn state_likelihood(seql: ndarray::ArrayBase<ndarray::ViewRepr<&f64>, ndarray::Dim<[usize; 2]>>,
seqr: ndarray::ArrayBase<ndarray::ViewRepr<&f64>, ndarray::Dim<[usize; 2]>>,
matrixl: &na::Matrix2<f64>,
matrixr: &na::Matrix2<f64>,
) -> ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 2]>> {

let out = ndarray::Array2::from_shape_fn((seql.dim().0, 2), |(i, j)|
state_likelihood_i(j, seql.slice(s![i, ..]), &matrixl) +
state_likelihood_i(j, seqr.slice(s![i, ..]), &matrixr));

out
}

pub fn state_likelihood_i(i: usize, ll: ndarray::ArrayBase<ndarray::ViewRepr<&f64>, ndarray::Dim<[usize; 1]>>, p: &na::Matrix2<f64>) -> f64 {
p
.row(i)
.iter()
.zip(ll.iter())
.map(|(a, b)| a.ln() + *b)
.reduce(|a, b| a.ln_add_exp(b))
.unwrap()
}
2 changes: 0 additions & 2 deletions src/tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#[cfg(test)]
mod tests {
use predicates::constant::always;

use crate::{likelihood, newick_to_vec::*};
use crate::rate_matrix::{RateMatrix, GTR};
use crate::topology::Topology;
Expand Down
76 changes: 0 additions & 76 deletions src/tree_moves.rs

This file was deleted.

0 comments on commit 1071f17

Please sign in to comment.