-
-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* feat: Add Nucleotide Count (resolves #161) * Nucleotide Count: output counts as list
- Loading branch information
1 parent
86898ea
commit ee3d6c7
Showing
7 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Instructions | ||
|
||
Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed. | ||
All known life depends on DNA! | ||
|
||
> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise. | ||
DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine. | ||
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important! | ||
We call the order of these nucleotides in a bit of DNA a "DNA sequence". | ||
|
||
We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides. | ||
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine. | ||
|
||
Given a string representing a DNA sequence, count how many of each nucleotide is present. | ||
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error. | ||
|
||
For example: | ||
|
||
```text | ||
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2 | ||
"INVALID" -> error | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"authors": [ | ||
"keiravillekode" | ||
], | ||
"files": { | ||
"solution": [ | ||
"impl.mips" | ||
], | ||
"test": [ | ||
"runner.mips" | ||
], | ||
"example": [ | ||
".meta/example.mips" | ||
] | ||
}, | ||
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.", | ||
"source": "The Calculating DNA Nucleotides_problem at Rosalind", | ||
"source_url": "https://rosalind.info/problems/dna/" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# nucleotide_counts
#
# Tally how often each of 'A', 'C', 'G' and 'T' occurs in a strand.
#
# Inputs:
#   $a0 - address of the null-terminated strand
#   $a1 - address of a four-word array that receives the result
#
# Result (stored through $a1):
#   word 0 = tally of 'A', word 1 = 'C', word 2 = 'G', word 3 = 'T'.
#   If any other character is met before the terminator, scanning stops
#   and all four words are set to -1.
#
# Register roles (temporaries only; $a0 and $a1 are left unchanged):
#   $t0-$t3 - the characters 'A', 'C', 'G', 'T' to compare against
#   $t4-$t7 - running tallies for 'A', 'C', 'G', 'T'
#   $t8     - cursor into the strand
#   $t9     - character under the cursor

.globl nucleotide_counts

nucleotide_counts:
        move    $t8, $a0                # walk a copy so $a0 stays intact
        li      $t4, 0                  # tallies start at zero
        li      $t5, 0
        li      $t6, 0
        li      $t7, 0
        li      $t0, 'A'
        li      $t1, 'C'
        li      $t2, 'G'
        li      $t3, 'T'

nc_scan:
        lb      $t9, 0($t8)             # fetch the character under the cursor
        beqz    $t9, nc_store           # terminator: the tallies are final
        addi    $t8, $t8, 1             # step the cursor to the next character
        beq     $t9, $t0, nc_saw_a
        beq     $t9, $t1, nc_saw_c
        beq     $t9, $t2, nc_saw_g
        bne     $t9, $t3, nc_invalid    # not one of the four nucleotides
        addi    $t7, $t7, 1             # fall-through case: 'T'
        j       nc_scan

nc_saw_a:
        addi    $t4, $t4, 1
        j       nc_scan

nc_saw_c:
        addi    $t5, $t5, 1
        j       nc_scan

nc_saw_g:
        addi    $t6, $t6, 1
        j       nc_scan

nc_invalid:
        li      $t4, -1                 # discard the tallies: report -1 everywhere
        li      $t5, -1
        li      $t6, -1
        li      $t7, -1

nc_store:
        sw      $t4, 0($a1)             # 'A'
        sw      $t5, 4($a1)             # 'C'
        sw      $t6, 8($a1)             # 'G'
        sw      $t7, 12($a1)            # 'T'
        jr      $ra
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# This is an auto-generated file. | ||
# | ||
# Regenerating this file via `configlet sync` will: | ||
# - Recreate every `description` key/value pair | ||
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications | ||
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) | ||
# - Preserve any other key/value pair | ||
# | ||
# As user-added comments (using the # character) will be removed when this file | ||
# is regenerated, comments can be added via a `comment` key. | ||
|
||
[3e5c30a8-87e2-4845-a815-a49671ade970] | ||
description = "empty strand" | ||
|
||
[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec] | ||
description = "can count one nucleotide in single-character input" | ||
|
||
[eca0d565-ed8c-43e7-9033-6cefbf5115b5] | ||
description = "strand with repeated nucleotide" | ||
|
||
[40a45eac-c83f-4740-901a-20b22d15a39f] | ||
description = "strand with multiple nucleotides" | ||
|
||
[b4c47851-ee9e-4b0a-be70-a86e343bd851] | ||
description = "strand with invalid nucleotides" |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
# | ||
# Test nucleotide_counts with some examples | ||
# | ||
# a0 - input string, for callee | ||
# a1 - pointer to output array, for callee | ||
# s0 - num of tests left to run | ||
# s1 - address of input string | ||
# s2 - address of expected output words | ||
# s3 - char byte of input | ||
# s5 - copy of output location | ||
# t0 - expected output word | ||
# t1 - actual output word | ||
# | ||
# nucleotide_counts must: | ||
# - be named nucleotide_counts and declared as global | ||
# - read input string from a0 | ||
# - follow the convention of using the t0-9 registers for temporary storage | ||
# - (if it uses s0-7 then it is responsible for pushing existing values to the stack then popping them back off before returning) | ||
# - write counts for A,C,G,T to the word array with address given in a1 | ||
# - write -1 values to the array if the input is invalid | ||
|
||
.data

# number of test cases
n:              .word 5

# Input strands, one per test case, stored back to back. Each string is
# null terminated; the runner finds the next input by scanning past the
# terminator of the current one.
ins:            .asciiz ""
                .asciiz "G"
                .asciiz "GGGGGGG"
                .asciiz "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
                .asciiz "AGXXACT"

# Expected counts, four words (A, C, G, T) per test case, in the same order
# as the inputs above. Four -1 words mark an input that must be rejected.
outs:           .word 0, 0, 0, 0
                .word 0, 0, 1, 0
                .word 0, 0, 7, 0
                .word 20, 12, 17, 21
                .word -1, -1, -1, -1

failmsg:        .asciiz "failed for test input: "
expectedmsg:    .asciiz ". expected "
tobemsg:        .asciiz " to be "
okmsg:          .asciiz "all tests passed"

# Pieces used by print_list. Each one names its directive explicitly, like
# the messages above, rather than relying on the assembler carrying the
# previous .asciiz over onto a line that has no directive of its own.
listprefix:     .asciiz "["
listseparator:  .asciiz ", "
listsuffix:     .asciiz "]"
|
||
.text

# runner
#
# Test driver. For each of the `n` cases it calls nucleotide_counts on the
# current string in `ins` and checks the four words it produced against the
# matching four words in `outs`. It prints okmsg and exits cleanly when every
# case matches; on the first mismatch it prints a failure report and exits
# with status 1.
#
#   $s0 - number of cases still to run
#   $s1 - start of the current input string
#   $s2 - start of the current group of four expected words
#   $s3 - input byte most recently read while skipping to the next string
#   $s5 - 16-byte buffer the routine under test writes into
runner:
        la      $s2, outs
        la      $s1, ins
        lw      $s0, n

        li      $a0, 16                 # room for four words
        li      $v0, 9                  # 9 is allocate heap memory
        syscall
        move    $s5, $v0                # keep the buffer address for every case

run_test:
        jal     clear_output            # start each case from an all-zero buffer
        move    $a1, $s5
        move    $a0, $s1
        jal     nucleotide_counts       # call subroutine under test

        move    $t2, $s2                # cursor over the expected words
        move    $t3, $s5                # cursor over the words actually written
        addi    $t4, $s2, 16            # one past the last expected word

runner_compare:
        lw      $t0, 0($t2)             # expected
        lw      $t1, 0($t3)             # actual
        bne     $t0, $t1, exit_fail     # A, C, G and T are checked in that order
        addi    $t2, $t2, 4
        addi    $t3, $t3, 4
        bne     $t2, $t4, runner_compare

input_scan:
        lb      $s3, 0($s1)             # step past the current input, terminator included
        addi    $s1, $s1, 1
        bnez    $s3, input_scan

done_scan:
        addi    $s2, $s2, 16            # next group of expected words
        addi    $s0, $s0, -1            # one case fewer to run
        bgtz    $s0, run_test

exit_ok:
        li      $v0, 4                  # 4 is print string
        la      $a0, okmsg
        syscall

        li      $v0, 10                 # 10 is exit with zero status (clean exit)
        syscall

exit_fail:
        li      $v0, 4                  # 4 is print string
        la      $a0, failmsg
        syscall

        li      $v0, 4
        move    $a0, $s1                # $s1 still points at the input that failed
        syscall

        li      $v0, 4
        la      $a0, expectedmsg
        syscall

        move    $a0, $s5                # the counts the routine produced
        jal     print_list

        li      $v0, 4
        la      $a0, tobemsg
        syscall

        move    $a0, $s2                # the counts it should have produced
        jal     print_list

        li      $v0, 17                 # 17 is exit with error
        li      $a0, 1                  # exit status 1
        syscall
|
||
# clear_output
#
# Overwrite the four-word (16-byte) output buffer addressed by $s5 with
# zeros. The runner calls this before every call to the routine under test.
# Changes no registers.
clear_output:
        sw      $zero, 12($s5)
        sw      $zero, 8($s5)
        sw      $zero, 4($s5)
        sw      $zero, 0($s5)
        jr      $ra
|
||
# print_list
#
# Print the four words stored at the address in $a0 in the form
# "[w0, w1, w2, w3]": listprefix, then the words as integers with
# listseparator between neighbours, then listsuffix.
#
# Overwrites $a0, $v0, $t2 and $t3. Relies on $t2 and $t3 keeping their
# values across the print syscalls.
print_list:
        move    $t2, $a0                # $t2 = address of the word to print next
        addi    $t3, $a0, 12            # $t3 = address of the fourth (last) word

        li      $v0, 4                  # 4 is print string
        la      $a0, listprefix
        syscall

print_list_word:
        li      $v0, 1                  # 1 is print integer
        lw      $a0, 0($t2)
        syscall

        beq     $t2, $t3, print_list_close  # no separator after the last word

        li      $v0, 4
        la      $a0, listseparator
        syscall

        addi    $t2, $t2, 4
        j       print_list_word

print_list_close:
        li      $v0, 4
        la      $a0, listsuffix
        syscall

        jr      $ra
|
||
# # Include your implementation here if you wish to run this from the MARS GUI. | ||
# .include "impl.mips" |