Skip to content

Commit

Permalink
feat: Add Nucleotide Count (resolves #161) (#166)
Browse files Browse the repository at this point in the history
* feat: Add Nucleotide Count (resolves #161)

* Nucleotide Count: output counts as list
  • Loading branch information
keiravillekode authored Oct 13, 2023
1 parent 86898ea commit ee3d6c7
Show file tree
Hide file tree
Showing 7 changed files with 321 additions and 0 deletions.
11 changes: 11 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,17 @@
"difficulty": 3,
"topics": []
},
{
"slug": "nucleotide-count",
"name": "Nucleotide Count",
"uuid": "53d459dc-c557-4a45-828c-4933a7f020c7",
"practices": [],
"prerequisites": [],
"difficulty": 2,
"topics": [
"strings"
]
},
{
"slug": "rna-transcription",
"name": "RNA Transcription",
Expand Down
23 changes: 23 additions & 0 deletions exercises/practice/nucleotide-count/.docs/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Instructions

Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
All known life depends on DNA!

> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.

DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
We call the order of these nucleotides in a bit of DNA a "DNA sequence".

We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.

Given a string representing a DNA sequence, count how many of each nucleotide is present.
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.

For example:

```text
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
"INVALID" -> error
```
19 changes: 19 additions & 0 deletions exercises/practice/nucleotide-count/.meta/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"authors": [
"keiravillekode"
],
"files": {
"solution": [
"impl.mips"
],
"test": [
"runner.mips"
],
"example": [
".meta/example.mips"
]
},
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
"source": "The Calculating DNA Nucleotides_problem at Rosalind",
"source_url": "https://rosalind.info/problems/dna/"
}
65 changes: 65 additions & 0 deletions exercises/practice/nucleotide-count/.meta/example.mips
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# nucleotide_counts - tally the A, C, G and T characters of a DNA strand
#
# Arguments
#   $a0 - address of the null-terminated input strand
#   $a1 - address of a 4-word output array, written as [A, C, G, T]
#
# Result
#   Strand made only of A/C/G/T: each output word holds that letter's count.
#   Any other character before the terminator: all four words are set to -1.
#
# Register roles (temporaries only)
#   $t0 - 'A'            $t4 - count 'A'
#   $t1 - 'C'            $t5 - count 'C'
#   $t2 - 'G'            $t6 - count 'G'
#   $t3 - 'T'            $t7 - count 'T'
#   $t8 - pointer into input text
#   $t9 - current character

.globl nucleotide_counts

nucleotide_counts:
        move    $t8, $a0                # cursor starts at the first character
        li      $t4, 0                  # all four tallies start at zero
        li      $t5, 0
        li      $t6, 0
        li      $t7, 0
        li      $t0, 'A'                # letters kept in registers for the compares
        li      $t1, 'C'
        li      $t2, 'G'
        li      $t3, 'T'

scan:
        lb      $t9, 0($t8)             # fetch the character under the cursor
        beqz    $t9, store              # terminator reached: counts are final
        addi    $t8, $t8, 1             # step past the character just fetched

        bne     $t9, $t0, try_c
        addi    $t4, $t4, 1             # saw 'A'
        j       scan

try_c:
        bne     $t9, $t1, try_g
        addi    $t5, $t5, 1             # saw 'C'
        j       scan

try_g:
        bne     $t9, $t2, try_t
        addi    $t6, $t6, 1             # saw 'G'
        j       scan

try_t:
        bne     $t9, $t3, invalid
        addi    $t7, $t7, 1             # saw 'T'
        j       scan

invalid:
        li      $t4, -1                 # not a nucleotide: stop scanning and
        li      $t5, -1                 # report -1 in every slot
        li      $t6, -1
        li      $t7, -1

store:
        sw      $t4, 0($a1)             # A
        sw      $t5, 4($a1)             # C
        sw      $t6, 8($a1)             # G
        sw      $t7, 12($a1)            # T
        jr      $ra
25 changes: 25 additions & 0 deletions exercises/practice/nucleotide-count/.meta/tests.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.

[3e5c30a8-87e2-4845-a815-a49671ade970]
description = "empty strand"

[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
description = "can count one nucleotide in single-character input"

[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
description = "strand with repeated nucleotide"

[40a45eac-c83f-4740-901a-20b22d15a39f]
description = "strand with multiple nucleotides"

[b4c47851-ee9e-4b0a-be70-a86e343bd851]
description = "strand with invalid nucleotides"
Empty file.
178 changes: 178 additions & 0 deletions exercises/practice/nucleotide-count/runner.mips
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#
# Test nucleotide_counts with some examples
#
# a0 - input string, for callee
# a1 - pointer to output array, for callee
# s0 - num of tests left to run
# s1 - address of input string
# s2 - address of expected output words
# s3 - char byte of input
# s5 - copy of output location
# t0 - expected output word
# t1 - actual output word
#
# nucleotide_counts must:
# - be named nucleotide_counts and declared as global
# - read input string from a0
# - follow the convention of using the t0-9 registers for temporary storage
# - (if it uses s0-7 then it is responsible for pushing existing values to the stack then popping them back off before returning)
# - write counts for A,C,G,T to the word array with address given in a1
# - write -1 values to the array if the input is invalid

.data

# number of test cases
n:      .word 5

# Input strands: one null-terminated string per test case, stored back to
# back in test order. The strings on the lines after the directive are
# emitted under the same .asciiz directive.
ins:    .asciiz
        "",
        "G",
        "GGGGGGG",
        "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC",
        "AGXXACT"

# Expected results: four words per test case, in A, C, G, T order, in the
# same test order as `ins`. A row of -1 is the expected result for an
# invalid strand.
outs:   .word
        0, 0, 0, 0,
        0, 0, 1, 0,
        0, 0, 7, 0,
        20, 12, 17, 21,
        -1, -1, -1, -1

# Messages printed by the runner (all null terminated).
failmsg:     .asciiz "failed for test input: "
expectedmsg: .asciiz ". expected "
tobemsg:     .asciiz " to be "
okmsg:       .asciiz "all tests passed"

# Pieces used by print_list to render a count array as "[a, c, g, t]".
# Each carries its own .asciiz so it stays a null-terminated string
# regardless of which directive happens to precede it.
listprefix:    .asciiz "["
listseparator: .asciiz ", "
listsuffix:    .asciiz "]"

.text

# Test driver: runs every case in `ins`/`outs` through nucleotide_counts.
# Prints okmsg and exits cleanly when all cases match; branches to exit_fail
# on the first output word that differs from the expected one.
runner:
        lw      $s0, n                  # tests still to run
        la      $s1, ins                # current input strand
        la      $s2, outs               # current row of expected counts

        li      $a0, 16                 # 16 bytes: four result words
        li      $v0, 9                  # 9 is allocate heap memory
        syscall
        move    $s5, $v0                # callee writes its result here

run_test:
        jal     clear_output            # start each case from a zeroed result
        move    $a1, $s5                # a1 = result array
        move    $a0, $s1                # a0 = input strand
        jal     nucleotide_counts       # call subroutine under test

        move    $t2, $s2                # cursor over expected words
        move    $t3, $s5                # cursor over actual words
        addiu   $t4, $s2, 16            # one past the last expected word

compare_word:                           # words are checked in A, C, G, T order
        lw      $t0, 0($t2)             # expected
        lw      $t1, 0($t3)             # actual
        bne     $t0, $t1, exit_fail
        addiu   $t2, $t2, 4
        addiu   $t3, $t3, 4
        bne     $t2, $t4, compare_word

input_scan:                             # advance s1 past this strand's terminator
        lb      $s3, 0($s1)
        addi    $s1, $s1, 1
        bnez    $s3, input_scan

done_scan:
        addi    $s2, $s2, 16            # next row of expected counts
        addi    $s0, $s0, -1            # one fewer test left to run
        bgtz    $s0, run_test           # keep going while tests remain

exit_ok:
        li      $v0, 4                  # 4 is print string
        la      $a0, okmsg
        syscall

        li      $v0, 10                 # 10 is exit with zero status (clean exit)
        syscall

# Failure exit. Prints
#   failed for test input: <strand>. expected <actual list> to be <expected list>
# and terminates with exit code 1.
# Reads: $s1 (input strand), $s5 (actual counts), $s2 (expected counts).
exit_fail:
        li      $v0, 4                  # 4 is print string
        la      $a0, failmsg
        syscall

        li      $v0, 4
        move    $a0, $s1                # the strand that produced the mismatch
        syscall

        li      $v0, 4
        la      $a0, expectedmsg
        syscall

        move    $a0, $s5                # counts written by the callee
        jal     print_list

        li      $v0, 4
        la      $a0, tobemsg
        syscall

        move    $a0, $s2                # counts the test case asked for
        jal     print_list

        li      $v0, 17                 # 17 is exit with error
        li      $a0, 1                  # exit code 1
        syscall

# Reset the 4-word (16-byte) result array addressed by $s5 to all zeros.
# Takes no arguments; only memory at 0($s5)..15($s5) is written.
clear_output:
        sw      $zero, 12($s5)          # T slot
        sw      $zero, 8($s5)           # G slot
        sw      $zero, 4($s5)           # C slot
        sw      $zero, 0($s5)           # A slot
        jr      $ra

# Print the four words at $a0 as a bracketed list, e.g. "[3, 1, 1, 2]".
#
# $a0 - address of four consecutive count words (A, C, G, T)
# Uses $t2 (cursor) and $t3 (address of the last word); overwrites $a0 and $v0.
print_list:
        move    $t2, $a0                # cursor over the count words
        addiu   $t3, $a0, 12            # address of the fourth (T) word

        li      $v0, 4                  # 4 is print string
        la      $a0, listprefix
        syscall

print_list_item:
        li      $v0, 1                  # 1 is print integer
        lw      $a0, 0($t2)
        syscall

        beq     $t2, $t3, print_list_close  # no separator after the last count

        li      $v0, 4
        la      $a0, listseparator
        syscall

        addiu   $t2, $t2, 4             # move on to the next count
        j       print_list_item

print_list_close:
        li      $v0, 4
        la      $a0, listsuffix
        syscall

        jr      $ra

# # Include your implementation here if you wish to run this from the MARS GUI.
# .include "impl.mips"

0 comments on commit ee3d6c7

Please sign in to comment.