feat: Add Nucleotide Count (resolves #161) (#166)

* feat: Add Nucleotide Count (resolves #161) * Nucleotide Count: output counts as list
exercism · Oct 13, 2023 · ee3d6c7 · ee3d6c7
1 parent 86898ea
commit ee3d6c7
Show file tree

Hide file tree

Showing 7 changed files with 321 additions and 0 deletions.
diff --git a/config.json b/config.json
@@ -210,6 +210,17 @@
         "difficulty": 3,
         "topics": []
       },
+      {
+        "slug": "nucleotide-count",
+        "name": "Nucleotide Count",
+        "uuid": "53d459dc-c557-4a45-828c-4933a7f020c7",
+        "practices": [],
+        "prerequisites": [],
+        "difficulty": 2,
+        "topics": [
+          "strings"
+        ]
+      },
       {
         "slug": "rna-transcription",
         "name": "RNA Transcription",

diff --git a/exercises/practice/nucleotide-count/.docs/instructions.md b/exercises/practice/nucleotide-count/.docs/instructions.md
@@ -0,0 +1,23 @@
+# Instructions
+
+Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
+All known life depends on DNA!
+
+> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.
+
+DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
+A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
+We call the order of these nucleotides in a bit of DNA a "DNA sequence".
+
+We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
+'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.
+
+Given a string representing a DNA sequence, count how many of each nucleotide is present.
+If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.
+
+For example:
+
+```text
+"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
+"INVALID" -> error
+```
diff --git a/exercises/practice/nucleotide-count/.meta/config.json b/exercises/practice/nucleotide-count/.meta/config.json
@@ -0,0 +1,19 @@
+{
+  "authors": [
+    "keiravillekode"
+  ],
+  "files": {
+    "solution": [
+      "impl.mips"
+    ],
+    "test": [
+      "runner.mips"
+    ],
+    "example": [
+      ".meta/example.mips"
+    ]
+  },
+  "blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
+  "source": "The Calculating DNA Nucleotides_problem at Rosalind",
+  "source_url": "https://rosalind.info/problems/dna/"
+}
diff --git a/exercises/practice/nucleotide-count/.meta/example.mips b/exercises/practice/nucleotide-count/.meta/example.mips
@@ -0,0 +1,65 @@
+# Count nucleotides in strand
+#
+# $a0 - pointer to null-terminated input string
+# $a1 - pointer to output array (4 words)
+# $t0 - 'A'
+# $t1 - 'C'
+# $t2 - 'G'
+# $t3 - 'T'
+# $t4 - count 'A'
+# $t5 - count 'C'
+# $t6 - count 'G'
+# $t7 - count 'T'
+# $t8 - pointer into input text
+# $t9 - current character
+
+.globl nucleotide_counts
+
+# nucleotide_counts
+#
+# Walks the null-terminated strand at $a0 one byte at a time and tallies the
+# occurrences of 'A', 'C', 'G' and 'T'. The four tallies are written, in that
+# order, to the four-word array at $a1. If any other non-null byte is met,
+# scanning stops and all four words are written as -1.
+#
+# Uses only $t0-$t9; $a0 and $a1 are left untouched.
+nucleotide_counts:
+        move    $t8, $a0                # cursor = first byte of the strand
+        li      $t4, 0                  # tally of 'A'
+        li      $t5, 0                  # tally of 'C'
+        li      $t6, 0                  # tally of 'G'
+        li      $t7, 0                  # tally of 'T'
+        li      $t0, 'A'                # the four accepted symbols stay in
+        li      $t1, 'C'                # registers so each test below is a
+        li      $t2, 'G'                # single compare-and-branch
+        li      $t3, 'T'
+
+next_char:
+        lb      $t9, 0($t8)             # byte under the cursor
+        beqz    $t9, write_counts       # terminator: every byte seen was valid
+        beq     $t9, $t0, count_a
+        beq     $t9, $t1, count_c
+        beq     $t9, $t2, count_g
+        beq     $t9, $t3, count_t
+
+        # Not a nucleotide: discard the tallies and report -1 in every slot.
+        li      $t4, -1
+        li      $t5, -1
+        li      $t6, -1
+        li      $t7, -1
+
+write_counts:
+        sw      $t4, 0($a1)             # A
+        sw      $t5, 4($a1)             # C
+        sw      $t6, 8($a1)             # G
+        sw      $t7, 12($a1)            # T
+        jr      $ra
+
+count_a:
+        addi    $t4, $t4, 1
+        j       step
+
+count_c:
+        addi    $t5, $t5, 1
+        j       step
+
+count_g:
+        addi    $t6, $t6, 1
+        j       step
+
+count_t:
+        addi    $t7, $t7, 1             # falls through into step
+
+step:
+        addi    $t8, $t8, 1             # move the cursor to the next byte
+        j       next_char
diff --git a/exercises/practice/nucleotide-count/.meta/tests.toml b/exercises/practice/nucleotide-count/.meta/tests.toml
@@ -0,0 +1,25 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[3e5c30a8-87e2-4845-a815-a49671ade970]
+description = "empty strand"
+
+[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
+description = "can count one nucleotide in single-character input"
+
+[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
+description = "strand with repeated nucleotide"
+
+[40a45eac-c83f-4740-901a-20b22d15a39f]
+description = "strand with multiple nucleotides"
+
+[b4c47851-ee9e-4b0a-be70-a86e343bd851]
+description = "strand with invalid nucleotides"
diff --git a/exercises/practice/nucleotide-count/impl.mips b/exercises/practice/nucleotide-count/impl.mips
diff --git a/exercises/practice/nucleotide-count/runner.mips b/exercises/practice/nucleotide-count/runner.mips
@@ -0,0 +1,178 @@
+#
+# Test nucleotide_counts with some examples
+#
+# a0 - input string, for callee
+# a1 - pointer to output array, for callee
+# s0 - num of tests left to run
+# s1 - address of input string
+# s2 - address of expected output words
+# s3 - char byte of input
+# s5 - copy of output location
+# t0 - expected output word
+# t1 - actual output word
+#
+# nucleotide_counts must:
+# - be named nucleotide_counts and declared as global
+# - read input string from a0
+# - follow the convention of using the t0-9 registers for temporary storage
+# - (if it uses s0-7 then it is responsible for pushing existing values to the stack then popping them back off before returning)
+# - write counts for A,C,G,T to the word array with address given in a1
+# - write -1 values to the array if the input is invalid
+
+.data
+
+# number of test cases
+n: .word 5
+# input strands, one null-terminated string per test case, stored back to back
+ins:  .asciiz
+        "",
+        "G",
+        "GGGGGGG",
+        "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC",
+        "AGXXACT"
+
+# expected counts: four words (A, C, G, T) per test case, in the same order
+# as the strands above; -1 in every slot marks an invalid strand
+outs: .word
+        0, 0, 0, 0,
+        0, 0, 1, 0,
+        0, 0, 7, 0,
+        20, 12, 17, 21,
+        -1, -1, -1, -1
+
+failmsg: .asciiz "failed for test input: "
+expectedmsg: .asciiz ". expected "
+tobemsg: .asciiz " to be "
+okmsg: .asciiz "all tests passed"
+
+# Pieces used by print_list. Each one names its directive explicitly so the
+# strings stay null-terminated even if other data is inserted above them.
+listprefix: .asciiz "["
+listseparator: .asciiz ", "
+listsuffix: .asciiz "]"
+
+.text
+
+# Test driver: feeds each strand in `ins` to nucleotide_counts and checks the
+# four words it writes against the matching four words in `outs`.
+# Prints okmsg and exits cleanly when every case matches; on the first
+# mismatch prints the failing input with the actual and expected lists and
+# exits with status 1.
+runner:
+        lw      $s0, n                  # s0 = test cases still to run
+        la      $s1, ins                # s1 = current input string
+        la      $s2, outs               # s2 = expected words for the current test
+
+        li      $a0, 16                 # 16 bytes: room for four count words
+        li      $v0, 9                  # code for allocating heap memory
+        syscall
+        move    $s5, $v0                # s5 = buffer the callee writes its result into
+
+run_test:
+        jal     clear_output            # every test starts from an all-zero buffer
+        move    $a1, $s5                # a1 = where the counts must be written
+        move    $a0, $s1                # a0 = strand to analyse
+        jal     nucleotide_counts       # call subroutine under test
+
+        move    $t2, $s2                # t2 walks the expected words
+        move    $t3, $s5                # t3 walks the actual words
+        addi    $t4, $s2, 16            # t4 = address just past the fourth expected word
+
+compare_word:
+        lw      $t0, 0($t2)             # expected count
+        lw      $t1, 0($t3)             # actual count
+        bne     $t0, $t1, exit_fail     # first mismatch (A, C, G, T order) fails the test
+        addi    $t2, $t2, 4
+        addi    $t3, $t3, 4
+        bne     $t2, $t4, compare_word  # loop until all four words are checked
+
+input_scan:
+        lb      $s3, 0($s1)             # step s1 past the current string,
+        addi    $s1, $s1, 1             # including its null terminator, so it
+        bnez    $s3, input_scan         # lands on the next test's input
+
+done_scan:
+        addi    $s2, $s2, 16            # next group of four expected words
+        addi    $s0, $s0, -1            # one fewer test left to run
+        bgtz    $s0, run_test           # keep going while tests remain
+
+exit_ok:
+        li      $v0, 4                  # 4 is print string
+        la      $a0, okmsg
+        syscall
+
+        li      $v0, 10                 # 10 is exit with zero status (clean exit)
+        syscall
+
+exit_fail:
+        li      $v0, 4                  # 4 is print string
+        la      $a0, failmsg
+        syscall
+
+        li      $v0, 4
+        move    $a0, $s1                # s1 still points at the input that failed
+        syscall
+
+        li      $v0, 4
+        la      $a0, expectedmsg
+        syscall
+
+        move    $a0, $s5                # list of counts the callee produced
+        jal     print_list
+
+        li      $v0, 4
+        la      $a0, tobemsg
+        syscall
+
+        move    $a0, $s2                # list of counts the test wanted
+        jal     print_list
+
+        li      $v0, 17                 # 17 is exit with error
+        li      $a0, 1                  # exit status 1
+        syscall
+
+# clear_output: overwrite the four-word result buffer at $s5 with zeros so a
+# callee that writes nothing cannot pass on leftovers from the previous test.
+clear_output:
+        sw      $zero, 12($s5)          # fourth word
+        sw      $zero, 8($s5)           # third word
+        sw      $zero, 4($s5)           # second word
+        sw      $zero, 0($s5)           # first word
+        jr      $ra
+
+# print_list: print the four words starting at $a0 as "[w0, w1, w2, w3]".
+# Uses $t2 and $t3 as scratch and overwrites $a0 and $v0.
+print_list:
+        move    $t2, $a0                # t2 = word currently being printed
+        addi    $t3, $a0, 12            # t3 = address of the fourth (last) word
+
+        li      $v0, 4                  # 4 is print string
+        la      $a0, listprefix
+        syscall
+
+        li      $v0, 1                  # 1 is print integer
+        lw      $a0, 0($t2)             # first word needs no separator before it
+        syscall
+
+print_list_next:
+        li      $v0, 4
+        la      $a0, listseparator
+        syscall
+
+        addi    $t2, $t2, 4             # advance to the next word
+        li      $v0, 1
+        lw      $a0, 0($t2)
+        syscall
+
+        bne     $t2, $t3, print_list_next # stop once the fourth word is printed
+
+        li      $v0, 4
+        la      $a0, listsuffix
+        syscall
+
+        jr      $ra
+
+# # Include your implementation here if you wish to run this from the MARS GUI.
+# .include "impl.mips"