diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..d84e315
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,773 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import argparse\n",
+    "from Bio import SeqIO"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_sequences(directory):\n",
+    "    \"\"\"\n",
+    "    Reads in the sequences from the fastq files\n",
+    "    :param directory:\n",
+    "    :return:\n",
+    "    \"\"\"\n",
+    "    files = os.listdir(directory)\n",
+    "    sequences = []\n",
+    "\n",
+    "    for file in files:\n",
+    "        sequence = \"\"\n",
+    "        filename = os.path.join(directory, file)\n",
+    "        for record in SeqIO.parse(filename, \"fastq\"):\n",
+    "            sequence = sequence + ''.join(list(record.seq))\n",
+    "        sequences.append(sequence)\n",
+    "    return sequences"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def split_into_kmers(sequence, k, threshold):\n",
+    "    kmers = {}\n",
+    "\n",
+    "    for i in range(0, len(sequence), k):\n",
+    "        kmer = sequence[i: i + k + 1]\n",
+    "        if len(kmer) > k:\n",
+    "            if kmer in kmers.keys():\n",
+    "                kmers[kmer] = kmers[kmer] + 1\n",
+    "            else:\n",
+    "                kmers[kmer] = 1\n",
+    "\n",
+    "    for key, count in kmers:\n",
+    "        if count <= threshold:\n",
+    "            kmers.pop(key)\n",
+    "\n",
+    "    return kmers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_de_bruijin_graph(kmers):\n",
+    "    # Set of all nodes in the DB Graph\n",
+    "    nodes = set()\n",
+    "    # Set of nodes having in-degrees\n",
+    "    not_starts = set()\n",
+    "    # List of all directed edges in the graph\n",
+    "    edges = []\n",
+    "\n",
+    "    for kmer in kmers:\n",
+    "        # From k-mers, get k-1mers\n",
+    "        k1mer1 = kmer[:-1]\n",
+    "        k1mer2 = kmer[1:]\n",
+    "        # Add k-1 mers to the nodes list\n",
+    "        nodes.add(k1mer1)\n",
+    "        nodes.add(k1mer2)\n",
+    "\n",
+    "        # Add an edge between the two k-1mers\n",
+    "        edges.append((k1mer1, k1mer2))\n",
+    "\n",
+    "        # Add destination node to the set of nodes having in degrees\n",
+    "        not_starts.add(k1mer2)\n",
+    "\n",
+    "    start_nodes = list(nodes - not_starts)\n",
+    "\n",
+    "    # Return the nodes, edges and the starting nodes in the graph.\n",
+    "    return nodes, edges, start_nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_de_bruijin_graph_alt(kmers):\n",
+    "    \n",
+    "    # to keep track of degree of nodes\n",
+    "    node_degree = {}\n",
+    "    \n",
+    "    # Set of all nodes in the DB Graph\n",
+    "    nodes = set()\n",
+    "    \n",
+    "    # Set of nodes having in-degrees\n",
+    "    not_starts = set()\n",
+    "    \n",
+    "    # List of all directed edges in the graph\n",
+    "    edges = []\n",
+    "\n",
+    "    for kmer in kmers:\n",
+    "        \n",
+    "        # From k-mers, get k-1mers\n",
+    "        prefix = kmer[:-1]\n",
+    "        suffix = kmer[1:]\n",
+    "        \n",
+    "        # Add k-1 mers to the nodes list\n",
+    "        nodes.add(prefix)\n",
+    "        nodes.add(suffix)\n",
+    "\n",
+    "        # Add an edge between the two k-1mers\n",
+    "        edges.append((prefix, suffix))\n",
+    "        \n",
+    "        # fetching and updating degrees\n",
+    "        p_count = node_degree.setdefault(prefix, 0)\n",
+    "        node_degree[prefix] = p_count+1\n",
+    "        s_count = node_degree.setdefault(suffix, 0)\n",
+    "        node_degree[suffix] = s_count-1\n",
+    "    \n",
+    "    degree_node = {v:k for k, v in node_degree.items() if v != 0}\n",
+    "    \n",
+    "    start = None\n",
+    "    \n",
+    "    if len(degree_node) > 0:\n",
+    "        start_stop = sorted(degree_node, reverse=True)\n",
+    "        start = degree_node[start_stop[0]]\n",
+    "        \n",
+    "    # Return the nodes, edges and the starting nodes in the graph.\n",
+    "    return nodes, edges, start"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_node_edge_map(edges):\n",
+    "\n",
+    "    # Make a map of starting nodes to the adjacency list of that node\n",
+    "    node_edge_map = {}\n",
+    "\n",
+    "    # Go through all edges\n",
+    "    for e in edges:\n",
+    "        n = e[0]\n",
+    "        # If start node exists, add destination node to adjacency list\n",
+    "        if n in node_edge_map:\n",
+    "            node_edge_map[n].append(e[1])\n",
+    "        # Add start node to map and initialize the adjacency list with the destination node\n",
+    "        else:\n",
+    "            node_edge_map[n] = [e[1]]\n",
+    "    return node_edge_map"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def traverse_graph(graph, start):\n",
+    "    # if there is no explicit start node\n",
+    "    if len(start) == 0:\n",
+    "        # pick any node as the start\n",
+    "        start = list(graph.keys())[0]\n",
+    "    else:\n",
+    "        start = start[0]\n",
+    "    \n",
+    "    # maintain a stack to store the nodes to visit\n",
+    "    path = [start]\n",
+    "    \n",
+    "    # accumulates the eulerian path\n",
+    "    eulerian_path = []\n",
+    "    \n",
+    "    # while stack is non-empty\n",
+    "    while path:\n",
+    "        # pick up the topmost node in the stack\n",
+    "        curr_node = str(path[-1])\n",
+    "        \n",
+    "        # if the current node is a key and has entries in its ajacency list\n",
+    "        if curr_node in graph and graph[curr_node]:\n",
+    "            \n",
+    "            # get the adjacency list\n",
+    "            adj_nodes = graph[curr_node]\n",
+    "            next_node = None\n",
+    "            \n",
+    "            # if only one entry, proceed to that node\n",
+    "            if len(adj_nodes) == 1:\n",
+    "                next_node = adj_nodes.pop()\n",
+    "            \n",
+    "            # if not, proceed to next node that would allow us to traverse\n",
+    "            # the rest of the graph\n",
+    "            else:\n",
+    "                # iterate through all adj_nodes\n",
+    "                for node in adj_nodes:\n",
+    "                    # if the node leads to other nodes, set that as next\n",
+    "                    if node in graph.keys():\n",
+    "                        next_node = node\n",
+    "                        break\n",
+    "                # clear the edge from the current node\n",
+    "                graph[curr_node].remove(next_node)\n",
+    "            # update the path we want to explore\n",
+    "            path.append(next_node)\n",
+    "        else:\n",
+    "            # if we can go any further, append the node to the path\n",
+    "            eulerian_path.append(path.pop())\n",
+    "            \n",
+    "    # reverse the accumulator as the path was populated in reverse\n",
+    "    eulerian_path.reverse()\n",
+    "    \n",
+    "    return eulerian_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 142,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def traverse_graph_alt(graph, start):\n",
+    "    \n",
+    "    # to collect all eulerian paths/cycles in a graph\n",
+    "    all_trails = list()\n",
+    "    \n",
+    "    # the eularian tour of the entire graph\n",
+    "    tour = []\n",
+    "    tour.append(start)\n",
+    "    \n",
+    "    skip_trail = True\n",
+    "    \n",
+    "    while (True):\n",
+    "        # start an eulerian trail\n",
+    "        trail = []\n",
+    "        \n",
+    "        curr_node = start\n",
+    "        \n",
+    "        # traverse a trail until we can't go further\n",
+    "        while (True):\n",
+    "            \n",
+    "            # terminate if can't go further\n",
+    "            if curr_node not in graph:\n",
+    "                break\n",
+    "            \n",
+    "            # pick a next node\n",
+    "            next_node = graph[curr_node].pop()\n",
+    "            \n",
+    "            # if the adjacency list becomes emtpy for the current node, delete\n",
+    "            if len(graph[curr_node]) == 0:\n",
+    "                del graph[curr_node]\n",
+    "            \n",
+    "            # append next node to trail\n",
+    "            trail.append(next_node)\n",
+    "            \n",
+    "            # if we circle back to start we have covered the trail\n",
+    "            if next_node == start:\n",
+    "                break;\n",
+    "            \n",
+    "            # if not move on to next node\n",
+    "            curr_node = next_node\n",
+    "        \n",
+    "        # we skip adding the first trail as it would reflect in the tour\n",
+    "        if skip_trail == False:\n",
+    "            # after finishing a trail, add it to all tours\n",
+    "            all_trails.append(list(trail))\n",
+    "        \n",
+    "        skip_trail = False\n",
+    "        \n",
+    "        # where to append the trail in the tour\n",
+    "        append_at = tour.index(start)\n",
+    "        \n",
+    "        # introducing the trail inbetween the tour\n",
+    "        tour = tour[:append_at+1] + trail + tour[append_at+1:]\n",
+    "        \n",
+    "        # done if no more nodes to explore\n",
+    "        if len(graph) == 0:\n",
+    "            break\n",
+    "            \n",
+    "        new_start_possible = False\n",
+    "        \n",
+    "        for node in tour:\n",
+    "            if node in graph:\n",
+    "                start = node\n",
+    "                new_start_possible = True\n",
+    "                break\n",
+    "                \n",
+    "        if not new_start_possible:\n",
+    "            print(\"error, tour exploration terminated with remaining graph:\")\n",
+    "            print(graph)\n",
+    "            break\n",
+    "            \n",
+    "    return tour, all_trails"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def eulerian_trail(m,v):\n",
+    "    nemap = m\n",
+    "    result_trail = []\n",
+    "    start = v\n",
+    "    result_trail.append(start)\n",
+    "    while(True):\n",
+    "        trail = []\n",
+    "        previous = start\n",
+    "        while(True):\n",
+    "            \n",
+    "            if(previous not in nemap):\n",
+    "                break\n",
+    "            next = nemap[previous].pop()\n",
+    "            if(len(nemap[previous]) == 0):\n",
+    "                nemap.pop(previous,None)\n",
+    "            trail.append(next)\n",
+    "            if(next == start):\n",
+    "                break;\n",
+    "            previous = next\n",
+    "        # completed one trail\n",
+    "        print(trail)\n",
+    "        index = result_trail.index(start)\n",
+    "        result_trail = result_trail[0:index+1] + trail + result_trail[index+1:len(result_trail)]\n",
+    "        # choose new start\n",
+    "        if(len(nemap)==0):\n",
+    "            break\n",
+    "        found_new_start = False\n",
+    "        for n in result_trail:\n",
+    "            if n in nemap:\n",
+    "                start = n\n",
+    "                found_new_start = True\n",
+    "                break # from for loop\n",
+    "        if not found_new_start:\n",
+    "            print(\"error\")\n",
+    "            print(\"result_trail\",result_trail)\n",
+    "            print(nemap)\n",
+    "            break\n",
+    "    return result_trail"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 161,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def align_to_reference(assembled_seq, args):\n",
+    "    return []"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if __name__ == \"__main__\":\n",
+    "    # add the command line arguments\n",
+    "    parser = argparse.ArgumentParser()\n",
+    "    parser.add_argument(\"--k\", default=30, help=\"k-mer size\")\n",
+    "    parser.add_argument(\"--threshold\", default=2, help=\"min count of k-mers to consider\")\n",
+    "    parser.add_argument(\"--input\", default=\"sars-cov-2-trimmed/SE-SW-4-15\", help=\"input directory\")\n",
+    "    parser.add_argument(\"--reference\", default=None, help=\"reference genome\")\n",
+    "\n",
+    "    args = parser.parse_args()\n",
+    "\n",
+    "    # each file is a seperate sequence\n",
+    "    sequences = read_sequences(args.input)\n",
+    "\n",
+    "    for sequence in sequences:\n",
+    "        kmers = split_into_kmers(sequence, args.k, args.threshold)\n",
+    "        k1mers = split_into_kmers(sequence, args.k - 1, 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k = 30\n",
+    "threshold = 2\n",
+    "input_dir = \"./sars-cov-2-trimmed/SE-SW-4-15\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[WinError 3] The system cannot find the path specified: './sars-cov-2-trimmed/SE-SW-4-15'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-31-1e231103bb02>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msequences\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mread_sequences\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_dir\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[1;32m<ipython-input-2-d55c8aca5b85>\u001b[0m in \u001b[0;36mread_sequences\u001b[1;34m(directory)\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[1;33m:\u001b[0m\u001b[1;32mreturn\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m     \"\"\"\n\u001b[1;32m----> 7\u001b[1;33m     \u001b[0mfiles\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      8\u001b[0m     \u001b[0msequences\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 3] The system cannot find the path specified: './sars-cov-2-trimmed/SE-SW-4-15'"
+     ]
+    }
+   ],
+   "source": [
+    "sequences = read_sequences(input_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kmers = [\"ATG\", \"GGT\", \"GTG\", \"TAT\", \"TGC\", \"TGG\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kmers = [\"AAA\", \"AAB\", \"ABB\", \"BBB\", \"BBA\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 190,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kmers = [\"ATG\", \"TGG\", \"GGC\", \"GCG\", \"CGT\", \"GCA\", \"GTG\", \"CAA\", \"AAT\", \"TGC\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kmers = [\"ATG\", \"TGC\", \"GCT\", \"CTA\", \"TAG\", \"AGC\", \"GCA\", \"CAC\", \"ACA\", \"CAT\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kmers = ['ATCG', 'TCGT', 'CGTT', 'GTTG', 'TTGC', 'TGCG', 'GCGC', 'CGCG', 'GCGA', 'CGAC', 'GACC', 'ACCG']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nodes, edges, start = generate_de_bruijin_graph_alt(kmers)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if start == None:\n",
+    "    nodes = list(nodes)\n",
+    "    start = nodes[0]\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'ATC'"
+      ]
+     },
+     "execution_count": 133,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "start"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('ATC', 'TCG'),\n",
+       " ('TCG', 'CGT'),\n",
+       " ('CGT', 'GTT'),\n",
+       " ('GTT', 'TTG'),\n",
+       " ('TTG', 'TGC'),\n",
+       " ('TGC', 'GCG'),\n",
+       " ('GCG', 'CGC'),\n",
+       " ('CGC', 'GCG'),\n",
+       " ('GCG', 'CGA'),\n",
+       " ('CGA', 'GAC'),\n",
+       " ('GAC', 'ACC'),\n",
+       " ('ACC', 'CCG')]"
+      ]
+     },
+     "execution_count": 134,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "edges"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'ACC',\n",
+       " 'ATC',\n",
+       " 'CCG',\n",
+       " 'CGA',\n",
+       " 'CGC',\n",
+       " 'CGT',\n",
+       " 'GAC',\n",
+       " 'GCG',\n",
+       " 'GTT',\n",
+       " 'TCG',\n",
+       " 'TGC',\n",
+       " 'TTG'}"
+      ]
+     },
+     "execution_count": 135,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'ATC'"
+      ]
+     },
+     "execution_count": 136,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "start"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "graph = make_node_edge_map(edges)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'ATC': ['TCG'],\n",
+       " 'TCG': ['CGT'],\n",
+       " 'CGT': ['GTT'],\n",
+       " 'GTT': ['TTG'],\n",
+       " 'TTG': ['TGC'],\n",
+       " 'TGC': ['GCG'],\n",
+       " 'GCG': ['CGC', 'CGA'],\n",
+       " 'CGC': ['GCG'],\n",
+       " 'CGA': ['GAC'],\n",
+       " 'GAC': ['ACC'],\n",
+       " 'ACC': ['CCG']}"
+      ]
+     },
+     "execution_count": 138,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "graph"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 139,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path = traverse_graph(graph, start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['ATC',\n",
+       " 'TCG',\n",
+       " 'CGT',\n",
+       " 'GTT',\n",
+       " 'TTG',\n",
+       " 'TGC',\n",
+       " 'GCG',\n",
+       " 'CGC',\n",
+       " 'GCG',\n",
+       " 'CGA',\n",
+       " 'GAC',\n",
+       " 'ACC',\n",
+       " 'CCG']"
+      ]
+     },
+     "execution_count": 140,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "path[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[['CGC', 'GCG']]"
+      ]
+     },
+     "execution_count": 141,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "path[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'eulerian_trail' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-19-61fa5403916e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0meulerian_trail\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstart\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m: name 'eulerian_trail' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "path = eulerian_trail(graph, start)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dict = {'Name': 'Zara', 'Age': 7}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n = dict.setdefault('Name', 'Tara')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Zara\n"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/assembler.py b/assembler.py
index ce7319b..10bd1f5 100644
--- a/assembler.py
+++ b/assembler.py
@@ -66,6 +66,51 @@ def generate_de_bruijin_graph(kmers):
     # Return the nodes, edges and the starting nodes in the graph.
     return nodes, edges, start_nodes
 
+def generate_de_bruijin_graph_alt(kmers):
+    
+    # to keep track of degree of nodes
+    node_degree = {}
+    
+    # Set of all nodes in the DB Graph
+    nodes = set()
+    
+    # Set of nodes having in-degrees
+    not_starts = set()
+    
+    # List of all directed edges in the graph
+    edges = []
+
+    for kmer in kmers:
+        
+        # From k-mers, get k-1mers
+        prefix = kmer[:-1]
+        suffix = kmer[1:]
+        
+        # Add k-1 mers to the nodes list
+        nodes.add(prefix)
+        nodes.add(suffix)
+
+        # Add an edge between the two k-1mers
+        edges.append((prefix, suffix))
+        
+        # fetching and updating degrees
+        p_count = node_degree.setdefault(prefix, 0)
+        node_degree[prefix] = p_count+1
+        s_count = node_degree.setdefault(suffix, 0)
+        node_degree[suffix] = s_count-1
+    
+    # reverse mapping to figure out start and stop nodes
+    degree_node = {v:k for k, v in node_degree.items() if v != 0}
+    
+    start = None
+    
+    # reverse mapping would be empty if there is not eulerian path, but only cycles
+    if len(degree_node) > 0:
+        start_stop = sorted(degree_node, reverse=True)
+        start = degree_node[start_stop[0]]
+        
+    # Return the nodes, edges and the starting nodes in the graph.
+    return nodes, edges, start
 
 def make_node_edge_map(edges):
 
@@ -77,16 +122,138 @@ def make_node_edge_map(edges):
         n = e[0]
         # If start node exists, add destination node to adjacency list
         if n in node_edge_map:
-            node_edge_map[n].append(e[1])
+            node_edge_map[n].add(e[1])
         # Add start node to map and initialize the adjacency list with the destination node
         else:
-            node_edge_map[n] = [e[1]]
+            adjacency_set = set() # using a set to support faster udpates
+            adjacency_set.add(e[1])
+            node_edge_map[n] = adjacency_set
+
     return node_edge_map
 
+def traverse_graph(graph, start):
+    # if there is no explicit start node
+    if len(start) == 0:
+        # pick any node as the start
+        start = list(graph.keys())[0]
+    else:
+        start = start[0]
 
-def traverse_graph(graph):
-    return []
+    # maintain a stack to store the nodes to visit
+    path = [start]
+
+    # accumulates the eulerian path
+    eulerian_path = []
+
+    # while stack is non-empty
+    while path:
+        # pick up the topmost node in the stack
+        curr_node = str(path[-1])
+
+        # if the current node is a key and has entries in its ajacency list
+        if curr_node in graph and graph[curr_node]:
+
+            # get the adjacency list
+            adj_nodes = graph[curr_node]
+            next_node = None
+
+            # if only one entry, proceed to that node
+            if len(adj_nodes) == 1:
+                next_node = adj_nodes.pop()
+
+            # if not, proceed to next node that would allow us to traverse
+            # the rest of the graph
+            else:
+                # iterate through all adj_nodes
+                for node in adj_nodes:
+                    # if the node leads to other nodes, set that as next
+                    if node in graph.keys():
+                        next_node = node
+                        break
+                # clear the edge from the current node
+                graph[curr_node].remove(next_node)
+            # update the path we want to explore
+            path.append(next_node)
+        else:
+            # if we can go any further, append the node to the path
+            eulerian_path.append(path.pop())
+    # reverse the accumulator as the path was populated in reverse
+    eulerian_path.reverse()
+
+    return eulerian_path
 
+def traverse_graph_alt(graph, start):
+    
+    # to collect all eulerian paths/cycles in a graph
+    all_trails = list()
+    
+    # the eularian tour of the entire graph
+    tour = []
+    tour.append(start)
+    
+    skip_trail = True
+    
+    while (True):
+        # start an eulerian trail
+        trail = []
+        
+        curr_node = start
+        
+        # traverse a trail until we can't go further
+        while (True):
+            
+            # terminate if can't go further
+            if curr_node not in graph:
+                break
+            
+            # pick a next node
+            next_node = graph[curr_node].pop()
+            
+            # if the adjacency list becomes emtpy for the current node, delete
+            if len(graph[curr_node]) == 0:
+                del graph[curr_node]
+            
+            # append next node to trail
+            trail.append(next_node)
+            
+            # if we circle back to start we have covered the trail
+            if next_node == start:
+                break;
+            
+            # if not move on to next node
+            curr_node = next_node
+        
+        # we skip adding the first trail as it would reflect in the tour
+        if skip_trail == False:
+            # after finishing a trail, add it to all tours
+            all_trails.append(list(trail))
+        
+        skip_trail = False
+        
+        # where to append the trail in the tour
+        append_at = tour.index(start)
+        
+        # introducing the trail inbetween the tour
+        tour = tour[:append_at+1] + trail + tour[append_at+1:]
+        
+        # done if no more nodes to explore
+        if len(graph) == 0:
+            break
+            
+        new_start_possible = False
+        
+        for node in tour:
+            if node in graph:
+                start = node
+                new_start_possible = True
+                break
+                
+        if not new_start_possible:
+            print("error, tour exploration terminated with remaining graph:")
+            print(graph)
+            break
+            
+    return tour, all_trails
 
 def align_to_reference(assembled_seq, args):
     return []
@@ -108,6 +275,14 @@ def align_to_reference(assembled_seq, args):
     for sequence in sequences:
         kmers = split_into_kmers(sequence, args.k, args.threshold)
         k1mers = split_into_kmers(sequence, args.k - 1, 0)
+        nodes, edges, start = generate_de_bruijin_graph_alt(kmers)
+        # start would be None if there is not eulerian path, but only cycles
+        if start == None:
+            nodes = list(nodes)
+            start = nodes[0]
+        graph = make_node_edge_map(edges)
+        tour, all_trails = traverse_graph(graph, start)
+    
     #     graph = generate_de_bruijin_graph(kmers, k1mers, args)  # save the graph for plotting
     #     assembled_seq = traverse_graph(graph)
     #     mismatches = align_to_reference(assembled_seq, args)  # save the mismatches for plotting