From 81a9ab85b91b0c530f9faeedfa4e7e0d68a5e8c2 Mon Sep 17 00:00:00 2001 From: Adam English Date: Thu, 3 Aug 2023 12:02:30 -0400 Subject: [PATCH] Increasing phab determinism using `pool.imap_unordered` for haplotype creation made non-deterministic fastas. this resulted in fake_mafft needing multiple, redundant lookup md5sum. Furthermore, mafft appears to be sensitive to input sequence order which causes different msa results. Switching to `pool.imap` may be slightly slower but still keeps the memory saving compared to `pool.map` while also making phab results deterministic --- docs/api/truvari.rst | 5 + .../fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa | 55 + .../fm_047c63d2654e7637150b4279117c89de.msa | 85 + .../fm_04f59078ccc583035a295f8ad3526dbf.msa | 115 + .../fm_05d37467dfbfe82df764ccf557e66822.msa | 75 + .../fm_06b42ffdbc8be646eb8718b5d6951dcc.msa | 195 + .../fm_0b1f7552f96468523661603cd4269fe6.msa | 95 + .../fm_0c9dec1fcbfa8df4c43412bac17355db.msa | 145 + .../fm_0dad6d58073d73f50143dbc812c12c48.msa | 280 ++ .../fm_0e637d93ff26bf168ee302bf13d84ca8.msa | 80 + .../fm_141ad6632a3577b19e9a4616ecdf6bbe.msa | 190 + .../fm_146844a1e2baa87f3462a854600507de.msa | 50 + .../fm_14df5b1a78900153927390d7bafee41c.msa | 180 + .../fm_16a9570a0bae9b86665a101c47ea2a7e.msa | 115 + .../fm_16e133fb197adcc9bd932887b742868b.msa | 45 + .../fm_19907622767ec08f4f2a547410d2ecfb.msa | 75 + .../fm_2009907a3d9c420147c4ae73f6df2941.msa | 120 + .../fm_205f920d471a6a80b754572645188116.msa | 65 + .../fm_21e1c5c85ccc2580c7576396ab027d39.msa | 130 + .../fm_2ad6d5e80b30a1cebe34e993b5700918.msa | 55 + .../fm_31498c5319740498de0c72700ff85d56.msa | 140 + .../fm_34829e853039a0b83260fb740af39cda.msa | 110 + .../fm_376415b15834e0e44f2cec357e45842b.msa | 295 ++ .../fm_37a09937c45cf5969ce12d15082abfbb.msa | 60 + .../fm_392581e26c6c6574757f793b72e768f0.msa | 100 + .../fm_3a4684a9cebe728f709cb7d7bf349ea7.msa | 55 + .../fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa | 185 + .../fm_4857d0eafa15c85843680e3bfaf22f43.msa | 165 + .../fm_4891d56deb85c16ff9f7722808d6cd91.msa | 90 + .../fm_4abdcd3bd686c6653b94c23219e870d1.msa | 170 + .../fm_4d5d51153a3589a4d6cd04e486f7c879.msa | 190 + .../fm_4dff1cfc21c902e0e7568be207ccbd40.msa | 100 + .../fm_56e2cc0318de6e4a5452f63745c5e250.msa | 75 + .../fm_5c074fb37f70d5e11502be5c009c13e2.msa | 105 + .../fm_652adf9a4e7824ea572dca71a0562a04.msa | 100 + .../fm_66a3a22d415ebc8e9c28636a422a27fb.msa | 65 + .../fm_67b17e44da59d4e4cbdbdfe00749d51f.msa | 240 + .../fm_67fd23689620e3340fe2a29e2ea77933.msa | 155 + .../fm_69a6cc9af825b6514565ccdd59336cd0.msa | 105 + .../fm_709f39dc3727958f3af64bf428a7aa8c.msa | 50 + .../fm_71fa13a9fc89d5f2aef29ccb10589a60.msa | 250 + .../fm_72f8e0bf8162cc30b5455b515c643479.msa | 75 + .../fm_73d49ee7e8516143213fa7125202aeab.msa | 50 + .../fm_76590aeb93d34a21c775c3a21a1e3bdf.msa | 60 + .../fm_79d4b9c2180cea7a5edfdeebbbf56834.msa | 385 ++ .../fm_7ef060f5be4ce0075857e188d7852082.msa | 4375 +++++++++++++++++ .../fm_7fd9ee27216345e87277b3eb261479c6.msa | 150 + .../fm_85aa1e76d010d424a529fed9eb355830.msa | 165 + .../fm_861c06fcaf8a2444ad0e8a092dbbb547.msa | 80 + .../fm_89449e11a504a87076c2d55f89a99cbd.msa | 85 + .../fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa | 140 + .../fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa | 180 + .../fm_8d91e35548435d3392527d244ebe8371.msa | 260 + .../fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa | 50 + .../fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa | 85 + .../fm_918ac60720a8b1e5a939e5ef71c3dd30.msa | 70 + .../fm_946b797ceaa32d6c1b711e085ad7903e.msa | 180 + .../fm_94d9fc221f90d8b60251faa405fdfde1.msa | 110 + .../fm_95ddd1fba647caa20dd06cdf75db78e8.msa | 475 ++ .../fm_9977644ffc467b937358e85a6ba26103.msa | 155 + .../fm_9be1536e40a58a7128ddc75d12940a3e.msa | 225 + .../fm_9f052ef8e75238e8dd45906b8a291987.msa | 205 + .../fm_a3b02bde8a503aab8bb0da7f33f98a9d.msa | 130 + .../fm_ae16a0bdeb31c91e26795edee8ad33fb.msa | 660 +++ .../fm_afe59e2c01695d7d2653b661248a80ef.msa | 50 + .../fm_b6f4c42dfe3dd25d3029d99aa8472281.msa | 105 + .../fm_b778319de388d363fb74713d18656b39.msa | 365 ++ .../fm_b8a80f7b3dfaf0a255d5fa6ea4cdadc6.msa | 90 + .../fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa | 125 + .../fm_bd5862045dd8751a6c83a3c3d6321d88.msa | 75 + .../fm_be4bf563ccc78c76f28205e487e0c322.msa | 70 + .../fm_c35059068940421b229c17e87e720ab8.msa | 145 + .../fm_c3e3f9602fee97132e713a8085d0122f.msa | 350 ++ .../fm_c7c98c4fca6fe269a9994162cd302ea0.msa | 170 + .../fm_cc4f07f5b329711758a6a183602ba892.msa | 75 + .../fm_d00295e693e29721bd316e2a98d6b49d.msa | 80 + .../fm_d27af66810a53f2070224212487e9431.msa | 345 ++ .../fm_d342db009b70a5841aa5b850478f78c4.msa | 30 + .../fm_dbde05d75f12d66b45769c5ffa50cc28.msa | 95 + .../fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa | 65 + .../fm_e0a1b90b66779bb5b8c7086145d6f100.msa | 40 + .../fm_e19b79eecb589d0faff3fd8eba62aeef.msa | 120 + .../fm_e42e2915a756c6b9535d501ffcc3f9b1.msa | 70 + .../fm_eee9610d6083d41536c3c93d53b55f73.msa | 90 + .../fm_ef7cdfd52679d33bb0bc51f849df45e1.msa | 225 + .../fm_fbbef252bd5883cf143b0f4c4649c512.msa | 195 + .../fm_ff308abc6d0e72bcf1670b2c17001984.msa | 95 + truvari/msatovcf.py | 2 +- truvari/phab.py | 3 +- 89 files changed, 16382 insertions(+), 3 deletions(-) create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_2009907a3d9c420147c4ae73f6df2941.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_2ad6d5e80b30a1cebe34e993b5700918.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_7ef060f5be4ce0075857e188d7852082.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_7fd9ee27216345e87277b3eb261479c6.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_a3b02bde8a503aab8bb0da7f33f98a9d.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_b778319de388d363fb74713d18656b39.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_b8a80f7b3dfaf0a255d5fa6ea4cdadc6.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_d342db009b70a5841aa5b850478f78c4.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa create mode 100644 repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa diff --git a/docs/api/truvari.rst b/docs/api/truvari.rst index 30333c35..ded7f385 100644 --- a/docs/api/truvari.rst +++ b/docs/api/truvari.rst @@ -223,6 +223,11 @@ BenchOutput .. autoclass:: BenchOutput :members: +StatsBox +^^^^^^^^ +.. autoclass:: StatsBox + :members: + GT ^^ .. autoclass:: GT diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa new file mode 100644 index 00000000..b67a5751 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa @@ -0,0 +1,55 @@ +>syndip_1_chr20:16395049-16395522 +acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt +gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtcaaggcagaaa +ggagatgaggaaggggagaggaggggagaggagggaagaggaggggaggggaggggagag +gaggagaggggaggagaggggaggccaagggaggggaggggaggggaggagaggggaggc +caagggaggggaggggaggggaggagaggggaggccaagggaggggaggggaggggagga +gaggggaggccaagggaggggaggggaggagaggggaggccaagggaggggaggggaggg +gaggagaggggaggccaagggaggggaggggaggggaggagaggggaggccaagggagaa +gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc +atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg +gtattcaaa +>syndip_2_chr20:16395049-16395522 +acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt +gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtccaggcagaaa +ggagatgaggaaggggagaggaggggagaggaggggag---------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------gggaggggaggggagaggaggggaggccaagggagaa +gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc +atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg +gtattcaaa +>p:HG002_1_chr20:16395049-16395522 +acagtcatggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcattggt +gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtccaggcagaaa +ggagatgaggaaggggagaggaggggagaggaggggag---------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------gggaggggaggggagaggaggggaggccaagggagaa +gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc +atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg +gtattcaaa +>p:HG002_2_chr20:16395049-16395522 +acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt +gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtcaaggcagaaa +ggagatgaggaaggggagaggaggggagaggagggaagaggaggggaggggaggggagag +gaggagaggggaggagaggggaggccaagggaggggaggggaggggaggagaggggaggc +caagggaggggaggggaggggaggagaggggaggccaagggaggggaggggaggggagga +gaggggaggccaagggaggggaggggaggagaggggaggccaagggaggggaggggaggg +gaggagaggggaggccaagggaggggaggggaggggaggagaggggaggccaagggagaa +gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc +atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg +gtattcaaa +>ref_chr20:16395049-16395522 +acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt +gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtcaaggcagaaa +ggagatgaggaaggggagaggaggggagaggagggaagaggaggggaggggaggggagag +gaggagaggggaggagaggggaggccaagggaggggaggggaggggaggagaggggaggc +caa-----gggaggggaggggaggagaggggaggccaagggaggggaggggaggggagga +gaggggaggccaa----------------------------------------------- +-----------------------gggaggggaggggaggagaggggaggccaagggagaa +gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc +atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg +gtattcaaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa new file mode 100644 index 00000000..0cb1fc7f --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa @@ -0,0 +1,85 @@ +>syndip_1_chr20:62360090-62360813 +aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca +gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt +gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca +gatggatggaaacaaggaataaagggtgggtgctgcataggtaggggggtgggtgggtgg +atggatgggtagggggtgggtgggtggatacgttgggggtgggtgggtgggtgggtcagc +aggcagagggatgggataggataggtgggtgggtgggtggatagggagggggtggggtgg +gtgcataggtaggggggtggatgggtcagcaggcagcgggatgggataggataggtgggt +gggtggagggatagatgggtggtgggtgggtggagggatagatgggtgggtgggtgggta +gaaggataggtaggtgggtgggtgggtggatggataggtggatgggtgggtggaggaata +gatgggtgggtgggtgggtgggtggatggataggtgggtgggtgggtggagggatagatg +ggtgggtggatgggtgggtggagggatagatgggtggtgggtgggtggagggatagatgg +gtgggtgggtgggtagaaggataggtaggtgggtgggtgggtggatggataggtgggtgg +gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg +gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg +agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc +>syndip_2_chr20:62360090-62360813 +aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca +gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt +gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca +gatggatggaaacaaggaataaagggtgggtgctgcataggtaggggggtgggtgggtgg +atggatgggtagggggtgggtgggtggatacgttgggggtgggtgggtgggtgggtcagc +aggcaga----------------------------------------------------- +-------------------------------------gggatgggataggataggtgggt +gggtggagggatagatgggtggtgggtgggtggagggatagatgggtgggtggagggata +----------------------------------------gatgggtgggtgggtagaag +gataggtaggtgggtgggtgggtggatggataggtggatgggtgggtggaggaatagatg +ggtg-------------------------------------------------------- +---------------------------ggtgggtgggtgggtggatggataggtgggtgg +gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg +gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg +agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc +>p:HG002_1_chr20:62360090-62360813 +aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca +gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt +gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca +gatggatggaaacaaggaataaagggtgggtgctgcataggtaggggggtgggtgggtgg +atggatgggtagggggtgggtgggtggatacgttgggggtgggtgggtgggtgggtcagc +aggcaga----------------------------------------------------- +-------------------------------------gggatgggataggataggtgggt +gggtggagggatagatgggtggtgggtgggtggagggatagatgggtgggtggagggata +----------------------------------------gatgggtgggtgggtagaag +gataggtaggtgggtgggtgggtggatggataggtggatgggtgggtggaggaatagatg +ggtg-------------------------------------------------------- +---------------------------ggtgggtgggtgggtggatggataggtgggtgg +gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg +gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg +agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc +>p:HG002_2_chr20:62360090-62360813 +aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca +gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt +gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca +gatggatggaaacaaggaataaagggtgggtgctgcataggtaggggggtgggtgggtgg +atggatgggtagggggtgggtgggtggatacgttgggggtgggtgggtgggtgggtcagc +aggcagagggatgggataggataggtgggtgggtgggtggatagggagggggtggggtgg +gtgcataggtaggggggtggatgggtcagcaggcagcgggatgggataggataggtgggt +gggtggagggatagatgggtggtgggtgggtggagggatagatgggtgggtgggtgggta +gaaggataggtaggtgggtgggtgggtggatggataggtggatgggtgggtggaggaata +gatgggtgggtgggtgggtgggtggatggataggtgggtgggtgggtggagggatagatg +ggtgggtggatgggtgggtggagggatagatgggtggtgggtgggtggagggatagatgg +gtgggtgggtgggtagaaggataggtaggtgggtgggtgggtggatggataggtgggtgg +gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg +gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg +agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc +>ref_chr20:62360090-62360813 +aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca +gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt +gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca +gatggatggaaacaaggaataaagggtgggtgctgcataggtaggggggtgggtgggtgg +atggatgggta-ggggtgggtgggtggatacgttgggggtgggtgggtgggtgggtcagc +aggcaga----------------------------------------------------- +-------------------------------------gggatgggataggataggtgggt +gggtggagggatgggtg---agtgggtgggtggagggatagatgggtgg----------- +------------------------------------------tgggtgggtggagggata +gatgggt-ggtgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatg +ggt--------------------------------------------------------- +---------------------------ggtgggtgggtggagggatagatgggtggatgg +gtgggtagaaggataggtaggtgggtgggtgggtggatgggtgggtggagggatagatgg +gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg +agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa new file mode 100644 index 00000000..60256db1 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa @@ -0,0 +1,115 @@ +>syndip_1_chr20:21721074-21721836 +agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt +cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag +accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat +attatatatgtaatgtatataatatatgtaatgtatataatatatatgtaatacatatat +attatatatgtatataatacatatatatgtatatatatgtatgtatatatatatgtatat +atgtatatatatatgtatataatacatatatatacatatatattatatatgtaatacata +tataatatatatgtaatgtatatatatgtaatgtatataatatatatgtaatacatatat +aatatatatgtaatgtatataatatatatgtaatacatatataatatatatgtaatgtat +attatatatgtaatgtatataatatgtatgtaatacatatattatatatgtaatgtatat +tatatatgtattacatatatgtattacatatataacatatatgtattacatatatgtatt +acatatataacatatatgtattacatatatgtattacatatataacatatatgtattaca +tatatgtattacatatataacatatatgtaatacatatatgtattacatatataacatat +atgtattacatatatgtattacatatataacatatatgtattacatatatgtattacata +tataacatatatgtattacatatatgtattacatatataacatatatgtattacatatat +gtattacatatataacatatatgtattacatatatgtaatacatatataacatatatgta +ttacatatatgtaatacatatataacatatatgtattacatatatgtaatacatatataa +catatatgtattacatatatgtaatacatatataac----atgtattacatatatgtaat +acatatataac--------------------------atgtattacatatatgtaataca +tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat +atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc +tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc +ctcccacctcaacctcct +>syndip_2_chr20:21721074-21721836 +agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt +cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag +accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat +attatatatgtaatgtatataatatatgtaatgtatataatacatatgtaatacatatat +attatatatgtatataatacatatatatgtatatatatgtatgtatatatatatgtatat +atgtatatatatatgtatataatacatatatatacatatatattatatatgtaatacata +tataatatatatgtaatgtatatatatgtaatgtatataatatatatgtaatacatatat +aatatatatgtaatgtatataatatatatgtaatacatatataatatatatgtaatgtat +attatatatgtaatgtatataatatgtatgtaatacatatattatatatgtaatgtatat +tatatatgtattacatatatgtattacatatataacatatatgtattacatatatgtatt +acatatataacatatatgtattacatatatgtattacatatataacatatatgtattaca +tatatgtattacatatataacatatatgtattacatatatgtattacatatataacatat +atgtattacatatatgtattacatatataacatatatgtattacatatatgtattacata +tataacatatatgtattacatatatgtattacatatataacatatatgtattacatatat +gtattacatatataacatatatgtattacatatatgtattacatatataacatatatgta +ttacatatatgtattacatatataacatatatgtattacatatatgtattacatatataa +catatatgtattacatatatgtaatacatatataacatatatgtattacatatatgtaat +acatatataacatatatgtaatacatatataacatatatgtattacatatatgtaataca +tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat +atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc +tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc +ctcccacctcaacctcct +>p:HG002_1_chr20:21721074-21721836 +agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt +cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag +accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat +attatatatgtaatgtatataatatatgtaatgtatataatacatatgtaatacatatat +attatatatgtatataatacatatatatgtatatatatgtatgtatatatatatgtatat +atgtatatatatatgtatataatacatatatatacatatatattatatatgtaatacata +tataatatatatgtaatgtatatatatgtaatgtatataatatatatgtaatacatatat +aatatatatgtaatgtatataatatatatgtaatacatatataatatatatgtaatgtat +attatatatgtaatgtatataatatgtatgtaatacatatattatatatgtaatgtatat +tatatatgtattacatatatgtattacatatataacatatatgtattacatatatgtatt +acatatataacatatatgtattacatatatgtattacatatataacatatatgtattaca +tatatgtattacatatataacatatatgtattacatatatgtattacatatataacatat +atgtattacatatatgtattacatatataacatatatgtattacatatatgtattacata +tataacatatatgtattacatatatgtattacatatataacatatatgtattacatatat +gtattacatatataacatatatgtattacatatatgtattacatatataacatatatgta +ttacatatatgtattacatatataacatatatgtattacatatatgtattacatatataa +catatatgtattacatatatgtaatacatatataacatatatgtattacatatatgtaat +acatatataacatatatgtaatacatatataacatatatgtattacatatatgtaataca +tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat +atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc +tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc +ctcccacctcaacctcct +>p:HG002_2_chr20:21721074-21721836 +agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt +cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag +accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat +attatatatgtaatgtatataatatatgtaatgtatataatatatatgtaatacatatat +attatatatgtatataatacatatatatgtatatatatgtatgtatatatatatgtatat +atgtatatatatatgtatataatacatatatatacatatatattatatatgtaatacata +tataatatatatgtaatgtatatatatgtaatgtatataatatatatgtaatacatatat +aatatatatgtaatgtatataatatatatgtaatacatatataatatatatgtaatgtat +attatatatgtaatgtatataatatgtatgtaatacatatattatatatgtaatgtatat +tatatatgtattacatatatgtattacatatataacatatatgtattacatatatgtatt +acatatataacatatatgtattacatatatgtattacatatataacatatatgtattaca +tatatgtattacatatataacatatatgtaatacatatatgtattacatatataacatat +atgtattacatatatgtattacatatataacatatatgtattacatatatgtattacata +tataacatatatgtattacatatatgtattacatatataacatatatgtattacatatat +gtattacatatataacatatatgtattacatatatgtaatacatatataacatatatgta +ttacatatatgtaatacatatataacatatatgtattacatatatgtaatacatatataa +catatatgtattacatatatgtaatacatatataac----atgtattacatatatgtaat +acatatataac--------------------------atgtattacatatatgtaataca +tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat +atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc +tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc +ctcccacctcaacctcct +>ref_chr20:21721074-21721836 +agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt +cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag +accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat +attatatatgtaatgtatataatatatgtaatgtatataatatatatgtaatacatatat +attatatatgtatataatacatatatatgtatatatatgtatgtatatatatatgtatat +atgtatatatatatgtatataatacatatatatacatatatattatatatgtaatacata +tataatatatatgtaatgt----------------------------------------- +-atatatatgtaatgtatataatatatatgtaatacatatataatatatatgtaatgtat +attatatatgtaatgtatataatatgtatgtaatacatatattatatatgtaatgtatat +tatatatgtattacatatatgtattacatatataacatatatgtattacatatatgtatt +acatatataacatatatgtattacatatatgtaatacacatataatatatatgttatata +tatatatat--------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--atataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc +tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc +ctcccacctcaacctcct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa new file mode 100644 index 00000000..36b5fec8 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa @@ -0,0 +1,75 @@ +>syndip_1_chr20:50775444-50776124 +tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa +aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc +caaagatttcc--------------------atatatatatatatatatatataaatata +aatatattctatatattcatatatatatattctatatattcatatatataaatatattct +atatattcatatatataaatatattctatatattcatatataaatatattccatatatat +aaatatattccatatatttatatataaatatatatcatatatttatatataaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatcatatattta +tatataaatatatatcatatatttatatataaatatatatcatatatttatatataatat +atatcatatattatatataaatatatatcatatatttatatataaatatatatcatatat +taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt +tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt +gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa +tt +>syndip_2_chr20:50775444-50776124 +tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa +aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc +caaagatttccatatatatatatatatatatatatatatatatatatatatataaatata +aatatattctatatattcatatatatatattctatatattcatatatataaatatattct +atatattcatatatataaatatattctatatattcatatataaatatattccatatatat +aaatatattc-------------------------catatatttatatataaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatcatatattta +tatataaatatatatcatatatttatatataaatatatatcatatatttatatataatat +atatcatatattatatataaatatatatcatatatttatatataaatatatatcatatat +taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt +tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt +gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa +tt +>p:HG002_1_chr20:50775444-50776124 +tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa +aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc +caaagatttccatatatatatatatatatatatatatatatatatatatatataaatata +aatatattctatatattcatatatatatattctatatattcatatatataaatatattct +atatattcatatatataaatatattctatatattcatatataaatatattccatatatat +aaatatattc-------------------------catatatttatatataaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatcatatattta +tatataaatatatatcatatatttatatataaatatatatcatatatttatatataatat +atatcatatattatatataaatatatatcatatatttatatataaatatatatcatatat +taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt +tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt +gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa +tt +>p:HG002_2_chr20:50775444-50776124 +tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa +aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc +caaagatttcc--------------------atatatatatatatatatatataaatata +aatatattctatatattcatatatatatattctatatattcatatatataaatatattct +atatattcatatatataaatatattctatatattcatatataaatatattccatatatat +aaatatattccatatatttatatataaatatatatcatatatttatatataaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatcatatattta +tatataaatatatatcatatatttatatataaatatatatcatatatttatatataatat +atatcatatattatatataaatatatatcatatatttatatataaatatatatcatatat +taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt +tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt +gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa +tt +>ref_chr20:50775444-50776124 +tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa +aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc +caaagatttccatatatatatatatatatatatatatatatatatatatatataaatata +aatatattctatatattcatatat---------------------------atatattct +atatattcatatatataaatatattctatatattcatatataaatatattccatatatat +aaatatattc-------------------------catatatttatatataaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatcatatattta +tatat-aatatatatcatatat-------------------------------------- +----------ttatatataaatatatatcatatatttatatataaatatatatcatatat +taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat +catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt +tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt +gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa +tt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa new file mode 100644 index 00000000..50ae4b6d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa @@ -0,0 +1,195 @@ +>syndip_1_chr20:61100692-61102788 +ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca +gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac +tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc +ctgtgccacatctggagagacccctgtgagggcagagagctacctcctcttcttcctgtg +tcacacctggagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcac +acctggagagaccccgagtgagggcagatggccacatcctctacttcctgtgtcacacct +ggggagaccctgagtgagggcagacagccatgtcctctacgtcctgtgccacatccggag +agaccccaagagagggcagacagccacctcctctacttcctgtgtcacacctggagagac +cccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggagagaccccg +agtgagggcagatggccacatcctctacttcctgtgtcacacct---------------- +------------------------------------------------------------ +------------------------------------ggggagaccctgagtgagggcaga +cagccatgtcctctacgtcctgtgccacatccggagagaccccaagagagggcagacagc +cacctcctctacttcctgtgtcacacctggagagactccaagtgaggagagacggccacg +tcttctacttcctgtgtcacacctggagagaccccgagtgagggtggatggccacatcct +ctaattcctgtgtcatacctggagagaccccgagtgagggcagatggccacctcctctac +ttcctgtgtcacacctggagagaccccgagtgagggcagactgccacctcctctacttcc +tgtgtcacacctagagagaccccgagtgaggacagccagccatgtcctctacttcctgtg +ccacatccggagagatcccgagtgagggcagacagccatgtcctctacttcctgtgtcac +acctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct +ggagagaccccgagtgagggcagacggccacctcctctacttcctgtgtcacacctggag +agaccccgagtgagggcagacggccacatcctctaattcctgtgtcatgcctggagagac +cccgagtgagggcagatggccacatcctctaattcctatgtcacacctggagagaccccg +agtgagggtggatggccacatcctctaattcctgtgtcatacctggagagaccccgagtg +agggcagatggccacctcctctacttcctgtgtcacacctggagagaccccgagtgaggg +cagatggccacctcctctacttcctgtgtcacacctggagagaccccaagtgagggcaga +cagccatgtcctctacgtcctgtgccacatccggagagaccccga-tgagggcagacagc +taccccctctacttcctgtgtcacacctgcagagaccccaagtgagggcagatggccacc +tcttctacttcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcct +ctacttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctat +ttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctaattcc +tgtgtcacacatggagagaccccgagtgagggcagaaggccacatcctctacttcctgtg +tcacacctagagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcac +acctggggagaccccgagtgagggcagatggccacatcctctaattcctgtgtcacacct +ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag +acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg +tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta +aaaatgttatcaatttaataacattgatggttacagagctgtgtaa +>syndip_2_chr20:61100692-61102788 +ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca +gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac +tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc +ctgtgccacatctggagagacccctgtgagggcagagagctacctcctcttcttcctgtg +tcacacctggagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcac +acctggagagaccccgagtgagggcagatggccacatcctctacttcctgtgtcacacct +ggggagaccctgagtgagggcagacagccatgtcctctacgtcctgtgccacatccggag +agaccccaagagagggcagacagccacctcctctacttcctgtgtcacacctggagagac +tccaagtgaggagagacggccacgtcttctacttcctgtgtcacacctggagagacctcg +agtgagggcagatggccacatcctctaattcctgtgtcacacctggagaaaccccaagtg +agggcagactgccacctcctctacttcctgtgtcacacctagagagaccccgagtgagga +cagccagccatgtcctctacttcctgtgccacatccggagagatcccgagtgagggcaga +cagccatgtcctctacttcctgtgtcacacctggagagaccccgagtgagggcagacagc +cacatcctctaattcctgtgtcacacctggagagaccccgagtgagggcagacggctaca +tcctctaattcctgtgtcatgcctggagagaccccgagtgagggcagatggccacatcct +ctaattcctatgtcacacctggagagaccccgagtgagggtggatggccacatcctctaa +ttcctgtgtcatacctggagagaccccgagtgagggcagatggccacctcctctacttcc +tgtgtcacacctggagagaccccaagtgagggcagacagccatgtcctctacgtcctgtg +ccacatccggagagaccccga-tgagggcagacagctaccccctctacttcctgtgtcac +acctgcagagaccccaagtgagggcagatggccacctcttctacttcctgtgtcacacct +ggagaggccccgagtgagggcagacggccacctcctctacttcctgtgtcacacctggag +agaccccgagtgagggcagacggccacatcctctatttcctgtgtcacacctggagagac +cccgagtgagggcagacggccacatcctctaattcctgtgtcacacatggagagaccccg +agtgaaggcagaaggccacatcctctacttcctgtgtcacacctagagagaccccgagtg +agggcggatggccacatcctctaattcctatgtcacacctggagagaccccgagtgaggg +tggatggccacatcctctaattcctgtgtcatacctggagagaccccgagtgagggcaga +tggccacctcctctacttcctgtgtcacacctggagagaccccaagtgagggcagacagc +catgtcctctacgtcctgtgccacatccggagagaccccga-tgagggcagacagctacc +ccctctacttcctgtgtcacacctgcagagaccccaagtgagggcagatggccacctctt +ctacttcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcctctac +ttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctaattcc +tgtgtcacacatggagagaccctgagtgagggcagaaggccacatcctctacttcctgtg +tcacacctagagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcac +acctggggagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct +ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag +acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg +tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta +aaaatgttatcaatttaataacattgatggttacagagctgtgtaa +>p:HG002_1_chr20:61100692-61102788 +ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca +gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac +tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc +ctgtgccacatctggagagacccctgtgagggcagagagctacctcctcttcttcctgtg +tcacacctggagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcac +acctggagagaccccgagtgagggcagatggccacatcctctacttcctgtgtcacacct +ggggagaccctgagtgagggcagacagccatgtcctctacgtcctgtgccacatccggag +agaccccaagagagggcagacagccacctcctctacttcctgtgtcacacctggagagac +tccaagtgaggagagacggccacgtcttctacttcctgtgtcacacctggagagacctcg +agtgagggcagatggccacatcctctaattcctgtgtcacacctggagaaaccccaagtg +agggcagactgccacctcctctacttcctgtgtcacacctagagagaccccgagtgagga +cagccagccatgtcctctacttcctgtgccacatccggagagatcccgagtgagggcaga +cagccatgtcctctacttcctgtgtcacacctggagagaccccgagtgagggcagacagc +cacatcctctaattcctgtgtcacacctggagagaccccgagtgagggcagacggctaca +tcctctaattcctgtgtcatgcctggagagaccccgagtgagggcagatggccacatcct +ctaattcctatgtcacacctggagagaccccgagtgagggtggatggccacatcctctaa +ttcctgtgtcatacctggagagaccccgagtgagggcagatggccacctcctctacttcc +tgtgtcacacctggagagaccccaagtgagggcagacagccatgtcctctacgtcctgtg +ccacatccggagagaccccga-tgagggcagacagctaccccctctacttcctgtgtcac +acctgcagagaccccaagtgagggcagatggccacctcttctacttcctgtgtcacacct +ggagaggccccgagtgagggcagacggccacctcctctacttcctgtgtcacacctggag +agaccccgagtgagggcagacggccacatcctctatttcctgtgtcacacctggagagac +cccgagtgagggcagacggccacatcctctaattcctgtgtcacacatggagagaccccg +agtgaaggcagaaggccacatcctctacttcctgtgtcacacctagagagaccccgagtg +agggcggatggccacatcctctaattcctatgtcacacctggagagaccccgagtgaggg +tggatggccacatcctctaattcctgtgtcatacctggagagaccccgagtgagggcaga +tggccacctcctctacttcctgtgtcacacctggagagaccccaagtgagggcagacagc +catgtcctctacgtcctgtgccacatccggagagaccccga-tgagggcagacagctacc +ccctctacttcctgtgtcacacctgcagagaccccaagtgagggcagatggccacctctt +ctacttcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcctctac +ttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctaattcc +tgtgtcacacatggagagaccctgagtgagggcagaaggccacatcctctacttcctgtg +tcacacctagagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcac +acctggggagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct +ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag +acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg +tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta +aaaatgttatcaatttaataacattgatggttacagagctgtgtaa +>p:HG002_2_chr20:61100692-61102788 +ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca +gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac +tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc +ctgtgccacatctggagagacccctgtgagggcagagagctacctcctcttcttcctgtg +tcacacctggagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcac +acctggagagaccccgagtgagggcagatggccacatcctctacttcctgtgtcacacct +ggggagaccctgagtgagggcagacagccatgtcctctacgtcctgtgccacatccggag +agaccccaagagagggcagacagccacctcctctacttcctgtgtcacacctggagagac +cccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggagagaccccg +agtgagggcagatggccacatcctctacttcctgtgtcacacct---------------- +------------------------------------------------------------ +------------------------------------ggggagaccctgagtgagggcaga +cagccatgtcctctacgtcctgtgccacatccggagagaccccaagagagggcagacagc +cacctcctctacttcctgtgtcacacctggagagactccaagtgaggagagacggccacg +tcttctacttcctgtgtcacacctggagagaccccgagtgagggtggatggccacatcct +ctaattcctgtgtcatacctggagagaccccgagtgagggcagatggccacctcctctac +ttcctgtgtcacacctggagagaccccgagtgagggcagactgccacctcctctacttcc +tgtgtcacacctagagagaccccgagtgaggacagccagccatgtcctctacttcctgtg +ccacatccggagagatcccgagtgagggcagacagccatgtcctctacttcctgtgtcac +acctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct +ggagagaccccgagtgagggcagacggccacctcctctacttcctgtgtcacacctggag +agaccccgagtgagggcagacggccacatcctctaattcctgtgtcatgcctggagagac +cccgagtgagggcagatggccacatcctctaattcctatgtcacacctggagagaccccg +agtgagggtggatggccacatcctctaattcctgtgtcatacctggagagaccccgagtg +agggcagatggccacctcctctacttcctgtgtcacacctggagagaccccgagtgaggg +cagatggccacctcctctacttcctgtgtcacacctggagagaccccaagtgagggcaga +cagccatgtcctctacgtcctgtgccacatccggagagaccccga-tgagggcagacagc +taccccctctacttcctgtgtcacacctgcagagaccccaagtgagggcagatggccacc +tcttctacttcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcct +ctacttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctat +ttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctaattcc +tgtgtcacacatggagagaccccgagtgagggcagaaggccacatcctctacttcctgtg +tcacacctagagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcac +acctggggagaccccgagtgagggcagatggccacatcctctaattcctgtgtcacacct +ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag +acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg +tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta +aaaatgttatcaatttaataacattgatggttacagagctgtgtaa +>ref_chr20:61100692-61102788 +ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca +gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac +tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc +ctgtgccacatctggagagacccctgtgagggcagagagctacctcctcttcttcctgtg +tcacacctggagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcac +acctggagagaccccgagtgagggcagatggccacatcctctacttcctgtgtcacacct +ggggagaccctgagtgagggcagacagccatgtcctctacgtcctgtgccacatccggag +agaccccaagagagggcagacagccacctcctctacttcctgtgtcacacctggagagac +cccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggagagaccccg +agtgagggcagatggccacatcctctacttcctgtgtcacacct---------------- +------------------------------------------------------------ +------------------------------------ggggagaccctgagtgagggcaga +cagccatgtcctctacgtcctgtgccacatccggagagaccccaagagagggcagacagc +cacctcctctacttcctgtgtcacacctggagagactccaagtgaggagagacggccaca +tcttctacttcctgtgtcacacctggagagacctcgagtgagggcagatggccacatcct +ctaa-------------------------------------------------------- +ttcctgtgtcacacctggagagaccccaagtgagggcagactgccacctcctctacttcc +tgtgtcacacctagagagaccccgagtgaggacagccagccatgtcctctacttcctgtg +ccacatccggagagatcccgagtgagggcagacagccatgtcctctacttcctgtgtcac +acctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct +ggagagaccccgagtgagggcagacggccacctcctctacttcctgtgtcacacctggag +agaccccgagtgagggcagacggccacatcctctaattcctgtgtcatgcctggagagac +cccgagtgagggcatatggccacatcctctaattcctatgtcacacctggagagaccccg +agtgagggtggatggccacatcctctaattcctgtgtcatacctggagagaccccgagtg +agggcagatggccacctcctctacttcctgtgtcacacctggagagaccccgagtgaggg +cagatggccacctcctctacttcctgtgtcacacctggagagaccccaagtgagggcaga +cagccatgtcctctacgtcctgtgccacatccggagagaccccga-tgagggcagacagc +taccccctctacttcctgtgtcacacctgcagagaccccaagtgagggcagatggccacc +tcttctacttcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcct +ctacttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctat +ttcctgtgtcacacctggagagaccccgagtgagggcagacggccacatcctctaattcc +tgtgtcacacatggagagaccccgagtgagggcagaaggccacatcctctacttcctgtg +tcacacctagagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcac +acctggggagaccccgagtgagggcagatggccacatcctctaattcctgtgtcacacct +ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag +acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg +tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta +aaaatgttatcaatttaataacattgatggttacagagctgtgtaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa new file mode 100644 index 00000000..23695bd4 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa @@ -0,0 +1,95 @@ +>syndip_1_chr20:5039849-5040600 +agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt +gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg +tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat +ttggatatggaagagtgctgagaaggtgtggtggctcatgcctataatcctagtactttg +ggaggctgaggcaggaggatcacttaagtccaggagtttgagagcagcctgggcaacaaa +gtgagaccttgtctctacaaaaaataaacaaaattagctggtggtggcacctgcctgttg +tcccagctactcagggggctgaggtgggaggatggcttgagcctgggagattgaagcttc +agttagccatgatcatgccactgcgctccagcctgggtgacagagagagatcctgcctca +aaaaaaaaaataaaaataaaaaagaggggccgggtgtggtggctcaggcctgtaatctta +gcactctttgggaggctgagactggaggatcctttgagcccaggggtttgagaccagcct +gggcaacatagggagaacttgtctctattt------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------aaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct +caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca +acccggattagcctttt +>syndip_2_chr20:5039849-5040600 +agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt +gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg +tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat +ttggatatggaagagtgctgagaaggtgtggtggctcatgcctataatcctagtactttg +ggaggctgaggcaggaggatcacttaagtccaggagtttgagagcagcctgggcaacaaa +gtgagaccttgtctctacaaaaaataaacaaaattagctggtggtggcacctgcctgttg +tcccagctactcagggggctgaggtgggaggatggcttgagcctgggagattgaagcttc +agttagccatgatcatgccactgcgctccagcctgggtgacagagagagatcctgcctc- +aaaaaaaaaataaaaataaaaaagaggggccgggtgtggtggctcaggcctgtaatctta +gcactctttgggaggctgagactggaggatcctttgagcccaggggtttgagaccagcct +gggcaacatagggagaacttgtctctattt------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------aaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct +caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca +acccggattagcctttt +>p:HG002_1_chr20:5039849-5040600 +agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt +gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg +tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat +ttggatatggaagagtgctgagaaggtgtggtggctcatgcctataatcctagtactttg +ggaggctgaggcaggaggatcacttaagtccaggagtttgagagcagcctgggcaacaaa +gtgagaccttgtctctacaaaaaataaacaaaattagctggtggtggcacctgcctgttg +tcccagctactcagggggctgaggtgggaggatggcttgagcctgggagattgaagcttc +agttagccatgatcatgccactgcgctccagcctgggtgacagagagagatcctgcctc- +aaaaaaaaaataaaaataaaaaagaggggccgggtgtggtggctcaggcctgtaatctta +gcactctttgggaggctgagactggaggatcttttgagcccaggggtttgagaccagcct +gggcaacatagggagaacttgtctctatttaaaaaaaaaggccgggcgcggtggctcacg +cctgtaatcccagcactttgggaggccgaggcgggtggatcatgaggtcaggagatcgag +accatcctggctaaaacggtgaaaccccgtctctactaaaaatacaaaaaattagccggg +cgcggtggcgggcgcctaggcaggagaatggcgtgaacccgggaagcggagcttgcagtg +agccgagattgcgccactgcagtccgcagtccgacctgggcgacagagagagactccgtc +tcaaaaaaaaaaaaaaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct +caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca +acccggattagcctttt +>p:HG002_2_chr20:5039849-5040600 +agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt +gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg +tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat +ttggatatggaagagtgctgagaaggtgtggtggctcatgcctataatcctagtactttg +ggaggctgaggcaggaggatcacttaagtccaggagtttgagagcagcctgggcaacaaa +gtgagaccttgtctctacaaaaaataaacaaaattagctggtggtggcacctgcctgttg +tcccagctactcagggggctgaggtgggaggatggcttgagcctgggagattgaagcttc +agttagccatgatcatgccactgcgctccagcctgggtgacagagagagatcctgcctca +aaaaaaaaaataaaaataaaaaagaggggccgggtgtggtggctcaggcctgtaatctta +gcactctttgggaggctgagactggaggatcttttgagcccaggggtttgagaccagcct +gggcaacatagggagaacttgtctctattttaaaaaaaaggccgggcgcggtggctcacg +cctgtaatcccagcactttgggaggccgaggcgggtggatcatgaggtcaggagatcgag +accatcctggctaaaacggtgaaaccccgtctctactaaaaatacaaaaaattagccggg +cgcggtggcgggcgcctaggcaggagaatggcgtgaacccgggaagcggagcttgcagtg +agccgagattgcgccactgcagtccgcagtccgacctgggcgacagagagagactccgtc +tc---aaaaaaaaaaaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct +caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca +acccggattagcctttt +>ref_chr20:5039849-5040600 +agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt +gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg +tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat +ttggatatggaagagtgctgagaaggtgtggtggctcatgcctataatcctagtactttg +ggaggctgaggcaggaggatcacttaagtccaggagtttgagagcagcctgggcaacaaa +gtgagaccttgtctctacaaaaaataaacaaaattagctggtggtggcacctgcctgttg +tcccagctactcagggggctgaggtgggaggatggcttgagcctgggagattgaagcttc +agttagccatgatcatgccactgcgctccagcctgggtgacagagagagatcctgcctc- +aaaaaaaaaataaaaataaaaaagaggggccgggtgtggtggctcaggcctgtaatctta +gcactctttgggaggctgagactggaggatcctttgagcccaggggtttgagaccagcct +gggcaacatagggagaacttgtctctattt------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------aaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct +caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca +acccggattagcctttt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa new file mode 100644 index 00000000..b856638b --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa @@ -0,0 +1,145 @@ +>syndip_1_chr20:61282406-61283770 +ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca +gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa +tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta +cttgtacctgcacacacaactggatgagcatggtgtgtgtgtacacatgtattgtgttca +tatgtacatatgtgggtgccttgtgcacacgtgtgtggctgtgggatgttcagatgtgca +tgtgagttggtgtgtgtgcatgcccacaagcatgcatgtaaatgtgatgtgctgtgtatg +cacacggataagcatgtgtgaaggagtgcatgagagtggtgcatggctgtggtgtgcacg +tgtgtgcgtgtgatgcatgtgcatttgcacgcatgtgctgtggtgtgtgtgatgtacgtg +catttgcacgcatgtgctgtggcgtgtgtgatgtacgtgcatttgcacgcgtgtgctgtg +tgtgtgatgtacgtgcatttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttg +cacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgt-------------- +-----------------------gtgctgtggcgtgtgtgatgtacgtgcgtttgcacgc +gtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtg----------- +-----gtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtacgtg +cgtttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtg +gcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgt +ttgcacgcgtgttctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcg +tgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttg +cacgcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgt +gtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcac +gcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgtgtg +atgtgtgtgcgtttgcacgcgtgtgc---------------------------------- +-------------------------------------------------tgtgtgatgtg +tgtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcatttacacgcgtgtgctg +tggtgtgtgatgtaggtgcatttgcacgcgtgtgctgtggtgtgtgtgatgtgtgtgcgt +ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc +tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc +atttctttgtgatgggtcattagaagcaggattctgaatt +>syndip_2_chr20:61282406-61283770 +ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca +gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa +tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta +cttgtacctgcacacacaactggatgagcatggtgtgtgtgtacacatgtaatgtgttca +tatgtacatatgtgggtgccttgtgcacacgtgtgtggctgtgggatgttcagatgtgca +tgtgagttggtgtgtgtgcatgcccacaagcatgcatgtaaatgtgatgtgctgtgtatg +cacacggataagcatgtgtgaaggagtgcatgagagtggtgcatggctgtggtgtgcacg +tgtgtgcgtgtgatgcatgtgcatttgcacgcatgtgctgtggtgtgtgtgatgtacgtg +catttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcatttgcacgcgtgtgctgtg +tgtgtgatgtacgtgcatttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttg +cacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcgtgt +gatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgc +gcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgat +gtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtg +cgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtg +gcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgt +ttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcg +tgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttg +cacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgt +gtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcac +gcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtg +atgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcg +tgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggtgtgtgatgta +ggtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcgtttgcacgcgtgtgctg +tggtgtgtgatgtaggtgcatttgcacgtgtgtgctgtggtgtgtgtgatgtgtgtgcat +ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc +tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc +atttctttgtgatgggtcattagaagcaggattctgaatt +>p:HG002_1_chr20:61282406-61283770 +ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca +gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa +tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta +cttgtacctgcacacacaactggatgagcatggtgtgtgtgtacacatgtaatgtgttca +tatgtacatatgtgggtgccttgtgcacacgtgtgtggctgtgggatgtttagatgtgca +tgtgagttggtgtgtgtgcatgcccacaagcatgcatgtaaatgtgatgtgctgtgtatg +cacacggataagcatgtgtgaaggagtgcatgagagtggtgcatggctgtggtgtgcacg +tgtgtgcgtgtgatgcatgtgcatttgcacgcatgtgctgtggtgtgtgtgatgtacgtg +catttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcatttgcacgcgtgtgctgtg +tgtgtgatgtacgtgcatttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttg +cacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcgtgt +gatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgc +gcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgat +gtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtg +cgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtg +gcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgt +ttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcg +tgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttg +cacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgt +gtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcac +gcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtg +atgtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcg +tgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggtgtgtgatgta +ggtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcgtttgcacgcgtgtgctg +tggtgtgtgatgtaggtgcatttgcacgtgtgtgctgtggtgtgtgtgatgtgtgtgcat +ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc +tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc +atttctttgtgatgggtcattagaagcaggattctgaatt +>p:HG002_2_chr20:61282406-61283770 +ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca +gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa +tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta +cttgtacctgcacacacaactggatgagcatggtgtgtgtgtacacatgtattgtgttca +tatgtacatatgtgggtgccttgtgcacacgtgtgtggctgtgggatgttcggatgtgca +tgtgagttggtgtgtgtgcatgcccacaagcatgcatgtaaatgtgatgtgctgtgtatg +cacacggataagcatgtgtgaaggagtgcatgagagtggtgcatggctgtggtgtgcacg +tgtgtgcgtgtgatgcatgtgcatttgcacgcatgtgctgtggtgtgtgtgatgtacgtg +catttgcacgcatgtgctgtggcgtgtgtgatgtacgtgcatttgcacgcgtgtgctgtg +tgtgtgatgtacgtgcatttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttg +cacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgt-------------- +-----------------------gtgctgtggcgtgtgtgatgtacgtgcgtttgcacgc +gtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtg----------- +-----gtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtacgtg +cgtttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtg +gcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcgt +ttgcacgcgtgttctgtggcgtgtgtgatgtacgtgcgtttgcacgcgtgtgctgtggcg +tgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttg +cacgcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgt +gtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcac +gcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggcgtgtgtg +atgtgtgtgcgtttgcacgcgtgtgc---------------------------------- +-------------------------------------------------tgtgtgatgtg +tgtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcatttacacgcgtgtgctg +tggtgtgtgatgtaggtgcatttgcacgcgtgtgctgtggtgtgtgtgatgtgtgtgcgt +ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc +tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc +atttctttgtgatgggtcattagaagcaggattctgaatt +>ref_chr20:61282406-61283770 +ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca +gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa +tctttggcatggtgtgtgtgcatgtgtgtgtgtgtgtgtgtgtgtgtatgtctatctgta +cttgtacctgcacacacaactggatgagcatggtgtgtgtgtacacatgtaatgtgttca +tatgtacatatgtgggtgccttgtgcacacgtgtgtggctgtgggatgttcagatgtgca +tgtgagttggtgtgtgtgcatgcccacaagcatgcatgtaaatgtgatgtgctgtgtatg +cacacggataagcatgtgtgaaggagtgcatgagagtggtgcatggctgtggtgtgcacg +tgtgtgcgtgtgatgcatgtgcatttgcacgcatgtgctgtggtgtgtgtgatgtacgtg +catttgcacgcgtgtgctgtggcgtgtgtgatgtacgtgcatttgcacgcgtgtgctgtg +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------gcgtgtgatgtacgtgcgtttgcacgcgcgtgctgtg----------- +-----gcgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtg +cgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtg +gcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgt +ttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcg +tgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttg +cacgcgcgtgctgtggcgtgtgtgatgtacgtgcgtttgcacgcgcgtgctgtggc---- +-----gtgtgtgcgtttgcacgcgcgtgctgtggcgtgtgtgatgtgtgtgcgtttgcac +gcgtgtgctgtggcgtgtgtgatgtgtgtgcgtttgcacgcgtgtgctgtggtgtgtgtg +atgtgtgtgcgtttgcacgcgtgtgc---------------------------------- +--------------------------------------------tgtggtgtgtgatgta +ggtgcatttgcacgcgtgtgctgtggtgtgtgatgtaggtgcatttacacgcgtgtgctg +tggtgtgtgatgtaggtgcatttgcacgtgtgtgctgtggtgtgtgtgatgtgtgtgcat +ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc +tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc +atttctttgtgatgggtcattagaagcaggattctgaatt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa new file mode 100644 index 00000000..db18f85a --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa @@ -0,0 +1,280 @@ +>syndip_1_chr20:64125009-64128075 +attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt +ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc +tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt +ttttttttgttttgtttttttagtagcagtgctgggattacaggtgtcagccaccatgcc +tggctcaaatactacactgtcttgattactacagctttatactcaaatactacaccatct +tgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatactc +aaataccacaccgtcttgattaccacagctttatactcaaataccacaccgtcttgatta +ccaaagctttata-------------------------------ctcaaataccacactg +tcttgattaccacagctttatactcaaataacacaccgtcttgattaccacagctttata +ctcaaatactacaccgtcttgattaccacagctttatactcaaataccacactgtcttga +ttaccacagctttata-------------------------------ctcaaataccaca +ctgtcttgattcctatagctttatactcaaataccacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaatactacactgtct +tgattactatagctttatactcaaatactacactgtctgattactacagctttatactca +aataccaccctgtcttgattaccacagctttatactcaaataccacaccgtcttgattac +cacagctgtatactcaaatact------------------------------------ac +accgtcttgattaccacagctgtatactcaaatactacactgtcttgattaccacagctt +tatactcaaataccacactgtcttgattaccaccgctttatactcaaataccacactgtc +ttgattactatagctttatactcaaatactacactgtctgattactacagctttatactc +aaataccacactgtcttgattaccacagctttatactcaaatactacactgtcttgatta +ctatagctttatactcaaatactacactgtctgattactacagctttatactcaaatacc +acactatcttgattactacagctttatactcaaatactacactgtctgattactacagct +ttatactcaaataccacaccgtcttgattaccacagctttatactcaaatactacactgt +cttgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatac +tcaaatactacaccgtcttgattaccacagctttatactcaaatactacaccgtcttgat +taccacagctttatactcaaataccacactgtcttgattactatagctttatactcaaat +actacactgtctgattactacagctttatactcaaataccacactgtcttgattaccaca +gctttatactcaaatactacactgtctgattactacagctttatactcaaataccacact +gtcttgattaccacagctttatactcaaatactacactgtcttgattaccacagctttat +actcaaataccacaccgtcttgattaccacagctgtatactcaaataccacactgtcttg +attaccacagctgtatactcaaataccacactgtcttgattaccacagctttatactcaa +atacc------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acactgtcttgattaccacagctttatac +gcaaataccacactgtcttgattactatagctttatactcaaataccacactgtcttgat +tactacagctttatactcaaataccacactgtcttgattactacagctttatactcaaat +actacactgtcttgattaacacagctttatactcaaatactacactgtcttgattaccac +agctttatactcaaatactacactgtcttgattactatagctttatactcaaataccaca +ctgtcttgattactatagctttatactcaaatactacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaataccacactgtct +tgattaccac-------------------------------------------------- +------------------------------------------------------------ +----agctttatactcaaataccacactgtcttgattaccacagctttatactcaaatac +tacactgtcttgattactac---------------------------------------- +------------------------------------agctttatactcaaatacc----- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------acactgtcttgattact +atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga +ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt +ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct +tgc +>syndip_2_chr20:64125009-64128075 +attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt +ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc +tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt +ttttttttgttttgtttttttagtagcagtgctgggattacaggtgtcagccaccatgcc +tggctcaaatactacactgtcttgattactacagctttatactcaaatactacaccatct +tgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatactc +aaataccacaccgtcttgattaccacagctttatactcaaataccacaccgtcttgatta +ccaaagctttata-------------------------------ctcaaataccacactg +tcttgattaccacagctttatactcaaataacacaccgtcttgattaccacagctttata +ctcaaatactacaccgtcttgattaccacagctttatactcaaataccacactgtcttga +ttaccacagctttata-------------------------------ctcaaataccaca +ctgtcttgattcctatagctttatactcaaataccacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaatactacactgtct +tgattactatagctttatactcaaatactacactgtctgattactacagctttatactca +aataccaccctgtcttgattaccacagctttatactcaaataccacaccgtcttgattac +cacagctgtatactcaaatact------------------------------------ac +accgtcttgattaccacagctgtatactcaaatactacactgtcttgattaccacagctt +tatactcaaataccacactgtcttgattaccaccgctttatactcaaataccacactgtc +ttgattactatagctttatactcaaatactacactgtctgattactacagctttatactc +aaataccacactgtcttgattaccacagctttatactcaaatactacactgtcttgatta +ctatagctttatactcaaatactacactgtctgattactacagctttatactcaaatacc +acactatcttgattactacagctttatactcaaatactacactgtctgattactacagct +ttatactcaaataccacaccgtcttgattaccacagctttatactcaaatactacactgt +cttgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatac +tcaaatactacaccgtcttgattaccacagctttatactcaaatactacaccgtcttgat +taccacagctttatactcaaataccacactgtcttgattactatagctttatactcaaat +actacactgtctgattactacagctttatactcaaataccacactgtcttgattaccaca +gctttatactcaaatactacactgtctgattactacagctttatactcaaataccacact +gtcttgattaccacagctttatactcaaatactacactgtcttgattaccacagctttat +actcaaataccacaccgtcttgattaccacagctgtatactcaaataccacactgtcttg +attaccacagctgtatactcaaataccacactgtcttgattaccacagctttatactcaa +atacc------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acactgtcttgattaccacagctttatac +gcaaataccacactgtcttgattactatagctttatactcaaataccacactgtcttgat +tactacagctttatactcaaataccacactgtcttgattactacagctttatactcaaat +actacactgtcttgattaacacagctttatactcaaatactacactgtcttgattaccac +agctttatactcaaatactacactgtcttgattactatagctttatactcaaataccaca +ctgtcttgattactatagctttatactcaaatactacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaataccacactgtct +tgattaccac-------------------------------------------------- +------------------------------------------------------------ +----agctttatactcaaataccacactgtcttgattaccacagctttatactcaaatac +cacactgtcttgattactac---------------------------------------- +------------------------------------agctttatactcaaatacc----- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------acactgtcttgattact +atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga +ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt +ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct +tgc +>p:HG002_1_chr20:64125009-64128075 +attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt +ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc +tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt +ttttttttgttttgtttttttagtagcagtgctgggattacaggtgtcagccaccatgcc +tggctcaaatactacactgtcttgattactacagctttatactcaaatactacaccatct +tgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatactc +aaataccacaccgtcttgattaccacagctttatactcaaataccacaccgtcttgatta +ccaaagctttata-------------------------------ctcaaataccacactg +tcttgattaccacagctttatactcaaataacacaccgtcttgattaccacagctttata +ctcaaatactacaccgtcttgattaccacagctttatactcaaataccacactgtcttga +ttaccacagctttata-------------------------------ctcaaataccaca +ctgtcttgattcctatagctttatactcaaataccacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaatactacactgtct +tgattactatagctttatactcaaatactacactgtctgattactacagctttatactca +aataccaccctgtcttgattaccacagctttatactcaaataccacaccgtcttgattac +cacagctgtatactcaaatact------------------------------------ac +accgtcttgattaccacagctgtatactcaaatactacactgtcttgattaccacagctt +tatactcaaataccacactgtcttgattaccaccgctttatactcaaataccacactgtc +ttgattactatagctttatactcaaatactacactgtctgattactacagctttatactc +aaataccacactgtcttgattaccacagctttatactcaaatactacactgtcttgatta +ctatagctttatactcaaatactacactgtctgattactacagctttatactcaaatacc +acactatcttgattactacagctttatactcaaatactacactgtctgattactacagct +ttatactcaaataccacaccgtcttgattaccacagctttatactcaaatactacactgt +cttgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatac +tcaaatactacaccgtcttgattaccacagctttatactcaaatactacaccgtcttgat +taccacagctttatactcaaataccacactgtcttgattactatagctttatactcaaat +actacactgtctgattactacagctttatactcaaataccacactgtcttgattaccaca +gctttatactcaaatactacactgtctgattactacagctttatactcaaataccacact +gtcttgattaccacagctttatactcaaatactacactgtcttgattaccacagctttat +actcaaataccacaccgtcttgattaccacagctgtatactcaaataccacactgtcttg +attaccacagctgtatactcaaataccacactgtcttgattaccacagctttatactcaa +atacc------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acactgtcttgattaccacagctttatac +gcaaataccacactgtcttgattactatagctttatactcaaataccacactgtcttgat +tactacagctttatactcaaataccacactgtcttgattactacagctttatactcaaat +actacactgtcttgattaacacagctttatactcaaatactacactgtcttgattaccac +agctttatactcaaatactacactgtcttgattactatagctttatactcaaataccaca +ctgtcttgattactatagctttatactcaaatactacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaataccacactgtct +tgattaccac-------------------------------------------------- +------------------------------------------------------------ +----agctttatactcaaataccacactgtcttgattaccacagctttatactcaaatac +cacactgtcttgattactac---------------------------------------- +------------------------------------agctttatactcaaatacc----- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------acactgtcttgattact +atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga +ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt +ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct +tgc +>p:HG002_2_chr20:64125009-64128075 +attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt +ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc +tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt +ttttttttgttttgtttttttagtagcagtgctgggattacaggtgtcagccaccatgcc +tggctcaaatactacactgtcttgattactacagctttatactcaaatactacaccatct +tgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatactc +aaataccacaccgtcttgattaccacagctttatactcaaataccacaccgtcttgatta +ccaaagctttata-------------------------------ctcaaataccacactg +tcttgattaccacagctttatactcaaataacacaccgtcttgattaccacagctttata +ctcaaatactacaccgtcttgattaccacagctttatactcaaataccacactgtcttga +ttaccacagctttata-------------------------------ctcaaataccaca +ctgtcttgattcctatagctttatactcaaataccacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaatactacactgtct +tgattactatagctttatactcaaatactacactgtctgattactacagctttatactca +aataccaccctgtcttgattaccacagctttatactcaaataccacaccgtcttgattac +cacagctgtatactcaaatact------------------------------------ac +accgtcttgattaccacagctgtatactcaaatactacactgtcttgattaccacagctt +tatactcaaataccacactgtcttgattaccaccgctttatactcaaataccacactgtc +ttgattactatagctttatactcaaatactacactgtctgattactacagctttatactc +aaataccacactgtcttgattaccacagctttatactcaaatactacactgtcttgatta +ctatagctttatactcaaatactacactgtctgattactacagctttatactcaaatacc +acactatcttgattactacagctttatactcaaatactacactgtctgattactacagct +ttatactcaaataccacaccgtcttgattaccacagctttatactcaaatactacactgt +cttgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatac +tcaaatactacaccgtcttgattaccacagctttatactcaaatactacaccgtcttgat +taccacagctttatactcaaataccacactgtcttgattactatagctttatactcaaat +actacactgtctgattactacagctttatactcaaataccacactgtcttgattaccaca +gctttatactcaaatactacactgtctgattactacagctttatactcaaataccacact +gtcttgattaccacagctttatactcaaatactacactgtcttgattaccacagctttat +actcaaataccacaccgtcttgattaccacagctgtatactcaaataccacactgtcttg +attaccacagctgtatactcaaataccacactgtcttgattaccacagctttatactcaa +atacc------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acactgtcttgattaccacagctttatac +gcaaataccacactgtcttgattactatagctttatactcaaataccacactgtcttgat +tactacagctttatactcaaataccacactgtcttgattactacagctttatactcaaat +actacactgtcttgattaacacagctttatactcaaatactacactgtcttgattaccac +agctttatactcaaatactacactgtcttgattactatagctttatactcaaataccaca +ctgtcttgattactatagctttatactcaaatactacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaataccacactgtct +tgattaccac-------------------------------------------------- +------------------------------------------------------------ +----agctttatactcaaataccacactgtcttgattaccacagctttatactcaaatac +tacactgtcttgattactac---------------------------------------- +------------------------------------agctttatactcaaatacc----- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------acactgtcttgattact +atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga +ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt +ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct +tgc +>ref_chr20:64125009-64128075 +attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt +ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc +tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt +ttttttttgttttgtttttttagtagcagtgctgggattacaggtgtcagccaccatgcc +tggctcaaatactacactgtcttgattactacagctttatactcaaatactacaccatct +tgattactacagctt--------------------------------------tatactc +aaataccacaccgtcttgattaccacagctgtatactcaaataccacaccgtcttgatta +ccaaagctttatactcaaataccacactgtcttgattaccacagct-------------- +-----------------ttatactcaaataacacaccgtcttgattaccacagctttata +ctcaaatactacaccgtcttgattaccacagctttatactcaaataccacactgtcttga +ttaccacagctttatactcaaatactacactgtcttgattcctatagc------------ +-------------------tttatactcaaataccacactgtcttgattaccacagcttt +atactcaaatactacactgtcttgattactatagctttatactcaaatactacactgtct +tgattactatagctttatactcaaatactacactgtctgattactacagctttatactca +aataccaccctgtcttgattaccacagctttatactcaaataccacaccgtcttgattac +cacagctgtatactcaaatactactgtcctgattactacagctttatactcaaataccac +accgtcttgattaccacagctgtatactcaaatactacaccgtcttgattaccacagctt +tatactcaaatactacaccgtcttgattaccacagctttatactcaaataccacactgtc +ttgattactatagctttatactcaaatactacactgtctgattactacagctttatactc +aaataccacactgtcttgattaccacagctttatactcaaatactacactgtcttgatta +ctatagctttatactcaaatactacactgtctgattactacagctttatactcaaatacc +acactatcttgattactacagctttatactcaaatactacactgtctgattactacagct +ttatactcaaataccacaccgtcttgattaccacagctttatactcaaatactacactgt +cttgattactacagctttatactcaaataccacaccgtcttgattaccacagctgtatac +tcaaatactacaccgtcttgattaccacagctttatactcaaatactacaccgtcttgat +taccacagctttatactcaaataacacactgtcttgattactatagctttatactcaaat +actacactgtctgattactacagctttatactcaaataccacactgtcttgattaccaca +gctttatactcaaatactacactgtctgattactacagctttatactcaaataccacact +gtcttgattaccacagctttatactcaaatactacactgtcttgattaccacagctttat +actcaaataccacaccgtcttgattaccacagctgtatactcaaataccacactgtcttg +attaccacagctgtatactcaaataccacactgtcttgattaccacagctttatactcaa +ataccacactgtcttgattactatagctttatactcaaataccacactgtcttgattacc +acagctttatacgcaaataccacactgtcttgattactatagctttatactcaaatacca +cactgtcttgattactacagctttatactcaaataccacactgtcttgattactacagct +ttatactcaaatactacactgtcttgattaacacagctttatactcaaatactacactgt +cttgattaccacagctttatactcaaatactacactgtcttgattactatagctttatac +tcaaataccacactgtcttgattactatagctttatactcaaatactacactgtcttgat +taccacagctttatactcaaatactacactgtcttgattact------------------ +--------------------atagctttatactcaaataccacactgtcttgattaccac +--------------------------------------agctttatactcaaataccaca +ctgtcttgattaccacagctttatactcaaataccacactgtcttgattaccacagcttt +atactcaaataccacactgtcttgattactatagctttatactcaaataccacactgtct +tgattactacagctttatactcaaataccacactgtcttgattaacacagctttatactc +aaatactacactgtcttgattaccacagctttatactcaaatactacactgtcttgatta +ctatagctttatactcaaataccacactgtcttgattactatagctttatactcaaatac +tacactgtcttgattaccacagctttatactcaaatactacactgtcttgattactatag +ctttgtactcaaataccacactgtcttgattaccacagctttatactcaaataccacact +gtcttgattaccacagctttatactcaaataccacactgtcttgattactatagctttat +actcaaataccacactgtcttgattactacagctttatactcaaataccacactgtcttg +attactacagctttatactcaaatactacactgtcttgattaacacagctttatactcaa +atactacactgtcttgattaccacagctttatactcaaatactacactgtcttgattact +atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga +ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt +ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct +tgc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa new file mode 100644 index 00000000..b9fb50de --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa @@ -0,0 +1,80 @@ +>syndip_1_chr20:63641679-63642236 +tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc +tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct +cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca +gtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgag +ctccgccctggccccgccccggcccctgcccgctccgagctccgccctggccccgccccc +cgcccagtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgc +tccgagctccgccccgg------------------------------------------- +------------------------------------------------------------ +------------------------------ccccgccccggcccctgcccgctccgagct +ccgccctggccccgcccccgcccagtgccccgccccctgactgctgctagccctgccccc +gccccggcccctgcccgctccgagctccgccccggccccgccccggcccctgcccgctcc +gagctccgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc +ccggcccctgcccgctccgagctccgccccggccccgcccccgcaccttctcgcgcagcc +gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg +>syndip_2_chr20:63641679-63642236 +tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc +tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct +cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca +gtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgag +ctccgccctggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccc +cgcccagtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgc +tccgagctccaccctggccccgcccccgcccagtgccccgccccctgcctgctgcttgcc +ctgccccctccccggcccctgcccgctccgagctccgccccggccccgccccggcccagt +gccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagct +ccgccctggccccgcccccgcccagtgccccgccccctgcctgctgcttgccctgccccc +tccccggcccctgcccgctccgagctccgccccggcctcgccccggcccctgcccgctcc +gagcttcgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc +ccggcccctgcccgctccgaactccgccccggccccgcccccgcaccttctcgcgcagcc +gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg +>p:HG002_1_chr20:63641679-63642236 +tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc +tgctggggacgcccagtaaacacgggaggagcccccgatccccaccccagctcagcgcct +cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca +gtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgag +ctccgccctggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccc +cgcccagtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgc +tccgagctccaccctggccccgcccccgcccagtgccccgccccctgcctgctgcttgcc +ctgccccctccccggcccctgcccgctccgagctccgccccggccccgccccggcccagt +gccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagct +ccgccctggccccgcccccgcccagtgccccgccccctgcctgctgcttgccctgccccc +tccccggcccctgcccgctccgagctccgccccggcctcgccccggcccctgcccgctcc +gagcttcgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc +ccggcccctgcccgctccgaactccgccccggccccgcccccgcaccttctcgcgcagcc +gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg +>p:HG002_2_chr20:63641679-63642236 +tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc +tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct +cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca +gtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgag +ctccgccctggccccgccccggcccctgcccgctccgagctccgccctggccccgccccc +cgcccagtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgc +tccgagctccgccccgg------------------------------------------- +------------------------------------------------------------ +------------------------------ccccgccccggcccctgcccgctccgagct +ccgccctggccccgcccccgcccagtgccccgccccctgactgctgctagccctgccccc +gccccggcccctgcccgctccgagctccgccccggccccgccccggcccctgcccgctcc +gagctccgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc +ccggcccctgcccgctccgagctccgccccggccccgcccccgcaccttctcgcgcagcc +gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg +>ref_chr20:63641679-63642236 +tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc +tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct +cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca +gtgccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgag +ctccgccccggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccc +cgcccagtgccccgccccctgactgctgctagccctgc---------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------ccccgccccggcccctgcccgctccgagcttcgccccggccccgcc +ccggcccctgcccgctccgagctccgccccggccccgcccccgcaccttctcgcgcagcc +gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa new file mode 100644 index 00000000..ed52f700 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa @@ -0,0 +1,190 @@ +>syndip_1_chr20:55624382-55625908 +atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg +cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc +cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat +actatattatataatatactataatatatataatatagtatatattgtatataatatagt +atatataatataatagtatataatatagtatagatagtatagaatatatagtatatacct +agtatactatattatatagtataactattataatatttatagtatattatactatataat +acagtatactgtattatatactataatctactagattataatatactatataatcagtat +actatattatatagtatagtataatatagtattatattatagtaatatattataaagtat +tatagtaatatagtataatatactatacattatatagtatagtatattatatagtatact +atatataagtatatataatagtatattgtatatagtaattatatatagtatatataatat +acaatgtatagtatattatagtatactatacgatactgtagtatagtatgctatatagta +tactatatacgacagtatcatatagtatactacaaaatatatagtatagcatagtatact +atataatatactatattatatatactatattatatatactacactatattatatatacta +tattatatatactatactatacataaaatactatatatagtatattatatatagtatact +atatatagtatatatagtatactatatatactatatatagtatactatatagtatactat +atatagtatatatagtatactatatatagtatactatatatagtatatatatagtatact +atatatagtatactatatatagtatatatagtatactatatatagtatactatatatagt +atatatagtatactatatatagtatactatatatagtatatatagtatactatatatagt +atactatatatagtatatatagtatactatatatagtatatatagtatactatatatagt +atatatagtatactatatatactatatatactatatatagtatactatatatagtatata +tagtatactatatatcatatatagtattatatacagtatactatatataatatagtatac +tatactataatatactatttgttataattatctagtatagtatactatatatactatatt +atatagtatatagtatatatagtatatttatagtatatatactatatactatatatacta +tatactataatatatactatatatactaaataaatataatatacttatttaaatatattt +aaataaatata--aaatatttaaatataattacttaaatatatatttaaaatatttaata +aacatatatttaaatatttatttaatatttaaatattaaatatatataattaatataatt +atatatatattatatactatatattatatatact-----atatactatatatagtatact +aatataatatactatatatactatatataatatactatatatagtatatatagtatagta +tactataatatactatat-------------------------------------tatat +aatatagtatactatatattgtatactatactatgctatagtatgctatatactacacta +tagtatactgtatactatac-----tatacagtatactgtatactatactatagta---- +---------tacagtatactatatactattgtatagtatatagtatactgtatactatag +tatagtatactatatactatactatactatagtatactctatagtatataaagacatata +tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat +acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa +tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg +aaggtcaccttcatcc +>syndip_2_chr20:55624382-55625908 +atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg +cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc +cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat +actatattatataatatactataatatatataatatagtatatattgtatataatatagt +atatataatataatagtatataatatagtatagatagtatagaatatatagtatatacct +agtatactatattatatagtataactattataatatttatagtatattatactatataat +acagtatactgtattatatactataatctactagattataatatactatataatcagtat +actatattatatagtatagtataatatagtattatattatagtaatatattataaagtat +tatagtaatatagtataatatactatac-----------------attatatagtatact +atatataagtatatataatagtatattgtatatagtaattatatatagtatatataatat +acaatgtatagtatattatagtatactatacgatactgtagtatagtatgctatatagta +tactatatacgacagtatcatatagtatactacaaaatatatagtatagcatagtatact +atataatat---------------------------------actatattatatatacta +tattatatatactatactatacataaaatactatatatagtatat--------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------tatatatagtatactatatatagtatata +tagtatactatatatcatatatagtattatatacagtatactatatataatatagtatac +tatactataatatactatttgttataattatctagtatagtatactatatatactatat- +------tatatagtatatatagtatatttatagtatatatactatatactatatatacta +tataatataatatataatatatatactaaata-----aatatacttatttaaatatattt +aaataaatatattaaatatttaaatataattacttaaatatatatttaaaatatttaata +aacatatatttaaatatttatttaatatttaaatattaaatatatataattaatataatt +atatatatattatatactatatattatatatactatataatatactatatatagtatact +aatataatatactatatatactatatataatatactatatatagtatatatagtatagta +tactataatatactatatatagtatatatagtatagtatactataatatactatatatag +tatatagtatactatatattgtatactatactatgctatagtatgctatatactacacta +tagtatactgtatactatactatagtatacagtatactatatactatactatagtatagt +atactatagtatagtatactatatactatactatagtatatagtatactgtatac----- +tatagtatactatatactatactatactatagtatactctatagtatataaagacatata +tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat +acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa +tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg +aaggtcaccttcatcc +>p:HG002_1_chr20:55624382-55625908 +atcccagctacttgggaggctgaggcaggataatcacttcaatcagtgaggtggaggctg +cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc +cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat +actatattatataatatactataatatatataatatagtatatattgtatataatatagt +atatataatataatagtatataatatagtatagatagtatagaatatatagtatatacct +agtatactatattatatagtataactattataatatttatagtatattatactatataat +acagtatactgtattatatactataatctactagattataatatactatataatcagtat +actatattatatagtatagtataatatagtattatattatagtaatatattataaagtat +tatagtaatatagtataatatactatac-----------------attatatagtatact +atatataagtatatataatagtatattgtatatagtaattatatatagtatatataatat +acaatgtatagtatattatagtatactatacgatactgtagtatagtatgctatatagta +tactatatacgacagtatcatatagtatactacaaaatatatagtatagcatagtatact +atataatat---------------------------------actatattatatatacta +tattatatatactatactatacataaaatactatatatagtatat--------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------tatatatagtatactatatatagtatata +tagtatactatatatcatatatagtattatatacagtatactatatataatatagtatac +tatactataatatactatttgttataattatctagtatagtatactatatatactatat- +------tatatagtatatatagtatatttatagtatatatactatatactatatatacta +tataatataatatataatatatatactaaata-----aatatacttatttaaatatattt +aaataaatatattaaatatttaaatataattacttaaatatatatttaaaatatttaata +aacatatatttaaatatttatttaatatttaaatattaaatatatataattaatataatt +atatatatattatatactatatattatatatactatataatatactatatatagtatact +aatataatatactatatatactatatataatatactatatatagtatatatagtatagta +tactataatatactatatatagtatatatagtatagtatactataatatactatatatag +tatatagtatactatatattgtatactatactatgctatagtatgctatatactacacta +tagtatactgtatactatactatagtatacagtatactatatactatactatagtatagt +atactatagtatagtatactatatactatactatagtatatagtatactgtatac----- +tatagtatactatatactatactatactatagtatactctatagtatataaagacatata +tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat +acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa +tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg +aaggtcaccttcatcc +>p:HG002_2_chr20:55624382-55625908 +atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg +cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc +cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat +actatattatataatatactataatatatataatatagtatatattgtatataatatagt +atatataatataatagtatataatatagtatagatagtatagaatatatagtatatacct +agtatactatattatatagtataactattataatatttatagtatattatactatataat +acagtatactgtattatatactataatctactagattataatatactatataatcagtat +actatattatatagtatagtataatatagtattatattatagtaatatattataaagtat +tatagtaatatagtataatatactatacattatatagtatagtatattatatagtatact +atatataagtatatataatagtatattgtatatagtaattatatatagtatatataatat +acaatgtatagtatattatagtatactatacgatactgtagtatagtatgctatatagta +tactatatacgacagtatcatatagtatactacaaaatatatagtatagcatagtatact +atataatatactatattatatatactatattatatatactacactatattatatatacta +tattatatatactatactatacataaaatactatatatagtatattatatatagtatact +atatatagtatatatagtatactatatatactatatatagtatactatatagtatactat +atatagtatatatagtatactatatatagtatactatatatagtatatatatagtatact +atatatagtatactatatatagtatatatagtatactatatatagtatactatatatagt +atatatagtatactatatatagtatactatatatagtatatatagtatactatatatagt +atactatatatagtatatatagtatactatatatagtatatatagtatactatatatagt +atatatagtatactatatatactatatatactatatatagtatactatatatagtatata +tagtatactatatatcatatatagtattatatacagtatactatatataatatagtatac +tatactataatatactatttgttataattatctagtatagtatactatatatactatatt +atatagtatatagtatatatagtatatttatagtatatatactatatactatatatacta +tatactataatatatactatatatactaaataaatataatatacttatttaaatatattt +aaataaatata--aaatatttaaatataattacttaaatatatatttaaaatatttaata +aacatatatttaaatatttatttaatatttaaatattaaatatatataattaatataatt +atatatatattatatactatatattatatatact-----atatactatatatagtatact +aatataatatactatatatactatatataatatactatatatagtatatatagtatagta +tactataatatactatat-------------------------------------tatat +aatatagtatactatatattgtatactatactatgctatagtatgctatatactacacta +tagtatactgtatactatac-----tatacagtatactgtatactatactatagta---- +---------tacagtatactatatactattgtatagtatatagtatactgtatactatag +tatagtatactatatactatactatactatagtatactctatagtatataaagacatata +tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat +acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa +tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg +aaggtcaccttcatcc +>ref_chr20:55624382-55625908 +atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg +cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc +cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat +actatattatataatatactataatatatataatatagtatatattgtatataatatagt +atatataatataatagtatataatatagtatagatagtatagaatatatagtatatacct +agtatactatattatatagtataactattataatatttatagtatattatactatataat +acagtatactgtattatatactataatctactagattataatatactatataatcagtat +actatatt-----gtatagtataatatagtattatattatagtaatatattataaagtat +tatagtaatatagtataatatactatacattatatagtatagtatattatatagtatact +atatataagtatatttaatagtatattgtatatagtaattatatatagtatatataatat +acaatgtatagtatattatagtatactatacgatactgtagtatagtatgctatatagta +tactatatacgacagtatcatatagtatactacaaaatatatagtatagcatagtatact +atataatatactatattatatatactatattatatatactatactatattatatatacta +tattatatatactatactatacataaaatactatatatagtatattatatatagtatact +atatatagtatataatatata--------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------gtatactatatatactatatatagtatactatatatactatata +tagtatactatatatcatatatagtattatatacagtatactatatataatatagtatac +tatactataatatactatttgttataattatctagtatagtatactatatatactatatt +atatagtatatagtatatatagta----tatagtatatatactatatactatatatacta +tatactatactatataatatatatacta-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------atatattgtatactatactatgctatagtatgctatatactacacta +tagtatactgtgtactatactatagtatacagtatactatatactatactatagtatagt +atactatagtatagtatactatatactatagtatagtatatagtatactgtatactatag +tatagtatactatatactatactatactatagtatactctatagtatataaagacatata +tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat +acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa +tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg +aaggtcaccttcatcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa new file mode 100644 index 00000000..b7aa8658 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa @@ -0,0 +1,50 @@ +>syndip_1_chr20:21120112-21120640 +acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga +gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca +gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta +tgtatatatatatata-------------------------------------------- +------------------------------------------------------------ +--------------------tatatttatatatatatatatatt----tatatatatata +tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca +tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc +atccacattttggagagcagtctactttcctcaaagcctactgcttcaa +>syndip_2_chr20:21120112-21120640 +acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga +gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca +gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta +tgtatatatatatata-------------------------------------------- +------------------------------------------------------------ +--------------------tatatttatatatatatatatatttatatatatatatata +tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca +tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc +atccacattttggagagcagtctactttcctcaaagcctactgcttcaa +>p:HG002_1_chr20:21120112-21120640 +acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga +gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca +gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta +tgtatatatatatata-------------------------------------------- +------------------------------------------------------------ +--------------------tatatttatatatatatatatatttatatatatatatata +tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca +tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc +atccacattttggagagcagtctactttcctcaaagcctactgcttcaa +>p:HG002_2_chr20:21120112-21120640 +acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga +gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca +gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta +tgtatatatatatata-------------------------------------------- +------------------------------------------------------------ +--------------------tatatttatatatatatatatatt----tatatatatata +tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca +tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc +atccacattttggagagcagtctactttcctcaaagcctactgcttcaa +>ref_chr20:21120112-21120640 +acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga +gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca +gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta +tgtatatatatatatatatttatatatatatatatttatatatatatatttatatatata +tatttatatatatatatatttatatatatatatttatatatatatatttatatatatata +tatttatatatatatatatttatatatatatatttatatatatatatatttatatatata +tatttatatatatatatatttatatggagagagagagatttatttttaggaagtggttca +tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc +atccacattttggagagcagtctactttcctcaaagcctactgcttcaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa new file mode 100644 index 00000000..d154fe9a --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa @@ -0,0 +1,180 @@ +>syndip_1_chr20:38123490-38124806 +ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg +gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg +tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa +taatatataataataatatataatatgtataataataatatataatatttataataataa +tatatataatatgtatatataatatacatatacctattatatataatatacctattatat +ataatatacatatacctattatatataatatacctattatatataagatacctattatat +ataatatacatatacctattatatatataatatacctattatatatatataatataccta +ttatatataatatacctattatatatataatatacctattatatatataatatacctatt +atatatataatatacctattatatataatatacctattatatataaaatatacctattat +atataatatacctattatatacattatatataatatacctattatatgcattatatataa +tatacctattatatgcattatatataatatacctattatatgcattatatataatatacc +tattatatgcattatatataatatacctattatatgcattatataatatattatatataa +tatgcatattatatgtattatatattatatattatatatataatatacatattatatgta +ttatatattatatattatatatataatatacatattatatgtattatatattatatataa +tatacatattatatattatatattatatacatattatatattatatattatatacatatt +atatattatatacatattatatattatatattatatacatattatacattatatatatct +aaaatatataatacacattatatattatataatacacattatatataatatataatacac +attatatattatatataatacacattatatattatatataatacacattatatat----- +------------------tatataatacacattatatattatatataatacacattatat +attatatataatacacattatatattatatataatacacattatatattatataatacac +attatatattatatataatacacattatatatataatacacattatatattatatataat +acataatacacattatatattatatataatacacattatatataatatataacacacatt +atatataatatataacacacattatatattatatataatacacattatatattatatata +atacacatatattatatataatacacattatatattatatataatacacatatattatat +ataatacacattatatattatatataatacacattatatataatatataatacacattat +atataatacttattatatattatacataatatacatattatatataatacataatatacg +tatattatataatatacatattatatataatacataatatacgtatattatataatatac +atattatatatattatactatatataatactcatatatatacatattatatatacatatt +atatataatactcatattatatgtacatattatacatacatattatatataatacacata +tatacatattatatataatacacacacacattatatatgtcttatatataatatacatat +tatatatgtattatattattatagatataatatacatattatatctatatataaaatata +tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca +atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc +cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg +tagagatggggtttcgccatgttgccgaggctggtgtcaa +>syndip_2_chr20:38123490-38124806 +ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg +gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg +tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa +taatatataataataatatataatatgtataataataatatataatatttataataataa +tatatataatatgtatatataatatacatatacctattatatataatatacctattatat +ataatatacatatacctattatatataatatacctattatatataagatacctattatat +ataatatacatatacctattatatatataatatacctattatatatatataatataccta +ttatatataatatacctattatatatataatatacctattatatatataatatacctatt +atatatataatatacctattatatataatatacctattatatataaaatatacctattat +atataatatacctattatata---------------------------cattatatataa +tatacctattatatgcattatatataatatacctattatatgcattatatataatatacc +tattatatgcattatatataatatacctattatatgcattatataatatattatatataa +tatgcatattatatgtattatatattatatattatatatataatatacatattatatgta +ttatatattatatattatatatataatatacatattatatgtattatatattatatataa +tatacatattatatattatatattatatacatattatatattatatattatatacatatt +atatattatatacatattatatattatatattatatacatattatacattatatatatct +aaaatatataatacacattatatattatataatacacattatatataatatataatacac +attatatattatatataatacacattatatattatatataatacacattatatattatat +aatacacattatatattatatataatacacattatatattatatataatacacattatat +attatatataatacacattatatattatatataatacacattatatattatataatacac +attatatattatatataatacacattatatatataatacacattatatattatatataat +acataatacacattatatattatatataatacacattatatataatatataacacacatt +atatataatatataacacacattatatattatatataatacacattatatattatatata +atacacatatattatatataatacacattatatattatatataatacacatatattatat +ataatacacattatatattatatataatacacattatatataatatataatacacattat +atataatacttattatatattatacataatatacatattatatataatacataatatacg +tatattatataatatacatattatatataatacataatatacgtatattatataatatac +atattatatatattatactatatataatactcatatatatacatattatatatacatatt +atatataatactcatattatatgtacatattatacatacatattatatataatacacata +tatacatattatatataatacacacacacattatatatgtcttatatataatatacatat +tatatatgtattatattattatagatataatatacatattatatctatatataaaatata +tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca +atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc +cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg +tagagatggggtttcgccatgttgccgaggctggtgtcaa +>p:HG002_1_chr20:38123490-38124806 +ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg +gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg +tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa +taatatataataataatatataatatgtataataataatatataatatttataataataa +tatatataatatgtatatataatatacatatacctattatatataatatacctattatat +ataatatacatatacctattatatataatatacctattatatataagatacctattatat +ataatatacatatacctattatatatataatatacctattatatatatataatataccta +ttatatataatatacctattatatatataatatacctattatatatataatatacctatt +atatatataatatacctattatatataatatacctattatatataaaatatacctattat +atataatatacctattatata---------------------------cattatatataa +tatacctattatatgcattatatataatatacctattatatgcattatatataatatacc +tattatatgcattatatataatatacctattatatgcattatataatatattatatataa +tatgcatattatatgtattatatattatatattatatatataatatacatattatatgta +ttatatattatatattatatatataatatacatattatatgtattatatattatatataa +tatacatattatatattatatattatatacatattatatattatatattatatacatatt +atatattatatacatattatatattatatattatatacatattatacattatatatatct +aaaatatataatacacattatatattatataatacacattatatataatatataatacac +attatatattatatataatacacattatatattatatataatacacattatatattatat +aatacacattatatattatatataatacacattatatattatatataatacacattatat +attatatataatacacattatatattatatataatacacattatatattatataatacac +attatatattatatataatacacattatatatataatacacattatatattatatataat +acataatacacattatatattatatataatacacattatatataatatataacacacatt +atatataatatataacacacattatatattatatataatacacattatatattatatata +atacacatatattatatataatacacattatatattatatataatacacatatattatat +ataatacacattatatattatatataatacacattatatataatatataatacacattat +atataatacttattatatattatacataatatacatattatatataatacataatatacg +tatattatataatatacatattatatataatacataatatacgtatattatataatatac +atattatatatattatactatatataatactcatatatatacatattatatatacatatt +atatataatactcatattatatgtacatattatacatacatattatatataatacacata +tatacatattatatataatacacacacacattatatatgtcttatatataatatacatat +tatatatgtattatattattatagatataatatacatattatatctatatataaaatata +tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca +atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc +cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg +tagagatggggtttcgccatgttgccgaggctggtgtcaa +>p:HG002_2_chr20:38123490-38124806 +ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg +gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg +tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa +taatatataataataatatataatatgtataataataatatataatatttataataataa +tatatataatatgtatatataatatacatatacctattatatataatatacctattatat +ataatatacatatacctattatatataatatacctattatatataagatacctattatat +ataatatacatatacctattatatatataatatacctattatatatatataatataccta +ttatatataatatacctattatatatataatatacctattatatatataatatacctatt +atatatataatatacctattatatataatatacctattatatataaaatatacctattat +atataatatacctattatatacattatatataatatacctattatatgcattatatataa +tatacctattatatgcattatatataatatacctattatatgcattatatataatatacc +tattatatgcattatatataatatacctattatatgcattatataatatattatatataa +tatgcatattatatgtattatatattatatattatatatataatatacatattatatgta +ttatatattatatattatatatataatatacatattatatgtattatatattatatataa +tatacatattatatattatatattatatacatattatatattatatattatatacatatt +atatattatatacatattatatattatatattatatacatattatacattatatatatct +aaaatatataatacacattatatattatataatacacattatatataatatataatacac +attatatattatatataatacacattatatattatatataatacacattatatat----- +------------------tatataatacacattatatattatatataatacacattatat +attatatataatacacattatatattatatataatacacattatatattatataatacac +attatatattatatataatacacattatatatataatacacattatatattatatataat +acataatacacattatatattatatataatacacattatatataatatataacacacatt +atatataatatataacacacattatatattatatataatacacattatatattatatata +atacacatatattatatataatacacattatatattatatataatacacatatattatat +ataatacacattatatattatatataatacacattatatataatatataatacacattat +atataatacttattatatattatacataatatacatattatatataatacataatatacg +tatattatataatatacatattatatataatacataatatacgtatattatataatatac +atattatatatattatactatatataatactcatatatatacatattatatatacatatt +atatataatactcatattatatgtacatattatacatacatattatatataatacacata +tatacatattatatataatacacacacacattatatatgtcttatatataatatacatat +tatatatgtattatattattatagatataatatacatattatatctatatataaaatata +tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca +atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc +cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg +tagagatggggtttcgccatgttgccgaggctggtgtcaa +>ref_chr20:38123490-38124806 +ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg +gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg +tcctgtaacctctctgggcctcagaaacctctctctctctctatatatatatatatataa +taatatataataataatatataatatgtataataataatatataatatttataataataa +tatatataatatgtatatataatatacatatacctattatatataatatacctattatat +ataatatacataaacctattatatataatatacctattatatataagatacctattatat +ataatatacctat------------------------------tatatataagataccta +ttatatataatatac--------------atatacctattatatatataatatacctatt +ata----------------tatatataatatacctattatatatataatatacctatta- +---------------------------------------------------tatatataa +tatacctattatatatataata-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------tacctattatatataatacacattatatataatatataacacacatt +atatataatatataacacacattatatattatatataatacacattatatattatatata +atacacatatattatatataatacacattatatattatatataatacacatatattatat +ataatacacattatatattatatataatacacattatatataatatataatacacattat +atataatacttattatatattatacataatatacatattatatataatacataatatacg +tatattatataatatacatattatatataatacataatatacgtatattatataatatac +atattatatatattatactatatataatactcatatatatacatattatatatacatatt +atatataatactcatattatatgtacatattatacatacatattatatataatacacata +tatacatattatatataatacacacacacattatatatgtcttatatataatatacatat +tatatatgtattatattattatagatataatatacatattatatctatatataaaatata +tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca +atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc +cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg +tagagatggggtttcgccatgttgccgaggctggtgtcaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa new file mode 100644 index 00000000..56d3b9e2 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa @@ -0,0 +1,115 @@ +>syndip_1_chr20:63048933-63049513 +gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca +ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat +gtgggggcagactcctcctcctcctcctcttcctcctcc--------------------- +------tcctcctcttcctcttcctcctcctcctcctcttcctcttcctcctcctcctcc +tcttcctcctcctcctcttcctcctccccctcctcttcctcctcctcctcttccttctcc +tcctccttctcctcctcctcttcctcctcttcctcttcctcctcctcttcctcctcctct +tcctcctcctctttctcctcctcttcctcttcctcctcttcctcctcctcttcctcctcc +tcctcttcctcctcctcctcttcctcctcctcctcttcctccttctcttcctcttcctcc +tcctcttcctcttcctcctcctcttcctctttctcctcctcctcctcttcctcttcctcc +tcc---------------------------------------tcttcctctttctcctcc +tcctcctcttcctcctcttcctcctcctcttcctcctcctcctcctcttcctcctcctcc +tcgtcttcctccttctcctcttcctcctcttcctcttcctcctcctcttcctcttcctct +tcctcttcctcttcctcctcctcctcctctttctcctcctcctcctcttcctcctcctcc +tcttcctcctcctcctcttcctcctcctcctcttcctcctcttcctcctcctcctcttcc +tcctcttcttcctcctcttcctcctcctcctcttcctcctcttcctcctcctcttcctcc +tcctcctcttcctcctcctcctcttcctcctcctcctcttcctcct-cctcctcttcctc +ctcctcctcttcctcctcctcttcctcctcctcctcctcttcctcccctcctcctcctct +tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct +cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag +gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac +tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca +ggacccagaagcctggtagaaagtgaccatgtgtggcaggt +>syndip_2_chr20:63048933-63049513 +gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca +ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat +gtgggggcagactcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctct +tcctcttcctcctcctcctcctcttcctcttcctcctcctcctcctcttcctcctcctcc +tcttcctcctcctcctcttcctcctccccctcctcttcctcctcctcctcttcctcctcc +tcctccttctcctcctcctcttcctcctcttcctcttcctcctcctcttcctcctcctct +ttctcctcctcttcctcttcctcctcttcctcctcctcttcctcctcctcctcttcctcc +tcctcctcttcctcctcctcctcttcctcctcctcttcctccttctcttcctcttcctcc +tcctcttcctcttcctcctcctcttcctctttctcctcctcctcctcttcctcctcttcc +tcctcctcttcctcctcctcctcctcttcctcctcctcctcgtcttcctccttctcctct +tcctcctcttcctcttcctcctcctcttcctcttcctcttcctcttcctcctcctcctcc +tcttcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctcttcctcctcc +tcctcttcctcctcctcctcttcctcctcttcctcctcctcctcttcctcctcttcttcc +tcctcttcctcctcctcctcttcctcctcttcctcctcctcttcctcctcctcctcttcc +tcctcctcttcctcctcttcttcctcctcttcctcctcctcctcttcctcctcttcctcc +tcctcttcctcctcctcctcctcttcctcc-cctcctcttcctcct-cctcctcttcctc +ctcctcctcttcctcctcctcttcctcctcctcctcctcttcctcccctcctcctcctct +tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct +cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag +gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac +tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca +ggacccagaagcctggtagaaagtgaccatgtgtggcaggt +>p:HG002_1_chr20:63048933-63049513 +gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca +ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat +gtgggggcagactcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctct +tcctcttcctcctcctcctcctcttcctcttcctcctcctcctcctcttcctcctcctcc +tcttcctcctcctcctcttcctcctccccctcctcttcctcctcctcctcttcctcctcc +tcctccttctcctcctcctcttcctcctcttcctcttcctcctcctcttcctcctcctct +ttctcctcctcttcctcttcctcctcttcctcctcctcttcctcctcctcctcttcctcc +tcctcctcttcctcctcctcctcttcctcctcctcttcctccttctcttcctcttcctcc +tcctcttcctcttcctcctcctcttcctctttctcctcctcctcctcttcctcctcttcc +tcctcctcttcctcctcctcctcctcttcctcctcctcctcgtcttcctccttctcctct +tcctcctcttcctcttcctcctcctcttcctcttcctcttcctcttcctcctcctcctcc +tcttcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctcttcctcctcc +tcctcttcctcctcctcctcttcctcctcttcctcctcctcctcttcctcctcttcttcc +tcctcttcctcctcctcctcttcctcctcttcctcctcctcttcctcctcctcctcttcc +tcctcctcttcctcctcttcttcctcctcttcctcctcctcctcttcctcctcttcctcc +tcctcttcctcctcctcctcctcttcctcc-cctcctcttcctcct-cctcctcttcctc +ctcctcctcttcctcctcctcttcctcctcctcctcctcttcctcccctcctcctcctct +tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct +cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag +gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac +tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca +ggacccagaagcctggtagaaagtgaccatgtgtggcaggt +>p:HG002_2_chr20:63048933-63049513 +gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca +ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat +gtgggggcagactcctcctcctcctcctcttcctcctcc--------------------- +------tcctcctcttcctcttcctcctcctcctcctcttcctcttcctcctcctcctcc +tcttcctcctcctcctcttcctcctccccctcctcttcctcctcctcctcttccttctcc +tcctccttctcctcctcctcttcctcctcttcctcttcctcctcctcttcctcctcctct +tcctcctcctctttctcctcctcttcctcttcctcctcttcctcctcctcttcctcctcc +tcctcttcctcctcctcctcttcctcctcctcctcttcctccttctcttcctcttcctcc +tcctcttcctcttcctcctcctcttcctctttctcctcctcctcctcttcctcttcctcc +tcc---------------------------------------tcttcctctttctcctcc +tcctcctcttcctcctcttcctcctcctcttcctcctcctcctcctcttcctcctcctcc +tcgtcttcctccttctcctcttcctcctcttcctcttcctcctcctcttcctcttcctct +tcctcttcctcttcctcctcctcctcctctttctcctcctcctcctcttcctcctcctcc +tcttcctcctcctcctcttcctcctcctcctcttcctcctcttcctcctcctcctcttcc +tcctcttcttcctcctcttcctcctcctcctcttcctcctcttcctcctcctcttcctcc +tcctcctcttcctcctcctcctcttcctcctcctcctcttcctcct-cctcctcttcctc +ctcctcctcttcctcctcctcttcctcctcctcctcctcttcctcccctcctcctcctct +tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct +cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag +gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac +tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca +ggacccagaagcctggtagaaagtgaccatgtgtggcaggt +>ref_chr20:63048933-63049513 +gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca +ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat +gtgggggcaga------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------ctcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctct +tcctcttcctcctcctcctcctcttcctcctcctcctcctcttcctccctcctcttcctc +ctcctcctcttcctcctcctcttcctcctcctcctcctcttcctcccctcctcctcctct +tcctcctcctcctcttcctctcctcctcctcctcctcttcctcctcctcctcctcttcct +cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag +gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac +tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca +ggacccagaagcctggtagaaagtgaccatgtgtggcaggt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa new file mode 100644 index 00000000..cb9e5d5b --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa @@ -0,0 +1,45 @@ +>syndip_1_chr20:60702842-60703216 +tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc +acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac +atttgtcattgtctttctttcttttctttctctctctctctttcttttccttctttcttt +ctttctttctttctttctttctttctttctttctttctttctttcttttctttccttctt +ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt +ctttctttctttctttcttctttctttctttttctttcttttttttttgggatggagtct +cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacct +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg +>syndip_2_chr20:60702842-60703216 +tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc +acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac +atttgtcattgtctttctttcttttctttctctctctctctctctttcttttccttcttt +ctttctttctttctttctttcttcctttctttctttctttctttctttctttctttcttt +ctttct---tttctttccttctttctttctttctttc----------------------- +---------------------------------------tttttttttgggatggagtct +cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacct +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg +>p:HG002_1_chr20:60702842-60703216 +tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc +acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac +atttgtcattgtctttctttcttttctttctctctctctctctctttcttttccttcttt +ctttctttctttctttctttcttcctttctttctttctttctttctttctttctttcttt +ctttct---tttctttccttctttctttctttctttc----------------------- +---------------------------------------tttttttttgggatggagtct +cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaacctccacct +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg +>p:HG002_2_chr20:60702842-60703216 +tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc +acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac +atttgtcattgtctttctttcttttctttctctctctctctttcttttccttctttcttt +ctttctttctttctttctttctttctttctttctttctttctttcttttctttccttctt +ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt +ctttctttctttctttcttctttctttctttttctttcttttttttttgggatggagtct +cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaacctccacct +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg +>ref_chr20:60702842-60703216 +tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc +acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac +atttgtcattgtctttctttcttttctttctctctctctctttct--------------- +-------------tttccttctttctttctttctttctttctttctttctttctttcttt +cttttc---tttccttctttctttctttctttctttc----------------------- +---------------------------------------tttttttttgggatggagtct +cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacct +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa new file mode 100644 index 00000000..446dd9db --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa @@ -0,0 +1,75 @@ +>syndip_1_chr20:13848064-13848750 +tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag +aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg +cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag +agagaaaagaagaagagaaggttagagaaaagaagagaaggagaaggagaaggaggagga +gaaggagaaggagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaa +gaagaagaagaagaagaagtcaaatatctggagaagaagaagaagaagaggaagaagaag +aagaggaagaagaagaagaagaggaagaggaagaaagaagaagaacaagaagaagaagaa +gtcaaatatctggagaagaagaagaagaagaggaggaagaagaagaagaagaggaggagg +aagaagaagaagaagaagaagaagaagaagcaggagcaggagaagaagaagaagaagaag +aagaagaggaagaggaagaggaagaggaagaagaagaagaagaagaagaagaagaagaag +aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga +caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac +tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca +gagaatc +>syndip_2_chr20:13848064-13848750 +tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag +aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg +cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag +agagaaaagaagaagagaaggttagagaaaagaagagaaggagaaggagaaggaggagga +gaaggagaaggagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaa +gaagaagaagaagaagaagtcaaatatctggagaagaagaagaagaagaggaagaagaag +aagaggaagaagaagaagaagaggaagaggaagaaagaagaagaacaagaagaagaagaa +gtcaaatatctggagaagaagaagaagaagaggaggaagaagaagaagaagaggaggagg +aagaagaagaagaagaagaagaagaagaagcaggagcaggagaagaagaagaagaagaag +aagaagaggaagaggaagaggaagag---------------gaagaagaagaagaagaag +aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga +caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac +tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca +gagaatc +>p:HG002_1_chr20:13848064-13848750 +tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag +aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg +cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag +agagaaaagaagaagagaaggttagagaaaagaagagaaggagaaggagaaggaggagga +gaaggagaaggagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaa +gaagaagaagaagaagaagtcaaatatctggagaagaagaagaagaagaggaagaagaag +aagaggaagaagaagaagaagaggaagaggaagaaagaagaagaacaagaagaagaagaa +gtcaaatatctggagaagaagaagaagaagaggaggaagaagaagaagaagaggaggagg +aagaagaagaagaagaagaagaagaagaagcaggagcaggagaagaagaagaagaagaag +aagaagaggaagaggaagaggaagag---------------gaagaagaagaagaagaag +aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga +caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac +tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca +gagaatc +>p:HG002_2_chr20:13848064-13848750 +tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag +aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg +cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag +agagaaaagaagaagagaaggttagagaaaagaagagaaggagaaggagaaggaggagga +gaaggagaaggagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaa +gaagaagaagaagaagaagtcaaatatctggagaagaagaagaagaagaggaagaagaag +aagaggaagaagaagaagaagaggaagaggaagaaagaagaagaacaagaagaagaagaa +gtcaaatatctggagaagaagaagaagaagaggaggaagaagaagaagaagaggaggagg +aagaagaagaagaagaagaagaagaagaagcaggagcaggagaagaagaagaagaagaag +aagaagaggaagaggaagaggaagaggaagaagaagaagaagaagaagaagaagaagaag +aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga +caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac +tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca +gagaatc +>ref_chr20:13848064-13848750 +tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag +aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg +cgtctcaaaaaaaaaaaagaagaagaaaagtagaagacgaagatgaagaagaataagaag +agagaaaagaagaagagaaggttagagaaaagaagagaa--------------------- +----gagaaggagaaggagaaggaggaggagaaggagaaggagaagaagaagaagaagaa +gaagaagaagaagaagaagtcaaatatctgg---agaagaagaagaagaggaagaagaag +aaaaggaagaagaagaagaagaggaagaggaagaaagaagaagaacaagaagaagaagaa +gtcaaatatctggagaagaagaagaagaagaggaagaagaagaagaagaggaggaggaag +aaggagaaggagaaggagaaggagaagga------------------------------- +-----------------------------------------gaaggagaaggagaagaag +aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaaaata +caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac +tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca +gagaatc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_2009907a3d9c420147c4ae73f6df2941.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2009907a3d9c420147c4ae73f6df2941.msa new file mode 100644 index 00000000..ae63f17e --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2009907a3d9c420147c4ae73f6df2941.msa @@ -0,0 +1,120 @@ +>syndip_1_chr20:35579913-35580987 +aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga +ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct +gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca +ggcagactgctcaaactcaggagttcgagaccagcctgggcaacatggcaagacaccatc +tgtaccaaaaatacaaaaaaatagccgggcatagtggtgcgcacctgtggtcccagctac +tcaggaggttgaggtgggaggatcacttgagcccagggggcggaggttacagtgagccaa +gatttgcactccagccttggtgacagagcaagagaccctgtctaaaatatatatgtgtat +atacacacacacacacacaaatatattttttggctgggcatggtggctgacacctgtaat +cccagcactttgggaggccaaggcaggcagatcacttgaggtcaggagtttgagaccagc +ctggccaagatgatgaaatcctgtctctactaaaaatacaaaaaaattagctgggtgtgg +taacgggtggctgtaatcccagctacttgggaagctgaggcagaagaatagcttgaacct +gggaggcacaggttgcagtgagccgagatcgcgccactgcagcctaggcagcagagtgag +actctgtcttaaaatatatattatggtttattttattttatggttaaaggccatatatat +atatatatatatatatataatatatatatatataaaatatatatatataaaatatatata +tatataaaatatatatatataaaatatatatatatatataaaatatatatatataaaata +tatatatataaaatatatatatatataaaatatatatatatataaaatatatatatataa +aatatatatatatataaaatatatatatataaaatatatatatataaaatatatatatat +ataaaatatatatatataaaatatatatatataaaatatatatatatataatatatatat +ataaaatatatatatataaaatatatatataaaatatatatatataaaatatatatatat +aaaatatatatatataaaatatatatatataaaatatatatataaaatatatatatataa +aatatatatatataaaatatatatatataaaatatatatatggaattccatatctatcaa +tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag +atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa +>syndip_2_chr20:35579913-35580987 +aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga +ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct +gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca +ggcagactgctcaaactcaggagttcgagaccagcctgggcaacatggcaagacaccatc +tgtaccaaaaatacaaaaaaatagccgggcatagtggtgcgcacctgtggtcccagctac +tcaggaggttgaggtgggaggatcacttgagcccagggggcggaggttacagtgagccaa +gatttgcactccagccttggtgacagagcaagagaccctgtctaaaatatatatgtgtat +atacacacacacacacacaaatatattttttggctgggcatggtggctgacacctgtaat +cccagcactttgggaggccaaggcaggcagatcacttgaggtcaggagtttgagaccagc +ctggccaagatgatgaaatcctgtctctactaaaaatacaaaaaaattagctgggtgtgg +taacgggtggctgtaatcccagctacttgggaagctgaggcagaagaatagcttgaacct +gggaggcacaggttgcagtgagccgagatcgcgccactgcagcctaggcagcagagtgag +actctgtcttaaaatatatattatggtttattttattttatggttaaaggccatatatat +at----------------------------------atatatatatataaa------ata +tatataatatatatatatata--------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------atatat +atat---------------aaaatatatataaaatatatatatataaaatatatatatat +aaaatatatatatataaaatat-------------------------------------- +----------------atatatatatataaaatatatatatggaattccatatctatcaa +tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag +atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa +>p:HG002_1_chr20:35579913-35580987 +aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga +ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct +gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca +ggcagactgctcaaactcaggagttcgagaccagcctgggcaacatggcaagacaccatc +tgtaccaaaaatacaaaaaaatagccgggcatagtggtgcgcacctgtggtcccagctac +tcaggaggttgaggtgggaggatcacttgagcccagggggcggaggttacagtgagccaa +gatttgcactccagccttggtgacagagcaagagaccctgtctaaaatatatatgtgtat +atacacacacacacacacaaatatattttttggctgggcatggtggctgacacctgtaat +cccagcactttgggaggccaaggcaggcagatcacttgaggtcaggagtttgagaccagc +ctggccaagatgatgaaatcctgtctctactaaaaatacaaaaaaattagctgggtgtgg +taacgggtggctgtaatcccagctacttgggaagctgaggcagaagaatagcttgaacct +gggaggcacaggttgcagtgagccgagatcgcgccactgcagcctaggcagcagagtgag +actctgtcttaaaatatatattatggtttattttattttatggttaaaggccatatatat +at----------------------------------atatatatatataaa------ata +tatataatatatatatatata--------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------atatat +atat---------------aaaatatatataaaatatatatatataaaatatatatatat +aaaatatatatatataaaatat-------------------------------------- +----------------atatatatatataaaatatatatatggaattccatatctatcaa +tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag +atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa +>p:HG002_2_chr20:35579913-35580987 +aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga +ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct +gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca +ggcagactgctcaaactcaggagttcgagaccagcctgggcaacatggcaagacaccatc +tgtaccaaaaatacaaaaaaatagccgggcatagtggtgcgcacctgtggtcccagctac +tcaggaggttgaggtgggaggatcacttgagcccagggggcggaggttacagtgagccaa +gatttgcactccagccttggtgacagagcaagagaccctgtctaaaatatatatgtgtat +atacacacacacacacacaaatatattttttggctgggcatggtggctgacacctgtaat +cccagcactttgggaggccaaggcaggcagatcacttgaggtcaggagtttgagaccagc +ctggccaagatgatgaaatcctgtctctactaaaaatacaaaaaaattagctgggtgtgg +taacgggtggctgtaatcccagctacttgggaagctgaggcagaagaatagcttgaacct +gggaggcacaggttgcagtgagccgagatcgtgccactgcagcctaggcagcagagtgag +actctgtcttaaaatatatattatggtttattttattttatggttaaaggccatatatat +atatatatatatatatataatatatatatatataaaatatatatatataaaatatatata +tatataaaatatatatatataaaatatatatatatatataaaatatatatatataaaata +tatatatataaaatatatatatatataaaatatatatatatataaaatatatatatataa +aatatatatatatataaaatatatatatataaaatatatatatataaaatatatatatat +ataaaatatatatatataaaatatatatatataaaatatatatatatataatatatatat +ataaaatatatatatataaaatatatatataaaatatatatatataaaatatatatatat +aaaatatatatatataaaatatatatatataaaatatatatataaaatatatatatataa +aatatatatatataaaatatatatatataaaatatatatatggaattccatatctatcaa +tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag +atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa +>ref_chr20:35579913-35580987 +aaaactccatctc-aaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga +ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct +gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca +ggcagactgctcaaactcaggagttcgagaccagcctgggcaacatggcaagacaccatc +tgtaccaaaaatacaaaaaaatagccgggcatagtggtgcgcacctgtggtcccagctac +tcaggaggttgaggtgggaggatcacttgagcccagggggcggaggttacagtgagccaa +gatttgcactccagccttggtgacagagcaagagaccctgtctaaaatatatatgtgtat +atacacacacacacacacaaatatattttttggctgggcatggtggctgacacctgtaat +cccagcactttgggaggccaaggcaggcagatcacttgaggtcaggagtttgagaccagc +ctggccaagatgatgaaatcctgtctctactaaaaatacaaaaaaattagctgggtgtgg +taacgggtggctgtaatcccagctacttgggaagctgaggcagaagaatagcttgaacct +gggaggcacaggttgcagtgagccgagatcgcgccactgcagcctaggcagcagagtgag +actctgtcttaaaatatatattatggtttattttattttatggttaaaggccatatatat +at-------------------------atatataaaatatatatatataaa------ata +tatataaaatatatatatataaa------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------atatatatataatatatatat +ataa---------------aatatatatatataatatatatatataaaatatatatatat +aaaatatatatatatataaaat-------------------------------------- +----------------atatatatatataaaatatatatatggaattccatatctatcaa +tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag +atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa new file mode 100644 index 00000000..38013a4c --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa @@ -0,0 +1,65 @@ +>syndip_1_chr20:62830512-62830939 +cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag +gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta +cccatgtaccacagtcccccaccccccaccgtgctccacc-------------------- +----------------------ccccaccacagtcccccaccccccaccccagtccccaa +acccctaccacagtcccccaa-ccccaccacagtcccccaccccctaccacagtccccca +ccccctaccacagtcccccaacccccaccacagtcccccacccccaccacagtccgccaa +cccccatgacagtccctca--------------------acccccaccacagtcccccaa +cccccatgacagtcccccaacccccaccacagtcccccaacccccaccacagtccccacc +ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatccccca-c +ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa +tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt +ggcgcctttggcccctctgaccttcca +>syndip_2_chr20:62830512-62830939 +cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag +gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta +cccatgtaccacagtcccccaccccccaccacagtcccccaccccccaccccagtcccca +aacccctaccacagtcccccaaccccaccacagtcccccaccccctaccacagtccccca +acccctaccacagtcccccaacccccaccacagtcccccaccccc-accacagtccgcca +acccccatgacagtcccccaacccccaccacagtcccccacccccaccacagtccgccaa +cccccatgacagtcccccaacccccaccacagtcccccaacccccatgacagtcccccaa +cccccaccacagtcccccaacccctaccacagtcccccaacccccaccacagtccccacc +ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatcccccacc +ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa +tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt +ggcgcctttggcccctctgaccttcca +>p:HG002_1_chr20:62830512-62830939 +cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag +gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta +cccatgtaccacagtcccccaccccccaccacagtcccccaccccccaccccagtcccca +aacccctaccacagtcccccaaccccaccacagtcccccaccccctaccacagtccccca +acccctaccacagtcccccaacccccaccacagtcccccaccccc-accacagtccgcca +acccccatgacagtcccccaacccccaccacagtcccccacccccaccacagtccgccaa +cccccatgacagtcccccaacccccaccacagtcccccaacccccatgacagtcccccaa +cccccaccacagtcccccaacccctaccacagtcccccaacccccaccacagtccccacc +ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatcccccacc +ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa +tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt +ggcgcctttggcccctctgaccttcca +>p:HG002_2_chr20:62830512-62830939 +cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag +gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta +cccatgtaccacagtcccccaccccccaccgtgctccacc-------------------- +----------------------ccccaccacagtcccccaccccccaccccagtccccaa +acccctaccacagtcccccaa-ccccaccacagtcccccaccccctaccacagtccccca +ccccctaccacagtcccccaacccccaccacagtcccccacccccaccacagtccgccaa +cccccatgacagtccctca--------------------acccccaccacagtcccccaa +cccccatgacagtcccccaacccccaccacagtcccccaacccccaccacagtccccacc +ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatccccca-c +ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa +tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt +ggcgcctttggcccctctgaccttcca +>ref_chr20:62830512-62830939 +cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag +gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta +cccatgtaccacagtccccc---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------acccccatgacagtcccccaa +cccccaccacagtcccccaacccctaccacagtcccccaacccccaccacagtccccacc +ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatcccccacc +ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa +tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt +ggcgcctttggcccctctgaccttcca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa new file mode 100644 index 00000000..ecf894da --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa @@ -0,0 +1,130 @@ +>syndip_1_chr20:48449605-48450562 +catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg +tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa +taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg +gatggatggggtgggatgggatgggatg-----ggatggatggatgggatggatggatgg +gatgggataggatggatggggtgagatgggatggaatggatgggatgggatggatggatg +ggatgggatggatgggatgggatggatgggatgggatggatgggatggatgggatggaat +gggatggatgggatgggatgggattggatggggtgggatgggatggatggatgggatgga +tggatgggatggaatggatggatgggatgggatggatgggatgggatggatggatgggat +gggatggatgcaatggatggatgggatgggatgggatgggatggatggaatggatggatg +ggatgggatgggatggatggggtgggatgagatggatgggatggatgggatgggatggat +ggat-----------------------ggatgggatgggatggatggatgggatgggatg +ggatggggtggatgggatggatggatggatgggatgggatggatggatgggatggaatgg +atgaatgggatgtgatggatgggatggaatgggatagatgggatgggattggatgggatg +gggtgggatggggtaagatggggtgggatgggatggtatgggatggatggatgagatggg +atgggatgagatgggattggatggggtgggatgcaatgggatgagatgagatgagatgag +atgggatggatggcatgggatgggatgggattggatggggtgggatgggatggggtagga +tgggatgggatgggatgggatgggatgggatgggatgggatgggatggggtgggatgggt +tgggatgggatggggtgggttgggatggggtggggtgggatgggatgggatgggatgggg +tggggtggggtggggtgggatggggtgggatggggtgggatgggatggggtgggatggga +tgggatggggtgggatgggatggggtggggtggggtggggtgggatgaggtggggtggga +tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga +ttggatggggtgggatgcgatgg-----gatgagatgagatggatggcatgggatgggtg +ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa +atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg +gacaaagcacttaaaaccacacagt +>syndip_2_chr20:48449605-48450562 +catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg +tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa +taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg +gatggatggggtgggatgggatgggatg-------------ggatggggtggatggatgg +gatgggataggatggatggggtgagatgggatggaatggatggaatgggatggatggatg +ggatgggatggatgggatgggatggatgggatggatgggatgggatggatgggatggaat +gggatggatgggatgggatgggattggatggggtgggatgggatggatggatgggatgga +tggatgggatggaatggatggatgggatgggatggatgggatgggatggatggatgggat +gggatggatgcaatggatggatgggatgggatgggatgggatggatggaatggatggatg +ggatgggatgggatggatggggtgggatgagatggatgggatggatgggatgggatggat +ggatggatgggatgggatggatggatgggatgggatgggatggatggatgggatgggatg +ggatggggtggatgggatggatggatggatgggatgggatggatggatgggatggaatgg +atgaatgggatgtgatggatgggatggaatgggatagatgggatgggattggatgggatg +gggtgggatggggtaagatggggtgggatgggatggtatgggatggatggatgagatggg +atgggatgagatgggattggatggggtgggatgcaatgggatgagatgagatgagatgag +atgggatggatggcatgggatggaatgggattggatggggtgggatgggatggggtagga +tgggatgggatgggatgggatgggatgggatggggtgggatgggttgggatgggatgggg +tgggttgggatgggg----------tggggtgggatgggatgggatggggtgggatgggg +tgggatggggtgggatggggtgggatgggatggggtgggatgggatgggatgggatgggg +tgggatggggtgggatgggatggggtggggtggggtggggtgggatgaggtggggtggga +tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga +ttggatggggtgggatgcgatgggatgagatgagatgagatgaatggcatgggatgggtg +ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa +atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg +gacaaagcacttaaaaccacacagt +>p:HG002_1_chr20:48449605-48450562 +catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg +tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa +taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg +gatggatggggtgggatgggatgggatg-------------ggatggggtggatggatgg +gatgggataggatggatggggtgagatgggatggaatggatggaatgggatggatggatg +ggatgggatggatgggatgggatggatgggatggatgggatgggatggatgggatggaat +gggatggatgggatgggatgggattggatggggtgggatgggatggatggatgggatgga +tggatgggatggaatggatggatgggatgggatggatgggatgggatggatggatgggat +gggatggatgcaatggatggatgggatgggatgggatgggatggatggaatggatggatg +ggatgggatgggatggatggggtgggatgagatggatgggatggatgggatgggatggat +ggatggatgggatgggatggatggatgggatgggatgggatggatggatgggatgggatg +ggatggggtggatgggatggatggatggatgggatgggatggatggatgggatggaatgg +atgaatgggatgtgatggatgggatggaatgggatagatgggatgggattggatgggatg +gggtgggatggggtaagatggggtgggatgggatggtatgggatggatggatgagatggg +atgggatgagatgggattggatggggtgggatgcaatgggatgagatgagatgagatgag +atgggatggatggcatgggatggaatgggattggatggggtgggatgggatggggtagga +tgggatgggatgggatgggatgggatgggatggggtgggatgggttgggatgggatgggg +tgggttgggatgggg----------tggggtgggatgggatgggatggggtgggatgggg +tgggatggggtgggatggggtgggatgggatggggtgggatgggatgggatgggatgggg +tgggatggggtgggatgggatggggtggggtggggtggggtgggatgaggtggggtggga +tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga +ttggatggggtgggatgcgatgggatgagatgagatgagatgaatggcatgggatgggtg +ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa +atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg +gacaaagcacttaaaaccacacagt +>p:HG002_2_chr20:48449605-48450562 +catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg +tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa +taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg +gatggatggggtgggatgggatgggatg-----ggatggatggatgggatggatggatgg +gatgggataggatggatggggtgagatgggatggaatggatgggatgggatggatggatg +ggatgggatggatgggatgggatggatgggatgggatggatgggatggatgggatggaat +gggatggatgggatgggatgggattggatggggtgggatgggatggatggatgggatgga +tggatgggatggaatggatggatgggatgggatggatgggatgggatggatggatgggat +gggatggatgcaatggatggatgggatgggatgggatgggatggatggaatggatggatg +ggatgggatgggatggatggggtgggatgagatggatgggatggatgggatgggatggat +ggat-----------------------ggatgggatgggatggatggatgggatgggatg +ggatggggtggatgggatggatggatggatgggatgggatggatggatgggatggaatgg +atgaatgggatgtgatggatgggatggaatgggatagatgggatgggattggatgggatg +gggtgggatggggtaagatggggtgggatgggatggtatgggatggatggatgagatggg +atgggatgagatgggattggatggggtgggatgcaatgggatgagatgagatgagatgag +atgggatggatggcatgggatgggatgggattggatggggtgggatgggatggggtagga +tgggatgggatgggatgggatgggatgggatgggatgggatgggatggggtgggatgggt +tgggatgggatggggtgggttgggatggggtggggtgggatgggatgggatgggatgggg +tggggtggggtggggtgggatggggtgggatggggtgggatgggatggggtgggatggga +tgggatggggtgggatgggatggggtggggtggggtggggtgggatgaggtggggtggga +tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga +ttggatggggtgggatgcgatgg-----gatgagatgagatggatggcatgggatgggtg +ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa +atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg +gacaaagcacttaaaaccacacagt +>ref_chr20:48449605-48450562 +catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg +tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa +taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg +gatggatggggtgggatgggatgggatgggatgggatggatggatgggatggatggatgg +gatgggataggatggatggggtgagatgggatggaatggatgggatgggatggatggatg +ggatgggatggatgggatgggatggatgggatgggatggatgggatggatgggatggaat +gggatggatgggatgggatgggattggatggggtgggatgggatggatggatgggatgga +tggatgggatggaatggatggatgggatgggatggatgggatgggatggatggatgggat +gggatggatgcaatggatggatgggatgggatgggatgggatggatggaatggatggatg +ggatgggatgggatggatggggtgggatgagatggatgggatggatgggatgggatggat +ggat-----------------------ggatgggatgggatggatggatgggatgggatg +ggatggggtggatgggatggatggatggatgggatgggatggatggatgggatgggat-- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------------------gga +tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga +ttggatggggtgggatgcgatgg-----gatgagatgagatggatggcatgggatgggtg +ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa +atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg +gacaaagcacttaaaaccacacagt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_2ad6d5e80b30a1cebe34e993b5700918.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2ad6d5e80b30a1cebe34e993b5700918.msa new file mode 100644 index 00000000..926221e5 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2ad6d5e80b30a1cebe34e993b5700918.msa @@ -0,0 +1,55 @@ +>syndip_1_chr20:24681888-24682378 +ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag +ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg +cagcaggaggcaaactgtgaagtacctctctggc-------------------------- +------------------------------------------------------------ +------------------------------ctggcctttgtgaattaccacagtgcgggg +tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt +tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta +tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc +agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga +ggaggagac +>syndip_2_chr20:24681888-24682378 +ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag +ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg +cagcaggaggcaaactgtgaagtacctctctggc-------------------------- +--------------------------------ctggcctttgtgaattaccacagtcagg +ggtccacccaggaaactgtgaagtacctctgtggcctttgtgaattaccacagtcagggg +tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt +tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta +tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc +agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga +ggaggagac +>p:HG002_1_chr20:24681888-24682378 +ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag +ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg +cagcaggaggcaaactgtgaagtacctctctggcctggcctttgtgaattaccacagtca +ggggtctacccaggaaactgtgaagtacctctctggcctttgtgaattaccacagtcagg +ggtccacccaggaaactgtgaagtacctctctggcctttgtgaattaccacagtcagggg +tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt +tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta +tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc +agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga +ggaggagac +>p:HG002_2_chr20:24681888-24682378 +ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag +ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg +cagcaggaggcaaactgtgaagtacctctctggc-------------------------- +------------------------------------------------------------ +------------------------------ctggcctttgtgaattaccacagtgcgggg +tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt +tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta +tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc +agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctctga +ggaggagac +>ref_chr20:24681888-24682378 +ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag +ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg +cagcaggaggcaaactgtgaagtacctctctggc-------------------------- +--------------------------------ctggcctttgtgaattaccacagtgcgg +ggtccacccaggaaactgtgaagtacctctgtggcctttgtgaattaccacagtcagggg +tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt +tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta +tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc +agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga +ggaggagac diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa new file mode 100644 index 00000000..8dbb59b0 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa @@ -0,0 +1,140 @@ +>syndip_1_chr20:62349512-62350033 +gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac +aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa +gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg +atgatggtgatggtgggggtgatgatggtgatggtgatggtgatggtgggggtggtgatg +gtgggggtgggggtggtgatggtgggggtgggggtgatgatggtgatggtgggggtgatg +acggtggggctgggggtgatgatggtgggggtgatgacggtgatgatgggggtggagggg +tgatgacggtgatgatgggggtgggggggtgatgacggtgatgtgggggtgatgacggtg +ggggtgggggtgatgacggtgat---------------------ggtgggggtgatgacg +gtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatg +acggtgggggtgggggtgatgatggtgatggtgggggtgatgatggtgggggtgggggtg +atgatggtgagggtgatgatggtgggggtaatgacggtggggtgggggtgatgacggtgg +gggtgggggtgatgacggtgatggtgggggtgatgacgatgatggtgggggtgatgacgg +tgatggcggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtga +tgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtggggg +tgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgatggtag +aggtgatgatggtgggggtgatggtgggggtgggggtggg---------ggtgggggtga +tgatggtgatggtgggggtgatgatggtgagggtgatgatggtgggggtgatggtggggg +tggg-------------------------------------------------------- +ggtgggggtgggggtgatgacggtgat--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------ggtgggggtgatgatggtgggagtgggctggtagtg +ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg +gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc +>syndip_2_chr20:62349512-62350033 +gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac +aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa +gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg +atgatggtgatggtgggggtgatgatggtgatggtga---------------tggtgatg +gtgatggtgggggtggtgatggtgggggtgggggtgatgatggtgatggtgggggtgatg +acggtggggctgggggtgatgatggtgggggtgatgacggtgatgatgggggtggagggg +tgatgacggtgatgatgggggtgggggggtgatgacggtgatgtgggggtgatgacggtg +ggggtgggggtgatgacggtgatggtgggggtgatgacggtgggggtgggggtgatgacg +gtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatg +acggtgggggtgggggtgatgatggtgatggtgggggtgatgatggtgggggtgggggtg +atgatggtgagggtgatgatggtgggggtaatgacggtggggtgggggtgatgacggtgg +gggtgggggtgatgacggtgatggtgggggtgatgacgatgatggtgggggtgatgacgg +tgatggcggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtga +tgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtggggg +tgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgatggtag +aggtgatgatggtgggggtgggggtgggggtgggggtgagggtgatgatggtgggggtga +tgatggtgatggtgggggtgatgatggtgagggtgatgatggtgggggtgatggtggggg +tgggggtgggggtaatgacggtggggtgggggtaatgacggtgggggtgggggtgatgac +ggtgggggtgggggtgatgacggtgatggtgggggtgatgacgatgatggtgggggtgat +gacggtgatggcggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtggg +ggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggt +gggggtgatgacggtgatggtagaggtgatgatggtgggggtgatggtgggggtgggggt +gggggtgggggtgatgatggtgatggtgggggtgatgatggtgagggtgatgatggtggg +ggtgatggtgggggtgggggtgggggtgggggtgatgatggtgggagtgggctggtagtg +ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg +gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc +>p:HG002_1_chr20:62349512-62350033 +gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac +aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa +gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg +atgatggtgatggtgggggtgatgatggtgatggtga---------------tggtgatg +gtgatggtgggggtggtgatggtgggggtgggggtgatgatggtgatggtgggggtgatg +acggtggggctgggggtgatgatggtgggggtgatgacggtgatgatgggggtggagggg +tgatgacggtgatgatgggggtgggggggtgatgacggtgatgtgggggtgatgacggtg +ggggtgggggtgatgacggtgatggtgggggtgatgacggtgggggtgggggtgatgacg +gtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatg +acggtgggggtgggggtgatgatggtgatggtgggggtgatgatggtgggggtgggggtg +atgatggtgagggtgatgatggtgggggtaatgacggtggggtgggggtgatgacggtgg +gggtgggggtgatgacggtgatggtgggggtgatgacgatgatggtgggggtgatgacgg +tgatggcggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtga +tgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtggggg +tgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgatggtag +aggtgatgatggtgggggtgggggtgggggtgggggtgagggtgatgatggtgggggtga +tgatggtgatggtgggggtgatgatggtgagggtgatgatggtgggggtgatggtggggg +tgggggtgggggtaatgacggtggggtgggggtaatgacggtgggggtgggggtgatgac +ggtgggggtgggggtgatgacggtgatggtgggggtgatgacgatgatggtgggggtgat +gacggtgatggcggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtggg +ggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggt +gggggtgatgacggtgatggtagaggtgatgatggtgggggtgatggtgggggtgggggt +gggggtgggggtgatgatggtgatggtgggggtgatgatggtgagggtgatgatggtggg +ggtgatggtgggggtgggggtgggggtgggggtgatgatggtgggagtgggctggtagtg +ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg +gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc +>p:HG002_2_chr20:62349512-62350033 +gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac +aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa +gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg +atgatggtgatggtgggggtgatgatggtgatggtgatggtgatggtgggggtggtgatg +gtgggggtgggggtggtgatggtgggggtgggggtgatgatggtgatggtgggggtgatg +acggtggggctgggggtgatgatggtgggggtgatgacggtgatgatgggggtggagggg +tgatgacggtgatgatgggggtgggggggtgatgacggtgatgtgggggtgatgacggtg +ggggtgggggtgatgacggtgat---------------------ggtgggggtgatgacg +gtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatg +acggtgggggtgggggtgatgatggtgatggtgggggtgatgatggtgggggtgggggtg +atgatggtgagggtgatgatggtgggggtaatgacggtggggtgggggtgatgacggtgg +gggtgggggtgatgacggtgatggtgggggtgatgacgatgatggtgggggtgatgacgg +tgatggcggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtga +tgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgggggtggggg +tgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgatggtag +aggtgatgatggtgggggtgatggtgggggtgggggtggg---------ggtgggggtga +tgatggtgatggtgggggtgatgatggtgagggtgatgatggtgggggtgatggtggggg +tggg-------------------------------------------------------- +ggtgggggtgggggtgatgacggtgat--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------ggtgggggtgatgatggtgggagtgggctggtagtg +ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg +gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc +>ref_chr20:62349512-62350033 +gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac +aattccggatagactgtcatggaatagaaaaactacagggtgcatttcag---------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------gaaggaggaag +cagcaacccaggggatggggatggggatggggatggtgggggtgatgggggtgatgatgg +tgatggtgatggtgatggtgatggtgatggtgatggtggg---------ggtggtgatg- +----------ggtgggggtgatgacggtgggagtgggggtgatgacggtgatggtggggg +tgat-----------------------------------------------------gac +ggtgatggtgggggtgatgacggtgat--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------ggtgggggtgatgatggtgggagtgggctggtagtg +ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg +gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa new file mode 100644 index 00000000..5365dadd --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa @@ -0,0 +1,110 @@ +>syndip_1_chr20:24407883-24409098 +ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag +ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg +ttggagccacaggaggtgatatagggtgcttccggccctggccatagggtacttctggga +ggtgatatagggtatttccagccttggccatagggtacttctgggaggtgatatagggta +tttccagccttggccatagggtacttctgggaggtgatatagggtacttccagccttggc +catagggtacttctgggaggtgatatagggtacttccagccctggccatagggtacttct +gggaggagatatagggtatttccagccttggccatagggtacttctgggaggtgatatag +ggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttccagcct +tggccatagggtacttctgggaggtgatatagggtatttccagccctggccatagggtac +ttctaggaggtgatatagggtacttccagccctggccatagggtacttctgggaggtgat +ataggatacttccagccctggccatagggtacttctgggaggtgatataggatacttcca +g----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------cctggccatagggtacttctgg +gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga +tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg +gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt +cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta +ctaaaagcacaaaaat +>syndip_2_chr20:24407883-24409098 +ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag +ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg +ttggagccacaggaggtgatatagggtgcttccagccctggccatagggtacttctggga +ggtgatatagggtatttccagccttggccatagggtacttctgggaggtgatatagggta +tttccagccttggccatagggtacttctgggaggtgatatagggtacttccagccttggc +catagggtacttctgggaggtgatatagggtacttccagccctggccatagggtacttct +gggaggagatatagggtatttccagccttggccatagggtacttctgggaggtgatatag +ggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttccagcct +tggccatagggtacttctgggaggtgatatagggtacttccagccttggccatagggtac +ttctgggaggtgatatagggtacttccagccctggccatagggtacttctgggaggagat +atagggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttcca +gccttggccatagggtacttctgggaggtgatatagggtatttccagccttggccatagg +gtacttctgggaggtgatatagggtatttccagccttggccatagggtacttctaggagg +tgatatagggtacttccagccctggccatagggtacttctgggaggtgatataggatact +tccagccctgg-----------------------------------------------ca +tagggtacttctgggaggtgatataggatacttccagccctggccatagggtacttctgg +gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga +tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg +gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt +cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta +ctaaaagcacaaaaat +>p:HG002_1_chr20:24407883-24409098 +ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag +ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg +ttggagccacaggaggtgatatagggtgcttccagccctggccatagggtacttctggga +ggtgatatagggtatttccagccttggccatagggtacttctgggaggtgatatagggta +tttccagccttggccatagggtacttctgggaggtgatatagggtacttccagccttggc +catagggtacttctgggaggtgatatagggtacttccagccctggccatagggtacttct +gggaggagatatagggtatttccagccttggccatagggtacttctgggaggtgatatag +ggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttccagcct +tggccatagggtacttctgggaggtgatatagggtacttccagccttggccatagggtac +ttctgggaggtgatatagggtacttccagccctggccatagggtacttctgggaggagat +atagggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttcca +gccttggccatagggtacttctgggaggtgatatagggtatttccagccttggccatagg +gtacttctgggaggtgatatagggtatttccagccttggccatagggtacttctaggagg +tgatatagggtacttccagccctggccatagggtacttctgggaggtgatataggatact +tccagccctgg-----------------------------------------------ca +tagggtacttctgggaggtgatataggatacttccagccctggccatagggtacttctgg +gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga +tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg +gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt +cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta +ctaaaagcacaaaaat +>p:HG002_2_chr20:24407883-24409098 +ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag +ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg +ttggagccacaggaggtgatatagggtgcttccggccctggccatagggtacttctggga +ggtgatatagggtatttccagccttggccatagggtacttctgggaggtgatatagggta +tttccagccttggccatagggtacttctgggaggtgatatagggtacttccagccttggc +catagggtacttctgggaggtgatatagggtacttccagccctggccatagggtacttct +gggaggagatatagggtatttccagccttggccatagggtacttctgggaggtgatatag +ggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttccagcct +tggccatagggtacttctgggaggtgatatagggtatttccagccctggccatagggtac +ttctaggaggtgatatagggtacttccagccctggccatagggtacttctgggaggtgat +ataggatacttccagccctggccatagggtacttctgggaggtgatataggatacttcca +g----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------cctggccatagggtacttctgg +gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga +tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg +gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt +cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta +ctaaaagcacaaaaat +>ref_chr20:24407883-24409098 +ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag +ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg +ttggagccacaggaggtgatatagggtgcttccagccctggccatagggtacttctggga +ggtgatatagggtatttccagccttggccatagggtacttctgggaggtgatatagggta +cttccagccttggccatagggtacttctgggaggtgatatagggtacttccagccttggc +catagggtacttctgggaggtgatatagggtacttccagccctggccatagggtacttct +gggaggagatatagggtatttccagccttggccatagggtacttctgggaggtgatatag +gttatttccagccttggccatagggtacttctgggaggtgatatagggtatttccagcct +tggccatagggtacttctgggaggtgatatagggtacttccagccctggccatagggtac +ttctgggaggtgatatagggtacttccagccctggccatagggtacttctgggaggagat +atagggtatttccagccttggccatagggtacttctgggaggtgatatagggtatttcca +gccttggccatagggtacttctgggaggtgatatagggtatttccagccttggccatagg +gtacttctgggaggtgatatagggtatttccagccttggccatagggtacttctaggagg +tgatatagggtacttccagccctggccatagggtacttctgggaggtgatataggatact +tccagccctggccatagggtacttctgggaggtgatataggatacttccagccctggcca +tagggtacttctgggaggtgatataggatacttccagccctggccatagggtacttctgg +gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga +tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg +gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt +cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta +ctaaaagcacaaaaat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa new file mode 100644 index 00000000..f0fd442d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa @@ -0,0 +1,295 @@ +>syndip_1_chr20:62057473-62059240 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcc------------------------------------------------- +------------------------------------------------------------ +---------------------------------------cagcaagacctcaggaaggac +ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc +caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat +ggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacc +aggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacac +aggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctca +ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct +cagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggct +cggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcca +cagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtct +acacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaagga +cggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaa +gacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac +caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc +tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc +tcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcc +acagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggc +ctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaag +gacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagca +agacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggg +gccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagaca +atggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggcctacacac +caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacct +caggaaggacggacacaggtctacacaccaggccacagacagtggggctcggggggccaa +cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg +gctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggc +cacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacagg +cctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagga +aggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcag +caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg +ggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacag +acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctac +acaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacg +gacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaaga +cctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggcc +aacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatg +gggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccag +gccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacag +gtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagg +aaggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctc +agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc +gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca +gacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctac +acaccaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtc +cccaacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccc +tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag +ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg +caggcctggagaagagcctgggaccaccgcggactgacc +>syndip_2_chr20:62057473-62059240 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcc------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------------cagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc +caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat +ggggctc----------------------------------------------------- +------------------------------------------------------------ +-----------------------------------gggggccaacctcagcaagacctca +ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct +cagcaagacctcaggaagga---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +cggacacaggtctacacaccaggccacagacaatggggctc------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------gggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc +tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc +tc---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------gggggccaacctctccagtccccaaacacagtc +cccaacagacaacttctcctgtcctcacatgcggctcccaacaaacaatttctcctgccc +tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag +ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg +caggcctggagaagagcctgggaccaccgcggactgacc +>p:HG002_1_chr20:62057473-62059240 +tccaagatccatgtaccccaacaccaccctacctggccacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcc------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------------cagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc +caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat +ggggctc----------------------------------------------------- +------------------------------------------------------------ +-----------------------------------gggggccaacctcagcaagacctca +ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct +cagcaagacctcaggaagga---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +cggacacaggtctacacaccaggccacagacaatggggctc------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------gggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc +tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc +tc---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------gggggccaacctctccagtccccaaacacagtc +cccaacagacaacttctcctgtcctcacatgcggctcccaacaaacaatttctcctgccc +tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag +ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg +caggcctggagaagagcctgggaccaccgcggactgacc +>p:HG002_2_chr20:62057473-62059240 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcc------------------------------------------------- +------------------------------------------------------------ +---------------------------------------cagcaagacctcaggaaggac +ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc +caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat +ggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacc +aggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacac +aggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctca +ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct +cagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggct +cggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcca +cagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtct +acacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaagga +cggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaa +gacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac +caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc +tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc +tcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcc +acagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggc +ctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaag +gacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagca +agacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggg +gccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagaca +atggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggcctacacac +caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacct +caggaaggacggacacaggtctacacaccaggccacagacagtggggctcggggggccaa +cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg +gctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggc +cacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacagg +cctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagga +aggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcag +caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg +ggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacag +acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctac +acaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacg +gacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaaga +cctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggcc +aacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatg +gggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccag +gccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacag +gtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagg +aaggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctc +agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc +gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca +gacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctac +acaccaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtc +cccaacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccc +tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag +ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg +caggcctggagaagagcctgggaccaccgcggactgacc +>ref_chr20:62057473-62059240 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcccagcaagacctcaggaaggacggacacaggcctacacaccaggccacag +acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctac +acaccaggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggac +ggacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc +caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat +ggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacc +aggccacagacaatggggctc--------------------------------------- +-----------------------------------gggggccaacctcagcaagacctca +ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct +cagcaagacctcaggaagga---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +cggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaa +gacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac +caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc +tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc +tcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcc +acagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggc +ctacacaccaggccacagacaatggggctc------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------gggggccaacctc +agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc +gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca +gacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctac +acaccaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtc +cccaacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccc +tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag +ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg +caggcctggagaagagcctgggaccaccgcggactgacc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa new file mode 100644 index 00000000..96858d9d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa @@ -0,0 +1,60 @@ +>syndip_1_chr20:49834009-49834659 +gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg +tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta +atattagcttgtctctctctctctctctctctctctctctctctctctctctctc----- +-------tatatatatatatatatacacacacacacacactatgtatatacacacactgt +atatacgcactatatgcactatatatacacacagtatatgtatatacacacactatatat +acagtgtgtatatatatatatactgtgtgtatatatatatatactgtata---------- +--------------------tatatatatatatactgtgtatatatatatactg------ +---------------------------------------tgtatatatatatatatactg +tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg +aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag +>syndip_2_chr20:49834009-49834659 +gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg +tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta +atattagcttgtctctctctctctctctctctctctctctctctctctctctctctctct +ctctatatatatatatatatatatatatacacacacacactatgtatatacacacactgt +atatacgcactatatgcactatatatacacacagtatatgtatatacacacactatatat +acagtgtgtatatatatatatactgtgtatatatatatatactgtatatatatatatact +gtgtatatatatatatactgtatatatatatatactgtgtatatatatatactgtatata +tatactgtatatatatatatatactgtatatatatatactgtgtatatatatatatactg +tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg +aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag +>p:HG002_1_chr20:49834009-49834659 +gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg +tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta +atattagcttgtctctctctctctctctctctctctctctctctctctctctctctctct +ctctatatatatatatatatatatatatacacacacacactatgtatatacacacactgt +atatacgcactatatgcactatatatacacacagtatatgtatatacacacactatatat +acagtgtgtatatatatatatactgtgtatatatatatatactgtatatatatatatact +gtgtatatatatatatactgtatatatatatatactgtgtatatatatatactgtatata +tatactgtatatatatatatatactgtatatatatatactgtgtatatatatatatactg +tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg +aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag +>p:HG002_2_chr20:49834009-49834659 +gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg +tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta +atattagcttgtctctctctctctctctctctctctctctctctctctctctctc----- +-------tatatatatatatatatacacacacacacacactatgtatatacacacactgt +atatacgcactatatgcactatatatacacacagtatatgtatatacacacactatatat +acagtgtgtatatatatatatactgtgtgtatatatatatatactgtata---------- +--------------------tatatatatatatactgtgtatatatatatactg------ +---------------------------------------tgtatatatatatatatactg +tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg +aaatacattcattttctctgttgggtagattgatttgacctaactttgagcctcaatctc +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag +>ref_chr20:49834009-49834659 +gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg +tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta +atattagcttg--tctctctctctctctctctctctctctctctctctctctctctctct +atatatatatatatatatatatatatatacacacacacactatgtatatacacacactgt +atatacgcactatatgcactatatatacacacagtatatgtatatacacacactatatat +acagtgtgtatatatatatatactgtgtatatatatatatactgtatatatatatatact +gtgtatatatatatatactgtgtatatatatatactgtgtatatatatatactgtatata +tatactgtatatatatatatatactgtatatatatatactgtgtatatatatatatactg +tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg +aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa new file mode 100644 index 00000000..f270ff15 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa @@ -0,0 +1,100 @@ +>syndip_1_chr20:57090572-57091512 +agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt +aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg +ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc +accaccacacactccaccatgcacacacacctccacgcacacacacctccacccgcacac +acacacaccttcaccaccacacactccacccacacacacctccacccacacacctcca-- +--------------cccacacacacctccgcccacacacacctccgcccacacacacctc +cacccacacacacctccacccacacacctccgcccacacacacctccgccctcacacacc +tccacccacacacacctccacccacacacgcctccacccgcacacacctccaccaccaca +cacacctccacccacacacaccacccatgcacacacctccacccacacacacacacctcc +acccacacacacctccacccacacgccacccatgcacacacctccacccacacacacaca +caccacccacccacacacacctccacccgcacacacctccaccaccacacacacctccac +ccacacacaccacccatgcacacacctccacccacacacacacacacctccacccacaca +cacctccacccacacacacctccacccacacacacacacctccacccacccacacacacc +tccacccgcacacacctccaccaccacacacatctccacccacacacaccacccatgtgc +acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc +tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc +tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg +attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct +ctttacctcaacaaggag +>syndip_2_chr20:57090572-57091512 +agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt +aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg +ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc +accaccacacactccgccatgcacacacacctccacgcacacacacctccacccgcacac +acacacaccttcaccaccacacactccacccacacacacctccacccacacacctccacc +cacacacacctccgcccacacacacctccgcccacacacacctccgcccacacacacctc +cacccacacacacctccgcccacacacctccgcccacacacacctccgccctcacacacc +tccacccacacacacctccacccacacacgcctccacccgcacacacctccaccaccaca +cacacctccacccacacacaccacccatgcacacacctccacccacacacacacacctcc +acccacacaca-----------------------cacacacctccacccacacaca---- +--------------------------------------------------------ccac +ccacacacaccacccatgcacacacctccacccaca------------------------ +--------------------------------------------------cacacacacc +tccacccgcacacacctccaccaccacacacatctccacccacacacaccacccatgtgc +acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc +tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc +tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg +attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct +ctttacctcaacaaggag +>p:HG002_1_chr20:57090572-57091512 +agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt +aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg +ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc +accaccacacactccgccatgcacacacacctccacgcacacacacctccacccgcacac +acacacaccttcaccaccacacactccacccacacacacctccacccacacacctccacc +cacacacacctccgcccacacacacctccgcccacacacacctccgcccacacacacctc +cacccacacacacctccgcccacacacctccgcccacacacacctccgccctcacacacc +tccacccacacacacctccacccacacacgcctccacccgcacacacctccaccaccaca +cacacctccacccacacacaccacccatgcacacacctccacccacacacacacacctcc +acccacacaca-----------------------cacacacctccacccacacaca---- +--------------------------------------------------------ccac +ccacacacaccacccatgcacacacctccacccaca------------------------ +--------------------------------------------------cacacacacc +tccacccgcacacacctccaccaccacacacatctccacccacacacaccacccatgtgc +acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc +tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc +tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg +attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct +ctttacctcaacaaggag +>p:HG002_2_chr20:57090572-57091512 +agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt +aagccacgaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg +ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc +accaccacacactccaccatgcacacacacctccacgcacacacacctccacccgcacac +acacacaccttcaccaccacacactccacccacacacacctccacccacacacctcca-- +--------------cccacacacacctccgcccacacacacctccgcccacacacacctc +cacccacacacacctccacccacacacctccgcccacacacacctccgccctcacacacc +tccacccacacacacctccacccacacacgcctccacccgcacacacctccaccaccaca +cacacctccacccacacacaccacccatgcacacacctccacccacacacacacacctcc +acccacacacacctccacccacacgccacccatgcacacacctccacccacacacacaca +caccacccacccacacacacctccacccgcacacacctccaccaccacacacacctccac +ccacacacaccacccatgcacacacctccacccacacacacacacacctccacccacaca +cacctccacccacacacacctccacccacacacacacacctccacccacccacacacacc +tccacccgcacacacctccaccaccacacacatctccacccacacacaccacccatgtgc +acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc +tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc +tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg +attactgggtcctgccggtctggccaggcctggtcacaggccgggtcccagtcagcagct +ctttacctcaacaaggag +>ref_chr20:57090572-57091512 +agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt +aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg +ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc +accaccacacactccgccatgcacacacacctccacgcacacacacctccacccgcacac +acacacaccttcaccaccacacactccacccacacacacctccacccacacacctccacc +cacacacacctccgcccacacacacctccgcccacacacacctccgcccacacacacctc +cacccacacacacctccgcccacacacctccgcccacacacacctccgccctcacacacc +tccacccacacacacctccacccacacacgcctccacccgcacacacctccaccaccaca +cacacctccacccacacacaccacccatgcacacacctccacccacacacacacacctcc +acccacacaca-----------------------cacacacctccacccacacaca---- +--------------------------------------------------------ccac +ccacacacaccacccatgcacacacctccacccaca------------------------ +--------------------------------------------------cacacacacc +tccacccgcacacacctccaccaccacacacatctccacccacacacaccacccatgtgc +acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc +tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc +tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg +attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct +ctttacctcaacaaggag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa new file mode 100644 index 00000000..7e9c806f --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa @@ -0,0 +1,55 @@ +>syndip_1_chr20:45600469-45600847 +aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc +atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt +gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt +aaaagcagtctctctctctctctctctctctctctctctctctctctatccccccctccc +tccctccttccctccctccctctctctctctctctccctttctctgtctctccctctctc +tctctctctctccctctctctctctttctccctctctctccctctctctctctctctctc +cctctctctctctttctccctctctctccctctctccccctctctctccctctctccctc +tctccctctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact +tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc +catcataccagctcttcccctggtatattcctaggagtgga +>syndip_2_chr20:45600469-45600847 +aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc +atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt +gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt +aaaagcagtctctctctctctctctctctctctct--ctctctctctatccccccctccc +tccctccttccctccctccctctctctctctctctccctttctctgtctctccctctctc +tctctctctctccctctctct--------------------------------------- +-------------------------------------ccctctctccccctctctctccc +tctctccctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact +tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc +catcataccagctcttcccctggtatattcctaggagtgga +>p:HG002_1_chr20:45600469-45600847 +aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc +atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt +gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt +aaaagcagtctctctctctctctctctctctctct--ctctctctctatccccccctccc +tccctccttccctccctccctctctctctctctctccctttctctgtctctccctctctc +tctctctctctccctctctct--------------------------------------- +-------------------------------------ccctctctccccctctctctccc +tctctccctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact +tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc +catcataccagctcttcccctggtatattcctaggagtgga +>p:HG002_2_chr20:45600469-45600847 +aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc +atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt +gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt +aaaagcagtctctctctctctctctctctctctctctctctctctctatccccccctccc +tccctccttccctccctccctctctctctctctctccctttctctgtctctccctctctc +tctctctctctccctctctctctctttctccctctctctccctctctctctctctctctc +cctctctctctctttctccctctctctccctctctccccctctctctccctctctccctc +tctccctctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact +tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc +catcataccagctcttcccctggtatattcctaggagtgga +>ref_chr20:45600469-45600847 +aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc +atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt +gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt +aaaagcagtctctctctctctctctctctctctct------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------------------ctc +tatccccccctctccctctctctctacccctgaatctggcctttctaaacaccctcaact +tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc +catcataccagctcttcccctggtatattcctaggagtgga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa new file mode 100644 index 00000000..c4967b74 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa @@ -0,0 +1,185 @@ +>syndip_1_chr20:18208997-18210828 +gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata +aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta +tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca +cacccacacgaacctagacgattccttccacaccttccccacacccacacgaacctagat +gaattccttccacaccttctccacacccacacgaacctagacgattccttccacaccttc +cccacacccattctccacacccacacgaacctagacgattccttccacacctccacaccc +acacgaacctagatgaattccttccacaccttccccacacccacacaaacctagacgatt +ccttccacaccttctccacacccacacgaacctagacgaattccttccagaccttctcca +cacccacacagacctagacgattccttccacacctccacacccacacgaacctagacgaa +ttccttccacaccttctccacacccacacgaacctagacgaattccttccacaccttctc +cacacccacacgaacctagacgaattccttccacaccttctccacacccacacgaaccta +gacgattccttccacactttccccacacccacacgaacctagacg-attccttccacac- +--ctccacacccacacgaacctagatgaattccttccacaccttccccacacccacacaa +acctagacgattccttccacaccttctccacacccacacaaacctagacaattccttcca +cacacacatgaacctagatgaattccttccacaccttctccacacccacatgaacctaga +cgattccttccacaccttctccacacccacacacacctagatgaattccttccacacctt +ccccacacccacacagacctagacgaattccttccacaccttctccacacccacacgaac +ctagacgattccttccacaccttctccatacccacacgaacctagacaattccttccaca +ccttctcacacccacacaaacctagatgaattccttccacaccttctccacacccacacg +aacctaaacgattccttccacaccttctccacacccacacacacctagatgaattccttc +cacaccttccccacacccacacagacctagacgaattccttccacaccttctccacaccc +acacgaacctagacgattccttccacaccttctccatacccacacgaacctagacaattc +cttccacaccttctcacacccacacaaacctagatgaattccttccacaccttccccaca +cccacacagacctagacgaattccttccacaccttctccacacccacacgaacctagaca +attccttccacaccttctcacacccacacaaacctagacgaattccttccacaccttctc +cacacccacacaaacctagatgaattccttccacaccttcctcacacccacacgaaccta +gacgattccttccacaccttccccacacccacacagacctagacgaattccttccacacc +ttctccacacccacacgaacctagacgattccttccacaccttccccacacccacacgaa +cctagacaattccttccacaccttctcacacccacacaaacctagatgaattccttccac +accttctccacacccacacaaacctagacgaattccttccacaccttcctcacacccaca +cgaacctagacgattccttccacaccttccccacacccacacagacctagacgaattcct +tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac +acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc +ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta +attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg +ctggagtgcag +>syndip_2_chr20:18208997-18210828 +gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata +aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta +tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca +cacccacacgaacctagacgattccttccacaccttccccacacccacacgaacctagat +gaattccttccacaccttctccacacccacacgaacctagacgattccttccacaccttc +cccacacccattctccacacccacacgaacctagacgattccttccacacctccacaccc +acacgaacctagatgaattccttccacaccttccccacacccacacaaacctagacgatt +ccttccacaccttctccacacccacacgaacctagacgaattccttccagaccttctcca +cacccacacagacctagacgattccttccacacctccacacccacacgaacctagacgaa +ttccttccacaccttctccacacccacacgaacctagacgaattccttccacaccttctc +cacacccacacgaacctagacgaattccttccacaccttctccacacccacacgaaccta +gacgattccttccacactttccccacacccacaagaacctagacg-attccttccacac- +--ctccacacccacacgaacctagatgaattccttccacaccttccccacacccacacaa +acctagacgattccttccacaccttctccacacccacacaaacctagacaattccttcca +cacacacatgaacctagatgaattccttccacaccttctccacacccacatgaacctaga +cgattccttccacaccttctccacacccacacacacctagatgaattccttccacacctt +ccccacacccacacagacctagacgaattccttccacacattctccacacccacacgaac +ctagacgattccttccacaccttctccatacccacacgaacctagacaattccttccaca +ccttctcacacccacacaaacctagatgaattccttccacaccttctccacacccacacg +aacctaaacgattccttccacaccttctccacacccacacacacctagatgaattccttc +cacaccttccccacacccacacagacctagacgaattccttccacaccttctccacaccc +acacgaacctagacgattccttccaca------------ccacacgaacctagacaattc +cttccacaccttctcacacccacacaaacctagatgaattccttccacaccttccccaca +cccacacagacctagacgaattccttccacaccttctccacacccacacgaacctagaca +attccttccacaccttctcacacccacacaaacctagacgaattccttccacaccttctc +cacacccacacaaacctagacgaattccttccacaccttcctcacacccacacgaaccta +gacgattccttccacaccttccccacacccacacagacctagacgaattccttccacacc +ttctccacacccacacgaacctagacgattccttccacaccttccccacacccacacgaa +cctagacaattccttccacaccttctcacacccacacaaacctagatgaattccttccac +accttctccacacccacacaaacctagacgaattccttccacaccttcctcacacccaca +cgaacctagacgattccttccacaccttccccacacccacacagacctagacgaattcct +tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac +acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc +ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta +attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg +ctggagtgcag +>p:HG002_1_chr20:18208997-18210828 +gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata +aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta +tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca +cacccacacgaacctagacgattccttccacaccttccccacacccacacgaacctagat +gaattccttccacaccttctccacacccacacgaacctagacgattccttccacaccttc +cccacacccattctccacacccacacgaacctagacgattccttccacacctccacaccc +acacgaacctagatgaattccttccacaccttccccacacccacacaaacctagacgatt +ccttccacaccttctccacacccacacgaacctagacgaattccttccagaccttctcca +cacccacacagacctagacgattccttccacacctccacacccacacgaacctagacgaa +ttccttccacaccttctccacacccacacgaacctagacgaattccttccacaccttctc +cacacccacacgaacctagacgaattccttccacaccttctccacacccacacgaaccta +gacgattccttccacactttccccacacccacaagaacctagacg-attccttccacac- +--ctccacacccacacgaacctagatgaattccttccacaccttccccacacccacacaa +acctagacgattccttccacaccttctccacacccacacaaacctagacaattccttcca +cacacacatgaacctagatgaattccttccacaccttctccacacccacatgaacctaga +cgattccttccacaccttctccacacccacacacacctagatgaattccttccacacctt +ccccacacccacacagacctagacgaattccttccacacattctccacacccacacgaac +ctagacgattccttccacaccttctccatacccacacgaacctagacaattccttccaca +ccttctcacacccacacaaacctagatgaattccttccacaccttctccacacccacacg +aacctaaacgattccttccacaccttctccacacccacacacacctagatgaattccttc +cacaccttccccacacccacacagacctagacgaattccttccacaccttctccacaccc +acacgaacctagacgattccttccaca------------ccacacgaacctagacaattc +cttccacaccttctcacacccacacaaacctagatgaattccttccacaccttccccaca +cccacacagacctagacgaattccttccacaccttctccacacccacacgaacctagaca +attccttccacaccttctcacacccacacaaacctagacgaattccttccacaccttctc +cacacccacacaaacctagacgaattccttccacaccttcctcacacccacacgaaccta +gacgattccttccacaccttccccacacccacacagacctagacgaattccttccacacc +ttctccacacccacacgaacctagacgattccttccacaccttccccacacccacacgaa +cctagacaattccttccacaccttctcacacccacacaaacctagatgaattccttccac +accttctccacacccacacaaacctagacgaattccttccacaccttcctcacacccaca +cgaacctagacgattccttccacaccttccccacacccacacagacctagacgaattcct +tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac +acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc +ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta +attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg +ctggagttcag +>p:HG002_2_chr20:18208997-18210828 +gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata +aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta +tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca +cacccacacgaacctagacgattccttccacaccttccccacacccacacgaacctagat +gaattccttccacaccttctccacacccacacgaacctagacgattccttccacaccttc +cccacacccattctccacacccacacgaacctagacgattccttccacacctccacaccc +acacgaacctagatgaattccttccacaccttccccacacccacacaaacctagacgatt +ccttccacaccttctccacacccacacgaacctagacgaattccttccagaccttctcca +cacccacacagacctagacgattccttccacacctccacacccacacgaacctagacgaa +ttccttccacaccttctccacacccacacgaacctagacgaattccttccacaccttctc +cacacccacacgaacctagacgaattccttccacaccttctccacacccacacgaaccta +gacgattccttccacactttccccacacccacacgaacctagacg-attccttccacac- +--ctccacacccacacgaacctagatgaattccttccacaccttccccacacccacacaa +acctagacgattccttccacaccttctccacacccacacaaacctagacaattccttcca +cacacacatgaacctagatgaattccttccacaccttctccacacccacatgaacctaga +cgattccttccacaccttctccacacccacacacacctagatgaattccttccacacctt +ccccacacccacacagacctagacgaattccttccacaccttctccacacccacacgaac +ctagacgattccttccacaccttctccatacccacacgaacctagacaattccttccaca +ccttctcacacccacacaaacctagatgaattccttccacaccttctccacacccacacg +aacctaaacgattccttccacaccttctccacacccacacacacctagatgaattccttc +cacaccttccccacacccacacagacctagacgaattccttccacaccttctccacaccc +acacgaacctagacgattccttccacaccttctccatacccacacgaacctagacaattc +cttccacaccttctcacacccacacaaacctagatgaattccttccacaccttccccaca +cccacacagacctagacgaattccttccacaccttctccacacccacacgaacctagaca +attccttccacaccttctcacacccacacaaacctagacgaattccttccacaccttctc +cacacccacacaaacctagatgaattccttccacaccttcctcacacccacacgaaccta +gacgattccttccacaccttccccacacccacacagacctagacgaattccttccacacc +ttctccacacccacacgaacctagacgattccttccacaccttccccacacccacacgaa +cctagacaattccttccacaccttctcacacccacacaaacctagatgaattccttccac +accttctccacacccacacaaacctagacgaattccttccacaccttcctcacacccaca +cgaacctagacgattccttccacaccttccccacacccacacagacctagacgaattcct +tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac +acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc +ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta +attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg +ctggagtgcag +>ref_chr20:18208997-18210828 +gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata +aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta +tccttccgtagcttctccacacct------------------------------------ +-----acacgaacctagacgattccttccacaccttccccacacccacacgaacctagat +gaattccttccacaccttctccacacccacacgaacctagacgattccttccacaccttc +cccacacccattctccacacccacacgaacctagacgattccttccacacctccacaccc +acacgaacctagatgaattccttccacaccttccccacacccacacaaacctagacgatt +ccttccacaccttctccacacccacacgaacctagacgaattccttccagaccttctcca +cacccacacagacctagacgattccttccacacctccacacccacacgaacctagacgaa +ttccttccacaccttgtgcacacccacacgaacctagacgaattccttccacaccttctc +cacacccacacgaacctagacgaattccttccacaccttctccacacccacacgaaccta +gacgattccttccacaccttccccacacccacacagacctagacgaattccttgcacacc +ttctccacacccacacgaacctagacgaattccttccacaccttctccatacccacacga +acctagacaattccttccacaccttct-------------------------------ca +cacccacacaaacctagatgaattccttccacaccttctccacacccacacgaacctaaa +cgattccttccacaccttctccacacccacacacacctagatgaattccttccacacctt +ccccacacccacacagacctagacgaattccttccacaccttctccacacccacacgaac +ctagacgattccttccacaccttctccatacccacacgaacctagacaattccttccaca +ccttctcacacccacacaa----------------------------------------- +------------------------------------------acctagatgaattccttc +cacaccttccccacacccacacagacctagacgaattccttccacaccttctccacaccc +acacgaacctagacg--------------------------------------------- +------------------------------------------------------------ +-------------------attccttccacaccttccccacacccacacgaacctagaca +attccttgcacaccttctcacacccacacaaacctagacgaattccttccacaccttctc +cacacccacacaaacctagacgaattccttccacaccttcctcacacccacacgaaccta +gacgattccttccacaccttccccacacccacacagacctagacgaattccttccacacc +ttctccacacccacacgaacctagacgattccttccacaccttccccacacccacacgaa +cctagacaattccttccacaccttctcacacccacacaaacctagatgaattccttccac +accttctccacacccacacaaacctagacgaattccttccacaccttcctcacacccaca +cgaacctagatgattccttccacaccttccccacacccacacagacctagacgaattcct +tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac +acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc +ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta +attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg +ctggagtgcag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa new file mode 100644 index 00000000..ca25e333 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa @@ -0,0 +1,165 @@ +>syndip_1_chr20:60314230-60315121 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttgtccttccttccttccttccttccttccttccttccttccttccttccttccttc +cacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaaccatc +ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc +catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc +ctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatccact +cttccatccatccatccacccaccctt----------------ccatccatccatcctcc +catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc +ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc +catccatctaccctcccatccatccatcctcccacccaccctcccatccatccatcctcc +catcctcccatccatccatccatcctcccatccgtccatccatccttccatccac----- +-----------cctcccatccatccatcctcccatccatctaccctcccatccatccatc +ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc +catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc +ataatctatcttcccatccatttgtccttccttccttccttccttccttccttccttcgt +tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca +tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca +t--------------------ccatcctcccatccatcctcccatcctcccatccattct +cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca +tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca +tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca +tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca +tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca +ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct +cccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt +cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca +tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca +tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag +ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat +gaaaaaaagaatatattgatttcaggccagttacataga +>syndip_2_chr20:60314230-60315121 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cgtcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttg--------------------tccttccttccttccttccttccttccttccttc +cacccatccatcctcccatgcatcctcccatgcatcctcccatccatccacccaaccatc +ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc +catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc +ctcctatccatccatccatccatccaccctctcatccatccatcctcccatccatccacc +cttccatccatccatccacccacccttccatccatccatcctcccatccatccatcctcc +catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc +ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc +catccatctaccctcccatccatccatcctcccatccaccctcccatccatccatccatc +catccaccctctcatccatccatcctcccatccgtccatccatccttccatccaccctcc +catccatccatcctcccatccatccatcctcccatccatctaccctcccatccatccatc +ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc +catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc +ataatctatcttcccatccatttgtccttccttccttccttccttccttcc--------t +tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca +tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca +tccatcctcccatccatcctcccatcctcccatccatcctcccatcctcccatccattct +cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca +tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca +tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca +tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca +tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca +ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct +c----ccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt +cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca +tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca +tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag +ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat +gaaaaaaagaatatattgatttcaggccagttacataga +>p:HG002_1_chr20:60314230-60315121 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cgtcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttg--------------------tccttccttccttccttccttccttccttccttc +cacccatccatcctcccatgcatcctcccatgcatcctcccatccatccacccaaccatc +ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc +catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc +ctcctatccatccatccatccatccaccctctcatccatccatcctcccatccatccacc +cttccatccatccatccacccacccttccatccatccatcctcccatccatccatcctcc +catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc +ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc +catccatctaccctcccatccatccatcctcccatccaccctcccatccatccatccatc +catccaccctctcatccatccatcctcccatccgtccatccatccttccatccaccctcc +catccatccatcctcccatccatccatcctcccatccatctaccctcccatccatccatc +ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc +catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc +ataatctatcttcccatccatttgtccttccttccttccttccttccttcc--------t +tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca +tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca +tccatcctcccatccatcctcccatcctcccatccatcctcccatcctcccatccattct +cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca +tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca +tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca +tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca +tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca +ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct +c----ccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt +cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca +tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca +tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag +ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat +gaaaaaaagaatatattgatttcaggccagttacataga +>p:HG002_2_chr20:60314230-60315121 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttgtccttccttccttccttccttccttccttccttccttccttccttccttccttc +cacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaaccatc +ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc +catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc +ctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatccact +cttccatccatccatccacccaccctt----------------ccatccatccatcctcc +catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc +ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc +catccatctaccctcccatccatccatcctcccacccaccctcccatccatccatcctcc +catcctcccatccatccatccatcctcccatccgtccatccatccttccatccac----- +-----------cctcccatccatccatcctcccatccatctaccctcccatccatccatc +ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc +catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc +ataatctatcttcccatccatttgtccttccttccttccttccttccttccttccttcgt +tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca +tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca +t--------------------ccatcctcccatccatcctcccatcctcccatccattct +cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca +tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca +tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca +tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca +tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca +ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct +cccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt +cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca +tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca +tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag +ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat +gaaaaaaagaatatattgatttcaggccagttacataga +>ref_chr20:60314230-60315121 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaa-------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-atccatccatctgtcctcctacccatc---ctgctatccatccatccatccatcctccc +ataatctatcttcccatccatttgtccttccttccttccttccttccttcc--------t +tct---------------------tccttccttccttccttccttccttccacccatcca +tcctcccattcatcctcccatgcatcctc----ccatccatccacccaaccatcctccca +tccatcctcacatcgatcttcgcatccatccatcgatcgatctatccatccatccattct +cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca +tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca +tccaccctcccatccatccatcctcccat------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------ccatctatccatccatcctcccatcctcccatcct +cccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt +cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca +tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca +tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag +ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat +gaaaaaaagaatatattgatttcaggccagttacataga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa new file mode 100644 index 00000000..f4bd8df9 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa @@ -0,0 +1,90 @@ +>syndip_1_chr20:62270179-62271195 +cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca +gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc +tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg +ggtcctctccttgtccctccctgacctctctgggtcctctccttgtccctctctggcctc +tctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgg +gtcctctccttgtccctc-ctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctctctggcctctctgggtcctctct---gacctctctgagtcctctcct +tgtc-----------------------------cctctctgggtcctctccttgtccctc +tctgggtcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgg +gtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctctctggcctctctgggtcctctccttgtccctctctggcctctctggg +tcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctc +tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt +gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt +ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc +tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa +>syndip_2_chr20:62270179-62271195 +cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca +gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc +tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg +ggtcctctccttgtccctccctgacctctctgggtcctctccttgtccctctctggcctc +tctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgg +gtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctc-ctgg-----------------------cctctctgggtcctctcct +tgtc-------------------------------------------------------- +---------------------------------------------------cctctctgg +gtcctctccttgtccctctctgacctctctgggtcctctccttgtccctctctgggtcct +ctccttgtc-----------------------------------------cctctctggg +tcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctc +tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt +gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt +ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc +tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa +>p:HG002_1_chr20:62270179-62271195 +cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca +gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc +tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg +ggtcctctccttgtccctccctgacctctctgggtcctctccttgtccctctctggcctc +tctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgg +gtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctc-ctgg-----------------------cctctctgggtcctctcct +tgtc-------------------------------------------------------- +---------------------------------------------------cctctctgg +gtcctctccttgtccctctctgacctctctgggtcctctccttgtccctctctgggtcct +ctccttgtc-----------------------------------------cctctctggg +tcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctc +tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt +gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt +ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc +tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa +>p:HG002_2_chr20:62270179-62271195 +cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca +gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc +tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg +ggtcctctccttgtccctccctgacctctctgggtcctctccttgtccctctctggcctc +tctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgg +gtcctctccttgtccctc-ctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctctctggcctctctgggtcctctct---gacctctctgagtcctctcct +tgtc-----------------------------cctctctgggtcctctccttgtccctc +tctgggtcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgg +gtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctctctggcctctctgggtcctctccttgtccctctctggcctctctggg +tcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctc +tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt +gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt +ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc +tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgagaa +>ref_chr20:62270179-62271195 +cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca +gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc +tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg +ggtcctctccttgtccctccctgacctctctgggtcctctccttgtccctctctggcctc +tctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgg +gtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctc-ctggcctctctgggtcctctccttgtccctctctgggtcctctcct +tgtccctctctggcctctctgggtcctctctgacctctctgggtcctctccttgtccctc +tctgggtcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgg +gtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcct +ctccttgtccctctctggcctctctgggtcctctccttgtccctctctggcctctctggg +tcctctccttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctc +tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt +gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt +ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc +tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa new file mode 100644 index 00000000..5f9abff0 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa @@ -0,0 +1,170 @@ +>syndip_1_chr20:4032119-4033702 +cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag +cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa +gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct +gcttggccacccatcgtctgggatatgaggagcccctctgcctggctgcccagtctggaa +agt----------------------------------------gaggagcgtctctgccc +ggccgccatcccatctaggaagcgaggagcgcctcttcccggctgccatcccatctagga +agtgaggagcgtctctgcccggccgcccatcgtctgagatgtggggagcacctctgcccc +accgccctgtctgggatgtgaggagcgcctctgctgggccgcaaccctgtctgggaggtg +aggagcgtctctgccccgccgctccgtctgagaagtgaggaaaccctctgcctggcaacc +gccccgtctgagaagtgaggagcccctccgtccggcaaccaccccgtctgggaagtgagg +agcgtctccgcccggcagccaccccgtccgggagggaggtggggggggtcagccccccgc +ccggccagccgccccgtccgggaggtgaggggctcctctgcccggccgcccctactggga +agtgaggagcccctctgcccggccagccgccccgtccgggagggaggcgggggggggggt +cggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggccggccgc +cccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgaggacccc +tctgcccggccagccgccccgtccgggagggaggtgggggggtcagtcccccgcccggcc +ggctgccccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgag +gagcccctctgcccggccagctgccccgcccgggagggaggtgggggggtcagccccccg +cctggccagccgccccatccgggagggaggtgggggggtcagccccccgcccggccagcc +gccccgtccgggaggggggagggggggtcagccccctgcccggccagccgccccgtccgg +gagggaggtggg--ggggggtcagccccccgcccggccagccgccccgtccgggagggag +gtggggggatcagccccccgcctggccagccgccccgtccgggaggtgaggggcgcctct +gcccggccacccctactgggaagtgaggagcccctctgcccggccagccgccccgtccgg +gagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggggggtca +gccccccttccggccggccgccctgtccgggaggtgaggggcgcctctgcctggccgccc +ctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggaggtgggg +gggacagccccccgcccggccagccgccctatccaggaggtgaggggcgcctctgcccgg +ccgcccctactgggaagtgaggagcccctctgcccggccagccgccccgtccgggagggt +agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc +tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg +gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat +agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg +atggaagtagacatg +>syndip_2_chr20:4032119-4033702 +cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag +cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa +gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct +gcttggccacccatcgtctgggatatgaggagcccctctgcctggctgcccagtctggaa +agtgaggagcgtctctgcccggccgccatcccatctaggaagcgaggagcgtctctgccc +ggccgccatcccatctaggaagcgaggagcgcctcttcccggctgccatcccatctagga +actgaggagcgtctctgcccggccgcccatcgtctgagatgtggggagcacctctgcccc +accgccctgtctgggatgtgaggagcgcctctgctgggccgcaaccctgtctgggaggtg +aggagcgtctctgccccgccgctccgtctgagaagtgaggaaaccctctgcctggcaacc +gccccgtctgagaagtgaggagcccctccgtccggcaaccaccccgtctgggaagtgagg +agcgtctccgcccggcagccaccccgtccgggagggaggt-------------------- +------------------------------------------------------------ +------------------------------------------------gggggggggggt +cggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggccggccgc +cccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgaggacccc +tctgcccggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggcc +ggctgccccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgag +gagcccctctgcccggccagctgccccgcccgggagggaggtgggggggtcagccccccg +cctggccagccgccccatccgggagggaggtgggggggtcagccccccgcccggccagcc +gccccgtccgggaggggggagggggggtcagccccctgcccggccagccgccccgtccgg +gagggaggtgggggggggggtcagccccccgcccggccagccgccccgtccgggagggag +gtggggggatcagccccccgcctggccagccgccccgtccgggaggtgaggggcgcctct +gcccggccacccctactgggaagtgaggagcccctctgcccggccagccgccccgtccgg +gagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggggggtca +gccccccttccggccggccgccctgtccgggaggtgaggggcgcctctgcctggccgccc +ctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggaggtgggg +gggacagccccccgcccggccagccgccctatccaggaggtgaggggcgcctctgcccgg +ccgcccctactgggaagtgaggagcccctctgcctggccagccgccccgtccgggagggt +agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc +tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg +gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat +agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg +atggaagtagacatg +>p:HG002_1_chr20:4032119-4033702 +cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag +cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa +gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct +gcttggccacccatcgtctgggatatgaggagcccctctgcctggctgcccagtctggaa +agtgaggagcgtctctgcccggccgccatcccatctaggaagcgaggagcgtctctgccc +ggccgccatcccatctaggaagcgaggagcgcctcttcccggctgccatcccatctagga +actgaggagcgtctctgcccggccgcccatcgtctgagatgtggggagcacctctgcccc +accgccctgtctgggatgtgaggagcgcctctgctgggccgcaaccctgtctgggaggtg +aggagcgtctctgccccgccgctccgtctgagaagtgaggaaaccctctgcctggcaacc +gccccgtctgagaagtgaggagcccctccgtccggcaaccaccccgtctgggaagtgagg +agcgtctccgcccggcagccaccccgtccgggagggaggt-------------------- +------------------------------------------------------------ +------------------------------------------------gggggggggggt +cggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggccggccgc +cccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgaggacccc +tctgcccggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggcc +ggctgccccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgag +gagcccctctgcccggccagctgccccgcccgggagggaggtgggggggtcagccccccg +cctggccagccgccccatccgggagggaggtgggggggtcagccccccgcccggccagcc +gccccgtccgggaggggggagggggggtcagccccctgcccggccagccgccccgtccgg +gagggaggtgggggggggggtcagccccccgcccggccagccgccccgtccgggagggag +gtggggggatcagccccccgcctggccagccgccccgtccgggaggtgaggggcgcctct +gcccggccacccctactgggaagtgaggagcccctctgcccggccagccgccccgtccgg +gagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggggggtca +gccccccttccggccggccgccctgtccgggaggtgaggggcgcctctgcctggccgccc +ctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggaggtgggg +gggacagccccccgcccggccagccgccctatccaggaggtgaggggcgcctctgcccgg +ccgcccctactgggaagtgaggagcccctctgcctggccagccgccccgtccgggagggt +agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc +tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg +gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat +agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg +atggaagtagacatg +>p:HG002_2_chr20:4032119-4033702 +cgggattgcagatgaagtctcgttcactcagtgctcaatggtgcccaggctggagtgcag +tggcgtgatctccgctcgctacaacctccacctcccagccgcctgccttggcctcccaaa +gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct +gcttggccacccatcgtctgggatatgaggagcccctctgcctggctgcccagtctggaa +agt----------------------------------------gaggagcgtctctgccc +ggccgccatcccatctaggaagcgaggagcgcctcttcccggctgccatcccatctagga +agtgaggagcgtctctgcccggccgcccatcgtctgagatgtggggagcacctctgcccc +accgccctgtctgggatgtgaggagcgcctctgctgggccgcaaccctgtctgggaggtg +aggagcgtctctgccccgccgctccgtctgagaagtgaggaaaccctctgcctggcaacc +gccccgtctgagaagtgaggagcccctccgtccggcaaccaccccgtctgggaagtgagg +agcgtctccgcccggcagccaccccgtccgggagggaggtggggggggtcagccccccgc +ccggccagccgccccgtccgggaggtgaggggctcctctgcccggccgcccctactggga +agtgaggagcccctctgcccggccagccgccccgtccgggagggaggcgggggggggggt +cggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggccggccgc +cccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgaggacccc +tctgcccggccagccgccccgtccgggagggaggtgggggggtcagtcccccgcccggcc +ggctgccccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgag +gagcccctctgcccggccagctgccccgcccgggagggaggtgggggggtcagccccccg +cctggccagccgccccatccgggagggaggtgggggggtcagccccccgcccggccagcc +gccccgtccgggaggggggagggggggtcagccccctgcccggccagccgccccgtccgg +gagggaggtggg--ggggggtcagccccccgcccggccagccgccccgtccgggagggag +gtggggggatcagccccccgcctggccagccgccccgtccgggaggtgaggggcgcctct +gcccggccacccctactgggaagtgaggagcccctctgcccggccagccgccccgtccgg +gagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggggggtca +gccccccttccggccggccgccctgtccgggaggtgaggggcgcctctgcctggccgccc +ctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggaggtgggg +gggacagccccccgcccggccagccgccctatccaggaggtgaggggcgcctctgcccgg +ccgcccctactgggaagtgaggagcccctctgcccggccagccgccccgtccgggagggt +agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc +tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg +gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat +agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg +atggaagtagacatg +>ref_chr20:4032119-4033702 +cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag +cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa +gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct +gcttggccacccatcgtctgggatatgaggagcccctctgcctggctgcccagtctggaa +agtgaggagcgtctctgcccggccgccatcccatctaggaagcgaggagcgtctctgccc +ggccgccatcccatctaggaagcgaggagcgcctcttcccggctgccatcccatctagga +actgaggagcgtctctgcccggccgcccatcgtctgagatgtggggagcacctctgcccc +accgccctgtctgggatgtgaggagcgcctctgctgggccgcaaccctgtctgggaggtg +aggagcgtctctgccccgccgctccgtctgagaagtgaggaaaccctctgcctggcaacc +gccccgtctgagaagtgaggagcccctccgtccggcaaccaccccgtctgggaagtgagg +agcgtctccgcccggcagccaccccgtccgggagggaggt-------------------- +------------------------------------------------------------ +------------------------------------------------gggggggggggt +cggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggccggccgc +cccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgaggacccc +tctgcccggccagccgccccgtccgggagggaggtgggggggtcagccccccgcccggcc +ggctgccccgtccgggaggtgaggggcgcctctgcccggccgcccctactgggaagtgag +gagcccctctgcccggccagctgccccgcccgggagggaggtgggggggtcagccccccg +cctggccagccgccccatccgggagggaggtgggggggtcagccccccgcccggccagcc +gccccgtccgggaggggggagggggggtcagccccctgcccggccagccgccccgtccgg +gagggaggtggg------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------gggggggtca +gccccctttccggccg--ccgcctgtccgggaggtgaggggcgcctctgcctggccgccc +ctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggaggtgggg +gggacagccccccgcccggccagccgccctatccaggaggtgaggggcgcctctgcccgg +ccgcccctactgggaagtgaggagcccctctgcctggccagccgccccgtccgggagggt +agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc +tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg +gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat +agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg +atggaagtagacatg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa new file mode 100644 index 00000000..55b397dc --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa @@ -0,0 +1,190 @@ +>syndip_1_chr20:20337100-20337808 +tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg +aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac +tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat +ggatggatggatggatggatggatggatagatgggtgggtggatggatggatggatagat +gggtggatggatagatgggtggatggataaatggatggatggatggatggatagatgggt +gggtggatggatggatggatggatggatggatggatggatggatagatggatagatgggt +gggtggatggatggatggatggatggatggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggt +ggatggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatgggtggatggataaatggatggatggatggat +ggatagatgtgtgggtggatggatggatggatggatggatggatggatggatggatagat +ggatagatgggtgggtggatggatggatggatggatggatggatggatggatggatagat +gggtggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatggatggatggatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatggatggatggatggat +ggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatggatagat +gggtgggtggatggatggatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatgggtgggtggatggatggatggatggatggatagat +ggatagatgggtgggtggatggatggatggatagatgggtgggtggatggatggatggat +ggatggatggatggatggatggatagatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtggatggat +ggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggtggat +ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt +ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt +cttagcagacgtcccaccatggaagaggtcatggagcacaggggctttgggaaaggtgtt +tacaatcctgcctgcctctcacccagacctcttg +>syndip_2_chr20:20337100-20337808 +tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg +aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac +tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat +ggatggatggatggatggatggatggatggatagatgggtgggtggatggatggatggat +agatgggtggatggataaatggatgg----atggatggatggatagatgggtgggtggat +ggatggatggatggatggatggatggatggatggatagatggatagatgggtgggtggat +ggatggatggatggatggatggatggatggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggt +ggatggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatgggtggatggataaatggatggatggatggat +ggatagatgggtgggtggatggatggatggatggatggatggatagatggataga----- +-----------tgggtgggtggatggatggatggatggatggatggatggatggatagat +gggtggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatggatggatagatggatagatgggtgggt +ggatggatggatggatagatgggtggg----tggatggatggatggatggatggatggat +ggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatggatagat +gggtgggtggatggatggatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggata--------------------gatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatgggtgggtggatggatggatggataga--------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------tgggtgggtgggtggatggatggatagatgggtgggtggat +ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt +ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt +cttagcagacgtcccaccatggaagaggtcatggagcacaggggctttgggaaaggtgtt +tacaatcctgcctgcctctcacccagacctcttg +>p:HG002_1_chr20:20337100-20337808 +tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg +aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac +tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat +ggatggatggatggatggatggatggatggatagatgggtgggtggatggatggatggat +agatgggtggatggataaatggatgg----atggatggatggatagatgggtgggtggat +ggatggatggatggatggatggatggatggatggatagatggatagatgggtgggtggat +ggatggatggatggatggatggatggatggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggt +ggatggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatgggtggatggataaatggatggatggatggat +ggatagatgggtgggtggatggatggatggatggatggatggatagatggataga----- +-----------tgggtgggtggatggatggatggatggatggatggatggatggatagat +gggtggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatggatggatagatggatagatgggtgggt +ggatggatggatggatagatgggtggg----tggatggatggatggatggatggatggat +ggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatggatagat +gggtgggtggatggatggatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggata--------------------gatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatgggtgggtggatggatggatggataga--------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------tgggtgggtgggtggatggatggatagatgggtgggtggat +ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt +ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt +cttagcagacgtcccaccatggaagaggtcatggggcacaggggctttgggaaaggtgtt +tacaatcctgcctccctctcacccagacctcttg +>p:HG002_2_chr20:20337100-20337808 +tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg +aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac +tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat +ggatggatggatggatggatggatggatagatgggtgggtggatggatggatggatagat +gggtggatggatagatgggtggatggataaatggatggatggatggatggatagatgggt +gggtggatggatggatggatggatggatggatggatggatggatagatggatagatgggt +gggtggatggatggatggatggatggatggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggt +ggatggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatgggtggatggataaatggatggatggatggat +ggatagatgtgtgggtggatggatggatggatggatggatggatggatggatggatagat +ggatagatgggtgggtggatggatggatggatggatggatggatggatggatggatagat +gggtggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatggatagatgggtgggtggatggatggatggatagat +gggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatgggtgggtggatggatggatggatggatggatggatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatggatggatggatggat +ggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtggat +ggatggatggatagatggatagatgggtgggtggatggatggatggatagatggatagat +gggtgggtggatggatggatggatagatgggtgggtggatggatggatggatagatgggt +gggtggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggt +gggtggatggatggatggatagatgggtgggtggatggatggatggatggatggatagat +ggatagatgggtgggtggatggatggatggatagatgggtgggtggatggatggatggat +ggatggatggatggatggatggatagatggatagatgggtgggtggatggatggatggat +agatgggtgggtggatggatggatggatagatggatagatgggtgggtggatggatggat +ggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtggatggat +ggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggtggat +ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt +ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt +ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt +cttagcagacgtcccaccatggaagaggtcatggggcacaggggctttgggaaaggtgtt +tacaatcctgcctccctctcacccagacctcttg +>ref_chr20:20337100-20337808 +tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg +aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac +tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat +ggatggatggatagatggatggatggatggatggatggatggatggatggatggatggat +agatgggtgggtgg----gtggatgg----atggatggatggatggatggatggatggat +ggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggtggat +ggatggatggatagatgggtgggtggatggatggatggatagatgggtggatggataaat +ggatggatggatggatggatagatgggtgggtggatggatggatggatggatggatggat +ggatggatagat------------------gtgggtggatggatggatggatagatgggt +gggtgggtggatggatggatggatggatagatgggtgggtgggtggatggatggatggat +ggatagaataaaaaagaaa----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------gctaggacatggttctagtgtt +cttagcagacgtcccaccatggaagaggtcatggagcacaggggctttgggaaaggtgtt +tacaatcctgcctgcctctcacccagacctcttg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa new file mode 100644 index 00000000..46a6aa62 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa @@ -0,0 +1,100 @@ +>syndip_1_chr20:278800-279287 +gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa +tgcgtcttccaggtggaggaagcaccacgtgcagt------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------gggcg +gggaaaggcaggagagggcgcgcgagtgcgcggagggagggcgggacggag----ggagg +gagggcgggagggagggacggagggaggaagggcgggacggagggagggagggcgggagg +gacggagggcgggacggcgggagggcgggacggagggacggagggagggcgggacggagg +gcgggagggcgggacggagggagggag---ggagggagggcgggacggagggagggaggg +agggagggacggagggcgggacggagggagggagggcggagggagggagggcgggacgga +gggagggagggcgggacggagggtgggagggagggagggatggaggga----gggaggga +gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga +gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca +gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc +ggttgagcttgctgaggaagttctcacggc +>syndip_2_chr20:278800-279287 +gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa +tgcgtcttccaggtggaggaagcaccacgtgcagtgggcggggaaaggcaggagagggcg +cgcgagtgcgcggagggagggcgggacggagggagggagggcgggacggagggagggagg +gagggcgggacggagggagggagggcgggacggagggagggagggagggacggagggcgg +gacggcgggagggcgggacggagggacggagggagggcgggacggagggcgggagggcgg +gacggagggagggagggagggagggcgggacggagggagggagggcgggacggagggagg +gagggagggacggagggcgggacggcgggagggcgggacggagggacggagggagggcgg +gacggagggcgggagggcgggacggagggagggagggcgggacggagggacggagggagg +gagggcgggacggagggagggagggcgggacggagggacggagggagggagggcgggacg +gagggagggagggagggacggagggcgggacggcgggagggcgggacggagggacggagg +gagggcgggacggagggcgggagggagggagggcgggacggagggagggagggagggagg +gcgggacggagggagggagggagggagggacggagggacggagggagggagggagggagg +gacggagggcgggacggagggagggagggcggagggagggagggcgggacggagggaggg +agggagggacggagggcgggacggagggagggagggcggagggagggagggcgggacgga +gggagggagggcgggagggatggagggagggagggcgggacggagggagggcgggaggga +gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga +gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca +gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc +ggttgagcttgctgaggaagttctcacggc +>p:HG002_1_chr20:278800-279287 +gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa +tgcgtcttccaggtggaggaagcaccacgtgcagtgggcggggaaaggcaggagagggcg +cgcgagtgcgcggagggagggcgggacggagggagggagggcgggacggagggagggagg +gagggcgggacggagggagggagggcgggacggagggagggagggagggacggagggcgg +gacggcgggagggcgggacggagggacggagggagggcgggacggagggcgggagggcgg +gacggagggagggagggagggagggcgggacggagggagggagggcgggacggagggagg +gagggagggacggagggcgggacggcgggagggcgggacggagggacggagggagggcgg +gacggagggcgggagggcgggacggagggagggagggcgggacggagggacggagggagg +gagggcgggacggagggagggagggcgggacggagggacggagggagggagggcgggacg +gagggagggagggagggacggagggcgggacggcgggagggcgggacggagggacggagg +gagggcgggacggagggcgggagggagggagggcgggacggagggagggagggagggagg +gcgggacggagggagggagggagggagggacggagggacggagggagggagggagggagg +gacggagggcgggacggagggagggagggcggagggagggagggcgggacggagggaggg +agggagggacggagggcgggacggagggagggagggcggagggagggagggcgggacgga +gggagggagggcgggagggatggagggagggagggcgggacggagggagggcgggaggga +gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga +gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca +gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc +ggttgagcttgctgaggaagttctcacggc +>p:HG002_2_chr20:278800-279287 +gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa +tgcgtcttccaggtggaggaagcaccacgtgcagt------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------gggcg +gggaaaggcaggagagggcgcgcgagtgcgcggagggagggcgggacggag----ggagg +gagggcgggagggagggacggagggaggaagggcgggacggagggagggagggcgggagg +gacggagggcgggacggcgggagggcgggacggagggacggagggagggcgggacggagg +gcgggagggcgggacggagggagggag---ggagggagggcgggacggagggagggaggg +agggagggacggagggcgggacggagggagggagggcggagggagggagggcgggacgga +gggagggagggcgggacggagggtgggagggagggagggatggaggga----gggaggga +gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga +gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca +gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc +ggttgagcttgctgaggaagttctcacggc +>ref_chr20:278800-279287 +gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa +tgcgtcttccaggtggaggaagcaccacgtgcagt------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------gggcg +gggaaaggcaggagagggcgcgcgagtgcgcggagggagggcgggacggag----ggagg +gagggcg--------------------------------ggacggagggagggagggagg +gacggagggcgggacggcgggagggcgggacggagggacggagggagggcgggacggagg +gcgggagggcgggacggag----------------------------------------- +------------------------------------------------------------ +-----------------------------------------ggaggga----gggaggga +gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga +gggacggagggcgggcggatgccgcgggtctcaccgccagggtgtcctgctgctgcagca +gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc +ggttgagcttgctgaggaagttctcacggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa new file mode 100644 index 00000000..48276576 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa @@ -0,0 +1,75 @@ +>syndip_1_chr20:35539020-35539825 +tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc +tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac +agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat +ataacagtttatatatatatataacagtttatatatatattatatataacagtt--tata +tatata------------------------------------------------------ +----------------atatatatgtttatatattatatataacagtttatatataacag +tttatatataatatatataagtttatatataacagtttatatatataatatatataagtt +tatatataatatatataagtttatatatg------------------------------- +---------------------tttatatataatatatataacagtttatatatataatat +atataacagtttatatatataatatatataacagtttatatatataatatatataagttt +atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa +agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct +cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga +aggggtgcagccagttctaggcagca +>syndip_2_chr20:35539020-35539825 +tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc +tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac +agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat +ataacagtttatatatatatataacagtttatatatataatatatataacagtt--tata +tatatatataacagtttatatatataat-------------------------------- +----------------atatataagtttatatattatatataacagtttatatataacag +tttatatataatatatataagtttatatataacagtttatatatataatatatataagtt +tatatataatatatataagtttatatatatgtttatatataatatatataacagtt---- +---------------------tatatatataatatatataacagtttatatatataatat +atataacagtttatatatataatatatataacagtttatatatataatatatataagttt +atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa +agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct +cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga +aggggtgcagccagttctaggcagca +>p:HG002_1_chr20:35539020-35539825 +tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc +tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac +agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat +ataacagtttatatatatatataacagtttatatatataatatatataacagtt--tata +tatatatataacagtttatatatataat-------------------------------- +----------------atatataagtttatatattatatataacagtttatatataacag +tttatatataatatatataagtttatatataacagtttatatatataatatatataagtt +tatatataatatatataagtttatatatatgtttatatataatatatataacagtt---- +---------------------tatatatataatatatataacagtttatatatataatat +atataacagtttatatatataatatatataacagtttatatatataatatatataagttt +atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa +agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct +cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga +aggggtgcagccagttctaggcagca +>p:HG002_2_chr20:35539020-35539825 +tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc +tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac +agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat +ataacagtttatatatatatataacagtttatatatatattatatataacagtt--tata +tatata------------------------------------------------------ +----------------atatatatgtttatatattatatataacagtttatatataacag +tttatatataatatatataagtttatatataacagtttatatatataatatatataagtt +tatatataatatatataagtttatatatg------------------------------- +---------------------tttatatataatatatataacagtttatatatataatat +atataacagtttatatatataatatatataacagtttatatatataatatatataagttt +atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa +agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct +cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga +aggggtgcagccagttctaggcagca +>ref_chr20:35539020-35539825 +tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc +tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac +agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat +ataacagtttatatatatatataacagtttatatatataatatatataacagtttatata +tatatatataacagtttatatatataatatatataacagtttatatatataatatatata +acagtttatatatataatatataagtttatatattatatataacagtttatatataacag +tttatatataatatatataagtttatatataacagtttatatatataatatatataagtt +tatatataatatatataagtttatatatatgtttatatataatatatataacagtttata +tatataatatatataacagtttatatatataatatatataacagtttatatatataatat +atataacagtttatatatataaaatatataacagtttatatatataatatatataagttt +atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa +agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct +cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga +aggggtgcagccagttctaggcagca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa new file mode 100644 index 00000000..053f59de --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa @@ -0,0 +1,105 @@ +>syndip_1_chr20:20295855-20296666 +gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc +agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca +acgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttctttt +cttcctccctcccttcct---tcccttccttcccttccttccctcctttt--cttccttc +tttccttccttccttccctccctccttcccttccttccttccttcc-cttccttccttct +ctccttccttccttccctccctccttcccttccttccttctctccttccttcccttcctt +ccttctctccttccttcccttccttccttctctccttccttcccttccttccttctctcc +ttccttcccttccttccttctctccttccttcccttgcttcctttcttccttcctttgct +tccttgcttccttctttcctttcttcatccctccctccctccttccttcctttccttcct +tccctctatccttccttccttcctttctttctccctcccttccttcccttccttccttcc +ctcttttccttccatctttcctttcttcctccctccctccctccttccctccttcattac +ttccttccctcctttccttccttcacctccttcccttctctcctcccttcatcccttcct +tcctttcttccttccctccctcttccttcctttcttcacccctcccccttccctcatccc +ttccctccttcccttccttccttccttccctccttccttcccttccttccttccttgctt +ccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttccttc +tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc +ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc +ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt +taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca +gatagtcaaataactgtaag +>syndip_2_chr20:20295855-20296666 +gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc +agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca +acgcctgcatgcttccctccttccttccttccttccttcctt-ccttccttccttccttc +cttcctgcctgcctgcctgccttcctgccttcccttcctccctccctccctacctccctc +cctccttccttcccttcctccctccctcccttctttacttccttcctccttccctcctcc +tttcctccatccttccctcccttctttcctttcttcctcctgccct------ccctcctt +ctttcccttcctccctcccttccttcccatcctccttccctccct--ccttccttcctgc +cttccttccttccctccatccttccttccttccttt-cttcctccctcccttccttcctc +ccttccttcccttccttccttccctcttttccttccatctttcctttcttcctccctccc +tccctccttccctccttcattcctttct---tccctcctttccttccttcacctccttcc +cttctctcctcccttcatcccttccttcctttcttccttccctccctcttccttcctttc +ttcacccctcccccttccctcatcccttccctccttcccttccttccttccttccctcct +tccttcccttccttccttc--cttccctccttcctt---------cccttccttccttgc +ttccttccttccttcccttccttccttgcttccttccttccttcccttccttccttgctt +ccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttccttc +tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc +ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc +ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt +taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca +gatagtcaaataactgtaag +>p:HG002_1_chr20:20295855-20296666 +gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc +agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca +acgcctgcatgcttccctccttccttccttccttccttcctt-ccttccttccttccttc +cttcctgcctgcctgcctgccttcctgccttcccttcctccctccctccctacctccctc +cctccttccttcccttcctccctccctcccttctttacttccttcctccttccctcctcc +tttcctccatccttccctcccttctttcctttcttcctcctgccct------ccctcctt +ctttcccttcctccctcccttccttcccatcctccttccctccct--ccttccttcctgc +cttccttccttccctccatccttccttccttccttt-cttcctccctcccttccttcctc +ccttccttcccttccttccttccctcttttccttccatctttcctttcttcctccctccc +tccctccttccctccttcattcctttct---tccctcctttccttccttcacctccttcc +cttctctcctcccttcatcccttccttcctttcttccttccctccctcttccttcctttc +ttcacccctcccccttccctcatcccttccctccttcccttccttccttccttccctcct +tccttcccttccttccttc--cttccctccttcctt---------cccttccttccttgc +ttccttccttccttcccttccttccttgcttccttccttccttcccttccttccttgctt +ccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttccttc +tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc +ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc +ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt +taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaaactgca +gatagtcaaataactgtaag +>p:HG002_2_chr20:20295855-20296666 +gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc +agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca +acgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttctttt +cttcctccctcccttcct---tcccttccttcccttccttccctcctttt--cttccttc +tttccttccttccttccctccctccttcccttccttccttccttcc-cttccttccttct +ctccttccttccttccctccctccttcccttccttccttctctccttccttcccttcctt +ccttctctccttccttcccttccttccttctctccttccttcccttccttccttctctcc +ttccttcccttccttccttctctccttccttcccttgcttcctttcttccttcctttgct +tccttgcttccttctttcctttcttcatccctccctccctccttccttcctttccttcct +tccctctatccttccttccttcctttctttctccctcccttccttcccttccttccttcc +ctcttttccttccatctttcctttcttcctccctccctccctccttccctccttcattac +ttccttccctcctttccttccttcacctccttcccttctctcctcccttcatcccttcct +tcctttcttccttccctccctcttccttcctttcttcacccctcccccttccctcatccc +ttccctccttcccttccttccttccttccctccttccttcccttccttccttccttgctt +ccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttccttc +tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc +ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc +ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt +taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca +gatagtcaaataactgtaag +>ref_chr20:20295855-20296666 +gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc +agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca +acgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttctttt +cttcctccctcccttcct---tcccttccttcccttccttccctcctttt--cttccttc +tttccttccttccttccctccctccttcccttccttccttc------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------cttcctccctccctccctccttccctccttcattcc +ttccttccctcctttccttccttcacctccttcccttctctcctcccttcatcccttcct +tcctttcttccttccctccctcttccttcctttcttcacccctcccccttccctcatccc +ttccctccttcccttccttccttccttccctccttccttcccttccttccttccttgctt +ccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttccttc +tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc +ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc +ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt +taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca +gatagtcaaataactgtaag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa new file mode 100644 index 00000000..d158507f --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa @@ -0,0 +1,100 @@ +>syndip_1_chr20:61289525-61290656 +tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc +ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata +tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga +gacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaac +gagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatcca +acgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatc +caacgagacttgctgggtttatccccg-------------------------------ta +tccaacgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccg +tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc +cgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatc +cccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggttta +tcccgatatccaatgagacttgctggatttatccccgtatccaatgagacttgctggg-- +-----------------------------tttatccccgtatccaatgagacttgctggg +tttatccccgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctg +gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc +tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt +gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca +cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc +>syndip_2_chr20:61289525-61290656 +tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc +ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata +tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga +gacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaac +gagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatcca +acgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatc +caacgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatcccccta +tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg +tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc +cgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatc +cccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggttta +tccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctggg-- +-----------------------------tttatccccgtatccaatgagacttgctggg +tttatccccgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctg +gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc +tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt +gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca +cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc +>p:HG002_1_chr20:61289525-61290656 +tctgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc +ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata +tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga +gacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaac +gagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatcca +acgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatc +caacgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatcccccta +tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg +tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc +cgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatc +cccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggttta +tccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctggg-- +-----------------------------tttatccccgtatccaatgagacttgctggg +tttatccccgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctg +gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc +tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt +gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca +cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc +>p:HG002_2_chr20:61289525-61290656 +tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc +ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata +tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga +gacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaac +gagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatcca +acgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatc +caacgagacttgctgggtttatccccg-------------------------------ta +tccaacgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccg +tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc +cgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatc +cccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggttta +tcccgatatccaatgagacttgctggatttatccccgtatccaatgagacttgctggg-- +-----------------------------tttatccccgtatccaatgagacttgctggg +tttatccccgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctg +gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc +tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt +gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca +cagaggaaattgatggcttggcagacagcgtgacatgttgactacctgccctggtccttg +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc +>ref_chr20:61289525-61290656 +tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc +ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata +tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga +gacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaac +gagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatcca +acgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgtatc +caacgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccgta +tccaacgagacttgctgggtttatccccgtatccaacgagacttgctgggtttatccccg +tatccaacgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc +cgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatc +cccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggttta +tccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtt +tatcccgatatccaatgagacttgctggatttatccccgtatccaatgagacttgctggg +tttatccccgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctg +gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc +tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt +gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca +cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa new file mode 100644 index 00000000..f560985e --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa @@ -0,0 +1,65 @@ +>syndip_1_chr20:61744088-61744755 +tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca +ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa +aggagccaaccagagagtggggaggcgatggagagaataacagagaggaaggcaggtgca +gggagacagcaaaggagagggagaagagaatgagagagggatggggagaaagagagaaga +gaggtggagagagacaggaaaggagagggagggagaggaagagagggagagagagaagga +gggagagagatggaaagaggtggagggagagagagaaggaaagggagggaga-------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------gagagagacagacagacagcttcacacagacataa +aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc +ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt +tcatcctg +>syndip_2_chr20:61744088-61744755 +tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca +ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa +aggagccaaccagagagtggggaggcgatggagagaataagagagaggaaggcaggtgca +gggagacagcaaaggagagggagaagagaatgagagagggatggggagaaagagagaaga +gaggtggagagagacaggaaaggagagggagggagaggaagagagggagagagagaagga +gggagagagatggaaagaggtggagggagagagagaaggaaagggagggagaggaagaga +gggagagagagaaggagggagagagatggagagaggtggagggagagagagaaggaaagg +gagg-------------------------------------------------------- +-------------------------gagagagagagacagacagcttcacacagacataa +aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc +ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt +tcatcctg +>p:HG002_1_chr20:61744088-61744755 +tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca +ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa +aggagccaaccagagagtggggaggcgatggagagaataagagagaggaaggcaggtgca +gggagacagcaaaggagagggagaagagaatgagagagggatggggagaaagagagaaga +gaggtggagagagacaggaaaggagagggagggagaggaagagagggagagagagaagga +gggagagagatggaaagaggtggagggagagagagaaggaaagggagggagaggaagaga +gggagagagagaaggagggagagagatggagagaggtggagggagagagagaaggaaagg +gagg-------------------------------------------------------- +-------------------------gagagagagagacagacagcttcacacagacataa +aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc +ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt +tcatcctg +>p:HG002_2_chr20:61744088-61744755 +tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca +ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa +aggagccaaccagagagtggggaggcgatggagagaataacagagaggaaggcaggtgca +gggagacagcaaaggagagggagaagagaatgagagagggatggggagaaagagagaaga +gaggtggagagagacaggaaaggagagggagggagaggaagagagggagagagagaagga +gggagagagatggaaagaggtggagggagagagagaaggaaagggagggaga-------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------gagagagacagacagacagcttcacacagacataa +aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc +ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt +tcatcctg +>ref_chr20:61744088-61744755 +tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca +ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa +aggagccaaccagagagtggggaggcgatggagagaataacagagaggaaggcaggtgca +gggagacagcaaaggagagggagaagagaatgagagagggatggggagaaagagagaaga +gaggtggagagagacaggaaaggagagggagggagaggaagagagggagagagagaagga +gggagagagatggaaagaggtggagggagagagagaaggaaagggagggagaggaagaga +gggagagagagaaggagggagagagatggagagaggtggagggagagagagaaggaaagg +gaggggagaggaagagagggagagagagaaggagggagagagatggagagaggtggaggg +agagagagaaggaaagggagggagagagagagacagacagacagcttcacacagacataa +aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc +ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt +tcatcctg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa new file mode 100644 index 00000000..7c44fb6d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa @@ -0,0 +1,240 @@ +>syndip_1_chr20:22081861-22084223 +ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg +tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc +gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt +attaatatatattaatataatataataataatacaatataatatatgtatatatgtaggt +atataataatatacctacatatatacatatattatatatactatctgtatattatatgta +taatatatgtatatataatatatgtatatactatacatgtatatattatattatacatgt +atatattataagtatatactatacatgcatgtattatatgtatatac------------- +---------------tatacatgcatgtattatatgtatatattatacatgcatgtatta +tatgtatatattatacatgcatgtattatatgtatatattatacatgtatgtattatatg +tatagattatacatgtatatattatatatgtatatattacacatgtacatatatgtatat +attaaacacgtttatattatttattatacatgactatattatataggtatatatgatata +tgcatatatgtcatataggtatatatgacatatatgcatatatcatgtaggtatatatga +catatgcatatatcatataggtatatatgacatatgcatatatcatataggtatatatga +tatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatataggtatat +atgatatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatataggt +atatatgatatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatat +aggtatatatgatatatgcgcatatatcatacaggtatatgtgcatatatcatataggta +taggtataggtagatatatgatatatgcacacatatcatataggtatatatgatatatga +taggtatatatgatatatgataggtatatatgatatatatcatattggtatatatgatat +atatcatatcggtatatatgatatatatcatatcggtatatataggtatatatcataggt +atatataggtatatatcacataggtatatataggtatatatcatataggtatatataata +tatgcatttatattatataggtatatatattacataggtatatgtaatatgtgcatatat +attatataggtatatatattatacaggcatatataatacgtatatatattatatgggcat +atatatgtatatatattatataggtatatataatatatgtgtatatattatgtgggtata +tataatatatgtgtacatattatgtgggtatatataatatatgtgtacatattatgtggg +tatatataatatatgtgtacatattatgtgggtatatataatatatgtgtatatattatg +tgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtatatat +tatgtgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtat +atattatgtgggtatatataatatatgtgtatatattatgtgggtatatataatatatgt +gtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatat +atgtgtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatata +atatatgtgtatatattatgtaggtatatataatatatgtgtatatattatatatgtaat +aggaagcaggaatgaatgcacagaagaccttagagtttaaggcattatttgtgtaatgag +aaatattatatatacg---------------------------------tatattaatat +agacgtatatataatatatacatatatacatatatagtatatactatgtatacatatata +ctatacatacgtatattatatacatgtgtattatatacatgtgtattatatatatgtgta +tacatatatactatatatacatacattatatacatacattatatatgtgtattatatatg +tatatattatgtatgtatatattatatatacgtatatattaatatgcatatatatgtgtc +tctttataccaataatgccttaaactctaaggtcttctgtgcattcattcctgcttccta +ttacatatatattatatagatacatatataatatatacatatatactacataatatatgc +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata +cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca +tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag +aattaaagtaagagttttctttctttcatctcctttttatctctctca +>syndip_2_chr20:22081861-22084223 +ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg +tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc +gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt +attactatatattaatataatataataataatacaatataatatatgtatatatgtaggt +atataataatatacctacatatatacatatattatatatactatctgtatattatatgta +taatatatgtatatataatatatgtatatactatacatgtatatattatattatacatgt +atatattataagtatatactatacatgcatgtattatatgtatatac------------- +---------------tatacatgcatgtattatatgtatatattatacatgcatgtatta +tatgtatatattatacatgcatgtattatatgtatatattatacatgtatgtattatatg +tatagattatacatgtatatattatatatgtatatattacacatgtacatatatgtatat +attaaacacgtttatattatttattata-------------------------------- +-------------------------------------------catgtaggtatatatga +catatgcatatatcatataggtatatatgacatatgcatatatcatataggtatatatga +tatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatacagg----- +----tatatgcgcatatatcatataggtataggt------------------ataggtag +atatatgatatatgcacacatatcatataggtatatatgatatatgcgcatatatcatat +aggtatatatgatatatg------------------------------------------ +---------------------------------------ataggtatatatgatatatga +ta--------------------------------------------ggtatatatgatat +atatcatatcggtatatatgatatatatcatatcggtatatataggtatatatcataggt +atatataggtatatatcacataggtatatataggtatatatcatataggtatatataata +tatgcatttatattatataggtatatatattacataggtatatgtaatatgtgcatatat +attatataggtatatatattatacaggcatatataatacgtatatatattatatgggcat +atatatgtgtatatattatata-------------------------------------- +------------------------------------------------------------ +------------------------------ggtatatataatatatgtgtatatattatg +tgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtacatat +tatgtgggtatatataatatatgtgtatatattatgtaggtatatataatatatgtgtat +atattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatatatgt +gtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatat +atgtgtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatata +atatatgtgtatatattatgtaggtatatataatatatgtgtatatattatatatgtaat +aggaagcaggaatgaatgcacagaagaccttagagtttaaggcattatttgtgtaatgag +aaatattatatatacgtatattaatatagacgtatatataatatatacatatattaatat +agacgtatatataatatatacatatatacatatatagtatatactatgtatacatatata +ctatacatacgtatattatatacatgtgtattatatacatgtgtattatatatatgtgta +tacatatatactatatatacatacattatatacatacattatatatgtgtattatatatg +tatatattatgtatgtatatattatatatacgtatatattaatatacatatatatgtgtc +tcattataccaataatgccttaaactctaaggtcttctgtgcattcattcctgcttccta +ttacatatatattatatagatacatatataatatatacatatatactacataatatatgc +------------------------------------------------------------ +------------------------------atatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata +cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca +tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag +aattaaagtaagagttttctttctttcatctcctttttatctctctca +>p:HG002_1_chr20:22081861-22084223 +ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg +tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc +gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt +attactatatattaatataatataataataatacaatataatatatgtatatatgtaggt +atataataatatacctacatatatacatatattatatatactatctgtatattatatgta +taatatatgtatatataatatatgtatatactatacatgtatatattatattatacatgt +atatattataagtatatactatacatgcatgtattatatgtatatac------------- +---------------tatacatgcatgtattatatgtatatattatacatgcatgtatta +tatgtatatattatacatgcatgtattatatgtatatattatacatgtatgtattatatg +tatagattatacatgtatatattatatatgtatatattacacatgtacatatatgtatat +attaaacacgtttatattatttattata-------------------------------- +-------------------------------------------catgtaggtatatatga +catatgcatatatcatataggtatatatgacatatgcatatatcatataggtatatatga +tatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatacagg----- +----tatatgcgcatatatcatataggtataggt------------------ataggtag +atatatgatatatgcacacatatcatataggtatatatgatatatgcgcatatatcatat +aggtatatatgatatatg------------------------------------------ +---------------------------------------ataggtatatatgatatatga +ta--------------------------------------------ggtatatatgatat +atatcatatcggtatatatgatatatatcatatcggtatatataggtatatatcataggt +atatataggtatatatcacataggtatatataggtatatatcatataggtatatataata +tatgcatttatattatataggtatatatattacataggtatatgtaatatgtgcatatat +attatataggtatatatattatacaggcatatataatacgtatatatattatatgggcat +atatatgtgtatatattatata-------------------------------------- +------------------------------------------------------------ +------------------------------ggtatatataatatatgtgtatatattatg +tgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtacatat +tatgtgggtatatataatatatgtgtatatattatgtaggtatatataatatatgtgtat +atattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatatatgt +gtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatat +atgtgtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatata +atatatgtgtatatattatgtaggtatatataatatatgtgtatatattatatatgtaat +aggaagcaggaatgaatgcacagaagaccttagagtttaaggcattatttgtgtaatgag +aaatattatatatacgtatattaatatagacgtatatataatatatacatatattaatat +agacgtatatataatatatacatatatacatatatagtatatactatgtatacatatata +ctatacatacgtatattatatacatgtgtattatatacatgtgtattatatatatgtgta +tacatatatactatatatacatacattatatacatacattatatatgtgtattatatatg +tatatattatgtatgtatatattatatatacgtatatattaatatacatatatatgtgtc +tcattataccaataatgccttaaactctaaggtcttctgtgcattcattcctgcttccta +ttacatatatattatatagatacatatataatatatacatatatactacataatatatgc +------------------------------------------------------------ +------------------------------atatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata +cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca +tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag +aattaaagtaagagttttctttctttcatctcctttttatctctctca +>p:HG002_2_chr20:22081861-22084223 +ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg +tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc +gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt +attaatatatattaatataatataataataatacaatataatatatgtatatatgtaggt +atataataatatacctacatatatacatatattatatatactatctgtatattatatgta +taatatatgtatatataatatatgtatatactatacatgtatatattatattatacatgt +atatattataagtatatactatacatgcatgtattatatgtatatac------------- +---------------tatacatgcatgtattatatgtatatattatacatgcatgtatta +tatgtatatattatacatgcatgtattatatgtatatattatacatgtatgtattatatg +tatagattatacatgtatatattatatatgtatatattacacatgtacatatatgtatat +attaaacacgtttatattatttattatacatgactatattatataggtatatatgatata +tgcatatatgtcatataggtatatatgacatatatgcatatatcatgtaggtatatatga +catatgcatatatcatataggtatatatgacatatgcatatatcatataggtatatatga +tatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatataggtatat +atgatatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatataggt +atatatgatatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatat +aggtatatatgatatatgcgcatatatcatacaggtatatgtgcatatatcatataggta +taggtataggtagatatatgatatatgcacacatatcatataggtatatatgatatatga +taggtatatatgatatatgataggtatatatgatatatatcatattggtatatatgatat +atatcatatcggtatatatgatatatatcatatcggtatatataggtatatatcataggt +atatataggtatatatcacataggtatatataggtatatatcatataggtatatataata +tatgcatttatattatataggtatatatattacataggtatatgtaatatgtgcatatat +attatataggtatatatattatacaggcatatataatacgtatatatattatatgggcat +atatatgtatatatattatataggtatatataatatatgtgtatatattatgtgggtata +tataatatatgtgtacatattatgtgggtatatataatatatgtgtacatattatgtggg +tatatataatatatgtgtacatattatgtgggtatatataatatatgtgtatatattatg +tgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtatatat +tatgtgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtat +atattatgtgggtatatataatatatgtgtatatattatgtgggtatatataatatatgt +gtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatat +atgtgtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatata +atatatgtgtatatattatgtaggtatatataatatatgtgtatatattatatatgtaat +aggaagcaggaatgaatgcacagaagaccttagagtttaaggcattatttgtgtaatgag +aaatattatatatacg---------------------------------tatattaatat +agacgtatatataatatatacatatatacatatatagtatatactatgtatacatatata +ctatacatacgtatattatatacatgtgtattatatacatgtgtattatatatatgtgta +tacatatatactatatatacatacattatatacatacattatatatgtgtattatatatg +tatatattatgtatgtatatattatatatacgtatatattaatatgcatatatatgtgtc +tctttataccaataatgccttaaactctaaggtcttctgtgcattcattcctgcttccta +ttacatatatattatatagatacatatataatatatacatatatactacataatatatgc +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata +cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca +tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag +aattaaagtaagagttttctttctttcatctcctttttatctctctca +>ref_chr20:22081861-22084223 +ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg +tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc +gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt +attactatatattaatataatataataataatacaatataatatatgtatatatgtaggt +atataataatatacctacatatatacatatattatatatactatctgtatattatatgta +taatatatgtatatataatatatgtatatactatacatgtatatattatattatacatgt +atatattataagtatatactatacatgcatgtattatatgtatatactatacatgcatgt +attatatgtatatattatacatgcatgtattatatgtatatattatacatgcatgtatta +tatgtatatattatacatgcatgtattatatgtatatattatacatgtatgtattatatg +tatagattatacatgtatatattatatatgtatatattacacatgtacatatatgtatat +attaaacacgtttatattatttattata-------------------------------- +-------------------------------------------catgtaggtatatatga +catatgcatatatcatataggtatatatgacatatgcatatatcatataggtatatatga +tatatgcgcatatatcatataggtatatatgatatatgcgcatatatcatacagg----- +----tatatgcgcatatatcatataggtataggt------------------ataggtag +atatatgatatatgcacacatatcatataggtatatatgatatatgcgcatatatcatat +aggtatatatgatatatg------------------------------------------ +---------------------------------------ataggtatatatgatatatga +ta--------------------------------------------ggtatatatgatat +atatcatatcggtatatatgatatatatcatatcggtatatataggtatatatcataggt +atatataggtatatatcacataggtatatataggtatatatcatataggtatatataata +tatgcatttatattatataggtatatatattacataggtatatgtaatatgtgcatatat +attatataggtatatatattatacaggcatatataatacgtatatatattatatgggcat +atatatgtgtatatattatata-------------------------------------- +------------------------------------------------------------ +------------------------------ggtatatataatatatgtgtatatattatg +tgggtatatataatatatgtgtatatattatgtgggtatatataatatatgtgtacatat +tatgtgggtatatataatatatgtgtatatattatgtaggtatatataatatatgtgtat +atattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatatatgt +gtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatataatat +atgtgtatatattatgtaggtatatataatatatgtgtatatattatgtaggtatatata +atatatgtgtatatattatgtaggtatatataatatatgtgtatatattatatatgtaat +aggaagcaggaatgaatgcacagaagaccttagagtttaaggcattatttgtgtaatgag +aaatattatatatacgtatattaatatagacgtatatataatatatacatatattaatat +agacgtatatataatatatacatatatacatatatagtatatactatgtatacatatata +ctatacatacgtatattatatacatgtgtattatatacatgtgtattatatatatgtgta +tacatatatactatatatacatacattatatacatacattatatatgtgtattatatatg +tatatattatgtatgtatatattatatatacgtatatattaatatacatatatatgtgtc +tcattataccaataatgccttaaactctaaggtcttctgtgcattcattcctgcttccta +ttacatatatattatatagatacatatataatatatacatatatactacataatatatgc +------------------------------------------------------------ +------------------------------atatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt +atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata +cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca +tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag +aattaaagtaagagttttctttctttcatctcctttttatctctctca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa new file mode 100644 index 00000000..fbf24462 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa @@ -0,0 +1,155 @@ +>syndip_1_chr20:20320185-20320680 +atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat +ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga +tggtatttcagatatatag--atatatatattatatatatgtaatatatataatatatgt +a--atatattatatatatgtaatatatataatatatgtaatatattatatatatgtaata +tatataatatatgtaatatatattatatatgtaatatatataatatatgtaatatatatt +atatatgtaatatatataatatatgtaatatatattatatatgtaatatataatatatgt +aatatatattatatatgtaatataatatatgtaatatatattatatatgtaatatatatt +acatatattatatatgcaatatatattatatatattatatatgcaatatatattatatat +attatatatgcaatatatattatatatgcaatatatattatatatattatatatgcaata +tatattatatatattacatatgtaatatatattacatatgtaatatataatatattatat +attacatatgtaatatataatatattatatattacatatgtaatatataatatatattac +atatgtaatatataatatatattacatatgtaatatattatatattacatatgtaatata +ttatatattacatatgtaatatattatatattacatatgtaatatattatatattacata +tgtaatatattatatattacatatgtaatatatataatatattacatatgtaatatataa +tatattatatatgtaatatatattacatatataatatatataatatatatgtaatatatt +atatatattatatatgtaatatatattacatatataatatatattatatatattacatat +attatatatgtaatatatattacatatgtaatatatgtaatatatattacatatgtaata +tatattatatatgtaatatatattacatatattatatatgtaatatatattatatatatt +atatatgtaatgtataatatatatattatatatgtaatatatattatatataatatatgt +aatatatattatatatgtaatgtatattatatatattatatatgtaatgtatattatata +taatatatgtaatgtatattatatatgtaatatatattatgtatattatatatgtaatat +atattatatatattatatatgtaatatatattatatatattatatatgtaatatatatta +tatatattatatatgtaatatatattatatatattatatatgtaatatatattatatata +ttatatatgtaatatatattatatatattatatatgtaatatatattatatatattatat +atgtaatatatattatatatattatatatgtaatatatattatatatattatatatgtaa +tatatattatatatattatatatgtaatatatattatatatattatatatgtaatatata +ttatatatattatatatgtaatatatattatatatattatatatgtaatatatattatat +atattatatatgtaatatatatatatttaaaaacagaaccattatcttttagagatacat +actgaagtgtctggagacatgcttcaagataacccaggagggagaatggtagaaggaact +agagatgacccaagactgcccttgagctaataactgt +>syndip_2_chr20:20320185-20320680 +atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat +ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga +tggtatttcagatatatagatatatatatattatatatatgtaatatatataatatatgt +aatatatattatatatatgtaatatatataatatatgta---------atatatgtaata +tatattatatatgtaatata----atatatgtaatatatattatatatgtaatatataat +atatgta----atatataatatatgtaatatatattatatatgtaatatataatatatat +tatatatgtaatatatatatatttaaaaacagaaccattatctttta-gagatacatact +gaagtgtctggagacatgcttcaagata-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acccaggagggagaatggtagaaggaact +agagatgacccaagactgcccttgagctaataactgt +>p:HG002_1_chr20:20320185-20320680 +atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat +ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga +tggtatttcagatatatagatatatatatattatatatatgtaatatatataatatatgt +aatatatattatatatatgtaatatatataatatatgta---------atatatgtaata +tatattatatatgtaatata----atatatgtaatatatattatatatgtaatatataat +atatgta----atatataatatatgtaatatatattatatatgtaatatataatatatat +tatatatgtaatatatatatatttaaaaacagaaccattatctttta-gagatacatact +gaagtgtctggagacatgcttcaagata-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acccaggagggagaatggtagaaggaact +agagatgacccaagactgcccttgagctaataactgt +>p:HG002_2_chr20:20320185-20320680 +atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat +ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga +tggtatttcagatatatag--atatatatattatatatatgtaatatatataatatatgt +a--atatattatatatatgtaatatatataatatatgtaatatattatatatatgtaata +tatataatatatgtaatatatattatatatgtaatatatataatatatgtaatatatatt +atatatgtaatatatataatatatgtaatatatattatatatgtaatatataatatatgt +aatatatattatatatgtaatataatatatgtaatatatattatatatgtaatatatatt +acatatattatatatgcaatatatattatatatattatatatgcaatatatattatatat +attatatatgcaatatatattatatatgcaatatatattatatatattatatatgcaata +tatattatatatattacatatgtaatatatattacatatgtaatatataatatattatat +attacatatgtaatatataatatattatatattacatatgtaatatataatatatattac +atatgtaatatataatatatattacatatgtaatatattatatattacatatgtaatata +ttatatattacatatgtaatatattatatattacatatgtaatatattatatattacata +tgtaatatattatatattacatatgtaatatatataatatattacatatgtaatatataa +tatattatatatgtaatatatattacatatataatatatataatatatatgtaatatatt +atatatattatatatgtaatatatattacatatataatatatattatatatattacatat +attatatatgtaatatatattacatatgtaatatatgtaatatatattacatatgtaata +tatattatatatgtaatatatattacatatattatatatgtaatatatattatatatatt +atatatgtaatgtataatatatatattatatatgtaatatatattatatataatatatgt +aatatatattatatatgtaatgtatattatatatattatatatgtaatgtatattatata +taatatatgtaatgtatattatatatgtaatatatattatgtatattatatatgtaatat +atattatatatattatatatgtaatatatattatatatattatatatgtaatatatatta +tatatattatatatgtaatatatattatatatattatatatgtaatatatattatatata +ttatatatgtaatatatattatatatattatatatgtaatatatattatatatattatat +atgtaatatatattatatatattatatatgtaatatatattatatatattatatatgtaa +tatatattatatatattatatatgtaatatatattatatatattatatatgtaatatata +ttatatatattatatatgtaatatatattatatatattatatatgtaatatatattatat +atattatatatgtaatatatatatatttaaaaacagaaccattatcttttagagatacat +actgaagtgtctggagacatgcttcaagataacccaggagggagaatggtagaaggaact +agagatgacccaagactgcccttgagctaataactgt +>ref_chr20:20320185-20320680 +atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat +ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga +tggtatttcagatatatagatatatatatattatatatatgtaatatatataatatatgt +aatatatattatatatatgtaatatatataatatatgta---------atatatgtaata +tatattatatatgtaatata----atatatgtaatatatattatatatgtaatatataat +atatgta----atatataatatatgtaatatatattatatatgtaatatataatatatat +tatatatgtaatatatatatatttaaaaacagaaccattatctttta-gagatacatact +gaagtgtctggagacatgcttcaagata-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------acccaggagggagaatggtagaaggaact +agagatgacccaagactgcccttgagctaataactgt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa new file mode 100644 index 00000000..088b90c7 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa @@ -0,0 +1,105 @@ +>syndip_1_chr20:62875072-62875657 +aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac +cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt +catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac +ccccacccccacccgcacccccacccccacccgcacccccacccccacccccacccgcac +ccccacccccacccccacccgcacccccacccccacccgcacccccacccccacccccac +ccgcacccccacccccacccccacccgcacccgcacccccacccccacccgcacccgcac +ccccacccccacccccacccgcacccccacccccacccccacccccacccgcacccgcac +ccgcacccccacccccaccctcacccgcacccgcacccccacccccacccccacccgcac +ccgcacccccacccccacccc------------cacccccacccccacccgcacccgcac +ccgcacccccaccctcaccctcacccgcacccgcacccccacccccacccgcacccccac +ccgcacccccacccccacccccacccccacccgcacccgcacccccacccccacccccac +ccccacccccacccccacccccacccccaccctcacccgcacccccaccctcacccccac +cctcacccccaccctcacccgcaccctcacccgcacccgcacccccaccct------cac +ccacaccctcacccccaccctcacccccacccccaccctcacccgcacccgcacccccac +cctcacccccacccccacccccaccctcaccctcaccctcacccgcacccgcacccccac +ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac +ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc +acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact +gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag +ctatcctgcc +>syndip_2_chr20:62875072-62875657 +aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac +cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt +catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac +ccccacccgcacccgcacccccacccccacccgcacccccacccccacccccacccgcac +ccccacccccacccccacccgcacccccacccccacccccacccgcacccgcacccgcac +ccccacccccacccgcacccccacccccacccgcacccgcacccccacccgcacccccac +ccccacccccaccctcacccccacccgcacccccacccccacccccacccgcacccgcac +ccccacccccacccccacccgcacccccacccccacccccacccgcacccgcacccccac +ccgcacccccacccccacccccacccgcacccgcacccccacccgcacccgcacccccac +ccgcacccccacccccacctccacccccacccgcacccgcacccccacccccacccgcac +ccccacccccacccgcacccccacccccaccctcacccgcacccccacccccacccgcac +ccccacccccaccctcacccccaccctcacccgcacccgcacccccaccctcacccccac +cctcacccccaccctcacccgcacccccacccccacccccacccccaccctcccccccac +ccgcacccgcacccccaccctcacccgcacccccaccctcacccccaccctcacccccac +cctcacccccacccccaccctcacccgcacccccactcgcacccgcacccccaccctcac +ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac +ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc +acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact +gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag +ctatcctgcc +>p:HG002_1_chr20:62875072-62875657 +aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac +cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt +catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac +ccccacccgcacccgcacccccacccccacccgcacccccacccccacccccacccgcac +ccccacccccacccccacccgcacccccacccccacccccacccgcacccgcacccgcac +ccccacccccacccgcacccccacccccacccgcacccgcacccccacccgcacccccac +ccccacccccaccctcacccccacccgcacccccacccccacccccacccgcacccgcac +ccccacccccacccccacccgcacccccacccccacccccacccgcacccgcacccccac +ccgcacccccacccccacccccacccgcacccgcacccccacccgcacccgcacccccac +ccgcacccccacccccacctccacccccacccgcacccgcacccccacccccacccgcac +ccccacccccacccgcacccccacccccaccctcacccgcacccccacccccacccgcac +ccccacccccaccctcacccccaccctcacccgcacccgcacccccaccctcacccccac +cctcacccccaccctcacccgcacccccacccccacccccacccccaccctcccccccac +ccgcacccgcacccccaccctcacccgcacccccaccctcacccccaccctcacccccac +cctcacccccacccccaccctcacccgcacccccactcgcacccgcacccccaccctcac +ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac +ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc +acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact +gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag +ctatcctgcc +>p:HG002_2_chr20:62875072-62875657 +aactttcctggcacactggagaggacggcagaacgctggaatggatttatcgtgcaagac +cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt +catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac +ccccacccccacccgcacccccacccccacccgcacccccacccccacccccacccgcac +ccccacccccacccccacccgcacccccacccccacccgcacccccacccccacccccac +ccgcacccccacccccacccccacccgcacccgcacccccacccccacccgcacccgcac +ccccacccccacccccacccgcacccccacccccacccccacccccacccgcacccgcac +ccgcacccccacccccaccctcacccgcacccgcacccccacccccacccccacccgcac +ccgcacccccacccccacccc------------cacccccacccccacccgcacccgcac +ccgcacccccaccctcaccctcacccgcacccgcacccccacccccacccgcacccccac +ccgcacccccacccccacccccacccccacccgcacccgcacccccacccccacccccac +ccccacccccacccccacccccacccccaccctcacccgcacccccaccctcacccccac +cctcacccccaccctcacccgcaccctcacccgcacccgcacccccaccct------cac +ccacaccctcacccccaccctcacccccacccccaccctcacccgcacccgcacccccac +cctcacccccacccccacccccaccctcaccctcaccctcacccgcacccgcacccccac +ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac +ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc +acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact +gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag +ctatcctgcc +>ref_chr20:62875072-62875657 +aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac +cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt +catcggggagc------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------cccgcaccctcacctgcaccctcac +ccccacccccacccccacccccacccccacccgcacccccacccccaccctcccccccac +ccgcacccgcacccccaccctcacccgcacccccaccctcacccccaccctcacccccac +cctcacccccacccccaccctcacccgcacccccactcgcacccgcacccccaccctcac +ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac +ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc +acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact +gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag +ctatcctgcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa new file mode 100644 index 00000000..3b5e22b1 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa @@ -0,0 +1,50 @@ +>syndip_1_chr20:57350708-57351088 +aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc +ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta +tttttagtactgacggggggacggggggacgggggacgggggacggggggcgggggggac +gggggacggggggcggggggggcgggggggacggggggcggggggg-gcgggggccgggg +ggggcggggggacgggggcggggggccggggggggcggggggacgggggcgggggggcgg +ggggggcggggggacgggggcggggggcgcgggggggcggggggggcggcgcgggggggc +gggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga +ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc +gcgcctggccagttgcggatttataaa +>syndip_2_chr20:57350708-57351088 +aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc +ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta +tttttagtactgacggggggacgggggggc------------------------------ +------------------------gggggggcgggggggcgggggg-acggggggcgggg +ggggcggggggacgggggcggggggcgggggggggc------------------------ +-------------------------ggcgcgggggggc---------------------- +-ggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga +ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc +gcgcctggccagttgcggatttataaa +>p:HG002_1_chr20:57350708-57351088 +aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc +ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta +tttttagtactgacggggggacgggggggc------------------------------ +------------------------gggggggcgggggggcgggggg-acggggggcgggg +ggggcggggggacgggggcggggggcgggggggggc------------------------ +-------------------------ggcgcgggggggc---------------------- +-ggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga +ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc +gcgcctggccagttgcggatttataaa +>p:HG002_2_chr20:57350708-57351088 +aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc +ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta +tttttagtactgacggggggacggggggacgggggacgggggacggggggcgggggggac +gggggacggggggcggggggggcgggggggacggggggcggggggg-gcgggggccgggg +ggggcggggggacgggggcggggggccggggggggcggggggacgggggcgggggggcgg +ggggggcggggggacgggggcggggggcgcgggggggcggggggggcggcgcgggggggc +gggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga +ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc +gcgcctggccagttgcggatttataaa +>ref_chr20:57350708-57351088 +aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc +ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta +tttttagtactgacggggggacgggggggc------------------------------ +------------------------gggggggcggggggacggggggcggggggggcgggg +ggggcggggggacgggggcggggggcgggggggggc------------------------ +-------------------------ggcgcgggggggc---------------------- +-ggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga +ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc +gcgcctggccagttgcggatttataaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa new file mode 100644 index 00000000..cf2b055d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa @@ -0,0 +1,250 @@ +>syndip_1_chr20:51952383-51954993 +ctctgtctcaaaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat +ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc +aaatattgatttgcttaa--ttttttttttttttgagatggggttttgctctgtcgccca +ggctggagtgcaatggcatgatcttggctcactgcaacctctgcctcctgggttcagcca +ttctcctgcctcagcctcccgagtggctgggactacaggtgcccgccaccacgcctggct +aattttttttttttatatttttagtagagacggggtttcaccatgttagccaggatggtc +tcgatctcctgacctcgtgatccgcccgcctcggcctcccaaagtgctaggattacaggc +ttgagccaccgcgcctggcctttaatataatttttatgaagacattatttccacttcatt +gggctctctagaattagattgcataaatactgtagttttatttcaataaaaagtcaactg +ttatttaaaccatgactaaagcttgacatgaatagagactttaaaacaataaacattgcc +agacatggtggcacatgcctgtagtctcagctactcaagaaactgagacaggaggatctc +ctgagccaaggagttccagcccagcctggacaacatagtgagatcccatctctt---aaa +aaaaaaaaaaaaagatttgcttaattttacttttacaattttttcttttctttttttttt +tttttttttttttttttttgagatggagtcttgctctgttaccaggctggagtgcagtgg +tgtgatatcggctccctgcaacctctgcctcctgggttaaagcaattttcctgccccagc +ctcccgagtagctgggactacaggcgcgcgtcaccacacccagctaatttttgtattttt +agtagagatggggtttcaccatgttggccaggatggtctcaatctcttgatctcgtgatc +cgcccaccttggcctcccaaagtgctggaattacaggcgtgatccgccataacctggcct +tccttcttttctttaaaaatttttttctgagacagagtctcactctgttgcccagcctgg +agtgcagtggcacaatctcggctcactgtagcctccaccttctgggttcaagtgattctc +gtgcctcagcctcccgagtagctgtgactacaggtgtgtgccctcatgcctggctaattt +ttgtatttttagtagagatggggtttcaccatgttggccaggctggtctcgaaccctggg +ctcaagtgttctgtctgcctcagccccccaaagtgctgggattacaggcatgagccaaag +tgcctggctagaaaattgtaaaagtaacttaatatctgcctcatatacttttgttttatt +t----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------aaaaatgcaaatcctcaaccacaaatcgaaacaagaacaaccctctgat +gacttccctccctctctctttgtacaaagaagactttctgctgtctgactctgaccacct +tggtctattttatggagctccaaccctgccttgtttccttctcaggccatctgtcctggt +tggcctcactgttggaaatgctcttctcaagatgttcacgtggctggttctttgtcattc +agtctcagttcaaatactccctcctccaggaagccttccctaacttcctaggtgaaagtg +gttcccttcccctgagtcacgattatggtttcttcagtgtacttattaatggccaacact +gtctcctttattgtttgcttatgaatttaaagtctgttagcagcacctgcagcttttaaa +agcctccttgtctctctc----tatttttttttttttttttttgaggcggagtttcgctc +ttgttggccaggctggaacgcagtgatgcgatctcggctcagtgcaacctccacctcttg +ggttcaagtgattctcctgcctcagcctccttagtagctgagactacaggcatgtgccac +catgctggctaatttttgtatttttagtagagacagggtttcgtcatattggccaagctg +gtctcgaacccctgacctcaggtgatccacccatcttggcctcccagatgctgggattac +aggcgtgagccactgcgcctggcctaattgtcattttcgtttttttgttgagacagagtt +tctgtcacccaggctggaatgcagtggcacgatctcagctcactgcagcctctgcctctt +gggttcaattgattctcctgcctcagcctcccaagtagctgggactacaggtgtgtgcca +ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc +ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat +tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt +tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc +ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag +>syndip_2_chr20:51952383-51954993 +ctctgtctc-aaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat +ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc +aaatattgatttgcttaa-tttttttttttttttgagatggggttttgctctgtcgccca +ggctggagtgcaatggcatgatcttggctcactgcaacctctgcctcctgggttcagcca +ttctcctgcctcagcctcccgagtggctgggactacaggtgcccgccaccacgcctggct +aattttttttttttatatttttagtagagacggggtttcaccatgttagccaggatggtc +tcgatctcctgacctcgtgatccgcccgcctcggcctcccaaagtgctaggattacaggc +ttgagccaccgcgcctggcctttaatataatttttatgaagacattatttccacttcatt +gggctctctagaattagattgcataaatactgtagttttatttcaataaaaagtcaactg +ttatttaaaccatgactaaagcttgacatgaatagagactttaaaacaataaacattgcc +agacatggtggcacatgcctgtagtctcagctactcaagaaactgagacaggaggatctc +ctgagccaaggagttccagcccagcctggacaacatagtgagatcccatctcttaaaaaa +aaaaaaaaaaaaagatttgcttaattttacttttacaattttttcttttc--tttttttt +tttttttttttttttttttgagatggagtcttgctctgttaccaggctggagtgcagtgg +tgtgatatcggctccctgcaacctctgcctcctgggttaaagcaattttcctgccccagc +ctcccgagtagctgggactacaggcgcgcgtcaccacacccagctaatttttgtattttt +agtagagatggggtttcaccatgttggccaggatggtctcaatctcttgatctcgtgatc +cgcccaccttggcctcccaaagtgctggaattacaggcgtgatccgccataacctggcct +tccttcttttctttaaaaatttttttctgagacagagtctcactctgttgcccagcctgg +agtgcagtggcacaatctcggctcactgtagcctccaccttctgggttcaagtgattctc +gtgcctcagcctcccgagtagctgtgactacaggtgtgtgccctcatgcctggctaattt +ttgtatttttagtagagatggggtttcaccatgttggccaggctggtctcgaaccctggg +ctcaagtgttctgtctgcctcagccccccaaagtgctgggattacaggcatgagccaaag +tgcctggctagaaaattgtaaaagtaacttaatatctgcctcatatacttttgttttatt +t----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------aaaaatgcaaatcctcaaccacaaatcgaaacaagaacaaccctctgat +gacttccctccctctctctttgtacaaagaagactttctgctgtctgactctgaccacct +tggtctattttatggagctccaaccctgccttgtttccttctcaggccatctgtcctggt +tggcctcactgttggaaatgctcttctcaagatgttcacgtggctggttctttgtcattc +agtctcagttcaaatactccctcctccaggaagccttccctaacttcctaggtgaaagtg +gttcccttcccctgagtcacgattatggtttcttcagtgtacttattaatggccaacact +gtctcctttattgtttgcttatgaatttaaagtctgttagcagcacctgcagcttttaaa +agcctccttgtctctctc--tatttttttttttttttttttttgaggcggagtttcgctc +ttgttggccaggctggaacgcagtgatgcgatctcggctcagtgcaacctccacctcttg +ggttcaagtgattctcctgcctcagcctccttagtagctgagactacaggcatgtgccac +catgctggctaatttttgtatttttagtagagacagggtttcgtcatattggccaagctg +gtctcgaacccctgacctcaggtgatccacccatcttggcctcccagatgctgggattac +aggcgtgagccactgcgcctggcctaattgtcattttcgtttttttgttgagacagagtt +tctgtcacccaggctggaatgcagtggcacgatctcagctcactgcagcctctgcctctt +gggttcaattgattctcctgcctcagcctcccaagtagctgggactacaggtgtgtgcca +ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc +ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat +tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt +tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc +ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag +>p:HG002_1_chr20:51952383-51954993 +ctctgtctc-aaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat +ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc +aaatattgatttgcttaa-tttttttttttttttgagatggggttttgctctgtcgccca +ggctggagtgcaatggcatgatcttggctcactgcaacctccgcctcctgggttcagcca +ttctcctgcctcagcctcccgagtggctgggactacaggtgcccgccaccacgcctggct +aattttttttttttatatttttagtagagacggggtttcaccatgttagccaggatggtc +tcgatctcctgacctcgtgatccgcccgcctcggcctcccaaagtgctaggattacaggc +ttgagccaccgcgcctggcctttaatataatttttatgaagacattatttccacttcatt +gggctctctagaattagattgcataaatactgtagttttatttcaataaaaagtcaactg +ttatttaaaccatgactaaagcttgacatgaatagagactttaaaacaataaacattgcc +agacatggtggcacatgcctgtagtctcagctactcaagaaactgagacaggaggatctc +ctgagccaaggagttccagcccagcctggacaacatagtgagatcccatctcttaaaaaa +aaaaaaaaaaaaagatttgcttaattttacttttacaattttttcttttc--tttttttt +tttttttttttttttttttgagatggagtcttgctctgttaccaggctggagtgcagtgg +tgtgatatcagctccctgcaacctctgcctcctgggttaaagcaattttcctgccccagc +ctcccgagtagctgggactacaggcgcgcgtcaccacacccagctaatttttgtattttt +agtagagatggggtttcaccatgttggccaggatggtctcaatctcttgatctcgtgatc +cgcccaccttggcctcccaaagtgctggaattacaggcgtgatccgccataacctggcct +tccttctttcctttaaaaatttttttctgagacagagtctcactctgttgcccagcctgg +agtgcagtggcacaatctcggctcactgtagcctccaccttctgggttcaagtgattctc +gtgcctcagcctcccgagtagctgtgactacaggtgtgtgccctcatgcctggctaattt +ttgtatttttagtagagatggggtttcaccatgttggccaggctggtctcgaaccctggg +ctcaagtgttctgtctgcctcagccccccaaagtgctgggattacaggcatgagccaaag +tgcctggctagaaaattgtaaaagtaacttaatatctgcctcatatacttttgttttatt +t----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------aaaaatgcaaatcctcaaccacaaatcgaaacaagaacaaccctctgat +gacttccctccctctctctttgtacaaagaagactttctgctgtctgactctgaccacct +tggtctattttatggagctccaaccctgccttgtttccttctcaggccatctgtcctggt +tggcctcactgttggaaatgctcttctcaagatgttcacgtggctggttctttgtcattc +agtctcagttcaaatactccctcctccaggaagccttccctaacttcctaggtgaaagtg +gttcccttcccctgagtcacgattatggtttcttcagtgtacttattaatggccaacact +gtctcctttattgtttgcttatgaatttaaagtctgttagcagcacctgcagcttttaaa +agcctccttgtctctctc--tatttttttttttttttttttttgaggcggagtttcgctc +ttgttggccaggctggaacgcagtgatgcgatctcggctcagtgcaacctccacctcttg +ggttcaagtgattctcctgcctcagcctccttagtagctgagactacaggcatgtgccac +catgctggctaatttttgtatttttagtagagacagggtttcgtcatattggccaagctg +gtctcgaacccctgacctcaggtgatccacccatcttggcctcccagatgctgggattac +aggcgtgagccactgcgcctggcctaattgtcattttcgtttttttgttgagacagagtt +tctgtcacccaggctggaatgcagtggcacgatctcagctcactgcagcctctgcctctt +gggttcaattgattctcctgcctcagcctcccaagtagctgggactacaggtgtgtgcca +ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc +ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat +tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt +tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc +ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag +>p:HG002_2_chr20:51952383-51954993 +ctctgtctcaaaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat +ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc +aaatattgatttgcttaa--ttttttttttttttgagatggggttttgctctgtcgccca +ggctggagtgcaatggcatgatcttggctcactgcaacctccgcctcctgggttcagcca +ttctcctgcctcagcctcccgagtggctgggactacaggtgcccgccaccacgcctggct +aattttttttttttatatttttagtagagacggggtttcaccatgttagccaggatggtc +tcgatctcctgacctcgtgatccgcccgcctcggcctcccaaagtgctaggattacaggc +ttgagccaccgcgcctggcctttaatataatttttatgaagacattatttccacttcatt +gggctctctagaattagattgcataaatactgtagttttatttcaataaaaagtcaactg +ttatttaaaccatgactaaagcttgacatgaatagagactttaaaacaataaacattgcc +agacatggtggcacatgcctgtagtctcagctactcaagaaactgagacaggaggatctc +ctgagccaaggagttccagcccagcctggacaacatagtgagatcccatctctt---aaa +aaaaaaaaaaaaagatttgcttaattttacttttacaattttttcttttctttttttttt +tttttttttttttttttttgagatggagtcttgctctgttaccaggctggagtgcagtgg +tgtgatatcggctccctgcaacctctgcctcctgggttaaagcaattttcctgccccagc +ctcccgagtagctgggactacaggcgcgcgtcaccacacccagctaatttttgtattttc +agtagagatggggtttcaccatgttggccaggatggtctcaatctcttgatctcgtgatc +cgcccaccttggcctcccaaagtgctggaattacaggcgtgatccgccataacctggcct +tccttcttttctttaaaaatttttttctgagacagagtctcactctgttgcccagcctgg +agtgcagtggcacaatctcggctcactgtagcctccaccttctgggttcaagtgattctc +gtgcctcagcctcccgagtagctgtgactacaggtgtgtgccctcatgcctggctaattt +ttgtatttttagtagagatggggtttcaccatgttggccaggctggtctcgaaccctggg +ctcaagtgttctgtctgcctcagccccccaaagtgctgggattacaggcatgagccaaag +tgcctggctagaaaattgtaaaagtaacttaatatctgcctcatatacttttgttttatt +taaaaatgcaaatccggccgggcgcggtggctcacgcctgtaatcccagcactttgggag +gccgaggcgggcggatcacgaggtcaggagatcgagaccatcccggctaaaacggtgaaa +ccccgtctctactaaaaatacaaaaaattagccgggcgtggtggcgggcgcctgtagtcc +cagctacttgggaggctgaggcaggagaatggcgtgaacccgggaggcggagcttgcagt +gagccgagatcccgccactgcactccagcctgggcgacagagcgagactccgtctcaaaa +aaaaaaaaaaaaaaaatgcaaatcctcaaccacaaatcgaaacaagaacaaccctctgat +gacttccctccctctctctttgtacaaagaagactttctgctgtctgactctgaccacct +tggtctattttatggagctccaaccctgccttgtttccttctcaggccatctgtcctggt +tggcctcactgttggaaatgctcttctcaagatgttcacgtggctggttctttgtcattc +agtctcagttcaaatactccctcctccaggaagccttccctaacttcctaggtgaaagtg +gttcccttcccctgagtcacgattatggtttcttcagtgtacttattaatggccaacact +gtctcctttattgtttgcttatgaatttaaagtctgttagcagcacctgcagcttttaaa +agcctccttgtctctctc----tatttttttttttttttttttgaggcggagtttcgctc +ttgttggccaggctggaatgcagtgatgcgatctcggctcagtgcaacctccacctcttg +ggttcaagtgattctcctgcctcagcctccttagtagctgagactacaggcatgtgccac +catgctggctaatttttgtatttttagtagagacagggtttcgtcatattggccaagctg +gtctcgaacccctgacctcaggtgatccacccatcttggcctcccagatgctgggattac +aggcgtgagccactgcgcctggcctaattgtcattttcgtttttttgttgagacagagtt +tctgtcacccaggctggaatgcagtggcacgatctcagctcactgcagcctctgcctctt +gggttcaattgattctcctgcctcagcctcccaagtagctgggactacaggtgtgtgcca +ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc +ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat +tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt +tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc +ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag +>ref_chr20:51952383-51954993 +ctctgtctc-aaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat +ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc +aaatattgatttgcttaattttttttttttttttgagatggggttttgctctgtcgccca +ggctggagtgcaatggcatgatcttggctcactgcaacctctgcctcctgggttcagcca +ttctcctgcctcagcctcccgagtggctgggactacaggtgcccgccaccacgcctggct +aattttttttttttatatttttagtagagacggggtttcaccatgttagccaggatggtc +tcgatctcctgacctcgtgatccgcccgcctcggcctcccaaagtgctaggattacaggc +ttgagccaccgcgcctggcctttaatataatttttatgaagacattatttccacttcatt +gggctctctagaattagattgcataaatactgtagttttatttcaataaaaagtcaactg +ttatttaaaccatgactaaagcttgacatgaatagagactttaaaacaataaacattgcc +agacatggtggcacatgcctgtagtctcagctactcaagaaactgagacaggaggatctc +ctgagccaaggagttccagcccagcctggacaacatagtgagatcccatctctt--aaaa +aaaaaaaaaaaaagatttgcttaattttacttttacaattttttcttttctttttttttt +tttttttttttttttttttgagatggagtcttgctctgttaccaggctggagtgcagtgg +tgtgatatcggctccctgcaacctctgcctcctgggttaaagcaattttcctgccccagc +ctcccgagtagctgggactacaggcgcgcgtcaccacacccagctaatttttgtattttt +agtagagatggggtttcaccatgttggccaggatggtctcaatctcttgatctcgtgatc +cgcccaccttggcctcccaaagtgctggaattacaggcgtgatccgccataacctggcct +tccttcttttctttaaaaatttttttctgagacagagtctcactctgttgcccagcctgg +agtgcagtggcacaatctcggctcactgtagcctccaccttctgggttcaagtgattctc +gtgcctcagcctcccgagtagctgtgactacaggtgtgtgccctcatgcctggctaattt +ttgtatttttagtagagatggggtttcaccatgttggccaggctggtctcgaaccctggg +ctcaagtgttctgtctgcctcagccccccaaagtgctgggattacaggcatgagccaaag +tgcctggctagaaaattgtaaaagtaacttaatatctgcctcatatacttttgttttatt +t----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------aaaaatgcaaatcctcaaccacaaatcgaaacaagaacaaccctctgat +gacttccctccctctctctttgtacaaagaagactttctgctgtctgactctgaccacct +tggtctattttatggagctccaaccctgccttgtttccttctcaggccatctgtcctggt +tggcctcactgttggaaatgctcttctcaagatgttcacgtggctggttctttgtcattc +agtctcagttcaaatactccctcctccaggaagccttccctaacttcctaggtgaaagtg +gttcccttcccctgagtcacgattatggtttcttcagtgtacttattaatggccaacact +gtctcctttattgtttgcttatgaatttaaagtctgttagcagcacctgcagcttttaaa +agcctccttgtctctctctctttttttttttttttttttttttgaggcggagtttcgctc +ttgttggccaggctggaacgcagtgatgcgatctcggctcagtgcaacctccacctcttg +ggttcaagtgattctcctgcctcagcctccttagtagctgagactacaggcatgtgccac +catgctggctaatttttgtatttttagtagagacagggtttcgtcatattggccaagctg +gtctcgaacccctgacctcaggtgatccacccatcttggcctcccagatgctgggattac +aggcgtgagccactgcgcctggcctaattgtcattttcgtttttttgttgagacagagtt +tctgtcacccaggctggaatgcagtggcacgatctcagctcactgcagcctctgcctctt +gggttcaattgattctcctgcctcagcctcccaagtagctgggactacaggtgtgtgcca +ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc +ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat +tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt +tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc +ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa new file mode 100644 index 00000000..09fa7e28 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa @@ -0,0 +1,75 @@ +>syndip_1_chr20:41196110-41196749 +agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt +atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca +tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa +taaatatataaaaataaatatatatttataataaatatatttttatttcatatataaata +tatatcttataaatatatatatttatataacataaatatatatttatatatattata--- +---------------------aatatatattatatataatatatatttatataaatatat +atattatatataatatatttatatataatatatatttatatattataaatatatattata +tataatatatatttatataaatatatattatatataaatatatattatatataatatata +tttatataaatatatattatatattatataaatattatatataatatatatttatataaa +tatatattatatattataaatatatattatatattatatattatataaatattatatata +atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc +aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt +ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca +aagtgaaggctttgaaggcca +>syndip_2_chr20:41196110-41196749 +agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt +atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca +tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa +taaatatataaaaataaatatatatttataataaatatatttttatttcatatataaata +tatatcttataaatatatatatttatataacataaatatatatttatatatattatataa +cataaatatatatttatatatattatatattatatataatatatatttatataaatatat +atattatatataatatatttatatataatatatatttatatattataaatatatattata +tataatatatatttatataaatatatattatatat------------------------- +----tataaatatatattatatattatataaatattatatataatatatatttatataaa +tatataatatatattataaatatatattatatattatatattatataaatattatatata +atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc +aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt +ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca +aagtgaaggctttgaaggcca +>p:HG002_1_chr20:41196110-41196749 +agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt +atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca +tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa +taaatatataaaaataaatatatatttataataaatatatttttatttcatatataaata +tatatcttataaatatatatatttatataacataaatatatatttatatatattatataa +cataaatatatatttatatatattatatattatatataatatatatttatataaatatat +atattatatataatatatttatatataatatatatttatatattataaatatatattata +tataatatatatttatataaatatatattatatat------------------------- +----tataaatatatattatatattatataaatattatatataatatatatttatataaa +tatataatatatattataaatatatattatatattatatattatataaatattatatata +atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc +aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt +ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca +aagtgaaggctttgaaggcca +>p:HG002_2_chr20:41196110-41196749 +agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt +atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca +tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa +taaatatataaaaataaatatatatttataataaatatatttttatttcatatataaata +tatatcttataaatatatatatttatataacataaatatatatttatatatattata--- +---------------------aatatatattatatataatatatatttatataaatatat +atattatatataatatatttatatataatatatatttatatattataaatatatattata +tataatatatatttatataaatatatattatatataaatatatattatatataatatata +tttatataaatatatattatatattatataaatattatatataatatatatttatataaa +tatatattatatattataaatatatattatatattatatattatataaatattatatata +atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc +aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt +ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca +aagtgaaggctttgaaggcca +>ref_chr20:41196110-41196749 +agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt +atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca +tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa +taaatatataaaaataaatatatatttataataaatatatttttatttcatatataaata +tatatcttataaatatatatatttatataacataaatatatatttatatat--------- +---------------------attatatattatatataatatatatttatataaatatat +atattatatataatatatttatatataatatatatttatatattataaatatatat---- +------------------------------------------------------------ +------------------------------------------------------------ +-------tatatattataaatatatattatatattatatattatataaatattatatata +atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc +aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt +ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca +aagtgaaggctttgaaggcca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa new file mode 100644 index 00000000..54b133c9 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa @@ -0,0 +1,50 @@ +>syndip_1_chr20:37361652-37362159 +catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa +aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg +agtgctgctgagggc--------------------------------------------- +-------------------------------------------------------ctctg +tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg +caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg +tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt +tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact +gcagcctcaaactcctgggtgcaagcaa +>syndip_2_chr20:37361652-37362159 +catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa +aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg +agtgctgctgagggcctctgtgtgcaggtagagatgccggggtctctgtgtgcag----- +---------------------------------------gtagagatgccggggtctctg +tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg +caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg +tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt +tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact +gcagcctcaaactcctgggtgcaagcaa +>p:HG002_1_chr20:37361652-37362159 +catcaggggtggggcagctgggcgctctcggaaagatgacctacggctgggagagagcaa +aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg +agtgctgctgagggcctctgtgtgcaggtagagatgccggggtctctgtgtgcag----- +---------------------------------------gtagagatgccggggtctctg +tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg +caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg +tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt +tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact +gcagcctcaaactcctgggtgcaagcaa +>p:HG002_2_chr20:37361652-37362159 +catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa +aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg +agtgctgctgagggc--------------------------------------------- +-------------------------------------------------------ctctg +tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg +caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg +tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt +tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact +gcagcctcaaactcctgggtgcaagcaa +>ref_chr20:37361652-37362159 +catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa +aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg +agtgctgctgagggcctctgtgtgcaggtagagatgccggggtctctgtgtgcaggtaga +gatgtagagatgccggggtctctgtgtgcaggtagagatgtagagatgccggggtctctg +tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg +caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg +tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt +tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact +gcagcctcaaactcctgggtgcaagcaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa new file mode 100644 index 00000000..206e6406 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa @@ -0,0 +1,60 @@ +>syndip_1_chr20:63154485-63155096 +gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc +ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc +cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct +tcctcctccctcctcttc------------------------------------------ +--------------cctccctcctgcttcctccctctgccctccctcctccctcttcctc +cctcctcctcccctctcctctttcctcctccctctcccttcctcctcctcccccctttcc +caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca +---------------------tcccttcctcctcctccctccttcctccctcctccctcc +tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca +gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg +tacaaggcagaaattg +>syndip_2_chr20:63154485-63155096 +gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc +ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc +cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct +tcctcctccctcctcttccctccctcctgcttcctccctctgccctccctcctccctctt +cctccctcctcttccctccctcctgcttcctccctctgccctccctcctccctcttcctc +cctcctcctcccctctcctctttcctcctccctctcccttcctcctcctcccccctttcc +caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca +tcccttcctcctcctccctcctcccttcctcctcctccctccttcctccctcctccctcc +tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca +gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg +tacaaggcagaaattg +>p:HG002_1_chr20:63154485-63155096 +gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc +ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc +cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct +tcctcctccctcctcttccctccctcctgcttcctccctctgccctccctcctccctctt +cctccctcctcttccctccctcctgcttcctccctctgccctccctcctccctcttcctc +cctcctcctcccctctcctctttcctcctccctctcccttcctcctcctcccccctttcc +caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca +tcccttcctcctcctccctcctcccttcctcctcctccctccttcctccctcctccctcc +tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca +gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg +tacaaggcagaaattg +>p:HG002_2_chr20:63154485-63155096 +gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc +ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc +cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct +tcctcctccctcctcttc------------------------------------------ +--------------cctccctcctgcttcctccctctgccctccctcctccctcttcctc +cctcctcctcccctctcctctttcctcctccctctcccttcctcctcctcccccctttcc +caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca +---------------------tcccttcctcctcctccctccttcctccctcctccctcc +tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca +gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg +tacaaggcagaaattg +>ref_chr20:63154485-63155096 +gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc +ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc +cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct +tcctcctccctcctcttccctccctcctgcttcctccctctgccctccctcctccctctt +cctccctcct----cctccttcctgcttcctccctctgccctccctcctccctcttcctc +cctcctcctcccctctcctctttcctcctccctctcccttcctcctcctcccccctttcc +caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca +tcccttcctcctcctccctcctcccttcctcctcctccctccttcctccctcctccctcc +tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca +gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg +tacaaggcagaaattg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa new file mode 100644 index 00000000..c27b67dc --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa @@ -0,0 +1,385 @@ +>syndip_1_chr20:63167287-63167728 +aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg +ctatccatcccctcgactaatacttaccaaattaattccagatttggagtcactcacggt +tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc +agtgcccacctcagccccctcctcagcccccacctccgcccccacctcagcccct----- +-------------------cctcagcccccacctcagccccctcctcagcccccacctcc +-----------------------gcccccacctccgcccccacctctgcccct------- +------cctcagcccccacctcagccccctcctcagcccccacctccgcccccacctccg +cccccacctctgcccct-------------------------------------cctcag +cccccacctcagccccctcctcagcccccacctccgcccccacctccgcccccacctctg +cccctcctcagcccccacctcagccccctcctcag-----------cccccacctccgcc +ccctcctcagcccccacctctgcccct------------cctcagcccccacctcagccc +cctcctcagcccccacctccgcccccacctccgcccccacctccgcccctcctccgcccc +cacctcagcccccacctcc-----------------------gccccctcctcagccccc +acctctgcccctcctcagcccccacctcagcccccacctcagcccccacctc-------- +------------------------------------------------------------ +----------------------------ccccctcctcagcccccacctcagcccccacc +tcagccccctcctcagcccccacctcagcccccacctccgcccct------------cct +cagcccccacctccgcccccacctcagcccccacctcagcccccacctccgcccccacct +cagcccccacctccgcccccacctcagcccccacctccgcccccacctcagcccccacct +ctgcccctcctcagcccccacctccgccccctcctcagcccccacctctg---------- +--------------------------cccctcctcagcccccacctcagcccccacctca +gcccccacctcc-cccctcctcagcccccacctcagcccccacctcagccccctcctcag +cccccacctcagcccccacctcagcccccacctccgcccctcctcagcccccacctcagc +ccccacctccgcccctcctcagcccccacctcag-------------------------- +------------------------------------------------------------ +--------ccccctcctcagcccccacctccgcccccacctccgcccccacctctgcccc +tcctcagcccccacctcagccccctcctcagcccccacctccgcccccacctccgccccc +acctccgcccctcctccgcccccacctcagcccccacctccgccccct------------ +----------------------cctcagcccccacctctgcccctcctcagcccccacct +cagcccccacctcagcccccacct-------------------------cccccctcctc +agcccccacctcagcccccacctccgcccctcctcagcccccacctccgccccca----- +------------------------------------------------------cctcag +cccccacctcagcccccacctccgcccccacctcagcccccacctccgcccccacctcag +cccccacctcc-----------gcccccacctcagcccccacctctgcccctcctcagcc +cccacctccgccccctcctcagcccccacctctgcccctcctcagcccccacctcagccc +ccacctcagcccccacct-------------------------cccccctcctcagcccc +ca-----------------------------------cctcagcccccacctcagccccc +tcctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagccccca +-----------------------cctcagcccccacctccgcccctcctccgcccccacc +tcagccccctcctcagcccccacctctgcccctcctcagcccccacctcagcccccacct +ccgcccccacctcagccccctcctcagcccccacctc----------------------- +--------------ccccctcctccgcccccacctcagcccccacctcagcccccacctc +agcccccacctc------------------------------------------------ +-----------------------------------------------------------c +cccctcctccgcccccacctcagccccca-----------cctcagcccccacctccgcc +cccacctcagcccccacctcc--------------------------------------- +------------------------------------------------------------ +-------cccctcctcagcccccacctcagcccccacctccgccccca-----------c +ctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagcccccacc +tccgcccccacctcagcccccacctcagcccccacctccgccccca-------------- +-------------------------------------------------------cctca +gcccccacctccgcccccacctcagcccccacctccgcccccacctcagcccccacctcc +gcccccacctccgcccccacctcc-----------gcccccacctcagcccccacctcag +cccccacctccgccccca------------------------------------------ +----cctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagccc +ccacctcagcccctcctcagcccccacctcagcccccacctcagcccccacctcagcccc +t-------------------------cctcagcccccacctcagccccctcctcagcccc +ctcctcagcccccacctcagcccccacctccgcccccacctccgcccccacctcagcccc +acctcagcccctcctcagcccccacctcagccc--------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------cctcagccccca +cctcagcccccacctccgcccccacctccgcccccacctcc------------------- +------------------------------------------------------------ +----gcccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacct +cc---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +----------acccccacctccgtccccacctccgccccctcctcagccccctcctcagc +ccccacctcagcccccacctccgcccccacctcagcccctcctcagcccccacctctgcc +cccacctcagaccctcctcagcccctcctcagcccctcctcatcccctcctcagcagctg +ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc +cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg +ccttgt +>syndip_2_chr20:63167287-63167728 +aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg +ctatccatcccctcgactaatacttaccaaattaattccagatttggattcactcacggt +tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc +agtgcccacctcagccccctcctcagcccccacctccgcccccacctccgccccctcctc +agcccctcctccgcccccacctcagcccccacctcagccccctcctcagcccccacctcc +gcccctcctcagcccccacctcagcccccacctcagcccccacctccgccccctcctcag +cccccacctcagcccccacctcagccccctcctcagcccccacctcagcccccacctccg +cccccacctcagcccccacctcagccccctcctcagcccccacctcagcccccacctcag +cccccacctcagccccctcctcagcccccacctcagcccccacctccgccccctcctcag +cccccacctcccccccacctccgccccctcctcagcccccacctcccccccacctccgcc +ccctcctcagcccccacctctgcccctcctcaggccccacctcagcccccacctcagccc +cctcctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagcccc +cacctcagcccccacctccgcccctcctctgcccccacctcagccccctcctcagccccc +acctctgcccctcctcagcccccacctcagcccccacctccgcccccacctcagccccca +cctcagcccccacctccccccctcctcagcccccacctccacccccacctcagcccccac +ctcagcccccacctccccccctcctcagcccccacctccgcccccacctcagcccccacc +tccgcccccacctcagcccccacctcagcccccacctccgcccctcctcagcccccacct +cagcccccacctccgcccccacctccgcccccacctcagcccccacctccgccccctcct +cagcccccacctcagcccccacctcagcccccacctccacccccacctcagcccccacct +cagcccccacctcccccccacctccgccccctcctcagcccccacctctgcccctcctca +ggccccacctcagcccccacctcagccccctcctcagcccccacctcagcccccacctca +gcccccacctccgcccctcctcagcccccacctcagcccccacctccacccccacctcag +cccccacctccgcccccacctcagcccccacctccgcccctcctcagcccccacctcagc +ccccacctccgcccctcctctgcccccacctcagccccctcctcagcccccacctctgcc +cctcctcagcccccacctcagcccccacctccgcccccacctcagcccccacctcagccc +ccacctccccccctcctcagcccccacctccacccccacctcagcccccacctccgcccc +tcctcagcccccacctcagcccccacctccacccccacctcagcccccacctccgccccc +acctccgcccctcctcagcccccacctccgcccccacctcagcccctcctcagcccccac +ctccgcccctcctcagcccccacctcagcccccacctccgcccctcctcagcccccacct +cagcccccacctccacccccacctcagcccccacctcagcccccacctcccccccacctc +cgccccctcctcagcccccacctctgcccctcctcaggccccacctcagcccccacctca +gccccctcctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcag +cccccacctcagcccccacctccacccccacctcagcccccacctccgcccccacctcag +cccccacctccgcccctcctcagcccccacctcagcccccacctccgcccctcctctgcc +cccacctcagccccctcctcagcccccacctctgcccctcctcagcccccacctcagccc +ccacctccgcccccacctcagcccccacctcagcccccacctccccccctcctcagcccc +cacctccacccccacctcagcccccacctccgcccctcctcagcccccacctcagccccc +acctccacccccacctcagcccccacctcagcccccacctccgcccctcctcagccccca +cctccgcccccacctcagcccctcctcagcccccacctccgcccctcctcagcccccacc +tccgcccccacctcagccccctcctccgcccctcctcagcccccacctccacccccacct +cagcccccacctcagccccctcctcagcccccacctccgcccctcctcagcccccacctc +agcccccacctcagcccccacctccgccccctcctcagcccccacctcagcccccacctc +agccccctcctccacccctcctcagcccccacctccgcccccacctcagcccccacctcc +gcccctcctcagcccccacctccacccccacctcagcccccacctccgcccctcctcagc +ccccacctccgcccccacctcagcccccacctccccccctcctcagcccccacctccgcc +cccacctcagcccccacctccgcccctcctcagcccccacctccacccccacctcagccc +ccacctccgcccctcctcagcccccacctccgcccccacctcagcccctcctcagccccc +acctccgcccctcctcagcccccacctccgcccccacctcagcccccacctccccccctc +ctcagcccccacctccgcccccacctcagcccccacctccgcccctcctcagcccccacc +tcagcccccacctccacccccacctcagcccccacctcagcccccacctccgcccctcct +cagcccccacctccgcccccacctcagcccctcctcagcccccacctccgcccctcctca +gcccccacctccgcccccacctcagcccccacctccgcccccacctcagcccccacctcc +gcccccacctcagcccccacctccgcccctcctcagcccccacctcagcccccacctcag +accccacctccgcccccacctccgcccctcctcagcccccacctcagcccccacctcagc +ccctcctcagcccccacctccgcccccacctccgcccccacctccgcccctcctcagccc +ccacctcagcccctcctcagcccccacctcagcccccacctcagccccctcctcagcccc +cacctcagcccccacctccgcccccacctcagccccctcctcagcccccacctcagcccc +ctcctcagcccccacctcagcccccacctccgcccccacctccgcccccacctcagcccc +acctcagcccctcctcagcccccacctcagcccctcctcagcccccacctcagcccctcc +tcagcccccacctcagccccctcagcccccacctcagcccccacctccgcccccacctcc +gcccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacctccgc +ccccacctccgcccccacctcagcccctcctcagcccccacctcagcccccacctcagcc +ccctcctcagcccccacctccgcccccacctccgcccccacctcagccccacctcagccc +ctcctcagcccccacctcagcccctcctcagcccccacctcagcccctcctcagccccca +cctcagcccccacctccgcccccacctccgcccccacctccgcccccacctcagcccctc +ctcagcccccacctcagcccccacctcagccccctcctcagcccccacctccgcccccac +ctcagcccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacct +ccgcccccacctccgcccccacctcagcccctcctcagcccccacctcagcccccacctc +agccccctcctcagcccccacctccgcccccacctccgcccccacctcagccccacctca +gcccctcctcagcccccacctcagcccctcctcagccccctcctcagcccccacctcagc +ccccacctcagcccccacctccgcccccacctccgcccccacctcagccccctcctcagc +cccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacctccgcc +cccacctcaga-----------ccctcctcagcccctcctcatcccctcctcagcagctg +ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc +cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg +ccttgt +>p:HG002_1_chr20:63167287-63167728 +aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg +ctatccatcccctcgactaatacttaccaaattaattccagatttggattcactcacggt +tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc +agtgcccacctcagccccctcctcagcccccacctccgcccccacctccgccccctcctc +agcccctcctccgcccccacctcagcccccacctcagccccctcctcagcccccacctcc +gcccctcctcagcccccacctcagcccccacctcagcccccacctccgccccctcctcag +cccccacctcagcccccacctcagccccctcctcagcccccacctcagcccccacctccg +cccccacctcagcccccacctcagccccctcctcagcccccacctcagcccccacctcag +cccccacctcagccccctcctcagcccccacctcagcccccacctccgccccctcctcag +cccccacctcccccccacctccgccccctcctcagcccccacctcccccccacctccgcc +ccctcctcagcccccacctctgcccctcctcaggccccacctcagcccccacctcagccc +cctcctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagcccc +cacctcagcccccacctccgcccctcctctgcccccacctcagccccctcctcagccccc +acctctgcccctcctcagcccccacctcagcccccacctccgcccccacctcagccccca +cctcagcccccacctccccccctcctcagcccccacctccacccccacctcagcccccac +ctcagcccccacctccccccctcctcagcccccacctccgcccccacctcagcccccacc +tccgcccccacctcagcccccacctcagcccccacctccgcccctcctcagcccccacct +cagcccccacctccgcccccacctccgcccccacctcagcccccacctccgccccctcct +cagcccccacctcagcccccacctcagcccccacctccacccccacctcagcccccacct +cagcccccacctcccccccacctccgccccctcctcagcccccacctctgcccctcctca +ggccccacctcagcccccacctcagccccctcctcagcccccacctcagcccccacctca +gcccccacctccgcccctcctcagcccccacctcagcccccacctccacccccacctcag +cccccacctccgcccccacctcagcccccacctccgcccctcctcagcccccacctcagc +ccccacctccgcccctcctctgcccccacctcagccccctcctcagcccccacctctgcc +cctcctcagcccccacctcagcccccacctccgcccccacctcagcccccacctcagccc +ccacctccccccctcctcagcccccacctccacccccacctcagcccccacctccgcccc +tcctcagcccccacctcagcccccacctccacccccacctcagcccccacctccgccccc +acctccgcccctcctcagcccccacctccgcccccacctcagcccctcctcagcccccac +ctccgcccctcctcagcccccacctcagcccccacctccgcccctcctcagcccccacct +cagcccccacctccacccccacctcagcccccacctcagcccccacctcccccccacctc +cgccccctcctcagcccccacctctgcccctcctcaggccccacctcagcccccacctca +gccccctcctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcag +cccccacctcagcccccacctccacccccacctcagcccccacctccgcccccacctcag +cccccacctccgcccctcctcagcccccacctcagcccccacctccgcccctcctctgcc +cccacctcagccccctcctcagcccccacctctgcccctcctcagcccccacctcagccc +ccacctccgcccccacctcagcccccacctcagcccccacctccccccctcctcagcccc +cacctccacccccacctcagcccccacctccgcccctcctcagcccccacctcagccccc +acctccacccccacctcagcccccacctcagcccccacctccgcccctcctcagccccca +cctccgcccccacctcagcccctcctcagcccccacctccgcccctcctcagcccccacc +tccgcccccacctcagccccctcctccgcccctcctcagcccccacctccacccccacct +cagcccccacctcagccccctcctcagcccccacctccgcccctcctcagcccccacctc +agcccccacctcagcccccacctccgccccctcctcagcccccacctcagcccccacctc +agccccctcctccacccctcctcagcccccacctccgcccccacctcagcccccacctcc +gcccctcctcagcccccacctccacccccacctcagcccccacctccgcccctcctcagc +ccccacctccgcccccacctcagcccccacctccccccctcctcagcccccacctccgcc +cccacctcagcccccacctccgcccctcctcagcccccacctccacccccacctcagccc +ccacctccgcccctcctcagcccccacctccgcccccacctcagcccctcctcagccccc +acctccgcccctcctcagcccccacctccgcccccacctcagcccccacctccccccctc +ctcagcccccacctccgcccccacctcagcccccacctccgcccctcctcagcccccacc +tcagcccccacctccacccccacctcagcccccacctcagcccccacctccgcccctcct +cagcccccacctccgcccccacctcagcccctcctcagcccccacctccgcccctcctca +gcccccacctccgcccccacctcagcccccacctccgcccccacctcagcccccacctcc +gcccccacctcagcccccacctccgcccctcctcagcccccacctcagcccccacctcag +accccacctccgcccccacctccgcccctcctcagcccccacctcagcccccacctcagc +ccctcctcagcccccacctccgcccccacctccgcccccacctccgcccctcctcagccc +ccacctcagcccctcctcagcccccacctcagcccccacctcagccccctcctcagcccc +cacctcagcccccacctccgcccccacctcagccccctcctcagcccccacctcagcccc +ctcctcagcccccacctcagcccccacctccgcccccacctccgcccccacctcagcccc +acctcagcccctcctcagcccccacctcagcccctcctcagcccccacctcagcccctcc +tcagcccccacctcagccccctcagcccccacctcagcccccacctccgcccccacctcc +gcccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacctccgc +ccccacctccgcccccacctcagcccctcctcagcccccacctcagcccccacctcagcc +ccctcctcagcccccacctccgcccccacctccgcccccacctcagccccacctcagccc +ctcctcagcccccacctcagcccctcctcagcccccacctcagcccctcctcagccccca +cctcagcccccacctccgcccccacctccgcccccacctccgcccccacctcagcccctc +ctcagcccccacctcagcccccacctcagccccctcctcagcccccacctccgcccccac +ctcagcccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacct +ccgcccccacctccgcccccacctcagcccctcctcagcccccacctcagcccccacctc +agccccctcctcagcccccacctccgcccccacctccgcccccacctcagccccacctca +gcccctcctcagcccccacctcagcccctcctcagccccctcctcagcccccacctcagc +ccccacctcagcccccacctccgcccccacctccgcccccacctcagccccctcctcagc +cccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacctccgcc +cccacctcaga-----------ccctcctcagcccctcctcatcccctcctcagcagctg +ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc +cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg +ccttgt +>p:HG002_2_chr20:63167287-63167728 +aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg +ctatccatcccctcgactaatacttaccaaattaattccagatttggagtcactcacggt +tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc +agtgcccacctcagccccctcctcagcccccacctccgcccccacctcagcccct----- +-------------------cctcagcccccacctcagccccctcctcagcccccacctcc +-----------------------gcccccacctccgcccccacctctgcccct------- +------cctcagcccccacctcagccccctcctcagcccccacctccgcccccacctccg +cccccacctctgcccct-------------------------------------cctcag +cccccacctcagccccctcctcagcccccacctccgcccccacctccgcccccacctctg +cccctcctcagcccccacctcagccccctcctcag-----------cccccacctccgcc +ccctcctcagcccccacctctgcccct------------cctcagcccccacctcagccc +cctcctcagcccccacctccgcccccacctccgcccccacctccgcccctcctccgcccc +cacctcagcccccacctcc-----------------------gccccctcctcagccccc +acctctgcccctcctcagcccccacctcagcccccacctcagcccccacctc-------- +------------------------------------------------------------ +----------------------------ccccctcctcagcccccacctcagcccccacc +tcagccccctcctcagcccccacctcagcccccacctccgcccct------------cct +cagcccccacctccgcccccacctcagcccccacctcagcccccacctccgcccccacct +cagcccccacctccgcccccacctcagcccccacctccgcccccacctcagcccccacct +ctgcccctcctcagcccccacctccgccccctcctcagcccccacctctg---------- +--------------------------cccctcctcagcccccacctcagcccccacctca +gcccccacctcc-cccctcctcagcccccacctcagcccccacctcagccccctcctcag +cccccacctcagcccccacctcagcccccacctccgcccctcctcagcccccacctcagc +ccccacctccgcccctcctcagcccccacctcag-------------------------- +------------------------------------------------------------ +--------ccccctcctcagcccccacctccgcccccacctccgcccccacctctgcccc +tcctcagcccccacctcagccccctcctcagcccccacctccgcccccacctccgccccc +acctccgcccctcctccgcccccacctcagcccccacctccgccccct------------ +----------------------cctcagcccccacctctgcccctcctcagcccccacct +cagcccccacctcagcccccacct-------------------------cccccctcctc +agcccccacctcagcccccacctccgcccctcctcagcccccacctccgccccca----- +------------------------------------------------------cctcag +cccccacctcagcccccacctccgcccccacctcagcccccacctccgcccccacctcag +cccccacctcc-----------gcccccacctcagcccccacctctgcccctcctcagcc +cccacctccgccccctcctcagcccccacctctgcccctcctcagcccccacctcagccc +ccacctcagcccccacct-------------------------cccccctcctcagcccc +ca-----------------------------------cctcagcccccacctcagccccc +tcctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagccccca +-----------------------cctcagcccccacctccgcccctcctccgcccccacc +tcagccccctcctcagcccccacctctgcccctcctcagcccccacctcagcccccacct +ccgcccccacctcagccccctcctcagcccccacctc----------------------- +--------------ccccctcctccgcccccacctcagcccccacctcagcccccacctc +agcccccacctc------------------------------------------------ +-----------------------------------------------------------c +cccctcctccgcccccacctcagccccca-----------cctcagcccccacctccgcc +cccacctcagcccccacctcc--------------------------------------- +------------------------------------------------------------ +-------cccctcctcagcccccacctcagcccccacctccgccccca-----------c +ctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagcccccacc +tccgcccccacctcagcccccacctcagcccccacctccgccccca-------------- +-------------------------------------------------------cctca +gcccccacctccgcccccacctcagcccccacctccgcccccacctcagcccccacctcc +gcccccacctccgcccccacctcc-----------gcccccacctcagcccccacctcag +cccccacctccgccccca------------------------------------------ +----cctcagcccccacctcagcccccacctcagcccccacctccgcccctcctcagccc +ccacctcagcccctcctcagcccccacctcagcccccacctcagcccccacctcagcccc +t-------------------------cctcagcccccacctcagccccctcctcagcccc +ctcctcagcccccacctcagcccccacctccgcccccacctccgcccccacctcagcccc +acctcagcccctcctcagcccccacctcagccc--------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------cctcagccccca +cctcagcccccacctccgcccccacctccgcccccacctcc------------------- +------------------------------------------------------------ +----gcccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacct +cc---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +----------acccccacctccgtccccacctccgccccctcctcagccccctcctcagc +ccccacctcagcccccacctccgcccccacctcagcccctcctcagcccccacctctgcc +cccacctcagaccctcctcagcccctcctcagcccctcctcatcccctcctcagcagctg +ctggaatgggctaccctcaggagttggcaccccacagcctccttcccttcccacctccgc +cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg +ccttgt +>ref_chr20:63167287-63167728 +aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg +ctatccatcccctcgactaatacttaccaaattaattccagatttggagtcactcatggt +tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc +agtgcccacctcagccccctcctcagccccca---------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----cctcagcccccacctccgcccccacctcagcccctcctcagcccccacctccgcc +cccacctcaga-----------ccctcctcagcccctcctcatcccctcctcagcagctg +ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc +cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg +ccttgt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_7ef060f5be4ce0075857e188d7852082.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7ef060f5be4ce0075857e188d7852082.msa new file mode 100644 index 00000000..bfe7a627 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7ef060f5be4ce0075857e188d7852082.msa @@ -0,0 +1,4375 @@ +>HG002_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG005_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00096_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00171_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00438_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattata--- +----tattatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00512_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00513_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00514_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00621_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00673_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00731_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00732_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00733_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00735_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG00741_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00864_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattatatataattatatataat +tatatataatataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatatattaatatataattgtata +tataattatatatatattaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01071_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01106_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01109_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +ttaatatataattgtatatataattatatatatattaatatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01114_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01123_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01175_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatattatata--- +------atatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01243_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01258_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01358_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtggagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01361_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01505_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01596_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01891_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01928_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01952_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01978_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02011_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02055_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataattatattatatattatat +attaatatataattatattatatattatatattaatatataattatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02080_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02109_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02145_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataatatataatt +atatataataaaattatatataattatatata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02148_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02257_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattata--atataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02486_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +aaaattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat------------------------------------tatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02492_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatata----atatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02559_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02572_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatatattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatat--tatatattatatat--------- +tatatattatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02587_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02622_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02630_1_chr1:600-1000 +ctcccagctaatttatatatataatatata------------------------------ +------------------------------------------------------------ +------------------------------------------------------taatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02717_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02723_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +--------------------------------------------------ttaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02818_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02886_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattata--atataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03009_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03065_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03098_1_chr1:600-1000 +ctcccagctaatttatatatataatatatatataatatataattatatt----------- +------------------------------------------atatattatatattaata +tataattatattatatattatatataatatataattatattatatatta-tatataatat +ataattatattatatattatatat-aatatataattatattatatattatatat-aatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--------------------tatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattat-------------------------- +-atataattatatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatattatatata--atatat +-----tatatatataatata----------------------------tat--------- +---atattatataatatatataaat------------------------tatatatatta +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tatatattatataatatataattataatatac +aattatatattatataatatataattatata--------ttat----------------- +-------------ataatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +----tatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcagctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03125_1_chr1:600-1000 +ctcccagctaatttatatatataatatatatataatatataattatatt----------- +------------------------------------------atatattatatattaata +tataattatattatatattatatataatatataattatattatatatta-tatataatat +ataattatattatatattatatat-aatatataattatattatatattatatat-aatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--------------------tatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattat-------------------------- +-atataattatatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatattatatata--atatat +-----tatatatataatata----------------------------tat--------- +---atattatataatatatataaat------------------------tatatatatta +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tatatattatataatatataattataatatac +aattatatattatataatatataattatata--------ttat----------------- +-------------ataatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +----tatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcagctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03371_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03453_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03486_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03492_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03516_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03540_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03579_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03683_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03732_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--at--at +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA12329_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>NA12878_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtggagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>NA18534_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA18906_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatatattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--ta---------tatatattatatat--------- +tatatattatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA18939_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19238_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19239_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataattatatt----------- +------------------------------------------atatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatatattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatat--tatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19240_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19650_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatat-tatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19983_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA20129_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-aatatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA20509_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattata-tatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataatatatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctc-gctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA20847_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA21309_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataattatatt----------- +------------------------------------------atatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatatattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatat--tatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA24385_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>PGP1_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattat-ttattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:HG00733_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:HG02818_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:HG03486_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:NA19240_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>li:HG00733_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>li:NA12878_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>li:NA24385_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG002_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG005_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatgcatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00096_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00171_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00438_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00512_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatatattattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00513_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00514_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00621_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00673_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattata--- +----tattatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00731_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00732_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00733_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00735_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataattataatatataattgtatattatatattatataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataattt +tatatatattaatatataattgtatatataattatatatatattaatatataattgtata +tataattatatatatattaatatataattgtatatataattatatatatattaatatata +attgtatatataattatatatatattaatatataattgtatatataattatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG00741_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG00864_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01071_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01106_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01109_2_chr1:600-1000 +ctcccagctaatttatatatataatatatatataatatataattatatt----------- +------------------------------------------atatattatatattaata +tataattatattatatattatatataatatataattatattatatatta-tatataatat +ataattatattatatattatatat-aatatataattatattatatattatatat-aatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--------------------tatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattat-------------------------- +-atataattatatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatattatatata--atatat +-----tatatatataatata----------------------------tat--------- +---atattatataatatatataaat------------------------tatatatatta +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tatatattatataatatataattataatatac +aattatatattatataatatataattatata--------ttat----------------- +-------------ataatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +----tatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcagctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01114_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataattatatt----------- +------------------------------------------atatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatatattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatat--tatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01123_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01175_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01243_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01258_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata-------------------------------------taatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +--------------------------------------------------ttaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtggagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01358_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01361_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01505_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01596_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01891_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01928_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG01952_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG01978_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatataattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtggagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02011_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02055_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataattatattatatattatat +attaatatataattatattatatattatatattaatatataattatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02080_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02109_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-at---------tataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02145_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02148_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02257_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02486_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatatat------taatatataattatattatat-------attaatat +ataattatattatatattatatat-aatatataattatattata-attatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa-----------------------------tat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02492_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02559_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatattatata--- +------atatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +ttaatatataattgtatatataattatatatatattaatatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02572_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02587_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02622_2_chr1:600-1000 +ctcccagctaatttatatatataatatatatataatatataattatatt----------- +------------------------------------------atatattatatattaata +tataattatattatatattatatataatatataattatattatatattattatataatat +ataattatattatatattatatat-aatatataattatattatatattatatat-aatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--------------------tatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattat-------------------------- +-atataattatatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatattatatata--atatat +-----tatatatataatata----------------------------tat--------- +---atattatataatatatataaat------------------------tatatatatta +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tatatattatataatatataattataatatac +aattatatattatataatatataattatata--------ttat----------------- +-------------ataatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +----tatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcagctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02630_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattata-tattataat +aatataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02717_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02723_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatataaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--tatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattattt-attataatatataattatataatatataattata-ttatataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG02818_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattata--atataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG02886_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +--aattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat------------------------------------tatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03009_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaat-------------------------atatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataatta--------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03065_2_chr1:600-1000 +ctcccagctaatttatatatataatatatatataatatataattatatt----------- +------------------------------------------atatattatatattaata +tataattatattatatattatatataatatataattatattatatatta-tatataatat +ataattatattatatattatatat-aatatataattatattatatattatatat-aatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--------------------tatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattat-------------------------- +-atataattatatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatattatatata--atatat +-----tatatatataatata----------------------------tat--------- +---atattatataatatatataaat------------------------tatatatatta +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tatatattatataatatataattataatatac +aattatatattatataatatataattatata--------ttat----------------- +-------------ataatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +----tatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcagctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03098_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataat------------------------------------------------------ +------------tatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattata--atataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaat-------------------------atatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03125_2_chr1:600-1000 +ctcccagctaatttatatatataatatatatataatatataattatatt----------- +------------------------------------------atatattatatattaata +tataattatattatatattatatataatatataattatattatatatta-tatataatat +ataattatattatatattatatat-aatatataattatattatatattatatat-aatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +g--------------------tatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattat-------------------------- +-atataattatatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatattatatata--atatat +-----tatatatataatata----------------------------tat--------- +---atattatataatatatataaat------------------------tatatatatta +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tatatattatataatatataattataatatac +aattatatattatataatatataattatata--------ttat----------------- +-------------ataatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +----tatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcagctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>HG03371_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03453_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03486_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03492_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03516_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03540_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03579_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03683_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatata---------------atatatataaat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>HG03732_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatat-attataatatataattatataatatataattata-tattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatatt +atatatataattatatgtaaatataat-tatatatattatatataatatatataataaat +ataattatatatataatata-tat--tatatatattatatatattatatatattata--- +----tattatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata--ta +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA12329_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa-----------------------------tat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>NA12878_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA18534_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA18906_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattata--atataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>NA18939_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19238_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19239_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19240_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19650_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gt-tatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA19983_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataattatatt----------- +------------------------------------------atatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatatattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatat--tatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA20129_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------ttaatat +ataattatattatatattatatattaatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataatatataattatataatatataattatattattataat +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tat--tatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatataatatataatatatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat---------------------------------------------- +------------------------tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA20509_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>NA20847_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataat--tatatataattaattataattaactatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat----------------------------------tatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>NA21309_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>NA24385_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>PGP1_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:HG00733_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:HG02818_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattata--atataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatattaatatataattgtatatataattatatatata------------------ +----------------ttaatatataattgtatatataattatatatatattaatatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>ei:HG03486_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ei:NA19240_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata----atat +ataattatatatataatata-tattatatatatattatatatattatatat--------- +tatataatatataatatacataatt------------------------tatatatattg +tat-attatataatatata---------atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattatatataataaaattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>li:HG00733_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-aaataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>li:NA12878_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatattaata +tataattatattatata------------------------------------ttaatat +ataattatattatatattatatat-aatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaa---------------------------tatat-atatat +ataattatatat---------------------------tatataatatataatatataa +tatataatatataatatatataat-----------------------------tataata +tataattgtatattatatattatata--atatataatatatataattataatatataatt +gtatat----------------------tata-attatataatatataattataatatac +aattatatattatataatatataattgtata--ttataattat----------------- +--------atataattatatataattatatataatataattatatataattgtatatata +tta----------------------------------atatataattgtatatataatta +tatatatatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataattatatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtggagtggcacaatctcggctcactgcaagctccacctcccaggttcacgccattctc +ctgcctc +>li:NA24385_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>syndip_1_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>syndip_2_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +------------------------------------------------------------ +-------------------tatattaatatataattatattatatattatatataaatat +ataatgtattatacatatataattatataaattatgtatgtattatataattttatatat +gtatatatatgtttatatatgtatatataattatatatgtattataatgtattatataat +tgtataatataatatataataatataatatataattatttattataatatataattatat +aatatataattatattattataat------------------------------------ +-atataat--tatatataattatatatataattaattataattaattatatttatatat- +-tatatataattatatgtaaatataat-tatatatattatatataatatata--ataaat +ataattatatatataatata-tattatatatatattatatatattatatatattatatat +tatataatatataatatacataatt--------------------------tatatatag +tat-attatataatatataatatataatatatataatatatataattataata------- +----------------------------tata-attatatataataaaattat------- +---------------------------------atataattat----------------- +--------atataattatatat-attatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------t----------- +-----------------------atatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc +>ref_chr1:600-1000 +ctcccagctaatttatatatataatatata--taatatataat----------------- +-------------------------------------------tatattatatatta--- +-------------tata------------------------------------tt----- +-------------------------aatatataattatattatatattatatattaatat +ataatgtattatacatatataattatataaattatgtatgtat----------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------tatataattatatataatataattatatataattgtata---- +------------------------------------------------------------ +------tatta------------------------------------------------- +-----------------------------------------------------atatata +attgtatatataat----------------------------------tatatatatatt +aatatataattgtatatataat--tatatatattaatatataattgtatatataattata +tatataatatatacatatatattttttcgagaaagagtctttctctgtcacccacgctgg +agtgcagtggcacaatctcggctcactgcaagctccacctcccaggttcatgccattctc +ctgcctc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_7fd9ee27216345e87277b3eb261479c6.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7fd9ee27216345e87277b3eb261479c6.msa new file mode 100644 index 00000000..5cb30a93 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7fd9ee27216345e87277b3eb261479c6.msa @@ -0,0 +1,150 @@ +>syndip_1_chr20:64134783-64136487 +agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc +cggtgtgtgatgttccccttgaaaaggaggcgtttctgcttctcctctttccggtccttg +acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca +cgctccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaca +cacgctccctcagttttgtgcgtc------------------------------------ +-acacacacgctccctcagttttgtgcg-------------------------------- +------------------------------------------------------------ +----------------------------------------ccacaaacacgcgcactccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacaaacacacac +gctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaac +acacacgctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttc +acaaacacacacgctccctcagttatggacatcacaaacacacacgctccctcagttttg +tgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctccctcagttt +tgtgtgtcacaaacacacacgctccttcagttttgtgcttcacaaacacacacgctccct +cagttttgtgcatcacacacacattccctcagttttgtgcgccacaaacacgcgcgctcc +ctcagttttgtgcatcacaaacgcacatgctccctcagttttgggcatcacaaacacatt +ccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaaacaca +cacgctacctcagttttgtgtgttacaaacacacacgctccctcagtttcgtgcatccca +aacatgctccctcagttatgtgcgtcacaaacacgctccctcagttttgtgtgtcacaaa +cacacacgctccctcagttttgtgcatcacaaacacacacattccctcagttttgtgcgt +cacaaacacgcgtgctctctcagttatgtgcgtcacaaacacagacgctccctcagtttt +gcgcatcacacacacgctccctcagctttgtgcttcacaaacacacacgctccctcagtt +ttgtgtgttacaaacacacacgctccctcagtttcgtgcatcccaaagacgattcctcag +ttttgtgcatcacaaacacatatgctccctcagttttgtgcgtcacaaacgcgcgctccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc +cctcagttttgtgtg---------------------------------tcacaaacacac +acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg +aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgt--------------- +------------------------- +>syndip_2_chr20:64134783-64136487 +agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc +cggtgtgtgatgttccccttgaaaaggaggcgtttctgcttctcctctttccggtccttg +acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca +cgctccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaca +cacgctccctcagttttgtgcgccacaaacacgcgcactccctcagttatgtgcgtcaca +aacacagacgctccctcagttttgcgca-------------------------------- +------------------------------------------------------------ +----------------------------------------tcacaaacacacacgctccc +tcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaacacacac +gctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaac +acacacgctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttc +acaaacacacacgctccctcagttatggacatcacaaacacacacgctccctcagttttg +tgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctccctcagttt +tgtgtgtcacaaacacacacgctccttcagttttgtgcttcacaaacacacacgctccct +cagttttgtgcatcacacacacattccctcagttttgtgcgccacaaacacgcgcgctcc +ctcagttttgtgcatcacaaacgcacatgctccctcagttttgggcatcacaaacacatt +ccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaaacaca +cacgctacctcagttttgtgtgttacaaacacacacgctccctcagtttcgtgcatccca +aacatgctccctcagttatgtgcgtcacaaacacgctccctcagttttgtgtgtcacaaa +cacacacgctccctcagttttgtgcatcacaaacacacacattccctcagttttgtgcgt +cacaaacacgcgtgctctctcagttatgtgcgtcacaaacacagacgctccctcagtttt +gcgcatcacacacacgctccctcagctttgtgcttcacaaacacacacgctccctcagtt +ttgtgtgttacaaacacacacgctccctcagtttcgtgcatcccaaagacgattcctcag +ttttgtgcatcacaaacacatatgctccctcagttttgtgcgtcacaaacgcgcgctccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc +cctcagttttgtgtgtcacaaacacacacgctccctcagttttgcgcatcacaaacacac +acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg +aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgt--------------- +------------------------- +>p:HG002_1_chr20:64134783-64136487 +agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc +cggtgtgtgatgttccccttgaaaaggaagcgtttctgcttctcctctttccggtccttg +acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca +cgctccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaca +cacgctccctcagttttgtgcgccacaaacacgcgcactccctcagttatgtgcgtcaca +aacacagacgctccctcagttttgcgca-------------------------------- +------------------------------------------------------------ +----------------------------------------tcacaaacacacacgctccc +tcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaacacacac +gctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaac +acacacgctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttc +acaaacacacacgctccctcagttatggacatcacaaacacacacgctccctcagttttg +tgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctccctcagttt +tgtgtgtcacaaacacacacgctccttcagttttgtgcttcacaaacacacacgctccct +cagttttgtgcatcacacacacattccctcagttttgtgcgccacaaacacgcgcgctcc +ctcagttttgtgcatcacaaacgcacatgctccctcagttttgggcatcacaaacacatt +ccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaaacaca +cacgctacctcagttttgtgtgttacaaacacacacgctccctcagtttcgtgcatccca +aacatgctccctcagttatgtgcgtcacaaacacgctccctcagttttgtgtgtcacaaa +cacacacgctccctcagttttgtgcatcacaaacacacacattccctcagttttgtgcgt +cacaaacacgcgtgctctctcagttatgtgcgtcacaaacacagacgctccctcagtttt +gcgcatcacacacacgctccctcagctttgtgcttcacaaacacacacgctccctcagtt +ttgtgtgttacaaacacacacgctccctcagtttcgtgcatcccaaagacgattcctcag +ttttgtgcatcacaaacacatatgctccctcagttttgtgcgtcacaaacgcgcgctccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc +cctcagttttgtgtgtcacaaacacacacgctccctcagttttgcgcatcacaaacacac +acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg +aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgt--------------- +------------------------- +>p:HG002_2_chr20:64134783-64136487 +agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc +cggtgtgtgatgttccccttgaaaaggaagcgtttctgcttctcctctttccggtccttg +acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca +cgctccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaca +cacgctccctcagttttgtgcgtc------------------------------------ +-acacacacgctccctcagttttgtgcg-------------------------------- +------------------------------------------------------------ +----------------------------------------ccacaaacacgcgcactccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacaaacacacac +gctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaac +acacacgctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttc +acaaacacacacgctccctcagttatggacatcacaaacacacacgctccctcagttttg +tgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctccctcagttt +tgtgtgtcacaaacacacacgctccttcagttttgtgcttcacaaacacacacgctccct +cagttttgtgcatcacacacacattccctcagttttgtgcgccacaaacacgcgcgctcc +ctcagttttgtgcatcacaaacgcacatgctccctcagttttgggcatcacaaacacatt +ccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaaacaca +cacgctacctcagttttgtgtgttacaaacacacacgctccctcagtttcgtgcatccca +aacatgctccctcagttatgtgcgtcacaaacacgctccctcagttttgtgtgtcacaaa +cacacacgctccctcagttttgtgcatcacaaacacacacattccctcagttttgtgcgt +cacaaacacgcgtgctctctcagttatgtgcgtcacaaacacagacgctccctcagtttt +gcgcatcacacacacgctccctcagctttgtgcttcacaaacacacacgctccctcagtt +ttgtgtgttacaaacacacacgctccctcagtttcgtgcatcccaaagacgattcctcag +ttttgtgcatcacaaacacatatgctccctcagttttgtgcgtcacaaacgcgcgctccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc +cctcagttttgtgtg---------------------------------tcacaaacacac +acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg +aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgt--------------- +------------------------- +>ref_chr20:64134783-64136487 +agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc +cggtgtgtgatgttccccttgaaaaggaggcgtttctgcttctcctctttccggtccttg +acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca +cgctccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaca +cacgctccctcagttttgtgcgccacaaacacgcgcactccctcagttatgtgcgtcaca +aacacagacgctccctcagttttgcgcatcacaaacacacacgctccctcagttatggac +atcacaaacacacacgctccctcagttttgtgcttcacaaacacacacgctccctcagtt +atggacatcacaaacacacacgctccctcagttttgtgcttcacaaacacacacgctccc +tcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaacacacac +gctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttcacaaac +acacacgctccctcagttatggacatcacaaacacacacgctccctcagttttgtgcttc +acaaacacacacgctccctcagttatggacatcacaaacacacacgctccctcagttttg +tgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctccctcagttt +tgtgtgtcacaaacacacacgctccttcagttttgtgcttcacaaacacacacgctccct +cagttttgtgcatcacacacacattccctcagttttgtgcgccacaaacacgcgcgctcc +ctcagttttgtgcatcacaaacgcacatgctccctcagttttgggcatcacaaacacatt +ccctcagttttgtgcatcacaaacgcacacgctccctcagttttgtgcgtcacaaacaca +cacgctacctcagttttgtgtgttacaaacacacacgctccctcagtttcgtgcatccca +aacatgctccctcagttatgtgcgtcacaaacacgctccctcagttttgtgtgtcacaaa +cacacacgctccctcagttttgtgcatcacaaacacacacattccctcagttttgtgcgt +cacaaacacgcgtgctctctcagttatgtgcgtcacaaacacagacgctccctcagtttt +gcgcatcacacacacgctccctcagctttgtgcttcacaaacacacacgctccctcagtt +ttgtgtgttacaaacacacacgctccctcagtttcgtgcatcccaaagacgattcctcag +ttttgtgcatcacaaacacatatgctccctcagttttgtgcgtcacaaacgcgcgctccc +tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc +cctcagttttgtgtgtcacaaacacacacgctccctcagttttgcgcatcacaaacacac +acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg +aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgtcccctgaccgattta +gccgttattgagggacatcgaggtt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa new file mode 100644 index 00000000..af467528 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa @@ -0,0 +1,165 @@ +>syndip_1_chr20:61201583-61202575 +aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata +ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccgtta +gagaaggaatttcctcactcaggaccctctgcggatgtcacctccatcctcatcaggacc +ctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatccttac +tcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgagtgccacctc +catcctcactcaggatcccctgtaagtgtcacctccatcctcaccaggaccctccatg-g +tgtcaccgccatcctcactcaggaccctccatgagtgccacctccatcctcaccaggatc +ccctgtaagtgtcacctccatcctcac-caggaccctccatgagtgtcacctccatc--- +-ctcaggaccctccatgagtgtcacctccatcctcactcaggaccctccat-ggtgtcac +ctccatcctcactcaggaccctccat-ggtgtcacctccatcctcactcaggaccctcca +tggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctca------ +----------------------ccaggaccctccatgagtgtcacctccatcctcactca +ggaccctccatggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccat +cctcactcaggaccctccat-ggtgtcaccgccatcctca-------------------- +-----------ctcaggaccctccatgagtgtcacctccatcctcactcaggaccctcca +tggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcaccagga +tcccctgtaagtgtcacctccatcctcaccaggaccctccatgagtgtcacctccatcct +cactcaggaccctccatggtgtcacctccatcctcactcaggaccctccatg-gtgtcac +ctccatcctcaccaggatcccctgtaagtgtcacctccatcctcac-caggaccctccat +gagtgtcacctccatcctca--------------------------ctcaggaccctcca +tg-gtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcag +gaccctccat-ggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccat +cctcactcaggaccctccatg-gtgtcacctccatcctcaccaggaccctccatgagtgt +cacctccatc----ctcaggaccctccatgagtgtcacctccatcctcactcaggaccct +ccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactc +aggaccctccatg-----------------------------------gtgtcacctcca +tcctcactcaggaccctccatggtgtcacctccatcctcaccaggaccctccatgagtgt +cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc +cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct +caggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt +cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc +acctctctctcagactaggct +>syndip_2_chr20:61201583-61202575 +aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata +ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccatta +gagaaggaatttcctcactcaggaccctctgcggatgtcgcctccatcctcatcaggacc +ctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatccttac +tcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgggtgtcacctc +catcctcac-caggatcccctgtaagtgtcacctccatcctcaccaggaccctccatgag +tgtcacctccatcctcactcaggaccctccatg-gtgtcacctccatcctcactcaggac +cctccatggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccatcctc +actcaggaccctccatgggtgtcacctccatcctcactcaggaccctccatgggtgtcac +ctccatcctcactcaggaccctccatgggtgccacctccatcctcaccaggatcccctgt +aagtgtcacctccatcctcaccaggaccctccatgagtgtcacctccatcctcaggaccc +tccatgagtgtcacctccatcctcaggaccctccatgagtgtcacctccatcctcactca +ggaccctccatggtgtcacctccatcctcactcaggaccctccat-ggtgtcacctccat +cctcactcaggaccctccatgggtgtcacctccatcctcactcaggaccctccatgagtg +tcacctccatcctcaggaccctccatgagtgtcacctccatcctcactcaggaccctcca +tggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcagg +accctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatcct +tactcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgagtgccac +ctccatcctcaccaggatcccctgtaagtgtcacctccatcctcactcaggaccctccat +gagtgtcacctccatcctcaggaccctccatggtgtcacctccatcctcaggaccctcca +tgagtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcag +gaccctccatgggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccat +cctcactcaggaccctccatgagtgtcacctccatcctcactcaggaccctccatggtgt +cacctccatcctcactcaggaccctccatg-gtgtcacctccatcctcactcaggacact +ccatggtgtcacctccatccttactcaggaccctccatggtgtcaccgccatcctcactc +aggaccctccatgagtgccacctccatcctcaccaggatcccctgtaagtgtcacctcca +tcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccatggtgt +cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc +cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct +aaggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt +cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc +acctctctctcagactaggct +>p:HG002_1_chr20:61201583-61202575 +aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata +ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccatta +gagaaggaatttcctcactcaggaccctctgcggatgtcgcctccatcctcatcaggacc +ctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatccttac +tcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgggtgtcacctc +catcctcac-caggatcccctgtaagtgtcacctccatcctcaccaggaccctccatgag +tgtcacctccatcctcactcaggaccctccatg-gtgtcacctccatcctcactcaggac +cctccatggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccatcctc +actcaggaccctccatgggtgtcacctccatcctcactcaggaccctccatgggtgtcac +ctccatcctcactcaggaccctccatgggtgccacctccatcctcaccaggatcccctgt +aagtgtcacctccatcctcaccaggaccctccatgagtgtcacctccatcctcaggaccc +tccatgagtgtcacctccatcctcaggaccctccatgagtgtcacctccatcctcactca +ggaccctccatggtgtcacctccatcctcactcaggaccctccat-ggtgtcacctccat +cctcactcaggaccctccatgggtgtcacctccatcctcactcaggaccctccatgagtg +tcacctccatcctcaggaccctccatgagtgtcacctccatcctcactcaggaccctcca +tggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcagg +accctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatcct +tactcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgagtgccac +ctccatcctcaccaggatcccctgtaagtgtcacctccatcctcactcaggaccctccat +gagtgtcacctccatcctcaggaccctccatggtgtcacctccatcctcaggaccctcca +tgagtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcag +gaccctccatgggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccat +cctcactcaggaccctccatgagtgtcacctccatcctcactcaggaccctccatggtgt +cacctccatcctcactcaggaccctccatg-gtgtcacctccatcctcactcaggacact +ccatggtgtcacctccatccttactcaggaccctccatggtgtcaccgccatcctcactc +aggaccctccatgagtgccacctccatcctcaccaggatcccctgtaagtgtcacctcca +tcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccatggtgt +cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc +cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct +aaggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt +cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc +acctctctctcagactaggct +>p:HG002_2_chr20:61201583-61202575 +aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata +ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccgtta +gagaaggaatttcctcactcaggaccctctgcggatgtcacctccatcctcatcaggacc +ctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatccttac +tcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgagtgccacctc +catcctcactcaggatcccctgtaagtgtcacctccatcctcaccaggaccctccatg-g +tgtcaccgccatcctcactcaggaccctccatgagtgccacctccatcctcaccaggatc +ccctgtaagtgtcacctccatcctcac-caggaccctccatgagtgtcacctccatc--- +-ctcaggaccctccatgagtgtcacctccatcctcactcaggaccctccat-ggtgtcac +ctccatcctcactcaggaccctccat-ggtgtcacctccatcctcactcaggaccctcca +tggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctca------ +----------------------ccaggaccctccatgagtgtcacctccatcctcactca +ggaccctccatggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccat +cctcactcaggaccctccat-ggtgtcaccgccatcctca-------------------- +-----------ctcaggaccctccatgagtgtcacctccatcctcactcaggaccctcca +tggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcaccagga +tcccctgtaagtgtcacctccatcctcaccaggaccctccatgagtgtcacctccatcct +cactcaggaccctccatggtgtcacctccatcctcactcaggaccctccatg-gtgtcac +ctccatcctcaccaggatcccctgtaagtgtcacctccatcctcac-caggaccctccat +gagtgtcacctccatcctca--------------------------ctcaggaccctcca +tg-gtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcag +gaccctccat-ggtgtcacctccatcctcactcaggaccctccatgggtgtcacctccat +cctcactcaggaccctccatg-gtgtcacctccatcctcaccaggaccctccatgagtgt +cacctccatc----ctcaggaccctccatgagtgtcacctccatcctcactcaggaccct +ccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactc +aggaccctccatg-----------------------------------gtgtcacctcca +tcctcactcaggaccctccatggtgtcacctccatcctcaccaggaccctccatgagtgt +cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc +cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct +caggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt +cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc +acctctctctcagactaggct +>ref_chr20:61201583-61202575 +aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata +ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccatta +gagaaggaatttcctcactcaggaccctctgcggatgtcgcctccatcctcatcaggacc +ctccatggtgtcacctccatcctcactcaggacactccatggtgtcacctccatccttac +tcaggaccctccatggtgtcaccgccatcctcactcaggaccctccatgagtgccacctc +catcctcac-caggatcccctgtaagtgtcacctccatcctcaccaggaccctccatgag +tgtcacctccatcct--------------------------------------------- +----------------------------caggaccctccatgagtgtcacctccatcctc +actcaggaccctccat------------------------------------ggtgtcac +cgccatcctcactcaggaccctccatgagtgccacctccatcctcaccaggatcccctgt +aa---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------gtgtcacctccatcctcaccaggaccctccatgagtgt +cacctccatc----ctcaggaccctccatgagtgtcacctccatcctcactcaggaccct +ccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcctcactc +aggaccctccatg----------------------------------ggtgtcacctcca +tcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccatggtgt +cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc +cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct +caggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt +cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc +acctctctctcagactaggct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa new file mode 100644 index 00000000..0aa2b622 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa @@ -0,0 +1,80 @@ +>syndip_1_chr20:57948698-57949591 +cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt +tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg +ccctgcacagatggacagatggttggaa----ggatggatggatggatggatggatggat +ggatggggttggatggatggatggatggatggatggatagtggatagatggatgaatagt +ggatggataaatggattaatggatggatgaatggatgatggatggatgaatgagatggat +ggatt----------------gatggatggatggatggatggatggatggaagaatggat +gatggatggatga--------------------gatggatggatggatggatggaagaat +ggatgatggatggatga--------gatggatggatggatggggggttggatggatggat +gg------------------------------------------------------atgg +atggatggatagtggatagatggatgaatagtggatggataaatggattaatggatggat +gaatggatgatggatggatgaatgagatggatggatt------------gatggatggat +ggatggatggaagaatggatgatggatggatgaatgagatgggtggattgatggatggat +ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag +gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc +>syndip_2_chr20:57948698-57949591 +cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt +tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg +ccctgcacagatggacagatggttggaaggatggatggatggatggatggatggatggat +ggatggggttggatggatggatggatggatggatggatagtggatagatggatgaatagt +ggatggataaatggattaatggatggatgaatggatgatggatggatgaatgagatggat +ggattgatggatggatggatggatggatggatggatggatggatggatggaagaatggat +gatggatggatgagatggatggatggatggatggatggatggatggatggatggaagaat +ggatgatggatggatgagatggatggatggatggatggatggggggttggatggatggat +ggatggatggatggatagtggatagatggatgaatagtgaatggataaatggattaatgg +atggatggatagtggatagatggatgaatagtggatggataaatggattaatggatggat +gaatggatgatggatggatgaatgagatggatggattgatggatggatggatggatggat +ggatggatggaagaatggatgatggatggatgaatgagatgggtggatt----gatggat +ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag +gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc +>p:HG002_1_chr20:57948698-57949591 +cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt +tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg +ccctgcacagatggacagatggttggaaggatggatggatggatggatggatggatggat +ggatggggttggatggatggatggatggatggatggatagtggatagatggatgaatagt +ggatggataaatggattaatggatggatgaatggatgatggatggatgaatgagatggat +ggattgatggatggatggatggatggatggatggatggatggatggatggaagaatggat +gatggatggatgagatggatggatggatggatggatggatggatggatggatggaagaat +ggatgatggatggatgagatggatggatggatggatggatggggggttggatggatggat +ggatggatggatggatagtggatagatggatgaatagtgaatggataaatggattaatgg +atggatggatagtggatagatggatgaatagtggatggataaatggattaatggatggat +gaatggatgatggatggatgaatgagatggatggattgatggatggatggatggatggat +ggatggatggaagaatggatgatggatggatgaatgagatgggtggatt----gatggat +ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag +gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc +>p:HG002_2_chr20:57948698-57949591 +cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt +tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg +ccctgcacagatggacagatggttggaa----ggatggatggatggatggatggatggat +ggatggggttggatggatggatggatggatggatggatagtggatagatggatgaatagt +ggatggataaatggattaatggatggatgaatggatgatggatggatgaatgagatggat +ggatt----------------gatggatggatggatggatggatggatggaagaatggat +gatggatggatga--------------------gatggatggatggatggatggaagaat +ggatgatggatggatga--------gatggatggatggatggggggttggatggatggat +gg------------------------------------------------------atgg +atggatggatagtggatagatggatgaatagtggatggataaatggattaatggatggat +gaatggatgatggatggatgaatgagatggatggatt------------gatggatggat +ggatggatggaagaatggatgatggatggatgaatgagatgggtggattgatggatggat +ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag +gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc +>ref_chr20:57948698-57949591 +cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt +tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg +ccctgcacagatggacagatggttggaaggatggatggatggatggatggatggatggat +ggatggggttggatggatggatggatggatggatggatagtggatagatggatgaatagt +ggatggataaatggattaatggatggatgaatggatgatggatggatgaatgagatggat +ggattgatggatggatggatggatggatggatggatggatggatggatggaagaatggat +gatggatggatgagatggatggatggatggatggatggatggatggatggatggaagaat +ggatgatggatggatgagatggatggatggatggatggatggggggttggatggatggat +ggatggatggatggatagtggatagatggatgaatagtgaatggataaatggattaatgg +atggatggatagtggatagatggatgaatagtggatggataaatggattaatggatggat +gaatggatgatggatggatgaatgagatggatggattgatggatggatggatggatggat +ggatggatggaagaatggatgatggatggatgaatgagatgggtggatt----gatggat +ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag +gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa new file mode 100644 index 00000000..df5fb594 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa @@ -0,0 +1,85 @@ +>syndip_1_chr20:23155404-23156076 +gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct +gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc +ctgcagcacaaaagaggaaggaaagcaagggaagggaaggggaggggaggaagggaaggg +gaggggaggaagggaaggggaggggaggaaggcgaggtgaggggaggagaggtgagggga +gggggggaggggaggggagggagagtgaggaagagagagagaaagggagagaggaaggaa +agaagaaaggaaggaaggaaagaaggaaggaagggagggagggagggaggaaagaaggaa +ggaaaggagggaggaagggagggaaagagagagagaggaaaagagagagaaagagaaaga +gagaaagagaggaaggaaggaaggaaggagagagagaaagaaagaaggaaagaggaagga +aggaagaaagaaaagaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag +agaaagaaagaaagaaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaag +agaaaagaaggagagaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaag +gaaagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag +gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg +gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct +gcaaagccccca +>syndip_2_chr20:23155404-23156076 +gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct +gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc +ctgcagcacaaaagaggaaggaaagcaag-------------------ggaagggaaggg +gaggggaggaagggaaggggaggggaggaaggcgaggtgaggggaggagaggtgagggga +gggggggaggggaggggagggagagtgaggaagagagagagaaagggagagaggaaggaa +agaagaaaggaaggaaggaaagaaggaaggaagggagggagggagggaggaaagaaggaa +ggaaaggagggaggaagggagggaaagagagagagaggaaaagagagagaaagagaaaga +gagaaagagaggaaggaaggaaggaaggagagagagaaagaaagaaggaaagaggaagga +aggaagaaagaaaagaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag +----agaaagaaagaaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaag +agaaaagaaggagagaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaag +gaaagaaag------------------------aagaaagaaagaaagaaagaaagaaag +aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag +gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg +gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct +gcaaagccccca +>p:HG002_1_chr20:23155404-23156076 +gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct +gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc +ctgcagcacaaaagaggaaggaaagcaag-------------------ggaagggaaggg +gaggggaggaagggaaggggaggggaggaaggcgaggtgaggggaggagaggtgagggga +gggggggaggggaggggagggagagtgaggaagagagagagaaagggagagaggaaggaa +agaagaaaggaaggaaggaaagaaggaaggaagggagggagggagggaggaaagaaggaa +ggaaaggagggaggaagggagggaaagagagagagaggaaaagagagagaaagagaaaga +gagaaagagaggaaggaaggaaggaaggagagagagaaagaaagaaggaaagaggaagga +aggaagaaagaaaagaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag +----agaaagaaagaaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaag +agaaaagaaggagagaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaag +gaaagaaag------------------------aagaaagaaagaaagaaagaaagaaag +aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag +gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg +gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct +gcaaagccccca +>p:HG002_2_chr20:23155404-23156076 +gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct +gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc +ctgcagcacaaaagaggaaggaaagcaagggaagggaaggggaggggaggaagggaaggg +gaggggaggaagggaaggggaggggaggaaggcgaggtgaggggaggagaggtgagggga +gggggggaggggaggggagggagagtgaggaagagagagagaaagggagagaggaaggaa +agaagaaaggaaggaaggaaagaaggaaggaagggagggagggagggaggaaagaaggaa +ggaaaggagggaggaagggagggaaagagagagagaggaaaagagagagaaagagaaaga +gagaaagagaggaaggaaggaaggaaggagagagagaaagaaagaaggaaagaggaagga +aggaagaaagaaaagaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag +agaaagaaagaaagaaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaag +agaaaagaaggagagaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaag +gaaagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag +gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg +gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct +gcaaagccccca +>ref_chr20:23155404-23156076 +gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct +gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc +ctgcagcacaaaagaggaaggaaagcaag-------------------ggaagggaaggg +gaggggaggaaggga--------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------aggaaaagagagagaaagagaaaga +gagaaagagaggaaggaaggaaggaaggagagagagaaagaaagaaggaaagaggaagga +aggaagaaagaaaagaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag +--------agaaagaaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaag +agaaaagaaggagagaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaag +gaaagaaag------------aagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag +gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg +gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct +gcaaagccccca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa new file mode 100644 index 00000000..d04a37da --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa @@ -0,0 +1,140 @@ +>syndip_1_chr20:53203887-53204574 +cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt +aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc +cttcatttttataatatgatactattatatcataattttataatatactattatatcata +ttatatgatactatttatatcatcatatgatgatatactattatatcatcatatgatgat +atactattatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatcatatgatgat---atactatatcatcatatgatgatatgatactatatcatc +atatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatg +atatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgata +ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca +tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga +tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga +tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat +catcacatgatgatatgatactatatcatcacatgatgatatgatactatatcatcacat +gatgatatgatactatatcatcacatgatgatatg--atactatatcatcacatgatgat +atgatactatatcatcacatgatgatatgatactatatcatcacatgatgatatgatact +atatcatc---------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------acatgatgatatgatactat +atcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgatact +atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata +ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga +tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt +tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg +aaattccatgcatagctgacattctacttctattg +>syndip_2_chr20:53203887-53204574 +cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt +aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc +cttcatttttataatatgatactattatatcataattttataatatactattatatcata +ttatatgatactatttatatcatcatatgatgatatactattatatcatcatatgatgat +atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatc +atatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatg +atatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgata +ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca +tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga +tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga +tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat +catcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatat +gatgatatgatactatatcatcatatgatgatatg--atactatatcatcatatgatgat +atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatc +atatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatg +atatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgata +ctatatcataacatgatgatatgatactatatcataacatgatgatatgatactatatca +tcatataacatgatgatatgatactatatcatcatataacatgatgatatgatactatat +catcatataacatgatgatatgatactatatcatcatataacatgatgatatgatactat +atcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgatact +atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata +ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga +tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt +tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg +aaattccatgcatagctgacattctacttctattg +>p:HG002_1_chr20:53203887-53204574 +cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt +aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc +cttcatttttataatatgatactattatatcataattttataatatactattatatcata +ttatatgatactatttatatcatcatatgatgatatactattatatcatcatatgatgat +atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatc +atatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatg +atatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgata +ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca +tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga +tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga +tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat +catcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatat +gatgatatgatactatatcatcatatgatgatatg--atactatatcatcatatgatgat +atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatc +atatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatg +atatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgata +ctatatcataacatgatgatatgatactatatcataacatgatgatatgatactatatca +tcatataacatgatgatatgatactatatcatcatataacatgatgatatgatactatat +catcatataacatgatgatatgatactatatcatcatataacatgatgatatgatactat +atcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgatact +atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata +ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga +tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt +tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg +aaattccatgcatagctgacattctacttctattg +>p:HG002_2_chr20:53203887-53204574 +cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt +aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc +cttcatttttataatatgatactattatatcataattttataatatactattatatcata +ttatatgatactatttatatcatcatatgatgatatactattatatcatcatatgatgat +atactattatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatcatatgatgat---atactatatcatcatatgatgatatgatactatatcatc +atatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatg +atatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgata +ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca +tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga +tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga +tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat +catcacatgatgatatgatactatatcatcacatgatgatatgatactatatcatcacat +gatgatatgatactatatcatcacatgatgatatg--atactatatcatcacatgatgat +atgatactatatcatcacatgatgatatgatactatatcatcacatgatgatatgatact +atatcatc---------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------acatgatgatatgatactat +atcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgatact +atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata +ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga +tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt +tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg +aaattccatgcatagctgacattctacttctattg +>ref_chr20:53203887-53204574 +cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt +aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc +cttcatttttataatatgatactattatatcataattttataatatactattatatcata +ttatatgatactatttatatcatcatatgatgatatgat--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------atactatatcatcatatgatgatatgatatactatatcatcatatgatgat +atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact +atatcatc---------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------atatgatgatatgatactat +atcatcat-----atgatgatatgatactatatcatcatataacatgatgatatgatact +atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata +ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga +tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt +tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg +aaattccatgcatagctgacattctacttctattg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa new file mode 100644 index 00000000..2d37ed2b --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa @@ -0,0 +1,180 @@ +>syndip_1_chr20:63964668-63966718 +cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc +aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg +aggag------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------caccctgacc +tggaacgagcccccataccttttctcaccactgttccctgacctgggacgagcccccata +ccttttctcaccactgttcctccccgacctgggacgagcccccataccttttctcaccac +tgt---tccctgacctgggacgagcccccataccttttcttaccactgttcctccctgtg +gcacgaggtgctttgttctccgacctggtcagcagcaccattttgcagggcggtgtgtgt +ctgctgatagcaatgggcctgctgactgtcaccggcttgctgaccacaatgggcctgctg +actgtcactggcttggtgaccggcacgggtttcgtaactgtcacaagctttgttaccgta +atgggtttggtgacaggtacgggtttggtgaccggcacagacttggtgactgtgatgggt +ttggtgaccggcatggccttggtgacgggcatgggcctgctgactgtgacaggtttggtg +actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag +actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt +ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg +atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc +tccagctgggt +>syndip_2_chr20:63964668-63966718 +cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc +aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg +aggagcaccctgacctggaacgagcccccataccttttctcaccactgttccccgacctg +ggacaagcccccataccttttcttaccactgttccctgacctgggacgagcccccatacc +ttttctcaccactgttccctgacctgggacgagcccccataccttttctcaccactgttc +cccgacctgggacgagcccccataccttttctcaccactgttccctgacctgggacgagc +ccccataccttttctcaccactgttcctccccgacctgggacgagcccccataccttttc +tcaccactgttccccgacctgggacgagcccccataccttttctcaccactgttccctga +cctggaacgagcccccataccttttctcaccactgttccccgacctgggacgagccccca +taccttttctcaccactgttccccgacctgggacgagcccccataccttttctcaccact +gttccccgacctgggacgagcccccataccttttctcaccactgttccccgacctgggac +gagcccccataccttttctcaccactgttccccgacctgggacgagcccccatacctttt +ctcaccactgttccctgacctgggacgagcccccataccttttctcaccactgttccccg +acctgggacgagcccccataccttttcttaccactgttccccgacctgggacgagccccc +ataccttttcttaccactgttcctccccgacctgggacgagcccccataccttttcttac +cactgttcctccccaacctgggacgagcccccataccttttctcaccactgttcctcccc +gacctgggacgagcccccataccttttctcaccactgttccctgacctgggacgagcccc +cataccttttcttaccactgttcctccccgacctgggacgagcccccataccttttctta +ccactgttcctccccgacctgggacgagcccccataccttttcttaccactgttcctccc +cgacctgggacgagcccccataccttttcttaccactgttccccgacctgggacgagccc +ccataccttttctcaccactgttccctgacctgggacgagcccccataccttttcttacc +actgttccctgacctgggacgagcccccataccttttctcaccactgttcctccccgacc +tgggacgagcccccataccttttctcaccactgttccccgacctgggacgagcccccata +ccttttcttaccactgttcctccctgacctgggacgagcccccataccttttcttaccac +tgttcctccccgacctgggacgagcccccataccttttcttaccactgttcctccctgtg +gcacgaggtgctttgttctccgacctggtcagcagcaccattttgcagggcggtgtgtgt +ctgctgatagcaatgggcctgctgactgtcaccggcttgctgaccacaatgggcctgctg +actgtcactggcttggtgaccggcacgggtttcgtaactgtcacaagctttgttaccgta +atgggtttggtgacaggtacgggtttggtgaccggcacagacttggtgactgtgatgggt +ttggtgaccggcatggccttggtgacgggcatgggcctgctgactgtgacaggtttggtg +actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag +actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt +ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg +atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc +tccagctgggt +>p:HG002_1_chr20:63964668-63966718 +cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc +aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg +aggagcaccctgacctggaacgagcccccataccttttctcaccactgttccccgacctg +ggacaagcccccataccttttcttaccactgttccctgacctgggacgagcccccatacc +ttttctcaccactgttccctgacctgggacgagcccccataccttttctcaccactgttc +cccgacctgggacgagcccccataccttttctcaccactgttccctgacctgggacgagc +ccccataccttttctcaccactgttcctccccgacctgggacgagcccccataccttttc +tcaccactgttccccgacctgggacgagcccccataccttttctcaccactgttccctga +cctggaacgagcccccataccttttctcaccactgttccccgacctgggacgagccccca +taccttttctcaccactgttccccgacctgggacgagcccccataccttttctcaccact +gttccccgacctgggacgagcccccataccttttctcaccactgttccccgacctgggac +gagcccccataccttttctcaccactgttccccgacctgggacgagcccccatacctttt +ctcaccactgttccctgacctgggacgagcccccataccttttctcaccactgttccccg +acctgggacgagcccccataccttttcttaccactgttccccgacctgggacgagccccc +ataccttttcttaccactgttcctccccgacctgggacgagcccccataccttttcttac +cactgttcctccccaacctgggacgagcccccataccttttctcaccactgttcctcccc +gacctgggacgagcccccataccttttctcaccactgttccctgacctgggacgagcccc +cataccttttcttaccactgttcctccccgacctgggacgagcccccataccttttctta +ccactgttcctccccgacctgggacgagcccccataccttttcttaccactgttcctccc +cgacctgggacgagcccccataccttttcttaccactgttccccgacctgggacgagccc +ccataccttttctcaccactgttccctgacctgggacgagcccccataccttttcttacc +actgttccctgacctgggacgagcccccataccttttctcaccactgttcctccccgacc +tgggacgagcccccataccttttctcaccactgttccccgacctgggacgagcccccata +ccttttcttaccactgttcctccctgacctgggacgagcccccataccttttcttaccac +tgttcctccccgacctgggacgagcccccataccttttcttaccactgttcctccctgtg +gcacgaggtgctttgttctccgacctggtcagcagcaccattttgcagggcggtgtgtgt +ctgctgatagcaatgggcctgctgactgtcaccggcttgctgaccacaatgggcctgctg +actgtcactggcttggtgaccggcacgggtttcgtaactgtcacaagctttgttaccgta +atgggtttggtgacaggtacgggtttggtgaccggcacagacttggtgactgtgatgggt +ttggtgaccggcatggccttggtgacgggcatgggcctgctgactgtgacaggtttggtg +actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag +actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt +ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg +atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc +tccagctgggt +>p:HG002_2_chr20:63964668-63966718 +cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc +aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg +aggag------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------------------caccctgacc +tggaacgagcccccataccttttctcaccactgttccctgacctgggacgagcccccata +ccttttctcaccactgttcctccccgacctgggacgagcccccataccttttctcaccac +tgt---tccctgacctgggacgagcccccataccttttcttaccactgttcctccctgtg +gcacgaggtgctttgttctccgacctggtcagcagcaccattttgcagggcggtgtgtgt +ctgctgatagcaatgggcctgctgactgtcaccggcttgctgaccacaatgggcctgctg +actgtcactggcttggtgaccggcacgggtttcgtaactgtcacaagctttgtgaccgta +atgggtttggtgacaggtacgggtttggtgaccggcacagacttggtgactgtgatgggt +ttggtgaccggcatggccttggtgacgggcatgggcctgctgactgtgacaggtttggtg +actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag +actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt +ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg +atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc +tccagctgggt +>ref_chr20:63964668-63966718 +cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc +aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg +aggagcaccctgacctggaacgagcccccataccttttctcaccactgttccccgacctg +ggacaagcccccataccttttcttaccactgttccctgacctgggacgagcccccatacc +ttttctcaccactgttccctgacctgggacgagcccccataccttttctcaccactgttc +cccgacctgggacgagcccccataccttttctcaccactgttccctgacctgggacgagc +ccccataccttttctcaccactgttcctccccgacctgggacgagcccccataccttttc +tcaccactgttccccgacctgggacgagcccccataccttttctcaccactgttccctga +cctggaacgagcccccataccttttctcaccactgttccccgacctgggacgagccccca +taccttttctcaccactgttccccgacctgggacgagcccccataccttttctcaccact +gttccccgacctgggacgagcccccataccttttctcaccactgttccccgacctgggac +gagcccccataccttttctcaccactgttccccgacctgggacgagcccccatacctttt +ctcaccactgttccctgacctgggacgagcccccataccttttctcaccactgttccccg +acctgggacgagcccccataccttttcttaccactgttccccgacctgggacgagccccc +ataccttttcttaccactgttcctccccgacctgggacgagcccccataccttttctcac +cactgttcctccccaacctgggacgagcccccataccttttctcaccactgttcctcccc +gacctgggacgagcccccataccttttctcaccactgttccctgacctgggacgagcccc +cataccttttcttaccactgttcctccccgacctgggacgagcccccataccttttctta +ccactgttcctccccgacctgggacgagcccccataccttttcttaccactgttcctccc +cgacctgggacgagcccccataccttttcttaccactgttccccgacctgggacgagccc +ccataccttttctcaccactgttccctgacctgggacgagcccccataccttttcttacc +actgttccctgacctgggacgagcccccataccttttctcaccactgttcctccccgacc +tgggacgagcccccataccttttctcaccactgttccccgacctgggacgagcccccata +ccttttcttaccactgttcctccctgacctgggacgagcccccataccttttcttaccac +tgttcctccccgacctgggacgagcccccataccttttcttaccactgttcctccctgtg +gcacgaggtgctttgttctccgacctggtcagcagcaccattttgcagggcggtgtgtgt +ctgctgatagcaatgggcctgctgactgtcaccggcttgctgaccacaatgggcctgctg +actgtcactggcttggtgaccggcacgggtttcgtaactgtcacaagctttgttaccgta +atgggtttggtgacaggtacgggtttggtgaccggcacagacttggtgactgtgatgggt +ttggtgaccggcatggccttggtgacgggcatgggcctgctgactgtgacaggtttggtg +actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag +actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt +ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg +atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc +tccagctgggt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa new file mode 100644 index 00000000..3696203d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa @@ -0,0 +1,260 @@ +>syndip_1_chr20:63491489-63493165 +gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc +actggctgggggtctcctatcagtgcccttctctagggtctcagatttgtttatggtttc +tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg +gatggatgtatgaatggggtggtagatggatggggaggtggatggacagaaggatgggtg +gggagatggatgagtagggagatggatggatggattggtggatggatggatggatggatg +gatgaggaggtagatgagtggatggacagaaggatggatgggtaggaaggtgggtggatg +aatggatggatggggaggtggatggatagatggatggatggatggatggatggatggatg +gatggatggggggatagatggatggatggatggatggatggacggacg----gatgggga +ggtggatggatggatggatggatggatggatggatggatgaggagatgaatggatggatg +agatggatggatggatgatggagggaaggatggctgggtggggagatggatggatagaga +ggtggatgggtggatggggagatggatggatggatggatggatggggaagtggatggatg +gggaggtggatggatggataggtggatgggtggatggacagaaggatgaatgggtaggga +gatggatggatggatggatggggagggatggatggatggatggatggatgggtggatgga +tagaaggatggatgggtggggaggtgggtgggtgg----gtggatggatggagagatgga +tagatggata----gatggatggatggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtgggtggatgggtggatggatggatgggtggggaggtgggcgggtgga +tggatggatggatgcagaggtgggtggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtggatggatggatgagtggggaggtgggtgggtgggtggatggatgga +tggatggatgcatggatggatgga------------------------------------ +------------------------------------------------------------ +-------------------tggagaggtgggtggacggatggatggattgggaggtgggt +agatgggtggatggacagaaggatggatgggtggggaggtgggtgggtgggtggatggat +ggatggggaggtggatggatggatggttagatggggaggtgggtggatgggtggatggac +agaaggatggatgggtagggaggtaggtgagtggatggatggatggggagatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatggatagaaggatggac +gggtggggagatggatggatggatggatggatggatggatggatggacagaaggatgtat +gggtggggaggtgggtggatagatggatggatggatggatggggaaatggatggatggaa +ggatggatggatagggaggtgggtggatggatggatggatggatgtacagaaggatggat +gggtggggaggtgggtgagtggatgaatggatggatggatagatagaaggatggatgggt +gggacaatagatggatggatagagagaaggatggatgggtggggagatggatggatggat +ggatggatggatggatggatggatggatagagagaaggatggatggttgagaca----at +ggatggatggatggatggagagatggatggatggacggatggacagaaggatggatgggt +ggggaggtggatggatggattgaaggatggatgggtgggga----catggatggatggat +ggatggatggatggatggatggacagaaggatgtatgggtggggaggtgggtggata--- +-----gatggatggatggggaaatggatggatggaaggatggatggatagggaggtgggt +ggatggatggatggatggatgtacagaaggatggatgggtggggaggtgggtgagtggat +gaatggatggatggatagatagaaggatggatgggtgggacaatagatggatggatagag +agaaggatggatgggtggggagatggatggatggatggatggatggatggatagagagaa +ggatggatggttgagacaatggatggatggatggatggatggagagatggatggatggat +ggatggacagaaggatggatgggtggggaggtggatggatggatggaaggatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatgcatggatggatggat +ggagggatagagagaaggatggatgggtgggacaatcaatggatggatggatggatggat +ggatagagagaaggatggatgggtggggagatggatggatggatggagaggtggatggat +ggatggatggatggattgatggacagaaggatggatgggtggggaggtggatagatggag +agatggagggatggttcgatagagagaaggatggatggatgggacaatggatggatggat +ggatggagtgaaggatggatgggtggggagatggatggatcgatggatggatggatggat +agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg +ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg +tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc +tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat +cttgtacacgtcctgcagcggc +>syndip_2_chr20:63491489-63493165 +gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc +actggctgggggtctcctatcagtgcccttctctagggtctcagatttgtttatggtttc +tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg +gatggatgtatgaatggggtggtagatggatggggaggtggatggacagaaggatgggtg +gggagatggatgagtagggagatggatggatggattggtggatggatggatggatggatg +gatgaggaggtagatgagtggatggacagaaggatggatgggtaggaaggtgggtggatg +aatggatggatggggaggtggatggatagatggatggatggatggatggatggatggatg +gatggatggggggatagatggatggatggatggatggacggacggatggggaggtggatg +gatggatggatggatggatggatggatggatggatggatgaggagatgaatggatggatg +agatggatggatggatgatggagggaaggatggctgggtggggagatggatggatagaga +ggtggatgggtggatggggagatggatggatggatggatggatggggaagtggatggatg +gggaggtggatggatggataggtggatgggtggatggacagaaggatgaatgggtaggga +gatggatggatggatggatgcggagggatggatggatggatggatggatgggtggatgga +tagaaggatggatgggtggggaggtgggtgggtgggtgcgtggatggatggagagatgga +tagatggatagatggatggatggatggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtgggtggatgggtggatggatggatgggtggggaggtgggcgggtgga +tggatggatggatgcagaggtgggtggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtggatggatggatgagtggggaggtgggtgggtggatggatggatgga +tggatgcatggatggatggatggagaggtgggtggatgggtggatggacagaaggatgga +tgggtggggaggtgggtgagtgggtggatggatgaatggggaggtggatggatggatgga +tggatgaaggatggatgggtggggaggtgggtggacggatggatggattgggaggtgggt +agatgggtggatggacagaaggatggatgggtggggaggtgggtgggtgggtggatggat +ggatggggaggtggatggatggatggttagatggggaggtgggtggatgggtggatggac +agaaggatggatgggtagggaggtaggtgagtggatggatggatggggagatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatggatagaaggatggac +gggtgggga----gatggatggatggatggatggatggatggatggacagaaggatgtat +gggtggggaggtgggtggatagatggatggatggatggatggggaaatggatggatggaa +ggatggatggatagggaggtgggtggatggatggatggatggatgtacagaaggatggat +gggtggggaggtgggtgagtggatgaatggatggatggatagatagaaggatggatgggt +gggacaatagatggatggatagagagaaggatggatgggtgggga--------gatggat +ggatggatggatggatggatggatggatagagagaaggatggatggttgagacaatggat +ggatggatggatggatggagagatggatggatggatggatggacagaaggatggatgggt +ggggaggtggatggatggattgaaggatggacgggtggggagatggatggatggatggat +ggatggatggatggatggatggacagaaggatgtatgggtggggaggtgggtggatagat +ggatggatggatggatggggaaatggatggatggaaggatggatggatagggaggtgggt +ggatggatggatggatggatgtacagaaggatggatgggtggggaggtgggtgagtggat +gaatggatggatggatagatagaaggatggatgggtgggacaaca----gatggatagag +agaaggatggatgggtgggga----gatggatggatggatggatggatggatagagagaa +ggatggatggttgagacaatggatggatggatggatggatggagagatggatggatggat +ggatggacagaaggatggatgggtggggaggtggatggatggatggaaggatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatgcatggatggatggat +ggagggatagagagaaggatggatgggtgggacaatcaatggatggatggatggatggat +ggatagagagaaggatggatgggtggggagatggatggatggatggagaggtggatggat +ggatggatggatggattgatggacagaaggatggatgggtggggaggtggatagatggag +agatggagggatggttcgatagagagaaggatggatggatgggacaatggatggatggat +ggatggagtgaaggatggatgggtggggagatggatggatcgatggatggatggatggat +agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg +ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg +tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc +tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat +cttgtacacgtcctgcagcggc +>p:HG002_1_chr20:63491489-63493165 +gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc +actggctgggggtctcctatcagtgcccttctctggggtctcagatttgtttatggtttc +tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg +gatggatgtatgaatggggtggtagatggatggggaggtggatggacagaaggatgggtg +gggagatggatgagtagggagatggatggatggattggtggatggatggatggatggatg +gatgaggaggtagatgagtggatggacagaaggatggatgggtaggaaggtgggtggatg +aatggatggatggggaggtggatggatagatggatggatggatggatggatggatggatg +gatggatggggggatagatggatggatggatggatggacggacggatggggaggtggatg +gatggatggatggatggatggatggatggatggatggatgaggagatgaatggatggatg +agatggatggatggatgatggagggaaggatggctgggtggggagatggatggatagaga +ggtggatgggtggatggggagatggatggatggatggatggatggggaagtggatggatg +gggaggtggatggatggataggtggatgggtggatggacagaaggatgaatgggtaggga +gatggatggatggatggatgcggagggatggatggatggatggatggatgggtggatgga +tagaaggatggatgggtggggaggtgggtgggtgggtgcgtggatggatggagagatgga +tagatggatagatggatggatggatggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtgggtggatgggtggatggatggatgggtggggaggtgggcgggtgga +tggatggatggatgcagaggtgggtggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtggatggatggatgagtggggaggtgggtgggtggatggatggatgga +tggatgcatggatggatggatggagaggtgggtggatgggtggatggacagaaggatgga +tgggtggggaggtgggtgagtgggtggatggatgaatggggaggtggatggatggatgga +tggatgaaggatggatgggtggggaggtgggtggacggatggatggattgggaggtgggt +agatgggtggatggacagaaggatggatgggtggggaggtgggtgggtgggtggatggat +ggatggggaggtggatggatggatggttagatggggaggtgggtggatgggtggatggac +agaaggatggatgggtagggaggtaggtgagtggatggatggatggggagatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatggatagaaggatggac +gggtgggga----gatggatggatggatggatggatggatggatggacagaaggatgtat +gggtggggaggtgggtggatagatggatggatggatggatggggaaatggatggatggaa +ggatggatggatagggaggtgggtggatggatggatggatggatgtacagaaggatggat +gggtggggaggtgggtgagtggatgaatggatggatggatagatagaaggatggatgggt +gggacaatagatggatggatagagagaaggatggatgggtgggga--------gatggat +ggatggatggatggatggatggatggatagagagaaggatggatggttgagacaatggat +ggatggatggatggatggagagatggatggatggatggatggacagaaggatggatgggt +ggggaggtggatggatggattgaaggatggacgggtggggagatggatggatggatggat +ggatggatggatggatggatggacagaaggatgtatgggtggggaggtgggtggatagat +ggatggatggatggatggggaaatggatggatggaaggatggatggatagggaggtgggt +ggatggatggatggatggatgtacagaaggatggatgggtggggaggtgggtgagtggat +gaatggatggatggatagatagaaggatggatgggtgggacaaca----gatggatagag +agaaggatggatgggtgggga----gatggatggatggatggatggatggatagagagaa +ggatggatggttgagacaatggatggatggatggatggatggagagatggatggatggat +ggatggacagaaggatggatgggtggggaggtggatggatggatggaaggatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatgcatggatggatggat +ggagggatagagagaaggatggatgggtgggacaatcaatggatggatggatggatggat +ggatagagagaaggatggatgggtggggagatggatggatggatggagaggtggatggat +ggatggatggatggattgatggacagaaggatggatgggtggggaggtggatagatggag +agatggagggatggttcgatagagagaaggatggatggatgggacaatggatggatggat +ggatggagtgaaggatggatgggtggggagatggatggatcgatggatggatggatggat +agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg +ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg +tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc +tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat +cttgtacacgtcctgcagcggc +>p:HG002_2_chr20:63491489-63493165 +gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc +actggctgggggtctcctatcagtgcccttctctggggtctcagatttgtttatggtttc +tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg +gatggatgtatgaatggggtggtagatggatggggaggtggatggacagaaggatgggtg +gggagatggatgagtagggagatggatggatggattggtggatggatggatggatggatg +gatgaggaggtagatgagtggatggacagaaggatggatgggtaggaaggtgggtggatg +aatggatggatggggaggtggatggatagatggatggatggatggatggatggatggatg +gatggatggggggatagatggatggatggatggatggatggacggacg----gatgggga +ggtggatggatggatggatggatggatggatggatggatgaggagatgaatggatggatg +agatggatggatggatgatggagggaaggatggctgggtggggagatggatggatagaga +ggtggatgggtggatggggagatggatggatggatggatggatggggaagtggatggatg +gggaggtggatggatggataggtggatgggtggatggacagaaggatgaatgggtaggga +gatggatggatggatggatggggagggatggatggatggatggatggatgggtggatgga +tagaaggatggatgggtggggaggtgggtgggtgg----gtggatggatggagagatgga +tagatggata----gatggatggatggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtgggtggatgggtggatggatggatgggtggggaggtgggcgggtgga +tggatggatggatgcagaggtgggtggatggatggatggagagaaggatggatgggtggg +gaggtgggtgggtggatggatggatgagtggggaggtgggtgggtgggtggatggatgga +tggatggatgcatggatggatgga------------------------------------ +------------------------------------------------------------ +-------------------tggagaggtgggtggacggatggatggattgggaggtgggt +agatgggtggatggacagaaggatggatgggtggggaggtgggtgggtgggtggatggat +ggatggggaggtggatggatggatggttagatggggaggtgggtggatgggtggatggac +agaaggatggatgggtagggaggtaggtgagtggatggatggatggggagatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatggatagaaggatggac +gggtggggagatggatggatggatggatggatggatggatggatggacagaaggatgtat +gggtggggaggtgggtggatagatggatggatggatggatggggaaatggatggatggaa +ggatggatggatagggaggtgggtggatggatggatggatggatgtacagaaggatggat +gggtggggaggtgggtgagtggatgaatggatggatggatagatagaaggatggatgggt +gggacaatagatggatggatagagagaaggatggatgggtggggagatggatggatggat +ggatggatggatggatggatggatggatagagagaaggatggatggttgagaca----at +ggatggatggatggatggagagatggatggatggacggatggacagaaggatggatgggt +ggggaggtggatggatggattgaaggatggatgggtgggga----catggatggatggat +ggatggatggatggatggatggacagaaggatgtatgggtggggaggtgggtggata--- +-----gatggatggatggggaaatggatggatggaaggatggatggatagggaggtgggt +ggatggatggatggatggatgtacagaaggatggatgggtggggaggtgggtgagtggat +gaatggatggatggatagatagaaggatggatgggtgggacaatagatggatggatagag +agaaggatggatgggtggggagatggatggatggatggatggatggatggatagagagaa +ggatggatggttgagacaatggatggatggatggatggatggagagatggatggatggat +ggatggacagaaggatggatgggtggggaggtggatggatggatggaaggatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatgcatggatggatggat +ggagggatagagagaaggatggatgggtgggacaatcaatggatggatggatggatggat +ggatagagagaaggatggatgggtggggagatggatggatggatggagaggtggatggat +ggatggatggatggattgatggacagaaggatggatgggtggggaggtggatagatggag +agatggagggatggttcgatagagagaaggatggatggatgggacaatggatggatggat +ggatggagtgaaggatggatgggtggggagatggatggatcgatggatggatggatggat +agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg +ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg +tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc +tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat +cttgtacacgtcctgcagcggc +>ref_chr20:63491489-63493165 +gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc +actggctgggggtctcctatcagtgcccttctctagggtctcagatttgtttatggtttc +tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg +gatggatgtatgaatggggtggtagatggatggggaggtggatggacagaaggatgggtg +gggagatggatgagtagggagatggatggatggattggtggatggatggatggatggatg +gatgaggaggtagatgagtggatggacagaaggatggatgggtaggaaggtgggtggatg +aatggatggatggggaggtggatggata----gatggatggatggatggatggatggatg +gatggatggggggatagatggatggatggatggatggacggacggatggggaggtggatg +gatggatggatggatggatggatggatggatggatggatgaggagatgaacggatggatg +agatggatggatggatgatggagggaaggatggctgggtggggagatggatggatagaga +ggtggatgggtggatggggagatggatggatggatggatggatggggaagtggatggatg +gggaggtggatggat--------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------------------ggatgtat +gggtgggtaggtgggtggatagatggatg------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----gatggatggatggataaatggatggatggaaggatggatggataggtaggtgggt +ggatggatggatggatggatgtacagaaggatggatgggtgggtaggtgggtgagtggat +gaatggatggatggatagatagaaggatggatgggtgggtcaatagatggatggatagag +agaaggatggatgggtgggta----gatggatggatggatggatggatggatagagagaa +ggatggatggttgagacaatggatggatggatggatggatggagagatggatggatggat +ggatggacagaaggatggatgggtggggaggtggatggatggatggaaggatggatggat +ggatagagagaaggatggatgggtgggacaatggatggatggatgcatggatggatggat +ggagggatagagagaaggatggatgggtgggacaatcaatggatggatggatggatggat +ggatagagagaaggatggatgggtggggagatggatggatggatggagaggtggatggat +ggatggatggatggattgatggacagaaggatggatgggtggggaggtggatagatggag +agatggagggatggttcgatagagagaa----ggatggatgggacaatggatggatggat +ggatggagtgaaggatggatgggtggggagatggatggatcgatggatggatggatggat +agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg +ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg +tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc +tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat +cttgtacacgtcctgcagcggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa new file mode 100644 index 00000000..461d372c --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa @@ -0,0 +1,50 @@ +>syndip_1_chr20:10802576-10803059 +gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt +tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca +atgtcctctgaatatatatatatatatcctccc----------atatatatatatatcct +ccc--atatatatatatatat--------------------------------------- +-----------------------------atatatatatatatgtcctcccatatatata +tatatcctcccatatatatatctatcctccc----atatatatatatatatattttcaat +gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac +ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca +caac +>syndip_2_chr20:10802576-10803059 +gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt +tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca +atgtcctctgaatatatatatatatatcctcccatatatatatatatatatatatatcct +ccc--atatatatatatatatatatatatcctcccatatatatatatatatatatatcct +cccatatatatatatatatatgtcctcccatatatatatatatatcctcccatatatata +tatatcctcccatatatatatctatcctcccatatatatatatatatatatattttcaat +gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac +ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca +caac +>p:HG002_1_chr20:10802576-10803059 +gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt +tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca +atgtcctctgaatatatatatatatatcctcccatatatatatatatatatatatatcct +ccc--atatatatatatatatatatatatcctcccatatatatatatatatatatatcct +cccatatatatatatatatatgtcctcccatatatatatatatatcctcccatatatata +tatatcctcccatatatatatctatcctcccatatatatatatatatatatattttcaat +gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac +ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca +caac +>p:HG002_2_chr20:10802576-10803059 +gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt +tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca +atgtcctctgaatatatatatatatatcctccc----------atatatatatatatcct +ccc--atatatatatatatat--------------------------------------- +-----------------------------atatatatatatatgtcctcccatatatata +tatatcctcccatatatatatctatcctccc----atatatatatatatatattttcaat +gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac +ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca +caac +>ref_chr20:10802576-10803059 +gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt +tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca +atgtcctctgaatatatatatatatatcctcccatatatatatatatatatatatatcct +cccatatatatatatatatatatatatatcctcccatatatatatatatatatatatcct +cccatatatatatatatatatgtcctcccatatatatatatatatcctcccatatatata +tatatcctcccatatatatatctatcctcccatatatatatatatatatatattttcaat +gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac +ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca +caac diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa new file mode 100644 index 00000000..cd1e16cd --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa @@ -0,0 +1,85 @@ +>syndip_1_chr20:61561820-61562604 +acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc +ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt +ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg +accccagggctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctc +ccggagagagagaggggcctccgtgtggagaggtggaccccagggctcccggagagaggg +acttccgtgtggagaggtggaccccagggctcccggagagagagagggacctccctgaga +gagagagggacctccctgtggagaggtggaccccagggctcccggagagagagagggacc +tccctgtggagaggtggaccccagggctcccagagagagagagggaccttcgtgtggaga +ggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtggaccccag +ggctcccggagagagagagggacctccgtgtggagaggtggaccccagggctcccggaga +gagggacctccgtgtggagagctggaccccagggctcccggagagagagagggacctccg +tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg +gaccccagggctcccggagagagagagggaccttcgtgtggagaggtggaccccaggact +cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag +gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc +>syndip_2_chr20:61561820-61562604 +acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc +ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt +ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg +accccagggctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctc +ccggagagagagaggggcctccgtgtggagaggtggaccccagggctcccggagagaggg +acctccg----------------------------------------------------- +----------------tgtggagaggtggaccccagggctcccggagagagagagggacc +tccctgtggagaggtggaccccagggctcccagagagagagagggaccttcgtgtggaga +ggtggaccccagggctcccgga-------------------------------------- +--------------------------------------------------------gaga +gagggacctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctg +tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg +gaccccagggctcccggagagagagggggaccttcgtgtggagaggtggaccccaggact +cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag +gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc +>p:HG002_1_chr20:61561820-61562604 +acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc +ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt +ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg +accccagggctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctc +ccggagagagagaggggcctccgtgtggagaggtggaccccagggctcccggagagaggg +acctccg----------------------------------------------------- +----------------tgtggagaggtggaccccagggctcccggagagagagagggacc +tccctgtggagaggtggaccccagggctcccagagagagagagggaccttcgtgtggaga +ggtggaccccagggctcccgga-------------------------------------- +--------------------------------------------------------gaga +gagggacctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctg +tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg +gaccccagggctcccggagagagagggggaccttcgtgtggagaggtggaccccaggact +cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag +gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc +>p:HG002_2_chr20:61561820-61562604 +acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc +ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt +ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg +accccagggctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctc +ccggagagagagaggggcctccgtgtggagaggtggaccccagggctcccggagagaggg +acttccgtgtggagaggtggaccccagggctcccggagagagagagggacctccctgaga +gagagagggacctccctgtggagaggtggaccccagggctcccggagagagagagggacc +tccctgtggagaggtggaccccagggctcccagagagagagagggaccttcgtgtggaga +ggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtggaccccag +ggctcccggagagagagagggacctccgtgtggagaggtggaccccagggctcccggaga +gagggacctccgtgtggagagctggaccccagggctcccggagagagagagggacctccg +tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg +gaccccagggctcccggagagagagagggaccttcgtgtggagaggtggaccccaggact +cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag +gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac +ttggtgcatccctgcaggatctgagcacagaacggcctgcttcagccc +>ref_chr20:61561820-61562604 +acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc +ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt +ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg +accccagggctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctc +ccggagagagagaggggcctccgtgtggagaggtggaccccagggctcccggagagaggg +acctccg----------------------------------------------------- +----------------tgtggagaggtggaccccagggctcccggagagagagagggacc +tccctgtggagaggtggaccccagggctcccagagagagagagggaccttcgtgtggaga +ggtggaccccagggctcccgga-------------------------------------- +--------------------------------------------------------gaga +gagggacctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctg +tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg +gaccccagggctcccggagagagagggggaccttcgtgtggagaggtggaccccaggact +cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag +gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa new file mode 100644 index 00000000..7ce5f8b5 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa @@ -0,0 +1,70 @@ +>syndip_1_chr20:57110112-57110886 +agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga +aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg +cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct +tggctcagcgtgtgcggggtgctgagctaagtcttcccaaacggcgtgtgcggggtgctg +cgctgtcttccagatccgcttgtgcagggcgctgtgctgtcttctggttcggcgtgtgcg +gggcgctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctaagtcttccggc +tgggcgtgtgcggggc-------------------------------------------- +------------------------------------------------------------ +--gctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtcttccggttcgg +cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct +tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc +ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca +>syndip_2_chr20:57110112-57110886 +agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga +aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg +cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct +tggctcagcgtgtgcggggtgctgagctaagtcttcccaaacggcgtgtgcggggtgctg +cgctgtcttccagatccgcttgtgcagggcgctgtgctgtcttctggttc---------- +--------------------------ggcgtgtgcggggcgctgtgctaagtcttccggc +tgggcgtgtgcggggc-------------------------------------------- +------------------------------------------------------------ +--gctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtcttccggttcgg +cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct +tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc +ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca +>p:HG002_1_chr20:57110112-57110886 +agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga +aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg +cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct +tggctcagcgtgtgcggggtgctgagctaagtcttcccaaacggcgtgtgcggggtgctg +cgctgtcttccagatccgcttgtgcagggcgctgtgctgtcttctggttc---------- +--------------------------ggcgtgtgcggggcgctgtgctaagtcttccggc +tgggcgtgtgcggggc-------------------------------------------- +------------------------------------------------------------ +--gctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtcttccggttcgg +cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct +tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc +ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca +>p:HG002_2_chr20:57110112-57110886 +agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga +aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg +cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct +tggctcagcgtgtgcggggtgctgagctaagtcttcccaaacggcgtgtgcggggtgctg +cgctgtcttccagatccgcttgtgcagggcgctgtgctgtcttctggttcggcgtgtgcg +gggcgctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctaagtcttccggc +tgggcgtgtgcggggc-------------------------------------------- +------------------------------------------------------------ +--gctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtcttccggttcgg +cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct +tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc +ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca +>ref_chr20:57110112-57110886 +agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga +aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg +cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct +tggctcagcgtgtgcggggtgctgagctaagtcttcccaaacggcgtgtgcggggtgctg +cgctgtcttccagatccgcttgtgcagggcgctgtgctgtcttctggttcggcgtgtgcg +gggcgctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctaagtcttccggc +tgggcgtgtgcggggcgctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgct +gtcttccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgggcgtgtgcggg +gtgctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtcttccggttcgg +cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct +tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc +ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa new file mode 100644 index 00000000..eac01e91 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa @@ -0,0 +1,180 @@ +>syndip_1_chr20:59383704-59385336 +ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc +aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc +tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg +ggaagagatgagtgggcaggaggtgagcagggaggaggtgagcagggatgagtggaggaa +ggaggtgatagggaggaggtgagtgggcaggaggtgagcagggagaagatgagcaggggg +gtggtgagcggggaggaggtggaggggaaggaagtgatgggggaggaggtggggggaagg +aggtgatggggaggaggtgaatgggcaggaggtgactggggaggaggtgagcagggagga +ggtgagcagggaggaggtgagtggagtggggatgggagtggagtggggggaaggaagtga +tagggaggagatgagtgggcaggaggtgagcagagaggagatgagtagagcagggatgag +taggggaaggaggtgataggaggtgagtgggcaggaggtgagcagggaggaggcgagcag +ggaggaggtagaggggaaggaagtgatggggaggaggtgaggggggaaagaggtggcggg +ggaaggaggtgatggggaggaggtgagtggggaggaaggtgggaggaaggaggtgatggg +gaggatgtgggggaggtggggagcaaggaggtgatggggaggaggtgagtggggaggtgg +tgagca------------------------------------------------------ +-------gggaggaggtgagggggcaagggaggtgatggggaggaaggaggttatgggga +ggtggtgagtggggagggaggtaaggggtaaggtggtgatgtggaggtggtgagtgggga +ggtggtgagaggggaggagatgagggggcaagggaggtgatggggaggaggtgagtgggg +aggaaggtggggggaaggaggtgatggggaggatgtgggggaggtgggggggaaggaggt +gaggcagaggaggggagtggggagggagatgagtaaggaagtgatggggagagggtgagt +ggggaagtggtgagaggggaggaggtgagggggcaagggaggtgatgcggaggaaggagg +tgatgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggt +gatgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtg +atgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtga +tgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgat +gggaaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggtgatg +ggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgg +gaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggg +aaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggga +aggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgggaa +ggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggag +gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg +aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg +aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt +taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc +ccatgaaataactttgctccccagattctgagcatgggt +>syndip_2_chr20:59383704-59385336 +ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc +aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc +tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg +ggaagagatgagtgggcaggaggtgagcagggaggaggtgagcagggatgagtggaggaa +ggaggtgatagggaggaggtgagtgggcaggaggtgagcagggagaagatgagcaggggg +gtggtgagcggggaggaggtggaggggaaggaagtgatgggggaggaggtggggggaagg +aggtgatggggaggaggtgaatgggcaggaggtgactggggaggaggtgagcagggagga +ggtgagcagggaggaggtgagtggagtggggatgggagtggagtggggggaaggaagtga +tagggaggagatgagtgggcaggaggtgagcagagaggagatgagtagagcagggatgag +taggggaaggaggtgataggaggtgagtgggcaggaggtgagcagggaggaggcgagcag +ggaggaggtagaggggaaggaagtgatggggaggaggtgaggggggaaagaggtggcggg +ggaaggaggtgatggggaggaggtgagtggggaggaaggtgggaggaaggaggtgatggg +gaggatgtgggggaggtggggagcaaggaggtgatggggaggaggtgagtggggaggtgg +tgagcagggaggaggtgagggggcaagggaggtgatggggaggaggtgagtggggaggtg +gtgagcggggaggaggtgagggggcaagggaggtgatggggaggaaggaggttatgggga +ggtggtgagtggggagggaggtaaggggtaaggtggtgatgtggaggtggtgagtgggga +ggtggtgagaggggaggagatgagggggcaagggaggtgatggggaggaggtgagtgggg +aggaaggtggggggaaggaggtgatggggaggatgtgggggaggtgggggggaaggaggt +gaggcagaggaggggagtggggagggagatgagtaaggaagtgatggggagagggtgagt +ggggaagtggtgagaggggaggaggtgagggggcaagggaggcgatgcgga--------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------ggaaggaggtgat +gggaaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggtgatg +ggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgg +gaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggg +gaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggtgatggga +aggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgggaa +ggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggag +gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg +aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg +aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt +taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc +ccatgaaataactttgctccccagattctgagcatgggt +>p:HG002_1_chr20:59383704-59385336 +ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc +aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc +tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg +ggaagagatgagtgggcaggaggtgagcagggaggaggtgagcagggatgagtggaggaa +ggaggtgatagggaggaggtgagtgggcaggaggtgagcagggagaagatgagcaggggg +gtggtgagcggggaggaggtggaggggaaggaagtgatgggggaggaggtggggggaagg +aggtgatggggaggaggtgaatgggcaggaggtgactggggaggaggtgagcagggagga +ggtgagcagggaggaggtgagtggagtggggatgggagtggagtggggggaaggaagtga +tagggaggagatgagtgggcaggaggtgagcagagaggagatgagtagagcagggatgag +taggggaaggaggtgataggaggtgagtgggcaggaggtgagcagggaggaggcgagcag +ggaggaggtagaggggaaggaagtgatggggaggaggtgaggggggaaagaggtggcggg +ggaaggaggtgatggggaggaggtgagtggggaggaaggtgggaggaaggaggtgatggg +gaggatgtgggggaggtggggagcaaggaggtgatggggaggaggtgagtggggaggtgg +tgagcagggaggaggtgagggggcaagggaggtgatggggaggaggtgagtggggaggtg +gtgagcggggaggaggtgagggggcaagggaggtgatggggaggaaggaggttatgggga +ggtggtgagtggggagggaggtaaggggtaaggtggtgatgtggaggtggtgagtgggga +ggtggtgagaggggaggagatgagggggcaagggaggtgatggggaggaggtgagtgggg +aggaaggtggggggaaggaggtgatggggaggatgtgggggaggtgggggggaaggaggt +gaggcagaggaggggagtggggagggagatgagtaaggaagtgatggggagagggtgagt +ggggaagtggtgagaggggaggaggtgagggggcaagggaggcgatgcgga--------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------ggaaggaggtgat +gggaaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggtgatg +ggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgg +gaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggg +gaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggtgatggga +aggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgggaa +ggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggag +gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg +aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg +aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt +taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc +ccatgaaataactttgctccccagattctgagcatgggt +>p:HG002_2_chr20:59383704-59385336 +ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc +aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc +tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg +ggaagagatgagtgggcaggaggtgagcagggaggaggtgagcagggatgagtggaggaa +ggaggtgatagggaggaggtgagtgggcaggaggtgagcagggagaagatgagcaggggg +gtggtgagcggggaggaggtggaggggaaggaagtgatgggggaggaggtggggggaagg +aggtgatggggaggaggtgaatgggcaggaggtgactggggaggaggtgagcagggagga +ggtgagcagggaggaggtgagtggagtggggatgggagtggagtggggggaaggaagtga +tagggaggagatgagtgggcaggaggtgagcagagaggagatgagtagagcagggatgag +taggggaaggaggtgataggaggtgagtgggcaggaggtgagcagggaggaggcgagcag +ggaggaggtagaggggaaggaagtgatggggaggaggtgaggggggaaagaggtggcggg +ggaaggaggtgatggggaggaggtgagtggggaggaaggtgggaggaaggaggtgatggg +gaggatgtgggggaggtggggagcaaggaggtgatggggaggaggtgagtggggaggtgg +tgagca------------------------------------------------------ +-------gggaggaggtgagggggcaagggaggtgatggggaggaaggaggttatgggga +ggtggtgagtggggagggaggtaaggggtaaggtggtgatgtggaggtggtgagtgggga +ggtggtgagaggggaggagatgagggggcaagggaggtgatggggaggaggtgagtgggg +aggaaggtggggggaaggaggtgatggggaggatgtgggggaggtgggggggaaggaggt +gaggcagaggaggggagtggggagggagatgagtaaggaagtgatggggagagggtgagt +ggggaagtggtgagaggggaggaggtgagggggcaagggaggtgatgcggaggaaggagg +tgatgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggt +gatgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtg +atgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtga +tgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgat +gggaaggaggtgagtgtgtaggaggtaagcagggaggaagtggggaggaaggaggtgatg +ggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgg +gaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggg +aaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggga +aggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgggaa +ggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggag +gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg +aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg +aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt +taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc +ccatgaaataactttgctccccagattctgagcatgggt +>ref_chr20:59383704-59385336 +ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc +aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc +tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg +ggaagagatgagtgggcaggaggtgagcagggaggaggtgagcagggatgagtggaggaa +ggaggtgatagggaggaggtgagtgggcaggaggtgagcagggagaagatgagcaggggg +gtggtgagcggggaggaggtggaggggaaggaagtgatgggggaggaggtggggggaagg +aggtgatggggaggaggtgaatgggcaggaggtgactggggaggaggtgagcagggagga +ggtgagcagggaggaggtgagtggagtggggatgggagtggagtggcgggaaggaagtga +tagggaggagatgagtgggcaggaggtgagcagagaggagatgagtagagcagggatgag +taggggaaggaggtgataggaggtgagtgggcaggaggtgagcagggaggaggcgagcag +ggaggaggtagaggggaaggaagtgatggggaggaggtgaggggggaaagaggtggcggg +ggaa-------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------ggaggtgatggggaggaggtgagtgggg +aggaaggtggggggaaggaggtgatagggaggatgtgggggaggtgggggggaaggaggt +gaggcagaggaggggagtggggagggagatgagtaaggaagtgatggggagagggtgagt +ggggaagtggtgagaggggaggaggtgagggggcaagggaggcgatgcgga--------- +------------------------------------------------------------ +-------------------------------------------------ggaaggaggtg +atgggaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtga +tggggaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgat +ggggaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatg +gggaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgg +gaaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggg +gaggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgggg +aggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatgggga +ggaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggag +gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg +aggtgagaggggaggagctgagtggggaaggaggtaatgaggacaaggtgaggggaaagg +aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt +taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc +ccatgaaataactttgctccccagattctgagcatgggt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa new file mode 100644 index 00000000..38be0070 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa @@ -0,0 +1,110 @@ +>syndip_1_chr20:5041757-5042810 +cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt +gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta +gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat +gcataac------------------------------ttttttttttttttttttttgag +acggagtctcgctctcccaggctggagtgcagtggtgtgatctcggctcactgcaacctg +tgcctcccaggttcaagcgatctcctgcctcagcctccccagtacctaggattacaggtg +cgcgcctccagcatagctaatgtttgtatttttagtagagacggggtttcaccatgttgg +ccaggctggttatgcataactttttatcacagatgtttgatctcagttttaatcttttca +acatttgtaaacccttggttcattgtgcttctccctttattactc--------------- +-ttttttttttttttttgagacggagtttcgctcttgttgcccaggctggagtgcagtgg +cgcactctcggttcactgcaatctccacctcataggttcaagcgattctcctgcctcagc +ctcctgagtagctgggattacaggcacacgccaccatgcctggctaattt---------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------ttctttttcttttttgtatttttagtagagatgg +ggtttcactatgttggccagactggtcttgaactcctgacctcgtgatctgcccacctca +gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt +ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt +gatctgctcgtcttggcctcccaaagtgttgggattataggcataagccaccacaaccga +cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg +ttctgttttttttctttttctcttgccttctatgctttccatt +>syndip_2_chr20:5041757-5042810 +cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt +gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta +gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat +gcgtaacttttttttttttttttttttttttttttttttttttttttttttttttttgag +acggagtctcgctctcccaggctggagtgcagtggtgtgatctcggctcactgcaacctg +tgcctcccaggttcaagcgatctcctgcctcagcctccccagtacctaggattacaggtg +cgcgcctccagcatagctaatgtttgtatttttagtagagacggggtttcaccatgttgg +ccaggctggttatgcataactttttatcacagatgtttgatctcagttttaatcttttca +acatttgtaaacccttggttcattgtgcttctccctttattactcttttttttttttttt +tttttttttttttttttgagacggagtttcgctcttgttgcccaggctggagtgcagtgg +cgcactctcggttcactgcaatctccacctcataggttcaagcgattctcctgcctcagc +ctcctgagtagctgggattacaggcacacgccaccatgcctggctaattt---------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------ttctttttcttttttgtatttttagtagagatgg +ggtttcactatgttggccagactggtcttgaactcctgacctcgtgatctgcccacctca +gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt +ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt +gatctgctcgtcttggcctcccaaagtgttgggattataggcataagccaccacaaccga +cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg +ttctgttttttttctttttctcttgccttctatgctttccatt +>p:HG002_1_chr20:5041757-5042810 +cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt +gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta +gcgggatttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat +gcgtaacttttttttttttttttttttttttttttttttttttttttttttttttttgag +acggagtctcgctctcccaggctggagtgcagtggtgtgatctcggctcactgcaacctg +tgcctcccaggttcaagcgatctcctgcctcagcctccccagtacctaggattacaggtg +cgcgcctccagcatagctaatgtttgtatttttagtagagacggggtttcaccatgttgg +ccaggctggttatgcataactttttatcacagatgtttgatctcagttttaatcttttca +acatttgtaaacccttggttcattgtgcttctccctttattactcttttttttttttttt +tttttttttttttttttgagacggagtttcgctcttgttgcccaggctggagtgcagtgg +cgcactctcggttcactgcaatctccacctcataggttcaagcgattctcctgcctcagc +ctcctgagtagctgggattacaggcacacgccaccatgcctggctaattt---------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------ttctttttcttttttgtatttttagtagagatgg +ggtttcactatgttggccagactggtcttgaactcctgacctcgtgatctgcccacctca +gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt +ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt +gatctgctcgtcttggcctcccaaagtgttgggattataggcatgagccaccacaaccga +cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg +ttctgttttttttctttttctcttgccttctatgctttccatt +>p:HG002_2_chr20:5041757-5042810 +cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt +gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta +gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat +gcataac------------------------------ttttttttttttttttttttgag +acggagtctcgctctcccaggctggagtgcagtggtgtgatctcggctcaccgcaacctg +tgcctcccaggttcaagtgatctcctgcctcagcctccccagtacctaggattacaggtg +cgcgcctccagcatagctaatgtttgtatttttagtagaaacggggtttcaccatgttgg +ccaggatggttatgcataactttttatcacagatgtttgatctcagttttaatcttttca +acatttgtaaacccttggttcattgtgcttctccctttattactc--------------- +-ttttttttttttttttgagacggagtttcgctcttgttgcccaggctggagtgcagtgg +agcactctcggttcactgcaacctccacctcataggttcaagcgattctcctgcctcagc +ctcctgagtagctgggattacaggtgcccaccaccatgcccagctaatttttttttttga +gagtctcacactgtcgcccaggctggagtgcaatggcactatcttgactcactgcaacct +cctcctgtgttcacgtgattctcctgcctcagcctcctgagtagctgggattacaggcac +atgccaccatgcccggctaatttttctttttttttttttttgtatttttagtagagatgg +ggtttcactatgttggccagactggtcttgaactcctgacctcgtgatctgcccgcctcg +gactcccaaagtgctgggattacaggtgtgagccaccgtgcccggcctaattttgtattt +ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcccgatctcaggt +gatccgctcgtcttggcctcccaaagtgttgggattacaggcatgagccaccacacccag +cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg +ttctgttttttttctttttctcttgccttctatgctttccatt +>ref_chr20:5041757-5042810 +cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt +gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta +gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat +gcgtaac----------------------------ttttttttttttttttttttttgag +acggagtctcgctctcccaggctggagtgcagtggtgtgatctcggctcactgcaacctg +tgcctcccaggttcaagcgatctcctgcctcagcctccccagtacctaggattacaggtg +cgcgcctccagcatagctaatgtttgtatttttagtagagacggggtttcaccatgttgg +ccaggctggttatgcataactttttatcacagatgtttgatctcagttttaatcttttca +acatttgtaaacccttggttcattgtgcttctccctttattactc----ttttttttttt +tttttttttttttttttgagacggagtttcgctcttgttgcccaggctggagtgcagtgg +cgcactctcggttcactgcaatctccacctcataggttcaagcgattctcctgcctcagc +ctcctgagtagctgggattacaggcacacgccaccatgcctggctaattt---------- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------ttctttttcttttttgtatttttagtagagatgg +ggtttcactatgttggccagactggtcttgaactcctgacctcgtgatctgcccacctca +gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt +ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt +gatctgctcgtcttggcctcccaaagtgttgggattataggcataagccaccacaaccga +cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg +ttctgttttttttctttttctcttgccttctatgctttccatt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa new file mode 100644 index 00000000..1f228f20 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa @@ -0,0 +1,475 @@ +>syndip_1_chr20:20354470-20358424 +tccagtctgggtgacagagagagaccttgtctcaaaaaaaaaaaaaaaaaaatcaggtac +agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata +agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg +gaaggtggtcacactgtgagaagggaggtagtcacattgtgagagaaaaagtgatcacac +tgtgtgaggggaggtggtcacactgtgagagaaaaagtaatcatactgtgaaagggaggt +ggtcacgctgagaggggaggtggtcacactgtgtcaggggaggtggtcacactgtgagag +aaaaagtaatcacactgtgaggggatgtggtcacactgagaggggaggtggtcagactgt +gtcagaggaggtggtcacactgaggggaggtggtcacactgtgagaggggaggtggtcag +actctgtcagaggaggtggtcacactgtgagaggggaggtggtcacactgaggggaagtg +gtcacactgtgagaggggaggtggtcacactgaggggagatggtcacactgcaagagggg +aggtgatcacactgagggg-----------------------aggtggtcacactgtgag +agggaaggtggtcacactgtgagagggaaggtggtcacactgtgtgaggggaggtggtca +cactgtgaggggaggtggtcacattgtgagagggaagggggtcactgtgagaggggagaa +ggtcacactaaggggaggtagtcacactgtgaggggaggtgatcacactaaggggaggtg +gtcacactgtgagaggaggtggtcacactgtgaggggtggtcacactgtgaggggaggtc +acactgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggt +cacactgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtgg +tcacactgaggggaggtactcacactgtgaggggaggtggtcacactgtgaggggaggtg +gtcacactgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggg +ggtcacactgtgagggggaaggtggtcacactgtgagaggggagaaggtcacactaaggg +gaggtagtcacactgtgaggggaggtgatcacactaaggggaggtggtcacactgtgaga +ggaggtggtcacactgtgaggggaggtcacactgaggggaggtagtgacactatgagcag +agatggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgagca +gagatggtcacactgtgaggggaggtggtcacactgaggggaggtagtcacactgtgagg +ggaggtggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgag +cagagatggtcacactgtgaggggagggggtcacactgtgagggggaaggtggtcacact +gtgagaggggagaaggtcacactaaggggaggtagtcacactgtgaggggaggtgatcac +actaaggggaggtggtcacactgtgagaggaggtggtcacactgtgaggggaggtcacac +tgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggtcaca +ctgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtggtcac +actgaggggaggtagtcacactgtgaggggaggtggtcacactgtgaggggaggtggtca +cactgaggggaggtagtgacactgtgagcagagatggtcacactgtaaggggaggtgatc +acactgtgaggggacgtggtcacactgtgaggggaggtggtcacactgtgagaggggagg +tgaccacacaggggaggtggtcacactgaggggaggtagtgacactgtgagcagaggtgg +tcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtg +gtcacactgaggggaagtggtcacactgagggggggtggtcacactgaggggaggtggtc +atactgtgagtgaggaggtagtcacactgaggggaggggtcatactgcgagtggaggtgg +tcacactgtgaagggtggtagtcacactgagaggaggtgttcacactgaggggaagtggt +cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc +atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca +cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc +acactgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtca +tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcaca-- +ctgaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactgtg +aggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga +ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgtgag +gggaggtggtcatactgtgagtgaggaggtagtcacac--tgaggggagggggtcatact +gcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcacac +tgaggggaagtggtcacactgaggggaggtggtcatactg-------------------- +--------------------tgaggggaagtggtcacactgaggggaggtggtcatagtg +tga--ggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +c--tgaggggaggtggtcacactgaggggaggtggtcacagtgtgagaggaggtggtcac +actgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcaca +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcacag +tgtgaggggaggtggtcacactgaggggaggtggtcaca-------------------gt +gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg +aggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacact +gaggggaggtg------------------------------------------gtcacac +tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca +ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg +tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggag----------- +------------------------------------------------------------ +------------------------------gtggtcacactgaggggaggtggtcacact +gaggggaggtggtcacagtgtga--ggggaggtggtcacactgaggggaggtggtcacag +tgtacggggaggtggtcatac--tgaggggaggtggtcacagtgtgaggggaggtggtca +cactgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcaca-c-tgaggagaggtggtcat +-------------------agtgtgaggggaggtggtcacac------------------ +------------------------------------------------------------ +-----------------------tgaggggaggtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca +gtg--------------------------------------tgaggggaggtggtcacac +tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat +tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg +tgcggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt +gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga +ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac +tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacactga +ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg +ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga +gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg +cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga +>syndip_2_chr20:20354470-20358424 +tccagtctgggtgacagagagagaccttgtctc--aaaaaaaaaaaaaaaaatcaggtac +agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata +agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg +gaaggtggtcacactgtgagaagggaggtagtcacattgtgagagaaaaagtgatcacac +tgtgtgaggggaggtggtcacactgtgagagaaaaagtaatcatactgtgaaagggaggt +ggtcacgctgagaggggaggtggtcacactgtgtcaggggaggtggtcacactgtgagag +aaaaagtaatcacactgtgaggggatgtggtcacactgagaggggaggtggtcagactgt +gtcagaggaggtggtcacactgaggggaggtggtcacactgtgagaggggaggtggtcag +actctgtcagaggaggtggtcacactgtgagaggggaggtggtcacactgaggggaagtg +gtcacactgtgagaggggaggtggtcacactgaggggagatggtcacactgcaagagggg +aggtgatcacactgaggggaggtggtcacactgtgagagggaaggtggtcacactgtgag +agggaaggtggtcacactgtgagagggaaggtggtcacactgtgtgaggggaggtggtca +cactgtgaggggaggtggtcacattgtgagagggaagggggtcactgtgagaggggagaa +ggtcacactaaggggaggtagtcacactgtgaggggaggtgatcacactaaggggaggtg +gtcacactgtgagaggaggtggtcacactgtgaggggtggtcacactgtgaggggaggtc +acactgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggt +cacactgaggggaggtagtgacactgtgagcagaga------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----tggtcacactgtgaggggaggtggtcacactgaggggaggtactcacactgtgagg +ggaggtggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgag +cagagatggtcacactgtgaggggagggggtcacactgtgagggggaaggtggtcacact +gtgagaggggagaaggtcacactaaggggaggtagtcacactgtgaggggaggtgatcac +actaaggggaggtggtcacactgtgagaggaggtggtcacactgtgaggggaggtcacac +tgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggtcaca +ctgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtggtcac +actgaggggaggtagtcacactgtgaggggaggtggtcacactgtgaggggaggtggtca +cactgaggggaggtagtgacactgtgagcagagatggtcacactgtaaggggaggtgatc +acactgtgaggggacgtggtcacactgtgaggggaggtggtcacactgtgagaggggagg +tgaccacacaggggaggtggtcacactgaggggaggtagtgacactgtgagcagaggtgg +tcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtg +gtcacactgaggggaagtggtcacactgagggggggtggtcacactgaggggaggtggtc +atactgtgagtgaggaggtagtcacactgaggggaggggtcatactgcgagtggaggtgg +tcacactgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggt +cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc +atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca +cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc +acactgaggggaggtggtcacagtg-------------------tgaggggaggtggtca +tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac +actgtgaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacact----ggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacact-------------------- +-ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga +ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtgtgag +gggaggtggtcacac------tggggaggtggtcacactgtgaggggaggtggtcacact +gtgaggggaggtggtcacac--tgagaggaggtggtcacactgaggggaagtggtcacac +tgaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactg +tgagtgaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca +ctgtgaagggtggtagtcacactgagaggaggtggtcaca--ctgaggggaagtggtcac +actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +tgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg +aggggaggtggtcatactgtgagtgaggaagtagtcacactgaggggaagtggtcacact +gaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcacac +tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca +ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg +tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gtgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactg +tgagtgaggaggtagtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatac +tgcgagtggaggtagtcacactgtgaagggtggtagtcaca--ctgagaggaggtggtca +cactgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac +actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +gtgtgcggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcat +actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcaca +ctgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca +ctgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac +tgaggggaggtggtcatagtgtgaggggaggtggtgacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat +tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt +gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga +ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac +tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaagtggtcacactga +ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg +ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga +gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg +cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga +>p:HG002_1_chr20:20354470-20358424 +tccagtctgggtgacagagagagaccttgtctc--aaaaaaaaaaaaaaaaatcaggtac +agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata +agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg +gaaggtggtcacactgtgagaagggaggtagtcacattgtgagagaaaaagtgatcacac +tgtgtgaggggaggtggtcacactgtgagagaaaaagtaatcatactgtgaaagggaggt +ggtcacgctgagaggggaggtggtcacactgtgtcaggggaggtggtcacactgtgagag +aaaaagtaatcacactgtgaggggatgtggtcacactgagaggggaggtggtcagactgt +gtcagaggaggtggtcacactgaggggaggtggtcacactgtgagaggggaggtggtcag +actctgtcagaggaggtggtcacactgtgagaggggaggtggtcacactgaggggaagtg +gtcacactgtgagaggggaggtggtcacactgaggggagatggtcacactgcaagagggg +aggtgatcacactgaggggaggtggtcacactgtgagagggaaggtggtcacactgtgag +agggaaggtggtcacactgtgagagggaaggtggtcacactgtgtgaggggaggtggtca +cactgtgaggggaggtggtcacattgtgagagggaagggggtcactgtgagaggggagaa +ggtcacactaaggggaggtagtcacactgtgaggggaggtgatcacactaaggggaggtg +gtcacactgtgagaggaggtggtcacactgtgaggggtggtcacactgtgaggggaggtc +acactgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggt +cacactgaggggaggtagtgacactgtgagcagaga------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----tggtcacactgtgaggggaggtggtcacactgaggggaggtactcacactgtgagg +ggaggtggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgag +cagagatggtcacactgtgaggggagggggtcacactgtgagggggaaggtggtcacact +gtgagaggggagaaggtcacactaaggggaggtagtcacactgtgaggggaggtgatcac +actaaggggaggtggtcacactgtgagaggaggtggtcacactgtgaggggaggtcacac +tgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggtcaca +ctgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtggtcac +actgaggggaggtagtcacactgtgaggggaggtggtcacactgtgaggggaggtggtca +cactgaggggaggtagtgacactgtgagcagagatggtcacactgtaaggggaggtgatc +acactgtgaggggacgtggtcacactgtgaggggaggtggtcacactgtgagaggggagg +tgaccacacaggggaggtggtcacactgaggggaggtagtgacactgtgagcagaggtgg +tcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtg +gtcacactgaggggaagtggtcacactgagggggggtggtcacactgaggggaggtggtc +atactgtgagtgaggaggtagtcacactgaggggaggggtcatactgcgagtggaggtgg +tcacactgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggt +cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc +atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca +cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc +acactgaggggaggtggtcacagtg-------------------tgaggggaggtggtca +tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac +actgtgaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacact----ggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacact-------------------- +-ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga +ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtgtgag +gggaggtggtcacac------tggggaggtggtcacactgtgaggggaggtggtcacact +gtgaggggaggtggtcacac--tgagaggaggtggtcacactgaggggaagtggtcacac +tgaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactg +tgagtgaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca +ctgtgaagggtggtagtcacactgagaggaggtggtcaca--ctgaggggaagtggtcac +actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +tgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg +aggggaggtggtcatactgtgagtgaggaagtagtcacactgaggggaagtggtcacact +gaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcacac +tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca +ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg +tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gtgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactg +tgagtgaggaggtagtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatac +tgcgagtggaggtagtcacactgtgaagggtggtagtcaca--ctgagaggaggtggtca +cactgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac +actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +gtgtgcggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcat +actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcaca +ctgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca +ctgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac +tgaggggaggtggtcatagtgtgaggggaggtggtgacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat +tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt +gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga +ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac +tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaagtggtcacactga +ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg +ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga +gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg +cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga +>p:HG002_2_chr20:20354470-20358424 +tccagtctgggtgacagagagagaccttgtctcaaaaaaaaaaaaaaaaaaatcaggtac +agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata +agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg +gaaggtggtcacactgtgagaagggaggtagtcacattgtgagagaaaaagtgatcacac +tgtgtgaggggaggtggtcacactgtgagagaaaaagtaatcatactgtgaaagggaggt +ggtcacgctgagaggggaggtggtcacactgtgtcaggggaggtggtcacactgtgagag +aaaaagtaatcacactgtgaggggatgtggtcacactgagaggggaggtggtcagactgt +gtcagaggaggtggtcacactgaggggaggtggtcacactgtgagaggggaggtggtcag +actctgtcagaggaggtggtcacactgtgagaggggaggtggtcacactgaggggaagtg +gtcacactgtgagaggggaggtggtcacactgaggggagatggtcacactgcaagagggg +aggtgatcacactgagggg-----------------------aggtggtcacactgtgag +agggaaggtggtcacactgtgagagggaaggtggtcacactgtgtgaggggaggtggtca +cactgtgaggggaggtggtcacattgtgagagggaagggggtcactgtgagaggggagaa +ggtcacactaaggggaggtagtcacactgtgaggggaggtgatcacactaaggggaggtg +gtcacactgtgagaggaggtggtcacactgtgaggggtggtcacactgtgaggggaggtc +acactgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggt +cacactgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtgg +tcacactgaggggaggtactcacactgtgaggggaggtggtcacactgtgaggggaggtg +gtcacactgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggg +ggtcacactgtgagggggaaggtggtcacactgtgagaggggagaaggtcacactaaggg +gaggtagtcacactgtgaggggaggtgatcacactaaggggaggtggtcacactgtgaga +ggaggtggtcacactgtgaggggaggtcacactgaggggaggtagtgacactatgagcag +agatggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgagca +gagatggtcacactgtgaggggaggtggtcacactgaggggaggtagtcacactgtgagg +ggaggtggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgag +cagagatggtcacactgtgaggggagggggtcacactgtgagggggaaggtggtcacact +gtgagaggggagaaggtcacactaaggggaggtagtcacactgtgaggggaggtgatcac +actaaggggaggtggtcacactgtgagaggaggtggtcacactgtgaggggaggtcacac +tgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggtcaca +ctgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtggtcac +actgaggggaggtagtcacactgtgaggggaggtggtcacactgtgaggggaggtggtca +cactgaggggaggtagtgacactgtgagcagagatggtcacactgtaaggggaggtgatc +acactgtgaggggacgtggtcacactgtgaggggaggtggtcacactgtgagaggggagg +tgaccacacaggggaggtggtcacactgaggggaggtagtgacactgtgagcagaggtgg +tcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtg +gtcacactgaggggaagtggtcacactgagggggggtggtcacactgaggggaggtggtc +atactgtgagtgaggaggtagtcacactgaggggaggggtcatactgcgagtggaggtgg +tcacactgtgaagggtggtagtcacactgagaggaggtgttcacactgaggggaagtggt +cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc +atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca +cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc +acactgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtca +tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcaca-- +ctgaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactgtg +aggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga +ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgtgag +gggaggtggtcatactgtgagtgaggaggtagtcacac--tgaggggagggggtcatact +gcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcacac +tgaggggaagtggtcacactgaggggaggtggtcatactg-------------------- +--------------------tgaggggaagtggtcacactgaggggaggtggtcatagtg +tga--ggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +c--tgaggggaggtggtcacactgaggggaggtggtcacagtgtgagaggaggtggtcac +actgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcaca +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcacag +tgtgaggggaggtggtcacactgaggggaggtggtcaca-------------------gt +gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg +aggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacact +gaggggaggtg------------------------------------------gtcacac +tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca +ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg +tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggag----------- +------------------------------------------------------------ +------------------------------gtggtcacactgaggggaggtggtcacact +gaggggaggtggtcacagtgtga--ggggaggtggtcacactgaggggaggtggtcacag +tgtacggggaggtggtcatac--tgaggggaggtggtcacagtgtgaggggaggtggtca +cactgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcaca-c-tgaggagaggtggtcat +-------------------agtgtgaggggaggtggtcacac------------------ +------------------------------------------------------------ +-----------------------tgaggggaggtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca +gtg--------------------------------------tgaggggaggtggtcacac +tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat +tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg +tgcggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt +gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga +ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac +tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacactga +ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg +ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga +gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg +cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga +>ref_chr20:20354470-20358424 +tccagtctgggtgacagagagagaccttgtctc-aaaaaaaaaaaaaaaaaatcaggtac +agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata +agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatagtcacactgagg +gaaggtggtcacactgtgagaagggaggtagtcacattgtgagagaaaaagtgatcacac +tgtgtgaggggaggtggtcacactgtgagagaaaaagtaatcatactgtgaaagggaggt +ggtcacgctgagaggggaggtggtcacactgtgtcaggggaggtggtcacactgtgagag +aaaaagtaatcacactgtgaggggatgtggtcacactgagaggggaggtggtcagactgt +gtcagaggaggtggtcacactgagg----------------------------------- +-----------ggaggtggtcacactgtgagaggggaggtggtcacactgaggggaagtg +gtcacactgtgagaggggaggtggtcacactgaggggagatggtcacactgcaagagggg +aggtgatcacactgagggg-----------------------aggtggtcacactgtgag +agggaaggtggtcacactgtgagagggaaggtggtcacactgtgtgaggggaggtggtca +cactgtgaggggaggtggtcacattgtgagagggaagggggtcactgtgagaggggagaa +ggtcacactaaggggaggtagtcacactgtgaggggaggtgatcacactaaggggaggtg +gtcacactgtgagaggaggtggtcacactgtgaggggtggtcacactgtgaggggaggtc +acactgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggt +cacactgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtgg +tcacactgaggggagg-------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----tggtcacactgtgaggggaggtggtcacactgaggggaggtggtcacactgtgagg +ggaggtggtcacactgtgaggggaggtggtcacactgaggggaggtagtgacactgtgag +cagagatggtcacactgtgaggggagggggtcacactgtgagggggaaggtggtcacact +gtgagaggggagaaggtcacactaaggggaggtagtcacactgtgaggggaggtgatcac +actaaggggaggtggtcacactgtgagaggaggtggtcacactgtgaggggaggtcacac +tgaggggaggtagtgacactatgagcagagatggtcacactgtgaggggaggtggtcaca +ctgaggggaggtagtgacactgtgagcagagatggtcacactgtgaggggaggtggtcac +actgaggggaggtagtcacactgtgaggggaggtggtcacactgtgaggggaggtggtca +cactgaggggaggtagtgacactgtgagcagagatggtcacactgtaaggggaggtgatc +acactgtgaggggac---gtcacactgtgaggggaggtggtcacactgtgagaggggagg +tgaccacacaggggaggtggtcacactgaggggaggtagtgacactgtgagcagagatgg +tcacactgtgaggggaggtggtcacac--tgaggggaggtagtcacactgagaggaggtg +gtcacactgaggggaagtggtcacactgagggggggtggtcacactgaggggaggtggtc +atactgtgagtgaggagctagtcacactgaggggaggggtcatactgcgagtggaggtag +tcacactgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggt +cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc +atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca +cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc +acactgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtca +tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacact----ggggaggtggtcacactgtgaggggaggtggtcac +actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacact-------------------- +-ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga +ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtgtgag +gggaggtggtcacac------tggggaggtggtcacactgtgaggggaggtggtcacact +gtgaggggaggtggtcacac--tgagaggaggtggtcacactgaggggaagtggtcacac +tgagaggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactg +tgagtgaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca +ctgtgaagggtggtagtcacactgagaggaggtggtcaca--ctgaggggaagtggtcac +actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcaca---------------------------------------- +------------------------------------------------------------ +----------------------------------------------------------gt +gtgcggggaggtggtcatactgaggggaggtggtcacactgaggggaggtggtcacactg +aggggaggtggtcatactgtgagt------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------gaggaggtagtcacactgaggggagggggtcatac +tgcgagtggaggtagtcacactgtgaagggtggtagtcaca--ctgagaggaggtggtca +cactgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac +actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------actgaggggaggtggtcacagtgt +gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga +ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac +tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaagtggtcacactga +ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg +ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga +gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg +cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa new file mode 100644 index 00000000..285703c8 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa @@ -0,0 +1,155 @@ +>syndip_1_chr20:14861837-14862842 +gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt +attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac +attggaggttttatatataaatatatatatatatttatatatatatttatatatatatat +atttatatatatttatatatatatttatatatatatttatatatatatttatatatatat +ttatatatatatttatatatatatttatatatatatttatatatatatttatatatattt +atatatatttatatatatttttatatatttttatatatataaatatataaaaatatatat +aaatatataaaaatatataaaaatatataaatatatataaatatataaatatatataaat +atatataaatatatatttatatataaatatatataaatatatatttatacataaatatat +ataaatatatatttatacataaatatataaatatatataatatatataaatatatattat +acataaatatataaatatatattatacataaatatataaatatatataaatatatataaa +tatatatttatacataaatatataaatatatatataaatatatataaatatatataaata +tatatataaatatatatataaatatatataaatatatatataaatatatatataaatata +tataaatatatataaatatatatataaatatatataaatatatataaatatatatttata +tattaatatataaatatatataaatatatatttatatattaatatataaatatatatata +ttaatatatataaatatatataaatatatatttatatattaatatatatatattaatata +tatatatttatatattaatatatatatttatatattaatatatataaatatattactatg +tatatttatatatatttatataaatatatataaatatatgtaaatatatatataaaaata +tatattaatatataaaatatatatataaatataaataaatatatatataaatatatataa +ataaatatatatataaatatatataaataaatatataaatatatataaatatatatataa +atatatataaatatatataaatatatataaaaatatatataaatatatatataaatatat +ataaatatatatataaatatatatataaatatatatataaatatatataaatatatatat +aaatatatataaatatatatataattatatataaatatatataaatatatataaatatat +atataaatatatataaatatatataaatatatataaatatatatataaatatatataaat +atatatatataaatatatataaatatatatataaatatatataaatatatatatataaat +atatataaatatatatataaatatatataaatatatatataaatataaatatatatgtat +aaatatatatataaatataaatatatatgtataaatatatatataaatatatatatataa +atatatatatacatatataaatatatatataaatatatataaaaatatatatatatatat +ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct +tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca +gaag +>syndip_2_chr20:14861837-14862842 +gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt +attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac +attggaggttttatatataaatatatatatatatatttatatatatatttatatatatat +atttatatatatt----------------------------------------------- +---------------tatatatatttatatatatatttttatatatatttatatatatt- +-tatatatttatatatatttatatatatttttatatatataaatatataaaa-------- +------------------------------------------------------------ +----------------------------atatataaatatatataaatatataaatatat +ataaatatatatttatacataaatatataaatatatataatatatataaatatatattat +acataaatatataaatatatattatacataaatatataaatatatataaatatatataaa +tatatatttatacataaatatataaatatat------atatatataaatatatataa--- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------atatatatttatatattaatatatataaatatattactatg +tatatttatatatatttatataa--atatataaatatatgtaaatatatatataaaaata +tatattaatatataaaa---------atatatatataaatatatatataaatatatataa +ataaatatatataaatatatatataaat--atatataaatatatataaatatatatataa +atatatataaatatatataaatatatatataaatatatatataaatatatataaatatat +ataaataaatatataaatatatatataaatatatataaatatatatataaatatatataa +atatatatataaatatatataaatatatatataaatatatataaat--------atatat +ataaatatatatataaatatatataaatatatataaatatatatatataaatatatataa +atatatatataaatataaatatatataaatatataaaaatatatatataaatataaatat +atataaatatatatgtataaatatatatataaatataaatatatataaatatatatatat +aaatatatatataaatatatataaatatatataaaaatatatataaatatatatataaat +atatatataaatatatataaatatatatataaatatatataaaaatatatatatattttt +ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct +tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca +gaag +>p:HG002_1_chr20:14861837-14862842 +gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt +attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac +attggaggttttatatataaatatatatatatatatttatatatatatttatatatatat +atttatatatatt----------------------------------------------- +---------------tatatatatttatatatatatttttatatatatttatatatatt- +-tatatatttatatatatttatatatatttttatatatataaatatataaaa-------- +------------------------------------------------------------ +----------------------------atatataaatatatataaatatataaatatat +ataaatatatatttatacataaatatataaatatatataatatatataaatatatattat +acataaatatataaatatatattatacataaatatataaatatatataaatatatataaa +tatatatttatacataaatatataaatatat------atatatataaatatatataa--- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------atatatatttatatattaatatatataaatatattactatg +tatatttatatatatttatataa--atatataaatatatgtaaatatatatataaaaata +tatattaatatataaaa---------atatatatataaatatatatataaatatatataa +ataaatatatataaatatatatataaat--atatataaatatatataaatatatatataa +atatatataaatatatataaatatatatataaatatatatataaatatatataaatatat +ataaataaatatataaatatatatataaatatatataaatatatatataaatatatataa +atatatatataaatatatataaatatatatataaatatatataaat--------atatat +ataaatatatatataaatatatataaatatatataaatatatatatataaatatatataa +atatatatataaatataaatatatataaatatataaaaatatatatataaatataaatat +atataaatatatatgtataaatatatatataaatataaatatatataaatatatatatat +aaatatatatataaatatatataaatatatataaaaatatatataaatatatatataaat +atatatataaatatatataaatatatatataaatatatataaaaatatatatatattttt +ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct +tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca +gaag +>p:HG002_2_chr20:14861837-14862842 +gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt +attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac +attggaggttttatatataaatatatatatatatttatatatatatttatatatatatat +atttatatatatttatatatatatttatatatatatttatatatatatttatatatatat +ttatatatatatttatatatatatttatatatatatttatatatatatttatatatattt +atatatatttatatatatttttatatatttttatatatataaatatataaaaatatatat +aaatatataaaaatatataaaaatatataaatatatataaatatataaatatatataaat +atatataaatatatatttatatataaatatatataaatatatatttatacataaatatat +ataaatatatatttatacataaatatataaatatatataatatatataaatatatattat +acataaatatataaatatatattatacataaatatataaatatatataaatatatataaa +tatatatttatacataaatatataaatatatatataaatatatataaatatatataaata +tatatataaatatatatataaatatatataaatatatatataaatatatatataaatata +tataaatatatataaatatatatataaatatatataaatatatataaatatatatttata +tattaatatataaatatatataaatatatatttatatattaatatataaatatatatata +ttaatatatataaatatatataaatatatatttatatattaatatatatatattaatata +tatatatttatatattaatatatatatttatatattaatatatataaatatattactatg +tatatttatatatatttatataaatatatataaatatatgtaaatatatatataaaaata +tatattaatatataaaatatatatataaatataaataaatatatatataaatatatataa +ataaatatatatataaatatatataaataaatatataaatatatataaatatatatataa +atatatataaatatatataaatatatataaaaatatatataaatatatatataaatatat +ataaatatatatataaatatatatataaatatatatataaatatatataaatatatatat +aaatatatataaatatatatataattatatataaatatatataaatatatataaatatat +atataaatatatataaatatatataaatatatataaatatatatataaatatatataaat +atatatatataaatatatataaatatatatataaatatatataaatatatatatataaat +atatataaatatatatataaatatatataaatatatatataaatataaatatatatgtat +aaatatatatataaatataaatatatatgtataaatatatatataaatatatatatataa +atatatatatacatatataaatatatatataaatatatataaaaatatatatatatatat +ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct +tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca +gaag +>ref_chr20:14861837-14862842 +gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt +attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac +attggaggttttatatataaatatatatatatatatttatatatatatttatatatatat +atttatatatatt----------------------------------------------- +---------------tatatatatttatatatatatttatat------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------atatataaatatatataaatatataaatatat +ataaatatatatttatacataaatatataaatatata----------------------- +----atatatataaatatatattatacataaatatataaatatatataaatatatataaa +tatatatttatacataaatatataaatatat------atatatataaatatatataa--- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------atatatatttatatattaatatatataaatatattactatg +tatatttatatatatttatataaatatatataaatatatgtaaatatatatataaaaata +tatattaatatataaaa---------atatatat-------------------------- +ataaatatatataaatatatatataaat--atatataaat-------------------- +------------------------------------------------------------ +--------------------------------------atatatatataaatatatataa +atatatatataaatatatataaatatatatataaatatatataaat--------atatat +atataaatatatataa----------------------atatatatataaatatatataa +atatatatataaatataaatatatataaatatataaaaatatatatataaatataaatat +atataaatatatatgtataaatatatatataaatataaatatatataaatatatatatat +aaatatatatataaat------------------------------atatatatataaat +atatatataaatatatataaatatatatataaatatatataaatatatatatatatattt +ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct +tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca +gaag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa new file mode 100644 index 00000000..79c1ab78 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa @@ -0,0 +1,225 @@ +>syndip_1_chr20:16257641-16259446 +ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa +gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata +cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat +cccaaagagacacaccccttccccatcccaaagagacacaccccttccccatcccaaaca +cacacaccccttccccatcccaaacacacacaccccttccccatcccaaacacacacacc +cctttcccatcccaaacacacacacccattgcccatcctaaacacacacacaccttccct +tcgcaaacacacacaccccttccccatcacaaacacacacaccctttccctatcccatgc +acacacaccttccacttcccaaacacacacaccccttccccatcccaaacacacacaacc +cttccccaattctaaacacacacacctcttccccatcccaaacacaaacaccccttcccc +atcccaaacacacccaccctttcctgcatcccaaacacccacaccccttccccatcccaa +acacacaccccatccccaattccaaacacacacaccccttccccatcccaaacacacaca +ccccttccacatcagaaacacacacaccccttctcatcccaaacacaaacacacccattc +cccatcacaaacacacg--------------------------caccccttccccatccc +aaacacacacaccccttccccaattctaaacacacacaccccttccccatcccaaacaca +caccccccttccccatcccaaacacacaccccccttccccatcccaaacacacccacccc +ttcccacatcccaaacacacacaccccttccctatcccaaacacacactccatccccaat +tccaaacacacacaccccttccccatcccaaacacacacaccccttcccccatcccaaac +gcacacaccccttccctatcccaaacacacaccccttccctttccaaaacacacccatcc +cttccccatcccaaacacacacaacccttcccatcccagacacacccaccccttcccatc +tcaaatacacgcaacccttccccatcccaaacacacacacctattccccatccgaaacac +acacaccccttcctcatcccacacacacccattccccatcccagacacacccatcccttc +cccatactaaacacaaccaccccttccccatcccacacacacacaccccttccccaattc +caaacacacacaccccttccccattcaaaacacacgcaccccttcccatctgaaacacac +ccactccttcccatctgaaacacacccactccttcccatcccaaacacacccactccttc +ccatcccaaacacacacaccccttccccatcccaaaaacacacaccctttcgcaatccca +aatacacaagccccttcccaaattccacacacacacacctcttccccatcccaaacaaac +ccaccctttccccattcgaaacacacacaccccttctccaattccaaacacacacacccc +ttcccatctcaaacacacggacaccttccacatcccaaatacacacaccccttcccatcc +caaacacacacatcccttccccaattccaaaaacacacaccccttccccatcccaaacac +ccccaccccttcccatccccaaaacacacacccctttcccatcccaaatacacaaacccc +ttccctaattccacacacacacaccccttccccatcccaaacacacataccccttcccat +cccaaagacacacactcattctgcatcccaaacacacacaccccttcccatcccaaacac +acacactacctccgcatcccaaacacatataccccttccccatcccaaacacatacaccc +cttccccatcgccaacacacacaccccttccccatcccaaacacacacacccctttccat +cccaaacacacacccattccccatcccagacacacccacccgttccccatcccagacaca +cacaacccttccccaattccaaacacacaccccttcccatcccaaacacacacacccatt +tcccatcccagacacatccaccccttccacaactctgaacacccaccccttccccatccc +aaacacacacaccccttccccatcccaaacacacacacccattccccatcccaaacacac +acacccattccccatcccaaacacaaccaccccttccccatcccaaacacacacacccct +tccccatcccaaacacacacaca-------------------------ccttccccatcc +caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata +cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact +cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag +>syndip_2_chr20:16257641-16259446 +ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa +gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata +cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat +cccaaagagacacaccccttccccatcccaaagag------------------------- +-acacaccccttccccatcccaaacacacacaccccttccccatcccaaacacacacacc +cctttcccatcccaaacacacacacccattgcccatcctaaacacacacacaccttccct +tcgcaaacacacacaccccttccccatcacaaacacacacaccctttccctatcccatgc +acacacaccttccacttcccaaacacacacaccccttccccatcccaaacacacacaacc +cttccccaattctaaacacacacacctcttccccatcccaaacacaaacaccccttcccc +atcccaaacacacccaccctttcctgcatcccaaacacacacaccccttccccatcccaa +acacacaccccatccccaattccaaacacacacaccccttccccatcccaaacacacaca +ccccttccacatcagaaacacacacaccccttctcatcccaaacacaaacacacccattc +cccatcacaaacacacgcaccccttccccatcccaaacatacacaccccttccccatccc +aaacacacacaccccttccccaattctaaacacacacaccccttccccatcccaaacaca +caccccccttccccatcccaaacacacaccccccttccccatcccaaacacacccacccc +ttcccacatcccaaacacacacaccccttccctatcccaaacacacactccatccccaat +tccaaacacacacaccccttccccatcccaaacacacacaccccttcccccatcccaaac +gcacacaccccttccctatcccaaacacacaccccttccctttccaaaacacacccatcc +cttccccatcccaaacacacacaacccttcccatcccagacacacccaccccttcccatc +tcaaatacacgcaacccttc---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------------------c +ccatcccaaacacacacaccccttccccatcccaaaaacacacaccctttcgcaatccca +aatacacaagccccttcccaaattccacacacacacacctcttccccatcccaaacaaac +ccaccctttccccattcgaaacacacacaccccttctccaattccaaacacacacacccc +ttcccatctcaaacacacggacaccttccacatcccaaatacacacaccccttcccatcc +caaacacacacatcccttccccaattccaaaaacacacaccccttccccatcccaaacac +ccccaccccttcccatccccaaaacacacacccctttcccatcccaaatacacaaacccc +ttccctaattccacacacacacaccccttccccatcccaaacacacataccccttcccat +cccaaagacacacactcattctgcatcccaaacacacacaccccttcccatcccaaacac +acacactacctccgcatcccaaacacatataccccttccccatcccaaacacatacaccc +cttccccatcgccaacacacacaccccttccccatcccaaacacacacacccctttccat +cccaaacacacacccattccccatcccagacacacccacccgttccccatcccagacaca +cacaacccttccccaattccaaacacacaccccttcccatcccaaacacacacacccatt +tcccatcccagacacatccaccccttccacaactccgaacacccaccccttccccatccc +aaacacacacaccccttccccatcccaaacacacacacccattccccatcccaaacacac +acacccattccccatcccaaacacaaccaccccttccccatcccaaacacacacacccct +tccccatcccaaacacacacacaccttcccatcccaaacacacacaccccttccccatcc +caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata +cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact +cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag +>p:HG002_1_chr20:16257641-16259446 +ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa +gtctgtttctaagcagcagcctgatggatccatttttataatctgaaatcagatatcata +cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat +cccaaagagacacaccccttccccatcccaaagag------------------------- +-acacaccccttccccatcccaaacacacacaccccttccccatcccaaacacacacacc +cctttcccatcccaaacacacacacccattgcccatcctaaacacacacacaccttccct +tcgcaaacacacacaccccttccccatcacaaacacacacaccctttccctatcccatgc +acacacaccttccacttcccaaacacacacaccccttccccatcccaaacacacacaacc +cttccccaattctaaacacacacacctcttccccatcccaaacacaaacaccccttcccc +atcccaaacacacccaccctttcctgcatcccaaacacacacaccccttccccatcccaa +acacacaccccatccccaattccaaacacacacaccccttccccatcccaaacacacaca +ccccttccacatcagaaacacacacaccccttctcatcccaaacacaaacacacccattc +cccatcacaaacacacgcaccccttccccatcccaaacatacacaccccttccccatccc +aaacacacacaccccttccccaattctaaacacacacaccccttccccatcccaaacaca +caccccccttccccatcccaaacacacaccccccttccccatcccaaacacacccacccc +ttcccacatcccaaacacacacaccccttccctatcccaaacacacactccatccccaat +tccaaacacacacaccccttccccatcccaaacacacacaccccttcccccatcccaaac +gcacacaccccttccctatcccaaacacacaccccttccctttccaaaacacacccatcc +cttccccatcccaaacacacacaacccttcccatcccagacacacccaccccttcccatc +tcaaatacacgcaacccttc---------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------------------------c +ccatcccaaacacacacaccccttccccatcccaaaaacacacaccctttcgcaatccca +aatacacaagccccttcccaaattccacacacacacacctcttccccatcccaaacaaac +ccaccctttccccattcgaaacacacacaccccttctccaattccaaacacacacacccc +ttcccatctcaaacacacggacaccttccacatcccaaatacacacaccccttcccatcc +caaacacacacatcccttccccaattccaaaaacacacaccccttccccatcccaaacac +ccccaccccttcccatccccaaaacacacacccctttcccatcccaaatacacaaacccc +ttccctaattccacacacacacaccccttccccatcccaaacacacataccccttcccat +cccaaagacacacactcattctgcatcccaaacacacacaccccttcccatcccaaacac +acacactacctccgcatcccaaacacatataccccttccccatcccaaacacatacaccc +cttccccatcgccaacacacacaccccttccccatcccaaacacacacacccctttccat +cccaaacacacacccattccccatcccagacacacccacccgttccccatcc-------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------- +>p:HG002_2_chr20:16257641-16259446 +ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa +gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata +cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat +cccaaagagacacaccccttccccatcccaaagagacacaccccttccccatcccaaaca +cacacaccccttccccatcccaaacacacacaccccttccccatcccaaacacacacacc +cctttcccatcccaaacacacacacccattgcccatcctaaacacacacacaccttccct +tcgcaaacacacacaccccttccccatcacaaacacacacaccctttccctatcccatgc +acacacaccttccacttcccaaacacacacaccccttccccatcccaaacacacacaacc +cttccccaattctaaacacacacacctcttccccatcccaaacacaaacaccccttcccc +atcccaaacacacccaccctttcctgcatcccaaacacccacaccccttccccatcccaa +acacacaccccatccccaattccaaacacacacaccccttccccatcccaaacacacaca +ccccttccacatcagaaacacacacaccccttctcatcccaaacacaaacacacccattc +cccatcacaaacacacg--------------------------caccccttccccatccc +aaacacacacaccccttccccaattctaaacacacacaccccttccccatcccaaacaca +caccccccttccccatcccaaacacacaccccccttccccatcccaaacacacccacccc +ttcccacatcccaaacacacacaccccttccctatcccaaacacacactccatccccaat +tccaaacacacacaccccttccccatcccaaacacacacaccccttcccccatcccaaac +gcacacaccccttccctatcccaaacacacaccccttccctttccaaaacacacccatcc +cttccccatcccaaacacacacaacccttcccatcccagacacacccaccccttcccatc +tcaaatacacgcaacccttccccatcccaaacacacacacctattccccatccgaaacac +acacaccccttcctcatcccacacacacccattccccatcccagacacacccatcccttc +cccatactaaacacaaccaccccttccccatcccacacacacacaccccttccccaattc +caaacacacacaccccttccccattcaaaacacacgcaccccttcccatctgaaacacac +ccactccttcccatctgaaacacacccactccttcccatcccaaacacacccactccttc +ccatcccaaacacacacaccccttccccatcccaaaaacacacaccctttcgcaatccca +aatacacaagccccttcccaaattccacacacacacacctcttccccatcccaaacaaac +ccaccctttccccattcgaaacacacacaccccttctccaattccaaacacacacacccc +ttcccatctcaaacacacggacaccttccacatcccaaatacacacaccccttcccatcc +caaacacacacatcccttccccaattccaaaaacacacaccccttccccatcccaaacac +ccccaccccttcccatccccaaaacacacacccctttcccatcccaaatacacaaacccc +ttccctaattccacacacacacaccccttccccatcccaaacacacataccccttcccat +cccaaagacacacactcattctgcatcccaaacacacacaccccttcccatcccaaacac +acacactacctccgcatcccaaacacatataccccttccccatcccaaacacatacaccc +cttccccatcgccaacacacacaccccttccccatcccaaacacacacacccctttccat +cccaaacacacacccattccccatcccagacacacccacccgttccccatcccagacaca +cacaacccttccccaattccaaacacacaccccttcccatcccaaacacacacacccatt +tcccatcccagacacatccaccccttccacaactctgaacacccaccccttccccatccc +aaacacacacaccccttccccatcccaaacacacacacccattccccatcccaaacacac +acacccattccccatcccaaacacaaccaccccttccccatcccaaacacacacacccct +tccccatcccaaacacacacaca-------------------------ccttccccatcc +caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata +cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact +cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag +>ref_chr20:16257641-16259446 +ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa +gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata +cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat +cccaaagagacacaccccttccccatcccaaagagacacaccccttccccatcccaaaca +cacacaccccttccccatcccaaacacacacaccccttccccatcccaaacacacacacc +cctttcccatcccaaacacacacacccattgcccatcctaaacacacacacaccttgcct +tcgcaaacacacacaccccttccccatcacaaacacacacaccctttccctatcccatgc +acacacaccttccacttcccaaacacacacaccccttccccatgccaaacacacacaacc +cttccccaattctaaacacacacacctcttccccatcccaaacacaaacaccccttcccc +atcccaaacacacccaccctttcctgcatgccaaacacccacaccccttccccatcccaa +acacacaccccatccccaattccaaacacacacaccccttccccatcccaaacacacaca +ccccttccacatgagaaacacacacaccccttctcatcccaaacacaaacacacccattc +cccatcacaaacacacg--------------------------caccccttccccatccc +aaacacacacaccccttccccaattgt--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------aaacacacacaccccttctccaattccaaacacacacacccc +ttcccatcacaaacacacacacaccttccacatcccaaatacacacaccccttcccatct +caaacacacacatcccttccccaattccaaaaacacacaccccttccccatcccaaacac +ccccaccccttcccatcccc-aaacacacacccctttcccatcccaaatacacaaacccc +ttccctaattccacacacacacaccccttccccatcccaaacacacataccccttcccat +cccaaagacacacactcattctgcatcccaaacacacacaccccttcccatcccaaacac +acacactacctccgcatcccaaacacatataccccttccccatcccaaacacatacaccc +cttccccatcgccaacacacacaccccttccccatcccaaacacacacacccctttccat +cccaaacacacacccattccccatcccagacacacccacccgttccccatcccagacaca +cacaacccttccccaattccaaacacacaccccttcccatcccaaacacacacacccatt +tcccatcccagacacatccaccccttccacaactctgaacacccaccccttccccatccc +aaacacacacaccccttccccatcccaaacacacacacccattccccatcccaaacacac +acacccattccccatcccaaacacaaccaccccttccccatcccaaacacacacacccct +tccccatcccaaacacacacaca-------------------------ccttccccatcc +caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata +cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact +cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa new file mode 100644 index 00000000..a00f7abb --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa @@ -0,0 +1,205 @@ +>syndip_1_chr20:63948414-63948859 +acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca +agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc +catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga +-ggggcgtgtgtgagagggaggggcgtgtgtgagggaggggtgtgtgtgagagggagggg +cgtgtgtgagagggagggggcgtgtgtgagaggcagggg-gcgtgtgtgtgaggggcgtg +tgtgagagggaggggcgtgtgtgagagggagggg-gcgtgtgtgagggggggcgtgtgtg +agaggtaggggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagag +gg-----------------aggggcgtgtgtgtgagggaggggcgtgtgagag-ggaggg +ggcgtgtgtgagaggcaggggcgtgtgtgtgagggaggggcgtgagagggaggggcgtgt +gtgagagggagggggcgtgtgtgagggggggcgtgtgtgagaggtaggggcgtgtgt--- +-gggagggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgaga +cggaggggcgtgtgtgagagggagggggcgtgtgtgagagggagaggcgtgtgtgagagg +gaggggcgtgtgtgagggagggg---gcgtgtgagagggaggggcgtgtgtgagagggag +ggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcgtgtgtgagggaggggg +cgtgtgtgagagggaggggcgtgtgagaggg-aggggcgtgtgtgagagg---------- +gagggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgaaggga +ggggcgtgtgtgagagggagggggcgtgagagggaggggcgtgtgtgagagggaggggcg +tgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtg +tgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtga +gag--------ggagggggcgtgtgagagggagggggcgtgtgtgagagggagaggcgtg +tgtgagaggga-ggggcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtg +agagggagggggcgtgtgtgagagggagggggcgtgt-------------gtgagaggga +gaggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagagggagagg +cgtgtgtgagagggagggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcg +tgtgtgagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgt +gtgagagggagaggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtg +agagggaggggcgtgtgtgagagggag--------------ggggcgtgtgtgagaggga +ggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtgtgagaggga-g +gggcgtgtgtgagagggaggggcgtgtgtgaagggaggggcgtgtgtgagagtgggggcg +tgtgtgagttaggggcgtgagaggtaggggcgtgtgtgagagggaggggcgtgtgtgaga +gggagggggcgtg-----agagggaggggcgtgtgtgagagggaggggcgtgtgtgagag +ggaggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagaggg +aggggcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtgagagggagggg +-gcgtgtgagagggaggggcgtgtgtgagagggaggggc--gtgtgagagggaggggcgt +gtgtgagaggga-ggggcgtgtgtgagagggagggg-cgtgtgtgagagggaggggcgtg +agagggagggggcgtgtgt--gagggaggggcgtgtgtgagaggga-ggggggcgtgtgt +gagagggagggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa +gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc +tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt +gccactttgttgccataaaccaaaccgtagtctgt +>syndip_2_chr20:63948414-63948859 +acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca +agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc +catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga +gggggcgtgtgtgagagggaggggcgtgtgtgagggaggggtgtgtgtgagagggagggg +cgtgtgtgagagggagggggcgtgtgtgaaagggaggggcgtgtgtgagggaggggcgtg +tgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtgagggagggggcgtgtgtg +agagggaggggcgtgtgtgagaggcaggggcgtgtgtgagagtgggggcgtgtgtgaggg +ggggcgtgtgtgagagggagggggcgtgtgtgagaggtaggggcgtgtgtgagagggagg +ggcgtgtgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtggggggggcgtgt +gtgag-gggggggcgtgtgtgagagggagggggcgtgtgagagggaggggcgtgtgtgag +agggagggggcgtgtgtgtgaggga-ggggcgtgtgtgagagggaggggcgtgtgtga-- +-agggaggggtgtgtgtgagagtgggggcgtgtgtgagaggg--------gcgtgagagg +taggggcgtgtgtgagagggaggggtgtgtgtgagagggaggggcgtgtgtgagaggga- +ggggcgtgtgtgag-ggagggggcgtgtgtgagagggaggggcgtgtgtgagggaggggg +cgtgtgtgagagggaggggcgtgtgtgagagtgggggcgtgtgtgggggggcgtgtgtga +gggggggcgtgtgtgagaggta-ggggcgtgtgtgagagggaggggcgtgtgtg-aggga +ggggcgtgtgtgagaggcaggggcgtgtgtgggggggggcgtgtgt--gaggggggggcg +tgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtg +tgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtggggggggcgtgtgtgagg +ggggcgtgtgagagggaggggcgtgtgtggggaggggcgtgtgtgagagggaggggcgtg +tgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtg +tgaagggaggggtgtgtgtgagagtgggggcgtgtgtgagagttaggggcgtgagaggta +ggggcgtgtgtgagagggaggggcgtgtgtgaggga-ggggcgtgtgtgagagggagggg +cgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcg +tgtgtgagggagggggcgtgtgcgagagggaggggcgtgtgtgagagggagggggcgtgt +gtgag-ggagggggcgtgtgtgagagggaggggcgtgtgagag-ggagggggcgtgtgtg +agaggcaggggcgtgtgtgtgagggaggggcgtgagagggaggggcgtgtgtga---ggg +ggggcgtgtgtgagaggtaggggcgtgtgtgagagggagggggcgtgtgtgagagggagg +gggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgt--------------- +----------------gtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgaga +gggagggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtga-ag +ggaggggcgtgtgtgagagtgggggcgtgtgtgagaggg----------gcgtgagaggt +aggggcgtgtgtgagagggagggg-gcgtgagagggaggggcgtgtgtgagagggagggg +cgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagaggcaggggcgt +gtgtgagagggagggggcgtgtgtgagagggagggg-cgtgt---------------gtg +agagggaaggggcgtgtga--gagggaggggcgtgtgtgagaggga-ggggcgtgtgtga +ga---gggaggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa +gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc +tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt +gccactttgttgccataaaccaaaccgtagtctgt +>p:HG002_1_chr20:63948414-63948859 +acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca +agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc +catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga +gggggcgtgtgtgagagggaggggcgtgtgtgagggaggggtgtgtgtgagagggagggg +cgtgtgtgagagggagggggcgtgtgtgaaagggaggggcgtgtgtgagggaggggcgtg +tgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtgagggagggggcgtgtgtg +agagggaggggcgtgtgtgagaggcaggggcgtgtgtgagagtgggggcgtgtgtgaggg +ggggcgtgtgtgagagggagggggcgtgtgtgagaggtaggggcgtgtgtgagagggagg +ggcgtgtgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtggggggggcgtgt +gtgag-gggggggcgtgtgtgagagggagggggcgtgtgagagggaggggcgtgtgtgag +agggagggggcgtgtgtgtgaggga-ggggcgtgtgtgagagggaggggcgtgtgtga-- +-agggaggggtgtgtgtgagagtgggggcgtgtgtgagaggg--------gcgtgagagg +taggggcgtgtgtgagagggaggggtgtgtgtgagagggaggggcgtgtgtgagaggga- +ggggcgtgtgtgag-ggagggggcgtgtgtgagagggaggggcgtgtgtgagggaggggg +cgtgtgtgagagggaggggcgtgtgtgagagtgggggcgtgtgtgggggggcgtgtgtga +gggggggcgtgtgtgagaggta-ggggcgtgtgtgagagggaggggcgtgtgtg-aggga +ggggcgtgtgtgagaggcaggggcgtgtgtgggggggggcgtgtgt--gaggggggggcg +tgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtg +tgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtggggggggcgtgtgtgagg +ggggcgtgtgagagggaggggcgtgtgtggggaggggcgtgtgtgagagggaggggcgtg +tgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtg +tgaagggaggggtgtgtgtgagagtgggggcgtgtgtgagagttaggggcgtgagaggta +ggggcgtgtgtgagagggaggggcgtgtgtgaggga-ggggcgtgtgtgagagggagggg +cgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcg +tgtgtgagggagggggcgtgtgcgagagggaggggcgtgtgtgagagggagggggcgtgt +gtgag-ggagggggcgtgtgtgagagggaggggcgtgtgagag-ggagggggcgtgtgtg +agaggcaggggcgtgtgtgtgagggaggggcgtgagagggaggggcgtgtgtga---ggg +ggggcgtgtgtgagaggtaggggcgtgtgtgagagggagggggcgtgtgtgagagggagg +gggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgt--------------- +----------------gtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgaga +gggagggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtga-ag +ggaggggcgtgtgtgagagtgggggcgtgtgtgagaggg----------gcgtgagaggt +aggggcgtgtgtgagagggagggg-gcgtgagagggaggggcgtgtgtgagagggagggg +cgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagaggcaggggcgt +gtgtgagagggagggggcgtgtgtgagagggagggg-cgtgt---------------gtg +agagggaaggggcgtgtga--gagggaggggcgtgtgtgagaggga-ggggcgtgtgtga +ga---gggaggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa +gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc +tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt +gccactttgttgccataaaccaaaccgtagtctgt +>p:HG002_2_chr20:63948414-63948859 +acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca +agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc +catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga +-ggggcgtgtgtgagagggaggggcgtgtgtgagggaggggtgtgtgtgagagggagggg +cgtgtgtgagagggagggggcgtgtgtgagaggcagggg-gcgtgtgtgtgaggggcgtg +tgtgagagggaggggcgtgtgtgagagggagggg-gcgtgtgtgagggggggcgtgtgtg +agaggtaggggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagag +gg-----------------aggggcgtgtgtgtgagggaggggcgtgtgagag-ggaggg +ggcgtgtgtgagaggcaggggcgtgtgtgtgagggaggggcgtgagagggaggggcgtgt +gtgagagggagggggcgtgtgtgagggggggcgtgtgtgagaggtaggggcgtgtgt--- +-gggagggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgaga +cggaggggcgtgtgtgagagggagggggcgtgtgtgagagggagaggcgtgtgtgagagg +gaggggcgtgtgtgagggagggg---gcgtgtgagagggaggggcgtgtgtgagagggag +ggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcgtgtgtgagggaggggg +cgtgtgtgagagggaggggcgtgtgagaggg-aggggcgtgtgtgagagg---------- +gagggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgaaggga +ggggcgtgtgtgagagggagggggcgtgagagggaggggcgtgtgtgagagggaggggcg +tgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtg +tgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtga +gag--------ggagggggcgtgtgagagggagggggcgtgtgtgagagggagaggcgtg +tgtgagaggga-ggggcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtg +agagggagggggcgtgtgtgagagggagggggcgtgt-------------gtgagaggga +gaggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagagggagagg +cgtgtgtgagagggagggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcg +tgtgtgagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgt +gtgagagggagaggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtg +agagggaggggcgtgtgtgagagggag--------------ggggcgtgtgtgagaggga +ggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtgtgagaggga-g +gggcgtgtgtgagagggaggggcgtgtgtgaagggaggggcgtgtgtgagagtgggggcg +tgtgtgagttaggggcgtgagaggtaggggcgtgtgtgagagggaggggcgtgtgtgaga +gggagggggcgtg-----agagggaggggcgtgtgtgagagggaggggcgtgtgtgagag +ggaggggcgtgtgtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagaggg +aggggcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtgagagggagggg +-gcgtgtgagagggaggggcgtgtgtgagagggaggggc--gtgtgagagggaggggcgt +gtgtgagaggga-ggggcgtgtgtgagagggagggg-cgtgtgtgagagggaggggcgtg +agagggagggggcgtgtgt--gagggaggggcgtgtgtgagaggga-ggggggcgtgtgt +gagagggagggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa +gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc +tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt +gccactttgttgccataaaccaaaccgtagtctgt +>ref_chr20:63948414-63948859 +acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca +agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc +catgcgcctcg------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--gtgtgaggagggaggcgtgtgtgagagggaggggatgtgt---------------gtg +agagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaaggggcgtgtgtga +ga---gggaggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa +gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc +tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt +gccactttgttgccataaaccaaaccgtagtctgt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_a3b02bde8a503aab8bb0da7f33f98a9d.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_a3b02bde8a503aab8bb0da7f33f98a9d.msa new file mode 100644 index 00000000..21ba1dec --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_a3b02bde8a503aab8bb0da7f33f98a9d.msa @@ -0,0 +1,130 @@ +>syndip_1_chr20:63028776-63029503 +caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg +agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca +aacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcg +cctccgtgtgcaggtcccccgggcctccgcctctccgtgtgcaggtcccccgggcctccg +cgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctc +cgcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcc +tccgcgtctgtgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccggg +cctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccg +ggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccc +cgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctccgtgtgcaggtcc +cccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggt +cccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcag +gtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgc +aggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgt +gcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgt +gtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctcc +gtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcct +ccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgc +ctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgc +gcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctcc +gcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct +ccgcgcctccgtgtgcaggtcccccgggcctctgcgtctctgtgtgcaggctcaagtttg +ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt +catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca +t +>syndip_2_chr20:63028776-63029503 +caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg +agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca +aa---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------cgcac +ccga-------------------------------------------------------- +------------------------------------------------------------ +--------gtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcct +ccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgc +ctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgc +gcctccgtgtgcaggtcccccgggcctccgcgtctccgtgtgcaggtcccccgggcctcc +gcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct +ccgcgtctccgtgtgcaggtcccccgggcccctgcgtctctgtgtgcaggctcaagtttg +ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt +catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca +t +>p:HG002_1_chr20:63028776-63029503 +caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg +agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca +aa---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------cgcac +ccga-------------------------------------------------------- +------------------------------------------------------------ +--------gtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcct +ccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgc +ctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgc +gcctccgtgtgcaggtcccccgggcctccgcgtctccgtgtgcaggtcccccgggcctcc +gcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct +ccgcgtctccgtgtgcaggtcccccgggcccctgcgtctctgtgtgcaggctcaagtttg +ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt +catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca +t +>p:HG002_2_chr20:63028776-63029503 +caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg +agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca +aacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcg +cctccgtgtgcaggtcccccgggcctccgcctctccgtgtgcaggtcccccgggcctccg +cgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctc +cgcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcc +tccgcgtctgtgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccggg +cctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccg +ggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccc +cgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctccgtgtgcaggtcc +cccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggt +cccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcag +gtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgc +aggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgt +gcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgt +gtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctcc +gtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcct +ccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgc +ctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgc +gcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctcc +gcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct +ccgcgcctccgtgtgcaggtcccccgggcctctgcgtctctgtgtgcaggctcaagtttg +ccaacgtccatgcacgtctcagcctctcagcctggactggacaactcggcttcgggaatt +catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgaattca +t +>ref_chr20:63028776-63029503 +caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg +agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca +aa---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------cgcac +ccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgt +gtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctccgcgcctcc +gtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcct +ccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgc +ctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgc +gcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctcc +gcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct +ccgcgtctccgtgtgcaggtcccccgggcctctgcgtctctgtgtgcaggctcaagtttg +ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt +catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca +t diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa new file mode 100644 index 00000000..a5d29a6d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa @@ -0,0 +1,660 @@ +>syndip_1_chr20:63693225-63693985 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttc +accaccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctcc +accaccaccacctccaccaccaccacctccaccaccaccaccaccaccacctccaccacc +accaccaccaccaccacctccacctccaccacctccaccaccacctccacctccaccacc +acctccacctccacctccaccaccaccacctccaccacctccaccacctccacctccacc +accacctccacctccaccaccacctccacctccacctccacctccacctccaccaccacc +acctccaccacctccaccacctccacctccaccaccaccacctccacctccacctccacc +acctccacctccaccacctccacctccaccacctccaccaccaccaccaccaccaccacc +acctccaccaccaccaccaccaccacctccaccacctccacctccaccacctccaccacc +accacctccaccaccacctccaccaccacctccacctccaccacctccacctccaccacc +tccacctccacctccaccaccacctccaccacctccaccaccacctccaccacctccacc +tccaccaccaccacctccaccaccaccaccaccaccacctccaccacctccacctccacc +acctccaccaccaccacctccaccaccacctccaccaccacctccacctccaccacctcc +acctccaccacctccacctccacctccaccaccacctccaccacctccaccaccacctcc +accacctccaccacctccaccaccacctccaccacctccacctccaccaccaccacctcc +acctccaccaccaccacctccaccaccacctccacctccaccaccaccacctccacctcc +accaccaccacctccacca---ccacctccaccaccaccaccaccaccaccacctccacc +tccaccacctccaccaccaccaccaccacctccacctccaccacctccaccacctccacc +accaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc +accacctccaccaccaccacctccacctccacctccacctccacctccaccaccacctcc +acctccaccaccacctccaccaccacctccacctccaccacctccacctccacctccacc +acctccaccaccaccaccaccaccaccaccaccaccaccacctccaccacctccacctcc +acctccaccaccaccaccaccacctccaccaccaccaccaccacctccaccacctccacc +accacctccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacc +tccacctccaccacctccaccacctccaccaccacctccaccaccaccaccaccacctcc +accaccacctccacctccaccaccacctccaccaccacctccacctccaccacctccacc +tccaccacctccaccacctccacctccaccacctccacctccacctccaccacctccacc +accaccaccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacc +tccaccacctccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccacc +tccaccaccacctccacctccaccaccacctccaccaccacctccacctccaccacctcc +acctccaccacctccaccacctccaccaccaccaccaccacctccaccaccacctccacc +tccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctccacc +acctccaccaccacctccacctccaccacctccaccaccacctccaccaccacctccacc +accaccacctccaccaccacctccaccacctccaccaccacctccacctccaccacctcc +accaccacctccaccaccacctccaccaccaccacctccaccaccacctccaccacctcc +accaccacctccacctccaccacctccacctccacctccaccacctccaccaccaccacc +accaccaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccacc +tccacctccaccacctccaccaccacctccacctccaccacctccacctccacctccacc +acctccacctccaccacctccacctccacctccaccacctccaccaccaccacctccacc +accacctccaccacctccaccaccacctccacctccaccacctccaccaccacctccacc +accacctccaccaccaccacctccaccaccacctccaccacctccaccaccacctccacc +tccaccacctccacctccacctccaccacctccaccaccaccaccaccaccaccaccacc +accaccacctccaccaccaccacctccaccaccacctccaccaccaccaccaccacctcc +accaccaccaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacc +accaccacctccaccaccacctccaccaccaccaccaccaccaccaccacctccaccacc +tccacctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccacc +accaccacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccacc +accaccaccacctccaccacctccaccacctccaccaccacctccaccacctccaccacc +accaccaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctcc +acctccaccacctccacctccaccacctccacctccaccacctccacctccacctccacc +acctccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccacc +acctccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacc +tccaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc +accacctccaccaccaccacctccacctccaccaccaccaccaccaccacctccacctcc +accaccacctccaccaccacctccacctccacctccaccacctccaccaccacctccacc +acctccaccacctccacctccaccaccaccaccacctccaccaccaccaccaccacctct +accacctctaccaccaccaccaccacctccaccaccaccaccaccacctccaccaccacc +accaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacctctacc +acctccacctccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc +accacctccaccaccaccaccacctccaccaccacctccaccaccacctccaccacctcc +accaccaccaccacctccacctccaccacctccaccaccacctccaccaccaccaccacc +accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctcc +acctccaccaccaccaccacctccacctccaccaccaccaccaccaccaccaccaccacc +acctccaccaccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc +accacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccacc +accacctctaccaccaccaccaccacctccaccaccaccaccacctccaccaccaccacc +acctccaccaccacctccaccaccaccaccacctccaccaccacctccaccaccacctcc +accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc +accacctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccacctcc +accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc +accacctccaccaccacctccaccaccaccaccaccaccacctccaccaccacctccacc +acctccaccaccaccaccaccacctctaccacctctaccaccaccaccaccacctccacc +accaccaccacctccaccaccaccaccacctccaccaccaccacctccacctccaccacc +accacctccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc +acctccaccacctccaccaccaccaccaccaccaccaccacctccaccaccaccaccacc +accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctct +accaccaccaccaccacctccaccaccaccaccacctccaccaccaccaccacctccacc +accacctccaccaccaccacctccacctccaccaccaccacctccaccaccaccacctcc +acctccaccaccaccacctccaccacctccaccaccacctccaccaccaccacctccacc +tccaccaccaccacctccaccacctccaccaccaccacctccaccaccaccacctccacc +acctccaccaccaccaccaccacctccaccacctccaccacctccaccaccaccaccacc +acctccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccacc +accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc +accaccaccaccacctccacctccacctccaccacctccaccaccaccacctccaccacc +accacctccacctccaccacctccaccaccacctccaccaccaccacctccacctccacc +accaccacctccaccaccaccacctccacctccaccaccaccacctccaccacctccacc +accacctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccacc +accacctccaccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc +tccaccacctccaccaccaccaccaccacctccacctccacctccaccacctccaccacc +accacctccaccaccaccaccaccaccacctccaccacctccaccaccaccaccaccacc +tccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccacc +accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc +acctccaccaccacctccacctccacctccaccacctccacctccaccaccaccaccacc +accaccaccaccaccaccacctccacctccacctccaccaccaccacctctaccaccacc +tccaccaccaccaccaccaccaccaccaccacctccaccacctccaccaccaccacctcc +accaccaccacctccacctccaccaccaccaccacctccaccacctccaccaccacctcc +acctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacc +tccaccacctccaccaccaccacctccaccaccaccacctccacctccaccacctccacc +acctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacc +tctaccaccacctccaccaccaccaccaccaccacctccacctccaccacctccacctcc +accacctctaccaccacctccaccaccaccacctccacctccacctccaccaccaccacc +tccacctccacctccaccaccaccaccaccaccaccacctccaccaccaccaccaccacc +accaccaccacctccacctccaccaccaccacctccaccaccaccacctccaccacctcc +accaccacctccacctccaccaccaccacctccacctccacctccaccaccaccaccacc +acctccaccaccaccacctccaccaccaacacctccacctccacctccaccaccaccacc +accacctccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacc +tccaccaccacctccaccacctccaccaccaccacctccaccaccacctccaccaccacc +accaccaccacctccacctccacctccaccaccaccacctctaccaccacctccaccacc +accaccaccaccaccaccaccacctccacctccacctccaccacctccacctccaccacc +accaccaccaccaccacctccaccaccacctccacctccacctccaccacctccacctcc +accaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccaccacc +acctctaccaccacctccaccaccaccaccaccaccaccaccaccacctccaccaccacc +acctccacctccaccaccaccacctccacctccaccaccaccacctccacctccaccacc +accaccaccaccaccacctccaccaccaccacctccaccacctccaccaccacctccacc +accaccacctccaccacctccaccaccacctccacctccaccaccaccacctccaccacc +accacctccacctccaccaccaccacctccacctccacctccaccacctccacctccacc +acctccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacc +tccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctcc +acctccacctccaccaccaccaccaccacctccactaccaccacctccacctccaccacc +accacctccaccaccaacacctccacctccacctccacctccaccaccaccacctccacc +tccaccaccaccacctccaccaccaccacctccaccacctccacctccaccaccaccacc +tccaccaccacctccaccaccaccaccacctccaccaccaccacctccaccaccacctcc +accaccaccacctccaccaccacctccaccaccacctccacctccaccaccaccacctgc +accaccacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcact +tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc +taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg +tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct +gtttggggtggagtccaagtctc +>syndip_2_chr20:63693225-63693985 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacca-- +-------------------------------cctccacctccaccacct----------- +----------------ccacctccacctccaccaccacctccacctccacctccacctcc +acctccaccaccaccaccaccacctccaccacctccacctccacctccacctccacctcc +acctccacctccacctccaccacgtcca---------------------ccacctccacc +accacctccaccaccacctccacctccacctccacctccaccaccacctccaccaccacc +tccaccaccacct------------------ccaccaccacctccaccaccaccaccacc +accaccacctccaccacctccaccacctccacctccacctccaccaccaccacctccacc +acctccaccacctccaccaccaccaccaccaccacctccacctccaccacct-------- +----------------------------------------ccacctccacctccaccacc +tccacctccacctccaccaccacctccaccacct-------------------------- +-------------------------------ccaccacctccaccacctccacctccacc +acctccaccacctccaccaccaccaccaccaccacctccaccaccacctccaccacc--- +---------acctccaccaccacctccaccaccacctccaccacctccacctccacctcc +accacctccaccacctccaccaccaccaccaccaccaccacctccacca----------- +-------------------------------ccacctccaccaccaccacctccacctcc +accaccaccacctccaccacctccaccacctccaccaccaccaccaccaccacctccacc +tccaccacctccacctccacctccaccacctccacctccacctccaccaccacctccacc +acctccaccacctccaccacctccacctccaccacctccaccacctccaccaccaccacc +accaccacctccaccaccacctccaccaccacctccaccaccacctccaccaccacct-- +------------------------------------------------------------ +----------------------ccaccacctccacctccacctccaccacctccacctcc +acctccaccaccaccacctccacct---------ccaccaccacctccaccaccaccacc +tccacctccaccacctccaccaccagcaccacctccaccaccaccacctccacctccacc +tccacctccacctccacctccacctccaccaccacctccacctccaccacctccacctcc +accaccaccaccaccaccaccaccaccaccacctccacctccaccaccacctccaccacc +tccaccacctccaccaccaccacctccaccacctccacct-------------------- +-------ccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc +tccaccacct---ccaccacctccaccaccaccaccaccacctccaccacctccaccacc +accacctccacctccacctccaccaccaccaccaccacctccaccacctccaccaccacc +acctccacctccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc +tccaccaccaccacctccaccaccacctccacca---------------------ccacc +acctccacctccaccaccacctccaccacctccaccacctccaccacctccacctccacc +tccaccacctccaccaccacctccaccacctccaccaccacctccaccacctccacctcc +acctccacct---------------------ccacctccaccaccacctccaccacctcc +accaccacctccacctccaccacctccacctccacctccaccacctccaccaccacctcc +accacctccaccacctccaccaccaccacca----------------------------- +-ccacctccaccacctccaccaccacctccaccacctccacctccacctccacct----- +-------------ccacctccaccaccacctccaccacct---ccaccaccacctccacc +accacctccaccacctccacctccacctccacct-------------------------- +----------------ccacctccaccaccacctccaccacctccaccaccacctccacc +tccaccacctccacctccacctccaccacctccaccaccacctccaccacctccaccacc +acctccacct-------------------------------------------------- +----------ccaccacctccaccaccacctccaccacctccaccacctccaccaccacc +accaccacct-------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------ccaccacctccaccaccacctccaccacctccacctccacctcc +acct---------------------------------ccacctccaccaccacctccacc +acctccaccaccacctccacctccaccacctccacctccacctccaccacctccaccacc +accacctccaccaccacctccaccaccaccacctccacctccaccaccacctccaccacc +tc---------------------------------------------------------- +------------------------------------------------------------ +-----------------------------------caccacctccaccaccacctccacc +acctccaccacctccaccaccaccacc--------------------------------- +accacctccaccacctccaccaccacctccaccaccaccacctccacctccacctccacc +acctccaccacctccaccaccacctccaccacctccaccacctccaccaccacctccacc +acctcca----------------------------------------------------- +------------------------------------------------------------ +-------------------------------ccacctccacctccaccaccaccacctcc +accacctccaccaccacctccaccacct-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------ccacctccacctccacctccacctccaccaccacctcc +accacctccaccaccacctccacctccaccacctccacctccacctccaccacctccacc +tccacctccacctccaccaccaccacctccacctccacctccacctccaccacgtccacc +a----------------------------------------------------------- +-------------------------------------------------------cctcc +accacctccacctccacctccacctccacctccaccacctccacctccacctccaccacc +tccacctccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccacc +acct-------------------------------------------------------- +------------------------------------------------------------ +-------------------------ccaccaccacctccacctccaccaccacctccacc +accacctccaccaccaccacct---------------ccacctccaccaccaccacctcc +acctccaccaccaccaccaccaccacctccaccaccacctccacctccacca---ccacc +tccacctccaccacctccaccaccagcaccaccaccacctccaccaccaccacctccacc +tccaccaccaccaccaccaccacct---------------------ccaccaccacctcc +acctccacctccacctccaccaccacctccaccaccacctccacca-------------- +------------------------------------------------------------ +----------------------------------------------------cctccacc +accacctccacctccaccacctccacctccacctccaccacctccacctccacctccacc +acctccacct-------------------------------------------------- +----------------ccacctccacctccaccaccaccacct----------------- +----------------------------------ccacctccacctccacctccaccacg +tccaccacctccaccacctccacctccacctccacctccacctccaccacct-------- +----------------------------------ccacctccacctccaccacctccacc +tccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacctcc +a---------------------ccaccacctccacctccaccaccacctccaccaccacc +tccaccaccaccacctccacctccaccaccaccacctccacctccaccaccaccaccacc +acca-------------------------------------------------------- +----------------------------------------------------------cc +tccaccaccacctccacctccaccaccacctccacctccaccacctccaccaccaccacc +acctccacctccacca------------------ccaccacctccacctccacctccacc +tccaccaccacctccaccaccacctccaccaccaccacctccacctccacca-------- +----ccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc +acctccaccacctccaccaccaccaccacctccacctccaccaccaccacctccacctcc +accaccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc +tccacctccacctccacctccacctccaccacctccacctccacctccaccaccaccacc +acctccaccacct----------------------------------------------- +----ccaccaccacctccaccacctccacctccacctccacctccaccaccacctccacc +acctccaccacca---------------------------------cctccaccacctcc +accacctccacctccaccaccacctccacctccacctccaccacctccaccaccacca-- +-------------------------------------ccaccaccacctccaccaccacc +tccaccaccacctccacctccacct------ccaccacctccaccaccacctccaccacc +accaccacctccacctccaccacctccacctccacctccacca----------------- +----------------------------------------------------ccacctcc +accacctccaccaccaccaccaccacctccaccacctccacctccacct----------- +------------------------------------------------------------ +-------------------ccacctccacctccaccacctccacctccacctccaccacc +accaccacctccaccacctccacca----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------ccacctccaccacctccacctcc +acctccacctccaccaccacctccaccacctccaccaccacctccaccacctccaccacc +tccacct------------------------------------ccaccaccacctccacc +tccacctccaccacctccaccaccaccaccaccaccacctccaccttcaccaccaccacc +acctccaccacctccaccaccacctcca-------------------------------- +-------------------------------------ccaccacctccaccaccacctcc +tccacctccaccacctccaccaccaccaccacctccacctccaccagcagcagcatcact +tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc +taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg +tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct +gtttggggtggagtccaagtctc +>p:HG002_1_chr20:63693225-63693985 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacca-- +-------------------------------cctccacctccaccacct----------- +----------------ccacctccacctccaccaccacctccacctccacctccacctcc +acctccaccaccaccaccaccacctccaccacctccacctccacctccacctccacctcc +acctccacctccacctccaccacgtcca---------------------ccacctccacc +accacctccaccaccacctccacctccacctccacctccaccaccacctccaccaccacc +tccaccaccacct------------------ccaccaccacctccaccaccaccaccacc +accaccacctccaccacctccaccacctccacctccacctccaccaccaccacctccacc +acctccaccacctccaccaccaccaccaccaccacctccacctccaccacct-------- +----------------------------------------ccacctccacctccaccacc +tccacctccacctccaccaccacctccaccacct-------------------------- +-------------------------------ccaccacctccaccacctccacctccacc +acctccaccacctccaccaccaccaccaccacca------------cctccaccaccacc +tccaccaccacctccaccaccacctccaccaccacctccaccacctccacctccacctcc +accacctccaccacctccaccaccaccaccaccaccaccacctccacca----------- +-------------------------------ccacctccaccaccaccacctccacctcc +accaccaccacctccaccacctccaccacctccaccaccaccaccaccaccacctccacc +tccaccacctccacctccacctccaccacctccacctccacctccaccaccacctccacc +acctccaccacctccaccacctccacctccaccacctccaccacctccaccaccaccacc +accaccacctccaccaccacctccaccaccacctccaccaccacctccaccaccacct-- +------------------------------------------------------------ +----------------------ccaccacctccacctccacctccaccacctccacctcc +acctccaccaccaccacctccacct---------ccaccaccacctccaccaccaccacc +tccacctccaccacctccaccaccagcaccacctccaccaccaccacctccacctccacc +tccacctccacctccacctccacctccaccaccacctccacctccaccacctccacctcc +accaccaccaccaccaccaccaccaccaccacctccacctccaccaccacctccaccacc +tccaccacctccaccaccaccacctccaccacctccacct-------------------- +-------ccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc +tccaccacct---ccaccacctccaccaccaccaccaccacctccaccacctccaccacc +accacctccacctccacctccaccaccaccaccaccacctccaccacctccaccaccacc +acctccacctccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc +tccaccaccaccacctccaccaccacctccacca---------------------ccacc +acctccacctccaccaccacctccaccacctccaccacctccaccacctccacctccacc +tccaccacctccaccaccacctccaccacctccaccaccacctccaccacctccacctcc +acctccacct---------------------ccacctccaccaccacctccaccacctcc +accaccacctccacctccaccacctccacctccacctccaccacctccaccaccacctcc +accacctccaccacctccaccaccaccacca----------------------------- +-ccacctccaccacctccaccaccacctccaccacctccacctccacctccacct----- +-------------ccacctccaccaccacctccaccacct---ccaccaccacctccacc +accacctccaccacctccacctccacctccacct-------------------------- +----------------ccacctccaccaccacctccaccacctccaccaccacctccacc +tccaccacctccacctccacctccaccacctccaccaccacctccaccacctccaccacc +acctccacct-------------------------------------------------- +----------ccaccacctccaccaccacctccaccacctccaccacctccaccaccacc +accaccacct-------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------ccaccacctccaccaccacctccaccacctccacctccacctcc +acct---------------------------------ccacctccaccaccacctccacc +acctccaccaccacctccacctccaccacctccacctccacctccaccacctccaccacc +accacctccaccaccacctccaccaccaccacctccacctccaccaccacctccaccacc +tc---------------------------------------------------------- +------------------------------------------------------------ +-----------------------------------caccacctccaccaccacctccacc +acctccaccacctccaccaccaccacc--------------------------------- +accacctccaccacctccaccaccacctccaccaccaccacctccacctccacctccacc +acctccaccacctccaccaccacctccaccacctccaccacctccaccaccacctccacc +acctcca----------------------------------------------------- +------------------------------------------------------------ +-------------------------------ccacctccacctccaccaccaccacctcc +accacctccaccaccacctccaccacct-------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------ccacctccacctccacctccacctccaccaccacctcc +accacctccaccaccacctccacctccaccacctccacctccacctccaccacctccacc +tccacctccacctccaccaccaccacctccacctccacctccacctccaccacgtccacc +a----------------------------------------------------------- +-------------------------------------------------------cctcc +accacctccacctccacctccacctccacctccaccacctccacctccacctccaccacc +tccacctccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccacc +acct-------------------------------------------------------- +------------------------------------------------------------ +-------------------------ccaccaccacctccacctccaccaccacctccacc +accacctccaccaccaccacct---------------ccacctccaccaccaccacctcc +acctccaccaccaccaccaccaccacctccaccaccacctccacctccacca---ccacc +tccacctccaccacctccaccaccagcaccaccaccacctccaccaccaccacctccacc +tccaccaccaccaccaccaccacct---------------------ccaccaccacctcc +acctccacctccacctccaccaccacctccaccaccacctccacca-------------- +------------------------------------------------------------ +----------------------------------------------------cctccacc +accacctccacctccaccacctccacctccacctccaccacctccacctccacctccacc +acctccacct-------------------------------------------------- +----------------ccacctccacctccaccaccaccacct----------------- +----------------------------------ccacctccacctccacctccaccacg +tccaccacctccaccacctccacctccacctccacctccacctccaccacct-------- +----------------------------------ccacctccacctccaccacctccacc +tccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacctcc +a---------------------ccaccacctccacctccaccaccacctccaccaccacc +tccaccaccaccacctccacctccaccaccaccacctccacctccaccaccaccaccacc +acca-------------------------------------------------------- +----------------------------------------------------------cc +tccaccaccacctccacctccaccaccacctccacctccaccacctccaccaccaccacc +acctccacctccacca------------------ccaccacctccacctccacctccacc +tccaccaccacctccaccaccacctccaccaccaccacctccacctccacca-------- +----ccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc +acctccaccacctccaccaccaccaccacctccacctccaccaccaccacctccacctcc +accaccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc +tccacctccacctccacctccacctccaccacctccacctccacctccaccaccaccacc +acctccaccacct----------------------------------------------- +----ccaccaccacctccaccacctccacctccacctccacctccaccaccacctccacc +acctccaccacca---------------------------------cctccaccacctcc +accacctccacctccaccaccacctccacctccacctccaccacctccaccaccacca-- +-------------------------------------ccaccaccacctccaccaccacc +tccaccaccacctccacctccacct------ccaccacctccaccaccacctccaccacc +accaccacctccacctccaccacctccacctccacctccacca----------------- +----------------------------------------------------ccacctcc +accacctccaccaccaccaccaccacctccaccacctccacctccacct----------- +------------------------------------------------------------ +-------------------ccacctccacctccaccacctccacctccacctccaccacc +accaccacctccaccacctccacca----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------ccacctccaccacctccacctcc +acctccacctccaccaccacctccaccacctccaccaccacctccaccacctccaccacc +tccacct------------------------------------ccaccaccacctccacc +tccacctccaccacctccaccaccaccaccaccaccacctccaccttcaccaccaccacc +acctccaccacctccaccaccacctcca-------------------------------- +-------------------------------------ccaccacctccaccaccacctcc +tccacctccaccacctccaccaccaccaccacctccacctccaccagcagcagcatcact +tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc +taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg +tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct +gtttggggtggagtccaagtctc +>p:HG002_2_chr20:63693225-63693985 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttc +accaccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctcc +accaccaccacctccaccaccaccacctccaccaccaccaccaccaccacctccaccacc +accaccaccaccaccacctccacctccaccacctccaccaccacctccacctccaccacc +acctccacctccacctccaccaccaccacctccaccacctccaccacctccacctccacc +accacctccacctccaccaccacctccacctccacctccacctccacctccaccaccacc +acctccaccacctccaccacctccacctccaccaccaccacctccacctccacctccacc +acctccacctccaccacctccacctccaccacctccaccaccaccaccaccaccaccacc +acctccaccaccaccaccaccaccacctccaccacctccacctccaccacctccaccacc +accacctccaccaccacctccaccaccacctccacctccaccacctccacctccaccacc +tccacctccacctccaccaccacctccaccacctccaccaccacctccaccacctccacc +tccaccaccaccacctccaccaccaccaccaccaccacctccaccacctccacctccacc +acctccaccaccaccacctccaccaccacctccaccaccacctccacctccaccacctcc +acctccaccacctccacctccacctccaccaccacctccaccacctccaccaccacctcc +accacctccaccacctccaccaccacctccaccacctccacctccaccaccaccacctcc +acctccaccaccaccacctccaccaccacctccacctccaccaccaccacctccacctcc +accaccaccacctccacca---ccacctccaccaccaccaccaccaccaccacctccacc +tccaccacctccaccaccaccaccaccacctccacctccaccacctccaccacctccacc +accaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc +accacctccaccaccaccacctccacctccacctccacctccacctccaccaccacctcc +acctccaccaccacctccaccaccacctccacctccaccacctccacctccacctccacc +acctccaccaccaccaccaccaccaccaccaccaccaccacctccaccacctccacctcc +acctccaccaccaccaccaccacctccaccaccaccaccaccacctccaccacctccacc +accacctccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacc +tccacctccaccacctccaccacctccaccaccacctccaccaccaccaccaccacctcc +accaccacctccacctccaccaccacctccaccaccacctccacctccaccacctccacc +tccaccacctccaccacctccacctccaccacctccacctccacctccaccacctccacc +accaccaccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacc +tccaccacctccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccacc +tccaccaccacctccacctccaccaccacctccaccaccacctccacctccaccacctcc +acctccaccacctccaccacctccaccaccaccaccaccacctccaccaccacctccacc +tccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctccacc +acctccaccaccacctccacctccaccacctccaccaccacctccaccaccacctccacc +accaccacctccaccaccacctccaccacctccaccaccacctccacctccaccacctcc +accaccacctccaccaccacctccaccaccaccacctccaccaccacctccaccacctcc +accaccacctccacctccaccacctccacctccacctccaccacctccaccaccaccacc +accaccaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccacc +tccacctccaccacctccaccaccacctccacctccaccacctccacctccacctccacc +acctccacctccaccacctccacctccacctccaccacctccaccaccaccacctccacc +accacctccaccacctccaccaccacctccacctccaccacctccaccaccacctccacc +accacctccaccaccaccacctccaccaccacctccaccacctccaccaccacctccacc +tccaccacctccacctccacctccaccacctccaccaccaccaccaccaccaccaccacc +accaccacctccaccaccaccacctccaccaccacctccaccaccaccaccaccacctcc +accaccaccaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacc +accaccacctccaccaccacctccaccaccaccaccaccaccaccaccacctccaccacc +tccacctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccacc +accaccacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccacc +accaccaccacctccaccacctccaccacctccaccaccacctccaccacctccaccacc +accaccaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctcc +acctccaccacctccacctccaccacctccacctccaccacctccacctccacctccacc +acctccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccacc +acctccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacc +tccaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc +accacctccaccaccaccacctccacctccaccaccaccaccaccaccacctccacctcc +accaccacctccaccaccacctccacctccacctccaccacctccaccaccacctccacc +acctccaccacctccacctccaccaccaccaccacctccaccaccaccaccaccacctct +accacctctaccaccaccaccaccacctccaccaccaccaccaccacctccaccaccacc +accaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacctctacc +acctccacctccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc +accacctccaccaccaccaccacctccaccaccacctccaccaccacctccaccacctcc +accaccaccaccacctccacctccaccacctccaccaccacctccaccaccaccaccacc +accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctcc +acctccaccaccaccaccacctccacctccaccaccaccaccaccaccaccaccaccacc +acctccaccaccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc +accacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccacc +accacctctaccaccaccaccaccacctccaccaccaccaccacctccaccaccaccacc +acctccaccaccacctccaccaccaccaccacctccaccaccacctccaccaccacctcc +accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc +accacctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccacctcc +accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc +accacctccaccaccacctccaccaccaccaccaccaccacctccaccaccacctccacc +acctccaccaccaccaccaccacctctaccacctctaccaccaccaccaccacctccacc +accaccaccacctccaccaccaccaccacctccaccaccaccacctccacctccaccacc +accacctccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc +acctccaccacctccaccaccaccaccaccaccaccaccacctccaccaccaccaccacc +accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctct +accaccaccaccaccacctccaccaccaccaccacctccaccaccaccaccacctccacc +accacctccaccaccaccacctccacctccaccaccaccacctccaccaccaccacctcc +acctccaccaccaccacctccaccacctccaccaccacctccaccaccaccacctccacc +tccaccaccaccacctccaccacctccaccaccaccacctccaccaccaccacctccacc +acctccaccaccaccaccaccacctccaccacctccaccacctccaccaccaccaccacc +acctccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccacc +accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc +accaccaccaccacctccacctccacctccaccacctccaccaccaccacctccaccacc +accacctccacctccaccacctccaccaccacctccaccaccaccacctccacctccacc +accaccacctccaccaccaccacctccacctccaccaccaccacctccaccacctccacc +accacctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccacc +accacctccaccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc +tccaccacctccaccaccaccaccaccacctccacctccacctccaccacctccaccacc +accacctccaccaccaccaccaccaccacctccaccacctccaccaccaccaccaccacc +tccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccacc +accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc +acctccaccaccacctccacctccacctccaccacctccacctccaccaccaccaccacc +accaccaccaccaccaccacctccacctccacctccaccaccaccacctctaccaccacc +tccaccaccaccaccaccaccaccaccaccacctccaccacctccaccaccaccacctcc +accaccaccacctccacctccaccaccaccaccacctccaccacctccaccaccacctcc +acctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacc +tccaccacctccaccaccaccacctccaccaccaccacctccacctccaccacctccacc +acctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacc +tctaccaccacctccaccaccaccaccaccaccacctccacctccaccacctccacctcc +accacctctaccaccacctccaccaccaccacctccacctccacctccaccaccaccacc +tccacctccacctccaccaccaccaccaccaccaccacctccaccaccaccaccaccacc +accaccaccacctccacctccaccaccaccacctccaccaccaccacctccaccacctcc +accaccacctccacctccaccaccaccacctccacctccacctccaccaccaccaccacc +acctccaccaccaccacctccaccaccaacacctccacctccacctccaccaccaccacc +accacctccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacc +tccaccaccacctccaccacctccaccaccaccacctccaccaccacctccaccaccacc +accaccaccacctccacctccacctccaccaccaccacctctaccaccacctccaccacc +accaccaccaccaccaccaccacctccacctccacctccaccacctccacctccaccacc +accaccaccaccaccacctccaccaccacctccacctccacctccaccacctccacctcc +accaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccaccacc +acctctaccaccacctccaccaccaccaccaccaccaccaccaccacctccaccaccacc +acctccacctccaccaccaccacctccacctccaccaccaccacctccacctccaccacc +accaccaccaccaccacctccaccaccaccacctccaccacctccaccaccacctccacc +accaccacctccaccacctccaccaccacctccacctccaccaccaccacctccaccacc +accacctccacctccaccaccaccacctccacctccacctccaccacctccacctccacc +acctccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacc +tccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctcc +acctccacctccaccaccaccaccaccacctccactaccaccacctccacctccaccacc +accacctccaccaccaacacctccacctccacctccacctccaccaccaccacctccacc +tccaccaccaccacctccaccaccaccacctccaccacctccacctccaccaccaccacc +tccaccaccacctccaccaccaccaccacctccaccaccaccacctccaccaccacctcc +accaccaccacctccaccaccacctccaccaccacctccacctccaccaccaccacctgc +accaccacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcact +tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc +taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg +tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct +gtttggggtggagtccaagtctc +>ref_chr20:63693225-63693985 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacct-- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------ccacctccacct-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------------------------cc +acctccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccacc +accaccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacc +t----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------------------ccaccacc +accacctccacctccaccaccacctccaccaccaccacctccacctccacca-------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------------------ccacctcc +accaccaccaccaccaccaccaccaccaccacca-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------ccacctccaccaccaccacctgc +accaccacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcact +tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc +taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg +tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct +gtttggggtggagtccaagtctc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa new file mode 100644 index 00000000..7fd8c4ee --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa @@ -0,0 +1,50 @@ +>syndip_1_chr20:23560783-23561292 +taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat +tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca +gcattcctttctctctctttctttctttctttctttctttccttccttccttccttcctt +ccttccttccttccttccttccttccttcct----------------------------- +------------------------------------------------------------ +----------------------ttctttctttctttctttctttctttctttctttcttt +ttttctttct-------ttctttctttctttttcttttacagagtcttactcttgttgcc +caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca +ttcaagaaattctcctgcctcagcctccagagtagctgggat +>syndip_2_chr20:23560783-23561292 +taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat +tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca +gcattcctttctctctctttctttctttctttctttctttctttctttctttctttcttt +ctttctttccttccttccttccttccttccttccttccttccttccttccttctttcctt +ctttccttctttccttctttccttctttccttctttccttctttccttctttccttcttt +ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt +ctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgttgcc +caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca +ttcaagaaattctcctgcctcagcctccagagtagctgggat +>p:HG002_1_chr20:23560783-23561292 +taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat +tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca +gcattcctttctctctctttctttctttctttctttctttctttctttctttctttcttt +ctttctttccttccttccttccttccttccttccttccttccttccttccttctttcctt +ctttccttctttccttctttccttctttccttctttccttctttccttctttccttcttt +ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt +ctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgttgcc +caggctgtagtgcagtggtgtgatctcggctgactgcaacctccacctctgcctcctgca +ttcaagaaattctcctgcctcagcctccagagtagctgggat +>p:HG002_2_chr20:23560783-23561292 +taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat +tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca +gcattcctttctctctctttctttctttctttctttctttccttccttccttccttcctt +ccttccttccttccttccttccttccttcct----------------------------- +------------------------------------------------------------ +----------------------ttctttctttctttctttctttctttctttctttcttt +ttttctttct-------ttctttctttctttttcttttacagagtcttactcttgttgcc +caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca +ttcaagaaattctcctgcctcagcctccagagtagctgggat +>ref_chr20:23560783-23561292 +taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat +tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca +gcattcctttctctttctttctttctttctttctttctttctttctttctttctttcctt +ctttccttccttccttccttccttccttccttccttccttccttccttccttccttcctt +ctttccttctttccttctttccttctttccttctttccttctttccttctttcc------ +------ttctttccttctttccttctttctttctttctttctttctttctttctttcttt +ctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgttgcc +caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca +ttcaagaaattctcctgcctcagcctccagagtagctgggat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa new file mode 100644 index 00000000..070e9101 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa @@ -0,0 +1,105 @@ +>syndip_1_chr20:62320931-62321974 +aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc +agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca +gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga +aggtagggccagcacaaggggttggggccggaggtggggccagtgaaggggcggggccgg +tggaggcagggtcagtgaaggggtggggccagtggaggaagagggggccagtggaggagg +cggggcctgtggaggggtggggccagtggaggcagggtcagtgaaggggtggggccggtg +gaggaagagggggccagtgaaggtgtggggccagtggaggaggtgcagccagtggaagag +gtggggccagtgggggtggggtcagtggaggggtggggtcagaggacaggtgggttcaga +ggacgggtgggttcagaggacgggtgggttcagaggacgggtggggtcagtggaggggtg +gggtcagtggaggggtagggtcagaggacgggtagggtcagtggaggggtagggccagca +gaggggtggggtcagcggaggggtggggtcagtggaggggtggggtcagtggaggggtgg +ggtcagtggaggcagggtcagtgaaagggtggcgccagtggaggaagagggcgccagtgg +aggaagagggcgccagtggaggaagagggggccagtggaggcagggcctgtggaggggtg +gggccagtggaggtggggtcagtgaaggggtggggccagcggaggaggtgcagccagtgg +aagaggtggggccaatgggggtggggtcagtggaggggtggggtcagtgggggaggcagg +gccagcagaggggtggggccagt-----------------ggaggggtggggccagcgga +ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga +aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt +tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag +gtcgacgttaacactggggtcaccaggctgtgtgggacttggat +>syndip_2_chr20:62320931-62321974 +aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc +agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca +gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga +aggtagggccagcacaaggggttggggccggaggtggggccagtgaaggggcggggccgg +tggaggcagggtcagtgaaggggtggggccagtggaggaagagggggccagtggaggagg +cggggcctgtggaggggtggggccagtggaggcagggtcagtgaaggggtggggccggtg +gaggaagagggggccagtgaaggtgtggggccagtggaggaggtgcagccagtggcagag +gtggggccagtgggggtggggtcagtggaggggtggggtcagaggac------------- +------------------------------------------------------------ +----------------------------------------------gggtagggtcagtg +gaggggtagggccagcagaggggtggggtcagtggaggggtggggtcagtggaggggtgg +ggtcagtggaggcagggtcagtgaaagggt-------------------ggcgccagtgg +aggaagagggcgccagtggaggaagagggggccagtggaggcagggcctgtggaggggtg +gggccagtggaggtggggtcagtgaaggggtggggccagcggaggaggtgcagccagtgg +aagaggtggggccaatgggggtggggtcagtggaggggtggggtcagtgggggaggcagg +gccagcagaggggtggggccagtggaggggtggggccagcggaggggtggggccagcgga +ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga +aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt +tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag +gtcgacgttaacactggggtcaccaggctgtgtgggacttggat +>p:HG002_1_chr20:62320931-62321974 +aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc +agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca +gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga +aggtagggccagcacaaggggttggggccggaggtggggccagtgaaggggcggggccgg +tggaggcagggtcagtgaaggggtggggccagtggaggaagagggggccagtggaggagg +cggggcctgtggaggggtggggccagtggaggcagggtcagtgaaggggtggggccggtg +gaggaagagggggccagtgaaggtgtggggccagtggaggaggtgcagccagtggcagag +gtggggccagtgggggtggggtcagtggaggggtggggtcagaggac------------- +------------------------------------------------------------ +----------------------------------------------gggtagggtcagtg +gaggggtagggccagcagaggggtggggtcagtggaggggtggggtcagtggaggggtgg +ggtcagtggaggcagggtcagtgaaagggt-------------------ggcgccagtgg +aggaagagggcgccagtggaggaagagggggccagtggaggcagggcctgtggaggggtg +gggccagtggaggtggggtcagtgaaggggtggggccagcggaggaggtgcagccagtgg +aagaggtggggccaatgggggtggggtcagtggaggggtggggtcagtgggggaggcagg +gccagcagaggggtggggccagtggaggggtggggccagcggaggggtggggccagcgga +ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga +aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt +tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag +gtcgacgttaacactggggtcaccaggctgtgtgggacttggat +>p:HG002_2_chr20:62320931-62321974 +aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc +agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca +gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga +aggtagggccagcacaaggggttggggccggaggtggggccagtgaaggggcggggccgg +tggaggcagggtcagtgaaggggtggggccagtggaggaagagggggccagtggaggagg +cggggcctgtggaggggtggggccagtggaggcagggtcagtgaaggggtggggccggtg +gaggaagagggggccagtgaaggtgtggggccagtggaggaggtgcagccagtggaagag +gtggggccagtgggggtggggtcagtggaggggtggggtcagaggacaggtgggttcaga +ggacgggtgggttcagaggacgggtgggttcagaggacgggtggggtcagtggaggggtg +gggtcagtggaggggtagggtcagaggacgggtagggtcagtggaggggtagggccagca +gaggggtggggtcagcggaggggtggggtcagtggaggggtggggtcagtggaggggtgg +ggtcagtggaggcagggtcagtgaaagggtggcgccagtggaggaagagggcgccagtgg +aggaagagggcgccagtggaggaagagggggccagtggaggcagggcctgtggaggggtg +gggccagtggaggtggggtcagtgaaggggtggggccagcggaggaggtgcagccagtgg +aagaggtggggccaatgggggtggggtcagtggaggggtggggtcagtgggggaggcagg +gccagcagaggggtggggccagt-----------------ggaggggtggggccagcgga +ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga +aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt +tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag +gtcgacgttaacactggggtcaccaggctgtgtgggacttggat +>ref_chr20:62320931-62321974 +aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc +agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca +gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga +aggtagggccagcacaaggggttggggccggaggtggggccagtgaaggggcggggccgg +tggaggcagggtcagtgaaggggtggggccagtggaggaagagggggccagtggaggagg +cggggcctgtggaggggtggggccagtggaggcagggtcagtgaaggggtggggccggtg +gaggaagagggggccagtgaaggtgtggggccagtggaggaggtgcagccagtggaagag +gtggggccagtgggggtggggtcagtggaggggtggggtcagaggacgggtgggt----- +------------------------------------------------------------ +--------------------tcagaggacgggtagggtcagtggaggggtagggccagca +gaggggtagggccagcagaggggtggggtcagtggaggggtggggtcagtggaggggtgg +ggtcagtggaggcagggtcagtgaaagggt------------------------------ +--------ggcgccagtggaggaagagggggccagtggaggcagggcctgtggaggggtg +gggccagtggaggtggggtcagtgaaggggtggggccagcggaggaggtgcagccagtgg +aagaggtggggccaatgggggtggggtcagtggaggggtggggtcagtgggggaggcagg +gccagcagaggggtggggtcagt-----------------ggaggggtggggccagtgga +ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga +aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt +tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag +gtcgacgttaacactggggtcaccaggctgtgtgggacttggat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b778319de388d363fb74713d18656b39.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b778319de388d363fb74713d18656b39.msa new file mode 100644 index 00000000..bbe6d2b6 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b778319de388d363fb74713d18656b39.msa @@ -0,0 +1,365 @@ +>syndip_1_chr20:64173309-64176630 +cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc +ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac +cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc +cctttagttgtgtccatttcccctccctgacttctcctcctgcagcatctttcctagttg +tgtccatttcccctccctgacttctcctcctgcagcatctttccttgtagttgtgtccat +ttcccctccctggcttctcctcctgcagcatctttcctagttgtgtccatttcccctccc +tgacttctcctcctgcagcatccttccctgtagttgtgtccatttcccctccctgacttc +tcctcctgcagcatctttccctgtagttgtgtccatttcccctccctgacttctcctcct +gcagcatctttccttgtagttgtgtccatttcccctccctggcttctcctcctgcagcat +ctttccctgtagttgtgtccattccccctccctggcttctcctgtagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggctt---ctcctgcagcatctttccctgtagttgtgtccatttccc +ctccctggctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +cagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatcttt +ccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctg------------------------------ +--------------------cagcatctttccctgtagttgtgtccatttcccctccctg +gctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctc +ctgtagcatctttcc---tagttgtgtccatttcccctccctggcttctcctgtagcatc +tttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctgtagcaactttcc---tagttgtgtccattt +cccctccctggcttctcctgtagcatctttcc---tagttgtgtccatttcccctccctg +gcttctcctgtagcatctttccctgtagttgtgtccatttcccctccctggctt---ctc +ctgcagcatctttccctgtagttgtgtccatttcccctccctggcgtctcctcctgcagc +atctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatctttcc-- +-tagttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtcca +ttccccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcccctcc +ctggcttctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +tagcatctttcctagttgtgtccatttcccctccctggcttctcctgtagcatctttcct +agttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtccatt +tcccctccctggcttctcctgcagcatctttccc-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-tgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctgg--------------------------------------------------- +------------------------------------------------------------ +---------------------------------------------cttctcctgtagcat +ctttcc---tagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcccctc +cctggcgtctcctcctgcagcatctttcctagttgtgtccatttcccctccctggcttct +cctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctcctgcag +catctttccctgtagttgtgtccattccccctccctggcttctcctcctgcagcatcttt +ccctgtagttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgta +gttgtgtccattccccctccctggcttctcctcctgtagcatctttccctgtagttgtgt +ccattccccctccctgacttctcctcctgcagcatctttccctgtagttgtgtccatttc +ccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttcccctccc +tggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcc +tgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatct +ttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtag +ttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgtagttgtgtc +cattccccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctggctt------------------------------------------------ +-----ctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgta +gcatctttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttccctg +tagttgtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtc +catttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccct +ccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttc +tcctgtagcatctttccctgtagttgtgttcatttcccctccctggcttctcctgcagca +tctttccctgtagttgtgtccatttcccctccctggctt--------------------- +--------------------------------ctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctggctt------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------------ctcctgcag +catctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccc +tgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtg +tccatttcccctccctggcttctcctcttctagcatctttcctgctccctggtagcctct +tgggagccacctatttctggaggtccccactctgggttccttgtcctgggtgtggggcga +atgtgctggactggggtcacagcattgaaccccacttggagctgagga +>syndip_2_chr20:64173309-64176630 +cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc +ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac +cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc +cctttagttgtgtccatttcccctccctgacttctcctcctgcagcatctttcctagttg +tgtccatttcccctccctgacttctcctcctgcagcatctttccttgtagttgtgtccat +ttcccctccctggcttctcctcctgcagcatctttcctagttgtgtccatttcccctccc +tgacttctcctcctgcagcatccttccctgtagttgtgtccatttcccctccctgacttc +tcctcctgcagcatctttccctgtagttgtgtccatttcccctccctgacttctcctcct +gcagcatctttccttgtagttgtgtccatttcccctccctggcttctcctcctgcagcat +ctttccctgtagttgtgtccattccccctccctggcttctcctgtagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttccc +ctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +cagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatcttt +ccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctgtagcatctttcc---tagttgtgtccattt +cccctccctggcttctcctgtagcatctttcc---tagttgtgtccatttcccctccctg +gctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctc +ctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatc +tttcctagttgtgtccatttcccctccctggcttctcctgtagcatctttcc---tagtt +gtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccattt +cccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggctt---ctc +ctacagcatctttccctgtagttgtgtccatttcccctccctgg---cttctcctgtagc +atctttcc---tagttgtgtccatttcccctccctggcttctcctgtagcatctttccct +gtagttgtgtccatttcccctccctgg--------------------------------- +------------------------------------------------------------ +----cttctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +tagcatctttcc-----------------------------------------------t +agttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtccatt +tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct +ggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcct +gtagcatctttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttcc +ctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcc +cctccctggcgtctcctcctgcagcatctttcctagttgtgtccatttcccctccctggc +ttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctcct +gcagcatctttccctgtagttgtgtccattccccctccctggcttctcctcctgcagcat +ctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctgcagcatctttccctg--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------tagttgtgtccatttcccctccctggcttctcctgcagca +tctttccctgtagttgtgt----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------ccatttcccctccctggcttctcctgcagcatctttccctgtagttgtg +tccatttcccctccctggcttctcctcttctagcatctttcctgctccctggtagcctct +tgggagccacctatttctggaggtccccactctgggttccttgtcctgggtgtggggcga +atgtgctggactggggtcacagcattgaaccccacttggagctgagga +>p:HG002_1_chr20:64173309-64176630 +cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc +ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac +cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc +cctttagttgtgtccatttcccctccctgacttctcctcctgcagcatctttcctagttg +tgtccatttcccctccctgacttctcctcctgcagcatctttccttgtagttgtgtccat +ttcccctccctggcttctcctcctgcagcatctttcctagttgtgtccatttcccctccc +tgacttctcctcctgcagcatccttccctgtagttgtgtccatttcccctccctgacttc +tcctcctgcagcatctttccctgtagttgtgtccatttcccctccctgacttctcctcct +gcagcatctttccttgtagttgtgtccatttcccctccctggcttctcctcctgcagcat +ctttccctgtagttgtgtccattccccctccctggcttctcctgtagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttccc +ctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +cagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatcttt +ccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctgtagcatctttcc---tagttgtgtccattt +cccctccctggcttctcctgtagcatctttcc---tagttgtgtccatttcccctccctg +gctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctc +ctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatc +tttcctagttgtgtccatttcccctccctggcttctcctgtagcatctttcc---tagtt +gtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccattt +cccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggctt---ctc +ctacagcatctttccctgtagttgtgtccatttcccctccctgg---cttctcctgtagc +atctttcc---tagttgtgtccatttcccctccctggcttctcctgtagcatctttccct +gtagttgtgtccatttcccctccctgg--------------------------------- +------------------------------------------------------------ +----cttctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +tagcatctttcc-----------------------------------------------t +agttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtccatt +tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct +ggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcct +gtagcatctttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttcc +ctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcc +cctccctggcgtctcctcctgcagcatctttcctagttgtgtccatttcccctccctggc +ttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctcct +gcagcatctttccctgtagttgtgtccattccccctccctggcttctcctcctgcagcat +ctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctgcagcatctttccctg--------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------tagttgtgtccatttcccctccctggcttctcctgcagca +tctttccctgtagttgtgt----------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----------ccatttcccctccctggcttctcctgcagcatctttccctgtagttgtg +tccatttcccctccctggcttctcctcttctagcatctttcctgctccctggtagcctct +tgggagccacctatttctggaggtccccactctgggttccttgtcctgggtgtggggcga +atgtgctggactggggtcacagcattgaaccccacttggagctgagga +>p:HG002_2_chr20:64173309-64176630 +cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc +ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac +cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc +cctttagttgtgtccatttcccctccctgacttctcctcctgcagcatctttcctagttg +tgtccatttcccctccctgacttctcctcctgcagcatctttccttgtagttgtgtccat +ttcccctccctggcttctcctcctgcagcatctttcctagttgtgtccatttcccctccc +tgacttctcctcctgcagcatccttccctgtagttgtgtccatttcccctccctgacttc +tcctcctgcagcatctttccctgtagttgtgtccatttcccctccctgacttctcctcct +gcagcatctttccttgtagttgtgtccatttcccctccctggcttctcctcctgcagcat +ctttccctgtagttgtgtccattccccctccctggcttctcctgtagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggctt---ctcctgcagcatctttccctgtagttgtgtccatttccc +ctccctggctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +cagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgt--------- +-----------------------------------------agcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccattt +cccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctc +ctgtagcatctttcc---tagttgtgtccatttcccctccctggcttctcctgtagcatc +tttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctgtagcaactttcc---tagttgtgtccattt +cccctccctggcttctcctgtagcatctttcc---tagttgtgtccatttcccctccctg +gcttctcctgtagcatctttccctgtagttgtgtccatttcccctccctggctt---ctc +ctgcagcatctttccctgtagttgtgtccatttcccctccctgg---cttctcctgcagc +atctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatctttcc-- +-tagttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtcca +ttccccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcccctcc +ctggcttctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctg +tagcatctttcctagttgtgtccatttcccctccctggcttctcctgtagcatctttcct +agttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtccatt +tcccctccctggcttctcctgcagcatctttccc-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-tgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctgg--------------------------------------------------- +------------------------------------------------------------ +---------------------------------------------cttctcctgtagcat +ctttcc---tagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcccctc +cctggcgtctcctcctgcagcatctttcctagttgtgtccatttcccctccctggcttct +cctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctcctgcag +catctttccctgtagttgtgtccattccccctccctggcttctcctcctgcagcatcttt +ccctgtagttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgta +gttgtgtccattccccctccctggcttctcctcctgcagcatctttccctgtagttgtgt +ccatttcccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttc +ccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttcccctccc +tggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcc +tgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatct +ttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtag +ttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgtagttgtgtc +cattccccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctggctt------------------------------------------------ +-----ctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgta +gcatctttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttccctg +tagttgtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtc +catttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccct +ccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttc +tcctgtagcatctttccctgtagttgtgttcatttcccctccctggcttctcctgcagca +tctttccctgtagttgtgtccatttcccctccctggctt--------------------- +--------------------------------ctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctggctt------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------------ctcctgcag +catctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccc +tgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtg +tccatttcccctccctggcttctcctcttctagcatctttcctgctccctggtagcctct +tgggagccacctatttctggaggtccccactctgggttccttgtcctgggtgtggggcga +atgtgctggactggggtcacagcattgaaccccacttggagctgagga +>ref_chr20:64173309-64176630 +cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc +ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac +cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc +cctt--------------------------------------------------tagttg +tgtccatttcccctccctgacttctcctcctgcagcatccttccctttagttgtgtccat +ttcccctccctgacttctcctcctgcagcatctttcctagttgtgtccatttcccctccc +tgacttctcctcctgcagcatctttccttgtagttgtgtccatttcccctccctggcttc +tcctcctgcag------------------------------------------------- +----catctttccttgtagttgtgtccatttcccctccctgacttctcctcctgc----- +---------------------------------------------agcatccttccctgt +agttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtcc +atttcccctccctggctt---ctcctgcagcatctttccctgtagttgtgtccatttccc +ctccctggctt---ctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgtagcatctttccctgtagttgtgtccattccccctccctggcttctcctg +tagcatctttcc---tagttgtgtccatttcccctccctggcttctcctgc--------- +-----------------------------------------agcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctg------------------------------ +--------------------tagcatctttcc---tagttgtgtccatttcccctccctg +gcttctcctcctgcagcatctttccctgtagttgtgtccattccccctccctggcttctc +ctgtagcatctttcc---tagttgtgtccatttcccctccctggcttctcctgtagcatc +tttcctagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagtt +gtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccattt +cccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctg +gcttctcctgtagcatctttccctgtagttgtgtccattccccctccctgacttctcctc +ctgcagcatctttccctgtagttgtgtccatttcccctccctggcgtctcctcctgcagc +atctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatctttcc-- +-tagttgtgtccatttcccctccctggcttctc--------------------------- +------------------------------------------------------------ +----ctcctgcagcatctttccctgtagttgtgtccattccccctccctggcttctcctg +tagcatctttcc-----------------------------------------------t +agttgtgtccatttcccctccctggcttctcctgtagcatctttcctagttgtgtccatt +tcccctccctggcttctcctgcagcatctttccc-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-tgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtccatttcc +cctccctgg--------------------------------------------------- +------------------------------------------------------------ +---------------------------------------------cttctcctgcagcat +ctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgt +agttgtgtccatttcccctccctggcttctcctgtagcatctttccctgtagttgtgtcc +atttcccctccctggcttctcctgcagcatctttccctg--------------------- +-----------------------------tagttgtgtccatttcccctccctggcttct +cctgtagcatctttccctgtagttgtgtccattccccctccctggcgtctcctcctgcag +catctttccctgtagttgtgtccattccccctccctggctt------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------ctcctgcagcatctttccctgtagttgtgtccatttcccctccc +tggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcc +tgtagcatctttcc---tagttgtgtccatttcccctccctggcttctcctgcagcatct +ttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatctttccctgtag +ttgtgtccattccccctccctgacttctcctcctgcagcatctttccctgtagttgtgtc +catttcccctccctggcgtctcctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctggcttctcctgtagcatctttcctagttgtgtccatttcccctccctggcttc +tcctcctgcagcatctttccctgtagttgtgtccattccccctccctggcttctcctgta +gcatctttcctagttgtgtccatttcccctccctggcttctcctgtagcatctttcc--- +tagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtc +catttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccct +ccctggcttctcctgtagcatctttccctgtagttgtgtccatttcccctccctggcttc +tcctgcagcatctttcc---tagttgtgtccatttcccctccctggcttctcctgcagca +tctttccctgtagttgtgtccatttcccctccctggcttctcctgtagcatctttccctg +tagttgtgtccattccccctccctggcgtctcctcctgcagcatctttccctgtagttgt +gtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcc +cctccctggcttctcctgtagcatctttccctgtagttgtgtccattccccctccctggc +gtctcctcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcct +cctgcagcatctttccctgtagttgtgtccattccccctccctggcttctcctcctgcag +catctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccc +tgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtg +tccatttcccctccctggcttctcctcttctagcatctttcctgctccctggtagcctct +tgggagccacctatttctggaggtccccactctgggttccttgtcctgggtgtggggcga +atgtgctggactggggtcacagcattgaaccccacttggagctgagga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b8a80f7b3dfaf0a255d5fa6ea4cdadc6.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b8a80f7b3dfaf0a255d5fa6ea4cdadc6.msa new file mode 100644 index 00000000..da248481 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b8a80f7b3dfaf0a255d5fa6ea4cdadc6.msa @@ -0,0 +1,90 @@ +>syndip_1_chr20:32722324-32723313 +acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct +cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg +gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc +acctgaggtcaggagttcaagaccagcctggccaacatggtgaaacttggtctctactaa +aaatacaaaaattagccaggcatggtggtgggcgcctgcaatcccagctacccagaaggc +tgagtcagaagaatcacttgaacccgggaggtggaggttgcagtgagccgagatcgtgcc +actgcactccagcctgggtgacagagtgagactcaggaaaaaaaaaaaaaagacttggcc +gagcccagtggctcacacctgtaatcccagcactttgggaggctgaggcgggtggatcac +gaggtcaagagatcgagaccatcctggctaacacggtgaaaccccatctctactaaaaaa +tacaaaaaattagccgggcgtggtggtgggcacctgtagtcccagctactcgggaggctg +aggcagaatggcgtgaacctggtaggcagagcttgcagtgagctgagatcacgccactgc +actccagcctgggtgacagagcaagactccatctcaaaaataaataaataaataaataaa +taaataa-taataataataataaaaaa--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------- +>syndip_2_chr20:32722324-32723313 +acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct +cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg +gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc +acctgaggtcaggagttcaagaccagcctggccaacatggtgaaacttggtctctactaa +aaatacaaaaattagccaggcatggtggtgggcgcctgcaatcccagctacccagaaggc +tgagtcagaagaatcacttgaacccgggaggtggaggttgcagtgagccgagatcgtgcc +actgcactccagcctgggtgacagagtgagactcaggaaaaaaaaaaaaaagacttggcc +gagcccagtggctcacacctgtaatcccagcactttgggaggctgaggcgggtggatcac +gaggtcaagagatcgagaccatcctggctaacacggtgaaaccccatctctactaaaaaa +tacaaaaaattagccgggcgtggtggtgggcacctgtagtcccagctactcgggaggctg +aggcagaatggcgtgaacctggtaggcagagcttgcagtgagctgagatcacaccactgc +actccagcctgggtgacagagcaagactccatctcaaaaataaataaataaataaataaa +taaataaataataataataataataat--------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------- +>p:HG002_1_chr20:32722324-32723313 +acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct +cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg +gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc +acctgaggtcaggagttcaagaccagcctggccaacatggtgaaacttggtctctactaa +aaatacaaaaattagccaggcatggtggtgggcgcctgcaatcccagctacccagaaggc +tgagtcagaagaatcacttgaacccgggaggtggaggttgcagtgagccgagatcgtgcc +actgcactccagcctgggtgacagagtgagactcaggaaaaaaaaaaaaaagacttggcc +gagcccagtggctcacacctgtaatcccagcactttgggaggctgaggcgggtggatcac +gaggtcaagagatcgagaccatcctggctaacacggtgaaaccccatctctactaaaaaa +tacaaaaaattagccgggcgtggtggtgggcacctgtagtcccagctactcgggaggctg +aggcagaatggcgtgaacctggtaggcagagcttgcagtgagctgagatcacaccactgc +actccagcctgggtgacagagcaagactccatctcaaaaataaataaataaataaataaa +taaataa----------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------- +>p:HG002_2_chr20:32722324-32723313 +acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct +cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg +gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc +acctgaggtcaggagttcaagaccagcctggccaacatggtgaaacttggtctctactaa +aaatacaaaaattagccaggcatggtggtgggcgcctgcaatcccagctacccagaaggc +tgagtcagaagaatcacttgaacccgggaggtggaggttgcagtgagccgagatcgtgcc +actgcactccagcctgggtgacagagtgagactcaggaaaaaaaaaaaaaagacttggcc +gagcccagtggctcacacctgtaatcccagcactttgggaggctgaggcgggtggatcac +gaggtcaagagatcgagaccatcctggctaacacggtgaaaccccatctctactaaaaaa +tacaaaaaattagccgggcgtggtggtgggcacctgtagtcccagctactcgggaggctg +aggcagaatggcgtgaacctggtaggcagagcttgcagtgagctgagatcacgccactgc +actccagcctgggtgacagagcaagactccatctcaaaaataaataaataaataaataaa +taaataa-taataataataat--------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------- +>ref_chr20:32722324-32723313 +acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct +cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg +gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc +acctgaggtcaggagttcaagaccagcctggccaacatggtgaaacttggtctctactaa +aaatacaaaaattagccaggcatggtggtgggcgcctgcaatcccagctacccagaaggc +tgagtcagaagaatcacttgaacccgggaggtggaggttgcagtgagccgagatcgtgcc +actgcactccagcctgggtgacagagtgagactcaggaaaaaaaaaaaaaagacttggcc +gagcccagtggctcacacctgtaatcccagcactttgggaggctgaggcgggtggatcac +gaggtcaagagatcgagaccatcctggctaacacggtgaaaccccatctctactaaaaaa +tacaaaaaattagccgggcgtggtggtgggcacctgtagtcccagctactcgggaggctg +aggcagaatggcgtgaacctggtaggcagagcttgcagtgagctgagatcacgccactgc +actccagcctgggtgacagagcaagactccatctcaaaaataaataaataaataaataaa +taaataa-taataataataataaagacttgtgatcctctccctctccctctccctctccg +tctccctctccctctccctctccctccctctccgtctccgtctccgtctccgtctccctc +tccctctccccacggtctccctctcatgcggagccgaagctggactgtactgctgccatc +tcggctcactgcaacctccctgcctgattctcctgcctcagcctgccgagtgcctgcgat +tgcaggcacgcgccgccacgcctgactggtt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa new file mode 100644 index 00000000..69a6d6b4 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa @@ -0,0 +1,125 @@ +>syndip_1_chr20:641736-642566 +atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct +gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc +cccccaatctggttgcctgggaggggggcccagcgggggtggagttgcctgggggggggg +gcccagcgggggtggagttgcctgggggggg-ggcccagcgggggtggagttgcctgggg +gggggaggcccagcgggggtggagttgcctgggggggg--gcccagcgggggtggagttg +cctggg-----------------------------ggggggaggcccagcgggggtggag +ttgcctgggggggggcccagcgggggtggagttgcctgggggggg-ggcccagcgggggt +ggagttgcctgggggggggaggcccagcgggggtggagttgcctgggggggg--gcccag +cgggggtggagttgcctgggg---------------------------gggggaggccca +gcgggggtggagttgcctgggggggg-------------------------------ggc +ccagcgggggtggagttgcctg-gggggg---ggcccagcgggggtggagttgcctgggg +gggg---cccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcctg +-gggggggccccagcgggggtggagttgcctgggggggg--gcccagcgggggtggagtt +gcctgggggggg--gcccagcgggggtggagttgcctggggggggcccagcgggggtgga +gttgcctggggggggccccagcgggggtggagttgcctgggggggggcccagcgggggtg +gagttgcctggggggggggcccagcgggggtggagttgcctggggggggggcccagcggg +ggtggagttgcctgggggggggggcccagcgggggtggagttgcctgggggggggcccag +cgggggtggagttgcctg-gggggggggcccagcgggggtggagttgcct-ggggggggc +ccagcgggggtggagttgcct-gggggggggcccagcgggggtggagttgcctg-ggggg +ggggcccagcgggggtggagttgcctg-gggggggcccagcgggggtggagttgcctgtg +ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct +gggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag +gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg +tcgggcggggggcggcgctcgcagttccggaggggg +>syndip_2_chr20:641736-642566 +atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct +gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc +cccccaatctggttgcctgggacgggggcccagcgggggtggagttgcct-ggggggggg +gcccagcgggggtggagttgcctggggggggaggcccagcgggggtggagttgcctgggg +gggggaggcccagcgggggtggagttgcctgggggggggtgcccagcgggggtggagttg +cctggggggggcccagcgggggtggagttgcctgtggggggaggcccagcgggggtggag +ttgcct-ggggggggcccagcgggggtggggttgcctggggggggaggcccagcgggggt +ggagttgcctgggggggggaggcccagcgggggtggagttgcctgggggggggtgcccag +cgggggtggagttgcctgggggggggcccagcgggggtggagttgcctggggggggccca +gcgggggtggggttgcctggggggggcccagcgggggtggggttgcctggggggggaggc +ccagcgggggtggagttgcctg-ggggggggaggcccagcgggggtggagttgcctgggg +gggggtgcccagcgggggtggagttgcctggggggggcccagcgggggtggggttgcctg +-ggggggg-cccagcgggggtggggttgcctggggggggaggcccagcaggggtggagtt +gcctgggggggggtgcccagcgggggtggagttgcctggggggggcccagcgggggtgga +gttgcctgggggggg-cccagcgggggtggagttgcctgggggggggcccagcggggatg +gggttgcct-gggggggggcccagcgggggtggggttgcct-gggggggggcccagcggg +ggtggagttgcct--gggggggggcccagcgggggtggagttgcct-ggggggggcccag +cgggggtggagttgcctgtgggggggggcccagcgggggtggagttgcctgggggggggc +ccagcgggggtggagttgcctggggggggggcccagcgggggtggagttgcctgtggtgg +ggggcccagcgggggtggagttgcctgtgggggggcccagcgggggtggagttgcctgtg +ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct +-ggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag +gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg +tcgggcggggggcggcgctcgcagttccggaggggg +>p:HG002_1_chr20:641736-642566 +atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct +gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc +cccccaatctggttgcctgggacgggggcccagcgggggtggagttgcct-ggggggggg +gcccagcgggggtggagttgcctggggggggaggcccagcgggggtggagttgcctgggg +gggggaggcccagcgggggtggagttgcctgggggggggtgcccagcgggggtggagttg +cctggggggggcccagcgggggtggagttgcctgtggggggaggcccagcgggggtggag +ttgcct-ggggggggcccagcgggggtggggttgcctggggggggaggcccagcgggggt +ggagttgcctgggggggggaggcccagcgggggtggagttgcctgggggggggtgcccag +cgggggtggagttgcctgggggggggcccagcgggggtggagttgcctggggggggccca +gcgggggtggggttgcctggggggggcccagcgggggtggggttgcctggggggggaggc +ccagcgggggtggagttgcctg-ggggggggaggcccagcgggggtggagttgcctgggg +gggggtgcccagcgggggtggagttgcctggggggggcccagcgggggtggggttgcctg +-ggggggg-cccagcgggggtggggttgcctggggggggaggcccagcaggggtggagtt +gcctgggggggggtgcccagcgggggtggagttgcctggggggggcccagcgggggtgga +gttgcctgggggggg-cccagcgggggtggagttgcctgggggggggcccagcggggatg +gggttgcct-gggggggggcccagcgggggtggggttgcct-gggggggggcccagcggg +ggtggagttgcct--gggggggggcccagcgggggtggagttgcct-ggggggggcccag +cgggggtggagttgcctgtgggggggggcccagcgggggtggagttgcctgggggggggc +ccagcgggggtggagttgcctggggggggggcccagcgggggtggagttgcctgtggtgg +ggggcccagcgggggtggagttgcctgtgggggggcccagcgggggtggagttgcctgtg +ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct +-ggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag +gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg +tcgggcggggggcggcgctcgcagttccggaggggg +>p:HG002_2_chr20:641736-642566 +atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct +gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc +cccccaatctggttgcctgggaggggggcccagcgggggtggagttgcctgggggggggg +gcccagcgggggtggagttgcctgggggggg-ggcccagcgggggtggagttgcctgggg +gggggaggcccagcgggggtggagttgcctgggggggg--gcccagcgggggtggagttg +cctggg-----------------------------ggggggaggcccagcgggggtggag +ttgcctgggggggggcccagcgggggtggagttgcctgggggggg-ggcccagcgggggt +ggagttgcctgggggggggaggcccagcgggggtggagttgcctgggggggg--gcccag +cgggggtggagttgcctgggg---------------------------gggggaggccca +gcgggggtggagttgcctgggggggg-------------------------------ggc +ccagcgggggtggagttgcctg-gggggg---ggcccagcgggggtggagttgcctgggg +gggg---cccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcctg +-gggggggccccagcgggggtggagttgcctgggggggg--gcccagcgggggtggagtt +gcctgggggggg--gcccagcgggggtggagttgcctggggggggcccagcgggggtgga +gttgcctggggggggccccagcgggggtggagttgcctgggggggggcccagcgggggtg +gagttgcctggggggggggcccagcgggggtggagttgcctggggggggggcccagcggg +ggtggagttgcctgggggggggggcccagcgggggtggagttgcctgggggggggcccag +cgggggtggagttgcctg-gggggggggcccagcgggggtggagttgcct-ggggggggc +ccagcgggggtggagttgcct-gggggggggcccagcgggggtggagttgcctg-ggggg +ggggcccagcgggggtggagttgcctg-gggggggcccagcgggggtggagttgcctgtg +ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct +gggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag +gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg +tcgggcggggggcggcgctcgcagttccggaggggg +>ref_chr20:641736-642566 +atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct +gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc +cccccaatctggttgcctgggacgggggcccagcgggggtggagttgcct-ggggggggg +gcccagcgggggtggagttgcctggggggggaggcccagcgggggtggagttgcctgggg +gggggaggcccagcgggggtggagttgcctgggggggggtgcccagcgggggtggagttg +cctggggggggcccagcgggggtggagttgcctgtggggggaggcccagcgggggtggag +ttgcct-ggggggggcccagcgggggtggggttgcctggggggggaggcccagcgggggt +ggagttgcctgggggggggaggcccagcgggggtggagttgcctgggggggggtgcccag +cgggggtggagttgcct------------------------------------------- +--------------------gggggg-------------------------------ggc +ccagcgggggtggagttgcctgtggtggg---gggccagcgggggtgaagttgcctgggg +gggg----ccagcgggggtggagttgcctgggggggggccagcgggggtggagttgcctg +tgggggggggccagcgggggtggagttgcctgggggggg----ccagcgggggtggagtt +gcctgggggggg----ccagcgggggtggagcaccctggacggcccc--------gcgga +gccccctgcgggaggcgagggcggccctcgggctctggaaggggccgcgcggcctggaag +acattacacggctgtcgggcggggggcggcgctcgcagttccggaggggg---------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------ diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa new file mode 100644 index 00000000..4d33da27 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa @@ -0,0 +1,75 @@ +>syndip_1_chr20:63371965-63372792 +ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag +catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca +caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga +cctggccatgcagcaggtctgcagggacctcagagcccagggccactgtcctgtccccag +ccccattaaacgcccagggccactgtcctgtccccagccccattaaac------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------gcccagggccactgtcctgtccccagccccattaaacgcccagg +gccactgtcctgtccccagccccattaaacgcccagggccactgtcctgtccccagcccc +attaaacgcccagggccactggcctgtccccagccccattaaacgcccagggccactgtc +ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc +ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc +gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct +>syndip_2_chr20:63371965-63372792 +ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag +catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca +caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga +cctggccatgcagcaggtctgcagggacctcagagcccagggccactgtcctgtccccag +ccccattaaacgcccagggccactgtcctgtccccagccccattaaac------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------gcccagggccactgtcctgtccccagccccattaaacgcccagg +gccactgtcctgtccccagccccattaaacgcccagggccactgt--------------- +----------------------cctgtccccagccccattaaacgcccagggccactgtc +ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc +ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc +gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct +>p:HG002_1_chr20:63371965-63372792 +ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag +catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca +caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga +cctggccatgcagcaggtctgcagggacctcagagcccagggccactgtcctgtccccag +ccccattaaacgcccagggccactgtcctgtccccagccccattaaac------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------gcccagggccactgtcctgtccccagccccattaaacgcccagg +gccactgtcctgtccccagccccattaaacgcccagggccactgt--------------- +----------------------cctgtccccagccccattaaacgcccagggccactgtc +ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc +ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc +gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct +>p:HG002_2_chr20:63371965-63372792 +ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag +catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca +caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga +cctggccatgcagcaggtctgcagggacctcagagcccagggccactgtcctgtccccag +ccccattaaacgcccagggccactgtcctgtccccagccccattaaac------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------gcccagggccactgtcctgtccccagccccattaaacgcccagg +gccactgtcctgtccccagccccattaaacgcccagggccactgtcctgtccccagcccc +attaaacgcccagggccactggcctgtccccagccccattaaacgcccagggccactgtc +ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc +ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc +gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct +>ref_chr20:63371965-63372792 +ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag +catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca +caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga +cctggccatgcagcaggtctgcagggacctcagagcccagggccactgtcctgtccccag +ccccattaaacgcccagggccactgtcctgtccccagccccattaaacgcccagggccac +tgtcctgtccccagccccattaaacgcccagggccactgtcctgtccccagccccattaa +acgcccagggccactgtcctgtccccagccccattaaacgcccagggccactgtcctgtc +cccagccccattaaatgcccagggccactgtcctgtccccagccccattaaacgcccagg +gccactgtcctgtccccagccccattaaacgcccagggccactgtcctgtccccagcccc +attaaacgcccagggccactgtcctgtccccagccccattaaacgcccagggccactgtc +ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc +ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc +gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa new file mode 100644 index 00000000..ed4a8e48 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa @@ -0,0 +1,70 @@ +>syndip_1_chr20:34235650-34236314 +agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca +ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga +gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaggaaggaaggaaggaaggaaggaaggaaggaaggaaggaagggaaggaag +gaaggaaggaaggaagcgggagggagggaagaaggaaggaaggaggagggagggaagaat +gaaggaaggaaggaaggagggaaggaaggaaggaagcgggagggagggaagaaggaagga +aggaaggaaggagggagggagggaagaaggaaggaggagggagggagggaagcaggaaag +aaggaaggaaggagaaagagagaa--agagagagaagaagaaagagaaagaaacagaaaa +ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa +aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa +agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag +gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat +cccagcactttgggaggccgaagcgggc +>syndip_2_chr20:34235650-34236314 +agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca +ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga +gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaggaaggaaggaaggaaggaaggaaggaaggaaggaaggaagggaaggaag +gaaggaaggaaggaagcgggagggagggaagaaggaaggaaggaggagggagggaagaat +gaaggaaggaaggaaggagggaaggaaggaaggaagcgggagggagggaagaaggaagga +aggaaggaaggagggagggagggaagaaggaaggaggagggagggagggaagcaggaaag +aaggaaggaaggagaaagagagaa--agagagagaagaagaaagagaaagaaacagaaaa +ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa +aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa +agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag +gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat +cccagcactttgggaggccgaagcgggc +>p:HG002_1_chr20:34235650-34236314 +agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca +ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga +gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaggaaggaaggaaggaaggaaggaaggaaggaaggaaggaagggaaggaag +gaaggaaggaaggaagcgggagggagggaagaaggaaggaaggaggagggagggaagaat +gaaggaaggaaggaaggagggaaggaaggaaggaagcgggagggagggaagaaggaagga +aggaaggaaggagggagggagggaagaaggaaggaggagggagggagggaagcaggaaag +aaggaaggaaggagaaagagagaa--agagagagaagaagaaagagaaagaaacagaaaa +ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa +aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa +agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag +gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat +cccagcactttgggaggccgaagcaggc +>p:HG002_2_chr20:34235650-34236314 +agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca +ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga +gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaggaaggaaggaaggaaggaaggaaggaaggaaggaaggaagggaaggaag +gaaggaaggaaggaagcgggagggagggaagaaggaaggaaggaggagggagggaagaat +gaaggaaggaaggaaggagggaaggaaggaaggaagcgggagggagggaagaaggaagga +aggaaggaaggagggagggagggaagaaggaaggaggagggagggagggaagcaggaaag +aaggaaggaaggagaaagagagaa--agagagagaagaagaaagagaaagaaacagaaaa +ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa +aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa +agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag +gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat +cccagcactttgggaggccgaagcaggc +>ref_chr20:34235650-34236314 +agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca +ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga +gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag +aaagaaagaaagaaggaaggaaggaaggaaggaaggaaaggaaggaaggaaggaaggaag +gaaggaagga-------------------------------------------------- +-----aaggaaggaaggaaggaaggaaggaaggaagcgggagggagggaagaaggaagga +aggaaggaaggagggagggagggaaga--------------------------aggaaag +aaggaaggaaggagaaagagagaaagagagagagaagaagaaagagaaagaaacagaaaa +ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggaaggagaaag--aa +aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa +agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag +gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat +cccagcactttgggaggccgaagcgggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa new file mode 100644 index 00000000..0c0de308 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa @@ -0,0 +1,145 @@ +>syndip_1_chr20:61783303-61784940 +ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt +tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc +tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat +gtcctgtgcccccgaaagaaatgtaatcccagttccttgggatagttctcaaggccctcc +gatatcctgtgcccctgggagaaatgtaagcccagttcctcaggacagttctcgaggccc +tcagatgtcctgtgctcccaggagaatataagcctagttcctcgggacagttctcgaggc +cctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagttctccg +ggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggacagttct +cgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcctcgggacagt +tctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagttccttgggac +agttctcaaggccctccgatatcctgtgcccccaggagaatgtaagcccagttcctcggg +acagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcccagttccttg +ggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcc +tcgggacagttctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagt +tccttgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgtaagccca +gttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcc +cagttccttgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaa +tcccagttcctcgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgt +aatcccagttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaa +tgtaatcccagttcctcgggacagttctccaggccctccgatatcctgtgcccccaggag +aatgtaagcccagttcctcgggacagttctcgaggccctcagatgtcctgtgcccccaag +agaaatgtaatcccagttcctcgggacagttctcaaggccctccgatatcctgtgccccc +aggagaatgtaag----------------------------------------------- +----------------cccagttcctcgggacagttctccaggccctccgatatcctgtg +cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg +tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg +agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc +ctccttcctggcctccag +>syndip_2_chr20:61783303-61784940 +ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt +tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc +tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat +gtcctgtgcccccgaaagaaatgtaatcccagttccttgggatagttctcaaggccctcc +gatatcctgtgcccctgggagaaatgtaagcccagttcctcaggacagttctcgaggccc +tcagatgtcctgtgctcccaggagaatataagcctagttcctcgggacagttctcgaggc +cctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagttctccg +ggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggacagttct +cgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagt +tctcgaggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggac +agttctcaaggccctccgatatcctgtgcccccaggagaatgtaagcccagttcctc--- +-----------------------------------------------------------g +ggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcc +tcgggacagttctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagt +tccttgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgtaagccca +gttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcc +cagttccttgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaa +tcccagttcctcgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgt +aagcccagttcctcgggacagttctcgaggccttcaggtgtcttgtgcccccaagagaaa +tgtaatcccagttcctcggaacagttctccaggccctcagatgtcctgtgcccccaagag +a----------------------------------------------------------- +------------------------------------------------------------ +-----aatgtaat----------------------------------------------- +----------------cccagttcctcgggacagttctccaggccctccgatatcctgtg +cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg +tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg +agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc +ctccttcctggcctccag +>p:HG002_1_chr20:61783303-61784940 +ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt +tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc +tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat +gtcctgtgcccccgaaagaaatgtaatcccagttccttgggatagttctcaaggccctcc +gatatcctgtgcccctgggagaaatgtaagcccagttcctcaggacagttctcgaggccc +tcagatgtcctgtgctcccaggagaatataagcctagttcctcgggacagttctcgaggc +cctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagttctccg +ggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggacagttct +cgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagt +tctcgaggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggac +agttctcaaggccctccgatatcctgtgcccccaggagaatgtaagcccagttcctc--- +-----------------------------------------------------------g +ggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcc +tcgggacagttctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagt +tccttgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgtaagccca +gttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcc +cagttccttgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaa +tcccagttcctcgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgt +aagcccagttcctcgggacagttctcgaggccttcaggtgtcttgtgcccccaagagaaa +tgtaatcccagttcctcggaacagttctccaggccctcagatgtcctgtgcccccaagag +a----------------------------------------------------------- +------------------------------------------------------------ +-----aatgtaat----------------------------------------------- +----------------cccagttcctcgggacagttctccaggccctccgatatcctgtg +cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg +tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg +agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc +ctccttcctggcctccag +>p:HG002_2_chr20:61783303-61784940 +ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt +tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc +tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat +gtcctgtgcccccgaaagaaatgtaatcccagttccttgggatagttctcaaggccctcc +gatatcctgtgcccctgggagaaatgtaagcccagttcctcaggacagttctcgaggccc +tcagatgtcctgtgctcccaggagaatataagcctagttcctcgggacagttctcgaggc +cctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagttctccg +ggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggacagttct +cgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcctcgggacagt +tctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagttccttgggac +agttctcaaggccctccgatatcctgtgcccccaggagaatgtaagcccagttcctcggg +acagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcccagttccttg +ggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcc +tcgggacagttctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagt +tccttgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgtaagccca +gttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcc +cagttccttgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaa +tcccagttcctcgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgt +aatcccagttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaa +tgtaatcccagttcctcgggacagttctccaggccctccgatatcctgtgcccccaggag +aatgtaagcccagttcctcgggacagttctcgaggccctcagatgtcctgtgcccccaag +agaaatgtaatcccagttcctcgggacagttctcaaggccctccgatatcctgtgccccc +aggagaatgtaag----------------------------------------------- +----------------cccagttcctcgggacagttctccaggccctccgatatcctgtg +cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg +tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg +agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc +ctccttcctggcctccag +>ref_chr20:61783303-61784940 +ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt +tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc +tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat +gtcctgtgcccccgaaagaaatgtaatcccagttccttgggatagttctcaaggccctcc +gatatcctgtgcccctgggagaaatgtaagcccagttcctcaggacagttctcgaggccc +tcagatgtcctgtgctcccaggagaatataagcctagttcctcgggacagttctcgaggc +cctcaggtgtcttgtgcccccaagagaaatgtaatcccagttccttgggacagttctccg +ggccctcagatgtcctgtgcccccgggagaaatgtaagcctagttcctcgggacagttct +cgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcctcgggacagt +tctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagttccttgggac +agttctcaaggccctccgatatcctgtgcccccaggagaatgtaagcccagttcctcggg +acagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcccagttccttg +ggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaatcccagttcc +tcgggacagttctcaaggccctcagatgtcctgtgcccccaagagaaatgtaatcccagt +tccttgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgtaagccca +gttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaggagaatgtaagcc +cagttccttgggacagttctcgaggccctcaggtgtcttgtgcccccaagagaaatgtaa +tcccagttcctcgggacagttctcaaggccctccgatatcctgtgcccccaggagaatgt +aatcccagttcctcgggacagttctcgaggccctcagatgtcctgtgcccccaagagaaa +tgtaatcccagttcctcgggacagttctcaaggccctccgatatcctgtgcccccaggag +aatgtaagcccagttcctcgggacagttctcgaggccctcaggtgtcttgtgcccccaag +agaaatgtaatcccagttcctcgggacagttctccaggccctccgatatcctgtgccccc +aggagaatgtaagcccagttcctcgggacagttctcgaggccctcagatgtcctgtgccc +ccaagagaaatgtaatcccagttcctcgggacagttctcaaggccctccgatatcctgtg +cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg +tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg +agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc +ctccttcctggcctccag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa new file mode 100644 index 00000000..eda44657 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa @@ -0,0 +1,350 @@ +>syndip_1_chr20:38462831-38464884 +caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc +tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca +agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat +ataaatatatatttatatatatttatatataatatattatatataaatatatttatatat +aatatattatatataaatatattatatataaaatattatatataaatatatatatttata +tatataaatatatataaatatatatatatataaaaataccagcacatcagtatcaggaat +aaatgggctgggcagtatatatatatatatatatgtatatatatatactgtttttcatat +atatatactgtcttatatatatatatactgttttacatatatatactgtttcatatatat +aatgttttatatatatactgttatatatattatatgtactgttatatattttttatatat +atactgttatatatattttttatatatactgttatatattatatatatactgttatatat +atattttacatatatactgttatatatatattttatatatatatactgttatatatatat +attttatatatgtatactgttatatatttatattttttatatgtatactgttatatatat +attatatgtatactgttatatatatattttatatgtatactgttatatatatatatttca +tatatatatactgttatatatattttatatatatatactgtcatatatatattttatata +tatactgtcatatatatattttatatatatatactgtcatatatatattttatatatata +tatactgtcatatatatattttatatatatatactgtcatatatattttatatatatact +gtcatatatatattttatatatatactgtcatatatatattttatatatatactgtcata +tatatattt--tatatatatactgtcatatatatattttatatatatatactgtcatata +tatattttatatatatatactgtcatatatatattttatatatatactgtcatatgtata +ttttatatatatactgtcatatgtatattt------------------------------ +-tatatatatactgttatatgtatattttatatatatatactgttatatatatattttta +tatatatactgttatatatatttttttatatatatactgttatatatatttttatatata +tactgttatatatatattttatatatatactgttatatatatattttatatatactgtta +tatatatatattttatatatatactgttatatataatttttttcatatatatactgttat +atatatattttatatatatatactgttatatatatattttatatatatactgttatatat +atatattttatatatatactgttattttttatatatatactgttatatatatattttata +tatatactgttatatatatattatctatatactgttatgtgtatattttatctatatact +gttatgtgtatattttatctatatactgttatgtgtatattttacctatatacttttatg +tgtatattttatatatatactgttatgtgtatattttatatatatactgttatgtgtata +ttttttatatatactgttatgtgtatattttatatatatatactgttatgtgtatatttt +atatatatatactgttatgtgtatattttatatatatactgttatgtgtatattttatat +atatactgttatatatagttcatatatatactgttatatgtatagttcatatatatactg +ttatatatatagttcatatgtatactgttatatatatagttcatatatatactgttatat +atatagttcatatatatactgttatatatatatagttcatatatatactgttatatatag +ttcatatatatactgttatatatatagttcatatatatactgttatatatatagttcata +tatatactgttatatatagttcatatatatatactgttctatatatagttcatatatata +tactgttatatatatagttcatatatatatactgttatatatagttcatatatatatagt +tcatatatatactgttatatatatagttcatatatatactgttatatacatagttcatat +atatactgttatatatatagttcatatatatatactgttatatatatagttcatatatat +actgttgtatatagttcatatatatactgttatatatatagttcatatattactgttata +tatatagttcatatatatactgttatatatatatagttcatatatatactgttatatata +gtttatatatatatactgttatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgttatatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgttatatatatatagtttatatatactgttatatatatagtttat +atatatactgttatatatatagtttatatatatactgttatatatatagtttatatatat +actgttatatatatagtttatatatatactgttatatatatagtttatatatatactgtt +atatatatagtttatatatatactgttatatatatagtttatatatatatactgt--tat +atatagtttatatatactgttatatatatagtttatatactgttatatatagtttatata +tatatactgttatatatagtttatatatatatactgttatatatatatagtttatatata +tatactgttatatatatatagtttatatatatactgttatatatatatagtttatatata +tactgttgtatatatagtttatatatatactgttatatatatatttgtgtatatatactg +ttatataaatatatttgtgtatatatactgttatatatatattttatgtatatatactgt +tatatatatattttatgtatatatactgttatatatatattttatgtatatatactgtta +tatatatattttatgtatatatactgttatatatatattttatgtatatatactgttata +tatattttatgtatatatactgttatatatattttatgtatatatactgttatatatata +ttttatgtatatatactgttatatctgtatattttatgtttatatactgttatatctgta +tattttatgtatatatactgttatatctgtatattttatgtatatactgttatatatata +ctgttatatatatactgatatatatactgttatatatatactgatatatatatactgtta +tatatatactgttatatatatatactgttacatatacatatatatactgttatatatata +ctgttatatatattgttatatatatacacatatatatatac--atatatatacatatata +tatacatatatatatacatatatatatacacgtatatatatacatatatatatacacata +tatatatatacacacacatatatatatatatatatactggctgtgtgtggtggtgcacac +ctgtaatcccagcactttgggaggccaaggcaggctgatggcttgagctcaggagttcaa +gaccagctaagcaacatactgagatcccatctctaaaaaaaatacaaaaaggccaggcac +agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc +agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa +cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg +agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc +cagcctgggcaacagagcaagactctgtctccaaaaaaa- +>syndip_2_chr20:38462831-38464884 +caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc +tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca +agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat +ataaatatatatttatatatatttatatataatatattatatat-aatatatttatatat +aatatattatatataaatatattatatataaaatattatatataaatatatatatttata +tatataaatatatataaatatatatatatataaaaataccagcacatcagtatcaggaat +aaatgggctgggcag--tatatatatatatatatgtatatatatatactgtttttcatat +atatatactgtcttatatatatatatactgttttacatatatatactgtttcatatatat +aatgttttatatatatactgttatatatattatatgtactgttatatattttttatatat +atactgttatatatattttttatatatactgttatatattatatatatactgttatatat +atattttacatatatactgttatatatatattttatatatatatactgttatatatatat +attttatatatgtatactgttatatatttatattttttatatgtatactgttatatatat +attatatgtatactgttatatatatattttatatgtatactgttatatatatatatttca +tatatatatactgttatatatattttatatatatatactgtcatatatatattttatata +tatactgtcatatatatattttatatatatatactgtcatatatatattt--tatatata +tatactgtcatatatatattttatatatatatactgtcatatatattttatatatatact +gtcatatatatattttatatatatactgtcatatatatattttatatatatactgtcata +tatatattttatatatatatactgtcatatatatattttatatatatatactgtcatata +tatattt--tatatatatactgtcatatgtatattttatatatatactgtcatatgtata +ttttatatatatactgttatatgtatattttatatatatatactgttatatatatatttt +atatatatatactgttatatatatattttatatatatatactgttatatatatattttta +tatatatactgttatatatatttttttatatatatactgttatatatatttttatatata +tactgttatatatatattttatatatatactgttatatatatattttatatatactgtta +tatatatatattttatatatatactgttatatataatttttttcatatatatactgttat +atatatattttatatatatatactgttatatatatattttatatatatactgttatatat +atatattttatatatatactgttattttttatatatatactgttatatatatattttata +tatatactgttatatatatattatctatatactgttatgtgtatattttatctatatact +gttatgtgtatattttatctatatactgttatgtgtatattttacctatatacttttatg +tgtatattttatatatatactgttatgtgtatattttatatatatactgttatgtgtata +ttttttatatatactgttatgtgtatattttatatatatatactgttatgtgtatatttt +atatatatatactgttatgtgtatattttatatatatactgttatgtgtatattttatat +atatactgttatatatagttcatatatatactgttatatgtatagttcatatatatactg +ttatatatatagttcatatgtatactgttatatatatagttcatatatatactgttatat +atatagttcatatatatactgttatatatatatagttcatatatatactgttatatatag +ttcatatatatactgttatatatatagttcatatatatactgttatatatatagttcata +tatatactgttatatatagttcatatatatatactgttctatatatagttcatatatata +tactgttatatatatagttcatatatatatactgttatatatagttcatatatatatagt +tcatatatatactgttatatatatagttcatatatatactgttatatacatagttcatat +atatactgttatatatatagttcatatatatatactgttatatatatagttcatatatat +actgttgtatatagttcatatatatactgttatatatatagttcatatattactgttata +tatatagttcatatatatactgttatatatatatagttcatatatatactgttatatata +gtttatatatatatactgttatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgttatatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgt--tatatatatagtttatatatactgtta-------------- +-----------tatatatatagtttatatatatactgttatatatatagtttatatatat +actgttatatatatagtttatatatatactgttatatatatagtttatatatatactgtt +atatatatagtttatatatatactgttatatatatagtttatatatatatactgttatat +atatagtttataaatactgttatatatatagtttatatactgttatatatagtttatata +tatatactgttatatatagtttatatatatatactgttatatatatatagtttatatata +tatactgttatatatatatagtttatatatatactgttatatatatatagtttatatata +tactgttgtatatatagtttatatatatactgttatatatatatttgtgtatatatactg +ttatataaatatatttgtgtatatatactgttatatatatattttatgtatatatactgt +tatatatatattttatgtatatatactgttatatatatattttatgtatatatactgtta +tatatatattttatgtatatatactgtta---------------------------tata +tatattttatgtatatatactgttatatatattttatgtatatatactgttatatatata +ttttatgtatatatactgttatatctgtatattttatgtttatatactgttatatctgta +tattttatgtatatatactgttatatctgtatattttatgtatatactgttatatatata +ctgttatatatatactgatatatatactgttatatatatactgatatatatatactgtta +tatatatactgttatatatatatactgttacatatacatatatatactgttatatatata +ctgttatatatattgttatatatatacacat------------atatatatacatatata +tatacatatatatatacatatatatatacacgtatatatatacatatatatatacacata +tatatatatacacacacatatatatatatatatatactggctgtgtgtggtggtgcacac +ctgtaatcccagcactttgggaggccaaggcaggctgatggcttgagctcaggagttcaa +gaccagctaagcaacatactgagatcccatctctaaaaaaaatacaaaaaggccaggcac +agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc +agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa +cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg +agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc +cagcctgggcaacagagcaagactctgtctccaaaaaaaa +>p:HG002_1_chr20:38462831-38464884 +caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc +tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca +agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat +ataaatatatatttatatatatttatatataatatattatatat-aatatatttatatat +aatatattatatataaatatattatatataaaatattatatataaatatatatatttata +tatataaatatatataaatatatatatatataaaaataccagcacatcagtatcaggaat +aaatgggctgggcag--tatatatatatatatatgtatatatatatactgtttttcatat +atatatactgtcttatatatatatatactgttttacatatatatactgtttcatatatat +aatgttttatatatatactgttatatatattatatgtactgttatatattttttatatat +atactgttatatatattttttatatatactgttatatattatatatatactgttatatat +atattttacatatatactgttatatatatattttatatatatatactgttatatatatat +attttatatatgtatactgttatatatttatattttttatatgtatactgttatatatat +attatatgtatactgttatatatatattttatatgtatactgttatatatatatatttca +tatatatatactgttatatatattttatatatatatactgtcatatatatattttatata +tatactgtcatatatatattttatatatatatactgtcatatatatattt--tatatata +tatactgtcatatatatattttatatatatatactgtcatatatattttatatatatact +gtcatatatatattttatatatatactgtcatatatatattttatatatatactgtcata +tatatattttatatatatatactgtcatatatatattttatatatatatactgtcatata +tatattt--tatatatatactgtcatatgtatattttatatatatactgtcatatgtata +ttttatatatatactgttatatgtatattttatatatatatactgttatatatatatttt +atatatatatactgttatatatatattttatatatatatactgttatatatatattttta +tatatatactgttatatatatttttttatatatatactgttatatatatttttatatata +tactgttatatatatattttatatatatactgttatatatatattttatatatactgtta +tatatatatattttatatatatactgttatatataatttttttcatatatatactgttat +atatatattttatatatatatactgttatatatatattttatatatatactgttatatat +atatattttatatatatactgttattttttatatatatactgttatatatatattttata +tatatactgttatatatatattatctatatactgttatgtgtatattttatctatatact +gttatgtgtatattttatctatatactgttatgtgtatattttacctatatacttttatg +tgtatattttatatatatactgttatgtgtatattttatatatatactgttatgtgtata +ttttttatatatactgttatgtgtatattttatatatatatactgttatgtgtatatttt +atatatatatactgttatgtgtatattttatatatatactgttatgtgtatattttatat +atatactgttatatatagttcatatatatactgttatatgtatagttcatatatatactg +ttatatatatagttcatatgtatactgttatatatatagttcatatatatactgttatat +atatagttcatatatatactgttatatatatatagttcatatatatactgttatatatag +ttcatatatatactgttatatatatagttcatatatatactgttatatatatagttcata +tatatactgttatatatagttcatatatatatactgttctatatatagttcatatatata +tactgttatatatatagttcatatatatatactgttatatatagttcatatatatatagt +tcatatatatactgttatatatatagttcatatatatactgttatatacatagttcatat +atatactgttatatatatagttcatatatatatactgttatatatatagttcatatatat +actgttgtatatagttcatatatatactgttatatatatagttcatatattactgttata +tatatagttcatatatatactgttatatatatatagttcatatatatactgttatatata +gtttatatatatatactgttatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgttatatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgt--tatatatatagtttatatatactgtta-------------- +-----------tatatatatagtttatatatatactgttatatatatagtttatatatat +actgttatatatatagtttatatatatactgttatatatatagtttatatatatactgtt +atatatatagtttatatatatactgttatatatatagtttatatatatatactgttatat +atatagtttataaatactgttatatatatagtttatatactgttatatatagtttatata +tatatactgttatatatagtttatatatatatactgttatatatatatagtttatatata +tatactgttatatatatatagtttatatatatactgttatatatatatagtttatatata +tactgttgtatatatagtttatatatatactgttatatatatatttgtgtatatatactg +ttatataaatatatttgtgtatatatactgttatatatatattttatgtatatatactgt +tatatatatattttatgtatatatactgttatatatatattttatgtatatatactgtta +tatatatattttatgtatatatactgtta---------------------------tata +tatattttatgtatatatactgttatatatattttatgtatatatactgttatatatata +ttttatgtatatatactgttatatctgtatattttatgtttatatactgttatatctgta +tattttatgtatatatactgttatatctgtatattttatgtatatactgttatatatata +ctgttatatatatactgatatatatactgttatatatatactgatatatatatactgtta +tatatatactgttatatatatatactgttacatatacatatatatactgttatatatata +ctgttatatatattgttatatatatacacat------------atatatatacatatata +tatacatatatatatacatatatatatacacgtatatatatacatatatatatacacata +tatatatatacacacacatatatatatatatatatactggctgtgtgtggtggtgcacac +ctgtaatcccagcactttgggaggccaaggcaggctgatggcttgagctcaggagttcaa +gaccagctaagcaacatactgagatcccatctctaaaaaaaatacaaaaaggccaggcac +agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc +agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa +cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg +agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc +cagcctgggcaacagagcaagactctgtctccaaaaaaaa +>p:HG002_2_chr20:38462831-38464884 +caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc +tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca +agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat +ataaatatatatttatatatatttatatataatatattatatataaatatatttatatat +aatatattatatataaatatattatatataaaatattatatataaatatatatatttata +tatataaatatatataaatatatatatatataaaaataccagcacatcagtatcaggaat +aaatgggctgggcagtatatatatatatatatatgtatatatatatactgtttttcatat +atatatactgtcttatatatatatatactgttttacatatatatactgtttcatatatat +aatgttttatatatatactgttatatatattatatgtactgttatatattttttatatat +atactgttatatatattttttatatatactgttatatattatatatatactgttatatat +atattttacatatatactgttatatatatattttatatatatatactgttatatatatat +attttatatatgtatactgttatatatttatattttttatatgtatactgttatatatat +attatatgtatactgttatatatatattttatatgtatactgttatatatatatatttca +tatatatatactgttatatatattttatatatatatactgtcatatatatattttatata +tatactgtcatatatatattttatatatatatactgtcatatatatattttatatatata +tatactgtcatatatatattttatatatatatactgtcatatatattttatatatatact +gtcatatatatattttatatatatactgtcatatatatattttatatatatactgtcata +tatatattt--tatatatatactgtcatatatatattttatatatatatactgtcatata +tatattttatatatatatactgtcatatatatattttatatatatactgtcatatgtata +ttttatatatatactgtcatatgtatattt------------------------------ +-tatatatatactgttatatgtatattttatatatatatactgttatatatatattttta +tatatatactgttatatatatttttttatatatatactgttatatatatttttatatata +tactgttatatatatattttatatatatactgttatatatatattttatatatactgtta +tatatatatattttatatatatactgttatatataatttttttcatatatatactgttat +atatatattttatatatatatactgttatatatatattttatatatatactgttatatat +atatattttatatatatactgttattttttatatatatactgttatatatatattttata +tatatactgttatatatatattatctatatactgttatgtgtatattttatctatatact +gttatgtgtatattttatctatatactgttatgtgtatattttacctatatacttttatg +tgtatattttatatatatactgttatgtgtatattttatatatatactgttatgtgtata +ttttttatatatactgttatgtgtatattttatatatatatactgttatgtgtatatttt +atatatatatactgttatgtgtatattttatatatatactgttatgtgtatattttatat +atatactgttatatatagttcatatatatactgttatatgtatagttcatatatatactg +ttatatatatagttcatatgtatactgttatatatatagttcatatatatactgttatat +atatagttcatatatatactgttatatatatatagttcatatatatactgttatatatag +ttcatatatatactgttatatatatagttcatatatatactgttatatatatagttcata +tatatactgttatatatagttcatatatatatactgttctatatatagttcatatatata +tactgttatatatatagttcatatatatatactgttatatatagttcatatatatatagt +tcatatatatactgttatatatatagttcatatatatactgttatatacatagttcatat +atatactgttatatatatagttcatatatatatactgttatatatatagttcatatatat +actgttgtatatagttcatatatatactgttatatatatagttcatatattactgttata +tatatagttcatatatatactgttatatatatatagttcatatatatactgttatatata +gtttatatatatatactgttatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgttatatatatatagtttatatatatatactgttatatatatagt +ttatatatatatactgttatatatatatagtttatatatactgttatatatatagtttat +atatatactgttatatatatagtttatatatatactgttatatatatagtttatatatat +actgttatatatatagtttatatatatactgttatatatatagtttatatatatactgtt +atatatatagtttatatatatactgttatatatatagtttatatatatatactgt--tat +atatagtttatatatactgttatatatatagtttatatactgttatatatagtttatata +tatatactgttatatatagtttatatatatatactgttatatatatatagtttatatata +tatactgttatatatatatagtttatatatatactgttatatatatatagtttatatata +tactgttgtatatatagtttatatatatactgttatatatatatttgtgtatatatactg +ttatataaatatatttgtgtatatatactgttatatatatattttatgtatatatactgt +tatatatatattttatgtatatatactgttatatatatattttatgtatatatactgtta +tatatatattttatgtatatatactgttatatatatattttatgtatatatactgttata +tatattttatgtatatatactgttatatatattttatgtatatatactgttatatatata +ttttatgtatatatactgttatatctgtatattttatgtttatatactgttatatctgta +tattttatgtatatatactgttatatctgtatattttatgtatatactgttatatatata +ctgttatatatatactgatatatatactgttatatatatactgatatatatatactgtta +tatatatactgttatatatatatactgttacatatacatatatatactgttatatatata +ctgttatatatattgttatatatatacacatatatatatac--atatatatacatatata +tatacatatatatatacatatatatatacacgtatatatatacatatatatatacacata +tatatatatacacacacatatatatatatatatatactggctgtgtgtggtggtgcacac +ctgtaatcccagcactttgggaggccaaggcaggctgatggcttgagctcaggagttcaa +gaccagctaagcaacatactgagatcccatctctaaaaaaaatacaaaaaggccaggcac +agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc +agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa +cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg +agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc +cagcctgggcaacagagcaagactctgtctccaaaaaaa- +>ref_chr20:38462831-38464884 +caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc +tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca +agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat +ataaatatatatttatatatatttatatataatatattatatataaatatatttatatat +aatatattatatataaatatattatatataaaatattatatataaatatatatatttata +tatataaatatatataaatatatatatatataaaaataccagcacatcagtatcaggaat +aaatgggctgggcagtatatatatatatatatatgtatatatatatactgtttttcatat +atatatactgtcttatatatatatatactgttttacatatatatactgtttcatatatat +aatgttttatatatatactgttatatatattatatgtactgttatatattttttatatat +atactgttatatatattttttatatatactgttatatattatatatatactgttatatat +atattttacatatatactgttatatatatattttatatatatatactgttatatatatat +attttatatatgtatactgttatatatttatattttttatatgtatactgttatatatat +attatatgtatactgttatatatatattttatatgtatactgttatatatatatatttca +tatatatatactgttatatatattttatatatatatactgtcatatatatattttatata +tatactgtcatatatatattttatatatatatactgtcatatatatattttatatatata +tatactgtcatatatatattttatatatatatactgtcatatatattttatatatatact +gtcatatatatattttatatatatactgtcatatatatattttatatatatactgtcata +tatatattt--tatatatatactgtcatatatatattttatatatatatactgtcatata +tatattttatatatatatactgtcatatatatattttatatatatactgtcatatgtata +ttttatatatatactgtcatatgtatattt------------------------------ +-tatatatatactgttatatgtatattttatatatatatactgttatatatatattttta +tatatatactgttatatatatttttttatatatatactgttatatatatttttatatata +tactgttatatatatattttatatatatactgttatatatatatttta------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------tatatactgttatatagata +ctgttatatatatactgatatatatactgttatatatatactgatatatatatactgtta +tatatatactgttatatatatatactgttacatatacatatatatactgttatatatata +ctgttatatatattgttatatatatacacatatatatatacatatatatatacatatata +tatacatatatatatacatatatatatacacgtatatatatacatatatatatacacata +tatatatatacacacacatatatatatatatatatactggctgtgtgtggtggtgcacac +ctgtaatcccagcactttgggaggccaaggcaggctgatggcttgagctcaggagttcaa +gaccagctaagcaacatactgagatcccatctctaaaaaaaatacaaaaaggccaggcac +agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc +agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa +cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg +agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc +cagcctgggcaacagagcaagactctgtctccaaaaaaa- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa new file mode 100644 index 00000000..02da97d1 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa @@ -0,0 +1,170 @@ +>syndip_1_chr20:8661811-8662758 +gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa +gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc +tcttaaaagtgtatatatatataattatataattatttattatataattatatataatta +tataattatttattatataattatatataattatataattatttattatataattatata +taa-tatacaattatttattatataattatatataattatatatttattatatttat--- +--tatacagttatataataaataattatataattatatataattatata---------ta +tataattatatataattatata-----taattatataattatatataattg--------- +----------------------tatataattatttattatataattatat---------- +-attatttattatataattatata----------------attatat------------- +-----attatttattatata---------------------------------------- +--------------------ttatataattatataattatatattatttattatata--- +--atatataattatatat-------tatttattatataattatatattatttatta---- +-----tatatattatttattatata----------------------attatatattatg +tattatataattatataattatatattatgtattatataat----------------tat +atattatgtattatataattatataattatatattatgtattatataattatatattatt +tattatataattatataattatatattatttattatataattatataattatatattatt +tattatataat---------------------tatataattatatattatttattatata +attatataattatat-attattt--------attatataattatatattatttattatat +aattatatattatttattacataaatatatattatttattacataagtatatatcactta +ttatataattgcatataattatttattatataattgcatataa----ttattatataatt +gtatatagttatttattatataattgtatatagttatttattatataattctatataatt +atttattatctaattctatattaaatataattatttattatc------------------ +--------------taattctatattatatataattatttattatataattctatattat +atataattattatataattctatatataattatttattatataattctatacataagtat +ttattatataattatgtatataattatttattatataattatgtataatatgtaattatt +tattatataattatgtataatatgtaattatttattatataattatgtataatatataat +tatttattatataattatgtataatatataattatttattatataattatgtgttatgta +taa----ttattatataattatgtataatatatatataattttttattatataattatgt +ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat +atatattattaacataatatctaaatatattaataatctaatatctataattaatataat +atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa +tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca +aaggcccacacc +>syndip_2_chr20:8661811-8662758 +gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa +gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc +tcttaaaagtgtatatatatataattatataattatttattatataatgatatataatta +tataattatttattatataatgatatataattatataattatttattatataattatata +taattatataattatttattatataattatatataattatataattatttattatataat +tatatataattatataattatttattatataattatatataattatataattatttatta +tataattatatataattatataattatttattatataattatatataattatataattat +ttattatataattatatataattatataattatttattatataattatatataattatat +aattatttattatataattatatataatatacaattatttattatataattatatataat +atacaattatttattatataattatatataattatatatttattatatttattatacagt +tatataataaatatataattatatataaatatataattatatataattatatatatataa +ttatatgtaattatatataattatataattattatataattgtatataatatataattat +ataattatatattatttattatataatatataattatataattatatattatttattata +taatatataattatataattatatattatttattatataattatatattatttattatat +atattatttattatataattatataattatatattatttattatataattatatattatt +tattatataa-tatataattatatattatttattatataa-tatataattatatattatt +tattatataattatatattatgtattatataatatataattatatattatgtattatata +attatataattatat-attatttattatataattatataattatatattatttattacat +aattatatattatttattacataagtatatattatttattacataagtatatatcactta +ttatataattgtatataattatttattatataattgcatataattatttattatataatt +gtatatagttatttattatataattctatatagttatttattatataattctatataatt +atttattatctaattctatattatatataattatttattatctaattctatattatatat +aattatttattatataattctatattatatataattatttattatataattctatattat +atataattattatataattctatatataattatttattatataattctatacataagtat +ttattatataattatgtatataattatttattatataattatgtataatatgtaattatt +tattatataattatgtataatatataattgtttattatataattatgtataatatataat +tatttattatataattatgtataatatataattatttattatataattatgtgttatgta +taattatttattatataattatgtataatatatatataattttttattatataattatgt +ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat +atatattattaacataatatctaaatatattaataatctaatatctataattaatataat +atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa +tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca +aaggcccacacc +>p:HG002_1_chr20:8661811-8662758 +gccaacctacgttaagtaattcataacttcagtattcaataaatcttcagtgaaaggcaa +gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc +tcttaaaagtgtatatatatataattatataattatttattatataatgatatataatta +tataattatttattatataatgatatataattatataattatttattatataattatata +taattatataattatttattatataattatatataattatataattatttattatataat +tatatataattatataattatttattatataattatatataattatataattatttatta +tataattatatataattatataattatttattatataattatatataattatataattat +ttattatataattatatataattatataattatttattatataattatatataattatat +aattatttattatataattatatataatatacaattatttattatataattatatataat +atacaattatttattatataattatatataattatatatttattatatttattatacagt +tatataataaatatataattatatataaatatataattatatataattatatatatataa +ttatatgtaattatatataattatataattattatataattgtatataatatataattat +ataattatatattatttattatataatatataattatataattatatattatttattata +taatatataattatataattatatattatttattatataattatatattatttattatat +atattatttattatataattatataattatatattatttattatataattatatattatt +tattatataa-tatataattatatattatttattatataa-tatataattatatattatt +tattatataattatatattatgtattatataatatataattatatattatgtattatata +attatataattatat-attatttattatataattatataattatatattatttattacat +aattatatattatttattacataagtatatattatttattacataagtatatatcactta +ttatataattgtatataattatttattatataattgcatataattatttattatataatt +gtatatagttatttattatataattctatatagttatttattatataattctatataatt +atttattatctaattctatattatatataattatttattatctaattctatattatatat +aattatttattatataattctatattatatataattatttattatataattctatattat +atataattattatataattctatatataattatttattatataattctatacataagtat +ttattatataattatgtatataattatttattatataattatgtataatatgtaattatt +tattatataattatgtataatatataattgtttattatataattatgtataatatataat +tatttattatataattatgtataatatataattatttattatataattatgtgttatgta +taattatttattatataattatgtataatatatatataattttttattatataattatgt +ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat +atatattattaacataatatctaaatatattaataatctaatatctataattaatataat +atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa +tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca +aaggcccacacc +>p:HG002_2_chr20:8661811-8662758 +gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa +gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc +tcttaaaagtgtatatatatataattatataattatttattatataattatatataatta +tataattatttattatataattatatataattatataattatttattatataattatata +taa-tatacaattatttattatataattatatataattatatatttattatatttat--- +--tatacagttatataataaataattatataattatatataattatata---------ta +tataattatatataattatata-----taattatataattatatataattg--------- +----------------------tatataattatttattatataattatat---------- +-attatttattatataattatata----------------attatat------------- +-----attatttattatata---------------------------------------- +--------------------ttatataattatataattatatattatttattatata--- +--atatataattatatat-------tatttattatataattatatattatttatta---- +-----tatatattatttattatata----------------------attatatattatg +tattatataattatataattatatattatgtattatataat----------------tat +atattatgtattatataattatataattatatattatgtattatataattatatattatt +tattatataattatataattatatattatttattatataattatataattatatattatt +tattatataat---------------------tatataattatatattatttattatata +attatataattatat-attattt--------attatataattatatattatttattatat +aattatatattatttattacataaatatatattatttattacataagtatatatcactta +ttatataattgcatataattatttattatataattgcatataa----ttattatataatt +gtatatagttatttattatataattgtatatagttatttattatataattctatataatt +atttattatctaattctatattaaatataattatttattatc------------------ +--------------taattctatattatatataattatttattatataattctatattat +atataattattatataattctatatataattatttattatataattctatacataagtat +ttattatataattatgtatataattatttattatataattatgtataatatgtaattatt +tattatataattatgtataatatgtaattatttattatataattatgtataatatataat +tatttattatataattatgtataatatataattatttattatataattatgtgttatgta +taa----ttattatataattatgtataatatatatataattttttattatataattatgt +ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat +atatattattaacataatatctaaatatattaataatctaatatctataattaatataat +atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa +tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca +aaggcccacacc +>ref_chr20:8661811-8662758 +gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa +gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc +tcttaaaag--------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------tgtata +tatatataattatataattattt--------attatataattatatataattatata--- +--------attatttattatataattatatataatatac------------aattattta +ttatataattatatataattatat--------atttattatat----ttattatacagt- +-tatataataaataattatataattatatataattatat------------------att +atttattatctaattctatattaaatataattatttattatc------------------ +--------------taattctatattatatataattatttattatataattctatattat +atataattattatataattctatatataattatttattatataattctatacataagtat +ttattatataattatgtatataattatttattatataattatgtataatatgtaattatt +tattatataattatgtataatatgtaattatttattatataattatgtataatatataat +tatttattatataattatgtataatatataattatttattatataattatgtgttatgta +taa----ttattatataattatgtataatatatatataattttttattatataattatgt +ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat +atatattattaacataatatctaaatatattaataatctaatatctataattaatataat +atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa +tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca +aaggcccacacc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa new file mode 100644 index 00000000..a3c838e8 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa @@ -0,0 +1,75 @@ +>syndip_1_chr20:57189846-57190641 +ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg +gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta +cagggcagagtgagccatgggaggttttggggaggaaatgttcctggcagagggaatggc +acatgccaggttcatgagaaatacacggtgtgtgtcagcacagtgaagaggccagtgagg +ctggagaagtgagccaggaaccagagaggttgagactggagatagtgagcgaggaaccgg +agggggtgaagctggagagagtgagccaggaatcagaggaggtgaggccagagagagtga +gcgaggaaccagagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggc +c----agagtgagtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccag +agggggtgaggctggagagagtgagccaggaaccagaggaggtgaggccaga----gtga +gtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccagagggggtgaggc +tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca +agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg +ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc +gtgtggggaaccaacaaggc +>syndip_2_chr20:57189846-57190641 +ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg +gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta +cagggcagagtgagccatgggaggttttggggaggaaatgttcctggcagagggaatggc +acatgccaggttcatgagaaatacacggtgtgtgtcagcacagtgaagaggccagtgagg +ctggagaagtgagccaggaaccagaggggttgaggctggagatagtgagcgaggaaccgg +agggggtgaagctggagagagtgagccaggaatcagaggaggtgaggccagagagagtga +gcgaggaaccagagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggc +tggagagagtgagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttg +aaggggtgaggctggagacagtgagcgaggaacca------------------------- +-----------gagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggc +tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca +agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg +ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc +gtgtggggaaccaacaaggc +>p:HG002_1_chr20:57189846-57190641 +ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg +gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta +cagggcagagtgagccatgggaggctttggggaggaaatgttcctggcagagggaatggc +acatgccaggttcatgagaaatacacggtgtgtgtcagcacagtgaagaggccagtgagg +ctggagaagtgagccaggaaccagaggggttgaggctggagatagtgagcgaggaaccgg +agggggtgaagctggagagagtgagccaggaatcagaggaggtgaggccagagagagtga +gcgaggaaccagagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggc +tggagagagtgagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttg +aaggggtgaggctggagacagtgagcgaggaacca------------------------- +-----------gagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggc +tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca +agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg +ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc +gtgtggggaaccaacaaggc +>p:HG002_2_chr20:57189846-57190641 +ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg +gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta +cagggcagagtgagccatgggaggctttggggaggaaatgttcctggcagagggaatggc +acatgccaggttcatgagaaatacacggtgtgtgtcagcacagtgaagaggccagtgagg +ctggagaagtgagccaggaaccagagaggttgagactggagatagtgagcgaggaaccgg +agggggtgaagctggagagagtgagccaggaatcagaggaggtgaggccagagagagtga +gcgaggaaccagagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggc +c----agagtgagtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccag +agggggtgaggctggagagagtgagccaggaaccagaggaggtgaggccaga----gtga +gtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccagagggggtgaggc +tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca +agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg +ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc +gtgtggggaaccaacaaggc +>ref_chr20:57189846-57190641 +ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg +gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta +cagggcagagtgagccatgggaggttttggggaggaaatgttcctggcagagggaatggc +acatgccaggttcatgagaaatacacggtgtgtgtcagcacagtgaagaggccagtgagg +ctggagaagtgagccaggaaccagagaggttgagactggagatagtgagcgaggaaccgg +agggggtgaagctggagagagtgagccaggaatcagaggaggtgaggccagagagagtga +gcgaggaaccagagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggc +tggagagagtgagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttg +aaggggtgaggctggagacagtgagcgaggaaccagagggggtgaggctggagagcgtga +gtgaggagcccgagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggc +tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca +agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg +ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc +gtgtggggaaccaacaaggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa new file mode 100644 index 00000000..ac205f8c --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa @@ -0,0 +1,80 @@ +>syndip_1_chr20:63559188-63560064 +tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc +aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct +gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt +ctcctgtgagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggt +caggtgggag----------------------------------------tcagtcaggg +tcaggtgggaggagtcagggtcagatgggagtcagtcagagtcaggtgggaggagtcagg +gtcaggtgggaggagtcagggtcaggt---cggagtcagggtcaggtgggaggagtcagg +gtcaggtgggagtcagtcagggtcaggtgggagg-------------------------- +----------------------------------------------------agtcaggg +tcaggtgggaggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcaga +gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc +aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa +gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac +attcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg +ccgctggtccaggtcgctcttggcacaggtgcggacg +>syndip_2_chr20:63559188-63560064 +tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc +aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct +gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt +ctcctgtgagggtgggagtcagatgggagtcagtcagggtcaggtggga----------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------ggagtcagggtcaggtgggaggagtcagg +gtcaggtgggagtc---------------------------------------------- +----------------------------------------------------agtcaggg +tcaggtgggaggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcagg +gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc +aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa +gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac +attcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg +ccgctggtccaggtcgctcttggcacaggtgcggacg +>p:HG002_1_chr20:63559188-63560064 +tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc +aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct +gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt +ctcctgtgagggtgggagtcagatgggagtcagtcagggtcaggtggga----------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------ggagtcagggtcaggtgggaggagtcagg +gtcaggtgggagtc---------------------------------------------- +----------------------------------------------------agtcaggg +tcaggtgggaggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcagg +gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc +aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa +gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac +gttcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg +ccgctggtccaggtcgctcttggcacaggtgcggacg +>p:HG002_2_chr20:63559188-63560064 +tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc +aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct +gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt +ctcctgtgagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggt +caggtgggag----------------------------------------tcagtcaggg +tcaggtgggaggagtcagggtcagatgggagtcagtcagagtcaggtgggaggagtcagg +gtcaggtgggaggagtcagggtcaggt---cggagtcagggtcaggtgggaggagtcagg +gtcaggtgggagtcagtcagggtcaggtgggagg-------------------------- +----------------------------------------------------agtcaggg +tcaggtgggaggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcaga +gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc +aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa +gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac +gttcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg +ccgctggtccaggtcgctcttggcacaggtgcggacg +>ref_chr20:63559188-63560064 +tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc +aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct +gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt +ctcctgtgagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggt +caggtgggaggagtcagggtcaggtgggagtcagtcagggtcaggtgggaggagtcaggg +tcaggtgggaggagtcagggtcagatgggagtcagtcagagtcaggtgggaggagtcagg +gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtgggaggagtcagg +gtcagatgggagtcagtcagagtcaggtgggaggagtcagggtcaggtgggaggagtcag +ggtcaggtcggagtcagggtcaggtgggaggagtcagggtcaggtgggagtcagtcaggg +tcaggtgggaggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcaga +gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc +aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa +gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac +attcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg +ccgctggtccaggtcgctcttggcacaggtgcggacg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa new file mode 100644 index 00000000..3c5fc164 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa @@ -0,0 +1,345 @@ +>syndip_1_chr20:64131704-64134056 +aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc +acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc +tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag +accccctcactgtcaactctcatgtcctatagagaccccctcactgtcaactctcatgtc +ctatgagaccccctcactgtccactctcatggcctgtagagaccccctcactgtccaccc +tcatgtcctata--------------------------------gagaccccctcactgt +ccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctat-gagacccc +ctcactgtccaccctcatggcctatagagaccccctcactgtccaccctcatgtcctatg +agaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccactctcatg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +cctcatgtcctatagagaccccctcactgtccaccctcatggcctgtagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccactctcatgtcctatagaga +ccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctcatgtcct +atagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccct +catgtcctatgagaccccctcactgtccactctcatgtcctat-gagaccccctcactgt +ccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcgtgtcctata +gagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccactctcatg +tcctatgagaccccctcactgtccactctcatggcctgtagagaccccctcactgtccac +cctcatgtcctgtagagaccccctcactgtccaccctcgtgtcctatagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccaccctcatgt------------------------------------- +------------------------------------------------------------ +----cctatagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgt +ccaccctcatggcctgtagagaccccctcactgtccaccctcatggcctgtagagacccc +ctcactgtccactctcatgtcctatagagacccctcactgtccaccctcatggcctatag +agaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcgtg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +tctcatgtcctatgagaccccctcactgtccactctcatggcctgtagagaccccctcac +tgtccaccctcatgtcctatgagaccccctcactgtccaccctcatgtcctgtagagacc +ccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtccta +tagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctca +tgtcctatagagaccccctcactgtccaccctcatggc---------------------- +------------------------------------------------------------ +---------------------------------------------------ctgtagaga +ccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctat----------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +gagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcat +gtcctatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtcc +accctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctc +actgtccaccctcgtgtcctatagagaccccctcactgtcaactctcatgtcctatagag +accccctcactgtccaccctcgtgtcctatgagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc +tcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactg +tccaccctcgtgtcctatagagaccccctcactgtcaactctcatgtcctatagagaccc +cctcactgtccaccctcgtgtcctatagagaccccctcactctccaccctcatgtcctat +agagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctca +tgtcctat-gagaccccctcactgtccaccctcatggcctatagagaccccctcactgtc +caccctcatgtcctat-gagaccccctcactgtccaccctcatgtcctatagagaccccc +tcactgtccaccctcatggcctatagagaccccctcactgtccaccctcatgtcctatag +agaccccctcactctccaccctcatggcctatagagaccccctcactgtccaccctcgtg +tcctatagagaccccctcactctccaccctcatggcctatagagaccccctcactgtcca +ccctcatggcctatagagaccccctcactgtccactctcatgtcctatgagaccccctca +ctgtccaccctcatggcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccactctcatgtcctat-gagaccccctcactgtccaccctcatggcct +gtagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctc +gtgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtc +caccctcatgtcctatagagaccccctcactgtccactctcatgtcctatgagaccccct +cactgtccaccctcatgtcctatgagaccccctcactgtccaccctcatggcctatagag +accccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc +tcatgtcctatagagaccccctcactgtccactctcatggcctgtagagaccccctcact +gtccacccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt +gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc +>syndip_2_chr20:64131704-64134056 +aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc +acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc +tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag +accccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatggc +ctatgagaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccc +tcatgtcctatagagacccctcactgtccaccctcatggcctatgagaccccctcactgt +ccactctcatgtcctatagagaccccctcactgtccaccctcgtgtcctatagagacccc +ctcactgtccaccctcatgtcctatagaga-cccctcactgtccaccctcatggcctatg +agaccccctcactgtccaccctcatggcctatagagaccccctcactgtccaccctcatg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +cctcatgtcctatagagaccccctcactgtccaccctcatgtcctat-gagaccccctca +ctgtccactctcatgtcctatagagaccccctcactgtccaccctcgtgtcctatagaga +ccccctcactgtccaccctcgtgtcctatgagaccccctcactgtccaccctcgtgtcct +atagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccct +catgtcctatgagaccccctcactgtccaccctcatgtcctatagagaccccctcactgt +ccaccctcatgtcctatagagaccccctcactgtccaccctcgtgtcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctata +gagaccccctcactgtccaccctcgtgtccta---------------------------- +------------------------------------------------------------ +------------tagagaccccctcactgtccaccctcatggcctatagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccaccctcatgg------------------------------------- +------------------------------------------------------------ +----cctatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgt +ccaccctcatgtcctat------------------------------------------- +-----------------------------------------------------------g +agaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcgtg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +cctcatgtcctata---------------------------------------------- +--------------------gagaccccctcactgtccaccctcatggcctatagagacc +ccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatggccta +tagagaccccctcactgtccaccctcgtgtcctatgagaccccctcactgtccaccctca +tggcctatagagaccccctcactgtccaccctcatggcctatgagaccccctcactgtcc +accctcgtgtcctatgagaccccctcactgtccaccctcatggcctatagagaccccctc +actgtccaccctcatggcctatgagaccccctcactgtccaccctcatggcctgtagaga +ccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccc +tcatgtcctatgagaccccctcactgtccactctcatgtcctatagagacccctcactgt +ccaccctcatggcctatagagaccccctcactgtccaccctcatggcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctata +gagaccccctcactgtccaccctcatgtcctgtagagaccccctcactgtccaccctcat +ggcctgtagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtcc +accctcatgtcctatagagaccccctcactgtccaccctcatggcctata---------- +------------------------------------------------------------ +------------------------------gagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccc +tcatggcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactg +tccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatagagaccc +cctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcgtgtcctat +agagaccccctcactgtccaccctcatggcctatagagaccccctcactgtccaccctca +tgtcctatagagaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtc +caccctcatggcctatagagaccccctcactgtccaccctcatgtcctat-gagaccccc +tcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatag +agaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatg +tcctatagagaccccctcactgtccaccctcatgtcctgtagagaccccctcactgtcca +ccctcatggcctatagagaccccctcactgtccaccctcatggcctatgagaccccctca +ctgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctatagaga +cccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcgtgtcct +atagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctc +atgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtc +caccctcatgtcctatagagaccccctcactgtccaccctcatgtcctata--------- +------------------------------------------------------------ +------------------------------gagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc +tcgtgtcctatagagaccccctcactgtccacccatatggcctgtagagaccccctcact +gtccaccctcatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt +gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc +>p:HG002_1_chr20:64131704-64134056 +aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc +acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc +tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag +accccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatggc +ctatgagaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccc +tcatgtcctatagagacccctcactgtccaccctcatggcctatgagaccccctcactgt +ccactctcatgtcctatagagaccccctcactgtccaccctcgtgtcctatagagacccc +ctcactgtccaccctcatgtcctatagaga-cccctcactgtccaccctcatggcctatg +agaccccctcactgtccaccctcatggcctatagagaccccctcactgtccaccctcatg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +cctcatgtcctatagagaccccctcactgtccaccctcatgtcctat-gagaccccctca +ctgtccactctcatgtcctatagagaccccctcactgtccaccctcgtgtcctatagaga +ccccctcactgtccaccctcgtgtcctatgagaccccctcactgtccaccctcgtgtcct +atagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccct +catgtcctatgagaccccctcactgtccaccctcatgtcctatagagaccccctcactgt +ccaccctcatgtcctatagagaccccctcactgtccaccctcgtgtcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctata +gagaccccctcactgtccaccctcgtgtccta---------------------------- +------------------------------------------------------------ +------------tagagaccccctcactgtccaccctcatggcctatagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccaccctcatgg------------------------------------- +------------------------------------------------------------ +----cctatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgt +ccaccctcatgtcctat------------------------------------------- +-----------------------------------------------------------g +agaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcgtg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +cctcatgtcctata---------------------------------------------- +--------------------gagaccccctcactgtccaccctcatggcctatagagacc +ccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatggccta +tagagaccccctcactgtccaccctcgtgtcctatgagaccccctcactgtccaccctca +tggcctatagagaccccctcactgtccaccctcatggcctatgagaccccctcactgtcc +accctcgtgtcctatgagaccccctcactgtccaccctcatggcctatagagaccccctc +actgtccaccctcatggcctatgagaccccctcactgtccaccctcatggcctgtagaga +ccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccc +tcatgtcctatgagaccccctcactgtccactctcatgtcctatagagacccctcactgt +ccaccctcatggcctatagagaccccctcactgtccaccctcatggcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctata +gagaccccctcactgtccaccctcatgtcctgtagagaccccctcactgtccaccctcat +ggcctgtagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtcc +accctcatgtcctatagagaccccctcactgtccaccctcatggcctata---------- +------------------------------------------------------------ +------------------------------gagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccc +tcatggcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactg +tccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatagagaccc +cctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcgtgtcctat +agagaccccctcactgtccaccctcatggcctatagagaccccctcactgtccaccctca +tgtcctatagagaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtc +caccctcatggcctatagagaccccctcactgtccaccctcatgtcctat-gagaccccc +tcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatag +agaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatg +tcctatagagaccccctcactgtccaccctcatgtcctgtagagaccccctcactgtcca +ccctcatggcctatagagaccccctcactgtccaccctcatggcctatgagaccccctca +ctgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctatagaga +cccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcgtgtcct +atagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctc +atgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtc +caccctcatgtcctatagagaccccctcactgtccaccctcatgtcctata--------- +------------------------------------------------------------ +------------------------------gagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc +tcgtgtcctatagagaccccctcactgtccacccatatggcctgtagagaccccctcact +gtccaccctcatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt +gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc +>p:HG002_2_chr20:64131704-64134056 +aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc +acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc +tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag +accccctcactgtcaactctcatgtcctatagagaccccctcactgtcaactctcatgtc +ctatgagaccccctcactgtccactctcatggcctgtagagaccccctcactgtccaccc +tcatgtcctata--------------------------------gagaccccctcactgt +ccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctat-gagacccc +ctcactgtccaccctcatggcctatagagaccccctcactgtccaccctcatgtcctatg +agaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccactctcatg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +cctcatgtcctatagagaccccctcactgtccaccctcatggcctgtagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccactctcatgtcctatagaga +ccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctcatgtcct +atagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccct +catgtcctatgagaccccctcactgtccactctcatgtcctat-gagaccccctcactgt +ccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcgtgtcctata +gagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccactctcatg +tcctatgagaccccctcactgtccactctcatggcctgtagagaccccctcactgtccac +cctcatgtcctgtagagaccccctcactgtccaccctcgtgtcctatagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccaccctcatgt------------------------------------- +------------------------------------------------------------ +----cctatagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgt +ccaccctcatggcctgtagagaccccctcactgtccaccctcatggcctgtagagacccc +ctcactgtccactctcatgtcctatagagacccctcactgtccaccctcatggcctatag +agaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcgtg +tcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccac +tctcatgtcctatgagaccccctcactgtccactctcatggcctgtagagaccccctcac +tgtccaccctcatgtcctatgagaccccctcactgtccaccctcatgtcctgtagagacc +ccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtccta +tagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctca +tgtcctatagagaccccctcactgtccaccctcatggc---------------------- +------------------------------------------------------------ +---------------------------------------------------ctgtagaga +ccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctat----------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +gagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcat +gtcctatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtcc +accctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctc +actgtccaccctcgtgtcctatagagaccccctcactgtcaactctcatgtcctatagag +accccctcactgtccaccctcgtgtcctatgagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc +tcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactg +tccaccctcgtgtcctatagagaccccctcactgtcaactctcatgtcctatagagaccc +cctcactgtccaccctcgtgtcctatagagaccccctcactctccaccctcatgtcctat +agagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctca +tgtcctat-gagaccccctcactgtccaccctcatggcctatagagaccccctcactgtc +caccctcatgtcctat-gagaccccctcactgtccaccctcatgtcctatagagaccccc +tcactgtccaccctcatggcctatagagaccccctcactgtccaccctcatgtcctatag +agaccccctcactctccaccctcatggcctatagagaccccctcactgtccaccctcgtg +tcctatagagaccccctcactctccaccctcatggcctatagagaccccctcactgtcca +ccctcatggcctatagagaccccctcactgtccactctcatgtcctatgagaccccctca +ctgtccaccctcatggcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccactctcatgtcctat-gagaccccctcactgtccaccctcatggcct +gtagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctc +gtgtcctatagagaccccctcactgtccaccctcatgtcctatgagaccccctcactgtc +caccctcatgtcctatagagaccccctcactgtccactctcatgtcctatgagaccccct +cactgtccaccctcatgtcctatgagaccccctcactgtccaccctcatggcctatagag +accccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctcatgtcc +tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc +tcatgtcctatagagaccccctcactgtccactctcatggcctgtagagaccccctcact +gtccacccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt +gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc +>ref_chr20:64131704-64134056 +aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc +acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc +tcctggcctctctcatgtcctatgagaccccctcactgtccaccctcatgtcctgtagag +accccctcactgtccactctcatggcctatagagaccccctcactgtccaccctcatggc +ctgta------------------------------------------------------- +--------------------------------------------gagaccccctcactgt +ccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatagagacccc +ctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcatgtcctatg +agaccccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatg +tcctata----------------------------------------------------- +------------------------------------------------gagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctgtagaga +ccccctcactgtccaccctcatgtcctatgagaccccctcactgtccaccctcgtgt--- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------cctatagagaccccctca +ctgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctatgagac +cccctcactgtccaccctcatgtcctatagagaccccctcactgtccactctcatgtcct +atagagaccccctcactgtccaccctcatggcctatgagaccccctcactgtccactctc +atggcctatagagaccccctcactgtccaccctcatggcctatagagaccccctcactgt +ccaccctcatggcctgt------------------------------------------- +----------------------------------------------------------ag +agaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatg +gcctgtagagaccccctcactgtccaccctcatggcctatgagaccccctcactgtccac +cctcatgtcctat----------------------------------------------- +--------------------gagaccccctcactgtccaccctcatgtcctatagagacc +ccctcactgtccaccctcgtgtcctatagagaccccctcactgtccaccctcatgtccta +tagagaccccctcactgtccgccctcatgtcctatgagaccccctcactgtccaccctca +tgtcctatagagaccccctcactgtccaccctcatggc---------------------- +------------------------------------------------------------ +---------------------------------------------------ctgtagaga +ccccctcactgtccgccctcatgtcctat------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------gagaccccctcactgtccaccctcatgtcctgtagagaccccctcactgtcc +accctcatgtcctatagagaccccctcactgtccaccctcatgtcctat----------- +------------------------------------------------------------ +------------------------------gagaccccctcactgtccaccctcatgtcc +tat-gagaccccctcactgtccaccctcatgtcctatgaga------------------- +-----------------cccctcactgtccaccctcatgtcctatgagaccccctcactg +tccaccctcatgtcctat------------------------------------------ +------------------------------------------------------------ +-----------------------------------gagaccccctcactgtccactctca +tgtcctat-gaga-cccctcactgtccaccctcatgtcctatagagaccccctcactgtc +caccctcatggcctgtagagaccccctcactgtcaactctcatgtcctat-gagaccccc +tcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcc----- +------------------------------aaagagaccccctcactgtccaccctcatg +gcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtcca +ccctcatggcctatagagaccccctcactgtccaccctcatgtccaaagagaccccctca +ctgtccaccctcatggcctgtagagaccccctcactgtccaccctcatggcctgta---- +-----------------------------gagaccccctcactgtccaccctcatgtcct +atagagaccccctcactgtccaccctcatgtccaaagagaccccctcactgtccaccctc +atggcctgta---------------------------------gagaccccctcactgtc +cacccatatggcctgtagagaccccctcactgtccaccctcatgtcctata--------- +------------------------------------------------------------ +------------------------------gagaccccctcactgtccaccctcatggcc +tgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccc +tcatggcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcact +gtccacccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt +gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_d342db009b70a5841aa5b850478f78c4.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d342db009b70a5841aa5b850478f78c4.msa new file mode 100644 index 00000000..69f73d0a --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d342db009b70a5841aa5b850478f78c4.msa @@ -0,0 +1,30 @@ +>syndip_1_chr20:7720811-7721104 +tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat +tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca +aatgac--ttttctttttctttt---------------tttttttttttgggatggagtt +ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct +>syndip_2_chr20:7720811-7721104 +tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat +tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca +aatgacgttttctttttcttttc---------------tttttttttttgggatggagtt +ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct +>p:HG002_1_chr20:7720811-7721104 +tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat +tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca +aatgacgttttctttttcttttc---------------tttttttttttgggatggagtt +ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct +>p:HG002_2_chr20:7720811-7721104 +tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat +tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca +aatgacattttctttttcttttcttttctttctttttttttttttttttgggatggagtt +ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct +>ref_chr20:7720811-7721104 +tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat +tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca +aatgacattttctttttcttttcttttctttctttttttttttttttttgggatggagtt +ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa new file mode 100644 index 00000000..9af4c6cc --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa @@ -0,0 +1,95 @@ +>syndip_1_chr20:2240825-2241827 +gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg +agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat +cttctcaaagttttctttttctttctttctttttcctttctttctttctttttctttctt +tctctctctctccttccttccttctttccttccctccctcccttccttttttcttctttc +tttcattttctttctttctctctttcttctttctcttcctttcttc-------------- +----------------------------------------------tttcttttcctcct +tcctcccttctttccttcccttccttcttccctccctccctctttgtttctttctttctc +tccctc-----------------------------------cttccctccctccctttct +ttctcctt-----------ttcttccttccctccctgcctgtctccctctttctttctct +tttttcttttccttccttccgtctttttcctcccttctttcctccctcctccctccctcc +ctcctccctctttttctttctttctttcttttttctttctctcttc-ttttttttttttt +tttttaagagatagggtctgttgcccaggctggagtgcagtggcatgatcatggttcact +gcagccttgaactcctgggctcaggtgatcctcctacctcagcctcctaagtagctggga +ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg +gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc +agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct +taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc +tttgcagtttaccagattcacttatgtacattctc +>syndip_2_chr20:2240825-2241827 +gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg +agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat +cttctcaaagttttctt-------------------tttctttctttctttttctttctt +tctctctctctccttccttccttctttccttccctccctcccttccttttttcttcttcc +tttcattttctttctttctctctttcttctttctcttcctttcttctttcttttcctcct +tcctcccttctttccttcccttccttcttccctccctccctctttgtttctttctttctc +tccctccttccctccctccctttcttccttctttccttatccctcccttctttccttcct +tccctttgtttctctttctttctttctttctctctctcttccttccttccctccctccct +ccctccttcctcccttctttccttccctccctgcctctctgtctccctctttctttctct +tttttcttttccttccttccgtctttttcctcccttctttcctccctcctccctccctcc +ctcctccctctttttctttctttctttcttttttctctctctcttctttttttttttttt +tttttaagagatagggtctgttgcccaggctggagtgcagtggcatgatcatggttcact +gcagccttgaactcctgggctcaggtgatcctcctacctcagcctcctaagtagctggga +ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg +gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc +agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct +taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc +tttgcagtttaccagattcacttatgtacattctc +>p:HG002_1_chr20:2240825-2241827 +gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg +agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat +cttctcaaagttttctt-------------------tttctttctttctttttctttctt +tctctctctctccttccttccttctttccttccctccctcccttccttttttcttcttcc +tttcattttctttctttctctctttcttctttctcttcctttcttctttcttttcctcct +tcctcccttctttccttcccttccttcttccctccctccctctttgtttctttctttctc +tccctccttccctccctccctttcttccttctttccttatccctcccttctttccttcct +tccctttgtttctctttctttctttctttctctctctcttccttccttccctccctccct +ccctccttcctcccttctttccttccctccctgcctctctgtctccctctttctttctct +tttttcttttccttccttccgtctttttcctcccttctttcctccctcctccctccctcc +ctcctccctctttttctttctttctttcttttttctctctctcttctttttttttttttt +tttttaagagatagggtctgttgcccaggctggagtgcagtggcatgatcatggttcact +gcagccttgaactcctgggctcaggtgatcctcctacctcagcctcctaagtagctggga +ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg +gtcttgctatgttccccaggctagtctcaaactcctgggctcaagtgatcctccagcctc +agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct +taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc +tttgcagtttaccagattcacttatgtacattctc +>p:HG002_2_chr20:2240825-2241827 +gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg +agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat +cttctcaaagttttctttttctttctttctttttcctttctttctttctttttctttctt +tctctctctctccttccttccttctttccttccctccctcccttccttttttcttctttc +tttcattttctttctttctctctttcttctttctcttcctttcttc-------------- +----------------------------------------------tttcttttcctcct +tcctcccttctttccttcccttccttcttccctccctccctctttgtttctttctttctc +tccctc-----------------------------------cttccctccctccctttct +ttctcctt-----------ttcttccttccctccctgcctgtctccctctttctttctct +tttttcttttccttccttccgtctttttcctcccttctttcctccctcctccctccctcc +ctcctccctctttttctttctttctttcttttttctttctctcttc-ttttttttttttt +tttttaagagatagggtctgttgcccaggctggagtgcagtggcatgatcatggttcact +gcagccttgaactcctgggctcaggtgatcctcctacctcagcctcctaagtagctggga +ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg +gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc +agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct +taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc +tttgcagtttaccagattcacttatgtacattctc +>ref_chr20:2240825-2241827 +gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg +agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat +cttctcaaagttttctttttctttctttctttttcctttctttctttctttttctttctt +tctctctctctccttccttccttctttccttccctccctcccttccttttttcttctttc +tttcattttctttctttctctctttcttctttctcttcctttcttt-------------- +-ccttccgtccctcccttccttccttttttcttcttcctttcat--tttctttctttctc +tctttcttctttctcttcctttcttctttcttttcctccttcctcccttctttccttccc +ttcctt-----------------------------------cttccctccctccctcttt +gtttctttctttctctccctccttccctccctccctttctgtctccctctttctttctct +tttttcttttccttccttccgtctttttcctcccttctttcctccctcctccctccctcc +ctcctccctctttttctttctttctttcttttttctttctctcttctttttttttttttt +tttttaagagatagggtctgttgcccaggctggagtgcagtggcatgatcatggttcact +gcagccttgaactcctgggctcaggtgatcctcctacctcagcctcctaagtagctggga +ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg +gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc +agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct +taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc +tttgcagtttaccagattcacttatgtacattctc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa new file mode 100644 index 00000000..f35203e4 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa @@ -0,0 +1,65 @@ +>syndip_1_chr20:63027897-63028267 +acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg +ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa +gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagagggccggtg +gggagaggaccggtggggagggggccggtggagagggggccggtggggagggggccggtg +gggagaggaccggtggggaggggccggtggggagggggccggtggggagaggaccggtgg +ggaggggccggtggggagggggccggtggggagaggaccggtggggaggggccggtgggg +agggggccggcggggagggggccggcggggagggtgccggtggggagtggtctggtgggg +agggggccggtggggagggggcccgcggggaggggccggtggggagggggcctgcgggga +gggggccggcggggaggggccggtggggaggggccggtggggtggggccggtggggaggg +ggccggtgggggggggcctgcagggagggggccggtggggaggaggctggtgaagtagcg +tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca +gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc +>syndip_2_chr20:63027897-63028267 +acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg +ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa +gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagaggaccggtg +ggga----------------ggggccggtggagagggggccggtggggagggggccggcg +gggag--------------tggtttggtggggagggggccggtggggagggggcctgcgg +ggaggggccggtggggag-----------------------------ggggccggtgggg +agggggccggcggggagggggccggcggggagggggccggcggggagtggtctggtgggg +ag-----------------------------ggggccggtggggagggggccggcgggga +gggggccggcggggaggggccggtggggagtggtctg----------------------- +-------------------gtggggagggggccggtggggaggaggctggtgaagtagcg +tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca +gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc +>p:HG002_1_chr20:63027897-63028267 +acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg +ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa +gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagaggaccggtg +ggga----------------ggggccggtggagagggggccggtggggagggggccggcg +gggag--------------tggtttggtggggagggggccggtggggagggggcctgcgg +ggaggggccggtggggag-----------------------------ggggccggtgggg +agggggccggcggggagggggccggcggggagggggccggcggggagtggtctggtgggg +ag-----------------------------ggggccggtggggagggggccggcgggga +gggggccggcggggaggggccggtggggagtggtctg----------------------- +-------------------gtggggagggggccggtggggaggaggctggtgaagtagcg +tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca +gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc +>p:HG002_2_chr20:63027897-63028267 +acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg +ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa +gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagagggccggtg +gggagaggaccggtggggagggggccggtggagagggggccggtggggagggggccggtg +gggagaggaccggtggggaggggccggtggggagggggccggtggggagaggaccggtgg +ggaggggccggtggggagggggccggtggggagaggaccggtggggaggggccggtgggg +agggggccggcggggagggggccggcggggagggtgccggtggggagtggtctggtgggg +agggggccggtggggagggggcccgcggggaggggccggtggggagggggcctgcgggga +gggggccggcggggaggggccggtggggaggggccggtggggtggggccggtggggaggg +ggccggtgggggggggcctgcagggagggggccggtggggaggaggctggtgaagtagcg +tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca +gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc +>ref_chr20:63027897-63028267 +acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg +ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa +gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagaggaccggtg +ggga----------------ggggccggtggagagggggccggtggggagggggccggcg +gggag--------------tggttt----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------ggtggggaggaggctggtgaagtagcg +tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca +gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa new file mode 100644 index 00000000..e9579aa1 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa @@ -0,0 +1,40 @@ +>syndip_1_chr20:17081116-17081511 +tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat +tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca +tgtggaattttattttattttattttattttattttattttattttattttattttattt +tatt-------------------------------------------------------- +----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt +gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca +cctcagcctcccaagtagctgggactacaagtacgt +>syndip_2_chr20:17081116-17081511 +tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat +tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca +tgtgga-----attttattttattttattttattttattttattttattttattttattt +tatt-------------------------------------------------------- +----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt +gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca +cctcagcctcccaagtagctgggactacaagtacgt +>p:HG002_1_chr20:17081116-17081511 +tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat +tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca +tgtgga-----attttattttattttattttattttattttattttattttattttattt +tatt-------------------------------------------------------- +----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt +gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca +cctcagcctcccaagtagctgggactacaagtacgt +>p:HG002_2_chr20:17081116-17081511 +tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat +tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca +tgtggaattttattttattttattttattttattttattttattttattttattttattt +tatt-------------------------------------------------------- +----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt +gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca +cctcagcctcccaagtagctgggactacaagtacgt +>ref_chr20:17081116-17081511 +tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat +tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca +tgtggaattttattttattttattttattttattttattttattttattttattttattt +tattattttattttattttattttattttattttattttattttattttattttatttta +ttttattttattattttattttattacgacaaggtctggctctgtcgcctaggctggagt +gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca +cctcagcctcccaagtagctgggactacaagtacgt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa new file mode 100644 index 00000000..65babcd3 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa @@ -0,0 +1,120 @@ +>syndip_1_chr20:55627244-55628551 +gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat +tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt +atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt +catgtatatatttttatatgtatatatgtgggtgtgcatgtatacattaaatgatgaatt +tatatgtatatattaaatgtatatattatatgtatatattacatatatacatattgtata +ttacatatacataatacataaaatgtttaatacatacataatatatacataatataatat +tgtatattattaaattatacatgtatatatgtatactatatgtatatatgtatacaaaat +atgtatataatatatacaaaaatatgtgtatttttgtacatatattgtacaaaaatatgt +atattatatatacaaaatatgtatataatatatacatatattatgtaatataatatatac +ataatgtgtattacatacattatatgtaatatatacatatattatata---atatatatg +tatatattatatacatatatattatatacatatatgtatatattatata----------- +------catatatgtatacattatatattatgtatacatatatgtatatatgtatacatt +atatattatgtatacatatatgtatatatgtatacattatatattatgtatacatatatg +tatatatgtatacattatatattatgtatacatatatgtatatatgtatacattatatat +tatgtatacatatatgtatatatgtatacattatatattatgtatacatatatgtatata +tgtatacattatatattatgtatacatatatgtata------------------------ +------------------------------------------------------------ +----------tatgtatacatgtatatacatatatacgtatacacatatatacgtg---- +----tatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg +tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata +cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta +gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat +atttggttat +>syndip_2_chr20:55627244-55628551 +gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat +tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt +atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt +catgtatatatttttatatgtatatatgtgggtgtgcatgtatacattaaatgatgaatt +tatatgtatatattaaatgtatatattatatgtatatattacatatatacatattgtata +ttacatatacataatacataaaatgtttaatacatacataatatatacataatataatat +tgtatattattaaattatacatgtatatatgtatac-----------atgtatacaaaat +atgtatataatatatacaaaaa-atgtgtatttttgtacatatattgtacaaaaatatgt +atattatatatacaaaatatgtatataatatatacatatattatgtaatataatatatac +gtaatgtgtattacatacattatatgtaatatatacatatatattatatacatatatatg +tatatattatatacatatatattatatacatatatgtatatattatatattatgtataca +catatgtatatatgtatatattatatattatgtatacacatatgtatatatgtatatatt +atatattatgtatacac--------atatgtatatattatatattatgtatacacatatg +tatatatgtatatattatatattatgtatac--------acatatgtatatattatatat +tatgtatacacatatgtatatatgtatatattatatattatgtatacacatatgtatat- +-------attatatattatgtatacacatatgtatatattatatat-------------- +------------------------------------------------------------ +----------tatgtatacacgtatatacatatatacgtatacacgtatatacgta--ta +tacgtatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg +tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata +cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta +gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat +atttggttat +>p:HG002_1_chr20:55627244-55628551 +gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat +tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt +atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt +catgtatatatttttatatgtatatatgtgggtgtgcatgtatacattaaatgatgaatt +tatatgtatatattaaatgtatatattatatgtatatattacatatatacatattgtata +ttacatatacataatacataaaatgtttaatacatacataatatatacataatataatat +tgtatattattaaattatacatgtatatatgtatac-----------atgtatacaaaat +atgtatataatatatacaaaaa-atgtgtatttttgtacatatattgtacaaaaatatgt +atattatatatacaaaatatgtatataatatatacatatattatgtaatataatatatac +gtaatgtgtattacatacattatatgtaatatatacatatatattatatacatatatatg +tatatattatatacatatatattatatacatatatgtatatattatatattatgtataca +catatgtatatatgtatatattatatattatgtatacacatatgtatatatgtatatatt +atatattatgtatacac--------atatgtatatattatatattatgtatacacatatg +tatatatgtatatattatatattatgtatac--------acatatgtatatattatatat +tatgtatacacatatgtatatatgtatatattatatattatgtatacacatatgtatat- +-------attatatattatgtatacacatatgtatatattatatat-------------- +------------------------------------------------------------ +----------tatgtatacacgtatatacatatatacgtatacacgtatatacgta--ta +tacgtatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg +tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata +cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta +gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat +atttggttat +>p:HG002_2_chr20:55627244-55628551 +gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat +tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt +atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt +catgtatatatttttatatgtatatatgtgggtgtgcatgtatacattaaatgatgaatt +tatatgtatatattaaatgtatatattatatgtatatattacatatatacatattgtata +ttacatatacataatacataaaatgtttaatacatacataatatatacataatataatat +tgtatattattaaattatacatgtatatatgtatactatatgtatatatgtatacaaaat +atgtatataatatatacaaaaatatgtgtatttttgtacatatattgtacaaaaatatgt +atattatatatacaaaatatgtatataatatatacatatattatgtaatataatatatac +ataatgtgtattacatacattatatgtaatatatacatatattatata---atatatatg +tatatattatatacatatatattatatacatatatgtatatattatata----------- +------catatatgtatacattatatattatgtatacatatatgtatatatgtatacatt +atatattatgtatacatatatgtatatatgtatacattatatattatgtatacatatatg +tatatatgtatacattatatattatgtatacatatatgtatatatgtatacattatatat +tatgtatacatatatgtatatatgtatacattatatattatgtatacatatatgtatata +tgtatacattatatattatgtatacatatatgtata------------------------ +------------------------------------------------------------ +----------tatgtatacatgtatatacatatatacgtatacacatatatacgtg---- +----tatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg +tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata +cataggagagagtcactgccctgagtcttaaatgtaacccttattaaagaaaatatttta +gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat +atttggttat +>ref_chr20:55627244-55628551 +gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat +tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt +atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt +catgtatatatttttatatgtatatatgtgggtgtgcatgtatacattaaatgatgaatt +tatatgtatatattaaatgtatatattatatgtatatattacatatatacatattgtata +ttacatatacataatacataaaatgtttaatacatacataatatatacataatataatat +tgtatattattaaattatacatgtatatatgtatac-----------atgtatacaaaat +atgtatataatatatacaaaaa-atgtgtatttttgtacatatattgtacaaaaatatgt +atattatatatacaaaatatgtatataatatatacatatattatgtaatataatatatac +ataatgtgtattacatacattatatgtaatatatacatatatattatatacatatatat- +tatata-catatatgtatatattatatacatatatgtatatattatatattatgtataca +catatgtatatatgtatatattatatattatgtatacacatatgtatatatgtatatatt +atatattatgtatacacatatgtatatatgtatatattatatattatgtatacacatatg +tatatatgtatatattatatattatgtatacatatatgtatatatgtatatattatatat +tatgtatacatatatgtatatatgtatatattatatattatgtatacatatatgtatat- +-------attatatattatgtatacatatatgtatatattatatattatgtatacatata +tgtatatattatatattatgtatacatatatgtatatattatatattatgtatacatata +tgtatatatgtatgtatacatgtatatacatatatacgtatacacatatatacgtgtata +tacatatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg +tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata +cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta +gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat +atttggttat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa new file mode 100644 index 00000000..782feb92 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa @@ -0,0 +1,70 @@ +>syndip_1_chr20:44763796-44764386 +gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt +gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt +tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc +atccatccacctatcacttcatccatccatccatttatccatccatccatttatccatcc +atccatccatttatccatccatccatccatttatccatccatccatccatccatccaccc +atccatccgtccatccatttatccatccatccatccatccatttatccatccatttatcc +atccatccatccatccatccatccatccatccatttatcc----atccatccatccatcc +acccatccatccatccgtccatccatttatccatccatccatccatccatccatccatcc +atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc +atccacttatccatccacccatccatccatccgtccatccattcatccatccatccatcc +tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt +tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa +tggggtcagtg +>syndip_2_chr20:44763796-44764386 +gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt +gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt +tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc +atccatccacctatcacttcatccatccatccatttatccatccatccatttatccatcc +atccatccatttatccatccatccatccatttatccatccatccatccatccatccaccc +atccatccgtccatccatttatccatccatccatccatccatttatccatccatttatcc +atccatccatccatccatccatccatccatccatccatccatttatccatccatccatcc +atccacccatccatccatccgtccatccatttatccatccatccatccatccatccatcc +atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc +atccatttatccatccacccatccatccatccgtccatccattcatccatccatccatcc +tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt +tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa +tggggtcagtg +>p:HG002_1_chr20:44763796-44764386 +gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt +gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt +tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc +atccatccacctatcacttcatccatccatccatttatccatccatccatttatccatcc +atccatccatttatccatccatccatccatttatccatccatccatccatccatccaccc +atccatccgtccatccatttatccatccatccatccatccatttatccatccatttatcc +atccatccatccatccatccatccatccatccatccatccatttatccatccatccatcc +atccacccatccatccatccgtccatccatttatccatccatccatccatccatccatcc +atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc +atccatttatccatccacccatccatccatccgtccatccattcatccatccatccatcc +tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt +tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa +tggggtcagtg +>p:HG002_2_chr20:44763796-44764386 +gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt +gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt +tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc +atccatccacctatcacttcatccatccatccatttatccatccatccatttatccatcc +atccatccatttatccatccatccatccatttatccatccatccatccatccatccaccc +atccatccgtccatccatttatccatccatccatccatccatttatccatccatttatcc +atccatccatccatccatccatccatccatccatttatcc----atccatccatccatcc +acccatccatccatccgtccatccatttatccatccatccatccatccatccatccatcc +atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc +atccacttatccatccacccatccatccatccgtccatccattcatccatccatccatcc +tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt +tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa +tggggtcagtg +>ref_chr20:44763796-44764386 +gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt +gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt +tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc +atccatccacctatcacttcatccatccatccatttatccatccatccatttatccatcc +atccatccatttatccatccatccatccatttatccatccatccatccatccatccaccc +atccatccgtccatccatttatccatccatccatccatccatttatgcatccatttatcc +atccatccatccatccatccatccatccatccatttatcc-------------------- +------------------------------------------------------------ +------------------------------------------------------------ +atccatccatccatccacccatccatccatccgtccatccattcatccatccatccatcc +tcccacaaactcaccaagcctacaatgcaatcaattctgctgtaacatgaaatatacatt +tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa +tggggtcagtg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa new file mode 100644 index 00000000..60de427f --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa @@ -0,0 +1,90 @@ +>syndip_1_chr20:63535574-63536346 +acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc +ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg +ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaaccac +ctggcctcccgcccccccctcacctggcctcccgccccctcctcacctggcctcccgccc +cctcctcacctggcctcccgcccccccctcacctggcctcccaccccgtcctcacctggc +ctcccgcacccccctcacctggcctcccgcccccccctcatctggcctcccgccccctcc +tcacctggcctcccgccccctcctctcctggcctcccgccccccccctcacctggcctcc +cgccccctcctcacctggcctcccgccccctcctcacctggcctcccgccccccccctca +cctggcctcccgccccctcctcacctggcctcccgccccccccctcacctggcctcccgc +cccctcctcacctggcctcccgcccccccctcacctggcctcccaccccgtcctcacctg +gcctcccgcacccccctcacctggcctcccgcccccccctcacctggcctcccgccccct +cctctcctggcctcccgccccctcctctcctggcctcccgtcccctcctctcctggtctc +ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctgg +cctcccgct-ccccccccacctggcctcccaccccctcctcacctggcctcctccccact +tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc +ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc +tgctaacgaggtcgtgggtct +>syndip_2_chr20:63535574-63536346 +acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc +ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg +ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaacca- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------cctggcctcccgccccccccctcacctggcctcc +cgccccctcctcacctggcctcccgccccctcctcacctggcctcccgccccccccctca +cctggcctcccgccccctcctcacctggcctcccg-ccccctcctcacctggcctcccgc +ccccccctcacctggcctcccaccccgtcctcacctggcctcccgcacccccctcacctg +gcctcccgcccccccctcacctggcctcccgccccctcctcacctggcctcccgccccct +cctctcctggcctcccgccccctcctctcctggcctcccgtcccctcctctcctggtctc +ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctag +cctcccgctcccccccccacctggcctcccaccccctcctcacctggcctcctccccact +tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc +ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc +tgctaacgaggtcgtgggtct +>p:HG002_1_chr20:63535574-63536346 +acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc +ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg +ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaacca- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------cctggcctcccgccccccccctcacctggcctcc +cgccccctcctcacctggcctcccgccccctcctcacctggcctcccgccccccccctca +cctggcctcccgccccctcctcacctggcctcccg-ccccctcctcacctggcctcccgc +ccccccctcacctggcctcccaccccgtcctcacctggcctcccgcacccccctcacctg +gcctcccgcccccccctcacctggcctcccgccccctcctcacctggcctcccgccccct +cctctcctggcctcccgccccctcctctcctggcctcccgtcccctcctctcctggtctc +ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctag +cctcccgctcccccccccacctggcctcccaccccctcctcacctggcctcctccccact +tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc +ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc +tgctaacgaggtcgtgggtct +>p:HG002_2_chr20:63535574-63536346 +acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc +ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg +ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaaccac +ctggcctcccgcccccccctcacctggcctcccgccccctcctcacctggcctcccgccc +cctcctcacctggcctcccgcccccccctcacctggcctcccaccccgtcctcacctggc +ctcccgcacccccctcacctggcctcccgcccccccctcatctggcctcccgccccctcc +tcacctggcctcccgccccctcctctcctggcctcccgccccccccctcacctggcctcc +cgccccctcctcacctggcctcccgccccctcctcacctggcctcccgccccccccctca +cctggcctcccgccccctcctcacctggcctcccgccccccccctcacctggcctcccgc +cccctcctcacctggcctcccgcccccccctcacctggcctcccaccccgtcctcacctg +gcctcccgcacccccctcacctggcctcccgcccccccctcacctggcctcccgccccct +cctctcctggcctcccgccccctcctctcctggcctcccgtcccctcctctcctggtctc +ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctgg +cctcccgct-ccccccccacctggcctcccaccccctcctcacctggcctcctccccact +tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc +ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc +tgctaacgaggtcgtgggtct +>ref_chr20:63535574-63536346 +acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc +ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg +ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaacca- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------cctggcctcccgccccccccctcacctggcctcc +cgccccctcctcacctggcctcccgccccctcctcacctggcctcccgccccccccctca +cctggcctcccgccccctcctcacctggcctcccg-ccccctcctcacctggcctcccgc +ccccccctcacctggcctcccaccccgtcctcacctggcctcccgcacccccctcacctg +gcctcccgcccccccctcacctggcctcccgccccctcctcacctggcctcccgccccct +cctctcctggcctcccgccccctcctctcctggcctcccgtcccctcctctcctggtctc +ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctag +cctcccgctcccccccccacctggcctcccaccccctcctcacctggcctcctccccact +tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc +ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc +tgctaacgaggtcgtgggtct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa new file mode 100644 index 00000000..6107bb73 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa @@ -0,0 +1,225 @@ +>syndip_1_chr20:56280322-56282215 +ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag +tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca +aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa +catccatatatagatatatctatatatctatctatagctac------------------- +---------------------------------------------atctacagatatcta +tagccacatctacagatatctatagctacatctacagatatctacagatatctacagcta +catctacagatatctatagatatctatagctatatctacagatatctatcgatagatata +tctatcgatagatatatccacatagatatctagatatatatggatatatctatagatatc +tatctggatatatctatcgatagatatatagatatatatagataaagctatatagatata +tatagataaacctatatagatacatagatatatagagagaaacctatatacatatataga +tatatagagagaaacctatatagatatatagatatatagagataaacctatatagatgta +tagataaacctatatagatatatagataaacctatgtagatatatagatatatatagata +aatctatatagatatataaaatag------------------------ataaatctatat +agatatatatacataaatctatatagatatatatacataaatctatatatatacataaat +ctatatatacataaatctatatagatatatatacataaatctatatagatatatatacat +aaatctatatagatatatatacataaatctatatagatatatatacataaatctatatag +atatatatacataaatctatatagatatatacataaatctatacagatacatatacataa +atctatacagatacatatacataaatctatacagatacatatacataaatctatacagat +atatagatacatatagataaatctatacagatatatagatacatatagataaatctatac +agatatatagatacatatagatacatctatacagatatatagatacatatagatacatct +atacagatatatagatacatatagatacatctatacagatatatagatacatatagatac +atctatacagatatatagatacatatagatacatctatacagatatatagatacatatag +atacatctatacagatatatagatacatatagatacatctatacagatatattgatacat +atagatacatctatatagatatattgatacatatagatacatctatatagatatattgat +acatatagatacatctatatagatatattgatacatatagatacatctatatagattaga +tatatatagatacatctatatagattagatatatatagatacatctatatagattagata +tatatagatacatctatatatagatatatagatatatatagatacatctatatatagata +tatagatatatatagatacatctatatatagatatatagatgtatatagatacatctata +tatagatatatagatgtatatagatacatctatatatagatatatagatgtatatagata +catctatatatagatatatagatgtatatagatacatctatatatagatatatagatgta +tatagatacatctatatatagatatatagatgtatatagatacatctatatatagatata +tagatgtatatagatacatctatatatagatatatagatgtatatagatacatctatata +tagatatatagatgtatatagatacatctatatatagatatatagatgtatatagataca +tagatatatatagatacatttatgtatatatatatgtatatatagatatatagatatata +tagatacatctacacagagagatatatagaaacatctacacagggagatatatagataca +tctacatagagagatatatagatacatctacatagagagatatatagatacatctacata +gagagatatatagatacatctacatagagagatatatagatacatctacatagagagata +tatagatacatctacatagagagatatatagatacatctacatagagagatatatagata +catctacatagagagatatatagatacatctacatagagagatatatagatacatctaca +tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata +tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc +tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt +actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt +cttatccct +>syndip_2_chr20:56280322-56282215 +ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag +tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca +aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa +catccatatatagatatatctatatatctatctatagctacatctacagatatctatagc +tacatctacagatatctacagatatctatagctacatctacagatatctacagatatcta +tagctacatctacagatatctatagctacatctacagatatctacagatatctacagcta +catctacagatatctatagatatctatagctatatctacagatatctatcgatagatata +tctatcgatagatatatccacatagatatctagatatatatggatatatctatagatatc +tatctggatatatctatcgatagatatatagatatatatagataaagctatatagatata +tatagataaacctatatagatacatagatatatagagagaaacctatatacatatataga +tatatagagagaaacctatatagatatatagatatatagagataaacctatatagatgta +tagataaacctatatagatatatagataaacctatgtagatatatagatatatatagata +aatctatatagatatataaaatag------------------------ataaatctatat +agatatatatacataaatctatatagatatatatacataaatctatatatatacataaat +ctatatatacataaatctatatagatatatatacataaatctatatagatatatatacat +aaatctatatagatatatatacataaatctatatag------------------------ +--atatatacataaatctatat--atatatacataaatctatatagatatatatacataa +atctatatagatacatatagataaatctatacaga------------------------t +atataaatatatatagataaatctatacagatatatagatacatatagataaatctatac +agatatatagatacatatagataaatctatacagatatatagatacatatagataaatct +atacagatatatagatacatatagataaatctatacagatatatagatacatatagataa +atctatacagatatatagatacatatagataaatctatacagatatatagatacatatag +ataaatctatacagatatatagatacatatagataaatctatacagatatatagatacat +atagatacatctatatagatatattgatacatatagatacatctatatagatatatagat +--atatagatacatctata----------------------------------------- +------------------------------------------------------tagata +tatatagatacatctatatatagacatatagatatatatagatacatctatatatagata +tatagatatatatagatacatctatatatagatatatagat------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----atatatagatacatctacacagagagatatatagaaacatctacacagggagata +tatagatacatctacatagagagatatatagatacatctacatagagagatatatagata +catctatatagagagatatatagatacatctatatagagagatatatagatacatctaca +tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata +tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc +tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt +actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt +cttatccct +>p:HG002_1_chr20:56280322-56282215 +ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag +tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca +aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa +catccatatatagatatatctatatatctatctatagctacatctacagatatctatagc +tacatctacagatatctacagatatctatagctacatctacagatatctacagatatcta +tagctacatctacagatatctatagctacatctacagatatctacagatatctacagcta +catctacagatatctatagatatctatagctatatctacagatatctatcgatagatata +tctatcgatagatatatccacatagatatctagatatatatggatatatctatagatatc +tatctggatatatctatcgatagatatatagatatatatagataaagctatatagatata +tatagataaacctatatagatacatagatatatagagagaaacctatatacatatataga +tatatagagagaaacctatatagatatatagatatatagagataaacctatatagatgta +tagataaacctatatagatatatagataaacctatgtagatatatagatatatatagata +aatctatatagatatataaaatag------------------------ataaatctatat +agatatatatacataaatctatatagatatatatacataaatctatatatatacataaat +ctatatatacataaatctatatagatatatatacataaatctatatagatatatatacat +aaatctatatagatatatatacataaatctatatag------------------------ +--atatatacataaatctatat--atatatacataaatctatatagatatatatacataa +atctatatagatacatatagataaatctatacaga------------------------t +atataaatatatatagataaatctatacagatatatagatacatatagataaatctatac +agatatatagatacatatagataaatctatacagatatatagatacatatagataaatct +atacagatatatagatacatatagataaatctatacagatatatagatacatatagataa +atctatacagatatatagatacatatagataaatctatacagatatatagatacatatag +ataaatctatacagatatatagatacatatagataaatctatacagatatatagatacat +atagatacatctatatagatatattgatacatatagatacatctatatagatatatagat +--atatagatacatctata----------------------------------------- +------------------------------------------------------tagata +tatatagatacatctatatatagacatatagatatatatagatacatctatatatagata +tatagatatatatagatacatctatatatagatatatagat------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----atatatagatacatctacacagagagatatatagaaacatctacacagggagata +tatagatacatctacatagagagatatatagatacatctacatagagagatatatagata +catctatatagagagatatatagatacatctatatagagagatatatagatacatctaca +tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata +tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc +tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt +actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt +cttatccct +>p:HG002_2_chr20:56280322-56282215 +ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag +tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca +aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa +catccatatatagatatatctatatatctatctatagctac------------------- +---------------------------------------------atctacagatatcta +tagccacatctacagatatctatagctacatctacagatatctacagatatctacagcta +catctacagatatctatagatatctatagctatatctacagatatctatcgatagatata +tctatcgatagatatatccacatagatatctagatatatatggatatatctatagatatc +tatctggatatatctatcgatagatatatagatatatatagataaagctatatagatata +tatagataaacctatatagatacatagatatatagagagaaacctatatacatatataga +tatatagagagaaacctatatagatatatagatatatagagataaacctatatagatgta +tagataaacctatatagatatatagataaacctatgtagatatatagatatatatagata +aatctatatagatatataaaatag------------------------ataaatctatat +agatatatatacataaatctatatagatatatatacataaatctatatatatacataaat +ctatatatacataaatctatatagatatatatacataaatctatatagatatatatacat +aaatctatatagatatatatacataaatctatatagatatatatacataaatctatatag +atatatatacataaatctatatagatatatacataaatctatacagatacatatacataa +atctatacagatacatatacataaatctatacagatacatatacataaatctatacagat +atatagatacatatagataaatctatacagatatatagatacatatagataaatctatac +agatatatagatacatatagatacatctatacagatatatagatacatatagatacatct +atacagatatatagatacatatagatacatctatacagatatatagatacatatagatac +atctatacagatatatagatacatatagatacatctatacagatatatagatacatatag +atacatctatacagatatatagatacatatagatacatctatacagatatattgatacat +atagatacatctatatagatatattgatacatatagatacatctatatagatatattgat +acatatagatacatctatatagatatattgatacatatagatacatctatatagattaga +tatatatagatacatctatatagattagatatatatagatacatctatatagattagata +tatatagatacatctatatatagatatatagatatatatagatacatctatatatagata +tatagatatatatagatacatctatatatagatatatagatgtatatagatacatctata +tatagatatatagatgtatatagatacatctatatatagatatatagatgtatatagata +catctatatatagatatatagatgtatatagatacatctatatatagatatatagatgta +tatagatacatctatatatagatatatagatgtatatagatacatctatatatagatata +tagatgtatatagatacatctatatatagatatatagatgtatatagatacatctatata +tagatatatagatgtatatagatacatctatatatagatatatagatgtatatagataca +tagatatatatagatacatttatgtatatatatatgtatatatagatatatagatatata +tagatacatctacacagagagatatatagaaacatctacacagggagatatatagataca +tctacatagagagatatatagatacatctacatagagagatatatagatacatctacata +gagagatatatagatacatctacatagagagatatatagatacatctacatagagagata +tatagatacatctacatagagagatatatagatacatctacatagagagatatatagata +catctacatagagagatatatagatacatctacatagagagatatatagatacatctaca +tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata +tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc +tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt +actctcgatactgggacacttttaatcattgctataatacattaattttaatctaatctt +cttatccct +>ref_chr20:56280322-56282215 +ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag +tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca +aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa +catccatatatagatatatctatatatctatctatagctac------------------- +-------------atctacagatatctatagctacatctacagatatctacagatatcta +tagctacatctacagatatctatagctacatctacagatatctacagatatctacagcta +catctacagatatctatagatatctatagctatatctacagatatctatcgatagatata +tctatcgatagatatatccacatagatatctagatatatatggatatatctatagatatc +tatctggatatatctatcgatagatatatagatatatatagataaagctatatagatata +tatagataaacctatatagatacatagatatatagagagaaacctatatacatatataga +tatatagagagaaacctatatagatatatagatatatagagagaaacctatatagatgta +tagataaacctatatagatatatagataaacctatgtagatatatagatatatatagata +aatctatatagatatataaaatagataaatctatatagatatatatacataaatctatat +agatatatatacataaatctatatagatatatatacataaatctatatatatacataaat +ctatatatacataaatctatatagatatatatacataaatctatatagatatatatacat +aaatctatatagatatatatacataaatctatatag------------------------ +atatatatacataaatctatatagatatatacataaatctatatagatatatatacataa +atctatatagatatatatacataaatctatatagatatatatacataaatc--------t +atatagatacatatagataaatctatacagatatatagatacatatagataaatctatac +agatatatagatacatatagataaatctatacagatatatagatacatatagataaatct +atacagatatatagatacatatagataaatctatacagatatatagatacatatagataa +atctatacagatatatagatacatatagataaatctatacagatatatagatacatatag +atacatctatacagatatatagatacatatagatacatctatacagatatatagatacat +atagatacatctatacagatatattgatacatatagatacatctatatagatatatagat +--atatagatacatctata----------------------------------------- +------------------------------------------------------tagata +tatagatata----tagatacatctatatagatatatatagatacatctatatatagaca +tatagatatatatagatacatctatatatagatatatagat------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-----atatatagatacatctacacagagagatatatagaaacatctacacagggagata +tatagatacatctacacagagagatatatagatacatctac------------------- +-------atagagagatatatagatacatctacatagagagatatatagatacatctaca +tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata +tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc +tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt +actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt +cttatccct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa new file mode 100644 index 00000000..e6db8916 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa @@ -0,0 +1,195 @@ +>syndip_1_chr20:55943819-55945908 +gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc +caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca +gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat +cacttgaggttaggagtttgagaccagcctggccaacatggtgaaacctcgcctctacta +aaaatacaaaaattagctgggcatgctggcatgtgcctgtaatcccagctactcaggagg +ctgaggcaggagaactgcttgagcctgggaggcagaggttgcagtgagctgagatggcac +cactgcattccagctgggcaacagagagagactctttcttaaatatatatattttatata +tatatagatacacacacacacacacacacacacac------------------------- +---------atatatatatatatatatatatatatatatatatgcatgcatgccagatgc +agtggctcacgcctgtaatcccagcactttgagaggccaaggtgggcagatcatctgagg +tcaggagttccagaccagcctagccaacatggtgaaactccatctctactaaaaatacaa +aattagctggctgtggtggctcatgctggtaatcccagctacttgggaggctgaggcagg +agaatcacttgaacctgggaggtggaggttgcagtgagccgagatcacaccactgcactc +cagcctggatgacagagcgagactccattaaaaaaatatatctatatctatagatatcta +tagatatatatctagatatagatctatgagtatatatctatagatatctagatatatatt +tatagatagatatatctatagatatctatatatttctatcgatagagatatctatagata +tctatcaatatatctatagatttctagagatatgtatctagatataaatctatagacata +gatatagatatatagatatttaactgccaatgtcctaaacactttatactcaacacattt +tttataaaaaaaaatagatatcggtctagatagtatctatctagatatcggtctagatag +tatctatctagatatctgtgtagatgtctagatatctatctagatattatctatctagat +atct--gtcgctatctagatatctatctagatatctgtcgctatctagatatctatctag +atatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatc +tagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagata +tctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatc +tagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtc +gctatctagatatctatctagatagtatctgtcgctatctagatatctatctagatagta +tctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatctag +atagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatct +atctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctag +atatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgct +atctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatct +gtcgctatctagatatctatctagatagtatctgtcgctatctagatatctagatagtat +ctgtcgctatctagatatctagatagtatctgtctagatagctatctagatagtatctgt +ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt +ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact +gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag +gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca +tgggc +>syndip_2_chr20:55943819-55945908 +gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc +caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca +gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat +cacttgaggttaggagtttgagaccagcctggccaacatggtgaaacctcgcctctacta +aaaatacaaaaattagctgggcatgctggcatgtgcctgtaatcccagctactcaggagg +ctgaggcaggagaactgcttgagcctgggaggcagaggttgcagtgagctgagatggcac +cactgcattccagctgggcaacagagagagactctttcttaaatatatatattttatata +tatatagatacacacacacacacacacacacacacacacacacacacacacacacatata +tatatatatatatatatatatatatatatatatatatatatatgcatgcatgccagatgc +agtggctcacgcctgtaatcccagcactttgagaggccaaggtgggcagatcatctgagg +tcaggagttccagaccagcctagccaacatggtgaaactccatctctactaaaaatacaa +aattagctggctgtggtggctcatgctggtaatcccagctacttgggaggctgaggcagg +agaatcacttgaacctgggaggtggaggttgcagtgagccgagatcacaccactgcactc +cagcctggatgacagagcgagactccattaaaaaaatatatctatatctatagatatcta +tagatatatatctagatatagatctatgagtatatatctatagatatctagatatatatt +tatagatagatatatctatagatatctatatatttctatcgatagagatatctatagata +tctatcaatatatctatagatttctagagatatgtatctagatataaatctatagacata +gatatagatatatagatatttaactgccaatgtcctaaacactttatactcaacacattt +tttataaaaaaaaatagatatcggtctagatagtatctatctagatatcggtctagatag +tatctatctagatatcgg--------------------tctagatagtatctatctagat +atctgcgtagatgtctagatatctatctagatat---------------tatctatctag +a----------------------------------------------------------- +----------------------------------------tatctgtcgctatctagata +tctatctaga---tatctgtcgctatctagatatctatctaga----------------- +----------------------------------------------------tatctgtc +gctatctagatatctatctagatatcatctgtcgctatctagatatctatctagatatca +tctgtcgctatctagatatctatctagatatcatctgtcgctatctagatatctatctag +atatcatctgtcgctatctagatatctatctagatatcatctgtcgctatctagatatct +atctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctag +atatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgct +atctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatct +gtcgctatctagatatctatctagatagtatctgtcgctatctagatatctagatagtat +ctgtcgctatctagatatctagatagtatctgtctagatagctatctagatagtatctgt +ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt +ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact +gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag +gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca +tgggc +>p:HG002_1_chr20:55943819-55945908 +gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc +caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca +gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat +cacttgaggttaggagtttgagaccagcctggccaacatggtgaaacctcgcctctacta +aaaatacaaaaattagctgggcatgctggcatgtgcctgtaatcccagctactcaggagg +ctgaggcaggagaactgcttgagcctgggaggcagaggttgcagtgagctgagatggcac +cactgcattccagctgggcaacagagagagactctttcttaaatatatatattttatata +tatatagatacacacacacacacacacacacacacacacacacacacacacacacatata +tatatatatatatatatatatatatatatatatatatatatatgcatgcatgccagatgc +agtggctcacgcctgtaatcccagcactttgagaggccaaggtgggcagatcatctgagg +tcaggagttccagaccagcctagccaacatggtgaaactccatctctactaaaaatacaa +aattagctggctgtggtggctcatgctggtaattccagctacttgggaggctgaggcagg +agaatcacatgaacctgggaggtggaggttgcagtgagccgagatcacaccactgcactc +cagcctggatgacagagcgagactccattaaaaaaatatatctatatctatagatatcta +tagatatatatctagatatagatctatgagtatatatctatagatatctagatatatatt +tatagatagatatatctatagatatctatatatttctatcgatagagatatctatagata +tctatcaatatatctatagatttctagagatatgtatctagatataaatctatagacata +gatatagatatatagatatttaactgccaatgtcctaaacactttatactcaacacattt +tttataaaaaaaaatagatatcggtctagatagtatctatctagatatcggtctagatag +tatctatctagatatcgg--------------------tctagatagtatctatctagat +atctgcgtagatgtctagatatctatctagatat---------------tatctatctag +a----------------------------------------------------------- +----------------------------------------tatctgtcgctatctagata +tctatctaga---tatctgtcgctatctagatatctatctaga----------------- +----------------------------------------------------tatctgtc +gctatctagatatctatctagatatcatctgtcgctatctagatatctatctagatatca +tctgtcgctatctagatatctatctagatatcatctgtcgctatctagatatctatctag +atatcatctgtcgctatctagatatctatctagatatcatctgtcgctatctagatatct +atctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctag +atatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgct +atctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatct +gtcgctatctagatatctatctagatagtatctgtcgctatctagatatctagatagtat +ctgtcgctatctagatatctagatagtatctgtctagatagctatctagatagtatctgt +ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt +ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact +gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag +gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca +tgggc +>p:HG002_2_chr20:55943819-55945908 +gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc +caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca +gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat +cacttgaggttaggagtttgagaccagcctggccaacatggtgaaacctcgcctctacta +aaaatacaaaaattagctgggcatgctggcatgtgcctgtaatcccagctactcaggagg +ctgaggcaggagaactgcttgagcctgggaggcagaggttgcagtgagctgagatggcac +cactgcattccagctgggcaacagagagagactctttcttaaatatatatattttatata +tatatagatacacacacacacacacacacacacac------------------------- +---------atatatatatatatatatatatatatatatatatgcatgcatgccagatgc +agtggctcacgcctgtaatcccagcactttgagaggccaaggtgggcagatcatctgagg +tcaggagttccagaccagcctagccaacatggtgaaactccatctctactaaaaatacaa +aattagctggctgtggtggctcatgctggtaatcccagctacttgggaggctgaggcagg +agaatcacttgaacctgggaggtggaggttgcagtgagccgagatcacaccactgcactc +cagcctggatgacagagcgagactccattaaaaaaatatatctatatctatagatatcta +tagatatatatctagatatagatctatgagtatatatctatagatatctagatatatatt +tatagatagatatatctatagatatctatatatttctatcgatagagatatctatagata +tctatcaatatatctatagatttctagagatatgtatctagatataaatctatagacata +gatatagatatatagatatttaactgccaatgtcctaaacactttatactcaacacattt +tttataaaaaaaaatagatatcggtctagatagtatctatctagatatcggtctagatag +tatctatctagatatctgtgtagatgtctagatatctatctagatattatctatctagat +atct--gtcgctatctagatatctatctagatatctgtcgctatctagatatctatctag +atatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatc +tagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagata +tctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatc +tagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtc +gctatctagatatctatctagatagtatctgtcgctatctagatatctatctagatagta +tctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatctag +atagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatct +atctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctag +atatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgct +atctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatct +gtcgctatctagatatctatctagatagtatctgtcgctatctagatatctagatagtat +ctgtcgctatctagatatctagatagtatctgtctagatagctatctagatagtatctgt +ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt +ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact +gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag +gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca +tgggc +>ref_chr20:55943819-55945908 +gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc +caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca +gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat +cacttgaggttaggagtttgagaccagcctggccaacatggtgaaacctcgcctctacta +aaaatacaaaaattagctgggcatgctggcatgtgcctgtaatcccagctactcaggagg +ctgaggcaggagaactgcttgagcctgggaggcagaggttgcagtgagctgagatggcac +cactgcattccagctgggcaacagagagagactctttcttaaatatatatattttatata +tatatagatacacacacacacacacacacacacac------------------------- +---------atatatatatatatatatatatatatatatatatgcatgcatgccagatgc +agtggctcacgcctgtaatcccagcactttgagaggccaaggtgggcagatcatctgagg +tcaggagttccagaccagcctagccaacatggtgaaactccatctctactaaaaatacaa +aattagctggctgtggtggctcatgctggtaatcccagctacttgggaggctgaggcagg +agaatcacttgaacctgggaggtggaggttgcagtgagccgagatcacaccactgcactc +cagcctggatgacagagcgagactccattaaaaaaatatatctatatctatagatatcta +tagatatatatctagatatagatctatgagtatatatctatagatatctagatatatatt +tatagatagatatatctatagatatctatatatttctatcgatagagatatctatagata +tctatcaatatatctatagatttctagagatatgtatctagatataaatctatagacata +gatatagatatatagatatttaactgccaatgtcctaaacactttatactcaacacattt +tttataaaaaaaaatagatatcggtctagatagtatctatctagatatcggtctagatag +tatctatctagatatctgtgtagatgtctagatatctatctagatattatctatctagat +atct--gtcgctatctagatatctatctagatatctgtcgctatctagatatctatctag +a----------------------------------------------------------- +----------------------------------------tatctgtcgctatctagata +tctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatc +tagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtc +gctatctagatatctatctagatagtatctgtcgctatctagatatctatctagatagta +tctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatctag +atagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatct +atctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgctatctag +atatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgct +atctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatct +gtcgctatctagatatctatctagatagtatctgtcgctatctagatatctagatagtat +ctgtcgctatctagatatctagatagtatctgtctagatagctatctagatagtatctgt +ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt +ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact +gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag +gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca +tgggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa new file mode 100644 index 00000000..41aa4b4e --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa @@ -0,0 +1,95 @@ +>syndip_1_chr20:63221380-63221977 +tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga +tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc +gcagtagcttc------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------ggactgcagggactgcagggat +tgcagggactgcagggattgcagggactgcagggattgcagggattgcagggattgcagg +gattgcagggactgcagggactgcattgctcttgcagcaccacccgcctccgtggacacc +ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt +gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg +gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc +t +>syndip_2_chr20:63221380-63221977 +tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga +tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc +gcagtagcttcggactgcagggattgcagggactgcagggattgcagggattgcagggag +tgcagggactgcagggactgcagggattgcagggactgcagggactgcagggactgcagg +gactgcagggactgcagggactgcagggattgcagggactgcagggactgcagggattgc +agggattgcagggactgcagggactgcagggattgcagggactgcagggactgcagggac +tgcagggactgcagggactgcagggactgcagggattgcagggattgcagggattgcagg +gattgcagggactgcagggactgcagggactgcagggactgcagggactgcagggactgc +agggattgcagggattgcagggattgcagggattgcagggattgcagggactgcagggac +tgcagggactgcagggactgcagggattgcagggattgcagggattgcagggattgcagg +gactgcagggactgcagggattgcagggattgcagggattgcagggattgcagggattgc +agggattgcagggactgcagggactgcagggactgcagggattgcagggattgcagggat +tgcagggattgcagggactgcagggactgcagggattgcagggattgcagggattgcagg +gattgcagggactgcagggcctgcattgctcttgcagcaccacccgcctccgtggacacc +ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt +gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg +gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc +t +>p:HG002_1_chr20:63221380-63221977 +tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga +tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc +gcagtagcttcggactgcagggattgcagggactgcagggattgcagggattgcagggag +tgcagggactgcagggactgcagggattgcagggactgcagggactgcagggactgcagg +gactgcagggactgcagggactgcagggattgcagggactgcagggactgcagggattgc +agggattgcagggactgcagggactgcagggattgcagggactgcagggactgcagggac +tgcagggactgcagggactgcagggactgcagggattgcagggattgcagggattgcagg +gattgcagggactgcagggactgcagggactgcagggactgcagggactgcagggactgc +agggattgcagggattgcagggattgcagggattgcagggattgcagggactgcagggac +tgcagggactgcagggactgcagggattgcagggattgcagggattgcagggattgcagg +gactgcagggactgcagggattgcagggattgcagggattgcagggattgcagggattgc +agggattgcagggactgcagggactgcagggactgcagggattgcagggattgcagggat +tgcagggattgcagggactgcagggactgcagggattgcagggattgcagggattgcagg +gattgcagggactgcagggcctgcattgctcttgcagcaccacccgcctccgtggccacc +ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt +gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg +gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc +t +>p:HG002_2_chr20:63221380-63221977 +tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga +tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc +gcagtagcttc------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------------ggactgcagggactgcagggat +tgcagggactgcagggattgcagggactgcagggattgcagggattgcagggattgcagg +gattgcagggactgcagggactgcattgctcttgcagcaccacccgcctccgtggccacc +ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt +gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg +gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc +t +>ref_chr20:63221380-63221977 +tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga +tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc +gcagtagcttc------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------------ggactgcagggattgcagggactgcagggattgcagggattgcagg +gattgcagggactgcagggattgcagggactgcagggactgcagggactgcagggactgc +agggactgcagggattgcagggattgcagggattgcagggactgcagggattgcagggac +tgcagggactgcagggattgcagggactgcagggactgcagggactgcagggattgcagg +gattgcagggactgcagggactgcattgctcttgcagcaccacccgcctccgtggacacc +ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt +gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg +gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc +t diff --git a/truvari/msatovcf.py b/truvari/msatovcf.py index 69eb725d..1a513d03 100644 --- a/truvari/msatovcf.py +++ b/truvari/msatovcf.py @@ -136,7 +136,7 @@ def msa2vcf(msa, anchor_base='N'): >>> import truvari >>> from truvari.phab import fasta_reader >>> msa_dir = "repo_utils/test_files/external/fake_mafft/lookup/" - >>> msa_file = "fm_0130785485c4c1711be9dcebabcbe57c.msa" + >>> msa_file = "fm_ca43b50e2a5d770bb34202d8a7b62421.msa" >>> seqs = open(msa_dir + msa_file).read() >>> fasta = {n:s.decode() for n, s in fasta_reader(seqs, False)} >>> m_entries_str = truvari.msa2vcf(fasta) diff --git a/truvari/phab.py b/truvari/phab.py index 32e2055c..4a929def 100644 --- a/truvari/phab.py +++ b/truvari/phab.py @@ -140,8 +140,7 @@ def collect_haplotypes(ref_haps_fn, hap_jobs, threads): """ all_haps = defaultdict(BytesIO) with multiprocessing.Pool(threads, maxtasksperchild=1) as pool: - for haplotype in pool.imap_unordered(partial(extract_haplotypes, ref_fn=ref_haps_fn), - hap_jobs): + for haplotype in pool.imap(partial(extract_haplotypes, ref_fn=ref_haps_fn), hap_jobs): for location, fasta_entry in haplotype: all_haps[location].write(fasta_entry) pool.close()