Skip to content

Commit

Permalink
Update manifest (#41)
Browse files Browse the repository at this point in the history
* Update manifest

* Changed code to use openpyxl to open Excel files

* Changed test file for formatting/annotating UAS Sample Details Report

* Removed old UAS Sample Details Report for testing

* Fixed error for DXS7423

* Added new observed alleles for a few loci

* Updated test files

* Updated test file

* Fixed bugs with running multiple SR files at once

* Updated SNP data

* Updated test files

* Sort SNP output files

* Sorted UAS output

Co-authored-by: Rebecca Mitchell <[email protected]>
  • Loading branch information
rnmitchell and Rebecca Mitchell authored Feb 16, 2021
1 parent 6c3a74f commit 83a6170
Show file tree
Hide file tree
Showing 17 changed files with 9,114 additions and 9,069 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
include versioneer.py
include lusSTR/_version.py
include lusSTR/str_markers.json
include lusSTR/snp_data.json
include lusSTR/tests/data/*
include lusSTR/tests/data/STRait_Razor_test_output/*
include lusSTR/tests/data/UAS_bulk_input/*
include lusSTR/tests/data/snps/*
14 changes: 8 additions & 6 deletions lusSTR/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import lusSTR
import argparse
import glob
import openpyxl
import os
import pandas as pd
import sys
Expand All @@ -27,7 +28,6 @@ def uas_load(inpath, sexloci=False):
sex_strs = pd.DataFrame() if sexloci is True else None
files = glob.glob(os.path.join(inpath, '*.xlsx'))
for filename in sorted(files):
print(filename)
autodata, sexdata = uas_format(filename, sexloci)
auto_strs = auto_strs.append(autodata)
if sexloci is True:
Expand All @@ -38,21 +38,23 @@ def uas_load(inpath, sexloci=False):


# NOTE(review): this is a rendered diff whose +/- markers and indentation were
# lost, so the pre-commit and post-commit versions of the function are
# interleaved. The [old]/[new] labels below are inferred from the hunk line
# counts (-38,21 +38,23) and the commit message ("use openpyxl to open excel
# files") -- confirm against the repository before relying on them.
def parse_str_table_from_sheet(infile, sheet, exclude=None):
# Purpose: pull the table that follows the "Coverage Information" marker out
# of one sheet of the workbook `infile`, keep the Locus / Reads /
# Repeat Sequence columns, optionally drop the loci named in `exclude`, and
# tag every row with SampleID / Project / Analysis read from fixed cells.
# Returns the resulting DataFrame.
#
# [old] Removed by this commit: pandas read the sheet directly.
table = pd.read_excel(io=infile, sheet_name=sheet)
# [new] Added by this commit: open the workbook with openpyxl, look the sheet
# up via file[sheet], and build the DataFrame from the raw cell values.
file = openpyxl.load_workbook(infile)
file_sheet = file[sheet]
table = pd.DataFrame(file_sheet.values)
# Row label of the first row whose first column equals "Coverage Information".
# The [0] raises IndexError when no row matches.
offset = table[table.iloc[:, 0] == "Coverage Information"].index.tolist()[0]
# The row right after the marker supplies the column names; the data rows
# start two rows after the marker.
data = table.iloc[offset + 2:]
data.columns = table.iloc[offset + 1]
# Drop rows whose Locus is listed in `exclude` (skipped when exclude is None).
if exclude is not None:
data = data[~data.Locus.isin(exclude)]
data = data[['Locus', 'Reads', 'Repeat Sequence']]
# [old] Removed by this commit: metadata cells as positioned when the table
# came from pd.read_excel.
data['SampleID'] = table.iloc[1, 1]
data['Project'] = table.iloc[2, 1]
data['Analysis'] = table.iloc[3, 1]
# [new] Added by this commit: the same three cells, one row lower.
# Presumably this is because pd.DataFrame(sheet.values) keeps the sheet's
# first row as data row 0, whereas pd.read_excel by default uses that row as
# the column header -- TODO confirm against a real report file.
data['SampleID'] = table.iloc[2, 1]
data['Project'] = table.iloc[3, 1]
data['Analysis'] = table.iloc[4, 1]
return data


def uas_format(infile, sexloci=False):
auto_strs = parse_str_table_from_sheet(infile, sheet=0, exclude=['Amelogenin'])
auto_strs = parse_str_table_from_sheet(infile, sheet='Autosomal STRs', exclude=['Amelogenin'])
sex_strs = None
if sexloci is True:
y_strs = parse_str_table_from_sheet(infile, 'Y STRs')
Expand Down
6 changes: 3 additions & 3 deletions lusSTR/snp_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"rs1357617": {
"Type": "i",
"Alleles": ["A", "T"],
"ReverseCompNeeded": "No",
"ReverseCompNeeded": "Yes",
"Coord": 60
},
"rs4364205": {
Expand Down Expand Up @@ -362,7 +362,7 @@
"rs1454361": {
"Type": "i",
"Alleles": ["T", "A"],
"ReverseCompNeeded": "No",
"ReverseCompNeeded": "Yes",
"Coord": 46
},
"rs722290": {
Expand Down Expand Up @@ -494,7 +494,7 @@
"rs445251": {
"Type": "i",
"Alleles": ["G", "C"],
"ReverseCompNeeded": "No",
"ReverseCompNeeded": "Yes",
"Coord": 43
},
"rs1005533": {
Expand Down
Loading

0 comments on commit 83a6170

Please sign in to comment.