Skip to content

Commit

Permalink
Merge pull request #11 from richard-burhans/update-batched-lastz
Browse files Browse the repository at this point in the history
Updating to set correct format for output
  • Loading branch information
richard-burhans authored Jul 30, 2024
2 parents b5d08d8 + f1a3858 commit 869a01a
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 23 deletions.
2 changes: 1 addition & 1 deletion tools/batched_lastz/batched_lastz.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<param argument="--input" type="data" format="tgz" label="Tarball"/>
</inputs>
<outputs>
<data name="output" label="Output" format="maf"/>
<data name="output" label="Output" format="auto" />
</outputs>
<tests>
<test expect_num_outputs="1">
Expand Down
76 changes: 54 additions & 22 deletions tools/batched_lastz/run_lastz_tarball.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,38 @@ def __init__(self, pathname: str, debug: bool = False) -> None:
self.debug = debug
self.tarfile = None
self.commands: typing.List[typing.Dict[str, typing.Any]] = []
self.format_name = "tabular"
self._extract()
self._load_commands()
self._load_format()

def batch_commands(self) -> typing.Iterator[typing.Dict[str, typing.Any]]:
    """Yield the loaded command dictionaries one by one, in stored order."""
    yield from self.commands

def final_output_format(self) -> str:
    """Return the output format name currently stored on this instance."""
    return self.format_name

def _extract(self) -> None:
    """Unpack the input tarball into the current working directory.

    Opens ``self.pathname`` with transparent compression detection,
    extracts every member using the ``"data"`` extraction filter, and
    closes the archive. When ``self.debug`` is set, the elapsed time is
    reported on stderr.

    Exits the program with an error message when the tarball is missing
    or cannot be read as a tar archive.
    """
    # NOTE(review): __init__ initialises ``self.tarfile`` but the handle is
    # stored on ``self.tarball`` here -- confirm which name is intended.
    try:
        self.tarball = tarfile.open(
            name=self.pathname, mode="r:*", format=tarfile.GNU_FORMAT
        )
    except FileNotFoundError:
        sys.exit(f"ERROR: unable to find input tarball: {self.pathname}")
    except tarfile.ReadError:
        sys.exit(f"ERROR: error reading input tarball: {self.pathname}")

    begin = time.perf_counter()
    # The context manager closes the archive even if extraction raises,
    # so the file handle is never leaked on a failed extract.
    with self.tarball:
        self.tarball.extractall(filter="data")
    elapsed = time.perf_counter() - begin

    if self.debug:
        print(
            f"Extracted tarball in {elapsed} seconds", file=sys.stderr, flush=True
        )

def _load_commands(self) -> None:
try:
f = open("galaxy/commands.json")
Expand Down Expand Up @@ -173,26 +198,21 @@ def _load_command(self, command_dict: typing.Dict[str, typing.Any]) -> None:

self.commands.append(command_dict)

def _extract(self) -> None:
def _load_format(self) -> None:
try:
self.tarball = tarfile.open(
name=self.pathname, mode="r:*", format=tarfile.GNU_FORMAT
)
with open("galaxy/format.txt") as f:
format_name = f.readline()
format_name = format_name.rstrip("\n")
except FileNotFoundError:
sys.exit(f"ERROR: unable to find input tarball: {self.pathname}")
except tarfile.ReadError:
sys.exit(f"ERROR: error reading input tarball: {self.pathname}")

begin = time.perf_counter()
self.tarball.extractall(filter="data")
self.tarball.close()
elapsed = time.perf_counter() - begin

if self.debug:
print(
f"Extracted tarball in {elapsed} seconds", file=sys.stderr, flush=True
sys.exit(
f"ERROR: input tarball missing galaxy/format.txt: {self.pathname}"
)

if format_name in ["bam", "maf"]:
self.format_name = format_name
elif format_name == "differences":
self.format_name = "interval"


class TarRunner:
def __init__(
Expand Down Expand Up @@ -302,19 +322,31 @@ def run(self) -> None:

def _cleanup(self) -> None:
num_output_files = len(self.output_files.keys())
if num_output_files != 1:
sys.exit(f"ERROR: expecting a single output file, found {num_output_files}")

final_output_format = self.batch_tar.final_output_format()

for file_type, file_list in self.output_files.items():
with open(f"output.{file_type}", "w") as ofh:
print("##maf version=1", file=ofh)
with open(f"output.{final_output_format}", "w") as ofh:
if final_output_format == "maf":
print("##maf version=1", file=ofh)
for filename in file_list:
with open(f"galaxy/files/{filename}") as ifh:
for line in ifh:
ofh.write(line)

if num_output_files == 1:
file_type = list(self.output_files.keys())[0]
src_filename = f"output.{file_type}"
shutil.copy2(src_filename, self.output_pathname)
src_filename = f"output.{final_output_format}"
shutil.copy2(src_filename, self.output_pathname)

output_metadata = {
"output": {
"ext": final_output_format,
}
}

with open("galaxy.json", "w") as ofh:
json.dump(output_metadata, ofh)


def main() -> None:
Expand Down
Binary file modified tools/batched_lastz/test-data/input.tgz
Binary file not shown.

0 comments on commit 869a01a

Please sign in to comment.