From 931082bbbe7f683e8e51f31f307d93c21fe2c830 Mon Sep 17 00:00:00 2001 From: mattjvincent Date: Thu, 15 Jun 2023 11:39:47 -0400 Subject: [PATCH] fixing commands to work correctly --- src/g2gtools/commands.py | 10 +++++----- src/g2gtools/fasta.py | 2 +- src/g2gtools/g2g_utils.py | 6 +++--- src/g2gtools/vcf.py | 2 +- src/g2gtools/vcf2vci.py | 25 ++++++++++++------------- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/g2gtools/commands.py b/src/g2gtools/commands.py index 749d02f..369621a 100644 --- a/src/g2gtools/commands.py +++ b/src/g2gtools/commands.py @@ -141,12 +141,12 @@ def vcf2vci( fasta_file: Annotated[Path, typer.Option('-f', '--fasta', exists=True, dir_okay=False, resolve_path=True, help='Fasta file matching VCF information')], strain: Annotated[str, typer.Option('-s', '--strain', help='Name of strain/sample (column in VCF file)')], output_file: Annotated[Path, typer.Option('-o', '--output', exists=False, dir_okay=False, writable=True, resolve_path=True, help='Name of output file')] = None, - num_processes: Annotated[int, typer.Option('-n', '--num-processes', hidden=True)] = None, + num_processes: Annotated[int, typer.Option('-p', '--num-processes', hidden=True)] = None, diploid: Annotated[bool, typer.Option('-d', '--diploid', help='Create diploid VCI file')] = False, - keep: Annotated[bool, typer.Option('-k', '--keep', help='Keep track of VCF lines that could not be converted to VCI file')] = False, - passed: Annotated[bool, typer.Option('-p', '--pass', help='Use only VCF lines that have a PASS for the filter value')] = False, - quality: Annotated[bool, typer.Option('-q', '--quality', help='Filter on quality, FI=PASS')] = False, - no_bgzip: Annotated[bool, typer.Option('-z', '--no-bgzip', help='DO NOT compress and index output')] = False, + keep: Annotated[bool, typer.Option('--keep', help='Keep track of VCF lines that could not be converted to VCI file')] = False, + passed: Annotated[bool, typer.Option('--pass', help='Use only VCF lines that have a PASS for the filter value')] = False, + quality: Annotated[bool, typer.Option('--quality', help='Filter on quality, FI=PASS')] = False, + no_bgzip: Annotated[bool, typer.Option('--no-bgzip', help='DO NOT compress and index output')] = False, verbose: Annotated[int, typer.Option('-v', '--verbose', count=True, help='specify multiple times for more verbose output')] = 0 ) -> None: """ diff --git a/src/g2gtools/fasta.py b/src/g2gtools/fasta.py index 9e67082..9b7aa8b 100644 --- a/src/g2gtools/fasta.py +++ b/src/g2gtools/fasta.py @@ -241,7 +241,7 @@ def extract( output_file_name = g2g_utils.check_file(output_file_name, 'w') fasta_out = open(output_file_name, 'w') else: - fasta_out = sys.stdout + fasta_out = sys.stderr try: if not isinstance(locations, list): diff --git a/src/g2gtools/g2g_utils.py b/src/g2gtools/g2g_utils.py index c4b3e75..8de449c 100644 --- a/src/g2gtools/g2g_utils.py +++ b/src/g2gtools/g2g_utils.py @@ -83,7 +83,7 @@ def configure_logging( Returns: logging.Logger: The logging object. """ - ensimpl_app_debug = nvli(os.environ.get('G2GTOOLS_APP_DEBUG', '0'), -1) + g2g_app_debug = nvli(os.environ.get('G2GTOOLS_APP_DEBUG', '0'), -1) rich_handler = RichHandler( level=logging.NOTSET, @@ -93,7 +93,7 @@ def configure_logging( omit_repeated_times=False, ) - if ensimpl_app_debug == 1: + if g2g_app_debug == 1: rich_handler = RichHandler( level=logging.NOTSET, show_level=True, @@ -117,7 +117,7 @@ def configure_logging( if level == 0: log.setLevel(logging.WARNING) elif level == 1: - log.setLevel(19) + log.setLevel(logging.INFO) elif level > 1: log.setLevel(logging.DEBUG) diff --git a/src/g2gtools/vcf.py b/src/g2gtools/vcf.py index 04ab751..f968d78 100644 --- a/src/g2gtools/vcf.py +++ b/src/g2gtools/vcf.py @@ -360,7 +360,7 @@ def parse_gt_tuple( gt_left = gt[0] gt_right = gt[1] - fi = sample_data['FI'] + fi = vcf_record.format.get('FI') except ValueError: # LOG.debug(ve) pass diff --git a/src/g2gtools/vcf2vci.py b/src/g2gtools/vcf2vci.py index b63dca1..2e86ec8 100644 --- a/src/g2gtools/vcf2vci.py +++ b/src/g2gtools/vcf2vci.py @@ -181,6 +181,8 @@ def process_piece(vcf2vci_params: VCF2VCIInfo) -> dict[str, Any]: if vcf2vci_params.diploid: mi = ['L', 'R'] + #logger = g2g_utils.configure_logging('g2tools', 10) + logger.warning(f'Processing Chromosome {vcf2vci_params.chromosome}...') iterators = [] @@ -253,19 +255,19 @@ def discard_record(rec): n = 0 line_numbers = 0 - # print('iterators=' + str(type(iterators))) - # print('iterators[0]=' + str(type(iterators[0]))) + #print('iterators=' + str(type(iterators))) + #print('iterators[0]=' + str(type(iterators[0]))) for vcf_records in walk_vcfs_together(iterators): # print('vcf_records=' + str(type(vcf_records))) for i, vcf_record in enumerate(vcf_records): - # print('vcf_record=' + str(type(vcf_record))) - # logger.debug(vcf_record) + #print('vcf_record=' + str(type(vcf_record))) + #logger.debug(vcf_record) if vcf_record is None: continue # logger.debug(vcf_record.alt) # logger.debug(type(vcf_record.alt)) logger.debug('------------') - logger.debug(f'{vcf_record.pos=}') + #print(f'{vcf_record.pos=}') if tabix: gt = vcf.parse_gt_tuple_orig( @@ -279,9 +281,6 @@ def discard_record(rec): # logger.debug(gt) logger.debug(f'{gt=}') - if not gt.is_snp: - logger.critical('NO SNP DETECTED') - line_numbers = line_numbers + 1 if gt.is_snp: # snp @@ -421,10 +420,10 @@ def discard_record(rec): orig_alt_seq = alt_seq - s = vcf_record[ - vcf2vci_params.vcf_files[i].sample_index - ] - logger.debug(f'SAMPLE: {s}') + #s = vcf_record[ + # vcf2vci_params.vcf_files[i].sample_index + #] + #logger.debug(f'SAMPLE: {s}') logger.debug( f'REF="{gt.ref}", ALT_L="{gt.left}", ' f'ALT_R="{gt.right}", POS={vcf_record.pos}' @@ -505,7 +504,7 @@ def discard_record(rec): alt_str = alt_seq if alt_seq else '.' out = ( f'{vcf2vci_params.chromosome}{lr_out}\t' - f'{vcf_record.pos + 1}\t' + f'{vcf_record.pos + (1 if tabix else 0)}\t' f'{shared_bases}\t{ref_str}\t{alt_str}\t' f'{fragment_size}\n' )