Skip to content

Commit

Permalink
Merge pull request #18 from dpryan79/fix17
Browse files Browse the repository at this point in the history
Fix #17
  • Loading branch information
dpryan79 committed Dec 30, 2015
2 parents b50bb72 + 304b215 commit fb177a8
Show file tree
Hide file tree
Showing 53 changed files with 113 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ language: c
os:
- linux
- osx
script: make
script: make test
compiler:
- clang
- gcc
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Version 0.1.9:

* Added the --methylKit option
* Added automated testing with `make test`

Version 0.1.8:

* Fix compilation issues with clang
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ OPTS ?= -Wall -g -O3
all: lib PileOMeth

OBJS = common.o bed.o svg.o pileup.o extract.o MBias.o mergeContext.o
VERSION = 0.1.8
VERSION = 0.1.9

#If we're building from a git repo, then append the most recent tag
ifneq "$(wildcard .git)" ""
Expand All @@ -36,6 +36,9 @@ lib: libPileOMeth.a
PileOMeth: htslib version.h libPileOMeth.a
$(CC) $(OPTS) -Ihtslib -o PileOMeth main.c libPileOMeth.a htslib/libhts.a -lm -lz -lpthread

test: PileOMeth
python tests/test.py

clean:
rm -f *.o PileOMeth libPileOMeth.a

Expand Down
4 changes: 3 additions & 1 deletion PileOMeth.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ typedef struct {
@field maxDepth Maximum depth for the pileup
@field keepDiscordant 0: Do not include discordantly aligned reads when calculating metrics
@field keepSingleton 0: Do not include singletons when calculating metrics
@field merge 1: Merge Cs in either a CpG or CHG context into single entries
@field methylKit Output in a format compatible with methylKit
@field output_fp Output file pointers (to CpG, CHG, and CHH, respectively)
@field reg A region specified by -r
@field fp Input file pointer (must be a BAM or CRAM file)
Expand All @@ -56,7 +58,7 @@ typedef struct {
int keepCpG, keepCHG, keepCHH;
int minMapq, minPhred, keepDupes, maxDepth;
int keepDiscordant, keepSingleton;
int merge;
int merge, methylKit;
int fraction, counts, logit;
FILE **output_fp;
char *reg;
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ The `--logit` option produces the first three columns of the standard output fol

Note that these options may be combined with `--mergeContext`. However, `PileOMeth mergeContext` can not be used after the fact to combine these.

methylKit-compatible output
===========================

methylKit has its own format, which can be produced with the `--methylKit` option. Merging Cs into CpGs or CHGs is forbidden in this format. Likewise, this option is mutually exclusive with `--logit` et al.

Methylation bias plotting and correction
========================================

Expand Down
108 changes: 76 additions & 32 deletions extract.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ struct lastCall{
uint32_t nmethyl, nunmethyl;
};

void writeCall(FILE *of, Config *config, char *chrom, int32_t pos, int32_t width, uint32_t nmethyl, uint32_t nunmethyl) {
if (!config->fraction && !config->logit && !config->counts) {
void writeCall(FILE *of, Config *config, char *chrom, int32_t pos, int32_t width, uint32_t nmethyl, uint32_t nunmethyl, char base) {
char strand = (base=='C' || base=='c') ? 'F' : 'R';
if (!config->fraction && !config->logit && !config->counts && !config->methylKit) {
fprintf(of, \
"%s\t%i\t%i\t%i\t%" PRIu32 "\t%" PRIu32 "\n", \
chrom, \
Expand Down Expand Up @@ -53,18 +54,29 @@ void writeCall(FILE *of, Config *config, char *chrom, int32_t pos, int32_t width
pos, \
pos+width, \
logit(((double) nmethyl)/(nmethyl+nunmethyl)));
} else if(config->methylKit) {
fprintf(of, \
"%s.%i\t%s\t%i\t%c\t%i\t%f\t%f\n", \
chrom, \
pos+1, \
chrom, \
pos+1, \
strand, \
nmethyl+nunmethyl, \
((double) nmethyl)/(nmethyl+nunmethyl), \
((double) nunmethyl)/(nmethyl+nunmethyl));
}
}

void processLast(FILE *of, Config *config, struct lastCall *last, bam_hdr_t *hdr, int32_t tid, int32_t pos, int width, uint32_t nmethyl, uint32_t nunmethyl) {
void processLast(FILE *of, Config *config, struct lastCall *last, bam_hdr_t *hdr, int32_t tid, int32_t pos, int width, uint32_t nmethyl, uint32_t nunmethyl, char base) {
if(last->tid == tid && last->pos == pos) {
nmethyl += last->nmethyl;
nunmethyl += last->nunmethyl;
writeCall(of, config, hdr->target_name[tid], pos, width, nmethyl, nunmethyl);
writeCall(of, config, hdr->target_name[tid], pos, width, nmethyl, nunmethyl, base);
last->tid = -1;
} else {
if(last->tid != -1) {
writeCall(of, config, hdr->target_name[last->tid], last->pos, width, last->nmethyl, last->nunmethyl);
writeCall(of, config, hdr->target_name[last->tid], last->pos, width, last->nmethyl, last->nunmethyl, base);
}
last->tid = tid;
last->pos = pos;
Expand All @@ -83,7 +95,7 @@ void extractCalls(Config *config) {
int idxBED = 0, strand;
uint32_t nmethyl = 0, nunmethyl = 0;
const bam_pileup1_t **plp = NULL;
char *seq = NULL, base;
char *seq = NULL, base = 'A';
mplp_data *data = NULL;
struct lastCall *lastCpG = NULL;
struct lastCall *lastCHG = NULL;
Expand Down Expand Up @@ -184,26 +196,26 @@ void extractCalls(Config *config) {

if(nmethyl+nunmethyl==0) continue;
if(!config->merge || type==2) {
writeCall(config->output_fp[type], config, hdr->target_name[tid], pos, 1, nmethyl, nunmethyl);
writeCall(config->output_fp[type], config, hdr->target_name[tid], pos, 1, nmethyl, nunmethyl, base);
} else {
//Merge into per-CpG/CHG metrics
if(type==0) {
if(base=='G' || base=='g') pos--;
processLast(config->output_fp[0], config, lastCpG, hdr, tid, pos, 2, nmethyl, nunmethyl);
processLast(config->output_fp[0], config, lastCpG, hdr, tid, pos, 2, nmethyl, nunmethyl, base);
} else {
if(base=='G' || base=='g') pos-=2;
processLast(config->output_fp[1], config, lastCHG, hdr, tid, pos, 3, nmethyl, nunmethyl);
processLast(config->output_fp[1], config, lastCHG, hdr, tid, pos, 3, nmethyl, nunmethyl, base);
}
}
}

//Don't forget the last CpG/CHG
if(config->merge) {
if(config->keepCpG && lastCpG->tid != -1) {
processLast(config->output_fp[0], config, lastCpG, hdr, tid, pos, 2, nmethyl, nunmethyl);
processLast(config->output_fp[0], config, lastCpG, hdr, tid, pos, 2, nmethyl, nunmethyl, base);
}
if(config->keepCHG && lastCHG->tid != -1) {
processLast(config->output_fp[1], config, lastCHG, hdr, tid, pos, 3, nmethyl, nunmethyl);
processLast(config->output_fp[1], config, lastCHG, hdr, tid, pos, 3, nmethyl, nunmethyl, base);
}
}

Expand Down Expand Up @@ -295,6 +307,8 @@ void extract_usage() {
" file with a .counts.bedGraph extension.\n"
" --logit Extract logit(M/(M+U)) (only) at each position. This produces\n"
" a file with a .logit.bedGraph extension.\n"
" --methylKit Output in the format required by methylKit. Note that this is\n"
" incompatible with --mergeContext, --fraction and --counts.\n"
" --OT INT,INT,INT,INT Inclusion bounds for methylation calls from reads/pairs\n"
" origination from the original top strand. Suggested values can\n"
" be obtained from the MBias program. Each integer represents a\n"
Expand Down Expand Up @@ -323,6 +337,7 @@ int extract_main(int argc, char *argv[]) {
config.keepCpG = 1; config.keepCHG = 0; config.keepCHH = 0;
config.minMapq = 10; config.minPhred = 5; config.keepDupes = 0;
config.keepSingleton = 0, config.keepDiscordant = 0;
config.methylKit = 0;
config.merge = 0;
config.maxDepth = 2000;
config.fai = NULL;
Expand Down Expand Up @@ -352,6 +367,7 @@ int extract_main(int argc, char *argv[]) {
{"CTOT", 1, NULL, 9},
{"CTOB", 1, NULL, 10},
{"mergeContext", 0, NULL, 11},
{"methylKit", 0, NULL, 12},
{"help", 0, NULL, 'h'},
{0, 0, NULL, 0}
};
Expand Down Expand Up @@ -405,6 +421,9 @@ int extract_main(int argc, char *argv[]) {
case 11 :
config.merge = 1;
break;
case 12 :
config.methylKit = 1;
break;
case 'q' :
config.minMapq = atoi(optarg);
break;
Expand Down Expand Up @@ -447,8 +466,13 @@ int extract_main(int argc, char *argv[]) {
fprintf(stderr, "-q %i is invalid. Resetting to 0, which is the lowest possible value.\n", config.minMapq);
config.minMapq = 0;
}
if(config.fraction+config.counts+config.logit > 1) {
fprintf(stderr, "More than one of --fraction, --counts, and --logit were specified. These are mutually exclusive.\n");
if(config.fraction+config.counts+config.logit+config.methylKit > 1) {
fprintf(stderr, "More than one of --fraction, --counts, --methylKit and --logit were specified. These are mutually exclusive.\n");
extract_usage();
return 1;
}
if(config.methylKit + config.merge == 2) {
fprintf(stderr, "--mergeContext and --methylKit are mutually exclusive.\n");
extract_usage();
return 1;
}
Expand Down Expand Up @@ -498,65 +522,85 @@ int extract_main(int argc, char *argv[]) {
fprintf(stderr, "writing to prefix:'%s'\n", opref);
}
if(config.fraction) {
oname = malloc(sizeof(char) * (strlen(opref)+19));
oname = malloc(sizeof(char) * (strlen(opref)+19));
} else if(config.counts) {
oname = malloc(sizeof(char) * (strlen(opref)+21));
oname = malloc(sizeof(char) * (strlen(opref)+21));
} else if(config.logit) {
oname = malloc(sizeof(char) * (strlen(opref)+20));
oname = malloc(sizeof(char) * (strlen(opref)+20));
} else if(config.methylKit) {
oname = malloc(sizeof(char) * (strlen(opref)+15));
} else {
oname = malloc(sizeof(char) * (strlen(opref)+14));
oname = malloc(sizeof(char) * (strlen(opref)+14));
}
assert(oname);
if(config.keepCpG) {
if(config.fraction) {
sprintf(oname, "%s_CpG.meth.bedGraph", opref);
sprintf(oname, "%s_CpG.meth.bedGraph", opref);
} else if(config.counts) {
sprintf(oname, "%s_CpG.counts.bedGraph", opref);
sprintf(oname, "%s_CpG.counts.bedGraph", opref);
} else if(config.logit) {
sprintf(oname, "%s_CpG.logit.bedGraph", opref);
sprintf(oname, "%s_CpG.logit.bedGraph", opref);
} else if(config.methylKit) {
sprintf(oname, "%s_CpG.methylKit", opref);
} else {
sprintf(oname, "%s_CpG.bedGraph", opref);
sprintf(oname, "%s_CpG.bedGraph", opref);
}
config.output_fp[0] = fopen(oname, "w");
if(config.output_fp[0] == NULL) {
fprintf(stderr, "Couldn't open the output CpG metrics file for writing! Insufficient permissions?\n");
return -3;
}
printHeader(config.output_fp[0], "CpG", opref, config);
if(config.methylKit) {
fprintf(config.output_fp[0], "chrBase\tchr\tbase\tstrand\tcoverage\tfreqC\tfreqT\n");
} else {
printHeader(config.output_fp[0], "CpG", opref, config);
}
}
if(config.keepCHG) {
if(config.fraction) {
sprintf(oname, "%s_CHG.meth.bedGraph", opref);
sprintf(oname, "%s_CHG.meth.bedGraph", opref);
} else if(config.counts) {
sprintf(oname, "%s_CHG.counts.bedGraph", opref);
sprintf(oname, "%s_CHG.counts.bedGraph", opref);
} else if(config.logit) {
sprintf(oname, "%s_CHG.logit.bedGraph", opref);
sprintf(oname, "%s_CHG.logit.bedGraph", opref);
} else if(config.methylKit) {
sprintf(oname, "%s_CHG.methylKit", opref);
} else {
sprintf(oname, "%s_CHG.bedGraph", opref);
sprintf(oname, "%s_CHG.bedGraph", opref);
}
config.output_fp[1] = fopen(oname, "w");
if(config.output_fp[1] == NULL) {
fprintf(stderr, "Couldn't open the output CHG metrics file for writing! Insufficient permissions?\n");
return -3;
}
printHeader(config.output_fp[1], "CHG", opref, config);
if(config.methylKit) {
fprintf(config.output_fp[1], "chrBase\tchr\tbase\tstrand\tcoverage\tfreqC\tfreqT\n");
} else {
printHeader(config.output_fp[1], "CHG", opref, config);
}
}
if(config.keepCHH) {
if(config.fraction) {
sprintf(oname, "%s_CHH.meth.bedGraph", opref);
sprintf(oname, "%s_CHH.meth.bedGraph", opref);
} else if(config.counts) {
sprintf(oname, "%s_CHH.counts.bedGraph", opref);
sprintf(oname, "%s_CHH.counts.bedGraph", opref);
} else if(config.logit) {
sprintf(oname, "%s_CHH.logit.bedGraph", opref);
sprintf(oname, "%s_CHH.logit.bedGraph", opref);
} else if(config.methylKit) {
sprintf(oname, "%s_CHH.methylKit", opref);
} else {
sprintf(oname, "%s_CHH.bedGraph", opref);
sprintf(oname, "%s_CHH.bedGraph", opref);
}
config.output_fp[2] = fopen(oname, "w");
if(config.output_fp[2] == NULL) {
fprintf(stderr, "Couldn't open the output CHH metrics file for writing! Insufficient permissions?\n");
return -3;
}
printHeader(config.output_fp[2], "CHH", opref, config);
if(config.methylKit) {
fprintf(config.output_fp[2], "chrBase\tchr\tbase\tstrand\tcoverage\tfreqC\tfreqT\n");
} else {
printHeader(config.output_fp[2], "CHH", opref, config);
}
}

//Run the pileup
Expand Down
Binary file removed htslib/bgzf.pico
Binary file not shown.
Binary file removed htslib/bgzip
Binary file not shown.
Binary file removed htslib/cram/cram_codecs.pico
Binary file not shown.
Binary file removed htslib/cram/cram_decode.pico
Binary file not shown.
Binary file removed htslib/cram/cram_encode.pico
Binary file not shown.
Binary file removed htslib/cram/cram_index.pico
Binary file not shown.
Binary file removed htslib/cram/cram_io.pico
Binary file not shown.
Binary file removed htslib/cram/cram_samtools.pico
Binary file not shown.
Binary file removed htslib/cram/cram_stats.pico
Binary file not shown.
Binary file removed htslib/cram/files.pico
Binary file not shown.
Binary file removed htslib/cram/mFILE.pico
Binary file not shown.
Binary file removed htslib/cram/open_trace_file.pico
Binary file not shown.
Binary file removed htslib/cram/pooled_alloc.pico
Binary file not shown.
Binary file removed htslib/cram/rANS_static.pico
Binary file not shown.
Binary file removed htslib/cram/sam_header.pico
Binary file not shown.
Binary file removed htslib/cram/string_alloc.pico
Binary file not shown.
Binary file removed htslib/cram/thread_pool.pico
Binary file not shown.
Binary file removed htslib/cram/vlen.pico
Binary file not shown.
Binary file removed htslib/cram/zfio.pico
Binary file not shown.
Binary file removed htslib/faidx.pico
Binary file not shown.
Binary file removed htslib/hfile.pico
Binary file not shown.
Binary file removed htslib/hfile_net.pico
Binary file not shown.
Binary file removed htslib/hts.pico
Binary file not shown.
Binary file removed htslib/htsfile
Binary file not shown.
Binary file removed htslib/kfunc.pico
Binary file not shown.
Binary file removed htslib/knetfile.pico
Binary file not shown.
Binary file removed htslib/kstring.pico
Binary file not shown.
Binary file removed htslib/libhts.a
Binary file not shown.
Binary file removed htslib/libhts.so
Binary file not shown.
1 change: 0 additions & 1 deletion htslib/libhts.so.1

This file was deleted.

Binary file removed htslib/md5.pico
Binary file not shown.
Binary file removed htslib/regidx.pico
Binary file not shown.
Binary file removed htslib/sam.pico
Binary file not shown.
Binary file removed htslib/synced_bcf_reader.pico
Binary file not shown.
Binary file removed htslib/tabix
Binary file not shown.
Binary file removed htslib/tbx.pico
Binary file not shown.
Binary file removed htslib/test/fieldarith
Binary file not shown.
Binary file removed htslib/test/hfile
Binary file not shown.
Binary file removed htslib/test/sam
Binary file not shown.
Binary file removed htslib/test/test-regidx
Binary file not shown.
Binary file removed htslib/test/test-vcf-api
Binary file not shown.
Binary file removed htslib/test/test-vcf-sweep
Binary file not shown.
Binary file removed htslib/test/test_view
Binary file not shown.
Binary file removed htslib/vcf.pico
Binary file not shown.
Binary file removed htslib/vcf_sweep.pico
Binary file not shown.
Binary file removed htslib/vcfutils.pico
Binary file not shown.
23 changes: 19 additions & 4 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,48 @@
from subprocess import check_call
import os
import os.path as op

wd = op.dirname(op.realpath(__file__))
os.chdir(wd)

def rm(f):
try:
os.unlink(f)
except OSError:
pass

rm('po')
check_call('cd .. && make', shell=True)
assert op.exists('../PileOMeth')

rm('ct_aln_CpG.bedGraph')
check_call('../PileOMeth extract ct100.fa ct_aln.bam -q 2', shell=True)
assert op.exists('ct_aln_CpG.bedGraph')
lines = sum(1 for _ in open('ct_aln_CpG.bedGraph'))
assert lines == 1

rm('ct_aln_CpG.bedGraph')

rm('cg_aln_CpG.bedGraph')
check_call('../PileOMeth extract cg100.fa cg_aln.bam -q 2', shell=True)
assert op.exists('cg_aln_CpG.bedGraph')
lines = sum(1 for _ in open('cg_aln_CpG.bedGraph'))
assert lines > 1
rm('cg_aln_CpG.bedGraph')

# should be none with q > 10
check_call('../PileOMeth extract cg100.fa cg_aln.bam -q 10', shell=True)
assert op.exists('cg_aln_CpG.bedGraph')
lines = sum(1 for _ in open('cg_aln_CpG.bedGraph'))
assert lines == 1
rm('cg_aln_CpG.bedGraph')

# Test the new methylKit option
check_call('../PileOMeth extract --methylKit --CHH --CHG cg100.fa cg_aln.bam -q 2', shell=True)
assert op.exists('cg_aln_CpG.methylKit')
lines = sum(1 for _ in open('cg_aln_CpG.methylKit'))
assert lines > 1
rm('cg_aln_CpG.methylKit')
lines = sum(1 for _ in open('cg_aln_CHG.methylKit'))
assert lines == 1
rm('cg_aln_CHG.methylKit')
assert op.exists('cg_aln_CHH.methylKit')
lines = sum(1 for _ in open('cg_aln_CHH.methylKit'))
assert lines == 1
rm('cg_aln_CHH.methylKit')

0 comments on commit fb177a8

Please sign in to comment.