Skip to content

Commit

Permalink
bug fix -- first record skipped in space delimited bed files. Fixes #…
Browse files Browse the repository at this point in the history
  • Loading branch information
jrobinso committed Nov 25, 2020
1 parent 29b84f6 commit 0225d9b
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 28 deletions.
3 changes: 2 additions & 1 deletion js/aed/AEDParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ var aedRegexpNamespace = new RegExp("([^:]*):([^(]*)\\(([^)]*)\\)"); // namespac

class AEDParser {

constructor(format, decode, config) {
constructor(config) {
const decode = config ? config.decode : undefined;
this.nameField = config ? config.nameField : undefined;
this.skipRows = 0; // The number of fixed header rows to skip. Override for specific types as needed
if (decode) {
Expand Down
16 changes: 9 additions & 7 deletions js/feature/featureFileReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class FeatureFileReader {
uriParts = URIUtils.parseUri(this.config.url);
this.filename = config.filename || uriParts.file;
}
this.format = this.config.format;
this.parser = this.getParser(this.format, this.config.decode, this.config);

this.parser = this.getParser(this.config);

if (this.config.format === "vcf" && !this.config.indexURL) {
console.warn("Warning: index file not specified. The entire vcf file will be loaded.");
Expand All @@ -90,7 +90,7 @@ class FeatureFileReader {
return this.loadFeaturesNoIndex()
}

}
}

async readHeader() {

Expand Down Expand Up @@ -137,18 +137,20 @@ class FeatureFileReader {
}
}

getParser(format, decode, config) {
switch (format) {

getParser(config) {

switch (config.format) {
case "vcf":
return new VcfParser(config);
case "seg" :
return new SegParser();
case "gwas" :
return new GWASParser(config);
case "aed" :
return new AEDParser(format, decode, config);
return new AEDParser(config);
default:
return new FeatureParser(format, decode, config);
return new FeatureParser(config);
}
}

Expand Down
136 changes: 117 additions & 19 deletions js/feature/featureParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,32 @@ import {decodeGcnv} from "../gcnv/gcnvDecoder.js";
*/
class FeatureParser {

constructor(format, decode, config) {
constructor(config) {

const format = config.format;
const decode = config.decode

this.header = {};
this.decode = decode;
this.config = config;

if (format !== undefined) {
// See if this is a custom format
const customFormat = TrackUtils.getFormat(format);
if (customFormat !== undefined) {
this.decode = decodeCustom;
this.header.format = customFormat;
this.delimiter = customFormat.delimiter || "\t";
}
this.header.format = format.toLowerCase();
if(!this.decode) {
this.decode = this.getDecoder(this.header.format);
}
this.delimiter = this.config.delimiter || getDelimiter(this.header.format);
}

//if(this.decode = decodeGtexGWAS) this.skipRows = 1;

this.header.nameField = config ? config.nameField : undefined;
this.skipRows = 0; // The number of fixed header rows to skip. Override for specific types as needed

Expand Down Expand Up @@ -108,7 +125,7 @@ class FeatureParser {
try {
// All directives that could change the format, and thus decoder, should have been read by now.
const decoder = this.getDecoder();
if (!line.startsWith("#") && decoder(tokens,header)) {
if (!line.startsWith("#") && decoder(tokens, header)) {
if (columnNames && columnNames.length === tokens.length) {
header.columnNames = columnNames;
for (let n = 0; n < columnNames.length; n++) {
Expand Down Expand Up @@ -194,15 +211,15 @@ class FeatureParser {
case "regionpeak":
case "peaks":
this.decode = decodePeak;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "bedgraph":
this.decode = decodeBedGraph;
this.delimiter = /\s+/;
break;
case "wig":
this.decode = decodeWig;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "gff3" :
case "gff" :
Expand All @@ -213,7 +230,7 @@ class FeatureParser {
case "fusionjuncspan":
// bhaas, needed for FusionInspector view
this.decode = decodeFusionJuncSpan;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "gtexgwas":
this.skipRows = 1;
Expand All @@ -222,24 +239,24 @@ class FeatureParser {
break;
case "refflat":
this.decode = decodeReflat;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "genepred":
this.decode = decodeGenePred;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "genepredext":
this.decode = decodeGenePredExt;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "ensgene":
this.decode = decodeGenePred
this.header.shift = 1;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "refgene":
this.decode = decodeGenePredExt;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
this.header.shift = 1;
break;
case "bed":
Expand All @@ -248,21 +265,21 @@ class FeatureParser {
break;
case "bedpe":
this.decode = decodeBedpe;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || "\t";
break;
case "bedpe-domain":
this.decode = decodeBedpeDomain;
this.headerLine = true;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || "\t";
break;
case "bedpe-loop":
this.decode = decodeBedpe;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || "\t";
this.header = {colorColumn: 7};
break;
case "interact":
this.decode = decodeInteract;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
break;
case "snp":
this.decode = decodeSNP;
Expand All @@ -284,7 +301,7 @@ class FeatureParser {
this.delimiter = customFormat.delimiter || "\t";
} else {
this.decode = decodeBed;
this.delimiter = /\s+/;
this.delimiter = this.config.delimiter || /\s+/;
}
}
}
Expand Down Expand Up @@ -320,9 +337,9 @@ function parseTrackLine(line) {
if (kv.length === 2) {
const key = kv[0].trim();
const value = kv[1].trim();
if(properties.hasOwnProperty(key)) {
if (properties.hasOwnProperty(key)) {
let currentValue = properties[key];
if(Array.isArray(currentValue)) {
if (Array.isArray(currentValue)) {
currentValue.push(value);
} else {
properties[key] = [currentValue, value];
Expand All @@ -332,9 +349,9 @@ function parseTrackLine(line) {
}
}
}
if("interact" == properties["type"]) {
if ("interact" == properties["type"]) {
properties["format"] = "interact";
} else if("gcnv" === properties["type"]) {
} else if ("gcnv" === properties["type"]) {
properties["format"] = "gcnv";
}
return properties;
Expand Down Expand Up @@ -376,4 +393,85 @@ function parseVariableStep(line) {
return {format: "variableStep", chrom, span}
}

/**
 * Look up the line decoder function for a feature file format.
 *
 * This is a pure lookup: it returns a decoder and has no side effects.  It is a
 * module-level function, so it must not touch `this` (undefined in an ES module).
 *
 * @param {string} format - lower-case format name, e.g. "bed", "narrowpeak", "gff3"
 * @returns {Function} the decoder for the format.  Formats registered through
 *          TrackUtils.getFormat resolve to decodeCustom; anything else falls back
 *          to decodeBed.
 */
function getDecoder(format) {

    switch (format) {
        case "narrowpeak":
        case "broadpeak":
        case "regionpeak":
        case "peaks":
            return decodePeak;
        case "bedgraph":
            return decodeBedGraph;
        case "wig":
            return decodeWig;
        case "gff3":
        case "gff":
        case "gtf":
            return decodeGFF;
        case "fusionjuncspan":
            return decodeFusionJuncSpan;
        case "gtexgwas":
            return decodeGtexGWAS;
        case "refflat":
            return decodeReflat;
        case "genepred":
        case "ensgene":
            return decodeGenePred;
        case "genepredext":
        case "refgene":
            return decodeGenePredExt;
        case "bed":
            return decodeBed;
        case "bedpe":
        case "bedpe-loop":
            return decodeBedpe;
        case "bedpe-domain":
            return decodeBedpeDomain;
        case "interact":
            return decodeInteract;
        case "snp":
            return decodeSNP;
        case "rmsk":
            return decodeRepeatMasker;
        case "gcnv":
            return decodeGcnv;
        default: {
            // Braces give the lexical declaration its own scope within the switch.
            const customFormat = TrackUtils.getFormat(format);
            // A registered custom format must yield a decoder.  Previously this branch
            // wrote to `this` and fell through without returning anything.
            // NOTE(review): header.format and delimiter for custom formats are expected
            // to be set by the caller (the FeatureParser constructor does so) -- confirm.
            return customFormat !== undefined ? decodeCustom : decodeBed;
        }
    }
}


/**
 * Choose the column delimiter for a feature file format.
 *
 * @param {string} format - format name to look up in the spaceDelimited set
 * @returns {RegExp|string} /\s+/ when the format is listed in spaceDelimited,
 *          otherwise a single tab character
 */
function getDelimiter(format) {
    if (spaceDelimited.has(format)) {
        return /\s+/;
    }
    return "\t";
}

// Format names for which getDelimiter returns the whitespace pattern /\s+/.
// Any format not listed here gets a tab delimiter from getDelimiter.
const spaceDelimited = new Set([
    "narrowpeak", "broadpeak", "regionpeak", "peaks",
    "fusionjuncspan",
    "bedgraph", "wig",
    "refflat", "genepred", "genepredext", "ensgene", "refgene",
    "bed",
    "interact"
]);

export default FeatureParser;
5 changes: 5 additions & 0 deletions test/data/bed/space_delimited.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
chr2 0 1
chr2 2 3
chr2 4 5
chr2 6 7
chr2 8 9
10 changes: 9 additions & 1 deletion test/testBED.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@ import GenomeUtils from "../js/genome/genome";

suite("testBed", function () {


// Regression test for the "first record skipped in space delimited bed files" bug:
// the fixture has five space-separated records and no header line, so all five
// must come back as features.
test("Space delimited", async function () {
const config = {
format: "bed",
url: require.resolve("./data/bed/space_delimited.bed"),
}
const reader = FeatureSource(config, genome);
// Query range starts at 0 so that the first record (chr2 0 1) is included.
const features = await reader.getFeatures({chr: "chr2", start: 0, end: 128756129})
assert.equal(features.length, 5);
})

test("Empty lines", async function () {
const config = {
Expand Down

0 comments on commit 0225d9b

Please sign in to comment.