Skip to content

Commit

Permalink
Support a configurable regex for skipping specified lines
Browse files Browse the repository at this point in the history
Implement #42

Change-Id: Ib27d2cfdaa05599761024ddd5c913e47cccb6eb6
  • Loading branch information
Linary committed Mar 18, 2019
1 parent 3948170 commit 8516c81
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,16 @@ protected Line fetch() {
return null;
}

// Skip the comment line
if (this.isCommentLine(rawLine)) {
// Skip the line if it matches the specified regex
if (this.needSkipLine(rawLine)) {
return this.fetch();
} else {
return this.parser.parse(rawLine);
}
}

private boolean isCommentLine(String line) {
return this.source.commentSymbols().stream().anyMatch(line::startsWith);
/**
 * Tells whether a raw line should be skipped instead of being parsed.
 *
 * The decision uses {@link String#matches(String)}, so the regex obtained
 * from the source has to match the entire line, not just a prefix of it.
 *
 * @param line the raw line read from the input
 * @return true if the whole line matches the source's skipped-line regex
 */
private boolean needSkipLine(String line) {
    String skipRegex = this.source.skippedLineRegex();
    return line.matches(skipRegex);
}

private boolean isDuplicateHeader(String line) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
package com.baidu.hugegraph.loader.source.file;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.baidu.hugegraph.loader.source.InputSource;
import com.baidu.hugegraph.loader.source.SourceType;
Expand All @@ -32,6 +30,7 @@ public class FileSource implements InputSource {

private static final String DEFAULT_CHARSET = "UTF-8";
private static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
private static final String DEFAULT_SKIPPED_LINE_REGEX = "";

@JsonProperty("path")
private String path;
Expand All @@ -45,16 +44,16 @@ public class FileSource implements InputSource {
private String charset;
@JsonProperty("date_format")
private String dateFormat;
@JsonProperty("skipped_line_regex")
private String skippedLineRegex;
@JsonProperty("compression")
private Compression compression;
@JsonProperty("comment_symbols")
private Set<String> commentSymbols;

public FileSource() {
this.charset = DEFAULT_CHARSET;
this.dateFormat = DEFAULT_DATE_FORMAT;
this.skippedLineRegex = DEFAULT_SKIPPED_LINE_REGEX;
this.compression = Compression.NONE;
this.commentSymbols = new HashSet<>();
}

@Override
Expand Down Expand Up @@ -90,13 +89,12 @@ public String dateFormat() {
return this.dateFormat;
}

public Compression compression() {
return this.compression;
/**
 * Returns the regex bound to the "skipped_line_regex" JSON property.
 * The no-arg constructor initialises it to DEFAULT_SKIPPED_LINE_REGEX
 * (an empty string).
 *
 * @return the configured skipped-line regex
 */
public String skippedLineRegex() {
return this.skippedLineRegex;
}

public Set<String> commentSymbols() {
assert this.commentSymbols != null;
return Collections.unmodifiableSet(this.commentSymbols);
/**
 * Returns the value bound to the "compression" JSON property.
 * The no-arg constructor initialises it to Compression.NONE.
 *
 * @return the configured compression
 */
public Compression compression() {
return this.compression;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,18 +628,18 @@ public void testMultiFilesHaveHeader() {
}

@Test
public void testFileHasCommentLine() {
public void testFileHasSkipLine() {
ioUtil.write("vertex_person.csv",
"name,age,city",
"# This is a comment",
"marko,29,Beijing",
"marko,29,#Beijing",
"// This is also a comment",
"# This is still a comment",
"vadas,27,Hongkong");
"vadas,27,//Hongkong");

String[] args = new String[]{
"-f", configPath("file_has_comment_line/struct.json"),
"-s", configPath("file_has_comment_line/schema.groovy"),
"-f", configPath("file_has_skipped_line/struct.json"),
"-s", configPath("file_has_skipped_line/schema.groovy"),
"-g", GRAPH,
"-h", SERVER,
"--test-mode", "true"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"path": "${store_path}/vertex_person.csv",
"format": "CSV",
"charset": "UTF-8",
"comment_symbols": ["#", "//"]
"skipped_line_regex": "(^#|^//).*"
},
"mapping": {
"name": "name",
Expand Down

0 comments on commit 8516c81

Please sign in to comment.