diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java index 99407267c..66b0393bf 100644 --- a/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java +++ b/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java @@ -123,16 +123,16 @@ protected Line fetch() { return null; } - // Skip the comment line - if (this.isCommentLine(rawLine)) { + // Skip the line matched specified regex + if (this.needSkipLine(rawLine)) { return this.fetch(); } else { return this.parser.parse(rawLine); } } - private boolean isCommentLine(String line) { - return this.source.commentSymbols().stream().anyMatch(line::startsWith); + private boolean needSkipLine(String line) { + return line.matches(this.source.skippedLineRegex()); } private boolean isDuplicateHeader(String line) { diff --git a/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java b/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java index 843929fe8..7ed1b24c2 100644 --- a/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java +++ b/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java @@ -20,9 +20,7 @@ package com.baidu.hugegraph.loader.source.file; import java.util.Collections; -import java.util.HashSet; import java.util.List; -import java.util.Set; import com.baidu.hugegraph.loader.source.InputSource; import com.baidu.hugegraph.loader.source.SourceType; @@ -32,6 +30,7 @@ public class FileSource implements InputSource { private static final String DEFAULT_CHARSET = "UTF-8"; private static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; + private static final String DEFAULT_SKIPPED_LINE_REGEX = ""; @JsonProperty("path") private String path; @@ -45,16 +44,16 @@ public class FileSource implements InputSource { private String charset; @JsonProperty("date_format") private String dateFormat; + @JsonProperty("skipped_line_regex") + private String skippedLineRegex; @JsonProperty("compression") private Compression compression; - @JsonProperty("comment_symbols") - private Set commentSymbols; public FileSource() { this.charset = DEFAULT_CHARSET; this.dateFormat = DEFAULT_DATE_FORMAT; + this.skippedLineRegex = DEFAULT_SKIPPED_LINE_REGEX; this.compression = Compression.NONE; - this.commentSymbols = new HashSet<>(); } @Override @@ -90,13 +89,12 @@ public String dateFormat() { return this.dateFormat; } - public Compression compression() { - return this.compression; + public String skippedLineRegex() { + return this.skippedLineRegex; } - public Set commentSymbols() { - assert this.commentSymbols != null; - return Collections.unmodifiableSet(this.commentSymbols); + public Compression compression() { + return this.compression; } @Override diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java index 2c3e4a953..6cfe61669 100644 --- a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java @@ -590,12 +590,13 @@ public void testIgnoreNullValueColumns() { } @Test - public void testFileOnlyHasAnEmptyLine() { - ioUtil.write("vertex_person_empty.csv", ""); + public void testFileNoHeader() { + ioUtil.write("vertex_person.csv", + "marko,29,Beijing"); String[] args = new String[]{ - "-f", configPath("file_only_has_empty_line/struct.json"), - "-s", configPath("file_only_has_empty_line/schema.groovy"), + "-f", configPath("file_no_header/struct.json"), + "-s", configPath("file_no_header/schema.groovy"), "-g", GRAPH, "-h", SERVER, "--test-mode", "true" @@ -628,18 +629,39 @@ public void testMultiFilesHaveHeader() { } @Test - public void testFileHasCommentLine() { + public void testFileHasEmptyLine() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29,#Beijing", + "", + "vadas,27,//Hongkong"); + + String[] args = new String[]{ + "-f", configPath("file_has_empty_line/struct.json"), + "-s", configPath("file_has_empty_line/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(2, vertices.size()); + } + + @Test + public void testFileHasSkippedLine() { ioUtil.write("vertex_person.csv", "name,age,city", "# This is a comment", - "marko,29,Beijing", + "marko,29,#Beijing", "// This is also a comment", "# This is still a comment", - "vadas,27,Hongkong"); + "vadas,27,//Hongkong"); String[] args = new String[]{ - "-f", configPath("file_has_comment_line/struct.json"), - "-s", configPath("file_has_comment_line/schema.groovy"), + "-f", configPath("file_has_skipped_line/struct.json"), + "-s", configPath("file_has_skipped_line/schema.groovy"), "-g", GRAPH, "-h", SERVER, "--test-mode", "true" diff --git a/src/test/resources/file_has_comment_line/schema.groovy b/src/test/resources/file_has_empty_line/schema.groovy similarity index 100% rename from src/test/resources/file_has_comment_line/schema.groovy rename to src/test/resources/file_has_empty_line/schema.groovy diff --git a/src/test/resources/file_has_empty_line/struct.json b/src/test/resources/file_has_empty_line/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/file_has_empty_line/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/file_only_has_empty_line/schema.groovy b/src/test/resources/file_has_skipped_line/schema.groovy similarity index 100% rename from src/test/resources/file_only_has_empty_line/schema.groovy rename to src/test/resources/file_has_skipped_line/schema.groovy diff --git a/src/test/resources/file_has_comment_line/struct.json b/src/test/resources/file_has_skipped_line/struct.json similarity index 88% rename from src/test/resources/file_has_comment_line/struct.json rename to src/test/resources/file_has_skipped_line/struct.json index 79fdfbb62..20b183778 100644 --- a/src/test/resources/file_has_comment_line/struct.json +++ b/src/test/resources/file_has_skipped_line/struct.json @@ -7,7 +7,7 @@ "path": "${store_path}/vertex_person.csv", "format": "CSV", "charset": "UTF-8", - "comment_symbols": ["#", "//"] + "skipped_line_regex": "(^#|^//).*" }, "mapping": { "name": "name", diff --git a/src/test/resources/file_no_header/schema.groovy b/src/test/resources/file_no_header/schema.groovy new file mode 100644 index 000000000..5e0616e0a --- /dev/null +++ b/src/test/resources/file_no_header/schema.groovy @@ -0,0 +1,10 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name", "city").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); diff --git a/src/test/resources/file_only_has_empty_line/struct.json b/src/test/resources/file_no_header/struct.json similarity index 73% rename from src/test/resources/file_only_has_empty_line/struct.json rename to src/test/resources/file_no_header/struct.json index 4ab98cebe..1301a5a4b 100644 --- a/src/test/resources/file_only_has_empty_line/struct.json +++ b/src/test/resources/file_no_header/struct.json @@ -4,7 +4,7 @@ "label": "person", "input": { "type": "${source_type}", - "path": "src/test/resources/vertex_person_empty.csv", + "path": "src/test/resources/vertex_person.csv", "format": "CSV", "charset": "UTF-8" }