diff --git a/.travis.yml b/.travis.yml index 4e4c395ad..8c7e829ac 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,24 +5,40 @@ jdk: sudo: required +cache: + directories: + - $HOME/.m2 + - $HOME/downloads + branches: only: - master - /^release-.*$/ - /^test-.*$/ -install: mvn compile -Dmaven.javadoc.skip=true +install: mvn compile -Dmaven.javadoc.skip=true | grep -v "Downloading\|Downloaded" before_script: - - $TRAVIS_DIR/install-hugegraph.sh $TRAVIS_BRANCH + - $TRAVIS_DIR/install-hugegraph.sh $TRAVIS_BRANCH | grep -v "Downloading\|Downloaded" + - | + if [ "$SOURCE_TYPE" == "hdfs" ]; then + $TRAVIS_DIR/install-hadoop.sh + fi + - | + if [ "$SOURCE_TYPE" == "jdbc" ]; then + $TRAVIS_DIR/install-mysql.sh + fi script: - - mvn test -Dtest=LoaderTest - - mvn cobertura:cobertura + - mvn test -P${SOURCE_TYPE} after_success: - bash <(curl -s https://codecov.io/bash) env: + matrix: + - SOURCE_TYPE=file + - SOURCE_TYPE=hdfs + - SOURCE_TYPE=jdbc global: - - TRAVIS_DIR=assembly/travis + - TRAVIS_DIR=assembly/travis diff --git a/assembly/static/example/edge_created.json b/assembly/static/example/edge_created.json deleted file mode 100644 index 39fda8ca0..000000000 --- a/assembly/static/example/edge_created.json +++ /dev/null @@ -1,4 +0,0 @@ -{"source_name": "marko", "target_name": "lop", "date": "2017-12-10", "weight": 0.4} -{"source_name": "josh", "target_name": "lop", "date": "2009-11-11", "weight": 0.4} -{"source_name": "josh", "target_name": "ripple", "date": "2017-12-10", "weight": 1.0} -{"source_name": "peter", "target_name": "lop", "date": "2017-03-24", "weight": 0.2} diff --git a/assembly/static/example/file/edge_created.json b/assembly/static/example/file/edge_created.json new file mode 100644 index 000000000..ba093eab1 --- /dev/null +++ b/assembly/static/example/file/edge_created.json @@ -0,0 +1,4 @@ +{"source_name": "marko", "target_id": 1, "date": "2017-12-10", "weight": 0.4} +{"source_name": "josh", "target_id": 1, "date": "2009-11-11", "weight": 0.4} +{"source_name": "josh", "target_id": 2, "date": "2017-12-10", "weight": 1.0} +{"source_name": "peter", "target_id": 1, "date": "2017-03-24", "weight": 0.2} diff --git a/assembly/static/example/edge_knows.json b/assembly/static/example/file/edge_knows.json similarity index 100% rename from assembly/static/example/edge_knows.json rename to assembly/static/example/file/edge_knows.json diff --git a/assembly/static/example/file/schema.groovy b/assembly/static/example/file/schema.groovy new file mode 100644 index 000000000..cec8c3f34 --- /dev/null +++ b/assembly/static/example/file/schema.groovy @@ -0,0 +1,77 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); + +schema.vertexLabel("person") + .properties("name", "age", "city") + .primaryKeys("name") + .nullableKeys("age", "city") + .ifNotExist() + .create(); +schema.vertexLabel("software") + .useCustomizeNumberId() + .properties("name", "lang", "price") + .ifNotExist() + .create(); + +schema.indexLabel("personByAge") + .onV("person") + .by("age") + .range() + .ifNotExist() + .create(); +schema.indexLabel("personByCity") + .onV("person") + .by("city") + .secondary() + .ifNotExist() + .create(); 
+schema.indexLabel("personByAgeAndCity") + .onV("person") + .by("age", "city") + .secondary() + .ifNotExist() + .create(); +schema.indexLabel("softwareByPrice") + .onV("software") + .by("price") + .range() + .ifNotExist() + .create(); + +schema.edgeLabel("knows") + .sourceLabel("person") + .targetLabel("person") + .properties("date", "weight") + .ifNotExist() + .create(); +schema.edgeLabel("created") + .sourceLabel("person") + .targetLabel("software") + .properties("date", "weight") + .ifNotExist() + .create(); + +schema.indexLabel("createdByDate") + .onE("created") + .by("date") + .secondary() + .ifNotExist() + .create(); +schema.indexLabel("createdByWeight") + .onE("created") + .by("weight") + .range() + .ifNotExist() + .create(); +schema.indexLabel("knowsByWeight") + .onE("knows") + .by("weight") + .range() + .ifNotExist() + .create(); diff --git a/assembly/static/example/struct.json b/assembly/static/example/file/struct.json similarity index 70% rename from assembly/static/example/struct.json rename to assembly/static/example/file/struct.json index 0e6540163..f7a86b7d9 100644 --- a/assembly/static/example/struct.json +++ b/assembly/static/example/file/struct.json @@ -4,15 +4,11 @@ "label": "person", "input": { "type": "file", - "path": "example/vertex_person.csv", + "path": "example/file/vertex_person.csv", "format": "CSV", "header": ["name", "age", "city"], - "charset": "UTF-8" - }, - "mapping": { - "name": "name", - "age": "age", - "city": "city" + "charset": "UTF-8", + "comment_symbols": ["#"] }, "null_values": ["NULL", "null", ""] }, @@ -20,11 +16,12 @@ "label": "software", "input": { "type": "file", - "path": "example/vertex_software.text", + "path": "example/file/vertex_software.txt", "format": "TEXT", "delimiter": "|", "charset": "GBK" }, + "id": "id", "ignored": ["ISBN"] } ], @@ -35,7 +32,7 @@ "target": ["target_name"], "input": { "type": "file", - "path": "example/edge_knows.json", + "path": "example/file/edge_knows.json", "format": "JSON", "date_format": "yyyyMMdd" }, @@ -47,16 +44,15 @@ { "label": "created", "source": ["source_name"], - "target": ["target_name"], + "target": ["target_id"], "input": { "type": "file", - "path": "example/edge_created.json", + "path": "example/file/edge_created.json", "format": "JSON", "date_format": "yyyy-MM-dd" }, "mapping": { - "source_name": "name", - "target_name": "name" + "source_name": "name" } } ] diff --git a/assembly/static/example/vertex_person.csv b/assembly/static/example/file/vertex_person.csv similarity index 84% rename from assembly/static/example/vertex_person.csv rename to assembly/static/example/file/vertex_person.csv index 0650a30bc..ae8f6d28c 100644 --- a/assembly/static/example/vertex_person.csv +++ b/assembly/static/example/file/vertex_person.csv @@ -1,3 +1,4 @@ +# This is a comment marko,29,Beijing vadas,27,Hongkong josh,32,Beijing diff --git a/assembly/static/example/hdfs/edge_created.json b/assembly/static/example/hdfs/edge_created.json new file mode 100644 index 000000000..ba093eab1 --- /dev/null +++ b/assembly/static/example/hdfs/edge_created.json @@ -0,0 +1,4 @@ +{"source_name": "marko", "target_id": 1, "date": "2017-12-10", "weight": 0.4} +{"source_name": "josh", "target_id": 1, "date": "2009-11-11", "weight": 0.4} +{"source_name": "josh", "target_id": 2, "date": "2017-12-10", "weight": 1.0} +{"source_name": "peter", "target_id": 1, "date": "2017-03-24", "weight": 0.2} diff --git a/assembly/static/example/hdfs/edge_knows.json b/assembly/static/example/hdfs/edge_knows.json new file mode 100644 index 
000000000..e1b35b672 --- /dev/null +++ b/assembly/static/example/hdfs/edge_knows.json @@ -0,0 +1,2 @@ +{"source_name": "marko", "target_name": "vadas", "date": "2016-01-10", "weight": 0.5} +{"source_name": "marko", "target_name": "josh", "date": "2013-02-20", "weight": 1.0} diff --git a/assembly/static/example/hdfs/schema.groovy b/assembly/static/example/hdfs/schema.groovy new file mode 100644 index 000000000..cec8c3f34 --- /dev/null +++ b/assembly/static/example/hdfs/schema.groovy @@ -0,0 +1,77 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); + +schema.vertexLabel("person") + .properties("name", "age", "city") + .primaryKeys("name") + .nullableKeys("age", "city") + .ifNotExist() + .create(); +schema.vertexLabel("software") + .useCustomizeNumberId() + .properties("name", "lang", "price") + .ifNotExist() + .create(); + +schema.indexLabel("personByAge") + .onV("person") + .by("age") + .range() + .ifNotExist() + .create(); +schema.indexLabel("personByCity") + .onV("person") + .by("city") + .secondary() + .ifNotExist() + .create(); +schema.indexLabel("personByAgeAndCity") + .onV("person") + .by("age", "city") + .secondary() + .ifNotExist() + .create(); +schema.indexLabel("softwareByPrice") + .onV("software") + .by("price") + .range() + .ifNotExist() + .create(); + +schema.edgeLabel("knows") + .sourceLabel("person") + .targetLabel("person") + .properties("date", "weight") + .ifNotExist() + .create(); +schema.edgeLabel("created") + .sourceLabel("person") + .targetLabel("software") + .properties("date", "weight") + .ifNotExist() + .create(); + +schema.indexLabel("createdByDate") + .onE("created") + .by("date") + .secondary() + .ifNotExist() + .create(); +schema.indexLabel("createdByWeight") + .onE("created") + .by("weight") + .range() + .ifNotExist() + .create(); +schema.indexLabel("knowsByWeight") + .onE("knows") + .by("weight") + .range() + .ifNotExist() + .create(); diff --git a/assembly/static/example/hdfs/struct.json b/assembly/static/example/hdfs/struct.json new file mode 100644 index 000000000..4ce7a2126 --- /dev/null +++ b/assembly/static/example/hdfs/struct.json @@ -0,0 +1,57 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "hdfs", + "path": "hdfs://localhost:8020/example/vertex_person.csv", + "format": "CSV", + "header": ["name", "age", "city"], + "charset": "UTF-8", + "comment_symbols": ["#"] + }, + "null_values": ["NULL", "null", ""] + }, + { + "label": "software", + "input": { + "type": "hdfs", + "path": "hdfs://localhost:8020/example/vertex_software.text", + "format": "TEXT", + "delimiter": "|", + "charset": "GBK" + }, + "id": "id", + "ignored": ["ISBN"] + } + ], + "edges": [ + { + "label": "knows", + "source": ["source_name"], + "target": ["target_name"], + "input": { + "type": "hdfs", + "path": "hdfs://localhost:8020/example/edge_knows.json", + "format": "JSON" + }, + "mapping": { + "source_name": "name", + "target_name": "name" + } + }, + { + "label": "created", + "source": ["source_name"], + "target": ["target_id"], + "input": { + "type": "hdfs", + "path": "hdfs://localhost:8020/example/edge_created.json", + "format": "JSON" + }, + "mapping": { + "source_name": "name" 
+ } + } + ] +} diff --git a/assembly/static/example/hdfs/vertex_person.csv b/assembly/static/example/hdfs/vertex_person.csv new file mode 100644 index 000000000..ae8f6d28c --- /dev/null +++ b/assembly/static/example/hdfs/vertex_person.csv @@ -0,0 +1,7 @@ +# This is a comment +marko,29,Beijing +vadas,27,Hongkong +josh,32,Beijing +peter,35,Shanghai +"li,nary",26,"Wu,han" +tom,null,NULL diff --git a/assembly/static/example/hdfs/vertex_software.text b/assembly/static/example/hdfs/vertex_software.text new file mode 100644 index 000000000..f73115f46 --- /dev/null +++ b/assembly/static/example/hdfs/vertex_software.text @@ -0,0 +1,3 @@ +id|name|lang|price|ISBN +1|lop|java|328|ISBN978-7-107-18618-5 +2|ripple|java|199|ISBN978-7-100-13678-5 diff --git a/assembly/static/example/mysql/example.sql b/assembly/static/example/mysql/example.sql new file mode 100644 index 000000000..800d71e2c --- /dev/null +++ b/assembly/static/example/mysql/example.sql @@ -0,0 +1,133 @@ +-- MySQL dump 10.13 Distrib 5.6.39, for macos10.13 (x86_64) +-- +-- Host: localhost Database: example +-- ------------------------------------------------------ +-- Server version 5.6.39 + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Table structure for table `created` +-- + +DROP TABLE IF EXISTS `created`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `created` ( + `id` int(10) unsigned NOT NULL, + `source_id` int(10) unsigned NOT NULL, + `target_id` int(10) unsigned NOT NULL, + `date` varchar(10) NOT NULL, + `weight` double(10,2) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `created` +-- + +LOCK TABLES `created` WRITE; +/*!40000 ALTER TABLE `created` DISABLE KEYS */; +INSERT INTO `created` VALUES (1,1,100,'2017-12-10',0.40),(2,3,100,'2009-11-11',0.40),(3,3,200,'2017-12-10',1.00),(4,4,100,'2017-03-24',0.20); +/*!40000 ALTER TABLE `created` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `knows` +-- + +DROP TABLE IF EXISTS `knows`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `knows` ( + `id` int(10) unsigned NOT NULL, + `source_id` int(10) unsigned NOT NULL, + `target_id` int(10) unsigned NOT NULL, + `date` varchar(10) NOT NULL, + `weight` double(10,2) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `knows` +-- + +LOCK TABLES `knows` WRITE; +/*!40000 ALTER TABLE `knows` DISABLE KEYS */; +INSERT INTO `knows` VALUES (1,1,2,'2016-01-10',0.50),(2,1,3,'2013-02-20',1.00); +/*!40000 ALTER TABLE `knows` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `person` +-- + +DROP TABLE IF EXISTS `person`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET 
character_set_client = utf8 */; +CREATE TABLE `person` ( + `id` int(10) unsigned NOT NULL, + `name` varchar(20) NOT NULL, + `age` int(3) DEFAULT NULL, + `city` varchar(10) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `person` +-- + +LOCK TABLES `person` WRITE; +/*!40000 ALTER TABLE `person` DISABLE KEYS */; +INSERT INTO `person` VALUES (1,'marko',29,'Beijing'),(2,'vadas',27,'HongKong'),(3,'josh',32,'Beijing'),(4,'peter',35,'Shanghai'),(5,'li,nary',26,'Wu,han'),(6,'tom',NULL,NULL); +/*!40000 ALTER TABLE `person` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `software` +-- + +DROP TABLE IF EXISTS `software`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `software` ( + `id` int(10) unsigned NOT NULL, + `name` varchar(20) NOT NULL, + `lang` varchar(10) NOT NULL, + `price` double(10,2) NOT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `software` +-- + +LOCK TABLES `software` WRITE; +/*!40000 ALTER TABLE `software` DISABLE KEYS */; +INSERT INTO `software` VALUES (100,'lop','java',328.00),(200,'ripple','java',199.00); +/*!40000 ALTER TABLE `software` ENABLE KEYS */; +UNLOCK TABLES; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2019-03-10 18:11:20 diff --git a/src/test/resources/schema_date.groovy b/assembly/static/example/mysql/schema.groovy similarity index 68% rename from src/test/resources/schema_date.groovy rename to assembly/static/example/mysql/schema.groovy index bebc88671..449976600 100644 --- a/src/test/resources/schema_date.groovy +++ b/assembly/static/example/mysql/schema.groovy @@ -4,13 +4,13 @@ schema.propertyKey("age").asInt().ifNotExist().create(); schema.propertyKey("city").asText().ifNotExist().create(); schema.propertyKey("weight").asDouble().ifNotExist().create(); schema.propertyKey("lang").asText().ifNotExist().create(); -schema.propertyKey("date").asDate().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); schema.propertyKey("price").asDouble().ifNotExist().create(); -schema.propertyKey("feel").asInt().valueList().ifNotExist().create(); +schema.propertyKey("feel").asText().valueList().ifNotExist().create(); schema.propertyKey("time").asText().valueSet().ifNotExist().create(); -schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); -schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); +schema.vertexLabel("person").useCustomizeNumberId().properties("name", "age", "city").nullableKeys("age", "city").ifNotExist().create(); +schema.vertexLabel("software").useCustomizeNumberId().properties("name", "lang", "price").ifNotExist().create(); schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); 
schema.edgeLabel("created").sourceLabel("person").targetLabel("software").properties("date", "weight").ifNotExist().create(); diff --git a/assembly/static/example/mysql/struct.json b/assembly/static/example/mysql/struct.json new file mode 100644 index 000000000..1bc5b5c21 --- /dev/null +++ b/assembly/static/example/mysql/struct.json @@ -0,0 +1,67 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "example", + "table": "person", + "username": "root", + "password": "", + "batch_size": 500 + }, + "id": "id", + "null_values": ["NULL"] + }, + { + "label": "software", + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "example", + "table": "software", + "username": "root", + "password": "", + "batch_size": 500 + }, + "id": "id" + } + ], + "edges": [ + { + "label": "knows", + "source": ["source_id"], + "target": ["target_id"], + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "example", + "table": "knows", + "username": "root", + "password": "", + "batch_size": 500 + }, + "ignored": ["id"] + }, + { + "label": "created", + "source": ["source_id"], + "target": ["target_id"], + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "example", + "table": "created", + "username": "root", + "password": "", + "batch_size": 500 + }, + "ignored": ["id"] + } + ] +} diff --git a/assembly/static/example/schema.groovy b/assembly/static/example/schema.groovy deleted file mode 100644 index 6c937c3d2..000000000 --- a/assembly/static/example/schema.groovy +++ /dev/null @@ -1,24 +0,0 @@ -// Define schema -schema.propertyKey("name").asText().ifNotExist().create(); -schema.propertyKey("age").asInt().ifNotExist().create(); -schema.propertyKey("city").asText().ifNotExist().create(); -schema.propertyKey("weight").asDouble().ifNotExist().create(); -schema.propertyKey("lang").asText().ifNotExist().create(); -schema.propertyKey("date").asDate().ifNotExist().create(); -schema.propertyKey("price").asDouble().ifNotExist().create(); - -schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").nullableKeys("age", "city").ifNotExist().create(); -schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); - -schema.indexLabel("personByName").onV("person").by("name").secondary().ifNotExist().create(); -schema.indexLabel("personByAge").onV("person").by("age").range().ifNotExist().create(); -schema.indexLabel("personByCity").onV("person").by("city").secondary().ifNotExist().create(); -schema.indexLabel("personByAgeAndCity").onV("person").by("age", "city").secondary().ifNotExist().create(); -schema.indexLabel("softwareByPrice").onV("software").by("price").range().ifNotExist().create(); - -schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); -schema.edgeLabel("created").sourceLabel("person").targetLabel("software").properties("date", "weight").ifNotExist().create(); - -schema.indexLabel("createdByDate").onE("created").by("date").range().ifNotExist().create(); -schema.indexLabel("createdByWeight").onE("created").by("weight").range().ifNotExist().create(); -schema.indexLabel("knowsByWeight").onE("knows").by("weight").range().ifNotExist().create(); diff --git 
a/assembly/static/example/vertex_software.text b/assembly/static/example/vertex_software.text
deleted file mode 100644
index a74511772..000000000
--- a/assembly/static/example/vertex_software.text
+++ /dev/null
@@ -1,3 +0,0 @@
-name|lang|price|ISBN
-lop|java|328|ISBN978-7-107-18618-5
-ripple|java|199|ISBN978-7-100-13678-5
diff --git a/assembly/travis/install-hadoop.sh b/assembly/travis/install-hadoop.sh
new file mode 100755
index 000000000..254db10ee
--- /dev/null
+++ b/assembly/travis/install-hadoop.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+set -ev
+
+UBUNTU_VERSION=$(lsb_release -r | awk '{print substr($2,0,2)}')
+
+sudo tee /etc/apt/sources.list.d/hdp.list <
+
+sudo tee /etc/hadoop/conf/core-site.xml <<EOF
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://localhost:8020</value>
+  </property>
+</configuration>
+EOF
+
+sudo tee /etc/hadoop/conf/hdfs-site.xml <<EOF
+<configuration>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>/opt/hdfs/name</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>/opt/hdfs/data</value>
+  </property>
+  <property>
+    <name>dfs.permissions.superusergroup</name>
+    <value>hadoop</value>
+  </property>
+  <property>
+    <name>dfs.support.append</name>
+    <value>true</value>
+  </property>
+</configuration>
+EOF
+
+sudo apt-get install -y --allow-unauthenticated hadoop hadoop-hdfs
+
+sudo mkdir -p /opt/hdfs/data /opt/hdfs/name
+sudo chown -R hdfs:hdfs /opt/hdfs
+sudo -u hdfs hdfs namenode -format -nonInteractive
+
+sudo adduser travis hadoop
+
+sudo /usr/hdp/current/hadoop-hdfs-datanode/../hadoop/sbin/hadoop-daemon.sh start datanode
+sudo /usr/hdp/current/hadoop-hdfs-namenode/../hadoop/sbin/hadoop-daemon.sh start namenode
+
+hdfs dfsadmin -safemode wait
diff --git a/assembly/travis/install-mysql.sh b/assembly/travis/install-mysql.sh
new file mode 100755
index 000000000..e5f81956c
--- /dev/null
+++ b/assembly/travis/install-mysql.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -ev
+
+TRAVIS_DIR=`dirname $0`
+#MYSQL_DOWNLOAD_ADDRESS="http://dev.MySQL.com/get/Downloads"
+#MYSQL_VERSION="MySQL-5.7"
+#MYSQL_PACKAGE="mysql-5.7.11-Linux-glibc2.5-x86_64"
+#MYSQL_TAR="${MYSQL_PACKAGE}.tar.gz"
+
+if [[ -d /var/lib/mysql ]]; then
+    # Reference from https://github.com/mozilla/treeherder/blob/master/bin/travis-setup.sh
+    # Using tmpfs for the MySQL data directory reduces travis test runtime by 6x
+    sudo mkdir /mnt/ramdisk
+    sudo mount -t tmpfs -o size=1024m tmpfs /mnt/ramdisk
+    sudo mv /var/lib/mysql /mnt/ramdisk
+    sudo ln -s /mnt/ramdisk/mysql /var/lib/mysql
+    sudo cp ${TRAVIS_DIR}/mysql.cnf /etc/mysql/conf.d/mysql.cnf
+    sudo service mysql restart
+else
+    echo "Please install MySQL first."
+    exit 1
+fi
diff --git a/assembly/travis/mysql.cnf b/assembly/travis/mysql.cnf
new file mode 100644
index 000000000..44c8aceee
--- /dev/null
+++ b/assembly/travis/mysql.cnf
@@ -0,0 +1,21 @@
+# Overrides the mysql defaults in /etc/mysql/my.cnf
+# Reference from https://github.com/mozilla/treeherder/blob/master/vagrant/mysql.cnf
+
+[mysqld]
+character_set_server="utf8"
+collation_server="utf8_bin"
+
+# Ensure operations involving astral characters fail loudly,
+# rather than mysql silently replacing each byte of the
+# original character with a U+FFFD replacement character.
+# See bug 1275425.
+sql_mode="NO_ENGINE_SUBSTITUTION,STRICT_ALL_TABLES"
+
+# Django advises using READ-COMMITTED instead of REPEATABLE-READ:
+# https://docs.djangoproject.com/en/1.10/ref/models/querysets/#get-or-create
+# Unhelpfully MySQL uses a different (undocumented) variable name if set via config file:
+# https://bugs.mysql.com/bug.php?id=70008
+transaction-isolation=READ-COMMITTED
+
+# Travis only: Speed up I/O by reducing data-loss protection.
+innodb_flush_log_at_trx_commit="0" diff --git a/pom.xml b/pom.xml index d94cb58d2..f3ba64997 100644 --- a/pom.xml +++ b/pom.xml @@ -24,7 +24,7 @@ com.baidu.hugegraph hugegraph-client - 1.6.6 + 1.6.9 commons-io @@ -46,8 +46,74 @@ opencsv 4.2 + + + org.apache.hadoop + hadoop-client + 2.8.0 + + + org.apache.commons + commons-compress + + + + + org.apache.commons + commons-compress + 1.18 + + + org.tukaani + xz + 1.8 + + + org.apache.hive + hive-exec + 3.0.0 + + + mysql + mysql-connector-java + 6.0.6 + + + + file + + true + + + file + files + **/FileLoadTest.java + + + + hdfs + + false + + + hdfs + hdfs://localhost:8020/files + **/FileLoadTest.java + + + + jdbc + + false + + + **/JDBCLoadTest.java + + + + @@ -64,7 +130,6 @@ - org.apache.maven.plugins maven-assembly-plugin @@ -131,20 +196,66 @@ - + + org.jacoco + jacoco-maven-plugin + 0.8.2 + + + pre-unit-test + + prepare-agent + + + + post-unit-test + test + + report + + + + ${project.build.directory} + + + + + org.codehaus.mojo - cobertura-maven-plugin - 2.7 + properties-maven-plugin + 1.0.0 + + + generate-resources + + write-project-properties + + + + ${project.build.testOutputDirectory}/profile.properties + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.20 - - html - xml - - + + ${test-classes} + - - \ No newline at end of file + + + src/test/resources/ + true + + + + diff --git a/src/main/java/com/baidu/hugegraph/loader/HugeGraphLoader.java b/src/main/java/com/baidu/hugegraph/loader/HugeGraphLoader.java index 5dae20354..5c96b8d02 100644 --- a/src/main/java/com/baidu/hugegraph/loader/HugeGraphLoader.java +++ b/src/main/java/com/baidu/hugegraph/loader/HugeGraphLoader.java @@ -31,16 +31,14 @@ import org.slf4j.Logger; import com.baidu.hugegraph.driver.HugeClient; +import com.baidu.hugegraph.loader.builder.EdgeBuilder; +import com.baidu.hugegraph.loader.builder.VertexBuilder; import com.baidu.hugegraph.loader.exception.LoadException; import com.baidu.hugegraph.loader.exception.ParseException; import com.baidu.hugegraph.loader.executor.GroovyExecutor; import com.baidu.hugegraph.loader.executor.LoadLogger; import com.baidu.hugegraph.loader.executor.LoadOptions; import com.baidu.hugegraph.loader.executor.LoadSummary; -import com.baidu.hugegraph.loader.parser.EdgeParser; -import com.baidu.hugegraph.loader.parser.VertexParser; -import com.baidu.hugegraph.loader.reader.InputReader; -import com.baidu.hugegraph.loader.reader.InputReaderFactory; import com.baidu.hugegraph.loader.source.EdgeSource; import com.baidu.hugegraph.loader.source.GraphSource; import com.baidu.hugegraph.loader.source.VertexSource; @@ -160,16 +158,14 @@ private LoadSummary loadVertices() { List vertexSources = this.graphSource.vertexSources(); for (VertexSource source : vertexSources) { LOG.info("Loading vertex source '{}'", source.label()); - InputReader reader = InputReaderFactory.create(source.input()); + VertexBuilder builder = new VertexBuilder(source, this.options); try { - VertexParser parser = new VertexParser(source, reader, - this.options); - this.loadVertex(parser); + this.loadVertex(builder); } finally { try { - reader.close(); + builder.close(); } catch (Throwable e) { - LOG.warn("Failed to close reader for {} with exception {}", + LOG.warn("Failed to close builder for {} with exception {}", source, e); } } @@ -189,12 +185,12 @@ private LoadSummary loadVertices() { return summary; } - private void loadVertex(VertexParser parser) { + private void loadVertex(VertexBuilder builder) { int batchSize = this.options.batchSize; List batch = new 
ArrayList<>(batchSize); - while (parser.hasNext()) { + while (builder.hasNext()) { try { - Vertex vertex = parser.next(); + Vertex vertex = builder.next(); batch.add(vertex); } catch (ParseException e) { if (this.options.testMode) { @@ -206,6 +202,7 @@ private void loadVertex(VertexParser parser) { LoaderUtil.printError("Error: More than %s vertices " + "parsing error ... Stopping", this.options.maxParseErrors); + // TODO: replace with a more graceful way System.exit(-1); } continue; @@ -226,16 +223,14 @@ private LoadSummary loadEdges() { List edgeSources = this.graphSource.edgeSources(); for (EdgeSource source : edgeSources) { LOG.info("Loading edge source '{}'", source.label()); - InputReader reader = InputReaderFactory.create(source.input()); + EdgeBuilder builder = new EdgeBuilder(source, this.options); try { - EdgeParser parser = new EdgeParser(source, reader, - this.options); - this.loadEdge(parser); + this.loadEdge(builder); } finally { try { - reader.close(); + builder.close(); } catch (Throwable e) { - LOG.warn("Failed to close reader for {} with exception {}", + LOG.warn("Failed to close builder for {} with exception {}", source, e); } } @@ -255,12 +250,12 @@ private LoadSummary loadEdges() { return summary; } - private void loadEdge(EdgeParser parser) { + private void loadEdge(EdgeBuilder builder) { int batchSize = this.options.batchSize; List batch = new ArrayList<>(batchSize); - while (parser.hasNext()) { + while (builder.hasNext()) { try { - Edge edge = parser.next(); + Edge edge = builder.next(); batch.add(edge); } catch (ParseException e) { if (this.options.testMode) { diff --git a/src/main/java/com/baidu/hugegraph/loader/parser/EdgeParser.java b/src/main/java/com/baidu/hugegraph/loader/builder/EdgeBuilder.java similarity index 89% rename from src/main/java/com/baidu/hugegraph/loader/parser/EdgeParser.java rename to src/main/java/com/baidu/hugegraph/loader/builder/EdgeBuilder.java index d4c3038a0..e19e07ecd 100644 --- a/src/main/java/com/baidu/hugegraph/loader/parser/EdgeParser.java +++ b/src/main/java/com/baidu/hugegraph/loader/builder/EdgeBuilder.java @@ -17,13 +17,12 @@ * under the License. 
*/ -package com.baidu.hugegraph.loader.parser; +package com.baidu.hugegraph.loader.builder; import java.util.List; import java.util.Map; import com.baidu.hugegraph.loader.executor.LoadOptions; -import com.baidu.hugegraph.loader.reader.InputReader; import com.baidu.hugegraph.loader.source.EdgeSource; import com.baidu.hugegraph.structure.constant.IdStrategy; import com.baidu.hugegraph.structure.graph.Edge; @@ -32,16 +31,15 @@ import com.baidu.hugegraph.structure.schema.VertexLabel; import com.baidu.hugegraph.util.E; -public class EdgeParser extends ElementParser { +public class EdgeBuilder extends ElementBuilder { private final EdgeSource source; private final EdgeLabel edgeLabel; private final VertexLabel sourceLabel; private final VertexLabel targetLabel; - public EdgeParser(EdgeSource source, InputReader reader, - LoadOptions options) { - super(reader, options); + public EdgeBuilder(EdgeSource source, LoadOptions options) { + super(source, options); this.source = source; this.edgeLabel = this.getEdgeLabel(source.label()); this.sourceLabel = this.getVertexLabel(this.edgeLabel.sourceLabel()); @@ -57,13 +55,15 @@ public EdgeSource source() { } @Override - protected Edge parse(Map keyValues) { + protected Edge build(Map keyValues) { Edge edge = new Edge(this.source.label()); // Must add source/target vertex id - edge.source(this.buildVertexId(this.sourceLabel, - this.source.sourceFields(), keyValues)); - edge.target(this.buildVertexId(this.targetLabel, - this.source.targetFields(), keyValues)); + edge.sourceId(this.buildVertexId(this.sourceLabel, + this.source.sourceFields(), + keyValues)); + edge.targetId(this.buildVertexId(this.targetLabel, + this.source.targetFields(), + keyValues)); // Must add source/target vertex label edge.sourceLabel(this.sourceLabel.name()); edge.targetLabel(this.targetLabel.name()); diff --git a/src/main/java/com/baidu/hugegraph/loader/parser/ElementParser.java b/src/main/java/com/baidu/hugegraph/loader/builder/ElementBuilder.java similarity index 82% rename from src/main/java/com/baidu/hugegraph/loader/parser/ElementParser.java rename to src/main/java/com/baidu/hugegraph/loader/builder/ElementBuilder.java index cfaac48b0..bad8814fd 100644 --- a/src/main/java/com/baidu/hugegraph/loader/parser/ElementParser.java +++ b/src/main/java/com/baidu/hugegraph/loader/builder/ElementBuilder.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package com.baidu.hugegraph.loader.parser; +package com.baidu.hugegraph.loader.builder; import java.io.UnsupportedEncodingException; import java.util.Iterator; @@ -25,13 +25,18 @@ import java.util.Set; import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; import com.baidu.hugegraph.driver.HugeClient; +import com.baidu.hugegraph.loader.exception.LoadException; import com.baidu.hugegraph.loader.exception.ParseException; import com.baidu.hugegraph.loader.executor.LoadOptions; import com.baidu.hugegraph.loader.reader.InputReader; +import com.baidu.hugegraph.loader.reader.InputReaderFactory; +import com.baidu.hugegraph.loader.reader.Line; import com.baidu.hugegraph.loader.source.ElementSource; import com.baidu.hugegraph.loader.source.InputSource; +import com.baidu.hugegraph.loader.util.AutoCloseableIterator; import com.baidu.hugegraph.loader.util.DataTypeUtil; import com.baidu.hugegraph.loader.util.HugeClientWrapper; import com.baidu.hugegraph.structure.GraphElement; @@ -42,25 +47,31 @@ import com.baidu.hugegraph.structure.schema.SchemaLabel; import com.baidu.hugegraph.structure.schema.VertexLabel; import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.Log; import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; -public abstract class ElementParser - implements Iterator { +public abstract class ElementBuilder + implements AutoCloseableIterator { + + private static final Logger LOG = Log.logger(ElementBuilder.class); private static final int VERTEX_ID_LIMIT = 128; private static final String ID_CHARSET = "UTF-8"; private final InputReader reader; - private final HugeClient client; private final Table schemas; - ElementParser(InputReader reader, LoadOptions options) { - this.reader = reader; + public ElementBuilder(ElementSource source, LoadOptions options) { + this.reader = InputReaderFactory.create(source.input()); + try { + this.reader.init(); + } catch (Exception e) { + throw new LoadException("Failed to init input reader", e); + } this.client = HugeClientWrapper.get(options); this.schemas = HashBasedTable.create(); - this.reader.init(); } public abstract ElementSource source(); @@ -76,16 +87,21 @@ public boolean hasNext() { @Override public GE next() { - String line = this.reader().line(); + Line line = this.reader.next(); + Map keyValues = line.toMap(); try { - Map keyValues = this.reader().next(); - return this.parse(this.filterFields(keyValues)); + return this.build(this.filterFields(keyValues)); } catch (IllegalArgumentException e) { - throw new ParseException(line, e.getMessage()); + throw new ParseException(line.rawLine(), e.getMessage()); } } - protected abstract GE parse(Map keyValues); + @Override + public void close() throws Exception { + this.reader.close(); + } + + protected abstract GE build(Map keyValues); protected abstract boolean isIdField(String fieldName); @@ -130,12 +146,8 @@ protected PropertyKey getPropertyKey(String name) { if (schema == null) { schema = this.client.schema().getPropertyKey(name); } - if (schema == null) { - throw new IllegalStateException( - String.format("The property key %s doesn't exist", name)); - } else { - this.schemas.put(HugeType.PROPERTY_KEY, name, schema); - } + E.checkState(schema != null, "The property key %s doesn't exist", name); + this.schemas.put(HugeType.PROPERTY_KEY, name, schema); return (PropertyKey) schema; } @@ -144,12 +156,8 @@ protected VertexLabel getVertexLabel(String name) { if (schema == null) { schema = this.client.schema().getVertexLabel(name); } - if 
(schema == null) { - throw new IllegalStateException( - String.format("The vertex label %s doesn't exist", name)); - } else { - this.schemas.put(HugeType.VERTEX_LABEL, name, schema); - } + E.checkState(schema != null, "The vertex label %s doesn't exist", name); + this.schemas.put(HugeType.VERTEX_LABEL, name, schema); return (VertexLabel) schema; } @@ -158,12 +166,8 @@ protected EdgeLabel getEdgeLabel(String name) { if (schema == null) { schema = this.client.schema().getEdgeLabel(name); } - if (schema == null) { - throw new IllegalStateException( - String.format("The edge label %s doesn't exist", name)); - } else { - this.schemas.put(HugeType.EDGE_LABEL, name, schema); - } + E.checkState(schema != null, "The edge label %s doesn't exist", name); + this.schemas.put(HugeType.EDGE_LABEL, name, schema); return (EdgeLabel) schema; } diff --git a/src/main/java/com/baidu/hugegraph/loader/parser/VertexParser.java b/src/main/java/com/baidu/hugegraph/loader/builder/VertexBuilder.java similarity index 86% rename from src/main/java/com/baidu/hugegraph/loader/parser/VertexParser.java rename to src/main/java/com/baidu/hugegraph/loader/builder/VertexBuilder.java index 6ba279052..c01263674 100644 --- a/src/main/java/com/baidu/hugegraph/loader/parser/VertexParser.java +++ b/src/main/java/com/baidu/hugegraph/loader/builder/VertexBuilder.java @@ -17,13 +17,12 @@ * under the License. */ -package com.baidu.hugegraph.loader.parser; +package com.baidu.hugegraph.loader.builder; import java.util.List; import java.util.Map; import com.baidu.hugegraph.loader.executor.LoadOptions; -import com.baidu.hugegraph.loader.reader.InputReader; import com.baidu.hugegraph.loader.source.VertexSource; import com.baidu.hugegraph.structure.constant.IdStrategy; import com.baidu.hugegraph.structure.graph.Vertex; @@ -31,14 +30,13 @@ import com.baidu.hugegraph.structure.schema.VertexLabel; import com.baidu.hugegraph.util.E; -public class VertexParser extends ElementParser { +public class VertexBuilder extends ElementBuilder { private final VertexSource source; private final VertexLabel vertexLabel; - public VertexParser(VertexSource source, InputReader reader, - LoadOptions options) { - super(reader, options); + public VertexBuilder(VertexSource source, LoadOptions options) { + super(source, options); this.source = source; this.vertexLabel = this.getVertexLabel(source.label()); // Ensure the id field is matched with id strategy @@ -51,7 +49,7 @@ public VertexSource source() { } @Override - protected Vertex parse(Map keyValues) { + protected Vertex build(Map keyValues) { Vertex vertex = new Vertex(this.source.label()); // Assign or check id if need this.assignIdIfNeed(vertex, keyValues); @@ -111,14 +109,17 @@ private void assignIdIfNeed(Vertex vertex, Map keyValues) { } private void checkIdField() { + String name = this.vertexLabel.name(); if (this.vertexLabel.idStrategy().isCustomize()) { E.checkState(this.source.idField() != null, - "The id field can't be empty or null " + - "when id strategy is CUSTOMIZE"); + "The id field can't be empty or null when " + + "id strategy is CUSTOMIZE for vertex label '%s'", + name); } else if (this.vertexLabel.idStrategy().isPrimaryKey()) { E.checkState(this.source.idField() == null, - "The id field must be empty or null " + - "when id strategy is PRIMARY_KEY"); + "The id field must be empty or null when " + + "id strategy is PRIMARY_KEY for vertex label '%s'", + name); } else { // The id strategy is automatic throw new IllegalArgumentException( diff --git 
a/src/main/java/com/baidu/hugegraph/loader/executor/LoadOptions.java b/src/main/java/com/baidu/hugegraph/loader/executor/LoadOptions.java index 333ed4217..6a0a48177 100644 --- a/src/main/java/com/baidu/hugegraph/loader/executor/LoadOptions.java +++ b/src/main/java/com/baidu/hugegraph/loader/executor/LoadOptions.java @@ -26,7 +26,7 @@ import com.beust.jcommander.ParameterException; public class LoadOptions { - + @Parameter(names = {"-f", "--file"}, required = true, arity = 1, validateWith = {FileValidator.class}, description = "The path of the data source description file") diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/CsvFileReader.java b/src/main/java/com/baidu/hugegraph/loader/parser/CsvLineParser.java similarity index 85% rename from src/main/java/com/baidu/hugegraph/loader/reader/file/CsvFileReader.java rename to src/main/java/com/baidu/hugegraph/loader/parser/CsvLineParser.java index 14e99902f..bd60a577f 100644 --- a/src/main/java/com/baidu/hugegraph/loader/reader/file/CsvFileReader.java +++ b/src/main/java/com/baidu/hugegraph/loader/parser/CsvLineParser.java @@ -17,25 +17,23 @@ * under the License. */ -package com.baidu.hugegraph.loader.reader.file; +package com.baidu.hugegraph.loader.parser; import java.io.IOException; import java.util.Arrays; import java.util.List; import com.baidu.hugegraph.loader.exception.ParseException; -import com.baidu.hugegraph.loader.source.file.FileSource; import com.opencsv.CSVParser; import com.opencsv.CSVParserBuilder; -public class CsvFileReader extends TextFileReader { +public class CsvLineParser extends TextLineParser { private static final String DELIMITER = ","; private final CSVParser parser; - public CsvFileReader(FileSource fileSource) { - super(fileSource); + public CsvLineParser() { this.delimiter = DELIMITER; char separator = this.delimiter.charAt(0); this.parser = new CSVParserBuilder().withSeparator(separator) @@ -44,7 +42,7 @@ public CsvFileReader(FileSource fileSource) { } @Override - protected List split(String line) { + public List split(String line) { try { return Arrays.asList(this.parser.parseLine(line)); } catch (IOException e) { diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/JsonFileReader.java b/src/main/java/com/baidu/hugegraph/loader/parser/JsonLineParser.java similarity index 65% rename from src/main/java/com/baidu/hugegraph/loader/reader/file/JsonFileReader.java rename to src/main/java/com/baidu/hugegraph/loader/parser/JsonLineParser.java index d7a6502b8..46871df9e 100644 --- a/src/main/java/com/baidu/hugegraph/loader/reader/file/JsonFileReader.java +++ b/src/main/java/com/baidu/hugegraph/loader/parser/JsonLineParser.java @@ -17,34 +17,33 @@ * under the License. 
  */
 
-package com.baidu.hugegraph.loader.reader.file;
+package com.baidu.hugegraph.loader.parser;
 
 import java.util.Map;
 
 import com.baidu.hugegraph.loader.exception.ParseException;
-import com.baidu.hugegraph.loader.source.file.FileSource;
+import com.baidu.hugegraph.loader.reader.Line;
+import com.baidu.hugegraph.loader.reader.file.AbstractFileReader;
 import com.baidu.hugegraph.rest.SerializeException;
 import com.baidu.hugegraph.util.JsonUtil;
 
-public class JsonFileReader extends FileReader {
-
-    public JsonFileReader(FileSource fileSource) {
-        super(fileSource);
-    }
+public class JsonLineParser implements LineParser {
 
     @Override
-    public void init() {
+    public void init(AbstractFileReader reader) {
         // pass
     }
 
     @Override
     @SuppressWarnings("unchecked")
-    protected Map<String, Object> transform(String line) {
+    public Line parse(String rawLine) {
         try {
-            return JsonUtil.fromJson(line, Map.class);
+            Map<String, Object> keyValues = JsonUtil.fromJson(rawLine,
+                                                              Map.class);
+            return new Line(rawLine, keyValues);
         } catch (SerializeException e) {
-            throw new ParseException(line, "Deserialize line '%s' error",
-                                     e, line);
+            throw new ParseException(rawLine, "Deserialize line '%s' error",
+                                     e, rawLine);
         }
     }
 }
diff --git a/src/main/java/com/baidu/hugegraph/loader/parser/LineParser.java b/src/main/java/com/baidu/hugegraph/loader/parser/LineParser.java
new file mode 100644
index 000000000..d1e0690c2
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/parser/LineParser.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.parser;
+
+import com.baidu.hugegraph.loader.reader.file.AbstractFileReader;
+import com.baidu.hugegraph.loader.reader.Line;
+
+public interface LineParser {
+
+    public void init(AbstractFileReader reader);
+
+    public Line parse(String rawLine);
+}
diff --git a/src/main/java/com/baidu/hugegraph/loader/parser/TextLineParser.java b/src/main/java/com/baidu/hugegraph/loader/parser/TextLineParser.java
new file mode 100644
index 000000000..8eca6b57c
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/parser/TextLineParser.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.parser;
+
+import java.io.IOException;
+import java.util.List;
+
+import com.baidu.hugegraph.loader.exception.LoadException;
+import com.baidu.hugegraph.loader.exception.ParseException;
+import com.baidu.hugegraph.loader.reader.Line;
+import com.baidu.hugegraph.loader.reader.file.AbstractFileReader;
+import com.baidu.hugegraph.loader.source.file.FileSource;
+import com.google.common.base.Splitter;
+
+public class TextLineParser implements LineParser {
+
+    private static final String EMPTY_STR = "";
+
+    private static final String DEFAULT_DELIMITER = "\t";
+
+    // Default is "\t"
+    protected String delimiter;
+    protected List<String> header;
+
+    public String delimiter() {
+        return this.delimiter;
+    }
+
+    public List<String> header() {
+        return this.header;
+    }
+
+    @Override
+    public void init(AbstractFileReader reader) {
+        /*
+         * The delimiter must be initialized before the header,
+         * because initializing the header may use it
+         */
+        this.initDelimiter(reader.source());
+        this.initHeader(reader);
+    }
+
+    protected void initDelimiter(FileSource source) {
+        if (source.delimiter() != null) {
+            this.delimiter = source.delimiter();
+        } else {
+            this.delimiter = DEFAULT_DELIMITER;
+        }
+    }
+
+    protected void initHeader(AbstractFileReader reader) {
+        FileSource source = reader.source();
+        if (source.header() != null) {
+            this.header = source.header();
+        } else {
+            String line = null;
+            try {
+                line = reader.readNextLine();
+            } catch (IOException e) {
+                throw new LoadException("Read header line error", e);
+            }
+            // If no header is specified, the first line is treated as the header
+            if (line != null && !line.isEmpty()) {
+                this.header = this.split(line);
+            } else {
+                throw new LoadException("Can't read header from empty file '%s'",
+                                        source.path());
+            }
+            if (this.header.isEmpty()) {
+                throw new LoadException("The header of file '%s' is empty",
+                                        source.path());
+            }
+        }
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    public Line parse(String rawLine) {
+        List<String> columns = this.split(rawLine);
+        // Ignore extra separator at the end of line
+        if (columns.size() != this.header.size()) {
+            if (this.lastColumnIsEmpty(columns)) {
+                columns = columns.subList(0, columns.size() - 1);
+            } else {
+                throw new ParseException(rawLine,
+                          "The column length '%s' doesn't match with " +
+                          "header length '%s' on: %s",
+                          columns.size(), this.header.size(), rawLine);
+            }
+        }
+        return new Line(rawLine, this.header, (List<Object>) (Object) columns);
+    }
+
+    public List<String> split(String line) {
+        return Splitter.on(this.delimiter).splitToList(line);
+    }
+
+    private boolean lastColumnIsEmpty(List<String> columns) {
+        int last = columns.size() - 1;
+        return columns.size() - 1 == this.header.size() &&
+               columns.get(last).equals(EMPTY_STR);
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/InputReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/InputReader.java
index 5f8499336..2b04897f2 100644
--- a/src/main/java/com/baidu/hugegraph/loader/reader/InputReader.java
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/InputReader.java
@@ -19,14 +19,9 @@
 
 package com.baidu.hugegraph.loader.reader;
 
-import java.util.Map;
-
 import com.baidu.hugegraph.loader.util.AutoCloseableIterator;
 
-public interface InputReader
-       extends AutoCloseableIterator<Map<String, Object>> {
+public interface InputReader extends AutoCloseableIterator<Line> {
 
     public void init();
-
-    public String line();
 }
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/InputReaderFactory.java b/src/main/java/com/baidu/hugegraph/loader/reader/InputReaderFactory.java
index c75c0fbea..bf84bd05c 100644
--- a/src/main/java/com/baidu/hugegraph/loader/reader/InputReaderFactory.java
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/InputReaderFactory.java
@@ -19,39 +19,27 @@
 
 package com.baidu.hugegraph.loader.reader;
 
-import com.baidu.hugegraph.loader.reader.file.CsvFileReader;
 import com.baidu.hugegraph.loader.reader.file.FileReader;
-import com.baidu.hugegraph.loader.reader.file.JsonFileReader;
-import com.baidu.hugegraph.loader.reader.file.TextFileReader;
-import com.baidu.hugegraph.loader.source.file.FileFormat;
-import com.baidu.hugegraph.loader.source.file.FileSource;
+import com.baidu.hugegraph.loader.reader.hdfs.HDFSReader;
+import com.baidu.hugegraph.loader.reader.jdbc.JDBCReader;
 import com.baidu.hugegraph.loader.source.InputSource;
+import com.baidu.hugegraph.loader.source.file.FileSource;
+import com.baidu.hugegraph.loader.source.hdfs.HDFSSource;
+import com.baidu.hugegraph.loader.source.jdbc.JDBCSource;
 
 public class InputReaderFactory {
 
     public static InputReader create(InputSource source) {
         switch (source.type()) {
             case FILE:
-                return createFileReader((FileSource) source);
+                return new FileReader((FileSource) source);
+            case HDFS:
+                return new HDFSReader((HDFSSource) source);
+            case JDBC:
+                return new JDBCReader((JDBCSource) source);
             default:
-                // TODO: Expand more input sources
                 throw new AssertionError(String.format(
                           "Unsupported input source '%s'", source.type()));
         }
     }
-
-    private static FileReader createFileReader(FileSource source) {
-        FileFormat format = source.format();
-        switch (format) {
-            case CSV:
-                return new CsvFileReader(source);
-            case TEXT:
-                return new TextFileReader(source);
-            case JSON:
-                return new JsonFileReader(source);
-            default:
-                throw new AssertionError(String.format(
-                          "Unsupported file format '%s'", source));
-        }
-    }
 }
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/Line.java b/src/main/java/com/baidu/hugegraph/loader/reader/Line.java
new file mode 100644
index 000000000..beec98476
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/Line.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */ + +package com.baidu.hugegraph.loader.reader; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.collections.Unmodifiable; +import org.apache.commons.lang3.StringUtils; + +public final class Line { + + private final String rawLine; + private final List names; + private final List values; + + public Line(String rawLine, int size) { + this.rawLine = rawLine; + this.names = new ArrayList<>(size); + this.values = new ArrayList<>(size); + } + + public Line(String rawLine, Map keyValues) { + this.rawLine = rawLine; + this.names = new ArrayList<>(keyValues.size()); + this.values = new ArrayList<>(keyValues.size()); + for (Map.Entry entry : keyValues.entrySet()) { + this.names.add(entry.getKey()); + this.values.add(entry.getValue()); + } + } + + public Line(List names, List values) { + assert names.size() == values.size(); + this.rawLine = StringUtils.join(values, ",");; + this.names = names; + this.values = values; + } + + public Line(String rawLine, List names, List values) { + assert names.size() == values.size(); + this.rawLine = rawLine; + this.names = names; + this.values = values; + } + + public String rawLine() { + return this.rawLine; + } + + public final List names() { + return Collections.unmodifiableList(this.names); + } + + public final List values() { + return Collections.unmodifiableList(this.values); + } + + public void add(String name, Object value) { + this.names.add(name); + this.values.add(value); + } + + public Map toMap() { + Map result = new LinkedHashMap<>(); + for (int i = 0; i < this.names.size(); i++) { + result.put(this.names.get(i), this.values.get(i)); + } + return result; + } + + @Override + public String toString() { + return this.rawLine(); + } +} diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/Readable.java b/src/main/java/com/baidu/hugegraph/loader/reader/Readable.java new file mode 100644 index 000000000..5b0b28f42 --- /dev/null +++ b/src/main/java/com/baidu/hugegraph/loader/reader/Readable.java @@ -0,0 +1,28 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.loader.reader; + +import java.io.IOException; +import java.io.InputStream; + +public interface Readable { + + public InputStream open() throws IOException; +} diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java new file mode 100644 index 000000000..99407267c --- /dev/null +++ b/src/main/java/com/baidu/hugegraph/loader/reader/file/AbstractFileReader.java @@ -0,0 +1,314 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.loader.reader.file; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.List; +import java.util.NoSuchElementException; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.slf4j.Logger; + +import com.baidu.hugegraph.loader.exception.LoadException; +import com.baidu.hugegraph.loader.parser.CsvLineParser; +import com.baidu.hugegraph.loader.parser.JsonLineParser; +import com.baidu.hugegraph.loader.parser.LineParser; +import com.baidu.hugegraph.loader.parser.TextLineParser; +import com.baidu.hugegraph.loader.reader.InputReader; +import com.baidu.hugegraph.loader.reader.Line; +import com.baidu.hugegraph.loader.reader.Readable; +import com.baidu.hugegraph.loader.source.file.Compression; +import com.baidu.hugegraph.loader.source.file.FileFormat; +import com.baidu.hugegraph.loader.source.file.FileSource; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.Log; + +public abstract class AbstractFileReader implements InputReader { + + private static final Logger LOG = Log.logger(AbstractFileReader.class); + + private static final int BUF_SIZE = 5 * 1024 * 1024; + + private final FileSource source; + private Readers readers; + private LineParser parser; + private Line nextLine; + + public AbstractFileReader(FileSource source) { + this.source = source; + this.readers = null; + this.parser = null; + this.nextLine = null; + } + + public FileSource source() { + return this.source; + } + + protected abstract Readers openReaders() throws IOException; + + @Override + public void init() { + try { + this.readers = this.openReaders(); + } catch (IOException e) { + throw new LoadException("Failed to open readers for '%s'", + this.source); + } + this.parser = createLineParser(this.source); + this.parser.init(this); + } + + @Override + public boolean hasNext() { + if (this.nextLine != null) { + return true; + } + this.nextLine = this.fetch(); + return this.nextLine != null; + } + + @Override + 
+    public Line next() {
+        if (!this.hasNext()) {
+            throw new NoSuchElementException("Reached end of file");
+        }
+        Line line = this.nextLine;
+        this.nextLine = null;
+        return line;
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (this.readers != null) {
+            this.readers.close();
+        }
+    }
+
+    public String readNextLine() throws IOException {
+        E.checkState(this.readers != null, "The readers shouldn't be null");
+        return this.readers.readNextLine();
+    }
+
+    protected Line fetch() {
+        String rawLine;
+        try {
+            rawLine = this.readNextLine();
+        } catch (IOException e) {
+            throw new LoadException("Failed to read the next line", e);
+        }
+        if (rawLine == null) {
+            return null;
+        }
+
+        // Skip comment lines
+        if (this.isCommentLine(rawLine)) {
+            return this.fetch();
+        } else {
+            return this.parser.parse(rawLine);
+        }
+    }
+
+    private boolean isCommentLine(String line) {
+        return this.source.commentSymbols().stream().anyMatch(line::startsWith);
+    }
+
+    private boolean isDuplicateHeader(String line) {
+        assert line != null;
+        // Only text-based formats have a header line (JSON files don't)
+        if (!(this.parser instanceof TextLineParser)) {
+            return false;
+        }
+        /*
+         * All lines will be treated as data lines if the header was
+         * specified explicitly by the user
+         */
+        if (this.source.header() != null) {
+            return false;
+        }
+        TextLineParser parser = (TextLineParser) this.parser;
+        E.checkState(parser.header() != null && !parser.header().isEmpty(),
+                     "The header shouldn't be null or empty");
+        List<String> columns = parser.split(line);
+        if (parser.header().size() != columns.size()) {
+            return false;
+        }
+        for (int i = 0; i < parser.header().size(); i++) {
+            if (!parser.header().get(i).equals(columns.get(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private static BufferedReader createBufferedReader(InputStream stream,
+                                                       FileSource source)
+                                                       throws Exception {
+        E.checkNotNull(stream, "InputStream");
+        try {
+            Reader csr = createCompressReader(stream, source);
+            return new BufferedReader(csr, BUF_SIZE);
+        } catch (IOException e) {
+            try {
+                stream.close();
+            } catch (IOException ignored) {
+                LOG.warn("Failed to close file {}", source.path());
+            }
+            throw e;
+        }
+    }
+
+    private static Reader createCompressReader(InputStream stream,
+                                               FileSource source)
+                                               throws Exception {
+        Compression compression = source.compression();
+        String charset = source.charset();
+        switch (compression) {
+            case NONE:
+                return new InputStreamReader(stream, charset);
+            case GZIP:
+            case BZ2:
+            case XZ:
+            case LZMA:
+            case PACK200:
+            case SNAPPY_RAW:
+            case SNAPPY_FRAMED:
+            case Z:
+            case DEFLATE:
+            case LZ4_BLOCK:
+            case LZ4_FRAMED:
+                CompressorStreamFactory factory = new CompressorStreamFactory();
+                CompressorInputStream cis = factory.createCompressorInputStream(
+                                            compression.string(), stream);
+                return new InputStreamReader(cis, charset);
+            default:
+                throw new LoadException("Unsupported compression format '%s'",
+                                        compression);
+        }
+    }
+
+    private static LineParser createLineParser(FileSource source) {
+        FileFormat format = source.format();
+        switch (format) {
+            case CSV:
+                return new CsvLineParser();
+            case TEXT:
+                return new TextLineParser();
+            case JSON:
+                return new JsonLineParser();
+            default:
+                throw new AssertionError(String.format(
+                          "Unsupported file format '%s'", format));
+        }
+    }
+
+    /**
+     * Used to iterate all readable data sources, like files and paths
+     */
+    protected final class Readers {
+
+        private final FileSource source;
+        private final List<Readable> readables;
+        private BufferedReader reader;
+        private int index;
+
+        public Readers(FileSource source, List<Readable> readables) {
+            this.source = source;
+            this.readables = readables;
+            this.index = 0;
+            if (readables == null || readables.isEmpty()) {
+                this.reader = null;
+            } else {
+                // Open the first one
+                this.reader = this.open(this.index);
+            }
+        }
+
+        private BufferedReader open(int index) {
+            assert index < this.readables.size();
+            Readable readable = this.readables.get(index);
+            LOG.debug("Ready to open '{}'", readable);
+
+            InputStream stream = null;
+            try {
+                stream = readable.open();
+                return createBufferedReader(stream, this.source);
+            } catch (IOException e) {
+                throw new LoadException("Failed to open stream for '%s'",
+                                        e, readable);
+            } catch (Exception e) {
+                if (stream != null) {
+                    try {
+                        stream.close();
+                    } catch (IOException ignored) {
+                        LOG.warn("Failed to close stream of '{}'", readable);
+                    }
+                }
+                throw new LoadException("Failed to create reader for '%s'",
+                                        e, readable);
+            }
+        }
+
+        private void close(int index) throws IOException {
+            assert index < this.readables.size();
+            Readable readable = this.readables.get(index);
+            LOG.info("Ready to close '{}'", readable);
+            this.close();
+        }
+
+        public String readNextLine() throws IOException {
+            // A null reader means there is no file to read
+            if (this.reader == null) {
+                return null;
+            }
+
+            boolean openNext = false;
+            String line;
+            while ((line = this.reader.readLine()) == null) {
+                // The current file has been read to the end, read the next one
+                this.close(this.index);
+
+                if (++this.index < this.readables.size()) {
+                    /*
+                     * Open the second or subsequent readable; its duplicate
+                     * header (if any) needs to be skipped below
+                     */
+                    this.reader = this.open(this.index);
+                    openNext = true;
+                } else {
+                    return null;
+                }
+            }
+            // Determine whether the duplicate header needs to be skipped
+            if (openNext && isDuplicateHeader(line)) {
+                line = this.readNextLine();
+            }
+            return line;
+        }
+
+        private void close() throws IOException {
+            if (this.reader != null) {
+                this.reader.close();
+            }
+        }
+    }
+}
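AbstractFileReader is a template: a subclass only supplies openReaders(), a Readers iterator over Readable items, while parsing, decompression, comment skipping and duplicate-header handling stay in the base class. A hypothetical third implementation (editor's sketch, not part of the patch) only needs to wrap a stream, in the same way as ReadableFile below and ReadablePath in HDFSReader:

    // Sketch: a Readable over an in-memory string, e.g. for unit tests.
    // Assumes java.io.ByteArrayInputStream and java.nio.charset.StandardCharsets.
    public class StringReadable implements Readable {

        private final String content;

        public StringReadable(String content) {
            this.content = content;
        }

        @Override
        public InputStream open() throws IOException {
            return new ByteArrayInputStream(
                   this.content.getBytes(StandardCharsets.UTF_8));
        }
    }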
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/FileReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/file/FileReader.java
index ff6fc6b1a..6a1e7095a 100644
--- a/src/main/java/com/baidu/hugegraph/loader/reader/file/FileReader.java
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/file/FileReader.java
@@ -19,133 +19,52 @@
 package com.baidu.hugegraph.loader.reader.file;
 
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
 
 import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 
 import com.baidu.hugegraph.loader.exception.LoadException;
-import com.baidu.hugegraph.loader.exception.ReadException;
-import com.baidu.hugegraph.loader.reader.InputReader;
+import com.baidu.hugegraph.loader.reader.Readable;
 import com.baidu.hugegraph.loader.source.file.FileSource;
 import com.baidu.hugegraph.util.Log;
 
-public abstract class FileReader implements InputReader {
+public class FileReader extends AbstractFileReader {
 
-    private static Logger LOG = Log.logger(FileReader.class);
-
-    private static final int BUF_SIZE = 5 * 1024 * 1024;
-
-    private final FileSource source;
-    private final BufferedReaderWrapper readers;
-    private String nextLine;
+    private static final Logger LOG = Log.logger(FileReader.class);
 
     public FileReader(FileSource source) {
-        this.source = source;
-        try {
-            this.readers = this.open(source);
-        } catch (IOException e) {
-            throw new LoadException("Failed to load input file '%s'",
-                                    e, source.path());
-        }
-        this.nextLine = null;
-    }
-
-    public FileSource source() {
-        return this.source;
-    }
-
-    public String line() {
-        return this.nextLine;
-    }
-
-    @Override
-    public boolean hasNext() {
-        if (this.nextLine == null) {
-            try {
-                this.nextLine = this.readers.readNextLine();
-            } catch (IOException e) {
-                throw new LoadException("Read next line error", e);
-            }
-        }
-        // Skip the comment line
-        if (this.nextLine != null && this.isCommentLine(this.nextLine)) {
-            this.nextLine = null;
-            return this.hasNext();
-        }
-        return this.nextLine != null;
-    }
-
-    @Override
-    public Map<String, Object> next() {
-        if (!this.hasNext()) {
-            throw new NoSuchElementException("Reach end of file");
-        }
-        String line = this.nextLine;
-        this.nextLine = null;
-        return this.transform(line);
+        super(source);
     }
 
     @Override
-    public void close() throws IOException {
-        this.readers.close();
-    }
-
-    protected abstract Map<String, Object> transform(String line);
-
-    private BufferedReaderWrapper open(FileSource source) throws IOException {
-        String path = source.path();
-        File file = FileUtils.getFile(path);
-        checkFileOrDir(file);
+    protected Readers openReaders() throws IOException {
+        File file = FileUtils.getFile(this.source().path());
+        checkExistAndReadable(file);
+        List<Readable> files = new ArrayList<>();
         if (file.isFile()) {
-            return new BufferedReaderWrapper(source, file);
+            files.add(new ReadableFile(file));
         } else {
             assert file.isDirectory();
-            return new BufferedReaderWrapper(source, file.listFiles());
-        }
-    }
-
-    private static BufferedReader createBufferedFileReader(FileSource source,
-                                                           File file)
-                                                           throws IOException {
-        String path = source.path();
-        String charset = source.charset();
-
-        InputStream fis = null;
-        try {
-            fis = new FileInputStream(file);
-            Reader isr = new InputStreamReader(fis, charset);
-            return new BufferedReader(isr, BUF_SIZE);
-        } catch (FileNotFoundException | UnsupportedEncodingException e) {
-            if (fis != null) {
-                try {
-                    fis.close();
-                } catch (IOException ignored) {
-                    LOG.warn("Failed to close file {}", path);
-                }
+            File[] subFiles = file.listFiles();
+            if (subFiles == null) {
+                throw new LoadException(
+                          "Failed to list files of path '%s'", file);
+            }
+            for (File subFile : subFiles) {
+                files.add(new ReadableFile(subFile));
             }
-            throw e;
         }
+        return new Readers(this.source(), files);
     }
 
-    private boolean isCommentLine(String line) {
-        return this.source.commentSymbols().stream().anyMatch(line::startsWith);
-    }
-
-    private static void checkFileOrDir(File file) {
+    private static void checkExistAndReadable(File file) {
        if (!file.exists()) {
            throw new LoadException(
                      "Please ensure the file or directory exist: '%s'",
@@ -157,69 +76,22 @@ private static void checkFileOrDir(File file) {
         }
     }
 
-    private static class BufferedReaderWrapper {
-
-        private final FileSource source;
-        private final List<File> files;
+    private static class ReadableFile implements Readable {
 
-        private int index;
-        private BufferedReader reader;
+        private final File file;
 
-        public BufferedReaderWrapper(FileSource source, File... 
files) { - this.source = source; - this.files = Arrays.asList(files); - this.index = 0; - if (files.length == 0) { - this.reader = null; - } else { - this.reader = this.openFile(this.index); - } + public ReadableFile(File file) { + this.file = file; } - private BufferedReader openFile(int index) { - assert index < this.files.size(); - File file = this.files.get(index); - try { - LOG.info("Ready to open file '{}'", file.getName()); - return createBufferedFileReader(this.source, file); - } catch (IOException e) { - throw new ReadException(file.getAbsolutePath(), - "Failed to create file reader for file '%s'", - file.getName()); - } + @Override + public InputStream open() throws IOException { + return new FileInputStream(this.file); } - private void closeFile(int index) throws IOException { - assert index < this.files.size(); - File file = this.files.get(index); - LOG.info("Ready to close file '{}'", file.getName()); - this.close(); - } - - public String readNextLine() throws IOException { - // reader is null means there is no file - if (this.reader == null) { - return null; - } - - String line; - while ((line = this.reader.readLine()) == null) { - // The current file is read at the end, ready to read next one - this.closeFile(this.index); - - if (++this.index < this.files.size()) { - this.reader = this.openFile(this.index); - } else { - return null; - } - } - return line; - } - - public void close() throws IOException { - if (this.reader != null) { - this.reader.close(); - } + @Override + public String toString() { + return "FILE:" + this.file; } } } diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/TextFileReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/file/TextFileReader.java deleted file mode 100644 index d5a625f07..000000000 --- a/src/main/java/com/baidu/hugegraph/loader/reader/file/TextFileReader.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2017 HugeGraph Authors - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ - -package com.baidu.hugegraph.loader.reader.file; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import com.baidu.hugegraph.loader.exception.LoadException; -import com.baidu.hugegraph.loader.exception.ParseException; -import com.baidu.hugegraph.loader.source.file.FileSource; -import com.google.common.base.Splitter; - -public class TextFileReader extends FileReader { - - private static final String EMPTY_STR = ""; - - private static final String DEFAULT_DELIMITER = "\t"; - - // Default is "\t" - protected String delimiter; - protected List header; - - public TextFileReader(FileSource source) { - super(source); - this.delimiter = DEFAULT_DELIMITER; - this.header = null; - } - - @Override - public void init() { - /* - * The delimiter must be initialized before header, because init header - * may use it - */ - this.initDelimiter(); - this.initHeader(); - } - - protected void initDelimiter() { - if (this.source().delimiter() != null) { - this.delimiter = this.source().delimiter(); - } - } - - protected void initHeader() { - if (this.source().header() != null) { - this.header = this.source().header(); - } else { - // If doesn't specify header, the first line is considered as header - if (this.hasNext()) { - this.header = this.split(this.line()); - this.next(); - } else { - throw new LoadException("Can't load data from empty file '%s'", - this.source().path()); - } - if (this.header.isEmpty()) { - throw new LoadException("The header is empty", - this.source().path()); - } - } - } - - @Override - public Map transform(String line) { - List columns = this.split(line); - // Ignore extra separator at the end of line - if (columns.size() != this.header.size() && - !this.lastColumnIsEmpty(columns)) { - throw new ParseException(line, - "The column length '%s' doesn't match with " + - "header length '%s' on: %s", - columns.size(), this.header.size(), line); - } - Map keyValues = new HashMap<>(); - for (int i = 0; i < this.header.size(); i++) { - keyValues.put(this.header.get(i), columns.get(i)); - } - return keyValues; - } - - protected List split(String line) { - return Splitter.on(this.delimiter).splitToList(line); - } - - private boolean lastColumnIsEmpty(List columns) { - int last = columns.size() - 1; - return columns.size() - 1 == this.header.size() && - columns.get(last).equals(EMPTY_STR); - } -} diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/hdfs/HDFSReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/hdfs/HDFSReader.java new file mode 100644 index 000000000..4433e23c8 --- /dev/null +++ b/src/main/java/com/baidu/hugegraph/loader/reader/hdfs/HDFSReader.java @@ -0,0 +1,156 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+
+package com.baidu.hugegraph.loader.reader.hdfs;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+
+import com.baidu.hugegraph.loader.exception.LoadException;
+import com.baidu.hugegraph.loader.reader.Readable;
+import com.baidu.hugegraph.loader.reader.file.AbstractFileReader;
+import com.baidu.hugegraph.loader.source.hdfs.HDFSSource;
+import com.baidu.hugegraph.util.Log;
+
+public class HDFSReader extends AbstractFileReader {
+
+    private static final Logger LOG = Log.logger(HDFSReader.class);
+
+    private final FileSystem hdfs;
+
+    public HDFSReader(HDFSSource source) {
+        super(source);
+        Configuration config = this.loadConfiguration();
+        LOG.info("Opening readers for hdfs source {}", source);
+        try {
+            this.hdfs = FileSystem.get(URI.create(source.path()), config);
+        } catch (IOException e) {
+            throw new LoadException("Failed to create hdfs file system", e);
+        }
+        Path path = new Path(source.path());
+        checkExist(this.hdfs, path);
+    }
+
+    public FileSystem fileSystem() {
+        return this.hdfs;
+    }
+
+    @Override
+    public HDFSSource source() {
+        return (HDFSSource) super.source();
+    }
+
+    @Override
+    public void close() throws IOException {
+        super.close();
+        if (this.hdfs != null) {
+            this.hdfs.close();
+        }
+    }
+
+    @Override
+    protected Readers openReaders() throws IOException {
+        Path path = new Path(this.source().path());
+
+        List<Readable> paths = new ArrayList<>();
+        if (this.hdfs.isFile(path)) {
+            paths.add(new ReadablePath(this.hdfs, path));
+        } else {
+            assert this.hdfs.isDirectory(path);
+            FileStatus[] statuses = this.hdfs.listStatus(path);
+            Path[] subPaths = FileUtil.stat2Paths(statuses);
+            for (Path subPath : subPaths) {
+                paths.add(new ReadablePath(this.hdfs, subPath));
+            }
+        }
+        return new Readers(this.source(), paths);
+    }
+
+    private Configuration loadConfiguration() {
+        Configuration conf = new Configuration();
+        String fsDefaultFS = this.source().fsDefaultFS();
+        // Remote hadoop
+        if (fsDefaultFS != null) {
+            // TODO: Support passing more params or specifying config files
+            conf.set("fs.defaultFS", fsDefaultFS);
+            return conf;
+        }
+        // Local hadoop
+        String hadoopHome = System.getenv("HADOOP_HOME");
+        if (hadoopHome != null && !hadoopHome.isEmpty()) {
+            LOG.info("Got HADOOP_HOME {}", hadoopHome);
+            String path = Paths.get(hadoopHome, "etc", "hadoop").toString();
+            conf.addResource(path(path, "/core-site.xml"));
+            conf.addResource(path(path, "/hdfs-site.xml"));
+            conf.addResource(path(path, "/mapred-site.xml"));
+            conf.addResource(path(path, "/yarn-site.xml"));
+        }
+        return conf;
+    }
+
+    private static void checkExist(FileSystem fs, Path path) {
+        try {
+            if (!fs.exists(path)) {
+                throw new LoadException(
+                          "Please ensure the file or directory exists: '%s'",
+                          path);
+            }
+        } catch (IOException e) {
+            throw new LoadException(
+                      "An exception occurred while checking hdfs path '%s'",
+                      e, path);
+        }
+    }
+
+    private static Path path(String configPath, String configFile) {
+        return new Path(Paths.get(configPath, configFile).toString());
+    }
+
+    private static class ReadablePath implements Readable {
+
+        private final FileSystem hdfs;
+        private final Path path;
+
+        private ReadablePath(FileSystem hdfs, Path path) {
+            this.hdfs = hdfs;
+            this.path = path;
+        }
+
+        @Override
+        public InputStream open() throws IOException {
+            return this.hdfs.open(this.path);
+        }
+
+        @Override
+        public String toString() {
+            return "HDFS:" + this.path;
+        }
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/JDBCReader.java b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/JDBCReader.java
new file mode 100644
index 000000000..b4d418c4e
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/JDBCReader.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.reader.jdbc;
+
+import java.sql.SQLException;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.slf4j.Logger;
+
+import com.baidu.hugegraph.loader.exception.LoadException;
+import com.baidu.hugegraph.loader.reader.InputReader;
+import com.baidu.hugegraph.loader.reader.Line;
+import com.baidu.hugegraph.loader.source.jdbc.JDBCSource;
+import com.baidu.hugegraph.util.Log;
+
+public class JDBCReader implements InputReader {
+
+    private static final Logger LOG = Log.logger(JDBCReader.class);
+
+    private final JDBCSource source;
+    private final RowFetcher fetcher;
+
+    private int offset;
+    private List<Line> batch;
+
+    public JDBCReader(JDBCSource source) {
+        this.source = source;
+        try {
+            this.fetcher = new RowFetcher(source);
+        } catch (Exception e) {
+            throw new LoadException("Failed to connect database via '%s'",
+                                    e, source.url());
+        }
+        this.offset = 0;
+        this.batch = null;
+    }
+
+    public JDBCSource source() {
+        return this.source;
+    }
+
+    @Override
+    public void init() {
+        try {
+            this.fetcher.readHeader();
+        } catch (SQLException e) {
+            throw new LoadException("Failed to read column names", e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        if (this.batch == null || this.offset >= this.batch.size()) {
+            try {
+                this.batch = this.fetcher.nextBatch();
+                this.offset = 0;
+            } catch (Exception e) {
+                throw new LoadException("Failed to read the next row", e);
+            }
+        }
+        // NOTE: an empty (non-null) batch also means the end of the table
+        return this.batch != null && this.offset < this.batch.size();
+    }
+
+    @Override
+    public Line next() {
+        if (!this.hasNext()) {
+            throw new NoSuchElementException("Reached end of table");
+        }
+        return this.batch.get(this.offset++);
+    }
+
+    @Override
+    public void close() throws Exception {
+        this.fetcher.close();
+    }
+}
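A sketch of the reader contract from the caller's side (editor's illustration; the loader wires this up internally, and close() is declared to throw Exception):

    // Iterate a table as Line objects; init() loads the column names first.
    JDBCReader reader = new JDBCReader(source);
    reader.init();
    while (reader.hasNext()) {
        Line line = reader.next();   // one row: column names plus values
        // ... hand the name/value pairs to the vertex/edge builders
    }
    reader.close();                  // closes the underlying connection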
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/MysqlUtil.java b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/MysqlUtil.java
new file mode 100644
index 000000000..ed064df7f
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/MysqlUtil.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.reader.jdbc;
+
+public final class MysqlUtil {
+
+    public static String escapeString(String value) {
+        int length = value.length();
+        if (!isEscapeNeededForString(value, length)) {
+            StringBuilder buf = new StringBuilder(length + 2);
+            buf.append('\'').append(value).append('\'');
+            return buf.toString();
+        }
+
+        StringBuilder buf = new StringBuilder((int) (length * 1.1D));
+        buf.append('\'');
+
+        for (int i = 0; i < length; ++i) {
+            char c = value.charAt(i);
+            switch (c) {
+                case '\u0000':
+                    buf.append('\\');
+                    buf.append('0');
+                    break;
+                case '\n':
+                    buf.append('\\');
+                    buf.append('n');
+                    break;
+                case '\r':
+                    buf.append('\\');
+                    buf.append('r');
+                    break;
+                case '\u001a':
+                    buf.append('\\');
+                    buf.append('Z');
+                    break;
+                case '"':
+                    /*
+                     * No need to escape '"' since the value is wrapped
+                     * in single quotes (assumes ANSI mode is not used)
+                     */
+                    buf.append('"');
+                    break;
+                case '\'':
+                    buf.append('\\');
+                    buf.append('\'');
+                    break;
+                case '\\':
+                    buf.append('\\');
+                    buf.append('\\');
+                    break;
+                default:
+                    buf.append(c);
+                    break;
+            }
+        }
+
+        buf.append('\'');
+        return buf.toString();
+    }
+
+    public static boolean isEscapeNeededForString(String sql, int length) {
+        for (int i = 0; i < length; ++i) {
+            char c = sql.charAt(i);
+            switch (c) {
+                case '\u0000':
+                case '\n':
+                case '\r':
+                case '\u001a':
+                case '\'':
+                case '\\':
+                    // Any one of these characters forces escaping
+                    return true;
+                default:
+                    break;
+            }
+        }
+        return false;
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/RowFetcher.java b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/RowFetcher.java
new file mode 100644
index 000000000..bf08056d1
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/RowFetcher.java
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.reader.jdbc;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.http.client.utils.URIBuilder;
+import org.slf4j.Logger;
+
+import com.baidu.hugegraph.loader.exception.LoadException;
+import com.baidu.hugegraph.loader.reader.Line;
+import com.baidu.hugegraph.loader.source.jdbc.JDBCSource;
+import com.baidu.hugegraph.util.E;
+import com.baidu.hugegraph.util.Log;
+
+public class RowFetcher {
+
+    private static final Logger LOG = Log.logger(RowFetcher.class);
+
+    private final String database;
+    private final String table;
+
+    private final Connection conn;
+
+    private List<String> columns;
+    private int batchSize;
+    private Line nextBatchStartRow;
+    private boolean finished;
+
+    public RowFetcher(JDBCSource source) throws SQLException {
+        this.database = source.database();
+        this.table = source.table();
+        this.batchSize = source.batchSize();
+        this.conn = this.connect(source);
+        this.columns = new ArrayList<>();
+        this.finished = false;
+    }
+
+    private Connection connect(JDBCSource source) throws SQLException {
+        String url = source.url();
+        String database = source.database();
+        if (url.endsWith("/")) {
+            url = String.format("%s%s", url, database);
+        } else {
+            url = String.format("%s/%s", url, database);
+        }
+
+        int maxTimes = source.reconnectMaxTimes();
+        int interval = source.reconnectInterval();
+
+        URIBuilder uriBuilder = new URIBuilder();
+        uriBuilder.setPath(url)
+                  .setParameter("rewriteBatchedStatements", "true")
+                  .setParameter("useServerPrepStmts", "false")
+                  .setParameter("autoReconnect", "true")
+                  .setParameter("maxReconnects", String.valueOf(maxTimes))
+                  .setParameter("initialTimeout", String.valueOf(interval));
+
+        String driverName = source.driver();
+        String username = source.username();
+        String password = source.password();
+        try {
+            // Register JDBC driver
+            Class.forName(driverName);
+        } catch (ClassNotFoundException e) {
+            throw new LoadException("Invalid driver class '%s'",
+                                    e, driverName);
+        }
+        // Connect with the URL carrying the reconnect parameters
+        return DriverManager.getConnection(uriBuilder.toString(),
+                                           username, password);
+    }
+
+    public void readHeader() throws SQLException {
+        String sql = String.format("SELECT COLUMN_NAME " +
+                                   "FROM INFORMATION_SCHEMA.COLUMNS " +
+                                   "WHERE TABLE_NAME = '%s' " +
+                                   "AND TABLE_SCHEMA = '%s';",
+                                   this.table, this.database);
+        try (Statement stmt = this.conn.createStatement();
+             ResultSet result = stmt.executeQuery(sql)) {
+            while (result.next()) {
+                this.columns.add(result.getString("COLUMN_NAME"));
+            }
+        } catch (SQLException e) {
+            this.close();
+            throw e;
+        }
+        E.checkArgument(!this.columns.isEmpty(),
+                        "The columns of the table '%s' shouldn't be empty",
+                        this.table);
+    }
+
+    public List<Line> nextBatch() throws SQLException {
+        if (this.finished) {
+            return null;
+        }
+
+        String select = this.buildSql();
+
+        List<Line> batch = new ArrayList<>(this.batchSize + 1);
+        try (Statement stmt = this.conn.createStatement();
+             ResultSet result = stmt.executeQuery(select)) {
+            while (result.next()) {
+                List<Object> values = new ArrayList<>(this.columns.size());
+                for (int i = 1, n = this.columns.size(); i <= n; i++) {
+                    Object value = result.getObject(i);
+                    if (value == null) {
+                        value = "NULL";
+                    }
+                    values.add(value);
+                }
+                Line line = new Line(Collections.unmodifiableList(this.columns),
+                                     values);
+                batch.add(line);
+            }
+        } catch (SQLException e) {
+            this.close();
+            throw e;
+        }
+
+        if (batch.size() != this.batchSize + 1) {
+            this.finished = true;
+        } else {
+            // Remove the extra row and remember it as the next batch's start
+            this.nextBatchStartRow = batch.remove(batch.size() - 1);
+        }
+        return batch;
+    }
+
+    public String buildSql() {
+        int limit = this.batchSize + 1;
+
+        StringBuilder sqlBuilder = new StringBuilder();
+        sqlBuilder.append("SELECT * FROM ").append(this.table);
+
+        if (this.nextBatchStartRow != null) {
+            WhereBuilder where = new WhereBuilder(true);
+            where.gte(this.nextBatchStartRow.names(),
+                      this.nextBatchStartRow.values());
+            sqlBuilder.append(where.build());
+        }
+        sqlBuilder.append(" LIMIT ").append(limit);
+        sqlBuilder.append(";");
+        return sqlBuilder.toString();
+    }
+
+    public void close() {
+        try {
+            this.conn.close();
+        } catch (SQLException e) {
+            LOG.warn("Failed to close connection", e);
+        }
+    }
+}
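RowFetcher pages through the table with keyset pagination rather than a growing OFFSET: each query fetches batch_size + 1 rows, the extra row is held back as the start of the next page, and WhereBuilder.gte turns that row into a row-value comparison. Illustrative generated SQL (editor's example; the table, columns and values are hypothetical, batch_size = 500):

    SELECT * FROM person LIMIT 501;
    -- next page, assuming the withheld 501st row was (id=500, name='marko'):
    SELECT * FROM person WHERE (id, name) >= (500, 'marko') LIMIT 501;

Row-value comparison like (id, name) >= (...) is valid MySQL syntax, and because the start row was removed from the previous batch, the inclusive >= does not return any row twice.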
diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/WhereBuilder.java b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/WhereBuilder.java
new file mode 100644
index 000000000..5f453b09f
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/reader/jdbc/WhereBuilder.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.reader.jdbc;
+
+import java.util.List;
+
+import com.baidu.hugegraph.util.E;
+
+public final class WhereBuilder {
+
+    private StringBuilder builder;
+
+    public WhereBuilder() {
+        this(true);
+    }
+
+    public WhereBuilder(boolean startWithWhere) {
+        if (startWithWhere) {
+            this.builder = new StringBuilder(" WHERE ");
+        } else {
+            this.builder = new StringBuilder(" ");
+        }
+    }
+
+    /**
+     * Concat as: key1 = value and key2 = value...
+     * @param keys the keys to be concatenated with the value
+     * @param value the value to be concatenated with every key
+     */
+    public void and(List<String> keys, String value) {
+        this.and(keys, " = ", value);
+    }
+
+    /**
+     * Concat as: key1 op value and key2 op value...
+     * @param keys the keys to be concatenated with the value
+     * @param operator the operator to link every key and value pair
+     * @param value the value to be concatenated with every key
+     */
+    public void and(List<String> keys, String operator, String value) {
+        for (int i = 0, n = keys.size(); i < n; i++) {
+            this.builder.append(keys.get(i));
+            this.builder.append(operator);
+            this.builder.append(value);
+            if (i != n - 1) {
+                this.builder.append(" AND ");
+            }
+        }
+    }
+
+    /**
+     * Concat as: key1 = value1 and key2 = value2...
+     * @param keys the keys to be concatenated with the values according
+     *             to the same index
+     * @param values the values to be concatenated with every key according
+     *               to the same index
+     */
+    public void and(List<String> keys, List<Object> values) {
+        this.and(keys, " = ", values);
+    }
+
+    /**
+     * Concat as: key1 op value1 and key2 op value2...
+     * @param keys the keys to be concatenated with the values according
+     *             to the same index
+     * @param operator the operator to link every key and value pair
+     * @param values the values to be concatenated with every key according
+     *               to the same index
+     */
+    public void and(List<String> keys, String operator, List<Object> values) {
+        E.checkArgument(keys.size() == values.size(),
+                        "The size of keys '%s' is not equal to " +
+                        "the size of values '%s'",
+                        keys.size(), values.size());
+
+        for (int i = 0, n = keys.size(); i < n; i++) {
+            this.builder.append(keys.get(i));
+            this.builder.append(operator);
+            Object value = values.get(i);
+            if (value instanceof String) {
+                this.builder.append(MysqlUtil.escapeString((String) value));
+            } else {
+                this.builder.append(value);
+            }
+            if (i != n - 1) {
+                this.builder.append(" AND ");
+            }
+        }
+    }
+
+    /**
+     * Concat as: key1 op1 value1 and key2 op2 value2...
+     * @param keys the keys to be concatenated with the values according
+     *             to the same index
+     * @param operators the operators to link every key and value pair
+     *                  according to the same index
+     * @param values the values to be concatenated with every key according
+     *               to the same index
+     */
+    public void and(List<String> keys,
+                    List<String> operators,
+                    List<Object> values) {
+        E.checkArgument(keys.size() == operators.size(),
+                        "The size of keys '%s' is not equal to " +
+                        "the size of operators '%s'",
+                        keys.size(), operators.size());
+        E.checkArgument(keys.size() == values.size(),
+                        "The size of keys '%s' is not equal to " +
+                        "the size of values '%s'",
+                        keys.size(), values.size());
+
+        for (int i = 0, n = keys.size(); i < n; i++) {
+            this.builder.append(keys.get(i));
+            this.builder.append(operators.get(i));
+            Object value = values.get(i);
+            if (value instanceof String) {
+                this.builder.append(MysqlUtil.escapeString((String) value));
+            } else {
+                this.builder.append(value);
+            }
+            if (i != n - 1) {
+                this.builder.append(" AND ");
+            }
+        }
+    }
+
+    /**
+     * Concat as: clause1 and clause2...
+     * @param clauses the clauses to be concatenated with the 'AND' operator
+     */
+    public void and(List<StringBuilder> clauses) {
+        E.checkArgument(clauses != null && !clauses.isEmpty(),
+                        "The clauses can't be empty");
+
+        int size = clauses.size();
+        int i = 0;
+        for (StringBuilder clause : clauses) {
+            this.builder.append(clause);
+            if (++i != size) {
+                this.builder.append(" AND ");
+            }
+        }
+    }
+
+    /**
+     * Concat as: key in (value1, value2...)
+     * @param key the key to be concatenated with the 'IN' operator
+     * @param values the values to be concatenated with ',' and wrapped by '()'
+     */
+    public void in(String key, List<Object> values) {
+        this.builder.append(key).append(" IN (");
+        for (int i = 0, n = values.size(); i < n; i++) {
+            Object value = values.get(i);
+            if (value instanceof String) {
+                this.builder.append(MysqlUtil.escapeString((String) value));
+            } else {
+                this.builder.append(value);
+            }
+            if (i != n - 1) {
+                this.builder.append(", ");
+            }
+        }
+        this.builder.append(")");
+    }
+
+    /**
+     * Concat as: (key1, key2...keyn) {@code >=} (val1, val2...valn)
+     * @param keys the keys to be concatenated with the {@code >=} operator
+     * @param values the values to be concatenated with the {@code >=} operator
+     */
+    public void gte(List<String> keys, List<Object> values) {
+        E.checkArgument(keys.size() == values.size(),
+                        "The size of keys '%s' is not equal to " +
+                        "the size of values '%s'",
+                        keys.size(), values.size());
+        this.builder.append("(");
+        for (int i = 0, n = keys.size(); i < n; i++) {
+            this.builder.append(keys.get(i));
+            if (i != n - 1) {
+                this.builder.append(", ");
+            }
+        }
+        this.builder.append(") >= (");
+        for (int i = 0, n = values.size(); i < n; i++) {
+            Object value = values.get(i);
+            if (value instanceof String) {
+                this.builder.append(MysqlUtil.escapeString((String) value));
+            } else {
+                this.builder.append(value);
+            }
+            if (i != n - 1) {
+                this.builder.append(", ");
+            }
+        }
+        this.builder.append(")");
+    }
+
+    public String build() {
+        return this.builder.toString();
+    }
+
+    @Override
+    public String toString() {
+        return this.builder.toString();
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/loader/serializer/ElementSourceDeserializer.java b/src/main/java/com/baidu/hugegraph/loader/serializer/ElementSourceDeserializer.java
index 346e035fc..01de3f80e 100644
--- a/src/main/java/com/baidu/hugegraph/loader/serializer/ElementSourceDeserializer.java
+++ b/src/main/java/com/baidu/hugegraph/loader/serializer/ElementSourceDeserializer.java
@@ -19,8 +19,11 @@
 package com.baidu.hugegraph.loader.serializer;
 
-import com.baidu.hugegraph.loader.source.file.FileSource;
 import com.baidu.hugegraph.loader.source.InputSource;
+import com.baidu.hugegraph.loader.source.SourceType;
+import com.baidu.hugegraph.loader.source.file.FileSource;
+import com.baidu.hugegraph.loader.source.hdfs.HDFSSource;
+import com.baidu.hugegraph.loader.source.jdbc.JDBCSource;
 import com.baidu.hugegraph.loader.util.JsonUtil;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.JsonNodeType;
@@ -34,15 +37,19 @@ public abstract class ElementSourceDeserializer
     protected InputSource readInputSource(JsonNode node) {
         JsonNode typeNode = getNode(node, FIELD_TYPE, JsonNodeType.STRING);
         String type = typeNode.asText().toUpperCase();
+        SourceType sourceType = SourceType.valueOf(type);
 
         assert node instanceof ObjectNode;
         ObjectNode objectNode = (ObjectNode) node;
         // The node 'type' doesn't participate in deserialization
         objectNode.remove(FIELD_TYPE);
-        switch (type) {
-            case "FILE":
+        switch (sourceType) {
+            case FILE:
                 return JsonUtil.convert(node, FileSource.class);
+            case HDFS:
+                return JsonUtil.convert(node, HDFSSource.class);
+            case JDBC:
+                return JsonUtil.convert(node, JDBCSource.class);
             default:
-                // TODO: Expand more input sources
                 throw new AssertionError(String.format(
                           "Unsupported input source '%s'", type));
         }
diff --git a/src/main/java/com/baidu/hugegraph/loader/serializer/InputSourceDeserializer.java b/src/main/java/com/baidu/hugegraph/loader/serializer/InputSourceDeserializer.java
index a10197f12..bcfaf9d58 100644
--- a/src/main/java/com/baidu/hugegraph/loader/serializer/InputSourceDeserializer.java
+++ b/src/main/java/com/baidu/hugegraph/loader/serializer/InputSourceDeserializer.java
@@ -26,10 +26,10 @@ public abstract class InputSourceDeserializer<T> extends JsonDeserializer<T> {
 
-    private static final ObjectMapper mapper = new ObjectMapper();
+    private static final ObjectMapper MAPPER = new ObjectMapper();
 
     protected T read(JsonNode node, Class<T> clazz) {
-        return this.mapper.convertValue(node, clazz);
+        return MAPPER.convertValue(node, clazz);
     }
 
     protected static JsonNode getNode(JsonNode node, String name,
diff --git a/src/main/java/com/baidu/hugegraph/loader/source/SourceType.java b/src/main/java/com/baidu/hugegraph/loader/source/SourceType.java
index 927bdf279..96206befa 100644
--- a/src/main/java/com/baidu/hugegraph/loader/source/SourceType.java
+++ b/src/main/java/com/baidu/hugegraph/loader/source/SourceType.java
@@ -21,5 +21,9 @@
 public enum SourceType {
 
-    FILE;
+    FILE,
+
+    HDFS,
+
+    JDBC;
 }
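With the new SourceType values, the "type" field of an input block now routes to one of three source classes. An illustrative JDBC input block (editor's example; the values are made up, the field names follow the @JsonProperty annotations of JDBCSource below):

    {
      "label": "person",
      "input": {
        "type": "jdbc",
        "driver": "com.mysql.jdbc.Driver",
        "url": "jdbc:mysql://127.0.0.1:3306",
        "database": "load_test",
        "table": "person",
        "username": "root",
        "password": "******",
        "batch_size": 500
      }
    }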
diff --git a/src/main/java/com/baidu/hugegraph/loader/source/file/Compression.java b/src/main/java/com/baidu/hugegraph/loader/source/file/Compression.java
new file mode 100644
index 000000000..898bc5ec5
--- /dev/null
+++ b/src/main/java/com/baidu/hugegraph/loader/source/file/Compression.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2017 HugeGraph Authors
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.baidu.hugegraph.loader.source.file;
+
+public enum Compression {
+
+    NONE("none"),
+
+    GZIP("gz"),
+
+    BZ2("bzip2"),
+
+    XZ("xz"),
+
+    LZMA("lzma"),
+
+    PACK200("pack200"),
+
+    SNAPPY_RAW("snappy-raw"),
+
+    SNAPPY_FRAMED("snappy-framed"),
+
+    Z("z"),
+
+    DEFLATE("deflate"),
+
+    LZ4_BLOCK("lz4-block"),
+
+    LZ4_FRAMED("lz4-framed");
+
+    private final String name;
+
+    private Compression(String name) {
+        this.name = name;
+    }
+
+    public String string() {
+        return this.name;
+    }
+}
diff --git a/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java b/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java
index 8519e2fc8..843929fe8 100644
--- a/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java
+++ b/src/main/java/com/baidu/hugegraph/loader/source/file/FileSource.java
@@ -45,12 +45,15 @@ public class FileSource implements InputSource {
     private String charset;
     @JsonProperty("date_format")
     private String dateFormat;
+    @JsonProperty("compression")
+    private Compression compression;
     @JsonProperty("comment_symbols")
     private Set<String> commentSymbols;
 
     public FileSource() {
         this.charset = DEFAULT_CHARSET;
         this.dateFormat = DEFAULT_DATE_FORMAT;
+        this.compression = Compression.NONE;
         this.commentSymbols = new HashSet<>();
     }
@@ -87,8 +90,17 @@ public String dateFormat() {
         return this.dateFormat;
     }
 
+    public Compression compression() {
+        return this.compression;
+    }
+
     public Set<String> commentSymbols() {
         assert this.commentSymbols != null;
         return Collections.unmodifiableSet(this.commentSymbols);
     }
+
+    @Override
+    public String toString() {
+        return String.format("%s with path %s", this.type(), this.path());
+    }
 }
diff --git a/src/main/java/com/baidu/hugegraph/loader/exception/ReadException.java b/src/main/java/com/baidu/hugegraph/loader/source/hdfs/HDFSSource.java
similarity index 52%
rename from src/main/java/com/baidu/hugegraph/loader/exception/ReadException.java
rename to src/main/java/com/baidu/hugegraph/loader/source/hdfs/HDFSSource.java
index 86d7d1d32..0c921a2e4 100644
--- a/src/main/java/com/baidu/hugegraph/loader/exception/ReadException.java
+++ b/src/main/java/com/baidu/hugegraph/loader/source/hdfs/HDFSSource.java
@@ -17,34 +17,28 @@
  * under the License.
  */
 
-package com.baidu.hugegraph.loader.exception;
+package com.baidu.hugegraph.loader.source.hdfs;
 
-public class ReadException extends RuntimeException {
+import com.baidu.hugegraph.loader.source.SourceType;
+import com.baidu.hugegraph.loader.source.file.FileSource;
+import com.fasterxml.jackson.annotation.JsonProperty;
 
-    private final String file;
+public class HDFSSource extends FileSource {
 
-    public ReadException(String file, String message) {
-        super(message);
-        this.file = file;
-    }
-
-    public ReadException(String file, String message, Throwable cause) {
-        super(message, cause);
-        this.file = file;
-    }
+    @JsonProperty("fs_default_fs")
+    private String fsDefaultFS;
 
-    public ReadException(String file, String message, Object... args) {
-        super(String.format(message, args));
-        this.file = file;
+    @Override
+    public SourceType type() {
+        return SourceType.HDFS;
    }
 
-    public ReadException(String file, String message, Throwable cause,
-                         Object... 
args) { - super(String.format(message, args), cause); - this.file = file; + public String fsDefaultFS() { + return this.fsDefaultFS; } - public String file() { - return this.file; + @Override + public String toString() { + return String.format("%s with path %s", this.type(), this.path()); } } diff --git a/src/main/java/com/baidu/hugegraph/loader/source/jdbc/JDBCSource.java b/src/main/java/com/baidu/hugegraph/loader/source/jdbc/JDBCSource.java new file mode 100644 index 000000000..3147b76ce --- /dev/null +++ b/src/main/java/com/baidu/hugegraph/loader/source/jdbc/JDBCSource.java @@ -0,0 +1,87 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.loader.source.jdbc; + +import com.baidu.hugegraph.loader.source.InputSource; +import com.baidu.hugegraph.loader.source.SourceType; +import com.fasterxml.jackson.annotation.JsonProperty; + +public class JDBCSource implements InputSource { + + @JsonProperty("driver") + private String driver; + @JsonProperty("url") + private String url; + @JsonProperty("database") + private String database; + @JsonProperty("table") + private String table; + @JsonProperty("username") + private String username; + @JsonProperty("password") + private String password; + @JsonProperty("reconnect_max_times") + private int reconnectMaxTimes; + @JsonProperty("reconnect_interval") + private int reconnectInterval; + @JsonProperty("batch_size") + private int batchSize = 500; + + @Override + public SourceType type() { + return SourceType.JDBC; + } + + public String driver() { + return this.driver; + } + + public String url() { + return this.url; + } + + public String database() { + return this.database; + } + + public String table() { + return this.table; + } + + public String username() { + return this.username; + } + + public String password() { + return this.password; + } + + public int reconnectMaxTimes() { + return this.reconnectMaxTimes; + } + + public int reconnectInterval() { + return this.reconnectInterval; + } + + public int batchSize() { + return this.batchSize; + } +} diff --git a/src/main/java/com/baidu/hugegraph/loader/util/DataTypeUtil.java b/src/main/java/com/baidu/hugegraph/loader/util/DataTypeUtil.java index 09886b21f..c2674ecf7 100644 --- a/src/main/java/com/baidu/hugegraph/loader/util/DataTypeUtil.java +++ b/src/main/java/com/baidu/hugegraph/loader/util/DataTypeUtil.java @@ -19,12 +19,6 @@ package com.baidu.hugegraph.loader.util; -import static com.baidu.hugegraph.structure.constant.DataType.BYTE; -import static com.baidu.hugegraph.structure.constant.DataType.DOUBLE; -import static com.baidu.hugegraph.structure.constant.DataType.FLOAT; -import static com.baidu.hugegraph.structure.constant.DataType.INT; -import static 
com.baidu.hugegraph.structure.constant.DataType.LONG;
-
 import java.text.ParseException;
 import java.util.Collection;
 import java.util.Date;
@@ -32,6 +26,8 @@
 import java.util.List;
 import java.util.UUID;
 
+import org.apache.commons.lang3.tuple.Pair;
+
 import com.baidu.hugegraph.loader.source.InputSource;
 import com.baidu.hugegraph.loader.source.file.FileSource;
 import com.baidu.hugegraph.structure.constant.Cardinality;
@@ -56,7 +52,7 @@ public static Object convert(Object value, PropertyKey propertyKey,
             case SET:
             case LIST:
                 // TODO: diff SET & LIST (Server should support first)
-                return parseMultiValues(value, dataType, source);
+                return parseMultiValues(value, dataType, source, null);
             default:
                 throw new AssertionError(String.format(
                           "Unsupported cardinality: '%s'", cardinality));
@@ -86,12 +82,13 @@ private static Object parseSingleValue(DataType dataType, Object value,
     /**
      * collection format:
      * "obj1,obj2,...,objn" or "[obj1,obj2,...,objn]" ..etc
-     * TODO: After parsing to json, the order of the collection changed in some cases (such as list)
+     * TODO: After parsing to json, the order of the collection changed
+     *       in some cases (such as list)
      **/
-    private static Object parseMultiValues(Object values, DataType dataType,
-                                           InputSource source,
-                                           char... symbols) {
-        // json file should not parse again
+    private static Object parseMultiValues(Object values, DataType dataType,
+                                           InputSource source,
+                                           Pair<Character, Character> symbols) {
+        // JSON file should not be parsed again
         if (values instanceof Collection &&
             checkCollectionDataType((Collection<?>) values, dataType)) {
             return values;
@@ -99,37 +96,24 @@ private static Object parseMultiValues(Object values, DataType dataType,
         E.checkState(values instanceof String, "The value must be String type");
         String originValue = String.valueOf(values);
-        List<T> valueList = new LinkedList<>();
-        // use custom start&end format :like [obj1,obj2,...,objn]
-        if (symbols != null && symbols.length == 2 && originValue.charAt(0) ==
-            symbols[0] && originValue.charAt(originValue.length()-1) == symbols[1]) {
+        List<Object> valueList = new LinkedList<>();
+        // Use custom start&end format like [obj1,obj2,...,objn]
+        if (symbols != null && originValue.charAt(0) == symbols.getLeft() &&
+            originValue.charAt(originValue.length() - 1) == symbols.getRight()) {
             originValue = originValue.substring(1, originValue.length() - 1);
         }
         // TODO: Separator should also be customizable
         Splitter.on(',').splitToList(originValue).forEach(value -> {
-            valueList.add((T) parseSingleValue(dataType, value, source));
+            valueList.add(parseSingleValue(dataType, value, source));
         });
         if (checkCollectionDataType(valueList, dataType)) {
-            return valueList;
+            return valueList;
         }
         return null;
     }
 
-    private static boolean isNumber(DataType dataType) {
-        return dataType == BYTE || dataType == INT || dataType == LONG ||
-               dataType == FLOAT || dataType == DOUBLE;
-    }
-
-    private static boolean isDate(DataType dataType) {
-        return dataType == DataType.DATE;
-    }
-
-    private static boolean isUUID(DataType dataType) {
-        return dataType == DataType.UUID;
-    }
-
     private static Number valueToNumber(Object value, DataType dataType) {
-        E.checkState(isNumber(dataType), "The target data type must be number");
+        E.checkState(dataType.isNumber(), "The target data type must be number");
 
         if (dataType.clazz().isInstance(value)) {
             return (Number) value;
@@ -169,7 +153,7 @@ private static Date valueToDate(Object value, DataType dataType, String df) {
         if (value instanceof Date) {
             return (Date) value;
         }
-        if (isDate(dataType)) {
+        if (dataType.isDate()) {
            if 
(value instanceof Number) { return new Date(((Number) value).longValue()); } else if (value instanceof String) { @@ -189,7 +173,7 @@ private static UUID valueToUUID(Object value, DataType dataType) { if (value instanceof UUID) { return (UUID) value; } - if (isUUID(dataType) && value instanceof String) { + if (dataType.isUUID() && value instanceof String) { return UUID.fromString((String) value); } return null; diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/DBUtil.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/DBUtil.java new file mode 100644 index 000000000..a9d7d7d7f --- /dev/null +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/DBUtil.java @@ -0,0 +1,97 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.loader.test.functional; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; + +import com.baidu.hugegraph.loader.exception.LoadException; + +public class DBUtil { + + private final String driver; + private final String url; + private final String user; + private final String pass; + + private Connection conn; + + public DBUtil(String driver, String url, String user, String pass) { + this.driver = driver; + this.url = url; + this.user = user; + this.pass = pass; + } + + public void connect() { + try { + Class.forName(this.driver); + this.conn = DriverManager.getConnection(this.url, this.user, + this.pass); + } catch (ClassNotFoundException e) { + throw new LoadException("Invalid driver class '%s'", this.driver); + } catch (SQLException e) { + throw new LoadException("Failed to connect database via '%s'", + e, this.url); + } + } + + public void connect(String database) { + this.close(); + String url = String.format("%s/%s", this.url, database); + try { + Class.forName(this.driver); + this.conn = DriverManager.getConnection(url, this.user, this.pass); + } catch (ClassNotFoundException e) { + throw new LoadException("Invalid driver class '%s'", this.driver); + } catch (SQLException e) { + throw new LoadException("Failed to connect database via '%s'", + e, this.url); + } + } + + public void close() { + try { + if (this.conn != null && !this.conn.isClosed()) { + this.conn.close(); + } + } catch (SQLException e) { + throw new RuntimeException("Failed to close connection", e); + } + } + + /** + * TODO: insert(String table, String... 
rows) + */ + public void insert(String sql) { + this.execute(sql); + } + + public void execute(String sql) { + try (Statement stmt = this.conn.createStatement()) { + stmt.execute(sql); + } catch (SQLException e) { + throw new RuntimeException(String.format( + "Failed to execute sql '%s'", sql), e); + } + } +} diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java new file mode 100644 index 000000000..2c3e4a953 --- /dev/null +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java @@ -0,0 +1,924 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.loader.test.functional; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang3.StringUtils; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.baidu.hugegraph.loader.HugeGraphLoader; +import com.baidu.hugegraph.loader.exception.LoadException; +import com.baidu.hugegraph.loader.exception.ParseException; +import com.baidu.hugegraph.loader.source.file.Compression; +import com.baidu.hugegraph.structure.constant.DataType; +import com.baidu.hugegraph.structure.graph.Edge; +import com.baidu.hugegraph.structure.graph.Vertex; +import com.baidu.hugegraph.structure.schema.PropertyKey; +import com.baidu.hugegraph.testutil.Assert; +import com.google.common.collect.ImmutableList; + +public class FileLoadTest extends LoadTest { + + private static final Charset GBK = Charset.forName("GBK"); + + private static IOUtil ioUtil; + + static { + String path = "/profile.properties"; + // Read properties defined in maven profile + try (InputStream is = FileLoadTest.class.getResourceAsStream(path)) { + Properties properties = new Properties(); + properties.load(is); + String sourceType = properties.getProperty("source_type"); + String storePath = properties.getProperty("store_path"); + if (sourceType.equals("file")) { + ioUtil = new FileUtil(storePath); + } else { + assert sourceType.equals("hdfs"); + ioUtil = new HDFSUtil(storePath); + } + } catch (IOException e) { + throw new RuntimeException( + "Failed to read properties defined in maven profile", e); + } + } + + @BeforeClass + public static void setUp() { + clearFileData(); + clearServerData(); + } + + @AfterClass + public static void tearDown() { + ioUtil.close(); + } + + @Before + public void init() { + } + + @After + public void clear() { + clearFileData(); + clearServerData(); + } + + private static void 
+ + private static void clearFileData() { + ioUtil.delete(); + } + + /** + * NOTE: Auto creating schema is not supported yet + */ + //@Test + public void testAutoCreateSchema() { + String[] args = new String[]{ + "-f", "example/struct.json", + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2" + }; + HugeGraphLoader.main(args); + + List propertyKeys = CLIENT.schema().getPropertyKeys(); + propertyKeys.forEach(pkey -> { + Assert.assertEquals(DataType.TEXT, pkey.dataType()); + }); + + List vertices = CLIENT.graph().listVertices(); + List edges = CLIENT.graph().listEdges(); + + Assert.assertEquals(7, vertices.size()); + Assert.assertEquals(6, edges.size()); + + boolean interestedVertex = false; + for (Vertex vertex : vertices) { + Assert.assertEquals(String.class, vertex.id().getClass()); + if (((String) vertex.id()).contains("li,nary")) { + interestedVertex = true; + Assert.assertEquals("26", vertex.property("age")); + Assert.assertEquals("Wu,han", vertex.property("city")); + } + } + Assert.assertTrue(interestedVertex); + + boolean interestedEdge = false; + for (Edge edge : edges) { + Assert.assertEquals(String.class, edge.sourceId().getClass()); + Assert.assertEquals(String.class, edge.targetId().getClass()); + if (((String) edge.sourceId()).contains("marko") && + ((String) edge.targetId()).contains("vadas")) { + interestedEdge = true; + Assert.assertEquals("20160110", edge.property("date")); + Assert.assertEquals("0.5", edge.property("weight")); + } + } + Assert.assertTrue(interestedEdge); + } + + @Test + public void testCustomizedSchema() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29,Beijing", + "vadas,27,Hongkong", + "josh,32,Beijing", + "peter,35,Shanghai", + "\"li,nary\",26,\"Wu,han\""); + ioUtil.write("vertex_software.csv", GBK, + "name,lang,price", + "lop,java,328", + "ripple,java,199"); + ioUtil.write("edge_knows.csv", + "source_name,target_name,date,weight", + "marko,vadas,20160110,0.5", + "marko,josh,20130220,1.0"); + ioUtil.write("edge_created.csv", + "source_name,target_name,date,weight", + "marko,lop,20171210,0.4", + "josh,lop,20091111,0.4", + "josh,ripple,20171210,1.0", + "peter,lop,20170324,0.2"); + + String[] args = new String[]{ + "-f", configPath("customized_schema/struct.json"), + "-s", configPath("customized_schema/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + List edges = CLIENT.graph().listEdges(); + + Assert.assertEquals(7, vertices.size()); + Assert.assertEquals(6, edges.size()); + + boolean interestedVertex = false; + for (Vertex vertex : vertices) { + Assert.assertEquals(String.class, vertex.id().getClass()); + if (((String) vertex.id()).contains("li,nary")) { + interestedVertex = true; + Assert.assertEquals(26, vertex.property("age")); + Assert.assertEquals("Wu,han", vertex.property("city")); + } + } + Assert.assertTrue(interestedVertex); + + boolean interestedEdge = false; + for (Edge edge : edges) { + Assert.assertEquals(String.class, edge.sourceId().getClass()); + Assert.assertEquals(String.class, edge.targetId().getClass()); + if (((String) edge.sourceId()).contains("marko") && + ((String) edge.targetId()).contains("vadas")) { + interestedEdge = true; + Assert.assertEquals("20160110", edge.property("date")); + Assert.assertEquals(0.5, edge.property("weight")); + } + } + Assert.assertTrue(interestedEdge); + }
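+ + /* + * NOTE (descriptive): ids of primary-key vertices are built by + * concatenating the escaped key values (see testVertexPkContainsSpecialSymbol + * and testVertexJointPrimaryKeys below), which is why these assertions + * match ids with contains() rather than equals(). + */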
+ + @Test + public void testVertexIdExceedLimit() { + Integer[] array = new Integer[129]; + Arrays.fill(array, 1); + String tooLongId = StringUtils.join(array); + String line = StringUtils.join(tooLongId, 29, "Beijing"); + ioUtil.write("vertex_person.csv", + "name,age,city", + line); + + String[] args = new String[]{ + "-f", configPath("vertex_id_exceed_limit/struct.json"), + "-s", configPath("vertex_id_exceed_limit/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + Assert.assertThrows(ParseException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testVertexIdExceedLimitInBytes() { + String pk = "ecommerce__color__极光银翻盖上盖+" + + "琥珀啡翻盖下盖+咖啡金翻盖上盖装饰片+" + + "香槟金主镜片+深咖啡色副镜片+琥珀>" + + "啡前壳+极光银后壳+浅灰电池扣+极光银电池组件+深灰天线"; + Assert.assertTrue(pk.length() < 128); + String line = StringUtils.join(pk, "中文", 328); + ioUtil.write("vertex_software.csv", GBK, + "name,lang,price", + line); + + String[] args = new String[]{ + "-f", configPath("vertex_id_exceed_limit_in_bytes/struct.json"), + "-s", configPath("vertex_id_exceed_limit_in_bytes/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + // Bytes encoded in UTF-8 exceed 128 + Assert.assertThrows(ParseException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testTooManyColumns() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29,Beijing,Extra"); + + String[] args = new String[]{ + "-f", configPath("too_many_columns/struct.json"), + "-s", configPath("too_many_columns/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + Assert.assertThrows(ParseException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testTooFewColumns() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29"); + + String[] args = new String[]{ + "-f", configPath("too_few_columns/struct.json"), + "-s", configPath("too_few_columns/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + Assert.assertThrows(ParseException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testUnmatchedPropertyDataType() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,Should be number,Beijing"); + + String[] args = new String[]{ + "-f", configPath("unmatched_property_datatype/struct.json"), + "-s", configPath("unmatched_property_datatype/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + Assert.assertThrows(ParseException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testVertexPkContainsSpecialSymbol() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "mar:ko!,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("vertex_pk_contains_special_symbol/struct.json"), + "-s", configPath("vertex_pk_contains_special_symbol/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(1, vertices.size()); + Vertex vertex = vertices.get(0); + Assert.assertEquals(String.class, vertex.id().getClass()); + Assert.assertTrue(((String) vertex.id()).contains(":mar`:ko`!")); + Assert.assertEquals(29, vertex.property("age")); + Assert.assertEquals("Beijing", vertex.property("city")); + }
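+ + /* + * NOTE (descriptive): the two charset cases below write a GBK-encoded + * file, then load it once with a mismatched charset declared in the input + * struct (garbled text expected) and once with the matching "GBK" charset. + */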
+ + @Test + public void testUnmatchedEncodingCharset() { + ioUtil.write("vertex_software.csv", GBK, + "name,lang,price", + "lop,中文,328"); + + String[] args = new String[]{ + "-f", configPath("unmatched_encoding_charset/struct.json"), + "-s", configPath("unmatched_encoding_charset/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(1, vertices.size()); + Vertex vertex = vertices.get(0); + Assert.assertEquals("lop", vertex.property("name")); + Assert.assertNotEquals("中文", vertex.property("lang")); + Assert.assertEquals(328.0, vertex.property("price")); + } + + @Test + public void testMatchedEncodingCharset() { + ioUtil.write("vertex_software.csv", GBK, + "name,lang,price", + "lop,中文,328"); + + String[] args = new String[]{ + "-f", configPath("matched_encoding_charset/struct.json"), + "-s", configPath("matched_encoding_charset/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(1, vertices.size()); + Vertex vertex = vertices.get(0); + Assert.assertEquals("lop", vertex.property("name")); + Assert.assertEquals("中文", vertex.property("lang")); + Assert.assertEquals(328.0, vertex.property("price")); + } + + /** + * TODO: the order of collection elements may change + * (such as time:["2019-05-02 13:12:44","2008-05-02 13:12:44"]) + */ + @Test + public void testValueListPropertyInJsonFile() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29,Beijing"); + ioUtil.write("vertex_software.csv", GBK, + "name,lang,price", + "lop,中文,328"); + ioUtil.write("edge_use.json", + "{\"person_name\": \"marko\", \"software_name\": " + + "\"lop\", \"feel\": [\"so so\", \"good\", \"good\"]}"); + + String[] args = new String[]{ + "-f", configPath("value_list_property_in_json_file/struct.json"), + "-s", configPath("value_list_property_in_json_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List edges = CLIENT.graph().listEdges(); + Assert.assertEquals(1, edges.size()); + Edge edge = edges.get(0); + + Assert.assertEquals("person", edge.sourceLabel()); + Assert.assertEquals("software", edge.targetLabel()); + Assert.assertEquals(ImmutableList.of("so so", "good", "good"), + edge.property("feel")); + } + + // TODO: List is not supported now + @Test + public void testValueListPropertyInTextFile() { + ioUtil.write("vertex_person.txt", "jin\t29\tBeijing"); + ioUtil.write("vertex_software.txt", GBK, "tom\tChinese\t328"); + + // TODO: when '[]' is met, only string is supported now + // line = "[4,6]\t[2019-05-02,2008-05-02]"; + ioUtil.write("edge_use.txt", "4,1,5,6\t2019-05-02,2008-05-02"); + + String[] args = new String[]{ + "-f", configPath("value_list_property_in_text_file/struct.json"), + "-s", configPath("value_list_property_in_text_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List edges = CLIENT.graph().listEdges(); + Assert.assertEquals(1, edges.size()); + Edge edge = edges.get(0); + + Assert.assertEquals("person", edge.sourceLabel()); + Assert.assertEquals("software", edge.targetLabel()); + Assert.assertEquals(ImmutableList.of("2019-05-02", "2008-05-02"), + edge.property("time")); + }
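+ + /* + * NOTE (descriptive): SET-cardinality values use the same JSON list + * syntax as LIST values in the source file; the distinction exists only + * in the schema declaration, see the remark inside the test below. + */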
"lop,中文,328"); + ioUtil.write("edge_use.json", + "{\"person_name\": \"marko\", \"software_name\": " + + "\"lop\", \"time\": [\"20171210\", \"20180101\"]}"); + + String[] args = new String[]{ + "-f", configPath("value_set_property_in_json_file/struct.json"), + "-s", configPath("value_set_property_in_json_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List edges = CLIENT.graph().listEdges(); + Assert.assertEquals(1, edges.size()); + Edge edge = edges.get(0); + + Assert.assertEquals("person", edge.sourceLabel()); + Assert.assertEquals("software", edge.targetLabel()); + /* + * NOTE: Although the cardinality of the property is set in schema + * declaration, client will deserialize it to list type in default. + */ + Assert.assertEquals(ImmutableList.of("20171210", "20180101"), + edge.property("time")); + } + + @Test + public void testCustomizedNumberId() { + ioUtil.write("vertex_person_number_id.csv", + "1,marko,29,Beijing", + "2,vadas,27,Hongkong"); + ioUtil.write("edge_knows.csv", "1,2,20160110,0.5"); + + String[] args = new String[]{ + "-f", configPath("customized_number_id/struct.json"), + "-s", configPath("customized_number_id/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(2, vertices.size()); + + List edges = CLIENT.graph().listEdges(); + Assert.assertEquals(1, edges.size()); + } + + @Test + public void testVertexJointPrimaryKeys() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("vertex_joint_pks/struct.json"), + "-s", configPath("vertex_joint_pks/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + + Assert.assertEquals(1, vertices.size()); + Vertex vertex = vertices.get(0); + + Assert.assertTrue(vertex.id().toString().contains("marko!Beijing")); + Assert.assertEquals("person", vertex.label()); + Assert.assertEquals("marko", vertex.property("name")); + Assert.assertEquals(29, vertex.property("age")); + Assert.assertEquals("Beijing", vertex.property("city")); + } + + @Test + public void testIgnoreLastRedundantEmptyColumn() { + // Has a redundant seperator at the end of line + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,29,Beijing,"); + + String[] args = new String[]{ + "-f", configPath("ignore_last_redudant_empty_column/struct.json"), + "-s", configPath("ignore_last_redudant_empty_column/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + + Assert.assertEquals(1, vertices.size()); + Vertex vertex = vertices.get(0); + Assert.assertEquals(3, vertex.properties().size()); + } + + @Test + public void testIgnoreNullValueColumns() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "marko,NULL,null", + "vadas,NULL,", + "josh,,null"); + + String[] args = new String[]{ + "-f", configPath("ignore_null_value_columns/struct.json"), + "-s", configPath("ignore_null_value_columns/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(3, vertices.size()); + + for (Vertex vertex : vertices) { + Assert.assertNull(vertex.property("age")); + 
Assert.assertNull(vertex.property("city")); + } + } + + @Test + public void testFileOnlyHasAnEmptyLine() { + ioUtil.write("vertex_person_empty.csv", ""); + + String[] args = new String[]{ + "-f", configPath("file_only_has_empty_line/struct.json"), + "-s", configPath("file_only_has_empty_line/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + Assert.assertThrows(LoadException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testMultiFilesHaveHeader() { + ioUtil.write("vertex_dir/vertex_person_1.csv", + "name,age,city", + "marko,29,Beijing"); + ioUtil.write("vertex_dir/vertex_person_2.csv", + "name,age,city", + "vadas,27,Hongkong"); + + String[] args = new String[]{ + "-f", configPath("multi_files_have_header/struct.json"), + "-s", configPath("multi_files_have_header/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(2, vertices.size()); + } + + @Test + public void testFileHasCommentLine() { + ioUtil.write("vertex_person.csv", + "name,age,city", + "# This is a comment", + "marko,29,Beijing", + "// This is also a comment", + "# This is still a comment", + "vadas,27,Hongkong"); + + String[] args = new String[]{ + "-f", configPath("file_has_comment_line/struct.json"), + "-s", configPath("file_has_comment_line/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(2, vertices.size()); + } + + @Test + public void testDirHasNoFile() { + ioUtil.mkdirs("vertex_dir"); + String[] args = new String[]{ + "-f", configPath("dir_has_no_file/struct.json"), + "-s", configPath("dir_has_no_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(0, vertices.size()); + } + + @Test + public void testDirHasMultiFiles() { + ioUtil.write("vertex_dir/vertex_person1.csv", + "marko,29,Beijing", + "vadas,27,Hongkong", + "josh,32,Beijing"); + ioUtil.write("vertex_dir/vertex_person2.csv", + "peter,35,Shanghai", + "\"li,nary\",26,\"Wu,han\""); + ioUtil.write("vertex_dir/vertex_person3.csv"); + + String[] args = new String[]{ + "-f", configPath("dir_has_multi_files/struct.json"), + "-s", configPath("dir_has_multi_files/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(5, vertices.size()); + } + + @Test + public void testMatchedDatePropertyAndFormat() { + ioUtil.write("vertex_person_birth_date.csv", + "marko,1992-10-01,Beijing", + "vadas,2000-01-01,Hongkong"); + + // DateFormat is yyyy-MM-dd + String[] args = new String[]{ + "-f", configPath("matched_date_property_format/struct.json"), + "-s", configPath("matched_date_property_format/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + Assert.assertEquals(2, vertices.size()); + } + + @Test + public void testUnMatchedDatePropertyAndFormat() { + ioUtil.write("vertex_person_birth_date.csv", + "marko,1992/10/01,Beijing", + "vadas,2000/01/01,Hongkong"); + + // DateFormat is yyyy-MM-dd + String[] args = new String[]{ + "-f", configPath("unmatched_date_property_format/struct.json"), + "-s", 
configPath("unmatched_date_property_format/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--test-mode", "true" + }; + Assert.assertThrows(ParseException.class, () -> { + HugeGraphLoader.main(args); + }); + } + + @Test + public void testGZipCompressFile() { + ioUtil.write("vertex_person.gz", Compression.GZIP, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("gzip_compress_file/struct.json"), + "-s", configPath("gzip_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testBZ2CompressFile() { + ioUtil.write("vertex_person.bz2", Compression.BZ2, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("bz2_compress_file/struct.json"), + "-s", configPath("bz2_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testXZCompressFile() { + ioUtil.write("vertex_person.xz", Compression.XZ, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("xz_compress_file/struct.json"), + "-s", configPath("xz_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testLZMACompressFile() { + ioUtil.write("vertex_person.lzma", Compression.LZMA, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("lzma_compress_file/struct.json"), + "-s", configPath("lzma_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testPack200CompressFile() { + ioUtil.write("vertex_person.pack", Compression.PACK200, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("pack200_compress_file/struct.json"), + "-s", configPath("pack200_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + /** + * Didn't find a way to generate the compression file using code + */ + //@Test + public void testSnappyRawCompressFile() { + ioUtil.write("vertex_person.snappy", Compression.SNAPPY_RAW, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("snappy_raw_compress_file/struct.json"), + "-s", configPath("snappy_raw_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testSnappyFramedCompressFile() { + ioUtil.write("vertex_person.snappy", Compression.SNAPPY_FRAMED, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("snappy_framed_compress_file/struct.json"), + "-s", configPath("snappy_framed_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + /** + * Didn't find a way to generate the compression file using code + */ + //@Test + public void testZCompressFile() { + ioUtil.write("vertex_person.z", Compression.Z, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("z_compress_file/struct.json"), + "-s", 
configPath("z_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testDeflateCompressFile() { + ioUtil.write("vertex_person.deflate", Compression.DEFLATE, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("deflate_compress_file/struct.json"), + "-s", configPath("deflate_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testLZ4BlockCompressFile() { + ioUtil.write("vertex_person.lz4", Compression.LZ4_BLOCK, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("lz4_block_compress_file/struct.json"), + "-s", configPath("lz4_block_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } + + @Test + public void testLZ4FramedCompressFile() { + ioUtil.write("vertex_person.lz4", Compression.LZ4_FRAMED, + "name,age,city", + "marko,29,Beijing"); + + String[] args = new String[]{ + "-f", configPath("lz4_framed_compress_file/struct.json"), + "-s", configPath("lz4_framed_compress_file/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + } +} diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileUtil.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileUtil.java index 7e3676c43..233ea7757 100644 --- a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileUtil.java +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileUtil.java @@ -20,77 +20,85 @@ package com.baidu.hugegraph.loader.test.functional; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; import java.util.Arrays; +import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; -public class FileUtil { +import com.baidu.hugegraph.loader.source.file.Compression; - private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; +public class FileUtil implements IOUtil { - public static String newCSVLine(Object... parts) { - return StringUtils.join(parts, ","); - } + private final String storePath; - public static void clear(String fileName) { - File file = org.apache.commons.io.FileUtils.getFile(fileName); - checkFileValid(file, true); - try { - FileUtils.write(file, "", DEFAULT_CHARSET); - } catch (IOException e) { - throw new RuntimeException(String.format( - "Failed to clear file '%s'", fileName), e); - } + public FileUtil(String storePath) { + this.storePath = storePath; } - public static void append(String fileName, String... lines) { - append(fileName, DEFAULT_CHARSET, lines); + @Override + public void mkdirs(String dir) { + String path = Paths.get(this.storePath, dir).toString(); + FileUtils.getFile(path).mkdirs(); } - public static void append(String fileName, Charset charset, - String... 
lines) { - File file = org.apache.commons.io.FileUtils.getFile(fileName); - checkFileValid(file, true); - try { - FileUtils.writeLines(file, charset.name(), - Arrays.asList(lines), true); - } catch (IOException e) { - throw new RuntimeException(String.format( - "Failed to append lines '%s' to file '%s'", - lines, fileName), e); + @Override + public void write(String fileName, Charset charset, + Compression compression, String... lines) { + String path = Paths.get(this.storePath, fileName).toString(); + File file = org.apache.commons.io.FileUtils.getFile(path); + this.checkFile(file); + + if (compression == Compression.NONE) { + try { + FileUtils.writeLines(file, charset.name(), + Arrays.asList(lines), true); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Failed to write lines '%s' to file '%s'", + Arrays.asList(lines), path), e); + } + } else { + try (FileOutputStream fos = new FileOutputStream(file)) { + IOUtil.compress(fos, charset, compression, lines); + } catch (IOException | CompressorException e) { + throw new RuntimeException(String.format( + "Failed to write lines '%s' to file '%s' in '%s' " + + "compression format", + Arrays.asList(lines), path, compression), e); + } } } - public static void delete(String fileName) { + @Override + public void delete() { try { - FileUtils.forceDelete(FileUtils.getFile(fileName)); + FileUtils.forceDelete(FileUtils.getFile(this.storePath)); + } catch (FileNotFoundException ignored) { + // pass } catch (IOException e) { throw new RuntimeException(String.format( - "Failed to delete file '%s'", fileName), e); + "Failed to delete file '%s'", this.storePath), e); } } - public static void mkdirs(String directory) { - FileUtils.getFile(directory).mkdirs(); + @Override + public void close() { + // pass } - private static void checkFileValid(File file, boolean autoCreate) { + private void checkFile(File file) { if (!file.exists()) { - if (autoCreate) { - try { - file.getParentFile().mkdirs(); - file.createNewFile(); - } catch (IOException e) { - throw new RuntimeException(String.format( - "Failed to create file '%s'", file.getName()), e); - } - } else { + file.getParentFile().mkdirs(); + try { + file.createNewFile(); + } catch (IOException e) { throw new RuntimeException(String.format( - "Please ensure the file '%s' exist", file.getName())); + "Failed to create new file '%s'", file), e); } } else { if (!file.isFile() || !file.canWrite()) { diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/HDFSUtil.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/HDFSUtil.java new file mode 100644 index 000000000..92b949a9d --- /dev/null +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/HDFSUtil.java @@ -0,0 +1,146 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.loader.test.functional; + +import java.io.IOException; +import java.net.URI; +import java.nio.charset.Charset; +import java.nio.file.Paths; +import java.util.Arrays; + +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; + +import com.baidu.hugegraph.loader.exception.LoadException; +import com.baidu.hugegraph.loader.source.file.Compression; +import com.baidu.hugegraph.util.Log; + +public class HDFSUtil implements IOUtil { + + private static final Logger LOG = Log.logger(HDFSUtil.class); + + private final String storePath; + private final FileSystem hdfs; + + public HDFSUtil(String storePath) { + this.storePath = storePath; + Configuration config = loadConfiguration(); + // HDFS doesn't support write by default + config.setBoolean("dfs.support.write", true); + config.setBoolean("fs.hdfs.impl.disable.cache", true); + try { + this.hdfs = FileSystem.get(URI.create(storePath), config); + } catch (IOException e) { + throw new LoadException("Failed to create hdfs file system", e); + } + } + + private static Configuration loadConfiguration() { + // Just use local hadoop with default config in test + return new Configuration(); + } + + private static Path path(String configPath, String configFile) { + return new Path(Paths.get(configPath, configFile).toString()); + } + + @Override + public void mkdirs(String dir) { + Path path = new Path(this.storePath, dir); + try { + this.hdfs.mkdirs(path); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Failed to create directory '%s'", path), e); + } + } + + @Override + public void write(String fileName, Charset charset, + Compression compression, String... 
lines) { + Path path = new Path(this.storePath, fileName); + checkPath(path); + + if (compression == Compression.NONE) { + try (FSDataOutputStream fos = this.hdfs.append(path)) { + for (String line : lines) { + fos.write(line.getBytes(charset)); + fos.write("\n".getBytes(charset)); + } + fos.flush(); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Failed to write lines '%s' to path '%s'", + Arrays.asList(lines), path), e); + } + } else { + try (FSDataOutputStream fos = this.hdfs.append(path)) { + IOUtil.compress(fos, charset, compression, lines); + } catch (IOException | CompressorException e) { + throw new RuntimeException(String.format( + "Failed to write lines '%s' to file '%s' in '%s' " + + "compression format", + Arrays.asList(lines), path, compression), e); + } + } + } + + @Override + public void delete() { + Path path = new Path(this.storePath); + try { + this.hdfs.delete(path, true); + } catch (IOException e) { + throw new RuntimeException(String.format( + "Failed to delete file '%s'", path), e); + } + } + + @Override + public void close() { + try { + this.hdfs.close(); + } catch (IOException e) { + LOG.warn("Failed to close hdfs", e); + } + } + + private void checkPath(Path path) { + try { + if (!this.hdfs.exists(path)) { + this.hdfs.mkdirs(path.getParent()); + this.hdfs.createNewFile(path); + } else { + if (!this.hdfs.isFile(path)) { + throw new RuntimeException(String.format( + "Please ensure the path '%s' is file", + path.getName())); + } + } + } catch (IOException e) { + throw new RuntimeException(String.format( + "Failed to check hdfs path '%s'", path), e); + } + } +} diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/IOUtil.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/IOUtil.java new file mode 100644 index 000000000..98f6ecca1 --- /dev/null +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/IOUtil.java @@ -0,0 +1,76 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.loader.test.functional; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; + +import com.baidu.hugegraph.loader.source.file.Compression; + +public interface IOUtil { + + public Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; + + public CompressorStreamFactory FACTORY = new CompressorStreamFactory(); + + public void mkdirs(String path); + + public default void write(String fileName, String... lines) { + this.write(fileName, DEFAULT_CHARSET, Compression.NONE, lines); + } + + public default void write(String fileName, Charset charset, + String... lines) { + this.write(fileName, charset, Compression.NONE, lines); + } + + public default void write(String fileName, Compression compression, + String... lines) { + this.write(fileName, DEFAULT_CHARSET, compression, lines); + } + + public void write(String fileName, Charset charset, + Compression compression, String... lines); + + public void delete(); + + public void close(); + + public static void compress(OutputStream stream, Charset charset, + Compression compression, String... lines) + throws IOException, CompressorException { + BufferedOutputStream bos = new BufferedOutputStream(stream); + CompressorOutputStream cos = FACTORY.createCompressorOutputStream( + compression.string(), bos); + for (String line : lines) { + cos.write(line.getBytes(charset)); + cos.write("\n".getBytes(charset)); + } + cos.flush(); + cos.close(); + } +} diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/JDBCLoadTest.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/JDBCLoadTest.java new file mode 100644 index 000000000..5c55b5f3c --- /dev/null +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/JDBCLoadTest.java @@ -0,0 +1,164 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.loader.test.functional; + +import java.util.List; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.baidu.hugegraph.loader.HugeGraphLoader; +import com.baidu.hugegraph.structure.graph.Edge; +import com.baidu.hugegraph.structure.graph.Vertex; +import com.baidu.hugegraph.testutil.Assert; + +/** + * TODO: add more test cases + */ +public class JDBCLoadTest extends LoadTest { + + // JDBC driver name and database URL + private static final String DRIVER = "com.mysql.cj.jdbc.Driver"; + private static final String DATABASE = "load_test"; + private static final String DB_URL = "jdbc:mysql://localhost"; + // Database credentials + private static final String USER = "root"; + private static final String PASS = ""; + + private static DBUtil dbUtil = new DBUtil(DRIVER, DB_URL, USER, PASS); + + @BeforeClass + public static void setUp() { + clearServerData(); + + dbUtil.connect(); + // create database + dbUtil.execute(String.format("CREATE DATABASE IF NOT EXISTS `%s`;", + DATABASE)); + // create tables + dbUtil.connect(DATABASE); + // vertex person + dbUtil.execute("CREATE TABLE IF NOT EXISTS `person` (\n" + + " `id` int(10) unsigned NOT NULL,\n" + + " `name` varchar(20) NOT NULL,\n" + + " `age` int(3) DEFAULT NULL,\n" + + " `city` varchar(10) DEFAULT NULL,\n" + + " PRIMARY KEY (`id`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8;"); + // vertex software + dbUtil.execute("CREATE TABLE IF NOT EXISTS `software` (\n" + + " `id` int(10) unsigned NOT NULL,\n" + + " `name` varchar(20) NOT NULL,\n" + + " `lang` varchar(10) NOT NULL,\n" + + " `price` double(10,2) NOT NULL,\n" + + " PRIMARY KEY (`id`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8;"); + // edge knows + dbUtil.execute("CREATE TABLE IF NOT EXISTS `knows` (\n" + + " `id` int(10) unsigned NOT NULL,\n" + + " `source_id` int(10) unsigned NOT NULL,\n" + + " `target_id` int(10) unsigned NOT NULL,\n" + + " `date` varchar(10) NOT NULL,\n" + + " `weight` double(10,2) NOT NULL,\n" + + " PRIMARY KEY (`id`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8;"); + // edge created + dbUtil.execute("CREATE TABLE IF NOT EXISTS `created` (\n" + + " `id` int(10) unsigned NOT NULL,\n" + + " `source_id` int(10) unsigned NOT NULL,\n" + + " `target_id` int(10) unsigned NOT NULL,\n" + + " `date` varchar(10) NOT NULL,\n" + + " `weight` double(10,2) NOT NULL,\n" + + " PRIMARY KEY (`id`)\n" + + ") ENGINE=InnoDB DEFAULT CHARSET=utf8;"); + } + + @AfterClass + public static void tearDown() { + // drop tables + dbUtil.execute("DROP TABLE IF EXISTS `person`"); + dbUtil.execute("DROP TABLE IF EXISTS `software`"); + dbUtil.execute("DROP TABLE IF EXISTS `knows`"); + dbUtil.execute("DROP TABLE IF EXISTS `created`"); + // drop database + dbUtil.execute(String.format("DROP DATABASE `%s`", DATABASE)); + + dbUtil.close(); + } + + @Before + public void init() { + } + + @After + public void clear() { + clearServerData(); + } + + @Test + public void testCustomizedSchema() { + dbUtil.insert("INSERT INTO `person` VALUES " + + "(1,'marko',29,'Beijing')," + + "(2,'vadas',27,'HongKong')," + + "(3,'josh',32,'Beijing')," + + "(4,'peter',35,'Shanghai')," + + "(5,'li,nary',26,'Wu,han')," + + "(6,'tom',NULL,NULL);"); + dbUtil.insert("INSERT INTO `software` VALUES " + + "(100,'lop','java',328.00)," + + "(200,'ripple','java',199.00);"); + + dbUtil.insert("INSERT INTO `knows` VALUES " + + "(1,1,2,'2016-01-10',0.50)," + + "(2,1,3,'2013-02-20',1.00);"); + dbUtil.insert("INSERT 
INTO `created` VALUES " + + "(1,1,100,'2017-12-10',0.40)," + + "(2,3,100,'2009-11-11',0.40)," + + "(3,3,200,'2017-12-10',1.00)," + + "(4,4,100,'2017-03-24',0.20);"); + + String[] args = new String[]{ + "-f", configPath("jdbc_customized_schema/struct.json"), + "-s", configPath("jdbc_customized_schema/schema.groovy"), + "-g", GRAPH, + "-h", SERVER, + "--num-threads", "2", + "--test-mode", "true" + }; + HugeGraphLoader.main(args); + + List vertices = CLIENT.graph().listVertices(); + List edges = CLIENT.graph().listEdges(); + + Assert.assertEquals(8, vertices.size()); + Assert.assertEquals(6, edges.size()); + + for (Vertex vertex : vertices) { + Assert.assertEquals(Integer.class, vertex.id().getClass()); + } + for (Edge edge : edges) { + Assert.assertEquals(Integer.class, edge.sourceId().getClass()); + Assert.assertEquals(Integer.class, edge.targetId().getClass()); + } + } +} diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/LoadTest.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/LoadTest.java new file mode 100644 index 000000000..2d230261a --- /dev/null +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/LoadTest.java @@ -0,0 +1,75 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.loader.test.functional; + +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import com.baidu.hugegraph.driver.GraphManager; +import com.baidu.hugegraph.driver.HugeClient; +import com.baidu.hugegraph.driver.SchemaManager; +import com.baidu.hugegraph.driver.TaskManager; + +public class LoadTest { + + protected static final String CONFIG_PATH_PREFIX = "target/test-classes"; + protected static final String GRAPH = "hugegraph"; + protected static final String SERVER = "127.0.0.1"; + protected static final String PORT = "8080"; + protected static final String URL = String.format("http://%s:%s", + SERVER, PORT); + protected static final HugeClient CLIENT = new HugeClient(URL, GRAPH); + + public static String configPath(String fileName) { + return Paths.get(CONFIG_PATH_PREFIX, fileName).toString(); + } + + public static void clearServerData() { + SchemaManager schema = CLIENT.schema(); + GraphManager graph = CLIENT.graph(); + TaskManager task = CLIENT.task(); + // Clear edge + graph.listEdges().forEach(e -> graph.removeEdge(e.id())); + // Clear vertex + graph.listVertices().forEach(v -> graph.removeVertex(v.id())); + + // Clear schema + List taskIds = new ArrayList<>(); + schema.getIndexLabels().forEach(il -> { + taskIds.add(schema.removeIndexLabelAsync(il.name())); + }); + taskIds.forEach(id -> task.waitUntilTaskCompleted(id, 5L)); + taskIds.clear(); + schema.getEdgeLabels().forEach(el -> { + taskIds.add(schema.removeEdgeLabelAsync(el.name())); + }); + taskIds.forEach(id -> task.waitUntilTaskCompleted(id, 5L)); + taskIds.clear(); + schema.getVertexLabels().forEach(vl -> { + taskIds.add(schema.removeVertexLabelAsync(vl.name())); + }); + taskIds.forEach(id -> task.waitUntilTaskCompleted(id, 5L)); + taskIds.clear(); + schema.getPropertyKeys().forEach(pk -> { + schema.removePropertyKey(pk.name()); + }); + } +} diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/LoaderTest.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/LoaderTest.java deleted file mode 100644 index 5c4cc2090..000000000 --- a/src/test/java/com/baidu/hugegraph/loader/test/functional/LoaderTest.java +++ /dev/null @@ -1,764 +0,0 @@ -/* - * Copyright 2017 HugeGraph Authors - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ - -package com.baidu.hugegraph.loader.test.functional; - -import java.nio.charset.Charset; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.lang3.StringUtils; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import com.baidu.hugegraph.driver.GraphManager; -import com.baidu.hugegraph.driver.HugeClient; -import com.baidu.hugegraph.driver.SchemaManager; -import com.baidu.hugegraph.driver.TaskManager; -import com.baidu.hugegraph.loader.HugeGraphLoader; -import com.baidu.hugegraph.loader.exception.ParseException; -import com.baidu.hugegraph.structure.constant.DataType; -import com.baidu.hugegraph.structure.graph.Edge; -import com.baidu.hugegraph.structure.graph.Vertex; -import com.baidu.hugegraph.structure.schema.PropertyKey; -import com.baidu.hugegraph.testutil.Assert; -import com.google.common.collect.ImmutableList; - -public class LoaderTest { - - private static final Charset GBK = Charset.forName("GBK"); - private static final String PATH_PREFIX = "src/test/resources"; - private static final String GRAPH = "hugegraph"; - private static final String SERVER = "127.0.0.1"; - private static final String PORT = "8080"; - private static final String URL = String.format("http://%s:%s", - SERVER, PORT); - private static final HugeClient CLIENT = new HugeClient(URL, GRAPH); - - @BeforeClass - public static void setUp() { - clearFileData(); - clearServerData(); - } - - @Before - public void init() { - FileUtil.append(path("vertex_person.csv"), "name,age,city"); - FileUtil.append(path("vertex_software.csv"), GBK, "name,lang,price"); - FileUtil.append(path("edge_knows.csv"), - "source_name,target_name,date,weight"); - FileUtil.append(path("edge_created.csv"), - "source_name,target_name,date,weight"); - } - - @After - public void clear() { - clearFileData(); - clearServerData(); - } - - @AfterClass - public static void tearDown() { - FileUtil.delete(path("vertex_person.csv")); - FileUtil.delete(path("vertex_software.csv")); - FileUtil.delete(path("edge_knows.csv")); - FileUtil.delete(path("edge_created.csv")); - } - - private static void clearFileData() { - FileUtil.clear(path("vertex_person.csv")); - FileUtil.clear(path("vertex_software.csv")); - FileUtil.clear(path("edge_knows.csv")); - FileUtil.clear(path("edge_created.csv")); - } - - private static void clearServerData() { - SchemaManager schema = CLIENT.schema(); - GraphManager graph = CLIENT.graph(); - TaskManager task = CLIENT.task(); - // Clear edge - graph.listEdges().forEach(e -> graph.removeEdge(e.id())); - // Clear vertex - graph.listVertices().forEach(v -> graph.removeVertex(v.id())); - - // Clear schema - List taskIds = new ArrayList<>(); - schema.getIndexLabels().forEach(il -> { - taskIds.add(schema.removeIndexLabelAsync(il.name())); - }); - taskIds.forEach(id -> task.waitUntilTaskCompleted(id, 5L)); - taskIds.clear(); - schema.getEdgeLabels().forEach(el -> { - taskIds.add(schema.removeEdgeLabelAsync(el.name())); - }); - taskIds.forEach(id -> task.waitUntilTaskCompleted(id, 5L)); - taskIds.clear(); - schema.getVertexLabels().forEach(vl -> { - taskIds.add(schema.removeVertexLabelAsync(vl.name())); - }); - taskIds.forEach(id -> task.waitUntilTaskCompleted(id, 5L)); - taskIds.clear(); - schema.getPropertyKeys().forEach(pk -> { - schema.removePropertyKey(pk.name()); - }); - } - - /** - * NOTE: Unsupport auto create schema - */ - //@Test - public void 
testLoadWithAutoCreateSchema() { - String[] args = new String[]{"-f", "example/struct.json", - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2"}; - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List propertyKeys = CLIENT.schema().getPropertyKeys(); - propertyKeys.forEach(pkey -> { - Assert.assertEquals(DataType.TEXT, pkey.dataType()); - }); - - List vertices = CLIENT.graph().listVertices(); - List edges = CLIENT.graph().listEdges(); - - Assert.assertEquals(7, vertices.size()); - Assert.assertEquals(6, edges.size()); - - boolean interestedVertex = false; - for (Vertex vertex : vertices) { - Assert.assertEquals(String.class, vertex.id().getClass()); - if (((String) vertex.id()).contains("li,nary")) { - interestedVertex = true; - Assert.assertEquals("26", vertex.property("age")); - Assert.assertEquals("Wu,han", vertex.property("city")); - } - } - Assert.assertTrue(interestedVertex); - - boolean interestedEdge = false; - for (Edge edge : edges) { - Assert.assertEquals(String.class, edge.source().getClass()); - Assert.assertEquals(String.class, edge.target().getClass()); - if (((String) edge.source()).contains("marko") && - ((String) edge.target()).contains("vadas")) { - interestedEdge = true; - Assert.assertEquals("20160110", edge.property("date")); - Assert.assertEquals("0.5", edge.property("weight")); - } - } - Assert.assertTrue(interestedEdge); - } - - @Test - public void testLoadWithCustomizedSchema() { - FileUtil.append(path("vertex_person.csv"), - "marko,29,Beijing", - "vadas,27,Hongkong", - "josh,32,Beijing", - "peter,35,Shanghai", - "\"li,nary\",26,\"Wu,han\""); - - FileUtil.append(path("vertex_software.csv"), - "lop,java,328", - "ripple,java,199"); - - FileUtil.append(path("edge_knows.csv"), - "marko,vadas,20160110,0.5", - "marko,josh,20130220,1.0"); - - FileUtil.append(path("edge_created.csv"), - "marko,lop,20171210,0.4", - "josh,lop,20091111,0.4", - "josh,ripple,20171210,1.0", - "peter,lop,20170324,0.2"); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List vertices = CLIENT.graph().listVertices(); - List edges = CLIENT.graph().listEdges(); - - Assert.assertEquals(7, vertices.size()); - Assert.assertEquals(6, edges.size()); - - boolean interestedVertex = false; - for (Vertex vertex : vertices) { - Assert.assertEquals(String.class, vertex.id().getClass()); - if (((String) vertex.id()).contains("li,nary")) { - interestedVertex = true; - Assert.assertEquals(26, vertex.property("age")); - Assert.assertEquals("Wu,han", vertex.property("city")); - } - } - Assert.assertTrue(interestedVertex); - - boolean interestedEdge = false; - for (Edge edge : edges) { - Assert.assertEquals(String.class, edge.source().getClass()); - Assert.assertEquals(String.class, edge.target().getClass()); - if (((String) edge.source()).contains("marko") && - ((String) edge.target()).contains("vadas")) { - interestedEdge = true; - Assert.assertEquals("20160110", edge.property("date")); - Assert.assertEquals(0.5, edge.property("weight")); - } - } - Assert.assertTrue(interestedEdge); - } - - @Test - public void testVertexIdExceedLimit() { - Integer[] array = new Integer[129]; - Arrays.fill(array, 1); - String tooLongId = StringUtils.join(array); - String line = 
FileUtil.newCSVLine(tooLongId, 29, "Beijing"); - FileUtil.append(path("vertex_person.csv"), line); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - - Assert.assertThrows(ParseException.class, () -> { - HugeGraphLoader.main(args); - }); - } - - @Test - public void testLoadWithIdExceedLimitLengthInBytes() { - String pk = "ecommerce__color__极光银翻盖上盖+" + - "琥珀啡翻盖下盖+咖啡金翻盖上盖装饰片+" + - "香槟金主镜片+深咖啡色副镜片+琥珀>" + - "啡前壳+极光银后壳+浅灰电池扣+极光银电池组件+深灰天线"; - assert pk.length() < 128; - String line = FileUtil.newCSVLine(pk, "中文", 328); - FileUtil.append(path("vertex_software.csv"), GBK, line); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - // Bytes encoded in UTF-8 exceed 128 - Assert.assertThrows(ParseException.class, () -> { - HugeGraphLoader.main(args); - }); - } - - @Test - public void testVertexTooManyColumns() { - String line = FileUtil.newCSVLine("marko", 29, "Beijing", "Extra"); - FileUtil.append(path("vertex_person.csv"), line); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - - Assert.assertThrows(ParseException.class, () -> { - HugeGraphLoader.main(args); - }); - } - - @Test - public void testVertexTooFewColumns() { - String line = FileUtil.newCSVLine("marko", 29); - FileUtil.append(path("vertex_person.csv"), line); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - - Assert.assertThrows(ParseException.class, () -> { - HugeGraphLoader.main(args); - }); - } - - @Test - public void testUnmatchedPropertyDataType() { - String line = FileUtil.newCSVLine("marko", "Should be number", - "Beijing"); - FileUtil.append(path("vertex_person.csv"), line); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - - Assert.assertThrows(ParseException.class, () -> { - HugeGraphLoader.main(args); - }); - } - - @Test - public void testVertexPkContainsSpecialSymbol() { - String line = FileUtil.newCSVLine("mar:ko!", 29, "Beijing"); - FileUtil.append(path("vertex_person.csv"), line); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--num-threads", "2", - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(1, vertices.size()); - Vertex vertex = vertices.get(0); - Assert.assertEquals(String.class, vertex.id().getClass()); - Assert.assertTrue(((String) vertex.id()).contains(":mar`:ko`!")); - Assert.assertEquals(29, vertex.property("age")); - Assert.assertEquals("Beijing", vertex.property("city")); - } - - @Test - public void testLoadWithUnmatchedEncodingCharset() { - String line = FileUtil.newCSVLine("lop", "中文", 328); - FileUtil.append(path("vertex_software.csv"), GBK, line); - - String[] args = new String[]{"-f", path("struct.json"), - "-g", GRAPH, - "-h", SERVER, - "-s", path("schema.groovy"), - "--num-threads", "2", - "--test-mode", "true"}; - try { -
HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(1, vertices.size()); - Vertex vertex = vertices.get(0); - Assert.assertEquals("lop", vertex.property("name")); - Assert.assertNotEquals("中文", vertex.property("lang")); - Assert.assertEquals(328.0, vertex.property("price")); - } - - @Test - public void testLoadWithMatchedEncodingCharset() { - String line = FileUtil.newCSVLine("lop", "中文", 328); - FileUtil.append(path("vertex_software.csv"), GBK, line); - - String[] args = new String[]{"-f", path("struct_gbk.json"), - "-g", GRAPH, - "-h", SERVER, - "-s", path("schema.groovy"), - "--num-threads", "2", - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(1, vertices.size()); - Vertex vertex = vertices.get(0); - Assert.assertEquals("lop", vertex.property("name")); - Assert.assertEquals("中文", vertex.property("lang")); - Assert.assertEquals(328.0, vertex.property("price")); - } - - @Test - /* TODO: the order of the collection may change - * (such as time:["2019-05-02 13:12:44","2008-05-02 13:12:44"]) - */ - public void testLoadWithValueListPropertyInJsonFile() { - String line = FileUtil.newCSVLine("marko", 29, "Beijing"); - FileUtil.append(path("vertex_person.csv"), line); - - line = FileUtil.newCSVLine("lop", "中文", 328); - FileUtil.append(path("vertex_software.csv"), GBK, line); - - line = "{\"person_name\": \"marko\", \"software_name\": \"lop\", " + - "\"feel\": [\"so so\", \"good\", \"good\"]}"; - FileUtil.append(path("edge_use.json"), line); - - String[] args = new String[]{"-f", path("struct_edge_use.json"), - "-g", GRAPH, - "-h", SERVER, - "-s", path("schema.groovy"), - "--num-threads", "2", - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - FileUtil.delete(path("edge_use.json")); - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Edge> edges = CLIENT.graph().listEdges(); - Assert.assertEquals(1, edges.size()); - Edge edge = edges.get(0); - - Assert.assertEquals("person", edge.sourceLabel()); - Assert.assertEquals("software", edge.targetLabel()); - Assert.assertEquals(ImmutableList.of("so so", "good", "good"), - edge.property("feel")); - - FileUtil.delete(path("edge_use.json")); - } - - @Test - // TODO: List is not supported yet - public void testLoadWithValueListPropertyInTextFile() { - String line = ("jin\t29\tBeijing"); - FileUtil.append(path("vertex_person.txt"), line); - line = ("tom\tChinese\t328"); - FileUtil.append(path("vertex_software.txt"), GBK, line); - - line = "4,1,5,6\t2019-05-02,2008-05-02"; - // TODO: values wrapped in '[]' are only supported as strings now - // line = "[4,6]\t[2019-05-02,2008-05-02]"; - FileUtil.append(path("edge_use.txt"), line); - - String[] args = new String[]{"-f", path("struct_edge_use_text.json"), - "-g", GRAPH, - "-h", SERVER, - "-s", path("schema_date.groovy"), - "--num-threads", "2", - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - FileUtil.delete(path("edge_use.txt")); - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Edge> edges = CLIENT.graph().listEdges(); - Assert.assertEquals(1, edges.size()); - Edge edge = edges.get(0); - - Assert.assertEquals("person", edge.sourceLabel()); - Assert.assertEquals("software",
edge.targetLabel()); - Assert.assertEquals(ImmutableList.of("2019-05-02", "2008-05-02"), - edge.property("time")); - - FileUtil.delete(path("edge_use.txt")); - } - - @Test - public void testLoadWithValueSetPropertyInJsonFile() { - String line = FileUtil.newCSVLine("marko", 29, "Beijing"); - FileUtil.append(path("vertex_person.csv"), line); - - line = FileUtil.newCSVLine("lop", "中文", 328); - FileUtil.append(path("vertex_software.csv"), GBK, line); - - line = "{\"person_name\": \"marko\", \"software_name\": \"lop\", " + - "\"time\": [\"20171210\", \"20180101\"]}"; - FileUtil.append(path("edge_use.json"), line); - - String[] args = new String[]{"-f", path("struct_edge_use.json"), - "-g", GRAPH, - "-h", SERVER, - "-s", path("schema.groovy"), - "--num-threads", "2", - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - FileUtil.delete(path("edge_use.json")); - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Edge> edges = CLIENT.graph().listEdges(); - Assert.assertEquals(1, edges.size()); - Edge edge = edges.get(0); - - Assert.assertEquals("person", edge.sourceLabel()); - Assert.assertEquals("software", edge.targetLabel()); - /* - * NOTE: Although the cardinality of the property is set in the schema - * declaration, the client will deserialize it to a list type by default. - */ - Assert.assertEquals(ImmutableList.of("20171210", "20180101"), - edge.property("time")); - - FileUtil.delete(path("edge_use.json")); - } - - @Test - public void testLoadWithCustomizedNumberId() { - FileUtil.append(path("vertex_person_number_id.csv"), - "1,marko,29,Beijing", - "2,vadas,27,Hongkong"); - FileUtil.append(path("edge_knows.csv"), "1,2,20160110,0.5"); - String[] args = new String[]{"-f", path("struct_number_id.json"), - "-g", GRAPH, - "-h", SERVER, - "-s", path("schema_number_id.groovy"), - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - FileUtil.delete(path("vertex_person_number_id.csv")); - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(2, vertices.size()); - - List<Edge> edges = CLIENT.graph().listEdges(); - Assert.assertEquals(1, edges.size()); - - FileUtil.delete(path("vertex_person_number_id.csv")); - } - - @Test - public void testLoadVerticesWithJointPrimaryKeys() { - String line = FileUtil.newCSVLine("marko", 29, "Beijing"); - FileUtil.append(path("vertex_person.csv"), line); - - String[] args = new String[]{"-f", path("struct_joint_pk.json"), - "-s", path("schema_joint_pk.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - - Assert.assertEquals(1, vertices.size()); - Vertex vertex = vertices.get(0); - - Assert.assertTrue(vertex.id().toString().contains("marko!Beijing")); - Assert.assertEquals("person", vertex.label()); - Assert.assertEquals("marko", vertex.property("name")); - Assert.assertEquals(29, vertex.property("age")); - Assert.assertEquals("Beijing", vertex.property("city")); - } - - @Test - public void testLoadWithIgnoreLastRedundantEmptyColumn() { - // Has a redundant separator at the end of the line - FileUtil.append(path("vertex_person.csv"), "marko,29,Beijing,"); - - String[] args = new String[]{"-f", path("struct.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - try { -
HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - - Assert.assertEquals(1, vertices.size()); - Vertex vertex = vertices.get(0); - Assert.assertEquals(3, vertex.properties().size()); - } - - @Test - public void testLoadWithIgnoreNullValueColumns() { - FileUtil.append(path("vertex_person.csv"), - "marko,NULL,null", - "vadas,NULL,", - "josh,,null"); - - String[] args = new String[]{"-f", path("struct_null_value.json"), - "-s", path("schema_null_value.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(3, vertices.size()); - - for (Vertex vertex : vertices) { - Assert.assertNull(vertex.property("age")); - Assert.assertNull(vertex.property("city")); - } - } - - @Test - public void testLoadWithFileHasCommentLine() { - FileUtil.append(path("vertex_person.csv"), - "# This is a comment", - "marko,29,Beijing", - "// This is also a comment", - "# This is still a comment", - "vadas,27,Hongkong"); - - String[] args = new String[] {"-f", path("struct_comment_symbol.json"), - "-s", path("schema_joint_pk.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(2, vertices.size()); - } - - @Test - public void testLoadWithDirHasNoFile() { - FileUtil.mkdirs(path("vertex_dir")); - String[] args = new String[] {"-f", path("struct_vertex_dir.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - FileUtil.delete(path("vertex_dir")); - Assert.fail("Should not throw exception, but throw " + e); - } - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(0, vertices.size()); - - FileUtil.delete(path("vertex_dir")); - } - - @Test - public void testLoadWithDirHasThreeFiles() { - FileUtil.append(path("vertex_dir/vertex_person1.csv"), - "marko,29,Beijing", - "vadas,27,Hongkong", - "josh,32,Beijing"); - FileUtil.append(path("vertex_dir/vertex_person2.csv"), - "peter,35,Shanghai", - "\"li,nary\",26,\"Wu,han\""); - FileUtil.append(path("vertex_dir/vertex_person3.csv")); - - String[] args = new String[] {"-f", path("struct_vertex_dir.json"), - "-s", path("schema.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - FileUtil.delete(path("vertex_dir")); - Assert.fail("Should not throw exception, but throw " + e); - } - List<Vertex> vertices = CLIENT.graph().listVertices(); - Assert.assertEquals(5, vertices.size()); - - FileUtil.delete(path("vertex_dir")); - } - - @Test - public void testLoadWithMatchedDatePropertyAndFormat() { - FileUtil.append(path("vertex_person_birth_date.csv"), - "marko,1992-10-01,Beijing", - "vadas,2000-01-01,Hongkong"); - - // DateFormat is yyyy-MM-dd - String[] args = new String[] {"-f", path("struct_date_format.json"), - "-s", path("schema_date_format.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - try { - HugeGraphLoader.main(args); - } catch (Exception e) { - Assert.fail("Should not throw exception, but throw " + e); - } - List<Vertex> vertices =
CLIENT.graph().listVertices(); - Assert.assertEquals(2, vertices.size()); - - FileUtil.delete(path("vertex_person_birth_date.csv")); - } - - @Test - public void testLoadWithUnMatchedDatePropertyAndFormat() { - FileUtil.append(path("vertex_person_birth_date.csv"), - "marko,1992/10/01,Beijing", - "vadas,2000/01/01,Hongkong"); - - // DateFormat is yyyy-MM-dd - String[] args = new String[] {"-f", path("struct_date_format.json"), - "-s", path("schema_date_format.groovy"), - "-g", GRAPH, - "-h", SERVER, - "--test-mode", "true"}; - - Assert.assertThrows(ParseException.class, () -> { - HugeGraphLoader.main(args); - }); - - FileUtil.delete(path("vertex_person_birth_date.csv")); - } - - private static String path(String fileName) { - return Paths.get(PATH_PREFIX, fileName).toString(); - } -} diff --git a/src/test/resources/bz2_compress_file/schema.groovy b/src/test/resources/bz2_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/bz2_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/bz2_compress_file/struct.json b/src/test/resources/bz2_compress_file/struct.json new file mode 100644 index 000000000..ec3d3da9f --- /dev/null +++ b/src/test/resources/bz2_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.bz2", + "format": "CSV", + "charset": "UTF-8", + "compression": "BZ2" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/schema_number_id.groovy b/src/test/resources/customized_number_id/schema.groovy similarity index 100% rename from src/test/resources/schema_number_id.groovy rename to src/test/resources/customized_number_id/schema.groovy diff --git a/src/test/resources/struct_number_id.json b/src/test/resources/customized_number_id/struct.json similarity index 60% rename from src/test/resources/struct_number_id.json rename to src/test/resources/customized_number_id/struct.json index 6b737bb0e..c6d19c969 100644 --- a/src/test/resources/struct_number_id.json +++ b/src/test/resources/customized_number_id/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person_number_id.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_person_number_id.csv", "format": "CSV", "header": ["id", "name", "age", "city"], "charset": "UTF-8" @@ -18,8 +18,9 @@ "source": ["source_name"], "target": ["target_name"], "input": { - "type": "file", - "path": "src/test/resources/edge_knows.csv", + "type": "${source_type}", + "header": ["source_name", "target_name", "date", "weight"], + "path": "${store_path}/edge_knows.csv", "format": "CSV" } } diff --git a/src/test/resources/schema.groovy b/src/test/resources/customized_schema/schema.groovy similarity index 100% rename from src/test/resources/schema.groovy rename to src/test/resources/customized_schema/schema.groovy diff --git a/src/test/resources/struct.json b/src/test/resources/customized_schema/struct.json similarity index 70% rename from src/test/resources/struct.json rename to src/test/resources/customized_schema/struct.json index
4a9da4b12..5eae68c6b 100644 --- a/src/test/resources/struct.json +++ b/src/test/resources/customized_schema/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", "format": "CSV", "charset": "UTF-8" }, @@ -17,8 +17,8 @@ { "label": "software", "input": { - "type": "file", - "path": "src/test/resources/vertex_software.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_software.csv", "format": "CSV" } } @@ -29,8 +29,8 @@ "source": ["source_name"], "target": ["target_name"], "input": { - "type": "file", - "path": "src/test/resources/edge_knows.csv", + "type": "${source_type}", + "path": "${store_path}/edge_knows.csv", "format": "CSV" }, "mapping": { @@ -43,8 +43,8 @@ "source": ["source_name"], "target": ["target_name"], "input": { - "type": "file", - "path": "src/test/resources/edge_created.csv", + "type": "${source_type}", + "path": "${store_path}/edge_created.csv", "format": "CSV" }, "mapping": { @@ -53,4 +53,4 @@ } } ] -} \ No newline at end of file +} diff --git a/src/test/resources/deflate_compress_file/schema.groovy b/src/test/resources/deflate_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/deflate_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/deflate_compress_file/struct.json b/src/test/resources/deflate_compress_file/struct.json new file mode 100644 index 000000000..3b869edb8 --- /dev/null +++ b/src/test/resources/deflate_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.deflate", + "format": "CSV", + "charset": "UTF-8", + "compression": "DEFLATE" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/schema_joint_pk.groovy b/src/test/resources/dir_has_multi_files/schema.groovy similarity index 100% rename from src/test/resources/schema_joint_pk.groovy rename to src/test/resources/dir_has_multi_files/schema.groovy diff --git a/src/test/resources/struct_vertex_dir.json b/src/test/resources/dir_has_multi_files/struct.json similarity index 78% rename from src/test/resources/struct_vertex_dir.json rename to src/test/resources/dir_has_multi_files/struct.json index c15326597..9b1d86ffa 100644 --- a/src/test/resources/struct_vertex_dir.json +++ b/src/test/resources/dir_has_multi_files/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_dir", + "type": "${source_type}", + "path": "${store_path}/vertex_dir", "format": "CSV", "header": ["name", "age", "city"], "charset": "UTF-8" diff --git a/src/test/resources/dir_has_no_file/schema.groovy b/src/test/resources/dir_has_no_file/schema.groovy new file mode 100644 index 000000000..5e0616e0a --- /dev/null +++ b/src/test/resources/dir_has_no_file/schema.groovy @@ -0,0 +1,10 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); 
+schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name", "city").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); diff --git a/src/test/resources/dir_has_no_file/struct.json b/src/test/resources/dir_has_no_file/struct.json new file mode 100644 index 000000000..9b1d86ffa --- /dev/null +++ b/src/test/resources/dir_has_no_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_dir", + "format": "CSV", + "header": ["name", "age", "city"], + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/file_has_comment_line/schema.groovy b/src/test/resources/file_has_comment_line/schema.groovy new file mode 100644 index 000000000..5e0616e0a --- /dev/null +++ b/src/test/resources/file_has_comment_line/schema.groovy @@ -0,0 +1,10 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name", "city").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); diff --git a/src/test/resources/struct_comment_symbol.json b/src/test/resources/file_has_comment_line/struct.json similarity index 76% rename from src/test/resources/struct_comment_symbol.json rename to src/test/resources/file_has_comment_line/struct.json index d07c877fb..79fdfbb62 100644 --- a/src/test/resources/struct_comment_symbol.json +++ b/src/test/resources/file_has_comment_line/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", "format": "CSV", "charset": "UTF-8", "comment_symbols": ["#", "//"] diff --git a/src/test/resources/file_only_has_empty_line/schema.groovy b/src/test/resources/file_only_has_empty_line/schema.groovy new file mode 100644 index 000000000..5e0616e0a --- /dev/null +++ b/src/test/resources/file_only_has_empty_line/schema.groovy @@ -0,0 +1,10 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name", "city").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); diff --git a/src/test/resources/file_only_has_empty_line/struct.json b/src/test/resources/file_only_has_empty_line/struct.json new file mode 100644 index 000000000..4ab98cebe --- /dev/null +++ b/src/test/resources/file_only_has_empty_line/struct.json @@ -0,0 +1,13 @@ +{ + 
"vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "src/test/resources/vertex_person_empty.csv", + "format": "CSV", + "charset": "UTF-8" + } + } + ] +} diff --git a/src/test/resources/gzip_compress_file/schema.groovy b/src/test/resources/gzip_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/gzip_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/gzip_compress_file/struct.json b/src/test/resources/gzip_compress_file/struct.json new file mode 100644 index 000000000..55638bb04 --- /dev/null +++ b/src/test/resources/gzip_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.gz", + "format": "CSV", + "charset": "UTF-8", + "compression": "GZIP" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/ignore_last_redudant_empty_column/schema.groovy b/src/test/resources/ignore_last_redudant_empty_column/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/ignore_last_redudant_empty_column/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/ignore_last_redudant_empty_column/struct.json b/src/test/resources/ignore_last_redudant_empty_column/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/ignore_last_redudant_empty_column/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/schema_null_value.groovy b/src/test/resources/ignore_null_value_columns/schema.groovy similarity index 100% rename from src/test/resources/schema_null_value.groovy rename to src/test/resources/ignore_null_value_columns/schema.groovy diff --git a/src/test/resources/struct_null_value.json b/src/test/resources/ignore_null_value_columns/struct.json similarity index 76% rename from src/test/resources/struct_null_value.json rename to src/test/resources/ignore_null_value_columns/struct.json index e57303ac9..09e89fc38 100644 --- a/src/test/resources/struct_null_value.json +++ b/src/test/resources/ignore_null_value_columns/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", "format": "CSV", "charset": "UTF-8" }, diff --git a/src/test/resources/jdbc_customized_schema/schema.groovy b/src/test/resources/jdbc_customized_schema/schema.groovy new file mode 100644 index 000000000..449976600 --- /dev/null +++ 
b/src/test/resources/jdbc_customized_schema/schema.groovy @@ -0,0 +1,17 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); +schema.propertyKey("feel").asText().valueList().ifNotExist().create(); +schema.propertyKey("time").asText().valueSet().ifNotExist().create(); + +schema.vertexLabel("person").useCustomizeNumberId().properties("name", "age", "city").nullableKeys("age", "city").ifNotExist().create(); +schema.vertexLabel("software").useCustomizeNumberId().properties("name", "lang", "price").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); +schema.edgeLabel("created").sourceLabel("person").targetLabel("software").properties("date", "weight").ifNotExist().create(); +schema.edgeLabel("use").sourceLabel("person").targetLabel("software").properties("feel", "time").nullableKeys("feel", "time").ifNotExist().create(); diff --git a/src/test/resources/jdbc_customized_schema/struct.json b/src/test/resources/jdbc_customized_schema/struct.json new file mode 100644 index 000000000..ac458c4a8 --- /dev/null +++ b/src/test/resources/jdbc_customized_schema/struct.json @@ -0,0 +1,67 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "load_test", + "table": "person", + "username": "root", + "password": "", + "batch_size": 500 + }, + "id": "id", + "null_values": ["NULL"] + }, + { + "label": "software", + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "load_test", + "table": "software", + "username": "root", + "password": "", + "batch_size": 500 + }, + "id": "id" + } + ], + "edges": [ + { + "label": "knows", + "source": ["source_id"], + "target": ["target_id"], + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "load_test", + "table": "knows", + "username": "root", + "password": "", + "batch_size": 500 + }, + "ignored": ["id"] + }, + { + "label": "created", + "source": ["source_id"], + "target": ["target_id"], + "input": { + "type": "jdbc", + "driver": "com.mysql.cj.jdbc.Driver", + "url": "jdbc:mysql://127.0.0.1:3306", + "database": "load_test", + "table": "created", + "username": "root", + "password": "", + "batch_size": 500 + }, + "ignored": ["id"] + } + ] +} diff --git a/src/test/resources/lz4_block_compress_file/schema.groovy b/src/test/resources/lz4_block_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/lz4_block_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/lz4_block_compress_file/struct.json b/src/test/resources/lz4_block_compress_file/struct.json new file 
mode 100644 index 000000000..8c2f30212 --- /dev/null +++ b/src/test/resources/lz4_block_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.lz4", + "format": "CSV", + "charset": "UTF-8", + "compression": "LZ4_BLOCK" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/lz4_framed_compress_file/schema.groovy b/src/test/resources/lz4_framed_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/lz4_framed_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/lz4_framed_compress_file/struct.json b/src/test/resources/lz4_framed_compress_file/struct.json new file mode 100644 index 000000000..e21705fd5 --- /dev/null +++ b/src/test/resources/lz4_framed_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.lz4", + "format": "CSV", + "charset": "UTF-8", + "compression": "LZ4_FRAMED" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/lzma_compress_file/schema.groovy b/src/test/resources/lzma_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/lzma_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/lzma_compress_file/struct.json b/src/test/resources/lzma_compress_file/struct.json new file mode 100644 index 000000000..69612910b --- /dev/null +++ b/src/test/resources/lzma_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.lzma", + "format": "CSV", + "charset": "UTF-8", + "compression": "LZMA" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/schema_date_format.groovy b/src/test/resources/matched_date_property_format/schema.groovy similarity index 100% rename from src/test/resources/schema_date_format.groovy rename to src/test/resources/matched_date_property_format/schema.groovy diff --git a/src/test/resources/struct_date_format.json b/src/test/resources/matched_date_property_format/struct.json similarity index 76% rename from src/test/resources/struct_date_format.json rename to src/test/resources/matched_date_property_format/struct.json index 553a8dc23..b4e286700 100644 --- a/src/test/resources/struct_date_format.json +++ b/src/test/resources/matched_date_property_format/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person_birth_date.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_person_birth_date.csv", "format": 
"CSV", "header": ["name", "birth", "city"], "charset": "UTF-8", diff --git a/src/test/resources/matched_encoding_charset/schema.groovy b/src/test/resources/matched_encoding_charset/schema.groovy new file mode 100644 index 000000000..1c56f15b4 --- /dev/null +++ b/src/test/resources/matched_encoding_charset/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); + +schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/matched_encoding_charset/struct.json b/src/test/resources/matched_encoding_charset/struct.json new file mode 100644 index 000000000..3db920b67 --- /dev/null +++ b/src/test/resources/matched_encoding_charset/struct.json @@ -0,0 +1,13 @@ +{ + "vertices": [ + { + "label": "software", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_software.csv", + "format": "CSV", + "charset": "GBK" + } + } + ] +} diff --git a/src/test/resources/multi_files_have_header/schema.groovy b/src/test/resources/multi_files_have_header/schema.groovy new file mode 100644 index 000000000..5e0616e0a --- /dev/null +++ b/src/test/resources/multi_files_have_header/schema.groovy @@ -0,0 +1,10 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name", "city").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); diff --git a/src/test/resources/struct_joint_pk.json b/src/test/resources/multi_files_have_header/struct.json similarity index 74% rename from src/test/resources/struct_joint_pk.json rename to src/test/resources/multi_files_have_header/struct.json index 7bb682636..1a1f613d0 100644 --- a/src/test/resources/struct_joint_pk.json +++ b/src/test/resources/multi_files_have_header/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_dir", "format": "CSV", "charset": "UTF-8" }, diff --git a/src/test/resources/pack200_compress_file/schema.groovy b/src/test/resources/pack200_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/pack200_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/pack200_compress_file/struct.json b/src/test/resources/pack200_compress_file/struct.json new file mode 100644 index 000000000..ba4193d52 --- /dev/null +++ b/src/test/resources/pack200_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.pack", + "format": "CSV", + "charset": "UTF-8", + 
"compression": "PACK200" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/snappy_framed_compress_file/schema.groovy b/src/test/resources/snappy_framed_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/snappy_framed_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/snappy_framed_compress_file/struct.json b/src/test/resources/snappy_framed_compress_file/struct.json new file mode 100644 index 000000000..d0e9f2e86 --- /dev/null +++ b/src/test/resources/snappy_framed_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.snappy", + "format": "CSV", + "charset": "UTF-8", + "compression": "SNAPPY_FRAMED" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/snappy_raw_compress_file/schema.groovy b/src/test/resources/snappy_raw_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/snappy_raw_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/snappy_raw_compress_file/struct.json b/src/test/resources/snappy_raw_compress_file/struct.json new file mode 100644 index 000000000..796486e24 --- /dev/null +++ b/src/test/resources/snappy_raw_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.snappy", + "format": "CSV", + "charset": "UTF-8", + "compression": "SNAPPY" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/struct_gbk.json b/src/test/resources/struct_gbk.json deleted file mode 100644 index f2431008e..000000000 --- a/src/test/resources/struct_gbk.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "vertices": [ - { - "label": "person", - "input": { - "type": "file", - "path": "src/test/resources/vertex_person.csv", - "format": "CSV", - "charset": "GBK" - }, - "mapping": { - "name": "name", - "age": "age", - "city": "city" - } - }, - { - "label": "software", - "input": { - "type": "file", - "path": "src/test/resources/vertex_software.csv", - "format": "CSV", - "charset": "GBK" - } - } - ], - "edges": [ - { - "label": "knows", - "source": ["source_name"], - "target": ["target_name"], - "input": { - "type": "file", - "path": "src/test/resources/edge_knows.csv", - "format": "CSV", - "charset": "GBK" - }, - "mapping": { - "source_name": "name", - "target_name": "name" - } - }, - { - "label": "created", - "source": ["source_name"], - "target": ["target_name"], - "input": { - "type": "file", - "path": "src/test/resources/edge_created.csv", - "format": "CSV", - "charset": "GBK" - }, - "mapping": { - "source_name": "name", - 
"target_name": "name" - } - } - ] -} \ No newline at end of file diff --git a/src/test/resources/too_few_columns/schema.groovy b/src/test/resources/too_few_columns/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/too_few_columns/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/too_few_columns/struct.json b/src/test/resources/too_few_columns/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/too_few_columns/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/too_many_columns/schema.groovy b/src/test/resources/too_many_columns/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/too_many_columns/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/too_many_columns/struct.json b/src/test/resources/too_many_columns/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/too_many_columns/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/unmatched_date_property_format/schema.groovy b/src/test/resources/unmatched_date_property_format/schema.groovy new file mode 100644 index 000000000..5a2e6373b --- /dev/null +++ b/src/test/resources/unmatched_date_property_format/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("birth").asDate().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "birth", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/unmatched_date_property_format/struct.json b/src/test/resources/unmatched_date_property_format/struct.json new file mode 100644 index 000000000..b4e286700 --- /dev/null +++ b/src/test/resources/unmatched_date_property_format/struct.json @@ -0,0 +1,20 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person_birth_date.csv", + "format": "CSV", + "header": ["name", "birth", "city"], + "charset": "UTF-8", + "date_format": "yyyy-MM-dd" + }, + "mapping": { + "name": "name", + "birth": "birth", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/unmatched_encoding_charset/schema.groovy b/src/test/resources/unmatched_encoding_charset/schema.groovy new file mode 100644 
index 000000000..1c56f15b4 --- /dev/null +++ b/src/test/resources/unmatched_encoding_charset/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); + +schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/unmatched_encoding_charset/struct.json b/src/test/resources/unmatched_encoding_charset/struct.json new file mode 100644 index 000000000..c82294cba --- /dev/null +++ b/src/test/resources/unmatched_encoding_charset/struct.json @@ -0,0 +1,12 @@ +{ + "vertices": [ + { + "label": "software", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_software.csv", + "format": "CSV" + } + } + ] +} diff --git a/src/test/resources/unmatched_property_datatype/schema.groovy b/src/test/resources/unmatched_property_datatype/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/unmatched_property_datatype/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/unmatched_property_datatype/struct.json b/src/test/resources/unmatched_property_datatype/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/unmatched_property_datatype/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/value_list_property_in_json_file/schema.groovy b/src/test/resources/value_list_property_in_json_file/schema.groovy new file mode 100644 index 000000000..cbb51d09f --- /dev/null +++ b/src/test/resources/value_list_property_in_json_file/schema.groovy @@ -0,0 +1,15 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); +schema.propertyKey("feel").asText().valueList().ifNotExist().create(); +schema.propertyKey("time").asText().valueSet().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); +schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); + +schema.edgeLabel("use").sourceLabel("person").targetLabel("software").properties("feel", "time").nullableKeys("feel", "time").ifNotExist().create(); diff --git a/src/test/resources/struct_edge_use.json b/src/test/resources/value_list_property_in_json_file/struct.json similarity index 69% rename from src/test/resources/struct_edge_use.json rename to src/test/resources/value_list_property_in_json_file/struct.json index 2843e90ac..9578b0e93 100644 
--- a/src/test/resources/struct_edge_use.json +++ b/src/test/resources/value_list_property_in_json_file/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", "format": "CSV", "charset": "UTF-8" }, @@ -17,8 +17,8 @@ { "label": "software", "input": { - "type": "file", - "path": "src/test/resources/vertex_software.csv", + "type": "${source_type}", + "path": "${store_path}/vertex_software.csv", "format": "CSV" } } @@ -29,8 +29,8 @@ "source": ["person_name"], "target": ["software_name"], "input": { - "type": "file", - "path": "src/test/resources/edge_use.json", + "type": "${source_type}", + "path": "${store_path}/edge_use.json", "format": "JSON" }, "mapping": { @@ -39,4 +39,4 @@ } } ] -} \ No newline at end of file +} diff --git a/src/test/resources/value_list_property_in_text_file/schema.groovy b/src/test/resources/value_list_property_in_text_file/schema.groovy new file mode 100644 index 000000000..2cbed18c8 --- /dev/null +++ b/src/test/resources/value_list_property_in_text_file/schema.groovy @@ -0,0 +1,13 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); +schema.propertyKey("feel").asInt().valueList().ifNotExist().create(); +schema.propertyKey("time").asText().valueSet().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); +schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); + +schema.edgeLabel("use").sourceLabel("person").targetLabel("software").properties("feel", "time").nullableKeys("feel", "time").ifNotExist().create(); diff --git a/src/test/resources/struct_edge_use_text.json b/src/test/resources/value_list_property_in_text_file/struct.json similarity index 74% rename from src/test/resources/struct_edge_use_text.json rename to src/test/resources/value_list_property_in_text_file/struct.json index 558a91ae0..4b0eb7f00 100644 --- a/src/test/resources/struct_edge_use_text.json +++ b/src/test/resources/value_list_property_in_text_file/struct.json @@ -3,8 +3,8 @@ { "label": "person", "input": { - "type": "file", - "path": "src/test/resources/vertex_person.txt", + "type": "${source_type}", + "path": "${store_path}/vertex_person.txt", "format": "TEXT", "header": ["name", "age", "city"], "charset": "UTF-8" @@ -18,8 +18,8 @@ { "label": "software", "input": { - "type": "file", - "path": "src/test/resources/vertex_software.txt", + "type": "${source_type}", + "path": "${store_path}/vertex_software.txt", "header": ["name", "lang", "price"], "format": "TEXT" } @@ -31,8 +31,8 @@ "source": ["person_name"], "target": ["software_name"], "input": { - "type": "file", - "path": "src/test/resources/edge_use.txt", + "type": "${source_type}", + "path": "${store_path}/edge_use.txt", "header": ["feel", "time"], "format": "TEXT" }, diff --git a/src/test/resources/value_set_property_in_json_file/schema.groovy b/src/test/resources/value_set_property_in_json_file/schema.groovy new file mode 100644 index 000000000..cbb51d09f --- /dev/null +++ b/src/test/resources/value_set_property_in_json_file/schema.groovy @@ -0,0 +1,15 @@ +// Define schema 
+schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); +schema.propertyKey("feel").asText().valueList().ifNotExist().create(); +schema.propertyKey("time").asText().valueSet().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); +schema.vertexLabel("software").properties("name", "lang", "price").primaryKeys("name").ifNotExist().create(); + +schema.edgeLabel("use").sourceLabel("person").targetLabel("software").properties("feel", "time").nullableKeys("feel", "time").ifNotExist().create(); diff --git a/src/test/resources/value_set_property_in_json_file/struct.json b/src/test/resources/value_set_property_in_json_file/struct.json new file mode 100644 index 000000000..9578b0e93 --- /dev/null +++ b/src/test/resources/value_set_property_in_json_file/struct.json @@ -0,0 +1,42 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + }, + { + "label": "software", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_software.csv", + "format": "CSV" + } + } + ], + "edges": [ + { + "label": "use", + "source": ["person_name"], + "target": ["software_name"], + "input": { + "type": "${source_type}", + "path": "${store_path}/edge_use.json", + "format": "JSON" + }, + "mapping": { + "person_name": "name", + "software_name": "name" + } + } + ] +} diff --git a/src/test/resources/vertex_id_exceed_limit/schema.groovy b/src/test/resources/vertex_id_exceed_limit/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/vertex_id_exceed_limit/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/vertex_id_exceed_limit/struct.json b/src/test/resources/vertex_id_exceed_limit/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/vertex_id_exceed_limit/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/vertex_id_exceed_limit_in_bytes/schema.groovy b/src/test/resources/vertex_id_exceed_limit_in_bytes/schema.groovy new file mode 100644 index 000000000..1c56f15b4 --- /dev/null +++ b/src/test/resources/vertex_id_exceed_limit_in_bytes/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("lang").asText().ifNotExist().create(); +schema.propertyKey("price").asDouble().ifNotExist().create(); + +schema.vertexLabel("software").properties("name", "lang", 
"price").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/vertex_id_exceed_limit_in_bytes/struct.json b/src/test/resources/vertex_id_exceed_limit_in_bytes/struct.json new file mode 100644 index 000000000..c82294cba --- /dev/null +++ b/src/test/resources/vertex_id_exceed_limit_in_bytes/struct.json @@ -0,0 +1,12 @@ +{ + "vertices": [ + { + "label": "software", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_software.csv", + "format": "CSV" + } + } + ] +} diff --git a/src/test/resources/vertex_joint_pks/schema.groovy b/src/test/resources/vertex_joint_pks/schema.groovy new file mode 100644 index 000000000..5e0616e0a --- /dev/null +++ b/src/test/resources/vertex_joint_pks/schema.groovy @@ -0,0 +1,10 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); +schema.propertyKey("weight").asDouble().ifNotExist().create(); +schema.propertyKey("date").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name", "city").ifNotExist().create(); + +schema.edgeLabel("knows").sourceLabel("person").targetLabel("person").properties("date", "weight").ifNotExist().create(); diff --git a/src/test/resources/vertex_joint_pks/struct.json b/src/test/resources/vertex_joint_pks/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/vertex_joint_pks/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/vertex_pk_contains_special_symbol/schema.groovy b/src/test/resources/vertex_pk_contains_special_symbol/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/vertex_pk_contains_special_symbol/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/vertex_pk_contains_special_symbol/struct.json b/src/test/resources/vertex_pk_contains_special_symbol/struct.json new file mode 100644 index 000000000..fa1520121 --- /dev/null +++ b/src/test/resources/vertex_pk_contains_special_symbol/struct.json @@ -0,0 +1,18 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.csv", + "format": "CSV", + "charset": "UTF-8" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/xz_compress_file/schema.groovy b/src/test/resources/xz_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/xz_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git 
a/src/test/resources/xz_compress_file/struct.json b/src/test/resources/xz_compress_file/struct.json new file mode 100644 index 000000000..af0b93972 --- /dev/null +++ b/src/test/resources/xz_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.xz", + "format": "CSV", + "charset": "UTF-8", + "compression": "XZ" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +} diff --git a/src/test/resources/z_compress_file/schema.groovy b/src/test/resources/z_compress_file/schema.groovy new file mode 100644 index 000000000..9296fd292 --- /dev/null +++ b/src/test/resources/z_compress_file/schema.groovy @@ -0,0 +1,6 @@ +// Define schema +schema.propertyKey("name").asText().ifNotExist().create(); +schema.propertyKey("age").asInt().ifNotExist().create(); +schema.propertyKey("city").asText().ifNotExist().create(); + +schema.vertexLabel("person").properties("name", "age", "city").primaryKeys("name").ifNotExist().create(); diff --git a/src/test/resources/z_compress_file/struct.json b/src/test/resources/z_compress_file/struct.json new file mode 100644 index 000000000..d2c2d4211 --- /dev/null +++ b/src/test/resources/z_compress_file/struct.json @@ -0,0 +1,19 @@ +{ + "vertices": [ + { + "label": "person", + "input": { + "type": "${source_type}", + "path": "${store_path}/vertex_person.z", + "format": "CSV", + "charset": "UTF-8", + "compression": "Z" + }, + "mapping": { + "name": "name", + "age": "age", + "city": "city" + } + } + ] +}
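The struct.json templates introduced in this change deliberately leave ${source_type} and ${store_path} unresolved, so that a single mapping file can serve the file, hdfs, and jdbc legs of the Travis matrix. Below is a minimal sketch of the substitution step a test harness might perform before handing the file to the loader, assuming plain string replacement is sufficient; the StructTemplate class and its render helper are hypothetical names, not part of the loader's API.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class StructTemplate {

    // Read a parameterized struct.json and fill in the two placeholders
    // used throughout the test resources above.
    public static String render(Path template, String sourceType,
                                String storePath) throws IOException {
        String content = new String(Files.readAllBytes(template),
                                    StandardCharsets.UTF_8);
        return content.replace("${source_type}", sourceType)
                      .replace("${store_path}", storePath);
    }

    public static void main(String[] args) throws IOException {
        // A "file" run resolves paths against the local test resources;
        // an "hdfs" run would pass "hdfs" and an HDFS directory instead.
        Path template = Paths.get("src/test/resources",
                                  "gzip_compress_file", "struct.json");
        System.out.println(render(template, "file", "src/test/resources"));
    }
}

The compressed fixtures (BZ2, DEFLATE, GZIP, LZ4, LZMA, PACK200, SNAPPY, XZ, Z) likewise imply a decompression step in front of the CSV parser. A sketch of reading one such fixture with Apache Commons Compress, assuming that library is on the classpath and that vertex_person.gz holds the usual person rows; CompressedCsvReader is a hypothetical name, not the loader's actual reader.

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.compress.compressors.CompressorStreamFactory;

public class CompressedCsvReader {

    public static void main(String[] args) throws Exception {
        InputStream raw = new BufferedInputStream(
                Files.newInputStream(Paths.get("vertex_person.gz")));
        // Wrap the raw stream with a GZIP decompressor; other fixtures
        // would use constants such as BZIP2 or XZ instead.
        InputStream decompressed = new CompressorStreamFactory()
                .createCompressorInputStream(CompressorStreamFactory.GZIP, raw);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(decompressed, StandardCharsets.UTF_8))) {
            reader.lines().forEach(System.out::println); // e.g. marko,29,Beijing
        }
    }
}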