Skip to content

Commit

Permalink
Support load data from HDFS and relational database (#14)
Browse files Browse the repository at this point in the history
Support load data from HDFS and relational database

1. Refactoring code for unit tests
- Add test for ordinary text file on hdfs
- Add test for compressed files
2. Add tests for jdbc(mysql)
3. Install mysql when profile is jdbc, remove unused test suite
4. Support passing 'fs.defaultFS' to connect to a remote hadoop

Change-Id: Idbae121701a97d0b7abce903621842f14877bd45
  • Loading branch information
Linary authored and zhoney committed Mar 18, 2019
1 parent 3ce27ee commit c67d89a
Show file tree
Hide file tree
Showing 130 changed files with 4,717 additions and 1,418 deletions.
26 changes: 21 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,40 @@ jdk:

sudo: required

cache:
directories:
- $HOME/.m2
- $HOME/downloads

branches:
only:
- master
- /^release-.*$/
- /^test-.*$/

install: mvn compile -Dmaven.javadoc.skip=true
install: mvn compile -Dmaven.javadoc.skip=true | grep -v "Downloading\|Downloaded"

before_script:
- $TRAVIS_DIR/install-hugegraph.sh $TRAVIS_BRANCH
- $TRAVIS_DIR/install-hugegraph.sh $TRAVIS_BRANCH | grep -v "Downloading\|Downloaded"
- |
if [ "$SOURCE_TYPE" == "hdfs" ]; then
$TRAVIS_DIR/install-hadoop.sh
fi
- |
if [ "$SOURCE_TYPE" == "jdbc" ]; then
$TRAVIS_DIR/install-mysql.sh
fi
script:
- mvn test -Dtest=LoaderTest
- mvn cobertura:cobertura
- mvn test -P${SOURCE_TYPE}

after_success:
- bash <(curl -s https://codecov.io/bash)

env:
matrix:
- SOURCE_TYPE=file
- SOURCE_TYPE=hdfs
- SOURCE_TYPE=jdbc
global:
- TRAVIS_DIR=assembly/travis
- TRAVIS_DIR=assembly/travis
4 changes: 0 additions & 4 deletions assembly/static/example/edge_created.json

This file was deleted.

4 changes: 4 additions & 0 deletions assembly/static/example/file/edge_created.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"source_name": "marko", "target_id": 1, "date": "2017-12-10", "weight": 0.4}
{"source_name": "josh", "target_id": 1, "date": "2009-11-11", "weight": 0.4}
{"source_name": "josh", "target_id": 2, "date": "2017-12-10", "weight": 1.0}
{"source_name": "peter", "target_id": 1, "date": "2017-03-24", "weight": 0.2}
File renamed without changes.
77 changes: 77 additions & 0 deletions assembly/static/example/file/schema.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Define schema
// Example graph schema for the local-file example. Declarations are ordered
// so that every name is defined before it is referenced: property keys,
// then vertex labels, vertex indexes, edge labels and finally edge indexes.
// Property keys used by the labels and indexes below. Note that "date" is
// declared as text, not as a date type.
schema.propertyKey("name").asText().ifNotExist().create();
schema.propertyKey("age").asInt().ifNotExist().create();
schema.propertyKey("city").asText().ifNotExist().create();
schema.propertyKey("weight").asDouble().ifNotExist().create();
schema.propertyKey("lang").asText().ifNotExist().create();
schema.propertyKey("date").asText().ifNotExist().create();
schema.propertyKey("price").asDouble().ifNotExist().create();

// Vertex label "person": "name" is the primary key; "age" and "city" are
// declared nullable.
schema.vertexLabel("person")
.properties("name", "age", "city")
.primaryKeys("name")
.nullableKeys("age", "city")
.ifNotExist()
.create();
// Vertex label "software": declares no primary key and calls
// useCustomizeNumberId() instead.
// NOTE(review): presumably the numeric id is supplied by the input data
// (the "id" column) - confirm against the loader.
schema.vertexLabel("software")
.useCustomizeNumberId()
.properties("name", "lang", "price")
.ifNotExist()
.create();

// Index labels on vertex properties: range() on the numeric keys
// ("age", "price"), secondary() on "city" and on the "age" + "city" pair.
schema.indexLabel("personByAge")
.onV("person")
.by("age")
.range()
.ifNotExist()
.create();
schema.indexLabel("personByCity")
.onV("person")
.by("city")
.secondary()
.ifNotExist()
.create();
schema.indexLabel("personByAgeAndCity")
.onV("person")
.by("age", "city")
.secondary()
.ifNotExist()
.create();
schema.indexLabel("softwareByPrice")
.onV("software")
.by("price")
.range()
.ifNotExist()
.create();

// Edge labels: "knows" links person -> person, "created" links
// person -> software. Both carry the "date" and "weight" properties.
schema.edgeLabel("knows")
.sourceLabel("person")
.targetLabel("person")
.properties("date", "weight")
.ifNotExist()
.create();
schema.edgeLabel("created")
.sourceLabel("person")
.targetLabel("software")
.properties("date", "weight")
.ifNotExist()
.create();

// Index labels on edge properties: secondary() on the text "date" key,
// range() on the numeric "weight" key.
schema.indexLabel("createdByDate")
.onE("created")
.by("date")
.secondary()
.ifNotExist()
.create();
schema.indexLabel("createdByWeight")
.onE("created")
.by("weight")
.range()
.ifNotExist()
.create();
schema.indexLabel("knowsByWeight")
.onE("knows")
.by("weight")
.range()
.ifNotExist()
.create();
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,24 @@
"label": "person",
"input": {
"type": "file",
"path": "example/vertex_person.csv",
"path": "example/file/vertex_person.csv",
"format": "CSV",
"header": ["name", "age", "city"],
"charset": "UTF-8"
},
"mapping": {
"name": "name",
"age": "age",
"city": "city"
"charset": "UTF-8",
"comment_symbols": ["#"]
},
"null_values": ["NULL", "null", ""]
},
{
"label": "software",
"input": {
"type": "file",
"path": "example/vertex_software.text",
"path": "example/file/vertex_software.txt",
"format": "TEXT",
"delimiter": "|",
"charset": "GBK"
},
"id": "id",
"ignored": ["ISBN"]
}
],
Expand All @@ -35,7 +32,7 @@
"target": ["target_name"],
"input": {
"type": "file",
"path": "example/edge_knows.json",
"path": "example/file/edge_knows.json",
"format": "JSON",
"date_format": "yyyyMMdd"
},
Expand All @@ -47,16 +44,15 @@
{
"label": "created",
"source": ["source_name"],
"target": ["target_name"],
"target": ["target_id"],
"input": {
"type": "file",
"path": "example/edge_created.json",
"path": "example/file/edge_created.json",
"format": "JSON",
"date_format": "yyyy-MM-dd"
},
"mapping": {
"source_name": "name",
"target_name": "name"
"source_name": "name"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# This is a comment
marko,29,Beijing
vadas,27,Hongkong
josh,32,Beijing
Expand Down
4 changes: 4 additions & 0 deletions assembly/static/example/hdfs/edge_created.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"source_name": "marko", "target_id": 1, "date": "2017-12-10", "weight": 0.4}
{"source_name": "josh", "target_id": 1, "date": "2009-11-11", "weight": 0.4}
{"source_name": "josh", "target_id": 2, "date": "2017-12-10", "weight": 1.0}
{"source_name": "peter", "target_id": 1, "date": "2017-03-24", "weight": 0.2}
2 changes: 2 additions & 0 deletions assembly/static/example/hdfs/edge_knows.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"source_name": "marko", "target_name": "vadas", "date": "2016-01-10", "weight": 0.5}
{"source_name": "marko", "target_name": "josh", "date": "2013-02-20", "weight": 1.0}
77 changes: 77 additions & 0 deletions assembly/static/example/hdfs/schema.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Define schema
// Example graph schema for the HDFS example. Declarations are ordered so
// that every name is defined before it is referenced: property keys, then
// vertex labels, vertex indexes, edge labels and finally edge indexes.
// Property keys used by the labels and indexes below. Note that "date" is
// declared as text, not as a date type.
schema.propertyKey("name").asText().ifNotExist().create();
schema.propertyKey("age").asInt().ifNotExist().create();
schema.propertyKey("city").asText().ifNotExist().create();
schema.propertyKey("weight").asDouble().ifNotExist().create();
schema.propertyKey("lang").asText().ifNotExist().create();
schema.propertyKey("date").asText().ifNotExist().create();
schema.propertyKey("price").asDouble().ifNotExist().create();

// Vertex label "person": "name" is the primary key; "age" and "city" are
// declared nullable.
schema.vertexLabel("person")
.properties("name", "age", "city")
.primaryKeys("name")
.nullableKeys("age", "city")
.ifNotExist()
.create();
// Vertex label "software": declares no primary key and calls
// useCustomizeNumberId() instead.
// NOTE(review): presumably the numeric id is supplied by the input data
// (the "id" column) - confirm against the loader.
schema.vertexLabel("software")
.useCustomizeNumberId()
.properties("name", "lang", "price")
.ifNotExist()
.create();

// Index labels on vertex properties: range() on the numeric keys
// ("age", "price"), secondary() on "city" and on the "age" + "city" pair.
schema.indexLabel("personByAge")
.onV("person")
.by("age")
.range()
.ifNotExist()
.create();
schema.indexLabel("personByCity")
.onV("person")
.by("city")
.secondary()
.ifNotExist()
.create();
schema.indexLabel("personByAgeAndCity")
.onV("person")
.by("age", "city")
.secondary()
.ifNotExist()
.create();
schema.indexLabel("softwareByPrice")
.onV("software")
.by("price")
.range()
.ifNotExist()
.create();

// Edge labels: "knows" links person -> person, "created" links
// person -> software. Both carry the "date" and "weight" properties.
schema.edgeLabel("knows")
.sourceLabel("person")
.targetLabel("person")
.properties("date", "weight")
.ifNotExist()
.create();
schema.edgeLabel("created")
.sourceLabel("person")
.targetLabel("software")
.properties("date", "weight")
.ifNotExist()
.create();

// Index labels on edge properties: secondary() on the text "date" key,
// range() on the numeric "weight" key.
schema.indexLabel("createdByDate")
.onE("created")
.by("date")
.secondary()
.ifNotExist()
.create();
schema.indexLabel("createdByWeight")
.onE("created")
.by("weight")
.range()
.ifNotExist()
.create();
schema.indexLabel("knowsByWeight")
.onE("knows")
.by("weight")
.range()
.ifNotExist()
.create();
57 changes: 57 additions & 0 deletions assembly/static/example/hdfs/struct.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"vertices": [
{
"label": "person",
"input": {
"type": "hdfs",
"path": "hdfs://localhost:8020/example/vertex_person.csv",
"format": "CSV",
"header": ["name", "age", "city"],
"charset": "UTF-8",
"comment_symbols": ["#"]
},
"null_values": ["NULL", "null", ""]
},
{
"label": "software",
"input": {
"type": "hdfs",
"path": "hdfs://localhost:8020/example/vertex_software.text",
"format": "TEXT",
"delimiter": "|",
"charset": "GBK"
},
"id": "id",
"ignored": ["ISBN"]
}
],
"edges": [
{
"label": "knows",
"source": ["source_name"],
"target": ["target_name"],
"input": {
"type": "hdfs",
"path": "hdfs://localhost:8020/example/edge_knows.json",
"format": "JSON"
},
"mapping": {
"source_name": "name",
"target_name": "name"
}
},
{
"label": "created",
"source": ["source_name"],
"target": ["target_id"],
"input": {
"type": "hdfs",
"path": "hdfs://localhost:8020/example/edge_created.json",
"format": "JSON"
},
"mapping": {
"source_name": "name"
}
}
]
}
7 changes: 7 additions & 0 deletions assembly/static/example/hdfs/vertex_person.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This is a comment
marko,29,Beijing
vadas,27,Hongkong
josh,32,Beijing
peter,35,Shanghai
"li,nary",26,"Wu,han"
tom,null,NULL
3 changes: 3 additions & 0 deletions assembly/static/example/hdfs/vertex_software.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id|name|lang|price|ISBN
1|lop|java|328|ISBN978-7-107-18618-5
2|ripple|java|199|ISBN978-7-100-13678-5
Loading

0 comments on commit c67d89a

Please sign in to comment.