From e6bad5154cf111582ba4cf9e8e17936f459cc3b0 Mon Sep 17 00:00:00 2001 From: Anqi Date: Wed, 31 Aug 2022 10:59:12 +0800 Subject: [PATCH] connector for spark 2.2 (#49) * pyspark example added (#51) * pyspark example added * Update README_CN.md * support delete related edges when delete vertex (#53) * support delete related edges when delete vertex * add test * add example for delete vertex with edge (#54) * doc: pyspark write example (#55) * doc: pyspark write example * Added pyshell calling lines and python file header discussed in https://github.com/vesoft-inc/nebula-spark-connector/issues/50 Thanks to @Reid00 * Update README.md wording * Update README_CN.md * Update README.md * Update README_CN.md * Update README.md * Update README_CN.md * spark2.2 reader initial commit * spark2.2 reader initial commit * extract common config for multi spark version * delete common config files * extract common config and utils * remove common test * spark connector reader for spark 2.2 * spark connector writer for spark 2.2 * revert example * refactor spark version & close metaProvider after finish writing * refactor common package name * fix scan part * refactor spark version for spark2.2 * connector writer for spark2.2 Co-authored-by: Wey Gu --- README.md | 120 ++++++ README_CN.md | 119 ++++++ .../connector/NebulaSparkWriterExample.scala | 3 + nebula-spark-common/pom.xml | 251 +++++++++++++ .../nebula/connector/NebulaConfig.scala | 28 +- .../vesoft/nebula/connector/NebulaEnum.scala | 0 .../nebula/connector/NebulaOptions.scala | 4 +- .../vesoft/nebula/connector/NebulaUtils.scala | 0 .../nebula/connector/PartitionUtils.scala | 0 .../vesoft/nebula/connector/Template.scala | 7 +- .../connector/exception/Exception.scala | 0 .../connector/nebula/GraphProvider.scala | 6 +- .../connector/nebula/MetaProvider.scala | 12 +- .../com/vesoft/nebula/connector/package.scala | 67 ++++ .../vesoft/nebula/connector/ssl/SSLEnum.scala | 0 .../nebula/connector/ssl/SSLSignParams.scala | 0 .../connector/writer/NebulaExecutor.scala | 48 +-- .../nebula/connector/DataTypeEnumSuite.scala | 0 .../nebula/connector/NebulaConfigSuite.scala | 0 .../nebula/connector/NebulaUtilsSuite.scala | 0 .../connector/PartitionUtilsSuite.scala | 0 .../connector/mock/NebulaGraphMock.scala | 192 ++++++++++ .../connector/nebula/GraphProviderTest.scala | 2 +- .../connector/nebula/MetaProviderTest.scala | 3 +- nebula-spark-connector/pom.xml | 30 +- .../com/vesoft/nebula/connector/package.scala | 65 +--- .../reader/NebulaPartitionReader.scala | 5 +- .../connector/reader/NebulaSourceReader.scala | 2 +- .../connector/writer/NebulaEdgeWriter.scala | 3 +- .../connector/writer/NebulaVertexWriter.scala | 13 +- .../nebula/connector/mock/SparkMock.scala | 42 ++- .../writer/NebulaExecutorSuite.scala | 15 +- .../connector/writer/WriteDeleteSuite.scala | 25 +- .../connector/writer/WriteInsertSuite.scala | 2 +- nebula-spark-connector_2.2/.gitignore | 36 ++ nebula-spark-connector_2.2/pom.xml | 306 +++++++++++++++ .../nebula/connector/NebulaDataSource.scala | 163 ++++++++ .../com/vesoft/nebula/connector/package.scala | 336 +++++++++++++++++ .../connector/reader/NebulaEdgeReader.scala | 77 ++++ .../connector/reader/NebulaIterator.scala | 167 +++++++++ .../nebula/connector/reader/NebulaRDD.scala | 65 ++++ .../connector/reader/NebulaRelation.scala | 99 +++++ .../reader/NebulaRelationProvider.scala | 29 ++ .../connector/reader/NebulaVertexReader.scala | 78 ++++ .../writer/NebulaCommitMessage.scala | 8 + .../connector/writer/NebulaEdgeWriter.scala | 109 ++++++ 
.../writer/NebulaInsertableRelation.scala | 13 + .../connector/writer/NebulaVertexWriter.scala | 94 +++++ .../connector/writer/NebulaWriter.scala | 74 ++++ .../writer/NebulaWriterResultRelation.scala | 17 + .../src/test/resources/docker-compose.yaml | 353 ++++++++++++++++++ .../src/test/resources/edge.csv | 14 + .../src/test/resources/log4j.properties | 6 + .../src/test/resources/vertex.csv | 14 + .../connector/mock/NebulaGraphMock.scala | 192 ++++++++++ .../nebula/connector/mock/SparkMock.scala | 179 +++++++++ .../nebula/connector/reader/ReadSuite.scala | 340 +++++++++++++++++ .../connector/writer/WriteDeleteSuite.scala | 50 +++ .../connector/writer/WriteInsertSuite.scala | 76 ++++ pom.xml | 44 +++ 60 files changed, 3861 insertions(+), 142 deletions(-) create mode 100644 nebula-spark-common/pom.xml rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala (97%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/NebulaEnum.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala (98%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/NebulaUtils.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/PartitionUtils.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/Template.scala (75%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/exception/Exception.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala (95%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala (93%) create mode 100644 nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/package.scala rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/ssl/SSLEnum.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/ssl/SSLSignParams.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala (93%) rename {nebula-spark-connector => nebula-spark-common}/src/test/scala/com/vesoft/nebula/connector/DataTypeEnumSuite.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/test/scala/com/vesoft/nebula/connector/NebulaConfigSuite.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/test/scala/com/vesoft/nebula/connector/NebulaUtilsSuite.scala (100%) rename {nebula-spark-connector => nebula-spark-common}/src/test/scala/com/vesoft/nebula/connector/PartitionUtilsSuite.scala (100%) create mode 100644 nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala rename {nebula-spark-connector => nebula-spark-common}/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala (95%) rename {nebula-spark-connector => nebula-spark-common}/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala (97%) create mode 100644 nebula-spark-connector_2.2/.gitignore create mode 100644 nebula-spark-connector_2.2/pom.xml create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/NebulaDataSource.scala create mode 
100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/package.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaEdgeReader.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaIterator.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRDD.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelation.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelationProvider.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaVertexReader.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaCommitMessage.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaInsertableRelation.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriter.scala
create mode 100644 nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriterResultRelation.scala
create mode 100644 nebula-spark-connector_2.2/src/test/resources/docker-compose.yaml
create mode 100644 nebula-spark-connector_2.2/src/test/resources/edge.csv
create mode 100644 nebula-spark-connector_2.2/src/test/resources/log4j.properties
create mode 100644 nebula-spark-connector_2.2/src/test/resources/vertex.csv
create mode 100644 nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala
create mode 100644 nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala
create mode 100644 nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/reader/ReadSuite.scala
create mode 100644 nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala
create mode 100644 nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala

diff --git a/README.md b/README.md
index d71942a2..caad4442 100644
--- a/README.md
+++ b/README.md
@@ -141,6 +141,126 @@ Nebula Spark Connector 2.0/3.0 only supports Nebula Graph 2.x/3.x. If you are us
 For more information on usage, please refer to [Example](https://github.com/vesoft-inc/nebula-spark-connector/tree/master/example/src/main/scala/com/vesoft/nebula/examples/connector).
+
+## PySpark with Nebula Spark Connector
+
+Below is an example of calling the nebula-spark-connector jar package from PySpark.
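+
+Before running the snippets below, the connector jar must be on the PySpark classpath. One way to do that is shown here (a sketch, assuming a locally built 3.0.0 jar; the same invocation is detailed at the end of this chapter):
+
+```bash
+/spark/bin/pyspark --driver-class-path nebula-spark-connector-3.0.0.jar --jars nebula-spark-connector-3.0.0.jar
+```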
+
+### Read in PySpark
+
+Read from NebulaGraph with a `metaAddress` of `"metad0:9559"` as a dataframe:
+
+```python
+df = spark.read.format(
+    "com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "returnCols", "name,age").option(
+    "metaAddress", "metad0:9559").option(
+    "partitionNumber", 1).load()
+```
+
+You may then `show` the dataframe as follows:
+
+```python
+>>> df.show(n=2)
++---------+--------------+---+
+|_vertexId|          name|age|
++---------+--------------+---+
+|player105|   Danny Green| 31|
+|player109|Tiago Splitter| 34|
++---------+--------------+---+
+only showing top 2 rows
+```
+
+### Write in PySpark
+
+Let's try a write example; by default, the `writeMode` is `insert`:
+
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "vidPolicy", "").option(
+    "vertexField", "_vertexId").option(
+    "batch", 1).option(
+    "metaAddress", "metad0:9559").option(
+    "graphAddress", "graphd1:9669").option(
+    "passwd", "nebula").option(
+    "user", "root").save()
+```
+
+For the delete or update write mode, we can (for instance) set `writeMode` to `delete` like this:
+
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "vidPolicy", "").option(
+    "vertexField", "_vertexId").option(
+    "batch", 1).option(
+    "metaAddress", "metad0:9559").option(
+    "graphAddress", "graphd1:9669").option(
+    "passwd", "nebula").option(
+    "writeMode", "delete").option(
+    "user", "root").save()
+```
+
+### Options in PySpark
+
+For more options, e.g. deleting the related edges when a vertex is deleted, refer to [nebula/connector/NebulaOptions.scala](https://github.com/vesoft-inc/nebula-spark-connector/blob/master/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala); there we can see that the option is named `deleteEdge`.
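+
+For example, a delete that also removes the vertex's related edges might look like this (a minimal sketch, assuming the same dataframe and space as above; `deleteEdge` is the option defined in the listing below):
+
+```python
+# sketch: the delete example above, plus the deleteEdge option
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "vidPolicy", "").option(
+    "vertexField", "_vertexId").option(
+    "batch", 1).option(
+    "metaAddress", "metad0:9559").option(
+    "graphAddress", "graphd1:9669").option(
+    "passwd", "nebula").option(
+    "writeMode", "delete").option(
+    "deleteEdge", "true").option(
+    "user", "root").save()
+```
+
+The write-related option names are defined in NebulaOptions.scala as: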
+
+```scala
+  /** write config */
+  val RATE_LIMIT: String = "rateLimit"
+  val VID_POLICY: String = "vidPolicy"
+  val SRC_POLICY: String = "srcPolicy"
+  val DST_POLICY: String = "dstPolicy"
+  val VERTEX_FIELD = "vertexField"
+  val SRC_VERTEX_FIELD = "srcVertexField"
+  val DST_VERTEX_FIELD = "dstVertexField"
+  val RANK_FIELD = "rankFiled"
+  val BATCH: String = "batch"
+  val VID_AS_PROP: String = "vidAsProp"
+  val SRC_AS_PROP: String = "srcAsProp"
+  val DST_AS_PROP: String = "dstAsProp"
+  val RANK_AS_PROP: String = "rankAsProp"
+  val WRITE_MODE: String = "writeMode"
+  val DELETE_EDGE: String = "deleteEdge"
+```
+
+### Call Nebula Spark Connector in PySpark shell and .py file
+
+Also, below are examples of how to run the above code in the PySpark shell or in Python code files:
+
+- Call with PySpark shell:
+
+```bash
+/spark/bin/pyspark --driver-class-path nebula-spark-connector-3.0.0.jar --jars nebula-spark-connector-3.0.0.jar
+```
+
+- In Python code:
+
+```
+from pyspark.sql import SparkSession
+
+spark = SparkSession.builder.config(
+    "spark.jars",
+    "/path_to/nebula-spark-connector-3.0.0.jar").appName(
+    "nebula-connector").getOrCreate()
+
+df = spark.read.format(
+    "com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "returnCols", "name,age").option(
+    "metaAddress", "metad0:9559").option(
+    "partitionNumber", 1).load()
+```
+
 ## Version match
 
 The version correspondence between Nebula Spark Connector and Nebula is as follows:

diff --git a/README_CN.md b/README_CN.md
index 209eadf2..d044134a 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -143,6 +143,125 @@ Nebula Spark Connector 2.0/3.0 only supports Nebula Graph 2.x/3.x. If you are
 For more usage examples, please refer to [Example](https://github.com/vesoft-inc/nebula-spark-connector/tree/master/example/src/main/scala/com/vesoft/nebula/examples/connector).
+
+## Using Nebula Spark Connector in PySpark
+
+### Reading NebulaGraph data in PySpark
+
+Read all the data under one tag from a NebulaGraph whose `metaAddress` is `"metad0:9559"` into a dataframe:
+
+```python
+df = spark.read.format(
+    "com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "returnCols", "name,age").option(
+    "metaAddress", "metad0:9559").option(
+    "partitionNumber", 1).load()
+```
+
+You can then `show` the dataframe like this:
+
+```python
+>>> df.show(n=2)
++---------+--------------+---+
+|_vertexId|          name|age|
++---------+--------------+---+
+|player105|   Danny Green| 31|
+|player109|Tiago Splitter| 34|
++---------+--------------+---+
+only showing top 2 rows
+```
+
+### Writing data to NebulaGraph in PySpark
+
+Next, try a write example; when not specified, `writeMode` defaults to `insert`:
+
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "vidPolicy", "").option(
+    "vertexField", "_vertexId").option(
+    "batch", 1).option(
+    "metaAddress", "metad0:9559").option(
+    "graphAddress", "graphd1:9669").option(
+    "passwd", "nebula").option(
+    "user", "root").save()
+```
+
+To use a non-default write mode such as `delete` or `update`, add the `writeMode` option; for example, for `delete`:
+
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "vidPolicy", "").option(
+    "vertexField", "_vertexId").option(
+    "batch", 1).option(
+    "metaAddress", "metad0:9559").option(
"graphd1:9669").option( + "passwd", "nebula").option( + "writeMode", "delete").option( + "user", "root").save() +``` + +### 关于 PySpark 读写的 option + + +对于其他的 option,比如删除点的时候的 `withDeleteEdge` 可以参考 [nebula/connector/NebulaOptions.scala +](https://github.com/vesoft-inc/nebula-spark-connector/blob/master/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala) 的字符串配置定义,我们可以看到它的字符串定义字段是 `deleteEdge` : + +```scala + /** write config */ + val RATE_LIMIT: String = "rateLimit" + val VID_POLICY: String = "vidPolicy" + val SRC_POLICY: String = "srcPolicy" + val DST_POLICY: String = "dstPolicy" + val VERTEX_FIELD = "vertexField" + val SRC_VERTEX_FIELD = "srcVertexField" + val DST_VERTEX_FIELD = "dstVertexField" + val RANK_FIELD = "rankFiled" + val BATCH: String = "batch" + val VID_AS_PROP: String = "vidAsProp" + val SRC_AS_PROP: String = "srcAsProp" + val DST_AS_PROP: String = "dstAsProp" + val RANK_AS_PROP: String = "rankAsProp" + val WRITE_MODE: String = "writeMode" + val DELETE_EDGE: String = "deleteEdge" +``` + +### 如何在 PySpark 中调用 Nebula Spark Connector + +最后,这里给出用 PySpark Shell 和在 Python 代码里调用 Spark Connector 的例子: + +- Call with PySpark shell: + +```bash +/spark/bin/pyspark --driver-class-path nebula-spark-connector-3.0.0.jar --jars nebula-spark-connector-3.0.0.jar +``` + +- In Python code: + +``` +from pyspark.sql import SparkSession + +spark = SparkSession.builder.config( + "nebula-spark-connector-3.0.0.jar", + "/path_to/nebula-spark-connector-3.0.0.jar").appName( + "nebula-connector").getOrCreate() + +df = spark.read.format( + "com.vesoft.nebula.connector.NebulaDataSource").option( + "type", "vertex").option( + "spaceName", "basketballplayer").option( + "label", "player").option( + "returnCols", "name,age").option( + "metaAddress", "metad0:9559").option( + "partitionNumber", 1).load() +``` + ## 版本匹配 Nebula Spark Connector 和 Nebula 的版本对应关系如下: diff --git a/example/src/main/scala/com/vesoft/nebula/examples/connector/NebulaSparkWriterExample.scala b/example/src/main/scala/com/vesoft/nebula/examples/connector/NebulaSparkWriterExample.scala index 2fe34482..279935f9 100644 --- a/example/src/main/scala/com/vesoft/nebula/examples/connector/NebulaSparkWriterExample.scala +++ b/example/src/main/scala/com/vesoft/nebula/examples/connector/NebulaSparkWriterExample.scala @@ -205,6 +205,9 @@ object NebulaSparkWriterExample { .withUser("root") .withPasswd("nebula") .withWriteMode(WriteMode.DELETE) + // config deleteEdge true, means delete related edges when delete vertex + // refer https://docs.nebula-graph.com.cn/master/3.ngql-guide/12.vertex-statements/4.delete-vertex/#_1 + .withDeleteEdge(true) .build() df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() } diff --git a/nebula-spark-common/pom.xml b/nebula-spark-common/pom.xml new file mode 100644 index 00000000..e5e5b138 --- /dev/null +++ b/nebula-spark-common/pom.xml @@ -0,0 +1,251 @@ + + + + nebula-spark + com.vesoft + 3.0-SNAPSHOT + + 4.0.0 + + nebula-spark-common + + + + 3.0-SNAPSHOT + 1.8 + 1.8 + 3.2.3 + 4.13.1 + 1.13 + + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + + + org.apache.spark + spark-graphx_${scala.binary.version} + ${spark.version} + + + com.vesoft + client + ${nebula.version} + + + commons-codec + commons-codec + ${codec.version} + + + + org.scalatest + scalatest-funsuite_2.11 + ${scalatest.version} + + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + 2.8.2 + + + 
default-deploy + deploy + + true + + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + + + default-deploy + deploy + + deploy + + + ossrh + true + + + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.2.0 + + + attach-sources + + jar + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.2.0 + + + attach-javadocs + package + + jar + + + UTF-8 + UTF-8 + + -source 8 + -Xdoclint:none + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.12.4 + + + **/*Test.* + **/*Suite.* + + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + + test + + test + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + ${scala.version} + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + + + net.alchim31.maven + scala-maven-plugin + 4.4.0 + + + Scaladoc + + doc + + prepare-package + + + -nobootcp + -no-link-warnings + + + + + attach-javadocs + + doc-jar + + + + -nobootcp + -no-link-warnings + + + + + scala-compile-first + + compile + + + + + + + org.jacoco + jacoco-maven-plugin + 0.8.7 + + + + prepare-agent + + + + report + test + + report + + + + + + + diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala similarity index 97% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala index c470c54e..9e8a0543 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala +++ b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaConfig.scala @@ -241,12 +241,14 @@ class WriteNebulaVertexConfig(space: String, vidAsProp: Boolean, user: String, passwd: String, - writeMode: String) + writeMode: String, + deleteEdge: Boolean) extends WriteNebulaConfig(space, user, passwd, batch, writeMode) { - def getTagName = tagName - def getVidField = vidField - def getVidPolicy = if (vidPolicy == null) "" else vidPolicy - def getVidAsProp = vidAsProp + def getTagName = tagName + def getVidField = vidField + def getVidPolicy = if (vidPolicy == null) "" else vidPolicy + def getVidAsProp = vidAsProp + def getDeleteEdge = deleteEdge } /** @@ -270,6 +272,9 @@ object WriteNebulaVertexConfig { /** whether set vid as property */ var vidAsProp: Boolean = false + /** whether delete the related edges of vertex */ + var deleteEdge: Boolean = false + /** * set space name */ @@ -343,6 +348,14 @@ object WriteNebulaVertexConfig { this } + /** + * set whether delete related edges when delete vertex + */ + def withDeleteEdge(deleteEdge: Boolean): WriteVertexConfigBuilder = { + this.deleteEdge = deleteEdge + this + } + /** * check and get WriteNebulaVertexConfig */ @@ -356,7 +369,8 @@ object WriteNebulaVertexConfig { vidAsProp, user, passwd, - writeMode) + writeMode, + deleteEdge) } private def check(): Unit = { @@ -388,7 +402,7 @@ object WriteNebulaVertexConfig { } LOG.info( s"NebulaWriteVertexConfig={space=$space,tagName=$tagName,vidField=$vidField," + - s"vidPolicy=$vidPolicy,batch=$batch,writeMode=$writeMode}") + s"vidPolicy=$vidPolicy,batch=$batch,writeMode=$writeMode,deleteEdge=$deleteEdge}") } } diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaEnum.scala 
b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaEnum.scala similarity index 100% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaEnum.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaEnum.scala diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala similarity index 98% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala index 11051a68..47f77486 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala +++ b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala @@ -8,7 +8,6 @@ package com.vesoft.nebula.connector import java.util.Properties import com.google.common.net.HostAndPort -import com.vesoft.nebula.connector.connector.Address import com.vesoft.nebula.connector.ssl.{CASSLSignParams, SSLSignType, SelfSSLSignParams} import org.apache.commons.lang.StringUtils import org.apache.spark.internal.Logging @@ -131,6 +130,7 @@ class NebulaOptions(@transient val parameters: CaseInsensitiveMap[String])( var dstAsProp: Boolean = _ var rankAsProp: Boolean = _ var writeMode: WriteMode.Value = _ + var deleteEdge: Boolean = _ if (operaType == OperaType.WRITE) { require(parameters.isDefinedAt(GRAPH_ADDRESS), @@ -166,6 +166,7 @@ class NebulaOptions(@transient val parameters: CaseInsensitiveMap[String])( rankAsProp = parameters.getOrElse(RANK_AS_PROP, false).toString.toBoolean writeMode = WriteMode.withName(parameters.getOrElse(WRITE_MODE, DEFAULT_WRITE_MODE).toString.toLowerCase) + deleteEdge = parameters.getOrElse(DELETE_EDGE, false).toString.toBoolean } def getReturnCols: List[String] = { @@ -249,6 +250,7 @@ object NebulaOptions { val DST_AS_PROP: String = "dstAsProp" val RANK_AS_PROP: String = "rankAsProp" val WRITE_MODE: String = "writeMode" + val DELETE_EDGE: String = "deleteEdge" val DEFAULT_TIMEOUT: Int = 3000 val DEFAULT_CONNECTION_TIMEOUT: Int = 3000 diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaUtils.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaUtils.scala similarity index 100% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaUtils.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/NebulaUtils.scala diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/PartitionUtils.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/PartitionUtils.scala similarity index 100% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/PartitionUtils.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/PartitionUtils.scala diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/Template.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/Template.scala similarity index 75% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/Template.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/Template.scala index b23213b4..748612df 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/Template.scala +++ 
b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/Template.scala @@ -19,7 +19,8 @@ object NebulaTemplate { private[connector] val UPDATE_EDGE_TEMPLATE = "UPDATE %s ON `%s` %s->%s@%d SET %s" private[connector] val UPDATE_VALUE_TEMPLATE = "`%s`=%s" - private[connector] val DELETE_VERTEX_TEMPLATE = "DELETE VERTEX %s" - private[connector] val DELETE_EDGE_TEMPLATE = "DELETE EDGE `%s` %s" - private[connector] val EDGE_ENDPOINT_TEMPLATE = "%s->%s@%d" + private[connector] val DELETE_VERTEX_TEMPLATE = "DELETE VERTEX %s" + private[connector] val DELETE_VERTEX_WITH_EDGE_TEMPLATE = "DELETE VERTEX %s WITH EDGE" + private[connector] val DELETE_EDGE_TEMPLATE = "DELETE EDGE `%s` %s" + private[connector] val EDGE_ENDPOINT_TEMPLATE = "%s->%s@%d" } diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/exception/Exception.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/exception/Exception.scala similarity index 100% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/exception/Exception.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/exception/Exception.scala diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala similarity index 95% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala index 614de134..1a61bd38 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala +++ b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala @@ -13,7 +13,7 @@ import com.vesoft.nebula.client.graph.data.{ SelfSignedSSLParam } import com.vesoft.nebula.client.graph.net.{NebulaPool, Session} -import com.vesoft.nebula.connector.connector.Address +import com.vesoft.nebula.connector.Address import com.vesoft.nebula.connector.exception.GraphConnectException import com.vesoft.nebula.connector.ssl.{CASSLSignParams, SSLSignType, SelfSSLSignParams} import org.apache.log4j.Logger @@ -32,7 +32,7 @@ class GraphProvider(addresses: List[Address], selfSignParam: SelfSSLSignParams = null) extends AutoCloseable with Serializable { - private[this] lazy val LOG = Logger.getLogger(this.getClass) + @transient private[this] lazy val LOG = Logger.getLogger(this.getClass) @transient val nebulaPoolConfig = new NebulaPoolConfig @@ -62,7 +62,7 @@ class GraphProvider(addresses: List[Address], } pool.init(address.asJava, nebulaPoolConfig) - var session: Session = null + var session: Session = _ /** * release session diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala similarity index 93% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala index cde1108a..0f2e2f39 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala +++ b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala @@ -13,8 +13,7 @@ import com.vesoft.nebula.client.graph.data.{ SelfSignedSSLParam } import 
com.vesoft.nebula.client.meta.MetaClient -import com.vesoft.nebula.connector.connector.Address -import com.vesoft.nebula.connector.DataTypeEnum +import com.vesoft.nebula.connector.{Address, DataTypeEnum} import com.vesoft.nebula.connector.ssl.{CASSLSignParams, SSLSignType, SelfSSLSignParams} import com.vesoft.nebula.meta.Schema @@ -29,11 +28,12 @@ class MetaProvider(addresses: List[Address], sslSignType: String = null, caSignParam: CASSLSignParams, selfSignParam: SelfSSLSignParams) - extends AutoCloseable { + extends AutoCloseable + with Serializable { - val metaAddress = addresses.map(address => new HostAddress(address._1, address._2)).asJava - var client: MetaClient = null - var sslParam: SSLParam = null + val metaAddress = addresses.map(address => new HostAddress(address._1, address._2)).asJava + @transient var client: MetaClient = null + @transient var sslParam: SSLParam = null if (enableSSL) { SSLSignType.withName(sslSignType) match { case SSLSignType.CA => diff --git a/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/package.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/package.scala new file mode 100644 index 00000000..c9137c02 --- /dev/null +++ b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/package.scala @@ -0,0 +1,67 @@ +package com.vesoft.nebula + +import com.vesoft.nebula.connector.writer.NebulaExecutor + +package object connector { + + type Address = (String, Int) + type NebulaType = Int + type Prop = List[Any] + type PropertyNames = List[String] + type PropertyValues = List[Any] + + type VertexID = Long + type VertexIDSlice = String + type NebulaGraphxVertex = (VertexID, PropertyValues) + type NebulaGraphxEdge = org.apache.spark.graphx.Edge[(EdgeRank, Prop)] + type EdgeRank = Long + + case class NebulaVertex(vertexIDSlice: VertexIDSlice, values: PropertyValues) { + def propertyValues = values.mkString(", ") + + override def toString: String = { + s"Vertex ID: ${vertexIDSlice}, Values: ${values.mkString(", ")}" + } + } + + case class NebulaVertices(propNames: PropertyNames, + values: List[NebulaVertex], + policy: Option[KeyPolicy.Value]) { + + def propertyNames: String = NebulaExecutor.escapePropName(propNames).mkString(",") + + override def toString: String = { + s"Vertices: " + + s"Property Names: ${propNames.mkString(", ")}" + + s"Vertex Values: ${values.mkString(", ")} " + + s"with policy: ${policy}" + } + } + + case class NebulaEdge(source: VertexIDSlice, + target: VertexIDSlice, + rank: Option[EdgeRank], + values: PropertyValues) { + def propertyValues: String = values.mkString(", ") + + override def toString: String = { + s"Edge: ${source}->${target}@${rank} values: ${propertyValues}" + } + } + + case class NebulaEdges(propNames: PropertyNames, + values: List[NebulaEdge], + sourcePolicy: Option[KeyPolicy.Value], + targetPolicy: Option[KeyPolicy.Value]) { + def propertyNames: String = NebulaExecutor.escapePropName(propNames).mkString(",") + def getSourcePolicy = sourcePolicy + def getTargetPolicy = targetPolicy + + override def toString: String = { + "Edges:" + + s" Property Names: ${propNames.mkString(", ")}" + + s" with source policy ${sourcePolicy}" + + s" with target policy ${targetPolicy}" + } + } +} diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/ssl/SSLEnum.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/ssl/SSLEnum.scala similarity index 100% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/ssl/SSLEnum.scala rename to 
nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/ssl/SSLEnum.scala diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/ssl/SSLSignParams.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/ssl/SSLSignParams.scala similarity index 100% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/ssl/SSLSignParams.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/ssl/SSLSignParams.scala diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala similarity index 93% rename from nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala rename to nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala index cbcb94c5..5d3ecdf0 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala +++ b/nebula-spark-common/src/main/scala/com/vesoft/nebula/connector/writer/NebulaExecutor.scala @@ -10,6 +10,7 @@ import com.vesoft.nebula.connector.NebulaTemplate.{ BATCH_INSERT_TEMPLATE, DELETE_EDGE_TEMPLATE, DELETE_VERTEX_TEMPLATE, + DELETE_VERTEX_WITH_EDGE_TEMPLATE, EDGE_ENDPOINT_TEMPLATE, EDGE_VALUE_TEMPLATE, EDGE_VALUE_WITHOUT_RANKING_TEMPLATE, @@ -20,16 +21,16 @@ import com.vesoft.nebula.connector.NebulaTemplate.{ VERTEX_VALUE_TEMPLATE, VERTEX_VALUE_TEMPLATE_WITH_POLICY } -import com.vesoft.nebula.connector.connector.{ +import com.vesoft.nebula.connector.{ + DataTypeEnum, EdgeRank, - NebulaEdge, + KeyPolicy, NebulaEdges, - NebulaVertex, + NebulaUtils, NebulaVertices, PropertyNames, PropertyValues } -import com.vesoft.nebula.connector.{DataTypeEnum, KeyPolicy, NebulaUtils} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.StructType @@ -365,26 +366,31 @@ object NebulaExecutor { /** * construct delete statement for vertex */ - def toDeleteExecuteStatement(vertices: NebulaVertices): String = { - DELETE_VERTEX_TEMPLATE.format( - vertices.values - .map { value => - vertices.policy match { - case Some(KeyPolicy.HASH) => - ENDPOINT_TEMPLATE.format(KeyPolicy.HASH.toString, value.vertexIDSlice) + def toDeleteExecuteStatement(vertices: NebulaVertices, deleteEdge: Boolean): String = { + if (deleteEdge) + DELETE_VERTEX_WITH_EDGE_TEMPLATE.format(genDeleteVertexInfo(vertices)) + else + DELETE_VERTEX_TEMPLATE.format(genDeleteVertexInfo(vertices)) + } - case Some(KeyPolicy.UUID) => - ENDPOINT_TEMPLATE.format(KeyPolicy.UUID.toString, value.vertexIDSlice) + private def genDeleteVertexInfo(vertices: NebulaVertices): String = { + vertices.values + .map { value => + vertices.policy match { + case Some(KeyPolicy.HASH) => + ENDPOINT_TEMPLATE.format(KeyPolicy.HASH.toString, value.vertexIDSlice) - case None => - value.vertexIDSlice - case _ => - throw new IllegalArgumentException( - s"vertex policy ${vertices.policy.get} is not supported") - } + case Some(KeyPolicy.UUID) => + ENDPOINT_TEMPLATE.format(KeyPolicy.UUID.toString, value.vertexIDSlice) + + case None => + value.vertexIDSlice + case _ => + throw new IllegalArgumentException( + s"vertex policy ${vertices.policy.get} is not supported") } - .mkString(",") - ) + } + .mkString(",") } /** diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/DataTypeEnumSuite.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/DataTypeEnumSuite.scala similarity 
index 100% rename from nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/DataTypeEnumSuite.scala rename to nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/DataTypeEnumSuite.scala diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/NebulaConfigSuite.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/NebulaConfigSuite.scala similarity index 100% rename from nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/NebulaConfigSuite.scala rename to nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/NebulaConfigSuite.scala diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/NebulaUtilsSuite.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/NebulaUtilsSuite.scala similarity index 100% rename from nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/NebulaUtilsSuite.scala rename to nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/NebulaUtilsSuite.scala diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/PartitionUtilsSuite.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/PartitionUtilsSuite.scala similarity index 100% rename from nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/PartitionUtilsSuite.scala rename to nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/PartitionUtilsSuite.scala diff --git a/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala new file mode 100644 index 00000000..b8f0e72e --- /dev/null +++ b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala @@ -0,0 +1,192 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +package com.vesoft.nebula.connector.mock + +import com.vesoft.nebula.client.graph.NebulaPoolConfig +import com.vesoft.nebula.client.graph.data.HostAddress +import com.vesoft.nebula.client.graph.net.NebulaPool +import org.apache.log4j.Logger + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer + +class NebulaGraphMock { + private[this] val LOG = Logger.getLogger(this.getClass) + + @transient val nebulaPoolConfig = new NebulaPoolConfig + @transient val pool: NebulaPool = new NebulaPool + val address = new ListBuffer[HostAddress]() + address.append(new HostAddress("127.0.0.1", 9669)) + + val randAddr = scala.util.Random.shuffle(address) + pool.init(randAddr.asJava, nebulaPoolConfig) + + def mockStringIdGraph(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_string(partition_num=10,vid_type=fixed_string(8));" + + "USE test_string;" + "CREATE TAG IF NOT EXISTS person(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE EDGE IF NOT EXISTS friend(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE TAG IF NOT EXISTS geo_shape(geo geography);" + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create string type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + + Thread.sleep(10000) + val insertTag = + "INSERT VERTEX person(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " \"1\":(\"person1\", \"person1\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"2\":(\"person2\", \"person2\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"3\":(\"person3\", \"person3\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"4\":(\"person4\", \"person4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"5\":(\"person5\", \"person5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"6\":(\"person6\", \"person6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"7\":(\"person7\", \"person7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"8\":(\"person8\", \"person8\", 18, 900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"9\":(\"person9\", \"person9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"10\":(\"person10\", 
\"person10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"11\":(\"person11\", \"person11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"12\":(\"person12\", \"person11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-1\":(\"person00\", \"person00\", 23, 1400, 13000, 1388888, date(\"2021-01-13\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-2\":(\"person01\", \"person01\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-3\":(\"person02\", \"person02\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"19\":(\"person19\", \"person22\", 25, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"22\":(\"person22\", \"person22\", 26, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"));" + + "INSERT VERTEX geo_shape(geo) VALUES \"100\":(ST_GeogFromText(\"POINT(1 2)\")), \"101\":(ST_GeogFromText(\"LINESTRING(1 2, 3 4)\")), \"102\":(ST_GeogFromText(\"POLYGON((0 1, 1 2, 2 3, 0 1))\"))" + val insertTagResp = session.execute(insertTag) + if (!insertTagResp.isSucceeded) { + close() + LOG.error("insert vertex for string type space failed," + insertTagResp.getErrorMessage) + sys.exit(-1) + } + + val insertEdge = "INSERT EDGE friend(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " \"1\" -> \"2\":(\"friend1\", \"friend2\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"2\" -> \"3\":(\"friend2\", \"friend3\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"3\" -> \"4\":(\"friend3\", \"friend4\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"4\" -> \"5\":(\"friend4\", \"friend4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"5\" -> \"6\":(\"friend5\", \"friend5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"6\" -> \"7\":(\"friend6\", \"friend6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"7\" -> \"8\":(\"friend7\", \"friend7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"8\" -> \"9\":(\"friend8\", \"friend8\", 18, 
900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"9\" -> \"10\":(\"friend9\", \"friend9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"10\" -> \"11\":(\"friend10\", \"friend10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"11\" -> \"12\":(\"friend11\", \"friend11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"12\" -> \"1\":(\"friend12\", \"friend11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-1\" -> \"11\":(\"friend13\", \"friend12\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-2\" -> \"-1\":(\"friend14\", \"friend13\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))" + val insertEdgeResp = session.execute(insertEdge) + if (!insertEdgeResp.isSucceeded) { + close() + LOG.error("insert edge for string type space failed," + insertEdgeResp.getErrorMessage) + sys.exit(-1) + } + } + + def mockIntIdGraph(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_int(partition_num=10, vid_type=int64);" + + "USE test_int;" + "CREATE TAG IF NOT EXISTS person(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE EDGE IF NOT EXISTS friend(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE TAG IF NOT EXISTS geo_shape(geo geography);" + + "CREATE TAG IF NOT EXISTS tag_duration(col duration);" + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create int type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + + Thread.sleep(10000) + val insertTag = + "INSERT VERTEX person(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " 1:(\"person1\", \"person1\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 2:(\"person2\", \"person2\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 3:(\"person3\", \"person3\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 4:(\"person4\", \"person4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 5:(\"person5\", \"person5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), 
datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 6:(\"person6\", \"person6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 7:(\"person7\", \"person7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 8:(\"person8\", \"person8\", 18, 900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 9:(\"person9\", \"person9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 10:(\"person10\", \"person10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 11:(\"person11\", \"person11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 12:(\"person12\", \"person11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " -1:(\"person00\", \"person00\", 23, 1400, 13000, 1388888, date(\"2021-01-13\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " -2:(\"person01\", \"person01\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " -3:(\"person02\", \"person02\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 19:(\"person19\", \"person22\", 25, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 22:(\"person22\", \"person22\", 26, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\")), " + + " 0:(null, null, null, null, null, null, null, null, null, null, null, null, null);" + + "INSERT VERTEX geo_shape(geo) VALUES 100:(ST_GeogFromText(\"POINT(1 2)\")), 101:(ST_GeogFromText(\"LINESTRING(1 2, 3 4)\")), 102:(ST_GeogFromText(\"POLYGON((0 1, 1 2, 2 3, 0 1))\"));" + + "INSERT VERTEX tag_duration(col) VALUES 200:(duration({months:1, seconds:100, microseconds:20}))" + + val insertTagResp = session.execute(insertTag) + if (!insertTagResp.isSucceeded) { + close() + LOG.error("insert vertex for int type space failed," + insertTagResp.getErrorMessage) + sys.exit(-1) + } + + val insertEdge = "INSERT EDGE friend(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " 1 -> 2:(\"friend1\", \"friend2\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 2 -> 3:(\"friend2\", \"friend3\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 3 -> 4:(\"friend3\", 
\"friend4\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 4 -> 5:(\"friend4\", \"friend4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 5 -> 6:(\"friend5\", \"friend5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 6 -> 7:(\"friend6\", \"friend6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 7 -> 8:(\"friend7\", \"friend7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 8 -> 9:(\"friend8\", \"friend8\", 18, 900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 9 -> 10:(\"friend9\", \"friend9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 10 -> 11:(\"friend10\", \"friend10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 11 -> 12:(\"friend11\", \"friend11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 12 -> 1:(\"friend12\", \"friend11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))" + val insertEdgeResp = session.execute(insertEdge) + if (!insertEdgeResp.isSucceeded) { + close() + LOG.error("insert edge for int type space failed," + insertEdgeResp.getErrorMessage) + sys.exit(-1) + } + } + + def mockStringIdGraphSchema(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_write_string(partition_num=10,vid_type=fixed_string(8));" + + "USE test_write_string;" + + "CREATE TAG IF NOT EXISTS person_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography, col15 duration);" + + "CREATE EDGE IF NOT EXISTS friend_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography);"; + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create string type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + } + + def mockIntIdGraphSchema(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_write_int(partition_num=10, vid_type=int64);" + + "USE test_write_int;" + + "CREATE TAG IF NOT EXISTS person_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography, col15 duration);" + + 
"CREATE EDGE IF NOT EXISTS friend_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography);"; + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create int type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + } + + def close(): Unit = { + pool.close() + } +} diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala similarity index 95% rename from nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala rename to nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala index 67fe0cbb..3ee5642d 100644 --- a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala +++ b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/nebula/GraphProviderTest.scala @@ -5,7 +5,7 @@ package com.vesoft.nebula.connector.nebula -import com.vesoft.nebula.connector.connector.Address +import com.vesoft.nebula.connector.Address import com.vesoft.nebula.connector.mock.NebulaGraphMock import org.apache.log4j.BasicConfigurator import org.scalatest.BeforeAndAfterAll diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala similarity index 97% rename from nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala rename to nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala index 4e40d713..c8b1dc5f 100644 --- a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala +++ b/nebula-spark-common/src/test/scala/com/vesoft/nebula/connector/nebula/MetaProviderTest.scala @@ -6,9 +6,8 @@ package com.vesoft.nebula.connector.nebula import com.vesoft.nebula.PropertyType -import com.vesoft.nebula.connector.DataTypeEnum -import com.vesoft.nebula.connector.connector.Address import com.vesoft.nebula.connector.mock.NebulaGraphMock +import com.vesoft.nebula.connector.{Address, DataTypeEnum} import com.vesoft.nebula.meta.Schema import org.apache.log4j.BasicConfigurator import org.scalatest.BeforeAndAfterAll diff --git a/nebula-spark-connector/pom.xml b/nebula-spark-connector/pom.xml index ace6b2e6..60286725 100644 --- a/nebula-spark-connector/pom.xml +++ b/nebula-spark-connector/pom.xml @@ -13,46 +13,32 @@ nebula-spark-connector - 2.4.4 - 3.0-SNAPSHOT + 2.4.4 1.8 1.8 - 3.2.3 - 4.13.1 - 1.13 org.apache.spark spark-core_2.11 - ${spark.version} + ${spark2.4.version} org.apache.spark spark-sql_2.11 - ${spark.version} + ${spark2.4.version} org.apache.spark spark-graphx_2.11 - ${spark.version} + ${spark2.4.version} + com.vesoft - client - ${nebula.version} - - - commons-codec - commons-codec - ${codec.version} - - - - org.scalatest - scalatest-funsuite_2.11 - ${scalatest.version} + nebula-spark-common + ${project.version} @@ -98,6 +84,7 @@ org.apache.maven.plugins maven-compiler-plugin + 3.1 ${compiler.source.version} ${compiler.target.version} @@ -245,6 +232,7 @@ org.apache.maven.plugins maven-surefire-plugin + 2.12.4 **/*Test.* diff --git 
a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/package.scala b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/package.scala index 59482660..240a7644 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/package.scala +++ b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/package.scala @@ -6,10 +6,7 @@ package com.vesoft.nebula.connector import com.vesoft.nebula.connector.ssl.SSLSignType -import com.vesoft.nebula.connector.writer.NebulaExecutor -import org.apache.commons.codec.digest.MurmurHash2 import org.apache.spark.rdd.RDD -import org.apache.spark.sql.types.LongType import org.apache.spark.sql.{ DataFrame, DataFrameReader, @@ -24,67 +21,6 @@ import scala.collection.mutable.ListBuffer package object connector { - type Address = (String, Int) - type NebulaType = Int - type Prop = List[Any] - type PropertyNames = List[String] - type PropertyValues = List[Any] - - type VertexID = Long - type VertexIDSlice = String - type NebulaGraphxVertex = (VertexID, PropertyValues) - type NebulaGraphxEdge = org.apache.spark.graphx.Edge[(EdgeRank, Prop)] - type EdgeRank = Long - - case class NebulaVertex(vertexIDSlice: VertexIDSlice, values: PropertyValues) { - def propertyValues = values.mkString(", ") - - override def toString: String = { - s"Vertex ID: ${vertexIDSlice}, Values: ${values.mkString(", ")}" - } - } - - case class NebulaVertices(propNames: PropertyNames, - values: List[NebulaVertex], - policy: Option[KeyPolicy.Value]) { - - def propertyNames: String = NebulaExecutor.escapePropName(propNames).mkString(",") - - override def toString: String = { - s"Vertices: " + - s"Property Names: ${propNames.mkString(", ")}" + - s"Vertex Values: ${values.mkString(", ")} " + - s"with policy: ${policy}" - } - } - - case class NebulaEdge(source: VertexIDSlice, - target: VertexIDSlice, - rank: Option[EdgeRank], - values: PropertyValues) { - def propertyValues: String = values.mkString(", ") - - override def toString: String = { - s"Edge: ${source}->${target}@${rank} values: ${propertyValues}" - } - } - - case class NebulaEdges(propNames: PropertyNames, - values: List[NebulaEdge], - sourcePolicy: Option[KeyPolicy.Value], - targetPolicy: Option[KeyPolicy.Value]) { - def propertyNames: String = NebulaExecutor.escapePropName(propNames).mkString(",") - def getSourcePolicy = sourcePolicy - def getTargetPolicy = targetPolicy - - override def toString: String = { - "Edges:" + - s" Property Names: ${propNames.mkString(", ")}" + - s" with source policy ${sourcePolicy}" + - s" with target policy ${targetPolicy}" - } - } - /** * spark reader for nebula graph */ @@ -266,6 +202,7 @@ package object connector { .option(NebulaOptions.BATCH, writeConfig.getBatch) .option(NebulaOptions.VID_AS_PROP, writeConfig.getVidAsProp) .option(NebulaOptions.WRITE_MODE, writeConfig.getWriteMode) + .option(NebulaOptions.DELETE_EDGE, writeConfig.getDeleteEdge) .option(NebulaOptions.META_ADDRESS, connectionConfig.getMetaAddress) .option(NebulaOptions.GRAPH_ADDRESS, connectionConfig.getGraphAddress) .option(NebulaOptions.TIMEOUT, connectionConfig.getTimeout) diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaPartitionReader.scala b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaPartitionReader.scala index c96ea39d..e72f3460 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaPartitionReader.scala +++ 
b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaPartitionReader.scala @@ -13,10 +13,10 @@ import com.vesoft.nebula.client.graph.data.{ ValueWrapper } import com.vesoft.nebula.client.storage.StorageClient -import com.vesoft.nebula.client.storage.data.{BaseTableRow, VertexTableRow} +import com.vesoft.nebula.client.storage.data.BaseTableRow import com.vesoft.nebula.connector.NebulaUtils.NebulaValueGetter -import com.vesoft.nebula.connector.exception.GraphConnectException import com.vesoft.nebula.connector.{NebulaOptions, NebulaUtils, PartitionUtils} +import com.vesoft.nebula.connector.exception.GraphConnectException import com.vesoft.nebula.connector.nebula.MetaProvider import com.vesoft.nebula.connector.ssl.SSLSignType import org.apache.spark.sql.catalyst.InternalRow @@ -101,6 +101,7 @@ abstract class NebulaPartitionReader extends InputPartitionReader[InternalRow] { // allocate scanPart to this partition val totalPart = metaProvider.getPartitionNumber(nebulaOptions.spaceName) + // index starts with 1 val scanParts = PartitionUtils.getScanParts(index, totalPart, nebulaOptions.partitionNums.toInt) LOG.info(s"partition index: ${index}, scanParts: ${scanParts.toString}") scanPartIterator = scanParts.iterator diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaSourceReader.scala b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaSourceReader.scala index f6da55e4..ff2a43f2 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaSourceReader.scala +++ b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaSourceReader.scala @@ -7,8 +7,8 @@ package com.vesoft.nebula.connector.reader import java.util -import com.vesoft.nebula.connector.{DataTypeEnum, NebulaOptions, NebulaUtils} import com.vesoft.nebula.connector.nebula.MetaProvider +import com.vesoft.nebula.connector.{DataTypeEnum, NebulaOptions, NebulaUtils} import com.vesoft.nebula.meta.ColumnDef import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.sources.v2.reader.{DataSourceReader, InputPartition} diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala index ece22c80..9b04b229 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala +++ b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala @@ -5,7 +5,7 @@ package com.vesoft.nebula.connector.writer -import com.vesoft.nebula.connector.connector.{NebulaEdge, NebulaEdges} +import com.vesoft.nebula.connector.{NebulaEdge, NebulaEdges} import com.vesoft.nebula.connector.{KeyPolicy, NebulaOptions, WriteMode} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.sources.v2.writer.{DataWriter, WriterCommitMessage} @@ -100,6 +100,7 @@ class NebulaEdgeWriter(nebulaOptions: NebulaOptions, execute() } graphProvider.close() + metaProvider.close() NebulaCommitMessage.apply(failedExecs.toList) } diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala index 03c04cbe..8d418af1 100644 --- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala +++ 
b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala @@ -5,8 +5,13 @@ package com.vesoft.nebula.connector.writer -import com.vesoft.nebula.connector.connector.{NebulaVertex, NebulaVertices} -import com.vesoft.nebula.connector.{KeyPolicy, NebulaOptions, WriteMode} +import com.vesoft.nebula.connector.{ + KeyPolicy, + NebulaOptions, + NebulaVertex, + NebulaVertices, + WriteMode +} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.sources.v2.writer.{DataWriter, WriterCommitMessage} import org.apache.spark.sql.types.StructType @@ -65,7 +70,8 @@ class NebulaVertexWriter(nebulaOptions: NebulaOptions, vertexIndex: Int, schema: case WriteMode.INSERT => NebulaExecutor.toExecuteSentence(nebulaOptions.label, nebulaVertices) case WriteMode.UPDATE => NebulaExecutor.toUpdateExecuteStatement(nebulaOptions.label, nebulaVertices) - case WriteMode.DELETE => NebulaExecutor.toDeleteExecuteStatement(nebulaVertices) + case WriteMode.DELETE => + NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, nebulaOptions.deleteEdge) case _ => throw new IllegalArgumentException(s"write mode ${nebulaOptions.writeMode} not supported.") } @@ -78,6 +84,7 @@ class NebulaVertexWriter(nebulaOptions: NebulaOptions, vertexIndex: Int, schema: execute() } graphProvider.close() + metaProvider.close() NebulaCommitMessage(failedExecs.toList) } diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala index 43da472e..a8eec279 100644 --- a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala +++ b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala @@ -6,13 +6,13 @@ package com.vesoft.nebula.connector.mock import com.facebook.thrift.protocol.TCompactProtocol -import com.vesoft.nebula.connector.connector.NebulaDataFrameWriter import com.vesoft.nebula.connector.{ NebulaConnectionConfig, WriteMode, WriteNebulaEdgeConfig, WriteNebulaVertexConfig } +import com.vesoft.nebula.connector.connector.NebulaDataFrameWriter import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession @@ -176,4 +176,44 @@ object SparkMock { spark.stop() } + /** + * write nebula vertex with delete_with_edge mode + */ + def deleteVertexWithEdge(): Unit = { + val sparkConf = new SparkConf + sparkConf + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) + val spark = SparkSession + .builder() + .master("local") + .config(sparkConf) + .getOrCreate() + + val df = spark.read + .option("header", true) + .csv("src/test/resources/vertex.csv") + + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withGraphAddress("127.0.0.1:9669") + .withConenctionRetry(2) + .build() + val nebulaWriteVertexConfig: WriteNebulaVertexConfig = WriteNebulaVertexConfig + .builder() + .withSpace("test_write_string") + .withTag("person_connector") + .withVidField("id") + .withVidAsProp(false) + .withWriteMode(WriteMode.DELETE) + .withDeleteEdge(true) + .withBatch(5) + .build() + df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() + + spark.stop() + } + } diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/NebulaExecutorSuite.scala b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/NebulaExecutorSuite.scala index 
58c725e7..7a95f623 100644 --- a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/NebulaExecutorSuite.scala +++ b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/NebulaExecutorSuite.scala @@ -6,7 +6,7 @@ package com.vesoft.nebula.connector.writer import com.vesoft.nebula.connector.KeyPolicy -import com.vesoft.nebula.connector.connector.{NebulaEdge, NebulaEdges, NebulaVertex, NebulaVertices} +import com.vesoft.nebula.connector.{NebulaEdge, NebulaEdges, NebulaVertex, NebulaVertices} import org.apache.log4j.BasicConfigurator import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow @@ -335,9 +335,13 @@ class NebulaExecutorSuite extends AnyFunSuite with BeforeAndAfterAll { vertices.append(NebulaVertex("\"vid2\"", List())) val nebulaVertices = NebulaVertices(List(), vertices.toList, None) - val vertexStatement = NebulaExecutor.toDeleteExecuteStatement(nebulaVertices) + val vertexStatement = NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, false) val expectVertexDeleteStatement = "DELETE VERTEX \"vid1\",\"vid2\"" assert(expectVertexDeleteStatement.equals(vertexStatement)) + + val vertexWithEdgeStatement = NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, true) + val expectVertexWithEdgeDeleteStatement = "DELETE VERTEX \"vid1\",\"vid2\" WITH EDGE" + assert(expectVertexWithEdgeDeleteStatement.equals(vertexWithEdgeStatement)) } test("test toDeleteExecuteStatement for vertex with HASH policy") { @@ -346,9 +350,14 @@ class NebulaExecutorSuite extends AnyFunSuite with BeforeAndAfterAll { vertices.append(NebulaVertex("vid2", List())) val nebulaVertices = NebulaVertices(List(), vertices.toList, Some(KeyPolicy.HASH)) - val vertexStatement = NebulaExecutor.toDeleteExecuteStatement(nebulaVertices) + val vertexStatement = NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, false) val expectVertexDeleteStatement = "DELETE VERTEX hash(\"vid1\"),hash(\"vid2\")" assert(expectVertexDeleteStatement.equals(vertexStatement)) + + val vertexWithEdgeStatement = NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, true) + val expectVertexWithEdgeDeleteStatement = + "DELETE VERTEX hash(\"vid1\"),hash(\"vid2\") WITH EDGE" + assert(expectVertexWithEdgeDeleteStatement.equals(vertexWithEdgeStatement)) } test("test toDeleteExecuteStatement for edge") { diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala index e63fbf8c..5ebf52f6 100644 --- a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala +++ b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala @@ -6,7 +6,7 @@ package com.vesoft.nebula.connector.writer import com.vesoft.nebula.client.graph.data.ResultSet -import com.vesoft.nebula.connector.connector.Address +import com.vesoft.nebula.connector.Address import com.vesoft.nebula.connector.mock.{NebulaGraphMock, SparkMock} import com.vesoft.nebula.connector.nebula.GraphProvider import org.apache.log4j.BasicConfigurator @@ -22,6 +22,7 @@ class WriteDeleteSuite extends AnyFunSuite with BeforeAndAfterAll { graphMock.mockIntIdGraphSchema() graphMock.close() SparkMock.writeVertex() + SparkMock.writeEdge() } test("write vertex into test_write_string space with delete mode") { @@ -36,6 +37,28 @@ class WriteDeleteSuite extends AnyFunSuite with BeforeAndAfterAll { 
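Taken together, the two delete tests above pin down exactly what the executor emits; as a compact summary (statement text copied verbatim from the assertions, with `nebulaVertices` being the no-policy test fixture built above):

```scala
// The new second argument of toDeleteExecuteStatement is the deleteEdge flag.
NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, false)
// => DELETE VERTEX "vid1","vid2"
NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, true)
// => DELETE VERTEX "vid1","vid2" WITH EDGE
```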
assert(resultSet.isEmpty) } + test("write vertex into test_write_with_edge_string space with delete with edge mode") { + SparkMock.writeVertex() + SparkMock.writeEdge() + SparkMock.deleteVertexWithEdge() + val addresses: List[Address] = List(new Address("127.0.0.1", 9669)) + val graphProvider = new GraphProvider(addresses, 3000) + + graphProvider.switchSpace("root", "nebula", "test_write_string") + // assert vertex is deleted + val vertexResultSet: ResultSet = + graphProvider.submit("use test_write_string;match (v:person_connector) return v;") + assert(vertexResultSet.getColumnNames.size() == 0) + assert(vertexResultSet.isEmpty) + + // assert edge is deleted + val edgeResultSet: ResultSet = + graphProvider.submit("use test_write_string;fetch prop on friend_connector 1->2@10") + assert(edgeResultSet.getColumnNames.size() == 0) + assert(edgeResultSet.isEmpty) + + } + test("write edge into test_write_string space with delete mode") { SparkMock.deleteEdge() val addresses: List[Address] = List(new Address("127.0.0.1", 9669)) diff --git a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala index 5b9bf051..73b90925 100644 --- a/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala +++ b/nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala @@ -6,7 +6,7 @@ package com.vesoft.nebula.connector.writer import com.vesoft.nebula.client.graph.data.ResultSet -import com.vesoft.nebula.connector.connector.Address +import com.vesoft.nebula.connector.Address import com.vesoft.nebula.connector.mock.{NebulaGraphMock, SparkMock} import com.vesoft.nebula.connector.nebula.GraphProvider import org.apache.log4j.BasicConfigurator diff --git a/nebula-spark-connector_2.2/.gitignore b/nebula-spark-connector_2.2/.gitignore new file mode 100644 index 00000000..84e7a6bc --- /dev/null +++ b/nebula-spark-connector_2.2/.gitignore @@ -0,0 +1,36 @@ +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* + +# build target +target/ + +# IDE +.idea/ +.eclipse/ +*.iml + +spark-importer.ipr +spark-importer.iws + +.DS_Store diff --git a/nebula-spark-connector_2.2/pom.xml b/nebula-spark-connector_2.2/pom.xml new file mode 100644 index 00000000..c47c2a5d --- /dev/null +++ b/nebula-spark-connector_2.2/pom.xml @@ -0,0 +1,306 @@ + + + + nebula-spark + com.vesoft + 3.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + nebula-spark-connector_2.2 + + + 2.2.0 + 1.8 + 1.8 + + + + + org.apache.spark + spark-core_2.11 + ${spark2.2.version} + + + org.apache.spark + spark-sql_2.11 + ${spark2.2.version} + + + org.apache.spark + spark-graphx_2.11 + ${spark2.2.version} + + + com.vesoft + nebula-spark-common + ${project.version} + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + 2.8.2 + + + default-deploy + deploy + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.8 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.2.0 + + + + test-jar + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + ${compiler.source.version} + ${compiler.target.version} + + + + + + org.apache.maven.plugins + maven-shade-plugin + 
3.2.1 + + + package + + shade + + + false + + + org.apache.spark:* + org.apache.hadoop:* + org.apache.hive:* + log4j:log4j + org.apache.orc:* + xml-apis:xml-apis + javax.inject:javax.inject + org.spark-project.hive:hive-exec + stax:stax-api + org.glassfish.hk2.external:aopalliance-repackaged + + + + + + *:* + + com/vesoft/tools/** + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + 2.11.12 + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + com/vesoft/tools/** + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + com/vesoft/tools/** + + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.2.0 + + + attach-sources + + jar + + + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.2 + + + + compile + testCompile + + + + Scaladoc + + doc + + prepare-package + + + -nobootcp + -no-link-warnings + + + + + attach-javadocs + + doc-jar + + + + -nobootcp + -no-link-warnings + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + **/*Test.* + **/*Suite.* + + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + + test + + test + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.2.0 + + com.facebook.thrift:com.facebook.thrift.* + + + + + attach-javadocs + package + + jar + + + UTF-8 + UTF-8 + + -source 8 + -Xdoclint:none + + + + + + + org.jacoco + jacoco-maven-plugin + 0.8.7 + + + + prepare-agent + + + + report + test + + report + + + + + + + + + snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + + diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/NebulaDataSource.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/NebulaDataSource.scala new file mode 100644 index 00000000..8d978501 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/NebulaDataSource.scala @@ -0,0 +1,163 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector + +import com.vesoft.nebula.connector.exception.IllegalOptionException +import com.vesoft.nebula.connector.reader.NebulaRelation +import com.vesoft.nebula.connector.writer.{ + NebulaCommitMessage, + NebulaEdgeWriter, + NebulaVertexWriter, + NebulaWriter, + NebulaWriterResultRelation +} +import org.apache.spark.TaskContext +import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.sources.{ + BaseRelation, + CreatableRelationProvider, + DataSourceRegister, + RelationProvider +} +import org.apache.spark.sql.types.StructType +import org.slf4j.LoggerFactory + +class NebulaDataSource + extends RelationProvider + with CreatableRelationProvider + with DataSourceRegister + with Serializable { + private val LOG = LoggerFactory.getLogger(this.getClass) + + /** + * The string that represents the format that nebula data source provider uses. + */ + override def shortName(): String = "nebula" + + /** + * Creates a {@link DataSourceReader} to scan the data from Nebula Graph. 
+ */ + override def createRelation(sqlContext: SQLContext, + parameters: Map[String, String]): BaseRelation = { + val nebulaOptions = getNebulaOptions(parameters, OperaType.READ) + + LOG.info("create relation") + LOG.info(s"options ${parameters}") + + NebulaRelation(sqlContext, nebulaOptions) + } + + /** + * Saves a DataFrame to a destination (using data source-specific parameters) + */ + override def createRelation(sqlContext: SQLContext, + mode: SaveMode, + parameters: Map[String, String], + data: DataFrame): BaseRelation = { + + val nebulaOptions = getNebulaOptions(parameters, OperaType.WRITE) + if (mode == SaveMode.Ignore || mode == SaveMode.ErrorIfExists) { + LOG.warn(s"Save mode ${mode} is not supported and will be ignored") + } + + LOG.info("create writer") + LOG.info(s"options ${parameters}") + + val schema = data.schema + data.foreachPartition(iterator => { + savePartition(nebulaOptions, schema, iterator) + }) + + new NebulaWriterResultRelation(sqlContext, data.schema) + } + + /** + * construct nebula options from the datasource parameters + */ + def getNebulaOptions(options: Map[String, String], + operateType: OperaType.Value): NebulaOptions = { + val nebulaOptions = new NebulaOptions(CaseInsensitiveMap(options))(operateType) + nebulaOptions + } + + private def savePartition(nebulaOptions: NebulaOptions, + schema: StructType, + iterator: Iterator[Row]): Unit = { + val dataType = nebulaOptions.dataType + val writer: NebulaWriter = { + if (DataTypeEnum.VERTEX == DataTypeEnum.withName(dataType)) { + val vertexField = nebulaOptions.vertexField + val vertexIndex: Int = { + var index: Int = -1 + for (i <- schema.fields.indices) { + if (schema.fields(i).name.equals(vertexField)) { + index = i + } + } + if (index < 0) { + throw new IllegalOptionException( + s"vertex field ${vertexField} does not exist in dataframe") + } + index + } + new NebulaVertexWriter(nebulaOptions, vertexIndex, schema).asInstanceOf[NebulaWriter] + } else { + val srcVertexField = nebulaOptions.srcVertexField + val dstVertexField = nebulaOptions.dstVertexField + val rankExist = !nebulaOptions.rankField.isEmpty + val edgeFieldsIndex = { + var srcIndex: Int = -1 + var dstIndex: Int = -1 + var rankIndex: Int = -1 + for (i <- schema.fields.indices) { + if (schema.fields(i).name.equals(srcVertexField)) { + srcIndex = i + } + if (schema.fields(i).name.equals(dstVertexField)) { + dstIndex = i + } + if (rankExist) { + if (schema.fields(i).name.equals(nebulaOptions.rankField)) { + rankIndex = i + } + } + } + // check src field and dst field + if (srcIndex < 0 || dstIndex < 0) { + throw new IllegalOptionException( + s"srcVertex field ${srcVertexField} or dstVertex field ${dstVertexField} does not exist in dataframe") + } + // check rank field + if (rankExist && rankIndex < 0) { + throw new IllegalOptionException(s"rank field does not exist in dataframe") + } + + if (!rankExist) { + (srcIndex, dstIndex, Option.empty) + } else { + (srcIndex, dstIndex, Option(rankIndex)) + } + + } + new NebulaEdgeWriter(nebulaOptions, + edgeFieldsIndex._1, + edgeFieldsIndex._2, + edgeFieldsIndex._3, + schema).asInstanceOf[NebulaWriter] + } + } + val message = writer.writeData(iterator) + LOG.debug( + s"spark partition id ${message.partitionId} write failed size: ${message.executeStatements.length}") + if (message.executeStatements.nonEmpty) { + LOG.error(s"failed execs:\n ${message.executeStatements.toString()}") + } else { + LOG.info(s"execs for spark partition ${TaskContext.getPartitionId()} all succeed") + } + + } +} diff --git
a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/package.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/package.scala new file mode 100644 index 00000000..4f26b32f --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/package.scala @@ -0,0 +1,336 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector + +import com.vesoft.nebula.connector.ssl.SSLSignType +import com.vesoft.nebula.connector.writer.NebulaExecutor +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{ + DataFrame, + DataFrameReader, + DataFrameWriter, + Encoder, + Encoders, + Row, + SaveMode +} + +import scala.collection.mutable.ListBuffer + +package object connector { + + type Address = (String, Int) + type NebulaType = Int + type Prop = List[Any] + type PropertyNames = List[String] + type PropertyValues = List[Any] + + type VertexID = Long + type VertexIDSlice = String + type NebulaGraphxVertex = (VertexID, PropertyValues) + type NebulaGraphxEdge = org.apache.spark.graphx.Edge[(EdgeRank, Prop)] + type EdgeRank = Long + + case class NebulaVertex(vertexIDSlice: VertexIDSlice, values: PropertyValues) { + def propertyValues = values.mkString(", ") + + override def toString: String = { + s"Vertex ID: ${vertexIDSlice}, Values: ${values.mkString(", ")}" + } + } + + case class NebulaVertices(propNames: PropertyNames, + values: List[NebulaVertex], + policy: Option[KeyPolicy.Value]) { + + def propertyNames: String = NebulaExecutor.escapePropName(propNames).mkString(",") + + override def toString: String = { + s"Vertices: " + + s"Property Names: ${propNames.mkString(", ")}" + + s"Vertex Values: ${values.mkString(", ")} " + + s"with policy: ${policy}" + } + } + + case class NebulaEdge(source: VertexIDSlice, + target: VertexIDSlice, + rank: Option[EdgeRank], + values: PropertyValues) { + def propertyValues: String = values.mkString(", ") + + override def toString: String = { + s"Edge: ${source}->${target}@${rank} values: ${propertyValues}" + } + } + + case class NebulaEdges(propNames: PropertyNames, + values: List[NebulaEdge], + sourcePolicy: Option[KeyPolicy.Value], + targetPolicy: Option[KeyPolicy.Value]) { + def propertyNames: String = NebulaExecutor.escapePropName(propNames).mkString(",") + def getSourcePolicy = sourcePolicy + def getTargetPolicy = targetPolicy + + override def toString: String = { + "Edges:" + + s" Property Names: ${propNames.mkString(", ")}" + + s" with source policy ${sourcePolicy}" + + s" with target policy ${targetPolicy}" + } + } + + /** + * spark reader for nebula graph + */ + implicit class NebulaDataFrameReader(reader: DataFrameReader) { + var connectionConfig: NebulaConnectionConfig = _ + var readConfig: ReadNebulaConfig = _ + + def nebula(connectionConfig: NebulaConnectionConfig, + readConfig: ReadNebulaConfig): NebulaDataFrameReader = { + this.connectionConfig = connectionConfig + this.readConfig = readConfig + this + } + + /** + * Reading com.vesoft.nebula.tools.connector.vertices from Nebula Graph + * @return DataFrame + */ + def loadVerticesToDF(): DataFrame = { + assert(connectionConfig != null && readConfig != null, + "nebula config is not set, please call nebula() before loadVerticesToDF") + val dfReader = reader + .format(classOf[NebulaDataSource].getName) + .option(NebulaOptions.TYPE, DataTypeEnum.VERTEX.toString) + .option(NebulaOptions.SPACE_NAME, readConfig.getSpace) + 
.option(NebulaOptions.LABEL, readConfig.getLabel) + .option(NebulaOptions.PARTITION_NUMBER, readConfig.getPartitionNum) + .option(NebulaOptions.RETURN_COLS, readConfig.getReturnCols.mkString(",")) + .option(NebulaOptions.NO_COLUMN, readConfig.getNoColumn) + .option(NebulaOptions.LIMIT, readConfig.getLimit) + .option(NebulaOptions.META_ADDRESS, connectionConfig.getMetaAddress) + .option(NebulaOptions.TIMEOUT, connectionConfig.getTimeout) + .option(NebulaOptions.CONNECTION_RETRY, connectionConfig.getConnectionRetry) + .option(NebulaOptions.EXECUTION_RETRY, connectionConfig.getExecRetry) + .option(NebulaOptions.ENABLE_META_SSL, connectionConfig.getEnableMetaSSL) + .option(NebulaOptions.ENABLE_STORAGE_SSL, connectionConfig.getEnableStorageSSL) + + if (connectionConfig.getEnableStorageSSL || connectionConfig.getEnableMetaSSL) { + dfReader.option(NebulaOptions.SSL_SIGN_TYPE, connectionConfig.getSignType) + SSLSignType.withName(connectionConfig.getSignType) match { + case SSLSignType.CA => + dfReader.option(NebulaOptions.CA_SIGN_PARAM, connectionConfig.getCaSignParam) + case SSLSignType.SELF => + dfReader.option(NebulaOptions.SELF_SIGN_PARAM, connectionConfig.getSelfSignParam) + } + } + + dfReader.load() + } + + /** + * Reading edges from Nebula Graph + * @return DataFrame + */ + def loadEdgesToDF(): DataFrame = { + assert(connectionConfig != null && readConfig != null, + "nebula config is not set, please call nebula() before loadEdgesToDF") + + val dfReader = reader + .format(classOf[NebulaDataSource].getName) + .option(NebulaOptions.TYPE, DataTypeEnum.EDGE.toString) + .option(NebulaOptions.SPACE_NAME, readConfig.getSpace) + .option(NebulaOptions.LABEL, readConfig.getLabel) + .option(NebulaOptions.RETURN_COLS, readConfig.getReturnCols.mkString(",")) + .option(NebulaOptions.NO_COLUMN, readConfig.getNoColumn) + .option(NebulaOptions.LIMIT, readConfig.getLimit) + .option(NebulaOptions.PARTITION_NUMBER, readConfig.getPartitionNum) + .option(NebulaOptions.META_ADDRESS, connectionConfig.getMetaAddress) + .option(NebulaOptions.TIMEOUT, connectionConfig.getTimeout) + .option(NebulaOptions.CONNECTION_RETRY, connectionConfig.getConnectionRetry) + .option(NebulaOptions.EXECUTION_RETRY, connectionConfig.getExecRetry) + .option(NebulaOptions.ENABLE_META_SSL, connectionConfig.getEnableMetaSSL) + .option(NebulaOptions.ENABLE_STORAGE_SSL, connectionConfig.getEnableStorageSSL) + + if (connectionConfig.getEnableStorageSSL || connectionConfig.getEnableMetaSSL) { + dfReader.option(NebulaOptions.SSL_SIGN_TYPE, connectionConfig.getSignType) + SSLSignType.withName(connectionConfig.getSignType) match { + case SSLSignType.CA => + dfReader.option(NebulaOptions.CA_SIGN_PARAM, connectionConfig.getCaSignParam) + case SSLSignType.SELF => + dfReader.option(NebulaOptions.SELF_SIGN_PARAM, connectionConfig.getSelfSignParam) + } + } + + dfReader.load() + }
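A usage sketch may help here. Reading a tag through the implicit reader above looks like the following; the `ReadNebulaConfig` builder is not part of this patch, so its method names (`withSpace`, `withLabel`, `withNoColumn`, `withReturnCols`, `withLimit`, `withPartitionNum`) are assumed to match the project's README examples, and the space/tag names are the test fixtures from this patch:

```scala
import com.vesoft.nebula.connector.connector.NebulaDataFrameReader
import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig}

// Reading only needs the meta address; withConenctionRetry is the
// connector's own spelling of the builder method.
val config = NebulaConnectionConfig
  .builder()
  .withMetaAddress("127.0.0.1:9559")
  .withConenctionRetry(2)
  .build()

// Hypothetical read config mirroring the README; see caveat above.
val readConfig = ReadNebulaConfig
  .builder()
  .withSpace("test_write_string")
  .withLabel("person_connector")
  .withNoColumn(false)
  .withReturnCols(List("col1"))
  .withLimit(10)
  .withPartitionNum(10)
  .build()

// spark is an existing SparkSession.
val df = spark.read.nebula(config, readConfig).loadVerticesToDF()
df.show()
```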
+ + /** + * read nebula vertices into graphx's vertex RDD + * note: vertex ids are converted with toString.toLong, so they must be numeric + */ + def loadVerticesToGraphx(): RDD[NebulaGraphxVertex] = { + val vertexDataset = loadVerticesToDF() + implicit val encoder: Encoder[NebulaGraphxVertex] = + Encoders.bean[NebulaGraphxVertex](classOf[NebulaGraphxVertex]) + + vertexDataset + .map(row => { + val vertexId = row.get(0) + val vid: Long = vertexId.toString.toLong + val props: ListBuffer[Any] = ListBuffer() + for (i <- row.schema.fields.indices) { + if (i != 0) { + props.append(row.get(i)) + } + } + (vid, props.toList) + })(encoder) + .rdd + } + + /** + * read nebula edges into graphx's edge RDD + * note: srcId, dstId and rank are converted with toString.toLong, so they must be numeric + */ + def loadEdgesToGraphx(): RDD[NebulaGraphxEdge] = { + val edgeDataset = loadEdgesToDF() + implicit val encoder: Encoder[NebulaGraphxEdge] = + Encoders.bean[NebulaGraphxEdge](classOf[NebulaGraphxEdge]) + + edgeDataset + .map(row => { + val props: ListBuffer[Any] = ListBuffer() + for (i <- row.schema.fields.indices) { + if (i != 0 && i != 1 && i != 2) { + props.append(row.get(i)) + } + } + val srcId = row.get(0) + val dstId = row.get(1) + val edgeSrc = srcId.toString.toLong + val edgeDst = dstId.toString.toLong + val edgeProp = (row.get(2).toString.toLong, props.toList) + org.apache.spark.graphx + .Edge(edgeSrc, edgeDst, edgeProp) + })(encoder) + .rdd + } + + } + + /** + * spark writer for nebula graph + */ + implicit class NebulaDataFrameWriter(writer: DataFrameWriter[Row]) { + + var connectionConfig: NebulaConnectionConfig = _ + var writeNebulaConfig: WriteNebulaConfig = _ + + /** + * config nebula connection + * @param connectionConfig connection parameters + * @param writeNebulaConfig write parameters for vertex or edge + */ + def nebula(connectionConfig: NebulaConnectionConfig, + writeNebulaConfig: WriteNebulaConfig): NebulaDataFrameWriter = { + this.connectionConfig = connectionConfig + this.writeNebulaConfig = writeNebulaConfig + this + } + + /** + * write dataframe into nebula vertex + */ + def writeVertices(): Unit = { + assert(connectionConfig != null && writeNebulaConfig != null, + "nebula config is not set, please call nebula() before writeVertices") + val writeConfig = writeNebulaConfig.asInstanceOf[WriteNebulaVertexConfig] + val dfWriter = writer + .format(classOf[NebulaDataSource].getName) + .mode(SaveMode.Overwrite) + .option(NebulaOptions.TYPE, DataTypeEnum.VERTEX.toString) + .option(NebulaOptions.SPACE_NAME, writeConfig.getSpace) + .option(NebulaOptions.LABEL, writeConfig.getTagName) + .option(NebulaOptions.USER_NAME, writeConfig.getUser) + .option(NebulaOptions.PASSWD, writeConfig.getPasswd) + .option(NebulaOptions.VERTEX_FIELD, writeConfig.getVidField) + .option(NebulaOptions.VID_POLICY, writeConfig.getVidPolicy) + .option(NebulaOptions.BATCH, writeConfig.getBatch) + .option(NebulaOptions.VID_AS_PROP, writeConfig.getVidAsProp) + .option(NebulaOptions.WRITE_MODE, writeConfig.getWriteMode) + .option(NebulaOptions.META_ADDRESS, connectionConfig.getMetaAddress) + .option(NebulaOptions.GRAPH_ADDRESS, connectionConfig.getGraphAddress) + .option(NebulaOptions.TIMEOUT, connectionConfig.getTimeout) + .option(NebulaOptions.CONNECTION_RETRY, connectionConfig.getConnectionRetry) + .option(NebulaOptions.EXECUTION_RETRY, connectionConfig.getExecRetry) + .option(NebulaOptions.ENABLE_GRAPH_SSL, connectionConfig.getEnableGraphSSL) + .option(NebulaOptions.ENABLE_META_SSL, connectionConfig.getEnableMetaSSL) + + if (connectionConfig.getEnableGraphSSL || connectionConfig.getEnableMetaSSL) { + dfWriter.option(NebulaOptions.SSL_SIGN_TYPE, connectionConfig.getSignType) +
SSLSignType.withName(connectionConfig.getSignType) match { + case SSLSignType.CA => + dfWriter.option(NebulaOptions.CA_SIGN_PARAM, connectionConfig.getCaSignParam) + case SSLSignType.SELF => + dfWriter.option(NebulaOptions.SELF_SIGN_PARAM, connectionConfig.getSelfSignParam) + } + } + + dfWriter.save() + } + + /** + * write dataframe into nebula edge + */ + def writeEdges(): Unit = { + + assert(connectionConfig != null && writeNebulaConfig != null, + "nebula config is not set, please call nebula() before writeEdges") + val writeConfig = writeNebulaConfig.asInstanceOf[WriteNebulaEdgeConfig] + val dfWriter = writer + .format(classOf[NebulaDataSource].getName) + .mode(SaveMode.Overwrite) + .option(NebulaOptions.TYPE, DataTypeEnum.EDGE.toString) + .option(NebulaOptions.SPACE_NAME, writeConfig.getSpace) + .option(NebulaOptions.USER_NAME, writeConfig.getUser) + .option(NebulaOptions.PASSWD, writeConfig.getPasswd) + .option(NebulaOptions.LABEL, writeConfig.getEdgeName) + .option(NebulaOptions.SRC_VERTEX_FIELD, writeConfig.getSrcFiled) + .option(NebulaOptions.DST_VERTEX_FIELD, writeConfig.getDstField) + .option(NebulaOptions.SRC_POLICY, writeConfig.getSrcPolicy) + .option(NebulaOptions.DST_POLICY, writeConfig.getDstPolicy) + .option(NebulaOptions.RANK_FIELD, writeConfig.getRankField) + .option(NebulaOptions.BATCH, writeConfig.getBatch) + .option(NebulaOptions.SRC_AS_PROP, writeConfig.getSrcAsProp) + .option(NebulaOptions.DST_AS_PROP, writeConfig.getDstAsProp) + .option(NebulaOptions.RANK_AS_PROP, writeConfig.getRankAsProp) + .option(NebulaOptions.WRITE_MODE, writeConfig.getWriteMode) + .option(NebulaOptions.META_ADDRESS, connectionConfig.getMetaAddress) + .option(NebulaOptions.GRAPH_ADDRESS, connectionConfig.getGraphAddress) + .option(NebulaOptions.TIMEOUT, connectionConfig.getTimeout) + .option(NebulaOptions.CONNECTION_RETRY, connectionConfig.getConnectionRetry) + .option(NebulaOptions.EXECUTION_RETRY, connectionConfig.getExecRetry) + .option(NebulaOptions.ENABLE_GRAPH_SSL, connectionConfig.getEnableGraphSSL) + .option(NebulaOptions.ENABLE_META_SSL, connectionConfig.getEnableMetaSSL) + + if (connectionConfig.getEnableGraphSSL || connectionConfig.getEnableMetaSSL) { + dfWriter.option(NebulaOptions.SSL_SIGN_TYPE, connectionConfig.getSignType) + SSLSignType.withName(connectionConfig.getSignType) match { + case SSLSignType.CA => + dfWriter.option(NebulaOptions.CA_SIGN_PARAM, connectionConfig.getCaSignParam) + case SSLSignType.SELF => + dfWriter.option(NebulaOptions.SELF_SIGN_PARAM, connectionConfig.getSelfSignParam) + } + } + + dfWriter.save() + } + } + +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaEdgeReader.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaEdgeReader.scala new file mode 100644 index 00000000..45fab6c8 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaEdgeReader.scala @@ -0,0 +1,77 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
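For readers skimming the patch, this is what the implicit classes above provide on Spark 2.2. The sketch below mirrors SparkMock from the tests in this patch; the builder names (including the connector's own `withConenctionRetry` spelling) and the space/tag names are taken from this patch, and `df` stands for any DataFrame whose `id` column holds the vertex ids:

```scala
import com.vesoft.nebula.connector.connector.NebulaDataFrameWriter
import com.vesoft.nebula.connector.{NebulaConnectionConfig, WriteMode, WriteNebulaVertexConfig}

val config = NebulaConnectionConfig
  .builder()
  .withMetaAddress("127.0.0.1:9559")
  .withGraphAddress("127.0.0.1:9669")
  .withConenctionRetry(2)
  .build()

val writeConfig = WriteNebulaVertexConfig
  .builder()
  .withSpace("test_write_string")
  .withTag("person_connector")
  .withVidField("id")
  .withVidAsProp(false)
  .withWriteMode(WriteMode.INSERT)
  .withBatch(5)
  .build()

// Buffers rows per partition and flushes an INSERT statement every 5 rows.
df.write.nebula(config, writeConfig).writeVertices()
```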
+ */ + +package com.vesoft.nebula.connector.reader + +import com.vesoft.nebula.client.storage.scan.{ScanEdgeResult, ScanEdgeResultIterator} +import com.vesoft.nebula.connector.NebulaOptions +import org.apache.spark.Partition +import org.apache.spark.sql.types.StructType +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ + +class NebulaEdgeReader(split: Partition, nebulaOptions: NebulaOptions, schema: StructType) + extends NebulaIterator(split, nebulaOptions, schema) { + private val LOG = LoggerFactory.getLogger(this.getClass) + + private var responseIterator: ScanEdgeResultIterator = _ + + override def hasNext: Boolean = { + if (dataIterator == null && responseIterator == null && !scanPartIterator.hasNext) + return false + + var continue: Boolean = false + var break: Boolean = false + while ((dataIterator == null || !dataIterator.hasNext) && !break) { + resultValues.clear() + continue = false + if (responseIterator == null || !responseIterator.hasNext) { + if (scanPartIterator.hasNext) { + try { + if (nebulaOptions.noColumn) { + responseIterator = storageClient.scanEdge(nebulaOptions.spaceName, + scanPartIterator.next(), + nebulaOptions.label, + nebulaOptions.limit, + 0L, + Long.MaxValue, + true, + true) + } else { + responseIterator = storageClient.scanEdge(nebulaOptions.spaceName, + scanPartIterator.next(), + nebulaOptions.label, + nebulaOptions.getReturnCols.asJava, + nebulaOptions.limit, + 0, + Long.MaxValue, + true, + true) + } + } catch { + case e: Exception => + LOG.error(s"Exception scanning edge ${nebulaOptions.label}", e) + storageClient.close() + throw new Exception(e.getMessage, e) + } + // jump to the next loop + continue = true + } + // break while loop + break = !continue + } else { + val next: ScanEdgeResult = responseIterator.next + if (!next.isEmpty) { + dataIterator = next.getEdgeTableRows.iterator().asScala + } + } + } + + if (dataIterator == null) { + return false + } + dataIterator.hasNext + } +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaIterator.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaIterator.scala new file mode 100644 index 00000000..dc7b085e --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaIterator.scala @@ -0,0 +1,167 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License.
+ */ + +package com.vesoft.nebula.connector.reader + +import com.vesoft.nebula.client.graph.data.{ + CASignedSSLParam, + HostAddress, + SSLParam, + SelfSignedSSLParam, + ValueWrapper +} +import com.vesoft.nebula.client.storage.data.BaseTableRow +import com.vesoft.nebula.client.storage.StorageClient +import com.vesoft.nebula.connector.{NebulaOptions, NebulaUtils, PartitionUtils} +import com.vesoft.nebula.connector.NebulaUtils.NebulaValueGetter +import com.vesoft.nebula.connector.exception.GraphConnectException +import com.vesoft.nebula.connector.nebula.MetaProvider +import com.vesoft.nebula.connector.ssl.SSLSignType +import org.apache.spark.Partition +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow +import org.apache.spark.sql.types.StructType +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.mutable +import scala.collection.mutable.ListBuffer +import scala.collection.JavaConverters._ + +/** + * iterator for nebula vertex or edge data, + * converting each vertex or edge record into a Spark SQL InternalRow + */ +abstract class NebulaIterator extends Iterator[InternalRow] { + + private val LOG: Logger = LoggerFactory.getLogger(classOf[NebulaIterator]) + + private var metaProvider: MetaProvider = _ + private var schema: StructType = _ + + protected var dataIterator: Iterator[BaseTableRow] = _ + protected var scanPartIterator: Iterator[Integer] = _ + protected var resultValues: mutable.ListBuffer[List[Object]] = mutable.ListBuffer[List[Object]]() + protected var storageClient: StorageClient = _ + + def this(index: Partition, nebulaOptions: NebulaOptions, schema: StructType) { + this() + this.schema = schema + + metaProvider = new MetaProvider( + nebulaOptions.getMetaAddress, + nebulaOptions.timeout, + nebulaOptions.connectionRetry, + nebulaOptions.executionRetry, + nebulaOptions.enableMetaSSL, + nebulaOptions.sslSignType, + nebulaOptions.caSignParam, + nebulaOptions.selfSignParam + ) + val address: ListBuffer[HostAddress] = new ListBuffer[HostAddress] + + for (addr <- nebulaOptions.getMetaAddress) { + address.append(new HostAddress(addr._1, addr._2)) + } + + var sslParam: SSLParam = null + if (nebulaOptions.enableStorageSSL) { + SSLSignType.withName(nebulaOptions.sslSignType) match { + case SSLSignType.CA => { + val caSSLSignParams = nebulaOptions.caSignParam + sslParam = new CASignedSSLParam(caSSLSignParams.caCrtFilePath, + caSSLSignParams.crtFilePath, + caSSLSignParams.keyFilePath) + } + case SSLSignType.SELF => { + val selfSSLSignParams = nebulaOptions.selfSignParam + sslParam = new SelfSignedSSLParam(selfSSLSignParams.crtFilePath, + selfSSLSignParams.keyFilePath, + selfSSLSignParams.password) + } + case _ => throw new IllegalArgumentException("ssl sign type is not supported") + } + this.storageClient = new StorageClient(address.asJava, + nebulaOptions.timeout, + nebulaOptions.connectionRetry, + nebulaOptions.executionRetry, + true, + sslParam) + } else { + this.storageClient = new StorageClient(address.asJava, nebulaOptions.timeout) + } + + if (!storageClient.connect()) { + throw new GraphConnectException("storage connect failed.") + } + // allocate scanPart to this partition + val totalPart = metaProvider.getPartitionNumber(nebulaOptions.spaceName) + + val nebulaPartition = index.asInstanceOf[NebulaPartition] + val scanParts = + nebulaPartition.getScanParts(totalPart, nebulaOptions.partitionNums.toInt) + LOG.info(s"partition index: ${index}, scanParts: ${scanParts.toString}") + scanPartIterator =
scanParts.iterator + } + + /** + * whether this iterator can provide another element + */ + override def hasNext: Boolean + + /** + * produces the next vertex or edge row of this iterator + */ + override def next(): InternalRow = { + val resultSet: Array[ValueWrapper] = + dataIterator.next().getValues.toArray.map(v => v.asInstanceOf[ValueWrapper]) + val getters: Array[NebulaValueGetter] = NebulaUtils.makeGetters(schema) + val mutableRow = new SpecificInternalRow(schema.fields.map(x => x.dataType)) + + for (i <- getters.indices) { + val value: ValueWrapper = resultSet(i) + var resolved = false + if (value.isNull) { + mutableRow.setNullAt(i) + resolved = true + } + if (value.isString) { + getters(i).apply(value.asString(), mutableRow, i) + resolved = true + } + if (value.isDate) { + getters(i).apply(value.asDate(), mutableRow, i) + resolved = true + } + if (value.isTime) { + getters(i).apply(value.asTime(), mutableRow, i) + resolved = true + } + if (value.isDateTime) { + getters(i).apply(value.asDateTime(), mutableRow, i) + resolved = true + } + if (value.isLong) { + getters(i).apply(value.asLong(), mutableRow, i) + } + if (value.isBoolean) { + getters(i).apply(value.asBoolean(), mutableRow, i) + } + if (value.isDouble) { + getters(i).apply(value.asDouble(), mutableRow, i) + } + if (value.isGeography) { + getters(i).apply(value.asGeography(), mutableRow, i) + } + if (value.isDuration) { + getters(i).apply(value.asDuration(), mutableRow, i) + } + } + mutableRow + } + +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRDD.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRDD.scala new file mode 100644 index 00000000..bbe5118c --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRDD.scala @@ -0,0 +1,65 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.reader + +import com.vesoft.nebula.connector.{DataTypeEnum, NebulaOptions} +import org.apache.spark.{Partition, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.StructType + +import scala.collection.mutable.ListBuffer + +class NebulaRDD(val sqlContext: SQLContext, var nebulaOptions: NebulaOptions, schema: StructType) + extends RDD[InternalRow](sqlContext.sparkContext, Nil) { + + /** + * start to scan vertex or edge data + * + * @param split + * @param context + * @return Iterator + */ + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { + val dataType = nebulaOptions.dataType + if (DataTypeEnum.VERTEX.toString.equalsIgnoreCase(dataType)) + new NebulaVertexReader(split, nebulaOptions, schema) + else new NebulaEdgeReader(split, nebulaOptions, schema) + } + + override def getPartitions = { + val partitionNumber = nebulaOptions.partitionNums.toInt + val partitions = new Array[Partition](partitionNumber) + for (i <- 0 until partitionNumber) { + partitions(i) = NebulaPartition(i) + } + partitions + } +}
+ +/** + * An identifier for a partition in a NebulaRDD. + */ +case class NebulaPartition(indexNum: Int) extends Partition { + override def index: Int = indexNum + + /** + * allocate scanPart to partition + * + * @param totalPart nebula data part num + * @param totalPartition spark partition num + * @return scan data part list + */ + def getScanParts(totalPart: Int, totalPartition: Int): List[Integer] = { + val scanParts = new ListBuffer[Integer] + var currentPart = indexNum + 1 + while (currentPart <= totalPart) { + scanParts.append(currentPart) + currentPart += totalPartition + } + scanParts.toList + } +}
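As a quick check of the round-robin rule in `getScanParts` above: with 10 NebulaGraph storage parts scanned by 3 Spark partitions, the 0-based partition indexes receive the following storage parts.

```scala
// Storage parts are 1-based, Spark partition indexes 0-based.
NebulaPartition(0).getScanParts(10, 3) // List(1, 4, 7, 10)
NebulaPartition(1).getScanParts(10, 3) // List(2, 5, 8)
NebulaPartition(2).getScanParts(10, 3) // List(3, 6, 9)
```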
diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelation.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelation.scala new file mode 100644 index 00000000..f499da41 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelation.scala @@ -0,0 +1,99 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.reader + +import com.vesoft.nebula.connector.nebula.MetaProvider +import com.vesoft.nebula.connector.{DataTypeEnum, NebulaOptions, NebulaUtils} +import com.vesoft.nebula.meta.ColumnDef +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.sources.{BaseRelation, TableScan} +import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType} +import org.slf4j.LoggerFactory + +import scala.collection.mutable.ListBuffer + +case class NebulaRelation(override val sqlContext: SQLContext, nebulaOptions: NebulaOptions) + extends BaseRelation + with TableScan { + private val LOG = LoggerFactory.getLogger(this.getClass) + + protected lazy val datasetSchema: StructType = getSchema(nebulaOptions) + + override val needConversion: Boolean = false + + // reuse the lazily computed schema; calling getSchema again would open a new MetaProvider on every access + override def schema: StructType = datasetSchema + + /** + * return the dataset's schema: the configured cols in returnCols, or all properties in nebula when returnCols is empty + */ + private def getSchema(nebulaOptions: NebulaOptions): StructType = { + val returnCols = nebulaOptions.getReturnCols + val noColumn = nebulaOptions.noColumn + val fields: ListBuffer[StructField] = new ListBuffer[StructField] + val metaProvider = new MetaProvider( + nebulaOptions.getMetaAddress, + nebulaOptions.timeout, + nebulaOptions.connectionRetry, + nebulaOptions.executionRetry, + nebulaOptions.enableMetaSSL, + nebulaOptions.sslSignType, + nebulaOptions.caSignParam, + nebulaOptions.selfSignParam + ) + + import scala.collection.JavaConverters._ + var schemaCols: Seq[ColumnDef] = Seq() + val isVertex = DataTypeEnum.VERTEX.toString.equalsIgnoreCase(nebulaOptions.dataType) + + // construct vertex or edge default prop + if (isVertex) { + fields.append(DataTypes.createStructField("_vertexId", DataTypes.StringType, false)) + } else { + fields.append(DataTypes.createStructField("_srcId", DataTypes.StringType, false)) + fields.append(DataTypes.createStructField("_dstId", DataTypes.StringType, false)) + fields.append(DataTypes.createStructField("_rank", DataTypes.LongType, false)) + } + + var dataSchema: StructType = null + // read no column + if (noColumn) { + dataSchema = new StructType(fields.toArray) + return dataSchema + } + // get tag schema or edge schema + val schema = if (isVertex) { + metaProvider.getTag(nebulaOptions.spaceName, nebulaOptions.label) + } else { + metaProvider.getEdge(nebulaOptions.spaceName, nebulaOptions.label) + } + + schemaCols = schema.columns.asScala + + // read all columns + if (returnCols.isEmpty) { + schemaCols.foreach(columnDef => { + LOG.info(s"prop name ${new String(columnDef.getName)}, type ${columnDef.getType.getType}") + fields.append( + DataTypes.createStructField(new String(columnDef.getName), + NebulaUtils.convertDataType(columnDef.getType), + true)) + }) + } else { + for (col: String <- returnCols) { + fields.append( + DataTypes + .createStructField(col, NebulaUtils.getColDataType(schemaCols.toList, col), true)) + } + } + dataSchema = new StructType(fields.toArray) + dataSchema + } + + override def buildScan(): RDD[Row] = { + new NebulaRDD(sqlContext, nebulaOptions, datasetSchema).asInstanceOf[RDD[Row]] + } +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelationProvider.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelationProvider.scala new file mode 100644 index 00000000..0fb86747 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaRelationProvider.scala @@ -0,0 +1,29 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.reader + +import com.vesoft.nebula.connector.{NebulaOptions, OperaType} +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider} + +class NebulaRelationProvider extends RelationProvider with DataSourceRegister { + + /** + * The string that represents the format that nebula data source provider uses. + */ + override def shortName(): String = "nebula" + + /** + * Returns a new base relation with the given parameters. + * You can see it as the reader.
+ */ + override def createRelation(sqlContext: SQLContext, + parameters: Map[String, String]): BaseRelation = { + val nebulaOptions = new NebulaOptions(parameters, OperaType.READ) + NebulaRelation(sqlContext, nebulaOptions) + } + +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaVertexReader.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaVertexReader.scala new file mode 100644 index 00000000..c27dc339 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/reader/NebulaVertexReader.scala @@ -0,0 +1,78 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.reader + +import com.vesoft.nebula.client.storage.scan.{ScanVertexResult, ScanVertexResultIterator} +import com.vesoft.nebula.connector.NebulaOptions +import org.apache.spark.Partition +import org.apache.spark.sql.types.StructType +import org.slf4j.LoggerFactory +import scala.collection.JavaConverters._ + +class NebulaVertexReader(split: Partition, nebulaOptions: NebulaOptions, schema: StructType) + extends NebulaIterator(split, nebulaOptions, schema) { + + private val LOG = LoggerFactory.getLogger(this.getClass) + + private var responseIterator: ScanVertexResultIterator = _ + + override def hasNext: Boolean = { + if (dataIterator == null && responseIterator == null && !scanPartIterator.hasNext) + return false + + var continue: Boolean = false + var break: Boolean = false + while ((dataIterator == null || !dataIterator.hasNext) && !break) { + resultValues.clear() + continue = false + if (responseIterator == null || !responseIterator.hasNext) { + if (scanPartIterator.hasNext) { + try { + if (nebulaOptions.noColumn) { + responseIterator = storageClient.scanVertex(nebulaOptions.spaceName, + scanPartIterator.next(), + nebulaOptions.label, + nebulaOptions.limit, + 0, + Long.MaxValue, + true, + true) + } else { + responseIterator = storageClient.scanVertex(nebulaOptions.spaceName, + scanPartIterator.next(), + nebulaOptions.label, + nebulaOptions.getReturnCols.asJava, + nebulaOptions.limit, + 0, + Long.MaxValue, + true, + true) + } + } catch { + case e: Exception => + LOG.error(s"Exception scanning vertex ${nebulaOptions.label}", e) + storageClient.close() + throw new Exception(e.getMessage, e) + } + // jump to the next loop + continue = true + } + // break while loop + break = !continue + } else { + val next: ScanVertexResult = responseIterator.next + if (!next.isEmpty) { + dataIterator = next.getVertexTableRows.iterator().asScala + } + } + } + + if (dataIterator == null) { + return false + } + dataIterator.hasNext + } +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaCommitMessage.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaCommitMessage.scala new file mode 100644 index 00000000..841fb916 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaCommitMessage.scala @@ -0,0 +1,8 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +package com.vesoft.nebula.connector.writer + +case class NebulaCommitMessage(partitionId: Int, executeStatements: List[String]) diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala new file mode 100644 index 00000000..a849f5b1 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala @@ -0,0 +1,109 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.writer + +import com.vesoft.nebula.connector.{KeyPolicy, NebulaEdge, NebulaEdges, NebulaOptions, WriteMode} +import org.apache.spark.TaskContext +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.StructType +import org.slf4j.LoggerFactory + +import scala.collection.mutable.ListBuffer + +class NebulaEdgeWriter(nebulaOptions: NebulaOptions, + srcIndex: Int, + dstIndex: Int, + rankIndex: Option[Int], + schema: StructType) + extends NebulaWriter(nebulaOptions, schema) { + + private val LOG = LoggerFactory.getLogger(this.getClass) + + val rankIdx = if (rankIndex.isDefined) rankIndex.get else -1 + val propNames = NebulaExecutor.assignEdgePropNames(schema, + srcIndex, + dstIndex, + rankIdx, + nebulaOptions.srcAsProp, + nebulaOptions.dstAsProp, + nebulaOptions.rankAsProp) + val fieldTypMap: Map[String, Integer] = + if (nebulaOptions.writeMode == WriteMode.DELETE) Map[String, Integer]() + else metaProvider.getEdgeSchema(nebulaOptions.spaceName, nebulaOptions.label) + + val srcPolicy = + if (nebulaOptions.srcPolicy.isEmpty) Option.empty + else Option(KeyPolicy.withName(nebulaOptions.srcPolicy)) + val dstPolicy = { + if (nebulaOptions.dstPolicy.isEmpty) Option.empty + else Option(KeyPolicy.withName(nebulaOptions.dstPolicy)) + } + + /** buffer to save batch edges */ + var edges: ListBuffer[NebulaEdge] = new ListBuffer() + + prepareSpace() + + override def writeData(iterator: Iterator[Row]): NebulaCommitMessage = { + while (iterator.hasNext) { + val internalRow = rowEncoder.toRow(iterator.next()) + write(internalRow) + } + if (edges.nonEmpty) { + execute() + } + graphProvider.close() + metaProvider.close() + NebulaCommitMessage(TaskContext.getPartitionId(), failedExecs.toList) + } + + /** + * write one edge record to buffer + */ + override def write(row: InternalRow): Unit = { + val srcId = NebulaExecutor.extraID(schema, row, srcIndex, srcPolicy, isVidStringType) + val dstId = NebulaExecutor.extraID(schema, row, dstIndex, dstPolicy, isVidStringType) + val rank = + if (rankIndex.isEmpty) Option.empty + else Option(NebulaExecutor.extraRank(schema, row, rankIndex.get)) + val values = + if (nebulaOptions.writeMode == WriteMode.DELETE) List() + else + NebulaExecutor.assignEdgeValues(schema, + row, + srcIndex, + dstIndex, + rankIdx, + nebulaOptions.srcAsProp, + nebulaOptions.dstAsProp, + nebulaOptions.rankAsProp, + fieldTypMap) + val nebulaEdge = NebulaEdge(srcId, dstId, rank, values) + edges.append(nebulaEdge) + if (edges.size >= nebulaOptions.batch) { + execute() + } + } + + /** + * submit buffer edges to nebula + */ + def execute(): Unit = { + val nebulaEdges = NebulaEdges(propNames, edges.toList, srcPolicy, dstPolicy) + val exec = nebulaOptions.writeMode match { + case WriteMode.INSERT => NebulaExecutor.toExecuteSentence(nebulaOptions.label, nebulaEdges) 
+ case WriteMode.UPDATE => + NebulaExecutor.toUpdateExecuteStatement(nebulaOptions.label, nebulaEdges) + case WriteMode.DELETE => + NebulaExecutor.toDeleteExecuteStatement(nebulaOptions.label, nebulaEdges) + case _ => + throw new IllegalArgumentException(s"write mode ${nebulaOptions.writeMode} not supported.") + } + edges.clear() + submit(exec) + } +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaInsertableRelation.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaInsertableRelation.scala new file mode 100644 index 00000000..b685a339 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaInsertableRelation.scala @@ -0,0 +1,13 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.writer + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.sources.InsertableRelation + +class NebulaInsertableRelation extends InsertableRelation { + override def insert(data: DataFrame, overwrite: Boolean): Unit = {} +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala new file mode 100644 index 00000000..28c009a8 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala @@ -0,0 +1,94 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.writer + +import com.vesoft.nebula.connector.{ + KeyPolicy, + NebulaOptions, + NebulaVertex, + NebulaVertices, + WriteMode +} +import org.apache.spark.TaskContext +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.StructType +import org.slf4j.LoggerFactory + +import scala.collection.mutable.ListBuffer + +class NebulaVertexWriter(nebulaOptions: NebulaOptions, vertexIndex: Int, schema: StructType) + extends NebulaWriter(nebulaOptions, schema) { + + private val LOG = LoggerFactory.getLogger(this.getClass) + + val propNames = NebulaExecutor.assignVertexPropNames(schema, vertexIndex, nebulaOptions.vidAsProp) + val fieldTypMap: Map[String, Integer] = + if (nebulaOptions.writeMode == WriteMode.DELETE) Map[String, Integer]() + else metaProvider.getTagSchema(nebulaOptions.spaceName, nebulaOptions.label) + + val policy = { + if (nebulaOptions.vidPolicy.isEmpty) Option.empty + else Option(KeyPolicy.withName(nebulaOptions.vidPolicy)) + } + + /** buffer to save batch vertices */ + var vertices: ListBuffer[NebulaVertex] = new ListBuffer() + + prepareSpace() + + override def writeData(iterator: Iterator[Row]): NebulaCommitMessage = { + while (iterator.hasNext) { + val internalRow = rowEncoder.toRow(iterator.next()) + write(internalRow) + } + if (vertices.nonEmpty) { + execute() + } + graphProvider.close() + metaProvider.close() + NebulaCommitMessage(TaskContext.getPartitionId(), failedExecs.toList) + } + + /** + * write one vertex row to buffer + */ + override def write(row: InternalRow): Unit = { + val vertex = + NebulaExecutor.extraID(schema, row, vertexIndex, policy, isVidStringType) + val values = + if (nebulaOptions.writeMode == WriteMode.DELETE) List() + else + NebulaExecutor.assignVertexPropValues(schema, + row, + 
vertexIndex, + nebulaOptions.vidAsProp, + fieldTypMap) + val nebulaVertex = NebulaVertex(vertex, values) + vertices.append(nebulaVertex) + if (vertices.size >= nebulaOptions.batch) { + execute() + } + } + + /** + * submit buffered vertices to nebula + */ + private def execute(): Unit = { + val nebulaVertices = NebulaVertices(propNames, vertices.toList, policy) + val exec = nebulaOptions.writeMode match { + case WriteMode.INSERT => NebulaExecutor.toExecuteSentence(nebulaOptions.label, nebulaVertices) + case WriteMode.UPDATE => + NebulaExecutor.toUpdateExecuteStatement(nebulaOptions.label, nebulaVertices) + case WriteMode.DELETE => + NebulaExecutor.toDeleteExecuteStatement(nebulaVertices, nebulaOptions.deleteEdge) + case _ => + throw new IllegalArgumentException(s"write mode ${nebulaOptions.writeMode} not supported.") + } + vertices.clear() + submit(exec) + } +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriter.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriter.scala new file mode 100644 index 00000000..ca3f3725 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriter.scala @@ -0,0 +1,74 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.writer + +import java.util.concurrent.TimeUnit + +import com.google.common.util.concurrent.RateLimiter +import com.vesoft.nebula.connector.NebulaOptions +import com.vesoft.nebula.connector.nebula.{GraphProvider, MetaProvider, VidType} +import org.apache.spark.TaskContext +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.types.StructType +import org.slf4j.LoggerFactory + +import scala.collection.mutable.ListBuffer + +abstract class NebulaWriter(nebulaOptions: NebulaOptions, schema: StructType) extends Serializable { + private val LOG = LoggerFactory.getLogger(this.getClass) + + protected val rowEncoder: ExpressionEncoder[Row] = RowEncoder(schema).resolveAndBind() + protected val failedExecs: ListBuffer[String] = new ListBuffer[String] + + val metaProvider = new MetaProvider( + nebulaOptions.getMetaAddress, + nebulaOptions.timeout, + nebulaOptions.connectionRetry, + nebulaOptions.executionRetry, + nebulaOptions.enableMetaSSL, + nebulaOptions.sslSignType, + nebulaOptions.caSignParam, + nebulaOptions.selfSignParam + ) + val graphProvider = new GraphProvider( + nebulaOptions.getGraphAddress, + nebulaOptions.timeout, + nebulaOptions.enableGraphSSL, + nebulaOptions.sslSignType, + nebulaOptions.caSignParam, + nebulaOptions.selfSignParam + ) + val isVidStringType = metaProvider.getVidType(nebulaOptions.spaceName) == VidType.STRING + + def prepareSpace(): Unit = { + graphProvider.switchSpace(nebulaOptions.user, nebulaOptions.passwd, nebulaOptions.spaceName) + } + + def submit(exec: String): Unit = { + @transient val rateLimiter = RateLimiter.create(nebulaOptions.rateLimit) + if (rateLimiter.tryAcquire(nebulaOptions.rateTimeOut, TimeUnit.MILLISECONDS)) { + val result = graphProvider.submit(exec) + if (!result.isSucceeded) { + failedExecs.append(exec) + LOG.error(s"failed to write ${exec}: " + result.getErrorMessage) + } else { + LOG.info(s"batch write succeeded") + LOG.debug(s"batch write succeeded: ${exec}") + } + } else { + failedExecs.append(exec) + LOG.error(s"failed to acquire rate limiter for statement ${exec}") + } + }
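+
+  /** buffer one row on the executor side; concrete writers flush once the buffer reaches nebulaOptions.batch */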
+  def write(row: InternalRow): Unit + + /** write the dataframe rows of one partition into nebula */ + def writeData(iterator: Iterator[Row]): NebulaCommitMessage + +} diff --git a/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriterResultRelation.scala b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriterResultRelation.scala new file mode 100644 index 00000000..ca0054c5 --- /dev/null +++ b/nebula-spark-connector_2.2/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriterResultRelation.scala @@ -0,0 +1,17 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.writer + +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.sources.BaseRelation +import org.apache.spark.sql.types.StructType + +class NebulaWriterResultRelation(SQLContext: SQLContext, userDefSchema: StructType) + extends BaseRelation { + override def sqlContext: SQLContext = SQLContext + + override def schema: StructType = userDefSchema +} diff --git a/nebula-spark-connector_2.2/src/test/resources/docker-compose.yaml b/nebula-spark-connector_2.2/src/test/resources/docker-compose.yaml new file mode 100644 index 00000000..bce1badb --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/resources/docker-compose.yaml @@ -0,0 +1,353 @@ +version: '3.4' +services: + metad0: + image: vesoft/nebula-metad:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --local_ip=172.28.1.1 + - --ws_ip=172.28.1.1 + - --port=9559 + - --data_path=/data/meta + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.1.1:11000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9559:9559" + - 11000 + - 11002 + volumes: + - ./data/meta0:/data/meta:Z + - ./logs/meta0:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.1.1 + restart: on-failure + cap_add: + - SYS_PTRACE + + metad1: + image: vesoft/nebula-metad:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --local_ip=172.28.1.2 + - --ws_ip=172.28.1.2 + - --port=9559 + - --data_path=/data/meta + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.1.2:11000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9560:9559" + - 11000 + - 11002 + volumes: + - ./data/meta1:/data/meta:Z + - ./logs/meta1:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.1.2 + restart: on-failure + cap_add: + - SYS_PTRACE + + metad2: + image: vesoft/nebula-metad:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --local_ip=172.28.1.3 + - --ws_ip=172.28.1.3 + - --port=9559 + - --data_path=/data/meta + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.1.3:11000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9561:9559" + - 11000 + - 11002 + volumes: + - ./data/meta2:/data/meta:Z + - ./logs/meta2:/logs:Z + networks: + nebula-net: + ipv4_address: 
172.28.1.3 + restart: on-failure + cap_add: + - SYS_PTRACE + + storaged0: + image: vesoft/nebula-storaged:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --local_ip=172.28.2.1 + - --ws_ip=172.28.2.1 + - --port=9779 + - --data_path=/data/storage + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + depends_on: + - metad0 + - metad1 + - metad2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.2.1:12000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9779:9779" + - 12000 + - 12002 + volumes: + - ./data/storage0:/data/storage:Z + - ./logs/storage0:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.2.1 + restart: on-failure + cap_add: + - SYS_PTRACE + + storaged1: + image: vesoft/nebula-storaged:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --local_ip=172.28.2.2 + - --ws_ip=172.28.2.2 + - --port=9779 + - --data_path=/data/storage + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + depends_on: + - metad0 + - metad1 + - metad2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.2.2:12000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9780:9779" + - 12000 + - 12002 + volumes: + - ./data/storage1:/data/storage:Z + - ./logs/storage1:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.2.2 + restart: on-failure + cap_add: + - SYS_PTRACE + + storaged2: + image: vesoft/nebula-storaged:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --local_ip=172.28.2.3 + - --ws_ip=172.28.2.3 + - --port=9779 + - --data_path=/data/storage + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + depends_on: + - metad0 + - metad1 + - metad2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.2.3:12000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9781:9779" + - 12000 + - 12002 + volumes: + - ./data/storage2:/data/storage:Z + - ./logs/storage2:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.2.3 + restart: on-failure + cap_add: + - SYS_PTRACE + + graphd0: + image: vesoft/nebula-graphd:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --port=9669 + - --ws_ip=172.28.3.1 + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + depends_on: + - metad0 + - metad1 + - metad2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.3.1:13000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9669:9669" + - 13000 + - 13002 + volumes: + - ./logs/graph0:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.3.1 + restart: on-failure + cap_add: + - SYS_PTRACE + + graphd1: + image: vesoft/nebula-graphd:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --port=9669 + - --ws_ip=172.28.3.2 + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + depends_on: + - metad0 + - metad1 + - metad2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.3.2:13000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9670:9669" + - 13000 
+ - 13002 + volumes: + - ./logs/graph1:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.3.2 + restart: on-failure + cap_add: + - SYS_PTRACE + + graphd2: + image: vesoft/nebula-graphd:nightly + environment: + USER: root + TZ: "${TZ}" + command: + - --meta_server_addrs=172.28.1.1:9559,172.28.1.2:9559,172.28.1.3:9559 + - --port=9669 + - --ws_ip=172.28.3.3 + - --log_dir=/logs + - --v=0 + - --minloglevel=0 + - --heartbeat_interval_secs=2 + depends_on: + - metad0 + - metad1 + - metad2 + healthcheck: + test: ["CMD", "curl", "-f", "http://172.28.3.3:13000/status"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 20s + ports: + - "9671:9669" + - 13000 + - 13002 + volumes: + - ./logs/graph2:/logs:Z + networks: + nebula-net: + ipv4_address: 172.28.3.3 + restart: on-failure + cap_add: + - SYS_PTRACE + + console: + image: vesoft/nebula-console:nightly + entrypoint: "" + command: + - sh + - -c + - | + sleep 3 && + nebula-console -addr graphd0 -port 9669 -u root -p nebula -e 'ADD HOSTS "172.28.2.1":9779,"172.28.2.2":9779,"172.28.2.3":9779' && + sleep 36000 + depends_on: + - graphd0 + networks: + - nebula-net + +networks: + nebula-net: + ipam: + driver: default + config: + - subnet: 172.28.0.0/16 diff --git a/nebula-spark-connector_2.2/src/test/resources/edge.csv b/nebula-spark-connector_2.2/src/test/resources/edge.csv new file mode 100644 index 00000000..2a2380fe --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/resources/edge.csv @@ -0,0 +1,14 @@ +id1,id2,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14 +1,2,Tom,tom,10,20,30,40,2021-01-27,2021-01-01T12:10:10,43535232,true,1.0,2.0,10:10:10,POINT(1 2) +2,3,Jina,Jina,11,21,31,41,2021-01-28,2021-01-02T12:10:10,43535232,false,1.1,2.1,11:10:10,POINT(3 4) +3,4,Tim,Tim,12,22,32,42,2021-01-29,2021-01-03T12:10:10,43535232,false,1.2,2.2,12:10:10,POINT(5 6) +4,5,张三,张三,13,23,33,43,2021-01-30,2021-01-04T12:10:10,43535232,true,1.3,2.3,13:10:10,POINT(6 7) +5,6,李四,李四,14,24,34,44,2021-02-01,2021-01-05T12:10:10,43535232,false,1.4,2.4,14:10:10,POINT(1 5) +6,7,王五,王五,15,25,35,45,2021-02-02,2021-01-06T12:10:10,0,false,1.5,2.5,15:10:10,"LINESTRING(1 3, 4.7 73.23)" +7,1,Jina,Jina,16,26,36,46,2021-02-03,2021-01-07T12:10:10,43535232,true,1.6,2.6,16:10:10,"LINESTRING(1 3, 4.7 73.23)" +8,1,Jina,Jina,17,27,37,47,2021-02-04,2021-01-08T12:10:10,43535232,false,1.7,2.7,17:10:10,"LINESTRING(1 3, 4.7 73.23)" +9,1,Jina,Jina,18,28,38,48,2021-02-05,2021-01-09T12:10:10,43535232,true,1.8,2.8,18:10:10,"LINESTRING(1 3, 4.7 73.23)" +10,2,Jina,Jina,19,29,39,49,2021-02-06,2021-01-10T12:10:10,43535232,false,1.9,2.9,19:10:10,"LINESTRING(1 3, 4.7 73.23)" +-1,5,Jina,Jina,20,30,40,50,2021-02-07,2021-02-11T12:10:10,43535232,false,2.0,3.0,20:10:10,"POLYGON((0 1, 1 2, 2 3, 0 1))" +-2,6,Jina,Jina,21,31,41,51,2021-02-08,2021-03-12T12:10:10,43535232,false,2.1,3.1,21:10:10,"POLYGON((0 1, 1 2, 2 3, 0 1))" +-3,7,Jina,Jina,22,32,42,52,2021-02-09,2021-04-13T12:10:10,43535232,false,2.2,3.2,22:10:10,"POLYGON((0 1, 1 2, 2 3, 0 1))" diff --git a/nebula-spark-connector_2.2/src/test/resources/log4j.properties b/nebula-spark-connector_2.2/src/test/resources/log4j.properties new file mode 100644 index 00000000..913391db --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/resources/log4j.properties @@ -0,0 +1,6 @@ +# Global logging configuration +log4j.rootLogger=INFO, stdout +# Console output... 
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%t] - %m%n diff --git a/nebula-spark-connector_2.2/src/test/resources/vertex.csv b/nebula-spark-connector_2.2/src/test/resources/vertex.csv new file mode 100644 index 00000000..2b74dfa0 --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/resources/vertex.csv @@ -0,0 +1,14 @@ +id,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15 +1,Tom,tom,10,20,30,40,2021-01-27,2021-01-01T12:10:10,43535232,true,1.0,2.0,10:10:10,POINT(1 2),"duration({years:1,months:1,seconds:1})" +2,Jina,Jina,11,21,31,41,2021-01-28,2021-01-02T12:10:10,43535232,false,1.1,2.1,11:10:10,POINT(3 4),"duration({years:1,months:1,seconds:1})" +3,Tim,Tim,12,22,32,42,2021-01-29,2021-01-03T12:10:10,43535232,false,1.2,2.2,12:10:10,POINT(5 6),"duration({years:1,months:1,seconds:1})" +4,张三,张三,13,23,33,43,2021-01-30,2021-01-04T12:10:10,43535232,true,1.3,2.3,13:10:10,POINT(6 7),"duration({years:1,months:1,seconds:1})" +5,李四,李四,14,24,34,44,2021-02-01,2021-01-05T12:10:10,43535232,false,1.4,2.4,14:10:10,POINT(1 5),"duration({years:1,months:1,seconds:1})" +6,王五,王五,15,25,35,45,2021-02-02,2021-01-06T12:10:10,0,false,1.5,2.5,15:10:10,"LINESTRING(1 3, 4.7 73.23)","duration({years:1,months:1,seconds:1})" +7,Jina,Jina,16,26,36,46,2021-02-03,2021-01-07T12:10:10,43535232,true,1.6,2.6,16:10:10,"LINESTRING(1 3, 4.7 73.23)","duration({years:1,months:1,seconds:1})" +8,Jina,Jina,17,27,37,47,2021-02-04,2021-01-08T12:10:10,43535232,false,1.7,2.7,17:10:10,"LINESTRING(1 3, 4.7 73.23)","duration({years:1,months:1,seconds:1})" +9,Jina,Jina,18,28,38,48,2021-02-05,2021-01-09T12:10:10,43535232,true,1.8,2.8,18:10:10,"LINESTRING(1 3, 4.7 73.23)","duration({years:1,months:1,seconds:1})" +10,Jina,Jina,19,29,39,49,2021-02-06,2021-01-10T12:10:10,43535232,false,1.9,2.9,19:10:10,"LINESTRING(1 3, 4.7 73.23)","duration({years:1,months:1,seconds:1})" +-1,Jina,Jina,20,30,40,50,2021-02-07,2021-02-11T12:10:10,43535232,false,2.0,3.0,20:10:10,"POLYGON((0 1, 1 2, 2 3, 0 1))","duration({years:1,months:1,seconds:1})" +-2,Jina,Jina,21,31,41,51,2021-02-08,2021-03-12T12:10:10,43535232,false,2.1,3.1,21:10:10,"POLYGON((0 1, 1 2, 2 3, 0 1))","duration({years:1,months:1,seconds:1})" +-3,Jina,Jina,22,32,42,52,2021-02-09,2021-04-13T12:10:10,43535232,false,2.2,3.2,22:10:10,"POLYGON((0 1, 1 2, 2 3, 0 1))","duration({years:1,months:1,seconds:1})" diff --git a/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala new file mode 100644 index 00000000..b8f0e72e --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/NebulaGraphMock.scala @@ -0,0 +1,192 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +package com.vesoft.nebula.connector.mock + +import com.vesoft.nebula.client.graph.NebulaPoolConfig +import com.vesoft.nebula.client.graph.data.HostAddress +import com.vesoft.nebula.client.graph.net.NebulaPool +import org.apache.log4j.Logger + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer + +class NebulaGraphMock { + private[this] val LOG = Logger.getLogger(this.getClass) + + @transient val nebulaPoolConfig = new NebulaPoolConfig + @transient val pool: NebulaPool = new NebulaPool + val address = new ListBuffer[HostAddress]() + address.append(new HostAddress("127.0.0.1", 9669)) + + val randAddr = scala.util.Random.shuffle(address) + pool.init(randAddr.asJava, nebulaPoolConfig) + + def mockStringIdGraph(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_string(partition_num=10,vid_type=fixed_string(8));" + + "USE test_string;" + "CREATE TAG IF NOT EXISTS person(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE EDGE IF NOT EXISTS friend(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE TAG IF NOT EXISTS geo_shape(geo geography);" + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create string type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + + Thread.sleep(10000) + val insertTag = + "INSERT VERTEX person(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " \"1\":(\"person1\", \"person1\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"2\":(\"person2\", \"person2\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"3\":(\"person3\", \"person3\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"4\":(\"person4\", \"person4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"5\":(\"person5\", \"person5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"6\":(\"person6\", \"person6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"7\":(\"person7\", \"person7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"8\":(\"person8\", \"person8\", 18, 900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"9\":(\"person9\", \"person9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"10\":(\"person10\", 
\"person10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"11\":(\"person11\", \"person11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"12\":(\"person12\", \"person11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-1\":(\"person00\", \"person00\", 23, 1400, 13000, 1388888, date(\"2021-01-13\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-2\":(\"person01\", \"person01\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-3\":(\"person02\", \"person02\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"19\":(\"person19\", \"person22\", 25, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"22\":(\"person22\", \"person22\", 26, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"));" + + "INSERT VERTEX geo_shape(geo) VALUES \"100\":(ST_GeogFromText(\"POINT(1 2)\")), \"101\":(ST_GeogFromText(\"LINESTRING(1 2, 3 4)\")), \"102\":(ST_GeogFromText(\"POLYGON((0 1, 1 2, 2 3, 0 1))\"))" + val insertTagResp = session.execute(insertTag) + if (!insertTagResp.isSucceeded) { + close() + LOG.error("insert vertex for string type space failed," + insertTagResp.getErrorMessage) + sys.exit(-1) + } + + val insertEdge = "INSERT EDGE friend(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " \"1\" -> \"2\":(\"friend1\", \"friend2\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"2\" -> \"3\":(\"friend2\", \"friend3\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"3\" -> \"4\":(\"friend3\", \"friend4\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"4\" -> \"5\":(\"friend4\", \"friend4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"5\" -> \"6\":(\"friend5\", \"friend5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"6\" -> \"7\":(\"friend6\", \"friend6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"7\" -> \"8\":(\"friend7\", \"friend7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"8\" -> \"9\":(\"friend8\", \"friend8\", 18, 
900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"9\" -> \"10\":(\"friend9\", \"friend9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"10\" -> \"11\":(\"friend10\", \"friend10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"11\" -> \"12\":(\"friend11\", \"friend11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " \"12\" -> \"1\":(\"friend12\", \"friend11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-1\" -> \"11\":(\"friend13\", \"friend12\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " \"-2\" -> \"-1\":(\"friend14\", \"friend13\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))" + val insertEdgeResp = session.execute(insertEdge) + if (!insertEdgeResp.isSucceeded) { + close() + LOG.error("insert edge for string type space failed," + insertEdgeResp.getErrorMessage) + sys.exit(-1) + } + } + + def mockIntIdGraph(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_int(partition_num=10, vid_type=int64);" + + "USE test_int;" + "CREATE TAG IF NOT EXISTS person(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE EDGE IF NOT EXISTS friend(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time);" + + "CREATE TAG IF NOT EXISTS geo_shape(geo geography);" + + "CREATE TAG IF NOT EXISTS tag_duration(col duration);" + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create int type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + + Thread.sleep(10000) + val insertTag = + "INSERT VERTEX person(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " 1:(\"person1\", \"person1\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 2:(\"person2\", \"person2\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 3:(\"person3\", \"person3\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 4:(\"person4\", \"person4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 5:(\"person5\", \"person5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), 
datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 6:(\"person6\", \"person6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 7:(\"person7\", \"person7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 8:(\"person8\", \"person8\", 18, 900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 9:(\"person9\", \"person9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 10:(\"person10\", \"person10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 11:(\"person11\", \"person11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 12:(\"person12\", \"person11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " -1:(\"person00\", \"person00\", 23, 1400, 13000, 1388888, date(\"2021-01-13\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " -2:(\"person01\", \"person01\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " -3:(\"person02\", \"person02\", 24, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 19:(\"person19\", \"person22\", 25, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 22:(\"person22\", \"person22\", 26, 1500, 14000, 1488888, date(\"2021-01-14\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\")), " + + " 0:(null, null, null, null, null, null, null, null, null, null, null, null, null);" + + "INSERT VERTEX geo_shape(geo) VALUES 100:(ST_GeogFromText(\"POINT(1 2)\")), 101:(ST_GeogFromText(\"LINESTRING(1 2, 3 4)\")), 102:(ST_GeogFromText(\"POLYGON((0 1, 1 2, 2 3, 0 1))\"));" + + "INSERT VERTEX tag_duration(col) VALUES 200:(duration({months:1, seconds:100, microseconds:20}))" + + val insertTagResp = session.execute(insertTag) + if (!insertTagResp.isSucceeded) { + close() + LOG.error("insert vertex for int type space failed," + insertTagResp.getErrorMessage) + sys.exit(-1) + } + + val insertEdge = "INSERT EDGE friend(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13) VALUES " + + " 1 -> 2:(\"friend1\", \"friend2\", 11, 200, 1000, 188888, date(\"2021-01-01\"), datetime(\"2021-01-01T12:00:00\"),timestamp(\"2021-01-01T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 2 -> 3:(\"friend2\", \"friend3\", 12, 300, 2000, 288888, date(\"2021-01-02\"), datetime(\"2021-01-02T12:00:00\"),timestamp(\"2021-01-02T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 3 -> 4:(\"friend3\", 
\"friend4\", 13, 400, 3000, 388888, date(\"2021-01-03\"), datetime(\"2021-01-03T12:00:00\"),timestamp(\"2021-01-03T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 4 -> 5:(\"friend4\", \"friend4\", 14, 500, 4000, 488888, date(\"2021-01-04\"), datetime(\"2021-01-04T12:00:00\"),timestamp(\"2021-01-04T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 5 -> 6:(\"friend5\", \"friend5\", 15, 600, 5000, 588888, date(\"2021-01-05\"), datetime(\"2021-01-05T12:00:00\"),timestamp(\"2021-01-05T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 6 -> 7:(\"friend6\", \"friend6\", 16, 700, 6000, 688888, date(\"2021-01-06\"), datetime(\"2021-01-06T12:00:00\"),timestamp(\"2021-01-06T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 7 -> 8:(\"friend7\", \"friend7\", 17, 800, 7000, 788888, date(\"2021-01-07\"), datetime(\"2021-01-07T12:00:00\"),timestamp(\"2021-01-07T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 8 -> 9:(\"friend8\", \"friend8\", 18, 900, 8000, 888888, date(\"2021-01-08\"), datetime(\"2021-01-08T12:00:00\"),timestamp(\"2021-01-08T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 9 -> 10:(\"friend9\", \"friend9\", 19, 1000, 9000, 988888, date(\"2021-01-09\"), datetime(\"2021-01-09T12:00:00\"),timestamp(\"2021-01-09T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 10 -> 11:(\"friend10\", \"friend10\", 20, 1100, 10000, 1088888, date(\"2021-01-10\"), datetime(\"2021-01-10T12:00:00\"),timestamp(\"2021-01-10T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))," + + " 11 -> 12:(\"friend11\", \"friend11\", 21, 1200, 11000, 1188888, date(\"2021-01-11\"), datetime(\"2021-01-11T12:00:00\"),timestamp(\"2021-01-11T12:00:00\"), false, 1.0, 2.0, time(\"12:01:01\"))," + + " 12 -> 1:(\"friend12\", \"friend11\", 22, 1300, 12000, 1288888, date(\"2021-01-12\"), datetime(\"2021-01-12T12:00:00\"),timestamp(\"2021-01-12T12:00:00\"), true, 1.0, 2.0, time(\"12:01:01\"))" + val insertEdgeResp = session.execute(insertEdge) + if (!insertEdgeResp.isSucceeded) { + close() + LOG.error("insert edge for int type space failed," + insertEdgeResp.getErrorMessage) + sys.exit(-1) + } + } + + def mockStringIdGraphSchema(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_write_string(partition_num=10,vid_type=fixed_string(8));" + + "USE test_write_string;" + + "CREATE TAG IF NOT EXISTS person_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography, col15 duration);" + + "CREATE EDGE IF NOT EXISTS friend_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography);"; + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create string type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + } + + def mockIntIdGraphSchema(): Unit = { + val session = pool.getSession("root", "nebula", true) + + val createSpace = "CREATE SPACE IF NOT EXISTS test_write_int(partition_num=10, vid_type=int64);" + + "USE test_write_int;" + + "CREATE TAG IF NOT EXISTS person_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography, col15 duration);" + + 
"CREATE EDGE IF NOT EXISTS friend_connector(col1 string, col2 fixed_string(8), col3 int8, col4 int16, col5 int32, col6 int64, col7 date, col8 datetime, col9 timestamp, col10 bool, col11 double, col12 float, col13 time, col14 geography);"; + val createResp = session.execute(createSpace) + if (!createResp.isSucceeded) { + close() + LOG.error("create int type space failed," + createResp.getErrorMessage) + sys.exit(-1) + } + } + + def close(): Unit = { + pool.close() + } +} diff --git a/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala new file mode 100644 index 00000000..43da472e --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/mock/SparkMock.scala @@ -0,0 +1,179 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +package com.vesoft.nebula.connector.mock + +import com.facebook.thrift.protocol.TCompactProtocol +import com.vesoft.nebula.connector.connector.NebulaDataFrameWriter +import com.vesoft.nebula.connector.{ + NebulaConnectionConfig, + WriteMode, + WriteNebulaEdgeConfig, + WriteNebulaVertexConfig +} +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession + +object SparkMock { + + /** + * write nebula vertex with insert mode + */ + def writeVertex(): Unit = { + val sparkConf = new SparkConf + sparkConf + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) + val spark = SparkSession + .builder() + .master("local") + .config(sparkConf) + .getOrCreate() + + val df = spark.read + .option("header", true) + .csv("src/test/resources/vertex.csv") + + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withGraphAddress("127.0.0.1:9669") + .withConenctionRetry(2) + .build() + val nebulaWriteVertexConfig: WriteNebulaVertexConfig = WriteNebulaVertexConfig + .builder() + .withSpace("test_write_string") + .withTag("person_connector") + .withVidField("id") + .withVidAsProp(false) + .withBatch(5) + .build() + df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() + + spark.stop() + } + + /** + * write nebula vertex with delete mode + */ + def deleteVertex(): Unit = { + val sparkConf = new SparkConf + sparkConf + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) + val spark = SparkSession + .builder() + .master("local") + .config(sparkConf) + .getOrCreate() + + val df = spark.read + .option("header", true) + .csv("src/test/resources/vertex.csv") + + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withGraphAddress("127.0.0.1:9669") + .withConenctionRetry(2) + .build() + val nebulaWriteVertexConfig: WriteNebulaVertexConfig = WriteNebulaVertexConfig + .builder() + .withSpace("test_write_string") + .withTag("person_connector") + .withVidField("id") + .withVidAsProp(false) + .withWriteMode(WriteMode.DELETE) + .withBatch(5) + .build() + df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() + + spark.stop() + } + + /** + * write nebula edge with insert mode + */ + def writeEdge(): Unit = { + val sparkConf = new SparkConf + sparkConf + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) + 
val spark = SparkSession + .builder() + .master("local") + .config(sparkConf) + .getOrCreate() + + val df = spark.read + .option("header", true) + .csv("src/test/resources/edge.csv") + + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withGraphAddress("127.0.0.1:9669") + .withConenctionRetry(2) + .build() + val nebulaWriteEdgeConfig: WriteNebulaEdgeConfig = WriteNebulaEdgeConfig + .builder() + .withSpace("test_write_string") + .withEdge("friend_connector") + .withSrcIdField("id1") + .withDstIdField("id2") + .withRankField("col3") + .withRankAsProperty(true) + .withBatch(5) + .build() + df.write.nebula(config, nebulaWriteEdgeConfig).writeEdges() + + spark.stop() + } + + /** + * write nebula edge with delete mode + */ + def deleteEdge(): Unit = { + val sparkConf = new SparkConf + sparkConf + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) + val spark = SparkSession + .builder() + .master("local") + .config(sparkConf) + .getOrCreate() + + val df = spark.read + .option("header", true) + .csv("src/test/resources/edge.csv") + + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withGraphAddress("127.0.0.1:9669") + .withConenctionRetry(2) + .build() + val nebulaWriteEdgeConfig: WriteNebulaEdgeConfig = WriteNebulaEdgeConfig + .builder() + .withSpace("test_write_string") + .withEdge("friend_connector") + .withSrcIdField("id1") + .withDstIdField("id2") + .withRankField("col3") + .withRankAsProperty(true) + .withWriteMode(WriteMode.DELETE) + .withBatch(5) + .build() + df.write.nebula(config, nebulaWriteEdgeConfig).writeEdges() + + spark.stop() + } + +} diff --git a/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/reader/ReadSuite.scala b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/reader/ReadSuite.scala new file mode 100644 index 00000000..39ee72f5 --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/reader/ReadSuite.scala @@ -0,0 +1,340 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +package com.vesoft.nebula.connector.reader + +import com.facebook.thrift.protocol.TCompactProtocol +import com.vesoft.nebula.connector.connector.NebulaDataFrameReader +import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig} +import com.vesoft.nebula.connector.mock.NebulaGraphMock +import org.apache.log4j.BasicConfigurator +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoders, SparkSession} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +class ReadSuite extends AnyFunSuite with BeforeAndAfterAll { + BasicConfigurator.configure() + var sparkSession: SparkSession = null + + override def beforeAll(): Unit = { + val graphMock = new NebulaGraphMock + graphMock.mockIntIdGraph() + graphMock.close() + val sparkConf = new SparkConf + sparkConf + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) + sparkSession = SparkSession + .builder() + .master("local") + .config(sparkConf) + .getOrCreate() + } + + override def afterAll(): Unit = { + sparkSession.stop() + } + + test("read vertex with no properties") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("person") + .withNoColumn(true) + .withLimit(10) + .withPartitionNum(10) + .build() + val vertex = sparkSession.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() + vertex.printSchema() + vertex.show() + assert(vertex.count() == 18) + assert(vertex.schema.fields.length == 1) + } + + test("read vertex with specific properties") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("person") + .withNoColumn(false) + .withReturnCols(List("col1")) + .withLimit(10) + .withPartitionNum(10) + .build() + val vertex = sparkSession.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() + vertex.printSchema() + vertex.show() + assert(vertex.count() == 18) + assert(vertex.schema.fields.length == 2) + + vertex.map(row => { + row.getAs[Long]("_vertexId") match { + case 1L => { + assert(row.getAs[String]("col1").equals("person1")) + } + case 0L => { + assert(row.isNullAt(1)) + } + } + "" + })(Encoders.STRING) + + } + + test("read vertex with all properties") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("person") + .withNoColumn(false) + .withReturnCols(List()) + .withLimit(10) + .withPartitionNum(10) + .build() + val vertex = sparkSession.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() + vertex.printSchema() + vertex.show() + assert(vertex.count() == 18) + assert(vertex.schema.fields.length == 14) + + vertex.map(row => { + row.getAs[Long]("_vertexId") match { + case 1L => { + assert(row.getAs[String]("col1").equals("person1")) + assert(row.getAs[String]("col2").equals("person1")) + assert(row.getAs[Long]("col3") == 11) + assert(row.getAs[Long]("col4") == 200) + assert(row.getAs[Long]("col5") == 1000) + assert(row.getAs[Long]("col6") == 188888) + 
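+          // date/datetime properties come back as formatted strings, timestamp as epoch seconds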
assert(row.getAs[String]("col7").equals("2021-01-01")) + assert(row.getAs[String]("col8").equals("2021-01-01T12:00:00.000")) + assert(row.getAs[Long]("col9") == 1609502400) + assert(row.getAs[Boolean]("col10")) + assert(row.getAs[Double]("col11") < 1.001) + assert(row.getAs[Double]("col12") < 2.001) + assert(row.getAs[String]("col13").equals("12:01:01")) + } + case 0L => { + assert(row.isNullAt(1)) + assert(row.isNullAt(2)) + assert(row.isNullAt(3)) + assert(row.isNullAt(4)) + assert(row.isNullAt(5)) + assert(row.isNullAt(6)) + assert(row.isNullAt(7)) + assert(row.isNullAt(8)) + assert(row.isNullAt(9)) + assert(row.isNullAt(10)) + assert(row.isNullAt(11)) + assert(row.isNullAt(12)) + assert(row.isNullAt(13)) + } + } + "" + })(Encoders.STRING) + } + + test("read vertex for geo_shape") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("geo_shape") + .withNoColumn(false) + .withReturnCols(List("geo")) + .withLimit(10) + .withPartitionNum(10) + .build() + val vertex = sparkSession.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() + vertex.printSchema() + vertex.show() + assert(vertex.count() == 3) + assert(vertex.schema.fields.length == 2) + + vertex.map(row => { + row.getAs[Long]("_vertexId") match { + case 100L => { + assert(row.getAs[String]("geo").equals("POINT(1 2)")) + } + case 101L => { + assert(row.getAs[String]("geo").equals("LINESTRING(1 2, 3 4)")) + } + case 102L => { + assert(row.getAs[String]("geo").equals("POLYGON((0 1, 1 2, 2 3, 0 1))")) + } + } + "" + })(Encoders.STRING) + } + + test("read vertex for tag_duration") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("tag_duration") + .withNoColumn(false) + .withReturnCols(List("col")) + .withLimit(10) + .withPartitionNum(10) + .build() + val vertex = sparkSession.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() + vertex.printSchema() + vertex.show() + assert(vertex.count() == 1) + assert(vertex.schema.fields.length == 2) + + vertex.map(row => { + row.getAs[Long]("_vertexId") match { + case 200L => { + assert( + row.getAs[String]("col").equals("duration({months:1, seconds:100, microseconds:20})")) + } + } + "" + })(Encoders.STRING) + } + + test("read edge with no properties") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("friend") + .withNoColumn(true) + .withLimit(10) + .withPartitionNum(10) + .build() + val edge = sparkSession.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF() + edge.printSchema() + edge.show() + assert(edge.count() == 12) + assert(edge.schema.fields.length == 3) + + edge.map(row => { + row.getAs[Long]("_srcId") match { + case 1L => { + assert(row.getAs[Long]("_dstId") == 2) + assert(row.getAs[Long]("_rank") == 0) + } + } + "" + })(Encoders.STRING) + } + + test("read edge with specific properties") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() 
+ .withSpace("test_int") + .withLabel("friend") + .withNoColumn(false) + .withReturnCols(List("col1")) + .withLimit(10) + .withPartitionNum(10) + .build() + val edge = sparkSession.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF() + edge.printSchema() + edge.show(20) + assert(edge.count() == 12) + assert(edge.schema.fields.length == 4) + edge.map(row => { + row.getAs[Long]("_srcId") match { + case 1L => { + assert(row.getAs[Long]("_dstId") == 2) + assert(row.getAs[Long]("_rank") == 0) + assert(row.getAs[String]("col1").equals("friend1")) + } + } + "" + })(Encoders.STRING) + } + + test("read edge with all properties") { + val config = + NebulaConnectionConfig + .builder() + .withMetaAddress("127.0.0.1:9559") + .withConenctionRetry(2) + .build() + val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig + .builder() + .withSpace("test_int") + .withLabel("friend") + .withNoColumn(false) + .withReturnCols(List()) + .withLimit(10) + .withPartitionNum(10) + .build() + val edge = sparkSession.read.nebula(config, nebulaReadVertexConfig).loadEdgesToDF() + edge.printSchema() + edge.show() + assert(edge.count() == 12) + assert(edge.schema.fields.length == 16) + + edge.map(row => { + row.getAs[Long]("_srcId") match { + case 1L => { + assert(row.getAs[Long]("_dstId") == 2) + assert(row.getAs[Long]("_rank") == 0) + assert(row.getAs[String]("col1").equals("friend1")) + assert(row.getAs[String]("col2").equals("friend2")) + assert(row.getAs[Long]("col3") == 11) + assert(row.getAs[Long]("col4") == 200) + assert(row.getAs[Long]("col5") == 1000) + assert(row.getAs[Long]("col6") == 188888) + assert(row.getAs[String]("col7").equals("2021-01-01")) + assert(row.getAs[String]("col8").equals("2021-01-01T12:00:00.000")) + assert(row.getAs[Long]("col9") == 1609502400) + assert(row.getAs[Boolean]("col10")) + assert(row.getAs[Double]("col11") < 1.001) + assert(row.getAs[Double]("col12") < 2.001) + assert(row.getAs[String]("col13").equals("12:01:01")) + } + } + "" + })(Encoders.STRING) + } + +} diff --git a/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala new file mode 100644 index 00000000..e63fbf8c --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteDeleteSuite.scala @@ -0,0 +1,50 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +package com.vesoft.nebula.connector.writer + +import com.vesoft.nebula.client.graph.data.ResultSet +import com.vesoft.nebula.connector.connector.Address +import com.vesoft.nebula.connector.mock.{NebulaGraphMock, SparkMock} +import com.vesoft.nebula.connector.nebula.GraphProvider +import org.apache.log4j.BasicConfigurator +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +class WriteDeleteSuite extends AnyFunSuite with BeforeAndAfterAll { + BasicConfigurator.configure() + + override def beforeAll(): Unit = { + val graphMock = new NebulaGraphMock + graphMock.mockStringIdGraphSchema() + graphMock.mockIntIdGraphSchema() + graphMock.close() + SparkMock.writeVertex() + } + + test("write vertex into test_write_string space with delete mode") { + SparkMock.deleteVertex() + val addresses: List[Address] = List(new Address("127.0.0.1", 9669)) + val graphProvider = new GraphProvider(addresses, 3000) + + graphProvider.switchSpace("root", "nebula", "test_write_string") + val resultSet: ResultSet = + graphProvider.submit("use test_write_string;match (v:person_connector) return v;") + assert(resultSet.getColumnNames.size() == 0) + assert(resultSet.isEmpty) + } + + test("write edge into test_write_string space with delete mode") { + SparkMock.deleteEdge() + val addresses: List[Address] = List(new Address("127.0.0.1", 9669)) + val graphProvider = new GraphProvider(addresses, 3000) + + graphProvider.switchSpace("root", "nebula", "test_write_string") + val resultSet: ResultSet = + graphProvider.submit("use test_write_string;fetch prop on friend_connector 1->2@10") + assert(resultSet.getColumnNames.size() == 0) + assert(resultSet.isEmpty) + } +} diff --git a/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala new file mode 100644 index 00000000..5b9bf051 --- /dev/null +++ b/nebula-spark-connector_2.2/src/test/scala/com/vesoft/nebula/connector/writer/WriteInsertSuite.scala @@ -0,0 +1,76 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +package com.vesoft.nebula.connector.writer + +import com.vesoft.nebula.client.graph.data.ResultSet +import com.vesoft.nebula.connector.connector.Address +import com.vesoft.nebula.connector.mock.{NebulaGraphMock, SparkMock} +import com.vesoft.nebula.connector.nebula.GraphProvider +import org.apache.log4j.BasicConfigurator +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +class WriteInsertSuite extends AnyFunSuite with BeforeAndAfterAll { + BasicConfigurator.configure() + + override def beforeAll(): Unit = { + val graphMock = new NebulaGraphMock + graphMock.mockStringIdGraphSchema() + graphMock.mockIntIdGraphSchema() + graphMock.close() + } + + test("write vertex into test_write_string space with insert mode") { + SparkMock.writeVertex() + val addresses: List[Address] = List(new Address("127.0.0.1", 9669)) + val graphProvider = new GraphProvider(addresses, 3000) + + graphProvider.switchSpace("root", "nebula", "test_write_string") + val createIndexResult: ResultSet = graphProvider.submit( + "use test_write_string; " + + "create tag index if not exists person_index on person_connector(col1(20));") + Thread.sleep(5000) + graphProvider.submit("rebuild tag index person_index;") + + Thread.sleep(5000) + + graphProvider.submit("use test_write_string;") + val resultSet: ResultSet = + graphProvider.submit("match (v:person_connector) return v;") + assert(resultSet.getColumnNames.size() == 1) + assert(resultSet.getRows.size() == 13) + + for (i <- 0 until resultSet.getRows.size) { + println(resultSet.rowValues(i).toString) + } + } + + test("write edge into test_write_string space with insert mode") { + SparkMock.writeEdge() + + val addresses: List[Address] = List(new Address("127.0.0.1", 9669)) + val graphProvider = new GraphProvider(addresses, 3000) + + graphProvider.switchSpace("root", "nebula", "test_write_string") + val createIndexResult: ResultSet = graphProvider.submit( + "use test_write_string; " + + "create edge index if not exists friend_index on friend_connector(col1(20));") + Thread.sleep(5000) + graphProvider.submit("rebuild edge index friend_index;") + + Thread.sleep(5000) + + graphProvider.submit("use test_write_string;") + val resultSet: ResultSet = + graphProvider.submit("match (v:person_connector)-[e:friend_connector] -> () return e;") + assert(resultSet.getColumnNames.size() == 1) + assert(resultSet.getRows.size() == 13) + + for (i <- 0 until resultSet.getRows.size) { + println(resultSet.rowValues(i).toString) + } + } +} diff --git a/pom.xml b/pom.xml index 01d76c89..54e9df06 100644 --- a/pom.xml +++ b/pom.xml @@ -46,7 +46,9 @@ nebula-spark-connector + nebula-spark-connector_2.2 example + nebula-spark-common @@ -122,6 +124,48 @@ + + + + scala-2.11 + + 2.11.12 + 2.11 + + + true + + + + + scala-2.12 + + 2.12.10 + 2.12 + + + + + spark-2.2 + + 2.2.0 + + + nebula-spark-connector_2.2 + + + + spark-2.4 + + 2.4.4 + + + true + + + nebula-spark-connector + +
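Note on the build profiles above: scala-2.11 and spark-2.4 appear to be active by default, while building with -Pspark-2.2 pulls in the new nebula-spark-connector_2.2 module. For reference, a minimal read sketch against the Spark 2.2 connector, assuming the local docker-compose cluster and the test_int space created by NebulaGraphMock (the object name ReadSketch is illustrative):

    import com.facebook.thrift.protocol.TCompactProtocol
    import com.vesoft.nebula.connector.connector.NebulaDataFrameReader
    import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig}
    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession

    object ReadSketch {
      def main(args: Array[String]): Unit = {
        // Kryo + TCompactProtocol registration, as in the test suites above
        val sparkConf = new SparkConf()
          .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol]))
        val spark = SparkSession.builder().master("local").config(sparkConf).getOrCreate()

        val config = NebulaConnectionConfig
          .builder()
          .withMetaAddress("127.0.0.1:9559") // meta address of the local test cluster
          .withConenctionRetry(2)            // spelled as defined by the connector API
          .build()
        val readConfig = ReadNebulaConfig
          .builder()
          .withSpace("test_int")
          .withLabel("person")
          .withNoColumn(false)
          .withReturnCols(List())            // empty list returns all properties
          .withLimit(10)
          .withPartitionNum(10)
          .build()

        // loads the tag data of test_int.person as a DataFrame, _vertexId column first
        val vertices = spark.read.nebula(config, readConfig).loadVerticesToDF()
        vertices.show()
        spark.stop()
      }
    }

Writes follow the same shape through WriteNebulaVertexConfig/WriteNebulaEdgeConfig and df.write.nebula(config, writeConfig).writeVertices()/writeEdges(), as exercised by SparkMock.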