add conf template #105

Merged: 2 commits, Nov 30, 2022
conf-template/client_import/csv_datasource.conf (94 additions, 0 deletions)
# Use the following command to submit the Exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
# nebula-exchange-3.0-SNAPSHOT.jar -c csv_datasource.conf
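# (master_ip, the driver/executor sizing, and the jar file name above are placeholders; adjust them to your Spark cluster and Exchange version)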

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# NebulaGraph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in a virtual network such as k8s, configure the address of the meta leader here.
# use `SHOW meta leader` to check the meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# maximum number of failures; if the failure count exceeds this value, the application exits
max: 32
# failed data is recorded in the output path as nGQL statements
output: /tmp/errors
}

# use Google's RateLimiter to throttle the requests sent to NebulaGraph
rate: {
# the stable throughput of the RateLimiter, in permits per second
limit: 1024
# timeout for acquiring a permit from the RateLimiter, unit: millisecond
# if a permit cannot be obtained within the timeout, the request is given up
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: csv
sink: client
}
# if your file is not in HDFS, use "file:///path/test.csv"
path: "hdfs://ip:port/path/test.csv"
# if your CSV file has no header, use _c0, _c1, _c2, ... to refer to its columns
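# a header-less sketch (hypothetical values):
#   header: false
#   fields: [_c0, _c1, _c2]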
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: csv-field-0
separator: ","
header: true
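# batch is the number of rows written to NebulaGraph per request;
# partition is the number of Spark partitions used when processing the data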
batch: 2000
partition: 60
}
]

# Processing edges
edges: [
{
name: edge-name-1
type: {
source: csv
sink: client
}
path: "hdfs://ip:port/path/test.csv"
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
source: csv-field-0
target: csv-field-1
ranking: csv-field-2
separator: ","
header: true
batch: 2000
partition: 60
}
]
}
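
For reference, a minimal CSV this template could ingest, assuming header: true and the placeholder field names above (the values are hypothetical):

csv-field-0,csv-field-1,csv-field-2
v1,foo,1
v2,bar,2

With a header, the fields list refers to the header names; csv-field-0 supplies the vertex ID for the tag and the source/target IDs for the edge.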
conf-template/client_import/hive_datasource.conf (88 additions, 0 deletions)
# Use the following command to submit the Exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
# nebula-exchange-3.0-SNAPSHOT.jar -c hive_datasource.conf -h
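# the trailing -h switch tells Exchange to read from Hive; it is required whenever the source is hive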

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# NebulaGraph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in a virtual network such as k8s, configure the address of the meta leader here.
# use `SHOW meta leader` to check the meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# maximum number of failures; if the failure count exceeds this value, the application exits
max: 32
# failed data is recorded in the output path as nGQL statements
output: /tmp/errors
}

# use Google's RateLimiter to throttle the requests sent to NebulaGraph
rate: {
# the stable throughput of the RateLimiter, in permits per second
limit: 1024
# timeout for acquiring a permit from the RateLimiter, unit: millisecond
# if a permit cannot be obtained within the timeout, the request is given up
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: hive
sink: client
}
exec: "select hive-field0, hive-field1, hive-field2 from database.table"
fields: [hive-field-0, hive-field-1, hive-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: hive-field-0
batch: 2000
partition: 60
}
]

# Processing edges
edges: [
{
name: edge-name-1
type: {
source: hive
sink: client
}
exec: "select hive-field0, hive-field1, hive-field2 from database.table"
fields: [ hive-field-0, hive-field-1, hive-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
source: hive-field-0
target: hive-field-1
ranking: hive-filed-2
batch: 2000
partition: 60
}
]
}
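
These client-import templates write into an existing schema, so the tag, edge type, and their properties must already exist in the target space. A hypothetical nGQL schema matching the placeholder names (the property types are assumptions):

CREATE TAG `tag-name-1`(`nebula-field-0` string, `nebula-field-1` string, `nebula-field-2` string);
CREATE EDGE `edge-name-1`(`nebula-field-0` string, `nebula-field-1` string, `nebula-field-2` string);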
conf-template/sst_import/csv_datasource.conf (101 additions, 0 deletions)
# Use the following command to submit the Exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
# nebula-exchange-3.0-SNAPSHOT.jar -c csv_datasource.conf

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# NebulaGraph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in a virtual network such as k8s, configure the address of the meta leader here.
# use `SHOW meta leader` to check the meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

path:{
# any path with read and write access will do
local:"/tmp"
remote:"/sst"
hdfs.namenode: "hdfs://name_node:9000"
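# here, local is the staging directory on each Spark worker where the SST files are generated,
# remote is the HDFS directory the files are uploaded to, and hdfs.namenode is the HDFS serving that path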
}

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# maximum number of failures; if the failure count exceeds this value, the application exits
max: 32
# failed data is recorded in the output path as nGQL statements
output: /tmp/errors
}

# use Google's RateLimiter to throttle the requests sent to NebulaGraph
rate: {
# the stable throughput of the RateLimiter, in permits per second
limit: 1024
# timeout for acquiring a permit from the RateLimiter, unit: millisecond
# if a permit cannot be obtained within the timeout, the request is given up
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: csv
sink: sst
}
# if your file is not in HDFS, use "file:///path/test.csv"
path: "hdfs://ip:port/path/test.csv"
# if your CSV file has no header, use _c0, _c1, _c2, ... to refer to its columns
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: csv-field-0
separator: ","
header: true
batch: 2000
partition: 60
}
]

# Processing edges
edges: [
{
name: edge-name-1
type: {
source: csv
sink: sst
}
path: "hdfs://ip:port/path/test.csv"
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
source: csv-field-0
target: csv-field-1
ranking: csv-field-2
separator: ","
header: true
batch: 2000
partition: 60
}
]
}
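
Unlike client import, SST generation only produces data files; they still have to be loaded by the storage service. A sketch of the follow-up nGQL, assuming the remote path configured above (host and port are placeholders):

DOWNLOAD HDFS "hdfs://name_node:9000/sst";
INGEST;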
conf-template/sst_import/hive_datasource.conf (95 additions, 0 deletions)
# Use the following command to submit the Exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
# nebula-exchange-3.0-SNAPSHOT.jar -c hive_datasource.conf -h
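# as with the client-import Hive template above, the trailing -h makes Exchange read from Hive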

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# NebulaGraph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in a virtual network such as k8s, configure the address of the meta leader here.
# use `SHOW meta leader` to check the meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

path:{
# any path with read and write access will do
local:"/tmp"
remote:"/sst"
hdfs.namenode: "hdfs://name_node:9000"
}

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# maximum number of failures; if the failure count exceeds this value, the application exits
max: 32
# failed data is recorded in the output path as nGQL statements
output: /tmp/errors
}

# use Google's RateLimiter to throttle the requests sent to NebulaGraph
rate: {
# the stable throughput of the RateLimiter, in permits per second
limit: 1024
# timeout for acquiring a permit from the RateLimiter, unit: millisecond
# if a permit cannot be obtained within the timeout, the request is given up
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: hive
sink: sst
}
exec: "select hive-field0, hive-field1, hive-field2 from database.table"
fields: [hive-field-0, hive-field-1, hive-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: hive-field-0
batch: 2000
partition: 60
}
]

# Processing edges
edges: [
{
name: edge-name-1
type: {
source: hive
sink: sst
}
exec: "select hive-field0, hive-field1, hive-field2 from database.table"
fields: [ hive-field-0, hive-field-1, hive-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
source: hive-field-0
target: hive-field-1
ranking: hive-filed-2
batch: 2000
partition: 60
}
]
}