Skip to content

Commit

Permalink
add conf template (#105)
Browse files Browse the repository at this point in the history
* update conf

* add conf template
  • Loading branch information
Nicole00 authored Nov 30, 2022
1 parent ba424c0 commit e9c6aad
Show file tree
Hide file tree
Showing 5 changed files with 429 additions and 66 deletions.
94 changes: 94 additions & 0 deletions conf-template/client_import/csv_datasource.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Use the command to submit the exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
# nebula-exchange-3.0-SNAPSHOT.jar -c csv_datasource.conf

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# Nebula Graph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in virtual network like k8s, please config the leader address of meta.
# use `SHOW meta leader` to see your meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# max number of failures, if the number of failures is bigger than max, then exit the application.
max: 32
# failed data will be recorded in output path, format with ngql
output: /tmp/errors
}

# use google's RateLimiter to limit the requests send to NebulaGraph
rate: {
# the stable throughput of RateLimiter
limit: 1024
# Acquires a permit from RateLimiter, unit: MILLISECONDS
# if it can't be obtained within the specified timeout, then give up the request.
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: csv
sink: client
}
      # if your file is not in hdfs, config "file:///path/test.csv"
path: "hdfs://ip:port/path/test.csv"
# if your csv file has no header, then use _c0,_c1,_c2,.. to indicate fields
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: csv-field-0
separator: ","
header: true
batch: 2000
partition: 60
}
]

# process edges
edges: [
{
name: edge-name-1
type: {
source: csv
sink: client
}
path: "hdfs://ip:port/path/test.csv"
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
source: csv-field-0
target: csv-field-1
ranking: csv-field-2
separator: ","
header: true
batch: 2000
partition: 60
}
]
}
88 changes: 88 additions & 0 deletions conf-template/client_import/hive_datasource.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Use the command to submit the exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
#  nebula-exchange-3.0-SNAPSHOT.jar -c hive_datasource.conf

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# Nebula Graph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in virtual network like k8s, please config the leader address of meta.
# use `SHOW meta leader` to see your meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# max number of failures, if the number of failures is bigger than max, then exit the application.
max: 32
# failed data will be recorded in output path, format with ngql
output: /tmp/errors
}

# use google's RateLimiter to limit the requests send to NebulaGraph
rate: {
# the stable throughput of RateLimiter
limit: 1024
# Acquires a permit from RateLimiter, unit: MILLISECONDS
# if it can't be obtained within the specified timeout, then give up the request.
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: hive
sink: client
}
      exec: "select hive-field-0, hive-field-1, hive-field-2 from database.table"
fields: [hive-field-0, hive-field-1, hive-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: hive-field-0
batch: 2000
partition: 60
}
]

# process edges
edges: [
{
name: edge-name-1
type: {
source: hive
sink: client
}
      exec: "select hive-field-0, hive-field-1, hive-field-2 from database.table"
      fields: [hive-field-0, hive-field-1, hive-field-2]
      nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
      source: hive-field-0
      target: hive-field-1
      ranking: hive-field-2
batch: 2000
partition: 60
}
]
}
101 changes: 101 additions & 0 deletions conf-template/sst_import/csv_datasource.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Use the command to submit the exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
# nebula-exchange-3.0-SNAPSHOT.jar -c csv_datasource.conf

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# Nebula Graph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in virtual network like k8s, please config the leader address of meta.
# use `SHOW meta leader` to see your meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

path:{
# any path that owns read and write access is ok
local:"/tmp"
remote:"/sst"
hdfs.namenode: "hdfs://name_node:9000"
}

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# max number of failures, if the number of failures is bigger than max, then exit the application.
max: 32
# failed data will be recorded in output path, format with ngql
output: /tmp/errors
}

# use google's RateLimiter to limit the requests send to NebulaGraph
rate: {
# the stable throughput of RateLimiter
limit: 1024
# Acquires a permit from RateLimiter, unit: MILLISECONDS
# if it can't be obtained within the specified timeout, then give up the request.
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: csv
sink: sst
}
      # if your file is not in hdfs, config "file:///path/test.csv"
path: "hdfs://ip:port/path/test.csv"
# if your csv file has no header, then use _c0,_c1,_c2,.. to indicate fields
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: csv-field-0
separator: ","
header: true
batch: 2000
partition: 60
}
]

# process edges
edges: [
{
name: edge-name-1
type: {
source: csv
sink: sst
}
path: "hdfs://ip:port/path/test.csv"
fields: [csv-field-0, csv-field-1, csv-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
source: csv-field-0
target: csv-field-1
ranking: csv-field-2
separator: ","
header: true
batch: 2000
partition: 60
}
]
}
95 changes: 95 additions & 0 deletions conf-template/sst_import/hive_datasource.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Use the command to submit the exchange job:

# spark-submit \
# --master "spark://master_ip:7077" \
# --driver-memory=2G --executor-memory=30G \
# --num-executors=3 --executor-cores=20 \
# --class com.vesoft.nebula.exchange.Exchange \
#  nebula-exchange-3.0-SNAPSHOT.jar -c hive_datasource.conf

{
# Spark config
spark: {
app: {
name: NebulaGraph Exchange
}
}

# Nebula Graph config
nebula: {
address:{
graph:["127.0.0.1:9669"]
# if your NebulaGraph server is in virtual network like k8s, please config the leader address of meta.
# use `SHOW meta leader` to see your meta leader's address
meta:["127.0.0.1:9559"]
}
user: root
pswd: nebula
space: test

path:{
# any path that owns read and write access is ok
local:"/tmp"
remote:"/sst"
hdfs.namenode: "hdfs://name_node:9000"
}

# nebula client connection parameters
connection {
# socket connect & execute timeout, unit: millisecond
timeout: 30000
}

error: {
# max number of failures, if the number of failures is bigger than max, then exit the application.
max: 32
# failed data will be recorded in output path, format with ngql
output: /tmp/errors
}

# use google's RateLimiter to limit the requests send to NebulaGraph
rate: {
# the stable throughput of RateLimiter
limit: 1024
# Acquires a permit from RateLimiter, unit: MILLISECONDS
# if it can't be obtained within the specified timeout, then give up the request.
timeout: 1000
}
}

# Processing tags
tags: [
{
name: tag-name-1
type: {
source: hive
sink: sst
}
      exec: "select hive-field-0, hive-field-1, hive-field-2 from database.table"
fields: [hive-field-0, hive-field-1, hive-field-2]
nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
vertex: hive-field-0
batch: 2000
partition: 60
}
]

# process edges
edges: [
{
name: edge-name-1
type: {
source: hive
sink: sst
}
      exec: "select hive-field-0, hive-field-1, hive-field-2 from database.table"
      fields: [hive-field-0, hive-field-1, hive-field-2]
      nebula.fields: [nebula-field-0, nebula-field-1, nebula-field-2]
      source: hive-field-0
      target: hive-field-1
      ranking: hive-field-2
batch: 2000
partition: 60
}
]
}
Loading

0 comments on commit e9c6aad

Please sign in to comment.