Skip to content

Commit

Permalink
add prefix for LDBC (#63)
Browse files Browse the repository at this point in the history
* add prefix for LDBC

* fix

* fix prefix
  • Loading branch information
HarrisChu authored Apr 15, 2022
1 parent 0295d1b commit 2b8e11d
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 24 deletions.
55 changes: 44 additions & 11 deletions nebula_bench/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from email.policy import default
import click

from nebula_bench import setting
Expand All @@ -12,11 +13,17 @@


def common(f):
f = click.option("-f", "--folder", help="ldbc data folder, default: target/data/test_data")(f)
f = click.option(
"-f", "--folder", help="ldbc data folder, default: target/data/test_data"
)(f)

f = click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")(f)
f = click.option(
"-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669"
)(f)
f = click.option("-u", "--user", help="Nebula Graph address, default: root")(f)
f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(f)
f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(
f
)
f = click.option(
"-s",
"--space",
Expand All @@ -32,8 +39,12 @@ def cli():


@cli.command(help="generate and split ldbc data")
@click.option("-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1")
@click.option("-og", "--only-generate", default=False, is_flag=True, help="only generate data")
@click.option(
"-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1"
)
@click.option(
"-og", "--only-generate", default=False, is_flag=True, help="only generate data"
)
@click.option(
"-os",
"--only-split",
Expand Down Expand Up @@ -69,10 +80,14 @@ def nebula():
@click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")
@click.option("-u", "--user", help="Nebula Graph address, default: root")
@click.option("-p", "--password", help="Nebula Graph address, default: nebula")
@click.option("-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2")
@click.option(
"-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2"
)
def clean(address, user, password, keep):
sc = NebulaController(user=user, password=password, address=address)
value = click.confirm("Will delete all spaces in Nebula Graph. Continue?", abort=True)
value = click.confirm(
"Will delete all spaces in Nebula Graph. Continue?", abort=True
)
sc.clean_spaces(keep)


Expand All @@ -91,9 +106,18 @@ def clean(address, user, password, keep):
is_flag=True,
help="Dry run, just dump the import config file, default: False",
)
def importer(folder, address, user, password, space, vid_type, dry_run):
@click.option(
"-p",
"--enable-prefix",
default=False,
is_flag=True,
help="enable add prefix in vid, vid type should be string",
)
def importer(folder, address, user, password, space, vid_type, enable_prefix, dry_run):
assert vid_type in ["int", "string"], 'the vid type should be "ini" or "string" '
nc = NebulaController(folder, space, user, password, address, vid_type)
nc = NebulaController(
folder, space, user, password, address, vid_type, enable_prefix
)
c = nc.import_space(dry_run)
if c != 0:
exit(c)
Expand Down Expand Up @@ -122,7 +146,16 @@ def stress():
)
@click.option("--args", help="extend args for test tool")
def run(
folder, address, user, password, space, vid_type, scenario, controller, args, dry_run
folder,
address,
user,
password,
space,
vid_type,
scenario,
controller,
args,
dry_run,
):
stress = StressFactory.gen_stress(
_type=controller,
Expand All @@ -133,7 +166,7 @@ def run(
space=space,
vid_type=vid_type,
scenarios=scenario,
args = args,
args=args,
dry_run=dry_run,
)
stress.run()
Expand Down
27 changes: 22 additions & 5 deletions nebula_bench/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@


class BaseController(object):
def __init__(self, data_folder=None, space=None, user=None, password=None, address=None):
def __init__(
self, data_folder=None, space=None, user=None, password=None, address=None
):
self.workspace_path = setting.WORKSPACE_PATH
self.data_folder = data_folder or setting.DATA_FOLDER
self.data_folder = Path(self.data_folder)
Expand All @@ -21,7 +23,14 @@ def __init__(self, data_folder=None, space=None, user=None, password=None, addre

class NebulaController(BaseController):
def __init__(
self, data_folder=None, space=None, user=None, password=None, address=None, vid_type=None
self,
data_folder=None,
space=None,
user=None,
password=None,
address=None,
vid_type=None,
enable_prefix=None,
):
super().__init__(
data_folder=data_folder,
Expand All @@ -31,6 +40,7 @@ def __init__(
address=address,
)
self.vid_type = vid_type or "int"
self.enable_prefix = enable_prefix

def import_space(self, dry_run=False):
result_file = self.dump_nebula_importer()
Expand All @@ -40,10 +50,15 @@ def import_space(self, dry_run=False):
return 0

def dump_nebula_importer(self):
_type = "int64" if self.vid_type == "int" else "fixed_string(20)"
kwargs = {}
if self.enable_prefix and self.vid_type == 'int':
raise Exception("must use prefix with vid type string")
else:
kwargs["enable_prefix"] = self.enable_prefix

p = parser.Parser(parser.NebulaDumper, self.data_folder)
dumper = p.parse()
kwargs = {}

kwargs["space"] = self.space
kwargs["user"] = self.user
kwargs["password"] = self.password
Expand Down Expand Up @@ -85,7 +100,9 @@ def load_scenarios(self, scenario):
class_name=scenario,
)
else:
return utils.load_class(package_name, load_all=True, base_class=BaseScenario)
return utils.load_class(
package_name, load_all=True, base_class=BaseScenario
)

def run(self, nebula_scenario):
result_folder = "target/result"
Expand Down
20 changes: 20 additions & 0 deletions nebula_bench/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
from nebula_bench import setting
from nebula_bench.utils import jinja_dump

# Short string prefix registered for each lower-case entity name.
# Lookups use ``prefix_map.get(name, "")``, so a name that is not listed
# here simply gets an empty prefix.
# NOTE(review): presumably these are prepended to string vertex ids so that
# ids of different entity kinds cannot collide -- confirm against the
# importer templates.
prefix_map = {
    "comment": "c-",
    "forum": "f-",
    "organisation": "o-",
    "person": "p-",
    "place": "l-",
    "post": "s-",
    "tag": "t-",
    "tagclass": "g-",
    "emailaddress": "e-",
    "language": "u-",
}


class PropTypeEnum(enum.Enum):
INT = "int"
Expand Down Expand Up @@ -43,13 +56,15 @@ def __init__(self, name=None, index=None):
Base.__init__(self, name, index)
self.path = None
self.prop_list = []
self.prefix = None


class Edge(Base):
    """Description of one edge type.

    Besides what ``Base`` stores for ``name`` and ``index``, an edge keeps
    the column index, name and prefix of its source and destination
    endpoints, plus a list of its properties. All endpoint fields start as
    ``None`` and the property list starts empty; they are expected to be
    filled in after construction.
    """

    def __init__(self, name=None, index=None):
        Base.__init__(self, name, index)

        # Column positions of the two endpoints.
        self.src_index = None
        self.dst_index = None

        # Names of the two endpoints.
        self.src_name = None
        self.dst_name = None

        # Prefixes associated with the two endpoints.
        self.src_prefix = None
        self.dst_prefix = None

        # Properties carried by the edge itself.
        self.prop_list = []


Expand Down Expand Up @@ -91,6 +106,7 @@ def parse_vertex(self, file_path):

vertex = Vertex(name)
vertex.path = str(file_path.absolute())
vertex.prefix = prefix_map.get(name.lower(), "")

header_path = Path(file_path.parent / (file_name + "_header.csv"))
with open(str(header_path.absolute()), "r") as fl:
Expand Down Expand Up @@ -157,9 +173,11 @@ def parse_edge(self, file_path):
flag = not flag
name = h.rsplit(".id", 1)[0].lower()
edge.src_name, edge.src_index = name, index
edge.src_prefix = prefix_map.get(name, "")
elif h.lower() == dst_vertex.lower() + ".id":
name = h.rsplit(".id", 1)[0].lower()
edge.dst_name, edge.dst_index = name, index
edge.dst_prefix = prefix_map.get(name, "")

else:
p = Prop()
Expand Down Expand Up @@ -219,6 +237,7 @@ def __init__(self, parser, result_file=None, template_file=None):

def dump(self, *args, **kwargs):
vid_type = kwargs.pop("vid_type", "int")
enable_prefix = kwargs.pop("enable_prefix", False)
if vid_type == "int":
self.template_file = self.template_file or "nebula-import-vid-int.yaml.j2"
elif vid_type == "string":
Expand All @@ -230,6 +249,7 @@ def dump(self, *args, **kwargs):
kwargs["edge_list"] = self._parser.edge_list
kwargs["vertex_set"] = self._parser.vertex_set
kwargs["edge_set"] = self._parser.edge_set
kwargs["enable_prefix"] = enable_prefix

jinja_dump(self.template_file, self.result_file, kwargs)
return self.result_file
15 changes: 8 additions & 7 deletions scripts/copy-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
_all_csv_files_copy = []
_all_csv_files_need_fix_title = [
'static/place_isPartOf_place_header.csv.copy',
'dynamic/person_knows_person_header.csv.copy']
'dynamic/person_knows_person_header.csv.copy',
]

if __name__ == "__main__":
argv = sys.argv[1:]
Expand All @@ -22,14 +23,14 @@
_csv_dir = arg
all_dir_list = os.listdir(_csv_dir)
for dir in all_dir_list:
if os.path.isdir(_csv_dir+'/'+dir):
dir_list = os.listdir(_csv_dir+'/'+dir)
if os.path.isdir(_csv_dir + '/' + dir):
dir_list = os.listdir(_csv_dir + '/' + dir)
for file in dir_list:
if file.endswith('.csv'):
_all_csv_files.append(dir+'/'+file)
_all_csv_files.append(dir + '/' + file)
elif file.endswith('.copy'):
_all_csv_files_copy.append(dir+'/'+file)
_all_csv_files_copy.append(dir + '/' + file)
for dir in _all_csv_files:
os.remove(_csv_dir+dir)
os.remove(_csv_dir + dir)
for dir in _all_csv_files_copy:
os.rename(_csv_dir+dir, _csv_dir+dir[:-5])
os.rename(_csv_dir + dir, _csv_dir + dir[:-5])
2 changes: 1 addition & 1 deletion scripts/generate-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ echo "ldbc.snb.datagen.util.formatter.StringDateFormatter.dateTimeFormat:yyyy-MM
# set this to the Hadoop 3.2.1 directory
export HADOOP_HOME=${HADOOP_HOME} && \
export LDBC_SNB_DATAGEN_HOME=`pwd` && \
sh run.sh && \
bash run.sh && \
rm -rf ${DATA_DIR}/test_data && \
mv test_data ${DATA_DIR}/.

Expand Down
2 changes: 2 additions & 0 deletions templates/nebula-import-vid-int.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ files:
vid:
index: {{ vertex.index }}
type: int

tags:
- name: {{ vertex.name }}
props:
Expand Down Expand Up @@ -81,6 +82,7 @@ files:
srcVID:
index: {{ edge.src_index }}
type: int

dstVID:
index: {{ edge.dst_index }}
type: int
Expand Down
9 changes: 9 additions & 0 deletions templates/nebula-import-vid-string.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ files:
vid:
index: {{ vertex.index }}
type: string
{%- if enable_prefix %}
prefix: {{ vertex.prefix }}
{% endif %}
tags:
- name: {{ vertex.name }}
props:
Expand Down Expand Up @@ -81,9 +84,15 @@ files:
srcVID:
index: {{ edge.src_index }}
type: string
{%- if enable_prefix %}
prefix: {{ edge.src_prefix }}
{% endif %}
dstVID:
index: {{ edge.dst_index }}
type: string
{%- if enable_prefix %}
prefix: {{ edge.dst_prefix }}
{% endif %}
props:
{%- for prop in edge.prop_list %}
- name: {{ prop.name }}
Expand Down

0 comments on commit 2b8e11d

Please sign in to comment.