-
Notifications
You must be signed in to change notification settings - Fork 2
/
create_db.py
141 lines (114 loc) · 5.52 KB
/
create_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# create_db.py
"""
This script writes a text file containing a list of commands that create a
database for testing purposes.

The global constants below specify the number of nodes and edges to generate,
the output file for the batch script, the node attributes and their ranges,
and the edge attributes and their ranges.
"""
import random

# How many "create node" and "create edge" commands to emit.
NUM_NODES = 10
NUM_EDGES = 5

# Path of the batch script that is written.
FILE = 'random_db.txt'

# Node attribute names with their per-attribute bounds. The three lists are
# parallel: index i of each list describes the same attribute. Values are
# drawn with random.randint, so both bounds are inclusive.
NODE_ATTRIBUTES = ['employee_id', 'salary']
MIN_NODE_ATTRIBUTES = [1, 10000]
MAX_NODE_ATTRIBUTES = [1000, 1000000]  # ids up to 1000, salaries up to one million

# Edge attribute names with their per-attribute bounds (same parallel layout
# as the node lists above).
EDGE_ATTRIBUTES = ['friendship']
MIN_EDGE_ATTRIBUTES = [0]  # weakest possible friendship
MAX_EDGE_ATTRIBUTES = [1000]  # strongest possible friendship
def create_rand_node(attr_names, min_values, max_values):
    """
    Helper function that returns the command line string to create a
    node that has randomly generated attributes in the specified ranges. It is
    assumed that the attributes should be initialized to random integers. It
    does NOT include a semicolon at the end, in case multiple create node
    commands need to be chained together on the same line.

    The returned string has the form
    C{'create n: null_id <name>:<value> <name>:<value> '} (note the trailing
    space after every attribute).

    @type attr_names: List of strings.
    @param attr_names: The node's attribute names.
    @type min_values: List of integers.
    @param min_values: The minimum integer values that each attribute can take
        on (inclusive).
    @type max_values: List of integers.
    @param max_values: The maximum integer values that each attribute can take
        on (inclusive).
    @rtype: String
    @return: The command line string to create a node with random attributes.
    @raise ValueError: If the three lists are not all the same length, or if
        a minimum is greater than its maximum (raised by random.randint).
    """
    # The three lists are walked in lockstep, so they must be equal in size.
    # Raising here gives the caller a clear error instead of a None result
    # that fails later when it is concatenated with a string.
    if not (len(attr_names) == len(min_values) == len(max_values)):
        raise ValueError('Cannot create node, invalid arguments.')

    # Collect the pieces and join once at the end.
    parts = ['create n: null_id ']
    for name, low, high in zip(attr_names, min_values, max_values):
        parts.append(name + ':' + str(random.randint(low, high)) + ' ')
    return ''.join(parts)
def create_rand_edge(node_list, attr_names, min_values, max_values):
    """
    Helper function that returns the command line string to create an
    edge between two different nodes that were generated using
    create_rand_node. The edge also has randomly generated attributes in the
    specified ranges. It is assumed that the attributes should be initialized
    to random integers. It does NOT include a semicolon at the end, in case
    multiple create edge commands need to be chained together on the same
    line.

    The returned command has the form
    C{'createedge <node1>e: null_id <name>:<value> <node2>'}, where each node
    part is the corresponding create node command with its leading
    C{'create '} removed.

    @type node_list: List of strings generated by L{create_rand_node} function.
    @param node_list: List of create node commands. Must contain at least two
        entries.
    @type attr_names: List of strings.
    @param attr_names: The edge's attribute names.
    @type min_values: List of integers.
    @param min_values: The minimum integer values that each attribute can take
        on (inclusive).
    @type max_values: List of integers.
    @param max_values: The maximum integer values that each attribute can take
        on (inclusive).
    @rtype: String, String, String
    @return: The command line string to create an edge with random attributes,
        the node string representing the first node of the edge, and the
        node string representing the second node of the edge.
    @raise ValueError: If the attribute lists are not all the same length, if
        node_list has fewer than two entries, or if a minimum is greater than
        its maximum (raised by random.randint).
    """
    # The attribute lists are walked in lockstep, so they must be equal in
    # size.
    if not (len(attr_names) == len(min_values) == len(max_values)):
        raise ValueError('Cannot create edge, invalid arguments.')

    # Two different endpoints are needed; with fewer nodes there is nothing
    # valid to pick (a retry-until-different loop would never finish).
    if len(node_list) < 2:
        raise ValueError('Cannot create edge, at least two nodes are required.')

    # Draw two distinct positions in a single step.
    node1_ind, node2_ind = random.sample(range(len(node_list)), 2)

    # Drop the leading "create " from each create node command so that only
    # the node description is embedded in the edge command.
    prefix_len = len('create ')
    node1 = node_list[node1_ind][prefix_len:]
    node2 = node_list[node2_ind][prefix_len:]

    # Layout: first node, edge attributes, second node.
    parts = ['createedge ', node1, 'e: null_id ']
    for name, low, high in zip(attr_names, min_values, max_values):
        parts.append(name + ':' + str(random.randint(low, high)) + ' ')
    parts.append(node2)

    return ''.join(parts), node1, node2
if __name__ == '__main__':
    # An edge joins two different nodes, and (a, b) is tracked separately
    # from (b, a) below, so at most NUM_NODES * (NUM_NODES - 1) distinct edges
    # exist. Asking for more would make the retry loop below spin forever, so
    # fail up front, before the output file is touched.
    # NOTE: nodes that happen to receive identical random attributes produce
    # identical strings and share edge keys, so the real capacity can be
    # lower than this bound.
    max_edges = NUM_NODES * (NUM_NODES - 1)
    if NUM_EDGES > max_edges:
        raise ValueError(
            'NUM_EDGES (%d) exceeds the %d distinct edges possible with '
            '%d nodes.' % (NUM_EDGES, max_edges, NUM_NODES))

    # create node commands written so far
    node_list = []
    # (node1 string, node2 string) pairs of the edges written so far; a set
    # gives constant-time duplicate checks
    edge_set = set()

    # the with-block closes the output file even if generation fails midway
    with open(FILE, 'w') as f:
        # write create node commands
        for _ in range(NUM_NODES):
            node = create_rand_node(NODE_ATTRIBUTES, MIN_NODE_ATTRIBUTES,
                                    MAX_NODE_ATTRIBUTES)
            f.write(node + ';\n')
            node_list.append(node)

        # write create edge commands
        for _ in range(NUM_EDGES):
            # keep drawing random edges until we get one that does not
            # already exist
            while True:
                edge, node1, node2 = create_rand_edge(
                    node_list, EDGE_ATTRIBUTES, MIN_EDGE_ATTRIBUTES,
                    MAX_EDGE_ATTRIBUTES)
                if (node1, node2) not in edge_set:
                    break
            f.write(edge + ';\n')
            edge_set.add((node1, node2))