-
Notifications
You must be signed in to change notification settings - Fork 10
/
database_tools_old.py
258 lines (204 loc) · 7.5 KB
/
database_tools_old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import sys
from datetime import timezone, datetime as dt
import time
import argparse
import pickle
import codecs
import pandas as pd
import regex as re
from arctic import Arctic
from arctic import TICK_STORE
from arctic import CHUNK_STORE
from arctic.date import DateRange, CLOSED_CLOSED, CLOSED_OPEN, OPEN_CLOSED, OPEN_OPEN
masterKey=""  # suffix appended to every base key name by updateKeys()
#other keys include _all_logs and _all_receipts
dbKeys = {'tick': '', 'tx': '', 'block': '', 'log': '', 'trace': ''}  # base name -> full db symbol (filled in by updateKeys)
#per-key chunk granularity passed to chunkstore writes — presumably pandas offset aliases (M=month, W=week, D=day); TODO confirm against Arctic docs
chunkSizes = {
'tick': 'M',
'tx': 'D',
'block': 'W',
'log': 'D',
'trace': 'D',
}
dontRemoveKeys = True  # when False, updateKeys prunes dbKeys entries whose symbol is missing from the db
storeKey = 'chunkstore'  # name of the single Arctic library this module uses
db = None  # Arctic connection handle, set by init()
maxDBStorage = 1024 #1024 GB max size of the database
def updateKeys(masterKey):
    """Suffix every base key in ``dbKeys`` with ``masterKey`` and, when
    ``dontRemoveKeys`` is False, prune entries whose symbol is absent from
    the database.

    NOTE(review): calling this twice appends the suffix twice, because the
    new value is built from the (already-suffixed) key name's dict entry key
    — acceptable since it is only called once from init(); verify if reused.
    """
    for key in dbKeys:
        dbKeys[key] = key + masterKey
    if not dontRemoveKeys:
        # Only hit the database when pruning may actually happen; the
        # original fetched list_symbols() unconditionally, costing a DB
        # round-trip even with dontRemoveKeys=True (the default).
        symbols = getChunkstore().list_symbols()
        # iterate over a snapshot since we mutate dbKeys inside the loop
        for name, symbol in list(dbKeys.items()):
            if symbol not in symbols: #if the value (aka datastore key) is not in db
                dbKeys.pop(name, None) #remove the key, as it is not in the db
                print("Removing key %s from dbKeys list due to unavailability" % name)
#Methods for db management
def init():
    """Open the connection to the local Arctic instance and build the
    full symbol names from the configured master key."""
    global db
    db = Arctic('localhost')
    updateKeys(masterKey)
def getChunkstore():
    """Return the shared chunkstore library, creating it on first use.

    The storage quota is (re)applied on every call; Arctic's default
    quota is 10GB, which is far too small for blockchain data.
    """
    chunkStore = getLibrary(storeKey)
    if chunkStore is None:  # was `== None` — identity check is the Python idiom
        initLibrary(storeKey, CHUNK_STORE)
        chunkStore = getLibrary(storeKey)
    #turn GB to bytes and set the max quota of storage. Arctic's default is 10GB
    chunkStore._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)
    return chunkStore
def getLibrary(lib):
    """Return the Arctic library named ``lib``, or None if it does not
    exist (or the db handle is not initialised yet).

    Narrowed from a bare ``except:``, which also swallowed SystemExit
    and KeyboardInterrupt.
    """
    try:
        return db[lib]
    except Exception:  # library missing / db is still None
        return None
def removeDB(lib, key):
    """Delete ``key`` from library ``lib`` if it exists (debug helper)."""
    if not lib.has_symbol(key):
        return
    lib.delete(key) #used for debugging
    print("Removed key "+key+" in database")
def initLibrary(key, libType = None):
    """Create a new Arctic library named ``key``.

    When ``libType`` is given it is forwarded as ``lib_type``; otherwise
    Arctic's own default library type is used.
    """
    if libType is not None:  # was `!= None` — identity check is the Python idiom
        db.initialize_library(key, lib_type=libType)
    else:
        db.initialize_library(key)
#Methods for saving
def saveData(lib, key, data, chunkSize):
    """Write ``data`` to ``lib`` under ``key``, appending only records newer
    than what is already stored.

    ``data`` is a list of dicts each carrying a 'date' field, assumed sorted
    ascending by date (the overlap trim below relies on that ordering).
    ``chunkSize`` is only used on the first write of a symbol.
    """
    start = time.time()
    if lib.has_symbol(key):
        #read the last saved timestamp; the metadata is cached so we don't
        #re-read it two more times like the original did
        try:
            metadata = lib.read_metadata(key)
            newestDate = metadata['end']
        except Exception:  # was a bare except; metadata missing/corrupt
            metadata = None
            newestDate = 0
        print("newest date is ")
        print(newestDate)
        #find out where to trim the data so we don't write the same items, in case of an overlap
        trimIndex = 0
        while trimIndex < len(data) and newestDate >= data[trimIndex]['date']:
            trimIndex += 1
        if len(data) == trimIndex:
            #nothing new to write
            print("Data already written!")
        else:
            if metadata is None:
                # the earlier read failed; retry once before appending
                metadata = lib.read_metadata(key)
            print("Got metadata", metadata)
            #update the metadata and save the trimmed data
            metadata['end'] = data[-1]['date']
            lib.append(key, getDataFrame(data[trimIndex:]), metadata)
            lib.write_metadata(key, metadata)
    else:
        #create the store of this new data
        df = getDataFrame(data)
        lib.write(key, df, {'start': data[0]['date'], 'end': data[-1]['date'] }, chunk_size=chunkSize)
    print("Saving the data took "+str(time.time() - start)+" seconds")
def getDataFrame(data):
    """Build a pandas DataFrame from a list of record dicts."""
    frame = pd.DataFrame(data) #object in order to save large ints
    return frame
#Methods for reading
#Prints the start and end of the data
def peekData(lib, key, n = 5 ):
    """Print the first and last ``n`` rows of ``key`` plus timing info.

    Locals renamed: the original bound the LATEST rows to ``head`` and the
    FIRST rows to ``tail``, which read exactly backwards.
    """
    start = time.time()
    try:
        newest = getLatestRow(lib, key, False)  # rows in the last chunk
        oldest = getFirstRow(lib, key, False)   # rows in the first chunk
    except Exception:  # was a bare except; keep the same error report
        print("Error:", sys.exc_info()[0])
        return
    print(oldest.head(n))
    print('...')
    print(newest.tail(n))
    print(len(newest.values), len(oldest.values))
    print("Displaying the data took "+str(time.time() - start)+" seconds")
def getLatestRow(lib, key, filter = True):
    """Load the data stored at the newest saved timestamp of ``key``."""
    newestDate = lib.read_metadata(key)['end']
    return loadData(lib, key, newestDate, None, filter)
def getFirstRow(lib, key, filter = True):
    """Load the data stored at the oldest saved timestamp of ``key``."""
    oldestDate = lib.read_metadata(key)['start']
    return loadData(lib, key, None, oldestDate, filter)
def loadData(lib, key, startDate = None, endDate = None, filter = True, interval = CLOSED_CLOSED):
    """Read ``key`` restricted to the chunk range [startDate, endDate];
    ``interval`` controls whether each bound is open or closed."""
    chunkRange = DateRange(startDate, endDate, interval)
    return lib.read(key, chunk_range=chunkRange, filter_data=filter)
def loadMetadata(lib, key):
    """Return the metadata dict stored alongside ``key``."""
    metadata = lib.read_metadata(key)
    return metadata
def getMasterInterval(lib, keys, start=None, end=None):
    """Checks the min/max dates for each key and returns the overlap. If start and end are given, returns the overlap with them as well.

    Reads the metadata once per key (the original read it twice per key,
    doubling the DB round-trips).

    NOTE(review): truthiness is used for the start/end guards, matching the
    original — a falsy-but-valid bound (e.g. 0) would be treated as unset.
    """
    metas = [loadMetadata(lib, key) for key in keys]
    startAll = max(meta['start'] for meta in metas)
    endAll = min(meta['end'] for meta in metas)
    #clamp the requested bounds so we don't go out of range
    start = max(start, startAll) if start else startAll
    end = min(end, endAll) if end else endAll
    return (start, end)
#Helpers for pickling objects to/from base64 text. (Note: it is readAllData, further below, that reads a whole symbol into memory and eats all the RAM.)
def encodeObject(obj):
    """Serialize ``obj`` with pickle (highest protocol) and return it as
    base64 text suitable for storing in metadata."""
    pickled = pickle.dumps(obj, -1)
    return codecs.encode(pickled, "base64").decode()
def decodeObject(encoded):
    """Inverse of encodeObject: unpickle an object from base64 text.

    Only use on data this module wrote itself — unpickling untrusted
    input can execute arbitrary code.
    """
    raw = codecs.decode(encoded.encode(), "base64")
    return pickle.loads(raw)
def readAllData(lib, key):
    """Load the ENTIRE symbol ``key`` into memory and print it with timing
    info — debug helper, eats all the RAM on large symbols."""
    start = time.time()
    try:
        df = lib.read(key)
    except Exception:  # was a bare except; keep the same error report
        print("Error:", sys.exc_info()[0])
        return
    print("Loading took "+str(time.time() - start)+"s")
    start = time.time()
    print("The data is", df)
    values = df.values
    #sample a row for inspection; the original indexed values[4]
    #unconditionally and crashed on symbols with fewer than 5 rows
    if len(values) > 4:
        print(values[4])
    print("Getting the values took "+str(time.time() - start)+"s")
    print("The metadata is", loadMetadata(lib, key))
#Pandas config for debug display: show up to 999 columns and don't wrap wide frames
pd.set_option("display.max_columns",999)
pd.set_option('expand_frame_repr', False)
#Init the module at import time (side effect: connects to the localhost Arctic instance)
init()
if __name__ == "__main__": #if this is the main file, parse the command args
    parser = argparse.ArgumentParser(description="Module that manages database storage and retrieval.")
    parser.add_argument('--key', type=str, help='The database symbol.')
    parser.add_argument('--list', dest='list', action="store_true", help="Print the available symbols in the database.")
    parser.add_argument('--peek', dest='peek', action="store_true", help="Print the first and last rows of a symbol.")
    parser.add_argument('--read', dest='read', action="store_true", help="Load and print the whole symbol.")
    parser.add_argument('--remove', dest='remove', action="store_true", help="Remove a certain symbol from the database.")
    parser.add_argument('--removeBlockchain', dest='removeBlockchain', action="store_true", help="Remove all downloaded raw blockchain data.")
    parser.add_argument('--removeProperties', dest='removeProperties', action="store_true", help="Remove all database records that are NOT the current raw blockchain data.")
    #store_true already defaults to False; keep the explicit defaults in one call
    parser.set_defaults(list=False, peek=False, read=False, remove=False,
                        removeBlockchain=False, removeProperties=False)
    args, _ = parser.parse_known_args()

    #every branch below needs the chunkstore; fetch it once instead of per operation
    chunkstore = getChunkstore()

    if args.removeBlockchain:
        #drop every raw blockchain symbol
        for key in dbKeys:
            removeDB(chunkstore, dbKeys[key])
    elif args.removeProperties:
        #if the key is one of our blockchain keys, don't touch it; remove everything else
        #(set membership replaces the O(symbols*keys) flag loop)
        blockchainSymbols = set(dbKeys.values())
        for key in chunkstore.list_symbols():
            if key not in blockchainSymbols:
                removeDB(chunkstore, key)
    elif args.key is None or args.list:
        print("Available symbols: ", str.join(',', chunkstore.list_symbols()))
    else:
        if args.peek:
            try:
                peekData(chunkstore, args.key)
            except Exception:  # was a bare except
                print("There was an error while peeking. Did you enter the correct key?")
        if args.read:
            try:
                readAllData(chunkstore, args.key)
            except Exception:  # was a bare except
                print("There was an error while reading. Did you enter the correct key?")
        if args.remove:
            try:
                #--key is treated as a regex; remove every fully-matching symbol
                for dbkey in chunkstore.list_symbols():
                    if re.fullmatch(args.key, dbkey) is not None:
                        removeDB(chunkstore, dbkey)
            except Exception:  # was a bare except (e.g. invalid regex)
                print("There was an error while removing. Did you enter the correct key?")