forked from MISP/PyMISP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport_scrippsc02.py
467 lines (405 loc) · 18.4 KB
/
import_scrippsc02.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from dateutil.parser import parse
import csv
from pathlib import Path
import requests
from pymisp import MISPEvent, MISPObject, MISPTag
from keys import misp_url, misp_key, misp_verifycert
from pymisp import ExpandedPyMISP
class Scrippts:
    """Import Scripps CO2 program flask measurements into a MISP instance.

    For each sampling station and data series, the CSV export is downloaded
    from scrippsco2.ucsd.edu (and cached under ``scrippts/``), then turned
    into one MISP event per station/series holding one object per sample.
    Running the import again updates the existing events with the samples
    that are not in them yet.
    """

    # Base URL of the CSV exports for each data type. No trailing slash: the
    # remaining path segments are joined with '/' in import_all().
    _BASE_URLS = {
        'co2': 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2',
        'c13': 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic',
        'o18': 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic',
    }

    # Station short name -> (latitude, longitude, altitude, text).
    # NOTE(review): the coordinates are all unsigned, so the hemisphere is
    # presumably not encoded (e.g. 'South Pole' has latitude 90) -- confirm
    # against the station list before relying on them.
    _STATIONS = {
        'alt': (82.3, 62.3, 210, 'Alert, NWT, Canada'),
        'ptb': (71.3, 156.6, 11, 'Point Barrow, Alaska'),
        'stp': (50, 145, 0, 'Station P'),
        'ljo': (32.9, 117.3, 10, 'La Jolla Pier, California'),
        'bcs': (23.3, 110.2, 4, 'Baja California Sur, Mexico'),
        'mlo': (19.5, 155.6, 3397, 'Mauna Loa Observatory, Hawaii'),
        'kum': (19.5, 154.8, 3, 'Cape Kumukahi, Hawaii'),
        'chr': (2, 157.3, 2, 'Christmas Island, Fanning Island'),
        'sam': (14.2, 170.6, 30, 'American Samoa'),
        'ker': (29.2, 177.9, 2, 'Kermadec Islands, Raoul Island'),
        'nzd': (41.4, 174.9, 85, 'Baring Head, New Zealand'),
        'psa': (64.9, 64, 10, 'Palmer Station, Antarctica'),
        'spo': (90, 0, 2810, 'South Pole'),
    }

    # Tag namespace used to tag a daily value with its flag, per data type.
    _DAILY_FLAG_TAGS = {
        'co2': 'scrippsco2-fgc',
        'c13': 'scrippsco2-fgi',
        'o18': 'scrippsco2-fgi',
    }

    def __init__(self):
        """Connect to the MISP instance configured in the ``keys`` module."""
        self.misp = ExpandedPyMISP(misp_url, misp_key, misp_verifycert)

    # ------------------------------------------------------------------
    # Stations: one geolocation_<station>() / tag_<station>() pair each.
    # import_all() looks these up by name with getattr().
    # ------------------------------------------------------------------

    def _geolocation(self, station) -> 'MISPObject':
        """Build the 'geolocation' object of the station with that short name."""
        latitude, longitude, altitude, text = self._STATIONS[station]
        location = MISPObject('geolocation', standalone=False)
        location.add_attribute('latitude', latitude)
        location.add_attribute('longitude', longitude)
        location.add_attribute('altitude', altitude)
        location.add_attribute('text', text)
        return location

    def _tag(self, station) -> 'MISPTag':
        """Build the 'scrippsco2-sampling-stations' tag of the station."""
        tag = MISPTag()
        tag.name = f'scrippsco2-sampling-stations:{station.upper()}'
        return tag

    def geolocation_alt(self) -> 'MISPObject':
        """Geolocation of Alert, NWT, Canada."""
        return self._geolocation('alt')

    def tag_alt(self) -> 'MISPTag':
        """Sampling station tag for ALT."""
        return self._tag('alt')

    def geolocation_ptb(self) -> 'MISPObject':
        """Geolocation of Point Barrow, Alaska."""
        return self._geolocation('ptb')

    def tag_ptb(self) -> 'MISPTag':
        """Sampling station tag for PTB."""
        return self._tag('ptb')

    def geolocation_stp(self) -> 'MISPObject':
        """Geolocation of Station P."""
        return self._geolocation('stp')

    def tag_stp(self) -> 'MISPTag':
        """Sampling station tag for STP."""
        return self._tag('stp')

    def geolocation_ljo(self) -> 'MISPObject':
        """Geolocation of La Jolla Pier, California."""
        return self._geolocation('ljo')

    def tag_ljo(self) -> 'MISPTag':
        """Sampling station tag for LJO."""
        return self._tag('ljo')

    def geolocation_bcs(self) -> 'MISPObject':
        """Geolocation of Baja California Sur, Mexico."""
        return self._geolocation('bcs')

    def tag_bcs(self) -> 'MISPTag':
        """Sampling station tag for BCS."""
        return self._tag('bcs')

    def geolocation_mlo(self) -> 'MISPObject':
        """Geolocation of Mauna Loa Observatory, Hawaii."""
        return self._geolocation('mlo')

    def tag_mlo(self) -> 'MISPTag':
        """Sampling station tag for MLO."""
        return self._tag('mlo')

    def geolocation_kum(self) -> 'MISPObject':
        """Geolocation of Cape Kumukahi, Hawaii."""
        return self._geolocation('kum')

    def tag_kum(self) -> 'MISPTag':
        """Sampling station tag for KUM."""
        return self._tag('kum')

    def geolocation_chr(self) -> 'MISPObject':
        """Geolocation of Christmas Island, Fanning Island."""
        return self._geolocation('chr')

    def tag_chr(self) -> 'MISPTag':
        """Sampling station tag for CHR."""
        return self._tag('chr')

    def geolocation_sam(self) -> 'MISPObject':
        """Geolocation of American Samoa."""
        return self._geolocation('sam')

    def tag_sam(self) -> 'MISPTag':
        """Sampling station tag for SAM."""
        return self._tag('sam')

    def geolocation_ker(self) -> 'MISPObject':
        """Geolocation of Kermadec Islands, Raoul Island."""
        return self._geolocation('ker')

    def tag_ker(self) -> 'MISPTag':
        """Sampling station tag for KER."""
        return self._tag('ker')

    def geolocation_nzd(self) -> 'MISPObject':
        """Geolocation of Baring Head, New Zealand."""
        return self._geolocation('nzd')

    def tag_nzd(self) -> 'MISPTag':
        """Sampling station tag for NZD."""
        return self._tag('nzd')

    def geolocation_psa(self) -> 'MISPObject':
        """Geolocation of Palmer Station, Antarctica."""
        return self._geolocation('psa')

    def tag_psa(self) -> 'MISPTag':
        """Sampling station tag for PSA."""
        return self._tag('psa')

    def geolocation_spo(self) -> 'MISPObject':
        """Geolocation of South Pole."""
        return self._geolocation('spo')

    def tag_spo(self) -> 'MISPTag':
        """Sampling station tag for SPO."""
        return self._tag('spo')

    # ------------------------------------------------------------------
    # Download and event lookup
    # ------------------------------------------------------------------

    def fetch(self, url):
        """Return the local path of the CSV at ``url``, downloading it if needed.

        Files are cached in the ``scrippts`` directory (relative to the
        current working directory) under the last path component of the URL;
        an already cached file is never downloaded again.

        Returns the ``Path`` of the cached file, or ``False`` (after printing
        the URL) when the server does not answer 200 or the body does not
        start with a double quote, which is how the expected files begin.
        """
        filepath = Path('scrippts') / Path(url).name
        if filepath.exists():
            return filepath
        r = requests.get(url)
        # startswith() instead of r.text[0]: an empty body must be rejected,
        # not raise IndexError.
        if r.status_code != 200 or not r.text.startswith('"'):
            print(url)
            return False
        # The cache directory does not exist on a first run.
        filepath.parent.mkdir(parents=True, exist_ok=True)
        filepath.write_text(r.text)
        return filepath

    def get_existing_event_to_update(self, infofield):
        """Return the first event whose info field matches, or ``False``."""
        found = self.misp.search(eventinfo=infofield, pythonify=True)
        # Only index into an actual, non-empty list of results.
        if isinstance(found, list) and found:
            return found[0]
        return False

    # ------------------------------------------------------------------
    # Import drivers
    # ------------------------------------------------------------------

    def import_all(self, stations_short_names, interval, data_type):
        """Import one data series for every given station.

        :param stations_short_names: lowercase station short names, each
            matching a ``tag_<name>`` / ``geolocation_<name>`` method pair.
        :param interval: ``'monthly'`` or ``'daily'``.
        :param data_type: ``'co2'``, ``'c13'`` or ``'o18'``.

        Stations whose file cannot be fetched are skipped. An existing event
        (matched on its info field) is updated, otherwise a new one is added.
        """
        object_creator = getattr(self, f'{interval}_flask_{data_type}')
        base_url = self._BASE_URLS[data_type]
        for station in stations_short_names:
            url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv'
            infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations'
            filepath = self.fetch(url)
            if not filepath:
                continue
            event = self.get_existing_event_to_update(infofield)
            if event:
                update = True
                existing_locations = event.get_objects_by_name('geolocation')
                if existing_locations:
                    location = existing_locations[0]
                else:
                    # The existing event lost (or never had) its location:
                    # recreate it so the samples can still reference one.
                    location = getattr(self, f'geolocation_{station}')()
                    event.add_object(location)
            else:
                update = False
                event = MISPEvent()
                event.info = infofield
                event.add_tag(getattr(self, f'tag_{station}')())
                location = getattr(self, f'geolocation_{station}')()
                event.add_object(location)
                event.add_attribute('link', f'http://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')
            object_creator(event, location, filepath, update)
            if update:
                self.misp.update_event(event)
            else:
                self.misp.add_event(event)

    def import_monthly_co2_all(self):
        """Import the monthly CO2 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
        self.import_all(to_import, 'monthly', 'co2')

    def import_monthly_c13_all(self):
        """Import the monthly C13 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'psa', 'spo']
        self.import_all(to_import, 'monthly', 'c13')

    def import_monthly_o18_all(self):
        """Import the monthly O18 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'monthly', 'o18')

    def import_daily_co2_all(self):
        """Import the daily CO2 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
        self.import_all(to_import, 'daily', 'co2')

    def import_daily_c13_all(self):
        """Import the daily C13 series of every station providing it."""
        to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'daily', 'c13')

    def import_daily_o18_all(self):
        """Import the daily O18 series of every station providing it."""
        to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'daily', 'o18')

    # ------------------------------------------------------------------
    # CSV parsing
    # ------------------------------------------------------------------

    def split_data_comment(self, csv_file, update, event):
        """Separate the quoted header lines of ``csv_file`` from its data lines.

        Lines starting with a double quote are header text. Unless ``update``
        is true, they are accumulated and attached to ``event`` as 'comment'
        attributes: one per section delimited by a line containing
        '----------', plus one for whatever is pending at the end of the file.
        When ``update`` is true the header is ignored and ``event`` is not
        touched.

        :return: the list of data lines, line endings included.
        """
        comment = ''
        data = []
        with csv_file.open() as f:
            for line in f:
                if not line.startswith('"'):
                    data.append(line)
                    continue
                if update:
                    # The comments were attached when the event was created.
                    continue
                if '----------' in line:
                    event.add_attribute('comment', comment, disable_correlation=True)
                    comment = ''
                    continue
                # Drops the opening quote and the last character of the line.
                # NOTE(review): a closing quote before the newline is kept in
                # the comment text -- confirm whether that is intended.
                comment += line[1:-1].strip()
        if not update:
            event.add_attribute('comment', comment, disable_correlation=True)
        return data

    def _dates_already_imported(self, event, object_name):
        """Collect the 'sample-datetime' values of the event's ``object_name`` objects."""
        return {
            obj.get_attributes_by_relation('sample-datetime')[0].value
            for obj in event.get_objects_by_name(object_name)
        }

    def _import_monthly(self, event, location, csv_file, update, data_type):
        """Add one 'scrippsco2-<data_type>-monthly' object per new row of ``csv_file``."""
        object_name = f'scrippsco2-{data_type}-monthly'
        data = self.split_data_comment(csv_file, update, event)
        known_dates = self._dates_already_imported(event, object_name) if update else set()
        for row in csv.reader(data):
            # Skip blank rows and the column header rows that are mixed in
            # with the data (their first field is not a year).
            if not row or not row[0].isdigit():
                continue
            # Rows only carry a year and a month: the sample is dated on the
            # 16th of that month, at midnight.
            sample_date = parse(f'{row[0]}-{row[1]}-16T00:00:00')
            if sample_date in known_dates:
                continue
            obj = MISPObject(object_name, standalone=False)
            obj.add_attribute('sample-datetime', sample_date)
            obj.add_attribute('sample-date-excel', float(row[2]))
            obj.add_attribute('sample-date-fractional', float(row[3]))
            obj.add_attribute(f'monthly-{data_type}', float(row[4]))
            obj.add_attribute(f'monthly-{data_type}-seasonal-adjustment', float(row[5]))
            obj.add_attribute(f'monthly-{data_type}-smoothed', float(row[6]))
            obj.add_attribute(f'monthly-{data_type}-smoothed-seasonal-adjustment', float(row[7]))
            obj.add_reference(location, 'sampling-location')
            event.add_object(obj)

    def _import_daily(self, event, location, csv_file, update, data_type):
        """Add one 'scrippsco2-<data_type>-daily' object per new row of ``csv_file``."""
        object_name = f'scrippsco2-{data_type}-daily'
        flag_namespace = self._DAILY_FLAG_TAGS[data_type]
        data = self.split_data_comment(csv_file, update, event)
        known_dates = self._dates_already_imported(event, object_name) if update else set()
        for row in csv.reader(data):
            if not row:
                # Blank line in the file.
                continue
            sample_date = parse(f'{row[0]}-{row[1]}')
            if sample_date in known_dates:
                continue
            flag = int(row[5])
            obj = MISPObject(object_name, standalone=False)
            obj.add_attribute('sample-datetime', sample_date)
            obj.add_attribute('sample-date-excel', float(row[2]))
            obj.add_attribute('sample-date-fractional', float(row[3]))
            obj.add_attribute('number-flask', int(row[4]))
            obj.add_attribute('flag', flag)
            attr = obj.add_attribute(f'{data_type}-value', float(row[6]))
            # The flag is also attached to the value itself, as a tag.
            attr.add_tag(f'{flag_namespace}:{flag}')
            obj.add_reference(location, 'sampling-location')
            event.add_object(obj)

    def monthly_flask_co2(self, event, location, csv_file, update):
        """Fill ``event`` with the monthly CO2 samples of ``csv_file``."""
        self._import_monthly(event, location, csv_file, update, 'co2')

    def monthly_flask_c13(self, event, location, csv_file, update):
        """Fill ``event`` with the monthly C13 samples of ``csv_file``."""
        self._import_monthly(event, location, csv_file, update, 'c13')

    def monthly_flask_o18(self, event, location, csv_file, update):
        """Fill ``event`` with the monthly O18 samples of ``csv_file``."""
        self._import_monthly(event, location, csv_file, update, 'o18')

    def daily_flask_co2(self, event, location, csv_file, update):
        """Fill ``event`` with the daily CO2 samples of ``csv_file``."""
        self._import_daily(event, location, csv_file, update, 'co2')

    def daily_flask_c13(self, event, location, csv_file, update):
        """Fill ``event`` with the daily C13 samples of ``csv_file``."""
        self._import_daily(event, location, csv_file, update, 'c13')

    def daily_flask_o18(self, event, location, csv_file, update):
        """Fill ``event`` with the daily O18 samples of ``csv_file``."""
        self._import_daily(event, location, csv_file, update, 'o18')
if __name__ == '__main__':
    # Run every import in turn: the daily series first, then the monthly
    # ones, each time in the order CO2, C13, O18.
    importer = Scrippts()
    importer.import_daily_co2_all()
    importer.import_daily_c13_all()
    importer.import_daily_o18_all()
    importer.import_monthly_co2_all()
    importer.import_monthly_c13_all()
    importer.import_monthly_o18_all()