Switch the dumper source to epi_data.jsonl.gz and coerce every field of each
parsed record with biothings' to_number instead of the numpy type checks.

NOTE(review): the per-field loop header `for k,v in item.items():` is kept as
context here; removing it while adding `item[k] = to_number(v)` leaves `k` and
`v` undefined (NameError on the first record).
NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch - confirm against the working tree
(or apply with --ignore-whitespace). The index hashes are carried over as-is.

diff --git a/manifest.json b/manifest.json
index 51a03ad..6f03ad6 100644
--- a/manifest.json
+++ b/manifest.json
@@ -4,7 +4,7 @@
     "dumper" : {
         "schedule": "0 3,13,20,23 * * *",
         "data_url" : [
-            "https://storage.googleapis.com/andersen-lab_temp/outbreak_info/biothings_items_breaks.jsonl.gz"
+            "http://su13/outbreak/biothings_covid19/epi_data.jsonl.gz"
         ],
         "release": "version:get_release",
         "uncompress" : false
diff --git a/parser.py b/parser.py
index 4b3ff34..e315e44 100644
--- a/parser.py
+++ b/parser.py
@@ -1,21 +1,16 @@
 import os
 import json
 import gzip
 
-import numpy as np
-from biothings import config
-logging = config.logger
+from biothings.utils.dataload import to_number
 
 
 def load_annotations(data_folder):
-    json_path = os.path.join(data_folder,"biothings_items_breaks.jsonl.gz")
+    json_path = os.path.join(data_folder,"epi_data.jsonl.gz")
     with gzip.open(json_path) as f:
         for line in f:
+            # TODO: use biothings.utils.serializer import load_json when upgrade biothings.api
             item = json.loads(line)
             for k,v in item.items():
-                if type(v) == np.int64:
-                    item[k] = int(v)
-                if type(v) == np.float64 or type(v) == np.float:
-                    item[k] = float(v)
-
+                item[k] = to_number(v)
             yield item