-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkaggle_upload.py
68 lines (50 loc) · 1.86 KB
/
kaggle_upload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json
import os
from datetime import date, timedelta
from glob import glob
from kaggle import api
# Column descriptors shared by every per-day CSV: (name, description, type).
schema_fields = [
    ("query_time", "Estimated time of when the data was queried", "datetime"),
    ("place_id", "A string identifier of the station", "string"),
    ("lat", "Latitude of the station", "number"),
    ("lon", "Longitude of the station", "number"),
    ("bikes", "Number of bikes at the station", "number"),
    ("empty_docks", "Number of empty docks at the station", "number"),
    ("docks", "Number of total docks at the station", "number"),
]

# Load the base dataset metadata; its "resources" entry is regenerated below.
with open("dataset-metadata.json", encoding="utf-8") as r:
    dataset_metadata = json.load(r)

# Every resource uses the same columns, so build the field list only once.
schema = [
    {"name": name, "description": description, "type": _type}
    for name, description, _type in schema_fields
]

resources = []
dates = []
today = date.today()
# Oldest date still kept. The offset must be positive: subtracting a negative
# timedelta would move the cutoff a year into the future and make every file
# look "older than a year" (and therefore get deleted).
cutoff_date = today - timedelta(days=365)

for file in sorted(glob("data/*.csv")):
    # Drop the "data/" prefix and ".csv" suffix, then split the stem on "-".
    date_parts = file[5:-4].split("-")
    if len(date_parts) == 4:
        # Four-part names are not daily files (per the message below this is
        # the stations info file); leave them in place but do not list them.
        print("Skipping stations info")
        continue

    date_parts = [int(part) for part in date_parts]
    file_date = date(*date_parts)

    if file_date >= today:
        # Files dated today or later are removed and excluded from the upload.
        print(f"Skipping {file} as it is too recent")
        os.remove(file)
        continue
    if file_date < cutoff_date:
        # Files past the one-year retention window are removed as well.
        print(f"Skipping {file} as it is older than a year")
        os.remove(file)
        continue

    dates.append(file_date)
    resource = {
        "path": file[5:],
        "description": f"Station data for the {file_date.strftime('%B %d %Y')}",
        "schema": {"fields": schema},
    }
    resources.append(resource)

# Without any qualifying file there is no date range to report and nothing
# worth publishing; stop with a clear message instead of failing in min().
if not dates:
    raise SystemExit("No data files within the last year to upload")

dataset_metadata["resources"] = resources
with open("data/dataset-metadata.json", "w", encoding="utf-8") as w:
    json.dump(dataset_metadata, w, indent=4)

update_message = f"Data from {min(dates)} to {max(dates)}"
api.dataset_create_version("data", update_message, dir_mode="zip", quiet=False)