-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_bucket_consistency.py
59 lines (48 loc) · 1.57 KB
/
check_bucket_consistency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import boto3
from google.cloud import storage
def get_all_gcp_keys(bucket_iterator):
    """Lazily yield the ``name`` attribute of every item in *bucket_iterator*.

    The script feeds this the result of ``list_blobs()`` on a GCS bucket, so
    each item is expected to expose its object key as ``.name``.
    """
    yield from (blob.name for blob in bucket_iterator)
def get_all_aws_keys(bucket_iterator):
    """Lazily yield the ``key`` attribute of every item in *bucket_iterator*.

    The script feeds this the result of ``objects.all()`` on an S3 bucket
    resource, so each item is expected to expose its object key as ``.key``.
    """
    yield from (obj.key for obj in bucket_iterator)
s3 = boto3.resource('s3')
my_bucket = s3.Bucket('org-hca-dss-staging')

gcs = storage.Client()
a_bucket = gcs.lookup_bucket('org-hca-dss-staging')
if a_bucket is None:
    # lookup_bucket() returns None for a missing bucket instead of raising;
    # fail with a clear message rather than an AttributeError further down.
    raise SystemExit('GCS bucket org-hca-dss-staging not found')

# Listing on both buckets is expected to come back in the same lexicographic
# order (NOTE(review): confirm against the S3 / GCS listing docs), so in
# recently synced buckets the two listings should be nearly identical and can
# be compared with a single merge-style pass without loading either in memory.
diff_list = {'aws': [],
             'gcp': []}

gcp_iter = get_all_gcp_keys(a_bucket.list_blobs())
aws_iter = get_all_aws_keys(my_bucket.objects.all())

# None marks an exhausted listing. A '' sentinel would sort *below* every real
# key and be misreported as an "extra" key on the exhausted side.
gcp_key = next(gcp_iter, None)
aws_key = next(aws_iter, None)

while gcp_key is not None or aws_key is not None:
    if gcp_key is None or (aws_key is not None and aws_key < gcp_key):
        # AWS is behind GCP (or GCP is exhausted): key exists only in AWS.
        diff_list['aws'].append(aws_key)
        print(f'Extra in AWS: {aws_key}')
        aws_key = next(aws_iter, None)
    elif aws_key is None or gcp_key < aws_key:
        # GCP is behind AWS (or AWS is exhausted): key exists only in GCP.
        diff_list['gcp'].append(gcp_key)
        print(f'Extra in GCP: {gcp_key}')
        gcp_key = next(gcp_iter, None)
    else:
        # Same key on both sides: advance both listings.
        aws_key = next(aws_iter, None)
        gcp_key = next(gcp_iter, None)

# Persist the collected differences, one key per line. Object keys may be
# non-ASCII, so do not depend on the locale's default encoding.
with open('gcp.log', 'w', encoding='utf-8') as f:
    f.writelines(f'{g}\n' for g in diff_list['gcp'])

with open('aws.log', 'w', encoding='utf-8') as f:
    f.writelines(f'{a}\n' for a in diff_list['aws'])