Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using Azure credentials from config #167

Merged
merged 3 commits into from
Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions scripts/experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dest=$2

#key_prefix="synthetic-fake-imagenet/4_16384"
key_prefix="fake_imagenet"
bucket_prefix="exps222"
bucket_prefix="exps"
src_bucket=(${src//:/ })
src_bucket=${bucket_prefix}-skylark-${src_bucket[1]}
dest_bucket=(${dest//:/ })
Expand All @@ -32,7 +32,7 @@ echo $filename
export GOOGLE_APPLICATION_CREDENTIALS="/home/ubuntu/.skylark-shishir-42be5f375b7a.json"

# creates buckets + bucket data and sets env variables
#python scripts/setup_bucket.py --key-prefix ${key_prefix} --bucket-prefix ${bucket_prefix} --gcp-project skylark-shishir --src-data-path ../${key_prefix}/ --src-region ${src} --dest-region ${dest}
python scripts/setup_bucket.py --key-prefix ${key_prefix} --bucket-prefix ${bucket_prefix} --gcp-project skylark-shishir --src-data-path ../${key_prefix}/ --src-region ${src} --dest-region ${dest}


# TODO: artificially increase the number of chunks
Expand Down
29 changes: 15 additions & 14 deletions skylark/obj_store/azure_interface.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import os
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Iterator, List

from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError

# from azure.identity import DefaultAzureCredential
from azure.identity import DefaultAzureCredential, ClientSecretCredential
from azure.storage.blob import BlobServiceClient
from skylark.config import load_config
from skylark.utils import logger
from skylark.obj_store.azure_keys import azure_storage_credentials
from skylark.obj_store.object_store_interface import NoSuchObjectException, ObjectStoreInterface, ObjectStoreObject


Expand All @@ -20,20 +18,23 @@ class AzureInterface(ObjectStoreInterface):
def __init__(self, azure_region, container_name):
# TODO: the azure region should get corresponding os.getenv()
self.azure_region = azure_region
assert self.azure_region in azure_storage_credentials

self.container_name = container_name
self.bucket_name = self.container_name # For compatibility
self.pending_downloads, self.completed_downloads = 0, 0
self.pending_uploads, self.completed_uploads = 0, 0

# Connection strings are stored in azure_keys.py
self._connect_str = azure_storage_credentials[self.azure_region]["connection_string"]
self.blob_service_client = BlobServiceClient.from_connection_string(self._connect_str)
# self.azure_default_credential = DefaultAzureCredential()
# self.blob_service_client = BlobServiceClient(account_url=account_url, credential=self.azure_default_credential)

self.pool = ThreadPoolExecutor(max_workers=256) # TODO: Figure this out, since azure by default has 15 workers
# Authenticate
config = load_config()
self.subscription_id = config["azure_subscription_id"]
self.credential = ClientSecretCredential(
ShishirPatil marked this conversation as resolved.
Show resolved Hide resolved
tenant_id=config["azure_tenant_id"],
client_id=config["azure_client_id"],
client_secret=config["azure_client_secret"],
)
# Create a blob service client
self.account_url = "https://{}.blob.core.windows.net".format("skylark" + self.azure_region)
self.blob_service_client = BlobServiceClient(account_url=self.account_url, credential=self.credential)

self.pool = ThreadPoolExecutor(max_workers=256) # TODO: This might need some tuning
self.max_concurrency = 1
self.container_client = None
if not self.container_exists():
Expand Down
42 changes: 0 additions & 42 deletions skylark/obj_store/azure_keys.py

This file was deleted.

2 changes: 1 addition & 1 deletion skylark/replicate/replicator_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def fn(s: Server):
tqdm.write(log_line)
else:
logger.debug(log_line)
elif t.elapsed > 180 and completed_bytes == 0:
elif t.elapsed > 600 and completed_bytes == 0:
logger.error(f"No chunks completed after {int(t.elapsed)}s! There is probably a bug, check logs. Exiting...")
return dict(
completed_chunk_ids=completed_chunk_ids,
Expand Down