-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
exp push: push experiments revs to studio #9160
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,10 +15,12 @@ | |
|
||
logger = logging.getLogger(__name__) | ||
|
||
STUDIO_ENDPOINT = "" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently blocked on endpoint. I talked with @amritghimire about the API. It has to be a separate endpoint, and for the payload, it seems If Studio needs more info regarding experiment refs, we can iterate as we go, as this will be behind a feature flag till the endpoint is stable. |
||
|
||
|
||
@locked | ||
@scm_context | ||
def push( # noqa: C901 | ||
def push( # noqa: C901, PLR0912 | ||
repo, | ||
git_remote: str, | ||
exp_names: Union[Iterable[str], str], | ||
|
@@ -29,6 +31,8 @@ def push( # noqa: C901 | |
push_cache: bool = False, | ||
**kwargs, | ||
) -> Iterable[str]: | ||
from dvc.utils import env2bool | ||
|
||
exp_ref_set: Set["ExpRefInfo"] = set() | ||
if all_commits: | ||
exp_ref_set.update(exp_refs(repo.scm)) | ||
|
@@ -69,7 +73,48 @@ def push( # noqa: C901 | |
push_result[SyncStatus.UP_TO_DATE] + push_result[SyncStatus.SUCCESS] | ||
) | ||
_push_cache(repo, push_cache_ref, **kwargs) | ||
return [ref.name for ref in push_result[SyncStatus.SUCCESS]] | ||
|
||
refs = push_result[SyncStatus.SUCCESS] | ||
config = repo.config | ||
if refs and config["feature"]["push_exp_to_studio"] and not env2bool("DVC_TEST"): | ||
from dvc_studio_client.post_live_metrics import get_studio_token_and_repo_url | ||
|
||
token, repo_url = get_studio_token_and_repo_url() | ||
if token and repo_url: | ||
logger.debug( | ||
"pushing experiments %s to Studio", | ||
", ".join(ref.name for ref in refs), | ||
) | ||
_notify_studio([str(ref) for ref in refs], repo_url, token) | ||
return [ref.name for ref in refs] | ||
|
||
|
||
def _notify_studio( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will move to |
||
refs: List[str], | ||
repo_url: str, | ||
token: str, | ||
endpoint: Optional[str] = None, | ||
): | ||
if not refs: | ||
return | ||
|
||
import os | ||
|
||
import requests | ||
from requests.adapters import HTTPAdapter | ||
|
||
endpoint = endpoint or os.getenv("STUDIO_ENDPOINT", STUDIO_ENDPOINT) | ||
assert endpoint is not None | ||
|
||
session = requests.Session() | ||
session.mount(endpoint, HTTPAdapter(max_retries=3)) | ||
|
||
json = {"repo_url": repo_url, "client": "dvc", "refs": refs} | ||
logger.trace("Sending %s to %s", json, endpoint) # type: ignore[attr-defined] | ||
|
||
headers = {"Authorization": f"token {token}"} | ||
resp = session.post(endpoint, json=json, headers=headers, timeout=5) | ||
resp.raise_for_status() | ||
|
||
|
||
def _push( | ||
|
@@ -83,7 +128,7 @@ def _push( | |
from dvc.scm import GitAuthError | ||
|
||
refspec_list = [f"{exp_ref}:{exp_ref}" for exp_ref in refs] | ||
logger.debug("git push experiment '%s' -> '%s'", refspec_list, git_remote) | ||
logger.debug("git push experiment %s -> '%s'", refspec_list, git_remote) | ||
|
||
with TqdmGit(desc="Pushing git refs") as pbar: | ||
try: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from requests import Response | ||
|
||
from dvc.repo.experiments.push import STUDIO_ENDPOINT, _notify_studio | ||
|
||
|
||
def test_notify_studio_for_exp_push(mocker): | ||
valid_response = Response() | ||
valid_response.status_code = 200 | ||
mock_post = mocker.patch("requests.Session.post", return_value=valid_response) | ||
|
||
_notify_studio( | ||
["ref1", "ref2", "ref3"], | ||
"[email protected]:iterative/dvc.git", | ||
"TOKEN", | ||
) | ||
|
||
assert mock_post.called | ||
assert mock_post.call_args == mocker.call( | ||
STUDIO_ENDPOINT, | ||
json={ | ||
"repo_url": "[email protected]:iterative/dvc.git", | ||
"client": "dvc", | ||
"refs": ["ref1", "ref2", "ref3"], | ||
}, | ||
headers={"Authorization": "token TOKEN"}, | ||
timeout=5, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the motivation for putting this under a feature flag?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Endpoint and data format may change. For example, we have yet to figure out if
refs
will be enough for all git-forges to check if the refs exist with the API, before Studio even clones. We still don't know if bitbucket supports them for example.Also, I don't want any users to hit any endpoint until this feature is considered stable.
The plan is to release this internally behind a flag, and see what the performance of Studio will be, what needs to be improved, etc to make it as close to live metrics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We also have to figure out workflow (eg: what will be the ways to enable/disable push to studio?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wanted a bit of flexibility since there are lots of questions regarding workflow/API, etc.