From ba5e90ba94885110835f1a081f88075e3b9754e3 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Tue, 16 Jun 2020 22:19:02 -0700 Subject: [PATCH 1/3] Proof-of-Concept: Removing hardcoded offline_access --- tests/test_e2e.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 93d321e4..627f70d6 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -86,10 +86,15 @@ def assertCacheWorksForUser( result_from_wire['access_token'], result_from_cache['access_token'], "We should get a cached AT") - # Going to test acquire_token_silent(...) to obtain an AT by a RT from cache - self.app.token_cache._cache["AccessToken"] = {} # A hacky way to clear ATs + if "refresh_token" in result_from_wire: + # Going to test acquire_token_silent(...) to obtain an AT by a RT from cache + self.app.token_cache._cache["AccessToken"] = {} # A hacky way to clear ATs result_from_cache = self.app.acquire_token_silent( scope, account=account, data=data or {}) + if "refresh_token" not in result_from_wire: + self.assertEqual( + result_from_cache["access_token"], result_from_wire["access_token"], + "The previously cached AT should be returned") self.assertIsNotNone(result_from_cache, "We should get a result from acquire_token_silent(...) call") self.assertIsNotNone( From 06f2abb4b1d3b1cd54f159d2b7f0a65a144db304 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Wed, 24 Jun 2020 10:54:30 -0700 Subject: [PATCH 2/3] Remove hardcoded profile scope --- msal/application.py | 15 ++++++++++++++- msal/token_cache.py | 8 ++++++-- tests/test_e2e.py | 15 ++++++++------- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/msal/application.py b/msal/application.py index a8457e46..ee12271a 100644 --- a/msal/application.py +++ b/msal/application.py @@ -757,6 +757,13 @@ def get_accounts(self, username=None): lowercase_username = username.lower() accounts = [a for a in accounts if a["username"].lower() == lowercase_username] + if not accounts: + logger.warning(( + "get_accounts(username='{}') finds no account. " + "If tokens were acquired without 'profile' scope, " + "they would contain no username for filtering. " + "Consider calling get_accounts(username=None) instead." + ).format(username)) # Does not further filter by existing RTs here. It probably won't matter. # Because in most cases Accounts and RTs co-exist. # Even in the rare case when an RT is revoked and then removed, @@ -1251,7 +1258,13 @@ def _acquire_token_by_username_password_federated( self.client.grant_assertion_encoders.setdefault( # Register a non-standard type grant_type, self.client.encode_saml_assertion) return self.client.obtain_token_by_assertion( - wstrust_result["token"], grant_type, scope=scopes, **kwargs) + wstrust_result["token"], grant_type, scope=scopes, + on_obtaining_tokens=lambda event: self.token_cache.add(dict( + event, + environment=self.authority.instance, + username=username, # Useful in case IDT contains no such info + )), + **kwargs) class PublicClientApplication(ClientApplication): # browser app or mobile app diff --git a/msal/token_cache.py b/msal/token_cache.py index b0731278..d11d5c91 100644 --- a/msal/token_cache.py +++ b/msal/token_cache.py @@ -108,11 +108,13 @@ def wipe(dictionary, sensitive_fields): # Masks sensitive info if sensitive in dictionary: dictionary[sensitive] = "********" wipe(event.get("data", {}), - ("password", "client_secret", "refresh_token", "assertion", "username")) + ("password", "client_secret", "refresh_token", "assertion")) try: return self.__add(event, now=now) finally: - wipe(event.get("response", {}), ("access_token", "refresh_token")) + wipe(event.get("response", {}), ( # These claims were useful during __add() + "access_token", "refresh_token", "username")) + wipe(event, ["username"]) # Needed for federated ROPC logger.debug("event=%s", json.dumps( # We examined and concluded that this log won't have Log Injection risk, # because the event payload is already in JSON so CR/LF will be escaped. @@ -184,6 +186,8 @@ def __add(self, event, now=None): "oid", id_token_claims.get("sub")), "username": id_token_claims.get("preferred_username") # AAD or id_token_claims.get("upn") # ADFS 2019 + or data.get("username") # Falls back to ROPC username + or event.get("username") # Falls back to Federated ROPC username or "", # The schema does not like null "authority_type": self.AuthorityType.ADFS if realm == "adfs" diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 627f70d6..b7280e8f 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -132,10 +132,9 @@ def _test_username_password(self, result = self.app.acquire_token_by_username_password( username, password, scopes=scope) self.assertLoosely(result) - # self.assertEqual(None, result.get("error"), str(result)) self.assertCacheWorksForUser( result, scope, - username=username if ".b2clogin.com" not in authority else None, + username=username, # Our implementation works even when "profile" scope was not requested, or when profile claims is unavailable in B2C ) def _test_device_flow( @@ -554,11 +553,13 @@ def _test_acquire_token_obo(self, config_pca, config_cca): # Assuming you already did that (which is not shown in this test case), # the following part shows one of the ways to obtain an AT from cache. username = cca_result.get("id_token_claims", {}).get("preferred_username") - self.assertEqual(config_cca["username"], username) - if username: # A precaution so that we won't use other user's token - account = cca.get_accounts(username=username)[0] - result = cca.acquire_token_silent(config_cca["scope"], account) - self.assertEqual(cca_result["access_token"], result["access_token"]) + if username: # It means CCA have requested an IDT w/ "profile" scope + self.assertEqual(config_cca["username"], username) + accounts = cca.get_accounts(username=username) + assert len(accounts) == 1, "App is expected to partition token cache per user" + account = accounts[0] + result = cca.acquire_token_silent(config_cca["scope"], account) + self.assertEqual(cca_result["access_token"], result["access_token"]) def _test_acquire_token_by_client_secret( self, client_id=None, client_secret=None, authority=None, scope=None, From d6bf21e96411974a1ea3240c59a6aff34335ec36 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Wed, 12 May 2021 22:55:30 -0700 Subject: [PATCH 3/3] Add exclude_scopes:Optional[list] --- msal/application.py | 93 +++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/msal/application.py b/msal/application.py index ee12271a..7d7af50f 100644 --- a/msal/application.py +++ b/msal/application.py @@ -27,33 +27,6 @@ logger = logging.getLogger(__name__) -def decorate_scope( - scopes, client_id, - reserved_scope=frozenset(['openid', 'profile', 'offline_access'])): - if not isinstance(scopes, (list, set, tuple)): - raise ValueError("The input scopes should be a list, tuple, or set") - scope_set = set(scopes) # Input scopes is typically a list. Copy it to a set. - if scope_set & reserved_scope: - # These scopes are reserved for the API to provide good experience. - # We could make the developer pass these and then if they do they will - # come back asking why they don't see refresh token or user information. - raise ValueError( - "API does not accept {} value as user-provided scopes".format( - reserved_scope)) - if client_id in scope_set: - if len(scope_set) > 1: - # We make developers pass their client id, so that they can express - # the intent that they want the token for themselves (their own - # app). - # If we do not restrict them to passing only client id then they - # could write code where they expect an id token but end up getting - # access_token. - raise ValueError("Client Id can only be provided as a single scope") - decorated = set(reserved_scope) # Make a writable copy - else: - decorated = scope_set | reserved_scope - return list(decorated) - def extract_certs(public_cert_content): # Parses raw public certificate file contents and returns a list of strings @@ -123,6 +96,7 @@ def __init__( # despite it is currently only needed by ConfidentialClientApplication. # This way, it holds the same positional param place for PCA, # when we would eventually want to add this feature to PCA in future. + exclude_scopes=None, ): """Create an instance of application. @@ -275,11 +249,28 @@ def __init__( or provide a custom http_client which has a short timeout. That way, the latency would be under your control, but still less performant than opting out of region feature. + :param list[str] exclude_scopes: (optional) + Historically MSAL hardcodes `offline_access` scope, + which would allow your app to have prolonged access to user's data. + If that is unnecessary or undesirable for your app, + now you can use this parameter to supply an exclusion list of scopes, + such as ``exclude_scopes = ["offline_access"]``. """ self.client_id = client_id self.client_credential = client_credential self.client_claims = client_claims self._client_capabilities = client_capabilities + + if exclude_scopes and not isinstance(exclude_scopes, list): + raise ValueError( + "Invalid exclude_scopes={}. It need to be a list of strings.".format( + repr(exclude_scopes))) + self._exclude_scopes = frozenset(exclude_scopes or []) + if "openid" in self._exclude_scopes: + raise ValueError( + 'Invalid exclude_scopes={}. You can not opt out "openid" scope'.format( + repr(exclude_scopes))) + if http_client: self.http_client = http_client else: @@ -326,6 +317,34 @@ def __init__( self._telemetry_buffer = {} self._telemetry_lock = Lock() + def _decorate_scope( + self, scopes, + reserved_scope=frozenset(['openid', 'profile', 'offline_access'])): + if not isinstance(scopes, (list, set, tuple)): + raise ValueError("The input scopes should be a list, tuple, or set") + scope_set = set(scopes) # Input scopes is typically a list. Copy it to a set. + if scope_set & reserved_scope: + # These scopes are reserved for the API to provide good experience. + # We could make the developer pass these and then if they do they will + # come back asking why they don't see refresh token or user information. + raise ValueError( + "API does not accept {} value as user-provided scopes".format( + reserved_scope)) + if self.client_id in scope_set: + if len(scope_set) > 1: + # We make developers pass their client id, so that they can express + # the intent that they want the token for themselves (their own + # app). + # If we do not restrict them to passing only client id then they + # could write code where they expect an id token but end up getting + # access_token. + raise ValueError("Client Id can only be provided as a single scope") + decorated = set(reserved_scope) # Make a writable copy + else: + decorated = scope_set | reserved_scope + decorated -= self._exclude_scopes + return list(decorated) + def _build_telemetry_context( self, api_id, correlation_id=None, refresh_reason=None): return msal.telemetry._TelemetryContext( @@ -505,7 +524,7 @@ def initiate_auth_code_flow( flow = client.initiate_auth_code_flow( redirect_uri=redirect_uri, state=state, login_hint=login_hint, prompt=prompt, - scope=decorate_scope(scopes, self.client_id), + scope=self._decorate_scope(scopes), domain_hint=domain_hint, claims=_merge_claims_challenge_and_capabilities( self._client_capabilities, claims_challenge), @@ -587,7 +606,7 @@ def get_authorization_request_url( response_type=response_type, redirect_uri=redirect_uri, state=state, login_hint=login_hint, prompt=prompt, - scope=decorate_scope(scopes, self.client_id), + scope=self._decorate_scope(scopes), nonce=nonce, domain_hint=domain_hint, claims=_merge_claims_challenge_and_capabilities( @@ -650,7 +669,7 @@ def authorize(): # A controller in a web app response =_clean_up(self.client.obtain_token_by_auth_code_flow( auth_code_flow, auth_response, - scope=decorate_scope(scopes, self.client_id) if scopes else None, + scope=self._decorate_scope(scopes) if scopes else None, headers=telemetry_context.generate_headers(), data=dict( kwargs.pop("data", {}), @@ -722,7 +741,7 @@ def acquire_token_by_authorization_code( self.ACQUIRE_TOKEN_BY_AUTHORIZATION_CODE_ID) response = _clean_up(self.client.obtain_token_by_authorization_code( code, redirect_uri=redirect_uri, - scope=decorate_scope(scopes, self.client_id), + scope=self._decorate_scope(scopes), headers=telemetry_context.generate_headers(), data=dict( kwargs.pop("data", {}), @@ -1020,7 +1039,7 @@ def _acquire_token_silent_from_cache_and_possibly_refresh_it( assert refresh_reason, "It should have been established at this point" try: result = _clean_up(self._acquire_token_silent_by_finding_rt_belongs_to_me_or_my_family( - authority, decorate_scope(scopes, self.client_id), account, + authority, self._decorate_scope(scopes), account, refresh_reason=refresh_reason, claims_challenge=claims_challenge, **kwargs)) if (result and "error" not in result) or (not access_token_from_cache): @@ -1166,7 +1185,7 @@ def acquire_token_by_refresh_token(self, refresh_token, scopes, **kwargs): refresh_reason=msal.telemetry.FORCE_REFRESH) response = _clean_up(self.client.obtain_token_by_refresh_token( refresh_token, - scope=decorate_scope(scopes, self.client_id), + scope=self._decorate_scope(scopes), headers=telemetry_context.generate_headers(), rt_getter=lambda rt: rt, on_updating_rt=False, @@ -1197,7 +1216,7 @@ def acquire_token_by_username_password( - A successful response would contain "access_token" key, - an error response would contain "error" and usually "error_description". """ - scopes = decorate_scope(scopes, self.client_id) + scopes = self._decorate_scope(scopes) telemetry_context = self._build_telemetry_context( self.ACQUIRE_TOKEN_BY_USERNAME_PASSWORD_ID) headers = telemetry_context.generate_headers() @@ -1343,7 +1362,7 @@ def acquire_token_interactive( telemetry_context = self._build_telemetry_context( self.ACQUIRE_TOKEN_INTERACTIVE) response = _clean_up(self.client.obtain_token_by_browser( - scope=decorate_scope(scopes, self.client_id) if scopes else None, + scope=self._decorate_scope(scopes) if scopes else None, extra_scope_to_consent=extra_scopes_to_consent, redirect_uri="http://localhost:{port}".format( # Hardcode the host, for now. AAD portal rejects 127.0.0.1 anyway @@ -1374,7 +1393,7 @@ def initiate_device_flow(self, scopes=None, **kwargs): """ correlation_id = msal.telemetry._get_new_correlation_id() flow = self.client.initiate_device_flow( - scope=decorate_scope(scopes or [], self.client_id), + scope=self._decorate_scope(scopes or []), headers={msal.telemetry.CLIENT_REQUEST_ID: correlation_id}, **kwargs) flow[self.DEVICE_FLOW_CORRELATION_ID] = correlation_id @@ -1485,7 +1504,7 @@ def acquire_token_on_behalf_of(self, user_assertion, scopes, claims_challenge=No response = _clean_up(self.client.obtain_token_by_assertion( # bases on assertion RFC 7521 user_assertion, self.client.GRANT_TYPE_JWT, # IDTs and AAD ATs are all JWTs - scope=decorate_scope(scopes, self.client_id), # Decoration is used for: + scope=self._decorate_scope(scopes), # Decoration is used for: # 1. Explicitly requesting an RT, without relying on AAD default # behavior, even though it currently still issues an RT. # 2. Requesting an IDT (which would otherwise be unavailable)