From 44f0f69a9a385de904a80bd62f78590be65717b1 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Fri, 9 Feb 2018 15:23:58 -0800 Subject: [PATCH 1/9] Use regex to grab xml content as-is --- adal/wstrust_response.py | 30 +++++++++++++++++++++++++++++- tests/test_wstrust_response.py | 18 ++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py index ecdc398f..cfc57c3f 100644 --- a/adal/wstrust_response.py +++ b/adal/wstrust_response.py @@ -55,6 +55,20 @@ def scrub_rstr_log_message(response_str): return 'RSTR Response: ' + scrubbed_rstr +def findall_content(xml_string, tag): + """ + Given a tag name without any prefix, + this function returns a list of the raw content inside this tag as-is. + + >>> findall_content(" what ever content ", "foo") + [" what ever content "] + + """ + # https://www.w3.org/TR/REC-xml/#NT-NameChar + pattern = r"<(?:\w+:)?%(tag)s>(.*) ever content +in multiple lines +""" + sample = "" + content + "" + self.assertEqual([content], findall_content(sample, "foo")) + self.assertEqual([], findall_content(sample, "nonexist")) + + def test_findall_content_for_real(self): + with open(os.path.join(os.getcwd(), 'tests', 'wstrust', 'RSTR.xml')) as f: + rstr = f.read() + wstrustResponse = WSTrustResponse(_call_context, rstr, WSTrustVersion.WSTRUST13) + wstrustResponse.parse() + self.assertIn("", rstr) + self.assertIn(b"", wstrustResponse.token) # It is in bytes + if __name__ == '__main__': unittest.main() From 7a8dc4581f54db50dae68288f9bf751c66a9ac11 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Tue, 20 Feb 2018 16:55:37 -0800 Subject: [PATCH 2/9] Add extra documentation and test the comparison --- adal/wstrust_response.py | 13 +++++++++++-- tests/test_wstrust_response.py | 25 ++++++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py index cfc57c3f..33690ccb 100644 --- a/adal/wstrust_response.py +++ b/adal/wstrust_response.py @@ -63,9 +63,18 @@ def findall_content(xml_string, tag): >>> findall_content(" what ever content ", "foo") [" what ever content "] + Usually we would use XML parser to extract the data by xpath. + However the ElementTree in Python will implicitly normalize the output + by "hoisting" the inner inline namespaces into the outmost element. + The result will be a semantically equivalent XML snippet, + but not fully identical to the original one. + While this shouldn't become a problem, + in practice it could potentially confuse a picky recipient. + + Introducing this helper, based on Regex, which will return raw content as-is. """ - # https://www.w3.org/TR/REC-xml/#NT-NameChar - pattern = r"<(?:\w+:)?%(tag)s>(.*)]*)>(.*) ever content -in multiple lines -""" - sample = "" + content + "" - self.assertEqual([content], findall_content(sample, "foo")) - self.assertEqual([], findall_content(sample, "nonexist")) + + + foo + + """ + sample = ('' + + content + + '') + + # Demonstrating how XML-based parser won't give you the raw content as-is + element = ET.fromstring(sample).findall('{SAML:assertion}Assertion')[0] + assertion_via_xml_parser = ET.tostring(element) + self.assertNotEqual(content, assertion_via_xml_parser) + self.assertNotIn(b"", assertion_via_xml_parser) + + # The findall_content() helper, based on Regex, will return content as-is. + self.assertEqual([content], findall_content(sample, "Wrapper")) def test_findall_content_for_real(self): with open(os.path.join(os.getcwd(), 'tests', 'wstrust', 'RSTR.xml')) as f: From 69e68268b0c19a9bbf11b4504894566f23b7e328 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Thu, 1 Mar 2018 17:32:07 -0800 Subject: [PATCH 3/9] AuthenticationContext(...) adds an enable_pii flag --- adal/authentication_context.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/adal/authentication_context.py b/adal/authentication_context.py index 35790379..24bb3d03 100644 --- a/adal/authentication_context.py +++ b/adal/authentication_context.py @@ -47,7 +47,7 @@ class AuthenticationContext(object): def __init__( self, authority, validate_authority=None, cache=None, - api_version='1.0', timeout=None): + api_version='1.0', timeout=None, enable_pii=False): '''Creates a new AuthenticationContext object. By default the authority will be checked against a list of known Azure @@ -73,6 +73,8 @@ def __init__( :param timeout: (optional) requests timeout. How long to wait for the server to send data before giving up, as a float, or a `(connect timeout, read timeout) ` tuple. + :param enable_pii: (optional) Unless this is set to True, + there will be no Personally Identifiable Information (PII) written in log. ''' self.authority = Authority(authority, validate_authority is None or validate_authority) self._oauth2client = None @@ -93,7 +95,8 @@ def __init__( 'options': GLOBAL_ADAL_OPTIONS, 'api_version': api_version, 'verify_ssl': None if env_value is None else not env_value, # mainly for tracing through proxy - 'timeout':timeout + 'timeout':timeout, + "enable_pii": enable_pii, } self._token_requests_with_user_code = {} self.cache = cache or TokenCache() From f449f2f7d8ae05dd2feb6debcb0d7e411e1bc2ab Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Fri, 2 Mar 2018 12:14:37 -0800 Subject: [PATCH 4/9] Further clarify the rationale of this workaround based on research --- adal/wstrust_response.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py index 33690ccb..5b2f5eea 100644 --- a/adal/wstrust_response.py +++ b/adal/wstrust_response.py @@ -63,15 +63,21 @@ def findall_content(xml_string, tag): >>> findall_content(" what ever content ", "foo") [" what ever content "] + Motivation: + Usually we would use XML parser to extract the data by xpath. However the ElementTree in Python will implicitly normalize the output by "hoisting" the inner inline namespaces into the outmost element. The result will be a semantically equivalent XML snippet, but not fully identical to the original one. - While this shouldn't become a problem, - in practice it could potentially confuse a picky recipient. - - Introducing this helper, based on Regex, which will return raw content as-is. + While this effect shouldn't become a problem in all other cases, + it does not seem to fully comply with Exclusive XML Canonicalization spec + (https://www.w3.org/TR/xml-exc-c14n/), and void the SAML token signature. + SAML signature algo needs the "XML -> C14N(XML) -> Signed(C14N(Xml))" order. + + The binary extention lxml is probably the canonical way to solve this + (https://stackoverflow.com/questions/22959577/python-exclusive-xml-canonicalization-xml-exc-c14n) + but here we use this workaround, based on Regex, to return raw content as-is. """ # \w+ is good enough for https://www.w3.org/TR/REC-xml/#NT-NameChar pattern = r"<(?:\w+:)?%(tag)s(?:[^>]*)>(.*) Date: Fri, 2 Mar 2018 13:36:17 -0800 Subject: [PATCH 5/9] Add timeout for WSTrust --- adal/wstrust_request.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/adal/wstrust_request.py b/adal/wstrust_request.py index 0cb59edd..41624348 100644 --- a/adal/wstrust_request.py +++ b/adal/wstrust_request.py @@ -145,7 +145,9 @@ def acquire_token(self, username, password): operation = "WS-Trust RST" resp = requests.post(self._wstrust_endpoint_url, headers=options['headers'], data=rst, - allow_redirects=True, verify=self._call_context.get('verify_ssl', None)) + allow_redirects=True, + verify=self._call_context.get('verify_ssl', None), + timeout=self._call_context.get('timeout', None)) util.log_return_correlation_id(self._log, operation, resp) From 40178100e13c3b47304311ceb1ba9c23ec11b509 Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Fri, 2 Mar 2018 15:04:21 -0800 Subject: [PATCH 6/9] Tentative fix for thread racing issue --- adal/token_cache.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adal/token_cache.py b/adal/token_cache.py index f1cb441a..ab59d932 100644 --- a/adal/token_cache.py +++ b/adal/token_cache.py @@ -81,8 +81,9 @@ def remove(self, entries): with self._lock: for e in entries: key = _get_cache_key(e) - self._cache.pop(key) - self.has_state_changed = True + removed = self._cache.pop(key, None) + if removed is not None: + self.has_state_changed = True def add(self, entries): with self._lock: From 699a9c49213d8365992656982c1299278b5b4e5c Mon Sep 17 00:00:00 2001 From: Navya Canumalla Date: Thu, 8 Mar 2018 16:48:00 -0800 Subject: [PATCH 7/9] Add PII logging info to readme and minor edits --- README.md | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3d45d538..dfb5124a 100644 --- a/README.md +++ b/README.md @@ -10,21 +10,23 @@ To support 'service principal' with certificate, ADAL depends on the 'cryptograp * For Windows and macOS -Upgrade to the latest pip (8.1.2 as of June 2016) and just do `pip install adal`. + Upgrade to the latest pip (8.1.2 as of June 2016) and just do `pip install adal`. * For Linux -Upgrade to the latest pip (8.1.2 as of June 2016). + Upgrade to the latest pip (8.1.2 as of June 2016). -You'll need a C compiler, libffi + its development headers, and openssl + its development headers. Refer to [cryptography installation](https://cryptography.io/en/latest/installation/) + You'll need a C compiler, libffi + its development headers, and openssl + its development headers. + Refer to [cryptography installation](https://cryptography.io/en/latest/installation/) * To install from source: -Upgrade to the latest pip (8.1.2 as of June 2016). -Before run `python setup.py install`, to avoid dealing with compilation errors from cryptography, run `pip install cryptography` first to use statically-linked wheels. -If you still like build from source, refer to [cryptography installation](https://cryptography.io/en/latest/installation/). + Upgrade to the latest pip (8.1.2 as of June 2016). + To avoid dealing with compilation errors from cryptography, first run `pip install cryptography` to use statically-linked wheels. + Next, run `python setup.py install` -For more context, starts with this [stackoverflow thread](http://stackoverflow.com/questions/22073516/failed-to-install-python-cryptography-package-with-pip-and-setup-py). + If you still like to build from source, refer to [cryptography installation](https://cryptography.io/en/latest/installation/). + For more context, start with this [stackoverflow thread](http://stackoverflow.com/questions/22073516/failed-to-install-python-cryptography-package-with-pip-and-setup-py). ### Acquire Token with Client Credentials @@ -45,6 +47,19 @@ Find the `Main logic` part in the [sample](sample/device_code_sample.py#L49-L54) ### Acquire Token with authorization code Find the `Main logic` part in the [sample](sample/website_sample.py#L107-L115) for a complete bare bones web site that makes use of the code below. +## Logging + +#### Personal Identifiable Information (PII) & Organizational Identifiable Information (OII) + +Starting from ADAL Python 0.5.1, by default, ADAL logging does not capture or log any PII or OII. The library allows app developers to turn this on by configuring the `enable_pii` flag on the AuthenticationContext. By turning on PII or OII, the app takes responsibility for safely handling highly-sensitive data and complying with any regulatory requirements. + +```python +//PII or OII logging disabled. Default Logger does not capture any PII or OII. +auth_context = AuthenticationContext(...) + +//PII or OII logging enabled +auth_context = AuthenticationContext(..., enable_pii=True) +``` ## Samples and Documentation We provide a full suite of [sample applications on GitHub](https://github.com/azure-samples?utf8=%E2%9C%93&q=active-directory&type=&language=) and an [Azure AD developer landing page](https://docs.microsoft.com/en-us/azure/active-directory/develop/active-directory-developers-guide) to help you get started with learning the Azure Identity system. This includes tutorials for native clients and web applications. We also provide full walkthroughs for authentication flows such as OAuth2, OpenID Connect and for calling APIs such as the Graph API. @@ -78,16 +93,16 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope ``` $ pip install adal ``` ### http tracing/proxy -If need to bypass self-signed certificates, turn on the environment variable of `ADAL_PYTHON_SSL_NO_VERIFY` +If you need to bypass self-signed certificates, turn on the environment variable of `ADAL_PYTHON_SSL_NO_VERIFY` ## Note ### Changes on 'client_id' and 'resource' arguments after 0.1.0 -The convinient methods in 0.1.0 have been removed, and now your application should provide parameter values to `client_id` and `resource`. +The convenient methods in 0.1.0 have been removed, and now your application should provide parameter values to `client_id` and `resource`. 2 Reasons: -* Each adal client should have an Application ID representing an valid application registered in a tenant. The old methods borrowed the client-id of [azure-cli](https://github.com/Azure/azure-xplat-cli), which is never right. It is simple to register your application and get a client id. Many walkthroughs exist. You can follow [one of those](https://docs.microsoft.com/en-us/azure/active-directory/develop/active-directory-integrating-applications). Do check out if you are new to AAD. +* Each adal client should have an Application ID representing a valid application registered in a tenant. The old methods borrowed the client-id of [azure-cli](https://github.com/Azure/azure-xplat-cli), which is never right. It is simple to register your application and get a client id. You can follow [this walkthrough](https://docs.microsoft.com/en-us/azure/active-directory/develop/active-directory-integrating-applications). Do check out if you are new to AAD. * The old method defaults the `resource` argument to 'https://management.core.windows.net/', now you can just supply this value explictly. Please note, there are lots of different azure resources you can acquire tokens through adal though, for example, the samples in the repository acquire for the 'graph' resource. Because it is not an appropriate assumption to be made at the library level, we removed the old defaults. From 028ea059b8083706a11853219a16c68b87a25b3e Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Tue, 6 Mar 2018 16:04:13 -0800 Subject: [PATCH 8/9] ADAL Python 0.5.1 Bumping version number --- adal/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adal/__init__.py b/adal/__init__.py index 0a4974e6..8ab8dbf0 100644 --- a/adal/__init__.py +++ b/adal/__init__.py @@ -27,7 +27,7 @@ # pylint: disable=wrong-import-position -__version__ = '0.5.0' +__version__ = '0.5.1' import logging From 92f5d0e487febf36a8eacdcc272cb40aa9e11d2d Mon Sep 17 00:00:00 2001 From: Ray Luo Date: Sun, 11 Mar 2018 17:49:19 -0700 Subject: [PATCH 9/9] Add comment for old WSTrustResponse implementation --- adal/wstrust_response.py | 1 + 1 file changed, 1 insertion(+) diff --git a/adal/wstrust_response.py b/adal/wstrust_response.py index 5b2f5eea..8633ed93 100644 --- a/adal/wstrust_response.py +++ b/adal/wstrust_response.py @@ -186,6 +186,7 @@ def _parse_token(self): # Adjust namespaces (without this they are autogenerated) so this is understood # by the receiver. Then make a string repr of the element tree node. + # See also http://blog.tomhennigan.co.uk/post/46945128556/elementtree-and-xmlns ET.register_namespace('saml', 'urn:oasis:names:tc:SAML:1.0:assertion') ET.register_namespace('ds', 'http://www.w3.org/2000/09/xmldsig#')