From 3b42c92cc315c58d74fed9449d3ce16d4cc0444a Mon Sep 17 00:00:00 2001 From: Craig Citro Date: Sun, 8 May 2016 23:39:17 -0700 Subject: [PATCH] Fix an issue with Python3 multipart encodings. As discovered in https://github.com/GoogleCloudPlatform/gcloud-python/issues/1760, we were mangling bytes when encoding them as part of a multipart upload request. The fix is to switch from using `six.StringIO` to `six.BytesIO` in `transfer.py`. The patch here is closely based on https://github.com/GoogleCloudPlatform/gcloud-python/pull/1779. --- apitools/base/py/transfer.py | 16 +++-- apitools/base/py/transfer_test.py | 116 +++++++++++++++++------------- 2 files changed, 77 insertions(+), 55 deletions(-) diff --git a/apitools/base/py/transfer.py b/apitools/base/py/transfer.py index 397222ed..29bbbcc6 100644 --- a/apitools/base/py/transfer.py +++ b/apitools/base/py/transfer.py @@ -758,20 +758,24 @@ def __ConfigureMultipartRequest(self, http_request): # NOTE: We encode the body, but can't use # `email.message.Message.as_string` because it prepends # `> ` to `From ` lines. - # NOTE: We must use six.StringIO() instead of io.StringIO() since the - # `email` library uses cStringIO in Py2 and io.StringIO in Py3. - fp = six.StringIO() - g = email_generator.Generator(fp, mangle_from_=False) + fp = six.BytesIO() + if six.PY3: + generator_class = email_generator.BytesGenerator + else: + generator_class = email_generator.Generator + g = generator_class(fp, mangle_from_=False) g.flatten(msg_root, unixfrom=False) http_request.body = fp.getvalue() multipart_boundary = msg_root.get_boundary() http_request.headers['content-type'] = ( 'multipart/related; boundary=%r' % multipart_boundary) + if isinstance(multipart_boundary, six.text_type): + multipart_boundary = multipart_boundary.encode('ascii') body_components = http_request.body.split(multipart_boundary) - headers, _, _ = body_components[-2].partition('\n\n') - body_components[-2] = '\n\n'.join([headers, '\n\n--']) + headers, _, _ = body_components[-2].partition(b'\n\n') + body_components[-2] = b'\n\n'.join([headers, b'\n\n--']) http_request.loggable_body = multipart_boundary.join(body_components) def __ConfigureResumableRequest(self, http_request): diff --git a/apitools/base/py/transfer_test.py b/apitools/base/py/transfer_test.py index 53906fd9..a4c43e7c 100644 --- a/apitools/base/py/transfer_test.py +++ b/apitools/base/py/transfer_test.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # # Copyright 2015 Google Inc. # @@ -199,52 +200,69 @@ def _ReturnBytes(unused_http, http_request, self.assertEqual(string.ascii_lowercase + string.ascii_uppercase, download_stream.getvalue()) - def testFromEncoding(self): - # Test a specific corner case in multipart encoding. - - # Python's mime module by default encodes lines that start with - # "From " as ">From ", which we need to make sure we don't run afoul - # of when sending content that isn't intended to be so encoded. This - # test calls out that we get this right. We test for both the - # multipart and non-multipart case. - multipart_body = '{"body_field_one": 7}' - upload_contents = 'line one\nFrom \nline two' - upload_config = base_api.ApiUploadInfo( - accept=['*/*'], - max_size=None, - resumable_multipart=True, - resumable_path=u'/resumable/upload', - simple_multipart=True, - simple_path=u'/upload', - ) - url_builder = base_api._UrlBuilder('http://www.uploads.com') - - # Test multipart: having a body argument in http_request forces - # multipart here. - upload = transfer.Upload.FromStream( - six.StringIO(upload_contents), - 'text/plain', - total_size=len(upload_contents)) - http_request = http_wrapper.Request( - 'http://www.uploads.com', - headers={'content-type': 'text/plain'}, - body=multipart_body) - upload.ConfigureRequest(upload_config, http_request, url_builder) - self.assertEqual(url_builder.query_params['uploadType'], 'multipart') - rewritten_upload_contents = '\n'.join( - http_request.body.split('--')[2].splitlines()[1:]) - self.assertTrue(rewritten_upload_contents.endswith(upload_contents)) - - # Test non-multipart (aka media): no body argument means this is - # sent as media. - upload = transfer.Upload.FromStream( - six.StringIO(upload_contents), - 'text/plain', - total_size=len(upload_contents)) - http_request = http_wrapper.Request( - 'http://www.uploads.com', - headers={'content-type': 'text/plain'}) - upload.ConfigureRequest(upload_config, http_request, url_builder) - self.assertEqual(url_builder.query_params['uploadType'], 'media') - rewritten_upload_contents = http_request.body - self.assertTrue(rewritten_upload_contents.endswith(upload_contents)) + def testMultipartEncoding(self): + # This is really a table test for various issues we've seen in + # the past; see notes below for particular histories. + + test_cases = [ + # Python's mime module by default encodes lines that start + # with "From " as ">From ", which we need to make sure we + # don't run afoul of when sending content that isn't + # intended to be so encoded. This test calls out that we + # get this right. We test for both the multipart and + # non-multipart case. + 'line one\nFrom \nline two', + + # We had originally used a `six.StringIO` to hold the http + # request body in the case of a multipart upload; for + # bytes being uploaded in Python3, however, this causes + # issues like this: + # https://github.com/GoogleCloudPlatform/gcloud-python/issues/1760 + # We test below to ensure that we don't end up mangling + # the body before sending. + u'name,main_ingredient\nRäksmörgås,Räkor\nBaguette,Bröd', + ] + + for upload_contents in test_cases: + multipart_body = '{"body_field_one": 7}' + upload_bytes = upload_contents.encode('ascii', 'backslashreplace') + upload_config = base_api.ApiUploadInfo( + accept=['*/*'], + max_size=None, + resumable_multipart=True, + resumable_path=u'/resumable/upload', + simple_multipart=True, + simple_path=u'/upload', + ) + url_builder = base_api._UrlBuilder('http://www.uploads.com') + + # Test multipart: having a body argument in http_request forces + # multipart here. + upload = transfer.Upload.FromStream( + six.BytesIO(upload_bytes), + 'text/plain', + total_size=len(upload_bytes)) + http_request = http_wrapper.Request( + 'http://www.uploads.com', + headers={'content-type': 'text/plain'}, + body=multipart_body) + upload.ConfigureRequest(upload_config, http_request, url_builder) + self.assertEqual( + 'multipart', url_builder.query_params['uploadType']) + rewritten_upload_contents = b'\n'.join( + http_request.body.split(b'--')[2].splitlines()[1:]) + self.assertTrue(rewritten_upload_contents.endswith(upload_bytes)) + + # Test non-multipart (aka media): no body argument means this is + # sent as media. + upload = transfer.Upload.FromStream( + six.BytesIO(upload_bytes), + 'text/plain', + total_size=len(upload_bytes)) + http_request = http_wrapper.Request( + 'http://www.uploads.com', + headers={'content-type': 'text/plain'}) + upload.ConfigureRequest(upload_config, http_request, url_builder) + self.assertEqual(url_builder.query_params['uploadType'], 'media') + rewritten_upload_contents = http_request.body + self.assertTrue(rewritten_upload_contents.endswith(upload_bytes))