Skip to content

Commit

Permalink
Fix an issue with Python3 multipart encodings.
Browse files Browse the repository at this point in the history
As discovered in
googleapis/google-cloud-python#1760, we were
mangling bytes when encoding them as part of a multipart upload request. The
fix is to switch from using `six.StringIO` to `six.BytesIO` in `transfer.py`.
The patch here is closely based on
googleapis/google-cloud-python#1779.
  • Loading branch information
craigcitro committed May 9, 2016
1 parent d6a3172 commit 3b42c92
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 55 deletions.
16 changes: 10 additions & 6 deletions apitools/base/py/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,20 +758,24 @@ def __ConfigureMultipartRequest(self, http_request):
# NOTE: We encode the body, but can't use
# `email.message.Message.as_string` because it prepends
# `> ` to `From ` lines.
# NOTE: We must use six.StringIO() instead of io.StringIO() since the
# `email` library uses cStringIO in Py2 and io.StringIO in Py3.
fp = six.StringIO()
g = email_generator.Generator(fp, mangle_from_=False)
fp = six.BytesIO()
if six.PY3:
generator_class = email_generator.BytesGenerator
else:
generator_class = email_generator.Generator
g = generator_class(fp, mangle_from_=False)
g.flatten(msg_root, unixfrom=False)
http_request.body = fp.getvalue()

multipart_boundary = msg_root.get_boundary()
http_request.headers['content-type'] = (
'multipart/related; boundary=%r' % multipart_boundary)
if isinstance(multipart_boundary, six.text_type):
multipart_boundary = multipart_boundary.encode('ascii')

body_components = http_request.body.split(multipart_boundary)
headers, _, _ = body_components[-2].partition('\n\n')
body_components[-2] = '\n\n'.join([headers, '<media body>\n\n--'])
headers, _, _ = body_components[-2].partition(b'\n\n')
body_components[-2] = b'\n\n'.join([headers, b'<media body>\n\n--'])
http_request.loggable_body = multipart_boundary.join(body_components)

def __ConfigureResumableRequest(self, http_request):
Expand Down
116 changes: 67 additions & 49 deletions apitools/base/py/transfer_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
#
# Copyright 2015 Google Inc.
#
Expand Down Expand Up @@ -199,52 +200,69 @@ def _ReturnBytes(unused_http, http_request,
self.assertEqual(string.ascii_lowercase + string.ascii_uppercase,
download_stream.getvalue())

def testFromEncoding(self):
# Test a specific corner case in multipart encoding.

# Python's mime module by default encodes lines that start with
# "From " as ">From ", which we need to make sure we don't run afoul
# of when sending content that isn't intended to be so encoded. This
# test calls out that we get this right. We test for both the
# multipart and non-multipart case.
multipart_body = '{"body_field_one": 7}'
upload_contents = 'line one\nFrom \nline two'
upload_config = base_api.ApiUploadInfo(
accept=['*/*'],
max_size=None,
resumable_multipart=True,
resumable_path=u'/resumable/upload',
simple_multipart=True,
simple_path=u'/upload',
)
url_builder = base_api._UrlBuilder('http://www.uploads.com')

# Test multipart: having a body argument in http_request forces
# multipart here.
upload = transfer.Upload.FromStream(
six.StringIO(upload_contents),
'text/plain',
total_size=len(upload_contents))
http_request = http_wrapper.Request(
'http://www.uploads.com',
headers={'content-type': 'text/plain'},
body=multipart_body)
upload.ConfigureRequest(upload_config, http_request, url_builder)
self.assertEqual(url_builder.query_params['uploadType'], 'multipart')
rewritten_upload_contents = '\n'.join(
http_request.body.split('--')[2].splitlines()[1:])
self.assertTrue(rewritten_upload_contents.endswith(upload_contents))

# Test non-multipart (aka media): no body argument means this is
# sent as media.
upload = transfer.Upload.FromStream(
six.StringIO(upload_contents),
'text/plain',
total_size=len(upload_contents))
http_request = http_wrapper.Request(
'http://www.uploads.com',
headers={'content-type': 'text/plain'})
upload.ConfigureRequest(upload_config, http_request, url_builder)
self.assertEqual(url_builder.query_params['uploadType'], 'media')
rewritten_upload_contents = http_request.body
self.assertTrue(rewritten_upload_contents.endswith(upload_contents))
def testMultipartEncoding(self):
# This is really a table test for various issues we've seen in
# the past; see notes below for particular histories.

test_cases = [
# Python's mime module by default encodes lines that start
# with "From " as ">From ", which we need to make sure we
# don't run afoul of when sending content that isn't
# intended to be so encoded. This test calls out that we
# get this right. We test for both the multipart and
# non-multipart case.
'line one\nFrom \nline two',

# We had originally used a `six.StringIO` to hold the http
# request body in the case of a multipart upload; for
# bytes being uploaded in Python3, however, this causes
# issues like this:
# https://github.com/GoogleCloudPlatform/gcloud-python/issues/1760
# We test below to ensure that we don't end up mangling
# the body before sending.
u'name,main_ingredient\nRäksmörgås,Räkor\nBaguette,Bröd',
]

for upload_contents in test_cases:
multipart_body = '{"body_field_one": 7}'
upload_bytes = upload_contents.encode('ascii', 'backslashreplace')
upload_config = base_api.ApiUploadInfo(
accept=['*/*'],
max_size=None,
resumable_multipart=True,
resumable_path=u'/resumable/upload',
simple_multipart=True,
simple_path=u'/upload',
)
url_builder = base_api._UrlBuilder('http://www.uploads.com')

# Test multipart: having a body argument in http_request forces
# multipart here.
upload = transfer.Upload.FromStream(
six.BytesIO(upload_bytes),
'text/plain',
total_size=len(upload_bytes))
http_request = http_wrapper.Request(
'http://www.uploads.com',
headers={'content-type': 'text/plain'},
body=multipart_body)
upload.ConfigureRequest(upload_config, http_request, url_builder)
self.assertEqual(
'multipart', url_builder.query_params['uploadType'])
rewritten_upload_contents = b'\n'.join(
http_request.body.split(b'--')[2].splitlines()[1:])
self.assertTrue(rewritten_upload_contents.endswith(upload_bytes))

# Test non-multipart (aka media): no body argument means this is
# sent as media.
upload = transfer.Upload.FromStream(
six.BytesIO(upload_bytes),
'text/plain',
total_size=len(upload_bytes))
http_request = http_wrapper.Request(
'http://www.uploads.com',
headers={'content-type': 'text/plain'})
upload.ConfigureRequest(upload_config, http_request, url_builder)
self.assertEqual(url_builder.query_params['uploadType'], 'media')
rewritten_upload_contents = http_request.body
self.assertTrue(rewritten_upload_contents.endswith(upload_bytes))

0 comments on commit 3b42c92

Please sign in to comment.