Skip to content

Commit

Permalink
Work around double UTF-8 encoding bug in JavaScript client
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelgrinberg committed Sep 5, 2016
1 parent d61c3ac commit 00d2459
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
24 changes: 24 additions & 0 deletions engineio/payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def encode(self, b64=False):

def decode(self, encoded_payload):
"""Decode a transmitted payload."""
fixed_double_encode = False
self.packets = []
while encoded_payload:
if six.byte2int(encoded_payload[0:1]) <= 1:
Expand All @@ -49,6 +50,29 @@ def decode(self, encoded_payload):
if i == -1:
raise ValueError('invalid payload')
packet_len = int(encoded_payload[0:i])
if not fixed_double_encode:
    # The engine.io JavaScript client sends text payloads with a
    # double UTF-8 encoding. Check for that once per payload and, if
    # found, strip the extra layer so that the packet class (which
    # performs a decode of its own) receives single-encoded UTF-8.
    try:
        # Remove one UTF-8 layer. In a genuinely double-encoded
        # payload every decoded character stands for one byte of the
        # inner encoding, so it must fit in Latin-1. A character
        # above U+00FF raises UnicodeEncodeError here, which marks
        # the payload as already correct. ('raw_unicode_escape'
        # would instead emit a literal \uXXXX escape for such a
        # character and silently corrupt valid single-encoded text.)
        fixed_payload = encoded_payload.decode(
            'utf-8').encode('latin-1')

        # The stripped payload must itself be valid UTF-8; this
        # raises UnicodeDecodeError when it is not.
        fixed_payload.decode('utf-8')
    except UnicodeError:
        # One of the conversions failed, so the payload was not
        # double encoded; keep it untouched. Only codec errors are
        # caught so that unrelated failures are not hidden.
        pass
    else:
        # Both decodes worked: keep the payload with a single
        # encoding layer removed.
        encoded_payload = fixed_payload
    fixed_double_encode = True
pkt = encoded_payload[i + 1: i + 1 + packet_len]
self.packets.append(packet.Packet(encoded_packet=pkt))
encoded_payload = encoded_payload[i + 1 + packet_len:]
16 changes: 16 additions & 0 deletions tests/test_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,19 @@ def test_decode_payload_xhr_binary(self):
def test_decode_invalid_payload(self):
    """Constructing a Payload from malformed bytes raises ValueError."""
    with self.assertRaises(ValueError):
        payload.Payload(encoded_payload=b'bad payload')

def test_decode_double_encoded_utf8_payload(self):
    """A payload whose text was UTF-8 encoded twice yields the text."""
    raw = b'3:4\xc3\x83\xc2\xa9'
    decoded = payload.Payload(encoded_payload=raw)
    packets = decoded.packets
    self.assertEqual(len(packets), 1)
    self.assertEqual(packets[0].data, 'é')

def test_decode_double_encoded_utf8_multi_payload(self):
    """A double-encoded payload holding two packets yields both."""
    raw = b'3:4\xc3\x83\xc2\xa94:4abc'
    decoded = payload.Payload(encoded_payload=raw)
    packets = decoded.packets
    self.assertEqual(len(packets), 2)
    # Compare each packet's data with the expected text, in order.
    for pkt, expected in zip(packets, ['é', 'abc']):
        self.assertEqual(pkt.data, expected)

def test_decode_single_encoded_utf8_payload(self):
    """A payload with ordinary single UTF-8 encoding yields the text."""
    raw = b'3:4\xc3\xa9'
    decoded = payload.Payload(encoded_payload=raw)
    packets = decoded.packets
    self.assertEqual(len(packets), 1)
    self.assertEqual(packets[0].data, 'é')

0 comments on commit 00d2459

Please sign in to comment.