Skip to content

Commit

Permalink
remove double-utf8 encoding hack
Browse files Browse the repository at this point in the history
This hack, which was meant to fix some incorrectly encoded packets sent by
the JS socket.io 1.x clients, does not always work, and it is no longer
needed now that the 2.x clients have been fixed.
  • Loading branch information
miguelgrinberg committed Nov 19, 2017
1 parent e6985cc commit 83d2277
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 46 deletions.
45 changes: 14 additions & 31 deletions engineio/payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def encode(self, b64=False):

def decode(self, encoded_payload):
"""Decode a transmitted payload."""
fixed_double_encode = False
self.packets = []
while encoded_payload:
if six.byte2int(encoded_payload[0:1]) <= 1:
Expand All @@ -49,36 +48,20 @@ def decode(self, encoded_payload):
i = encoded_payload.find(b':')
if i == -1:
raise ValueError('invalid payload')
# the packet_len below is given in utf-8 characters, but we
# receive the payload as bytes, so down below this length is
# adjusted to reflect byte length
packet_len = int(encoded_payload[0:i])
if not fixed_double_encode:
# the engine.io javascript client sends text payloads with
# a double UTF-8 encoding. Here we try to fix that mess and
# restore the original packet
try:
# first we remove one UTF-8 encoding layer
fixed_payload = encoded_payload.decode(
'utf-8').encode('raw_unicode_escape')

# then we make sure the result can be decoded a second
# time (this will raise an exception if not)
fixed_payload.decode('utf-8')

# if a second utf-8 decode worked, then this appears to
# be a double encoded packet, so here we keep the
# packet after a single decode, since the packet class
# will perform a decode as well, and in this case it is
# not necessary to adjust the packet length
encoded_payload = fixed_payload
except:
# if we couldn't apply a double utf-8 decode then
# the packet must have been correct, so we just adjust
# the packet length to be in bytes and not utf-8
# characters and keep going
packet_len += len(encoded_payload) - len(fixed_payload)
fixed_double_encode = True
pkt = encoded_payload[i + 1: i + 1 + packet_len]
# extracting the packet out of the payload is extremely
# inefficient, because the payload needs to be treated as
# binary, but the non-binary packets have to be parsed as
# unicode. Luckily this complication only applies to long
# polling, as the websocket transport sends packets
# individually wrapped.
packet_len = int(encoded_payload[0:i])
pkt = encoded_payload.decode('utf-8', errors='ignore')[
i + 1: i + 1 + packet_len].encode('utf-8')
self.packets.append(packet.Packet(encoded_packet=pkt))

# the engine.io protocol sends the packet length in
# utf-8 characters, but we need it in bytes to be able to
# jump to the next packet in the payload
packet_len = len(pkt)
encoded_payload = encoded_payload[i + 1 + packet_len:]
18 changes: 4 additions & 14 deletions tests/test_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,8 @@ def test_decode_invalid_payload(self):
self.assertRaises(ValueError, payload.Payload,
encoded_payload=b'bad payload')

def test_decode_double_encoded_utf8_payload(self):
p = payload.Payload(encoded_payload=b'3:4\xc3\x83\xc2\xa9')
self.assertEqual(len(p.packets), 1)
self.assertEqual(p.packets[0].data.encode('utf-8'), b'\xc3\xa9')

def test_decode_double_encoded_utf8_multi_payload(self):
p = payload.Payload(encoded_payload=b'3:4\xc3\x83\xc2\xa94:4abc')
def test_decode_multi_payload(self):
p = payload.Payload(encoded_payload=b'4:4abc\x00\x04\xff4def')
self.assertEqual(len(p.packets), 2)
self.assertEqual(p.packets[0].data.encode('utf-8'), b'\xc3\xa9')
self.assertEqual(p.packets[1].data, 'abc')

def test_decode_single_encoded_utf8_payload(self):
p = payload.Payload(encoded_payload=b'3:4\xc3\xa9')
self.assertEqual(len(p.packets), 1)
self.assertEqual(p.packets[0].data.encode('utf-8'), b'\xc3\xa9')
self.assertEqual(p.packets[0].data, 'abc')
self.assertEqual(p.packets[1].data, 'def')
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ deps=
deps=
flake8
commands=
flake8 --exclude=".*" --ignore=E402 engineio tests
flake8 --exclude=".*" --ignore=E402,E722 engineio tests

[testenv:docs]
changedir=docs
Expand Down

0 comments on commit 83d2277

Please sign in to comment.