From 09b7ccf561b94e095c516305d9bfcb4d355f1882 Mon Sep 17 00:00:00 2001 From: Greg Ward Date: Thu, 16 Jul 2015 14:36:26 -0400 Subject: [PATCH] Ensure that request bodies are always bytes, not text (fixes #163). It shouldn't matter whether the request body comes from a file or a string, or whether it is passed to the Request constructor or assigned later. It should always be stored internally as bytes. --- tests/unit/test_serialize.py | 36 ++++++++++++++++++++++++++++++++++++ vcr/matchers.py | 9 ++++++++- vcr/request.py | 10 +++++----- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_serialize.py b/tests/unit/test_serialize.py index 41d97bd..6e4795e 100644 --- a/tests/unit/test_serialize.py +++ b/tests/unit/test_serialize.py @@ -27,6 +27,42 @@ def test_deserialize_new_json_cassette(): deserialize(f.read(), jsonserializer) +REQBODY_TEMPLATE = u'''\ +interactions: +- request: + body: {req_body} + headers: + Content-Type: [application/x-www-form-urlencoded] + Host: [httpbin.org] + method: POST + uri: http://httpbin.org/post + response: + body: {{string: ""}} + headers: + content-length: ['0'] + content-type: [application/json] + status: {{code: 200, message: OK}} +''' + + +# A cassette generated under Python 2 stores the request body as a string, +# but the same cassette generated under Python 3 stores it as "!!binary". +# Make sure we accept both forms, regardless of whether we're running under +# Python 2 or 3. 
+@pytest.mark.parametrize("req_body, expect", [ + # Cassette written under Python 2 (pure ASCII body) + ('x=5&y=2', b'x=5&y=2'), + # Cassette written under Python 3 (pure ASCII body) + ('!!binary |\n eD01Jnk9Mg==', b'x=5&y=2'), +]) +def test_deserialize_py2py3_yaml_cassette(tmpdir, req_body, expect): + cfile = tmpdir.join('test_cassette.yaml') + cfile.write(REQBODY_TEMPLATE.format(req_body=req_body)) + with open(str(cfile)) as f: + (requests, responses) = deserialize(f.read(), yamlserializer) + assert requests[0].body == expect + + @mock.patch.object(jsonserializer.json, 'dumps', side_effect=UnicodeDecodeError('utf-8', b'unicode error in serialization', 0, 10, 'blew up')) diff --git a/vcr/matchers.py b/vcr/matchers.py index 34bd4e6..5b2fb61 100644 --- a/vcr/matchers.py +++ b/vcr/matchers.py @@ -43,11 +43,18 @@ def _header_checker(value, header='Content-Type'): return checker +def _transform_json(body): + # Request body is always a byte string, but json.loads() wants a text + # string. RFC 7159 says the default encoding is UTF-8 (although UTF-16 + # and UTF-32 are also allowed, but are not handled here). 
+ return json.loads(body.decode('utf-8')) + + _xml_header_checker = _header_checker('text/xml') _xmlrpc_header_checker = _header_checker('xmlrpc', header='User-Agent') _checker_transformer_pairs = ( (_header_checker('application/x-www-form-urlencoded'), urllib.parse.parse_qs), - (_header_checker('application/json'), json.loads), + (_header_checker('application/json'), _transform_json), (lambda request: _xml_header_checker(request) and _xmlrpc_header_checker(request), xmlrpc_client.loads), ) diff --git a/vcr/request.py b/vcr/request.py index fbc02e1..3500044 100644 --- a/vcr/request.py +++ b/vcr/request.py @@ -1,4 +1,4 @@ -from six import BytesIO, binary_type +from six import BytesIO, text_type from six.moves.urllib.parse import urlparse, parse_qsl @@ -29,11 +29,9 @@ class Request(object): self.uri = uri self._was_file = hasattr(body, 'read') if self._was_file: - self._body = body.read() - if not isinstance(self._body, binary_type): - self._body = self._body.encode('utf-8') + self.body = body.read() else: - self._body = body + self.body = body self.headers = {} for key in headers: self.add_header(key, headers[key]) @@ -44,6 +42,8 @@ class Request(object): @body.setter def body(self, value): + if isinstance(value, text_type): + value = value.encode('utf-8') self._body = value def add_header(self, key, value):