diff --git a/docs/advanced.rst b/docs/advanced.rst
index 6faa064..7fff29f 100644
--- a/docs/advanced.rst
+++ b/docs/advanced.rst
@@ -255,6 +255,21 @@ that of ``before_record``:
     with my_vcr.use_cassette('test.yml'):
         # your http code here
 
+
+Decode compressed response
+---------------------------
+
+When the ``decode_compressed_response`` keyword argument of a ``VCR`` object
+is set to True, VCR will decompress "gzip" and "deflate" response bodies
+before recording. This ensures that these interactions become readable and
+editable after being serialized.
+
+.. note::
+    Decompression is done before any other specified `Custom Response Filtering`_.
+
+This option should be avoided if the actual decompression of response bodies
+is part of the functionality of the library or app being tested.
+
 Ignore requests
 ---------------
 
diff --git a/tests/integration/test_filter.py b/tests/integration/test_filter.py
index 43f63ff..a5dc3a6 100644
--- a/tests/integration/test_filter.py
+++ b/tests/integration/test_filter.py
@@ -5,6 +5,7 @@ from six.moves.urllib.parse import urlencode
 from six.moves.urllib.error import HTTPError
 import vcr
 import json
+from assertions import assert_cassette_has_one_response, assert_is_json
 
 
 def _request_with_auth(url, username, password):
@@ -93,3 +94,39 @@ def test_filter_callback(tmpdir):
     with my_vcr.use_cassette(cass_file, filter_headers=['authorization']) as cass:
         urlopen(url)
         assert len(cass) == 0
+
+
+def test_decompress_gzip(tmpdir):
+    url = 'http://httpbin.org/gzip'
+    request = Request(url, headers={'Accept-Encoding': ['gzip, deflate']})
+    cass_file = str(tmpdir.join('gzip_response.yaml'))
+    with vcr.use_cassette(cass_file, decode_compressed_response=True):
+        urlopen(request)
+    with vcr.use_cassette(cass_file) as cass:
+        decoded_response = urlopen(url).read()
+        assert_cassette_has_one_response(cass)
+    assert_is_json(decoded_response)
+
+
+def test_decompress_deflate(tmpdir):
+    url = 'http://httpbin.org/deflate'
+    request = Request(url, headers={'Accept-Encoding': ['gzip, deflate']})
+    cass_file = str(tmpdir.join('deflate_response.yaml'))
+    with vcr.use_cassette(cass_file, decode_compressed_response=True):
+        urlopen(request)
+    with vcr.use_cassette(cass_file) as cass:
+        decoded_response = urlopen(url).read()
+        assert_cassette_has_one_response(cass)
+    assert_is_json(decoded_response)
+
+
+def test_decompress_regular(tmpdir):
+    """Test that it doesn't try to decompress content that isn't compressed"""
+    url = 'http://httpbin.org/get'
+    cass_file = str(tmpdir.join('noncompressed_response.yaml'))
+    with vcr.use_cassette(cass_file, decode_compressed_response=True):
+        urlopen(url)
+    with vcr.use_cassette(cass_file) as cass:
+        resp = urlopen(url).read()
+        assert_cassette_has_one_response(cass)
+    assert_is_json(resp)
diff --git a/tests/unit/test_filters.py b/tests/unit/test_filters.py
index be1d657..274a70c 100644
--- a/tests/unit/test_filters.py
+++ b/tests/unit/test_filters.py
@@ -1,11 +1,15 @@
+from six import BytesIO
 from vcr.filters import (
     remove_headers, replace_headers,
     remove_query_parameters, replace_query_parameters,
     remove_post_data_parameters, replace_post_data_parameters,
+    decode_response
 )
 from vcr.compat import mock
 from vcr.request import Request
+import gzip
 import json
+import zlib
 
 
 def test_replace_headers():
@@ -200,3 +204,71 @@ def test_remove_all_json_post_data_parameters():
     request.headers['Content-Type'] = 'application/json'
     replace_post_data_parameters(request, [('id', None), ('foo', None)])
     assert request.body == b'{}'
+
+
+def test_decode_response_uncompressed():
+    recorded_response = {
+        "status": {
+            "message": "OK",
+            "code": 200
+        },
+        "headers": {
+            "content-length": ["10806"],
+            "date": ["Fri, 24 Oct 2014 18:35:37 GMT"],
+            "content-type": ["text/html; charset=utf-8"],
+        },
+        "body": {
+            "string": b""
+        }
+    }
+    assert decode_response(recorded_response) == recorded_response
+
+
+def test_decode_response_deflate():
+    body = b'deflate message'
+    deflate_response = {
+        'body': {'string': zlib.compress(body)},
+        'headers': {
+            'access-control-allow-credentials': ['true'],
+            'access-control-allow-origin': ['*'],
+            'connection': ['keep-alive'],
+            'content-encoding': ['deflate'],
+            'content-length': ['177'],
+            'content-type': ['application/json'],
+            'date': ['Wed, 02 Dec 2015 19:44:32 GMT'],
+            'server': ['nginx']
+        },
+        'status': {'code': 200, 'message': 'OK'}
+    }
+    decoded_response = decode_response(deflate_response)
+    assert decoded_response['body']['string'] == body
+    assert decoded_response['headers']['content-length'] == [str(len(body))]
+
+
+def test_decode_response_gzip():
+    body = b'gzip message'
+
+    buf = BytesIO()
+    f = gzip.GzipFile('a', fileobj=buf, mode='wb')
+    f.write(body)
+    f.close()
+
+    compressed_body = buf.getvalue()
+    buf.close()
+    gzip_response = {
+        'body': {'string': compressed_body},
+        'headers': {
+            'access-control-allow-credentials': ['true'],
+            'access-control-allow-origin': ['*'],
+            'connection': ['keep-alive'],
+            'content-encoding': ['gzip'],
+            'content-length': ['177'],
+            'content-type': ['application/json'],
+            'date': ['Wed, 02 Dec 2015 19:44:32 GMT'],
+            'server': ['nginx']
+        },
+        'status': {'code': 200, 'message': 'OK'}
+    }
+    decoded_response = decode_response(gzip_response)
+    assert decoded_response['body']['string'] == body
+    assert decoded_response['headers']['content-length'] == [str(len(body))]
diff --git a/vcr/config.py b/vcr/config.py
index 728b17f..ae99346 100644
--- a/vcr/config.py
+++ b/vcr/config.py
@@ -35,7 +35,8 @@ class VCR(object):
                  before_record_response=None, filter_post_data_parameters=(),
                  match_on=('method', 'scheme', 'host', 'port', 'path', 'query'),
                  before_record=None, inject_cassette=False, serializer='yaml',
-                 cassette_library_dir=None, func_path_generator=None):
+                 cassette_library_dir=None, func_path_generator=None,
+                 decode_compressed_response=False):
         self.serializer = serializer
         self.match_on = match_on
         self.cassette_library_dir = cassette_library_dir
@@ -67,6 +68,7 @@ class VCR(object):
         self.inject_cassette = inject_cassette
         self.path_transformer = path_transformer
         self.func_path_generator = func_path_generator
+        self.decode_compressed_response = decode_compressed_response
         self._custom_patches = tuple(custom_patches)
 
     def _get_serializer(self, serializer_name):
@@ -163,7 +165,12 @@ class VCR(object):
         before_record_response = options.get(
             'before_record_response', self.before_record_response
         )
+        decode_compressed_response = options.get(
+            'decode_compressed_response', self.decode_compressed_response
+        )
         filter_functions = []
+        if decode_compressed_response:
+            filter_functions.append(filters.decode_response)
         if before_record_response:
             if not isinstance(before_record_response, collections.Iterable):
                 before_record_response = (before_record_response,)
diff --git a/vcr/filters.py b/vcr/filters.py
index 6070d79..db6c130 100644
--- a/vcr/filters.py
+++ b/vcr/filters.py
@@ -1,6 +1,10 @@
 from six import BytesIO, text_type
 from six.moves.urllib.parse import urlparse, urlencode, urlunparse
+import copy
 import json
+import zlib
+
+from .util import CaseInsensitiveDict
 
 
 def replace_headers(request, replacements):
@@ -120,3 +124,45 @@ def remove_post_data_parameters(request, post_data_parameters_to_remove):
     """
     replacements = [(k, None) for k in post_data_parameters_to_remove]
     return replace_post_data_parameters(request, replacements)
+
+
+def decode_response(response):
+    """
+    If the response is compressed with gzip or deflate:
+      1. decompress the response body
+      2. delete the content-encoding header
+      3. update content-length header to decompressed length
+
+    The passed-in response is never modified: a compressed response is
+    deep-copied and the decoded copy is returned; any other response is
+    returned as is.
+    """
+    def is_compressed(headers):
+        encoding = headers.get('content-encoding', [])
+        return encoding and encoding[0] in ('gzip', 'deflate')
+
+    def decompress_body(body, encoding):
+        """Returns decompressed body according to encoding using zlib.
+        to (de-)compress gzip format, use wbits = zlib.MAX_WBITS | 16
+        """
+        if encoding == 'gzip':
+            return zlib.decompress(body, zlib.MAX_WBITS | 16)
+        else:  # encoding == 'deflate'
+            return zlib.decompress(body)
+
+    headers = CaseInsensitiveDict(response['headers'])
+    if is_compressed(headers):
+        # Work on a deep copy and rebuild the header view from that copy: the
+        # header lists are mutated below and must not be the caller's lists.
+        response = copy.deepcopy(response)
+        headers = CaseInsensitiveDict(response['headers'])
+        encoding = headers['content-encoding'][0]
+        headers['content-encoding'].remove(encoding)
+        if not headers['content-encoding']:
+            del headers['content-encoding']
+
+        new_body = decompress_body(response['body']['string'], encoding)
+        response['body']['string'] = new_body
+        headers['content-length'] = [str(len(new_body))]
+        response['headers'] = dict(headers)
+    return response