From eda64bc3be52959eabe497b62c3662b6fd9d8d90 Mon Sep 17 00:00:00 2001 From: Aron Griffis Date: Mon, 24 Aug 2015 12:58:34 -0400 Subject: [PATCH 1/3] Make request.headers always a CaseInsensitiveDict. Previously request.headers was a normal dict (albeit with the request.add_header interface) which meant that some code paths would do case-sensitive matching, for example remove_post_data_parameters which tests for 'Content-Type'. This change allows all code paths to get the same case-insensitive treatment. Additionally request.headers becomes a property to enforce upgrading it to a CaseInsensitiveDict even if assigned. --- tests/integration/test_filter.py | 6 +----- vcr/filters.py | 13 +++++-------- vcr/matchers.py | 5 ++--- vcr/request.py | 19 +++++++++++++++---- vcr/stubs/tornado_stubs.py | 2 +- 5 files changed, 24 insertions(+), 21 deletions(-) diff --git a/tests/integration/test_filter.py b/tests/integration/test_filter.py index 0a5232d..45c3f78 100644 --- a/tests/integration/test_filter.py +++ b/tests/integration/test_filter.py @@ -17,11 +17,7 @@ def _request_with_auth(url, username, password): def _find_header(cassette, header): - for request in cassette.requests: - for k in request.headers: - if header.lower() == k.lower(): - return True - return False + return any(header in request.headers for request in cassette.requests) def test_filter_basic_auth(tmpdir): diff --git a/vcr/filters.py b/vcr/filters.py index 14159d0..102a95e 100644 --- a/vcr/filters.py +++ b/vcr/filters.py @@ -1,19 +1,16 @@ from six import BytesIO, text_type from six.moves.urllib.parse import urlparse, urlencode, urlunparse -import copy import json from .compat import collections def remove_headers(request, headers_to_remove): - headers = copy.copy(request.headers) - headers_to_remove = [h.lower() for h in headers_to_remove] - keys = [k for k in headers if k.lower() in headers_to_remove] - if keys: - for k in keys: - headers.pop(k) - request.headers = headers + new_headers = request.headers.copy() + for k in headers_to_remove: + if k in new_headers: + del new_headers[k] + request.headers = new_headers return request diff --git a/vcr/matchers.py b/vcr/matchers.py index 6d8cbf8..57e7a02 100644 --- a/vcr/matchers.py +++ b/vcr/matchers.py @@ -1,6 +1,6 @@ import json from six.moves import urllib, xmlrpc_client -from .util import CaseInsensitiveDict, read_body +from .util import read_body import logging @@ -66,9 +66,8 @@ def _identity(x): def _get_transformer(request): - headers = CaseInsensitiveDict(request.headers) for checker, transformer in _checker_transformer_pairs: - if checker(headers): return transformer + if checker(request.headers): return transformer else: return _identity diff --git a/vcr/request.py b/vcr/request.py index 3500044..bc15e6f 100644 --- a/vcr/request.py +++ b/vcr/request.py @@ -1,10 +1,11 @@ from six import BytesIO, text_type from six.moves.urllib.parse import urlparse, parse_qsl +from .util import CaseInsensitiveDict class Request(object): """ - VCR's representation of a request. + VCR's representation of a request. There is a weird quirk in HTTP. You can send the same header twice. For this reason, headers are represented by a dict, with lists as the values. @@ -32,9 +33,19 @@ class Request(object): self.body = body.read() else: self.body = body - self.headers = {} - for key in headers: - self.add_header(key, headers[key]) + self.headers = CaseInsensitiveDict() + for key, value in headers.items(): + self.add_header(key, value) + + @property + def headers(self): + return self._headers + + @headers.setter + def headers(self, value): + if not isinstance(value, CaseInsensitiveDict): + value = CaseInsensitiveDict(value) + self._headers = value @property def body(self): diff --git a/vcr/stubs/tornado_stubs.py b/vcr/stubs/tornado_stubs.py index 5a4ce58..a6422da 100644 --- a/vcr/stubs/tornado_stubs.py +++ b/vcr/stubs/tornado_stubs.py @@ -15,7 +15,7 @@ def vcr_fetch_impl(cassette, real_fetch_impl): @functools.wraps(real_fetch_impl) def new_fetch_impl(self, request, callback): - headers = dict(request.headers) + headers = request.headers.copy() if request.user_agent: headers.setdefault('User-Agent', request.user_agent) From 646d12df943d54f8808160c745f1cf297eecb3db Mon Sep 17 00:00:00 2001 From: Aron Griffis Date: Mon, 24 Aug 2015 14:22:12 -0400 Subject: [PATCH 2/3] More compact expression with dict.get() --- vcr/filters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vcr/filters.py b/vcr/filters.py index 102a95e..10830c6 100644 --- a/vcr/filters.py +++ b/vcr/filters.py @@ -27,8 +27,7 @@ def remove_query_parameters(request, query_parameters_to_remove): def remove_post_data_parameters(request, post_data_parameters_to_remove): if request.method == 'POST' and not isinstance(request.body, BytesIO): - if ('Content-Type' in request.headers and - request.headers['Content-Type'] == 'application/json'): + if request.headers.get('Content-Type') == 'application/json': json_data = json.loads(request.body.decode('utf-8')) for k in list(json_data.keys()): if k in post_data_parameters_to_remove: From 7312229aefe187f21fc0a28b72d6717db09bc584 Mon Sep 17 00:00:00 2001 From: Aron Griffis Date: Mon, 24 Aug 2015 22:14:38 -0400 Subject: [PATCH 3/3] Add HeadersDict, and mark add_header deprecated. HeadersDict is a subclass of CaseInsensitiveDict with two new features: 1. Preserve the case of the header key from the first time it was set. This means that later munging won't modify the key case. (You can force picking up the new case with `del` followed by setting.) 2. If the value is a list or tuple, unpack it and store the first element. This is the same as how `Request.add_header()` used to work. For backward compatibility this commit preserves `Request.add_header()` but marks it deprecated. --- tests/unit/test_filters.py | 6 ++-- tests/unit/test_request.py | 38 +++++++++++++++++++-- vcr/request.py | 67 +++++++++++++++++++++++--------------- vcr/stubs/__init__.py | 3 +- 4 files changed, 79 insertions(+), 35 deletions(-) diff --git a/tests/unit/test_filters.py b/tests/unit/test_filters.py index 2bab38c..30bb203 100644 --- a/tests/unit/test_filters.py +++ b/tests/unit/test_filters.py @@ -73,7 +73,7 @@ def test_remove_nonexistent_post_data_parameters(): def test_remove_json_post_data_parameters(): body = b'{"id": "secret", "foo": "bar", "baz": "qux"}' request = Request('POST', 'http://google.com', body, {}) - request.add_header('Content-Type', 'application/json') + request.headers['Content-Type'] = 'application/json' remove_post_data_parameters(request, ['id']) request_body_json = json.loads(request.body.decode('utf-8')) expected_json = json.loads(b'{"foo": "bar", "baz": "qux"}'.decode('utf-8')) @@ -83,7 +83,7 @@ def test_remove_json_post_data_parameters(): def test_remove_all_json_post_data_parameters(): body = b'{"id": "secret", "foo": "bar"}' request = Request('POST', 'http://google.com', body, {}) - request.add_header('Content-Type', 'application/json') + request.headers['Content-Type'] = 'application/json' remove_post_data_parameters(request, ['id', 'foo']) assert request.body == b'{}' @@ -91,6 +91,6 @@ def test_remove_all_json_post_data_parameters(): def test_remove_nonexistent_json_post_data_parameters(): body = b'{}' request = Request('POST', 'http://google.com', body, {}) - request.add_header('Content-Type', 'application/json') + request.headers['Content-Type'] = 'application/json' remove_post_data_parameters(request, ['id']) assert request.body == b'{}' diff --git a/tests/unit/test_request.py b/tests/unit/test_request.py index 9a26acc..a89e15a 100644 --- a/tests/unit/test_request.py +++ b/tests/unit/test_request.py @@ -1,6 +1,6 @@ import pytest -from vcr.request import Request +from vcr.request import Request, HeadersDict def test_str(): @@ -12,11 +12,16 @@ def test_headers(): headers = {'X-Header1': ['h1'], 'X-Header2': 'h2'} req = Request('GET', 'http://go.com/', '', headers) assert req.headers == {'X-Header1': 'h1', 'X-Header2': 'h2'} - - req.add_header('X-Header1', 'h11') + req.headers['X-Header1'] = 'h11' assert req.headers == {'X-Header1': 'h11', 'X-Header2': 'h2'} +def test_add_header_deprecated(): + req = Request('GET', 'http://go.com/', '', {}) + pytest.deprecated_call(req.add_header, 'foo', 'bar') + assert req.headers == {'foo': 'bar'} + + @pytest.mark.parametrize("uri, expected_port", [ ('http://go.com/', 80), ('http://go.com:80/', 80), @@ -36,3 +41,30 @@ def test_uri(): req = Request('GET', 'http://go.com:80/', '', {}) assert req.uri == 'http://go.com:80/' + + +def test_HeadersDict(): + + # Simple test of CaseInsensitiveDict + h = HeadersDict() + assert h == {} + h['Content-Type'] = 'application/json' + assert h == {'Content-Type': 'application/json'} + assert h['content-type'] == 'application/json' + assert h['CONTENT-TYPE'] == 'application/json' + + # Test feature of HeadersDict: devolve list to first element + h = HeadersDict() + assert h == {} + h['x'] = ['foo', 'bar'] + assert h == {'x': 'foo'} + + # Test feature of HeadersDict: preserve original key case + h = HeadersDict() + assert h == {} + h['Content-Type'] = 'application/json' + assert h == {'Content-Type': 'application/json'} + h['content-type'] = 'text/plain' + assert h == {'Content-Type': 'text/plain'} + h['CONtent-tyPE'] = 'whoa' + assert h == {'Content-Type': 'whoa'} diff --git a/vcr/request.py b/vcr/request.py index bc15e6f..63e451e 100644 --- a/vcr/request.py +++ b/vcr/request.py @@ -1,3 +1,4 @@ +import warnings from six import BytesIO, text_type from six.moves.urllib.parse import urlparse, parse_qsl from .util import CaseInsensitiveDict @@ -6,23 +7,6 @@ from .util import CaseInsensitiveDict class Request(object): """ VCR's representation of a request. - - There is a weird quirk in HTTP. You can send the same header twice. For - this reason, headers are represented by a dict, with lists as the values. - However, it appears that HTTPlib is completely incapable of sending the - same header twice. This puts me in a weird position: I want to be able to - accurately represent HTTP headers in cassettes, but I don't want the extra - step of always having to do [0] in the general case, i.e. - request.headers['key'][0] - - In addition, some servers sometimes send the same header more than once, - and httplib *can* deal with this situation. - - Futhermore, I wanted to keep the request and response cassette format as - similar as possible. - - For this reason, in cassettes I keep a dict with lists as keys, but once - deserialized into VCR, I keep them as plain, naked dicts. """ def __init__(self, method, uri, body, headers): @@ -33,9 +17,7 @@ class Request(object): self.body = body.read() else: self.body = body - self.headers = CaseInsensitiveDict() - for key, value in headers.items(): - self.add_header(key, value) + self.headers = headers @property def headers(self): @@ -43,8 +25,8 @@ class Request(object): @headers.setter def headers(self, value): - if not isinstance(value, CaseInsensitiveDict): - value = CaseInsensitiveDict(value) + if not isinstance(value, HeadersDict): + value = HeadersDict(value) self._headers = value @property @@ -58,11 +40,10 @@ class Request(object): self._body = value def add_header(self, key, value): - # see class docstring for an explanation - if isinstance(value, (tuple, list)): - self.headers[key] = value[0] - else: - self.headers[key] = value + warnings.warn("Request.add_header is deprecated. " + "Please assign to request.headers instead.", + DeprecationWarning) + self.headers[key] = value @property def scheme(self): @@ -116,3 +97,35 @@ class Request(object): @classmethod def _from_dict(cls, dct): return Request(**dct) + + +class HeadersDict(CaseInsensitiveDict): + """ + There is a weird quirk in HTTP. You can send the same header twice. For + this reason, headers are represented by a dict, with lists as the values. + However, it appears that HTTPlib is completely incapable of sending the + same header twice. This puts me in a weird position: I want to be able to + accurately represent HTTP headers in cassettes, but I don't want the extra + step of always having to do [0] in the general case, i.e. + request.headers['key'][0] + + In addition, some servers sometimes send the same header more than once, + and httplib *can* deal with this situation. + + Futhermore, I wanted to keep the request and response cassette format as + similar as possible. + + For this reason, in cassettes I keep a dict with lists as keys, but once + deserialized into VCR, I keep them as plain, naked dicts. + """ + + def __setitem__(self, key, value): + if isinstance(value, (tuple, list)): + value = value[0] + + # Preserve the case from the first time this key was set. + old = self._store.get(key.lower()) + if old: + key = old[0] + + super(HeadersDict, self).__setitem__(key, value) diff --git a/vcr/stubs/__init__.py b/vcr/stubs/__init__.py index a5ec50d..19d57f1 100644 --- a/vcr/stubs/__init__.py +++ b/vcr/stubs/__init__.py @@ -188,8 +188,7 @@ class VCRConnection(object): log.debug('Got {0}'.format(self._vcr_request)) def putheader(self, header, *values): - for value in values: - self._vcr_request.add_header(header, value) + self._vcr_request.headers[header] = values def send(self, data): '''