serialize.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. import base64
  2. import io
  3. import json
  4. import zlib
  5. from pip._vendor.requests.structures import CaseInsensitiveDict
  6. from .compat import HTTPResponse, pickle, text_type
  7. def _b64_encode_bytes(b):
  8. return base64.b64encode(b).decode("ascii")
  9. def _b64_encode_str(s):
  10. return _b64_encode_bytes(s.encode("utf8"))
  11. def _b64_encode(s):
  12. if isinstance(s, text_type):
  13. return _b64_encode_str(s)
  14. return _b64_encode_bytes(s)
  15. def _b64_decode_bytes(b):
  16. return base64.b64decode(b.encode("ascii"))
  17. def _b64_decode_str(s):
  18. return _b64_decode_bytes(s).decode("utf8")
  19. class Serializer(object):
  20. def dumps(self, request, response, body=None):
  21. response_headers = CaseInsensitiveDict(response.headers)
  22. if body is None:
  23. body = response.read(decode_content=False)
  24. # NOTE: 99% sure this is dead code. I'm only leaving it
  25. # here b/c I don't have a test yet to prove
  26. # it. Basically, before using
  27. # `cachecontrol.filewrapper.CallbackFileWrapper`,
  28. # this made an effort to reset the file handle. The
  29. # `CallbackFileWrapper` short circuits this code by
  30. # setting the body as the content is consumed, the
  31. # result being a `body` argument is *always* passed
  32. # into cache_response, and in turn,
  33. # `Serializer.dump`.
  34. response._fp = io.BytesIO(body)
  35. data = {
  36. "response": {
  37. "body": _b64_encode_bytes(body),
  38. "headers": dict(
  39. (_b64_encode(k), _b64_encode(v))
  40. for k, v in response.headers.items()
  41. ),
  42. "status": response.status,
  43. "version": response.version,
  44. "reason": _b64_encode_str(response.reason),
  45. "strict": response.strict,
  46. "decode_content": response.decode_content,
  47. },
  48. }
  49. # Construct our vary headers
  50. data["vary"] = {}
  51. if "vary" in response_headers:
  52. varied_headers = response_headers['vary'].split(',')
  53. for header in varied_headers:
  54. header = header.strip()
  55. data["vary"][header] = request.headers.get(header, None)
  56. # Encode our Vary headers to ensure they can be serialized as JSON
  57. data["vary"] = dict(
  58. (_b64_encode(k), _b64_encode(v) if v is not None else v)
  59. for k, v in data["vary"].items()
  60. )
  61. return b",".join([
  62. b"cc=2",
  63. zlib.compress(
  64. json.dumps(
  65. data, separators=(",", ":"), sort_keys=True,
  66. ).encode("utf8"),
  67. ),
  68. ])
  69. def loads(self, request, data):
  70. # Short circuit if we've been given an empty set of data
  71. if not data:
  72. return
  73. # Determine what version of the serializer the data was serialized
  74. # with
  75. try:
  76. ver, data = data.split(b",", 1)
  77. except ValueError:
  78. ver = b"cc=0"
  79. # Make sure that our "ver" is actually a version and isn't a false
  80. # positive from a , being in the data stream.
  81. if ver[:3] != b"cc=":
  82. data = ver + data
  83. ver = b"cc=0"
  84. # Get the version number out of the cc=N
  85. ver = ver.split(b"=", 1)[-1].decode("ascii")
  86. # Dispatch to the actual load method for the given version
  87. try:
  88. return getattr(self, "_loads_v{0}".format(ver))(request, data)
  89. except AttributeError:
  90. # This is a version we don't have a loads function for, so we'll
  91. # just treat it as a miss and return None
  92. return
  93. def prepare_response(self, request, cached):
  94. """Verify our vary headers match and construct a real urllib3
  95. HTTPResponse object.
  96. """
  97. # Special case the '*' Vary value as it means we cannot actually
  98. # determine if the cached response is suitable for this request.
  99. if "*" in cached.get("vary", {}):
  100. return
  101. # Ensure that the Vary headers for the cached response match our
  102. # request
  103. for header, value in cached.get("vary", {}).items():
  104. if request.headers.get(header, None) != value:
  105. return
  106. body_raw = cached["response"].pop("body")
  107. headers = CaseInsensitiveDict(data=cached['response']['headers'])
  108. if headers.get('transfer-encoding', '') == 'chunked':
  109. headers.pop('transfer-encoding')
  110. cached['response']['headers'] = headers
  111. try:
  112. body = io.BytesIO(body_raw)
  113. except TypeError:
  114. # This can happen if cachecontrol serialized to v1 format (pickle)
  115. # using Python 2. A Python 2 str(byte string) will be unpickled as
  116. # a Python 3 str (unicode string), which will cause the above to
  117. # fail with:
  118. #
  119. # TypeError: 'str' does not support the buffer interface
  120. body = io.BytesIO(body_raw.encode('utf8'))
  121. return HTTPResponse(
  122. body=body,
  123. preload_content=False,
  124. **cached["response"]
  125. )
  126. def _loads_v0(self, request, data):
  127. # The original legacy cache data. This doesn't contain enough
  128. # information to construct everything we need, so we'll treat this as
  129. # a miss.
  130. return
  131. def _loads_v1(self, request, data):
  132. try:
  133. cached = pickle.loads(data)
  134. except ValueError:
  135. return
  136. return self.prepare_response(request, cached)
  137. def _loads_v2(self, request, data):
  138. try:
  139. cached = json.loads(zlib.decompress(data).decode("utf8"))
  140. except ValueError:
  141. return
  142. # We need to decode the items that we've base64 encoded
  143. cached["response"]["body"] = _b64_decode_bytes(
  144. cached["response"]["body"]
  145. )
  146. cached["response"]["headers"] = dict(
  147. (_b64_decode_str(k), _b64_decode_str(v))
  148. for k, v in cached["response"]["headers"].items()
  149. )
  150. cached["response"]["reason"] = _b64_decode_str(
  151. cached["response"]["reason"],
  152. )
  153. cached["vary"] = dict(
  154. (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
  155. for k, v in cached["vary"].items()
  156. )
  157. return self.prepare_response(request, cached)