lint.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. # -*- coding: utf-8 -*-
  2. """
  3. werkzeug.contrib.lint
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. .. versionadded:: 0.5
  6. This module provides a middleware that performs sanity checks of the WSGI
  7. application. It checks that :pep:`333` is properly implemented and warns
  8. on some common HTTP errors such as non-empty responses for 304 status
  9. codes.
  10. This module provides a middleware, the :class:`LintMiddleware`. Wrap your
  11. application with it and it will warn about common problems with WSGI and
  12. HTTP while your application is running.
  13. It's strongly recommended to use it during development.
  14. :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
  15. :license: BSD, see LICENSE for more details.
  16. """
  17. from urlparse import urlparse
  18. from warnings import warn
  19. from werkzeug.datastructures import Headers
  20. from werkzeug.http import is_entity_header
  21. from werkzeug.wsgi import FileWrapper
  22. from werkzeug._compat import string_types
  23. class WSGIWarning(Warning):
  24. """Warning class for WSGI warnings."""
  25. class HTTPWarning(Warning):
  26. """Warning class for HTTP warnings."""
  27. def check_string(context, obj, stacklevel=3):
  28. if type(obj) is not str:
  29. warn(WSGIWarning('%s requires bytestrings, got %s' %
  30. (context, obj.__class__.__name__)))
  31. class InputStream(object):
  32. def __init__(self, stream):
  33. self._stream = stream
  34. def read(self, *args):
  35. if len(args) == 0:
  36. warn(WSGIWarning('wsgi does not guarantee an EOF marker on the '
  37. 'input stream, thus making calls to '
  38. 'wsgi.input.read() unsafe. Conforming servers '
  39. 'may never return from this call.'),
  40. stacklevel=2)
  41. elif len(args) != 1:
  42. warn(WSGIWarning('too many parameters passed to wsgi.input.read()'),
  43. stacklevel=2)
  44. return self._stream.read(*args)
  45. def readline(self, *args):
  46. if len(args) == 0:
  47. warn(WSGIWarning('Calls to wsgi.input.readline() without arguments'
  48. ' are unsafe. Use wsgi.input.read() instead.'),
  49. stacklevel=2)
  50. elif len(args) == 1:
  51. warn(WSGIWarning('wsgi.input.readline() was called with a size hint. '
  52. 'WSGI does not support this, although it\'s available '
  53. 'on all major servers.'),
  54. stacklevel=2)
  55. else:
  56. raise TypeError('too many arguments passed to wsgi.input.readline()')
  57. return self._stream.readline(*args)
  58. def __iter__(self):
  59. try:
  60. return iter(self._stream)
  61. except TypeError:
  62. warn(WSGIWarning('wsgi.input is not iterable.'), stacklevel=2)
  63. return iter(())
  64. def close(self):
  65. warn(WSGIWarning('application closed the input stream!'),
  66. stacklevel=2)
  67. self._stream.close()
  68. class ErrorStream(object):
  69. def __init__(self, stream):
  70. self._stream = stream
  71. def write(self, s):
  72. check_string('wsgi.error.write()', s)
  73. self._stream.write(s)
  74. def flush(self):
  75. self._stream.flush()
  76. def writelines(self, seq):
  77. for line in seq:
  78. self.write(seq)
  79. def close(self):
  80. warn(WSGIWarning('application closed the error stream!'),
  81. stacklevel=2)
  82. self._stream.close()
  83. class GuardedWrite(object):
  84. def __init__(self, write, chunks):
  85. self._write = write
  86. self._chunks = chunks
  87. def __call__(self, s):
  88. check_string('write()', s)
  89. self._write.write(s)
  90. self._chunks.append(len(s))
  91. class GuardedIterator(object):
  92. def __init__(self, iterator, headers_set, chunks):
  93. self._iterator = iterator
  94. self._next = iter(iterator).next
  95. self.closed = False
  96. self.headers_set = headers_set
  97. self.chunks = chunks
  98. def __iter__(self):
  99. return self
  100. def next(self):
  101. if self.closed:
  102. warn(WSGIWarning('iterated over closed app_iter'),
  103. stacklevel=2)
  104. rv = self._next()
  105. if not self.headers_set:
  106. warn(WSGIWarning('Application returned before it '
  107. 'started the response'), stacklevel=2)
  108. check_string('application iterator items', rv)
  109. self.chunks.append(len(rv))
  110. return rv
  111. def close(self):
  112. self.closed = True
  113. if hasattr(self._iterator, 'close'):
  114. self._iterator.close()
  115. if self.headers_set:
  116. status_code, headers = self.headers_set
  117. bytes_sent = sum(self.chunks)
  118. content_length = headers.get('content-length', type=int)
  119. if status_code == 304:
  120. for key, value in headers:
  121. key = key.lower()
  122. if key not in ('expires', 'content-location') and \
  123. is_entity_header(key):
  124. warn(HTTPWarning('entity header %r found in 304 '
  125. 'response' % key))
  126. if bytes_sent:
  127. warn(HTTPWarning('304 responses must not have a body'))
  128. elif 100 <= status_code < 200 or status_code == 204:
  129. if content_length != 0:
  130. warn(HTTPWarning('%r responses must have an empty '
  131. 'content length') % status_code)
  132. if bytes_sent:
  133. warn(HTTPWarning('%r responses must not have a body' %
  134. status_code))
  135. elif content_length is not None and content_length != bytes_sent:
  136. warn(WSGIWarning('Content-Length and the number of bytes '
  137. 'sent to the client do not match.'))
  138. def __del__(self):
  139. if not self.closed:
  140. try:
  141. warn(WSGIWarning('Iterator was garbage collected before '
  142. 'it was closed.'))
  143. except Exception:
  144. pass
  145. class LintMiddleware(object):
  146. """This middleware wraps an application and warns on common errors.
  147. Among other thing it currently checks for the following problems:
  148. - invalid status codes
  149. - non-bytestrings sent to the WSGI server
  150. - strings returned from the WSGI application
  151. - non-empty conditional responses
  152. - unquoted etags
  153. - relative URLs in the Location header
  154. - unsafe calls to wsgi.input
  155. - unclosed iterators
  156. Detected errors are emitted using the standard Python :mod:`warnings`
  157. system and usually end up on :data:`stderr`.
  158. ::
  159. from werkzeug.contrib.lint import LintMiddleware
  160. app = LintMiddleware(app)
  161. :param app: the application to wrap
  162. """
  163. def __init__(self, app):
  164. self.app = app
  165. def check_environ(self, environ):
  166. if type(environ) is not dict:
  167. warn(WSGIWarning('WSGI environment is not a standard python dict.'),
  168. stacklevel=4)
  169. for key in ('REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
  170. 'wsgi.version', 'wsgi.input', 'wsgi.errors',
  171. 'wsgi.multithread', 'wsgi.multiprocess',
  172. 'wsgi.run_once'):
  173. if key not in environ:
  174. warn(WSGIWarning('required environment key %r not found'
  175. % key), stacklevel=3)
  176. if environ['wsgi.version'] != (1, 0):
  177. warn(WSGIWarning('environ is not a WSGI 1.0 environ'),
  178. stacklevel=3)
  179. script_name = environ.get('SCRIPT_NAME', '')
  180. if script_name and script_name[:1] != '/':
  181. warn(WSGIWarning('SCRIPT_NAME does not start with a slash: %r'
  182. % script_name), stacklevel=3)
  183. path_info = environ.get('PATH_INFO', '')
  184. if path_info[:1] != '/':
  185. warn(WSGIWarning('PATH_INFO does not start with a slash: %r'
  186. % path_info), stacklevel=3)
  187. def check_start_response(self, status, headers, exc_info):
  188. check_string('status', status)
  189. status_code = status.split(None, 1)[0]
  190. if len(status_code) != 3 or not status_code.isdigit():
  191. warn(WSGIWarning('Status code must be three digits'), stacklevel=3)
  192. if len(status) < 4 or status[3] != ' ':
  193. warn(WSGIWarning('Invalid value for status %r. Valid '
  194. 'status strings are three digits, a space '
  195. 'and a status explanation'), stacklevel=3)
  196. status_code = int(status_code)
  197. if status_code < 100:
  198. warn(WSGIWarning('status code < 100 detected'), stacklevel=3)
  199. if type(headers) is not list:
  200. warn(WSGIWarning('header list is not a list'), stacklevel=3)
  201. for item in headers:
  202. if type(item) is not tuple or len(item) != 2:
  203. warn(WSGIWarning('Headers must tuple 2-item tuples'),
  204. stacklevel=3)
  205. name, value = item
  206. if type(name) is not str or type(value) is not str:
  207. warn(WSGIWarning('header items must be strings'),
  208. stacklevel=3)
  209. if name.lower() == 'status':
  210. warn(WSGIWarning('The status header is not supported due to '
  211. 'conflicts with the CGI spec.'),
  212. stacklevel=3)
  213. if exc_info is not None and not isinstance(exc_info, tuple):
  214. warn(WSGIWarning('invalid value for exc_info'), stacklevel=3)
  215. headers = Headers(headers)
  216. self.check_headers(headers)
  217. return status_code, headers
  218. def check_headers(self, headers):
  219. etag = headers.get('etag')
  220. if etag is not None:
  221. if etag.startswith(('W/', 'w/')):
  222. if etag.startswith('w/'):
  223. warn(HTTPWarning('weak etag indicator should be upcase.'),
  224. stacklevel=4)
  225. etag = etag[2:]
  226. if not (etag[:1] == etag[-1:] == '"'):
  227. warn(HTTPWarning('unquoted etag emitted.'), stacklevel=4)
  228. location = headers.get('location')
  229. if location is not None:
  230. if not urlparse(location).netloc:
  231. warn(HTTPWarning('absolute URLs required for location header'),
  232. stacklevel=4)
  233. def check_iterator(self, app_iter):
  234. if isinstance(app_iter, string_types):
  235. warn(WSGIWarning('application returned string. Response will '
  236. 'send character for character to the client '
  237. 'which will kill the performance. Return a '
  238. 'list or iterable instead.'), stacklevel=3)
  239. def __call__(self, *args, **kwargs):
  240. if len(args) != 2:
  241. warn(WSGIWarning('Two arguments to WSGI app required'), stacklevel=2)
  242. if kwargs:
  243. warn(WSGIWarning('No keyword arguments to WSGI app allowed'),
  244. stacklevel=2)
  245. environ, start_response = args
  246. self.check_environ(environ)
  247. environ['wsgi.input'] = InputStream(environ['wsgi.input'])
  248. environ['wsgi.errors'] = ErrorStream(environ['wsgi.errors'])
  249. # hook our own file wrapper in so that applications will always
  250. # iterate to the end and we can check the content length
  251. environ['wsgi.file_wrapper'] = FileWrapper
  252. headers_set = []
  253. chunks = []
  254. def checking_start_response(*args, **kwargs):
  255. if len(args) not in (2, 3):
  256. warn(WSGIWarning('Invalid number of arguments: %s, expected '
  257. '2 or 3' % len(args), stacklevel=2))
  258. if kwargs:
  259. warn(WSGIWarning('no keyword arguments allowed.'))
  260. status, headers = args[:2]
  261. if len(args) == 3:
  262. exc_info = args[2]
  263. else:
  264. exc_info = None
  265. headers_set[:] = self.check_start_response(status, headers,
  266. exc_info)
  267. return GuardedWrite(start_response(status, headers, exc_info),
  268. chunks)
  269. app_iter = self.app(environ, checking_start_response)
  270. self.check_iterator(app_iter)
  271. return GuardedIterator(app_iter, headers_set, chunks)