__init__.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. # -*- coding: utf-8 -*-
  2. """
  3. markupsafe
  4. ~~~~~~~~~~
  5. Implements a Markup string.
  6. :copyright: (c) 2010 by Armin Ronacher.
  7. :license: BSD, see LICENSE for more details.
  8. """
import re
import string

try:
    # The ABCs live in collections.abc; the aliases in ``collections`` were
    # removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:  # Python 2 has no collections.abc
    from collections import Mapping

from markupsafe._compat import text_type, string_types, int_types, \
    unichr, iteritems, PY2
  14. __version__ = "1.0"
  15. __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
  16. _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
  17. _entity_re = re.compile(r'&([^& ;]+);')
  18. class Markup(text_type):
  19. r"""Marks a string as being safe for inclusion in HTML/XML output without
  20. needing to be escaped. This implements the `__html__` interface a couple
  21. of frameworks and web applications use. :class:`Markup` is a direct
  22. subclass of `unicode` and provides all the methods of `unicode` just that
  23. it escapes arguments passed and always returns `Markup`.
  24. The `escape` function returns markup objects so that double escaping can't
  25. happen.
  26. The constructor of the :class:`Markup` class can be used for three
  27. different things: When passed an unicode object it's assumed to be safe,
  28. when passed an object with an HTML representation (has an `__html__`
  29. method) that representation is used, otherwise the object passed is
  30. converted into a unicode string and then assumed to be safe:
  31. >>> Markup("Hello <em>World</em>!")
  32. Markup(u'Hello <em>World</em>!')
  33. >>> class Foo(object):
  34. ... def __html__(self):
  35. ... return '<a href="#">foo</a>'
  36. ...
  37. >>> Markup(Foo())
  38. Markup(u'<a href="#">foo</a>')
  39. If you want object passed being always treated as unsafe you can use the
  40. :meth:`escape` classmethod to create a :class:`Markup` object:
  41. >>> Markup.escape("Hello <em>World</em>!")
  42. Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
  43. Operations on a markup string are markup aware which means that all
  44. arguments are passed through the :func:`escape` function:
  45. >>> em = Markup("<em>%s</em>")
  46. >>> em % "foo & bar"
  47. Markup(u'<em>foo &amp; bar</em>')
  48. >>> strong = Markup("<strong>%(text)s</strong>")
  49. >>> strong % {'text': '<blink>hacker here</blink>'}
  50. Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
  51. >>> Markup("<em>Hello</em> ") + "<foo>"
  52. Markup(u'<em>Hello</em> &lt;foo&gt;')
  53. """
  54. __slots__ = ()
  55. def __new__(cls, base=u'', encoding=None, errors='strict'):
  56. if hasattr(base, '__html__'):
  57. base = base.__html__()
  58. if encoding is None:
  59. return text_type.__new__(cls, base)
  60. return text_type.__new__(cls, base, encoding, errors)
  61. def __html__(self):
  62. return self
  63. def __add__(self, other):
  64. if isinstance(other, string_types) or hasattr(other, '__html__'):
  65. return self.__class__(super(Markup, self).__add__(self.escape(other)))
  66. return NotImplemented
  67. def __radd__(self, other):
  68. if hasattr(other, '__html__') or isinstance(other, string_types):
  69. return self.escape(other).__add__(self)
  70. return NotImplemented
  71. def __mul__(self, num):
  72. if isinstance(num, int_types):
  73. return self.__class__(text_type.__mul__(self, num))
  74. return NotImplemented
  75. __rmul__ = __mul__
  76. def __mod__(self, arg):
  77. if isinstance(arg, tuple):
  78. arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
  79. else:
  80. arg = _MarkupEscapeHelper(arg, self.escape)
  81. return self.__class__(text_type.__mod__(self, arg))
  82. def __repr__(self):
  83. return '%s(%s)' % (
  84. self.__class__.__name__,
  85. text_type.__repr__(self)
  86. )
  87. def join(self, seq):
  88. return self.__class__(text_type.join(self, map(self.escape, seq)))
  89. join.__doc__ = text_type.join.__doc__
  90. def split(self, *args, **kwargs):
  91. return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
  92. split.__doc__ = text_type.split.__doc__
  93. def rsplit(self, *args, **kwargs):
  94. return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
  95. rsplit.__doc__ = text_type.rsplit.__doc__
  96. def splitlines(self, *args, **kwargs):
  97. return list(map(self.__class__, text_type.splitlines(
  98. self, *args, **kwargs)))
  99. splitlines.__doc__ = text_type.splitlines.__doc__
  100. def unescape(self):
  101. r"""Unescape markup again into an text_type string. This also resolves
  102. known HTML4 and XHTML entities:
  103. >>> Markup("Main &raquo; <em>About</em>").unescape()
  104. u'Main \xbb <em>About</em>'
  105. """
  106. from markupsafe._constants import HTML_ENTITIES
  107. def handle_match(m):
  108. name = m.group(1)
  109. if name in HTML_ENTITIES:
  110. return unichr(HTML_ENTITIES[name])
  111. try:
  112. if name[:2] in ('#x', '#X'):
  113. return unichr(int(name[2:], 16))
  114. elif name.startswith('#'):
  115. return unichr(int(name[1:]))
  116. except ValueError:
  117. pass
  118. # Don't modify unexpected input.
  119. return m.group()
  120. return _entity_re.sub(handle_match, text_type(self))
  121. def striptags(self):
  122. r"""Unescape markup into an text_type string and strip all tags. This
  123. also resolves known HTML4 and XHTML entities. Whitespace is
  124. normalized to one:
  125. >>> Markup("Main &raquo; <em>About</em>").striptags()
  126. u'Main \xbb About'
  127. """
  128. stripped = u' '.join(_striptags_re.sub('', self).split())
  129. return Markup(stripped).unescape()
  130. @classmethod
  131. def escape(cls, s):
  132. """Escape the string. Works like :func:`escape` with the difference
  133. that for subclasses of :class:`Markup` this function would return the
  134. correct subclass.
  135. """
  136. rv = escape(s)
  137. if rv.__class__ is not cls:
  138. return cls(rv)
  139. return rv
  140. def make_simple_escaping_wrapper(name):
  141. orig = getattr(text_type, name)
  142. def func(self, *args, **kwargs):
  143. args = _escape_argspec(list(args), enumerate(args), self.escape)
  144. _escape_argspec(kwargs, iteritems(kwargs), self.escape)
  145. return self.__class__(orig(self, *args, **kwargs))
  146. func.__name__ = orig.__name__
  147. func.__doc__ = orig.__doc__
  148. return func
  149. for method in '__getitem__', 'capitalize', \
  150. 'title', 'lower', 'upper', 'replace', 'ljust', \
  151. 'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
  152. 'translate', 'expandtabs', 'swapcase', 'zfill':
  153. locals()[method] = make_simple_escaping_wrapper(method)
  154. # new in python 2.5
  155. if hasattr(text_type, 'partition'):
  156. def partition(self, sep):
  157. return tuple(map(self.__class__,
  158. text_type.partition(self, self.escape(sep))))
  159. def rpartition(self, sep):
  160. return tuple(map(self.__class__,
  161. text_type.rpartition(self, self.escape(sep))))
  162. # new in python 2.6
  163. if hasattr(text_type, 'format'):
  164. def format(*args, **kwargs):
  165. self, args = args[0], args[1:]
  166. formatter = EscapeFormatter(self.escape)
  167. kwargs = _MagicFormatMapping(args, kwargs)
  168. return self.__class__(formatter.vformat(self, args, kwargs))
  169. def __html_format__(self, format_spec):
  170. if format_spec:
  171. raise ValueError('Unsupported format specification '
  172. 'for Markup.')
  173. return self
  174. # not in python 3
  175. if hasattr(text_type, '__getslice__'):
  176. __getslice__ = make_simple_escaping_wrapper('__getslice__')
  177. del method, make_simple_escaping_wrapper
  178. class _MagicFormatMapping(Mapping):
  179. """This class implements a dummy wrapper to fix a bug in the Python
  180. standard library for string formatting.
  181. See http://bugs.python.org/issue13598 for information about why
  182. this is necessary.
  183. """
  184. def __init__(self, args, kwargs):
  185. self._args = args
  186. self._kwargs = kwargs
  187. self._last_index = 0
  188. def __getitem__(self, key):
  189. if key == '':
  190. idx = self._last_index
  191. self._last_index += 1
  192. try:
  193. return self._args[idx]
  194. except LookupError:
  195. pass
  196. key = str(idx)
  197. return self._kwargs[key]
  198. def __iter__(self):
  199. return iter(self._kwargs)
  200. def __len__(self):
  201. return len(self._kwargs)
  202. if hasattr(text_type, 'format'):
  203. class EscapeFormatter(string.Formatter):
  204. def __init__(self, escape):
  205. self.escape = escape
  206. def format_field(self, value, format_spec):
  207. if hasattr(value, '__html_format__'):
  208. rv = value.__html_format__(format_spec)
  209. elif hasattr(value, '__html__'):
  210. if format_spec:
  211. raise ValueError('No format specification allowed '
  212. 'when formatting an object with '
  213. 'its __html__ method.')
  214. rv = value.__html__()
  215. else:
  216. # We need to make sure the format spec is unicode here as
  217. # otherwise the wrong callback methods are invoked. For
  218. # instance a byte string there would invoke __str__ and
  219. # not __unicode__.
  220. rv = string.Formatter.format_field(
  221. self, value, text_type(format_spec))
  222. return text_type(self.escape(rv))
  223. def _escape_argspec(obj, iterable, escape):
  224. """Helper for various string-wrapped functions."""
  225. for key, value in iterable:
  226. if hasattr(value, '__html__') or isinstance(value, string_types):
  227. obj[key] = escape(value)
  228. return obj
  229. class _MarkupEscapeHelper(object):
  230. """Helper for Markup.__mod__"""
  231. def __init__(self, obj, escape):
  232. self.obj = obj
  233. self.escape = escape
  234. __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape)
  235. __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj))
  236. __repr__ = lambda s: str(s.escape(repr(s.obj)))
  237. __int__ = lambda s: int(s.obj)
  238. __float__ = lambda s: float(s.obj)
  239. # we have to import it down here as the speedups and native
  240. # modules imports the markup type which is define above.
  241. try:
  242. from markupsafe._speedups import escape, escape_silent, soft_unicode
  243. except ImportError:
  244. from markupsafe._native import escape, escape_silent, soft_unicode
  245. if not PY2:
  246. soft_str = soft_unicode
  247. __all__.append('soft_str')