useragents.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # -*- coding: utf-8 -*-
  2. """
  3. werkzeug.useragents
  4. ~~~~~~~~~~~~~~~~~~~
  5. This module provides a helper to inspect user agent strings. This module
  6. is far from complete but should work for most of the currently available
  7. browsers.
  8. :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
  9. :license: BSD, see LICENSE for more details.
  10. """
  11. import re
  class UserAgentParser(object):
      """A simple user agent parser.  Used by the `UserAgent`.

      The class-level ``platforms`` and ``browsers`` tables hold
      ``(pattern, name)`` pairs.  On instantiation they are replaced, on the
      instance, by lists of ``(name, compiled_regex)`` pairs.  Both tables
      are searched in order and the first pattern that matches wins, so more
      specific entries must come before more generic ones.
      """

      platforms = (
          ('cros', 'chromeos'),
          ('iphone|ios', 'iphone'),
          ('ipad', 'ipad'),
          (r'darwin|mac|os\s*x', 'macos'),
          ('win', 'windows'),
          (r'android', 'android'),
          (r'x11|lin(\b|ux)?', 'linux'),
          ('(sun|i86)os', 'solaris'),
          (r'nintendo\s+wii', 'wii'),
          ('irix', 'irix'),
          ('hp-?ux', 'hpux'),
          ('aix', 'aix'),
          ('sco|unix_sv', 'sco'),
          ('bsd', 'bsd'),
          ('amiga', 'amiga'),
          ('blackberry|playbook', 'blackberry'),
          ('symbian', 'symbian')
      )

      browsers = (
          ('googlebot', 'google'),
          ('msnbot', 'msn'),
          ('yahoo', 'yahoo'),
          ('ask jeeves', 'ask'),
          (r'aol|america\s+online\s+browser', 'aol'),
          ('opera', 'opera'),
          ('chrome', 'chrome'),
          ('firefox|firebird|phoenix|iceweasel', 'firefox'),
          ('galeon', 'galeon'),
          ('safari|version', 'safari'),
          ('webkit', 'webkit'),
          ('camino', 'camino'),
          ('konqueror', 'konqueror'),
          ('k-meleon', 'kmeleon'),
          ('netscape', 'netscape'),
          (r'msie|microsoft\s+internet\s+explorer|trident/.+? rv:', 'msie'),
          ('lynx', 'lynx'),
          ('links', 'links'),
          ('seamonkey|mozilla', 'seamonkey')
      )

      # Template wrapped around every browser pattern: the browser name,
      # optional separator characters, then an optional version (group 1).
      # Case-insensitivity is requested through ``re.I`` at compile time; an
      # inline ``(?i)`` at the *end* of the pattern is rejected by the ``re``
      # module on Python 3.11+ (and deprecated since 3.6).
      _browser_version_re = r'(?:%s)[/\sa-z(]*(\d+[.\da-z]+)?'

      # Two-letter language codes, optionally followed by a two-letter
      # region (``en`` / ``en-US``), delimited by ``;``, brackets or parens.
      _language_re = re.compile(
          r'(?:;\s*|\s+)(\b\w{2}\b(?:-\b\w{2}\b)?)\s*;|'
          r'(?:\(|\[|;)\s*(\b\w{2}\b(?:-\b\w{2}\b)?)\s*(?:\]|\)|;)'
      )

      def __init__(self):
          """Compile the class-level lookup tables for this instance."""
          self.platforms = [
              (name, re.compile(pattern, re.I))
              for pattern, name in self.platforms
          ]
          self.browsers = [
              (name, re.compile(self._browser_version_re % pattern, re.I))
              for pattern, name in self.browsers
          ]

      def __call__(self, user_agent):
          """Parse a user agent string.

          :param user_agent: the raw user agent string.
          :return: a ``(platform, browser, version, language)`` tuple.  Each
                   item is `None` if it could not be detected; `version` is
                   also `None` when a browser was recognized but no version
                   number follows its name.
          """
          # first matching platform wins
          platform = None
          for name, regex in self.platforms:
              if regex.search(user_agent) is not None:
                  platform = name
                  break

          # first matching browser wins; group 1 is the optional version
          browser = version = None
          for name, regex in self.browsers:
              match = regex.search(user_agent)
              if match is not None:
                  browser = name
                  version = match.group(1)
                  break

          # the language pattern has two alternatives, each with its own group
          language = None
          match = self._language_re.search(user_agent)
          if match is not None:
              language = match.group(1) or match.group(2)

          return platform, browser, version, language
  class UserAgent(object):
      """Represents a user agent.  Pass it a WSGI environment or a user agent
      string and you can inspect some of the details from the user agent
      string via the attributes.  The following attributes exist:

      .. attribute:: string

         the raw user agent string

      .. attribute:: platform

         the browser platform.  The following platforms are currently
         recognized:

         -   `aix`
         -   `amiga`
         -   `android`
         -   `blackberry`
         -   `bsd`
         -   `chromeos`
         -   `hpux`
         -   `iphone`
         -   `ipad`
         -   `irix`
         -   `linux`
         -   `macos`
         -   `sco`
         -   `solaris`
         -   `symbian`
         -   `wii`
         -   `windows`

      .. attribute:: browser

         the name of the browser.  The following browsers are currently
         recognized:

         -   `aol` *
         -   `ask` *
         -   `camino`
         -   `chrome`
         -   `firefox`
         -   `galeon`
         -   `google` *
         -   `kmeleon`
         -   `konqueror`
         -   `links`
         -   `lynx`
         -   `msie`
         -   `msn`
         -   `netscape`
         -   `opera`
         -   `safari`
         -   `seamonkey`
         -   `webkit`
         -   `yahoo` *

         (Browsers marked with a star (``*``) are crawlers.)

      .. attribute:: version

         the version of the browser

      .. attribute:: language

         the language of the browser
      """

      # one parser shared by all instances; its tables are compiled once
      _parser = UserAgentParser()

      def __init__(self, environ_or_string):
          """Parse the user agent from a WSGI environ dict or a raw string.

          A dict is treated as a WSGI environment and its
          ``HTTP_USER_AGENT`` entry is used (empty string if missing).
          """
          ua_string = environ_or_string
          if isinstance(ua_string, dict):
              ua_string = ua_string.get('HTTP_USER_AGENT', '')
          self.string = ua_string
          details = self._parser(ua_string)
          self.platform, self.browser, self.version, self.language = details

      def to_header(self):
          """Return the raw user agent string for use as a header value."""
          return self.string

      def __str__(self):
          """Return the raw user agent string."""
          return self.string

      def __nonzero__(self):
          """A user agent is truthy only if a browser was recognized."""
          return True if self.browser else False

      # Python 3 spelling of the truth-value hook
      __bool__ = __nonzero__

      def __repr__(self):
          """Return ``<ClassName 'browser'/version>``."""
          cls_name = self.__class__.__name__
          return '<%s %r/%s>' % (cls_name, self.browser, self.version)
  155. # conceptually this belongs in this module but because we want to lazily
  156. # load the user agent module (which happens in wrappers.py) we have to import
  157. # it afterwards. The class itself has the module set to this module so
  158. # pickle, inspect and similar modules treat the object as if it was really
  159. # implemented here.
  160. from werkzeug.wrappers import UserAgentMixin # noqa