extract.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import re
  2. from mako import compat
  3. from mako import lexer
  4. from mako import parsetree
  5. class MessageExtractor(object):
  6. def process_file(self, fileobj):
  7. template_node = lexer.Lexer(
  8. fileobj.read(),
  9. input_encoding=self.config['encoding']).parse()
  10. for extracted in self.extract_nodes(template_node.get_children()):
  11. yield extracted
  12. def extract_nodes(self, nodes):
  13. translator_comments = []
  14. in_translator_comments = False
  15. input_encoding = self.config['encoding'] or 'ascii'
  16. comment_tags = list(
  17. filter(None, re.split(r'\s+', self.config['comment-tags'])))
  18. for node in nodes:
  19. child_nodes = None
  20. if in_translator_comments and \
  21. isinstance(node, parsetree.Text) and \
  22. not node.content.strip():
  23. # Ignore whitespace within translator comments
  24. continue
  25. if isinstance(node, parsetree.Comment):
  26. value = node.text.strip()
  27. if in_translator_comments:
  28. translator_comments.extend(
  29. self._split_comment(node.lineno, value))
  30. continue
  31. for comment_tag in comment_tags:
  32. if value.startswith(comment_tag):
  33. in_translator_comments = True
  34. translator_comments.extend(
  35. self._split_comment(node.lineno, value))
  36. continue
  37. if isinstance(node, parsetree.DefTag):
  38. code = node.function_decl.code
  39. child_nodes = node.nodes
  40. elif isinstance(node, parsetree.BlockTag):
  41. code = node.body_decl.code
  42. child_nodes = node.nodes
  43. elif isinstance(node, parsetree.CallTag):
  44. code = node.code.code
  45. child_nodes = node.nodes
  46. elif isinstance(node, parsetree.PageTag):
  47. code = node.body_decl.code
  48. elif isinstance(node, parsetree.CallNamespaceTag):
  49. code = node.expression
  50. child_nodes = node.nodes
  51. elif isinstance(node, parsetree.ControlLine):
  52. if node.isend:
  53. in_translator_comments = False
  54. continue
  55. code = node.text
  56. elif isinstance(node, parsetree.Code):
  57. in_translator_comments = False
  58. code = node.code.code
  59. elif isinstance(node, parsetree.Expression):
  60. code = node.code.code
  61. else:
  62. continue
  63. # Comments don't apply unless they immediately preceed the message
  64. if translator_comments and \
  65. translator_comments[-1][0] < node.lineno - 1:
  66. translator_comments = []
  67. translator_strings = [
  68. comment[1] for comment in translator_comments]
  69. if isinstance(code, compat.text_type):
  70. code = code.encode(input_encoding, 'backslashreplace')
  71. used_translator_comments = False
  72. # We add extra newline to work around a pybabel bug
  73. # (see python-babel/babel#274, parse_encoding dies if the first
  74. # input string of the input is non-ascii)
  75. # Also, because we added it, we have to subtract one from
  76. # node.lineno
  77. code = compat.byte_buffer(compat.b('\n') + code)
  78. for message in self.process_python(
  79. code, node.lineno - 1, translator_strings):
  80. yield message
  81. used_translator_comments = True
  82. if used_translator_comments:
  83. translator_comments = []
  84. in_translator_comments = False
  85. if child_nodes:
  86. for extracted in self.extract_nodes(child_nodes):
  87. yield extracted
  88. @staticmethod
  89. def _split_comment(lineno, comment):
  90. """Return the multiline comment at lineno split into a list of
  91. comment line numbers and the accompanying comment line"""
  92. return [(lineno + index, line) for index, line in
  93. enumerate(comment.splitlines())]