123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- import re
- from mako import compat
- from mako import lexer
- from mako import parsetree
class MessageExtractor(object):
    """Walk a parsed Mako template and yield messages found in its code.

    This class reads ``self.config`` (keys ``'encoding'`` and
    ``'comment-tags'``) and calls ``self.process_python``; neither is
    defined here, so both are presumably supplied by a subclass -- confirm
    against the concrete extractor plugins.
    """

    def process_file(self, fileobj):
        """Lex the template read from *fileobj* and yield extracted messages.

        :param fileobj: object whose ``read()`` returns the template source.
        :return: generator of whatever ``self.process_python`` yields for
            each piece of Python code found in the template.
        """
        template_node = lexer.Lexer(
            fileobj.read(),
            input_encoding=self.config['encoding']).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
        """Yield messages from *nodes* and, recursively, their children.

        Translator comments (``parsetree.Comment`` nodes starting with one
        of the whitespace-separated tags in ``self.config['comment-tags']``)
        are collected and handed to ``self.process_python`` together with
        the code of the node that directly follows them.

        :param nodes: iterable of ``mako.parsetree`` nodes.
        :return: generator of the values yielded by ``self.process_python``.
        """
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config['encoding'] or 'ascii'
        # A tuple lets a single ``str.startswith`` call test every tag, so a
        # comment that matches several tags is only recorded once.
        comment_tags = tuple(
            filter(None, re.split(r'\s+', self.config['comment-tags'])))

        for node in nodes:
            child_nodes = None
            if in_translator_comments and \
                    isinstance(node, parsetree.Text) and \
                    not node.content.strip():
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    # Continuation of an already-open translator comment.
                    translator_comments.extend(
                        self._split_comment(node.lineno, value))
                    continue
                if value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value))
                continue

            # Pick the Python source carried by the node, plus any nested
            # nodes that must be visited afterwards.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the
            # message
            if translator_comments and \
                    translator_comments[-1][0] < node.lineno - 1:
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments]

            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, 'backslashreplace')

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            code = compat.byte_buffer(compat.b('\n') + code)

            for message in self.process_python(
                    code, node.lineno - 1, translator_strings):
                yield message
                used_translator_comments = True

            # Only drop the collected comments once a message consumed them.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [(lineno + index, line) for index, line in
                enumerate(comment.splitlines())]
|