reflection.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. # mysql/reflection.py
  2. # Copyright (C) 2005-2017 the SQLAlchemy authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of SQLAlchemy and is released under
  6. # the MIT License: http://www.opensource.org/licenses/mit-license.php
  7. import re
  8. from ... import log, util
  9. from ... import types as sqltypes
  10. from .enumerated import _EnumeratedValues, SET
  11. from .types import DATETIME, TIME, TIMESTAMP
  12. class ReflectedState(object):
  13. """Stores raw information about a SHOW CREATE TABLE statement."""
  14. def __init__(self):
  15. self.columns = []
  16. self.table_options = {}
  17. self.table_name = None
  18. self.keys = []
  19. self.constraints = []
  20. @log.class_logger
  21. class MySQLTableDefinitionParser(object):
  22. """Parses the results of a SHOW CREATE TABLE statement."""
  23. def __init__(self, dialect, preparer):
  24. self.dialect = dialect
  25. self.preparer = preparer
  26. self._prep_regexes()
  27. def parse(self, show_create, charset):
  28. state = ReflectedState()
  29. state.charset = charset
  30. for line in re.split(r'\r?\n', show_create):
  31. if line.startswith(' ' + self.preparer.initial_quote):
  32. self._parse_column(line, state)
  33. # a regular table options line
  34. elif line.startswith(') '):
  35. self._parse_table_options(line, state)
  36. # an ANSI-mode table options line
  37. elif line == ')':
  38. pass
  39. elif line.startswith('CREATE '):
  40. self._parse_table_name(line, state)
  41. # Not present in real reflection, but may be if
  42. # loading from a file.
  43. elif not line:
  44. pass
  45. else:
  46. type_, spec = self._parse_constraints(line)
  47. if type_ is None:
  48. util.warn("Unknown schema content: %r" % line)
  49. elif type_ == 'key':
  50. state.keys.append(spec)
  51. elif type_ == 'constraint':
  52. state.constraints.append(spec)
  53. else:
  54. pass
  55. return state
  56. def _parse_constraints(self, line):
  57. """Parse a KEY or CONSTRAINT line.
  58. :param line: A line of SHOW CREATE TABLE output
  59. """
  60. # KEY
  61. m = self._re_key.match(line)
  62. if m:
  63. spec = m.groupdict()
  64. # convert columns into name, length pairs
  65. spec['columns'] = self._parse_keyexprs(spec['columns'])
  66. return 'key', spec
  67. # CONSTRAINT
  68. m = self._re_constraint.match(line)
  69. if m:
  70. spec = m.groupdict()
  71. spec['table'] = \
  72. self.preparer.unformat_identifiers(spec['table'])
  73. spec['local'] = [c[0]
  74. for c in self._parse_keyexprs(spec['local'])]
  75. spec['foreign'] = [c[0]
  76. for c in self._parse_keyexprs(spec['foreign'])]
  77. return 'constraint', spec
  78. # PARTITION and SUBPARTITION
  79. m = self._re_partition.match(line)
  80. if m:
  81. # Punt!
  82. return 'partition', line
  83. # No match.
  84. return (None, line)
  85. def _parse_table_name(self, line, state):
  86. """Extract the table name.
  87. :param line: The first line of SHOW CREATE TABLE
  88. """
  89. regex, cleanup = self._pr_name
  90. m = regex.match(line)
  91. if m:
  92. state.table_name = cleanup(m.group('name'))
  93. def _parse_table_options(self, line, state):
  94. """Build a dictionary of all reflected table-level options.
  95. :param line: The final line of SHOW CREATE TABLE output.
  96. """
  97. options = {}
  98. if not line or line == ')':
  99. pass
  100. else:
  101. rest_of_line = line[:]
  102. for regex, cleanup in self._pr_options:
  103. m = regex.search(rest_of_line)
  104. if not m:
  105. continue
  106. directive, value = m.group('directive'), m.group('val')
  107. if cleanup:
  108. value = cleanup(value)
  109. options[directive.lower()] = value
  110. rest_of_line = regex.sub('', rest_of_line)
  111. for nope in ('auto_increment', 'data directory', 'index directory'):
  112. options.pop(nope, None)
  113. for opt, val in options.items():
  114. state.table_options['%s_%s' % (self.dialect.name, opt)] = val
  115. def _parse_column(self, line, state):
  116. """Extract column details.
  117. Falls back to a 'minimal support' variant if full parse fails.
  118. :param line: Any column-bearing line from SHOW CREATE TABLE
  119. """
  120. spec = None
  121. m = self._re_column.match(line)
  122. if m:
  123. spec = m.groupdict()
  124. spec['full'] = True
  125. else:
  126. m = self._re_column_loose.match(line)
  127. if m:
  128. spec = m.groupdict()
  129. spec['full'] = False
  130. if not spec:
  131. util.warn("Unknown column definition %r" % line)
  132. return
  133. if not spec['full']:
  134. util.warn("Incomplete reflection of column definition %r" % line)
  135. name, type_, args = spec['name'], spec['coltype'], spec['arg']
  136. try:
  137. col_type = self.dialect.ischema_names[type_]
  138. except KeyError:
  139. util.warn("Did not recognize type '%s' of column '%s'" %
  140. (type_, name))
  141. col_type = sqltypes.NullType
  142. # Column type positional arguments eg. varchar(32)
  143. if args is None or args == '':
  144. type_args = []
  145. elif args[0] == "'" and args[-1] == "'":
  146. type_args = self._re_csv_str.findall(args)
  147. else:
  148. type_args = [int(v) for v in self._re_csv_int.findall(args)]
  149. # Column type keyword options
  150. type_kw = {}
  151. if issubclass(col_type, (DATETIME, TIME, TIMESTAMP)):
  152. if type_args:
  153. type_kw['fsp'] = type_args.pop(0)
  154. for kw in ('unsigned', 'zerofill'):
  155. if spec.get(kw, False):
  156. type_kw[kw] = True
  157. for kw in ('charset', 'collate'):
  158. if spec.get(kw, False):
  159. type_kw[kw] = spec[kw]
  160. if issubclass(col_type, _EnumeratedValues):
  161. type_args = _EnumeratedValues._strip_values(type_args)
  162. if issubclass(col_type, SET) and '' in type_args:
  163. type_kw['retrieve_as_bitwise'] = True
  164. type_instance = col_type(*type_args, **type_kw)
  165. col_kw = {}
  166. # NOT NULL
  167. col_kw['nullable'] = True
  168. # this can be "NULL" in the case of TIMESTAMP
  169. if spec.get('notnull', False) == 'NOT NULL':
  170. col_kw['nullable'] = False
  171. # AUTO_INCREMENT
  172. if spec.get('autoincr', False):
  173. col_kw['autoincrement'] = True
  174. elif issubclass(col_type, sqltypes.Integer):
  175. col_kw['autoincrement'] = False
  176. # DEFAULT
  177. default = spec.get('default', None)
  178. if default == 'NULL':
  179. # eliminates the need to deal with this later.
  180. default = None
  181. col_d = dict(name=name, type=type_instance, default=default)
  182. col_d.update(col_kw)
  183. state.columns.append(col_d)
  184. def _describe_to_create(self, table_name, columns):
  185. """Re-format DESCRIBE output as a SHOW CREATE TABLE string.
  186. DESCRIBE is a much simpler reflection and is sufficient for
  187. reflecting views for runtime use. This method formats DDL
  188. for columns only- keys are omitted.
  189. :param columns: A sequence of DESCRIBE or SHOW COLUMNS 6-tuples.
  190. SHOW FULL COLUMNS FROM rows must be rearranged for use with
  191. this function.
  192. """
  193. buffer = []
  194. for row in columns:
  195. (name, col_type, nullable, default, extra) = \
  196. [row[i] for i in (0, 1, 2, 4, 5)]
  197. line = [' ']
  198. line.append(self.preparer.quote_identifier(name))
  199. line.append(col_type)
  200. if not nullable:
  201. line.append('NOT NULL')
  202. if default:
  203. if 'auto_increment' in default:
  204. pass
  205. elif (col_type.startswith('timestamp') and
  206. default.startswith('C')):
  207. line.append('DEFAULT')
  208. line.append(default)
  209. elif default == 'NULL':
  210. line.append('DEFAULT')
  211. line.append(default)
  212. else:
  213. line.append('DEFAULT')
  214. line.append("'%s'" % default.replace("'", "''"))
  215. if extra:
  216. line.append(extra)
  217. buffer.append(' '.join(line))
  218. return ''.join([('CREATE TABLE %s (\n' %
  219. self.preparer.quote_identifier(table_name)),
  220. ',\n'.join(buffer),
  221. '\n) '])
  222. def _parse_keyexprs(self, identifiers):
  223. """Unpack '"col"(2),"col" ASC'-ish strings into components."""
  224. return self._re_keyexprs.findall(identifiers)
  225. def _prep_regexes(self):
  226. """Pre-compile regular expressions."""
  227. self._re_columns = []
  228. self._pr_options = []
  229. _final = self.preparer.final_quote
  230. quotes = dict(zip(('iq', 'fq', 'esc_fq'),
  231. [re.escape(s) for s in
  232. (self.preparer.initial_quote,
  233. _final,
  234. self.preparer._escape_identifier(_final))]))
  235. self._pr_name = _pr_compile(
  236. r'^CREATE (?:\w+ +)?TABLE +'
  237. r'%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($' % quotes,
  238. self.preparer._unescape_identifier)
  239. # `col`,`col2`(32),`col3`(15) DESC
  240. #
  241. # Note: ASC and DESC aren't reflected, so we'll punt...
  242. self._re_keyexprs = _re_compile(
  243. r'(?:'
  244. r'(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)'
  245. r'(?:\((\d+)\))?(?=\,|$))+' % quotes)
  246. # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
  247. self._re_csv_str = _re_compile(r'\x27(?:\x27\x27|[^\x27])*\x27')
  248. # 123 or 123,456
  249. self._re_csv_int = _re_compile(r'\d+')
  250. # `colname` <type> [type opts]
  251. # (NOT NULL | NULL)
  252. # DEFAULT ('value' | CURRENT_TIMESTAMP...)
  253. # COMMENT 'comment'
  254. # COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
  255. # STORAGE (DISK|MEMORY)
  256. self._re_column = _re_compile(
  257. r' '
  258. r'%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +'
  259. r'(?P<coltype>\w+)'
  260. r'(?:\((?P<arg>(?:\d+|\d+,\d+|'
  261. r'(?:\x27(?:\x27\x27|[^\x27])*\x27,?)+))\))?'
  262. r'(?: +(?P<unsigned>UNSIGNED))?'
  263. r'(?: +(?P<zerofill>ZEROFILL))?'
  264. r'(?: +CHARACTER SET +(?P<charset>[\w_]+))?'
  265. r'(?: +COLLATE +(?P<collate>[\w_]+))?'
  266. r'(?: +(?P<notnull>(?:NOT )?NULL))?'
  267. r'(?: +DEFAULT +(?P<default>'
  268. r'(?:NULL|\x27(?:\x27\x27|[^\x27])*\x27|\w+'
  269. r'(?: +ON UPDATE \w+)?)'
  270. r'))?'
  271. r'(?: +(?P<autoincr>AUTO_INCREMENT))?'
  272. r'(?: +COMMENT +(P<comment>(?:\x27\x27|[^\x27])+))?'
  273. r'(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?'
  274. r'(?: +STORAGE +(?P<storage>\w+))?'
  275. r'(?: +(?P<extra>.*))?'
  276. r',?$'
  277. % quotes
  278. )
  279. # Fallback, try to parse as little as possible
  280. self._re_column_loose = _re_compile(
  281. r' '
  282. r'%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +'
  283. r'(?P<coltype>\w+)'
  284. r'(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?'
  285. r'.*?(?P<notnull>(?:NOT )NULL)?'
  286. % quotes
  287. )
  288. # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
  289. # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
  290. # KEY_BLOCK_SIZE size | WITH PARSER name
  291. self._re_key = _re_compile(
  292. r' '
  293. r'(?:(?P<type>\S+) )?KEY'
  294. r'(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?'
  295. r'(?: +USING +(?P<using_pre>\S+))?'
  296. r' +\((?P<columns>.+?)\)'
  297. r'(?: +USING +(?P<using_post>\S+))?'
  298. r'(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?'
  299. r'(?: +WITH PARSER +(?P<parser>\S+))?'
  300. r'(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?'
  301. r',?$'
  302. % quotes
  303. )
  304. # CONSTRAINT `name` FOREIGN KEY (`local_col`)
  305. # REFERENCES `remote` (`remote_col`)
  306. # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
  307. # ON DELETE CASCADE ON UPDATE RESTRICT
  308. #
  309. # unique constraints come back as KEYs
  310. kw = quotes.copy()
  311. kw['on'] = 'RESTRICT|CASCADE|SET NULL|NOACTION'
  312. self._re_constraint = _re_compile(
  313. r' '
  314. r'CONSTRAINT +'
  315. r'%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +'
  316. r'FOREIGN KEY +'
  317. r'\((?P<local>[^\)]+?)\) REFERENCES +'
  318. r'(?P<table>%(iq)s[^%(fq)s]+%(fq)s'
  319. r'(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +'
  320. r'\((?P<foreign>[^\)]+?)\)'
  321. r'(?: +(?P<match>MATCH \w+))?'
  322. r'(?: +ON DELETE (?P<ondelete>%(on)s))?'
  323. r'(?: +ON UPDATE (?P<onupdate>%(on)s))?'
  324. % kw
  325. )
  326. # PARTITION
  327. #
  328. # punt!
  329. self._re_partition = _re_compile(r'(?:.*)(?:SUB)?PARTITION(?:.*)')
  330. # Table-level options (COLLATE, ENGINE, etc.)
  331. # Do the string options first, since they have quoted
  332. # strings we need to get rid of.
  333. for option in _options_of_type_string:
  334. self._add_option_string(option)
  335. for option in ('ENGINE', 'TYPE', 'AUTO_INCREMENT',
  336. 'AVG_ROW_LENGTH', 'CHARACTER SET',
  337. 'DEFAULT CHARSET', 'CHECKSUM',
  338. 'COLLATE', 'DELAY_KEY_WRITE', 'INSERT_METHOD',
  339. 'MAX_ROWS', 'MIN_ROWS', 'PACK_KEYS', 'ROW_FORMAT',
  340. 'KEY_BLOCK_SIZE'):
  341. self._add_option_word(option)
  342. self._add_option_regex('UNION', r'\([^\)]+\)')
  343. self._add_option_regex('TABLESPACE', r'.*? STORAGE DISK')
  344. self._add_option_regex(
  345. 'RAID_TYPE',
  346. r'\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+')
  347. _optional_equals = r'(?:\s*(?:=\s*)|\s+)'
  348. def _add_option_string(self, directive):
  349. regex = (r'(?P<directive>%s)%s'
  350. r"'(?P<val>(?:[^']|'')*?)'(?!')" %
  351. (re.escape(directive), self._optional_equals))
  352. self._pr_options.append(_pr_compile(
  353. regex, lambda v: v.replace("\\\\", "\\").replace("''", "'")
  354. ))
  355. def _add_option_word(self, directive):
  356. regex = (r'(?P<directive>%s)%s'
  357. r'(?P<val>\w+)' %
  358. (re.escape(directive), self._optional_equals))
  359. self._pr_options.append(_pr_compile(regex))
  360. def _add_option_regex(self, directive, regex):
  361. regex = (r'(?P<directive>%s)%s'
  362. r'(?P<val>%s)' %
  363. (re.escape(directive), self._optional_equals, regex))
  364. self._pr_options.append(_pr_compile(regex))
  365. _options_of_type_string = ('COMMENT', 'DATA DIRECTORY', 'INDEX DIRECTORY',
  366. 'PASSWORD', 'CONNECTION')
  367. def _pr_compile(regex, cleanup=None):
  368. """Prepare a 2-tuple of compiled regex and callable."""
  369. return (_re_compile(regex), cleanup)
  370. def _re_compile(regex):
  371. """Compile a string to regex, I and UNICODE."""
  372. return re.compile(regex, re.I | re.UNICODE)