aliases.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python-specific codec
        aliases have also been added.

"""
  13. aliases = {
  14. # Please keep this list sorted alphabetically by value !
  15. # ascii codec
  16. '646' : 'ascii',
  17. 'ansi_x3.4_1968' : 'ascii',
  18. 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
  19. 'ansi_x3.4_1986' : 'ascii',
  20. 'cp367' : 'ascii',
  21. 'csascii' : 'ascii',
  22. 'ibm367' : 'ascii',
  23. 'iso646_us' : 'ascii',
  24. 'iso_646.irv_1991' : 'ascii',
  25. 'iso_ir_6' : 'ascii',
  26. 'us' : 'ascii',
  27. 'us_ascii' : 'ascii',
  28. # base64_codec codec
  29. 'base64' : 'base64_codec',
  30. 'base_64' : 'base64_codec',
  31. # big5 codec
  32. 'big5_tw' : 'big5',
  33. 'csbig5' : 'big5',
  34. # big5hkscs codec
  35. 'big5_hkscs' : 'big5hkscs',
  36. 'hkscs' : 'big5hkscs',
  37. # bz2_codec codec
  38. 'bz2' : 'bz2_codec',
  39. # cp037 codec
  40. '037' : 'cp037',
  41. 'csibm037' : 'cp037',
  42. 'ebcdic_cp_ca' : 'cp037',
  43. 'ebcdic_cp_nl' : 'cp037',
  44. 'ebcdic_cp_us' : 'cp037',
  45. 'ebcdic_cp_wt' : 'cp037',
  46. 'ibm037' : 'cp037',
  47. 'ibm039' : 'cp037',
  48. # cp1026 codec
  49. '1026' : 'cp1026',
  50. 'csibm1026' : 'cp1026',
  51. 'ibm1026' : 'cp1026',
  52. # cp1140 codec
  53. '1140' : 'cp1140',
  54. 'ibm1140' : 'cp1140',
  55. # cp1250 codec
  56. '1250' : 'cp1250',
  57. 'windows_1250' : 'cp1250',
  58. # cp1251 codec
  59. '1251' : 'cp1251',
  60. 'windows_1251' : 'cp1251',
  61. # cp1252 codec
  62. '1252' : 'cp1252',
  63. 'windows_1252' : 'cp1252',
  64. # cp1253 codec
  65. '1253' : 'cp1253',
  66. 'windows_1253' : 'cp1253',
  67. # cp1254 codec
  68. '1254' : 'cp1254',
  69. 'windows_1254' : 'cp1254',
  70. # cp1255 codec
  71. '1255' : 'cp1255',
  72. 'windows_1255' : 'cp1255',
  73. # cp1256 codec
  74. '1256' : 'cp1256',
  75. 'windows_1256' : 'cp1256',
  76. # cp1257 codec
  77. '1257' : 'cp1257',
  78. 'windows_1257' : 'cp1257',
  79. # cp1258 codec
  80. '1258' : 'cp1258',
  81. 'windows_1258' : 'cp1258',
  82. # cp424 codec
  83. '424' : 'cp424',
  84. 'csibm424' : 'cp424',
  85. 'ebcdic_cp_he' : 'cp424',
  86. 'ibm424' : 'cp424',
  87. # cp437 codec
  88. '437' : 'cp437',
  89. 'cspc8codepage437' : 'cp437',
  90. 'ibm437' : 'cp437',
  91. # cp500 codec
  92. '500' : 'cp500',
  93. 'csibm500' : 'cp500',
  94. 'ebcdic_cp_be' : 'cp500',
  95. 'ebcdic_cp_ch' : 'cp500',
  96. 'ibm500' : 'cp500',
  97. # cp775 codec
  98. '775' : 'cp775',
  99. 'cspc775baltic' : 'cp775',
  100. 'ibm775' : 'cp775',
  101. # cp850 codec
  102. '850' : 'cp850',
  103. 'cspc850multilingual' : 'cp850',
  104. 'ibm850' : 'cp850',
  105. # cp852 codec
  106. '852' : 'cp852',
  107. 'cspcp852' : 'cp852',
  108. 'ibm852' : 'cp852',
  109. # cp855 codec
  110. '855' : 'cp855',
  111. 'csibm855' : 'cp855',
  112. 'ibm855' : 'cp855',
  113. # cp857 codec
  114. '857' : 'cp857',
  115. 'csibm857' : 'cp857',
  116. 'ibm857' : 'cp857',
  117. # cp858 codec
  118. '858' : 'cp858',
  119. 'csibm858' : 'cp858',
  120. 'ibm858' : 'cp858',
  121. # cp860 codec
  122. '860' : 'cp860',
  123. 'csibm860' : 'cp860',
  124. 'ibm860' : 'cp860',
  125. # cp861 codec
  126. '861' : 'cp861',
  127. 'cp_is' : 'cp861',
  128. 'csibm861' : 'cp861',
  129. 'ibm861' : 'cp861',
  130. # cp862 codec
  131. '862' : 'cp862',
  132. 'cspc862latinhebrew' : 'cp862',
  133. 'ibm862' : 'cp862',
  134. # cp863 codec
  135. '863' : 'cp863',
  136. 'csibm863' : 'cp863',
  137. 'ibm863' : 'cp863',
  138. # cp864 codec
  139. '864' : 'cp864',
  140. 'csibm864' : 'cp864',
  141. 'ibm864' : 'cp864',
  142. # cp865 codec
  143. '865' : 'cp865',
  144. 'csibm865' : 'cp865',
  145. 'ibm865' : 'cp865',
  146. # cp866 codec
  147. '866' : 'cp866',
  148. 'csibm866' : 'cp866',
  149. 'ibm866' : 'cp866',
  150. # cp869 codec
  151. '869' : 'cp869',
  152. 'cp_gr' : 'cp869',
  153. 'csibm869' : 'cp869',
  154. 'ibm869' : 'cp869',
  155. # cp932 codec
  156. '932' : 'cp932',
  157. 'ms932' : 'cp932',
  158. 'mskanji' : 'cp932',
  159. 'ms_kanji' : 'cp932',
  160. # cp949 codec
  161. '949' : 'cp949',
  162. 'ms949' : 'cp949',
  163. 'uhc' : 'cp949',
  164. # cp950 codec
  165. '950' : 'cp950',
  166. 'ms950' : 'cp950',
  167. # euc_jis_2004 codec
  168. 'jisx0213' : 'euc_jis_2004',
  169. 'eucjis2004' : 'euc_jis_2004',
  170. 'euc_jis2004' : 'euc_jis_2004',
  171. # euc_jisx0213 codec
  172. 'eucjisx0213' : 'euc_jisx0213',
  173. # euc_jp codec
  174. 'eucjp' : 'euc_jp',
  175. 'ujis' : 'euc_jp',
  176. 'u_jis' : 'euc_jp',
  177. # euc_kr codec
  178. 'euckr' : 'euc_kr',
  179. 'korean' : 'euc_kr',
  180. 'ksc5601' : 'euc_kr',
  181. 'ks_c_5601' : 'euc_kr',
  182. 'ks_c_5601_1987' : 'euc_kr',
  183. 'ksx1001' : 'euc_kr',
  184. 'ks_x_1001' : 'euc_kr',
  185. # gb18030 codec
  186. 'gb18030_2000' : 'gb18030',
  187. # gb2312 codec
  188. 'chinese' : 'gb2312',
  189. 'csiso58gb231280' : 'gb2312',
  190. 'euc_cn' : 'gb2312',
  191. 'euccn' : 'gb2312',
  192. 'eucgb2312_cn' : 'gb2312',
  193. 'gb2312_1980' : 'gb2312',
  194. 'gb2312_80' : 'gb2312',
  195. 'iso_ir_58' : 'gb2312',
  196. # gbk codec
  197. '936' : 'gbk',
  198. 'cp936' : 'gbk',
  199. 'ms936' : 'gbk',
  200. # hex_codec codec
  201. 'hex' : 'hex_codec',
  202. # hp_roman8 codec
  203. 'roman8' : 'hp_roman8',
  204. 'r8' : 'hp_roman8',
  205. 'csHPRoman8' : 'hp_roman8',
  206. # hz codec
  207. 'hzgb' : 'hz',
  208. 'hz_gb' : 'hz',
  209. 'hz_gb_2312' : 'hz',
  210. # iso2022_jp codec
  211. 'csiso2022jp' : 'iso2022_jp',
  212. 'iso2022jp' : 'iso2022_jp',
  213. 'iso_2022_jp' : 'iso2022_jp',
  214. # iso2022_jp_1 codec
  215. 'iso2022jp_1' : 'iso2022_jp_1',
  216. 'iso_2022_jp_1' : 'iso2022_jp_1',
  217. # iso2022_jp_2 codec
  218. 'iso2022jp_2' : 'iso2022_jp_2',
  219. 'iso_2022_jp_2' : 'iso2022_jp_2',
  220. # iso2022_jp_2004 codec
  221. 'iso_2022_jp_2004' : 'iso2022_jp_2004',
  222. 'iso2022jp_2004' : 'iso2022_jp_2004',
  223. # iso2022_jp_3 codec
  224. 'iso2022jp_3' : 'iso2022_jp_3',
  225. 'iso_2022_jp_3' : 'iso2022_jp_3',
  226. # iso2022_jp_ext codec
  227. 'iso2022jp_ext' : 'iso2022_jp_ext',
  228. 'iso_2022_jp_ext' : 'iso2022_jp_ext',
  229. # iso2022_kr codec
  230. 'csiso2022kr' : 'iso2022_kr',
  231. 'iso2022kr' : 'iso2022_kr',
  232. 'iso_2022_kr' : 'iso2022_kr',
  233. # iso8859_10 codec
  234. 'csisolatin6' : 'iso8859_10',
  235. 'iso_8859_10' : 'iso8859_10',
  236. 'iso_8859_10_1992' : 'iso8859_10',
  237. 'iso_ir_157' : 'iso8859_10',
  238. 'l6' : 'iso8859_10',
  239. 'latin6' : 'iso8859_10',
  240. # iso8859_11 codec
  241. 'thai' : 'iso8859_11',
  242. 'iso_8859_11' : 'iso8859_11',
  243. 'iso_8859_11_2001' : 'iso8859_11',
  244. # iso8859_13 codec
  245. 'iso_8859_13' : 'iso8859_13',
  246. 'l7' : 'iso8859_13',
  247. 'latin7' : 'iso8859_13',
  248. # iso8859_14 codec
  249. 'iso_8859_14' : 'iso8859_14',
  250. 'iso_8859_14_1998' : 'iso8859_14',
  251. 'iso_celtic' : 'iso8859_14',
  252. 'iso_ir_199' : 'iso8859_14',
  253. 'l8' : 'iso8859_14',
  254. 'latin8' : 'iso8859_14',
  255. # iso8859_15 codec
  256. 'iso_8859_15' : 'iso8859_15',
  257. 'l9' : 'iso8859_15',
  258. 'latin9' : 'iso8859_15',
  259. # iso8859_16 codec
  260. 'iso_8859_16' : 'iso8859_16',
  261. 'iso_8859_16_2001' : 'iso8859_16',
  262. 'iso_ir_226' : 'iso8859_16',
  263. 'l10' : 'iso8859_16',
  264. 'latin10' : 'iso8859_16',
  265. # iso8859_2 codec
  266. 'csisolatin2' : 'iso8859_2',
  267. 'iso_8859_2' : 'iso8859_2',
  268. 'iso_8859_2_1987' : 'iso8859_2',
  269. 'iso_ir_101' : 'iso8859_2',
  270. 'l2' : 'iso8859_2',
  271. 'latin2' : 'iso8859_2',
  272. # iso8859_3 codec
  273. 'csisolatin3' : 'iso8859_3',
  274. 'iso_8859_3' : 'iso8859_3',
  275. 'iso_8859_3_1988' : 'iso8859_3',
  276. 'iso_ir_109' : 'iso8859_3',
  277. 'l3' : 'iso8859_3',
  278. 'latin3' : 'iso8859_3',
  279. # iso8859_4 codec
  280. 'csisolatin4' : 'iso8859_4',
  281. 'iso_8859_4' : 'iso8859_4',
  282. 'iso_8859_4_1988' : 'iso8859_4',
  283. 'iso_ir_110' : 'iso8859_4',
  284. 'l4' : 'iso8859_4',
  285. 'latin4' : 'iso8859_4',
  286. # iso8859_5 codec
  287. 'csisolatincyrillic' : 'iso8859_5',
  288. 'cyrillic' : 'iso8859_5',
  289. 'iso_8859_5' : 'iso8859_5',
  290. 'iso_8859_5_1988' : 'iso8859_5',
  291. 'iso_ir_144' : 'iso8859_5',
  292. # iso8859_6 codec
  293. 'arabic' : 'iso8859_6',
  294. 'asmo_708' : 'iso8859_6',
  295. 'csisolatinarabic' : 'iso8859_6',
  296. 'ecma_114' : 'iso8859_6',
  297. 'iso_8859_6' : 'iso8859_6',
  298. 'iso_8859_6_1987' : 'iso8859_6',
  299. 'iso_ir_127' : 'iso8859_6',
  300. # iso8859_7 codec
  301. 'csisolatingreek' : 'iso8859_7',
  302. 'ecma_118' : 'iso8859_7',
  303. 'elot_928' : 'iso8859_7',
  304. 'greek' : 'iso8859_7',
  305. 'greek8' : 'iso8859_7',
  306. 'iso_8859_7' : 'iso8859_7',
  307. 'iso_8859_7_1987' : 'iso8859_7',
  308. 'iso_ir_126' : 'iso8859_7',
  309. # iso8859_8 codec
  310. 'csisolatinhebrew' : 'iso8859_8',
  311. 'hebrew' : 'iso8859_8',
  312. 'iso_8859_8' : 'iso8859_8',
  313. 'iso_8859_8_1988' : 'iso8859_8',
  314. 'iso_ir_138' : 'iso8859_8',
  315. # iso8859_9 codec
  316. 'csisolatin5' : 'iso8859_9',
  317. 'iso_8859_9' : 'iso8859_9',
  318. 'iso_8859_9_1989' : 'iso8859_9',
  319. 'iso_ir_148' : 'iso8859_9',
  320. 'l5' : 'iso8859_9',
  321. 'latin5' : 'iso8859_9',
  322. # johab codec
  323. 'cp1361' : 'johab',
  324. 'ms1361' : 'johab',
  325. # koi8_r codec
  326. 'cskoi8r' : 'koi8_r',
  327. # latin_1 codec
  328. #
  329. # Note that the latin_1 codec is implemented internally in C and a
  330. # lot faster than the charmap codec iso8859_1 which uses the same
  331. # encoding. This is why we discourage the use of the iso8859_1
  332. # codec and alias it to latin_1 instead.
  333. #
  334. '8859' : 'latin_1',
  335. 'cp819' : 'latin_1',
  336. 'csisolatin1' : 'latin_1',
  337. 'ibm819' : 'latin_1',
  338. 'iso8859' : 'latin_1',
  339. 'iso8859_1' : 'latin_1',
  340. 'iso_8859_1' : 'latin_1',
  341. 'iso_8859_1_1987' : 'latin_1',
  342. 'iso_ir_100' : 'latin_1',
  343. 'l1' : 'latin_1',
  344. 'latin' : 'latin_1',
  345. 'latin1' : 'latin_1',
  346. # mac_cyrillic codec
  347. 'maccyrillic' : 'mac_cyrillic',
  348. # mac_greek codec
  349. 'macgreek' : 'mac_greek',
  350. # mac_iceland codec
  351. 'maciceland' : 'mac_iceland',
  352. # mac_latin2 codec
  353. 'maccentraleurope' : 'mac_latin2',
  354. 'maclatin2' : 'mac_latin2',
  355. # mac_roman codec
  356. 'macroman' : 'mac_roman',
  357. # mac_turkish codec
  358. 'macturkish' : 'mac_turkish',
  359. # mbcs codec
  360. 'dbcs' : 'mbcs',
  361. # ptcp154 codec
  362. 'csptcp154' : 'ptcp154',
  363. 'pt154' : 'ptcp154',
  364. 'cp154' : 'ptcp154',
  365. 'cyrillic_asian' : 'ptcp154',
  366. # quopri_codec codec
  367. 'quopri' : 'quopri_codec',
  368. 'quoted_printable' : 'quopri_codec',
  369. 'quotedprintable' : 'quopri_codec',
  370. # rot_13 codec
  371. 'rot13' : 'rot_13',
  372. # shift_jis codec
  373. 'csshiftjis' : 'shift_jis',
  374. 'shiftjis' : 'shift_jis',
  375. 'sjis' : 'shift_jis',
  376. 's_jis' : 'shift_jis',
  377. # shift_jis_2004 codec
  378. 'shiftjis2004' : 'shift_jis_2004',
  379. 'sjis_2004' : 'shift_jis_2004',
  380. 's_jis_2004' : 'shift_jis_2004',
  381. # shift_jisx0213 codec
  382. 'shiftjisx0213' : 'shift_jisx0213',
  383. 'sjisx0213' : 'shift_jisx0213',
  384. 's_jisx0213' : 'shift_jisx0213',
  385. # tactis codec
  386. 'tis260' : 'tactis',
  387. # tis_620 codec
  388. 'tis620' : 'tis_620',
  389. 'tis_620_0' : 'tis_620',
  390. 'tis_620_2529_0' : 'tis_620',
  391. 'tis_620_2529_1' : 'tis_620',
  392. 'iso_ir_166' : 'tis_620',
  393. # utf_16 codec
  394. 'u16' : 'utf_16',
  395. 'utf16' : 'utf_16',
  396. # utf_16_be codec
  397. 'unicodebigunmarked' : 'utf_16_be',
  398. 'utf_16be' : 'utf_16_be',
  399. # utf_16_le codec
  400. 'unicodelittleunmarked' : 'utf_16_le',
  401. 'utf_16le' : 'utf_16_le',
  402. # utf_32 codec
  403. 'u32' : 'utf_32',
  404. 'utf32' : 'utf_32',
  405. # utf_32_be codec
  406. 'utf_32be' : 'utf_32_be',
  407. # utf_32_le codec
  408. 'utf_32le' : 'utf_32_le',
  409. # utf_7 codec
  410. 'u7' : 'utf_7',
  411. 'utf7' : 'utf_7',
  412. 'unicode_1_1_utf_7' : 'utf_7',
  413. # utf_8 codec
  414. 'u8' : 'utf_8',
  415. 'utf' : 'utf_8',
  416. 'utf8' : 'utf_8',
  417. 'utf8_ucs2' : 'utf_8',
  418. 'utf8_ucs4' : 'utf_8',
  419. # uu_codec codec
  420. 'uu' : 'uu_codec',
  421. # zlib_codec codec
  422. 'zip' : 'zlib_codec',
  423. 'zlib' : 'zlib_codec',
  424. }