# utf_16.py (4.0 KB)
""" Python 'utf-16' Codec

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs
import sys
  6. ### Codec APIs
  7. encode = codecs.utf_16_encode
  8. def decode(input, errors='strict'):
  9. return codecs.utf_16_decode(input, errors, True)
  10. class IncrementalEncoder(codecs.IncrementalEncoder):
  11. def __init__(self, errors='strict'):
  12. codecs.IncrementalEncoder.__init__(self, errors)
  13. self.encoder = None
  14. def encode(self, input, final=False):
  15. if self.encoder is None:
  16. result = codecs.utf_16_encode(input, self.errors)[0]
  17. if sys.byteorder == 'little':
  18. self.encoder = codecs.utf_16_le_encode
  19. else:
  20. self.encoder = codecs.utf_16_be_encode
  21. return result
  22. return self.encoder(input, self.errors)[0]
  23. def reset(self):
  24. codecs.IncrementalEncoder.reset(self)
  25. self.encoder = None
  26. def getstate(self):
  27. # state info we return to the caller:
  28. # 0: stream is in natural order for this platform
  29. # 2: endianness hasn't been determined yet
  30. # (we're never writing in unnatural order)
  31. return (2 if self.encoder is None else 0)
  32. def setstate(self, state):
  33. if state:
  34. self.encoder = None
  35. else:
  36. if sys.byteorder == 'little':
  37. self.encoder = codecs.utf_16_le_encode
  38. else:
  39. self.encoder = codecs.utf_16_be_encode
  40. class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
  41. def __init__(self, errors='strict'):
  42. codecs.BufferedIncrementalDecoder.__init__(self, errors)
  43. self.decoder = None
  44. def _buffer_decode(self, input, errors, final):
  45. if self.decoder is None:
  46. (output, consumed, byteorder) = \
  47. codecs.utf_16_ex_decode(input, errors, 0, final)
  48. if byteorder == -1:
  49. self.decoder = codecs.utf_16_le_decode
  50. elif byteorder == 1:
  51. self.decoder = codecs.utf_16_be_decode
  52. elif consumed >= 2:
  53. raise UnicodeError("UTF-16 stream does not start with BOM")
  54. return (output, consumed)
  55. return self.decoder(input, self.errors, final)
  56. def reset(self):
  57. codecs.BufferedIncrementalDecoder.reset(self)
  58. self.decoder = None
  59. class StreamWriter(codecs.StreamWriter):
  60. def __init__(self, stream, errors='strict'):
  61. codecs.StreamWriter.__init__(self, stream, errors)
  62. self.encoder = None
  63. def reset(self):
  64. codecs.StreamWriter.reset(self)
  65. self.encoder = None
  66. def encode(self, input, errors='strict'):
  67. if self.encoder is None:
  68. result = codecs.utf_16_encode(input, errors)
  69. if sys.byteorder == 'little':
  70. self.encoder = codecs.utf_16_le_encode
  71. else:
  72. self.encoder = codecs.utf_16_be_encode
  73. return result
  74. else:
  75. return self.encoder(input, errors)
  76. class StreamReader(codecs.StreamReader):
  77. def reset(self):
  78. codecs.StreamReader.reset(self)
  79. try:
  80. del self.decode
  81. except AttributeError:
  82. pass
  83. def decode(self, input, errors='strict'):
  84. (object, consumed, byteorder) = \
  85. codecs.utf_16_ex_decode(input, errors, 0, False)
  86. if byteorder == -1:
  87. self.decode = codecs.utf_16_le_decode
  88. elif byteorder == 1:
  89. self.decode = codecs.utf_16_be_decode
  90. elif consumed>=2:
  91. raise UnicodeError,"UTF-16 stream does not start with BOM"
  92. return (object, consumed)
  93. ### encodings module API
  94. def getregentry():
  95. return codecs.CodecInfo(
  96. name='utf-16',
  97. encode=encode,
  98. decode=decode,
  99. incrementalencoder=IncrementalEncoder,
  100. incrementaldecoder=IncrementalDecoder,
  101. streamreader=StreamReader,
  102. streamwriter=StreamWriter,
  103. )