install.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. """
  2. Operations on existing wheel files, including basic installation.
  3. """
  4. # XXX see patched pip to install
  5. import sys
  6. import warnings
  7. import os.path
  8. import re
  9. import zipfile
  10. import hashlib
  11. import csv
  12. import shutil
  13. try:
  14. _big_number = sys.maxsize
  15. except NameError:
  16. _big_number = sys.maxint
  17. from wheel.decorator import reify
  18. from wheel.util import (urlsafe_b64encode, from_json, urlsafe_b64decode,
  19. native, binary, HashingFile)
  20. from wheel import signatures
  21. from wheel.pkginfo import read_pkg_info_bytes
  22. from wheel.util import open_for_csv
  23. from .pep425tags import get_supported
  24. from .paths import get_install_paths
# The next major version after this version of the 'wheel' tool.
# WheelFile.check_version() raises for any wheel whose Wheel-Version is
# greater than or equal to this tuple.
VERSION_TOO_HIGH = (1, 0)

# Matcher for a wheel file name ("...-pyver-abi-plat.whl") or a ".dist-info"
# directory name. It exposes the named groups namever, name, ver, build,
# pyver, abi and plat. The name is bound to the compiled pattern's ``match``
# method, so it is called as WHEEL_INFO_RE(basename) and returns a match
# object or None.
# Non-greedy matching of an optional build number may be too clever (more
# invalid wheel filenames will match). Separate regex for .dist-info?
WHEEL_INFO_RE = re.compile(
    r"""^(?P<namever>(?P<name>.+?)(-(?P<ver>\d.+?))?)
    ((-(?P<build>\d.*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)
    \.whl|\.dist-info)$""",
    re.VERBOSE).match
  34. def parse_version(version):
  35. """Use parse_version from pkg_resources or distutils as available."""
  36. global parse_version
  37. try:
  38. from pkg_resources import parse_version
  39. except ImportError:
  40. from distutils.version import LooseVersion as parse_version
  41. return parse_version(version)
  42. class BadWheelFile(ValueError):
  43. pass
  44. class WheelFile(object):
  45. """Parse wheel-specific attributes from a wheel (.whl) file and offer
  46. basic installation and verification support.
  47. WheelFile can be used to simply parse a wheel filename by avoiding the
  48. methods that require the actual file contents."""
  49. WHEEL_INFO = "WHEEL"
  50. RECORD = "RECORD"
  51. def __init__(self,
  52. filename,
  53. fp=None,
  54. append=False,
  55. context=get_supported):
  56. """
  57. :param fp: A seekable file-like object or None to open(filename).
  58. :param append: Open archive in append mode.
  59. :param context: Function returning list of supported tags. Wheels
  60. must have the same context to be sortable.
  61. """
  62. self.filename = filename
  63. self.fp = fp
  64. self.append = append
  65. self.context = context
  66. basename = os.path.basename(filename)
  67. self.parsed_filename = WHEEL_INFO_RE(basename)
  68. if not basename.endswith('.whl') or self.parsed_filename is None:
  69. raise BadWheelFile("Bad filename '%s'" % filename)
  70. def __repr__(self):
  71. return self.filename
  72. @property
  73. def distinfo_name(self):
  74. return "%s.dist-info" % self.parsed_filename.group('namever')
  75. @property
  76. def datadir_name(self):
  77. return "%s.data" % self.parsed_filename.group('namever')
  78. @property
  79. def record_name(self):
  80. return "%s/%s" % (self.distinfo_name, self.RECORD)
  81. @property
  82. def wheelinfo_name(self):
  83. return "%s/%s" % (self.distinfo_name, self.WHEEL_INFO)
  84. @property
  85. def tags(self):
  86. """A wheel file is compatible with the Cartesian product of the
  87. period-delimited tags in its filename.
  88. To choose a wheel file among several candidates having the same
  89. distribution version 'ver', an installer ranks each triple of
  90. (pyver, abi, plat) that its Python installation can run, sorting
  91. the wheels by the best-ranked tag it supports and then by their
  92. arity which is just len(list(compatibility_tags)).
  93. """
  94. tags = self.parsed_filename.groupdict()
  95. for pyver in tags['pyver'].split('.'):
  96. for abi in tags['abi'].split('.'):
  97. for plat in tags['plat'].split('.'):
  98. yield (pyver, abi, plat)
  99. compatibility_tags = tags
  100. @property
  101. def arity(self):
  102. """The number of compatibility tags the wheel declares."""
  103. return len(list(self.compatibility_tags))
  104. @property
  105. def rank(self):
  106. """
  107. Lowest index of any of this wheel's tags in self.context(), and the
  108. arity e.g. (0, 1)
  109. """
  110. return self.compatibility_rank(self.context())
  111. @property
  112. def compatible(self):
  113. return self.rank[0] != _big_number # bad API!
  114. # deprecated:
  115. def compatibility_rank(self, supported):
  116. """Rank the wheel against the supported tags. Smaller ranks are more
  117. compatible!
  118. :param supported: A list of compatibility tags that the current
  119. Python implemenation can run.
  120. """
  121. preferences = []
  122. for tag in self.compatibility_tags:
  123. try:
  124. preferences.append(supported.index(tag))
  125. # Tag not present
  126. except ValueError:
  127. pass
  128. if len(preferences):
  129. return (min(preferences), self.arity)
  130. return (_big_number, 0)
  131. # deprecated
  132. def supports_current_python(self, x):
  133. assert self.context == x, 'context mismatch'
  134. return self.compatible
  135. # Comparability.
  136. # Wheels are equal if they refer to the same file.
  137. # If two wheels are not equal, compare based on (in this order):
  138. # 1. Name
  139. # 2. Version
  140. # 3. Compatibility rank
  141. # 4. Filename (as a tiebreaker)
  142. @property
  143. def _sort_key(self):
  144. return (self.parsed_filename.group('name'),
  145. parse_version(self.parsed_filename.group('ver')),
  146. tuple(-x for x in self.rank),
  147. self.filename)
  148. def __eq__(self, other):
  149. return self.filename == other.filename
  150. def __ne__(self, other):
  151. return self.filename != other.filename
  152. def __lt__(self, other):
  153. if self.context != other.context:
  154. raise TypeError("{0}.context != {1}.context".format(self, other))
  155. return self._sort_key < other._sort_key
  156. # XXX prune
  157. sn = self.parsed_filename.group('name')
  158. on = other.parsed_filename.group('name')
  159. if sn != on:
  160. return sn < on
  161. sv = parse_version(self.parsed_filename.group('ver'))
  162. ov = parse_version(other.parsed_filename.group('ver'))
  163. if sv != ov:
  164. return sv < ov
  165. # Compatibility
  166. if self.context != other.context:
  167. raise TypeError("{0}.context != {1}.context".format(self, other))
  168. sc = self.rank
  169. oc = other.rank
  170. if sc != None and oc != None and sc != oc:
  171. # Smaller compatibility ranks are "better" than larger ones,
  172. # so we have to reverse the sense of the comparison here!
  173. return sc > oc
  174. elif sc == None and oc != None:
  175. return False
  176. return self.filename < other.filename
  177. def __gt__(self, other):
  178. return other < self
  179. def __le__(self, other):
  180. return self == other or self < other
  181. def __ge__(self, other):
  182. return self == other or other < self
  183. #
  184. # Methods using the file's contents:
  185. #
  186. @reify
  187. def zipfile(self):
  188. mode = "r"
  189. if self.append:
  190. mode = "a"
  191. vzf = VerifyingZipFile(self.fp if self.fp else self.filename, mode)
  192. if not self.append:
  193. self.verify(vzf)
  194. return vzf
  195. @reify
  196. def parsed_wheel_info(self):
  197. """Parse wheel metadata (the .data/WHEEL file)"""
  198. return read_pkg_info_bytes(self.zipfile.read(self.wheelinfo_name))
  199. def check_version(self):
  200. version = self.parsed_wheel_info['Wheel-Version']
  201. if tuple(map(int, version.split('.'))) >= VERSION_TOO_HIGH:
  202. raise ValueError("Wheel version is too high")
  203. @reify
  204. def install_paths(self):
  205. """
  206. Consult distutils to get the install paths for our dist. A dict with
  207. ('purelib', 'platlib', 'headers', 'scripts', 'data').
  208. We use the name from our filename as the dist name, which means headers
  209. could be installed in the wrong place if the filesystem-escaped name
  210. is different than the Name. Who cares?
  211. """
  212. name = self.parsed_filename.group('name')
  213. return get_install_paths(name)
  214. def install(self, force=False, overrides={}):
  215. """
  216. Install the wheel into site-packages.
  217. """
  218. # Utility to get the target directory for a particular key
  219. def get_path(key):
  220. return overrides.get(key) or self.install_paths[key]
  221. # The base target location is either purelib or platlib
  222. if self.parsed_wheel_info['Root-Is-Purelib'] == 'true':
  223. root = get_path('purelib')
  224. else:
  225. root = get_path('platlib')
  226. # Parse all the names in the archive
  227. name_trans = {}
  228. for info in self.zipfile.infolist():
  229. name = info.filename
  230. # Zip files can contain entries representing directories.
  231. # These end in a '/'.
  232. # We ignore these, as we create directories on demand.
  233. if name.endswith('/'):
  234. continue
  235. # Pathnames in a zipfile namelist are always /-separated.
  236. # In theory, paths could start with ./ or have other oddities
  237. # but this won't happen in practical cases of well-formed wheels.
  238. # We'll cover the simple case of an initial './' as it's both easy
  239. # to do and more common than most other oddities.
  240. if name.startswith('./'):
  241. name = name[2:]
  242. # Split off the base directory to identify files that are to be
  243. # installed in non-root locations
  244. basedir, sep, filename = name.partition('/')
  245. if sep and basedir == self.datadir_name:
  246. # Data file. Target destination is elsewhere
  247. key, sep, filename = filename.partition('/')
  248. if not sep:
  249. raise ValueError("Invalid filename in wheel: {0}".format(name))
  250. target = get_path(key)
  251. else:
  252. # Normal file. Target destination is root
  253. key = ''
  254. target = root
  255. filename = name
  256. # Map the actual filename from the zipfile to its intended target
  257. # directory and the pathname relative to that directory.
  258. dest = os.path.normpath(os.path.join(target, filename))
  259. name_trans[info] = (key, target, filename, dest)
  260. # We're now ready to start processing the actual install. The process
  261. # is as follows:
  262. # 1. Prechecks - is the wheel valid, is its declared architecture
  263. # OK, etc. [[Responsibility of the caller]]
  264. # 2. Overwrite check - do any of the files to be installed already
  265. # exist?
  266. # 3. Actual install - put the files in their target locations.
  267. # 4. Update RECORD - write a suitably modified RECORD file to
  268. # reflect the actual installed paths.
  269. if not force:
  270. for info, v in name_trans.items():
  271. k = info.filename
  272. key, target, filename, dest = v
  273. if os.path.exists(dest):
  274. raise ValueError("Wheel file {0} would overwrite {1}. Use force if this is intended".format(k, dest))
  275. # Get the name of our executable, for use when replacing script
  276. # wrapper hashbang lines.
  277. # We encode it using getfilesystemencoding, as that is "the name of
  278. # the encoding used to convert Unicode filenames into system file
  279. # names".
  280. exename = sys.executable.encode(sys.getfilesystemencoding())
  281. record_data = []
  282. record_name = self.distinfo_name + '/RECORD'
  283. for info, (key, target, filename, dest) in name_trans.items():
  284. name = info.filename
  285. source = self.zipfile.open(info)
  286. # Skip the RECORD file
  287. if name == record_name:
  288. continue
  289. ddir = os.path.dirname(dest)
  290. if not os.path.isdir(ddir):
  291. os.makedirs(ddir)
  292. destination = HashingFile(open(dest, 'wb'))
  293. if key == 'scripts':
  294. hashbang = source.readline()
  295. if hashbang.startswith(b'#!python'):
  296. hashbang = b'#!' + exename + binary(os.linesep)
  297. destination.write(hashbang)
  298. shutil.copyfileobj(source, destination)
  299. reldest = os.path.relpath(dest, root)
  300. reldest.replace(os.sep, '/')
  301. record_data.append((reldest, destination.digest(), destination.length))
  302. destination.close()
  303. source.close()
  304. # preserve attributes (especially +x bit for scripts)
  305. attrs = info.external_attr >> 16
  306. if attrs: # tends to be 0 if Windows.
  307. os.chmod(dest, info.external_attr >> 16)
  308. record_name = os.path.join(root, self.record_name)
  309. writer = csv.writer(open_for_csv(record_name, 'w+'))
  310. for reldest, digest, length in sorted(record_data):
  311. writer.writerow((reldest, digest, length))
  312. writer.writerow((self.record_name, '', ''))
  313. def verify(self, zipfile=None):
  314. """Configure the VerifyingZipFile `zipfile` by verifying its signature
  315. and setting expected hashes for every hash in RECORD.
  316. Caller must complete the verification process by completely reading
  317. every file in the archive (e.g. with extractall)."""
  318. sig = None
  319. if zipfile is None:
  320. zipfile = self.zipfile
  321. zipfile.strict = True
  322. record_name = '/'.join((self.distinfo_name, 'RECORD'))
  323. sig_name = '/'.join((self.distinfo_name, 'RECORD.jws'))
  324. # tolerate s/mime signatures:
  325. smime_sig_name = '/'.join((self.distinfo_name, 'RECORD.p7s'))
  326. zipfile.set_expected_hash(record_name, None)
  327. zipfile.set_expected_hash(sig_name, None)
  328. zipfile.set_expected_hash(smime_sig_name, None)
  329. record = zipfile.read(record_name)
  330. record_digest = urlsafe_b64encode(hashlib.sha256(record).digest())
  331. try:
  332. sig = from_json(native(zipfile.read(sig_name)))
  333. except KeyError: # no signature
  334. pass
  335. if sig:
  336. headers, payload = signatures.verify(sig)
  337. if payload['hash'] != "sha256=" + native(record_digest):
  338. msg = "RECORD.sig claimed RECORD hash {0} != computed hash {1}."
  339. raise BadWheelFile(msg.format(payload['hash'],
  340. native(record_digest)))
  341. reader = csv.reader((native(r) for r in record.splitlines()))
  342. for row in reader:
  343. filename = row[0]
  344. hash = row[1]
  345. if not hash:
  346. if filename not in (record_name, sig_name):
  347. sys.stderr.write("%s has no hash!\n" % filename)
  348. continue
  349. algo, data = row[1].split('=', 1)
  350. assert algo == "sha256", "Unsupported hash algorithm"
  351. zipfile.set_expected_hash(filename, urlsafe_b64decode(binary(data)))
  352. class VerifyingZipFile(zipfile.ZipFile):
  353. """ZipFile that can assert that each of its extracted contents matches
  354. an expected sha256 hash. Note that each file must be completly read in
  355. order for its hash to be checked."""
  356. def __init__(self, file, mode="r",
  357. compression=zipfile.ZIP_STORED,
  358. allowZip64=False):
  359. zipfile.ZipFile.__init__(self, file, mode, compression, allowZip64)
  360. self.strict = False
  361. self._expected_hashes = {}
  362. self._hash_algorithm = hashlib.sha256
  363. def set_expected_hash(self, name, hash):
  364. """
  365. :param name: name of zip entry
  366. :param hash: bytes of hash (or None for "don't care")
  367. """
  368. self._expected_hashes[name] = hash
  369. def open(self, name_or_info, mode="r", pwd=None):
  370. """Return file-like object for 'name'."""
  371. # A non-monkey-patched version would contain most of zipfile.py
  372. ef = zipfile.ZipFile.open(self, name_or_info, mode, pwd)
  373. if isinstance(name_or_info, zipfile.ZipInfo):
  374. name = name_or_info.filename
  375. else:
  376. name = name_or_info
  377. if (name in self._expected_hashes
  378. and self._expected_hashes[name] != None):
  379. expected_hash = self._expected_hashes[name]
  380. try:
  381. _update_crc_orig = ef._update_crc
  382. except AttributeError:
  383. warnings.warn('Need ZipExtFile._update_crc to implement '
  384. 'file hash verification (in Python >= 2.7)')
  385. return ef
  386. running_hash = self._hash_algorithm()
  387. if hasattr(ef, '_eof'): # py33
  388. def _update_crc(data):
  389. _update_crc_orig(data)
  390. running_hash.update(data)
  391. if ef._eof and running_hash.digest() != expected_hash:
  392. raise BadWheelFile("Bad hash for file %r" % ef.name)
  393. else:
  394. def _update_crc(data, eof=None):
  395. _update_crc_orig(data, eof=eof)
  396. running_hash.update(data)
  397. if eof and running_hash.digest() != expected_hash:
  398. raise BadWheelFile("Bad hash for file %r" % ef.name)
  399. ef._update_crc = _update_crc
  400. elif self.strict and name not in self._expected_hashes:
  401. raise BadWheelFile("No expected hash for file %r" % ef.name)
  402. return ef
  403. def pop(self):
  404. """Truncate the last file off this zipfile.
  405. Assumes infolist() is in the same order as the files (true for
  406. ordinary zip files created by Python)"""
  407. if not self.fp:
  408. raise RuntimeError(
  409. "Attempt to pop from ZIP archive that was already closed")
  410. last = self.infolist().pop()
  411. del self.NameToInfo[last.filename]
  412. self.fp.seek(last.header_offset, os.SEEK_SET)
  413. self.fp.truncate()
  414. self._didModify = True