from __future__ import absolute_import

import cgi
import email.utils
import getpass
import json
import logging
import mimetypes
import os
import platform
import re
import shutil
import sys
import tempfile

try:
    import ssl  # noqa
    HAS_TLS = True
except ImportError:
    HAS_TLS = False

from pip._vendor.six.moves.urllib import parse as urllib_parse
from pip._vendor.six.moves.urllib import request as urllib_request

import pip

from pip.exceptions import InstallationError, HashMismatch
from pip.models import PyPI
from pip.utils import (splitext, rmtree, format_size, display_path,
                       backup_dir, ask_path_exists, unpack_file,
                       ARCHIVE_EXTENSIONS, consume, call_subprocess)
from pip.utils.encoding import auto_decode
from pip.utils.filesystem import check_path_owner
from pip.utils.logging import indent_log
from pip.utils.setuptools_build import SETUPTOOLS_SHIM
from pip.utils.glibc import libc_ver
from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner
from pip.locations import write_delete_marker_file
from pip.vcs import vcs
from pip._vendor import requests, six
from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.utils import get_netrc_auth
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.requests.packages import urllib3
from pip._vendor.cachecontrol import CacheControlAdapter
from pip._vendor.cachecontrol.caches import FileCache
from pip._vendor.lockfile import LockError
from pip._vendor.six.moves import xmlrpc_client


__all__ = ['get_file_content',
           'is_url', 'url_to_path', 'path_to_url',
           'is_archive_file', 'unpack_vcs_link',
           'unpack_file_url', 'is_vcs_url', 'is_file_url',
           'unpack_http_url', 'unpack_url']


logger = logging.getLogger(__name__)


def user_agent():
    """
    Return a string representing the user agent.
    """
    data = {
        "installer": {"name": "pip", "version": pip.__version__},
        "python": platform.python_version(),
        "implementation": {
            "name": platform.python_implementation(),
        },
    }

    if data["implementation"]["name"] == 'CPython':
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'PyPy':
        if sys.pypy_version_info.releaselevel == 'final':
            pypy_version_info = sys.pypy_version_info[:3]
        else:
            pypy_version_info = sys.pypy_version_info
        data["implementation"]["version"] = ".".join(
            [str(x) for x in pypy_version_info]
        )
    elif data["implementation"]["name"] == 'Jython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'IronPython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()

    if sys.platform.startswith("linux"):
        from pip._vendor import distro
        distro_infos = dict(filter(
            lambda x: x[1],
            zip(["name", "version", "id"], distro.linux_distribution()),
        ))
        libc = dict(filter(
            lambda x: x[1],
            zip(["lib", "version"], libc_ver()),
        ))
        if libc:
            distro_infos["libc"] = libc
        if distro_infos:
            data["distro"] = distro_infos

    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
        data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}

    if platform.system():
        data.setdefault("system", {})["name"] = platform.system()

    if platform.release():
        data.setdefault("system", {})["release"] = platform.release()

    if platform.machine():
        data["cpu"] = platform.machine()

    # Python 2.6 doesn't have ssl.OPENSSL_VERSION.
    if HAS_TLS and sys.version_info[:2] > (2, 6):
        data["openssl_version"] = ssl.OPENSSL_VERSION

    return "{data[installer][name]}/{data[installer][version]} {json}".format(
        data=data,
        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
    )
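

# Illustrative shape of a user_agent() result (a sketch; the version numbers
# and platform details here are examples only and vary by environment):
#
#   pip/9.0.1 {"cpu":"x86_64","distro":{"id":"xenial","name":"Ubuntu",
#              "version":"16.04"},"implementation":{"name":"CPython",
#              "version":"2.7.12"},"installer":{"name":"pip",
#              "version":"9.0.1"},...}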


class MultiDomainBasicAuth(AuthBase):

    def __init__(self, prompting=True):
        self.prompting = prompting
        self.passwords = {}

    def __call__(self, req):
        parsed = urllib_parse.urlparse(req.url)

        # Get the netloc without any embedded credentials
        netloc = parsed.netloc.rsplit("@", 1)[-1]

        # Set the url of the request to the url without any credentials
        req.url = urllib_parse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])

        # Use any stored credentials that we have for this netloc
        username, password = self.passwords.get(netloc, (None, None))

        # Extract credentials embedded in the url if we have none stored
        if username is None:
            username, password = self.parse_credentials(parsed.netloc)

        # Get creds from netrc if we still don't have them
        if username is None and password is None:
            netrc_auth = get_netrc_auth(req.url)
            username, password = netrc_auth if netrc_auth else (None, None)

        if username or password:
            # Store the username and password
            self.passwords[netloc] = (username, password)

            # Send the basic auth with this request
            req = HTTPBasicAuth(username or "", password or "")(req)

        # Attach a hook to handle 401 responses
        req.register_hook("response", self.handle_401)

        return req

    def handle_401(self, resp, **kwargs):
        # We only care about 401 responses; anything else we want to just
        # pass through the actual response
        if resp.status_code != 401:
            return resp

        # We are not able to prompt the user so simply return the response
        if not self.prompting:
            return resp

        parsed = urllib_parse.urlparse(resp.url)

        # Prompt the user for a new username and password
        username = six.moves.input("User for %s: " % parsed.netloc)
        password = getpass.getpass("Password: ")

        # Store the new username and password to use for future requests
        if username or password:
            self.passwords[parsed.netloc] = (username, password)

        # Consume content and release the original connection to allow our new
        # request to reuse the same one.
        resp.content
        resp.raw.release_conn()

        # Add our new username and password to the request
        req = HTTPBasicAuth(username or "", password or "")(resp.request)

        # Send our new request
        new_resp = resp.connection.send(req, **kwargs)
        new_resp.history.append(resp)

        return new_resp

    def parse_credentials(self, netloc):
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                return userinfo.split(":", 1)
            return userinfo, None
        return None, None
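

# Illustrative behavior of MultiDomainBasicAuth.parse_credentials (a sketch,
# not a doctest; the hostnames are examples only):
#
#   auth = MultiDomainBasicAuth()
#   auth.parse_credentials("user:s3cret@example.com")  # -> ["user", "s3cret"]
#   auth.parse_credentials("user@example.com")         # -> ("user", None)
#   auth.parse_credentials("example.com")              # -> (None, None)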


class LocalFSAdapter(BaseAdapter):

    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
             proxies=None):
        pathname = url_to_path(request.url)

        resp = Response()
        resp.status_code = 200
        resp.url = request.url

        try:
            stats = os.stat(pathname)
        except OSError as exc:
            resp.status_code = 404
            resp.raw = exc
        else:
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
            resp.headers = CaseInsensitiveDict({
                "Content-Type": content_type,
                "Content-Length": stats.st_size,
                "Last-Modified": modified,
            })

            resp.raw = open(pathname, "rb")
            resp.close = resp.raw.close

        return resp

    def close(self):
        pass
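

# LocalFSAdapter lets a requests session answer file:// URLs straight from
# disk; PipSession mounts it on the "file://" prefix below. Illustrative use
# (a sketch; the path is an example only):
#
#   session = PipSession()
#   resp = session.get("file:///tmp/example.txt")
#   resp.status_code               # 200 if the file exists, else 404
#   resp.headers["Content-Type"]   # guessed from the filename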


class SafeFileCache(FileCache):
    """
    A file based cache which is safe to use even when the target directory may
    not be accessible or writable.
    """

    def __init__(self, *args, **kwargs):
        super(SafeFileCache, self).__init__(*args, **kwargs)

        # Check to ensure that the directory containing our cache directory
        # is owned by the user currently executing pip. If it does not exist
        # we will check the parent directory until we find one that does
        # exist. If it is not owned by the user executing pip then we will
        # disable the cache and log a warning.
        if not check_path_owner(self.directory):
            logger.warning(
                "The directory '%s' or its parent directory is not owned by "
                "the current user and the cache has been disabled. Please "
                "check the permissions and owner of that directory. If "
                "executing pip with sudo, you may want sudo's -H flag.",
                self.directory,
            )

            # Set our directory to None to disable the Cache
            self.directory = None

    def get(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).get(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we can just skip caching and process the request as if
            # caching wasn't enabled.
            pass

    def set(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).set(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we can just skip caching and process the request as if
            # caching wasn't enabled.
            pass

    def delete(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).delete(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we can just skip caching and process the request as if
            # caching wasn't enabled.
            pass
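

# Illustrative degraded behavior (a summary of the class above): if the cache
# directory is not owned by the current user, SafeFileCache logs the warning,
# sets self.directory to None, and every subsequent get/set/delete becomes a
# silent no-op instead of raising -- requests simply proceed uncached.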


class InsecureHTTPAdapter(HTTPAdapter):

    def cert_verify(self, conn, url, verify, cert):
        conn.cert_reqs = 'CERT_NONE'
        conn.ca_certs = None


class PipSession(requests.Session):

    timeout = None

    def __init__(self, *args, **kwargs):
        retries = kwargs.pop("retries", 0)
        cache = kwargs.pop("cache", None)
        insecure_hosts = kwargs.pop("insecure_hosts", [])

        super(PipSession, self).__init__(*args, **kwargs)

        # Attach our User Agent to the request
        self.headers["User-Agent"] = user_agent()

        # Attach our Authentication handler to the session
        self.auth = MultiDomainBasicAuth()

        # Create our urllib3.Retry instance which will allow us to customize
        # how we handle retries.
        retries = urllib3.Retry(
            # Set the total number of retries that a particular request can
            # have.
            total=retries,

            # A 503 error from PyPI typically means that the Fastly -> Origin
            # connection got interrupted in some way. A 503 error in general
            # is typically considered a transient error so we'll go ahead and
            # retry it.
            status_forcelist=[503],

            # Add a small amount of back off between failed requests in
            # order to prevent hammering the service.
            backoff_factor=0.25,
        )

        # We want to _only_ cache responses on securely fetched origins. We do
        # this because we can't validate the response of an insecurely fetched
        # origin, and we don't want someone to be able to poison the cache and
        # require manual eviction from the cache to fix it.
        if cache:
            secure_adapter = CacheControlAdapter(
                cache=SafeFileCache(cache, use_dir_lock=True),
                max_retries=retries,
            )
        else:
            secure_adapter = HTTPAdapter(max_retries=retries)

        # Our Insecure HTTPAdapter disables HTTPS validation. It does not
        # support caching (see above) so we'll use it for all http:// URLs as
        # well as any https:// host that we've marked as ignoring TLS errors
        # for.
        insecure_adapter = InsecureHTTPAdapter(max_retries=retries)

        self.mount("https://", secure_adapter)
        self.mount("http://", insecure_adapter)

        # Enable file:// urls
        self.mount("file://", LocalFSAdapter())

        # We want to use a non-validating adapter for any requests which are
        # deemed insecure.
        for host in insecure_hosts:
            self.mount("https://{0}/".format(host), insecure_adapter)

    def request(self, method, url, *args, **kwargs):
        # Allow setting a default timeout on a session
        kwargs.setdefault("timeout", self.timeout)

        # Dispatch the actual request
        return super(PipSession, self).request(method, url, *args, **kwargs)
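

# Illustrative construction of a PipSession (a sketch; the argument values
# below are examples only):
#
#   session = PipSession(
#       retries=3,                                # total for urllib3.Retry
#       cache="/path/to/http-cache",              # enables the caching adapter
#       insecure_hosts=["internal.example.com"],  # skip TLS verification here
#   )
#   session.timeout = 15   # default timeout picked up by PipSession.request()
#   resp = session.get("https://pypi.python.org/simple/")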


def get_file_content(url, comes_from=None, session=None):
    """Gets the content of a file; it may be a filename, file: URL, or
    http: URL.  Returns (location, content).  Content is unicode."""
    if session is None:
        raise TypeError(
            "get_file_content() missing 1 required keyword argument: 'session'"
        )

    match = _scheme_re.search(url)
    if match:
        scheme = match.group(1).lower()
        if (scheme == 'file' and comes_from and
                comes_from.startswith('http')):
            raise InstallationError(
                'Requirements file %s references URL %s, which is local'
                % (comes_from, url))
        if scheme == 'file':
            path = url.split(':', 1)[1]
            path = path.replace('\\', '/')
            match = _url_slash_drive_re.match(path)
            if match:
                path = match.group(1) + ':' + path.split('|', 1)[1]
            path = urllib_parse.unquote(path)
            if path.startswith('/'):
                path = '/' + path.lstrip('/')
            url = path
        else:
            # FIXME: catch some errors
            resp = session.get(url)
            resp.raise_for_status()
            return resp.url, resp.text

    try:
        with open(url, 'rb') as f:
            content = auto_decode(f.read())
    except IOError as exc:
        raise InstallationError(
            'Could not open requirements file: %s' % str(exc)
        )
    return url, content


_scheme_re = re.compile(r'^(http|https|file):', re.I)
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
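

# Illustrative inputs get_file_content() accepts (a sketch; the paths and
# URLs are examples only):
#
#   get_file_content('./requirements.txt', session=session)     # plain path
#   get_file_content('file:///tmp/requirements.txt', session=session)
#   get_file_content('file:///c|/requirements.txt', session=session)
#       # old pipe-style drive letter, rewritten to 'c:/requirements.txt'
#       # by _url_slash_drive_re above
#   get_file_content('https://example.com/requirements.txt', session=session)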


def is_url(name):
    """Returns true if the name looks like a URL"""
    if ':' not in name:
        return False
    scheme = name.split(':', 1)[0].lower()
    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes


def url_to_path(url):
    """
    Convert a file: URL to a path.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not %r)" % url)

    _, netloc, path, _, _ = urllib_parse.urlsplit(url)

    # if we have a UNC path, prepend UNC share notation
    if netloc:
        netloc = '\\\\' + netloc

    path = urllib_request.url2pathname(netloc + path)
    return path


def path_to_url(path):
    """
    Convert a path to a file: URL.  The path will be made absolute and have
    quoted path parts.
    """
    path = os.path.normpath(os.path.abspath(path))
    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
    return url
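

# Illustrative round trip on a POSIX system (a sketch; paths are examples):
#
#   path_to_url('/tmp/pkg-1.0.tar.gz')         # -> 'file:///tmp/pkg-1.0.tar.gz'
#   url_to_path('file:///tmp/pkg-1.0.tar.gz')  # -> '/tmp/pkg-1.0.tar.gz'
#
# On Windows, a file: URL with a netloc is treated as a UNC share, e.g.
# 'file://server/share/pkg.whl' -> '\\server\share\pkg.whl'.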


def is_archive_file(name):
    """Return True if `name` is considered an archive file."""
    ext = splitext(name)[1].lower()
    if ext in ARCHIVE_EXTENSIONS:
        return True
    return False


def unpack_vcs_link(link, location):
    vcs_backend = _get_used_vcs_backend(link)
    vcs_backend.unpack(location)


def _get_used_vcs_backend(link):
    for backend in vcs.backends:
        if link.scheme in backend.schemes:
            vcs_backend = backend(link.url)
            return vcs_backend


def is_vcs_url(link):
    return bool(_get_used_vcs_backend(link))


def is_file_url(link):
    return link.url.lower().startswith('file:')


def is_dir_url(link):
    """Return whether a file:// Link points to a directory.

    ``link`` must not have any other scheme but file://. Call is_file_url()
    first.

    """
    link_path = url_to_path(link.url_without_fragment)
    return os.path.isdir(link_path)


def _progress_indicator(iterable, *args, **kwargs):
    return iterable


def _download_url(resp, link, content_file, hashes):
    try:
        total_length = int(resp.headers['content-length'])
    except (ValueError, KeyError, TypeError):
        total_length = 0

    cached_resp = getattr(resp, "from_cache", False)

    if logger.getEffectiveLevel() > logging.INFO:
        show_progress = False
    elif cached_resp:
        show_progress = False
    elif total_length > (40 * 1000):
        show_progress = True
    elif not total_length:
        show_progress = True
    else:
        show_progress = False

    show_url = link.show_url

    def resp_read(chunk_size):
        try:
            # Special case for urllib3.
            for chunk in resp.raw.stream(
                    chunk_size,
                    # We use decode_content=False here because we don't
                    # want urllib3 to mess with the raw bytes we get
                    # from the server. If we decompress inside of
                    # urllib3 then we cannot verify the checksum
                    # because the checksum will be of the compressed
                    # file. This breakage will only occur if the
                    # server adds a Content-Encoding header, which
                    # depends on how the server was configured:
                    # - Some servers will notice that the file isn't a
                    #   compressible file and will leave the file alone
                    #   and with an empty Content-Encoding
                    # - Some servers will notice that the file is
                    #   already compressed and will leave the file
                    #   alone and will add a Content-Encoding: gzip
                    #   header
                    # - Some servers won't notice anything at all and
                    #   will take a file that's already been compressed
                    #   and compress it again and set the
                    #   Content-Encoding: gzip header
                    #
                    # By setting this not to decode automatically we
                    # hope to eliminate problems with the second case.
                    decode_content=False):
                yield chunk
        except AttributeError:
            # Standard file-like object.
            while True:
                chunk = resp.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    def written_chunks(chunks):
        for chunk in chunks:
            content_file.write(chunk)
            yield chunk

    progress_indicator = _progress_indicator

    if link.netloc == PyPI.netloc:
        url = show_url
    else:
        url = link.url_without_fragment

    if show_progress:  # We don't show progress on cached responses
        if total_length:
            logger.info("Downloading %s (%s)", url, format_size(total_length))
            progress_indicator = DownloadProgressBar(max=total_length).iter
        else:
            logger.info("Downloading %s", url)
            progress_indicator = DownloadProgressSpinner().iter
    elif cached_resp:
        logger.info("Using cached %s", url)
    else:
        logger.info("Downloading %s", url)

    logger.debug('Downloading from URL %s', link)

    downloaded_chunks = written_chunks(
        progress_indicator(
            resp_read(CONTENT_CHUNK_SIZE),
            CONTENT_CHUNK_SIZE
        )
    )
    if hashes:
        hashes.check_against_chunks(downloaded_chunks)
    else:
        consume(downloaded_chunks)
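

# A note on the pipeline above: the generators are lazy. resp_read() yields
# raw chunks from the socket, progress_indicator() passes them through while
# rendering progress, and written_chunks() writes each chunk to content_file
# before re-yielding it. Nothing is actually downloaded until
# hashes.check_against_chunks() or consume() iterates the chain.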


def _copy_file(filename, location, link):
    copy = True
    download_location = os.path.join(location, link.filename)
    if os.path.exists(download_location):
        response = ask_path_exists(
            'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort' %
            display_path(download_location), ('i', 'w', 'b', 'a'))
        if response == 'i':
            copy = False
        elif response == 'w':
            logger.warning('Deleting %s', display_path(download_location))
            os.remove(download_location)
        elif response == 'b':
            dest_file = backup_dir(download_location)
            logger.warning(
                'Backing up %s to %s',
                display_path(download_location),
                display_path(dest_file),
            )
            shutil.move(download_location, dest_file)
        elif response == 'a':
            sys.exit(-1)
    if copy:
        shutil.copy(filename, download_location)
        logger.info('Saved %s', display_path(download_location))


def unpack_http_url(link, location, download_dir=None,
                    session=None, hashes=None):
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'"
        )

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link,
                                                      download_dir,
                                                      hashes)

    if already_downloaded_path:
        from_path = already_downloaded_path
        content_type = mimetypes.guess_type(from_path)[0]
    else:
        # let's download to a tmp dir
        from_path, content_type = _download_http_url(link,
                                                     session,
                                                     temp_dir,
                                                     hashes)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, link)

    if not already_downloaded_path:
        os.unlink(from_path)
    rmtree(temp_dir)


def unpack_file_url(link, location, download_dir=None, hashes=None):
    """Unpack link into location.

    If download_dir is provided and link points to a file, make a copy
    of the link file inside download_dir.
    """
    link_path = url_to_path(link.url_without_fragment)

    # If it's a url to a local directory
    if is_dir_url(link):
        if os.path.isdir(location):
            rmtree(location)
        _copy_dist_from_dir(link_path, location)
        if download_dir:
            logger.info('Link is a directory, ignoring download_dir')
        return

    # If --require-hashes is off, `hashes` is either empty, the
    # link's embedded hash, or MissingHashes; it is required to
    # match. If --require-hashes is on, we are satisfied by any
    # hash in `hashes` matching: a URL-based or an option-based
    # one; no internet-sourced hash will be in `hashes`.
    if hashes:
        hashes.check_against_path(link_path)

    # If a download dir is specified, is the file already there and valid?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link,
                                                      download_dir,
                                                      hashes)

    if already_downloaded_path:
        from_path = already_downloaded_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, link)


def _copy_dist_from_dir(link_path, location):
    """Copy distribution files in `link_path` to `location`.

    Invoked when user requests to install a local directory. E.g.:

        pip install .
        pip install ~/dev/git-repos/python-prompt-toolkit

    """
    # Note: Rather than copying the whole tree with `shutil.copytree` (which
    # is very slow when the directory holds a lot of data), build an sdist
    # and unpack that. See https://github.com/pypa/pip/issues/2195
    if os.path.isdir(location):
        rmtree(location)

    # build an sdist
    setup_py = 'setup.py'
    sdist_args = [sys.executable]
    sdist_args.append('-c')
    sdist_args.append(SETUPTOOLS_SHIM % setup_py)
    sdist_args.append('sdist')
    sdist_args += ['--dist-dir', location]
    logger.info('Running setup.py sdist for %s', link_path)

    with indent_log():
        call_subprocess(sdist_args, cwd=link_path, show_stdout=False)

    # unpack sdist into `location`
    sdist = os.path.join(location, os.listdir(location)[0])
    logger.info('Unpacking sdist %s into %s', sdist, location)
    unpack_file(sdist, location, content_type=None, link=None)
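

# Illustratively, the subprocess launched above amounts to (a sketch):
#
#   python -c "<SETUPTOOLS_SHIM % 'setup.py'>" sdist --dist-dir <location>
#
# run with cwd set to `link_path`, so the resulting sdist lands in `location`
# and is then unpacked in place.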


class PipXmlrpcTransport(xmlrpc_client.Transport):
    """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
    object.
    """

    def __init__(self, index_url, session, use_datetime=False):
        xmlrpc_client.Transport.__init__(self, use_datetime)
        index_parts = urllib_parse.urlparse(index_url)
        self._scheme = index_parts.scheme
        self._session = session

    def request(self, host, handler, request_body, verbose=False):
        parts = (self._scheme, host, handler, None, None, None)
        url = urllib_parse.urlunparse(parts)
        try:
            headers = {'Content-Type': 'text/xml'}
            response = self._session.post(url, data=request_body,
                                          headers=headers, stream=True)
            response.raise_for_status()
            self.verbose = verbose
            return self.parse_response(response.raw)
        except requests.HTTPError as exc:
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code, url,
            )
            raise
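

# Illustrative pairing with the standard xmlrpc client (a sketch; the index
# URL is an example, and the RPC method depends on what the index server
# exposes):
#
#   index_url = 'https://pypi.python.org/pypi'
#   transport = PipXmlrpcTransport(index_url, PipSession())
#   pypi = xmlrpc_client.ServerProxy(index_url, transport=transport)
#   pypi.search({'name': 'pip'})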


def unpack_url(link, location, download_dir=None,
               only_download=False, session=None, hashes=None):
    """Unpack link.

    If link is a VCS link:
      if only_download, export into download_dir and ignore location
      else unpack into location
    for other types of link:
      - unpack into location
      - if download_dir, copy the file into download_dir
      - if only_download, mark location for deletion

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # non-editable vcs urls
    if is_vcs_url(link):
        unpack_vcs_link(link, location)

    # file urls
    elif is_file_url(link):
        unpack_file_url(link, location, download_dir, hashes=hashes)

    # http urls
    else:
        if session is None:
            session = PipSession()

        unpack_http_url(
            link,
            location,
            download_dir,
            session,
            hashes=hashes
        )
    if only_download:
        write_delete_marker_file(location)


def _download_http_url(link, session, temp_dir, hashes):
    """Download link url into temp_dir using provided session"""
    target_url = link.url.split('#', 1)[0]
    try:
        resp = session.get(
            target_url,
            # We use Accept-Encoding: identity here because requests
            # defaults to accepting compressed responses. This breaks in
            # a variety of ways depending on how the server is configured.
            # - Some servers will notice that the file isn't a compressible
            #   file and will leave the file alone and with an empty
            #   Content-Encoding
            # - Some servers will notice that the file is already
            #   compressed and will leave the file alone and will add a
            #   Content-Encoding: gzip header
            # - Some servers won't notice anything at all and will take
            #   a file that's already been compressed and compress it again
            #   and set the Content-Encoding: gzip header
            # By setting this to request only the identity encoding we're
            # hoping to eliminate the third case. Hopefully there does not
            # exist a server which when given a file will notice it is
            # already compressed and that you're not asking for a
            # compressed file and will then decompress it before sending
            # because if that's the case I don't think it'll ever be
            # possible to make this work.
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        resp.raise_for_status()
    except requests.HTTPError as exc:
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link,
        )
        raise

    content_type = resp.headers.get('content-type', '')
    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        type, params = cgi.parse_header(content_disposition)
        # We use ``or`` here because we don't want to use an "empty" value
        # from the filename param.
        filename = params.get('filename') or filename
    ext = splitext(filename)[1]
    if not ext:
        ext = mimetypes.guess_extension(content_type)
        if ext:
            filename += ext
    if not ext and link.url != resp.url:
        ext = os.path.splitext(resp.url)[1]
        if ext:
            filename += ext
    file_path = os.path.join(temp_dir, filename)
    with open(file_path, 'wb') as content_file:
        _download_url(resp, link, content_file, hashes)
    return file_path, content_type
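

# Filename resolution order in _download_http_url(), summarized:
#   1. link.filename, derived from the request URL (the fallback)
#   2. the Content-Disposition "filename" parameter, when non-empty
#   3. if the name still has no extension, one guessed from Content-Type
#   4. failing that, an extension taken from the final (post-redirect) URL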


def _check_download_dir(link, download_dir, hashes):
    """Check download_dir for a previously downloaded file with a correct
    hash. If a correct file is found, return its path; else return None.
    """
    download_path = os.path.join(download_dir, link.filename)
    if os.path.exists(download_path):
        # If already downloaded, does its hash match?
        logger.info('File was already downloaded %s', download_path)
        if hashes:
            try:
                hashes.check_against_path(download_path)
            except HashMismatch:
                logger.warning(
                    'Previously-downloaded file %s has bad hash. '
                    'Re-downloading.',
                    download_path
                )
                os.unlink(download_path)
                return None
        return download_path
    return None
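

# Typical entry point (a sketch): pip's requirement-preparation code calls
# unpack_url() roughly like
#
#   unpack_url(link, build_location, download_dir=None,
#              only_download=False, session=PipSession(), hashes=hashes)
#
# which dispatches to unpack_vcs_link(), unpack_file_url(), or
# unpack_http_url() based on the link's scheme.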