first commit

This commit is contained in:
Victor
2023-11-16 16:57:13 +01:00
commit a8fb1fd811
1477 changed files with 275005 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
"""Contains purely network-related utilities.
"""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,69 @@
"""HTTP cache implementation.
"""
import os
from contextlib import contextmanager
from typing import Generator, Optional
from pip._vendor.cachecontrol.cache import BaseCache
from pip._vendor.cachecontrol.caches import FileCache
from pip._vendor.requests.models import Response
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
from pip._internal.utils.misc import ensure_dir
def is_from_cache(response: Response) -> bool:
return getattr(response, "from_cache", False)
@contextmanager
def suppressed_cache_errors() -> Generator[None, None, None]:
"""If we can't access the cache then we can just skip caching and process
requests as if caching wasn't enabled.
"""
try:
yield
except OSError:
pass
class SafeFileCache(BaseCache):
"""
A file based cache which is safe to use even when the target directory may
not be accessible or writable.
"""
def __init__(self, directory: str) -> None:
assert directory is not None, "Cache directory must not be None."
super().__init__()
self.directory = directory
def _get_cache_path(self, name: str) -> str:
# From cachecontrol.caches.file_cache.FileCache._fn, brought into our
# class for backwards-compatibility and to avoid using a non-public
# method.
hashed = FileCache.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key: str) -> Optional[bytes]:
path = self._get_cache_path(key)
with suppressed_cache_errors():
with open(path, "rb") as f:
return f.read()
def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None:
path = self._get_cache_path(key)
with suppressed_cache_errors():
ensure_dir(os.path.dirname(path))
with adjacent_tmp_file(path) as f:
f.write(value)
replace(f.name, path)
def delete(self, key: str) -> None:
path = self._get_cache_path(key)
with suppressed_cache_errors():
os.remove(path)

View File

@@ -0,0 +1,186 @@
"""Download files with progress indicators.
"""
import email.message
import logging
import mimetypes
import os
from typing import Iterable, Optional, Tuple
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._internal.cli.progress_bars import get_download_progress_renderer
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.models.index import PyPI
from pip._internal.models.link import Link
from pip._internal.network.cache import is_from_cache
from pip._internal.network.session import PipSession
from pip._internal.network.utils import HEADERS, raise_for_status, response_chunks
from pip._internal.utils.misc import format_size, redact_auth_from_url, splitext
logger = logging.getLogger(__name__)
def _get_http_response_size(resp: Response) -> Optional[int]:
try:
return int(resp.headers["content-length"])
except (ValueError, KeyError, TypeError):
return None
def _prepare_download(
resp: Response,
link: Link,
progress_bar: str,
) -> Iterable[bytes]:
total_length = _get_http_response_size(resp)
if link.netloc == PyPI.file_storage_domain:
url = link.show_url
else:
url = link.url_without_fragment
logged_url = redact_auth_from_url(url)
if total_length:
logged_url = "{} ({})".format(logged_url, format_size(total_length))
if is_from_cache(resp):
logger.info("Using cached %s", logged_url)
else:
logger.info("Downloading %s", logged_url)
if logger.getEffectiveLevel() > logging.INFO:
show_progress = False
elif is_from_cache(resp):
show_progress = False
elif not total_length:
show_progress = True
elif total_length > (40 * 1000):
show_progress = True
else:
show_progress = False
chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
if not show_progress:
return chunks
renderer = get_download_progress_renderer(bar_type=progress_bar, size=total_length)
return renderer(chunks)
def sanitize_content_filename(filename: str) -> str:
"""
Sanitize the "filename" value from a Content-Disposition header.
"""
return os.path.basename(filename)
def parse_content_disposition(content_disposition: str, default_filename: str) -> str:
"""
Parse the "filename" value from a Content-Disposition header, and
return the default filename if the result is empty.
"""
m = email.message.Message()
m["content-type"] = content_disposition
filename = m.get_param("filename")
if filename:
# We need to sanitize the filename to prevent directory traversal
# in case the filename contains ".." path parts.
filename = sanitize_content_filename(str(filename))
return filename or default_filename
def _get_http_response_filename(resp: Response, link: Link) -> str:
"""Get an ideal filename from the given HTTP response, falling back to
the link filename if not provided.
"""
filename = link.filename # fallback
# Have a look at the Content-Disposition header for a better guess
content_disposition = resp.headers.get("content-disposition")
if content_disposition:
filename = parse_content_disposition(content_disposition, filename)
ext: Optional[str] = splitext(filename)[1]
if not ext:
ext = mimetypes.guess_extension(resp.headers.get("content-type", ""))
if ext:
filename += ext
if not ext and link.url != resp.url:
ext = os.path.splitext(resp.url)[1]
if ext:
filename += ext
return filename
def _http_get_download(session: PipSession, link: Link) -> Response:
target_url = link.url.split("#", 1)[0]
resp = session.get(target_url, headers=HEADERS, stream=True)
raise_for_status(resp)
return resp
class Downloader:
def __init__(
self,
session: PipSession,
progress_bar: str,
) -> None:
self._session = session
self._progress_bar = progress_bar
def __call__(self, link: Link, location: str) -> Tuple[str, str]:
"""Download the file given by link into location."""
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
assert e.response is not None
logger.critical(
"HTTP error %s while getting %s", e.response.status_code, link
)
raise
filename = _get_http_response_filename(resp, link)
filepath = os.path.join(location, filename)
chunks = _prepare_download(resp, link, self._progress_bar)
with open(filepath, "wb") as content_file:
for chunk in chunks:
content_file.write(chunk)
content_type = resp.headers.get("Content-Type", "")
return filepath, content_type
class BatchDownloader:
def __init__(
self,
session: PipSession,
progress_bar: str,
) -> None:
self._session = session
self._progress_bar = progress_bar
def __call__(
self, links: Iterable[Link], location: str
) -> Iterable[Tuple[Link, Tuple[str, str]]]:
"""Download the files given by links into location."""
for link in links:
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
assert e.response is not None
logger.critical(
"HTTP error %s while getting %s",
e.response.status_code,
link,
)
raise
filename = _get_http_response_filename(resp, link)
filepath = os.path.join(location, filename)
chunks = _prepare_download(resp, link, self._progress_bar)
with open(filepath, "wb") as content_file:
for chunk in chunks:
content_file.write(chunk)
content_type = resp.headers.get("Content-Type", "")
yield link, (filepath, content_type)

View File

@@ -0,0 +1,210 @@
"""Lazy ZIP over HTTP"""
__all__ = ["HTTPRangeRequestUnsupported", "dist_from_wheel_url"]
from bisect import bisect_left, bisect_right
from contextlib import contextmanager
from tempfile import NamedTemporaryFile
from typing import Any, Dict, Generator, List, Optional, Tuple
from zipfile import BadZipFile, ZipFile
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._internal.metadata import BaseDistribution, MemoryWheel, get_wheel_distribution
from pip._internal.network.session import PipSession
from pip._internal.network.utils import HEADERS, raise_for_status, response_chunks
class HTTPRangeRequestUnsupported(Exception):
pass
def dist_from_wheel_url(name: str, url: str, session: PipSession) -> BaseDistribution:
"""Return a distribution object from the given wheel URL.
This uses HTTP range requests to only fetch the portion of the wheel
containing metadata, just enough for the object to be constructed.
If such requests are not supported, HTTPRangeRequestUnsupported
is raised.
"""
with LazyZipOverHTTP(url, session) as zf:
# For read-only ZIP files, ZipFile only needs methods read,
# seek, seekable and tell, not the whole IO protocol.
wheel = MemoryWheel(zf.name, zf) # type: ignore
# After context manager exit, wheel.name
# is an invalid file by intention.
return get_wheel_distribution(wheel, canonicalize_name(name))
class LazyZipOverHTTP:
"""File-like object mapped to a ZIP file over HTTP.
This uses HTTP range requests to lazily fetch the file's content,
which is supposed to be fed to ZipFile. If such requests are not
supported by the server, raise HTTPRangeRequestUnsupported
during initialization.
"""
def __init__(
self, url: str, session: PipSession, chunk_size: int = CONTENT_CHUNK_SIZE
) -> None:
head = session.head(url, headers=HEADERS)
raise_for_status(head)
assert head.status_code == 200
self._session, self._url, self._chunk_size = session, url, chunk_size
self._length = int(head.headers["Content-Length"])
self._file = NamedTemporaryFile()
self.truncate(self._length)
self._left: List[int] = []
self._right: List[int] = []
if "bytes" not in head.headers.get("Accept-Ranges", "none"):
raise HTTPRangeRequestUnsupported("range request is not supported")
self._check_zip()
@property
def mode(self) -> str:
"""Opening mode, which is always rb."""
return "rb"
@property
def name(self) -> str:
"""Path to the underlying file."""
return self._file.name
def seekable(self) -> bool:
"""Return whether random access is supported, which is True."""
return True
def close(self) -> None:
"""Close the file."""
self._file.close()
@property
def closed(self) -> bool:
"""Whether the file is closed."""
return self._file.closed
def read(self, size: int = -1) -> bytes:
"""Read up to size bytes from the object and return them.
As a convenience, if size is unspecified or -1,
all bytes until EOF are returned. Fewer than
size bytes may be returned if EOF is reached.
"""
download_size = max(size, self._chunk_size)
start, length = self.tell(), self._length
stop = length if size < 0 else min(start + download_size, length)
start = max(0, stop - download_size)
self._download(start, stop - 1)
return self._file.read(size)
def readable(self) -> bool:
"""Return whether the file is readable, which is True."""
return True
def seek(self, offset: int, whence: int = 0) -> int:
"""Change stream position and return the new absolute position.
Seek to offset relative position indicated by whence:
* 0: Start of stream (the default). pos should be >= 0;
* 1: Current position - pos may be negative;
* 2: End of stream - pos usually negative.
"""
return self._file.seek(offset, whence)
def tell(self) -> int:
"""Return the current position."""
return self._file.tell()
def truncate(self, size: Optional[int] = None) -> int:
"""Resize the stream to the given size in bytes.
If size is unspecified resize to the current position.
The current stream position isn't changed.
Return the new file size.
"""
return self._file.truncate(size)
def writable(self) -> bool:
"""Return False."""
return False
def __enter__(self) -> "LazyZipOverHTTP":
self._file.__enter__()
return self
def __exit__(self, *exc: Any) -> None:
self._file.__exit__(*exc)
@contextmanager
def _stay(self) -> Generator[None, None, None]:
"""Return a context manager keeping the position.
At the end of the block, seek back to original position.
"""
pos = self.tell()
try:
yield
finally:
self.seek(pos)
def _check_zip(self) -> None:
"""Check and download until the file is a valid ZIP."""
end = self._length - 1
for start in reversed(range(0, end, self._chunk_size)):
self._download(start, end)
with self._stay():
try:
# For read-only ZIP files, ZipFile only needs
# methods read, seek, seekable and tell.
ZipFile(self) # type: ignore
except BadZipFile:
pass
else:
break
def _stream_response(
self, start: int, end: int, base_headers: Dict[str, str] = HEADERS
) -> Response:
"""Return HTTP response to a range request from start to end."""
headers = base_headers.copy()
headers["Range"] = f"bytes={start}-{end}"
# TODO: Get range requests to be correctly cached
headers["Cache-Control"] = "no-cache"
return self._session.get(self._url, headers=headers, stream=True)
def _merge(
self, start: int, end: int, left: int, right: int
) -> Generator[Tuple[int, int], None, None]:
"""Return a generator of intervals to be fetched.
Args:
start (int): Start of needed interval
end (int): End of needed interval
left (int): Index of first overlapping downloaded data
right (int): Index after last overlapping downloaded data
"""
lslice, rslice = self._left[left:right], self._right[left:right]
i = start = min([start] + lslice[:1])
end = max([end] + rslice[-1:])
for j, k in zip(lslice, rslice):
if j > i:
yield i, j - 1
i = k + 1
if i <= end:
yield i, end
self._left[left:right], self._right[left:right] = [start], [end]
def _download(self, start: int, end: int) -> None:
"""Download bytes from start to end inclusively."""
with self._stay():
left = bisect_left(self._right, start)
right = bisect_right(self._left, end)
for start, end in self._merge(start, end, left, right):
response = self._stream_response(start, end)
response.raise_for_status()
self.seek(start)
for chunk in response_chunks(response, self._chunk_size):
self._file.write(chunk)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,96 @@
from typing import Dict, Generator
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._internal.exceptions import NetworkConnectionError
# The following comments and HTTP headers were originally added by
# Donald Stufft in git commit 22c562429a61bb77172039e480873fb239dd8c03.
#
# We use Accept-Encoding: identity here because requests defaults to
# accepting compressed responses. This breaks in a variety of ways
# depending on how the server is configured.
# - Some servers will notice that the file isn't a compressible file
# and will leave the file alone and with an empty Content-Encoding
# - Some servers will notice that the file is already compressed and
# will leave the file alone, adding a Content-Encoding: gzip header
# - Some servers won't notice anything at all and will take a file
# that's already been compressed and compress it again, and set
# the Content-Encoding: gzip header
# By setting this to request only the identity encoding we're hoping
# to eliminate the third case. Hopefully there does not exist a server
# which when given a file will notice it is already compressed and that
# you're not asking for a compressed file and will then decompress it
# before sending because if that's the case I don't think it'll ever be
# possible to make this work.
HEADERS: Dict[str, str] = {"Accept-Encoding": "identity"}
def raise_for_status(resp: Response) -> None:
http_error_msg = ""
if isinstance(resp.reason, bytes):
# We attempt to decode utf-8 first because some servers
# choose to localize their reason strings. If the string
# isn't utf-8, we fall back to iso-8859-1 for all other
# encodings.
try:
reason = resp.reason.decode("utf-8")
except UnicodeDecodeError:
reason = resp.reason.decode("iso-8859-1")
else:
reason = resp.reason
if 400 <= resp.status_code < 500:
http_error_msg = (
f"{resp.status_code} Client Error: {reason} for url: {resp.url}"
)
elif 500 <= resp.status_code < 600:
http_error_msg = (
f"{resp.status_code} Server Error: {reason} for url: {resp.url}"
)
if http_error_msg:
raise NetworkConnectionError(http_error_msg, response=resp)
def response_chunks(
response: Response, chunk_size: int = CONTENT_CHUNK_SIZE
) -> Generator[bytes, None, None]:
"""Given a requests Response, provide the data chunks."""
try:
# Special case for urllib3.
for chunk in response.raw.stream(
chunk_size,
# We use decode_content=False here because we don't
# want urllib3 to mess with the raw bytes we get
# from the server. If we decompress inside of
# urllib3 then we cannot verify the checksum
# because the checksum will be of the compressed
# file. This breakage will only occur if the
# server adds a Content-Encoding header, which
# depends on how the server was configured:
# - Some servers will notice that the file isn't a
# compressible file and will leave the file alone
# and with an empty Content-Encoding
# - Some servers will notice that the file is
# already compressed and will leave the file
# alone and will add a Content-Encoding: gzip
# header
# - Some servers won't notice anything at all and
# will take a file that's already been compressed
# and compress it again and set the
# Content-Encoding: gzip header
#
# By setting this not to decode automatically we
# hope to eliminate problems with the second case.
decode_content=False,
):
yield chunk
except AttributeError:
# Standard file-like object.
while True:
chunk = response.raw.read(chunk_size)
if not chunk:
break
yield chunk

View File

@@ -0,0 +1,60 @@
"""xmlrpclib.Transport implementation
"""
import logging
import urllib.parse
import xmlrpc.client
from typing import TYPE_CHECKING, Tuple
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
if TYPE_CHECKING:
from xmlrpc.client import _HostType, _Marshallable
logger = logging.getLogger(__name__)
class PipXmlrpcTransport(xmlrpc.client.Transport):
"""Provide a `xmlrpclib.Transport` implementation via a `PipSession`
object.
"""
def __init__(
self, index_url: str, session: PipSession, use_datetime: bool = False
) -> None:
super().__init__(use_datetime)
index_parts = urllib.parse.urlparse(index_url)
self._scheme = index_parts.scheme
self._session = session
def request(
self,
host: "_HostType",
handler: str,
request_body: bytes,
verbose: bool = False,
) -> Tuple["_Marshallable", ...]:
assert isinstance(host, str)
parts = (self._scheme, host, handler, None, None, None)
url = urllib.parse.urlunparse(parts)
try:
headers = {"Content-Type": "text/xml"}
response = self._session.post(
url,
data=request_body,
headers=headers,
stream=True,
)
raise_for_status(response)
self.verbose = verbose
return self.parse_response(response.raw)
except NetworkConnectionError as exc:
assert exc.response
logger.critical(
"HTTP error %s while getting %s",
exc.response.status_code,
url,
)
raise