mirror of
https://github.com/NomaDamas/k-skill.git
synced 2026-06-24 02:04:11 +00:00
Compare commits
No commits in common. "main" and "changeset-release/main" have entirely different histories.
main
...
changeset-
609 changed files with 213336 additions and 3993 deletions
247
.cache/python-test-venv/bin/Activate.ps1
Normal file
247
.cache/python-test-venv/bin/Activate.ps1
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (i.e. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
# Script parameters. Both are optional; the activation logic later in the
# script supplies a fallback for whichever one is omitted.
Param(
    # Directory of the virtual environment to activate.
    [Parameter(Mandatory = $false)] [String] $VenvDir,

    # Prompt prefix to show while the environment is active.
    [Parameter(Mandatory = $false)] [String] $Prompt
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
|
||||
.Synopsis
|
||||
Remove all shell session elements added by the Activate script, including the
|
||||
removal of the virtual environment's Python executable from the beginning of
|
||||
the PATH variable.
|
||||
|
||||
.Parameter NonDestructive
|
||||
If present, do not remove this function from the global namespace for the
|
||||
session.
|
||||
|
||||
#>
|
||||
function global:deactivate ([switch]$NonDestructive) {
    # Undo everything the Activate script changed in this session.

    # Items that were saved under an _OLD_VIRTUAL_* name and must be copied
    # back to their original name, in this order: prompt, PYTHONHOME, PATH.
    $savedItems = @(
        @{ Saved = 'Function:_OLD_VIRTUAL_PROMPT'; Original = 'Function:prompt' },
        @{ Saved = 'Env:_OLD_VIRTUAL_PYTHONHOME'; Original = 'Env:PYTHONHOME' },
        @{ Saved = 'Env:_OLD_VIRTUAL_PATH'; Original = 'Env:PATH' }
    )
    foreach ($entry in $savedItems) {
        if (Test-Path -Path $entry.Saved) {
            Copy-Item -Path $entry.Saved -Destination $entry.Original
            Remove-Item -Path $entry.Saved
        }
    }

    # Environment variables that have no prior value to restore; they are
    # simply removed when present.
    foreach ($envItem in @('Env:VIRTUAL_ENV', 'Env:VIRTUAL_ENV_PROMPT')) {
        if (Test-Path -Path $envItem) {
            Remove-Item -Path $envItem
        }
    }

    # The prompt prefix variable is created read-only, hence -Force.
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Unless the caller asked to keep it, remove this function as well.
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
|
||||
|
||||
<#
|
||||
.Description
|
||||
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
||||
given folder, and returns them in a map.
|
||||
|
||||
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
||||
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
||||
then it is considered a `key = value` line. The left hand string is the key,
|
||||
the right hand is the value.
|
||||
|
||||
If the value starts with a `'` or a `"` then the first and last character is
|
||||
stripped from the value before being captured.
|
||||
|
||||
.Parameter ConfigDir
|
||||
Path to the directory that contains the `pyvenv.cfg` file.
|
||||
#>
|
||||
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    # Parse "$ConfigDir/pyvenv.cfg" into a hashtable of key/value strings.
    # Returns an empty hashtable when the file cannot be resolved.
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            # Split on the first '=' only, so values may themselves contain '='.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                # The length guard keeps a value that is a lone quote character
                # from reaching Substring(1, -1), which would raise an error.
                if ($val.Length -ge 2 -and "'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>
|
||||
|
||||
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    # Strip any trailing path separators from the parent directory's path.
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    # Read-only so the prefix cannot be changed by accident; deactivate
    # removes it with -Force.
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
||||
70
.cache/python-test-venv/bin/activate
Normal file
70
.cache/python-test-venv/bin/activate
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# This file must be used with "source bin/activate" *from bash*
|
||||
# You cannot run it directly
|
||||
|
||||
deactivate () {
|
||||
# reset old environment variables
|
||||
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
|
||||
PATH="${_OLD_VIRTUAL_PATH:-}"
|
||||
export PATH
|
||||
unset _OLD_VIRTUAL_PATH
|
||||
fi
|
||||
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
|
||||
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
|
||||
export PYTHONHOME
|
||||
unset _OLD_VIRTUAL_PYTHONHOME
|
||||
fi
|
||||
|
||||
# Call hash to forget past commands. Without forgetting
|
||||
# past commands the $PATH changes we made may not be respected
|
||||
hash -r 2> /dev/null
|
||||
|
||||
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
|
||||
PS1="${_OLD_VIRTUAL_PS1:-}"
|
||||
export PS1
|
||||
unset _OLD_VIRTUAL_PS1
|
||||
fi
|
||||
|
||||
unset VIRTUAL_ENV
|
||||
unset VIRTUAL_ENV_PROMPT
|
||||
if [ ! "${1:-}" = "nondestructive" ] ; then
|
||||
# Self destruct!
|
||||
unset -f deactivate
|
||||
fi
|
||||
}
|
||||
|
||||
# Start from a clean slate: undo any earlier activation but keep the
# deactivate function defined.
deactivate nondestructive

# On Windows a path can contain colons and backslashes and has to be converted:
# D:\path\to\venv becomes /d/path/to/venv on MSYS and
# /cygdrive/d/path/to/venv on Cygwin. Everywhere else the path is used as-is.
case "${OSTYPE:-}" in
    cygwin|msys)
        export VIRTUAL_ENV=$(cygpath /home/runner/work/k-skill/k-skill/.cache/python-test-venv)
        ;;
    *)
        export VIRTUAL_ENV=/home/runner/work/k-skill/k-skill/.cache/python-test-venv
        ;;
esac

# Remember the current PATH, then put the environment's bin directory first.
_OLD_VIRTUAL_PATH="$PATH"
export PATH="$VIRTUAL_ENV/"bin":$PATH"

# Unset PYTHONHOME if it is set to a non-empty value, remembering the old value.
# This will not trigger if PYTHONHOME is set to the empty string (which is bad anyway);
# could use `if (set -u; : $PYTHONHOME) ;` in bash.
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

# Prefix the prompt with the environment name unless the user opted out.
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    export PS1='(python-test-venv) '"${PS1:-}"
    export VIRTUAL_ENV_PROMPT='(python-test-venv) '
fi

# Drop the shell's cache of command locations; otherwise the PATH change
# above may not be respected.
hash -r 2> /dev/null
|
||||
27
.cache/python-test-venv/bin/activate.csh
Normal file
27
.cache/python-test-venv/bin/activate.csh
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# This file must be used with "source bin/activate.csh" *from csh*.
|
||||
# You cannot run it directly.
|
||||
|
||||
# Created by Davide Di Blasi <davidedb@gmail.com>.
|
||||
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
|
||||
|
||||
# "deactivate" restores PATH and the prompt from their saved copies (when
# those exist), clears the VIRTUAL_ENV variables, and removes itself unless
# it was invoked as "deactivate nondestructive".
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'

# Undo any earlier activation, keeping the alias itself.
deactivate nondestructive

setenv VIRTUAL_ENV /home/runner/work/k-skill/k-skill/.cache/python-test-venv

# Remember the current PATH, then put the environment's bin directory first.
set _OLD_VIRTUAL_PATH="${PATH}"
setenv PATH "${VIRTUAL_ENV}/bin:${PATH}"

# Remember the current prompt so deactivate can put it back.
set _OLD_VIRTUAL_PROMPT="${prompt}"

# Prefix the prompt with the environment name unless the user opted out.
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = '(python-test-venv) '"${prompt}"
    setenv VIRTUAL_ENV_PROMPT '(python-test-venv) '
endif

alias pydoc python -m pydoc

# Rebuild the command hash table so the new PATH takes effect.
rehash
|
||||
69
.cache/python-test-venv/bin/activate.fish
Normal file
69
.cache/python-test-venv/bin/activate.fish
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
|
||||
# (https://fishshell.com/). You cannot run it directly.
|
||||
|
||||
function deactivate -d "Exit virtual environment and return to normal shell environment"
    # Put PATH and PYTHONHOME back from their saved copies, when present.
    if test -n "$_OLD_VIRTUAL_PATH"
        set --global --export PATH $_OLD_VIRTUAL_PATH
        set --erase _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set --global --export PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set --erase _OLD_VIRTUAL_PYTHONHOME
    end

    # Put the original prompt function back if activation replaced it.
    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        set --erase _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions --query _old_fish_prompt
            functions --erase fish_prompt
            functions --copy _old_fish_prompt fish_prompt
            functions --erase _old_fish_prompt
        end
    end

    set --erase VIRTUAL_ENV
    set --erase VIRTUAL_ENV_PROMPT

    # Remove this function too, unless called as "deactivate nondestructive".
    if test "$argv[1]" != "nondestructive"
        functions --erase deactivate
    end
end
|
||||
|
||||
# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV /home/runner/work/k-skill/k-skill/.cache/python-test-venv

# Remember the current PATH, then put the environment's bin directory first.
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) '(python-test-venv) ' (set_color normal)

        # Restore the return status of the previous command.
        # `source` reads the piped text from stdin; it replaces the
        # deprecated `.` alias for the same command.
        echo "exit $old_status" | source
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    # Record that the prompt was overridden so deactivate knows to restore it.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT '(python-test-venv) '
end
|
||||
8
.cache/python-test-venv/bin/pip
Executable file
8
.cache/python-test-venv/bin/pip
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/home/runner/work/k-skill/k-skill/.cache/python-test-venv/bin/python3
# -*- coding: utf-8 -*-
# Launcher stub: hands control to pip's command-line entry point.
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop an optional trailing "-script.pyw" or ".exe" from the program name.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
||||
8
.cache/python-test-venv/bin/pip3
Executable file
8
.cache/python-test-venv/bin/pip3
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/home/runner/work/k-skill/k-skill/.cache/python-test-venv/bin/python3
# -*- coding: utf-8 -*-
# Launcher stub: hands control to pip's command-line entry point.
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop an optional trailing "-script.pyw" or ".exe" from the program name.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
||||
8
.cache/python-test-venv/bin/pip3.12
Executable file
8
.cache/python-test-venv/bin/pip3.12
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/home/runner/work/k-skill/k-skill/.cache/python-test-venv/bin/python3
# -*- coding: utf-8 -*-
# Launcher stub: hands control to pip's command-line entry point.
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop an optional trailing "-script.pyw" or ".exe" from the program name.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
||||
1
.cache/python-test-venv/bin/python
Symbolic link
1
.cache/python-test-venv/bin/python
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
python3
|
||||
1
.cache/python-test-venv/bin/python3
Symbolic link
1
.cache/python-test-venv/bin/python3
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
/usr/bin/python3
|
||||
1
.cache/python-test-venv/bin/python3.12
Symbolic link
1
.cache/python-test-venv/bin/python3.12
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
python3
|
||||
|
|
@ -0,0 +1 @@
|
|||
pip
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
Metadata-Version: 2.4
|
||||
Name: beautifulsoup4
|
||||
Version: 4.14.3
|
||||
Summary: Screen-scraping library
|
||||
Project-URL: Download, https://www.crummy.com/software/BeautifulSoup/bs4/download/
|
||||
Project-URL: Homepage, https://www.crummy.com/software/BeautifulSoup/bs4/
|
||||
Author-email: Leonard Richardson <leonardr@segfault.org>
|
||||
License: MIT License
|
||||
License-File: AUTHORS
|
||||
License-File: LICENSE
|
||||
Keywords: HTML,XML,parse,soup
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: Text Processing :: Markup :: HTML
|
||||
Classifier: Topic :: Text Processing :: Markup :: SGML
|
||||
Classifier: Topic :: Text Processing :: Markup :: XML
|
||||
Requires-Python: >=3.7.0
|
||||
Requires-Dist: soupsieve>=1.6.1
|
||||
Requires-Dist: typing-extensions>=4.0.0
|
||||
Provides-Extra: cchardet
|
||||
Requires-Dist: cchardet; extra == 'cchardet'
|
||||
Provides-Extra: chardet
|
||||
Requires-Dist: chardet; extra == 'chardet'
|
||||
Provides-Extra: charset-normalizer
|
||||
Requires-Dist: charset-normalizer; extra == 'charset-normalizer'
|
||||
Provides-Extra: html5lib
|
||||
Requires-Dist: html5lib; extra == 'html5lib'
|
||||
Provides-Extra: lxml
|
||||
Requires-Dist: lxml; extra == 'lxml'
|
||||
Description-Content-Type: text/markdown
|
||||
|
||||
Beautiful Soup is a library that makes it easy to scrape information
|
||||
from web pages. It sits atop an HTML or XML parser, providing Pythonic
|
||||
idioms for iterating, searching, and modifying the parse tree.
|
||||
|
||||
# Quick start
|
||||
|
||||
```
|
||||
>>> from bs4 import BeautifulSoup
|
||||
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
|
||||
>>> print(soup.prettify())
|
||||
<html>
|
||||
<body>
|
||||
<p>
|
||||
Some
|
||||
<b>
|
||||
bad
|
||||
<i>
|
||||
HTML
|
||||
</i>
|
||||
</b>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
>>> soup.find(string="bad")
|
||||
'bad'
|
||||
>>> soup.i
|
||||
<i>HTML</i>
|
||||
#
|
||||
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
|
||||
#
|
||||
>>> print(soup.prettify())
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<tag1>
|
||||
Some
|
||||
<tag2/>
|
||||
bad
|
||||
<tag3>
|
||||
XML
|
||||
</tag3>
|
||||
</tag1>
|
||||
```
|
||||
|
||||
To go beyond the basics, [comprehensive documentation is available](https://www.crummy.com/software/BeautifulSoup/bs4/doc/).
|
||||
|
||||
# Links
|
||||
|
||||
* [Homepage](https://www.crummy.com/software/BeautifulSoup/bs4/)
|
||||
* [Documentation](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
|
||||
* [Discussion group](https://groups.google.com/group/beautifulsoup/)
|
||||
* [Development](https://code.launchpad.net/beautifulsoup/)
|
||||
* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/)
|
||||
* [Complete changelog](https://git.launchpad.net/beautifulsoup/tree/CHANGELOG)
|
||||
|
||||
# Note on Python 2 sunsetting
|
||||
|
||||
Beautiful Soup's support for Python 2 was discontinued on December 31,
|
||||
2020: one year after the sunset date for Python 2 itself. From this
|
||||
point onward, new Beautiful Soup development will exclusively target
|
||||
Python 3. The final release of Beautiful Soup 4 to support Python 2
|
||||
was 4.9.3.
|
||||
|
||||
# Supporting the project
|
||||
|
||||
If you use Beautiful Soup as part of your professional work, please consider a
|
||||
[Tidelift subscription](https://tidelift.com/subscription/pkg/pypi-beautifulsoup4?utm_source=pypi-beautifulsoup4&utm_medium=referral&utm_campaign=readme).
|
||||
This will support many of the free software projects your organization
|
||||
depends on, not just Beautiful Soup.
|
||||
|
||||
If you use Beautiful Soup for personal projects, the best way to say
|
||||
thank you is to read
|
||||
[Tool Safety](https://www.crummy.com/software/BeautifulSoup/zine/), a zine I
|
||||
wrote about what Beautiful Soup has taught me about software
|
||||
development.
|
||||
|
||||
# Building the documentation
|
||||
|
||||
The bs4/doc/ directory contains full documentation in Sphinx
|
||||
format. Run `make html` in that directory to create HTML
|
||||
documentation.
|
||||
|
||||
# Running the unit tests
|
||||
|
||||
Beautiful Soup supports unit test discovery using Pytest:
|
||||
|
||||
```
|
||||
$ pytest
|
||||
```
|
||||
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
beautifulsoup4-4.14.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
beautifulsoup4-4.14.3.dist-info/METADATA,sha256=Ac93vA8Xp9FtgOcKXFM8ESfVdztimUfJ3WUpVlhKtsY,3812
|
||||
beautifulsoup4-4.14.3.dist-info/RECORD,,
|
||||
beautifulsoup4-4.14.3.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
beautifulsoup4-4.14.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
||||
beautifulsoup4-4.14.3.dist-info/licenses/AUTHORS,sha256=uYkjiRjh_aweRnF8tAW2PpJJeickE68NmJwd9siry28,2201
|
||||
beautifulsoup4-4.14.3.dist-info/licenses/LICENSE,sha256=VbTY1LHlvIbRDvrJG3TIe8t3UmsPW57a-LnNKtxzl7I,1441
|
||||
bs4/__init__.py,sha256=E7wiVp7oQK0JhdAYxpehZa8drv3W_sJv5oeTFiBfR5o,44386
|
||||
bs4/__pycache__/__init__.cpython-312.pyc,,
|
||||
bs4/__pycache__/_deprecation.cpython-312.pyc,,
|
||||
bs4/__pycache__/_typing.cpython-312.pyc,,
|
||||
bs4/__pycache__/_warnings.cpython-312.pyc,,
|
||||
bs4/__pycache__/css.cpython-312.pyc,,
|
||||
bs4/__pycache__/dammit.cpython-312.pyc,,
|
||||
bs4/__pycache__/diagnose.cpython-312.pyc,,
|
||||
bs4/__pycache__/element.cpython-312.pyc,,
|
||||
bs4/__pycache__/exceptions.cpython-312.pyc,,
|
||||
bs4/__pycache__/filter.cpython-312.pyc,,
|
||||
bs4/__pycache__/formatter.cpython-312.pyc,,
|
||||
bs4/_deprecation.py,sha256=niHJCk37APg8KEuFOa57ZXaxLdBmc_2V6uuaJqu7r30,2408
|
||||
bs4/_typing.py,sha256=zNcx7R1yCTK8WwtumP28hc7CJ3pMyZXj_VAeYaNXMZA,7549
|
||||
bs4/_warnings.py,sha256=ZuOETgcnEbZgw2N0nnNXn6wvtrn2ut7AF0d98bvkMFc,4711
|
||||
bs4/builder/__init__.py,sha256=Rl4qjOXvdyyyjayOFqbkgoUoo81IgoyKD-RwWeVK59g,31194
|
||||
bs4/builder/__pycache__/__init__.cpython-312.pyc,,
|
||||
bs4/builder/__pycache__/_html5lib.cpython-312.pyc,,
|
||||
bs4/builder/__pycache__/_htmlparser.cpython-312.pyc,,
|
||||
bs4/builder/__pycache__/_lxml.cpython-312.pyc,,
|
||||
bs4/builder/_html5lib.py,sha256=hL6xUk4_I2i5CMguFoYFlrI26cY4Dut7fOEQrUctHIM,23607
|
||||
bs4/builder/_htmlparser.py,sha256=CnULPQV2rm4vLojJABpQ7Xm9diddnEZx2Wcz_VTC1Mg,17445
|
||||
bs4/builder/_lxml.py,sha256=ks1e8boA_nOA2oomAhxeudccR6ThbEE-EllFqHRoPLA,18969
|
||||
bs4/css.py,sha256=_m_l_4SGWHnY620VJ21j_qQH1RX3p91sYVemgKxaLsM,12713
|
||||
bs4/dammit.py,sha256=ZJWa9K32X6N2imFHleqUq0ekf592weU1lvULN_WYWYk,57024
|
||||
bs4/diagnose.py,sha256=at98iuxyOrqec4V8iwkTIbNUqBCsq9Lr3fDAQx2129Y,7846
|
||||
bs4/element.py,sha256=oXmj7LG_2NpsDK90mq73q0PMK0FjFBIGSeTTJLVwwTc,120237
|
||||
bs4/exceptions.py,sha256=Q9FOadNe8QRvzDMaKSXe2Wtl8JK_oAZW7mbFZBVP_GE,951
|
||||
bs4/filter.py,sha256=rw8ZNhTDLEJVCEiSifou5tZR_3zBLeuvAyouY82qU_E,29201
|
||||
bs4/formatter.py,sha256=uBT0k6W8O5kJ9PCuJYjra97yoUqC-dlM9D_v-oRM0r8,10478
|
||||
bs4/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
Wheel-Version: 1.0
|
||||
Generator: hatchling 1.27.0
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
Behold, mortal, the origins of Beautiful Soup...
|
||||
================================================
|
||||
|
||||
Leonard Richardson is the primary maintainer.
|
||||
|
||||
Aaron DeVore, Isaac Muse and Chris Papademetrious have made
|
||||
significant contributions to the code base.
|
||||
|
||||
Mark Pilgrim provided the encoding detection code that forms the base
|
||||
of UnicodeDammit.
|
||||
|
||||
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
|
||||
Soup 4 working under Python 3.
|
||||
|
||||
Simon Willison wrote soupselect, which was used to make Beautiful Soup
|
||||
support CSS selectors. Isaac Muse wrote SoupSieve, which made it
|
||||
possible to _remove_ the CSS selector code from Beautiful Soup.
|
||||
|
||||
Sam Ruby helped with a lot of edge cases.
|
||||
|
||||
Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
|
||||
work in solving the nestable tags conundrum.
|
||||
|
||||
An incomplete list of people who have contributed patches to Beautiful
|
||||
Soup:
|
||||
|
||||
Istvan Albert, Andrew Lin, Anthony Baxter, Oliver Beattie, Andrew
|
||||
Boyko, Tony Chang, Francisco Canas, "Delong", Zephyr Fang, Fuzzy,
|
||||
Roman Gaufman, Yoni Gilad, Richie Hindle, Toshihiro Kamiya, Peteris
|
||||
Krumins, Kent Johnson, Marek Kapolka, Andreas Kostyrka, Roel Kramer,
|
||||
Ben Last, Robert Leftwich, Stefaan Lippens, "liquider", Staffan
|
||||
Malmgren, Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon",
|
||||
Ed Oskiewicz, Martijn Peters, Greg Phillips, Giles Radford, Stefano
|
||||
Revera, Arthur Rudolph, Marko Samastur, James Salter, Jouni Seppänen,
|
||||
Alexander Schmolck, Tim Shirley, Geoffrey Sneddon, Ville Skyttä,
|
||||
"Vikas", Jens Svalgaard, Andy Theyers, Eric Weiser, Glyn Webster, John
|
||||
Wiseman, Paul Wright, Danny Yoo
|
||||
|
||||
An incomplete list of people who made suggestions or found bugs or
|
||||
found ways to break Beautiful Soup:
|
||||
|
||||
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
|
||||
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
|
||||
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
|
||||
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
|
||||
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
|
||||
Summers, Dennis Sutch, Chris Smith, Aaron Swartz, Stuart
|
||||
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
|
||||
Sousa Rocha, Yichun Wei, Per Vognsen
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
Beautiful Soup is made available under the MIT license:
|
||||
|
||||
Copyright (c) Leonard Richardson
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
Beautiful Soup incorporates code from the html5lib library, which is
|
||||
also made available under the MIT license. Copyright (c) James Graham
|
||||
and other contributors
|
||||
|
||||
Beautiful Soup has an optional dependency on the soupsieve library,
|
||||
which is also made available under the MIT license. Copyright (c)
|
||||
Isaac Muse
|
||||
1174
.cache/python-test-venv/lib/python3.12/site-packages/bs4/__init__.py
Normal file
1174
.cache/python-test-venv/lib/python3.12/site-packages/bs4/__init__.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,80 @@
|
|||
"""Helper functions for deprecation.
|
||||
|
||||
This interface is itself unstable and may change without warning. Do
|
||||
not use these functions yourself, even as a joke. The underscores are
|
||||
there for a reason. No support will be given.
|
||||
|
||||
In particular, most of this will go away without warning once
|
||||
Beautiful Soup drops support for Python 3.12, since Python 3.13
|
||||
defines a `@warnings.deprecated()
|
||||
decorator. <https://peps.python.org/pep-0702/>`_
|
||||
"""
|
||||
|
||||
import functools
|
||||
import warnings
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
)
|
||||
|
||||
|
||||
def _deprecated_alias(old_name: str, new_name: str, version: str) -> property:
    """Alias one attribute name to another for backward compatibility.

    The returned property is meant to be assigned to ``old_name`` in a class
    body. Reading or writing it emits a ``DeprecationWarning`` and then
    forwards to the attribute called ``new_name`` on the same instance.

    :param old_name: The deprecated attribute name; used only in the
        warning text.
    :param new_name: The attribute that is actually read and written.
    :param version: The version in which ``old_name`` was deprecated; used
        only in the warning text.
    :return: A property with both a getter and a setter.

    :meta private:
    """

    @property  # type:ignore
    def alias(self) -> Any:
        ":meta private:"
        warnings.warn(
            f"Access to deprecated property {old_name}. (Replaced by {new_name}) -- Deprecated since version {version}.",
            DeprecationWarning,
            # Attribute the warning to the code touching the property.
            stacklevel=2,
        )
        return getattr(self, new_name)

    @alias.setter
    def alias(self, value: Any) -> None:
        ":meta private:"
        warnings.warn(
            f"Write to deprecated property {old_name}. (Replaced by {new_name}) -- Deprecated since version {version}.",
            DeprecationWarning,
            stacklevel=2,
        )
        setattr(self, new_name, value)

    return alias
|
||||
|
||||
|
||||
def _deprecated_function_alias(
    old_name: str, new_name: str, version: str
) -> Callable[..., Any]:
    """Alias one method name to another for backward compatibility.

    Builds a function, meant to be assigned to ``old_name`` in a class
    body, that issues a `DeprecationWarning` and then calls the method
    named ``new_name`` on the same object with the same arguments.

    :param old_name: The deprecated method name. Used only in the
        warning text.
    :param new_name: The name of the method that is actually called.
    :param version: The version in which ``old_name`` was deprecated.
        Used only in the warning text.
    :return: The forwarding function.

    :meta private:
    """

    def alias(self, *args: Any, **kwargs: Any) -> Any:
        ":meta private:"
        warnings.warn(
            f"Call to deprecated method {old_name}. (Replaced by {new_name}) -- Deprecated since version {version}.",
            DeprecationWarning,
            # Report the caller's line, not this wrapper.
            stacklevel=2,
        )
        # Look the replacement up at call time so subclass overrides of
        # new_name are honored.
        return getattr(self, new_name)(*args, **kwargs)

    return alias
|
||||
|
||||
|
||||
def _deprecated(replaced_by: str, version: str) -> Callable:
    """Build a decorator that marks a function or method as deprecated.

    The decorated callable behaves exactly like the original, except that
    every call first issues a `DeprecationWarning` naming the replacement.

    :param replaced_by: The name of the replacement. Used only in the
        warning text.
    :param version: The version in which the callable was deprecated.
        Used only in the warning text.
    :return: A decorator to apply to the deprecated callable.

    :meta private:
    """

    def deprecate(func: Callable) -> Callable:
        # No docstring on the wrapper: functools.wraps replaces its
        # __doc__ (and __name__ etc.) with func's own.
        @functools.wraps(func)
        def with_warning(*args: Any, **kwargs: Any) -> Any:
            warnings.warn(
                f"Call to deprecated method {func.__name__}. (Replaced by {replaced_by}) -- Deprecated since version {version}.",
                DeprecationWarning,
                # Report the caller's line, not this wrapper.
                stacklevel=2,
            )
            return func(*args, **kwargs)

        return with_warning

    return deprecate
|
||||
|
|
@ -0,0 +1,205 @@
|
|||
# Custom type aliases used throughout Beautiful Soup to improve readability.
|
||||
|
||||
# Notes on improvements to the type system in newer versions of Python
|
||||
# that can be used once Beautiful Soup drops support for older
|
||||
# versions:
|
||||
#
|
||||
# * ClassVar can be put on class variables now.
|
||||
# * In 3.10, x|y is an accepted shorthand for Union[x,y].
|
||||
# * In 3.10, TypeAlias gains capabilities that can be used to
|
||||
# improve the tree matching types (I don't remember what, exactly).
|
||||
# * In 3.9 it's possible to specialize the re.Match type,
|
||||
# e.g. re.Match[str]. In 3.8 there's a typing.re namespace for this,
|
||||
# but it's removed in 3.12, so to support the widest possible set of
|
||||
# versions I'm not using it.
|
||||
|
||||
from typing_extensions import (
|
||||
runtime_checkable,
|
||||
Protocol,
|
||||
TypeAlias,
|
||||
)
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
IO,
|
||||
Iterable,
|
||||
Mapping,
|
||||
Optional,
|
||||
Pattern,
|
||||
TYPE_CHECKING,
|
||||
Union,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4.element import (
|
||||
AttributeValueList,
|
||||
NamespacedAttribute,
|
||||
NavigableString,
|
||||
PageElement,
|
||||
ResultSet,
|
||||
Tag,
|
||||
)
|
||||
|
||||
|
||||
@runtime_checkable
class _RegularExpressionProtocol(Protocol):
    """A protocol object which can accept either Python's built-in
    `re.Pattern` objects, or the similar ``Regex`` objects defined by the
    third-party ``regex`` package.

    Because the protocol is runtime-checkable, ``isinstance`` accepts any
    object that has both members declared below.
    """

    def search(
        self, string: str, pos: int = ..., endpos: int = ...
    ) -> Optional[Any]:
        """Look for a match in ``string``.

        The return type is declared as ``Optional[Any]``.
        """
        ...

    @property
    def pattern(self) -> str:
        """The pattern, as a string."""
        ...
|
||||
|
||||
|
||||
# Aliases for markup in various stages of processing.
|
||||
#
|
||||
#: The rawest form of markup: either a string, bytestring, or an open filehandle.
|
||||
_IncomingMarkup: TypeAlias = Union[str, bytes, IO[str], IO[bytes]]
|
||||
|
||||
#: Markup that is in memory but has (potentially) yet to be converted
|
||||
#: to Unicode.
|
||||
_RawMarkup: TypeAlias = Union[str, bytes]
|
||||
|
||||
# Aliases for character encodings
|
||||
#
|
||||
|
||||
#: A data encoding.
|
||||
_Encoding: TypeAlias = str
|
||||
|
||||
#: One or more data encodings.
|
||||
_Encodings: TypeAlias = Iterable[_Encoding]
|
||||
|
||||
# Aliases for XML namespaces
|
||||
#
|
||||
|
||||
#: The prefix for an XML namespace.
|
||||
_NamespacePrefix: TypeAlias = str
|
||||
|
||||
#: The URL of an XML namespace
|
||||
_NamespaceURL: TypeAlias = str
|
||||
|
||||
#: A mapping of prefixes to namespace URLs.
|
||||
_NamespaceMapping: TypeAlias = Dict[_NamespacePrefix, _NamespaceURL]
|
||||
|
||||
#: A mapping of namespace URLs to prefixes
|
||||
_InvertedNamespaceMapping: TypeAlias = Dict[_NamespaceURL, _NamespacePrefix]
|
||||
|
||||
# Aliases for the attribute values associated with HTML/XML tags.
|
||||
#
|
||||
|
||||
#: The value associated with an HTML or XML attribute. This is the
|
||||
#: relatively unprocessed value Beautiful Soup expects to come from a
|
||||
#: `TreeBuilder`.
|
||||
_RawAttributeValue: TypeAlias = str
|
||||
|
||||
#: A dictionary of names to `_RawAttributeValue` objects. This is how
|
||||
#: Beautiful Soup expects a `TreeBuilder` to represent a tag's
|
||||
#: attribute values.
|
||||
_RawAttributeValues: TypeAlias = (
|
||||
"Mapping[Union[str, NamespacedAttribute], _RawAttributeValue]"
|
||||
)
|
||||
|
||||
#: An attribute value in its final form, as stored in the
|
||||
# `Tag` class, after it has been processed and (in some cases)
|
||||
# split into a list of strings.
|
||||
_AttributeValue: TypeAlias = Union[str, "AttributeValueList"]
|
||||
|
||||
#: A dictionary of names to :py:data:`_AttributeValue` objects. This is what
|
||||
#: a tag's attributes look like after processing.
|
||||
_AttributeValues: TypeAlias = Dict[str, _AttributeValue]
|
||||
|
||||
#: The methods that deal with turning :py:data:`_RawAttributeValue` into
|
||||
#: :py:data:`_AttributeValue` may be called several times, even after the values
|
||||
#: are already processed (e.g. when cloning a tag), so they need to
|
||||
#: be able to acommodate both possibilities.
|
||||
_RawOrProcessedAttributeValues: TypeAlias = Union[_RawAttributeValues, _AttributeValues]
|
||||
|
||||
#: A number of tree manipulation methods can take either a `PageElement` or a
|
||||
#: normal Python string (which will be converted to a `NavigableString`).
|
||||
_InsertableElement: TypeAlias = Union["PageElement", str]
|
||||
|
||||
# Aliases to represent the many possibilities for matching bits of a
|
||||
# parse tree.
|
||||
#
|
||||
# This is very complicated because we're applying a formal type system
|
||||
# to some very DWIM code. The types we end up with will be the types
|
||||
# of the arguments to the SoupStrainer constructor and (more
|
||||
# familiarly to Beautiful Soup users) the find* methods.
|
||||
|
||||
#: A function that takes a PageElement and returns a yes-or-no answer.
|
||||
_PageElementMatchFunction: TypeAlias = Callable[["PageElement"], bool]
|
||||
|
||||
#: A function that takes the raw parsed ingredients of a markup tag
|
||||
#: and returns a yes-or-no answer.
|
||||
# Not necessary at the moment.
|
||||
# _AllowTagCreationFunction:TypeAlias = Callable[[Optional[str], str, Optional[_RawAttributeValues]], bool]
|
||||
|
||||
#: A function that takes the raw parsed ingredients of a markup string node
|
||||
#: and returns a yes-or-no answer.
|
||||
# Not necessary at the moment.
|
||||
# _AllowStringCreationFunction:TypeAlias = Callable[[Optional[str]], bool]
|
||||
|
||||
#: A function that takes a `Tag` and returns a yes-or-no answer.
|
||||
#: A `TagNameMatchRule` expects this kind of function, if you're
|
||||
#: going to pass it a function.
|
||||
_TagMatchFunction: TypeAlias = Callable[["Tag"], bool]
|
||||
|
||||
#: A function that takes a string (or None) and returns a yes-or-no
|
||||
#: answer. An `AttributeValueMatchRule` expects this kind of function, if
|
||||
#: you're going to pass it a function.
|
||||
_NullableStringMatchFunction: TypeAlias = Callable[[Optional[str]], bool]
|
||||
|
||||
#: A function that takes a string and returns a yes-or-no answer. A
|
||||
# `StringMatchRule` expects this kind of function, if you're going to
|
||||
# pass it a function.
|
||||
_StringMatchFunction: TypeAlias = Callable[[str], bool]
|
||||
|
||||
#: Either a tag name, an attribute value or a string can be matched
|
||||
#: against a string, bytestring, regular expression, or a boolean.
|
||||
_BaseStrainable: TypeAlias = Union[str, bytes, Pattern[str], bool]
|
||||
|
||||
#: A tag can be matched either with the `_BaseStrainable` options, or
|
||||
#: using a function that takes the `Tag` as its sole argument.
|
||||
_BaseStrainableElement: TypeAlias = Union[_BaseStrainable, _TagMatchFunction]
|
||||
|
||||
#: A tag's attribute value can be matched either with the
|
||||
#: `_BaseStrainable` options, or using a function that takes that
|
||||
#: value as its sole argument.
|
||||
_BaseStrainableAttribute: TypeAlias = Union[_BaseStrainable, _NullableStringMatchFunction]
|
||||
|
||||
#: A tag can be matched using either a single criterion or a list of
|
||||
#: criteria.
|
||||
_StrainableElement: TypeAlias = Union[
|
||||
_BaseStrainableElement, Iterable[_BaseStrainableElement]
|
||||
]
|
||||
|
||||
#: An attribute value can be matched using either a single criterion
|
||||
#: or a list of criteria.
|
||||
_StrainableAttribute: TypeAlias = Union[
|
||||
_BaseStrainableAttribute, Iterable[_BaseStrainableAttribute]
|
||||
]
|
||||
|
||||
#: An string can be matched using the same techniques as
|
||||
#: an attribute value.
|
||||
_StrainableString: TypeAlias = _StrainableAttribute
|
||||
|
||||
#: A dictionary may be used to match against multiple attribute vlaues at once.
|
||||
_StrainableAttributes: TypeAlias = Dict[str, _StrainableAttribute]
|
||||
|
||||
#: Many Beautiful soup methods return a PageElement or an ResultSet of
|
||||
#: PageElements. A PageElement is either a Tag or a NavigableString.
|
||||
#: These convenience aliases make it easier for IDE users to see which methods
|
||||
#: are available on the objects they're dealing with.
|
||||
_OneElement: TypeAlias = Union["PageElement", "Tag", "NavigableString"]
|
||||
_AtMostOneElement: TypeAlias = Optional[_OneElement]
|
||||
_AtMostOneTag: TypeAlias = Optional["Tag"]
|
||||
_AtMostOneNavigableString: TypeAlias = Optional["NavigableString"]
|
||||
_QueryResults: TypeAlias = "ResultSet[_OneElement]"
|
||||
_SomeTags: TypeAlias = "ResultSet[Tag]"
|
||||
_SomeNavigableStrings: TypeAlias = "ResultSet[NavigableString]"
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
"""Define some custom warnings."""
|
||||
|
||||
|
||||
class GuessedAtParserWarning(UserWarning):
    """The warning issued when BeautifulSoup has to guess what parser to
    use -- probably because no parser was specified in the constructor.
    """

    # A %-style template. It expects the mapping keys markup_type, parser,
    # line_number and filename.
    MESSAGE: str = """No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system ("%(parser)s"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features="%(parser)s"' to the BeautifulSoup constructor.
"""
|
||||
|
||||
|
||||
class UnusualUsageWarning(UserWarning):
    """A superclass for warnings issued when Beautiful Soup sees
    something that is typically the result of a mistake in the calling
    code, but might be intentional on the part of the user. If it is
    in fact intentional, you can filter the individual warning class
    to get rid of the warning. If you don't like Beautiful Soup
    second-guessing what you are doing, you can filter the
    UnusualUsageWarning class itself and get rid of these entirely.
    """
|
||||
|
||||
|
||||
class MarkupResemblesLocatorWarning(UnusualUsageWarning):
    """The warning issued when BeautifulSoup is given 'markup' that
    actually looks like a resource locator -- a URL or a path to a file
    on disk.
    """

    # NOTE(review): leading whitespace inside the message literals below
    # is reproduced exactly as it appears in the reviewed text; confirm it
    # against the upstream file before relying on exact string contents.

    # Shared tail appended to both messages below. A %-style template
    # that expects the mapping key ``what``.
    #: :meta private:
    GENERIC_MESSAGE: str = """

However, if you want to parse some data that happens to look like a %(what)s, then nothing has gone wrong: you are using Beautiful Soup correctly, and this warning is spurious and can be filtered. To make this warning go away, run this code before calling the BeautifulSoup constructor:

from bs4 import MarkupResemblesLocatorWarning
import warnings

warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
"""

    # Message for markup that looks like a URL.
    URL_MESSAGE: str = (
        """The input passed in on this line looks more like a URL than HTML or XML.

If you meant to use Beautiful Soup to parse the web page found at a certain URL, then something has gone wrong. You should use an Python package like 'requests' to fetch the content behind the URL. Once you have the content as a string, you can feed that string into Beautiful Soup."""
        + GENERIC_MESSAGE
    )

    # Message for markup that looks like a filename.
    FILENAME_MESSAGE: str = (
        """The input passed in on this line looks more like a filename than HTML or XML.

If you meant to use Beautiful Soup to parse the contents of a file on disk, then something has gone wrong. You should open the file first, using code like this:

filehandle = open(your filename)

You can then feed the open filehandle into Beautiful Soup instead of using the filename."""
        + GENERIC_MESSAGE
    )
|
||||
|
||||
|
||||
class AttributeResemblesVariableWarning(UnusualUsageWarning, SyntaxWarning):
    """The warning issued when Beautiful Soup suspects a provided
    attribute name may actually be the misspelled name of a Beautiful
    Soup variable. Generally speaking, this is only used in cases like
    "_class" where it's very unlikely the user would be referencing an
    XML attribute with that name.
    """

    # A %-style template. It expects the mapping keys original and
    # autocorrect, both rendered with %r.
    # NOTE(review): leading whitespace inside this literal is reproduced
    # exactly as it appears in the reviewed text; confirm against upstream.
    MESSAGE: str = """%(original)r is an unusual attribute name and is a common misspelling for %(autocorrect)r.

If you meant %(autocorrect)r, change your code to use it, and this warning will go away.

If you really did mean to check the %(original)r attribute, this warning is spurious and can be filtered. To make it go away, run this code before creating your BeautifulSoup object:

from bs4 import AttributeResemblesVariableWarning
import warnings

warnings.filterwarnings("ignore", category=AttributeResemblesVariableWarning)
"""
|
||||
|
||||
|
||||
class XMLParsedAsHTMLWarning(UnusualUsageWarning):
    """The warning issued when an HTML parser is used to parse
    XML that is not (as far as we can tell) XHTML.
    """

    # Plain text; contains no %-style placeholders.
    # NOTE(review): leading whitespace inside this literal is reproduced
    # exactly as it appears in the reviewed text; confirm against upstream.
    MESSAGE: str = """It looks like you're using an HTML parser to parse an XML document.

Assuming this really is an XML document, what you're doing might work, but you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the Python package 'lxml' installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.

If you want or need to use an HTML parser on this document, you can make this warning go away by filtering it. To do that, run this code before calling the BeautifulSoup constructor:

from bs4 import XMLParsedAsHTMLWarning
import warnings

warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
"""
|
||||
|
|
@ -0,0 +1,848 @@
|
|||
from __future__ import annotations
|
||||
|
||||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
|
||||
from collections import defaultdict
|
||||
import re
|
||||
from types import ModuleType
|
||||
from typing import (
|
||||
Any,
|
||||
cast,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Pattern,
|
||||
Set,
|
||||
Tuple,
|
||||
Type,
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
import warnings
|
||||
import sys
|
||||
from bs4.element import (
|
||||
AttributeDict,
|
||||
AttributeValueList,
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
RubyParenthesisString,
|
||||
RubyTextString,
|
||||
Stylesheet,
|
||||
Script,
|
||||
TemplateString,
|
||||
nonwhitespace_re,
|
||||
)
|
||||
|
||||
# Exceptions were moved to their own module in 4.13. Import here for
|
||||
# backwards compatibility.
|
||||
from bs4.exceptions import ParserRejectedMarkup
|
||||
|
||||
from bs4._typing import (
|
||||
_AttributeValues,
|
||||
_RawAttributeValue,
|
||||
)
|
||||
|
||||
from bs4._warnings import XMLParsedAsHTMLWarning
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import (
|
||||
NavigableString,
|
||||
Tag,
|
||||
)
|
||||
from bs4._typing import (
|
||||
_AttributeValue,
|
||||
_Encoding,
|
||||
_Encodings,
|
||||
_RawOrProcessedAttributeValues,
|
||||
_RawMarkup,
|
||||
)
|
||||
|
||||
# Some useful features for a TreeBuilder to have.
FAST = "fast"
PERMISSIVE = "permissive"
STRICT = "strict"
XML = "xml"
HTML = "html"
HTML_5 = "html5"

# The names exported by ``from <this module> import *``. This is the
# only assignment to ``__all__`` in this part of the module: an earlier
# list (which also named "SAXTreeBuilder") was dead code, because this
# assignment replaced it before anything could read it.
__all__ = [
    "TreeBuilderRegistry",
    "TreeBuilder",
    "HTMLTreeBuilder",
    "DetectsXMLParsedAsHTML",

    "ParserRejectedMarkup",  # backwards compatibility only as of 4.13.0
]
|
||||
|
||||
class TreeBuilderRegistry(object):
    """A way of looking up TreeBuilder subclasses by their name or by desired
    features.
    """

    # Feature name -> builders advertising it, most recently registered first.
    builders_for_feature: Dict[str, List[Type[TreeBuilder]]]
    # Every registered builder, most recently registered first.
    builders: List[Type[TreeBuilder]]

    def __init__(self) -> None:
        self.builders_for_feature = defaultdict(list)
        self.builders = []

    def register(self, treebuilder_class: Type[TreeBuilder]) -> None:
        """Register a treebuilder based on its advertised features.

        :param treebuilder_class: A subclass of `TreeBuilder`. its
          `TreeBuilder.features` attribute should list its features.
        """
        # Insert at the front so later registrations take precedence.
        for feature in treebuilder_class.features:
            self.builders_for_feature[feature].insert(0, treebuilder_class)
        self.builders.insert(0, treebuilder_class)

    def lookup(self, *features: str) -> Optional[Type[TreeBuilder]]:
        """Look up a TreeBuilder subclass with the desired features.

        :param features: A list of features to look for. If none are
          provided, the most recently registered TreeBuilder subclass
          will be used. A feature that no registered builder advertises
          is skipped rather than treated as a mismatch.
        :return: A TreeBuilder subclass, or None if no builder is
          registered, if none of the requested features is advertised by
          any builder, or if no single builder has all of the advertised
          ones.
        """
        if not self.builders:
            # There are no builders at all.
            return None

        if not features:
            # They didn't ask for any features. Give them the most
            # recently registered builder.
            return self.builders[0]

        # Go down the list of features in order, and eliminate any builders
        # that don't match every feature.
        candidates: Optional[List[Type[TreeBuilder]]] = None
        candidate_set = None
        for feature in features:
            # .get() rather than indexing, so that looking up an unknown
            # feature doesn't add an empty entry to the defaultdict.
            we_have_the_feature = self.builders_for_feature.get(feature, [])
            if not we_have_the_feature:
                continue
            if candidates is None:
                # The first recognized feature fixes the preference order.
                candidates = we_have_the_feature
                candidate_set = set(candidates)
            elif candidate_set is not None:
                # Eliminate any candidates that don't have this feature.
                candidate_set = candidate_set.intersection(set(we_have_the_feature))

        # The only valid candidates are the ones in candidate_set.
        # Go through the original list of candidates and pick the first one
        # that's in candidate_set.
        if candidate_set is None or candidates is None:
            return None
        for candidate in candidates:
            if candidate in candidate_set:
                return candidate
        return None


#: The `BeautifulSoup` constructor will take a list of features
#: and use it to look up `TreeBuilder` classes in this registry.
builder_registry: TreeBuilderRegistry = TreeBuilderRegistry()
|
||||
|
||||
|
||||
class TreeBuilder(object):
    """Turn a textual document into a Beautiful Soup object tree.

    This is an abstract superclass which smooths out the behavior of
    different parser libraries into a single, unified interface.

    :param multi_valued_attributes: If this is set to None, the
     TreeBuilder will not turn any values for attributes like
     'class' into lists. Setting this to a dictionary will
     customize this behavior; look at :py:attr:`bs4.builder.HTMLTreeBuilder.DEFAULT_CDATA_LIST_ATTRIBUTES`
     for an example.

     Internally, these are called "CDATA list attributes", but that
     probably doesn't make sense to an end-user, so the argument name
     is ``multi_valued_attributes``.

    :param preserve_whitespace_tags: A set of tags to treat
     the way <pre> tags are treated in HTML. Tags in this set
     are immune from pretty-printing; their contents will always be
     output as-is.

    :param string_containers: A dictionary mapping tag names to
     the classes that should be instantiated to contain the textual
     contents of those tags. The default is to use NavigableString
     for every tag, no matter what the name. You can override the
     default by changing :py:attr:`DEFAULT_STRING_CONTAINERS`.

    :param store_line_numbers: If the parser keeps track of the line
     numbers and positions of the original markup, that information
     will, by default, be stored in each corresponding
     :py:class:`bs4.element.Tag` object. You can turn this off by
     passing store_line_numbers=False; then Tag.sourcepos and
     Tag.sourceline will always be None. If the parser you're using
     doesn't keep track of this information, then store_line_numbers
     is irrelevant.

    :param empty_element_tags: The names of the tags that may be
     empty-element tags. If this ends up as None (the default for
     this class), any tag may be one; see `can_be_empty_element`.

    :param attribute_dict_class: The class stored as this builder's
     ``attribute_dict_class``. The default is Beautiful Soup's
     built-in `AttributeDict`.

    :param attribute_value_list_class: The value of a multi-valued attribute
     (such as HTML's 'class') will be stored in an instance of this
     class. The default is Beautiful Soup's built-in
     `AttributeValueList`, which is a normal Python list, and you
     will probably never need to change it.
    """

    # Sentinel meaning "argument not supplied". It is always compared by
    # identity, so None stays available as a real argument value.
    USE_DEFAULT: Any = object()  #: :meta private:

    def __init__(
        self,
        multi_valued_attributes: Dict[str, Set[str]] = USE_DEFAULT,
        preserve_whitespace_tags: Set[str] = USE_DEFAULT,
        store_line_numbers: bool = USE_DEFAULT,
        string_containers: Dict[str, Type[NavigableString]] = USE_DEFAULT,
        empty_element_tags: Set[str] = USE_DEFAULT,
        attribute_dict_class: Type[AttributeDict] = AttributeDict,
        attribute_value_list_class: Type[AttributeValueList] = AttributeValueList,
    ) -> None:
        self.soup = None
        # For every argument left at USE_DEFAULT, fall back to the
        # corresponding class-level DEFAULT_* value, which subclasses
        # can override.
        if multi_valued_attributes is self.USE_DEFAULT:
            multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
        self.cdata_list_attributes = multi_valued_attributes
        if preserve_whitespace_tags is self.USE_DEFAULT:
            preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
        self.preserve_whitespace_tags = preserve_whitespace_tags
        if empty_element_tags is self.USE_DEFAULT:
            self.empty_element_tags = self.DEFAULT_EMPTY_ELEMENT_TAGS
        else:
            self.empty_element_tags = empty_element_tags
        # TODO: store_line_numbers is probably irrelevant now that
        # the behavior of sourceline and sourcepos has been made consistent
        # everywhere.
        if store_line_numbers is self.USE_DEFAULT:
            store_line_numbers = self.TRACKS_LINE_NUMBERS
        self.store_line_numbers = store_line_numbers
        if string_containers is self.USE_DEFAULT:
            string_containers = self.DEFAULT_STRING_CONTAINERS
        self.string_containers = string_containers
        self.attribute_dict_class = attribute_dict_class
        self.attribute_value_list_class = attribute_value_list_class

    NAME: str = "[Unknown tree builder]"
    ALTERNATE_NAMES: Iterable[str] = []
    features: Iterable[str] = []

    is_xml: bool = False
    picklable: bool = False

    soup: Optional[BeautifulSoup]  #: :meta private:

    #: A tag will be considered an empty-element
    #: tag when and only when it has no contents.
    empty_element_tags: Optional[Set[str]] = None  #: :meta private:
    cdata_list_attributes: Dict[str, Set[str]]  #: :meta private:
    preserve_whitespace_tags: Set[str]  #: :meta private:
    string_containers: Dict[str, Type[NavigableString]]  #: :meta private:
    tracks_line_numbers: bool  #: :meta private:
    store_line_numbers: bool  #: :meta private:

    #: A value for these tag/attribute combinations is a space- or
    #: comma-separated list of CDATA, rather than a single CDATA.
    DEFAULT_CDATA_LIST_ATTRIBUTES: Dict[str, Set[str]] = defaultdict(set)

    #: Whitespace should be preserved inside these tags.
    DEFAULT_PRESERVE_WHITESPACE_TAGS: Set[str] = set()

    #: The textual contents of tags with these names should be
    #: instantiated with some class other than `bs4.element.NavigableString`.
    DEFAULT_STRING_CONTAINERS: Dict[str, Type[NavigableString]] = {}

    #: By default, tags are treated as empty-element tags if they have
    #: no contents--that is, using XML rules. HTMLTreeBuilder
    #: defines a different set of DEFAULT_EMPTY_ELEMENT_TAGS based on the
    #: HTML 4 and HTML5 standards.
    DEFAULT_EMPTY_ELEMENT_TAGS: Optional[Set[str]] = None

    #: Most parsers don't keep track of line numbers.
    TRACKS_LINE_NUMBERS: bool = False

    def initialize_soup(self, soup: BeautifulSoup) -> None:
        """The BeautifulSoup object has been initialized and is now
        being associated with the TreeBuilder.

        :param soup: A BeautifulSoup object.
        """
        self.soup = soup

    def reset(self) -> None:
        """Do any work necessary to reset the underlying parser
        for a new document.

        By default, this does nothing.
        """
        pass

    def can_be_empty_element(self, tag_name: str) -> bool:
        """Might a tag with this name be an empty-element tag?

        The final markup may or may not actually present this tag as
        self-closing.

        For instance: an HTMLTreeBuilder does not consider a <p> tag to be
        an empty-element tag (it's not in
        HTMLTreeBuilder.empty_element_tags). This means an empty <p> tag
        will be presented as "<p></p>", not "<p/>" or "<p>".

        The default implementation has no opinion about which tags are
        empty-element tags, so a tag will be presented as an
        empty-element tag if and only if it has no children.
        "<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
        be left alone.

        :param tag_name: The name of a markup tag.
        :return: True if `empty_element_tags` is None or contains
          ``tag_name``; False otherwise.
        """
        if self.empty_element_tags is None:
            return True
        return tag_name in self.empty_element_tags

    def feed(self, markup: _RawMarkup) -> None:
        """Run incoming markup through some parsing process.

        :raise NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def prepare_markup(
        self,
        markup: _RawMarkup,
        user_specified_encoding: Optional[_Encoding] = None,
        document_declared_encoding: Optional[_Encoding] = None,
        exclude_encodings: Optional[_Encodings] = None,
    ) -> Iterable[Tuple[_RawMarkup, Optional[_Encoding], Optional[_Encoding], bool]]:
        """Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: The markup that's about to be parsed.
        :param user_specified_encoding: The user asked to try this encoding
           to convert the markup into a Unicode string.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding. NOTE: This argument is not used by the
            calling code and can probably be removed.
        :param exclude_encodings: The user asked *not* to try any of
            these encodings.

        :yield: A series of 4-tuples: (markup, encoding, declared encoding,
            has undergone character replacement)

            Each 4-tuple represents a strategy that the parser can try
            to convert the document to Unicode and parse it. Each
            strategy will be tried in turn.

         By default, the only strategy is to parse the markup
         as-is. See `LXMLTreeBuilderForXML` and
         `HTMLParserTreeBuilder` for implementations that take into
         account the quirks of particular parsers.

        :meta private:

        """
        yield markup, None, None, False

    def test_fragment_to_document(self, fragment: str) -> str:
        """Wrap an HTML fragment to make it look like a document.

        Different parsers do this differently. For instance, lxml
        introduces an empty <head> tag, and html5lib
        doesn't. Abstracting this away lets us write simple tests
        which run HTML fragments through the parser and compare the
        results against other HTML fragments.

        This method should not be used outside of unit tests.

        :param fragment: A fragment of HTML.
        :return: A full HTML document. This base implementation returns
          the fragment unchanged.
        :meta private:
        """
        return fragment

    def set_up_substitutions(self, tag: Tag) -> bool:
        """Set up any substitutions that will need to be performed on
        a `Tag` when it's output as a string.

        By default, this does nothing. See `HTMLTreeBuilder` for a
        case where this is used.

        :return: Whether or not a substitution was performed.
        :meta private:
        """
        return False

    def _replace_cdata_list_attribute_values(
        self, tag_name: str, attrs: _RawOrProcessedAttributeValues
    ) -> _AttributeValues:
        """When an attribute value is associated with a tag that can
        have multiple values for that attribute, convert the string
        value to a list of strings.

        Basically, replaces class="foo bar" with class=["foo", "bar"]

        NOTE: This method modifies its input in place.

        :param tag_name: The name of a tag.
        :param attrs: A dictionary containing the tag's attributes.
           Any appropriate attribute values will be modified in place.
        :return: The modified dictionary that was originally passed in.
        """

        # First, cast the attrs dict to _AttributeValues. This might
        # not be accurate yet, but it will be by the time this method
        # returns.
        modified_attrs = cast(_AttributeValues, attrs)
        if not modified_attrs or not self.cdata_list_attributes:
            # Nothing to do.
            return modified_attrs

        # There is at least a possibility that we need to modify one of
        # the attribute values.
        #
        # The "*" entry applies to every tag; tag-specific entries are
        # looked up under the lowercased tag name.
        universal: Set[str] = self.cdata_list_attributes.get("*", set())
        tag_specific = self.cdata_list_attributes.get(tag_name.lower(), None)
        # Iterate over a snapshot of the keys, since values are
        # reassigned inside the loop.
        for attr in list(modified_attrs.keys()):
            modified_value: _AttributeValue
            if attr in universal or (tag_specific and attr in tag_specific):
                # We have a "class"-type attribute whose string
                # value is a whitespace-separated list of
                # values. Split it into a list.
                original_value: _AttributeValue = modified_attrs[attr]
                if isinstance(original_value, _RawAttributeValue):
                    # This is a _RawAttributeValue (a string) that
                    # needs to be split and converted to a
                    # AttributeValueList so it can be an
                    # _AttributeValue.
                    modified_value = self.attribute_value_list_class(
                        nonwhitespace_re.findall(original_value)
                    )
                else:
                    # html5lib calls setAttributes twice for the
                    # same tag when rearranging the parse tree. On
                    # the second call the attribute value here is
                    # already a list. This can also happen when a
                    # Tag object is cloned. If this happens, leave
                    # the value alone rather than trying to split
                    # it again.
                    modified_value = original_value
                modified_attrs[attr] = modified_value
        return modified_attrs
|
||||
|
||||
|
||||
class SAXTreeBuilder(TreeBuilder):
    """A deprecated Beautiful Soup treebuilder that reacts to SAX events.

    Instantiating this class emits a `DeprecationWarning`. The event
    handlers below forward start tags, end tags and character data to
    the attached `BeautifulSoup` object; namespace and prefix-mapping
    information is discarded.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Warn about the deprecation, then defer to `TreeBuilder`."""
        deprecation_message = "The SAXTreeBuilder class was deprecated in 4.13.0 and will be removed soon thereafter. It is completely untested and probably doesn't work; do not use it."
        warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)

    def feed(self, markup: _RawMarkup) -> None:
        """Not supported: this builder has no parser to feed markup to."""
        raise NotImplementedError()

    def close(self) -> None:
        """Do nothing; there is nothing to clean up."""
        pass

    def startElement(self, name: str, attrs: Dict[str, str]) -> None:
        """Forward a start-tag event to the `BeautifulSoup` object.

        :param name: The name of the tag being opened.
        :param attrs: The incoming attributes. Only element ``[1]`` of
            each key is kept as the attribute name.
        """
        attribute_dict = AttributeDict(
            (key[1], value) for key, value in attrs.items()
        )
        assert self.soup is not None
        self.soup.handle_starttag(name, None, None, attribute_dict)

    def endElement(self, name: str) -> None:
        """Forward an end-tag event to the `BeautifulSoup` object."""
        assert self.soup is not None
        self.soup.handle_endtag(name)

    def startElementNS(
        self, nsTuple: Tuple[str, str], nodeName: str, attrs: Dict[str, str]
    ) -> None:
        """Handle a namespaced start tag by ignoring ``nsTuple``."""
        self.startElement(nodeName, attrs)

    def endElementNS(self, nsTuple: Tuple[str, str], nodeName: str) -> None:
        """Handle a namespaced end tag by ignoring ``nsTuple``."""
        self.endElement(nodeName)

    def startPrefixMapping(self, prefix: str, nodeValue: str) -> None:
        """Ignore the start of a prefix mapping."""
        pass

    def endPrefixMapping(self, prefix: str) -> None:
        """Ignore the end of a prefix mapping."""
        pass

    def characters(self, content: str) -> None:
        """Forward character data to the `BeautifulSoup` object."""
        assert self.soup is not None
        self.soup.handle_data(content)

    def startDocument(self) -> None:
        """Ignore the start-of-document event."""
        pass

    def endDocument(self) -> None:
        """Ignore the end-of-document event."""
        pass
|
||||
|
||||
|
||||
class HTMLTreeBuilder(TreeBuilder):
    """A `TreeBuilder` that carries HTML-specific knowledge, such as
    which tags the HTML standard treats specially.
    """

    #: Tags that HTML defines as having no contents. Beautiful Soup
    #: treats these specially.
    DEFAULT_EMPTY_ELEMENT_TAGS: Optional[Set[str]] = {
        # HTML5 void elements.
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "keygen",
        "link",
        "menuitem",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
        # Void elements from earlier HTML versions, removed in HTML5.
        "basefont",
        "bgsound",
        "command",
        "frame",
        "image",
        "isindex",
        "nextid",
        "spacer",
    }

    #: Tags the HTML standard defines as block-level elements.
    #: Beautiful Soup does not currently treat them differently from
    #: other elements, but the information is here if you need it.
    DEFAULT_BLOCK_ELEMENTS: Set[str] = {
        "address",
        "article",
        "aside",
        "blockquote",
        "canvas",
        "dd",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "header",
        "hr",
        "li",
        "main",
        "nav",
        "noscript",
        "ol",
        "output",
        "p",
        "pre",
        "section",
        "table",
        "tfoot",
        "ul",
        "video",
    }

    #: Tags whose text is represented by a string class other than
    #: `bs4.element.NavigableString`.
    #:
    #: Some of these have an unusual content model in the HTML
    #: standard; the list was compiled from the "metadata content"
    #: elements (https://html.spec.whatwg.org/#metadata-content) that
    #: can contain strings.
    #:
    #: The Ruby tags (<rt> and <rp>) are ordinary "phrasing content",
    #: but their text is qualitatively different from the rest of the
    #: document, so it is useful to be able to tell it apart.
    #:
    #: TODO: Arguably <noscript> could go here but it seems
    #: qualitatively different from the other tags.
    DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = {  # type:ignore
        "rt": RubyTextString,
        "rp": RubyParenthesisString,
        "style": Stylesheet,
        "script": Script,
        "template": TemplateString,
    }

    #: Attributes the HTML standard defines as holding a
    #: space-separated list of values rather than a single value:
    #: class="foo bar" means two values, 'foo' and 'bar'. Such values
    #: are parsed into a list where possible, and converted back into
    #: a string on output. The "*" key applies to every tag.
    DEFAULT_CDATA_LIST_ATTRIBUTES: Dict[str, Set[str]] = {
        "*": {"class", "accesskey", "dropzone"},
        "a": {"rel", "rev"},
        "link": {"rel", "rev"},
        "td": {"headers"},
        "th": {"headers"},
        "form": {"accept-charset"},
        "object": {"archive"},
        # HTML5-specific entries (as are *.accesskey and *.dropzone).
        "area": {"rel"},
        "icon": {"sizes"},
        "iframe": {"sandbox"},
        "output": {"for"},
    }

    #: Whitespace inside these tags is preserved by default instead of
    #: being collapsed.
    DEFAULT_PRESERVE_WHITESPACE_TAGS: set[str] = {"pre", "textarea"}

    def set_up_substitutions(self, tag: Tag) -> bool:
        """Swap the encoding declared in a <meta> tag for a placeholder
        object that is resolved when the tag is output as a string.

        A document may arrive in one encoding and be written out in
        another, so the <meta> tag has to be able to follow along.

        :param tag: The tag to inspect; only <meta> tags are modified.
        :return: Whether or not a substitution was performed.

        :meta private:
        """
        if tag.name != "meta":
            # Only <meta> tags can declare an encoding.
            return False

        # TODO: These casts are wrong in the (very unlikely) case that
        # whoever instantiated the TreeBuilder listed meta['content']
        # or meta['charset'] among the cdata_list_attributes.
        content: Optional[str] = cast(Optional[str], tag.get("content"))
        charset: Optional[str] = cast(Optional[str], tag.get("charset"))

        # meta['http-equiv'] being a cdata_list_attribute (equally
        # unlikely) is easy to accommodate, so read it as a list.
        http_equiv: List[str] = tag.get_attribute_list("http-equiv")

        if charset is not None:
            # HTML 5 style:
            # <meta charset="utf8">
            tag["charset"] = CharsetMetaAttributeValue(charset)
            return True

        if content is None:
            return False

        for value in http_equiv:
            if value.lower() == "content-type":
                # HTML 4 style:
                # <meta http-equiv="content-type" content="text/html; charset=utf8">
                tag["content"] = ContentMetaAttributeValue(content)
                return True

        return False
|
||||
|
||||
|
||||
class DetectsXMLParsedAsHTML(object):
    """Mixin for any class (a TreeBuilder, or a helper used by one)
    that can notice an XML document being incorrectly parsed as HTML
    and issue the appropriate warning.

    The reliable check needs to see an incoming processing
    instruction that might be an XML declaration, and the tags as
    they are opened. For a `TreeBuilder` that can't observe those,
    `warn_if_markup_looks_like_xml` offers a less reliable check that
    only examines the raw markup.
    """

    #: Regular expression for seeing if string markup has an <html> tag.
    LOOKS_LIKE_HTML: Pattern[str] = re.compile("<[^ +]html", re.I)

    #: Regular expression for seeing if byte markup has an <html> tag.
    LOOKS_LIKE_HTML_B: Pattern[bytes] = re.compile(b"<[^ +]html", re.I)

    #: The start of an XML document string.
    XML_PREFIX: str = "<?xml"

    #: The start of an XML document bytestring.
    XML_PREFIX_B: bytes = b"<?xml"

    # Typed as str rather than `ProcessingInstruction` because this
    # check may run before any Beautiful Soup objects exist.
    _first_processing_instruction: Optional[str]  #: :meta private:
    _root_tag_name: Optional[str]  #: :meta private:

    @classmethod
    def warn_if_markup_looks_like_xml(
        cls, markup: Optional[_RawMarkup], stacklevel: int = 3
    ) -> bool:
        """Check whether some markup looks like XML that's not XHTML,
        and issue a warning if it does.

        Only the first 500 characters (or bytes) are examined. This
        is much less reliable than checking while parsing, but some
        tree builders can't do that.

        :param markup: The raw markup, or None.
        :param stacklevel: The stacklevel of the code calling this
            function.

        :return: True if the markup looks like non-XHTML XML, False
            otherwise.
        """
        if markup is None:
            return False

        snippet = markup[:500]
        if isinstance(snippet, bytes):
            if not snippet.startswith(
                cls.XML_PREFIX_B
            ) or cls.LOOKS_LIKE_HTML_B.search(snippet):
                return False
        elif not snippet.startswith(
            cls.XML_PREFIX
        ) or cls.LOOKS_LIKE_HTML.search(snippet):
            return False

        # Two extra frames: this method and _warn itself.
        cls._warn(stacklevel=stacklevel + 2)
        return True

    @classmethod
    def _warn(cls, stacklevel: int = 5) -> None:
        """Issue a warning about XML being parsed as HTML."""
        warnings.warn(
            XMLParsedAsHTMLWarning.MESSAGE,
            XMLParsedAsHTMLWarning,
            stacklevel=stacklevel,
        )

    def _initialize_xml_detector(self) -> None:
        """Reset the detector's state. Call this before parsing a document."""
        self._root_tag_name = None
        self._first_processing_instruction = None

    def _document_might_be_xml(self, processing_instruction: str) -> None:
        """Record an XML declaration, or a "processing instruction"
        that might be one.

        The value is only remembered if nothing has been recorded yet,
        neither an earlier processing instruction nor a root tag. The
        actual decision is made in `_root_tag_encountered`, which
        checks whether a non-XHTML document is being parsed as HTML.
        """
        document_started = (
            self._first_processing_instruction is not None
            or self._root_tag_name is not None
        )
        if document_started:
            # Too late for this instruction to be the first thing in
            # the document; ignore it.
            return

        # Whether this is a problem can't be known until the first
        # tag shows up.
        self._first_processing_instruction = processing_instruction

    def _root_tag_encountered(self, name: str) -> None:
        """Record the document's root tag and, if warranted, warn.

        This is where the check for an XML document being incorrectly
        parsed as HTML actually happens.
        """
        if self._root_tag_name is not None:
            # Called more than once by mistake; only the first call
            # counts.
            return

        self._root_tag_name = name

        if name == "html":
            return
        declaration = self._first_processing_instruction
        if declaration is not None and declaration.lower().startswith("xml "):
            # An XML declaration followed by a root tag other than
            # 'html' reliably indicates a non-XHTML document that is
            # being parsed as HTML.
            self._warn(stacklevel=10)
|
||||
|
||||
|
||||
def register_treebuilders_from(module: ModuleType) -> None:
    """Copy TreeBuilders from the given module into this module.

    Every name listed in ``module.__all__`` that refers to a
    `TreeBuilder` subclass is set as an attribute of this module,
    appended to this module's ``__all__`` and registered with this
    module's ``builder_registry``. Names that refer to anything else
    (functions, constants, other classes) are skipped.

    :param module: A module that defines ``__all__``.
    """
    this_module = sys.modules[__name__]
    for name in module.__all__:
        obj = getattr(module, name)

        # issubclass() raises TypeError when its first argument is not
        # a class, so rule out non-class exports before calling it.
        if not (isinstance(obj, type) and issubclass(obj, TreeBuilder)):
            continue

        setattr(this_module, name, obj)
        this_module.__all__.append(name)
        # Register the builder while we're at it.
        this_module.builder_registry.register(obj)
|
||||
|
||||
|
||||
# Registration runs from lowest to highest priority, so that anything
# registered later (including custom builders) takes precedence. lxml
# should generally win over html5lib because it's faster, and
# HTMLParser is only meant as a last resort, so it goes first.
from . import _htmlparser  # noqa: E402

register_treebuilders_from(_htmlparser)

try:
    from . import _html5lib

    register_treebuilders_from(_html5lib)
except ImportError:
    # html5lib isn't installed; carry on without its builder.
    pass

try:
    from . import _lxml

    register_treebuilders_from(_lxml)
except ImportError:
    # lxml isn't installed; carry on without its builders.
    pass
|
||||
|
|
@ -0,0 +1,611 @@
|
|||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = [
|
||||
"HTML5TreeBuilder",
|
||||
]
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
cast,
|
||||
Dict,
|
||||
Iterable,
|
||||
Optional,
|
||||
Sequence,
|
||||
TYPE_CHECKING,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from typing_extensions import TypeAlias
|
||||
from bs4._typing import (
|
||||
_AttributeValue,
|
||||
_AttributeValues,
|
||||
_Encoding,
|
||||
_Encodings,
|
||||
_NamespaceURL,
|
||||
_RawMarkup,
|
||||
)
|
||||
|
||||
import warnings
|
||||
from bs4.builder import (
|
||||
DetectsXMLParsedAsHTML,
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
HTML_5,
|
||||
HTMLTreeBuilder,
|
||||
)
|
||||
from bs4.element import (
|
||||
NamespacedAttribute,
|
||||
PageElement,
|
||||
nonwhitespace_re,
|
||||
)
|
||||
import html5lib
|
||||
from html5lib.constants import (
|
||||
namespaces,
|
||||
)
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
Tag,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from html5lib.treebuilders import base as treebuilder_base
|
||||
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
    """Build a tree with
    `html5lib <https://github.com/html5lib/html5lib-python>`_.

    `HTML5TreeBuilder` lacks some features common to other HTML
    `TreeBuilder` classes. Some could theoretically be implemented,
    but html5lib moves the parse tree around while building it, which
    makes that quite difficult at the very least.

    Specifically:

    * Strings are not given different `NavigableString` subclasses
      (e.g. `Script`) based on the tag they were found in.
    * A `SoupStrainer` can't be used to parse only part of a document.
    """

    NAME: str = "html5lib"

    features: Iterable[str] = [NAME, PERMISSIVE, HTML_5, HTML]

    #: html5lib can report the line number and position in the
    #: original file that an element came from.
    TRACKS_LINE_NUMBERS: bool = True

    underlying_builder: "TreeBuilderForHtml5lib"  #: :meta private:
    user_specified_encoding: Optional[_Encoding]

    def prepare_markup(
        self,
        markup: _RawMarkup,
        user_specified_encoding: Optional[_Encoding] = None,
        document_declared_encoding: Optional[_Encoding] = None,
        exclude_encodings: Optional[_Encodings] = None,
    ) -> Iterable[Tuple[_RawMarkup, Optional[_Encoding], Optional[_Encoding], bool]]:
        """Yield the markup unchanged as the single parsing strategy.

        The user-specified encoding is remembered for `feed`. A
        warning is issued for each of ``document_declared_encoding``
        and ``exclude_encodings`` that has a truthy value, and another
        if the markup looks like XML.
        """
        # Remember the user-specified encoding; feed() needs it.
        self.user_specified_encoding = user_specified_encoding

        # Neither of these is used at the moment, because the html5lib
        # TreeBuilder doesn't go through UnicodeDammit.
        unsupported = {
            "document_declared_encoding": document_declared_encoding,
            "exclude_encodings": exclude_encodings,
        }
        for name, supplied_value in unsupported.items():
            if not supplied_value:
                continue
            warnings.warn(
                f"You provided a value for {name}, but the html5lib tree builder doesn't support {name}.",
                stacklevel=3,
            )

        # html5lib parses nothing but HTML, so XML input is worth
        # pointing out.
        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup, stacklevel=3)

        yield (markup, None, None, False)

    # These methods are defined by Beautiful Soup.
    def feed(self, markup: _RawMarkup) -> None:
        """Parse the incoming markup with html5lib, populating the
        `BeautifulSoup` object in `HTML5TreeBuilder.soup`.
        """
        if self.soup is not None and self.soup.parse_only is not None:
            warnings.warn(
                "You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
                stacklevel=4,
            )

        # html5lib calls self.create_treebuilder(), which is what
        # sets self.underlying_builder.
        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
        assert self.underlying_builder is not None
        self.underlying_builder.parser = parser

        markup_is_text = isinstance(markup, str)
        if markup_is_text:
            extra_kwargs = {}
        else:
            # override_encoding eventually reaches html5lib's
            # HTMLBinaryInputStream.__init__.
            extra_kwargs = {"override_encoding": self.user_specified_encoding}

        doc = parser.parse(markup, **extra_kwargs)  # type:ignore

        # Record the character encoding detected by the tokenizer.
        if markup_is_text:
            # Special case: html5lib sets charEncoding to UTF-8 when
            # it is given Unicode input.
            doc.original_encoding = None
        else:
            detected = parser.tokenizer.stream.charEncoding[0]  # type:ignore
            # html5lib hands back an Encoding object; store its name
            # as a string for compatibility with other tree builders.
            doc.original_encoding = detected.name
        self.underlying_builder.parser = None

    def create_treebuilder(
        self, namespaceHTMLElements: bool
    ) -> "TreeBuilderForHtml5lib":
        """Called by html5lib to instantiate the kind of class it
        calls a 'TreeBuilder'. The new object is also stored in
        ``self.underlying_builder``.

        :param namespaceHTMLElements: Whether or not to namespace HTML elements.

        :meta private:
        """
        builder = TreeBuilderForHtml5lib(
            namespaceHTMLElements, self.soup, store_line_numbers=self.store_line_numbers
        )
        self.underlying_builder = builder
        return builder

    def test_fragment_to_document(self, fragment: str) -> str:
        """See `TreeBuilder`."""
        return "<html><head></head><body>%s</body></html>" % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
    """The html5lib-side tree builder: it turns html5lib's node
    requests into objects attached to a `BeautifulSoup` object.
    """

    soup: "BeautifulSoup"  #: :meta private:
    parser: Optional[html5lib.HTMLParser]  #: :meta private:

    def __init__(
        self,
        namespaceHTMLElements: bool,
        soup: Optional["BeautifulSoup"] = None,
        store_line_numbers: bool = True,
        **kwargs: Any,
    ):
        """
        :param namespaceHTMLElements: Passed on to the html5lib base
            class constructor.
        :param soup: The `BeautifulSoup` object to populate. Omitting
            it is deprecated; a new empty object is created instead.
        :param store_line_numbers: Whether `elementClass` records the
            source line and position of each tag.
        :param kwargs: Only used when ``soup`` is omitted, in which
            case they are passed to the `BeautifulSoup` constructor.
        """
        if not soup:
            warnings.warn(
                "The optionality of the 'soup' argument to the TreeBuilderForHtml5lib constructor is deprecated as of Beautiful Soup 4.13.0: 'soup' is now required. If you can't pass in a BeautifulSoup object here, or you get this warning and it seems mysterious to you, please contact the Beautiful Soup developer team for possible un-deprecation.",
                DeprecationWarning,
                stacklevel=2,
            )
            from bs4 import BeautifulSoup

            # TODO: Why is the parser 'html.parser' here? Using
            # html5lib doesn't cause an infinite loop and is more
            # accurate. Best to get rid of this entire section, I think.
            soup = BeautifulSoup(
                "", "html.parser", store_line_numbers=store_line_numbers, **kwargs
            )
        self.soup = soup

        # TODO: What are **kwargs exactly? Should they be passed in
        # here in addition to/instead of being passed to the BeautifulSoup
        # constructor?
        super().__init__(namespaceHTMLElements)

        # Replaced later by a real html5lib HTMLParser object, which
        # is used to look up the current line number.
        self.parser = None
        self.store_line_numbers = store_line_numbers

    def documentClass(self) -> "Element":
        """Reset the soup and wrap it as the document node."""
        self.soup.reset()
        return Element(self.soup, self.soup, None)

    def insertDoctype(self, token: Dict[str, Any]) -> None:
        """Create a `Doctype` from an html5lib doctype token and hand
        it to the soup.
        """
        name: str = cast(str, token["name"])
        publicId: Optional[str] = cast(Optional[str], token["publicId"])
        systemId: Optional[str] = cast(Optional[str], token["systemId"])

        self.soup.object_was_parsed(
            Doctype.for_name_and_ids(name, publicId, systemId)
        )

    def elementClass(self, name: str, namespace: str) -> "Element":
        """Create a new tag, wrapped in an `Element`.

        When a parser is attached and line numbers are being stored,
        the tag records where it was found in the source.
        """
        sourceline: Optional[int] = None
        sourcepos: Optional[int] = None
        if self.parser is not None and self.store_line_numbers:
            # The stream position is the point just past the end of
            # the tag. Where the tag started is unknown, but it ended
            # on the character immediately before this position.
            sourceline, sourcepos = self.parser.tokenizer.stream.position()  # type:ignore
            assert sourcepos is not None
            sourcepos -= 1

        new_tag = self.soup.new_tag(
            name, namespace, sourceline=sourceline, sourcepos=sourcepos
        )
        return Element(new_tag, self.soup, namespace)

    def commentClass(self, data: str) -> "TextNode":
        """Wrap the given text in a `Comment` inside a `TextNode`."""
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self) -> "Element":
        """This is only used by html5lib HTMLParser.parseFragment(),
        which Beautiful Soup never calls; only the html5lib unit
        tests do. Since those tests aren't currently hooked in, there
        is no implementation.
        """
        raise NotImplementedError()

    def getFragment(self) -> "Element":
        """This is only used by the html5lib unit tests. Since those
        tests aren't currently hooked in, there is no implementation.
        """
        raise NotImplementedError()

    def appendChild(self, node: "Element") -> None:
        """Append the node's underlying element to the soup."""
        # TODO: This code is not covered by the BS4 tests, and
        # apparently not triggered by the html5lib test suite either.
        # But it doesn't seem test-specific and there are calls to it
        # (or a method with the same name) all over html5lib, so the
        # implementation stays rather than becoming
        # NotImplementedError().
        self.soup.append(node.element)

    def getDocument(self) -> "BeautifulSoup":
        """Return the `BeautifulSoup` object being populated."""
        return self.soup

    def testSerializer(self, node: "Element") -> None:
        """This is only used by the html5lib unit tests. Since those
        tests aren't currently hooked in, there is no implementation.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class AttrList(object):
    """Represents a Tag's attributes in a way compatible with html5lib.

    Reads are served from a copy of the tag's attributes taken when
    the `AttrList` is created; writes go to the tag itself and do not
    update that copy.
    """

    # The tag whose attributes are being presented.
    element: Tag
    # A shallow copy of ``element.attrs`` made in the constructor.
    attrs: _AttributeValues

    def __init__(self, element: Tag):
        """
        :param element: The tag whose attributes should be exposed.
        """
        self.element = element
        self.attrs = dict(self.element.attrs)

    def __iter__(self) -> Iterable[Tuple[str, _AttributeValue]]:
        """Iterate over a list copy of the (name, value) pairs."""
        return iter(list(self.attrs.items()))

    def __setitem__(self, name: str, value: _AttributeValue) -> None:
        """Set an attribute on the underlying tag.

        If the attribute is multi-valued for this element and the
        value is not already a list, the value is split on whitespace
        into a list first.
        """
        list_attr = self.element.cdata_list_attributes or {}
        is_multi_valued = name in list_attr.get("*", ()) or name in list_attr.get(
            self.element.name, ()
        )
        # A node that is being cloned may already hold a list here;
        # in that case the value is left alone.
        if is_multi_valued and not isinstance(value, list):
            assert isinstance(value, str)
            value = self.element.attribute_value_list_class(
                nonwhitespace_re.findall(value)
            )
        self.element[name] = value

    def items(self) -> Iterable[Tuple[str, _AttributeValue]]:
        """Return the (name, value) pairs as a new list."""
        return list(self.attrs.items())

    def keys(self) -> Iterable[str]:
        """Return the attribute names as a new list."""
        return list(self.attrs.keys())

    def __len__(self) -> int:
        """Return the number of attributes in the copy."""
        return len(self.attrs)

    def __getitem__(self, name: str) -> _AttributeValue:
        """Return the value stored for ``name``.

        :raises KeyError: If there is no such attribute.
        """
        return self.attrs[name]

    def __contains__(self, name: str) -> bool:
        """Return whether the copy has an attribute called ``name``."""
        # A direct dict lookup; no need to build a list of keys first.
        return name in self.attrs
|
||||
|
||||
|
||||
class BeautifulSoupNode(treebuilder_base.Node):
    """An html5lib node backed by a Beautiful Soup object."""

    # A node corresponds to _either_ a Tag _or_ a NavigableString, so
    # one of these two is expected to be set.
    tag: Optional[Tag]
    string: Optional[NavigableString]
    soup: "BeautifulSoup"
    namespace: Optional[_NamespaceURL]

    @property
    def element(self) -> PageElement:
        """The underlying Beautiful Soup object: the tag if there is
        one, otherwise the string.
        """
        if self.tag is not None:
            return self.tag
        assert self.string is not None
        return self.string

    @property
    def nodeType(self) -> int:
        """Return the html5lib constant corresponding to the type of
        the underlying DOM object.

        NOTE: Only the html5lib test suite accesses this property;
        Beautiful Soup proper does not, so it is not implemented.
        """
        raise NotImplementedError()

    # TODO-TYPING: typeshed stubs are incorrect about this;
    # cloneNode returns a new Node, not None.
    def cloneNode(self) -> treebuilder_base.Node:  # type:ignore
        """Not implemented here."""
        raise NotImplementedError()
|
||||
|
||||
|
||||
class Element(BeautifulSoupNode):
|
||||
namespace: Optional[_NamespaceURL]
|
||||
|
||||
def __init__(
    self, element: Tag, soup: "BeautifulSoup", namespace: Optional[_NamespaceURL]
):
    """
    :param element: The tag this node wraps.
    :param soup: The `BeautifulSoup` object the tag belongs to.
    :param namespace: The namespace of the tag, if any.
    """
    self.soup = soup
    self.namespace = namespace
    # An Element always wraps a tag, never a string.
    self.string = None
    self.tag = element
    # Run the base initializer last, once self.tag is in place.
    treebuilder_base.Node.__init__(self, element.name)
|
||||
|
||||
def appendChild(self, node: "BeautifulSoupNode") -> None:
    """Add a node as the last child of this element.

    A plain `NavigableString` appended right after another plain
    `NavigableString` is merged into it; anything else is handed to
    the soup as if it had just been parsed.
    """
    plain_text: Optional[NavigableString] = None
    new_child: PageElement
    if type(node.string) is NavigableString:
        # The exact-type check is deliberate: PreformattedStrings
        # such as Comments must not be joined with nearby strings.
        plain_text = new_child = node.string
    else:
        new_child = node.element
    node.parent = self

    already_attached = (
        new_child is not None
        and new_child.parent is not None
        and not isinstance(new_child, str)
    )
    if already_attached:
        node.element.extract()

    container = self.tag
    if (
        plain_text is not None
        and container is not None
        and container.contents
        and type(container.contents[-1]) is NavigableString
    ):
        # A string is being appended onto another string.
        # TODO This has O(n^2) performance, for input like
        # "a</a>a</a>a</a>..."
        previous = container.contents[-1]
        merged = self.soup.new_string(previous + plain_text)
        previous.replace_with(merged)
        self.soup._most_recent_element = merged
        return

    if isinstance(node, str):
        # Create a brand new NavigableString from this string.
        new_child = self.soup.new_string(node)

    # Make Beautiful Soup act as if this element was parsed right
    # after the parent's last descendant (or right after the parent
    # itself, when the parent has no children).
    if container is not None and container.contents:
        anchor = container._last_descendant(False)
    elif self.element.next_element is not None:
        # Something from further ahead in the parse tree is being
        # inserted into this earlier element, which means an
        # expensive search for the last element in the tree.
        anchor = self.soup._last_descendant()
    else:
        anchor = self.element

    self.soup.object_was_parsed(
        new_child, parent=container, most_recent_element=anchor
    )
|
||||
|
||||
def getAttributes(self) -> AttrList:
    """Return a new `AttrList` wrapping this element's tag."""
    wrapped_tag = self.tag
    assert wrapped_tag is not None
    return AttrList(wrapped_tag)
|
||||
|
||||
# An HTML5lib attribute name may either be a single string,
|
||||
# or a tuple (namespace, name).
|
||||
_Html5libAttributeName: TypeAlias = Union[str, Tuple[str, str]]
|
||||
# Now we can define the type this method accepts as a dictionary
|
||||
# mapping those attribute names to single string values.
|
||||
_Html5libAttributes: TypeAlias = Dict[_Html5libAttributeName, str]
|
||||
|
||||
def setAttributes(self, attributes: Optional[_Html5libAttributes]) -> None:
    """Copy attributes supplied by html5lib onto the underlying tag.

    The incoming dictionary is modified in place: tuple keys are
    replaced with `NamespacedAttribute` objects, and the builder
    then gets to turn multi-valued attributes into lists. Nothing
    happens when ``attributes`` is None or empty.
    """
    assert self.tag is not None
    if attributes is None or len(attributes) == 0:
        return

    # Swap every namespaced (tuple) attribute name for a
    # NamespacedAttribute object. Iterate over a snapshot of the
    # keys because the dictionary changes along the way.
    for name in list(attributes.keys()):
        if isinstance(name, tuple):
            replacement = NamespacedAttribute(*name)
            attributes[replacement] = attributes.pop(name)

    # From here on the dictionary has the shape Beautiful Soup uses.
    normalized_attributes = cast(_AttributeValues, attributes)

    # Values for attributes like 'class' arrived as single strings;
    # have the builder replace them with lists where appropriate.
    self.soup.builder._replace_cdata_list_attribute_values(
        self.name, normalized_attributes
    )

    # Apply the result to the Tag behind this BeautifulSoupNode.
    for attribute_name, value_or_values in list(normalized_attributes.items()):
        self.tag[attribute_name] = value_or_values

    # The Tag constructor already called set_up_substitutions when
    # the Tag was created, but the attributes were just set or
    # changed and may need substitution, so call it again by hand.
    self.soup.builder.set_up_substitutions(self.tag)
|
||||
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(
    self, data: str, insertBefore: Optional["BeautifulSoupNode"] = None
) -> None:
    """Wrap ``data`` in a `TextNode` and add it to this node.

    :param data: The text to insert.
    :param insertBefore: If given (and truthy), the new text node is
        inserted before this node; otherwise it is appended as the
        last child.
    """
    text_node = TextNode(self.soup.new_string(data), self.soup)
    if not insertBefore:
        self.appendChild(text_node)
        return
    self.insertBefore(text_node, insertBefore)
|
||||
|
||||
def insertBefore(
    self, node: "BeautifulSoupNode", refNode: "BeautifulSoupNode"
) -> None:
    """Insert ``node`` into this node's tag immediately before ``refNode``.

    If ``node`` wraps a plain `NavigableString` and the child directly
    preceding ``refNode`` is also a plain `NavigableString`, the two
    strings are concatenated into a single new string which replaces
    the preceding child, instead of inserting a second string node.

    :param node: The node to insert.
    :param refNode: An existing child of this node; ``node`` goes in
        front of it.
    """
    assert self.tag is not None
    index = self.tag.index(refNode.element)

    # Only look at the preceding sibling when there is one. With
    # index == 0 the expression contents[index - 1] would wrap around
    # to contents[-1] -- the *last* child -- and the new text could be
    # merged into the wrong end of the tag.
    preceding = self.tag.contents[index - 1] if index > 0 else None

    if (
        type(node.element) is NavigableString
        and type(preceding) is NavigableString
    ):
        # (See comments in appendChild)
        old_node = preceding
        assert type(old_node) is NavigableString
        new_str = self.soup.new_string(old_node + node.element)
        old_node.replace_with(new_str)
    else:
        self.tag.insert(index, node.element)
        node.parent = self
|
||||
|
||||
def removeChild(self, node: "Element") -> None:
    """Detach ``node`` by calling ``extract()`` on its underlying element."""
    child_element = node.element
    child_element.extract()
|
||||
|
||||
def reparentChildren(self, newParent: "Element") -> None:
    """Move all of this tag's children into another tag.

    The children are appended after any children the new parent
    already has. Rather than moving the children one at a time, this
    method rewires the ``previous_element`` / ``next_element`` /
    ``previous_sibling`` / ``next_sibling`` links at the two seams
    (first moved child and last moved descendant), then re-homes each
    child by setting its ``parent`` and appending it to the new
    parent's ``contents``. Afterwards this tag's ``contents`` is
    replaced with an empty list.

    :param newParent: The node whose tag receives the children. Both
        this node and ``newParent`` must have a tag (asserted).
    """
    # print("MOVE", self.element.contents)
    # print("FROM", self.element)
    # print("TO", new_parent.element)

    element = self.tag
    assert element is not None
    new_parent_element = newParent.tag
    assert new_parent_element is not None
    # Determine what this tag's next_element will be once all the children
    # are removed.
    final_next_element = element.next_sibling

    # Capture the new parent's current last descendant *before* anything
    # is linked in; it is the node the first moved child will follow.
    new_parents_last_descendant = new_parent_element._last_descendant(False, False)
    if len(new_parent_element.contents) > 0:
        # The new parent already contains children. We will be
        # appending this tag's children to the end.

        # We can make this assertion since we know new_parent has
        # children.
        assert new_parents_last_descendant is not None
        new_parents_last_child = new_parent_element.contents[-1]
        new_parents_last_descendant_next_element = (
            new_parents_last_descendant.next_element
        )
    else:
        # The new parent contains no children.
        new_parents_last_child = None
        new_parents_last_descendant_next_element = new_parent_element.next_element

    to_append = element.contents
    if len(to_append) > 0:
        # Set the first child's previous_element and previous_sibling
        # to elements within the new parent
        first_child = to_append[0]
        if new_parents_last_descendant is not None:
            first_child.previous_element = new_parents_last_descendant
        else:
            first_child.previous_element = new_parent_element
        first_child.previous_sibling = new_parents_last_child
        # Mirror the link in the forward direction.
        if new_parents_last_descendant is not None:
            new_parents_last_descendant.next_element = first_child
        else:
            new_parent_element.next_element = first_child
        if new_parents_last_child is not None:
            new_parents_last_child.next_sibling = first_child

        # Find the very last element being moved. It is now the
        # parent's last descendant. It has no .next_sibling and
        # its .next_element is whatever the previous last
        # descendant had.
        last_childs_last_descendant = to_append[-1]._last_descendant(
            is_initialized=False, accept_self=True
        )

        # Since we passed accept_self=True into _last_descendant,
        # there's no possibility that the result is None.
        assert last_childs_last_descendant is not None
        last_childs_last_descendant.next_element = (
            new_parents_last_descendant_next_element
        )
        if new_parents_last_descendant_next_element is not None:
            # TODO-COVERAGE: This code has no test coverage and
            # I'm not sure how to get html5lib to go through this
            # path, but it's just the other side of the previous
            # line.
            new_parents_last_descendant_next_element.previous_element = (
                last_childs_last_descendant
            )
        last_childs_last_descendant.next_sibling = None

    # Re-home every child. `to_append` is the same list object as
    # element.contents, which is only rebound (not cleared) below.
    for child in to_append:
        child.parent = new_parent_element
        new_parent_element.contents.append(child)

    # Now that this element has no children, change its .next_element.
    element.contents = []
    element.next_element = final_next_element

    # print("DONE WITH MOVE")
    # print("FROM", self.element)
    # print("TO", new_parent_element)
|
||||
|
||||
# TODO-TYPING: typeshed stubs are incorrect about this;
# hasContent returns a boolean, not None.
def hasContent(self) -> bool:  # type:ignore
    """Report whether this node has content.

    :return: True if this node has no tag at all, or if its tag has
        at least one entry in ``contents``; False otherwise.
    """
    tag = self.tag
    if tag is None:
        return True
    return len(tag.contents) > 0
|
||||
|
||||
# TODO-TYPING: typeshed stubs are incorrect about this;
# cloneNode returns a new Node, not None.
def cloneNode(self) -> treebuilder_base.Node:  # type:ignore
    """Create a new `Element` with the same tag name, namespace and attributes.

    A fresh tag is created through ``self.soup.new_tag`` and each
    attribute of this node is copied onto the new node. Children are
    not copied.
    """
    assert self.tag is not None
    fresh_tag = self.soup.new_tag(self.tag.name, self.namespace)
    clone = Element(fresh_tag, self.soup, self.namespace)
    for attr_name, attr_value in self.attributes:
        clone.attributes[attr_name] = attr_value
    return clone
|
||||
|
||||
def getNameTuple(self) -> Tuple[Optional[_NamespaceURL], str]:
    """Return this node's ``(namespace, name)`` pair.

    When the node has no namespace of its own, the ``"html"`` entry
    of the ``namespaces`` mapping is used instead.
    """
    namespace = self.namespace
    if namespace is not None:
        return namespace, self.name
    return namespaces["html"], self.name
|
||||
|
||||
nameTuple = property(getNameTuple)
|
||||
|
||||
|
||||
class TextNode(BeautifulSoupNode):
    """A tree-builder node that carries a string instead of a tag."""

    def __init__(self, element: NavigableString, soup: "BeautifulSoup"):
        """Store the string and the soup it belongs to.

        :param element: The string this node wraps.
        :param soup: The `BeautifulSoup` object being built.
        """
        treebuilder_base.Node.__init__(self, None)
        self.soup = soup
        self.string = element
        # A text node never has an associated tag.
        self.tag = None
|
||||
|
|
@ -0,0 +1,462 @@
|
|||
# encoding: utf-8
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
from __future__ import annotations
|
||||
|
||||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = [
|
||||
"HTMLParserTreeBuilder",
|
||||
]
|
||||
|
||||
from html.parser import HTMLParser
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
cast,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
TYPE_CHECKING,
|
||||
Tuple,
|
||||
Type,
|
||||
Union,
|
||||
)
|
||||
|
||||
from bs4.element import (
|
||||
AttributeDict,
|
||||
CData,
|
||||
Comment,
|
||||
Declaration,
|
||||
Doctype,
|
||||
ProcessingInstruction,
|
||||
)
|
||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
|
||||
from bs4.builder import (
|
||||
DetectsXMLParsedAsHTML,
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
STRICT,
|
||||
)
|
||||
|
||||
from bs4.exceptions import ParserRejectedMarkup
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import NavigableString
|
||||
from bs4._typing import (
|
||||
_Encoding,
|
||||
_Encodings,
|
||||
_RawMarkup,
|
||||
)
|
||||
|
||||
HTMLPARSER = "html.parser"
|
||||
|
||||
_DuplicateAttributeHandler = Callable[[Dict[str, str], str, str], None]
|
||||
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
    """A subclass of the Python standard library's HTMLParser class, which
    listens for HTMLParser events and translates them into calls
    to Beautiful Soup's tree construction API.

    :param soup: The `BeautifulSoup` object that receives the
       tree-construction calls.
    :param on_duplicate_attribute: A strategy for what to do if a
       tag includes the same attribute more than once. Accepted
       values are: REPLACE (replace earlier values with later
       ones, the default), IGNORE (keep the earliest value
       encountered), or a callable. A callable must take three
       arguments: the dictionary of attributes already processed,
       the name of the duplicate attribute, and the most recent value
       encountered.
    """

    #: Constant to handle duplicate attributes by replacing earlier values
    #: with later ones.
    REPLACE: str = "replace"

    #: Constant to handle duplicate attributes by ignoring later values
    #: and keeping the earlier ones.
    IGNORE: str = "ignore"

    def __init__(
        self,
        soup: BeautifulSoup,
        *args: Any,
        on_duplicate_attribute: Union[str, _DuplicateAttributeHandler] = REPLACE,
        **kwargs: Any,
    ):
        """Constructor.

        Positional and keyword arguments other than ``soup`` and
        ``on_duplicate_attribute`` are passed through to
        ``HTMLParser.__init__``.
        """
        self.soup = soup
        self.on_duplicate_attribute = on_duplicate_attribute
        self.attribute_dict_class = soup.builder.attribute_dict_class
        HTMLParser.__init__(self, *args, **kwargs)

        # Keep a list of empty-element tags that were encountered
        # without an explicit closing tag. If we encounter a closing tag
        # of this type, we'll associate it with one of those entries.
        #
        # This isn't a stack because we don't care about the
        # order. It's a list of closing tags we've already handled and
        # will ignore, assuming they ever show up.
        self.already_closed_empty_element = []

        self._initialize_xml_detector()

    on_duplicate_attribute: Union[str, _DuplicateAttributeHandler]
    already_closed_empty_element: List[str]
    soup: BeautifulSoup

    def error(self, message: str) -> None:
        """Reject the markup outright.

        :param message: Description of the problem.
        :raise ParserRejectedMarkup: Always.
        """
        # NOTE: This method is required so long as Python 3.9 is
        # supported. The corresponding code is removed from HTMLParser
        # in 3.5, but not removed from ParserBase until 3.10.
        # https://github.com/python/cpython/issues/76025
        #
        # The original implementation turned the error into a warning,
        # but in every case I discovered, this made HTMLParser
        # immediately crash with an error message that was less
        # helpful than the warning. The new implementation makes it
        # more clear that html.parser just can't parse this
        # markup. The 3.10 implementation does the same, though it
        # raises AssertionError rather than calling a method. (We
        # catch this error and wrap it in a ParserRejectedMarkup.)
        raise ParserRejectedMarkup(message)

    def handle_startendtag(
        self, tag: str, attrs: List[Tuple[str, Optional[str]]]
    ) -> None:
        """Handle an incoming empty-element tag.

        html.parser only calls this method when the markup looks like
        <tag/>.

        :param tag: A tag name.
        :param attrs: The tag's attributes as (name, value) pairs.
        """
        # `handle_empty_element` tells handle_starttag not to close the tag
        # just because its name matches a known empty-element tag. We
        # know that this is an empty-element tag, and we want to call
        # handle_endtag ourselves.
        self.handle_starttag(tag, attrs, handle_empty_element=False)
        self.handle_endtag(tag)

    def handle_starttag(
        self,
        tag: str,
        attrs: List[Tuple[str, Optional[str]]],
        handle_empty_element: bool = True,
    ) -> None:
        """Handle an opening tag, e.g. '<tag>'

        :param tag: A tag name.
        :param attrs: The tag's attributes as (name, value) pairs. A
            value of None is converted to the empty string.
        :param handle_empty_element: If True (the default) and the
            resulting tag is an empty-element tag, this method closes
            the tag itself and remembers the tag name so that a later
            explicit closing tag is ignored. Pass False when the
            caller will call `handle_endtag` on its own.
        """
        # TODO: handle namespaces here?
        attr_dict: AttributeDict = self.attribute_dict_class()
        for key, value in attrs:
            # Change None attribute values to the empty string
            # for consistency with the other tree builders.
            if value is None:
                value = ""
            if key in attr_dict:
                # A single attribute shows up multiple times in this
                # tag. How to handle it depends on the
                # on_duplicate_attribute setting.
                on_dupe = self.on_duplicate_attribute
                if on_dupe == self.IGNORE:
                    # Keep the value that was seen first.
                    pass
                elif on_dupe in (None, self.REPLACE):
                    attr_dict[key] = value
                else:
                    on_dupe = cast(_DuplicateAttributeHandler, on_dupe)
                    on_dupe(attr_dict, key, value)
            else:
                attr_dict[key] = value
        # print("START", tag)
        sourceline: Optional[int]
        sourcepos: Optional[int]
        if self.soup.builder.store_line_numbers:
            sourceline, sourcepos = self.getpos()
        else:
            sourceline = sourcepos = None
        tagObj = self.soup.handle_starttag(
            tag, None, None, attr_dict, sourceline=sourceline, sourcepos=sourcepos
        )
        if tagObj is not None and tagObj.is_empty_element and handle_empty_element:
            # Unlike other parsers, html.parser doesn't send separate end tag
            # events for empty-element tags. (It's handled in
            # handle_startendtag, but only if the original markup looked like
            # <tag/>.)
            #
            # So we need to call handle_endtag() ourselves. Since we
            # know the start event is identical to the end event, we
            # don't want handle_endtag() to cross off any previous end
            # events for tags of this name.
            self.handle_endtag(tag, check_already_closed=False)

            # But we might encounter an explicit closing tag for this tag
            # later on. If so, we want to ignore it.
            self.already_closed_empty_element.append(tag)

        if self._root_tag_name is None:
            self._root_tag_encountered(tag)

    def handle_endtag(self, tag: str, check_already_closed: bool = True) -> None:
        """Handle a closing tag, e.g. '</tag>'

        :param tag: A tag name.
        :param check_already_closed: If True (the default) and ``tag``
           is on the list of empty-element tags this parser has
           already closed, the closing tag is treated as redundant:
           one entry is removed from the list and nothing is passed
           on to the soup. If False, the end tag is always passed on.
        """
        # print("END", tag)
        if check_already_closed and tag in self.already_closed_empty_element:
            # This is a redundant end tag for an empty-element tag.
            # We've already called handle_endtag() for it, so just
            # check it off the list.
            # print("ALREADY CLOSED", tag)
            self.already_closed_empty_element.remove(tag)
        else:
            self.soup.handle_endtag(tag)

    def handle_data(self, data: str) -> None:
        """Handle some textual data that shows up between tags."""
        self.soup.handle_data(data)

    def handle_charref(self, name: str) -> None:
        """Handle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        """
        # TODO: This was originally a workaround for a bug in
        # HTMLParser. (http://bugs.python.org/issue13633) The bug has
        # been fixed, but removing this code still makes some
        # Beautiful Soup tests fail. This needs investigation.
        real_name: int
        if name.startswith("x"):
            real_name = int(name.lstrip("x"), 16)
        elif name.startswith("X"):
            real_name = int(name.lstrip("X"), 16)
        else:
            real_name = int(name)

        data, replacement_added = UnicodeDammit.numeric_character_reference(real_name)
        if replacement_added:
            self.soup.contains_replacement_characters = True
        self.handle_data(data)

    def handle_entityref(self, name: str) -> None:
        """Handle a named entity reference by converting it to the
        corresponding Unicode character(s) and treating it as textual
        data.

        :param name: Name of the entity reference.
        """
        character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
        if character is not None:
            data = character
        else:
            # If this were XML, it would be ambiguous whether "&foo"
            # was a character entity reference with a missing
            # semicolon or the literal string "&foo". Since this is
            # HTML, we have a complete list of all character entity references,
            # and this one wasn't found, so assume it's the literal string "&foo".
            data = "&%s" % name
        self.handle_data(data)

    def handle_comment(self, data: str) -> None:
        """Handle an HTML comment.

        :param data: The text of the comment.
        """
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(Comment)

    def handle_decl(self, decl: str) -> None:
        """Handle a DOCTYPE declaration.

        :param decl: The text of the declaration.
        """
        self.soup.endData()
        # Drop the leading keyword. NOTE(review): this slices off the
        # first len("DOCTYPE ") characters unconditionally, so it
        # assumes ``decl`` starts with that keyword plus one space --
        # confirm against html.parser's behavior.
        decl = decl[len("DOCTYPE ") :]
        self.soup.handle_data(decl)
        self.soup.endData(Doctype)

    def unknown_decl(self, data: str) -> None:
        """Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        """
        cls: Type[NavigableString]
        if data.upper().startswith("CDATA["):
            cls = CData
            data = data[len("CDATA[") :]
        else:
            cls = Declaration
        self.soup.endData()
        self.soup.handle_data(data)
        self.soup.endData(cls)

    def handle_pi(self, data: str) -> None:
        """Handle a processing instruction.

        :param data: The text of the instruction.
        """
        self.soup.endData()
        self.soup.handle_data(data)
        self._document_might_be_xml(data)
        self.soup.endData(ProcessingInstruction)
|
||||
|
||||
|
||||
class HTMLParserTreeBuilder(HTMLTreeBuilder):
    """A Beautiful soup `bs4.builder.TreeBuilder` that uses the
    :py:class:`html.parser.HTMLParser` parser, found in the Python
    standard library.

    """

    is_xml: bool = False
    picklable: bool = True
    NAME: str = HTMLPARSER
    features: Iterable[str] = [NAME, HTML, STRICT]
    parser_args: Tuple[Iterable[Any], Dict[str, Any]]

    #: The html.parser knows which line number and position in the
    #: original file is the source of an element.
    TRACKS_LINE_NUMBERS: bool = True

    def __init__(
        self,
        parser_args: Optional[Iterable[Any]] = None,
        parser_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ):
        """Constructor.

        :param parser_args: Positional arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked. The dictionary is copied; the caller's object
            is not modified.
        :param kwargs: Keyword arguments for the superclass constructor.
        """
        # Some keyword arguments will be pulled out of kwargs and placed
        # into parser_kwargs.
        extra_parser_kwargs = {}
        for arg in ("on_duplicate_attribute",):
            if arg in kwargs:
                extra_parser_kwargs[arg] = kwargs.pop(arg)
        super(HTMLParserTreeBuilder, self).__init__(**kwargs)
        parser_args = parser_args or []
        # Work on a private copy so the updates below don't leak back
        # into a dictionary owned by the caller.
        parser_kwargs = dict(parser_kwargs or {})
        parser_kwargs.update(extra_parser_kwargs)
        # Character references are always reported through
        # handle_charref/handle_entityref rather than converted by
        # html.parser itself.
        parser_kwargs["convert_charrefs"] = False
        self.parser_args = (parser_args, parser_kwargs)

    def prepare_markup(
        self,
        markup: _RawMarkup,
        user_specified_encoding: Optional[_Encoding] = None,
        document_declared_encoding: Optional[_Encoding] = None,
        exclude_encodings: Optional[_Encodings] = None,
    ) -> Iterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]]:
        """Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples: (markup, encoding, declared encoding,
             has undergone character replacement)

            Each 4-tuple represents a strategy for parsing the document.
            This TreeBuilder uses Unicode, Dammit to convert the markup
            into Unicode, so the ``markup`` element of the tuple will
            always be a string.

        :raise ParserRejectedMarkup: If the markup could not be
            converted to Unicode.
        """
        if isinstance(markup, str):
            # Parse Unicode as-is.
            yield (markup, None, None, False)
            return

        # Ask UnicodeDammit to sniff the most likely encoding.

        known_definite_encodings: List[_Encoding] = []
        if user_specified_encoding:
            # This was provided by the end-user; treat it as a known
            # definite encoding per the algorithm laid out in the
            # HTML5 spec. (See the EncodingDetector class for
            # details.)
            known_definite_encodings.append(user_specified_encoding)

        user_encodings: List[_Encoding] = []
        if document_declared_encoding:
            # This was found in the document; treat it as a slightly
            # lower-priority user encoding.
            user_encodings.append(document_declared_encoding)

        dammit = UnicodeDammit(
            markup,
            known_definite_encodings=known_definite_encodings,
            user_encodings=user_encodings,
            is_html=True,
            exclude_encodings=exclude_encodings,
        )

        if dammit.unicode_markup is None:
            # In every case I've seen, Unicode, Dammit is able to
            # convert the markup into Unicode, even if it needs to use
            # REPLACEMENT CHARACTER. But there is a code path that
            # could result in unicode_markup being None, and
            # HTMLParser can only parse Unicode, so here we handle
            # that code path.
            raise ParserRejectedMarkup(
                "Could not convert input to Unicode, and html.parser will not accept bytestrings."
            )
        else:
            yield (
                dammit.unicode_markup,
                dammit.original_encoding,
                dammit.declared_html_encoding,
                dammit.contains_replacement_characters,
            )

    def feed(self, markup: _RawMarkup, _parser_class: type[BeautifulSoupHTMLParser] = BeautifulSoupHTMLParser) -> None:
        """Run the markup through a freshly constructed parser.

        :param markup: The markup to feed into the parser.
        :param _parser_class: An HTMLParser subclass to use. This is only intended for use in unit tests.
        :raise ParserRejectedMarkup: If the parser raises
            AssertionError while processing the markup.
        """
        args, kwargs = self.parser_args

        # HTMLParser.feed will only handle str, but
        # BeautifulSoup.markup is allowed to be _RawMarkup, because
        # it's set by the yield value of
        # TreeBuilder.prepare_markup. Fortunately,
        # HTMLParserTreeBuilder.prepare_markup always yields a str
        # (UnicodeDammit.unicode_markup).
        assert isinstance(markup, str)

        # We know BeautifulSoup calls TreeBuilder.initialize_soup
        # before calling feed(), so we can assume self.soup
        # is set.
        assert self.soup is not None
        parser = _parser_class(self.soup, *args, **kwargs)

        try:
            parser.feed(markup)
            parser.close()
        except AssertionError as e:
            # html.parser raises AssertionError in rare cases to
            # indicate a fatal problem with the markup, especially
            # when there's an error in the doctype declaration.
            # Chain explicitly so the original error is recorded as
            # the cause of the rejection.
            raise ParserRejectedMarkup(e) from e
        parser.already_closed_empty_element = []
|
||||
|
|
@ -0,0 +1,501 @@
|
|||
# encoding: utf-8
|
||||
from __future__ import annotations
|
||||
|
||||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = [
|
||||
"LXMLTreeBuilderForXML",
|
||||
"LXMLTreeBuilder",
|
||||
]
|
||||
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Type,
|
||||
TYPE_CHECKING,
|
||||
Union,
|
||||
)
|
||||
|
||||
from io import BytesIO
|
||||
from io import StringIO
|
||||
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from lxml import etree # type:ignore
|
||||
from bs4.element import (
|
||||
AttributeDict,
|
||||
XMLAttributeDict,
|
||||
Comment,
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
DetectsXMLParsedAsHTML,
|
||||
FAST,
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
PERMISSIVE,
|
||||
TreeBuilder,
|
||||
XML,
|
||||
)
|
||||
from bs4.dammit import EncodingDetector
|
||||
from bs4.exceptions import ParserRejectedMarkup
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4._typing import (
|
||||
_Encoding,
|
||||
_Encodings,
|
||||
_NamespacePrefix,
|
||||
_NamespaceURL,
|
||||
_NamespaceMapping,
|
||||
_InvertedNamespaceMapping,
|
||||
_RawMarkup,
|
||||
)
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
LXML: str = "lxml"
|
||||
|
||||
|
||||
def _invert(d: dict[Any, Any]) -> dict[Any, Any]:
|
||||
"Invert a dictionary."
|
||||
return dict((v, k) for k, v in list(d.items()))
|
||||
|
||||
|
||||
_LXMLParser: TypeAlias = Union[etree.XMLParser, etree.HTMLParser]
|
||||
_ParserOrParserClass: TypeAlias = Union[
|
||||
_LXMLParser, Type[etree.XMLParser], Type[etree.HTMLParser]
|
||||
]
|
||||
|
||||
|
||||
class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS: Type[etree.XMLParser] = etree.XMLParser
|
||||
|
||||
is_xml: bool = True
|
||||
|
||||
#: Set this to true (probably by passing huge_tree=True into the
|
||||
#: BeautifulSoup constructor) to enable the lxml feature "disable security
|
||||
#: restrictions and support very deep trees and very long text
|
||||
#: content".
|
||||
huge_tree: bool
|
||||
|
||||
processing_instruction_class: Type[ProcessingInstruction]
|
||||
|
||||
NAME: str = "lxml-xml"
|
||||
ALTERNATE_NAMES: Iterable[str] = ["xml"]
|
||||
|
||||
# Well, it's permissive by XML parser standards.
|
||||
features: Iterable[str] = [NAME, LXML, XML, FAST, PERMISSIVE]
|
||||
|
||||
CHUNK_SIZE: int = 512
|
||||
|
||||
# This namespace mapping is specified in the XML Namespace
|
||||
# standard.
|
||||
DEFAULT_NSMAPS: _NamespaceMapping = dict(xml="http://www.w3.org/XML/1998/namespace")
|
||||
|
||||
DEFAULT_NSMAPS_INVERTED: _InvertedNamespaceMapping = _invert(DEFAULT_NSMAPS)
|
||||
|
||||
nsmaps: List[Optional[_InvertedNamespaceMapping]]
|
||||
empty_element_tags: Optional[Set[str]]
|
||||
parser: Any
|
||||
_default_parser: Optional[etree.XMLParser]
|
||||
|
||||
# NOTE: If we parsed Element objects and looked at .sourceline,
|
||||
# we'd be able to see the line numbers from the original document.
|
||||
# But instead we build an XMLParser or HTMLParser object to serve
|
||||
# as the target of parse messages, and those messages don't include
|
||||
# line numbers.
|
||||
# See: https://bugs.launchpad.net/lxml/+bug/1846906
|
||||
|
||||
def initialize_soup(self, soup: BeautifulSoup) -> None:
    """Attach to a `BeautifulSoup` object and register the standard
    namespace mapping with it.

    :param soup: A `BeautifulSoup`.
    """
    # Beyond this point, self.soup is set, so we can assume (and
    # assert) it's not None whenever necessary.
    super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
    standard_namespaces = self.DEFAULT_NSMAPS
    self._register_namespaces(standard_namespaces)
|
||||
|
||||
def _register_namespaces(self, mapping: Dict[str, str]) -> None:
|
||||
"""Let the BeautifulSoup object know about namespaces encountered
|
||||
while parsing the document.
|
||||
|
||||
This might be useful later on when creating CSS selectors.
|
||||
|
||||
This will track (almost) all namespaces, even ones that were
|
||||
only in scope for part of the document. If two namespaces have
|
||||
the same prefix, only the first one encountered will be
|
||||
tracked. Un-prefixed namespaces are not tracked.
|
||||
|
||||
:param mapping: A dictionary mapping namespace prefixes to URIs.
|
||||
"""
|
||||
assert self.soup is not None
|
||||
for key, value in list(mapping.items()):
|
||||
# This is 'if key' and not 'if key is not None' because we
|
||||
# don't track un-prefixed namespaces. Soupselect will
|
||||
# treat an un-prefixed namespace as the default, which
|
||||
# causes confusion in some cases.
|
||||
if key and key not in self.soup._namespaces:
|
||||
# Let the BeautifulSoup object know about a new namespace.
|
||||
# If there are multiple namespaces defined with the same
|
||||
# prefix, the first one in the document takes precedence.
|
||||
self.soup._namespaces[key] = value
|
||||
|
||||
def default_parser(self, encoding: Optional[_Encoding]) -> _ParserOrParserClass:
    """Find the default parser for the given encoding.

    :param encoding: Passed to the parser class as ``encoding`` when
        a new parser has to be created.
    :return: The parser given to the constructor if there was one;
        otherwise the result of calling ``DEFAULT_PARSER_CLASS``
        with this builder as its target.
    """
    preset = self._default_parser
    if preset is None:
        return self.DEFAULT_PARSER_CLASS(
            target=self,
            recover=True,
            huge_tree=self.huge_tree,
            encoding=encoding,
        )
    return preset
|
||||
|
||||
def parser_for(self, encoding: Optional[_Encoding]) -> _LXMLParser:
    """Instantiate an appropriate parser for the given encoding.

    :param encoding: A string.
    :return: A parser object such as an `etree.XMLParser`.
    """
    # Start from the default parser.
    candidate = self.default_parser(encoding)
    if not callable(candidate):
        # Already a ready-to-use parser object.
        return candidate

    # We were handed something callable (such as a parser class);
    # instantiate it with default arguments.
    return candidate(
        target=self,
        recover=True,
        huge_tree=self.huge_tree,
        encoding=encoding,
    )
|
||||
|
||||
def __init__(
    self,
    parser: Optional[etree.XMLParser] = None,
    empty_element_tags: Optional[Set[str]] = None,
    huge_tree: bool = False,
    **kwargs: Any,
):
    """Constructor.

    :param parser: A parser object or class to use instead of
        creating one from ``DEFAULT_PARSER_CLASS``.
    :param empty_element_tags: NOTE(review): accepted but not
        referenced anywhere in this method -- confirm whether it is
        meant to be forwarded to the superclass.
    :param huge_tree: Stored on the builder and passed to lxml
        parsers created by this builder.
    :param kwargs: Keyword arguments for the superclass constructor.
        ``attribute_dict_class`` defaults to `XMLAttributeDict` when
        not supplied.
    """
    # TODO: Issue a warning if parser is present but not a
    # callable, since that means there's no way to create new
    # parsers for different encodings.
    self._default_parser = parser
    self.soup = None
    self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
    self.active_namespace_prefixes = [dict(self.DEFAULT_NSMAPS)]
    self.processing_instruction_class = (
        XMLProcessingInstruction if self.is_xml else ProcessingInstruction
    )

    kwargs.setdefault("attribute_dict_class", XMLAttributeDict)
    self.huge_tree = huge_tree

    super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
|
||||
|
||||
def _getNsTag(self, tag: str) -> Tuple[Optional[str], str]:
|
||||
# Split the namespace URL out of a fully-qualified lxml tag
|
||||
# name. Copied from lxml's src/lxml/sax.py.
|
||||
if tag[0] == "{" and "}" in tag:
|
||||
namespace, name = tag[1:].split("}", 1)
|
||||
return (namespace, name)
|
||||
return (None, tag)
|
||||
|
||||
def prepare_markup(
    self,
    markup: _RawMarkup,
    user_specified_encoding: Optional[_Encoding] = None,
    document_declared_encoding: Optional[_Encoding] = None,
    exclude_encodings: Optional[_Encodings] = None,
) -> Iterable[
    Tuple[Union[str, bytes], Optional[_Encoding], Optional[_Encoding], bool]
]:
    """Yield the parsing strategies to try for the incoming markup.

    lxml prefers to receive a bytestring and do the conversion to
    Unicode itself. Rather than converting with UnicodeDammit, this
    method uses EncodingDetector to enumerate candidate encodings
    and lets lxml attempt the document under each one in turn.

    :param markup: Some markup -- hopefully a bytestring.
    :param user_specified_encoding: The user asked to try this encoding.
    :param document_declared_encoding: The markup itself claims to be
        in this encoding.
    :param exclude_encodings: The user asked _not_ to try any of
        these encodings.

    :yield: A series of 4-tuples: (markup, encoding, declared encoding,
        has undergone character replacement)

        Each 4-tuple is one strategy for converting the document to
        Unicode and parsing it; strategies are tried in order.
    """
    if not self.is_xml:
        # HTML mode: being handed XML is worth a warning.
        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup, stacklevel=3)

    if isinstance(markup, str):
        # TODO: Dropping the leading BOM is a workaround for
        # https://bugs.launchpad.net/lxml/+bug/1948551.
        # It can be removed once the upstream issue is fixed.
        if markup.startswith("\N{BYTE ORDER MARK}"):
            markup = markup[1:]

        # First strategy: see whether lxml can parse Unicode directly
        # on this system.
        yield markup, None, document_declared_encoding, False

        # Second strategy: hand lxml the same text as UTF-8 bytes.
        yield (markup.encode("utf8"), "utf8", document_declared_encoding, False)

        # The document was Unicode to begin with, so no further
        # strategies are needed.
        return

    # An encoding supplied by the end-user is a "known definite
    # encoding" per the algorithm in the HTML5 spec (see the
    # EncodingDetector class for details).
    known_definite_encodings: List[_Encoding] = (
        [user_specified_encoding] if user_specified_encoding else []
    )

    # An encoding found in the document is a slightly lower-priority
    # user encoding.
    user_encodings: List[_Encoding] = (
        [document_declared_encoding] if document_declared_encoding else []
    )

    detector = EncodingDetector(
        markup,
        known_definite_encodings=known_definite_encodings,
        user_encodings=user_encodings,
        is_html=not self.is_xml,
        exclude_encodings=exclude_encodings,
    )
    for candidate in detector.encodings:
        yield (detector.markup, candidate, document_declared_encoding, False)
|
||||
|
||||
def feed(self, markup: _RawMarkup) -> None:
    """Push the markup through a newly obtained parser, chunk by chunk.

    :param markup: The document, as bytes or str.
    :raise ParserRejectedMarkup: When the parser raises
       UnicodeDecodeError, LookupError or etree.ParserError.
    """
    stream: Union[BytesIO, StringIO]
    if isinstance(markup, bytes):
        stream = BytesIO(markup)
    elif isinstance(markup, str):
        stream = StringIO(markup)

    # initialize_soup is called before feed, so we know this
    # is not None.
    assert self.soup is not None

    # The first chunk is always fed, even when it is empty;
    # otherwise the parser won't be initialized.
    chunk = stream.read(self.CHUNK_SIZE)
    try:
        self.parser = self.parser_for(self.soup.original_encoding)
        self.parser.feed(chunk)
        # Keep reading until the stream is exhausted, feeding only
        # the non-empty chunks.
        while chunk:
            chunk = stream.read(self.CHUNK_SIZE)
            if chunk:
                self.parser.feed(chunk)
        self.parser.close()
    except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
        raise ParserRejectedMarkup(e)
|
||||
|
||||
def close(self) -> None:
    """Reset the inverted-namespace stack to only the default mapping."""
    self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
|
||||
|
||||
def start(
    self,
    tag: str | bytes,
    attrib: Dict[str | bytes, str | bytes],
    nsmap: _NamespaceMapping = {},
) -> None:
    """Handle the start of an element and pass it on to the soup.

    Copies the attributes into a new attribute dict, updates the
    namespace stacks (`nsmaps` and `active_namespace_prefixes`) when
    namespace mappings are in play, converts namespaced attribute
    names to `NamespacedAttribute` objects, and finally calls
    `handle_starttag` on the soup.

    :param tag: The element name, possibly in ``{namespace}name``
       form. Must be a str.
    :param attrib: The element's attributes. Keys and values must be
       str.
    :param nsmap: Namespace mappings newly introduced by this element.
       The default dict is only read here, never mutated.
    """
    # This is called by lxml code as a result of calling
    # BeautifulSoup.feed(), and we know self.soup is set by the time feed()
    # is called.
    assert self.soup is not None
    assert isinstance(tag, str)

    # We need to recreate the attribute dict for three
    # reasons. First, for type checking, so we can assert there
    # are no bytestrings in the keys or values. Second, because we
    # need a mutable dict--lxml might send us an immutable
    # dictproxy. Third, so we can handle namespaced attribute
    # names by converting the keys to NamespacedAttributes.
    new_attrib: Dict[Union[str, NamespacedAttribute], str] = (
        self.attribute_dict_class()
    )
    for k, v in attrib.items():
        assert isinstance(k, str)
        assert isinstance(v, str)
        new_attrib[k] = v

    nsprefix: Optional[_NamespacePrefix] = None
    namespace: Optional[_NamespaceURL] = None
    # Invert each namespace map as it comes in.
    if len(nsmap) == 0 and len(self.nsmaps) > 1:
        # There are no new namespaces for this tag, but
        # non-default namespaces are in play, so we need a
        # separate tag stack to know when they end.
        self.nsmaps.append(None)
    elif len(nsmap) > 0:
        # A new namespace mapping has come into play.

        # First, let the BeautifulSoup object know about it.
        self._register_namespaces(nsmap)

        # Then, add it to our running list of inverted namespace
        # mappings.
        self.nsmaps.append(_invert(nsmap))

        # The currently active namespace prefixes have
        # changed. Calculate the new mapping so it can be stored
        # with all Tag objects created while these prefixes are in
        # scope.
        current_mapping = dict(self.active_namespace_prefixes[-1])
        current_mapping.update(nsmap)

        # We should not track un-prefixed namespaces as we can only hold one
        # and it will be recognized as the default namespace by soupsieve,
        # which may be confusing in some situations.
        if "" in current_mapping:
            del current_mapping[""]
        self.active_namespace_prefixes.append(current_mapping)

        # Also treat the namespace mapping as a set of attributes on the
        # tag, so we can recreate it later.
        for prefix, namespace in list(nsmap.items()):
            attribute = NamespacedAttribute(
                "xmlns", prefix, "http://www.w3.org/2000/xmlns/"
            )
            new_attrib[attribute] = namespace

    # Namespaces are in play. Find any attributes that came in
    # from lxml with namespaces attached to their names, and
    # turn them into NamespacedAttribute objects.
    final_attrib: AttributeDict = self.attribute_dict_class()
    for attr, value in list(new_attrib.items()):
        namespace, attr = self._getNsTag(attr)
        if namespace is None:
            final_attrib[attr] = value
        else:
            nsprefix = self._prefix_for_namespace(namespace)
            attr = NamespacedAttribute(nsprefix, attr, namespace)
            final_attrib[attr] = value

    # Resolve the tag's own namespace and prefix last, so these are
    # the values handed to the soup.
    namespace, tag = self._getNsTag(tag)
    nsprefix = self._prefix_for_namespace(namespace)
    self.soup.handle_starttag(
        tag,
        namespace,
        nsprefix,
        final_attrib,
        namespaces=self.active_namespace_prefixes[-1],
    )
|
||||
|
||||
def _prefix_for_namespace(
|
||||
self, namespace: Optional[_NamespaceURL]
|
||||
) -> Optional[_NamespacePrefix]:
|
||||
"""Find the currently active prefix for the given namespace."""
|
||||
if namespace is None:
|
||||
return None
|
||||
for inverted_nsmap in reversed(self.nsmaps):
|
||||
if inverted_nsmap is not None and namespace in inverted_nsmap:
|
||||
return inverted_nsmap[namespace]
|
||||
return None
|
||||
|
||||
def end(self, tag: str | bytes) -> None:
    """Handle the end of an element and unwind the namespace stacks.

    :param tag: The element name, possibly in ``{namespace}name``
       form. Must be a str.
    """
    assert self.soup is not None
    assert isinstance(tag, str)
    self.soup.endData()

    namespace, tag = self._getNsTag(tag)
    # The helper returns None for a None namespace and for a
    # namespace that no map on the stack knows about.
    nsprefix = self._prefix_for_namespace(namespace)
    self.soup.handle_endtag(tag, nsprefix)

    if len(self.nsmaps) <= 1:
        # Only the default mapping is on the stack; nothing to pop.
        return

    # This tag, or one of its parents, introduced a namespace
    # mapping, so pop it off the stack.
    out_of_scope_nsmap = self.nsmaps.pop()
    if out_of_scope_nsmap is not None:
        # This tag introduced a namespace mapping which is no
        # longer in scope, so drop its set of active prefixes too.
        self.active_namespace_prefixes.pop()
|
||||
|
||||
def pi(self, target: str, data: str) -> None:
    """Handle a processing instruction.

    Any pending text is flushed first. The target and data are then
    joined with a single space and stored using this builder's
    processing instruction class.

    :param target: The processing instruction's target.
    :param data: The processing instruction's data.
    """
    soup = self.soup
    assert soup is not None
    soup.endData()
    instruction = target + " " + data
    soup.handle_data(instruction)
    soup.endData(self.processing_instruction_class)
|
||||
|
||||
def data(self, data: str | bytes) -> None:
    """Pass a run of character data on to the soup.

    :param data: The text. Must be a str.
    """
    soup = self.soup
    assert soup is not None
    assert isinstance(data, str)
    soup.handle_data(data)
|
||||
|
||||
def doctype(self, name: str, pubid: str, system: str) -> None:
    """Handle a doctype declaration.

    Any pending text is flushed first. The declaration is then turned
    into a string by `Doctype._string_for_name_and_ids` and stored as
    a `Doctype` object.

    :param name: The doctype name.
    :param pubid: The public identifier.
    :param system: The system identifier.
    """
    soup = self.soup
    assert soup is not None
    soup.endData()
    declaration = Doctype._string_for_name_and_ids(name, pubid, system)
    soup.handle_data(declaration)
    soup.endData(containerClass=Doctype)
|
||||
|
||||
def comment(self, text: str | bytes) -> None:
    """Handle a comment by storing it as a `Comment` object.

    Any pending text is flushed first.

    :param text: The body of the comment. Must be a str.
    """
    soup = self.soup
    assert soup is not None
    assert isinstance(text, str)
    soup.endData()
    soup.handle_data(text)
    soup.endData(Comment)
|
||||
|
||||
def test_fragment_to_document(self, fragment: str) -> str:
    """Prefix a fragment with a UTF-8 XML declaration. See `TreeBuilder`.

    :param fragment: A fragment of markup.
    :return: The XML declaration, a newline, then the fragment.
    """
    document_template = '<?xml version="1.0" encoding="utf-8"?>\n%s'
    return document_template % fragment
|
||||
|
||||
|
||||
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
    """Tree builder that parses HTML with lxml's `etree.HTMLParser`."""

    NAME: str = LXML
    ALTERNATE_NAMES: Iterable[str] = ["lxml-html"]

    # Every name this builder can be requested by, plus the feature
    # tags it advertises.
    features: Iterable[str] = [*ALTERNATE_NAMES, NAME, HTML, FAST, PERMISSIVE]
    is_xml: bool = False

    def default_parser(self, encoding: Optional[_Encoding]) -> _ParserOrParserClass:
        """Return the lxml HTML parser class.

        :param encoding: Not used by this implementation.
        """
        return etree.HTMLParser

    def feed(self, markup: _RawMarkup) -> None:
        """Run the whole document through a new parser in one call.

        :param markup: The document to parse.
        :raise ParserRejectedMarkup: When the parser raises
           UnicodeDecodeError, LookupError or etree.ParserError.
        """
        # We know self.soup is set by the time feed() is called.
        assert self.soup is not None
        source_encoding = self.soup.original_encoding
        try:
            self.parser = self.parser_for(source_encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(e)

    def test_fragment_to_document(self, fragment: str) -> str:
        """Wrap a fragment in html and body tags. See `TreeBuilder`."""
        document_template = "<html><body>%s</body></html>"
        return document_template % fragment
|
||||
339
.cache/python-test-venv/lib/python3.12/site-packages/bs4/css.py
Normal file
339
.cache/python-test-venv/lib/python3.12/site-packages/bs4/css.py
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
"""Integration code for CSS selectors using `Soup Sieve <https://facelessuser.github.io/soupsieve/>`_ (pypi: ``soupsieve``).
|
||||
|
||||
Acquire a `CSS` object through the `element.Tag.css` attribute of
|
||||
the starting point of your CSS selector, or (if you want to run a
|
||||
selector against the entire document) of the `BeautifulSoup` object
|
||||
itself.
|
||||
|
||||
The main advantage of doing this instead of using ``soupsieve``
|
||||
functions is that you don't need to keep passing the `element.Tag` to be
|
||||
selected against, since the `CSS` object is permanently scoped to that
|
||||
`element.Tag`.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import ModuleType
|
||||
from typing import (
|
||||
Any,
|
||||
cast,
|
||||
Iterable,
|
||||
Iterator,
|
||||
MutableSequence,
|
||||
Optional,
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
import warnings
|
||||
from bs4._typing import _NamespaceMapping
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from soupsieve import SoupSieve
|
||||
from bs4 import element
|
||||
from bs4.element import ResultSet, Tag
|
||||
|
||||
soupsieve: Optional[ModuleType]
|
||||
try:
|
||||
import soupsieve
|
||||
except ImportError:
|
||||
soupsieve = None
|
||||
warnings.warn(
|
||||
"The soupsieve package is not installed. CSS selectors cannot be used."
|
||||
)
|
||||
|
||||
|
||||
class CSS(object):
    """A proxy object against the ``soupsieve`` library, to simplify its
    CSS selector API.

    You don't need to instantiate this class yourself; instead, use
    `element.Tag.css`.

    Every selector method takes a ``select`` argument. It is normally
    a CSS selector string, but an instance of the API's ``SoupSieve``
    class (a precompiled selector, as returned by `compile`) is
    recognized too; for those the document's namespaces are not
    filled in, because a precompiled selector already carries its
    own namespace context.

    :param tag: All CSS selectors run by this object will use this as
        their starting point.

    :param api: An optional drop-in replacement for the ``soupsieve`` module,
        intended for use in unit tests.

    :raise NotImplementedError: If no ``api`` is given and the
        ``soupsieve`` package is not installed.
    """

    def __init__(self, tag: element.Tag, api: Optional[ModuleType] = None):
        if api is None:
            api = soupsieve
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident: str) -> str:
        """Escape a CSS identifier.

        This is a simple wrapper around `soupsieve.escape() <https://facelessuser.github.io/soupsieve/api/#soupsieveescape>`_. See the
        documentation for that function for more information.

        :param ident: The identifier to escape.
        :return: The escaped identifier.
        :raise NotImplementedError: If this object has no API to
            delegate to.
        """
        # Check the API this object actually uses rather than the
        # module-level soupsieve import, so that a replacement API
        # keeps working when soupsieve itself is not installed.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return cast(str, self.api.escape(ident))

    def _ns(
        self, ns: Optional[_NamespaceMapping], select: str
    ) -> Optional[_NamespaceMapping]:
        """Normalize a dictionary of namespaces.

        :param ns: The namespaces supplied by the caller, if any.
        :param select: The selector the namespaces are for.
        :return: ``ns`` when one was given. Otherwise the tag's own
            namespaces for a string selector, and None for a
            precompiled selector.
        """
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            # A precompiled pattern already has a namespace context
            # compiled in, which cannot be replaced, so the tag's
            # namespaces are only filled in for other selectors.
            ns = self.tag._namespaces
        return ns

    def _rs(self, results: MutableSequence[Tag]) -> ResultSet[Tag]:
        """Normalize a list of results to a :py:class:`ResultSet`.

        A :py:class:`ResultSet` is more consistent with the rest of
        Beautiful Soup's API, and :py:meth:`ResultSet.__getattr__` has
        a helpful error message if you try to treat a list of results
        as a single result (a common mistake).
        """
        # Import here to avoid circular import
        from bs4 import ResultSet

        return ResultSet(None, results)

    def compile(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> SoupSieve:
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           `soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.

        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(select, self._ns(namespaces, select), flags, **kwargs)

    def select_one(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> element.Tag | None:
        """Perform a CSS selection operation on the current Tag and return the
        first result, if any.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
           used in the CSS selector to namespace URIs. By default,
           Beautiful Soup will use the prefixes it encountered while
           parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
           `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        limit: int = 0,
        flags: int = 0,
        **kwargs: Any,
    ) -> ResultSet[element.Tag]:
        """Perform a CSS selection operation on the current `element.Tag`.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.
            None is treated the same as 0.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
        """
        if limit is None:
            limit = 0

        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
            )
        )

    def iselect(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        limit: int = 0,
        flags: int = 0,
        **kwargs: Any,
    ) -> Iterator[element.Tag]:
        """Perform a CSS selection operation on the current `element.Tag`.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.iselect()
        <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param limit: After finding this number of results, stop looking.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.
        """
        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> Optional[element.Tag]:
        """Find the `element.Tag` closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.closest()
        <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_
        method.

        :param select: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.

        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            `soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> bool:
        """Check whether or not this `element.Tag` matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the `soupsieve.match()
        <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
        method.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.match()
            <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
            method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            `soupsieve.match()
            <https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
            method.
        """
        return cast(
            bool,
            self.api.match(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            ),
        )

    def filter(
        self,
        select: str,
        namespaces: Optional[_NamespaceMapping] = None,
        flags: int = 0,
        **kwargs: Any,
    ) -> ResultSet[element.Tag]:
        """Filter this `element.Tag`'s direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing a `element.Tag` into that library's `soupsieve.filter()
        <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
        method. For more information, see the documentation for
        `soupsieve.filter()
        <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_.

        :param select: A CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.

        :param flags: Flags to be passed into Soup Sieve's
            `soupsieve.filter()
            <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
            method.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
            `soupsieve.filter()
            <https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
            method.
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )
|
||||
1516
.cache/python-test-venv/lib/python3.12/site-packages/bs4/dammit.py
Normal file
1516
.cache/python-test-venv/lib/python3.12/site-packages/bs4/dammit.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,268 @@
|
|||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
|
||||
# Use of this source code is governed by the MIT license.
|
||||
__license__ = "MIT"
|
||||
|
||||
import cProfile
|
||||
from io import BytesIO
|
||||
from html.parser import HTMLParser
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4.builder import builder_registry
|
||||
from typing import (
|
||||
Any,
|
||||
IO,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4._typing import _IncomingMarkup
|
||||
|
||||
import pstats
|
||||
import random
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import sys
|
||||
|
||||
|
||||
def diagnose(data: "_IncomingMarkup") -> None:
    """Diagnostic suite for isolating common problems.

    Reports the Beautiful Soup and Python versions, notes which of the
    basic parsers are missing from the builder registry, reports the
    lxml and html5lib versions when they can be imported, and then
    parses the markup with each available parser, printing either the
    prettified result or the traceback.

    :param data: Some markup that needs to be explained. An object
        with a ``read`` method is read first.
    :return: None; diagnostics are printed to standard output.
    """
    print(("Diagnostic running on Beautiful Soup %s" % __version__))
    print(("Python version %s" % sys.version))

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Loop over a snapshot. Removing an entry from the list that is
    # being iterated would make the loop skip the entry after it, so
    # a missing parser could go unnoticed.
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print(
                ("I noticed that %s is not installed. Installing it may help." % name)
            )

    if "lxml" in basic_parsers:
        basic_parsers.append("lxml-xml")
        try:
            from lxml import etree  # type:ignore

            print(("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION))))
        except ImportError:
            print("lxml is not installed or couldn't be imported.")

    if "html5lib" in basic_parsers:
        try:
            import html5lib

            print(("Found html5lib version %s" % html5lib.__version__))
        except ImportError:
            print("html5lib is not installed or couldn't be imported.")

    if hasattr(data, "read"):
        data = data.read()

    for parser in basic_parsers:
        print(("Trying to parse your markup with %s" % parser))
        try:
            soup = BeautifulSoup(data, features=parser)
        except Exception:
            # A diagnostic tool must keep going: report the failure
            # and move on to the next parser.
            print(("%s could not parse the markup." % parser))
            traceback.print_exc()
        else:
            print(("Here's what %s did with the markup:" % parser))
            print((soup.prettify()))

        print(("-" * 80))
|
||||
|
||||
|
||||
def lxml_trace(data: "_IncomingMarkup", html: bool = True, **kwargs: Any) -> None:
    """Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup. A str is encoded as UTF-8; an object
        with a ``read`` method is handed to lxml as-is; anything else
        is wrapped in a BytesIO.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       If False, lxml's XML parser will be used.
    :param kwargs: Passed on to ``etree.iterparse``. ``recover``
       defaults to True when not given.
    """
    from lxml import etree

    recover = kwargs.pop("recover", True)
    if isinstance(data, str):
        data = data.encode("utf8")
    # Real file objects are not instances of typing.IO, so an
    # isinstance check against it sends them to BytesIO(), which
    # rejects them. Detect file-like input by its read() method
    # instead, which also guarantees `reader` is always bound.
    if hasattr(data, "read"):
        reader = data
    else:
        reader = BytesIO(data)
    for event, element in etree.iterparse(reader, html=html, recover=recover, **kwargs):
        print(("%s, %4s, %s" % (event, element.tag, element.text)))
|
||||
|
||||
|
||||
class AnnouncingParser(HTMLParser):
    """An HTMLParser subclass whose only job is to announce parse events.

    Each handler prints the value it received followed by a label
    naming the kind of event. This gives you a picture of how
    html.parser sees a given document; the easiest way to use it is
    to call `htmlparser_trace`.
    """

    def _p(self, s: str) -> None:
        """Print a single announcement."""
        print(s)

    def handle_starttag(
        self,
        name: str,
        attrs: List[Tuple[str, Optional[str]]],
        handle_empty_element: bool = True,
    ) -> None:
        """Announce a start tag together with its attribute list."""
        self._p(f"{name} {attrs} START")

    def handle_endtag(self, name: str, check_already_closed: bool = True) -> None:
        """Announce an end tag."""
        self._p(f"{name} END")

    def handle_data(self, data: str) -> None:
        """Announce a run of text."""
        self._p(f"{data} DATA")

    def handle_charref(self, name: str) -> None:
        """Announce a character reference."""
        self._p(f"{name} CHARREF")

    def handle_entityref(self, name: str) -> None:
        """Announce an entity reference."""
        self._p(f"{name} ENTITYREF")

    def handle_comment(self, data: str) -> None:
        """Announce a comment."""
        self._p(f"{data} COMMENT")

    def handle_decl(self, data: str) -> None:
        """Announce a declaration."""
        self._p(f"{data} DECL")

    def unknown_decl(self, data: str) -> None:
        """Announce an unrecognized declaration."""
        self._p(f"{data} UNKNOWN-DECL")

    def handle_pi(self, data: str) -> None:
        """Announce a processing instruction."""
        self._p(f"{data} PI")
|
||||
|
||||
|
||||
def htmlparser_trace(data: str) -> None:
    """Print out the HTMLParser events that occur during parsing.

    The markup is fed to a new `AnnouncingParser`, so you can see how
    HTMLParser handles a document when no Beautiful Soup code is
    running.

    :param data: Some markup.
    """
    AnnouncingParser().feed(data)
|
||||
|
||||
|
||||
# Letter pools that rword() draws from when building random words.
_vowels: str = "aeiou"
_consonants: str = "bcdfghjklmnpqrstvwxyz"
|
||||
|
||||
|
||||
def rword(length: int = 5) -> str:
    """Generate a random word-like string.

    Letters alternate between a random consonant (even positions,
    starting with the first letter) and a random vowel (odd
    positions).

    :param length: The number of letters to generate.

    :meta private:
    """
    letters = [
        random.choice(_vowels if position % 2 else _consonants)
        for position in range(length)
    ]
    return "".join(letters)
|
||||
|
||||
|
||||
def rsentence(length: int = 4) -> str:
    """Generate a random sentence-like string.

    Each word comes from `rword`, with a length chosen at random
    between 4 and 9 letters inclusive. Words are separated by single
    spaces.

    :param length: The number of words to generate.

    :meta private:
    """
    words = []
    for _ in range(length):
        word_length = random.randint(4, 9)
        words.append(rword(word_length))
    return " ".join(words)
|
||||
|
||||
|
||||
def rdoc(num_elements: int = 1000) -> str:
    """Randomly generate an invalid HTML document.

    Each iteration draws a number from 0 to 3: 0 adds a random opening
    tag, 1 adds a random sentence of one to four words, 2 adds a
    random closing tag, and 3 adds nothing. The pieces are joined
    with newlines and wrapped in an ``<html>`` element.

    :param num_elements: The number of iterations, which is an upper
        bound on the number of pieces generated.

    :meta private:
    """
    tag_names = ["p", "div", "span", "i", "b", "script", "table"]
    pieces = []
    for _ in range(num_elements):
        roll = random.randint(0, 3)
        if roll == 1:
            pieces.append(rsentence(random.randint(1, 4)))
        elif roll in (0, 2):
            # 0 opens a tag, 2 closes one; only the template differs.
            template = "<%s>" if roll == 0 else "</%s>"
            pieces.append(template % random.choice(tag_names))
    return "<html>" + "\n".join(pieces) + "</html>"
|
||||
|
||||
|
||||
def benchmark_parsers(num_elements: int = 100000) -> None:
    """Very basic head-to-head performance benchmark.

    Generates one random invalid HTML document, then times how long
    Beautiful Soup takes to parse it with each parser, followed by
    raw lxml and raw html5lib. Results are printed to standard
    output.

    :param num_elements: Passed to `rdoc` to size the generated
        document.
    """
    print(("Comparative parser benchmark on Beautiful Soup %s" % __version__))
    data = rdoc(num_elements)
    print(("Generated a large invalid HTML document (%d bytes)." % len(data)))

    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals are not distorted by system clock adjustments the way
    # time.time() differences can be.
    for parser_name in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        try:
            a = time.perf_counter()
            BeautifulSoup(data, parser_name)
            b = time.perf_counter()
        except Exception:
            # Keep benchmarking the remaining parsers.
            print(("%s could not parse the markup." % parser_name))
            traceback.print_exc()
        else:
            print(("BS4+%s parsed the markup in %.2fs." % (parser_name, b - a)))

    from lxml import etree

    a = time.perf_counter()
    etree.HTML(data)
    b = time.perf_counter()
    print(("Raw lxml parsed the markup in %.2fs." % (b - a)))

    import html5lib

    parser = html5lib.HTMLParser()
    a = time.perf_counter()
    parser.parse(data)
    b = time.perf_counter()
    print(("Raw html5lib parsed the markup in %.2fs." % (b - a)))
|
||||
|
||||
|
||||
def profile(num_elements: int = 100000, parser: str = "lxml") -> None:
    """Use Python's profiler on a randomly generated document.

    Parses a document produced by `rdoc` under `cProfile`, then prints
    the 50 most expensive entries (sorted by cumulative time) whose
    names match ``_html5lib|bs4``.

    :param num_elements: Passed to `rdoc` to control the size of the
        generated document.
    :param parser: Name of the parser handed to ``bs4.BeautifulSoup``.
    """
    data = rdoc(num_elements)
    namespace = dict(bs4=bs4, data=data, parser=parser)

    # The temporary file only carries the profile data from cProfile to
    # pstats. The context manager guarantees it is closed (and thereby
    # removed) even if profiling raises.
    with tempfile.NamedTemporaryFile() as filehandle:
        filename = filehandle.name
        cProfile.runctx(
            "bs4.BeautifulSoup(data, parser)", namespace, namespace, filename
        )
        # Stats() loads the file in its constructor, so the data is
        # still available after the file is gone.
        stats = pstats.Stats(filename)

    stats.sort_stats("cumulative")
    stats.print_stats("_html5lib|bs4", 50)
|
||||
|
||||
|
||||
# If this file is run as a script, everything available on standard
# input is read and handed to diagnose().
if __name__ == "__main__":
    diagnose(sys.stdin.read())
|
||||
3211
.cache/python-test-venv/lib/python3.12/site-packages/bs4/element.py
Normal file
3211
.cache/python-test-venv/lib/python3.12/site-packages/bs4/element.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,28 @@
|
|||
"""Exceptions defined by Beautiful Soup itself."""
|
||||
|
||||
from typing import Union
|
||||
|
||||
|
||||
class StopParsing(Exception):
    """Raised by a TreeBuilder when it cannot continue parsing."""
|
||||
|
||||
|
||||
class FeatureNotFound(ValueError):
    """Raised by the BeautifulSoup constructor when no parser offering
    the requested features can be found.
    """
|
||||
|
||||
|
||||
class ParserRejectedMarkup(Exception):
    """Raised when the underlying parser flatly refuses to parse the
    markup it was given.
    """

    def __init__(self, message_or_exception: Union[str, Exception]):
        """Record why the parser rejected the markup.

        :param message_or_exception: Either a textual explanation, or
            another exception. An exception is rendered as
            ``"<ClassName>: <message>"``.
        """
        if isinstance(message_or_exception, Exception):
            cause = message_or_exception
            message_or_exception = f"{cause.__class__.__name__}: {str(cause)}"
        super().__init__(message_or_exception)
|
||||
|
|
@ -0,0 +1,764 @@
|
|||
from __future__ import annotations
|
||||
from collections import defaultdict
|
||||
import re
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
cast,
|
||||
Dict,
|
||||
Iterator,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Type,
|
||||
Union,
|
||||
)
|
||||
import warnings
|
||||
|
||||
from bs4._deprecation import _deprecated
|
||||
from bs4.element import (
|
||||
AttributeDict,
|
||||
NavigableString,
|
||||
PageElement,
|
||||
ResultSet,
|
||||
Tag,
|
||||
)
|
||||
from bs4._typing import (
|
||||
_AtMostOneElement,
|
||||
_AttributeValue,
|
||||
_NullableStringMatchFunction,
|
||||
_OneElement,
|
||||
_PageElementMatchFunction,
|
||||
_QueryResults,
|
||||
_RawAttributeValues,
|
||||
_RegularExpressionProtocol,
|
||||
_StrainableAttribute,
|
||||
_StrainableElement,
|
||||
_StrainableString,
|
||||
_StringMatchFunction,
|
||||
_TagMatchFunction,
|
||||
)
|
||||
|
||||
|
||||
class ElementFilter(object):
    """`ElementFilter` encapsulates the logic necessary to decide:

    1. whether a `PageElement` (a `Tag` or a `NavigableString`) matches a
       user-specified query.

    2. whether a given sequence of markup found during initial parsing
       should be turned into a `PageElement` at all, or simply discarded.

    The base class is the simplest `ElementFilter`. By default, it
    matches everything and allows all markup to become `PageElement`
    objects. You can make it more selective by passing in a
    user-defined match function, or defining a subclass.

    Most users of Beautiful Soup will never need to use
    `ElementFilter`, or its more capable subclass
    `SoupStrainer`. Instead, they will use methods like
    :py:meth:`Tag.find`, which will convert their arguments into
    `SoupStrainer` objects and run them against the tree.

    However, if you find yourself wanting to treat the arguments to
    Beautiful Soup's find_*() methods as first-class objects, those
    objects will be `SoupStrainer` objects. You can create them
    yourself and then make use of functions like
    `ElementFilter.filter()`.
    """

    match_function: Optional[_PageElementMatchFunction]

    def __init__(self, match_function: Optional[_PageElementMatchFunction] = None):
        """Pass in a match function to easily customize the behavior of
        `ElementFilter.match` without needing to subclass.

        :param match_function: A function that takes a `PageElement`
          and returns `True` if that `PageElement` matches some criteria.
        """
        self.match_function = match_function

    @property
    def includes_everything(self) -> bool:
        """Does this `ElementFilter` obviously include everything? If so,
        the filter process can be made much faster.

        The `ElementFilter` might turn out to include everything even
        if this returns `False`, but it won't include everything in an
        obvious way.

        The base `ElementFilter` implementation includes things based on
        the match function, so includes_everything is only true if
        there is no match function.
        """
        return not self.match_function

    @property
    def excludes_everything(self) -> bool:
        """Does this `ElementFilter` obviously exclude everything? If
        so, Beautiful Soup will issue a warning if you try to use it
        when parsing a document.

        The `ElementFilter` might turn out to exclude everything even
        if this returns `False`, but it won't exclude everything in an
        obvious way.

        The base `ElementFilter` implementation excludes things based
        on a match function we can't inspect, so excludes_everything
        is always false.
        """
        return False

    def match(self, element: PageElement, _known_rules: bool = False) -> bool:
        """Does the given PageElement match the rules set down by this
        ElementFilter?

        The base implementation delegates to the function passed in to
        the constructor.

        :param element: The `PageElement` to test.
        :param _known_rules: Defined for compatibility with
            SoupStrainer._match(). Used more for consistency than because
            we need the performance optimization.
        :return: `True` if there is no match function, otherwise
            whatever the match function returns for ``element``.
        """
        if not _known_rules and self.includes_everything:
            return True
        if not self.match_function:
            return True
        return self.match_function(element)

    def filter(self, generator: Iterator[PageElement]) -> Iterator[_OneElement]:
        """The most generic search method offered by Beautiful Soup.

        Acts like Python's built-in `filter`, using
        `ElementFilter.match` as the filtering function.

        :param generator: A way of iterating over `PageElement`
            objects. Any iterable is accepted, not just an iterator.
        :return: An iterator over the elements that passed the filter.
            When rules are applied, falsy elements are skipped before
            `match` is consulted.
        """
        # If there are no rules at all, don't bother filtering. Let
        # anything through, and stop once the input is exhausted rather
        # than falling into the filtering loop below.
        if self.includes_everything:
            yield from generator
            return

        for element in generator:
            if element and self.match(element, _known_rules=True):
                yield cast("_OneElement", element)

    def find(self, generator: Iterator[PageElement]) -> _AtMostOneElement:
        """A lower-level equivalent of :py:meth:`Tag.find`.

        You can pass in your own generator for iterating over
        `PageElement` objects. The first one that matches this
        `ElementFilter` will be returned.

        :param generator: A way of iterating over `PageElement`
            objects.
        :return: The first matching element, or `None` if nothing
            matches.
        """
        for match in self.filter(generator):
            return match
        return None

    def find_all(
        self, generator: Iterator[PageElement], limit: Optional[int] = None
    ) -> _QueryResults:
        """A lower-level equivalent of :py:meth:`Tag.find_all`.

        You can pass in your own generator for iterating over
        `PageElement` objects. Only elements that match this
        `ElementFilter` will be returned in the :py:class:`ResultSet`.

        :param generator: A way of iterating over `PageElement`
            objects.

        :param limit: Stop looking after finding this many results.
        """
        results = []
        for match in self.filter(generator):
            results.append(match)
            # The limit is checked after appending, so at least one
            # result is collected whenever anything matches.
            if limit is not None and len(results) >= limit:
                break
        return ResultSet(self, results)

    def allow_tag_creation(
        self, nsprefix: Optional[str], name: str, attrs: Optional[_RawAttributeValues]
    ) -> bool:
        """Based on the name and attributes of a tag, see whether this
        `ElementFilter` will allow a `Tag` object to even be created.

        By default, all tags are parsed. To change this, subclass
        `ElementFilter`.

        :param nsprefix: The namespace prefix of the prospective tag,
            if any.
        :param name: The name of the prospective tag.
        :param attrs: The attributes of the prospective tag.
        """
        return True

    def allow_string_creation(self, string: str) -> bool:
        """Based on the content of a string, see whether this
        `ElementFilter` will allow a `NavigableString` object based on
        this string to be added to the parse tree.

        By default, all strings are processed into `NavigableString`
        objects. To change this, subclass `ElementFilter`.

        :param string: The string under consideration.
        """
        return True
|
||||
|
||||
|
||||
class MatchRule(object):
    """Each MatchRule encapsulates the logic behind a single argument
    passed in to one of the Beautiful Soup find* methods.

    Exactly one of ``string``, ``pattern``, ``function``, ``present``
    and ``exclude_everything`` is set on any given rule; the
    constructor rejects every other combination.
    """

    string: Optional[str]
    pattern: Optional[_RegularExpressionProtocol]
    present: Optional[bool]
    exclude_everything: Optional[bool]
    # TODO-TYPING: All MatchRule objects also have an attribute
    # ``function``, but the type of the function depends on the
    # subclass.

    def __init__(
        self,
        string: Optional[Union[str, bytes]] = None,
        pattern: Optional[_RegularExpressionProtocol] = None,
        function: Optional[Callable] = None,
        present: Optional[bool] = None,
        exclude_everything: Optional[bool] = None,
    ):
        """Create a rule from exactly one kind of restriction.

        :param string: Match values equal to this string. Bytes are
            decoded as UTF-8.
        :param pattern: Match values in which this regular expression
            is found. A str or bytes value is compiled first.
        :param function: Match values for which this callable returns
            a truthy result.
        :param present: `True` matches any value other than `None`;
            `False` matches only `None`.
        :param exclude_everything: If true, the rule matches nothing.
        :raises ValueError: If none, or more than one, of the
            arguments is provided.
        """
        if isinstance(string, bytes):
            string = string.decode("utf8")
        self.string = string
        if isinstance(pattern, bytes):
            self.pattern = re.compile(pattern.decode("utf8"))
        elif isinstance(pattern, str):
            self.pattern = re.compile(pattern)
        else:
            self.pattern = pattern
        self.function = function
        self.present = present
        self.exclude_everything = exclude_everything

        values = [
            x
            for x in (
                self.string,
                self.pattern,
                self.function,
                self.present,
                self.exclude_everything,
            )
            if x is not None
        ]
        if len(values) == 0:
            raise ValueError(
                "Either string, pattern, function, present, or exclude_everything must be provided."
            )
        if len(values) > 1:
            raise ValueError(
                "At most one of string, pattern, function, present, and exclude_everything must be provided."
            )

    def _base_match(self, string: Optional[str]) -> Optional[bool]:
        """Run the 'cheap' portion of a match, trying to get an answer without
        calling a potentially expensive custom function.

        :return: True or False if we have a (positive or negative)
          match; None if we need to keep trying.
        """
        # self.exclude_everything matches nothing.
        if self.exclude_everything:
            return False

        # self.present==True matches everything except None.
        if self.present is True:
            return string is not None

        # self.present==False matches _only_ None.
        if self.present is False:
            return string is None

        # self.string does an exact string match.
        if self.string is not None:
            return self.string == string

        # self.pattern does a regular expression search.
        if self.pattern is not None:
            if string is None:
                return False
            return self.pattern.search(string) is not None

        return None

    def matches_string(self, string: Optional[str]) -> bool:
        """Decide whether ``string`` satisfies this rule.

        The cheap checks in `_base_match` are tried first; the custom
        function is only called when they are inconclusive.

        :param string: The value to test; may be `None`.
        :return: `True` if the rule accepts ``string``.
        """
        _base_result = self._base_match(string)
        if _base_result is not None:
            # No need to invoke the test function.
            return _base_result
        if self.function is not None and not self.function(string):
            return False
        return True

    def __repr__(self) -> str:
        cls = type(self).__name__
        return f"<{cls} string={self.string} pattern={self.pattern} function={self.function} present={self.present}>"

    def __eq__(self, other: Any) -> bool:
        """Two rules are equal when every restriction they carry is equal.

        ``exclude_everything`` is part of the comparison: a rule that
        matches nothing must not compare equal to one that merely has
        the flag set to `False`.

        Defining ``__eq__`` without ``__hash__`` leaves instances
        unhashable, which is the existing behavior of this class.
        """
        return (
            isinstance(other, MatchRule)
            and self.string == other.string
            and self.pattern == other.pattern
            and self.function == other.function
            and self.present == other.present
            and self.exclude_everything == other.exclude_everything
        )
|
||||
|
||||
|
||||
class TagNameMatchRule(MatchRule):
    """A `MatchRule` that is applied to the name of a tag."""

    function: Optional[_TagMatchFunction]

    def matches_tag(self, tag: Tag) -> bool:
        """Decide whether ``tag`` satisfies this rule.

        The cheap checks are run against ``tag.name``. If they are
        inconclusive, the rule's function is called with the `Tag`
        object itself (not its name).
        """
        cheap_answer = self._base_match(tag.name)
        if cheap_answer is None:
            # Nothing but a custom function can decide the match now.
            matcher = cast(_TagMatchFunction, self.function)
            return bool(matcher(tag))
        return cheap_answer
|
||||
|
||||
|
||||
class AttributeValueMatchRule(MatchRule):
    """A `MatchRule` that is applied to the value of a tag attribute."""

    function: Optional[_NullableStringMatchFunction]
|
||||
|
||||
|
||||
class StringMatchRule(MatchRule):
    """A `MatchRule` that is applied to a `NavigableString`."""

    function: Optional[_StringMatchFunction]
|
||||
|
||||
|
||||
class SoupStrainer(ElementFilter):
    """The `ElementFilter` subclass used internally by Beautiful Soup.

    A `SoupStrainer` encapsulates the logic necessary to perform the
    kind of matches supported by methods such as
    :py:meth:`Tag.find`. `SoupStrainer` objects are primarily created
    internally, but you can create one yourself and pass it in as
    ``parse_only`` to the `BeautifulSoup` constructor, to parse a
    subset of a large document.

    Internally, `SoupStrainer` objects work by converting the
    constructor arguments into `MatchRule` objects. Incoming
    tags/markup are matched against those rules.

    :param name: One or more restrictions on the tags found in a document.

    :param attrs: A dictionary that maps attribute names to
      restrictions on tags that use those attributes.

    :param string: One or more restrictions on the strings found in a
      document.

    :param kwargs: A dictionary that maps attribute names to restrictions
      on tags that use those attributes. These restrictions are additive to
      any specified in ``attrs``.

    """

    name_rules: List[TagNameMatchRule]
    attribute_rules: Dict[str, List[AttributeValueMatchRule]]
    string_rules: List[StringMatchRule]

    def __init__(
        self,
        name: Optional[_StrainableElement] = None,
        attrs: Optional[Dict[str, _StrainableAttribute]] = None,
        string: Optional[_StrainableString] = None,
        **kwargs: _StrainableAttribute,
    ):
        if string is None and "text" in kwargs:
            string = cast(Optional[_StrainableString], kwargs.pop("text"))
            warnings.warn(
                "As of version 4.11.0, the 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
                DeprecationWarning,
                stacklevel=2,
            )

        if name is None and not attrs and not string and not kwargs:
            # Special case for backwards compatibility. Instantiating
            # a SoupStrainer with no arguments whatsoever gets you one
            # that matches all Tags, and only Tags.
            self.name_rules = [TagNameMatchRule(present=True)]
        else:
            self.name_rules = cast(
                List[TagNameMatchRule], list(self._make_match_rules(name, TagNameMatchRule))
            )
        self.attribute_rules = defaultdict(list)

        if attrs is None:
            attrs = {}
        if not isinstance(attrs, dict):
            # Passing something other than a dictionary as attrs is
            # sugar for matching that thing against the 'class'
            # attribute.
            attrs = {"class": attrs}

        for attrdict in attrs, kwargs:
            for attr, value in attrdict.items():
                if attr == "class_" and attrdict is kwargs:
                    # If you pass in 'class_' as part of kwargs, it's
                    # because class is a Python reserved word. If you
                    # pass it in as part of the attrs dict, it's
                    # because you really are looking for an attribute
                    # called 'class_'.
                    attr = "class"

                if value is None:
                    # None means "the attribute must be absent", which
                    # is expressed as a present=False rule.
                    value = False
                for rule_obj in self._make_match_rules(value, AttributeValueMatchRule):
                    self.attribute_rules[attr].append(
                        cast(AttributeValueMatchRule, rule_obj)
                    )

        self.string_rules = cast(
            List[StringMatchRule], list(self._make_match_rules(string, StringMatchRule))
        )

        #: DEPRECATED 4.13.0: You shouldn't need to check this under
        #: any name (.string or .text), and if you do, you're probably
        #: not taking into account all of the types of values this
        #: variable might have. Look at the .string_rules list instead.
        self.__string = string

    @property
    def includes_everything(self) -> bool:
        """Check whether the provided rules will obviously include
        everything. (They might include everything even if this returns `False`,
        but not in an obvious way.)
        """
        return not self.name_rules and not self.string_rules and not self.attribute_rules

    @property
    def excludes_everything(self) -> bool:
        """Check whether the provided rules will obviously exclude
        everything. (They might exclude everything even if this returns `False`,
        but not in an obvious way.)
        """
        if self.string_rules and (self.name_rules or self.attribute_rules):
            # This is self-contradictory, so the rules exclude everything.
            return True

        # If there's a rule that ended up treated as an "exclude everything"
        # rule due to creating a logical inconsistency, then the rules
        # exclude everything.
        if any(x.exclude_everything for x in self.string_rules):
            return True
        if any(x.exclude_everything for x in self.name_rules):
            return True
        for ruleset in self.attribute_rules.values():
            if any(x.exclude_everything for x in ruleset):
                return True
        return False

    @property
    def string(self) -> Optional[_StrainableString]:
        ":meta private:"
        warnings.warn(
            "Access to deprecated property string. (Look at .string_rules instead) -- Deprecated since version 4.13.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.__string

    @property
    def text(self) -> Optional[_StrainableString]:
        ":meta private:"
        warnings.warn(
            "Access to deprecated property text. (Look at .string_rules instead) -- Deprecated since version 4.13.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.__string

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} name={self.name_rules} attrs={self.attribute_rules} string={self.string_rules}>"

    @classmethod
    def _make_match_rules(
        cls,
        obj: Optional[Union[_StrainableElement, _StrainableAttribute]],
        rule_class: Type[MatchRule],
    ) -> Iterator[MatchRule]:
        """Convert a vaguely-specific 'object' into one or more well-defined
        `MatchRule` objects.

        :param obj: Some kind of object that corresponds to one or more
           matching rules.
        :param rule_class: Create instances of this `MatchRule` subclass.
        """
        if obj is None:
            return
        if isinstance(obj, (str, bytes)):
            yield rule_class(string=obj)
        elif isinstance(obj, bool):
            yield rule_class(present=obj)
        elif callable(obj):
            yield rule_class(function=obj)
        elif isinstance(obj, _RegularExpressionProtocol):
            yield rule_class(pattern=obj)
        elif hasattr(obj, "__iter__"):
            if not obj:
                # The attribute is being matched against the null set,
                # which means it should exclude everything.
                yield rule_class(exclude_everything=True)
            for o in obj:
                if not isinstance(o, (bytes, str)) and hasattr(o, "__iter__"):
                    # This is almost certainly the user's
                    # mistake. This list contains another list, which
                    # opens up the possibility of infinite
                    # self-reference. In the interests of avoiding
                    # infinite recursion, we'll treat this as an
                    # impossible match and issue a rule that excludes
                    # everything, rather than looking inside.
                    warnings.warn(
                        f"Ignoring nested list {o} to avoid the possibility of infinite recursion.",
                        stacklevel=5,
                    )
                    yield rule_class(exclude_everything=True)
                    continue
                yield from cls._make_match_rules(o, rule_class)
        else:
            # Anything else is matched by its string representation.
            yield rule_class(string=str(obj))

    def matches_tag(self, tag: Tag) -> bool:
        """Do the rules of this `SoupStrainer` trigger a match against the
        given `Tag`?

        If the `SoupStrainer` has any `TagNameMatchRule`, at least one
        must match the `Tag` or its `Tag.name`.

        If there are any `AttributeValueMatchRule` for a given
        attribute, at least one of them must match the attribute
        value.

        If there are any `StringMatchRule`, at least one must match,
        but a `SoupStrainer` that *only* contains `StringMatchRule`
        cannot match a `Tag`, only a `NavigableString`.
        """
        # String rules cannot match a Tag on their own.
        if not self.name_rules and not self.attribute_rules:
            return False

        # Optimization for a very common case where the user is
        # searching for a tag with one specific name, and we're
        # looking at a tag with a different name.
        if (
            not tag.prefix
            and len(self.name_rules) == 1
            and self.name_rules[0].string is not None
            and tag.name != self.name_rules[0].string
        ):
            return False

        # If there are name rules, at least one must match. It can
        # match either the Tag object itself or the prefixed name of
        # the tag.
        prefixed_name = None
        if tag.prefix:
            prefixed_name = f"{tag.prefix}:{tag.name}"
        if self.name_rules:
            name_matches = False
            for rule in self.name_rules:
                # If the rule contains a function, the function will be called
                # with `tag`. It will not be called a second time with
                # `prefixed_name`.
                if rule.matches_tag(tag) or (
                    not rule.function
                    and prefixed_name is not None
                    and rule.matches_string(prefixed_name)
                ):
                    name_matches = True
                    break

            if not name_matches:
                return False

        # If there are attribute rules for a given attribute, at least
        # one of them must match. If there are rules for multiple
        # attributes, each attribute must have at least one match.
        for attr, rules in self.attribute_rules.items():
            attr_value = tag.get(attr, None)
            this_attr_match = self._attribute_match(attr_value, rules)
            if not this_attr_match:
                return False

        # If there are string rules, at least one must match.
        if self.string_rules:
            _str = tag.string
            if _str is None:
                return False
            if not self.matches_any_string_rule(_str):
                return False
        return True

    def _attribute_match(
        self,
        attr_value: Optional[_AttributeValue],
        rules: Iterable[AttributeValueMatchRule],
    ) -> bool:
        """See whether an attribute value satisfies at least one rule.

        A list value is first matched item by item. If that fails and
        the list does not hold exactly one item, the items are joined
        with spaces and matched again as a single string.

        :param attr_value: The attribute value; `None` if the
            attribute is absent.
        :param rules: The rules registered for this attribute.
        """
        attr_values: Sequence[Optional[str]]
        if isinstance(attr_value, list):
            attr_values = attr_value
        else:
            attr_values = [cast(str, attr_value)]

        def _match_attribute_value_helper(attr_values: Sequence[Optional[str]]) -> bool:
            for rule in rules:
                for attr_value in attr_values:
                    if rule.matches_string(attr_value):
                        return True
            return False

        this_attr_match = _match_attribute_value_helper(attr_values)
        if not this_attr_match and len(attr_values) != 1:
            # Try again but treat the attribute value as a single
            # string instead of a list. The result can only be
            # different if the list of values contains more or less
            # than one item.

            # This cast converts Optional[str] to plain str.
            #
            # We know there can't be any None in the list. Beautiful
            # Soup never uses None as a value of a multi-valued
            # attribute, and if None is passed in as attr_value, it's
            # turned into a list with 1 element, which was excluded by
            # the if statement above.
            attr_values = cast(Sequence[str], attr_values)

            joined_attr_value = " ".join(attr_values)
            this_attr_match = _match_attribute_value_helper([joined_attr_value])
        return this_attr_match

    def allow_tag_creation(
        self, nsprefix: Optional[str], name: str, attrs: Optional[_RawAttributeValues]
    ) -> bool:
        """Based on the name and attributes of a tag, see whether this
        `SoupStrainer` will allow a `Tag` object to even be created.

        :param nsprefix: The namespace prefix of the prospective tag,
            if any.
        :param name: The name of the prospective tag.
        :param attrs: The attributes of the prospective tag.
        """
        if self.string_rules:
            # A SoupStrainer that has string rules can't be used to
            # manage tag creation, because the string rule can't be
            # evaluated until after the tag and all of its contents
            # have been parsed.
            return False

        prefixed_name = None
        if nsprefix:
            prefixed_name = f"{nsprefix}:{name}"

        if self.name_rules:
            # At least one name rule must match either the bare or the
            # prefixed name. any() stops at the first match, so no
            # further rules (which may be user callbacks) are run.
            candidates = [x for x in (name, prefixed_name) if x is not None]
            name_match = any(
                rule.matches_string(candidate)
                for rule in self.name_rules
                for candidate in candidates
            )
            if not name_match:
                return False

        # For each attribute that has rules, at least one rule must
        # match.
        if attrs is None:
            attrs = AttributeDict()
        for attr, rules in self.attribute_rules.items():
            attr_value = attrs.get(attr)
            if not self._attribute_match(attr_value, rules):
                return False

        return True

    def allow_string_creation(self, string: str) -> bool:
        """Based on the content of a markup string, see whether this
        `SoupStrainer` will allow it to be instantiated as a
        `NavigableString` object, or whether it should be ignored.

        :param string: The string under consideration.
        """
        if self.name_rules or self.attribute_rules:
            # A SoupStrainer that has name or attribute rules won't
            # match any strings; it's designed to match tags with
            # certain properties.
            return False
        if not self.string_rules:
            # A SoupStrainer with no string rules will match
            # all strings.
            return True
        if not self.matches_any_string_rule(string):
            return False
        return True

    def matches_any_string_rule(self, string: str) -> bool:
        """See whether the content of a string matches any of
        this `SoupStrainer`'s string rules.

        :return: `True` if there are no string rules at all, or if at
            least one of them matches ``string``.
        """
        if not self.string_rules:
            return True
        for string_rule in self.string_rules:
            if string_rule.matches_string(string):
                return True
        return False

    def match(self, element: PageElement, _known_rules: bool = False) -> bool:
        """Does the given `PageElement` match the rules set down by this
        `SoupStrainer`?

        The find_* methods rely heavily on this method to find matches.

        :param element: A `PageElement`.
        :param _known_rules: Set to true in the common case where
           we already checked and found at least one rule in this SoupStrainer
           that might exclude a PageElement. Without this, we need
           to check .includes_everything every time, just to be safe.
        :return: `True` if the element matches this `SoupStrainer`'s rules; `False` otherwise.
        """
        # If there are no rules at all, let anything through.
        if not _known_rules and self.includes_everything:
            return True
        if isinstance(element, Tag):
            return self.matches_tag(element)
        assert isinstance(element, NavigableString)
        if not (self.name_rules or self.attribute_rules):
            # A NavigableString can only match a SoupStrainer that
            # does not define any name or attribute rules.
            # Then it comes down to the string rules.
            return self.matches_any_string_rule(element)
        return False

    @_deprecated("allow_tag_creation", "4.13.0")
    def search_tag(self, name: str, attrs: Optional[_RawAttributeValues]) -> bool:
        """A less elegant version of `allow_tag_creation`. Deprecated as of 4.13.0

        :meta private:
        """
        return self.allow_tag_creation(None, name, attrs)

    @_deprecated("match", "4.13.0")
    def search(self, element: PageElement) -> Optional[PageElement]:
        """A less elegant version of match(). Deprecated as of 4.13.0.

        :meta private:
        """
        return element if self.match(element) else None
|
||||
|
|
@ -0,0 +1,276 @@
|
|||
from __future__ import annotations
|
||||
from typing import Callable, Dict, Iterable, Optional, Set, Tuple, TYPE_CHECKING, Union
|
||||
from typing_extensions import TypeAlias
|
||||
from bs4.dammit import EntitySubstitution
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4._typing import _AttributeValue
|
||||
|
||||
|
||||
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `bs4.element.Tag.encode`. Most people won't need to
    think about formatters, and most people who need to think about
    them can pass in one of these predefined strings as `formatter`
    rather than making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents, as
                 well as some optimizations in the way tags are rendered.
     * 'html5-4.12' - The version of the 'html5' formatter used prior to
                      Beautiful Soup 4.13.0.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    """

    #: Constant name denoting HTML markup
    HTML: str = "html"

    #: Constant name denoting XML markup
    XML: str = "xml"

    #: Default values for the various constructor options when the
    #: markup language is HTML.
    HTML_DEFAULTS: Dict[str, Set[str]] = dict(
        cdata_containing_tags={"script", "style"},
    )

    language: Optional[str]  #: :meta private:
    entity_substitution: Optional[_EntitySubstitutionFunction]  #: :meta private:
    void_element_close_prefix: str  #: :meta private:
    cdata_containing_tags: Set[str]  #: :meta private:
    indent: str  #: :meta private:

    #: If this is set to true by the constructor, then attributes whose
    #: values are set to the empty string will be treated as HTML
    #: boolean attributes. (Attributes whose value is None are always
    #: rendered this way.)
    empty_attributes_are_booleans: bool

    def _default(
        self, language: str, value: Optional[Set[str]], kwarg: str
    ) -> Set[str]:
        """Resolve the effective value of a set-valued constructor option.

        :param language: The markup language in use; compared against
            `Formatter.XML`.
        :param value: The value supplied by the caller, or None to ask
            for the default.
        :param kwarg: The name of the constructor option, used as the
            key into `HTML_DEFAULTS`.
        :return: `value` itself when it is not None; an empty set when
            the language is XML; otherwise a copy of the matching
            `HTML_DEFAULTS` entry.
        """
        if value is not None:
            return value
        if language == self.XML:
            # When XML is the markup language in use, all of the
            # defaults are the empty set.
            return set()

        # Otherwise, it depends on what's in HTML_DEFAULTS. Return a copy
        # so that mutating one formatter's set cannot silently change the
        # class-level default that every other formatter would receive.
        return set(self.HTML_DEFAULTS[kwarg])

    def __init__(
        self,
        language: Optional[str] = None,
        entity_substitution: Optional[_EntitySubstitutionFunction] = None,
        void_element_close_prefix: str = "/",
        cdata_containing_tags: Optional[Set[str]] = None,
        empty_attributes_are_booleans: bool = False,
        indent: Optional[Union[int, str]] = 1,
    ):
        r"""Constructor.

        :param language: This should be `Formatter.XML` if you are formatting
           XML markup and `Formatter.HTML` if you are formatting HTML markup.
           A false value (such as None) is treated as `Formatter.HTML`.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The set of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        :param empty_attributes_are_booleans: If this is set to true,
          then attributes whose values are set to the empty string
          will be treated as `HTML boolean
          attributes<https://dev.w3.org/html5/spec-LC/common-microsyntaxes.html#boolean-attributes>`_. (Attributes
          whose value is None are always rendered this way.)
        :param indent: If indent is a non-negative integer or string,
            then the contents of elements will be indented
            appropriately when pretty-printing. An indent level of 0,
            negative, None, or "" will only insert newlines. Using a
            positive integer indent indents that many spaces per
            level. If indent is a string (such as "\t"), that string
            is used to indent each level. Any other type falls back to
            one space per level. The default behavior is to
            indent one space per level.

        """
        self.language = language or self.HTML
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            self.language, cdata_containing_tags, "cdata_containing_tags"
        )
        self.empty_attributes_are_booleans = empty_attributes_are_booleans
        if indent is None:
            indent = 0
        indent_str: str
        if isinstance(indent, int):
            # Negative widths are clamped so they behave like 0.
            if indent < 0:
                indent = 0
            indent_str = " " * indent
        elif isinstance(indent, str):
            indent_str = indent
        else:
            # Unrecognized type: fall back to a single space per level.
            indent_str = " "
        self.indent = indent_str

    def substitute(self, ns: str) -> str:
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: The same string but with certain characters replaced by named
           or numeric entities. The string is returned untouched when no
           entity substitution function is configured, or when it is a
           `NavigableString` whose parent tag is named in
           `cdata_containing_tags`.
        """
        if not self.entity_substitution:
            return ns
        # Imported here rather than at module level.
        from .element import NavigableString

        if (
            isinstance(ns, NavigableString)
            and ns.parent is not None
            and ns.parent.name in self.cdata_containing_tags
        ):
            # Do nothing.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value: str) -> str:
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(
        self, tag: bs4.element.Tag  # type:ignore
    ) -> Iterable[Tuple[str, Optional[_AttributeValue]]]:
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        If `empty_attributes_are_booleans` is True, then
        attributes whose values are set to the empty string will be
        treated as boolean attributes.

        :param tag: The tag whose `attrs` mapping is read.
        :return: An empty list when `tag.attrs` is None; otherwise a
           sorted list of (name, value) tuples, where the value is
           None for an empty-string attribute if
           `empty_attributes_are_booleans` is set.
        """
        if tag.attrs is None:
            return []

        items: Iterable[Tuple[str, _AttributeValue]] = list(tag.attrs.items())
        return sorted(
            (k, (None if self.empty_attributes_are_booleans and v == "" else v))
            for k, v in items
        )
|
||||
|
||||
|
||||
class HTMLFormatter(Formatter):
    """A `Formatter` whose language is always `Formatter.HTML`."""

    #: Maps a formatter name (or None) to an `HTMLFormatter` instance.
    REGISTRY: Dict[Optional[str], HTMLFormatter] = {}

    def __init__(
        self,
        entity_substitution: Optional[_EntitySubstitutionFunction] = None,
        void_element_close_prefix: str = "/",
        cdata_containing_tags: Optional[Set[str]] = None,
        empty_attributes_are_booleans: bool = False,
        indent: Union[int,str] = 1,
    ):
        """Forward every option to `Formatter.__init__` with the language
        fixed to `Formatter.HTML`.

        All parameters carry the same meaning as the parameters of the
        same name on `Formatter.__init__`.
        """
        super().__init__(
            language=self.HTML,
            entity_substitution=entity_substitution,
            void_element_close_prefix=void_element_close_prefix,
            cdata_containing_tags=cdata_containing_tags,
            empty_attributes_are_booleans=empty_attributes_are_booleans,
            indent=indent,
        )
|
||||
|
||||
|
||||
class XMLFormatter(Formatter):
    """A `Formatter` whose language is always `Formatter.XML`."""

    #: Maps a formatter name (or None) to an `XMLFormatter` instance.
    REGISTRY: Dict[Optional[str], XMLFormatter] = {}

    def __init__(
        self,
        entity_substitution: Optional[_EntitySubstitutionFunction] = None,
        void_element_close_prefix: str = "/",
        cdata_containing_tags: Optional[Set[str]] = None,
        empty_attributes_are_booleans: bool = False,
        indent: Union[int,str] = 1,
    ):
        """Forward every option to `Formatter.__init__` with the language
        fixed to `Formatter.XML`.

        All parameters carry the same meaning as the parameters of the
        same name on `Formatter.__init__`.
        """
        super().__init__(
            language=self.XML,
            entity_substitution=entity_substitution,
            void_element_close_prefix=void_element_close_prefix,
            cdata_containing_tags=cdata_containing_tags,
            empty_attributes_are_booleans=empty_attributes_are_booleans,
            indent=indent,
        )
|
||||
|
||||
|
||||
# Register the predefined formatters under the names callers may pass
# as the `formatter` argument. Insertion order matches the order in
# which the formatters are created.
HTMLFormatter.REGISTRY.update(
    {
        "html": HTMLFormatter(
            entity_substitution=EntitySubstitution.substitute_html
        ),
        "html5": HTMLFormatter(
            entity_substitution=EntitySubstitution.substitute_html5,
            void_element_close_prefix="",
            empty_attributes_are_booleans=True,
        ),
        # Same rendering options as "html5", but using substitute_html
        # for entity substitution.
        "html5-4.12": HTMLFormatter(
            entity_substitution=EntitySubstitution.substitute_html,
            void_element_close_prefix="",
            empty_attributes_are_booleans=True,
        ),
        "minimal": HTMLFormatter(
            entity_substitution=EntitySubstitution.substitute_xml
        ),
        None: HTMLFormatter(entity_substitution=None),
    }
)
XMLFormatter.REGISTRY.update(
    {
        "html": XMLFormatter(
            entity_substitution=EntitySubstitution.substitute_html
        ),
        "minimal": XMLFormatter(
            entity_substitution=EntitySubstitution.substitute_xml
        ),
        None: XMLFormatter(entity_substitution=None),
    }
)

# Type aliases used by annotations in this module.

#: A function to call to replace special characters with XML or HTML
#: entities.
_EntitySubstitutionFunction: TypeAlias = Callable[[str], str]

# Many of the output-centered methods take an argument that can either
# be a Formatter object or the name of a Formatter to be looked up.
_FormatterOrName = Union[Formatter, str]
|
||||
|
|
@ -0,0 +1,760 @@
|
|||
@Switch01
|
||||
A_Rog
|
||||
Aakanksha Agrawal
|
||||
Abhinav Sagar
|
||||
ABHYUDAY PRATAP SINGH
|
||||
abs51295
|
||||
AceGentile
|
||||
Adam Chainz
|
||||
Adam Tse
|
||||
Adam Wentz
|
||||
admin
|
||||
Adrien Morison
|
||||
ahayrapetyan
|
||||
Ahilya
|
||||
AinsworthK
|
||||
Akash Srivastava
|
||||
Alan Yee
|
||||
Albert Tugushev
|
||||
Albert-Guan
|
||||
albertg
|
||||
Alberto Sottile
|
||||
Aleks Bunin
|
||||
Ales Erjavec
|
||||
Alethea Flowers
|
||||
Alex Gaynor
|
||||
Alex Grönholm
|
||||
Alex Hedges
|
||||
Alex Loosley
|
||||
Alex Morega
|
||||
Alex Stachowiak
|
||||
Alexander Shtyrov
|
||||
Alexandre Conrad
|
||||
Alexey Popravka
|
||||
Aleš Erjavec
|
||||
Alli
|
||||
Ami Fischman
|
||||
Ananya Maiti
|
||||
Anatoly Techtonik
|
||||
Anders Kaseorg
|
||||
Andre Aguiar
|
||||
Andreas Lutro
|
||||
Andrei Geacar
|
||||
Andrew Gaul
|
||||
Andrew Shymanel
|
||||
Andrey Bienkowski
|
||||
Andrey Bulgakov
|
||||
Andrés Delfino
|
||||
Andy Freeland
|
||||
Andy Kluger
|
||||
Ani Hayrapetyan
|
||||
Aniruddha Basak
|
||||
Anish Tambe
|
||||
Anrs Hu
|
||||
Anthony Sottile
|
||||
Antoine Musso
|
||||
Anton Ovchinnikov
|
||||
Anton Patrushev
|
||||
Antonio Alvarado Hernandez
|
||||
Antony Lee
|
||||
Antti Kaihola
|
||||
Anubhav Patel
|
||||
Anudit Nagar
|
||||
Anuj Godase
|
||||
AQNOUCH Mohammed
|
||||
AraHaan
|
||||
Arindam Choudhury
|
||||
Armin Ronacher
|
||||
Artem
|
||||
Arun Babu Neelicattu
|
||||
Ashley Manton
|
||||
Ashwin Ramaswami
|
||||
atse
|
||||
Atsushi Odagiri
|
||||
Avinash Karhana
|
||||
Avner Cohen
|
||||
Awit (Ah-Wit) Ghirmai
|
||||
Baptiste Mispelon
|
||||
Barney Gale
|
||||
barneygale
|
||||
Bartek Ogryczak
|
||||
Bastian Venthur
|
||||
Ben Bodenmiller
|
||||
Ben Darnell
|
||||
Ben Hoyt
|
||||
Ben Mares
|
||||
Ben Rosser
|
||||
Bence Nagy
|
||||
Benjamin Peterson
|
||||
Benjamin VanEvery
|
||||
Benoit Pierre
|
||||
Berker Peksag
|
||||
Bernard
|
||||
Bernard Tyers
|
||||
Bernardo B. Marques
|
||||
Bernhard M. Wiedemann
|
||||
Bertil Hatt
|
||||
Bhavam Vidyarthi
|
||||
Blazej Michalik
|
||||
Bogdan Opanchuk
|
||||
BorisZZZ
|
||||
Brad Erickson
|
||||
Bradley Ayers
|
||||
Brandon L. Reiss
|
||||
Brandt Bucher
|
||||
Brett Randall
|
||||
Brett Rosen
|
||||
Brian Cristante
|
||||
Brian Rosner
|
||||
briantracy
|
||||
BrownTruck
|
||||
Bruno Oliveira
|
||||
Bruno Renié
|
||||
Bruno S
|
||||
Bstrdsmkr
|
||||
Buck Golemon
|
||||
burrows
|
||||
Bussonnier Matthias
|
||||
bwoodsend
|
||||
c22
|
||||
Caleb Martinez
|
||||
Calvin Smith
|
||||
Carl Meyer
|
||||
Carlos Liam
|
||||
Carol Willing
|
||||
Carter Thayer
|
||||
Cass
|
||||
Chandrasekhar Atina
|
||||
Chih-Hsuan Yen
|
||||
Chris Brinker
|
||||
Chris Hunt
|
||||
Chris Jerdonek
|
||||
Chris Kuehl
|
||||
Chris McDonough
|
||||
Chris Pawley
|
||||
Chris Pryer
|
||||
Chris Wolfe
|
||||
Christian Clauss
|
||||
Christian Heimes
|
||||
Christian Oudard
|
||||
Christoph Reiter
|
||||
Christopher Hunt
|
||||
Christopher Snyder
|
||||
cjc7373
|
||||
Clark Boylan
|
||||
Claudio Jolowicz
|
||||
Clay McClure
|
||||
Cody
|
||||
Cody Soyland
|
||||
Colin Watson
|
||||
Collin Anderson
|
||||
Connor Osborn
|
||||
Cooper Lees
|
||||
Cooper Ry Lees
|
||||
Cory Benfield
|
||||
Cory Wright
|
||||
Craig Kerstiens
|
||||
Cristian Sorinel
|
||||
Cristina
|
||||
Cristina Muñoz
|
||||
Curtis Doty
|
||||
cytolentino
|
||||
Daan De Meyer
|
||||
Dale
|
||||
Damian
|
||||
Damian Quiroga
|
||||
Damian Shaw
|
||||
Dan Black
|
||||
Dan Savilonis
|
||||
Dan Sully
|
||||
Dane Hillard
|
||||
daniel
|
||||
Daniel Collins
|
||||
Daniel Hahler
|
||||
Daniel Holth
|
||||
Daniel Jost
|
||||
Daniel Katz
|
||||
Daniel Shaulov
|
||||
Daniele Esposti
|
||||
Daniele Nicolodi
|
||||
Daniele Procida
|
||||
Daniil Konovalenko
|
||||
Danny Hermes
|
||||
Danny McClanahan
|
||||
Darren Kavanagh
|
||||
Dav Clark
|
||||
Dave Abrahams
|
||||
Dave Jones
|
||||
David Aguilar
|
||||
David Black
|
||||
David Bordeynik
|
||||
David Caro
|
||||
David D Lowe
|
||||
David Evans
|
||||
David Hewitt
|
||||
David Linke
|
||||
David Poggi
|
||||
David Pursehouse
|
||||
David Runge
|
||||
David Tucker
|
||||
David Wales
|
||||
Davidovich
|
||||
ddelange
|
||||
Deepak Sharma
|
||||
Deepyaman Datta
|
||||
Denise Yu
|
||||
dependabot[bot]
|
||||
derwolfe
|
||||
Desetude
|
||||
Devesh Kumar Singh
|
||||
Diego Caraballo
|
||||
Diego Ramirez
|
||||
DiegoCaraballo
|
||||
Dimitri Merejkowsky
|
||||
Dimitri Papadopoulos
|
||||
Dirk Stolle
|
||||
Dmitry Gladkov
|
||||
Dmitry Volodin
|
||||
Domen Kožar
|
||||
Dominic Davis-Foster
|
||||
Donald Stufft
|
||||
Dongweiming
|
||||
doron zarhi
|
||||
Dos Moonen
|
||||
Douglas Thor
|
||||
DrFeathers
|
||||
Dustin Ingram
|
||||
Dwayne Bailey
|
||||
Ed Morley
|
||||
Edgar Ramírez
|
||||
Edgar Ramírez Mondragón
|
||||
Ee Durbin
|
||||
Efflam Lemaillet
|
||||
efflamlemaillet
|
||||
Eitan Adler
|
||||
ekristina
|
||||
elainechan
|
||||
Eli Schwartz
|
||||
Elisha Hollander
|
||||
Ellen Marie Dash
|
||||
Emil Burzo
|
||||
Emil Styrke
|
||||
Emmanuel Arias
|
||||
Endoh Takanao
|
||||
enoch
|
||||
Erdinc Mutlu
|
||||
Eric Cousineau
|
||||
Eric Gillingham
|
||||
Eric Hanchrow
|
||||
Eric Hopper
|
||||
Erik M. Bray
|
||||
Erik Rose
|
||||
Erwin Janssen
|
||||
Eugene Vereshchagin
|
||||
everdimension
|
||||
Federico
|
||||
Felipe Peter
|
||||
Felix Yan
|
||||
fiber-space
|
||||
Filip Kokosiński
|
||||
Filipe Laíns
|
||||
Finn Womack
|
||||
finnagin
|
||||
Flavio Amurrio
|
||||
Florian Briand
|
||||
Florian Rathgeber
|
||||
Francesco
|
||||
Francesco Montesano
|
||||
Frost Ming
|
||||
Gabriel Curio
|
||||
Gabriel de Perthuis
|
||||
Garry Polley
|
||||
gavin
|
||||
gdanielson
|
||||
Geoffrey Sneddon
|
||||
George Song
|
||||
Georgi Valkov
|
||||
Georgy Pchelkin
|
||||
ghost
|
||||
Giftlin Rajaiah
|
||||
gizmoguy1
|
||||
gkdoc
|
||||
Godefroid Chapelle
|
||||
Gopinath M
|
||||
GOTO Hayato
|
||||
gousaiyang
|
||||
gpiks
|
||||
Greg Roodt
|
||||
Greg Ward
|
||||
Guilherme Espada
|
||||
Guillaume Seguin
|
||||
gutsytechster
|
||||
Guy Rozendorn
|
||||
Guy Tuval
|
||||
gzpan123
|
||||
Hanjun Kim
|
||||
Hari Charan
|
||||
Harsh Vardhan
|
||||
harupy
|
||||
Harutaka Kawamura
|
||||
hauntsaninja
|
||||
Henrich Hartzer
|
||||
Henry Schreiner
|
||||
Herbert Pfennig
|
||||
Holly Stotelmyer
|
||||
Honnix
|
||||
Hsiaoming Yang
|
||||
Hugo Lopes Tavares
|
||||
Hugo van Kemenade
|
||||
Hugues Bruant
|
||||
Hynek Schlawack
|
||||
Ian Bicking
|
||||
Ian Cordasco
|
||||
Ian Lee
|
||||
Ian Stapleton Cordasco
|
||||
Ian Wienand
|
||||
Igor Kuzmitshov
|
||||
Igor Sobreira
|
||||
Ilan Schnell
|
||||
Illia Volochii
|
||||
Ilya Baryshev
|
||||
Inada Naoki
|
||||
Ionel Cristian Mărieș
|
||||
Ionel Maries Cristian
|
||||
Itamar Turner-Trauring
|
||||
Ivan Pozdeev
|
||||
J. Nick Koston
|
||||
Jacob Kim
|
||||
Jacob Walls
|
||||
Jaime Sanz
|
||||
jakirkham
|
||||
Jakub Kuczys
|
||||
Jakub Stasiak
|
||||
Jakub Vysoky
|
||||
Jakub Wilk
|
||||
James Cleveland
|
||||
James Curtin
|
||||
James Firth
|
||||
James Gerity
|
||||
James Polley
|
||||
Jan Pokorný
|
||||
Jannis Leidel
|
||||
Jarek Potiuk
|
||||
jarondl
|
||||
Jason Curtis
|
||||
Jason R. Coombs
|
||||
JasonMo
|
||||
JasonMo1
|
||||
Jay Graves
|
||||
Jean Abou Samra
|
||||
Jean-Christophe Fillion-Robin
|
||||
Jeff Barber
|
||||
Jeff Dairiki
|
||||
Jeff Widman
|
||||
Jelmer Vernooij
|
||||
jenix21
|
||||
Jeremy Stanley
|
||||
Jeremy Zafran
|
||||
Jesse Rittner
|
||||
Jiashuo Li
|
||||
Jim Fisher
|
||||
Jim Garrison
|
||||
Jiun Bae
|
||||
Jivan Amara
|
||||
Joe Bylund
|
||||
Joe Michelini
|
||||
John Paton
|
||||
John T. Wodder II
|
||||
John-Scott Atlakson
|
||||
johnthagen
|
||||
Jon Banafato
|
||||
Jon Dufresne
|
||||
Jon Parise
|
||||
Jonas Nockert
|
||||
Jonathan Herbert
|
||||
Joonatan Partanen
|
||||
Joost Molenaar
|
||||
Jorge Niedbalski
|
||||
Joseph Bylund
|
||||
Joseph Long
|
||||
Josh Bronson
|
||||
Josh Hansen
|
||||
Josh Schneier
|
||||
Joshua
|
||||
Juan Luis Cano Rodríguez
|
||||
Juanjo Bazán
|
||||
Judah Rand
|
||||
Julian Berman
|
||||
Julian Gethmann
|
||||
Julien Demoor
|
||||
Jussi Kukkonen
|
||||
jwg4
|
||||
Jyrki Pulliainen
|
||||
Kai Chen
|
||||
Kai Mueller
|
||||
Kamal Bin Mustafa
|
||||
kasium
|
||||
kaustav haldar
|
||||
keanemind
|
||||
Keith Maxwell
|
||||
Kelsey Hightower
|
||||
Kenneth Belitzky
|
||||
Kenneth Reitz
|
||||
Kevin Burke
|
||||
Kevin Carter
|
||||
Kevin Frommelt
|
||||
Kevin R Patterson
|
||||
Kexuan Sun
|
||||
Kit Randel
|
||||
Klaas van Schelven
|
||||
KOLANICH
|
||||
kpinc
|
||||
Krishna Oza
|
||||
Kumar McMillan
|
||||
Kurt McKee
|
||||
Kyle Persohn
|
||||
lakshmanaram
|
||||
Laszlo Kiss-Kollar
|
||||
Laurent Bristiel
|
||||
Laurent LAPORTE
|
||||
Laurie O
|
||||
Laurie Opperman
|
||||
layday
|
||||
Leon Sasson
|
||||
Lev Givon
|
||||
Lincoln de Sousa
|
||||
Lipis
|
||||
lorddavidiii
|
||||
Loren Carvalho
|
||||
Lucas Cimon
|
||||
Ludovic Gasc
|
||||
Lukas Geiger
|
||||
Lukas Juhrich
|
||||
Luke Macken
|
||||
Luo Jiebin
|
||||
luojiebin
|
||||
luz.paz
|
||||
László Kiss Kollár
|
||||
M00nL1ght
|
||||
Marc Abramowitz
|
||||
Marc Tamlyn
|
||||
Marcus Smith
|
||||
Mariatta
|
||||
Mark Kohler
|
||||
Mark Williams
|
||||
Markus Hametner
|
||||
Martey Dodoo
|
||||
Martin Fischer
|
||||
Martin Häcker
|
||||
Martin Pavlasek
|
||||
Masaki
|
||||
Masklinn
|
||||
Matej Stuchlik
|
||||
Mathew Jennings
|
||||
Mathieu Bridon
|
||||
Mathieu Kniewallner
|
||||
Matt Bacchi
|
||||
Matt Good
|
||||
Matt Maker
|
||||
Matt Robenolt
|
||||
matthew
|
||||
Matthew Einhorn
|
||||
Matthew Feickert
|
||||
Matthew Gilliard
|
||||
Matthew Iversen
|
||||
Matthew Treinish
|
||||
Matthew Trumbell
|
||||
Matthew Willson
|
||||
Matthias Bussonnier
|
||||
mattip
|
||||
Maurits van Rees
|
||||
Max W Chase
|
||||
Maxim Kurnikov
|
||||
Maxime Rouyrre
|
||||
mayeut
|
||||
mbaluna
|
||||
mdebi
|
||||
memoselyk
|
||||
meowmeowcat
|
||||
Michael
|
||||
Michael Aquilina
|
||||
Michael E. Karpeles
|
||||
Michael Klich
|
||||
Michael Mintz
|
||||
Michael Williamson
|
||||
michaelpacer
|
||||
Michał Górny
|
||||
Mickaël Schoentgen
|
||||
Miguel Araujo Perez
|
||||
Mihir Singh
|
||||
Mike
|
||||
Mike Hendricks
|
||||
Min RK
|
||||
MinRK
|
||||
Miro Hrončok
|
||||
Monica Baluna
|
||||
montefra
|
||||
Monty Taylor
|
||||
Muha Ajjan
|
||||
Nadav Wexler
|
||||
Nahuel Ambrosini
|
||||
Nate Coraor
|
||||
Nate Prewitt
|
||||
Nathan Houghton
|
||||
Nathaniel J. Smith
|
||||
Nehal J Wani
|
||||
Neil Botelho
|
||||
Nguyễn Gia Phong
|
||||
Nicholas Serra
|
||||
Nick Coghlan
|
||||
Nick Stenning
|
||||
Nick Timkovich
|
||||
Nicolas Bock
|
||||
Nicole Harris
|
||||
Nikhil Benesch
|
||||
Nikhil Ladha
|
||||
Nikita Chepanov
|
||||
Nikolay Korolev
|
||||
Nipunn Koorapati
|
||||
Nitesh Sharma
|
||||
Niyas Sait
|
||||
Noah
|
||||
Noah Gorny
|
||||
Nowell Strite
|
||||
NtaleGrey
|
||||
nvdv
|
||||
OBITORASU
|
||||
Ofek Lev
|
||||
ofrinevo
|
||||
Oliver Freund
|
||||
Oliver Jeeves
|
||||
Oliver Mannion
|
||||
Oliver Tonnhofer
|
||||
Olivier Girardot
|
||||
Olivier Grisel
|
||||
Ollie Rutherfurd
|
||||
OMOTO Kenji
|
||||
Omry Yadan
|
||||
onlinejudge95
|
||||
Oren Held
|
||||
Oscar Benjamin
|
||||
Oz N Tiram
|
||||
Pachwenko
|
||||
Patrick Dubroy
|
||||
Patrick Jenkins
|
||||
Patrick Lawson
|
||||
patricktokeeffe
|
||||
Patrik Kopkan
|
||||
Paul Ganssle
|
||||
Paul Kehrer
|
||||
Paul Moore
|
||||
Paul Nasrat
|
||||
Paul Oswald
|
||||
Paul van der Linden
|
||||
Paulus Schoutsen
|
||||
Pavel Safronov
|
||||
Pavithra Eswaramoorthy
|
||||
Pawel Jasinski
|
||||
Paweł Szramowski
|
||||
Pekka Klärck
|
||||
Peter Gessler
|
||||
Peter Lisák
|
||||
Peter Waller
|
||||
petr-tik
|
||||
Phaneendra Chiruvella
|
||||
Phil Elson
|
||||
Phil Freo
|
||||
Phil Pennock
|
||||
Phil Whelan
|
||||
Philip Jägenstedt
|
||||
Philip Molloy
|
||||
Philippe Ombredanne
|
||||
Pi Delport
|
||||
Pierre-Yves Rofes
|
||||
Pieter Degroote
|
||||
pip
|
||||
Prabakaran Kumaresshan
|
||||
Prabhjyotsing Surjit Singh Sodhi
|
||||
Prabhu Marappan
|
||||
Pradyun Gedam
|
||||
Prashant Sharma
|
||||
Pratik Mallya
|
||||
pre-commit-ci[bot]
|
||||
Preet Thakkar
|
||||
Preston Holmes
|
||||
Przemek Wrzos
|
||||
Pulkit Goyal
|
||||
q0w
|
||||
Qiangning Hong
|
||||
Qiming Xu
|
||||
Quentin Lee
|
||||
Quentin Pradet
|
||||
R. David Murray
|
||||
Rafael Caricio
|
||||
Ralf Schmitt
|
||||
Razzi Abuissa
|
||||
rdb
|
||||
Reece Dunham
|
||||
Remi Rampin
|
||||
Rene Dudfield
|
||||
Riccardo Magliocchetti
|
||||
Riccardo Schirone
|
||||
Richard Jones
|
||||
Richard Si
|
||||
Ricky Ng-Adam
|
||||
Rishi
|
||||
RobberPhex
|
||||
Robert Collins
|
||||
Robert McGibbon
|
||||
Robert Pollak
|
||||
Robert T. McGibbon
|
||||
robin elisha robinson
|
||||
Roey Berman
|
||||
Rohan Jain
|
||||
Roman Bogorodskiy
|
||||
Roman Donchenko
|
||||
Romuald Brunet
|
||||
ronaudinho
|
||||
Ronny Pfannschmidt
|
||||
Rory McCann
|
||||
Ross Brattain
|
||||
Roy Wellington Ⅳ
|
||||
Ruairidh MacLeod
|
||||
Russell Keith-Magee
|
||||
Ryan Shepherd
|
||||
Ryan Wooden
|
||||
ryneeverett
|
||||
Sachi King
|
||||
Salvatore Rinchiera
|
||||
sandeepkiran-js
|
||||
Sander Van Balen
|
||||
Savio Jomton
|
||||
schlamar
|
||||
Scott Kitterman
|
||||
Sean
|
||||
seanj
|
||||
Sebastian Jordan
|
||||
Sebastian Schaetz
|
||||
Segev Finer
|
||||
SeongSoo Cho
|
||||
Sergey Vasilyev
|
||||
Seth Michael Larson
|
||||
Seth Woodworth
|
||||
Shahar Epstein
|
||||
Shantanu
|
||||
shireenrao
|
||||
Shivansh-007
|
||||
Shlomi Fish
|
||||
Shovan Maity
|
||||
Simeon Visser
|
||||
Simon Cross
|
||||
Simon Pichugin
|
||||
sinoroc
|
||||
sinscary
|
||||
snook92
|
||||
socketubs
|
||||
Sorin Sbarnea
|
||||
Srinivas Nyayapati
|
||||
Stavros Korokithakis
|
||||
Stefan Scherfke
|
||||
Stefano Rivera
|
||||
Stephan Erb
|
||||
Stephen Rosen
|
||||
stepshal
|
||||
Steve (Gadget) Barnes
|
||||
Steve Barnes
|
||||
Steve Dower
|
||||
Steve Kowalik
|
||||
Steven Myint
|
||||
Steven Silvester
|
||||
stonebig
|
||||
studioj
|
||||
Stéphane Bidoul
|
||||
Stéphane Bidoul (ACSONE)
|
||||
Stéphane Klein
|
||||
Sumana Harihareswara
|
||||
Surbhi Sharma
|
||||
Sviatoslav Sydorenko
|
||||
Swat009
|
||||
Sylvain
|
||||
Takayuki SHIMIZUKAWA
|
||||
Taneli Hukkinen
|
||||
tbeswick
|
||||
Thiago
|
||||
Thijs Triemstra
|
||||
Thomas Fenzl
|
||||
Thomas Grainger
|
||||
Thomas Guettler
|
||||
Thomas Johansson
|
||||
Thomas Kluyver
|
||||
Thomas Smith
|
||||
Thomas VINCENT
|
||||
Tim D. Smith
|
||||
Tim Gates
|
||||
Tim Harder
|
||||
Tim Heap
|
||||
tim smith
|
||||
tinruufu
|
||||
Tobias Hermann
|
||||
Tom Forbes
|
||||
Tom Freudenheim
|
||||
Tom V
|
||||
Tomas Hrnciar
|
||||
Tomas Orsava
|
||||
Tomer Chachamu
|
||||
Tommi Enenkel | AnB
|
||||
Tomáš Hrnčiar
|
||||
Tony Beswick
|
||||
Tony Narlock
|
||||
Tony Zhaocheng Tan
|
||||
TonyBeswick
|
||||
toonarmycaptain
|
||||
Toshio Kuratomi
|
||||
toxinu
|
||||
Travis Swicegood
|
||||
Tushar Sadhwani
|
||||
Tzu-ping Chung
|
||||
Valentin Haenel
|
||||
Victor Stinner
|
||||
victorvpaulo
|
||||
Vikram - Google
|
||||
Viktor Szépe
|
||||
Ville Skyttä
|
||||
Vinay Sajip
|
||||
Vincent Philippon
|
||||
Vinicyus Macedo
|
||||
Vipul Kumar
|
||||
Vitaly Babiy
|
||||
Vladimir Fokow
|
||||
Vladimir Rutsky
|
||||
W. Trevor King
|
||||
Wil Tan
|
||||
Wilfred Hughes
|
||||
William Edwards
|
||||
William ML Leslie
|
||||
William T Olson
|
||||
William Woodruff
|
||||
Wilson Mo
|
||||
wim glenn
|
||||
Winson Luk
|
||||
Wolfgang Maier
|
||||
Wu Zhenyu
|
||||
XAMES3
|
||||
Xavier Fernandez
|
||||
xoviat
|
||||
xtreak
|
||||
YAMAMOTO Takashi
|
||||
Yen Chi Hsuan
|
||||
Yeray Diaz Diaz
|
||||
Yoval P
|
||||
Yu Jian
|
||||
Yuan Jing Vincent Yan
|
||||
Yusuke Hayashi
|
||||
Zearin
|
||||
Zhiping Deng
|
||||
ziebam
|
||||
Zvezdan Petkovic
|
||||
Łukasz Langa
|
||||
Роман Донченко
|
||||
Семён Марьясин
|
||||
rekcäH nitraM
|
||||
|
|
@ -0,0 +1 @@
|
|||
pip
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
Copyright (c) 2008-present The pip developers (see AUTHORS.txt file)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: pip
|
||||
Version: 24.0
|
||||
Summary: The PyPA recommended tool for installing Python packages.
|
||||
Author-email: The pip developers <distutils-sig@python.org>
|
||||
License: MIT
|
||||
Project-URL: Homepage, https://pip.pypa.io/
|
||||
Project-URL: Documentation, https://pip.pypa.io
|
||||
Project-URL: Source, https://github.com/pypa/pip
|
||||
Project-URL: Changelog, https://pip.pypa.io/en/stable/news/
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Topic :: Software Development :: Build Tools
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Requires-Python: >=3.7
|
||||
Description-Content-Type: text/x-rst
|
||||
License-File: LICENSE.txt
|
||||
License-File: AUTHORS.txt
|
||||
|
||||
pip - The Python Package Installer
|
||||
==================================
|
||||
|
||||
.. image:: https://img.shields.io/pypi/v/pip.svg
|
||||
:target: https://pypi.org/project/pip/
|
||||
:alt: PyPI
|
||||
|
||||
.. image:: https://img.shields.io/pypi/pyversions/pip
|
||||
:target: https://pypi.org/project/pip
|
||||
:alt: PyPI - Python Version
|
||||
|
||||
.. image:: https://readthedocs.org/projects/pip/badge/?version=latest
|
||||
:target: https://pip.pypa.io/en/latest
|
||||
:alt: Documentation
|
||||
|
||||
pip is the `package installer`_ for Python. You can use pip to install packages from the `Python Package Index`_ and other indexes.
|
||||
|
||||
Please take a look at our documentation for how to install and use pip:
|
||||
|
||||
* `Installation`_
|
||||
* `Usage`_
|
||||
|
||||
We release updates regularly, with a new version every 3 months. Find more details in our documentation:
|
||||
|
||||
* `Release notes`_
|
||||
* `Release process`_
|
||||
|
||||
If you find bugs, need help, or want to talk to the developers, please use our mailing lists or chat rooms:
|
||||
|
||||
* `Issue tracking`_
|
||||
* `Discourse channel`_
|
||||
* `User IRC`_
|
||||
|
||||
If you want to get involved head over to GitHub to get the source code, look at our development documentation and feel free to jump on the developer mailing lists and chat rooms:
|
||||
|
||||
* `GitHub page`_
|
||||
* `Development documentation`_
|
||||
* `Development IRC`_
|
||||
|
||||
Code of Conduct
|
||||
---------------
|
||||
|
||||
Everyone interacting in the pip project's codebases, issue trackers, chat
|
||||
rooms, and mailing lists is expected to follow the `PSF Code of Conduct`_.
|
||||
|
||||
.. _package installer: https://packaging.python.org/guides/tool-recommendations/
|
||||
.. _Python Package Index: https://pypi.org
|
||||
.. _Installation: https://pip.pypa.io/en/stable/installation/
|
||||
.. _Usage: https://pip.pypa.io/en/stable/
|
||||
.. _Release notes: https://pip.pypa.io/en/stable/news.html
|
||||
.. _Release process: https://pip.pypa.io/en/latest/development/release-process/
|
||||
.. _GitHub page: https://github.com/pypa/pip
|
||||
.. _Development documentation: https://pip.pypa.io/en/latest/development
|
||||
.. _Issue tracking: https://github.com/pypa/pip/issues
|
||||
.. _Discourse channel: https://discuss.python.org/c/packaging
|
||||
.. _User IRC: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa
|
||||
.. _Development IRC: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa-dev
|
||||
.. _PSF Code of Conduct: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,5 @@
|
|||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.42.0)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
||||
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
[console_scripts]
|
||||
pip = pip._internal.cli.main:main
|
||||
pip3 = pip._internal.cli.main:main
|
||||
pip3.12 = pip._internal.cli.main:main
|
||||
|
|
@ -0,0 +1 @@
|
|||
pip
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from typing import List, Optional
|
||||
|
||||
__version__ = "24.0"
|
||||
|
||||
|
||||
def main(args: Optional[List[str]] = None) -> int:
    """Delegate to pip's internal ``_wrapper`` and return its result.

    This is an internal API only meant for use by pip's own console scripts.
    For additional details, see https://github.com/pypa/pip/issues/7498.

    :param args: Arguments forwarded unchanged to ``_wrapper``
        (``None`` by default).
    :return: The value returned by ``_wrapper``.
    """
    # Imported inside the function, so nothing from pip._internal is loaded
    # until the entry point is actually called.
    from pip._internal.utils import entrypoints

    return entrypoints._wrapper(args)
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
# When invoked as ``python -m pip <command>``, the first entry of sys.path
# may be '' or the current working directory. Drop it, if present, so that
# the check, freeze, install, list and show commands do not use the current
# directory.
if sys.path[0] in ("", os.getcwd()):
    del sys.path[0]

# Running from a wheel (``python pip-*.whl/pip install pip-*.whl``): the wheel
# itself has to be on sys.path before pip can be imported.
if __package__ == "":
    # __file__ is pip-*.whl/pip/__main__.py. The inner dirname strips
    # '/__main__.py' and the outer one strips '/pip', which leaves the path
    # of the wheel itself.
    path = os.path.dirname(os.path.dirname(__file__))
    sys.path.insert(0, path)

if __name__ == "__main__":
    from pip._internal.cli.main import main as _main

    sys.exit(_main())
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
"""Execute exactly this copy of pip, within a different environment.
|
||||
|
||||
This file is named as it is, to ensure that this module can't be imported via
|
||||
an import statement.
|
||||
"""
|
||||
|
||||
# /!\ This version compatibility check section must be Python 2 compatible. /!\
|
||||
|
||||
import sys
|
||||
|
||||
# Copied from setup.py
|
||||
PYTHON_REQUIRES = (3, 7)
|
||||
|
||||
|
||||
def version_str(version):  # type: ignore
    """Join the components of *version* with dots, e.g. ``(3, 7)`` -> ``"3.7"``."""
    # Kept free of annotations and f-strings: this section of the file must
    # stay Python 2 compatible.
    return ".".join(map(str, version))
|
||||
|
||||
|
||||
# Stop with a readable message when the running interpreter is older than
# PYTHON_REQUIRES. The syntax here must remain Python 2 compatible.
if sys.version_info[:2] < PYTHON_REQUIRES:
    raise SystemExit(
        "This version of pip does not support python {} (requires >={}).".format(
            version_str(sys.version_info[:2]),
            version_str(PYTHON_REQUIRES),
        )
    )
|
||||
|
||||
# From here on, we can use Python 3 features, but the syntax must remain
|
||||
# Python 2 compatible.
|
||||
|
||||
import runpy # noqa: E402
|
||||
from importlib.machinery import PathFinder # noqa: E402
|
||||
from os.path import dirname # noqa: E402
|
||||
|
||||
PIP_SOURCES_ROOT = dirname(dirname(__file__))
|
||||
|
||||
|
||||
class PipImportRedirectingFinder:
    """Meta path finder that looks up ``pip`` under ``PIP_SOURCES_ROOT``.

    Any module name other than exactly ``"pip"`` is declined by returning
    ``None``.
    """

    @classmethod
    def find_spec(cls, fullname, path=None, target=None):  # type: ignore
        """Return the spec for ``pip`` found in ``PIP_SOURCES_ROOT``, else None."""
        if fullname == "pip":
            spec = PathFinder.find_spec(fullname, [PIP_SOURCES_ROOT], target)
            # The assertion message carries the searched root and the name.
            assert spec, (PIP_SOURCES_ROOT, fullname)
            return spec

        return None
|
||||
|
||||
|
||||
# Put the redirecting finder at the front of sys.meta_path so it is consulted
# before any other finder.
sys.meta_path[:0] = [PipImportRedirectingFinder()]

assert __name__ == "__main__", "Cannot run __pip-runner__.py as a non-main module"

# Run the ``pip`` package as if it had been started as the main module.
runpy.run_module("pip", run_name="__main__", alter_sys=True)
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
from typing import List, Optional
|
||||
|
||||
from pip._internal.utils import _log
|
||||
|
||||
# init_logging() must be called before any call to logging.getLogger()
|
||||
# which happens at import of most modules.
|
||||
_log.init_logging()
|
||||
|
||||
|
||||
def main(args: (Optional[List[str]]) = None) -> int:
    """Delegate to ``_wrapper`` and return its result.

    This is preserved for old console scripts that may still be referencing
    it. For additional details, see https://github.com/pypa/pip/issues/7498.

    :param args: Arguments forwarded unchanged to ``_wrapper``
        (``None`` by default).
    :return: The value returned by ``_wrapper``.
    """
    # Imported inside the function rather than at module import time.
    from pip._internal.utils import entrypoints

    return entrypoints._wrapper(args)
|
||||
|
|
@ -0,0 +1,311 @@
|
|||
"""Build Environment used for isolation during sdist building
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import site
|
||||
import sys
|
||||
import textwrap
|
||||
from collections import OrderedDict
|
||||
from types import TracebackType
|
||||
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union
|
||||
|
||||
from pip._vendor.certifi import where
|
||||
from pip._vendor.packaging.requirements import Requirement
|
||||
from pip._vendor.packaging.version import Version
|
||||
|
||||
from pip import __file__ as pip_location
|
||||
from pip._internal.cli.spinners import open_spinner
|
||||
from pip._internal.locations import get_platlib, get_purelib, get_scheme
|
||||
from pip._internal.metadata import get_default_environment, get_environment
|
||||
from pip._internal.utils.subprocess import call_subprocess
|
||||
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _dedup(a: str, b: str) -> Union[Tuple[str], Tuple[str, str]]:
|
||||
return (a, b) if a != b else (a,)
|
||||
|
||||
|
||||
class _Prefix:
    """Directories of one installation prefix, plus a ``setup`` flag."""

    def __init__(self, path: str) -> None:
        """Record *path* and derive its script and library directories.

        :param path: The prefix, handed to ``get_scheme`` as ``prefix``.
        """
        # Directory layout for this prefix as reported by get_scheme().
        layout = get_scheme("", prefix=path)

        self.path = path
        # Starts False; BuildEnvironment.install_requirements sets it to True.
        self.setup = False
        self.bin_dir = layout.scripts
        # purelib and platlib may be the same directory; keep it only once.
        self.lib_dirs = _dedup(layout.purelib, layout.platlib)
|
||||
|
||||
|
||||
def get_runnable_pip() -> str:
    """Get a file to pass to a Python executable, to run the currently-running pip.

    The result is used to launch a pip subprocess that installs requirements
    into the build environment.

    :return: The path of ``__pip-runner__.py`` next to the running pip when
        pip's parent location is a directory; otherwise that parent location
        itself.
    """
    pip_dir = pathlib.Path(pip_location).resolve().parent

    if pip_dir.is_dir():
        return os.fsdecode(pip_dir / "__pip-runner__.py")

    # Not a directory: this would happen if someone is using pip from inside
    # a zip file. In that case, the location can be used directly.
    return str(pip_dir)
|
||||
|
||||
|
||||
def _get_system_sitepackages() -> Set[str]:
|
||||
"""Get system site packages
|
||||
|
||||
Usually from site.getsitepackages,
|
||||
but fallback on `get_purelib()/get_platlib()` if unavailable
|
||||
(e.g. in a virtualenv created by virtualenv<20)
|
||||
|
||||
Returns normalized set of strings.
|
||||
"""
|
||||
if hasattr(site, "getsitepackages"):
|
||||
system_sites = site.getsitepackages()
|
||||
else:
|
||||
# virtualenv < 20 overwrites site.py without getsitepackages
|
||||
# fallback on get_purelib/get_platlib.
|
||||
# this is known to miss things, but shouldn't in the cases
|
||||
# where getsitepackages() has been removed (inside a virtualenv)
|
||||
system_sites = [get_purelib(), get_platlib()]
|
||||
return {os.path.normcase(path) for path in system_sites}
|
||||
|
||||
|
||||
class BuildEnvironment:
    """Creates and manages an isolated environment to install build deps"""

    def __init__(self) -> None:
        """Create the prefixes and the ``sitecustomize.py`` of the environment."""
        root = TempDirectory(kind=tempdir_kinds.BUILD_ENV, globally_managed=True)

        # One _Prefix per name, created in this order under the temp dir.
        self._prefixes = OrderedDict()
        for name in ("normal", "overlay"):
            self._prefixes[name] = _Prefix(os.path.join(root.path, name))

        # Directories are collected in reverse prefix order ("overlay" first).
        self._bin_dirs: List[str] = []
        self._lib_dirs: List[str] = []
        for prefix in list(self._prefixes.values())[::-1]:
            self._bin_dirs.append(prefix.bin_dir)
            self._lib_dirs.extend(prefix.lib_dirs)

        # Customize site to:
        # - ensure .pth files are honored
        # - prevent access to system site packages
        system_sites = _get_system_sitepackages()

        self._site_dir = os.path.join(root.path, "site")
        if not os.path.exists(self._site_dir):
            os.mkdir(self._site_dir)

        sitecustomize_path = os.path.join(self._site_dir, "sitecustomize.py")
        with open(sitecustomize_path, "w", encoding="utf-8") as fp:
            fp.write(
                textwrap.dedent(
                    """
                    import os, site, sys

                    # First, drop system-sites related paths.
                    original_sys_path = sys.path[:]
                    known_paths = set()
                    for path in {system_sites!r}:
                        site.addsitedir(path, known_paths=known_paths)
                    system_paths = set(
                        os.path.normcase(path)
                        for path in sys.path[len(original_sys_path):]
                    )
                    original_sys_path = [
                        path for path in original_sys_path
                        if os.path.normcase(path) not in system_paths
                    ]
                    sys.path = original_sys_path

                    # Second, add lib directories.
                    # ensuring .pth file are processed.
                    for path in {lib_dirs!r}:
                        assert not path in sys.path
                        site.addsitedir(path)
                    """
                ).format(system_sites=system_sites, lib_dirs=self._lib_dirs)
            )

    def __enter__(self) -> None:
        """Save PATH, PYTHONNOUSERSITE and PYTHONPATH, then override them."""
        saved = {}
        for name in ("PATH", "PYTHONNOUSERSITE", "PYTHONPATH"):
            saved[name] = os.environ.get(name, None)
        self._save_env = saved

        # The environment's bin dirs go first, followed by the previous PATH.
        search_path = list(self._bin_dirs)
        previous_path = saved["PATH"]
        if previous_path:
            search_path += previous_path.split(os.pathsep)

        pythonpath = [self._site_dir]

        os.environ["PATH"] = os.pathsep.join(search_path)
        os.environ["PYTHONNOUSERSITE"] = "1"
        os.environ["PYTHONPATH"] = os.pathsep.join(pythonpath)

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """Put back the environment variables saved by ``__enter__``."""
        for varname, old_value in self._save_env.items():
            if old_value is not None:
                os.environ[varname] = old_value
            else:
                # The variable was not set before entering: remove it again.
                os.environ.pop(varname, None)

    def check_requirements(
        self, reqs: Iterable[str]
    ) -> Tuple[Set[Tuple[str, str]], Set[str]]:
        """Return 2 sets:
        - conflicting requirements: set of (installed, wanted) reqs tuples
        - missing requirements: set of reqs
        """
        missing = set()
        conflicting = set()
        if not reqs:
            return conflicting, missing

        if hasattr(self, "_lib_dirs"):
            env = get_environment(self._lib_dirs)
        else:
            env = get_default_environment()

        for req_str in reqs:
            req = Requirement(req_str)
            # We're explicitly evaluating with an empty extra value, since build
            # environments are not provided any mechanism to select specific extras.
            if req.marker is not None and not req.marker.evaluate({"extra": ""}):
                continue
            dist = env.get_distribution(req.name)
            if not dist:
                missing.add(req_str)
                continue
            # "==" when the installed version is a Version instance,
            # "===" for any other kind of version value.
            operator = "==" if isinstance(dist.version, Version) else "==="
            installed_req_str = f"{req.name}{operator}{dist.version}"
            if not req.specifier.contains(dist.version, prereleases=True):
                conflicting.add((installed_req_str, req_str))
            # FIXME: Consider direct URL?
        return conflicting, missing

    def install_requirements(
        self,
        finder: "PackageFinder",
        requirements: Iterable[str],
        prefix_as_string: str,
        *,
        kind: str,
    ) -> None:
        """Install *requirements* into the prefix named *prefix_as_string*.

        The prefix is marked as set up even when *requirements* is empty; a
        prefix that is already marked trips the assertion.
        """
        prefix = self._prefixes[prefix_as_string]
        assert not prefix.setup
        prefix.setup = True
        if requirements:
            self._install_requirements(
                get_runnable_pip(),
                finder,
                requirements,
                prefix,
                kind=kind,
            )

    @staticmethod
    def _install_requirements(
        pip_runnable: str,
        finder: "PackageFinder",
        requirements: Iterable[str],
        prefix: _Prefix,
        *,
        kind: str,
    ) -> None:
        """Run a pip subprocess that installs *requirements* under *prefix*.

        The command line is derived from *finder* (format control, index
        URLs, find-links, trusted hosts, pre-release and prefer-binary flags).
        """
        args: List[str] = [
            sys.executable,
            pip_runnable,
            "install",
            "--ignore-installed",
            "--no-user",
            "--prefix",
            prefix.path,
            "--no-warn-script-location",
        ]
        if logger.getEffectiveLevel() <= logging.DEBUG:
            args.append("-v")

        for format_control in ("no_binary", "only_binary"):
            formats = getattr(finder.format_control, format_control)
            flag = "--" + format_control.replace("_", "-")
            # An empty selection is passed on as ":none:".
            value = ",".join(sorted(formats or {":none:"}))
            args += [flag, value]

        index_urls = finder.index_urls
        if not index_urls:
            args.append("--no-index")
        else:
            # First URL is the main index, the rest are extra indexes.
            args += ["-i", index_urls[0]]
            for extra_index in index_urls[1:]:
                args += ["--extra-index-url", extra_index]

        for link in finder.find_links:
            args += ["--find-links", link]

        for host in finder.trusted_hosts:
            args += ["--trusted-host", host]

        if finder.allow_all_prereleases:
            args.append("--pre")
        if finder.prefer_binary:
            args.append("--prefer-binary")

        # Everything after "--" is a requirement.
        args.append("--")
        args.extend(requirements)

        extra_environ = {"_PIP_STANDALONE_CERT": where()}
        with open_spinner(f"Installing {kind}") as spinner:
            call_subprocess(
                args,
                command_desc=f"pip subprocess to install {kind}",
                spinner=spinner,
                extra_environ=extra_environ,
            )
|
||||
|
||||
|
||||
class NoOpBuildEnvironment(BuildEnvironment):
    """A no-op drop-in replacement for BuildEnvironment"""

    def __init__(self) -> None:
        """Do nothing; the parent initializer is deliberately not called."""

    def __enter__(self) -> None:
        """Do nothing on entering the context."""

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """Do nothing on leaving the context."""

    def cleanup(self) -> None:
        """Do nothing."""

    def install_requirements(
        self,
        finder: "PackageFinder",
        requirements: Iterable[str],
        prefix_as_string: str,
        *,
        kind: str,
    ) -> None:
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError()
|
||||
|
|
@ -0,0 +1,290 @@
|
|||
"""Cache Management
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
|
||||
from pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pip._internal.exceptions import InvalidWheelFilename
|
||||
from pip._internal.models.direct_url import DirectUrl
|
||||
from pip._internal.models.link import Link
|
||||
from pip._internal.models.wheel import Wheel
|
||||
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
from pip._internal.utils.urls import path_to_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ORIGIN_JSON_NAME = "origin.json"
|
||||
|
||||
|
||||
def _hash_dict(d: Dict[str, str]) -> str:
|
||||
"""Return a stable sha224 of a dictionary."""
|
||||
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
|
||||
return hashlib.sha224(s.encode("ascii")).hexdigest()
|
||||
|
||||
|
||||
class Cache:
    """An abstract class - provides cache directories for data from links

    :param cache_dir: The root of the cache.
    """

    def __init__(self, cache_dir: str) -> None:
        super().__init__()
        # A non-empty cache_dir has to be an absolute path.
        assert not cache_dir or os.path.isabs(cache_dir)
        # An empty cache_dir is stored as None.
        self.cache_dir = cache_dir or None

    def _get_cache_path_parts(self, link: Link) -> List[str]:
        """Get parts of path that must be os.path.joined with cache_dir"""

        # The cache key is built from the URL without its fragment: the
        # fragment might hold other items that we don't care about.
        key_parts = {"url": link.url_without_fragment}
        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # The key is hashed with sha224 (see _hash_dict), which gives a
        # shorter output than sha256; the difference doesn't matter much for
        # our use case here.
        digest = _hash_dict(key_parts)

        # Nest the directories some, to prevent having a ton of top level
        # directories where we might run out of sub directories on some FS.
        return [digest[:2], digest[2:4], digest[4:6], digest[6:]]

    def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
        """Return ``(entry_name, directory)`` pairs found in the link's cache dir.

        The list is empty when there is no cache dir, package name or link,
        or when the directory for *link* does not exist.
        """
        cacheable = self.cache_dir and canonical_package_name and link
        if not cacheable:
            return []

        path = self.get_path_for_link(link)
        if not os.path.isdir(path):
            return []
        return [(candidate, path) for candidate in os.listdir(path)]

    def get_path_for_link(self, link: Link) -> str:
        """Return a directory to store cached items in for link."""
        raise NotImplementedError()

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs."""

    def __init__(self, cache_dir: str) -> None:
        super().__init__(cache_dir)

    def get_path_for_link(self, link: Link) -> str:
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        parts = self._get_cache_path_parts(link)
        assert self.cache_dir
        # Wheels live in the "wheels" directory of the root cache_dir.
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return a link to the best cached wheel for *link*, or *link* itself.

        *link* is returned unchanged when there is no package name or no
        cached wheel that parses, matches the name and supports the tags.
        """
        if not package_name:
            return link

        candidates = []
        canonical_package_name = canonicalize_name(package_name)
        for wheel_name, wheel_dir in self._get_candidates(link, canonical_package_name):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                # Not a valid wheel filename: skip this entry.
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name,
                    link,
                    package_name,
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            rank = wheel.support_index_min(supported_tags)
            candidates.append((rank, wheel_name, wheel_dir))

        if not candidates:
            return link

        # The smallest tuple wins, i.e. the lowest support index first.
        _, best_name, best_dir = min(candidates)
        return Link(path_to_url(os.path.join(best_dir, best_name)))
|
||||
|
||||
|
||||
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory"""

    def __init__(self) -> None:
        # Create a globally managed temp dir and use its path as the cache
        # root.
        ephemeral_dir = TempDirectory(
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
            globally_managed=True,
        )
        self._temp_dir = ephemeral_dir

        super().__init__(ephemeral_dir.path)
|
||||
|
||||
|
||||
class CacheEntry:
    """A cached link, a ``persistent`` flag and an optional recorded origin."""

    def __init__(
        self,
        link: Link,
        persistent: bool,
    ):
        """Store the arguments and try to load the origin file next to the link.

        ``origin`` stays ``None`` when the origin file is missing or cannot
        be parsed; a parse failure is logged as a warning.
        """
        self.link = link
        self.persistent = persistent
        self.origin: Optional[DirectUrl] = None

        # The origin file sits in the same directory as the linked file.
        origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
        if not origin_direct_url_path.exists():
            return

        try:
            origin_text = origin_direct_url_path.read_text(encoding="utf-8")
            self.origin = DirectUrl.from_json(origin_text)
        except Exception as e:
            logger.warning(
                "Ignoring invalid cache entry origin file %s for %s (%s)",
                origin_direct_url_path,
                link.filename,
                e,
            )
|
||||
|
||||
|
||||
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(self, cache_dir: str) -> None:
        super().__init__(cache_dir)
        self._wheel_cache = SimpleWheelCache(cache_dir)
        self._ephem_cache = EphemWheelCache()

    def get_path_for_link(self, link: Link) -> str:
        """Return the simple wheel cache's directory for *link*."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link: Link) -> str:
        """Return the ephemeral wheel cache's directory for *link*."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return the cached link for *link* if there is one, else *link*."""
        cache_entry = self.get_cache_entry(link, package_name, supported_tags)
        return link if cache_entry is None else cache_entry.link

    def get_cache_entry(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Optional[CacheEntry]:
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # Simple wheel cache first (persistent), then the ephemeral one.
        lookups = ((self._wheel_cache, True), (self._ephem_cache, False))
        for cache, persistent in lookups:
            found = cache.get(
                link=link,
                package_name=package_name,
                supported_tags=supported_tags,
            )
            # A cache hands back the very same link object on a miss.
            if found is not link:
                return CacheEntry(found, persistent=persistent)

        return None

    @staticmethod
    def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
        """Write *download_info* as the origin file inside *cache_dir*.

        An existing origin file is read first: a warning is logged when it
        cannot be parsed or when its URL differs from the new one. The file
        is then overwritten in every case.
        """
        origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
        if origin_path.exists():
            try:
                origin = DirectUrl.from_json(origin_path.read_text(encoding="utf-8"))
            except Exception as e:
                logger.warning(
                    "Could not read origin file %s in cache entry (%s). "
                    "Will attempt to overwrite it.",
                    origin_path,
                    e,
                )
            else:
                # TODO: use DirectUrl.equivalent when
                # https://github.com/pypa/pip/pull/10564 is merged.
                urls_differ = origin.url != download_info.url
                if urls_differ:
                    logger.warning(
                        "Origin URL %s in cache entry %s does not match download URL "
                        "%s. This is likely a pip bug or a cache corruption issue. "
                        "Will overwrite it with the new value.",
                        origin.url,
                        cache_dir,
                        download_info.url,
                    )
        origin_path.write_text(download_info.to_json(), encoding="utf-8")
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
"""Subpackage containing all of pip's command line interface related code
|
||||
"""
|
||||
|
||||
# This file intentionally does not import submodules
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
"""Logic that powers autocompletion installed by ``pip completion``.
|
||||
"""
|
||||
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
from itertools import chain
|
||||
from typing import Any, Iterable, List, Optional
|
||||
|
||||
from pip._internal.cli.main_parser import create_main_parser
|
||||
from pip._internal.commands import commands_dict, create_command
|
||||
from pip._internal.metadata import get_default_environment
|
||||
|
||||
|
||||
def autocomplete() -> None:
    """Entry Point for completion of main and subcommand options.

    Returns immediately unless ``PIP_AUTO_COMPLETE`` is set in the
    environment. Otherwise the candidates are printed and the process is
    ended through ``sys.exit(1)``.
    """
    # Don't complete if user hasn't sourced bash_completion file.
    if "PIP_AUTO_COMPLETE" not in os.environ:
        return

    # The first word of COMP_WORDS is dropped.
    cwords = os.environ["COMP_WORDS"].split()[1:]
    cword = int(os.environ["COMP_CWORD"])
    try:
        current = cwords[cword - 1]
    except IndexError:
        current = ""

    parser = create_main_parser()
    subcommands = list(commands_dict)
    options = []

    # The first word that names a known subcommand, if any.
    subcommand_name: Optional[str] = next(
        (word for word in cwords if word in subcommands), None
    )

    if subcommand_name is None:
        # No subcommand yet: show main parser options only when necessary.
        option_lists = [group.option_list for group in parser.option_groups]
        option_lists.append(parser.option_list)
        flattened_opts = chain.from_iterable(option_lists)
        if current.startswith("-"):
            for opt in flattened_opts:
                if opt.help != optparse.SUPPRESS_HELP:
                    subcommands += opt._long_opts + opt._short_opts
        else:
            # get completion type given cwords and all available options
            completion_type = get_path_completion_type(cwords, cword, flattened_opts)
            if completion_type:
                subcommands = list(auto_complete_paths(current, completion_type))

        print(" ".join([x for x in subcommands if x.startswith(current)]))
        sys.exit(1)

    # From here on: completion of subcommand options.

    # special case: 'help' subcommand has no options
    if subcommand_name == "help":
        sys.exit(1)

    completing_option = current.startswith("-")

    # special case: list locally installed dists for show and uninstall
    if not completing_option and subcommand_name in ["show", "uninstall"]:
        env = get_default_environment()
        lc = current.lower()
        installed = [
            dist.canonical_name
            for dist in env.iter_installed_distributions(local_only=True)
            if dist.canonical_name.startswith(lc)
            and dist.canonical_name not in cwords[1:]
        ]
        # if there are no dists installed, fall back to option completion
        if installed:
            for name in installed:
                print(name)
            sys.exit(1)

    # special case: complete paths for the install subcommand
    if not completing_option and subcommand_name == "install":
        for path in auto_complete_paths(current, "path"):
            print(path)
        sys.exit(1)

    subcommand = create_command(subcommand_name)

    for opt in subcommand.parser.option_list_all:
        if opt.help == optparse.SUPPRESS_HELP:
            continue
        for opt_str in opt._long_opts + opt._short_opts:
            options.append((opt_str, opt.nargs))

    # filter out previously specified options from available options,
    # and keep only those matching the current input
    prev_opts = [x.split("=")[0] for x in cwords[1 : cword - 1]]
    options = [
        (name, nargs)
        for name, nargs in options
        if name not in prev_opts and name.startswith(current)
    ]

    # get completion type given cwords and available subcommand options
    completion_type = get_path_completion_type(
        cwords,
        cword,
        subcommand.parser.option_list_all,
    )
    # get completion files and directories if ``completion_type`` is
    # ``<file>``, ``<dir>`` or ``<path>``
    if completion_type:
        options = [(path, 0) for path in auto_complete_paths(current, completion_type)]

    for label, nargs in options:
        # append '=' to options which require args
        if nargs and label[:2] == "--":
            label += "="
        print(label)

    sys.exit(1)
|
||||
|
||||
|
||||
def get_path_completion_type(
    cwords: List[str], cword: int, opts: Iterable[Any]
) -> Optional[str]:
    """Get the type of path completion (``file``, ``dir``, ``path`` or None)

    The word looked at is ``cwords[cword - 2]``. When it starts with ``-`` and
    names one of *opts* (the part after ``=`` is ignored), that option's
    metavar is returned if it is empty or if one of its ``/``-separated parts
    is ``path``, ``file`` or ``dir``.

    :param cwords: same as the environmental variable ``COMP_WORDS``
    :param cword: same as the environmental variable ``COMP_CWORD``
    :param opts: The available options to check
    :return: path completion type (``file``, ``dir``, ``path`` or None)
    """
    prev_index = cword - 2
    # Guard both ends: an index below zero means there is no such word, and
    # an index past the end used to raise IndexError instead of simply
    # reporting "no path completion".
    if prev_index < 0 or prev_index >= len(cwords):
        return None

    previous = cwords[prev_index]
    if not previous.startswith("-"):
        return None

    # ``--option=value`` is matched by its option name only.
    option_name = previous.split("=")[0]
    for opt in opts:
        if opt.help == optparse.SUPPRESS_HELP:
            continue
        # str(opt) is split on "/" to get the individual option strings.
        if option_name not in str(opt).split("/"):
            continue
        if not opt.metavar or any(
            x in ("path", "file", "dir") for x in opt.metavar.split("/")
        ):
            return opt.metavar
    return None
|
||||
|
||||
|
||||
def auto_complete_paths(current: str, completion_type: str) -> Iterable[str]:
    """If ``completion_type`` is ``file`` or ``path``, list all regular files
    and directories starting with ``current``; otherwise only list directories
    starting with ``current``.

    :param current: The word to be completed
    :param completion_type: path completion type(``file``, ``path`` or ``dir``)
    :return: A generator of regular files and/or directories
    """
    directory, partial_name = os.path.split(current)
    search_dir = os.path.abspath(directory)
    # Don't complete paths if they can't be accessed
    if not os.access(search_dir, os.R_OK):
        return

    partial_name = os.path.normcase(partial_name)
    for entry in os.listdir(search_dir):
        # keep only the entries that start with the typed name
        if not os.path.normcase(entry).startswith(partial_name):
            continue

        full_path = os.path.join(search_dir, entry)
        completion = os.path.normcase(os.path.join(directory, entry))
        # complete regular files when there is not ``<dir>`` after option
        # complete directories when there is ``<file>``, ``<path>`` or
        # ``<dir>``after option
        if completion_type != "dir" and os.path.isfile(full_path):
            yield completion
        elif os.path.isdir(full_path):
            # joining with "" appends a trailing separator
            yield os.path.join(completion, "")
|
||||
|
|
@ -0,0 +1,236 @@
|
|||
"""Base Command class, and related routines"""
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import logging.config
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
from optparse import Values
|
||||
from typing import Any, Callable, List, Optional, Tuple
|
||||
|
||||
from pip._vendor.rich import traceback as rich_traceback
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.command_context import CommandContextMixIn
|
||||
from pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
||||
from pip._internal.cli.status_codes import (
|
||||
ERROR,
|
||||
PREVIOUS_BUILD_DIR_ERROR,
|
||||
UNKNOWN_ERROR,
|
||||
VIRTUALENV_NOT_FOUND,
|
||||
)
|
||||
from pip._internal.exceptions import (
|
||||
BadCommand,
|
||||
CommandError,
|
||||
DiagnosticPipError,
|
||||
InstallationError,
|
||||
NetworkConnectionError,
|
||||
PreviousBuildDirError,
|
||||
UninstallationError,
|
||||
)
|
||||
from pip._internal.utils.filesystem import check_path_owner
|
||||
from pip._internal.utils.logging import BrokenStdoutLoggingError, setup_logging
|
||||
from pip._internal.utils.misc import get_prog, normalize_path
|
||||
from pip._internal.utils.temp_dir import TempDirectoryTypeRegistry as TempDirRegistry
|
||||
from pip._internal.utils.temp_dir import global_tempdir_manager, tempdir_registry
|
||||
from pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
__all__ = ["Command"]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(CommandContextMixIn):
    """Base class for pip subcommands.

    Builds the per-command option parser, performs the shared start-up work
    (temporary directories, logging, environment variables, sanity checks)
    and then dispatches to ``run()``, translating known exceptions into
    process status codes.
    """

    # Usage string handed to the option parser.
    usage: str = ""
    # When True, the --require-virtualenv check is skipped for this command.
    ignore_require_venv: bool = False

    def __init__(self, name: str, summary: str, isolated: bool = False) -> None:
        """Create the parser and option groups for the command ``name``.

        :param name: subcommand name, also used for the parser's ``prog``
        :param summary: short description stored on ``self.summary``
        :param isolated: forwarded to ``ConfigOptionParser``
        """
        super().__init__()

        self.name = name
        self.summary = summary
        self.parser = ConfigOptionParser(
            usage=self.usage,
            prog=f"{get_prog()} {name}",
            formatter=UpdatingDefaultsHelpFormatter(),
            add_help_option=False,
            name=name,
            description=self.__doc__,
            isolated=isolated,
        )

        self.tempdir_registry: Optional[TempDirRegistry] = None

        # Subclasses attach their own options to this group.
        group_title = f"{self.name.capitalize()} Options"
        self.cmd_opts = optparse.OptionGroup(self.parser, group_title)

        # Every command also accepts the general options.
        general_options = cmdoptions.make_option_group(
            cmdoptions.general_group,
            self.parser,
        )
        self.parser.add_option_group(general_options)

        self.add_options()

    def add_options(self) -> None:
        """Hook for subclasses to register options; does nothing here."""
        pass

    def handle_pip_version_check(self, options: Values) -> None:
        """
        This is a no-op so that commands by default do not do the pip version
        check.
        """
        # Commands that carry the index_group options are expected to
        # override this method and actually perform the check.
        assert not hasattr(options, "no_index")

    def run(self, options: Values, args: List[str]) -> int:
        """Execute the command; must be provided by subclasses."""
        raise NotImplementedError

    def parse_args(self, args: List[str]) -> Tuple[Values, List[str]]:
        """Parse ``args`` with this command's parser."""
        # Kept as a separate method so it can be replaced in tests.
        return self.parser.parse_args(args)

    def main(self, args: List[str]) -> int:
        """Run the command inside the main context and shut logging down after."""
        try:
            with self.main_context():
                return self._main(args)
        finally:
            logging.shutdown()

    def _main(self, args: List[str]) -> int:
        """Do the shared start-up work, then call ``run()``.

        Returns the status code produced by ``run()`` or, unless debug mode
        is enabled, the code mapped from an exception it raised.
        """
        # The registry has to be entered before the tempdir manager, otherwise
        # the configuration would not be accessible by the time the tempdir
        # manager is cleaned up.
        self.tempdir_registry = self.enter_context(tempdir_registry())
        # Entered as early as possible so globally-managed temporary
        # directories are available to the rest of the code.
        self.enter_context(global_tempdir_manager())

        options, args = self.parse_args(args)

        # Stored on the instance so that it can be used elsewhere.
        self.verbosity = options.verbose - options.quiet

        log_level = setup_logging(
            verbosity=self.verbosity,
            no_color=options.no_color,
            user_log_file=options.log,
        )

        forced_features = set(options.features_enabled) & set(
            cmdoptions.ALWAYS_ENABLED_FEATURES
        )
        if forced_features:
            logger.warning(
                "The following features are always enabled: %s. ",
                ", ".join(sorted(forced_features)),
            )

        # --python must come before the subcommand. If it was given and we
        # got here, we should be running in the created subprocess, which
        # has _PIP_RUNNING_IN_SUBPROCESS set in its environment.
        if options.python and "_PIP_RUNNING_IN_SUBPROCESS" not in os.environ:
            logger.critical(
                "The --python option must be placed before the pip subcommand name"
            )
            sys.exit(ERROR)

        # TODO: Try to get these passing down from the command?
        #       without resorting to os.environ to hold these.
        #       This also affects isolated builds and it should.

        if options.no_input:
            os.environ["PIP_NO_INPUT"] = "1"

        if options.exists_action:
            os.environ["PIP_EXISTS_ACTION"] = " ".join(options.exists_action)

        # If a venv is required, check that one can really be found.
        if (
            options.require_venv
            and not self.ignore_require_venv
            and not running_under_virtualenv()
        ):
            logger.critical("Could not find an activated virtualenv (required).")
            sys.exit(VIRTUALENV_NOT_FOUND)

        if options.cache_dir:
            options.cache_dir = normalize_path(options.cache_dir)
            if not check_path_owner(options.cache_dir):
                logger.warning(
                    "The directory '%s' or its parent directory is not owned "
                    "or is not writable by the current user. The cache "
                    "has been disabled. Check the permissions and owner of "
                    "that directory. If executing pip with sudo, you should "
                    "use sudo's -H flag.",
                    options.cache_dir,
                )
                options.cache_dir = None

        def intercepts_unhandled_exc(
            run_func: Callable[..., int]
        ) -> Callable[..., int]:
            """Wrap ``run_func`` so exceptions become logged status codes."""

            @functools.wraps(run_func)
            def exc_logging_wrapper(*call_args: Any) -> int:
                try:
                    result = run_func(*call_args)
                    assert isinstance(result, int)
                    return result
                except DiagnosticPipError as exc:
                    logger.error("%s", exc, extra={"rich": True})
                    logger.debug("Exception information:", exc_info=True)
                    return ERROR
                except PreviousBuildDirError as exc:
                    logger.critical(str(exc))
                    logger.debug("Exception information:", exc_info=True)
                    return PREVIOUS_BUILD_DIR_ERROR
                except (
                    InstallationError,
                    UninstallationError,
                    BadCommand,
                    NetworkConnectionError,
                ) as exc:
                    logger.critical(str(exc))
                    logger.debug("Exception information:", exc_info=True)
                    return ERROR
                except CommandError as exc:
                    logger.critical("%s", exc)
                    logger.debug("Exception information:", exc_info=True)
                    return ERROR
                except BrokenStdoutLoggingError:
                    # stdout no longer works, so bypass the logger and write
                    # what is left straight to stderr.
                    print("ERROR: Pipe to stdout was broken", file=sys.stderr)
                    if log_level <= logging.DEBUG:
                        traceback.print_exc(file=sys.stderr)
                    return ERROR
                except KeyboardInterrupt:
                    logger.critical("Operation cancelled by user")
                    logger.debug("Exception information:", exc_info=True)
                    return ERROR
                except BaseException:
                    logger.critical("Exception:", exc_info=True)
                    return UNKNOWN_ERROR

            return exc_logging_wrapper

        try:
            if options.debug_mode:
                # Debug mode: let exceptions propagate, with rich tracebacks.
                runner = self.run
                rich_traceback.install(show_locals=True)
            else:
                runner = intercepts_unhandled_exc(self.run)
            return runner(options, args)
        finally:
            self.handle_pip_version_check(options)
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,27 @@
|
|||
from contextlib import ExitStack, contextmanager
|
||||
from typing import ContextManager, Generator, TypeVar
|
||||
|
||||
_T = TypeVar("_T", covariant=True)


class CommandContextMixIn:
    """Mixin giving a command one ``ExitStack`` that lives for its main run.

    Context managers registered through ``enter_context()`` are exited when
    the ``main_context()`` block ends.
    """

    def __init__(self) -> None:
        super().__init__()
        self._in_main_context = False
        self._main_context = ExitStack()

    @contextmanager
    def main_context(self) -> Generator[None, None, None]:
        """Open the main context; it must not already be open."""
        assert not self._in_main_context

        self._in_main_context = True
        try:
            with self._main_context:
                yield
        finally:
            # Reset the flag whether the body succeeded or raised.
            self._in_main_context = False

    def enter_context(self, context_provider: ContextManager[_T]) -> _T:
        """Enter ``context_provider`` on the main stack and return its value.

        Only valid while ``main_context()`` is active.
        """
        assert self._in_main_context

        stack = self._main_context
        return stack.enter_context(context_provider)
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
"""Primary application entrypoint.
|
||||
"""
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
from typing import List, Optional
|
||||
|
||||
from pip._internal.cli.autocompletion import autocomplete
|
||||
from pip._internal.cli.main_parser import parse_command
|
||||
from pip._internal.commands import create_command
|
||||
from pip._internal.exceptions import PipError
|
||||
from pip._internal.utils import deprecation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Do not import and use main() directly! Using it directly is actively
|
||||
# discouraged by pip's maintainers. The name, location and behavior of
|
||||
# this function is subject to change, so calling it directly is not
|
||||
# portable across different pip versions.
|
||||
|
||||
# In addition, running pip in-process is unsupported and unsafe. This is
|
||||
# elaborated in detail at
|
||||
# https://pip.pypa.io/en/stable/user_guide/#using-pip-from-your-program.
|
||||
# That document also provides suggestions that should work for nearly
|
||||
# all users that are considering importing and using main() directly.
|
||||
|
||||
# However, we know that certain users will still want to invoke pip
|
||||
# in-process. If you understand and accept the implications of using pip
|
||||
# in an unsupported manner, the best approach is to use runpy to avoid
|
||||
# depending on the exact location of this entry point.
|
||||
|
||||
# The following example shows how to use runpy to invoke pip in that
|
||||
# case:
|
||||
#
|
||||
# sys.argv = ["pip", your, args, here]
|
||||
# runpy.run_module("pip", run_name="__main__")
|
||||
#
|
||||
# Note that this will exit the process after running, unlike a direct
|
||||
# call to main. As it is not safe to do any processing after calling
|
||||
# main, this should not be an issue in practice.
|
||||
|
||||
|
||||
def main(args: Optional[List[str]] = None) -> int:
    """Run pip with ``args`` (defaults to ``sys.argv[1:]``).

    Returns the status code of the selected command's ``main()``. Exits the
    process with status 1 when the command line cannot be parsed.
    """
    argv = sys.argv[1:] if args is None else args

    # Suppress the pkg_resources deprecation warning.
    # The module pattern .*pkg_resources covers both the normal case
    # (pip._vendor.pkg_resources) and the devendored case (a bare
    # pkg_resources).
    warnings.filterwarnings(
        action="ignore", category=DeprecationWarning, module=".*pkg_resources"
    )

    # Route our deprecation warnings through the loggers.
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parse_command(argv)
    except PipError as exc:
        sys.stderr.write(f"ERROR: {exc}")
        sys.stderr.write(os.linesep)
        sys.exit(1)

    # Needed for locale.getpreferredencoding(False) to work
    # in pip._internal.utils.encoding.auto_decode
    try:
        locale.setlocale(locale.LC_ALL, "")
    except locale.Error as locale_error:
        # setlocale can apparently fail if the locale is uninitialized.
        logger.debug("Ignoring error %s when setting locale", locale_error)

    isolated = "--isolated" in cmd_args
    command = create_command(cmd_name, isolated=isolated)
    return command.main(cmd_args)
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
"""A single place for constructing and exposing the main parser
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from pip._internal.build_env import get_runnable_pip
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
||||
from pip._internal.commands import commands_dict, get_similar_commands
|
||||
from pip._internal.exceptions import CommandError
|
||||
from pip._internal.utils.misc import get_pip_version, get_prog
|
||||
|
||||
__all__ = ["create_main_parser", "parse_command"]
|
||||
|
||||
|
||||
def create_main_parser() -> ConfigOptionParser:
    """Build the top-level parser used to split off pip's general options.

    The parser stops at the first positional argument, carries the general
    option group, and lists every known command in its description.
    """
    main_parser = ConfigOptionParser(
        usage="\n%prog <command> [options]",
        add_help_option=False,
        formatter=UpdatingDefaultsHelpFormatter(),
        name="global",
        prog=get_prog(),
    )
    # Stop option parsing at the subcommand name.
    main_parser.disable_interspersed_args()

    main_parser.version = get_pip_version()

    # General options are accepted before the subcommand.
    main_parser.add_option_group(
        cmdoptions.make_option_group(cmdoptions.general_group, main_parser)
    )

    # Marker attribute the help formatter checks with hasattr().
    main_parser.main = True  # type: ignore

    # One row per command; the leading empty entry yields a leading newline.
    rows = [""]
    rows.extend(
        f"{name:27} {command_info.summary}"
        for name, command_info in commands_dict.items()
    )
    main_parser.description = "\n".join(rows)

    return main_parser
|
||||
|
||||
|
||||
def identify_python_interpreter(python: str) -> Optional[str]:
    """Resolve ``python`` to an interpreter path, or ``None`` if not found.

    An existing non-directory path is returned unchanged. An existing
    directory is assumed to be a virtual environment and is searched for
    its Python executable.
    """
    if not os.path.exists(python):
        # Could not find the interpreter specified
        return None

    if not os.path.isdir(python):
        return python

    # bin/python for Unix, Scripts/python.exe for Windows.
    # Both are tried to cope with odd cases like cygwin.
    for relative_exe in ("bin/python", "Scripts/python.exe"):
        candidate = os.path.join(python, relative_exe)
        if os.path.exists(candidate):
            return candidate

    # A directory without a recognisable interpreter inside.
    return None
|
||||
|
||||
|
||||
def parse_command(args: List[str]) -> Tuple[str, List[str]]:
    """Split pip's command line into the subcommand name and its arguments.

    :param args: the raw arguments (without the program name)
    :return: ``(cmd_name, cmd_args)`` where ``cmd_args`` is ``args`` with the
        subcommand name itself removed
    :raises CommandError: if the subcommand is unknown, or if ``--python``
        names an interpreter that cannot be located or run

    Exits the process directly for ``--python`` re-invocation, ``--version``
    and the bare ``pip`` / ``pip help`` cases.
    """
    parser = create_main_parser()

    # Note: parser calls disable_interspersed_args(), so the result of this
    # call is to split the initial args into the general options before the
    # subcommand and everything else.
    # For example:
    #  args: ['--timeout=5', 'install', '--user', 'INITools']
    #  general_options: ['--timeout=5']
    #  args_else: ['install', '--user', 'INITools']
    general_options, args_else = parser.parse_args(args)

    # --python
    if general_options.python and "_PIP_RUNNING_IN_SUBPROCESS" not in os.environ:
        # Re-invoke pip using the specified Python interpreter
        interpreter = identify_python_interpreter(general_options.python)
        if interpreter is None:
            raise CommandError(
                f"Could not locate Python interpreter {general_options.python}"
            )

        pip_cmd = [
            interpreter,
            get_runnable_pip(),
        ]
        pip_cmd.extend(args)

        # Set a flag so the child doesn't re-invoke itself, causing
        # an infinite loop.
        os.environ["_PIP_RUNNING_IN_SUBPROCESS"] = "1"
        try:
            returncode = subprocess.run(pip_cmd).returncode
        except (subprocess.SubprocessError, OSError) as exc:
            # Chain explicitly so the original failure stays attached.
            raise CommandError(
                f"Failed to run pip under {interpreter}: {exc}"
            ) from exc
        sys.exit(returncode)

    # --version
    if general_options.version:
        sys.stdout.write(parser.version)
        sys.stdout.write(os.linesep)
        sys.exit()

    # pip || pip help -> print_help()
    if not args_else or (args_else[0] == "help" and len(args_else) == 1):
        parser.print_help()
        sys.exit()

    # the subcommand name
    cmd_name = args_else[0]

    if cmd_name not in commands_dict:
        guess = get_similar_commands(cmd_name)

        msg = [f'unknown command "{cmd_name}"']
        if guess:
            msg.append(f'maybe you meant "{guess}"')

        raise CommandError(" - ".join(msg))

    # all the args without the subcommand
    #
    # Remove the subcommand by position rather than by value: a general
    # option's value may equal the subcommand name (for example
    # ``--cache-dir download --no-input download pkg``), and removing the
    # first equal element would then drop the option value instead.
    # Parsing stops at the subcommand, so args_else is expected to be the
    # tail of args and the subcommand sits where that tail begins.
    cmd_index = len(args) - len(args_else)
    if 0 <= cmd_index < len(args) and args[cmd_index] == cmd_name:
        cmd_args = args[:cmd_index] + args[cmd_index + 1 :]
    else:
        # Unexpected parser behaviour: fall back to removal by value.
        cmd_args = args[:]
        cmd_args.remove(cmd_name)

    return cmd_name, cmd_args
|
||||
|
|
@ -0,0 +1,294 @@
|
|||
"""Base option parser setup"""
|
||||
|
||||
import logging
|
||||
import optparse
|
||||
import shutil
|
||||
import sys
|
||||
import textwrap
|
||||
from contextlib import suppress
|
||||
from typing import Any, Dict, Generator, List, Tuple
|
||||
|
||||
from pip._internal.cli.status_codes import UNKNOWN_ERROR
|
||||
from pip._internal.configuration import Configuration, ConfigurationError
|
||||
from pip._internal.utils.misc import redact_auth_from_url, strtobool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PrettyHelpFormatter(optparse.IndentedHelpFormatter):
    """A prettier/less verbose help formatter for optparse."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # help position must be aligned with __init__.parseopts.description
        kwargs.update(
            max_help_position=30,
            indent_increment=1,
            width=shutil.get_terminal_size()[0] - 2,
        )
        super().__init__(*args, **kwargs)

    def format_option_strings(self, option: optparse.Option) -> str:
        """Render ``option`` using the default metavar format and separator."""
        return self._format_option_strings(option)

    def _format_option_strings(
        self, option: optparse.Option, mvarfmt: str = " <{}>", optsep: str = ", "
    ) -> str:
        """
        Return the option's first short and first long name joined by
        ``optsep``, followed by its metavar when the option takes a value.

        :param option: the optparse option to render, e.g. -f/--format
        :param mvarfmt: metavar format string
        :param optsep: separator
        """
        # Only the first spelling of each kind is shown.
        names = [
            spellings[0]
            for spellings in (option._short_opts, option._long_opts)
            if spellings
        ]
        rendered = optsep.join(names)

        if option.takes_value():
            assert option.dest is not None
            placeholder = option.metavar or option.dest.lower()
            rendered += mvarfmt.format(placeholder.lower())

        return rendered

    def format_heading(self, heading: str) -> str:
        """Return ``heading`` followed by a colon; "Options" is suppressed."""
        return "" if heading == "Options" else heading + ":\n"

    def format_usage(self, usage: str) -> str:
        """
        Ensure there is only one newline between usage and the first heading
        if there is no description.
        """
        indented = self.indent_lines(textwrap.dedent(usage), " ")
        return "\nUsage: {}\n".format(indented)

    def format_description(self, description: str) -> str:
        """Return the labelled, re-indented description, or "" if empty."""
        # leave full control over description to us
        if not description:
            return ""

        label = "Commands" if hasattr(self.parser, "main") else "Description"
        # Doc strings vary: strip initial newlines, and trailing newlines
        # and spaces, before dedenting and reindenting.
        body = description.lstrip("\n").rstrip()
        body = self.indent_lines(textwrap.dedent(body), " ")
        return f"{label}:\n{body}\n"

    def format_epilog(self, epilog: str) -> str:
        """Return the epilog unchanged, or "" when there is none."""
        # leave full control over epilog to us
        return epilog or ""

    def indent_lines(self, text: str, indent: str) -> str:
        """Prefix every line of ``text`` with ``indent``."""
        return "\n".join(indent + line for line in text.split("\n"))
|
||||
|
||||
|
||||
class UpdatingDefaultsHelpFormatter(PrettyHelpFormatter):
    """Custom help formatter for use in ConfigOptionParser.

    This updates the defaults before expanding them, allowing
    them to show up correctly in the help listing.

    Also redacts auth from URL-type options.
    """

    def expand_default(self, option: optparse.Option) -> str:
        """Return the option's help text with refreshed, redacted defaults."""
        configured = None
        parser = self.parser
        if parser is not None:
            assert isinstance(parser, ConfigOptionParser)
            # Refresh the parser's defaults before they are rendered.
            parser._update_defaults(parser.defaults)
            assert option.dest is not None
            configured = parser.defaults.get(option.dest)

        rendered = super().expand_default(option)

        if not configured or option.metavar != "URL":
            return rendered

        # Normalise to a list; anything that is neither a string nor a list
        # leaves the help text untouched.
        if isinstance(configured, str):
            candidates = [configured]
        elif isinstance(configured, list):
            candidates = configured
        else:
            candidates = []

        for raw_url in candidates:
            rendered = rendered.replace(raw_url, redact_auth_from_url(raw_url))

        return rendered
|
||||
|
||||
|
||||
class CustomOptionParser(optparse.OptionParser):
    """OptionParser with positional group insertion and a flat option view."""

    def insert_option_group(
        self, idx: int, *args: Any, **kwargs: Any
    ) -> optparse.OptionGroup:
        """Insert an OptionGroup at a given position."""
        new_group = self.add_option_group(*args, **kwargs)

        # add_option_group() appended the group; move it to ``idx`` instead.
        del self.option_groups[-1]
        self.option_groups.insert(idx, new_group)

        return new_group

    @property
    def option_list_all(self) -> List[optparse.Option]:
        """Get a list of all options, including those in option groups."""
        collected = list(self.option_list)
        collected += [
            opt for group in self.option_groups for opt in group.option_list
        ]
        return collected
|
||||
|
||||
|
||||
class ConfigOptionParser(CustomOptionParser):
    """Custom option parser which updates its defaults by checking the
    configuration files and environmental variables"""

    def __init__(
        self,
        *args: Any,
        name: str,
        isolated: bool = False,
        **kwargs: Any,
    ) -> None:
        """Store the section ``name`` and create the ``Configuration``."""
        self.name = name
        self.config = Configuration(isolated)

        assert self.name
        super().__init__(*args, **kwargs)

    def check_default(self, option: optparse.Option, key: str, val: Any) -> Any:
        """Validate a configured value; print the error and exit(3) if invalid."""
        try:
            return option.check_value(key, val)
        except optparse.OptionValueError as exc:
            print(f"An error occurred during configuration: {exc}")
            sys.exit(3)

    def _get_ordered_configuration_items(
        self,
    ) -> Generator[Tuple[str, Any], None, None]:
        """Yield ``(key, value)`` pairs grouped in override order.

        Sections are emitted as "global", then this parser's name, then
        ":env:". Empty values and other sections are dropped.
        """
        # Configuration gives keys in an unordered manner. Order them.
        override_order = ["global", self.name, ":env:"]

        # Pool the options into one bucket per section.
        buckets: Dict[str, List[Tuple[str, Any]]] = {
            section_name: [] for section_name in override_order
        }
        for section_key, value in self.config.items():
            # ignore empty values
            if not value:
                logger.debug(
                    "Ignoring configuration key '%s' as it's value is empty.",
                    section_key,
                )
                continue

            section, key = section_key.split(".", 1)
            if section in override_order:
                buckets[section].append((key, value))

        # Yield each group in their override order
        for section in override_order:
            yield from buckets[section]

    def _update_defaults(self, defaults: Dict[str, Any]) -> Dict[str, Any]:
        """Updates the given defaults with values from the config files and
        the environ. Does a little special handling for certain types of
        options (lists)."""

        # Accumulate complex default state.
        self.values = optparse.Values(self.defaults)
        # Destinations whose final value is read back from self.values
        # after all callbacks have run.
        deferred = set()
        # Then set the options with those values
        for key, val in self._get_ordered_configuration_items():
            # '--' because configuration supports only long names
            option = self.get_option("--" + key)

            # Ignore options not present in this parser. E.g. non-globals put
            # in [global] by users that want them to apply to all applicable
            # commands.
            if option is None:
                continue

            assert option.dest is not None
            action = option.action

            if action in ("store_true", "store_false"):
                try:
                    val = strtobool(val)
                except ValueError:
                    self.error(
                        f"{val} is not a valid value for {key} option, "
                        "please specify a boolean value like yes/no, "
                        "true/false or 1/0 instead."
                    )
            elif action == "count":
                # Accept either a boolean word or an integer.
                with suppress(ValueError):
                    val = strtobool(val)
                with suppress(ValueError):
                    val = int(val)
                if not isinstance(val, int) or val < 0:
                    self.error(
                        f"{val} is not a valid value for {key} option, "
                        "please instead specify either a non-negative integer "
                        "or a boolean value like yes/no or false/true "
                        "which is equivalent to 1/0."
                    )
            elif action == "append":
                val = [self.check_default(option, key, item) for item in val.split()]
            elif action == "callback":
                assert option.callback is not None
                deferred.add(option.dest)
                opt_str = option.get_opt_string()
                val = option.convert_value(opt_str, val)
                # From take_action
                cb_args = option.callback_args or ()
                cb_kwargs = option.callback_kwargs or {}
                option.callback(option, opt_str, val, self, *cb_args, **cb_kwargs)
            else:
                val = self.check_default(option, key, val)

            defaults[option.dest] = val

        for dest in deferred:
            defaults[dest] = getattr(self.values, dest)
        self.values = None
        return defaults

    def get_default_values(self) -> optparse.Values:
        """Overriding to make updating the defaults after instantiation of
        the option parser possible, _update_defaults() does the dirty work."""
        if not self.process_default_values:
            # Old, pre-Optik 1.5 behaviour.
            return optparse.Values(self.defaults)

        # Load the configuration, or error out in case of an error
        try:
            self.config.load()
        except ConfigurationError as err:
            self.exit(UNKNOWN_ERROR, str(err))

        merged = self._update_defaults(self.defaults.copy())  # ours
        for option in self._get_all_options():
            assert option.dest is not None
            current = merged.get(option.dest)
            if isinstance(current, str):
                # String defaults still need converting/validating.
                merged[option.dest] = option.check_value(
                    option.get_opt_string(), current
                )
        return optparse.Values(merged)

    def error(self, msg: str) -> None:
        """Print usage to stderr and exit with ``UNKNOWN_ERROR`` and ``msg``."""
        self.print_usage(sys.stderr)
        self.exit(UNKNOWN_ERROR, f"{msg}\n")
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
import functools
|
||||
from typing import Callable, Generator, Iterable, Iterator, Optional, Tuple
|
||||
|
||||
from pip._vendor.rich.progress import (
|
||||
BarColumn,
|
||||
DownloadColumn,
|
||||
FileSizeColumn,
|
||||
Progress,
|
||||
ProgressColumn,
|
||||
SpinnerColumn,
|
||||
TextColumn,
|
||||
TimeElapsedColumn,
|
||||
TimeRemainingColumn,
|
||||
TransferSpeedColumn,
|
||||
)
|
||||
|
||||
from pip._internal.utils.logging import get_indentation
|
||||
|
||||
DownloadProgressRenderer = Callable[[Iterable[bytes]], Iterator[bytes]]
|
||||
|
||||
|
||||
def _rich_progress_bar(
    iterable: Iterable[bytes],
    *,
    bar_type: str,
    size: int,
) -> Generator[bytes, None, None]:
    """Yield the chunks of ``iterable`` while rendering a rich progress display.

    When ``size`` is falsy, a spinner with an unbounded total is shown;
    otherwise a bar with a remaining-time estimate is shown.
    """
    assert bar_type == "on", "This should only be used in the default mode."

    columns: Tuple[ProgressColumn, ...]
    if size:
        total = size
        columns = (
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            DownloadColumn(),
            TransferSpeedColumn(),
            TextColumn("eta"),
            TimeRemainingColumn(),
        )
    else:
        total = float("inf")
        columns = (
            TextColumn("[progress.description]{task.description}"),
            SpinnerColumn("line", speed=1.5),
            FileSizeColumn(),
            TransferSpeedColumn(),
            TimeElapsedColumn(),
        )

    display = Progress(*columns, refresh_per_second=30)
    # The task description is whitespace only, sized from get_indentation().
    task = display.add_task(" " * (get_indentation() + 2), total=total)
    with display:
        for chunk in iterable:
            yield chunk
            # Advance only after the consumer has asked for the next chunk.
            display.update(task, advance=len(chunk))
|
||||
|
||||
|
||||
def get_download_progress_renderer(
    *, bar_type: str, size: Optional[int] = None
) -> DownloadProgressRenderer:
    """Get an object that can be used to render the download progress.

    Returns a callable, that takes an iterable to "wrap".
    """
    if bar_type != "on":
        # No rendering requested: ``iter`` is a no-op when passed an iterator.
        return iter

    return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size)
|
||||
|
|
@ -0,0 +1,505 @@
|
|||
"""Contains the Command base classes that depend on PipSession.
|
||||
|
||||
The classes in this module are in a separate module so the commands not
|
||||
needing download / PackageFinder capability don't unnecessarily import the
|
||||
PackageFinder machinery and all its vendored dependencies, etc.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from functools import partial
|
||||
from optparse import Values
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
|
||||
|
||||
from pip._internal.cache import WheelCache
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.command_context import CommandContextMixIn
|
||||
from pip._internal.exceptions import CommandError, PreviousBuildDirError
|
||||
from pip._internal.index.collector import LinkCollector
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.models.selection_prefs import SelectionPreferences
|
||||
from pip._internal.models.target_python import TargetPython
|
||||
from pip._internal.network.session import PipSession
|
||||
from pip._internal.operations.build.build_tracker import BuildTracker
|
||||
from pip._internal.operations.prepare import RequirementPreparer
|
||||
from pip._internal.req.constructors import (
|
||||
install_req_from_editable,
|
||||
install_req_from_line,
|
||||
install_req_from_parsed_requirement,
|
||||
install_req_from_req_string,
|
||||
)
|
||||
from pip._internal.req.req_file import parse_requirements
|
||||
from pip._internal.req.req_install import InstallRequirement
|
||||
from pip._internal.resolution.base import BaseResolver
|
||||
from pip._internal.self_outdated_check import pip_self_version_check
|
||||
from pip._internal.utils.temp_dir import (
|
||||
TempDirectory,
|
||||
TempDirectoryTypeRegistry,
|
||||
tempdir_kinds,
|
||||
)
|
||||
from pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ssl import SSLContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _create_truststore_ssl_context() -> Optional["SSLContext"]:
    """Build an SSL context backed by the vendored ``truststore`` package.

    :return: a ``truststore.SSLContext`` for TLS client use, or ``None``
        when the ``ssl`` module cannot be imported
    :raises CommandError: on Python older than 3.10, or when the vendored
        ``truststore`` package cannot be imported
    """
    if sys.version_info < (3, 10):
        raise CommandError("The truststore feature is only available for Python 3.10+")

    try:
        import ssl
    except ImportError:
        # Without ssl there is nothing to configure; the caller gets None.
        logger.warning("Disabling truststore since ssl support is missing")
        return None

    try:
        from pip._vendor import truststore
    except ImportError as e:
        # Chain explicitly so the underlying import failure is kept as the
        # cause of the user-facing error.
        raise CommandError(f"The truststore feature is unavailable: {e}") from e

    return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||
|
||||
|
||||
class SessionCommandMixin(CommandContextMixIn):
    """
    Mixin for command classes that need a PipSession via _build_session().
    """

    def __init__(self) -> None:
        super().__init__()
        # Filled in lazily by get_default_session().
        self._session: Optional[PipSession] = None

    @classmethod
    def _get_index_urls(cls, options: Values) -> Optional[List[str]]:
        """Collect the index URLs found on ``options``.

        Reads ``no_index``, ``index_url`` and ``extra_index_urls`` (each one
        optional).  Returns ``None`` instead of an empty list.
        """
        if getattr(options, "no_index", False):
            return None

        collected: List[str] = []
        primary = getattr(options, "index_url", None)
        if primary:
            collected.append(primary)
        extras = getattr(options, "extra_index_urls", None)
        if extras:
            collected.extend(extras)
        return collected or None

    def get_default_session(self, options: Values) -> PipSession:
        """Return the cached session, building and entering it on first use."""
        if self._session is not None:
            return self._session

        self._session = self.enter_context(self._build_session(options))
        # there's no type annotation on requests.Session, so it's
        # automatically ContextManager[Any] and self._session becomes Any,
        # then https://github.com/python/mypy/issues/7696 kicks in
        assert self._session is not None
        return self._session

    def _build_session(
        self,
        options: Values,
        retries: Optional[int] = None,
        timeout: Optional[int] = None,
        fallback_to_certifi: bool = False,
    ) -> PipSession:
        """Create a PipSession configured from ``options``.

        ``retries`` and ``timeout`` override the matching option values when
        they are not ``None``.  With ``fallback_to_certifi`` set, any error
        raised while creating the truststore SSL context is swallowed and no
        custom SSL context is used.
        """
        cache_dir = options.cache_dir
        assert not cache_dir or os.path.isabs(cache_dir)

        ssl_context = None
        if "truststore" in options.features_enabled:
            try:
                ssl_context = _create_truststore_ssl_context()
            except Exception:
                if not fallback_to_certifi:
                    raise
                # Otherwise keep ssl_context as None.

        http_cache = os.path.join(cache_dir, "http-v2") if cache_dir else None
        effective_retries = options.retries if retries is None else retries
        session = PipSession(
            cache=http_cache,
            retries=effective_retries,
            trusted_hosts=options.trusted_hosts,
            index_urls=self._get_index_urls(options),
            ssl_context=ssl_context,
        )

        # Custom CA bundle supplied by the user.
        if options.cert:
            session.verify = options.cert

        # SSL client certificate.
        if options.client_cert:
            session.cert = options.client_cert

        # Timeouts: an explicit argument wins over the option value.
        if options.timeout or timeout:
            session.timeout = options.timeout if timeout is None else timeout

        # Configured proxy applies to both schemes.
        if options.proxy:
            session.proxies = {
                "http": options.proxy,
                "https": options.proxy,
            }

        # Whether the user may be prompted for authentication.
        auth = session.auth
        auth.prompting = not options.no_input
        auth.keyring_provider = options.keyring_provider

        return session
|
||||
|
||||
|
||||
class IndexGroupCommand(Command, SessionCommandMixin):
    """
    Abstract base class for commands with the index_group options.

    This also corresponds to the commands that permit the pip version check.
    """

    def handle_pip_version_check(self, options: Values) -> None:
        """
        Run the pip self-version check unless it is disabled.

        Nothing happens when ``options.disable_pip_version_check`` or
        ``options.no_index`` is set.
        """
        # The index_group options must be present on ``options``.
        assert hasattr(options, "no_index")

        skip_check = options.disable_pip_version_check or options.no_index
        if skip_check:
            return

        # A dedicated session: no retries and a timeout capped at 5.
        check_session = self._build_session(
            options,
            retries=0,
            timeout=min(5, options.timeout),
            # This is set to ensure the function does not fail when truststore is
            # specified in use-feature but cannot be loaded. This usually raises a
            # CommandError and shows a nice user-facing error, but this function is not
            # called in that try-except block.
            fallback_to_certifi=True,
        )
        with check_session:
            pip_self_version_check(check_session, options)
|
||||
|
||||
|
||||
# Temp-directory kinds whose deletion with_cleanup() switches off in the
# tempdir registry.
KEEPABLE_TEMPDIR_TYPES = [
    tempdir_kinds.BUILD_ENV,
    tempdir_kinds.EPHEM_WHEEL_CACHE,
    tempdir_kinds.REQ_BUILD,
]
|
||||
|
||||
|
||||
def warn_if_run_as_root() -> None:
    """Log a warning when pip runs as root on Unix outside a virtualenv.

    No warning is emitted when running under a virtualenv, when ``os`` has no
    ``getuid``, on ``win32``/``cygwin`` platforms, or when the uid is not 0.
    """
    if running_under_virtualenv():
        return
    if not hasattr(os, "getuid"):
        return
    # On Windows, there are no "system managed" Python packages. Installing as
    # Administrator via pip is the correct way of updating system environments.
    #
    # We choose sys.platform over utils.compat.WINDOWS here to enable Mypy platform
    # checks: https://mypy.readthedocs.io/en/stable/common_issues.html
    if sys.platform == "win32" or sys.platform == "cygwin":
        return

    running_as_root = os.getuid() == 0
    if not running_as_root:
        return

    logger.warning(
        "Running pip as the 'root' user can result in broken permissions and "
        "conflicting behaviour with the system package manager. "
        "It is recommended to use a virtual environment instead: "
        "https://pip.pypa.io/warnings/venv"
    )
|
||||
|
||||
|
||||
def with_cleanup(func: Any) -> Any:
    """Decorator for common logic related to managing temporary
    directories.

    The returned wrapper calls ``func(self, options, args)`` and returns its
    result.  Deletion of the KEEPABLE_TEMPDIR_TYPES kinds is disabled on
    ``self.tempdir_registry`` when ``options.no_clean`` is set, and also when
    ``func`` raises PreviousBuildDirError (which is then re-raised).

    The wrapper carries the metadata (name, docstring, ...) of ``func``.
    """
    # Local import: keeps the decorator self-contained within this module.
    import functools

    def configure_tempdir_registry(registry: "TempDirectoryTypeRegistry") -> None:
        # Mark every keepable kind as "do not delete".
        for t in KEEPABLE_TEMPDIR_TYPES:
            registry.set_delete(t, False)

    @functools.wraps(func)
    def wrapper(
        self: "RequirementCommand", options: Values, args: List[Any]
    ) -> Optional[int]:
        assert self.tempdir_registry is not None
        if options.no_clean:
            configure_tempdir_registry(self.tempdir_registry)

        try:
            return func(self, options, args)
        except PreviousBuildDirError:
            # This kind of conflict can occur when the user passes an explicit
            # build directory with a pre-existing folder. In that case we do
            # not want to accidentally remove it.
            configure_tempdir_registry(self.tempdir_registry)
            raise

    return wrapper
|
||||
|
||||
|
||||
class RequirementCommand(IndexGroupCommand):
    # Base for commands that work on requirements: adds the no_clean option
    # and provides factories for the preparer, the resolver and the package
    # finder.  (Written as a comment so the class __doc__ stays unset.)

    def __init__(self, *args: Any, **kw: Any) -> None:
        super().__init__(*args, **kw)

        self.cmd_opts.add_option(cmdoptions.no_clean())

    @staticmethod
    def determine_resolver_variant(options: Values) -> str:
        """Return "legacy" if the legacy resolver is requested, else "resolvelib"."""
        wants_legacy = "legacy-resolver" in options.deprecated_features_enabled
        return "legacy" if wants_legacy else "resolvelib"

    @classmethod
    def make_requirement_preparer(
        cls,
        temp_build_dir: TempDirectory,
        options: Values,
        build_tracker: BuildTracker,
        session: PipSession,
        finder: PackageFinder,
        use_user_site: bool,
        download_dir: Optional[str] = None,
        verbosity: int = 0,
    ) -> RequirementPreparer:
        """
        Build the RequirementPreparer for the given parameters.

        Lazy wheels are enabled only with the "resolvelib" variant and the
        "fast-deps" feature; a warning is logged whenever "fast-deps" is
        enabled.
        """
        temp_build_dir_path = temp_build_dir.path
        assert temp_build_dir_path is not None

        legacy_resolver = cls.determine_resolver_variant(options) != "resolvelib"
        fast_deps_requested = "fast-deps" in options.features_enabled
        lazy_wheel = fast_deps_requested and not legacy_resolver

        if lazy_wheel:
            logger.warning(
                "pip is using lazily downloaded wheels using HTTP "
                "range requests to obtain dependency information. "
                "This experimental feature is enabled through "
                "--use-feature=fast-deps and it is not ready for "
                "production."
            )
        elif fast_deps_requested:
            logger.warning(
                "fast-deps has no effect when used with the legacy resolver."
            )

        return RequirementPreparer(
            build_dir=temp_build_dir_path,
            src_dir=options.src_dir,
            download_dir=download_dir,
            build_isolation=options.build_isolation,
            check_build_deps=options.check_build_deps,
            build_tracker=build_tracker,
            session=session,
            progress_bar=options.progress_bar,
            finder=finder,
            require_hashes=options.require_hashes,
            use_user_site=use_user_site,
            lazy_wheel=lazy_wheel,
            verbosity=verbosity,
            legacy_resolver=legacy_resolver,
        )

    @classmethod
    def make_resolver(
        cls,
        preparer: RequirementPreparer,
        finder: PackageFinder,
        options: Values,
        wheel_cache: Optional[WheelCache] = None,
        use_user_site: bool = False,
        ignore_installed: bool = True,
        ignore_requires_python: bool = False,
        force_reinstall: bool = False,
        upgrade_strategy: str = "to-satisfy-only",
        use_pep517: Optional[bool] = None,
        py_version_info: Optional[Tuple[int, ...]] = None,
    ) -> BaseResolver:
        """
        Build the Resolver (legacy or resolvelib) for the given parameters.
        """
        make_install_req = partial(
            install_req_from_req_string,
            isolated=options.isolated_mode,
            use_pep517=use_pep517,
        )
        variant = cls.determine_resolver_variant(options)
        # The long import name and duplicated invocation is needed to convince
        # Mypy into correctly typechecking. Otherwise it would complain the
        # "Resolver" class being redefined.
        if variant != "resolvelib":
            import pip._internal.resolution.legacy.resolver

            return pip._internal.resolution.legacy.resolver.Resolver(
                preparer=preparer,
                finder=finder,
                wheel_cache=wheel_cache,
                make_install_req=make_install_req,
                use_user_site=use_user_site,
                ignore_dependencies=options.ignore_dependencies,
                ignore_installed=ignore_installed,
                ignore_requires_python=ignore_requires_python,
                force_reinstall=force_reinstall,
                upgrade_strategy=upgrade_strategy,
                py_version_info=py_version_info,
            )

        import pip._internal.resolution.resolvelib.resolver

        return pip._internal.resolution.resolvelib.resolver.Resolver(
            preparer=preparer,
            finder=finder,
            wheel_cache=wheel_cache,
            make_install_req=make_install_req,
            use_user_site=use_user_site,
            ignore_dependencies=options.ignore_dependencies,
            ignore_installed=ignore_installed,
            ignore_requires_python=ignore_requires_python,
            force_reinstall=force_reinstall,
            upgrade_strategy=upgrade_strategy,
            py_version_info=py_version_info,
        )

    def get_requirements(
        self,
        args: List[str],
        options: Values,
        finder: PackageFinder,
        session: PipSession,
    ) -> List[InstallRequirement]:
        """
        Turn the command-line arguments and options into requirements.

        Requirements are gathered, in order, from constraint files, positional
        ``args``, editables and requirement files.  Raises CommandError when
        no args, editables or requirement files were given.
        """
        requirements: List[InstallRequirement] = []

        # Constraint files: not user-supplied.
        requirements.extend(
            install_req_from_parsed_requirement(
                parsed_req,
                isolated=options.isolated_mode,
                user_supplied=False,
            )
            for filename in options.constraints
            for parsed_req in parse_requirements(
                filename,
                constraint=True,
                finder=finder,
                options=options,
                session=session,
            )
        )

        # Requirements given directly on the command line.
        for req in args:
            requirements.append(
                install_req_from_line(
                    req,
                    comes_from=None,
                    isolated=options.isolated_mode,
                    use_pep517=options.use_pep517,
                    user_supplied=True,
                    config_settings=getattr(options, "config_settings", None),
                )
            )

        # Editable requirements.
        for req in options.editables:
            requirements.append(
                install_req_from_editable(
                    req,
                    user_supplied=True,
                    isolated=options.isolated_mode,
                    use_pep517=options.use_pep517,
                    config_settings=getattr(options, "config_settings", None),
                )
            )

        # NOTE: options.require_hashes may be set if --require-hashes is True
        for filename in options.requirements:
            for parsed_req in parse_requirements(
                filename, finder=finder, options=options, session=session
            ):
                if parsed_req.options:
                    settings = parsed_req.options.get("config_settings")
                else:
                    settings = None
                requirements.append(
                    install_req_from_parsed_requirement(
                        parsed_req,
                        isolated=options.isolated_mode,
                        use_pep517=options.use_pep517,
                        user_supplied=True,
                        config_settings=settings,
                    )
                )

        # If any requirement has hash options, enable hash checking.
        if any(req.has_hash_options for req in requirements):
            options.require_hashes = True

        if args or options.editables or options.requirements:
            return requirements

        # Nothing to work on: report it, hinting at find-links if present.
        name = self.name
        if options.find_links:
            links = " ".join(options.find_links)
            raise CommandError(
                f"You must give at least one requirement to {name} "
                f'(maybe you meant "pip {name} {links}"?)'
            )
        raise CommandError(
            f"You must give at least one requirement to {name} "
            f'(see "pip help {name}")'
        )

    @staticmethod
    def trace_basic_info(finder: PackageFinder) -> None:
        """
        Log the finder's formatted search locations, if there are any.
        """
        # Display where finder is looking for packages
        locations = finder.search_scope.get_formatted_locations()
        if locations:
            logger.info(locations)

    def _build_package_finder(
        self,
        options: Values,
        session: PipSession,
        target_python: Optional[TargetPython] = None,
        ignore_requires_python: Optional[bool] = None,
    ) -> PackageFinder:
        """
        Create a package finder appropriate to this requirement command.

        :param ignore_requires_python: Whether to ignore incompatible
            "Requires-Python" values in links. Defaults to False.
        """
        collector = LinkCollector.create(session, options=options)
        prefs = SelectionPreferences(
            allow_yanked=True,
            format_control=options.format_control,
            allow_all_prereleases=options.pre,
            prefer_binary=options.prefer_binary,
            ignore_requires_python=ignore_requires_python,
        )

        return PackageFinder.create(
            link_collector=collector,
            selection_prefs=prefs,
            target_python=target_python,
        )
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
import contextlib
|
||||
import itertools
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
from typing import IO, Generator, Optional
|
||||
|
||||
from pip._internal.utils.compat import WINDOWS
|
||||
from pip._internal.utils.logging import get_indentation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpinnerInterface:
    """Interface of a spinner; both methods must be overridden."""

    def spin(self) -> None:
        """Advance the spinner."""
        raise NotImplementedError

    def finish(self, final_status: str) -> None:
        """Stop the spinner, reporting ``final_status``."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class InteractiveSpinner(SpinnerInterface):
    """Spinner that writes its status directly to a file (``sys.stdout`` by default)."""

    def __init__(
        self,
        message: str,
        file: Optional[IO[str]] = None,
        spin_chars: str = "-\\|/",
        # Empirically, 8 updates/second looks nice
        min_update_interval_seconds: float = 0.125,
    ):
        self._message = message
        self._file = sys.stdout if file is None else file
        self._rate_limiter = RateLimiter(min_update_interval_seconds)
        self._finished = False

        self._spin_cycle = itertools.cycle(spin_chars)

        # Print the indented prefix once; statuses are drawn after it.
        prefix = " " * get_indentation() + self._message + " ... "
        self._file.write(prefix)
        # Width of the status currently displayed.
        self._width = 0

    def _write(self, status: str) -> None:
        """Replace the displayed status with ``status`` and reset the rate limiter."""
        assert not self._finished
        # Erase what we wrote before by backspacing to the beginning, writing
        # spaces to overwrite the old text, and then backspacing again
        rewind = "\b" * self._width
        blanks = " " * self._width
        self._file.write(rewind + blanks + rewind)
        # Now we have a blank slate to add our status
        self._file.write(status)
        self._width = len(status)
        self._file.flush()
        self._rate_limiter.reset()

    def spin(self) -> None:
        """Show the next spin character unless finished or rate-limited."""
        if self._finished or not self._rate_limiter.ready():
            return
        self._write(next(self._spin_cycle))

    def finish(self, final_status: str) -> None:
        """Write ``final_status`` followed by a newline; later calls do nothing."""
        if self._finished:
            return
        self._write(final_status)
        self._file.write("\n")
        self._file.flush()
        self._finished = True
|
||||
|
||||
|
||||
# Used for dumb terminals, non-interactive installs (no tty), etc.
|
||||
# We still print updates occasionally (once every 60 seconds by default) to
|
||||
# act as a keep-alive for systems like Travis-CI that take lack-of-output as
|
||||
# an indication that a task has frozen.
|
||||
class NonInteractiveSpinner(SpinnerInterface):
    """Spinner that reports its status through ``logger.info``."""

    def __init__(self, message: str, min_update_interval_seconds: float = 60.0) -> None:
        self._message = message
        self._finished = False
        self._rate_limiter = RateLimiter(min_update_interval_seconds)
        self._update("started")

    def _update(self, status: str) -> None:
        """Reset the rate limiter and log ``status`` with the message prefix."""
        assert not self._finished
        self._rate_limiter.reset()
        logger.info("%s: %s", self._message, status)

    def spin(self) -> None:
        """Log a keep-alive line unless finished or rate-limited."""
        if self._finished or not self._rate_limiter.ready():
            return
        self._update("still running...")

    def finish(self, final_status: str) -> None:
        """Log the final status once; later calls do nothing."""
        if self._finished:
            return
        self._update(f"finished with status '{final_status}'")
        self._finished = True
|
||||
|
||||
|
||||
class RateLimiter:
    """Report whether a minimum interval has elapsed since the last reset.

    Elapsed time is measured with ``time.monotonic()`` so that wall-clock
    adjustments can neither stall nor prematurely trigger updates.
    """

    def __init__(self, min_update_interval_seconds: float) -> None:
        self._min_update_interval_seconds = min_update_interval_seconds
        # -inf stands for "never updated": the first ready() is always True,
        # whatever reference point the monotonic clock happens to use.
        self._last_update: float = float("-inf")

    def ready(self) -> bool:
        """Return True when at least the minimum interval has passed."""
        elapsed = time.monotonic() - self._last_update
        return elapsed >= self._min_update_interval_seconds

    def reset(self) -> None:
        """Record the current time as the moment of the last update."""
        self._last_update = time.monotonic()
|
||||
|
||||
|
||||
@contextlib.contextmanager
def open_spinner(message: str) -> Generator[SpinnerInterface, None, None]:
    """Yield a spinner for ``message`` and finish it when the block exits.

    The spinner is finished with "done" on success, "canceled" on
    KeyboardInterrupt and "error" on any other Exception (both re-raised).
    """
    # Interactive spinner goes directly to sys.stdout rather than being routed
    # through the logging system, but it acts like it has level INFO,
    # i.e. it's only displayed if we're at level INFO or better.
    # Non-interactive spinner goes through the logging system, so it is always
    # in sync with logging configuration.
    interactive = sys.stdout.isatty() and logger.getEffectiveLevel() <= logging.INFO
    spinner: SpinnerInterface = (
        InteractiveSpinner(message) if interactive else NonInteractiveSpinner(message)
    )
    try:
        with hidden_cursor(sys.stdout):
            yield spinner
    except KeyboardInterrupt:
        spinner.finish("canceled")
        raise
    except Exception:
        spinner.finish("error")
        raise
    else:
        spinner.finish("done")
|
||||
|
||||
|
||||
# Escape sequences written by hidden_cursor() on entry and on exit.
HIDE_CURSOR = "\x1b[?25l"
SHOW_CURSOR = "\x1b[?25h"
|
||||
|
||||
|
||||
@contextlib.contextmanager
def hidden_cursor(file: IO[str]) -> Generator[None, None, None]:
    """Write HIDE_CURSOR to ``file`` for the duration of the block.

    Nothing is written on Windows, when ``file`` is not a tty, or when the
    logger's effective level is above INFO.  SHOW_CURSOR is always written
    on exit once HIDE_CURSOR has been written.
    """
    # The Windows terminal does not support the hide/show cursor ANSI codes,
    # even via colorama. So don't even try.
    # We don't want to clutter the output with control characters if we're
    # writing to a file, or if the user is running with --quiet.
    # See https://github.com/pypa/pip/issues/3418
    emit_codes = (
        not WINDOWS
        and file.isatty()
        and logger.getEffectiveLevel() <= logging.INFO
    )
    if not emit_codes:
        yield
        return

    file.write(HIDE_CURSOR)
    try:
        yield
    finally:
        file.write(SHOW_CURSOR)
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
# Status codes used as return values by pip commands.
SUCCESS = 0
ERROR = 1
UNKNOWN_ERROR = 2
VIRTUALENV_NOT_FOUND = 3
PREVIOUS_BUILD_DIR_ERROR = 4
NO_MATCHES_FOUND = 23
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
"""
|
||||
Package containing all pip commands
|
||||
"""
|
||||
|
||||
import importlib
|
||||
from collections import namedtuple
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
|
||||
# Describes one command: where its class lives and its one-line summary.
CommandInfo = namedtuple("CommandInfo", ["module_path", "class_name", "summary"])
|
||||
|
||||
# This dictionary does a bunch of heavy lifting for help output:
|
||||
# - Enables avoiding additional (costly) imports for presenting `--help`.
|
||||
# - The ordering matters for help display.
|
||||
#
|
||||
# Even though the module path starts with the same "pip._internal.commands"
|
||||
# prefix, the full path makes testing easier (specifically when modifying
|
||||
# `commands_dict` in test setup / teardown).
|
||||
# Maps each command name to its CommandInfo; insertion order is the order
# used for help display.
commands_dict: Dict[str, CommandInfo] = {
    "install": CommandInfo(
        "pip._internal.commands.install",
        "InstallCommand",
        "Install packages.",
    ),
    "download": CommandInfo(
        "pip._internal.commands.download",
        "DownloadCommand",
        "Download packages.",
    ),
    "uninstall": CommandInfo(
        "pip._internal.commands.uninstall",
        "UninstallCommand",
        "Uninstall packages.",
    ),
    "freeze": CommandInfo(
        "pip._internal.commands.freeze",
        "FreezeCommand",
        "Output installed packages in requirements format.",
    ),
    "inspect": CommandInfo(
        "pip._internal.commands.inspect",
        "InspectCommand",
        "Inspect the python environment.",
    ),
    "list": CommandInfo(
        "pip._internal.commands.list",
        "ListCommand",
        "List installed packages.",
    ),
    "show": CommandInfo(
        "pip._internal.commands.show",
        "ShowCommand",
        "Show information about installed packages.",
    ),
    "check": CommandInfo(
        "pip._internal.commands.check",
        "CheckCommand",
        "Verify installed packages have compatible dependencies.",
    ),
    "config": CommandInfo(
        "pip._internal.commands.configuration",
        "ConfigurationCommand",
        "Manage local and global configuration.",
    ),
    "search": CommandInfo(
        "pip._internal.commands.search",
        "SearchCommand",
        "Search PyPI for packages.",
    ),
    "cache": CommandInfo(
        "pip._internal.commands.cache",
        "CacheCommand",
        "Inspect and manage pip's wheel cache.",
    ),
    "index": CommandInfo(
        "pip._internal.commands.index",
        "IndexCommand",
        "Inspect information available from package indexes.",
    ),
    "wheel": CommandInfo(
        "pip._internal.commands.wheel",
        "WheelCommand",
        "Build wheels from your requirements.",
    ),
    "hash": CommandInfo(
        "pip._internal.commands.hash",
        "HashCommand",
        "Compute hashes of package archives.",
    ),
    "completion": CommandInfo(
        "pip._internal.commands.completion",
        "CompletionCommand",
        "A helper command used for command completion.",
    ),
    "debug": CommandInfo(
        "pip._internal.commands.debug",
        "DebugCommand",
        "Show information useful for debugging.",
    ),
    "help": CommandInfo(
        "pip._internal.commands.help",
        "HelpCommand",
        "Show help for commands.",
    ),
}
|
||||
|
||||
|
||||
def create_command(name: str, **kwargs: Any) -> Command:
    """
    Instantiate the command class registered under ``name``.

    The class is looked up in ``commands_dict``, its module is imported on
    demand, and extra keyword arguments are passed to the constructor.
    """
    module_path, class_name, summary = commands_dict[name]
    command_class = getattr(importlib.import_module(module_path), class_name)
    return command_class(name=name, summary=summary, **kwargs)
|
||||
|
||||
|
||||
def get_similar_commands(name: str) -> Optional[str]:
    """Command name auto-correct.

    Returns the closest match for the lower-cased ``name`` among the known
    command names, or ``None`` when nothing is close enough.
    """
    from difflib import get_close_matches

    candidates = get_close_matches(name.lower(), commands_dict.keys())
    return candidates[0] if candidates else None
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
import os
|
||||
import textwrap
|
||||
from optparse import Values
|
||||
from typing import Any, List
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.exceptions import CommandError, PipError
|
||||
from pip._internal.utils import filesystem
|
||||
from pip._internal.utils.logging import getLogger
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class CacheCommand(Command):
    """
    Inspect and manage pip's wheel cache.

    Subcommands:

    - dir: Show the cache directory.
    - info: Show information about the cache.
    - list: List filenames of packages stored in the cache.
    - remove: Remove one or more package from the cache.
    - purge: Remove all items from the cache.

    ``<pattern>`` can be a glob expression or a package name.
    """

    ignore_require_venv = True
    usage = """
        %prog dir
        %prog info
        %prog list [<pattern>] [--format=[human, abspath]]
        %prog remove <pattern>
        %prog purge
    """

    def add_options(self) -> None:
        """Register the ``--format`` option and attach the option group."""
        self.cmd_opts.add_option(
            "--format",
            action="store",
            dest="list_format",
            default="human",
            choices=("human", "abspath"),
            help="Select the output format among: human (default) or abspath",
        )

        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Dispatch to the handler named by ``args[0]``.

        Returns ERROR when the cache is disabled, when the action is missing
        or unknown, or when the handler raises PipError; SUCCESS otherwise.
        """
        if not options.cache_dir:
            logger.error("pip cache commands can not function since cache is disabled.")
            return ERROR

        handlers = {
            "dir": self.get_cache_dir,
            "info": self.get_cache_info,
            "list": self.list_cache_items,
            "remove": self.remove_cache_items,
            "purge": self.purge_cache,
        }

        # Determine action
        handler = handlers.get(args[0]) if args else None
        if handler is None:
            logger.error(
                "Need an action (%s) to perform.",
                ", ".join(sorted(handlers)),
            )
            return ERROR

        # Error handling happens here, not in the action-handlers.
        try:
            handler(options, args[1:])
        except PipError as e:
            logger.error(e.args[0])
            return ERROR

        return SUCCESS

    def get_cache_dir(self, options: Values, args: List[Any]) -> None:
        """Log the cache directory; extra arguments are rejected."""
        if args:
            raise CommandError("Too many arguments")

        logger.info(options.cache_dir)

    def get_cache_info(self, options: Values, args: List[Any]) -> None:
        """Log locations, sizes and file counts of the HTTP and wheel caches."""
        if args:
            raise CommandError("Too many arguments")

        num_http_files = len(self._find_http_files(options))
        num_packages = len(self._find_wheels(options, "*"))

        http_cache_location = self._cache_dir(options, "http-v2")
        old_http_cache_location = self._cache_dir(options, "http")
        wheels_cache_location = self._cache_dir(options, "wheels")

        # The reported HTTP cache size covers both the new and the old location.
        http_bytes = filesystem.directory_size(
            http_cache_location
        ) + filesystem.directory_size(old_http_cache_location)
        http_cache_size = filesystem.format_size(http_bytes)
        wheels_cache_size = filesystem.format_directory_size(wheels_cache_location)

        template = textwrap.dedent(
            """
                Package index page cache location (pip v23.3+): {http_cache_location}
                Package index page cache location (older pips): {old_http_cache_location}
                Package index page cache size: {http_cache_size}
                Number of HTTP files: {num_http_files}
                Locally built wheels location: {wheels_cache_location}
                Locally built wheels size: {wheels_cache_size}
                Number of locally built wheels: {package_count}
            """  # noqa: E501
        )
        message = template.format(
            http_cache_location=http_cache_location,
            old_http_cache_location=old_http_cache_location,
            http_cache_size=http_cache_size,
            num_http_files=num_http_files,
            wheels_cache_location=wheels_cache_location,
            package_count=num_packages,
            wheels_cache_size=wheels_cache_size,
        ).strip()

        logger.info(message)

    def list_cache_items(self, options: Values, args: List[Any]) -> None:
        """List cached wheels matching the optional pattern (default ``*``)."""
        if len(args) > 1:
            raise CommandError("Too many arguments")

        pattern = args[0] if args else "*"
        files = self._find_wheels(options, pattern)

        if options.list_format == "human":
            self.format_for_human(files)
        else:
            self.format_for_abspath(files)

    def format_for_human(self, files: List[str]) -> None:
        """Log one sorted ``- name (size)`` line per file, or a notice if empty."""
        if not files:
            logger.info("No locally built wheels cached.")
            return

        results = [
            f" - {os.path.basename(filename)} ({filesystem.format_file_size(filename)})"
            for filename in files
        ]
        logger.info("Cache contents:\n")
        logger.info("\n".join(sorted(results)))

    def format_for_abspath(self, files: List[str]) -> None:
        """Log the sorted file paths, one per line; log nothing if empty."""
        if not files:
            return
        logger.info("\n".join(sorted(files)))

    def remove_cache_items(self, options: Values, args: List[Any]) -> None:
        """Delete cached wheels matching the pattern in ``args[0]``.

        With the pattern ``*`` the HTTP cache files are deleted as well.
        """
        if len(args) > 1:
            raise CommandError("Too many arguments")

        if not args:
            raise CommandError("Please provide a pattern")

        pattern = args[0]
        files = self._find_wheels(options, pattern)

        no_matching_msg = "No matching packages"
        if pattern == "*":
            # Only fetch http files if no specific pattern given
            files += self._find_http_files(options)
        else:
            # Add the pattern to the log message
            no_matching_msg += f' for pattern "{pattern}"'

        if not files:
            logger.warning(no_matching_msg)

        for filename in files:
            os.unlink(filename)
            logger.verbose("Removed %s", filename)
        logger.info("Files removed: %s", len(files))

    def purge_cache(self, options: Values, args: List[Any]) -> None:
        """Remove everything from the cache; extra arguments are rejected."""
        if args:
            raise CommandError("Too many arguments")

        return self.remove_cache_items(options, ["*"])

    def _cache_dir(self, options: Values, subdir: str) -> str:
        """Return the path of ``subdir`` inside the cache directory."""
        return os.path.join(options.cache_dir, subdir)

    def _find_http_files(self, options: Values) -> List[str]:
        """Return all files in the old ("http") and new ("http-v2") HTTP caches."""
        old_files = filesystem.find_files(self._cache_dir(options, "http"), "*")
        new_files = filesystem.find_files(self._cache_dir(options, "http-v2"), "*")
        return old_files + new_files

    def _find_wheels(self, options: Values, pattern: str) -> List[str]:
        """Return the cached wheel files matching ``pattern``."""
        wheel_dir = self._cache_dir(options, "wheels")

        # The wheel filename format, as specified in PEP 427, is:
        #     {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
        #
        # Additionally, non-alphanumeric values in the distribution are
        # normalized to underscores (_), meaning hyphens can never occur
        # before `-{version}`.
        #
        # Given that information:
        # - If the pattern we're given contains a hyphen (-), the user is
        #   providing at least the version. Thus, we can just append `*.whl`
        #   to match the rest of it.
        # - If the pattern we're given doesn't contain a hyphen (-), the
        #   user is only providing the name. Thus, we append `-*.whl` to
        #   match the hyphen before the version, followed by anything else.
        #
        # PEP 427: https://www.python.org/dev/peps/pep-0427/
        suffix = "*.whl" if "-" in pattern else "-*.whl"

        return filesystem.find_files(wheel_dir, pattern + suffix)
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import logging
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.operations.check import (
|
||||
check_package_set,
|
||||
create_package_set_from_installed,
|
||||
warn_legacy_versions_and_specifiers,
|
||||
)
|
||||
from pip._internal.utils.misc import write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CheckCommand(Command):
    """Verify installed packages have compatible dependencies."""

    usage = """
      %prog [options]"""

    def run(self, options: Values, args: List[str]) -> int:
        """Print every missing or conflicting dependency that was found.

        Returns ``ERROR`` if anything is missing, conflicting, or reported
        as a parsing problem; otherwise prints a confirmation line and
        returns ``SUCCESS``.
        """
        package_set, parsing_probs = create_package_set_from_installed()
        warn_legacy_versions_and_specifiers(package_set)
        missing, conflicting = check_package_set(package_set)

        # Requirements that are not installed at all.
        for project_name in missing:
            project_version = package_set[project_name].version
            for dependency in missing[project_name]:
                write_output(
                    "%s %s requires %s, which is not installed.",
                    project_name,
                    project_version,
                    dependency[0],
                )

        # Requirements that are installed, but at an incompatible version.
        for project_name in conflicting:
            project_version = package_set[project_name].version
            for dep_name, dep_version, req in conflicting[project_name]:
                write_output(
                    "%s %s has requirement %s, but you have %s %s.",
                    project_name,
                    project_version,
                    req,
                    dep_name,
                    dep_version,
                )

        if missing or conflicting or parsing_probs:
            return ERROR

        write_output("No broken requirements found.")
        return SUCCESS
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
import sys
|
||||
import textwrap
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.utils.misc import get_prog
|
||||
|
||||
# Wrapper template for the emitted completion code: the shell-specific
# script is placed between "completion start" / "completion end" marker
# comments.  It is filled in with str.format() using the `shell` and
# `script` fields.
BASE_COMPLETION = """
|
||||
# pip {shell} completion start{script}# pip {shell} completion end
|
||||
"""
|
||||
|
||||
# Completion script templates keyed by shell name ("bash", "zsh", "fish",
# "powershell").  Each template is passed through str.format(prog=...), so
# `{prog}` is the only substitution field and every literal brace in the
# shell code is doubled (`{{` / `}}`).
COMPLETION_SCRIPTS = {
|
||||
"bash": """
|
||||
_pip_completion()
|
||||
{{
|
||||
COMPREPLY=( $( COMP_WORDS="${{COMP_WORDS[*]}}" \\
|
||||
COMP_CWORD=$COMP_CWORD \\
|
||||
PIP_AUTO_COMPLETE=1 $1 2>/dev/null ) )
|
||||
}}
|
||||
complete -o default -F _pip_completion {prog}
|
||||
""",
|
||||
"zsh": """
|
||||
#compdef -P pip[0-9.]#
|
||||
__pip() {{
|
||||
compadd $( COMP_WORDS="$words[*]" \\
|
||||
COMP_CWORD=$((CURRENT-1)) \\
|
||||
PIP_AUTO_COMPLETE=1 $words[1] 2>/dev/null )
|
||||
}}
|
||||
if [[ $zsh_eval_context[-1] == loadautofunc ]]; then
|
||||
# autoload from fpath, call function directly
|
||||
__pip "$@"
|
||||
else
|
||||
# eval/source/. command, register function for later
|
||||
compdef __pip -P 'pip[0-9.]#'
|
||||
fi
|
||||
""",
|
||||
"fish": """
|
||||
function __fish_complete_pip
|
||||
set -lx COMP_WORDS (commandline -o) ""
|
||||
set -lx COMP_CWORD ( \\
|
||||
math (contains -i -- (commandline -t) $COMP_WORDS)-1 \\
|
||||
)
|
||||
set -lx PIP_AUTO_COMPLETE 1
|
||||
string split \\ -- (eval $COMP_WORDS[1])
|
||||
end
|
||||
complete -fa "(__fish_complete_pip)" -c {prog}
|
||||
""",
|
||||
"powershell": """
|
||||
if ((Test-Path Function:\\TabExpansion) -and -not `
|
||||
(Test-Path Function:\\_pip_completeBackup)) {{
|
||||
Rename-Item Function:\\TabExpansion _pip_completeBackup
|
||||
}}
|
||||
function TabExpansion($line, $lastWord) {{
|
||||
$lastBlock = [regex]::Split($line, '[|;]')[-1].TrimStart()
|
||||
if ($lastBlock.StartsWith("{prog} ")) {{
|
||||
$Env:COMP_WORDS=$lastBlock
|
||||
$Env:COMP_CWORD=$lastBlock.Split().Length - 1
|
||||
$Env:PIP_AUTO_COMPLETE=1
|
||||
(& {prog}).Split()
|
||||
Remove-Item Env:COMP_WORDS
|
||||
Remove-Item Env:COMP_CWORD
|
||||
Remove-Item Env:PIP_AUTO_COMPLETE
|
||||
}}
|
||||
elseif (Test-Path Function:\\_pip_completeBackup) {{
|
||||
# Fall back on existing tab expansion
|
||||
_pip_completeBackup $line $lastWord
|
||||
}}
|
||||
}}
|
||||
""",
|
||||
}
|
||||
|
||||
|
||||
class CompletionCommand(Command):
    """A helper command to be used for command completion."""

    ignore_require_venv = True

    def add_options(self) -> None:
        """Register one ``store_const`` flag per supported shell.

        All flags write to the same ``shell`` destination.
        """
        shell_flags = (
            ("bash", "-b"),
            ("zsh", "-z"),
            ("fish", "-f"),
            ("powershell", "-p"),
        )
        for shell, short_flag in shell_flags:
            self.cmd_opts.add_option(
                f"--{shell}",
                short_flag,
                action="store_const",
                const=shell,
                dest="shell",
                help=f"Emit completion code for {shell}",
            )

        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Prints the completion code of the given shell"""
        chosen_shell = options.shell

        if chosen_shell not in COMPLETION_SCRIPTS:
            shell_options = ["--" + name for name in sorted(COMPLETION_SCRIPTS)]
            sys.stderr.write(
                "ERROR: You must pass {}\n".format(" or ".join(shell_options))
            )
            # The status code is SUCCESS even though an error was reported.
            return SUCCESS

        template = COMPLETION_SCRIPTS[chosen_shell]
        script = textwrap.dedent(template.format(prog=get_prog()))
        print(BASE_COMPLETION.format(script=script, shell=chosen_shell))
        return SUCCESS
|
||||
|
|
@ -0,0 +1,280 @@
|
|||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from optparse import Values
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.configuration import (
|
||||
Configuration,
|
||||
Kind,
|
||||
get_configuration_files,
|
||||
kinds,
|
||||
)
|
||||
from pip._internal.exceptions import PipError
|
||||
from pip._internal.utils.logging import indent_log
|
||||
from pip._internal.utils.misc import get_prog, write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConfigurationCommand(Command):
    """
    Manage local and global configuration.

    Subcommands:

    - list: List the active configuration (or from the file specified)
    - edit: Edit the configuration file in an editor
    - get: Get the value associated with command.option
    - set: Set the command.option=value
    - unset: Unset the value associated with command.option
    - debug: List the configuration files and values defined under them

    Configuration keys should be dot separated command and option name,
    with the special prefix "global" affecting any command. For example,
    "pip config set global.index-url https://example.org/" would configure
    the index url for all commands, but "pip config set download.timeout 10"
    would configure a 10 second timeout only for "pip download" commands.

    If none of --user, --global and --site are passed, a virtual
    environment configuration file is used if one is active and the file
    exists. Otherwise, all modifications happen to the user file by
    default.
    """

    ignore_require_venv = True
    usage = """
        %prog [<file-option>] list
        %prog [<file-option>] [--editor <editor-path>] edit

        %prog [<file-option>] get command.option
        %prog [<file-option>] set command.option value
        %prog [<file-option>] unset command.option
        %prog [<file-option>] debug
    """

    def add_options(self) -> None:
        """Register ``--editor`` and the three file-selection flags."""
        self.cmd_opts.add_option(
            "--editor",
            dest="editor",
            action="store",
            default=None,
            help=(
                "Editor to use to edit the file. Uses VISUAL or EDITOR "
                "environment variables if not provided."
            ),
        )

        file_flags = (
            ("--global", "global_file", "Use the system-wide configuration file only"),
            ("--user", "user_file", "Use the user configuration file only"),
            (
                "--site",
                "site_file",
                "Use the current environment configuration file only",
            ),
        )
        for flag, destination, help_text in file_flags:
            self.cmd_opts.add_option(
                flag,
                dest=destination,
                action="store_true",
                default=False,
                help=help_text,
            )

        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Dispatch to the handler named by the first positional argument.

        Returns ``ERROR`` when no valid action is given or when a
        ``PipError`` is raised while selecting the file or running the
        handler; otherwise returns ``SUCCESS``.
        """
        handlers = {
            "list": self.list_values,
            "edit": self.open_in_editor,
            "get": self.get_name,
            "set": self.set_name_value,
            "unset": self.unset_name,
            "debug": self.list_config_values,
        }

        # Determine action
        action = args[0] if args else None
        if action not in handlers:
            logger.error(
                "Need an action (%s) to perform.",
                ", ".join(sorted(handlers)),
            )
            return ERROR

        # Determine which configuration files are to be loaded
        # Depends on whether the command is modifying.
        needs_single_file = action in ("get", "set", "unset", "edit")
        try:
            load_only = self._determine_file(options, need_value=needs_single_file)
        except PipError as exc:
            logger.error(exc.args[0])
            return ERROR

        # Load a new configuration
        self.configuration = Configuration(
            isolated=options.isolated_mode, load_only=load_only
        )
        self.configuration.load()

        # Error handling happens here, not in the action-handlers.
        handler = handlers[action]
        try:
            handler(options, args[1:])
        except PipError as exc:
            logger.error(exc.args[0])
            return ERROR

        return SUCCESS

    def _determine_file(self, options: Values, need_value: bool) -> Optional[Kind]:
        """Pick the configuration kind selected by the file flags.

        With no flag set, returns ``None`` unless ``need_value`` is true, in
        which case the site kind is chosen if any site configuration file
        exists and the user kind otherwise.

        Raises:
            PipError: if more than one file flag is set.
        """
        flag_states = (
            (kinds.USER, options.user_file),
            (kinds.GLOBAL, options.global_file),
            (kinds.SITE, options.site_file),
        )
        selected = [kind for kind, is_set in flag_states if is_set]

        if len(selected) > 1:
            raise PipError(
                "Need exactly one file to operate upon "
                "(--user, --site, --global) to perform."
            )
        if selected:
            return selected[0]
        if not need_value:
            return None

        # Default to user, unless there's a site file.
        site_files = get_configuration_files()[kinds.SITE]
        if any(os.path.exists(site_file) for site_file in site_files):
            return kinds.SITE
        return kinds.USER

    def list_values(self, options: Values, args: List[str]) -> None:
        """Print every ``key=value`` pair of the loaded configuration, sorted."""
        self._get_n_args(args, "list", n=0)

        for key, value in sorted(self.configuration.items()):
            write_output("%s=%r", key, value)

    def get_name(self, options: Values, args: List[str]) -> None:
        """Print the value stored for the single key given in ``args``."""
        key = self._get_n_args(args, "get [name]", n=1)
        write_output("%s", self.configuration.get_value(key))

    def set_name_value(self, options: Values, args: List[str]) -> None:
        """Store ``value`` under ``key`` and save the configuration."""
        key, value = self._get_n_args(args, "set [name] [value]", n=2)
        self.configuration.set_value(key, value)

        self._save_configuration()

    def unset_name(self, options: Values, args: List[str]) -> None:
        """Remove ``key`` and save the configuration."""
        key = self._get_n_args(args, "unset [name]", n=1)
        self.configuration.unset_value(key)

        self._save_configuration()

    def list_config_values(self, options: Values, args: List[str]) -> None:
        """List config key-value pairs across different config files"""
        self._get_n_args(args, "debug", n=0)

        self.print_env_var_values()
        # Walk every configuration file, report whether it exists and, if
        # so, print the key-value pairs of its variant.
        for variant, files in sorted(self.configuration.iter_config_files()):
            write_output("%s:", variant)
            for fname in files:
                with indent_log():
                    file_exists = os.path.exists(fname)
                    write_output("%s, exists: %r", fname, file_exists)
                    if file_exists:
                        self.print_config_file_values(variant)

    def print_config_file_values(self, variant: Kind) -> None:
        """Get key-value pairs from the file of a variant"""
        values = self.configuration.get_values_in_config(variant)
        for name, value in values.items():
            with indent_log():
                write_output("%s: %s", name, value)

    def print_env_var_values(self) -> None:
        """Get key-values pairs present as environment variables"""
        write_output("%s:", "env_var")
        with indent_log():
            for key, value in sorted(self.configuration.get_environ_vars()):
                write_output("%s=%r", f"PIP_{key.upper()}", value)

    def open_in_editor(self, options: Values, args: List[str]) -> None:
        """Open the configuration file to edit in the chosen editor.

        Raises:
            PipError: if no editor or file can be determined, if the file
                name contains a double quote, or if the editor exits with a
                non-zero status.
        """
        editor = self._determine_editor(options)

        fname = self.configuration.get_file_to_edit()
        if fname is None:
            raise PipError("Could not determine appropriate file.")
        if '"' in fname:
            # This shouldn't happen, unless we see a username like that.
            # If that happens, we'd appreciate a pull request fixing this.
            raise PipError(
                f'Can not open an editor for a file name containing "\n{fname}'
            )

        try:
            subprocess.check_call(f'{editor} "{fname}"', shell=True)
        except FileNotFoundError as e:
            # Name the editor in the error when no file name was recorded.
            if not e.filename:
                e.filename = editor
            raise
        except subprocess.CalledProcessError as e:
            raise PipError(f"Editor Subprocess exited with exit code {e.returncode}")

    def _get_n_args(self, args: List[str], example: str, n: int) -> Any:
        """Helper to make sure the command got the right number of arguments"""
        if len(args) == n:
            # A lone argument is returned bare; anything else as the list.
            return args[0] if n == 1 else args

        msg = (
            f"Got unexpected number of arguments, expected {n}. "
            f'(example: "{get_prog()} config {example}")'
        )
        raise PipError(msg)

    def _save_configuration(self) -> None:
        """Save the configuration after a modifying command has run.

        Raises:
            PipError: if saving fails for any reason; the underlying
                exception is logged first.
        """
        try:
            self.configuration.save()
        except Exception:
            logger.exception(
                "Unable to save configuration. Please report this as a bug."
            )
            raise PipError("Internal Error.")

    def _determine_editor(self, options: Values) -> str:
        """Return the editor from ``--editor``, ``VISUAL`` or ``EDITOR``.

        Raises:
            PipError: if none of the three sources provides an editor.
        """
        if options.editor is not None:
            return options.editor

        for env_name in ("VISUAL", "EDITOR"):
            if env_name in os.environ:
                return os.environ[env_name]

        raise PipError("Could not determine editor to use.")
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
import importlib.resources
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from optparse import Values
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pip._vendor
|
||||
from pip._vendor.certifi import where
|
||||
from pip._vendor.packaging.version import parse as parse_version
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.cmdoptions import make_target_python
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.configuration import Configuration
|
||||
from pip._internal.metadata import get_environment
|
||||
from pip._internal.utils.logging import indent_log
|
||||
from pip._internal.utils.misc import get_pip_version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def show_value(name: str, value: Any) -> None:
    """Log ``name`` and ``value`` as a single ``name: value`` info line."""
    line_format = "%s: %s"
    logger.info(line_format, name, value)
|
||||
|
||||
|
||||
def show_sys_implementation() -> None:
    """Log a ``sys.implementation`` heading followed by the indented name."""
    logger.info("sys.implementation:")
    with indent_log():
        show_value("name", sys.implementation.name)
|
||||
|
||||
|
||||
def create_vendor_txt_map() -> Dict[str, str]:
    """Parse ``pip._vendor``'s ``vendor.txt`` into a name -> version dict.

    Only lines containing ``==`` are considered.
    """
    with importlib.resources.open_text("pip._vendor", "vendor.txt") as vendor_file:
        # Keep only the lines that pin a version, and reduce each one to its
        # first space-separated token (dropping surrounding whitespace and
        # any trailing comment).
        pinned = [
            raw.strip().split(" ", 1)[0]
            for raw in vendor_file.readlines()
            if "==" in raw
        ]

    # Split each "name==version" entry into a key/value pair.
    return dict(entry.split("==", 1) for entry in pinned)
|
||||
|
||||
|
||||
def get_module_from_module_name(module_name: str) -> Optional[ModuleType]:
    """Import ``pip._vendor.<module_name>`` and return the module.

    The name is lower-cased and hyphens become underscores before the
    import.  Returns ``None`` only for ``truststore`` failing to import on
    Python older than 3.10; any other ``ImportError`` propagates.
    """
    # Module name can be uppercase in vendor.txt for some reason...
    normalized = module_name.lower().replace("-", "_")
    # PATCH: setuptools is actually only pkg_resources.
    module_name = "pkg_resources" if normalized == "setuptools" else normalized

    try:
        __import__(f"pip._vendor.{module_name}", globals(), locals(), level=0)
        return getattr(pip._vendor, module_name)
    except ImportError:
        # We allow 'truststore' to fail to import due
        # to being unavailable on Python 3.9 and earlier.
        truststore_unavailable = (
            module_name == "truststore" and sys.version_info < (3, 10)
        )
        if truststore_unavailable:
            return None
        raise
|
||||
|
||||
|
||||
def get_vendor_version_from_module(module_name: str) -> Optional[str]:
    """Return the version of the vendored module ``module_name``, if known.

    The module's ``__version__`` attribute is preferred.  When the module
    was imported but has no such attribute, the distribution metadata found
    next to the module's file is consulted instead.
    """
    module = get_module_from_module_name(module_name)
    version = getattr(module, "__version__", None)

    if version or not module:
        return version

    # Try to find version in debundled module info.
    assert module.__file__ is not None
    env = get_environment([os.path.dirname(module.__file__)])
    dist = env.get_distribution(module_name)
    if dist:
        return str(dist.version)
    return version
|
||||
|
||||
|
||||
def show_actual_vendor_versions(vendor_txt_versions: Dict[str, str]) -> None:
    """Log the actual version and print extra info if there is
    a conflict or if the actual version could not be imported.
    """
    for module_name, expected_version in vendor_txt_versions.items():
        actual_version = get_vendor_version_from_module(module_name)

        if not actual_version:
            # Nothing could be determined: report the vendor.txt value.
            extra_message = (
                " (Unable to locate actual module version, using"
                " vendor.txt specified version)"
            )
            actual_version = expected_version
        elif parse_version(actual_version) != parse_version(expected_version):
            extra_message = (
                " (CONFLICT: vendor.txt suggests version should"
                f" be {expected_version})"
            )
        else:
            extra_message = ""

        logger.info("%s==%s%s", module_name, actual_version, extra_message)
|
||||
|
||||
|
||||
def show_vendor_versions() -> None:
    """Log a heading, then the indented versions of the vendored libraries."""
    logger.info("vendored library versions:")

    expected_versions = create_vendor_txt_map()
    with indent_log():
        show_actual_vendor_versions(expected_versions)
|
||||
|
||||
|
||||
def show_tags(options: Values) -> None:
    """Log the compatible tags of the target Python built from ``options``.

    Unless ``options.verbose`` is at least 1, at most ``tag_limit`` tags are
    listed, followed by a hint on how to see all of them.
    """
    tag_limit = 10

    target_python = make_target_python(options)
    tags = target_python.get_sorted_tags()

    # Display the target options that were explicitly provided.
    formatted_target = target_python.format_given()
    suffix = f" (target: {formatted_target})" if formatted_target else ""

    msg = f"Compatible tags: {len(tags)}{suffix}"
    logger.info(msg)

    tags_limited = options.verbose < 1 and len(tags) > tag_limit
    if tags_limited:
        tags = tags[:tag_limit]

    with indent_log():
        for tag in tags:
            logger.info(str(tag))

        if tags_limited:
            msg = f"...\n[First {tag_limit} tags shown. Pass --verbose to show all.]"
            logger.info(msg)
|
||||
|
||||
|
||||
def ca_bundle_info(config: Configuration) -> str:
    """Summarize which configuration levels are present in ``config``.

    The level of a key is the part before its first ``"."``.

    Returns:
        ``"Not specified"`` when ``config`` has no items; ``"global"`` when
        none of the ``install``, ``wheel`` or ``download`` levels is
        present; otherwise the comma-separated level names with
        ``"global"`` left out, in sorted order.
    """
    levels = {key.split(".", 1)[0] for key, _ in config.items()}
    if not levels:
        return "Not specified"

    levels_that_override_global = {"install", "wheel", "download"}
    if not levels & levels_that_override_global:
        return "global"

    # Sort the names: iteration order of a set of strings is arbitrary, so
    # joining the set directly gives an unstable ordering between runs.
    return ", ".join(sorted(levels - {"global"}))
|
||||
|
||||
|
||||
class DebugCommand(Command):
    """
    Display debug information.
    """

    usage = """
      %prog <options>"""
    ignore_require_venv = True

    def add_options(self) -> None:
        """Register the target-Python options and load the parser's config."""
        cmdoptions.add_target_python_options(self.cmd_opts)
        self.parser.insert_option_group(0, self.cmd_opts)
        self.parser.config.load()

    def run(self, options: Values, args: List[str]) -> int:
        """Log interpreter, certificate, vendoring and tag details.

        Always returns ``SUCCESS``.
        """
        logger.warning(
            "This command is only meant for debugging. "
            "Do not use this with automation for parsing and getting these "
            "details, since the output and options of this command may "
            "change without notice."
        )

        # Interpreter and encoding details.
        show_value("pip version", get_pip_version())
        show_value("sys.version", sys.version)
        show_value("sys.executable", sys.executable)
        show_value("sys.getdefaultencoding", sys.getdefaultencoding())
        show_value("sys.getfilesystemencoding", sys.getfilesystemencoding())
        show_value("locale.getpreferredencoding", locale.getpreferredencoding())
        show_value("sys.platform", sys.platform)
        show_sys_implementation()

        # Certificate bundle sources.
        show_value("'cert' config value", ca_bundle_info(self.parser.config))
        for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE"):
            show_value(env_name, os.environ.get(env_name))
        show_value("pip._vendor.certifi.where()", where())
        show_value("pip._vendor.DEBUNDLED", pip._vendor.DEBUNDLED)

        show_vendor_versions()

        show_tags(options)

        return SUCCESS
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
import logging
|
||||
import os
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.cmdoptions import make_target_python
|
||||
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.operations.build.build_tracker import get_build_tracker
|
||||
from pip._internal.req.req_install import check_legacy_setup_py_options
|
||||
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
|
||||
from pip._internal.utils.temp_dir import TempDirectory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DownloadCommand(RequirementCommand):
    """
    Download packages from:

    - PyPI (and other indexes) using requirement specifiers.
    - VCS project urls.
    - Local project directories.
    - Local or remote source archives.

    pip also supports downloading from "requirements files", which provide
    an easy way to specify a whole environment to be downloaded.
    """

    usage = """
      %prog [options] <requirement specifier> [package-index-options] ...
      %prog [options] -r <requirements file> [package-index-options] ...
      %prog [options] <vcs project url> ...
      %prog [options] <local project path> ...
      %prog [options] <archive url/path> ..."""

    def add_options(self) -> None:
        """Register the command, target-Python and index option groups."""
        shared_option_factories = (
            cmdoptions.constraints,
            cmdoptions.requirements,
            cmdoptions.no_deps,
            cmdoptions.global_options,
            cmdoptions.no_binary,
            cmdoptions.only_binary,
            cmdoptions.prefer_binary,
            cmdoptions.src,
            cmdoptions.pre,
            cmdoptions.require_hashes,
            cmdoptions.progress_bar,
            cmdoptions.no_build_isolation,
            cmdoptions.use_pep517,
            cmdoptions.no_use_pep517,
            cmdoptions.check_build_deps,
            cmdoptions.ignore_requires_python,
        )
        for make_option in shared_option_factories:
            self.cmd_opts.add_option(make_option())

        self.cmd_opts.add_option(
            "-d",
            "--dest",
            "--destination-dir",
            "--destination-directory",
            dest="download_dir",
            metavar="dir",
            default=os.curdir,
            help="Download packages into <dir>.",
        )

        cmdoptions.add_target_python_options(self.cmd_opts)

        index_opts = cmdoptions.make_option_group(
            cmdoptions.index_group,
            self.parser,
        )

        self.parser.insert_option_group(0, index_opts)
        self.parser.insert_option_group(0, self.cmd_opts)

    @with_cleanup
    def run(self, options: Values, args: List[str]) -> int:
        """Resolve the requested requirements and save them to the download dir.

        Every resolved requirement whose ``satisfied_by`` is ``None`` is
        saved and its name reported.  Returns ``SUCCESS``.
        """
        options.ignore_installed = True
        # editable doesn't really make sense for `pip download`, but the bowels
        # of the RequirementSet code require that property.
        options.editables = []

        cmdoptions.check_dist_restriction(options)

        download_dir = normalize_path(options.download_dir)
        options.download_dir = download_dir
        ensure_dir(download_dir)

        session = self.get_default_session(options)

        finder = self._build_package_finder(
            options=options,
            session=session,
            target_python=make_target_python(options),
            ignore_requires_python=options.ignore_requires_python,
        )

        build_tracker = self.enter_context(get_build_tracker())

        temp_build_dir = TempDirectory(
            delete=not options.no_clean,
            kind="download",
            globally_managed=True,
        )

        reqs = self.get_requirements(args, options, finder, session)
        check_legacy_setup_py_options(options, reqs)

        preparer = self.make_requirement_preparer(
            temp_build_dir=temp_build_dir,
            options=options,
            build_tracker=build_tracker,
            session=session,
            finder=finder,
            download_dir=options.download_dir,
            use_user_site=False,
            verbosity=self.verbosity,
        )

        resolver = self.make_resolver(
            preparer=preparer,
            finder=finder,
            options=options,
            ignore_requires_python=options.ignore_requires_python,
            use_pep517=options.use_pep517,
            py_version_info=options.python_version,
        )

        self.trace_basic_info(finder)

        requirement_set = resolver.resolve(reqs, check_supported_wheels=True)

        downloaded: List[str] = []
        for req in requirement_set.requirements.values():
            if req.satisfied_by is not None:
                continue
            assert req.name is not None
            preparer.save_linked_requirement(req)
            downloaded.append(req.name)

        preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
        requirement_set.warn_legacy_versions_and_specifiers()

        if downloaded:
            write_output("Successfully downloaded %s", " ".join(downloaded))

        return SUCCESS
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
import sys
|
||||
from optparse import Values
|
||||
from typing import AbstractSet, List
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.operations.freeze import freeze
|
||||
from pip._internal.utils.compat import stdlib_pkgs
|
||||
|
||||
|
||||
def _should_suppress_build_backends() -> bool:
|
||||
return sys.version_info < (3, 12)
|
||||
|
||||
|
||||
# Build the set of package names that `freeze` skips unless --all is given.
# "pip" is always in the set; build-backend names are added depending on
# _should_suppress_build_backends().
# NOTE(review): the first `pkgs |=` adds a subset of what the second one
# adds, so it looks redundant.  Indentation is lost in this view, so it is
# unclear whether the second union is also guarded by the `if` -- confirm
# against the real file before simplifying.
def _dev_pkgs() -> AbstractSet[str]:
|
||||
pkgs = {"pip"}
|
||||
|
||||
if _should_suppress_build_backends():
|
||||
pkgs |= {"setuptools", "distribute", "wheel"}
|
||||
pkgs |= {"setuptools", "distribute", "wheel", "pkg-resources"}
|
||||
|
||||
return pkgs
|
||||
|
||||
|
||||
class FreezeCommand(Command):
    """
    Output installed packages in requirements format.

    packages are listed in a case-insensitive sorted order.
    """

    usage = """
      %prog [options]"""
    log_streams = ("ext://sys.stderr", "ext://sys.stderr")

    def add_options(self) -> None:
        """Register the freeze-specific options on the command's parser."""
        add_option = self.cmd_opts.add_option

        add_option(
            "-r",
            "--requirement",
            dest="requirements",
            action="append",
            default=[],
            metavar="file",
            help=(
                "Use the order in the given requirements file and its "
                "comments when generating output. This option can be "
                "used multiple times."
            ),
        )
        add_option(
            "-l",
            "--local",
            dest="local",
            action="store_true",
            default=False,
            help=(
                "If in a virtualenv that has global access, do not output "
                "globally-installed packages."
            ),
        )
        add_option(
            "--user",
            dest="user",
            action="store_true",
            default=False,
            help="Only output packages installed in user-site.",
        )
        add_option(cmdoptions.list_path())
        add_option(
            "--all",
            dest="freeze_all",
            action="store_true",
            help=(
                "Do not skip these packages in the output:"
                " {}".format(", ".join(_dev_pkgs()))
            ),
        )
        add_option(
            "--exclude-editable",
            dest="exclude_editable",
            action="store_true",
            help="Exclude editable package from output.",
        )
        add_option(cmdoptions.list_exclude())

        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Write one line per frozen requirement to standard output.

        The skip set starts from ``stdlib_pkgs``; ``_dev_pkgs()`` is added
        unless ``--all`` was given, and so are any explicit excludes.
        Returns ``SUCCESS``.
        """
        skip = set(stdlib_pkgs)
        if not options.freeze_all:
            skip.update(_dev_pkgs())

        if options.excludes:
            skip.update(options.excludes)

        cmdoptions.check_list_path_option(options)

        frozen_lines = freeze(
            requirement=options.requirements,
            local_only=options.local,
            user_only=options.user,
            paths=options.path,
            isolated=options.isolated_mode,
            skip=skip,
            exclude_editable=options.exclude_editable,
        )
        for line in frozen_lines:
            sys.stdout.write(line + "\n")
        return SUCCESS
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
import hashlib
|
||||
import logging
|
||||
import sys
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.utils.hashes import FAVORITE_HASH, STRONG_HASHES
|
||||
from pip._internal.utils.misc import read_chunks, write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HashCommand(Command):
    """
    Compute a hash of a local package archive.

    These can be used with --hash in a requirements file to do repeatable
    installs.
    """

    usage = "%prog [options] <file> ..."
    ignore_require_venv = True

    def add_options(self) -> None:
        """Register ``-a/--algorithm``, restricted to ``STRONG_HASHES``."""
        algorithm_help = "The hash algorithm to use: one of {}".format(
            ", ".join(STRONG_HASHES)
        )
        self.cmd_opts.add_option(
            "-a",
            "--algorithm",
            dest="algorithm",
            choices=STRONG_HASHES,
            action="store",
            default=FAVORITE_HASH,
            help=algorithm_help,
        )
        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Print a ``--hash=<algorithm>:<digest>`` line for every given file.

        Prints the usage to stderr and returns ``ERROR`` when no file is
        given; otherwise returns ``SUCCESS``.
        """
        if not args:
            self.parser.print_usage(sys.stderr)
            return ERROR

        algorithm = options.algorithm
        for path in args:
            digest = _hash_of_file(path, algorithm)
            write_output("%s:\n--hash=%s:%s", path, algorithm, digest)
        return SUCCESS
|
||||
|
||||
|
||||
def _hash_of_file(path: str, algorithm: str) -> str:
    """Return the hexadecimal hash digest of the file at *path*.

    :param path: Location of the file; it is opened in binary mode.
    :param algorithm: Name handed to ``hashlib.new`` to pick the hash.
    :return: The digest as returned by ``hexdigest()``.
    """
    with open(path, "rb") as archive:
        # Named ``hasher`` rather than ``hash`` so the builtin is not shadowed.
        hasher = hashlib.new(algorithm)
        for chunk in read_chunks(archive):
            hasher.update(chunk)
    return hasher.hexdigest()
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.exceptions import CommandError
|
||||
|
||||
|
||||
class HelpCommand(Command):
    """Show help for commands"""

    usage = """
      %prog <command>"""
    ignore_require_venv = True

    def run(self, options: Values, args: List[str]) -> int:
        """Print the help text of the command named by the first argument.

        Raises ``CommandError`` for an unknown command name, mentioning the
        closest known command when ``get_similar_commands`` offers one.
        """
        from pip._internal.commands import (
            commands_dict,
            create_command,
            get_similar_commands,
        )

        # 'pip help' with no args is handled by pip.__init__.parseopt()
        if not args:
            return SUCCESS

        cmd_name = args[0]  # the command we need help for

        if cmd_name in commands_dict:
            create_command(cmd_name).parser.print_help()
            return SUCCESS

        suggestion = get_similar_commands(cmd_name)
        problem = [f'unknown command "{cmd_name}"']
        if suggestion:
            problem.append(f'maybe you meant "{suggestion}"')
        raise CommandError(" - ".join(problem))
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
import logging
|
||||
from optparse import Values
|
||||
from typing import Any, Iterable, List, Optional, Union
|
||||
|
||||
from pip._vendor.packaging.version import LegacyVersion, Version
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.req_command import IndexGroupCommand
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.commands.search import print_dist_installation_info
|
||||
from pip._internal.exceptions import CommandError, DistributionNotFound, PipError
|
||||
from pip._internal.index.collector import LinkCollector
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.models.selection_prefs import SelectionPreferences
|
||||
from pip._internal.models.target_python import TargetPython
|
||||
from pip._internal.network.session import PipSession
|
||||
from pip._internal.utils.misc import write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IndexCommand(IndexGroupCommand):
    """
    Inspect information available from package indexes.
    """

    ignore_require_venv = True
    usage = """
        %prog versions <package>
    """

    def add_options(self) -> None:
        """Register target-python, selection and index options on the parser."""
        cmdoptions.add_target_python_options(self.cmd_opts)

        register = self.cmd_opts.add_option
        register(cmdoptions.ignore_requires_python())
        register(cmdoptions.pre())
        register(cmdoptions.no_binary())
        register(cmdoptions.only_binary())

        index_opts = cmdoptions.make_option_group(
            cmdoptions.index_group,
            self.parser,
        )

        self.parser.insert_option_group(0, index_opts)
        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Dispatch to the handler of the sub-action named by ``args[0]``.

        Returns ``ERROR`` when no known action was given or when the handler
        raises a ``PipError``; returns ``SUCCESS`` otherwise.
        """
        handlers = {
            "versions": self.get_available_package_versions,
        }

        logger.warning(
            "pip index is currently an experimental command. "
            "It may be removed/changed in a future release "
            "without prior warning."
        )

        # Determine action
        requested = args[0] if args else None
        if requested is None or requested not in handlers:
            logger.error(
                "Need an action (%s) to perform.",
                ", ".join(sorted(handlers)),
            )
            return ERROR

        # Error handling happens here, not in the action-handlers.
        try:
            handlers[requested](options, args[1:])
        except PipError as e:
            logger.error(e.args[0])
            return ERROR

        return SUCCESS

    def _build_package_finder(
        self,
        options: Values,
        session: PipSession,
        target_python: Optional[TargetPython] = None,
        ignore_requires_python: Optional[bool] = None,
    ) -> PackageFinder:
        """
        Create a package finder appropriate to the index command.
        """
        # Pass allow_yanked=False to ignore yanked versions.
        return PackageFinder.create(
            link_collector=LinkCollector.create(session, options=options),
            selection_prefs=SelectionPreferences(
                allow_yanked=False,
                allow_all_prereleases=options.pre,
                ignore_requires_python=ignore_requires_python,
            ),
            target_python=target_python,
        )

    def get_available_package_versions(self, options: Values, args: List[Any]) -> None:
        """Print the versions the configured indexes offer for one package.

        Raises ``CommandError`` unless exactly one argument is given and
        ``DistributionNotFound`` when no version remains after filtering.
        """
        if len(args) != 1:
            raise CommandError("You need to specify exactly one argument")

        target_python = cmdoptions.make_target_python(options)
        query = args[0]

        with self._build_session(options) as session:
            finder = self._build_package_finder(
                options=options,
                session=session,
                target_python=target_python,
                ignore_requires_python=options.ignore_requires_python,
            )

            candidate_versions: Iterable[Union[LegacyVersion, Version]] = (
                found.version for found in finder.find_all_candidates(query)
            )

            if not options.pre:
                # Remove prereleases
                candidate_versions = (
                    ver for ver in candidate_versions if not ver.is_prerelease
                )
            unique_versions = set(candidate_versions)

            if not unique_versions:
                raise DistributionNotFound(
                    f"No matching distribution found for {query}"
                )

            newest_first = sorted(unique_versions, reverse=True)
            formatted_versions = list(map(str, newest_first))
            latest = formatted_versions[0]

        write_output(f"{query} ({latest})")
        write_output("Available versions: {}".format(", ".join(formatted_versions)))
        print_dist_installation_info(query, latest)
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
import logging
|
||||
from optparse import Values
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pip._vendor.packaging.markers import default_environment
|
||||
from pip._vendor.rich import print_json
|
||||
|
||||
from pip import __version__
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.req_command import Command
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.metadata import BaseDistribution, get_environment
|
||||
from pip._internal.utils.compat import stdlib_pkgs
|
||||
from pip._internal.utils.urls import path_to_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InspectCommand(Command):
    """
    Inspect the content of a Python environment and produce a report in JSON format.
    """

    ignore_require_venv = True
    usage = """
      %prog [options]"""

    def add_options(self) -> None:
        """Register ``--local``, ``--user`` and the path option on the parser."""
        self.cmd_opts.add_option(
            "--local",
            action="store_true",
            default=False,
            help=(
                "If in a virtualenv that has global access, do not list "
                "globally-installed packages."
            ),
        )
        self.cmd_opts.add_option(
            "--user",
            dest="user",
            action="store_true",
            default=False,
            help="Only output packages installed in user-site.",
        )
        self.cmd_opts.add_option(cmdoptions.list_path())
        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Print a JSON report of the installed distributions and return SUCCESS.

        The report holds the keys ``version``, ``pip_version``, ``installed``
        (one entry per distribution, see ``_dist_to_dict``) and
        ``environment`` (the value of ``default_environment()``).
        Standard-library packages are skipped.
        """
        cmdoptions.check_list_path_option(options)
        dists = get_environment(options.path).iter_installed_distributions(
            local_only=options.local,
            user_only=options.user,
            skip=set(stdlib_pkgs),
        )
        output = {
            "version": "1",
            "pip_version": __version__,
            "installed": [self._dist_to_dict(dist) for dist in dists],
            "environment": default_environment(),
            # TODO tags? scheme?
        }
        print_json(data=output)
        return SUCCESS

    def _dist_to_dict(self, dist: BaseDistribution) -> Dict[str, Any]:
        """Build the report entry for a single installed distribution.

        ``metadata`` and ``metadata_location`` are always present;
        ``direct_url``, ``installer`` and ``requested`` are added only when
        the corresponding information is available on *dist*.
        """
        res: Dict[str, Any] = {
            "metadata": dist.metadata_dict,
            "metadata_location": dist.info_location,
        }
        # direct_url. Note that we don't have download_info (as in the installation
        # report) since it is not recorded in installed metadata.
        direct_url = dist.direct_url
        if direct_url is not None:
            res["direct_url"] = direct_url.to_dict()
        else:
            # Emulate direct_url for legacy editable installs.
            editable_project_location = dist.editable_project_location
            if editable_project_location is not None:
                res["direct_url"] = {
                    "url": path_to_url(editable_project_location),
                    "dir_info": {
                        "editable": True,
                    },
                }
        # installer: read the attribute once and test the captured value, so
        # the value that is checked is the value that is reported.
        installer = dist.installer
        if installer:
            res["installer"] = installer
        # requested
        if dist.installed_with_dist_info:
            res["requested"] = dist.requested
        return res
|
||||
|
|
@ -0,0 +1,774 @@
|
|||
import errno
|
||||
import json
|
||||
import operator
|
||||
import os
|
||||
import shutil
|
||||
import site
|
||||
from optparse import SUPPRESS_HELP, Values
|
||||
from typing import List, Optional
|
||||
|
||||
from pip._vendor.rich import print_json
|
||||
|
||||
from pip._internal.cache import WheelCache
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.cmdoptions import make_target_python
|
||||
from pip._internal.cli.req_command import (
|
||||
RequirementCommand,
|
||||
warn_if_run_as_root,
|
||||
with_cleanup,
|
||||
)
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.exceptions import CommandError, InstallationError
|
||||
from pip._internal.locations import get_scheme
|
||||
from pip._internal.metadata import get_environment
|
||||
from pip._internal.models.installation_report import InstallationReport
|
||||
from pip._internal.operations.build.build_tracker import get_build_tracker
|
||||
from pip._internal.operations.check import ConflictDetails, check_install_conflicts
|
||||
from pip._internal.req import install_given_reqs
|
||||
from pip._internal.req.req_install import (
|
||||
InstallRequirement,
|
||||
check_legacy_setup_py_options,
|
||||
)
|
||||
from pip._internal.utils.compat import WINDOWS
|
||||
from pip._internal.utils.filesystem import test_writable_dir
|
||||
from pip._internal.utils.logging import getLogger
|
||||
from pip._internal.utils.misc import (
|
||||
check_externally_managed,
|
||||
ensure_dir,
|
||||
get_pip_version,
|
||||
protect_pip_from_modification_on_windows,
|
||||
write_output,
|
||||
)
|
||||
from pip._internal.utils.temp_dir import TempDirectory
|
||||
from pip._internal.utils.virtualenv import (
|
||||
running_under_virtualenv,
|
||||
virtualenv_no_global,
|
||||
)
|
||||
from pip._internal.wheel_builder import build, should_build_for_install_command
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class InstallCommand(RequirementCommand):
|
||||
"""
|
||||
Install packages from:
|
||||
|
||||
- PyPI (and other indexes) using requirement specifiers.
|
||||
- VCS project urls.
|
||||
- Local project directories.
|
||||
- Local or remote source archives.
|
||||
|
||||
pip also supports installing from "requirements files", which provide
|
||||
an easy way to specify a whole environment to be installed.
|
||||
"""
|
||||
|
||||
usage = """
|
||||
%prog [options] <requirement specifier> [package-index-options] ...
|
||||
%prog [options] -r <requirements file> [package-index-options] ...
|
||||
%prog [options] [-e] <vcs project url> ...
|
||||
%prog [options] [-e] <local project path> ...
|
||||
%prog [options] <archive url/path> ..."""
|
||||
|
||||
def add_options(self) -> None:
    """Register every ``pip install`` option on the command's parser.

    Command-specific options are added to ``self.cmd_opts``; the package
    index options form a separate group.  Both groups are inserted at the
    front of ``self.parser``, and ``--report`` is added to ``self.cmd_opts``
    afterwards.
    """
    register = self.cmd_opts.add_option

    register(cmdoptions.requirements())
    register(cmdoptions.constraints())
    register(cmdoptions.no_deps())
    register(cmdoptions.pre())

    register(cmdoptions.editable())
    register(
        "--dry-run",
        action="store_true",
        dest="dry_run",
        default=False,
        help=(
            "Don't actually install anything, just print what would be. "
            "Can be used in combination with --ignore-installed "
            "to 'resolve' the requirements."
        ),
    )
    register(
        "-t",
        "--target",
        dest="target_dir",
        metavar="dir",
        default=None,
        help=(
            "Install packages into <dir>. "
            "By default this will not replace existing files/folders in "
            "<dir>. Use --upgrade to replace existing packages in <dir> "
            "with new versions."
        ),
    )
    cmdoptions.add_target_python_options(self.cmd_opts)

    register(
        "--user",
        dest="use_user_site",
        action="store_true",
        help=(
            "Install to the Python user install directory for your "
            "platform. Typically ~/.local/, or %APPDATA%\\Python on "
            "Windows. (See the Python documentation for site.USER_BASE "
            "for full details.)"
        ),
    )
    register(
        "--no-user",
        dest="use_user_site",
        action="store_false",
        help=SUPPRESS_HELP,
    )
    register(
        "--root",
        dest="root_path",
        metavar="dir",
        default=None,
        help="Install everything relative to this alternate root directory.",
    )
    register(
        "--prefix",
        dest="prefix_path",
        metavar="dir",
        default=None,
        help=(
            "Installation prefix where lib, bin and other top-level "
            "folders are placed. Note that the resulting installation may "
            "contain scripts and other resources which reference the "
            "Python interpreter of pip, and not that of ``--prefix``. "
            "See also the ``--python`` option if the intention is to "
            "install packages into another (possibly pip-free) "
            "environment."
        ),
    )

    register(cmdoptions.src())

    register(
        "-U",
        "--upgrade",
        dest="upgrade",
        action="store_true",
        help=(
            "Upgrade all specified packages to the newest available "
            "version. The handling of dependencies depends on the "
            "upgrade-strategy used."
        ),
    )

    register(
        "--upgrade-strategy",
        dest="upgrade_strategy",
        default="only-if-needed",
        choices=["only-if-needed", "eager"],
        help=(
            "Determines how dependency upgrading should be handled "
            "[default: %default]. "
            '"eager" - dependencies are upgraded regardless of '
            "whether the currently installed version satisfies the "
            "requirements of the upgraded package(s). "
            '"only-if-needed" - are upgraded only when they do not '
            "satisfy the requirements of the upgraded package(s)."
        ),
    )

    register(
        "--force-reinstall",
        dest="force_reinstall",
        action="store_true",
        help="Reinstall all packages even if they are already up-to-date.",
    )

    register(
        "-I",
        "--ignore-installed",
        dest="ignore_installed",
        action="store_true",
        help=(
            "Ignore the installed packages, overwriting them. "
            "This can break your system if the existing package "
            "is of a different version or was installed "
            "with a different package manager!"
        ),
    )

    register(cmdoptions.ignore_requires_python())
    register(cmdoptions.no_build_isolation())
    register(cmdoptions.use_pep517())
    register(cmdoptions.no_use_pep517())
    register(cmdoptions.check_build_deps())
    register(cmdoptions.override_externally_managed())

    register(cmdoptions.config_settings())
    register(cmdoptions.global_options())

    register(
        "--compile",
        action="store_true",
        dest="compile",
        default=True,
        help="Compile Python source files to bytecode",
    )

    register(
        "--no-compile",
        action="store_false",
        dest="compile",
        help="Do not compile Python source files to bytecode",
    )

    register(
        "--no-warn-script-location",
        action="store_false",
        dest="warn_script_location",
        default=True,
        help="Do not warn when installing scripts outside PATH",
    )
    register(
        "--no-warn-conflicts",
        action="store_false",
        dest="warn_about_conflicts",
        default=True,
        help="Do not warn about broken dependencies",
    )
    register(cmdoptions.no_binary())
    register(cmdoptions.only_binary())
    register(cmdoptions.prefer_binary())
    register(cmdoptions.require_hashes())
    register(cmdoptions.progress_bar())
    register(cmdoptions.root_user_action())

    index_opts = cmdoptions.make_option_group(
        cmdoptions.index_group,
        self.parser,
    )

    self.parser.insert_option_group(0, index_opts)
    self.parser.insert_option_group(0, self.cmd_opts)

    register(
        "--report",
        dest="json_report_file",
        metavar="file",
        default=None,
        help=(
            "Generate a JSON file describing what pip did to install "
            "the provided requirements. "
            "Can be used in combination with --dry-run and --ignore-installed "
            "to 'resolve' the requirements. "
            "When - is used as file name it writes to stdout. "
            "When writing to stdout, please combine with the --quiet option "
            "to avoid mixing pip logging output with JSON output."
        ),
    )
|
||||
|
||||
@with_cleanup
def run(self, options: Values, args: List[str]) -> int:
    """Resolve, build and install the requested requirements.

    Returns ``SUCCESS`` after an install or a dry run and ``ERROR`` when an
    ``OSError`` is raised while resolving or installing.  Raises
    ``CommandError`` for incompatible options and ``InstallationError`` when
    wheels could not be built.
    """
    if options.use_user_site and options.target_dir is not None:
        raise CommandError("Can not combine '--user' and '--target'")

    # Check whether the environment we're installing into is externally
    # managed, as specified in PEP 668. Specifying --root, --target, or
    # --prefix disables the check, since there's no reliable way to locate
    # the EXTERNALLY-MANAGED file for those cases. An exception is also
    # made specifically for "--dry-run --report" for convenience.
    targets_current_env = (
        not (options.dry_run and options.json_report_file)
        and options.root_path is None
        and options.target_dir is None
        and options.prefix_path is None
    )
    if targets_current_env and not options.override_externally_managed:
        check_externally_managed()

    # The configured strategy only applies when an upgrade was requested.
    upgrade_strategy = (
        options.upgrade_strategy if options.upgrade else "to-satisfy-only"
    )

    cmdoptions.check_dist_restriction(options, check_target=True)

    logger.verbose("Using %s", get_pip_version())
    options.use_user_site = decide_user_install(
        options.use_user_site,
        prefix_path=options.prefix_path,
        target_dir=options.target_dir,
        root_path=options.root_path,
        isolated_mode=options.isolated_mode,
    )

    target_temp_dir: Optional[TempDirectory] = None
    target_temp_dir_path: Optional[str] = None
    if options.target_dir:
        options.ignore_installed = True
        target_path = os.path.abspath(options.target_dir)
        options.target_dir = target_path
        if os.path.exists(target_path) and not os.path.isdir(target_path):
            raise CommandError(
                "Target path exists but is not a directory, will not continue."
            )

        # Create a target directory for using with the target option
        target_temp_dir = TempDirectory(kind="target")
        target_temp_dir_path = target_temp_dir.path
        self.enter_context(target_temp_dir)

    global_options = options.global_options or []

    session = self.get_default_session(options)

    target_python = make_target_python(options)
    finder = self._build_package_finder(
        options=options,
        session=session,
        target_python=target_python,
        ignore_requires_python=options.ignore_requires_python,
    )
    build_tracker = self.enter_context(get_build_tracker())

    build_dir = TempDirectory(
        delete=not options.no_clean,
        kind="install",
        globally_managed=True,
    )

    try:
        requirements = self.get_requirements(args, options, finder, session)
        check_legacy_setup_py_options(options, requirements)

        wheel_cache = WheelCache(options.cache_dir)

        # Only when installing is it permitted to use PEP 660.
        # In other circumstances (pip wheel, pip download) we generate
        # regular (i.e. non editable) metadata and wheels.
        for requirement in requirements:
            requirement.permit_editable_wheels = True

        preparer = self.make_requirement_preparer(
            temp_build_dir=build_dir,
            options=options,
            build_tracker=build_tracker,
            session=session,
            finder=finder,
            use_user_site=options.use_user_site,
            verbosity=self.verbosity,
        )
        resolver = self.make_resolver(
            preparer=preparer,
            finder=finder,
            options=options,
            wheel_cache=wheel_cache,
            use_user_site=options.use_user_site,
            ignore_installed=options.ignore_installed,
            ignore_requires_python=options.ignore_requires_python,
            force_reinstall=options.force_reinstall,
            upgrade_strategy=upgrade_strategy,
            use_pep517=options.use_pep517,
        )

        self.trace_basic_info(finder)

        requirement_set = resolver.resolve(
            requirements, check_supported_wheels=not options.target_dir
        )

        if options.json_report_file:
            report = InstallationReport(requirement_set.requirements_to_install)
            if options.json_report_file == "-":
                print_json(data=report.to_dict())
            else:
                with open(
                    options.json_report_file, "w", encoding="utf-8"
                ) as report_file:
                    json.dump(
                        report.to_dict(),
                        report_file,
                        indent=2,
                        ensure_ascii=False,
                    )

        if options.dry_run:
            # In non dry-run mode, the legacy versions and specifiers check
            # will be done as part of conflict detection.
            requirement_set.warn_legacy_versions_and_specifiers()
            would_install_items = sorted(
                (r.metadata["name"], r.metadata["version"])
                for r in requirement_set.requirements_to_install
            )
            if would_install_items:
                write_output(
                    "Would install %s",
                    " ".join("-".join(pair) for pair in would_install_items),
                )
            return SUCCESS

        try:
            pip_req = requirement_set.get_requirement("pip")
        except KeyError:
            modifying_pip = False
        else:
            # If we're not replacing an already installed pip,
            # we're not modifying it.
            modifying_pip = pip_req.satisfied_by is None
        protect_pip_from_modification_on_windows(modifying_pip=modifying_pip)

        reqs_to_build = [
            candidate
            for candidate in requirement_set.requirements.values()
            if should_build_for_install_command(candidate)
        ]

        _, build_failures = build(
            reqs_to_build,
            wheel_cache=wheel_cache,
            verify=True,
            build_options=[],
            global_options=global_options,
        )

        if build_failures:
            failed_names = ", ".join(r.name for r in build_failures)  # type: ignore
            raise InstallationError(
                "Could not build wheels for {}, which is required to "
                "install pyproject.toml-based projects".format(failed_names)
            )

        to_install = resolver.get_installation_order(requirement_set)

        # Check for conflicts in the package set we're installing.
        conflicts: Optional[ConflictDetails] = None
        if not options.ignore_dependencies and options.warn_about_conflicts:
            conflicts = self._determine_conflicts(to_install)

        # Don't warn about script install locations if
        # --target or --prefix has been specified
        warn_script_location = options.warn_script_location
        if options.target_dir or options.prefix_path:
            warn_script_location = False

        installed = install_given_reqs(
            to_install,
            global_options,
            root=options.root_path,
            home=target_temp_dir_path,
            prefix=options.prefix_path,
            warn_script_location=warn_script_location,
            use_user_site=options.use_user_site,
            pycompile=options.compile,
        )

        lib_locations = get_lib_location_guesses(
            user=options.use_user_site,
            home=target_temp_dir_path,
            root=options.root_path,
            prefix=options.prefix_path,
            isolated=options.isolated_mode,
        )
        env = get_environment(lib_locations)

        installed.sort(key=operator.attrgetter("name"))
        summary_parts = []
        for result in installed:
            label = result.name
            # Best effort: append the installed version when it can be
            # looked up; any failure leaves the bare name in the summary.
            try:
                installed_dist = env.get_distribution(label)
                if installed_dist is not None:
                    label = f"{label}-{installed_dist.version}"
            except Exception:
                pass
            summary_parts.append(label)

        if conflicts is not None:
            self._warn_about_conflicts(
                conflicts,
                resolver_variant=self.determine_resolver_variant(options),
            )

        installed_desc = " ".join(summary_parts)
        if installed_desc:
            write_output(
                "Successfully installed %s",
                installed_desc,
            )
    except OSError as error:
        show_traceback = self.verbosity >= 1

        message = create_os_error_message(
            error,
            show_traceback,
            options.use_user_site,
        )
        logger.error(message, exc_info=show_traceback)

        return ERROR

    if options.target_dir:
        assert target_temp_dir
        self._handle_target_dir(
            options.target_dir, target_temp_dir, options.upgrade
        )
    if options.root_user_action == "warn":
        warn_if_run_as_root()
    return SUCCESS
|
||||
|
||||
def _handle_target_dir(
    self, target_dir: str, target_temp_dir: TempDirectory, upgrade: bool
) -> None:
    """Move the entries installed under *target_temp_dir* into *target_dir*.

    An entry that already exists in *target_dir* is skipped with a warning
    unless *upgrade* is true.  An existing symlink is never replaced.
    """
    ensure_dir(target_dir)

    scheme = get_scheme("", home=target_temp_dir.path)
    purelib_dir = scheme.purelib
    platlib_dir = scheme.platlib
    data_dir = scheme.data

    # Checking both purelib and platlib directories for installed
    # packages to be moved to target directory
    source_dirs = []
    if os.path.exists(purelib_dir):
        source_dirs.append(purelib_dir)
    if os.path.exists(platlib_dir) and platlib_dir != purelib_dir:
        source_dirs.append(platlib_dir)
    if os.path.exists(data_dir):
        source_dirs.append(data_dir)

    for source_dir in source_dirs:
        for entry in os.listdir(source_dir):
            if source_dir == data_dir:
                # Skip a data-dir entry whose path is a prefix of one of the
                # earlier directories in the list.
                data_entry = os.path.join(data_dir, entry)
                if any(s.startswith(data_entry) for s in source_dirs[:-1]):
                    continue

            destination = os.path.join(target_dir, entry)
            if os.path.exists(destination):
                if not upgrade:
                    logger.warning(
                        "Target directory %s already exists. Specify "
                        "--upgrade to force replacement.",
                        destination,
                    )
                    continue
                if os.path.islink(destination):
                    logger.warning(
                        "Target directory %s already exists and is "
                        "a link. pip will not automatically replace "
                        "links, please remove if replacement is "
                        "desired.",
                        destination,
                    )
                    continue
                if os.path.isdir(destination):
                    shutil.rmtree(destination)
                else:
                    os.remove(destination)

            shutil.move(os.path.join(source_dir, entry), destination)
|
||||
|
||||
def _determine_conflicts(
    self, to_install: List[InstallRequirement]
) -> Optional[ConflictDetails]:
    """Return the result of ``check_install_conflicts`` for *to_install*.

    Any exception raised by the check is logged with its traceback and
    ``None`` is returned instead.
    """
    try:
        details = check_install_conflicts(to_install)
    except Exception:
        logger.exception(
            "Error while checking for conflicts. Please file an issue on "
            "pip's issue tracker: https://github.com/pypa/pip/issues/new"
        )
        return None
    return details
|
||||
|
||||
def _warn_about_conflicts(
    self, conflict_details: ConflictDetails, resolver_variant: str
) -> None:
    """Log one critical message listing missing and conflicting dependencies.

    Nothing is logged when *conflict_details* reports neither missing nor
    conflicting dependencies.  *resolver_variant* must be ``"legacy"`` or
    ``"resolvelib"``; it selects the introductory sentence and the pronoun
    used in the conflict lines.
    """
    package_set, (missing, conflicting) = conflict_details
    if not (missing or conflicting):
        return

    if resolver_variant == "legacy":
        intro = (
            "pip's legacy dependency resolver does not consider dependency "
            "conflicts when selecting packages. This behaviour is the "
            "source of the following dependency conflicts."
        )
    else:
        assert resolver_variant == "resolvelib"
        intro = (
            "pip's dependency resolver does not currently take into account "
            "all the packages that are installed. This behaviour is the "
            "source of the following dependency conflicts."
        )
    lines: List[str] = [intro]

    # NOTE: There is some duplication here, with commands/check.py
    for project_name in missing:
        version = package_set[project_name][0]
        for dependency in missing[project_name]:
            lines.append(
                f"{project_name} {version} requires {dependency[1]}, "
                "which is not installed."
            )

    pronoun = "you" if resolver_variant == "resolvelib" else "you'll"
    for project_name in conflicting:
        version = package_set[project_name][0]
        for dep_name, dep_version, req in conflicting[project_name]:
            lines.append(
                f"{project_name} {version} requires {req}, but {pronoun} have "
                f"{dep_name} {dep_version} which is incompatible."
            )

    logger.critical("\n".join(lines))
|
||||
|
||||
|
||||
def get_lib_location_guesses(
    user: bool = False,
    home: Optional[str] = None,
    root: Optional[str] = None,
    isolated: bool = False,
    prefix: Optional[str] = None,
) -> List[str]:
    """Return the ``purelib`` and ``platlib`` paths of the matching scheme.

    All arguments are forwarded unchanged to ``get_scheme`` (called with an
    empty distribution name); the result is always a two-element list in the
    order ``[purelib, platlib]``.
    """
    scheme_kwargs = {
        "user": user,
        "home": home,
        "root": root,
        "isolated": isolated,
        "prefix": prefix,
    }
    scheme = get_scheme("", **scheme_kwargs)
    return [scheme.purelib, scheme.platlib]
|
||||
|
||||
|
||||
def site_packages_writable(root: Optional[str], isolated: bool) -> bool:
    """Return True when every guessed library directory is writable.

    The guesses come from ``get_lib_location_guesses`` and are de-duplicated
    before each one is probed with ``test_writable_dir``.
    """
    candidates = set(get_lib_location_guesses(root=root, isolated=isolated))
    for directory in candidates:
        if not test_writable_dir(directory):
            return False
    return True
|
||||
|
||||
|
||||
def decide_user_install(
    use_user_site: Optional[bool],
    prefix_path: Optional[str] = None,
    target_dir: Optional[str] = None,
    root_path: Optional[str] = None,
    isolated_mode: bool = False,
) -> bool:
    """Decide whether the installation should go to the user site.

    * A falsy, non-``None`` ``use_user_site`` means "no" with no further
      checks.
    * A truthy ``use_user_site`` means "yes", after verifying that it is
      compatible with the other options.
    * ``None`` means "choose for me" based on the remaining arguments and
      the environment.

    Raises:
        CommandError: ``--user`` was requested together with a prefix.
        InstallationError: ``--user`` was requested but
            ``virtualenv_no_global()`` reports true.
    """
    # The value is tested by truthiness rather than identity: some
    # configuration sources (tox) hand over an integer instead of a bool.
    if use_user_site is not None:
        if not use_user_site:
            logger.debug("Non-user install by explicit request")
            return False

        if prefix_path:
            raise CommandError(
                "Can not combine '--user' and '--prefix' as they imply "
                "different installation locations"
            )
        if virtualenv_no_global():
            raise InstallationError(
                "Can not perform a '--user' install. User site-packages "
                "are not visible in this virtualenv."
            )
        logger.debug("User install by explicit request")
        return True

    # From here on nothing was explicitly requested (use_user_site is None).

    # A user install cannot be combined with --prefix/--target.
    if prefix_path or target_dir:
        logger.debug("Non-user install due to --prefix or --target option")
        return False

    # Without user site support there is nothing to fall back to.
    if not site.ENABLE_USER_SITE:
        logger.debug("Non-user install because user site-packages disabled")
        return False

    # Prefer the regular location whenever we are allowed to write there.
    if site_packages_writable(root=root_path, isolated=isolated_mode):
        logger.debug("Non-user install because site-packages writeable")
        return False

    logger.info(
        "Defaulting to user installation because normal site-packages "
        "is not writeable"
    )
    return True
|
||||
|
||||
|
||||
def create_os_error_message(
    error: OSError, show_traceback: bool, using_user_site: bool
) -> str:
    """Build the user-facing message for an ``OSError`` raised during install.

    The text of ``error`` is included only when no traceback will be shown.
    A permissions hint is added for ``EACCES``, and a long-path hint when the
    ``WINDOWS`` flag is set, the error is ``ENOENT`` and the file name is
    longer than 260 characters. The result is stripped and always ends with
    exactly one newline.
    """
    headline = "Could not install packages due to an OSError"
    if show_traceback:
        headline += "."
    else:
        # No traceback will follow, so spell the error out here.
        headline += ": " + str(error)

    # Split the error indication from a helper message (if any).
    chunks = [headline + "\n"]

    # Suggest useful actions to the user:
    #  (1) using user site-packages or (2) verifying the permissions
    if error.errno == errno.EACCES:
        if running_under_virtualenv() or using_user_site:
            advice = "Check the permissions"
        else:
            advice = "Consider using the `--user` option or check the permissions"
        chunks.append(advice + ".\n")

    # Suggest enabling Long Paths when the path is longer than 260 characters.
    path_too_long = (
        WINDOWS
        and error.errno == errno.ENOENT
        and error.filename
        and len(error.filename) > 260
    )
    if path_too_long:
        chunks.append(
            "HINT: This error might have occurred since "
            "this system does not have Windows Long Path "
            "support enabled. You can find information on "
            "how to enable this at "
            "https://pip.pypa.io/warnings/enable-long-paths\n"
        )

    return "".join(chunks).strip() + "\n"
|
||||
|
|
@ -0,0 +1,370 @@
|
|||
import json
|
||||
import logging
|
||||
from optparse import Values
|
||||
from typing import TYPE_CHECKING, Generator, List, Optional, Sequence, Tuple, cast
|
||||
|
||||
from pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.req_command import IndexGroupCommand
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.exceptions import CommandError
|
||||
from pip._internal.index.collector import LinkCollector
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.metadata import BaseDistribution, get_environment
|
||||
from pip._internal.models.selection_prefs import SelectionPreferences
|
||||
from pip._internal.network.session import PipSession
|
||||
from pip._internal.utils.compat import stdlib_pkgs
|
||||
from pip._internal.utils.misc import tabulate, write_output
|
||||
|
||||
if TYPE_CHECKING:
    # Everything in this branch exists for static type checkers only.
    from pip._internal.metadata.base import DistributionVersion

    class _DistWithLatestInfo(BaseDistribution):
        """A distribution annotated with data about its newest release.

        The two extra attributes are filled in while the latest versions are
        looked up (``get_outdated()``). Attaching them to the distribution
        object is a shortcut, but it keeps the remaining code simple.
        """

        # Version of the best candidate found on the index.
        latest_version: DistributionVersion
        # Kind of file that candidate is distributed as.
        latest_filetype: str

    # Alias used in the signatures of the listing helpers.
    _ProcessedDists = Sequence[_DistWithLatestInfo]
|
||||
|
||||
|
||||
from pip._vendor.packaging.version import parse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ListCommand(IndexGroupCommand):
    """
    List installed packages, including editables.

    Packages are listed in a case-insensitive sorted order.
    """

    # NOTE(review): the class docstring above is kept verbatim; it is
    # presumably surfaced as the command's help text - confirm before editing.

    ignore_require_venv = True
    usage = """
      %prog [options]"""

    def add_options(self) -> None:
        """Register the command's options and attach both option groups."""
        add = self.cmd_opts.add_option

        add(
            "-o",
            "--outdated",
            action="store_true",
            default=False,
            help="List outdated packages",
        )
        add(
            "-u",
            "--uptodate",
            action="store_true",
            default=False,
            help="List uptodate packages",
        )
        add(
            "-e",
            "--editable",
            action="store_true",
            default=False,
            help="List editable projects.",
        )
        add(
            "-l",
            "--local",
            action="store_true",
            default=False,
            help=(
                "If in a virtualenv that has global access, do not list "
                "globally-installed packages."
            ),
        )
        add(
            "--user",
            dest="user",
            action="store_true",
            default=False,
            help="Only output packages installed in user-site.",
        )
        add(cmdoptions.list_path())
        add(
            "--pre",
            action="store_true",
            default=False,
            help=(
                "Include pre-release and development versions. By default, "
                "pip only finds stable versions."
            ),
        )

        add(
            "--format",
            action="store",
            dest="list_format",
            default="columns",
            choices=("columns", "freeze", "json"),
            help=(
                "Select the output format among: columns (default), freeze, or json. "
                "The 'freeze' format cannot be used with the --outdated option."
            ),
        )

        add(
            "--not-required",
            action="store_true",
            dest="not_required",
            help="List packages that are not dependencies of installed packages.",
        )

        # The next two options share one destination; editables are included
        # unless --exclude-editable is given.
        add(
            "--exclude-editable",
            action="store_false",
            dest="include_editable",
            help="Exclude editable package from output.",
        )
        add(
            "--include-editable",
            action="store_true",
            dest="include_editable",
            help="Include editable package from output.",
            default=True,
        )
        add(cmdoptions.list_exclude())

        index_opts = cmdoptions.make_option_group(cmdoptions.index_group, self.parser)
        self.parser.insert_option_group(0, index_opts)
        self.parser.insert_option_group(0, self.cmd_opts)

    def _build_package_finder(
        self, options: Values, session: PipSession
    ) -> PackageFinder:
        """
        Create a package finder appropriate to this list command.
        """
        # allow_yanked=False makes the finder ignore yanked versions.
        return PackageFinder.create(
            link_collector=LinkCollector.create(session, options=options),
            selection_prefs=SelectionPreferences(
                allow_yanked=False,
                allow_all_prereleases=options.pre,
            ),
        )

    def run(self, options: Values, args: List[str]) -> int:
        """Validate the options, collect the distributions and print them."""
        if options.outdated:
            if options.uptodate:
                raise CommandError(
                    "Options --outdated and --uptodate cannot be combined."
                )
            if options.list_format == "freeze":
                raise CommandError(
                    "List format 'freeze' cannot be used with the --outdated option."
                )

        cmdoptions.check_list_path_option(options)

        skip = set(stdlib_pkgs)
        if options.excludes:
            skip.update(canonicalize_name(n) for n in options.excludes)

        environment = get_environment(options.path)
        installed = environment.iter_installed_distributions(
            local_only=options.local,
            user_only=options.user,
            editables_only=options.editable,
            include_editables=options.include_editable,
            skip=skip,
        )
        packages: "_ProcessedDists" = [
            cast("_DistWithLatestInfo", dist) for dist in installed
        ]

        # The not-required filter has to run before any other filter:
        # otherwise a parent package could already be gone and its
        # dependencies would no longer be recognised as requirements.
        if options.not_required:
            packages = self.get_not_required(packages, options)

        if options.outdated:
            packages = self.get_outdated(packages, options)
        elif options.uptodate:
            packages = self.get_uptodate(packages, options)

        self.output_package_listing(packages, options)
        return SUCCESS

    def get_outdated(
        self, packages: "_ProcessedDists", options: Values
    ) -> "_ProcessedDists":
        """Keep the distributions whose latest version is newer than installed."""
        outdated = []
        for dist in self.iter_packages_latest_infos(packages, options):
            if parse(str(dist.latest_version)) > parse(str(dist.version)):
                outdated.append(dist)
        return outdated

    def get_uptodate(
        self, packages: "_ProcessedDists", options: Values
    ) -> "_ProcessedDists":
        """Keep the distributions whose latest version equals the installed one."""
        current = []
        for dist in self.iter_packages_latest_infos(packages, options):
            if parse(str(dist.latest_version)) == parse(str(dist.version)):
                current.append(dist)
        return current

    def get_not_required(
        self, packages: "_ProcessedDists", options: Values
    ) -> "_ProcessedDists":
        """Keep the distributions that no distribution in *packages* depends on."""
        dep_keys = set()
        for dist in packages:
            for dep in dist.iter_dependencies() or ():
                dep_keys.add(canonicalize_name(dep.name))

        # Going through a set drops duplicates; the list conversion keeps the
        # return type in line with get_outdated and get_uptodate.
        unique = {pkg for pkg in packages if pkg.canonical_name not in dep_keys}
        return list(unique)

    def iter_packages_latest_infos(
        self, packages: "_ProcessedDists", options: Values
    ) -> Generator["_DistWithLatestInfo", None, None]:
        """Yield each distribution annotated with its best index candidate.

        ``latest_version`` and ``latest_filetype`` are set on the distribution
        before it is yielded. Distributions without any usable candidate are
        skipped. The session stays open while the generator is consumed.
        """
        with self._build_session(options) as session:
            finder = self._build_package_finder(options, session)

            for dist in packages:
                candidates = finder.find_all_candidates(dist.canonical_name)
                if not options.pre:
                    # Remove prereleases
                    candidates = [
                        found
                        for found in candidates
                        if not found.version.is_prerelease
                    ]

                evaluator = finder.make_candidate_evaluator(
                    project_name=dist.canonical_name,
                )
                best = evaluator.sort_best_candidate(candidates)
                if best is None:
                    continue

                remote_version = best.version
                file_type = "wheel" if best.link.is_wheel else "sdist"
                dist.latest_version = remote_version
                dist.latest_filetype = file_type
                yield dist

    def output_package_listing(
        self, packages: "_ProcessedDists", options: Values
    ) -> None:
        """Print *packages*, sorted by canonical name, in the selected format."""
        ordered = sorted(packages, key=lambda item: item.canonical_name)
        list_format = options.list_format

        if list_format == "columns":
            # An empty listing prints nothing at all in column mode.
            if ordered:
                data, header = format_for_columns(ordered, options)
                self.output_package_listing_columns(data, header)
            return

        if list_format == "freeze":
            for dist in ordered:
                if options.verbose >= 1:
                    write_output(
                        "%s==%s (%s)", dist.raw_name, dist.version, dist.location
                    )
                else:
                    write_output("%s==%s", dist.raw_name, dist.version)
            return

        if list_format == "json":
            write_output(format_for_json(ordered, options))

    def output_package_listing_columns(
        self, data: List[List[str]], header: List[str]
    ) -> None:
        """Print *data* as an aligned table under *header*.

        Note that *header* is inserted into *data* in place when there is at
        least one row.
        """
        has_rows = len(data) > 0

        # The header goes in first so that its column names take part in the
        # width calculation.
        if has_rows:
            data.insert(0, header)

        pkg_strings, sizes = tabulate(data)

        # Put a dashed separator right below the header line.
        if has_rows:
            pkg_strings.insert(1, " ".join("-" * width for width in sizes))

        for text in pkg_strings:
            write_output(text)
|
||||
|
||||
|
||||
def format_for_columns(
    pkgs: "_ProcessedDists", options: Values
) -> Tuple[List[List[str]], List[str]]:
    """
    Turn the package data into ``(rows, header)`` for column output.

    Optional columns are appended in this order: latest version and file type
    (when ``options.outdated``), editable project location (when at least one
    package is editable), location and installer (when ``options.verbose`` is
    at least 1).
    """
    show_latest = options.outdated
    show_editable = any(pkg.editable for pkg in pkgs)

    header = ["Package", "Version"]
    if show_latest:
        header += ["Latest", "Type"]
    if show_editable:
        header.append("Editable project location")
    if options.verbose >= 1:
        header += ["Location", "Installer"]

    rows = []
    for pkg in pkgs:
        row = [pkg.raw_name, str(pkg.version)]

        # For the 'outdated' listing, add the latest version and its type.
        if show_latest:
            row += [str(pkg.latest_version), pkg.latest_filetype]

        if show_editable:
            row.append(pkg.editable_project_location or "")

        if options.verbose >= 1:
            row += [pkg.location or "", pkg.installer]

        rows.append(row)

    return rows, header
|
||||
|
||||
|
||||
def format_for_json(packages: "_ProcessedDists", options: Values) -> str:
    """Serialize *packages* to a JSON array with one object per distribution.

    ``name`` and ``version`` are always present. ``location`` and
    ``installer`` are added when ``options.verbose`` is at least 1,
    ``latest_version`` and ``latest_filetype`` when ``options.outdated`` is
    set, and ``editable_project_location`` only when it is non-empty.
    """
    records = []
    for dist in packages:
        record = {"name": dist.raw_name, "version": str(dist.version)}

        if options.verbose >= 1:
            record.update(location=dist.location or "", installer=dist.installer)

        if options.outdated:
            record.update(
                latest_version=str(dist.latest_version),
                latest_filetype=dist.latest_filetype,
            )

        editable_location = dist.editable_project_location
        if editable_location:
            record["editable_project_location"] = editable_location

        records.append(record)

    return json.dumps(records)
|
||||
|
|
@ -0,0 +1,174 @@
|
|||
import logging
|
||||
import shutil
|
||||
import sys
|
||||
import textwrap
|
||||
import xmlrpc.client
|
||||
from collections import OrderedDict
|
||||
from optparse import Values
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional
|
||||
|
||||
from pip._vendor.packaging.version import parse as parse_version
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.req_command import SessionCommandMixin
|
||||
from pip._internal.cli.status_codes import NO_MATCHES_FOUND, SUCCESS
|
||||
from pip._internal.exceptions import CommandError
|
||||
from pip._internal.metadata import get_default_environment
|
||||
from pip._internal.models.index import PyPI
|
||||
from pip._internal.network.xmlrpc import PipXmlrpcTransport
|
||||
from pip._internal.utils.logging import indent_log
|
||||
from pip._internal.utils.misc import write_output
|
||||
|
||||
if TYPE_CHECKING:
    # Only needed by static type checkers; nothing here runs at import time.
    from typing import TypedDict

    class TransformedHit(TypedDict):
        """One package in the search output, with all its versions inline."""

        # Package name as reported by the index.
        name: str
        # Summary text shown next to the name.
        summary: str
        # Every version string collected for this package.
        versions: List[str]
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SearchCommand(Command, SessionCommandMixin):
    """Search for PyPI packages whose name or summary contains <query>."""

    # NOTE(review): the class docstring above is kept verbatim; it is
    # presumably surfaced as the command's help text - confirm before editing.

    usage = """
      %prog [options] <query>"""
    ignore_require_venv = True

    def add_options(self) -> None:
        """Register the ``--index`` option and attach the option group."""
        self.cmd_opts.add_option(
            "-i",
            "--index",
            dest="index",
            metavar="URL",
            default=PyPI.pypi_url,
            help="Base URL of Python Package Index (default %default)",
        )
        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Run the search and print the hits.

        Returns ``SUCCESS`` when the index returned at least one hit and
        ``NO_MATCHES_FOUND`` otherwise. Raises ``CommandError`` when no query
        was given.
        """
        if not args:
            raise CommandError("Missing required argument (search query).")

        pypi_hits = self.search(args, options)
        hits = transform_hits(pypi_hits)

        # Summaries are only wrapped when writing to an interactive terminal.
        if sys.stdout.isatty():
            terminal_width = shutil.get_terminal_size()[0]
        else:
            terminal_width = None

        print_results(hits, terminal_width=terminal_width)
        return SUCCESS if pypi_hits else NO_MATCHES_FOUND

    def search(self, query: List[str], options: Values) -> List[Dict[str, str]]:
        """Query the index over XML-RPC for *query* in names or summaries.

        An XML-RPC fault is converted into a ``CommandError`` carrying the
        fault code and fault string.
        """
        index_url = options.index
        session = self.get_default_session(options)
        transport = PipXmlrpcTransport(index_url, session)
        pypi = xmlrpc.client.ServerProxy(index_url, transport)

        search_spec = {"name": query, "summary": query}
        try:
            hits = pypi.search(search_spec, "or")
        except xmlrpc.client.Fault as fault:
            message = "XMLRPC request failed [code: {code}]\n{string}".format(
                code=fault.faultCode,
                string=fault.faultString,
            )
            raise CommandError(message)

        assert isinstance(hits, list)
        return hits
|
||||
|
||||
|
||||
def transform_hits(hits: List[Dict[str, str]]) -> List["TransformedHit"]:
    """
    Group the per-version hits from the index into one entry per package.

    Each entry carries the package name, a summary and the list of all
    versions seen, in first-seen package order. When a later hit turns out to
    be the highest version collected so far, its summary replaces the stored
    one.
    """
    grouped: Dict[str, "TransformedHit"] = OrderedDict()

    for hit in hits:
        name = hit["name"]
        summary = hit["summary"]
        version = hit["version"]

        if name not in grouped:
            # First sighting of this package.
            grouped[name] = {
                "name": name,
                "summary": summary,
                "versions": [version],
            }
            continue

        entry = grouped[name]
        entry["versions"].append(version)

        # The summary of the highest version wins.
        if version == highest_version(entry["versions"]):
            entry["summary"] = summary

    return list(grouped.values())
|
||||
|
||||
|
||||
def print_dist_installation_info(name: str, latest: str) -> None:
    """Print the locally installed version of *name* next to *latest*.

    Nothing is printed when the default environment has no distribution
    called *name*. Otherwise an ``INSTALLED`` line is written, marked
    "(latest)" when the installed version equals *latest*; if it does not, a
    ``LATEST`` line follows, flagged as a pre-release when *latest* parses as
    one.

    Args:
        name: Project name to look up in the default environment.
        latest: Highest version string reported by the index.
    """
    env = get_default_environment()
    dist = env.get_distribution(name)
    if dist is None:
        return

    with indent_log():
        # Compare as text. ``latest`` is a plain string, whereas the rest of
        # this code converts ``dist.version`` with ``str()`` before using it,
        # so it is presumably a version object - and such an object never
        # compares equal to a ``str``, which would make the "(latest)" branch
        # unreachable. NOTE(review): confirm against the metadata API.
        if str(dist.version) == latest:
            write_output("INSTALLED: %s (latest)", dist.version)
            return

        write_output("INSTALLED: %s", dist.version)
        if parse_version(latest).pre:
            write_output(
                "LATEST: %s (pre-release; install"
                " with `pip install --pre`)",
                latest,
            )
        else:
            write_output("LATEST: %s", latest)
|
||||
|
||||
|
||||
def print_results(
    hits: List["TransformedHit"],
    name_column_width: Optional[int] = None,
    terminal_width: Optional[int] = None,
) -> None:
    """Print one ``name (version) - summary`` line per hit.

    When *name_column_width* is not given it is derived from the longest
    name/version pair. When *terminal_width* is given and leaves more than
    ten columns for the summary, the summary is wrapped and indented to line
    up under itself. After each hit, the local installation status is
    printed as well.
    """
    if not hits:
        return

    if name_column_width is None:
        longest = max(
            len(hit["name"]) + len(highest_version(hit.get("versions", ["-"])))
            for hit in hits
        )
        name_column_width = longest + 4

    # Width left for the summary; None means "do not wrap".
    target_width = None
    if terminal_width is not None:
        target_width = terminal_width - name_column_width - 5

    for hit in hits:
        name = hit["name"]
        summary = hit["summary"] or ""
        latest = highest_version(hit.get("versions", ["-"]))

        if target_width is not None and target_width > 10:
            # wrap and indent summary to fit terminal
            continuation = "\n" + " " * (name_column_width + 3)
            summary = continuation.join(textwrap.wrap(summary, target_width))

        name_latest = f"{name} ({latest})"
        line = f"{name_latest:{name_column_width}} - {summary}"
        try:
            write_output(line)
            print_dist_installation_info(name, latest)
        except UnicodeEncodeError:
            # Best effort: a hit that cannot be encoded for the output
            # stream is skipped.
            pass
|
||||
|
||||
|
||||
def highest_version(versions: List[str]) -> str:
    """Return the entry of *versions* that ranks highest once parsed.

    The original string is returned, not its parsed form.
    """
    newest = max(versions, key=parse_version)
    return newest
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
import logging
|
||||
from optparse import Values
|
||||
from typing import Generator, Iterable, Iterator, List, NamedTuple, Optional
|
||||
|
||||
from pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pip._internal.metadata import BaseDistribution, get_default_environment
|
||||
from pip._internal.utils.misc import write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ShowCommand(Command):
    """
    Show information about one or more installed packages.

    The output is in RFC-compliant mail header format.
    """

    # NOTE(review): the class docstring above is kept verbatim; it is
    # presumably surfaced as the command's help text - confirm before editing.

    usage = """
      %prog [options] <package> ..."""
    ignore_require_venv = True

    def add_options(self) -> None:
        """Register the ``--files`` option and attach the option group."""
        self.cmd_opts.add_option(
            "-f",
            "--files",
            dest="files",
            action="store_true",
            default=False,
            help="Show the full list of installed files for each package.",
        )
        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Print details for every named package.

        Returns ``ERROR`` when no package name was given or when nothing was
        printed, ``SUCCESS`` otherwise.
        """
        if not args:
            logger.warning("ERROR: Please provide a package name or names.")
            return ERROR

        results = search_packages_info(args)
        printed = print_results(
            results, list_files=options.files, verbose=options.verbose
        )
        return SUCCESS if printed else ERROR
|
||||
|
||||
|
||||
class _PackageInfo(NamedTuple):
    """Everything the ``show`` output needs to know about one distribution."""

    # Identity and where it lives.
    name: str
    version: str
    location: str
    editable_project_location: Optional[str]

    # Dependency relations, by project name.
    requires: List[str]
    required_by: List[str]

    # Installation and metadata details.
    installer: str
    metadata_version: str
    classifiers: List[str]
    summary: str
    homepage: str
    project_urls: List[str]
    author: str
    author_email: str
    license: str

    # Lines of entry_points.txt (empty when the file is absent).
    entry_points: List[str]
    # Declared files, or None when no file listing could be found.
    files: Optional[List[str]]
|
||||
|
||||
|
||||
def search_packages_info(query: List[str]) -> Generator[_PackageInfo, None, None]:
    """
    Yield a ``_PackageInfo`` for every installed distribution named in *query*.

    Names are matched in canonical form. Names without a matching
    distribution are reported in a single warning and otherwise skipped. The
    work, including that warning, only starts once the generator is first
    advanced.
    """
    env = get_default_environment()

    installed = {}
    for found in env.iter_all_distributions():
        installed[found.canonical_name] = found

    query_names = [canonicalize_name(name) for name in query]

    # Report missing packages under the spelling the caller used.
    missing = sorted(
        raw for raw, canonical in zip(query, query_names) if canonical not in installed
    )
    if missing:
        logger.warning("Package(s) not found: %s", ", ".join(missing))

    def _get_requiring_packages(current_dist: BaseDistribution) -> Iterator[str]:
        # Names of all installed distributions that depend on current_dist.
        for other in installed.values():
            dependency_names = {
                canonicalize_name(dep.name) for dep in other.iter_dependencies()
            }
            if current_dist.canonical_name in dependency_names:
                yield other.metadata["Name"] or "UNKNOWN"

    for query_name in query_names:
        if query_name not in installed:
            continue
        dist = installed[query_name]

        requires = sorted((req.name for req in dist.iter_dependencies()), key=str.lower)
        required_by = sorted(_get_requiring_packages(dist), key=str.lower)

        try:
            entry_points = dist.read_text("entry_points.txt").splitlines(
                keepends=False
            )
        except FileNotFoundError:
            entry_points = []

        files_iter = dist.iter_declared_entries()
        files: Optional[List[str]] = (
            None if files_iter is None else sorted(files_iter)
        )

        metadata = dist.metadata

        yield _PackageInfo(
            name=dist.raw_name,
            version=str(dist.version),
            location=dist.location or "",
            editable_project_location=dist.editable_project_location,
            requires=requires,
            required_by=required_by,
            installer=dist.installer,
            metadata_version=dist.metadata_version or "",
            classifiers=metadata.get_all("Classifier", []),
            summary=metadata.get("Summary", ""),
            homepage=metadata.get("Home-page", ""),
            project_urls=metadata.get_all("Project-URL", []),
            author=metadata.get("Author", ""),
            author_email=metadata.get("Author-email", ""),
            license=metadata.get("License", ""),
            entry_points=entry_points,
            files=files,
        )
|
||||
|
||||
|
||||
def print_results(
    distributions: Iterable[_PackageInfo],
    list_files: bool,
    verbose: bool,
) -> bool:
    """
    Write the details of each distribution, separated by ``---`` lines.

    *verbose* adds metadata version, installer, classifiers, entry points and
    project URLs; *list_files* adds the file listing. Returns True when at
    least one distribution was printed.
    """
    printed_any = False
    for index, info in enumerate(distributions):
        printed_any = True
        if index > 0:
            write_output("---")

        basic_fields = (
            ("Name: %s", info.name),
            ("Version: %s", info.version),
            ("Summary: %s", info.summary),
            ("Home-page: %s", info.homepage),
            ("Author: %s", info.author),
            ("Author-email: %s", info.author_email),
            ("License: %s", info.license),
            ("Location: %s", info.location),
        )
        for template, value in basic_fields:
            write_output(template, value)

        if info.editable_project_location is not None:
            write_output(
                "Editable project location: %s", info.editable_project_location
            )
        write_output("Requires: %s", ", ".join(info.requires))
        write_output("Required-by: %s", ", ".join(info.required_by))

        if verbose:
            write_output("Metadata-Version: %s", info.metadata_version)
            write_output("Installer: %s", info.installer)
            write_output("Classifiers:")
            for classifier in info.classifiers:
                write_output(" %s", classifier)
            write_output("Entry-points:")
            for entry in info.entry_points:
                write_output(" %s", entry.strip())
            write_output("Project-URLs:")
            for project_url in info.project_urls:
                write_output(" %s", project_url)

        if list_files:
            write_output("Files:")
            if info.files is None:
                write_output("Cannot locate RECORD or installed-files.txt")
            else:
                for path in info.files:
                    write_output(" %s", path.strip())

    return printed_any
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
import logging
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.base_command import Command
|
||||
from pip._internal.cli.req_command import SessionCommandMixin, warn_if_run_as_root
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.exceptions import InstallationError
|
||||
from pip._internal.req import parse_requirements
|
||||
from pip._internal.req.constructors import (
|
||||
install_req_from_line,
|
||||
install_req_from_parsed_requirement,
|
||||
)
|
||||
from pip._internal.utils.misc import (
|
||||
check_externally_managed,
|
||||
protect_pip_from_modification_on_windows,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UninstallCommand(Command, SessionCommandMixin):
    """
    Uninstall packages.

    pip is able to uninstall most installed packages. Known exceptions are:

    - Pure distutils packages installed with ``python setup.py install``, which
      leave behind no metadata to determine what files were installed.
    - Script wrappers installed by ``python setup.py develop``.
    """

    # NOTE(review): the class docstring above is kept verbatim; it is
    # presumably surfaced as the command's help text - confirm before editing.

    usage = """
      %prog [options] <package> ...
      %prog [options] -r <requirements file> ..."""

    def add_options(self) -> None:
        """Register the command's options and attach the option group."""
        add = self.cmd_opts.add_option

        add(
            "-r",
            "--requirement",
            dest="requirements",
            action="append",
            default=[],
            metavar="file",
            help=(
                "Uninstall all the packages listed in the given requirements "
                "file. This option can be used multiple times."
            ),
        )
        add(
            "-y",
            "--yes",
            dest="yes",
            action="store_true",
            help="Don't ask for confirmation of uninstall deletions.",
        )
        add(cmdoptions.root_user_action())
        add(cmdoptions.override_externally_managed())
        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options: Values, args: List[str]) -> int:
        """Uninstall everything named on the command line or in ``-r`` files.

        Requirements are keyed by canonical name, so a later mention of the
        same project replaces an earlier one. Raises ``InstallationError``
        when no named requirement is left to uninstall.
        """
        session = self.get_default_session(options)
        isolated = options.isolated_mode

        reqs_to_uninstall = {}

        # Requirements given directly as arguments; unnamed ones are reported.
        for name in args:
            req = install_req_from_line(name, isolated=isolated)
            if not req.name:
                logger.warning(
                    "Invalid requirement: %r ignored -"
                    " the uninstall command expects named"
                    " requirements.",
                    name,
                )
                continue
            reqs_to_uninstall[canonicalize_name(req.name)] = req

        # Requirements read from files; unnamed ones are dropped silently.
        for filename in options.requirements:
            parsed_reqs = parse_requirements(filename, options=options, session=session)
            for parsed_req in parsed_reqs:
                req = install_req_from_parsed_requirement(
                    parsed_req, isolated=isolated
                )
                if req.name:
                    reqs_to_uninstall[canonicalize_name(req.name)] = req

        if not reqs_to_uninstall:
            raise InstallationError(
                f"You must give at least one requirement to {self.name} (see "
                f'"pip help {self.name}")'
            )

        if not options.override_externally_managed:
            check_externally_managed()

        protect_pip_from_modification_on_windows(
            modifying_pip="pip" in reqs_to_uninstall
        )

        be_verbose = self.verbosity > 0
        for req in reqs_to_uninstall.values():
            uninstall_pathset = req.uninstall(
                auto_confirm=options.yes,
                verbose=be_verbose,
            )
            if uninstall_pathset:
                uninstall_pathset.commit()

        if options.root_user_action == "warn":
            warn_if_run_as_root()
        return SUCCESS
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pip._internal.cache import WheelCache
|
||||
from pip._internal.cli import cmdoptions
|
||||
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
|
||||
from pip._internal.cli.status_codes import SUCCESS
|
||||
from pip._internal.exceptions import CommandError
|
||||
from pip._internal.operations.build.build_tracker import get_build_tracker
|
||||
from pip._internal.req.req_install import (
|
||||
InstallRequirement,
|
||||
check_legacy_setup_py_options,
|
||||
)
|
||||
from pip._internal.utils.misc import ensure_dir, normalize_path
|
||||
from pip._internal.utils.temp_dir import TempDirectory
|
||||
from pip._internal.wheel_builder import build, should_build_for_wheel_command
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WheelCommand(RequirementCommand):
    """
    Build Wheel archives for your requirements and dependencies.

    Wheel is a built-package format, and offers the advantage of not
    recompiling your software during every install. For more details, see the
    wheel docs: https://wheel.readthedocs.io/en/latest/

    'pip wheel' uses the build system interface as described here:
    https://pip.pypa.io/en/stable/reference/build-system/

    """

    usage = """
      %prog [options] <requirement specifier> ...
      %prog [options] -r <requirements file> ...
      %prog [options] [-e] <vcs project url> ...
      %prog [options] [-e] <local project path> ...
      %prog [options] <archive url/path> ..."""

    def add_options(self) -> None:
        """Register the options understood by the wheel command.

        The command-specific options and the shared index option group are
        both inserted at the front of the parser's option groups.
        """
        self.cmd_opts.add_option(
            "-w",
            "--wheel-dir",
            dest="wheel_dir",
            metavar="dir",
            default=os.curdir,
            help=(
                "Build wheels into <dir>, where the default is the "
                "current working directory."
            ),
        )
        self.cmd_opts.add_option(cmdoptions.no_binary())
        self.cmd_opts.add_option(cmdoptions.only_binary())
        self.cmd_opts.add_option(cmdoptions.prefer_binary())
        self.cmd_opts.add_option(cmdoptions.no_build_isolation())
        self.cmd_opts.add_option(cmdoptions.use_pep517())
        self.cmd_opts.add_option(cmdoptions.no_use_pep517())
        self.cmd_opts.add_option(cmdoptions.check_build_deps())
        self.cmd_opts.add_option(cmdoptions.constraints())
        self.cmd_opts.add_option(cmdoptions.editable())
        self.cmd_opts.add_option(cmdoptions.requirements())
        self.cmd_opts.add_option(cmdoptions.src())
        self.cmd_opts.add_option(cmdoptions.ignore_requires_python())
        self.cmd_opts.add_option(cmdoptions.no_deps())
        self.cmd_opts.add_option(cmdoptions.progress_bar())

        self.cmd_opts.add_option(
            "--no-verify",
            dest="no_verify",
            action="store_true",
            default=False,
            help="Don't verify if built wheel is valid.",
        )

        self.cmd_opts.add_option(cmdoptions.config_settings())
        self.cmd_opts.add_option(cmdoptions.build_options())
        self.cmd_opts.add_option(cmdoptions.global_options())

        self.cmd_opts.add_option(
            "--pre",
            action="store_true",
            default=False,
            help=(
                "Include pre-release and development versions. By default, "
                "pip only finds stable versions."
            ),
        )

        self.cmd_opts.add_option(cmdoptions.require_hashes())

        index_opts = cmdoptions.make_option_group(
            cmdoptions.index_group,
            self.parser,
        )

        self.parser.insert_option_group(0, index_opts)
        self.parser.insert_option_group(0, self.cmd_opts)

    @with_cleanup
    def run(self, options: Values, args: List[str]) -> int:
        """Resolve the requested requirements and produce wheels for them in
        ``options.wheel_dir``.

        Requirements that already resolve to a wheel are saved through the
        preparer; the remaining buildable ones are built and then copied into
        the wheel directory.

        Returns ``SUCCESS``. Raises ``CommandError`` when at least one wheel
        failed to build or could not be copied.
        """
        session = self.get_default_session(options)

        finder = self._build_package_finder(options, session)

        # Normalize the target directory and make sure it exists up front.
        options.wheel_dir = normalize_path(options.wheel_dir)
        ensure_dir(options.wheel_dir)

        build_tracker = self.enter_context(get_build_tracker())

        directory = TempDirectory(
            delete=not options.no_clean,
            kind="wheel",
            globally_managed=True,
        )

        reqs = self.get_requirements(args, options, finder, session)
        check_legacy_setup_py_options(options, reqs)

        wheel_cache = WheelCache(options.cache_dir)

        preparer = self.make_requirement_preparer(
            temp_build_dir=directory,
            options=options,
            build_tracker=build_tracker,
            session=session,
            finder=finder,
            download_dir=options.wheel_dir,
            use_user_site=False,
            verbosity=self.verbosity,
        )

        resolver = self.make_resolver(
            preparer=preparer,
            finder=finder,
            options=options,
            wheel_cache=wheel_cache,
            ignore_requires_python=options.ignore_requires_python,
            use_pep517=options.use_pep517,
        )

        self.trace_basic_info(finder)

        requirement_set = resolver.resolve(reqs, check_supported_wheels=True)

        # Split the resolved set: existing wheels are saved as-is, everything
        # else that qualifies is queued for building.
        reqs_to_build: List[InstallRequirement] = []
        for req in requirement_set.requirements.values():
            if req.is_wheel:
                preparer.save_linked_requirement(req)
            elif should_build_for_wheel_command(req):
                reqs_to_build.append(req)

        preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
        requirement_set.warn_legacy_versions_and_specifiers()

        # build wheels
        build_successes, build_failures = build(
            reqs_to_build,
            wheel_cache=wheel_cache,
            verify=(not options.no_verify),
            build_options=options.build_options or [],
            global_options=options.global_options or [],
        )
        for req in build_successes:
            assert req.link and req.link.is_wheel
            assert req.local_file_path
            # copy from cache to target directory
            try:
                shutil.copy(req.local_file_path, options.wheel_dir)
            except OSError as e:
                # A failed copy is reported and counted like a failed build.
                logger.warning(
                    "Building wheel for %s failed: %s",
                    req.name,
                    e,
                )
                build_failures.append(req)
        if build_failures:
            raise CommandError("Failed to build one or more wheels")

        return SUCCESS
|
||||
|
|
@ -0,0 +1,383 @@
|
|||
"""Configuration management setup
|
||||
|
||||
Some terminology:
|
||||
- name
|
||||
As written in config files.
|
||||
- value
|
||||
Value associated with a name
|
||||
- key
|
||||
Name combined with its section (section.name)
|
||||
- variant
|
||||
A single word describing where the configuration key-value pair came from
|
||||
"""
|
||||
|
||||
import configparser
|
||||
import locale
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, Iterable, List, NewType, Optional, Tuple
|
||||
|
||||
from pip._internal.exceptions import (
|
||||
ConfigurationError,
|
||||
ConfigurationFileCouldNotBeLoaded,
|
||||
)
|
||||
from pip._internal.utils import appdirs
|
||||
from pip._internal.utils.compat import WINDOWS
|
||||
from pip._internal.utils.logging import getLogger
|
||||
from pip._internal.utils.misc import ensure_dir, enum
|
||||
|
||||
RawConfigParser = configparser.RawConfigParser # Shorthand
|
||||
Kind = NewType("Kind", str)
|
||||
|
||||
CONFIG_BASENAME = "pip.ini" if WINDOWS else "pip.conf"
|
||||
ENV_NAMES_IGNORED = "version", "help"
|
||||
|
||||
# The kinds of configurations there are.
|
||||
kinds = enum(
|
||||
USER="user", # User Specific
|
||||
GLOBAL="global", # System Wide
|
||||
SITE="site", # [Virtual] Environment Specific
|
||||
ENV="env", # from PIP_CONFIG_FILE
|
||||
ENV_VAR="env-var", # from Environment Variables
|
||||
)
|
||||
OVERRIDE_ORDER = kinds.GLOBAL, kinds.USER, kinds.SITE, kinds.ENV, kinds.ENV_VAR
|
||||
VALID_LOAD_ONLY = kinds.USER, kinds.GLOBAL, kinds.SITE
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
# NOTE: Maybe use the optionxform attribute to normalize keynames.
|
||||
def _normalize_name(name: str) -> str:
|
||||
"""Make a name consistent regardless of source (environment or file)"""
|
||||
name = name.lower().replace("_", "-")
|
||||
if name.startswith("--"):
|
||||
name = name[2:] # only prefer long opts
|
||||
return name
|
||||
|
||||
|
||||
def _disassemble_key(name: str) -> List[str]:
|
||||
if "." not in name:
|
||||
error_message = (
|
||||
"Key does not contain dot separated section and key. "
|
||||
f"Perhaps you wanted to use 'global.{name}' instead?"
|
||||
)
|
||||
raise ConfigurationError(error_message)
|
||||
return name.split(".", 1)
|
||||
|
||||
|
||||
def get_configuration_files() -> Dict[Kind, List[str]]:
    """Return the candidate configuration file paths grouped by kind.

    The result maps ``kinds.GLOBAL``, ``kinds.SITE`` and ``kinds.USER`` to
    lists of paths. The user list holds the legacy location first and the
    current one second.
    """
    system_wide = []
    for config_dir in appdirs.site_config_dirs("pip"):
        system_wide.append(os.path.join(config_dir, CONFIG_BASENAME))

    per_environment = os.path.join(sys.prefix, CONFIG_BASENAME)

    # The legacy per-user file sits in a directory directly under the home
    # directory.
    legacy_dirname = "pip" if WINDOWS else ".pip"
    legacy_per_user = os.path.join(
        os.path.expanduser("~"), legacy_dirname, CONFIG_BASENAME
    )
    current_per_user = os.path.join(appdirs.user_config_dir("pip"), CONFIG_BASENAME)

    return {
        kinds.GLOBAL: system_wide,
        kinds.SITE: [per_environment],
        kinds.USER: [legacy_per_user, current_per_user],
    }
|
||||
|
||||
|
||||
class Configuration:
    """Handles management of configuration.

    Provides an interface to accessing and managing configuration files.

    This class provides an API that takes "section.key-name" style
    keys and stores the value associated with it as "key-name" under the
    section "section".

    This allows for a clean interface wherein both the section and the
    key-name are preserved in an easy to manage form in the configuration files
    and the data stored is also nice.
    """

    def __init__(self, isolated: bool, load_only: Optional[Kind] = None) -> None:
        """Create an empty, not-yet-loaded configuration.

        :param isolated: when true, environment variables and per-user
            configuration files are not loaded.
        :param load_only: restrict loading (and enable editing) to a single
            variant; must be one of ``VALID_LOAD_ONLY`` or ``None``.
        :raises ConfigurationError: if ``load_only`` is not an allowed variant.
        """
        super().__init__()

        if load_only is not None and load_only not in VALID_LOAD_ONLY:
            raise ConfigurationError(
                "Got invalid value for load_only - should be one of {}".format(
                    ", ".join(map(repr, VALID_LOAD_ONLY))
                )
            )
        self.isolated = isolated
        self.load_only = load_only

        # Because we keep track of where we got the data from
        self._parsers: Dict[Kind, List[Tuple[str, RawConfigParser]]] = {
            variant: [] for variant in OVERRIDE_ORDER
        }
        self._config: Dict[Kind, Dict[str, Any]] = {
            variant: {} for variant in OVERRIDE_ORDER
        }
        self._modified_parsers: List[Tuple[str, RawConfigParser]] = []

    def load(self) -> None:
        """Loads configuration from configuration files and environment"""
        self._load_config_files()
        if not self.isolated:
            self._load_environment_vars()

    def get_file_to_edit(self) -> Optional[str]:
        """Returns the file with highest priority in configuration"""
        assert self.load_only is not None, "Need to be specified a file to be editing"

        try:
            return self._get_parser_to_modify()[0]
        except IndexError:
            return None

    def items(self) -> Iterable[Tuple[str, Any]]:
        """Returns key-value pairs like dict.items() representing the loaded
        configuration
        """
        return self._dictionary.items()

    def get_value(self, key: str) -> Any:
        """Get a value from the configuration.

        :raises ConfigurationError: if the (normalized) key is not present.
        """
        orig_key = key
        key = _normalize_name(key)
        try:
            return self._dictionary[key]
        except KeyError:
            # disassembling triggers a more useful error message than simply
            # "No such key" in the case that the key isn't in the form command.option
            _disassemble_key(key)
            raise ConfigurationError(f"No such key - {orig_key}")

    def set_value(self, key: str, value: Any) -> None:
        """Modify a value in the configuration.

        Updates both the parser chosen for editing and the in-memory values
        of the ``load_only`` variant; nothing is written until ``save()``.
        """
        key = _normalize_name(key)
        self._ensure_have_load_only()

        assert self.load_only
        fname, parser = self._get_parser_to_modify()

        if parser is not None:
            section, name = _disassemble_key(key)

            # Modify the parser and the configuration
            if not parser.has_section(section):
                parser.add_section(section)
            parser.set(section, name, value)

        self._config[self.load_only][key] = value
        self._mark_as_modified(fname, parser)

    def unset_value(self, key: str) -> None:
        """Unset a value in the configuration.

        :raises ConfigurationError: if the key is not set in the ``load_only``
            variant, or if the parser unexpectedly fails to remove it.
        """
        orig_key = key
        key = _normalize_name(key)
        self._ensure_have_load_only()

        assert self.load_only
        if key not in self._config[self.load_only]:
            raise ConfigurationError(f"No such key - {orig_key}")

        fname, parser = self._get_parser_to_modify()

        if parser is not None:
            section, name = _disassemble_key(key)
            if not (
                parser.has_section(section) and parser.remove_option(section, name)
            ):
                # The option was not removed.
                raise ConfigurationError(
                    "Fatal Internal error [id=1]. Please report as a bug."
                )

            # The section may be empty after the option was removed.
            if not parser.items(section):
                parser.remove_section(section)
            self._mark_as_modified(fname, parser)

        del self._config[self.load_only][key]

    def save(self) -> None:
        """Save the current in-memory state.

        Every parser marked as modified is written back to its file.

        :raises ConfigurationError: if a file cannot be written; the original
            ``OSError`` is attached as the cause.
        """
        self._ensure_have_load_only()

        for fname, parser in self._modified_parsers:
            logger.info("Writing to %s", fname)

            # Ensure directory exists.
            ensure_dir(os.path.dirname(fname))

            # Ensure directory's permission(need to be writeable)
            try:
                with open(fname, "w") as f:
                    parser.write(f)
            except OSError as error:
                raise ConfigurationError(
                    f"An error occurred while writing to the configuration file "
                    f"{fname}: {error}"
                ) from error

    #
    # Private routines
    #

    def _ensure_have_load_only(self) -> None:
        """Raise ``ConfigurationError`` unless a ``load_only`` variant is set."""
        if self.load_only is None:
            raise ConfigurationError("Needed a specific file to be modifying.")
        logger.debug("Will be working with %s variant only", self.load_only)

    @property
    def _dictionary(self) -> Dict[str, Any]:
        """A dictionary representing the loaded configuration."""
        # NOTE: Dictionaries are not populated if not loaded. So, conditionals
        #       are not needed here.
        retval: Dict[str, Any] = {}

        # Later variants in OVERRIDE_ORDER overwrite earlier ones.
        for variant in OVERRIDE_ORDER:
            retval.update(self._config[variant])

        return retval

    def _load_config_files(self) -> None:
        """Loads configuration from configuration files"""
        config_files = dict(self.iter_config_files())
        if config_files[kinds.ENV][0:1] == [os.devnull]:
            logger.debug(
                "Skipping loading configuration files due to "
                "environment's PIP_CONFIG_FILE being os.devnull"
            )
            return

        for variant, files in config_files.items():
            for fname in files:
                # If there's specific variant set in `load_only`, load only
                # that variant, not the others.
                if self.load_only is not None and variant != self.load_only:
                    logger.debug("Skipping file '%s' (variant: %s)", fname, variant)
                    continue

                parser = self._load_file(variant, fname)

                # Keeping track of the parsers used
                self._parsers[variant].append((fname, parser))

    def _load_file(self, variant: Kind, fname: str) -> RawConfigParser:
        """Parse *fname* and merge its values into the *variant* configuration.

        Returns the parser that was built for the file.
        """
        logger.verbose("For variant '%s', will try loading '%s'", variant, fname)
        parser = self._construct_parser(fname)

        for section in parser.sections():
            items = parser.items(section)
            self._config[variant].update(self._normalized_keys(section, items))

        return parser

    def _construct_parser(self, fname: str) -> RawConfigParser:
        """Build a parser for *fname*, reading the file if it exists.

        :raises ConfigurationFileCouldNotBeLoaded: if the file cannot be
            decoded with the locale encoding or is not valid configuration
            syntax; the underlying exception is attached as the cause.
        """
        parser = configparser.RawConfigParser()
        # If there is no such file, don't bother reading it but create the
        # parser anyway, to hold the data.
        # Doing this is useful when modifying and saving files, where we don't
        # need to construct a parser.
        if os.path.exists(fname):
            locale_encoding = locale.getpreferredencoding(False)
            try:
                parser.read(fname, encoding=locale_encoding)
            except UnicodeDecodeError as error:
                # See https://github.com/pypa/pip/issues/4963
                raise ConfigurationFileCouldNotBeLoaded(
                    reason=f"contains invalid {locale_encoding} characters",
                    fname=fname,
                ) from error
            except configparser.Error as error:
                # See https://github.com/pypa/pip/issues/4893
                raise ConfigurationFileCouldNotBeLoaded(error=error) from error
        return parser

    def _load_environment_vars(self) -> None:
        """Loads configuration from environment variables"""
        self._config[kinds.ENV_VAR].update(
            self._normalized_keys(":env:", self.get_environ_vars())
        )

    def _normalized_keys(
        self, section: str, items: Iterable[Tuple[str, Any]]
    ) -> Dict[str, Any]:
        """Normalizes items to construct a dictionary with normalized keys.

        This routine is where the names become keys and are made the same
        regardless of source - configuration files or environment.
        """
        normalized = {}
        for name, val in items:
            key = section + "." + _normalize_name(name)
            normalized[key] = val
        return normalized

    def get_environ_vars(self) -> Iterable[Tuple[str, str]]:
        """Returns a generator with all environmental vars with prefix PIP_"""
        for key, val in os.environ.items():
            if key.startswith("PIP_"):
                # Strip the "PIP_" prefix; names in ENV_NAMES_IGNORED are skipped.
                name = key[4:].lower()
                if name not in ENV_NAMES_IGNORED:
                    yield name, val

    # XXX: This is patched in the tests.
    def iter_config_files(self) -> Iterable[Tuple[Kind, List[str]]]:
        """Yields variant and configuration files associated with it.

        This should be treated like items of a dictionary. The order
        here doesn't affect what gets overridden. That is controlled
        by OVERRIDE_ORDER. However this does control the order they are
        displayed to the user. It's probably most ergonomic to display
        things in the same order as OVERRIDE_ORDER
        """
        # SMELL: Move the conditions out of this function

        env_config_file = os.environ.get("PIP_CONFIG_FILE", None)
        config_files = get_configuration_files()

        yield kinds.GLOBAL, config_files[kinds.GLOBAL]

        # per-user config is not loaded when env_config_file exists
        should_load_user_config = not self.isolated and not (
            env_config_file and os.path.exists(env_config_file)
        )
        if should_load_user_config:
            # The legacy config file is overridden by the new config file
            yield kinds.USER, config_files[kinds.USER]

        # virtualenv config
        yield kinds.SITE, config_files[kinds.SITE]

        if env_config_file is not None:
            yield kinds.ENV, [env_config_file]
        else:
            yield kinds.ENV, []

    def get_values_in_config(self, variant: Kind) -> Dict[str, Any]:
        """Get values present in a config file"""
        return self._config[variant]

    def _get_parser_to_modify(self) -> Tuple[str, RawConfigParser]:
        """Return the ``(filename, parser)`` pair that edits should target.

        :raises ConfigurationError: if no parser was loaded for ``load_only``.
        """
        # Determine which parser to modify
        assert self.load_only
        parsers = self._parsers[self.load_only]
        if not parsers:
            # This should not happen if everything works correctly.
            raise ConfigurationError(
                "Fatal Internal error [id=2]. Please report as a bug."
            )

        # Use the highest priority parser.
        return parsers[-1]

    # XXX: This is patched in the tests.
    def _mark_as_modified(self, fname: str, parser: RawConfigParser) -> None:
        """Remember that *parser* must be written to *fname* on ``save()``."""
        file_parser_tuple = (fname, parser)
        if file_parser_tuple not in self._modified_parsers:
            self._modified_parsers.append(file_parser_tuple)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._dictionary!r})"
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
from pip._internal.distributions.base import AbstractDistribution
|
||||
from pip._internal.distributions.sdist import SourceDistribution
|
||||
from pip._internal.distributions.wheel import WheelDistribution
|
||||
from pip._internal.req.req_install import InstallRequirement
|
||||
|
||||
|
||||
def make_distribution_for_install_requirement(
    install_req: InstallRequirement,
) -> AbstractDistribution:
    """Pick the distribution wrapper matching the given InstallRequirement.

    Editable requirements are always handled as source distributions, even
    when they point at a wheel; they use the legacy logic until a modern
    standard exists for them. Non-editable wheels get a WheelDistribution
    and everything else a SourceDistribution.
    """
    # ``editable`` is checked first, so ``is_wheel`` is only consulted for
    # non-editable requirements.
    treat_as_wheel = not install_req.editable and install_req.is_wheel
    distribution_cls = WheelDistribution if treat_as_wheel else SourceDistribution
    return distribution_cls(install_req)
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
import abc
|
||||
from typing import Optional
|
||||
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.metadata.base import BaseDistribution
|
||||
from pip._internal.req import InstallRequirement
|
||||
|
||||
|
||||
class AbstractDistribution(metaclass=abc.ABCMeta):
    """A base class for handling installable artifacts.

    The requirements for anything installable are as follows:

     - we must be able to determine the requirement name
       (or we can't correctly handle the non-upgrade case).

     - for packages with setup requirements, we must also be able
       to determine their requirements without installing additional
       packages (for the same reason as run-time dependencies)

     - we must be able to create a Distribution object exposing the
       above metadata.

     - if we need to do work in the build tracker, we must be able to generate a unique
       string to identify the requirement in the build tracker.
    """

    def __init__(self, req: InstallRequirement) -> None:
        """Store the requirement this distribution wraps."""
        super().__init__()
        self.req = req

    # ``abc.abstractproperty`` is deprecated since Python 3.3; stacking
    # ``property`` on ``abc.abstractmethod`` is the supported spelling.
    @property
    @abc.abstractmethod
    def build_tracker_id(self) -> Optional[str]:
        """A string that uniquely identifies this requirement to the build tracker.

        If None, then this dist has no work to do in the build tracker, and
        ``.prepare_distribution_metadata()`` will not be called."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_metadata_distribution(self) -> BaseDistribution:
        """Return the metadata distribution object for the wrapped requirement."""
        raise NotImplementedError()

    @abc.abstractmethod
    def prepare_distribution_metadata(
        self,
        finder: PackageFinder,
        build_isolation: bool,
        check_build_deps: bool,
    ) -> None:
        """Do whatever work is needed before metadata can be read.

        Subclasses with nothing to prepare implement this as a no-op.
        """
        raise NotImplementedError()
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
from typing import Optional
|
||||
|
||||
from pip._internal.distributions.base import AbstractDistribution
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.metadata import BaseDistribution
|
||||
|
||||
|
||||
class InstalledDistribution(AbstractDistribution):
    """Represents an installed package.

    This does not need any preparation as the required information has already
    been computed.
    """

    @property
    def build_tracker_id(self) -> Optional[str]:
        """Always ``None``: there is no build-tracker work for this kind."""
        return None

    def get_metadata_distribution(self) -> BaseDistribution:
        """Return the distribution that already satisfies the requirement."""
        installed = self.req.satisfied_by
        assert installed is not None, "not actually installed"
        return installed

    def prepare_distribution_metadata(
        self,
        finder: PackageFinder,
        build_isolation: bool,
        check_build_deps: bool,
    ) -> None:
        """Nothing to prepare."""
        return None
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
import logging
|
||||
from typing import Iterable, Optional, Set, Tuple
|
||||
|
||||
from pip._internal.build_env import BuildEnvironment
|
||||
from pip._internal.distributions.base import AbstractDistribution
|
||||
from pip._internal.exceptions import InstallationError
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.metadata import BaseDistribution
|
||||
from pip._internal.utils.subprocess import runner_with_spinner_message
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SourceDistribution(AbstractDistribution):
|
||||
"""Represents a source distribution.
|
||||
|
||||
The preparation step for these needs metadata for the packages to be
|
||||
generated, either using PEP 517 or using the legacy `setup.py egg_info`.
|
||||
"""
|
||||
|
||||
@property
|
||||
def build_tracker_id(self) -> Optional[str]:
|
||||
"""Identify this requirement uniquely by its link."""
|
||||
assert self.req.link
|
||||
return self.req.link.url_without_fragment
|
||||
|
||||
def get_metadata_distribution(self) -> BaseDistribution:
|
||||
return self.req.get_dist()
|
||||
|
||||
def prepare_distribution_metadata(
|
||||
self,
|
||||
finder: PackageFinder,
|
||||
build_isolation: bool,
|
||||
check_build_deps: bool,
|
||||
) -> None:
|
||||
# Load pyproject.toml, to determine whether PEP 517 is to be used
|
||||
self.req.load_pyproject_toml()
|
||||
|
||||
# Set up the build isolation, if this requirement should be isolated
|
||||
should_isolate = self.req.use_pep517 and build_isolation
|
||||
if should_isolate:
|
||||
# Setup an isolated environment and install the build backend static
|
||||
# requirements in it.
|
||||
self._prepare_build_backend(finder)
|
||||
# Check that if the requirement is editable, it either supports PEP 660 or
|
||||
# has a setup.py or a setup.cfg. This cannot be done earlier because we need
|
||||
# to setup the build backend to verify it supports build_editable, nor can
|
||||
# it be done later, because we want to avoid installing build requirements
|
||||
# needlessly. Doing it here also works around setuptools generating
|
||||
# UNKNOWN.egg-info when running get_requires_for_build_wheel on a directory
|
||||
# without setup.py nor setup.cfg.
|
||||
self.req.isolated_editable_sanity_check()
|
||||
# Install the dynamic build requirements.
|
||||
self._install_build_reqs(finder)
|
||||
# Check if the current environment provides build dependencies
|
||||
should_check_deps = self.req.use_pep517 and check_build_deps
|
||||
if should_check_deps:
|
||||
pyproject_requires = self.req.pyproject_requires
|
||||
assert pyproject_requires is not None
|
||||
conflicting, missing = self.req.build_env.check_requirements(
|
||||
pyproject_requires
|
||||
)
|
||||
if conflicting:
|
||||
self._raise_conflicts("the backend dependencies", conflicting)
|
||||
if missing:
|
||||
self._raise_missing_reqs(missing)
|
||||
self.req.prepare_metadata()
|
||||
|
||||
def _prepare_build_backend(self, finder: PackageFinder) -> None:
|
||||
# Isolate in a BuildEnvironment and install the build-time
|
||||
# requirements.
|
||||
pyproject_requires = self.req.pyproject_requires
|
||||
assert pyproject_requires is not None
|
||||
|
||||
self.req.build_env = BuildEnvironment()
|
||||
self.req.build_env.install_requirements(
|
||||
finder, pyproject_requires, "overlay", kind="build dependencies"
|
||||
)
|
||||
conflicting, missing = self.req.build_env.check_requirements(
|
||||
self.req.requirements_to_check
|
||||
)
|
||||
if conflicting:
|
||||
self._raise_conflicts("PEP 517/518 supported requirements", conflicting)
|
||||
if missing:
|
||||
logger.warning(
|
||||
"Missing build requirements in pyproject.toml for %s.",
|
||||
self.req,
|
||||
)
|
||||
logger.warning(
|
||||
"The project does not specify a build backend, and "
|
||||
"pip cannot fall back to setuptools without %s.",
|
||||
" and ".join(map(repr, sorted(missing))),
|
||||
)
|
||||
|
||||
def _get_build_requires_wheel(self) -> Iterable[str]:
|
||||
with self.req.build_env:
|
||||
runner = runner_with_spinner_message("Getting requirements to build wheel")
|
||||
backend = self.req.pep517_backend
|
||||
assert backend is not None
|
||||
with backend.subprocess_runner(runner):
|
||||
return backend.get_requires_for_build_wheel()
|
||||
|
||||
def _get_build_requires_editable(self) -> Iterable[str]:
|
||||
with self.req.build_env:
|
||||
runner = runner_with_spinner_message(
|
||||
"Getting requirements to build editable"
|
||||
)
|
||||
backend = self.req.pep517_backend
|
||||
assert backend is not None
|
||||
with backend.subprocess_runner(runner):
|
||||
return backend.get_requires_for_build_editable()
|
||||
|
||||
def _install_build_reqs(self, finder: PackageFinder) -> None:
|
||||
# Install any extra build dependencies that the backend requests.
|
||||
# This must be done in a second pass, as the pyproject.toml
|
||||
# dependencies must be installed before we can call the backend.
|
||||
if (
|
||||
self.req.editable
|
||||
and self.req.permit_editable_wheels
|
||||
and self.req.supports_pyproject_editable()
|
||||
):
|
||||
build_reqs = self._get_build_requires_editable()
|
||||
else:
|
||||
build_reqs = self._get_build_requires_wheel()
|
||||
conflicting, missing = self.req.build_env.check_requirements(build_reqs)
|
||||
if conflicting:
|
||||
self._raise_conflicts("the backend dependencies", conflicting)
|
||||
self.req.build_env.install_requirements(
|
||||
finder, missing, "normal", kind="backend dependencies"
|
||||
)
|
||||
|
||||
def _raise_conflicts(
|
||||
self, conflicting_with: str, conflicting_reqs: Set[Tuple[str, str]]
|
||||
) -> None:
|
||||
format_string = (
|
||||
"Some build dependencies for {requirement} "
|
||||
"conflict with {conflicting_with}: {description}."
|
||||
)
|
||||
error_message = format_string.format(
|
||||
requirement=self.req,
|
||||
conflicting_with=conflicting_with,
|
||||
description=", ".join(
|
||||
f"{installed} is incompatible with {wanted}"
|
||||
for installed, wanted in sorted(conflicting_reqs)
|
||||
),
|
||||
)
|
||||
raise InstallationError(error_message)
|
||||
|
||||
def _raise_missing_reqs(self, missing: Set[str]) -> None:
    """Raise an ``InstallationError`` listing the missing build deps.

    :param missing: Requirement strings that are not installed; they are
        sorted and shown with ``repr`` quoting.
    """
    template = (
        "Some build dependencies for {requirement} are missing: {missing}."
    )
    quoted = [repr(name) for name in sorted(missing)]
    raise InstallationError(
        template.format(requirement=self.req, missing=", ".join(quoted))
    )
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
from typing import Optional
|
||||
|
||||
from pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pip._internal.distributions.base import AbstractDistribution
|
||||
from pip._internal.index.package_finder import PackageFinder
|
||||
from pip._internal.metadata import (
|
||||
BaseDistribution,
|
||||
FilesystemWheel,
|
||||
get_wheel_distribution,
|
||||
)
|
||||
|
||||
|
||||
class WheelDistribution(AbstractDistribution):
    """A distribution backed by a wheel file.

    Wheels can be unpacked directly, so no preparation step is required.
    """

    @property
    def build_tracker_id(self) -> Optional[str]:
        """Always ``None`` for a wheel distribution."""
        return None

    def get_metadata_distribution(self) -> BaseDistribution:
        """Load the wheel's metadata into memory and wrap it.

        The returned distribution does not rely on the wheel file or on the
        requirement afterwards.
        """
        wheel_path = self.req.local_file_path
        assert wheel_path, "Set as part of preparation during download"
        project_name = self.req.name
        assert project_name, "Wheels are never unnamed"
        return get_wheel_distribution(
            FilesystemWheel(wheel_path),
            canonicalize_name(project_name),
        )

    def prepare_distribution_metadata(
        self,
        finder: PackageFinder,
        build_isolation: bool,
        check_build_deps: bool,
    ) -> None:
        """Do nothing; wheel metadata needs no preparation."""
        return None
|
||||
|
|
@ -0,0 +1,728 @@
|
|||
"""Exceptions used throughout package.
|
||||
|
||||
This module MUST NOT try to import from anything within `pip._internal` to
|
||||
operate. This is expected to be importable from any/all files within the
|
||||
subpackage and, thus, should not depend on them.
|
||||
"""
|
||||
|
||||
import configparser
|
||||
import contextlib
|
||||
import locale
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
from itertools import chain, groupby, repeat
|
||||
from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from pip._vendor.requests.models import Request, Response
|
||||
from pip._vendor.rich.console import Console, ConsoleOptions, RenderResult
|
||||
from pip._vendor.rich.markup import escape
|
||||
from pip._vendor.rich.text import Text
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from hashlib import _Hash
|
||||
from typing import Literal
|
||||
|
||||
from pip._internal.metadata import BaseDistribution
|
||||
from pip._internal.req.req_install import InstallRequirement
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
#
|
||||
# Scaffolding
|
||||
#
|
||||
def _is_kebab_case(s: str) -> bool:
    """Return True if *s* is lowercase ASCII words joined by single hyphens.

    ``re.fullmatch`` is used rather than ``re.match`` with ``$``, because
    ``$`` also matches just before a trailing newline and would therefore
    accept strings such as ``"abc\\n"``.
    """
    return re.fullmatch(r"[a-z]+(?:-[a-z]+)*", s) is not None
|
||||
|
||||
|
||||
def _prefix_with_indent(
    s: Union[Text, str],
    console: Console,
    *,
    prefix: str,
    indent: str,
) -> Text:
    """Render *s* with *prefix* before it and *indent* before later lines.

    A plain string is first rendered by the console; a ``Text`` is used as
    is. The text is split into lines, which are re-joined with a newline
    followed by the rendered *indent*.
    """
    body = s if isinstance(s, Text) else console.render_str(s)

    rendered_prefix = console.render_str(prefix, overflow="ignore")
    line_joiner = console.render_str(f"\n{indent}", overflow="ignore")
    return rendered_prefix + line_joiner.join(body.split(allow_blank=True))
|
||||
|
||||
|
||||
class PipError(Exception):
    """The base pip error.

    Every other exception class defined in this module derives from it.
    """
|
||||
|
||||
|
||||
class DiagnosticPipError(PipError):
    """An error that presents diagnostic information to the user.

    This contains a bunch of logic to enable pretty presentation of our
    error messages. Each error gets a unique reference. Each error can also
    include additional context, a hint and/or a note -- which are presented
    with the main error message in a consistent style.

    This is adapted from the error output styling in `sphinx-theme-builder`.
    """

    # Kebab-case identifier of the error; subclasses set it as a class
    # attribute, or callers pass it to ``__init__``.
    reference: str

    def __init__(
        self,
        *,
        kind: 'Literal["error", "warning"]' = "error",
        reference: Optional[str] = None,
        message: Union[str, Text],
        context: Optional[Union[str, Text]],
        hint_stmt: Optional[Union[str, Text]],
        note_stmt: Optional[Union[str, Text]] = None,
        link: Optional[str] = None,
    ) -> None:
        """
        :param kind: "error" (rendered in red) or "warning" (yellow).
        :param reference: Kebab-case identifier; falls back to the class
            attribute ``reference`` when omitted.
        :param message: The main message.
        :param context: Optional detail shown under the message.
        :param hint_stmt: Optional "hint" line.
        :param note_stmt: Optional "note" line.
        :param link: Optional URL printed last.
        """
        # Ensure a proper reference is provided.
        if reference is None:
            assert hasattr(self, "reference"), "error reference not provided!"
            reference = self.reference
        assert _is_kebab_case(reference), "error reference must be kebab-case!"

        self.kind = kind
        self.reference = reference

        self.message = message
        self.context = context

        self.note_stmt = note_stmt
        self.hint_stmt = hint_stmt

        self.link = link

        super().__init__(f"<{self.__class__.__name__}: {self.reference}>")

    def __repr__(self) -> str:
        return (
            f"<{self.__class__.__name__}("
            f"reference={self.reference!r}, "
            f"message={self.message!r}, "
            f"context={self.context!r}, "
            f"note_stmt={self.note_stmt!r}, "
            f"hint_stmt={self.hint_stmt!r}"
            ")>"
        )

    def __rich_console__(
        self,
        console: Console,
        options: ConsoleOptions,
    ) -> RenderResult:
        """Yield the renderables that make up the formatted diagnostic."""
        colour = "red" if self.kind == "error" else "yellow"

        yield f"[{colour} bold]{self.kind}[/]: [bold]{self.reference}[/]"
        yield ""

        if not options.ascii_only:
            # Present the main message, with relevant context indented.
            # NOTE(review): the indent literals below look narrower than
            # their prefixes; confirm their widths against the upstream file.
            if self.context is not None:
                yield _prefix_with_indent(
                    self.message,
                    console,
                    prefix=f"[{colour}]×[/] ",
                    indent=f"[{colour}]│[/] ",
                )
                yield _prefix_with_indent(
                    self.context,
                    console,
                    prefix=f"[{colour}]╰─>[/] ",
                    indent=f"[{colour}] [/] ",
                )
            else:
                # Use the kind-dependent colour here as well, so a warning
                # without context is not shown with a red bullet.
                yield _prefix_with_indent(
                    self.message,
                    console,
                    prefix=f"[{colour}]×[/] ",
                    indent=" ",
                )
        else:
            yield self.message
            if self.context is not None:
                yield ""
                yield self.context

        if self.note_stmt is not None or self.hint_stmt is not None:
            yield ""

        if self.note_stmt is not None:
            yield _prefix_with_indent(
                self.note_stmt,
                console,
                prefix="[magenta bold]note[/]: ",
                indent=" ",
            )
        if self.hint_stmt is not None:
            yield _prefix_with_indent(
                self.hint_stmt,
                console,
                prefix="[cyan bold]hint[/]: ",
                indent=" ",
            )

        if self.link is not None:
            yield ""
            yield f"Link: {self.link}"
|
||||
|
||||
|
||||
#
|
||||
# Actual Errors
|
||||
#
|
||||
class ConfigurationError(PipError):
    """General exception in configuration."""
|
||||
|
||||
|
||||
class InstallationError(PipError):
    """General exception during installation."""
|
||||
|
||||
|
||||
class UninstallationError(PipError):
    """General exception during uninstallation."""
|
||||
|
||||
|
||||
class MissingPyProjectBuildRequires(DiagnosticPipError):
    """Raised when pyproject.toml has `build-system`, but no `build-system.requires`."""

    reference = "missing-pyproject-build-system-requires"

    def __init__(self, *, package: str) -> None:
        """
        :param package: Description of the offending package; it is escaped
            before being placed in the message.
        """
        headline = f"Can not process {escape(package)}"
        explanation = Text(
            "This package has an invalid pyproject.toml file.\n"
            "The [build-system] table is missing the mandatory `requires` key."
        )
        super().__init__(
            message=headline,
            context=explanation,
            note_stmt="This is an issue with the package mentioned above, not pip.",
            hint_stmt=Text("See PEP 518 for the detailed specification."),
        )
|
||||
|
||||
|
||||
class InvalidPyProjectBuildRequires(DiagnosticPipError):
    """Raised when pyproject.toml has an invalid `build-system.requires`."""

    reference = "invalid-pyproject-build-system-requires"

    def __init__(self, *, package: str, reason: str) -> None:
        """
        :param package: Description of the offending package; it is escaped
            before being placed in the message.
        :param reason: Why the `requires` value is invalid; appended to the
            context on its own line.
        """
        headline = f"Can not process {escape(package)}"
        explanation = Text(
            "This package has an invalid `build-system.requires` key in "
            f"pyproject.toml.\n{reason}"
        )
        super().__init__(
            message=headline,
            context=explanation,
            note_stmt="This is an issue with the package mentioned above, not pip.",
            hint_stmt=Text("See PEP 518 for the detailed specification."),
        )
|
||||
|
||||
|
||||
class NoneMetadataError(PipError):
    """Raised when accessing a Distribution's "METADATA" or "PKG-INFO".

    It signals an inconsistency: the Distribution claims to have the
    metadata file (otherwise ``FileNotFoundError`` should be raised
    instead) but cannot actually produce its content. Permission errors
    are one possible cause.
    """

    def __init__(
        self,
        dist: "BaseDistribution",
        metadata_name: str,
    ) -> None:
        """
        :param dist: A Distribution object.
        :param metadata_name: The name of the metadata being accessed
            (can be "METADATA" or "PKG-INFO").
        """
        self.metadata_name = metadata_name
        self.dist = dist

    def __str__(self) -> str:
        # The distribution itself is interpolated because its string form
        # carries more information, such as the version and location.
        distribution = self.dist
        return (
            f"None {self.metadata_name} metadata found for distribution: "
            f"{distribution}"
        )
|
||||
|
||||
|
||||
class UserInstallationInvalid(InstallationError):
    """A --user install is requested on an environment without user site."""

    def __str__(self) -> str:
        # Fixed message; it does not depend on the constructor arguments.
        return "User base directory is not specified"
|
||||
|
||||
|
||||
class InvalidSchemeCombination(InstallationError):
    """Several options that cannot be set together were all given.

    The constructor arguments are the option names; the message lists all
    but the last separated by commas, then "and" the last one.
    """

    def __str__(self) -> str:
        leading = ", ".join(map(str, self.args[:-1]))
        final = self.args[-1]
        return f"Cannot set {leading} and {final} together"
|
||||
|
||||
|
||||
class DistributionNotFound(InstallationError):
    """Raised when a distribution cannot be found to satisfy a requirement."""
|
||||
|
||||
|
||||
class RequirementsFileParseError(InstallationError):
    """Raised when a general error occurs parsing a requirements file line."""
|
||||
|
||||
|
||||
class BestVersionAlreadyInstalled(PipError):
    """Raised when the most up-to-date version of a package is already
    installed.
    """
|
||||
|
||||
|
||||
class BadCommand(PipError):
    """Raised when virtualenv or a command is not found."""
|
||||
|
||||
|
||||
class CommandError(PipError):
    """Raised when there is an error in command-line arguments."""
|
||||
|
||||
|
||||
class PreviousBuildDirError(PipError):
    """Raised when there's a previous conflicting build directory."""
|
||||
|
||||
|
||||
class NetworkConnectionError(PipError):
    """HTTP connection error"""

    def __init__(
        self,
        error_msg: str,
        response: Optional[Response] = None,
        request: Optional[Request] = None,
    ) -> None:
        """
        Initialize NetworkConnectionError with `request` and `response`
        objects.

        If no request is given but the response carries one, that request
        is stored on the instance.
        """
        self.response = response
        self.error_msg = error_msg

        borrow_from_response = (
            response is not None
            and not request
            and hasattr(response, "request")
        )
        self.request = response.request if borrow_from_response else request

        super().__init__(error_msg, response, request)

    def __str__(self) -> str:
        message = self.error_msg
        return str(message)
|
||||
|
||||
|
||||
class InvalidWheelFilename(InstallationError):
    """Raised for an invalid wheel filename."""
|
||||
|
||||
|
||||
class UnsupportedWheel(InstallationError):
    """Raised for an unsupported wheel."""
|
||||
|
||||
|
||||
class InvalidWheel(InstallationError):
    """Invalid (e.g. corrupt) wheel."""

    def __init__(self, location: str, name: str):
        """
        :param location: Where the wheel is located.
        :param name: Name of the wheel, quoted in the message.
        """
        self.name = name
        self.location = location

    def __str__(self) -> str:
        wheel_name, where = self.name, self.location
        return f"Wheel '{wheel_name}' located at {where} is invalid."
|
||||
|
||||
|
||||
class MetadataInconsistent(InstallationError):
    """Built metadata contains inconsistent information.

    Raised when values in the metadata (e.g. name and version) do not match
    what was previously obtained from the sdist filename, a user-supplied
    ``#egg=`` value, or an install requirement name.
    """

    def __init__(
        self, ireq: "InstallRequirement", field: str, f_val: str, m_val: str
    ) -> None:
        """
        :param ireq: The requirement being processed.
        :param field: Name of the inconsistent metadata field.
        :param f_val: The expected value.
        :param m_val: The value found in the metadata.
        """
        self.ireq, self.field = ireq, field
        self.f_val, self.m_val = f_val, m_val

    def __str__(self) -> str:
        subject = f"Requested {self.ireq} has inconsistent {self.field}: "
        detail = f"expected {self.f_val!r}, but metadata has {self.m_val!r}"
        return subject + detail
|
||||
|
||||
|
||||
class InstallationSubprocessError(DiagnosticPipError, InstallationError):
    """A subprocess call failed."""

    reference = "subprocess-exited-with-error"

    def __init__(
        self,
        *,
        command_description: str,
        exit_code: int,
        output_lines: Optional[List[str]],
    ) -> None:
        """
        :param command_description: What was being run; escaped before it
            is placed in the message.
        :param exit_code: The subprocess exit code.
        :param output_lines: Captured output lines, or ``None`` when the
            output is to be found above the error instead.
        """
        if output_lines is not None:
            header = Text.from_markup(
                f"[red][{len(output_lines)} lines of output][/]\n"
            )
            captured = Text("".join(output_lines))
            footer = Text.from_markup(R"[red]\[end of output][/]")
            output_prompt = header + captured + footer
        else:
            output_prompt = Text("See above for output.")

        headline = (
            f"[green]{escape(command_description)}[/] did not run successfully.\n"
            f"exit code: {exit_code}"
        )
        origin_note = (
            "This error originates from a subprocess, and is likely not a "
            "problem with pip."
        )
        super().__init__(
            message=headline,
            context=output_prompt,
            hint_stmt=None,
            note_stmt=origin_note,
        )

        self.command_description = command_description
        self.exit_code = exit_code

    def __str__(self) -> str:
        description, code = self.command_description, self.exit_code
        return f"{description} exited with {code}"
|
||||
|
||||
|
||||
class MetadataGenerationFailed(InstallationSubprocessError, InstallationError):
    """Generating a package's metadata failed."""

    reference = "metadata-generation-failed"

    def __init__(
        self,
        *,
        package_details: str,
    ) -> None:
        """
        :param package_details: Description of the package, escaped and
            shown as the error context.
        """
        # Start the lookup after InstallationSubprocessError in the MRO, so
        # that class's own initializer is bypassed.
        diagnostic_init = super(InstallationSubprocessError, self).__init__
        diagnostic_init(
            message="Encountered error while generating package metadata.",
            context=escape(package_details),
            hint_stmt="See above for details.",
            note_stmt="This is an issue with the package mentioned above, not pip.",
        )

    def __str__(self) -> str:
        return "metadata generation failed"
|
||||
|
||||
|
||||
class HashErrors(InstallationError):
    """Multiple HashError instances rolled into one for reporting"""

    def __init__(self) -> None:
        self.errors: List["HashError"] = []

    def append(self, error: "HashError") -> None:
        """Add *error* to the collection."""
        self.errors.append(error)

    def __str__(self) -> str:
        """Report the errors grouped by class, in ascending ``order``.

        Each group contributes its class-level ``head`` followed by the
        ``body()`` of every error in it. The stored list is sorted in place.
        """
        self.errors.sort(key=lambda err: err.order)
        report = []
        for error_cls, group in groupby(self.errors, lambda err: err.__class__):
            report.append(error_cls.head)
            for err in group:
                report.append(err.body())
        # Joining an empty list already yields the empty string.
        return "\n".join(report)

    def __bool__(self) -> bool:
        return len(self.errors) > 0
|
||||
|
||||
|
||||
class HashError(InstallationError):
    """
    A failure to verify a package against known-good hashes

    :cvar order: An int sorting hash exception classes by difficulty of
        recovery (lower being harder), so the user doesn't bother fretting
        about unpinned packages when there are deeper issues, like VCS
        dependencies, to deal with. Also keeps error reports in a
        deterministic order.
    :cvar head: A section heading for display above potentially many
        exceptions of this kind
    :ivar req: The InstallRequirement that triggered this error. This is
        pasted on after the exception is instantiated, because it's not
        typically available earlier.
    """

    req: Optional["InstallRequirement"] = None
    head = ""
    order: int = -1

    def body(self) -> str:
        """Return the summary line shown under the heading.

        By default this is just a description of the triggering
        requirement, taken from ``self.req``.
        """
        description = self._requirement_name()
        return f" {description}"

    def __str__(self) -> str:
        return f"{self.head}\n{self.body()}"

    def _requirement_name(self) -> str:
        """Describe the requirement that triggered this error.

        Returns the string form of ``self.req``, or a placeholder when no
        requirement has been attached.
        """
        if self.req:
            return str(self.req)
        return "unknown package"
|
||||
|
||||
|
||||
class VcsHashUnsupported(HashError):
    """A hash was provided for a version-control-system-based requirement, but
    we don't have a method for hashing those."""

    # Lowest ``order`` among the HashError subclasses visible in this module.
    order = 0
    head = (
        "Can't verify hashes for these requirements because we don't "
        "have a way to hash version control repositories:"
    )
|
||||
|
||||
|
||||
class DirectoryUrlHashUnsupported(HashError):
    """A hash was provided for a file:// requirement that points to a
    directory, so its hash cannot be verified."""

    order = 1
    head = (
        "Can't verify hashes for these file:// requirements because they "
        "point to directories:"
    )
|
||||
|
||||
|
||||
class HashMissing(HashError):
    """A hash was needed for a requirement but is absent."""

    order = 2
    head = (
        "Hashes are required in --require-hashes mode, but they are "
        "missing from some requirements. Here is a list of those "
        "requirements along with the hashes their downloaded archives "
        "actually had. Add lines like these to your requirements files to "
        "prevent tampering. (If you did not enable --require-hashes "
        "manually, note that it turns on automatically when any package "
        "has a hash.)"
    )

    def __init__(self, gotten_hash: str) -> None:
        """
        :param gotten_hash: The hash of the (possibly malicious) archive we
            just downloaded
        """
        self.gotten_hash = gotten_hash

    def body(self) -> str:
        """Return a requirements-file style line carrying the computed hash."""
        # Imported here to dodge a circular import.
        from pip._internal.utils.hashes import FAVORITE_HASH

        shown = None
        if self.req:
            if self.req.is_direct:
                # For URL-based requirements show the original URL from the
                # requirements file rather than the package name, so the
                # output can be copied straight back into that file.
                shown = self.req.original_link
            else:
                # Defensive lookup, in case something unexpected was fed to
                # InstallRequirement's constructor.
                shown = getattr(self.req, "req", None)

        return " {} --hash={}:{}".format(
            shown or "unknown package", FAVORITE_HASH, self.gotten_hash
        )
|
||||
|
||||
|
||||
class HashUnpinned(HashError):
    """A requirement had a hash specified but was not pinned to a specific
    version."""

    order = 3
    # Heading shown above the list of unpinned requirements.
    head = (
        "In --require-hashes mode, all requirements must have their "
        "versions pinned with ==. These do not:"
    )
|
||||
|
||||
|
||||
class HashMismatch(HashError):
    """
    Distribution file hash values don't match.

    :ivar package_name: The name of the package that triggered the hash
        mismatch. Feel free to write to this after the exception is raised
        to improve its error message.
    """

    order = 4
    head = (
        "THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS "
        "FILE. If you have updated the package versions, please update "
        "the hashes. Otherwise, examine the package contents carefully; "
        "someone may have tampered with them."
    )

    def __init__(self, allowed: Dict[str, List[str]], gots: Dict[str, "_Hash"]) -> None:
        """
        :param allowed: A dict of algorithm names pointing to lists of allowed
            hex digests
        :param gots: A dict of algorithm names pointing to hashes we
            actually got from the files under suspicion
        """
        self.gots = gots
        self.allowed = allowed

    def body(self) -> str:
        requirement = self._requirement_name()
        comparison = self._hash_comparison()
        return f" {requirement}:\n{comparison}"

    def _hash_comparison(self) -> str:
        """
        Return a comparison of actual and expected hash values.

        For each algorithm there is one "Expected" line per allowed digest
        (the first labelled with the algorithm name, later ones with "or"),
        followed by a "Got" line with the digest actually computed.
        """
        report: List[str] = []
        for algorithm, digests in self.allowed.items():
            # The first label is the algorithm name, every later one " or".
            # NOTE(review): the original comment says the space literals are
            # hard-coded for 6-char hash names; confirm their widths.
            labels = chain([algorithm], repeat(" or"))
            for digest in digests:
                report.append(f" Expected {next(labels)} {digest}")
            report.append(
                f" Got {self.gots[algorithm].hexdigest()}\n"
            )
        return "\n".join(report)
|
||||
|
||||
|
||||
class UnsupportedPythonVersion(InstallationError):
    """Unsupported Python version according to the Requires-Python package
    metadata."""
|
||||
|
||||
|
||||
class ConfigurationFileCouldNotBeLoaded(ConfigurationError):
    """When there are errors while loading a configuration file"""

    def __init__(
        self,
        reason: str = "could not be loaded",
        fname: Optional[str] = None,
        error: Optional[configparser.Error] = None,
    ) -> None:
        """
        :param reason: Short description of what went wrong.
        :param fname: The configuration file involved, if known.
        :param error: The underlying ``configparser`` error, if any.
        """
        super().__init__(error)
        self.reason = reason
        self.fname = fname
        self.error = error

    def __str__(self) -> str:
        """Return the message; *fname* takes precedence over *error*.

        All constructor arguments are optional, so the case where neither
        a file name nor an error was supplied is handled explicitly rather
        than asserted: formatting an exception must not itself raise.
        """
        if self.fname is not None:
            message_part = f" in {self.fname}."
        elif self.error is not None:
            message_part = f".\n{self.error}\n"
        else:
            message_part = "."
        return f"Configuration file {self.reason}{message_part}"
|
||||
|
||||
|
||||
_DEFAULT_EXTERNALLY_MANAGED_ERROR = f"""\
|
||||
The Python environment under {sys.prefix} is managed externally, and may not be
|
||||
manipulated by the user. Please use specific tooling from the distributor of
|
||||
the Python installation to interact with this environment instead.
|
||||
"""
|
||||
|
||||
|
||||
class ExternallyManagedEnvironment(DiagnosticPipError):
    """The current environment is externally managed.

    This is raised when the current environment is externally managed, as
    defined by `PEP 668`_. The ``EXTERNALLY-MANAGED`` configuration is checked
    and displayed when the error is bubbled up to the user.

    :param error: The error message read from ``EXTERNALLY-MANAGED``.
    """

    reference = "externally-managed-environment"

    def __init__(self, error: Optional[str]) -> None:
        # Fall back to the generic message when none was supplied.
        details = _DEFAULT_EXTERNALLY_MANAGED_ERROR if error is None else error
        override_note = (
            "If you believe this is a mistake, please contact your "
            "Python installation or OS distribution provider. "
            "You can override this, at the risk of breaking your Python "
            "installation or OS, by passing --break-system-packages."
        )
        super().__init__(
            message="This environment is externally managed",
            context=Text(details),
            note_stmt=override_note,
            hint_stmt=Text("See PEP 668 for the detailed specification."),
        )

    @staticmethod
    def _iter_externally_managed_error_keys() -> Iterator[str]:
        """Yield candidate config keys, most locale-specific first.

        With a message locale the keys are ``Error-<lang>``, then
        ``Error-<prefix>`` for the part of the language before a "-" and
        before a "_" (when present), and finally plain ``Error``.
        """
        # LC_MESSAGES is in POSIX, but not the C standard. The most common
        # platform that does not implement this category is Windows, where
        # using other categories for console message localization is equally
        # unreliable, so we fall back to the locale-less vendor message. This
        # can always be re-evaluated when a vendor proposes a new alternative.
        try:
            messages_category = locale.LC_MESSAGES
        except AttributeError:
            language: Optional[str] = None
        else:
            language, _ = locale.getlocale(messages_category)

        if language is not None:
            yield f"Error-{language}"
            for separator in ("-", "_"):
                head, matched, _ = language.partition(separator)
                if matched:
                    yield f"Error-{head}"
        yield "Error"

    @classmethod
    def from_config(
        cls,
        config: Union[pathlib.Path, str],
    ) -> "ExternallyManagedEnvironment":
        """Build the error from an ``EXTERNALLY-MANAGED`` style config file.

        The first candidate key present in the ``externally-managed``
        section supplies the message. A missing section or key, or a file
        that cannot be read or parsed (logged as a warning), yields the
        default message.
        """
        reader = configparser.ConfigParser(interpolation=None)
        try:
            reader.read(config, encoding="utf-8")
            managed_section = reader["externally-managed"]
            for candidate in cls._iter_externally_managed_error_keys():
                with contextlib.suppress(KeyError):
                    return cls(managed_section[candidate])
        except KeyError:
            pass
        except (OSError, UnicodeDecodeError, configparser.ParsingError):
            from pip._internal.utils._log import VERBOSE

            # Attach the traceback only when verbose logging is enabled.
            show_traceback = logger.isEnabledFor(VERBOSE)
            logger.warning("Failed to read %s", config, exc_info=show_traceback)
        return cls(None)
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
"""Index interaction code
|
||||
"""
|
||||
|
|
@ -0,0 +1,507 @@
|
|||
"""
|
||||
The main purpose of this module is to expose LinkCollector.collect_sources().
|
||||
"""
|
||||
|
||||
import collections
|
||||
import email.message
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from html.parser import HTMLParser
|
||||
from optparse import Values
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
MutableMapping,
|
||||
NamedTuple,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pip._vendor import requests
|
||||
from pip._vendor.requests import Response
|
||||
from pip._vendor.requests.exceptions import RetryError, SSLError
|
||||
|
||||
from pip._internal.exceptions import NetworkConnectionError
|
||||
from pip._internal.models.link import Link
|
||||
from pip._internal.models.search_scope import SearchScope
|
||||
from pip._internal.network.session import PipSession
|
||||
from pip._internal.network.utils import raise_for_status
|
||||
from pip._internal.utils.filetypes import is_archive_file
|
||||
from pip._internal.utils.misc import redact_auth_from_url
|
||||
from pip._internal.vcs import vcs
|
||||
|
||||
from .sources import CandidatesFromPage, LinkSource, build_source
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Protocol
|
||||
else:
|
||||
Protocol = object
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ResponseHeaders = MutableMapping[str, str]
|
||||
|
||||
|
||||
def _match_vcs_scheme(url: str) -> Optional[str]:
    """Look for VCS schemes in the URL.

    A scheme matches when the URL starts with it (case-insensitively) and
    the next character is "+" or ":".

    Returns the matched VCS scheme, or None if there's no match.
    """
    # Lower-case once instead of on every loop iteration.
    lowered = url.lower()
    for scheme in vcs.schemes:
        if not lowered.startswith(scheme):
            continue
        # Slice rather than index: a URL that is exactly the scheme name has
        # no following character, and indexing would raise IndexError.
        following = url[len(scheme) : len(scheme) + 1]
        if following in ("+", ":"):
            return scheme
    return None
|
||||
|
||||
|
||||
class _NotAPIContent(Exception):
    """A response's Content-Type is not one accepted for the Simple API.

    Both values are stored as attributes and are also passed through as
    the exception ``args``.
    """

    def __init__(self, content_type: str, request_desc: str) -> None:
        Exception.__init__(self, content_type, request_desc)
        self.request_desc = request_desc
        self.content_type = content_type
|
||||
|
||||
|
||||
def _ensure_api_header(response: Response) -> None:
    """
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    """
    accepted_prefixes = (
        "text/html",
        "application/vnd.pypi.simple.v1+html",
        "application/vnd.pypi.simple.v1+json",
    )
    content_type = response.headers.get("Content-Type", "Unknown")

    # Matching is case-insensitive, but the error reports the header value
    # as it was received.
    if not content_type.lower().startswith(accepted_prefixes):
        raise _NotAPIContent(content_type, response.request.method)
|
||||
|
||||
|
||||
class _NotHTTP(Exception):
    """Raised when a URL's scheme is neither http nor https."""

    pass
|
||||
|
||||
|
||||
def _ensure_api_response(url: str, session: PipSession) -> None:
    """
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    """
    url_scheme = urllib.parse.urlsplit(url).scheme
    if url_scheme not in {"http", "https"}:
        raise _NotHTTP()

    head_response = session.head(url, allow_redirects=True)
    raise_for_status(head_response)
    _ensure_api_header(head_response)
|
||||
|
||||
|
||||
def _get_simple_response(url: str, session: PipSession) -> Response:
    """Access a Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined,
       or `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network
       failures.
    3. Check the Content-Type header to make sure we got a Simple API
       response, and raise `_NotAPIContent` otherwise.
    """
    looks_like_archive = is_archive_file(Link(url).filename)
    if looks_like_archive:
        _ensure_api_response(url, session=session)

    logger.debug("Getting page %s", redact_auth_from_url(url))

    accepted_types = [
        "application/vnd.pypi.simple.v1+json",
        "application/vnd.pypi.simple.v1+html; q=0.1",
        "text/html; q=0.01",
    ]
    request_headers = {
        "Accept": ", ".join(accepted_types),
        # We don't want to blindly return cached data for /simple/, because
        # authors generally expect that twine upload && pip install will
        # function, but if they've done a pip install in the last ~10
        # minutes it won't. Thus by setting this to zero we will not
        # blindly use any cached data. The benefit of using max-age=0
        # instead of no-cache is that we still support conditional
        # requests, so we still minimize traffic sent in cases where the
        # page hasn't changed at all; we just always incur the round trip
        # for the conditional GET now instead of only once per 10 minutes.
        # For more information, please see pypa/pip#5670.
        "Cache-Control": "max-age=0",
    }
    page_response = session.get(url, headers=request_headers)
    raise_for_status(page_response)

    # The archive check above only works if the url ends with something
    # that looks like an archive, which is not a requirement of a url.
    # Unless we issue a HEAD request on every url we cannot know ahead of
    # time for sure whether something is a Simple API response, but we can
    # check after we've downloaded it.
    _ensure_api_header(page_response)

    logger.debug(
        "Fetched page %s as %s",
        redact_auth_from_url(url),
        page_response.headers.get("Content-Type", "Unknown"),
    )

    return page_response
|
||||
|
||||
|
||||
def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
|
||||
"""Determine if we have any encoding information in our headers."""
|
||||
if headers and "Content-Type" in headers:
|
||||
m = email.message.Message()
|
||||
m["content-type"] = headers["Content-Type"]
|
||||
charset = m.get_param("charset")
|
||||
if charset:
|
||||
return str(charset)
|
||||
return None
|
||||
|
||||
|
||||
class CacheablePageContent:
    """Hashable wrapper around a page, keyed on the page's URL.

    Two wrappers compare equal (and hash alike) when their pages share the
    same ``url``, which makes instances usable as ``lru_cache`` keys.
    """

    def __init__(self, page: "IndexContent") -> None:
        # Only pages that opted in to link-parsing caching may be wrapped.
        assert page.cache_link_parsing
        self.page = page

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, type(self)):
            return False
        return self.page.url == other.page.url

    def __hash__(self) -> int:
        return hash(self.page.url)
|
||||
|
||||
|
||||
class ParseLinks(Protocol):
    """Structural type for callables that turn a page into an iterable of links."""

    def __call__(self, page: "IndexContent") -> Iterable[Link]:
        """Parse ``page`` and return the links found in it."""
        ...
|
||||
|
||||
|
||||
def with_cached_index_content(fn: ParseLinks) -> ParseLinks:
    """
    Decorate a link-parsing function so its results are memoized per page URL.

    The wrapped function always returns a list. Results are cached through an
    unbounded ``lru_cache`` keyed by ``CacheablePageContent``; pages whose
    ``cache_link_parsing`` attribute is false bypass the cache and are parsed
    on every call.
    """

    @functools.lru_cache(maxsize=None)
    def _parse_cached(key: CacheablePageContent) -> List[Link]:
        return list(fn(key.page))

    @functools.wraps(fn)
    def _dispatch(page: "IndexContent") -> List[Link]:
        if not page.cache_link_parsing:
            return list(fn(page))
        return _parse_cached(CacheablePageContent(page))

    return _dispatch
|
||||
|
||||
|
||||
@with_cached_index_content
def parse_links(page: "IndexContent") -> Iterable[Link]:
    """
    Yield a Link for every file/anchor described by a Simple API page.

    JSON pages (``application/vnd.pypi.simple.v1+json``) are read from their
    ``files`` array; anything else is parsed as HTML. Entries for which the
    Link factory returns ``None`` are skipped.
    """
    if page.content_type.lower().startswith("application/vnd.pypi.simple.v1+json"):
        payload = json.loads(page.content)
        for file_entry in payload.get("files", []):
            json_link = Link.from_json(file_entry, page.url)
            if json_link is not None:
                yield json_link
        return

    html_parser = HTMLLinkParser(page.url)
    # Fall back to UTF-8 when the response did not declare an encoding.
    html_parser.feed(page.content.decode(page.encoding or "utf-8"))

    page_url = page.url
    # A <base href> in the document overrides the page URL for resolution.
    resolved_base = html_parser.base_url or page_url
    for anchor_attrs in html_parser.anchors:
        html_link = Link.from_element(
            anchor_attrs, page_url=page_url, base_url=resolved_base
        )
        if html_link is not None:
            yield html_link
|
||||
|
||||
|
||||
class IndexContent:
    """One fetched response (page) together with the URL it came from."""

    def __init__(
        self,
        content: bytes,
        content_type: str,
        encoding: Optional[str],
        url: str,
        cache_link_parsing: bool = True,
    ) -> None:
        """
        :param content: the raw body of the response.
        :param content_type: the value of the response's Content-Type header.
        :param encoding: the encoding to decode the given content.
        :param url: the URL from which the HTML was downloaded.
        :param cache_link_parsing: whether links parsed from this page's url
                                   should be cached. PyPI index urls should
                                   have this set to False, for example.
        """
        self.url = url
        self.content = content
        self.content_type = content_type
        self.encoding = encoding
        self.cache_link_parsing = cache_link_parsing

    def __str__(self) -> str:
        # Never expose credentials embedded in the URL.
        return redact_auth_from_url(self.url)
|
||||
|
||||
|
||||
class HTMLLinkParser(HTMLParser):
    """
    HTMLParser that records the first ``<base href>`` it sees and the
    attribute dictionaries of every ``<a>`` element.
    """

    def __init__(self, url: str) -> None:
        super().__init__(convert_charrefs=True)

        self.url: str = url
        self.base_url: Optional[str] = None
        self.anchors: List[Dict[str, Optional[str]]] = []

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        if tag == "a":
            self.anchors.append(dict(attrs))
            return
        # Only the first <base> that actually carries an href is honoured.
        if tag != "base" or self.base_url is not None:
            return
        href = self.get_href(attrs)
        if href is not None:
            self.base_url = href

    def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]:
        """Return the value of the first ``href`` attribute, or ``None``."""
        return next((value for name, value in attrs if name == "href"), None)
|
||||
|
||||
|
||||
def _handle_get_simple_fail(
    link: Link,
    reason: Union[str, Exception],
    meth: Optional[Callable[..., None]] = None,
) -> None:
    """Log that ``link`` could not be fetched and is being skipped.

    :param link: the link whose retrieval failed.
    :param reason: a message or exception describing the failure.
    :param meth: logging callable to use; defaults to ``logger.debug``.
    """
    emit = logger.debug if meth is None else meth
    emit("Could not fetch URL %s: %s - skipping", link, reason)
|
||||
|
||||
|
||||
def _make_index_content(
    response: Response, cache_link_parsing: bool = True
) -> IndexContent:
    """Build an IndexContent from a response's body, headers and URL."""
    charset = _get_encoding_from_headers(response.headers)
    return IndexContent(
        content=response.content,
        content_type=response.headers["Content-Type"],
        encoding=charset,
        url=response.url,
        cache_link_parsing=cache_link_parsing,
    )
|
||||
|
||||
|
||||
def _get_index_content(link: Link, *, session: PipSession) -> Optional["IndexContent"]:
    """Fetch the index page behind ``link``, or return ``None`` on failure.

    VCS URLs are rejected up front, ``file:`` URLs that point at a directory
    are redirected to their ``index.html``, and every handled fetch error is
    logged rather than raised.
    """
    # Drop the fragment; it is irrelevant for fetching.
    url = link.url.split("#", 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
        logger.warning(
            "Cannot look at %s URL %s because it does not support lookup as web pages.",
            vcs_scheme,
            link,
        )
        return None

    # Tack index.html onto file:// URLs that point to directories
    split_url = urllib.parse.urlparse(url)
    if split_url.scheme == "file" and os.path.isdir(
        urllib.request.url2pathname(split_url.path)
    ):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith("/"):
            url += "/"
        # TODO: In the future, it would be nice if pip supported PEP 691
        #       style responses in the file:// URLs, however there's no
        #       standard file extension for application/vnd.pypi.simple.v1+json
        #       so we'll need to come up with something on our own.
        url = urllib.parse.urljoin(url, "index.html")
        logger.debug(" file: URL is directory, getting %s", url)

    try:
        resp = _get_simple_response(url, session=session)
    except _NotHTTP:
        logger.warning(
            "Skipping page %s because it looks like an archive, and cannot "
            "be checked by a HTTP HEAD request.",
            link,
        )
    except _NotAPIContent as exc:
        logger.warning(
            "Skipping page %s because the %s request got Content-Type: %s. "
            "The only supported Content-Types are application/vnd.pypi.simple.v1+json, "
            "application/vnd.pypi.simple.v1+html, and text/html",
            link,
            exc.request_desc,
            exc.content_type,
        )
    except (NetworkConnectionError, RetryError) as exc:
        _handle_get_simple_fail(link, exc)
    except SSLError as exc:
        # Must be handled before the more general connection error below.
        ssl_reason = "There was a problem confirming the ssl certificate: " + str(exc)
        _handle_get_simple_fail(link, ssl_reason, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_simple_fail(link, f"connection error: {exc}")
    except requests.Timeout:
        _handle_get_simple_fail(link, "timed out")
    else:
        return _make_index_content(resp, cache_link_parsing=link.cache_link_parsing)
    return None
|
||||
|
||||
|
||||
class CollectedSources(NamedTuple):
    """Link sources grouped by the kind of option that produced them."""

    # Sources built from the find-links locations.
    find_links: Sequence[Optional[LinkSource]]
    # Sources built from the index URL locations.
    index_urls: Sequence[Optional[LinkSource]]
|
||||
|
||||
|
||||
class LinkCollector:
    """
    Gathers link sources for a project from every configured location.

    An instance pairs a session with a search scope. ``collect_sources()``
    turns the scope's index URLs and find-links entries into link sources,
    and ``fetch_response()`` downloads a single index page.
    """

    def __init__(
        self,
        session: PipSession,
        search_scope: SearchScope,
    ) -> None:
        self.search_scope = search_scope
        self.session = session

    @classmethod
    def create(
        cls,
        session: PipSession,
        options: Values,
        suppress_no_index: bool = False,
    ) -> "LinkCollector":
        """
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        """
        index_urls = [options.index_url] + options.extra_index_urls
        ignore_indexes = options.no_index and not suppress_no_index
        if ignore_indexes:
            redacted = ",".join(redact_auth_from_url(url) for url in index_urls)
            logger.debug("Ignoring indexes: %s", redacted)
            index_urls = []

        # Make sure find_links is a list before passing to create().
        find_links = options.find_links or []

        scope = SearchScope.create(
            find_links=find_links,
            index_urls=index_urls,
            no_index=options.no_index,
        )
        return LinkCollector(session=session, search_scope=scope)

    @property
    def find_links(self) -> List[str]:
        return self.search_scope.find_links

    def fetch_response(self, location: Link) -> Optional[IndexContent]:
        """
        Fetch an HTML page containing package links.
        """
        return _get_index_content(location, session=self.session)

    def collect_sources(
        self,
        project_name: str,
        candidates_from_page: CandidatesFromPage,
    ) -> CollectedSources:
        """Build the link sources for ``project_name``.

        Index URL locations are built with directory expansion and link
        caching disabled; find-links locations have both enabled.
        """

        def _unique_sources(locations, *, expand_dir, cache_link_parsing):
            # build_source() yields (url, source) pairs; the OrderedDict
            # deduplicates sources by URL.
            pairs = (
                build_source(
                    loc,
                    candidates_from_page=candidates_from_page,
                    page_validator=self.session.is_secure_origin,
                    expand_dir=expand_dir,
                    cache_link_parsing=cache_link_parsing,
                    project_name=project_name,
                )
                for loc in locations
            )
            return collections.OrderedDict(pairs).values()

        index_url_sources = _unique_sources(
            self.search_scope.get_index_urls_locations(project_name),
            expand_dir=False,
            cache_link_parsing=False,
        )
        find_links_sources = _unique_sources(
            self.find_links,
            expand_dir=True,
            cache_link_parsing=True,
        )

        if logger.isEnabledFor(logging.DEBUG):
            located = [
                f"* {s.link}"
                for s in itertools.chain(find_links_sources, index_url_sources)
                if s is not None and s.link is not None
            ]
            header = (
                f"{len(located)} location(s) to search "
                f"for versions of {project_name}:"
            )
            logger.debug("\n".join([header] + located))

        return CollectedSources(
            find_links=list(find_links_sources),
            index_urls=list(index_url_sources),
        )
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,285 @@
|
|||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from typing import Callable, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from pip._vendor.packaging.utils import (
|
||||
InvalidSdistFilename,
|
||||
InvalidVersion,
|
||||
InvalidWheelFilename,
|
||||
canonicalize_name,
|
||||
parse_sdist_filename,
|
||||
parse_wheel_filename,
|
||||
)
|
||||
|
||||
from pip._internal.models.candidate import InstallationCandidate
|
||||
from pip._internal.models.link import Link
|
||||
from pip._internal.utils.urls import path_to_url, url_to_path
|
||||
from pip._internal.vcs import is_url
|
||||
|
||||
logger = logging.getLogger(__name__)

# Iterable of installation candidates, as returned by page_candidates().
FoundCandidates = Iterable[InstallationCandidate]
# Iterable of links, as returned by file_links().
FoundLinks = Iterable[Link]
# Callable that takes a page link and returns the candidates found for it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Callable that takes a link and returns a bool.
PageValidator = Callable[[Link], bool]
|
||||
|
||||
|
||||
class LinkSource:
    """Interface implemented by every kind of link source in this module.

    Each member raises ``NotImplementedError`` here; the concrete sources
    override all three.
    """

    @property
    def link(self) -> Optional[Link]:
        """Returns the underlying link, if there's one."""
        raise NotImplementedError()

    def page_candidates(self) -> FoundCandidates:
        """Candidates found by parsing an archive listing HTML file."""
        raise NotImplementedError()

    def file_links(self) -> FoundLinks:
        """Links found by specifying archives directly."""
        raise NotImplementedError()
|
||||
|
||||
|
||||
def _is_html_file(file_url: str) -> bool:
|
||||
return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
|
||||
|
||||
|
||||
class _FlatDirectoryToUrls:
|
||||
"""Scans directory and caches results"""
|
||||
|
||||
def __init__(self, path: str) -> None:
|
||||
self._path = path
|
||||
self._page_candidates: List[str] = []
|
||||
self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
|
||||
self._scanned_directory = False
|
||||
|
||||
def _scan_directory(self) -> None:
|
||||
"""Scans directory once and populates both page_candidates
|
||||
and project_name_to_urls at the same time
|
||||
"""
|
||||
for entry in os.scandir(self._path):
|
||||
url = path_to_url(entry.path)
|
||||
if _is_html_file(url):
|
||||
self._page_candidates.append(url)
|
||||
continue
|
||||
|
||||
# File must have a valid wheel or sdist name,
|
||||
# otherwise not worth considering as a package
|
||||
try:
|
||||
project_filename = parse_wheel_filename(entry.name)[0]
|
||||
except (InvalidWheelFilename, InvalidVersion):
|
||||
try:
|
||||
project_filename = parse_sdist_filename(entry.name)[0]
|
||||
except (InvalidSdistFilename, InvalidVersion):
|
||||
continue
|
||||
|
||||
self._project_name_to_urls[project_filename].append(url)
|
||||
self._scanned_directory = True
|
||||
|
||||
@property
|
||||
def page_candidates(self) -> List[str]:
|
||||
if not self._scanned_directory:
|
||||
self._scan_directory()
|
||||
|
||||
return self._page_candidates
|
||||
|
||||
@property
|
||||
def project_name_to_urls(self) -> Dict[str, List[str]]:
|
||||
if not self._scanned_directory:
|
||||
self._scan_directory()
|
||||
|
||||
return self._project_name_to_urls
|
||||
|
||||
|
||||
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Links listed on each HTML file in the directory.
    * ``file_links``: Archives in the directory for the requested project.
    """

    # Directory scans shared by every instance, keyed by directory path.
    _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
        project_name: str,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._project_name = canonicalize_name(project_name)

        # Reuse the scanner for this path when one exists; otherwise create
        # it and register it for later instances.
        scanner = self._paths_to_urls.get(path)
        if scanner is None:
            scanner = _FlatDirectoryToUrls(path=path)
            self._paths_to_urls[path] = scanner
        self._path_to_urls = scanner

    @property
    def link(self) -> Optional[Link]:
        return None

    def page_candidates(self) -> FoundCandidates:
        for page_url in self._path_to_urls.page_candidates:
            yield from self._candidates_from_page(Link(page_url))

    def file_links(self) -> FoundLinks:
        archive_urls = self._path_to_urls.project_name_to_urls[self._project_name]
        yield from (Link(archive_url) for archive_url in archive_urls)
|
||||
|
||||
|
||||
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Links listed on an HTML file.
    * ``file_links``: The non-HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        return self._link

    def page_candidates(self) -> FoundCandidates:
        # Only an HTML file is treated as a page listing candidates.
        if _is_html_file(self._link.url):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        # Anything that is not HTML is offered as a file link itself.
        if not _is_html_file(self._link.url):
            yield self._link
|
||||
|
||||
|
||||
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Links listed on the page, provided the page
      validator accepts the link.
    * ``file_links``: The link itself.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        return self._link

    def page_candidates(self) -> FoundCandidates:
        # Pages rejected by the validator contribute no candidates.
        if self._page_validator(self._link):
            yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        yield self._link
|
||||
|
||||
|
||||
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link. No file links are
    produced.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._link = link
        self._candidates_from_page = candidates_from_page

    @property
    def link(self) -> Optional[Link]:
        return self._link

    def page_candidates(self) -> FoundCandidates:
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        # An index directory never exposes archives directly.
        return ()
|
||||
|
||||
|
||||
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
    project_name: str,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Create the link source that matches ``location``.

    :return: a ``(url, source)`` pair. Both are ``None`` when the location is
        neither an existing path nor a URL; ``source`` alone is ``None`` when
        a local location is neither a file nor a directory.
    """
    local_path: Optional[str]
    url: str
    if os.path.exists(location):  # Is a local path.
        local_path, url = location, path_to_url(location)
    elif location.startswith("file:"):  # A file: URL.
        local_path, url = url_to_path(location), location
    elif is_url(location):
        local_path, url = None, location
    else:
        msg = (
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme."
        )
        logger.warning(msg, location)
        return (None, None)

    # No local path: the location is a remote URL.
    if local_path is None:
        remote: LinkSource = _RemoteFileSource(
            candidates_from_page=candidates_from_page,
            page_validator=page_validator,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
        return (url, remote)

    if os.path.isdir(local_path):
        directory_source: LinkSource
        if expand_dir:
            directory_source = _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=local_path,
                project_name=project_name,
            )
        else:
            directory_source = _IndexDirectorySource(
                candidates_from_page=candidates_from_page,
                link=Link(url, cache_link_parsing=cache_link_parsing),
            )
        return (url, directory_source)

    if os.path.isfile(local_path):
        file_source: LinkSource = _LocalFileSource(
            candidates_from_page=candidates_from_page,
            link=Link(url, cache_link_parsing=cache_link_parsing),
        )
        return (url, file_source)

    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (url, None)
|
||||
|
|
@ -0,0 +1,467 @@
|
|||
import functools
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
import sysconfig
|
||||
from typing import Any, Dict, Generator, Optional, Tuple
|
||||
|
||||
from pip._internal.models.scheme import SCHEME_KEYS, Scheme
|
||||
from pip._internal.utils.compat import WINDOWS
|
||||
from pip._internal.utils.deprecation import deprecated
|
||||
from pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
from . import _sysconfig
|
||||
from .base import (
|
||||
USER_CACHE_DIR,
|
||||
get_major_minor_version,
|
||||
get_src_prefix,
|
||||
is_osx_framework,
|
||||
site_packages,
|
||||
user_site,
|
||||
)
|
||||
|
||||
# Names exported by this package.
__all__ = [
    "USER_CACHE_DIR",
    "get_bin_prefix",
    "get_bin_user",
    "get_major_minor_version",
    "get_platlib",
    "get_purelib",
    "get_scheme",
    "get_src_prefix",
    "site_packages",
    "user_site",
]


logger = logging.getLogger(__name__)

# Value of sys.platlibdir, falling back to "lib" when the attribute is absent.
_PLATLIBDIR: str = getattr(sys, "platlibdir", "lib")

# sysconfig is the default backend on Python 3.10 and later.
_USE_SYSCONFIG_DEFAULT = sys.version_info >= (3, 10)
|
||||
|
||||
|
||||
def _should_use_sysconfig() -> bool:
    """Decide whether sysconfig (rather than distutils) provides the paths.

    By default, pip uses sysconfig on Python 3.10+.
    But Python distributors can override this decision by setting:
        sysconfig._PIP_USE_SYSCONFIG = True / False
    Rationale in https://github.com/pypa/pip/issues/10647

    This is a function for testability, but should be constant during any one
    run.
    """
    distributor_choice = getattr(
        sysconfig, "_PIP_USE_SYSCONFIG", _USE_SYSCONFIG_DEFAULT
    )
    return bool(distributor_choice)
|
||||
|
||||
|
||||
# Evaluated once at import time; the functions below branch on this value.
_USE_SYSCONFIG = _should_use_sysconfig()

if not _USE_SYSCONFIG:
    # Import distutils lazily to avoid deprecation warnings,
    # but import it soon enough that it is in memory and available during
    # a pip reinstall.
    from . import _distutils

# Be noisy about incompatibilities if this platform "should" be using
# sysconfig, but is explicitly opting out and using distutils instead.
if _USE_SYSCONFIG_DEFAULT and not _USE_SYSCONFIG:
    _MISMATCH_LEVEL = logging.WARNING
else:
    _MISMATCH_LEVEL = logging.DEBUG
|
||||
|
||||
|
||||
def _looks_like_bpo_44860() -> bool:
|
||||
"""The resolution to bpo-44860 will change this incorrect platlib.
|
||||
|
||||
See <https://bugs.python.org/issue44860>.
|
||||
"""
|
||||
from distutils.command.install import INSTALL_SCHEMES
|
||||
|
||||
try:
|
||||
unix_user_platlib = INSTALL_SCHEMES["unix_user"]["platlib"]
|
||||
except KeyError:
|
||||
return False
|
||||
return unix_user_platlib == "$usersite"
|
||||
|
||||
|
||||
def _looks_like_red_hat_patched_platlib_purelib(scheme: Dict[str, str]) -> bool:
|
||||
platlib = scheme["platlib"]
|
||||
if "/$platlibdir/" in platlib:
|
||||
platlib = platlib.replace("/$platlibdir/", f"/{_PLATLIBDIR}/")
|
||||
if "/lib64/" not in platlib:
|
||||
return False
|
||||
unpatched = platlib.replace("/lib64/", "/lib/")
|
||||
return unpatched.replace("$platbase/", "$base/") == scheme["purelib"]
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _looks_like_red_hat_lib() -> bool:
    """Red Hat patches platlib in unix_prefix and unix_home, but not purelib.

    This is the only way I can see to tell a Red Hat-patched Python.
    Both schemes must exist and look patched for this to return True.
    """
    from distutils.command.install import INSTALL_SCHEMES

    for scheme_name in ("unix_prefix", "unix_home"):
        if scheme_name not in INSTALL_SCHEMES:
            return False
        if not _looks_like_red_hat_patched_platlib_purelib(
            INSTALL_SCHEMES[scheme_name]
        ):
            return False
    return True
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def _looks_like_debian_scheme() -> bool:
|
||||
"""Debian adds two additional schemes."""
|
||||
from distutils.command.install import INSTALL_SCHEMES
|
||||
|
||||
return "deb_system" in INSTALL_SCHEMES and "unix_local" in INSTALL_SCHEMES
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def _looks_like_red_hat_scheme() -> bool:
|
||||
"""Red Hat patches ``sys.prefix`` and ``sys.exec_prefix``.
|
||||
|
||||
Red Hat's ``00251-change-user-install-location.patch`` changes the install
|
||||
command's ``prefix`` and ``exec_prefix`` to append ``"/local"``. This is
|
||||
(fortunately?) done quite unconditionally, so we create a default command
|
||||
object without any configuration to detect this.
|
||||
"""
|
||||
from distutils.command.install import install
|
||||
from distutils.dist import Distribution
|
||||
|
||||
cmd: Any = install(Distribution())
|
||||
cmd.finalize_options()
|
||||
return (
|
||||
cmd.exec_prefix == f"{os.path.normpath(sys.exec_prefix)}/local"
|
||||
and cmd.prefix == f"{os.path.normpath(sys.prefix)}/local"
|
||||
)
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _looks_like_slackware_scheme() -> bool:
    """Slackware patches sysconfig but fails to patch distutils and site.

    Slackware changes sysconfig's user scheme to use ``"lib64"`` for the lib
    path, but does not do the same to the site module.
    """
    if user_site is None:  # User-site not available.
        return False
    try:
        user_paths = sysconfig.get_paths(scheme="posix_user", expand=False)
    except KeyError:  # User-site not available.
        return False
    sysconfig_uses_lib64 = "/lib64/" in user_paths["purelib"]
    return sysconfig_uses_lib64 and "/lib64/" not in user_site
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def _looks_like_msys2_mingw_scheme() -> bool:
|
||||
"""MSYS2 patches distutils and sysconfig to use a UNIX-like scheme.
|
||||
|
||||
However, MSYS2 incorrectly patches sysconfig ``nt`` scheme. The fix is
|
||||
likely going to be included in their 3.10 release, so we ignore the warning.
|
||||
See msys2/MINGW-packages#9319.
|
||||
|
||||
MSYS2 MINGW's patch uses lowercase ``"lib"`` instead of the usual uppercase,
|
||||
and is missing the final ``"site-packages"``.
|
||||
"""
|
||||
paths = sysconfig.get_paths("nt", expand=False)
|
||||
return all(
|
||||
"Lib" not in p and "lib" in p and not p.endswith("site-packages")
|
||||
for p in (paths[key] for key in ("platlib", "purelib"))
|
||||
)
|
||||
|
||||
|
||||
def _fix_abiflags(parts: Tuple[str]) -> Generator[str, None, None]:
|
||||
ldversion = sysconfig.get_config_var("LDVERSION")
|
||||
abiflags = getattr(sys, "abiflags", None)
|
||||
|
||||
# LDVERSION does not end with sys.abiflags. Just return the path unchanged.
|
||||
if not ldversion or not abiflags or not ldversion.endswith(abiflags):
|
||||
yield from parts
|
||||
return
|
||||
|
||||
# Strip sys.abiflags from LDVERSION-based path components.
|
||||
for part in parts:
|
||||
if part.endswith(ldversion):
|
||||
part = part[: (0 - len(abiflags))]
|
||||
yield part
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _warn_mismatched(old: pathlib.Path, new: pathlib.Path, *, key: str) -> None:
    """Log one distutils/sysconfig mismatch at the module's mismatch level.

    The ``lru_cache`` means a given ``(old, new, key)`` combination is only
    logged the first time it is seen.
    """
    template = (
        "Value for %s does not match. Please report this to <%s>"
        "\ndistutils: %s"
        "\nsysconfig: %s"
    )
    logger.log(
        _MISMATCH_LEVEL,
        template,
        key,
        "https://github.com/pypa/pip/issues/10151",
        old,
        new,
    )
|
||||
|
||||
|
||||
def _warn_if_mismatch(old: pathlib.Path, new: pathlib.Path, *, key: str) -> bool:
|
||||
if old == new:
|
||||
return False
|
||||
_warn_mismatched(old, new, key=key)
|
||||
return True
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _log_context(
    *,
    user: bool = False,
    home: Optional[str] = None,
    root: Optional[str] = None,
    prefix: Optional[str] = None,
) -> None:
    """Log the scheme arguments that accompany a mismatch report.

    Cached, so each distinct combination of arguments is logged only once.
    """
    template_lines = (
        "Additional context:",
        "user = %r",
        "home = %r",
        "root = %r",
        "prefix = %r",
    )
    template = "\n".join(template_lines)
    logger.log(_MISMATCH_LEVEL, template, user, home, root, prefix)
|
||||
|
||||
|
||||
def get_scheme(
    dist_name: str,
    user: bool = False,
    home: Optional[str] = None,
    root: Optional[str] = None,
    isolated: bool = False,
    prefix: Optional[str] = None,
) -> Scheme:
    """Return the installation scheme for ``dist_name``.

    The sysconfig-based scheme is always computed. When ``_USE_SYSCONFIG`` is
    true it is returned directly. Otherwise the distutils-based scheme is
    computed as well and is the one returned; every scheme key on which the
    two disagree is checked against a list of known, tolerated differences,
    and the remaining differences are either reported through a deprecation
    message (when distutils config files changed the result) or logged as
    mismatches.

    All arguments are forwarded unchanged to both backends.
    """
    new = _sysconfig.get_scheme(
        dist_name,
        user=user,
        home=home,
        root=root,
        isolated=isolated,
        prefix=prefix,
    )
    if _USE_SYSCONFIG:
        return new

    old = _distutils.get_scheme(
        dist_name,
        user=user,
        home=home,
        root=root,
        isolated=isolated,
        prefix=prefix,
    )

    # (old path, new path, key label) for every difference not excused below.
    warning_contexts = []
    for k in SCHEME_KEYS:
        old_v = pathlib.Path(getattr(old, k))
        new_v = pathlib.Path(getattr(new, k))

        if old_v == new_v:
            continue

        # distutils incorrectly put PyPy packages under ``site-packages/python``
        # in the ``posix_home`` scheme, but PyPy devs said they expect the
        # directory name to be ``pypy`` instead. So we treat this as a bug fix
        # and not warn about it. See bpo-43307 and python/cpython#24628.
        skip_pypy_special_case = (
            sys.implementation.name == "pypy"
            and home is not None
            and k in ("platlib", "purelib")
            and old_v.parent == new_v.parent
            and old_v.name.startswith("python")
            and new_v.name.startswith("pypy")
        )
        if skip_pypy_special_case:
            continue

        # sysconfig's ``osx_framework_user`` does not include ``pythonX.Y`` in
        # the ``include`` value, but distutils's ``headers`` does. We'll let
        # CPython decide whether this is a bug or feature. See bpo-43948.
        skip_osx_framework_user_special_case = (
            user
            and is_osx_framework()
            and k == "headers"
            and old_v.parent.parent == new_v.parent
            and old_v.parent.name.startswith("python")
        )
        if skip_osx_framework_user_special_case:
            continue

        # On Red Hat and derived Linux distributions, distutils is patched to
        # use "lib64" instead of "lib" for platlib.
        if k == "platlib" and _looks_like_red_hat_lib():
            continue

        # On Python 3.9+, sysconfig's posix_user scheme sets platlib against
        # sys.platlibdir, but distutils's unix_user incorrectly continues
        # using the same $usersite for both platlib and purelib. This creates a
        # mismatch when sys.platlibdir is not "lib".
        skip_bpo_44860 = (
            user
            and k == "platlib"
            and not WINDOWS
            and sys.version_info >= (3, 9)
            and _PLATLIBDIR != "lib"
            and _looks_like_bpo_44860()
        )
        if skip_bpo_44860:
            continue

        # Slackware incorrectly patches posix_user to use lib64 instead of lib,
        # but not usersite to match the location.
        skip_slackware_user_scheme = (
            user
            and k in ("platlib", "purelib")
            and not WINDOWS
            and _looks_like_slackware_scheme()
        )
        if skip_slackware_user_scheme:
            continue

        # Both Debian and Red Hat patch Python to place the system site under
        # /usr/local instead of /usr. Debian also places lib in dist-packages
        # instead of site-packages, but the /usr/local check should cover it.
        skip_linux_system_special_case = (
            not (user or home or prefix or running_under_virtualenv())
            and old_v.parts[1:3] == ("usr", "local")
            and len(new_v.parts) > 1
            and new_v.parts[1] == "usr"
            and (len(new_v.parts) < 3 or new_v.parts[2] != "local")
            and (_looks_like_red_hat_scheme() or _looks_like_debian_scheme())
        )
        if skip_linux_system_special_case:
            continue

        # On Python 3.7 and earlier, sysconfig does not include sys.abiflags in
        # the "pythonX.Y" part of the path, but distutils does.
        skip_sysconfig_abiflag_bug = (
            sys.version_info < (3, 8)
            and not WINDOWS
            and k in ("headers", "platlib", "purelib")
            and tuple(_fix_abiflags(old_v.parts)) == new_v.parts
        )
        if skip_sysconfig_abiflag_bug:
            continue

        # MSYS2 MINGW's sysconfig patch does not include the "site-packages"
        # part of the path. This is incorrect and will be fixed in MSYS.
        skip_msys2_mingw_bug = (
            WINDOWS and k in ("platlib", "purelib") and _looks_like_msys2_mingw_scheme()
        )
        if skip_msys2_mingw_bug:
            continue

        # CPython's POSIX install script invokes pip (via ensurepip) against the
        # interpreter located in the source tree, not the install site. This
        # triggers special logic in sysconfig that's not present in distutils.
        # https://github.com/python/cpython/blob/8c21941ddaf/Lib/sysconfig.py#L178-L194
        skip_cpython_build = (
            sysconfig.is_python_build(check_home=True)
            and not WINDOWS
            and k in ("headers", "include", "platinclude")
        )
        if skip_cpython_build:
            continue

        warning_contexts.append((old_v, new_v, f"scheme.{k}"))

    # Every difference was excused: nothing to report.
    if not warning_contexts:
        return old

    # Check if this path mismatch is caused by distutils config files. Those
    # files will no longer work once we switch to sysconfig, so this raises a
    # deprecation message for them.
    default_old = _distutils.distutils_scheme(
        dist_name,
        user,
        home,
        root,
        isolated,
        prefix,
        ignore_config_files=True,
    )
    if any(default_old[k] != getattr(old, k) for k in SCHEME_KEYS):
        deprecated(
            reason=(
                "Configuring installation scheme with distutils config files "
                "is deprecated and will no longer work in the near future. If you "
                "are using a Homebrew or Linuxbrew Python, please see discussion "
                "at https://github.com/Homebrew/homebrew-core/issues/76621"
            ),
            replacement=None,
            gone_in=None,
        )
        return old

    # Post warnings about this mismatch so user can report them back.
    for old_v, new_v, key in warning_contexts:
        _warn_mismatched(old_v, new_v, key=key)
    _log_context(user=user, home=home, root=root, prefix=prefix)

    return old
|
||||
|
||||
|
||||
def get_bin_prefix() -> str:
    """Return the directory that executables are installed into.

    The sysconfig-based value is always computed. It is returned directly
    when ``_USE_SYSCONFIG`` is truthy; otherwise the distutils-based value is
    returned, after ``_warn_if_mismatch()`` has compared the two (and
    ``_log_context()`` has been called if it reported a mismatch).
    """
    sysconfig_prefix = _sysconfig.get_bin_prefix()
    if not _USE_SYSCONFIG:
        distutils_prefix = _distutils.get_bin_prefix()
        mismatched = _warn_if_mismatch(
            pathlib.Path(distutils_prefix),
            pathlib.Path(sysconfig_prefix),
            key="bin_prefix",
        )
        if mismatched:
            _log_context()
        return distutils_prefix
    return sysconfig_prefix
|
||||
|
||||
|
||||
def get_bin_user() -> str:
    """Return the ``scripts`` path of the sysconfig user scheme."""
    # An empty distribution name is passed; only the scripts path is read.
    user_scheme = _sysconfig.get_scheme("", user=True)
    return user_scheme.scripts
|
||||
|
||||
|
||||
def _looks_like_deb_system_dist_packages(value: str) -> bool:
    """Tell whether ``value`` is Debian's APT-managed dist-packages path.

    On Debian, ``distutils.sysconfig.get_python_lib()`` is patched to return
    the package directory controlled by APT, while ``sysconfig`` is left
    untouched. This resembles the problem worked around in ``get_scheme()``,
    except that the default here is ``deb_system`` rather than
    ``unix_local``. pip cannot fix the Debian bug itself; this check only
    lets callers skip the mismatch warning when it applies.
    """
    if _looks_like_debian_scheme():
        return value == "/usr/lib/python3/dist-packages"
    return False
|
||||
|
||||
|
||||
def get_purelib() -> str:
    """Return the default location for pure-Python libraries.

    The sysconfig value is returned when ``_USE_SYSCONFIG`` is truthy.
    Otherwise the distutils value is returned; unless it is Debian's system
    dist-packages path, it is first compared with the sysconfig value via
    ``_warn_if_mismatch()``.
    """
    sysconfig_path = _sysconfig.get_purelib()
    if _USE_SYSCONFIG:
        return sysconfig_path

    distutils_path = _distutils.get_purelib()
    # Debian's patched distutils is a known, unfixable mismatch: stay quiet.
    if not _looks_like_deb_system_dist_packages(distutils_path):
        if _warn_if_mismatch(
            pathlib.Path(distutils_path),
            pathlib.Path(sysconfig_path),
            key="purelib",
        ):
            _log_context()
    return distutils_path
|
||||
|
||||
|
||||
def get_platlib() -> str:
    """Return the default location for platform-specific libraries.

    The sysconfig value is returned when ``_USE_SYSCONFIG`` is truthy.
    Otherwise the distutils value is returned; unless it is Debian's system
    dist-packages path, it is first compared with the sysconfig value via
    ``_warn_if_mismatch()``.
    """
    sysconfig_path = _sysconfig.get_platlib()
    if _USE_SYSCONFIG:
        return sysconfig_path

    # Imported only on the distutils code path.
    from . import _distutils

    distutils_path = _distutils.get_platlib()
    # Debian's patched distutils is a known, unfixable mismatch: stay quiet.
    if not _looks_like_deb_system_dist_packages(distutils_path):
        if _warn_if_mismatch(
            pathlib.Path(distutils_path),
            pathlib.Path(sysconfig_path),
            key="platlib",
        ):
            _log_context()
    return distutils_path
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
"""Locations where we look for configs, install stuff, etc"""
|
||||
|
||||
# The following comment should be removed at some point in the future.
|
||||
# mypy: strict-optional=False
|
||||
|
||||
# If pip's going to use distutils, it should not be using the copy that setuptools
|
||||
# might have injected into the environment. This is done by removing the injected
|
||||
# shim, if it's injected.
|
||||
#
|
||||
# See https://github.com/pypa/pip/issues/8761 for the original discussion and
|
||||
# rationale for why this is done within pip.
|
||||
try:
    __import__("_distutils_hack").remove_shim()
except ImportError:
    # The shim module is not installed, so there is nothing to remove.
    pass
except AttributeError:
    # The module exists but offers no ``remove_shim()``; leave it alone.
    pass
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from distutils.cmd import Command as DistutilsCommand
|
||||
from distutils.command.install import SCHEME_KEYS
|
||||
from distutils.command.install import install as distutils_install_command
|
||||
from distutils.sysconfig import get_python_lib
|
||||
from typing import Dict, List, Optional, Union, cast
|
||||
|
||||
from pip._internal.models.scheme import Scheme
|
||||
from pip._internal.utils.compat import WINDOWS
|
||||
from pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
from .base import get_major_minor_version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def distutils_scheme(
    dist_name: str,
    user: bool = False,
    home: Optional[str] = None,
    root: Optional[str] = None,
    isolated: bool = False,
    prefix: Optional[str] = None,
    *,
    ignore_config_files: bool = False,
) -> Dict[str, str]:
    """Return a distutils install scheme as a ``{key: path}`` mapping.

    :param dist_name: distribution name, also used in the headers path
    :param user: request the "user" scheme
    :param home: base directory for the "home" scheme
    :param root: alternative root passed on to the install command
    :param isolated: when true, pass ``--no-user-cfg`` to distutils
    :param prefix: base directory for the "prefix" scheme; must not be
        combined with ``user`` or ``home`` (checked with ``assert``)
    :param ignore_config_files: when true, skip parsing distutils config
        files entirely
    :returns: a dict with one entry per name in ``SCHEME_KEYS``
    """
    from distutils.dist import Distribution

    dist_kwargs: Dict[str, Union[str, List[str]]] = {"name": dist_name}
    if isolated:
        dist_kwargs["script_args"] = ["--no-user-cfg"]

    dist = Distribution(dist_kwargs)
    if not ignore_config_files:
        try:
            dist.parse_config_files()
        except UnicodeDecodeError:
            # Undecodable config files are reported and otherwise ignored.
            config_paths = dist.find_config_files()
            logger.warning(
                "Ignore distutils configs in %s due to encoding errors.",
                ", ".join(os.path.basename(p) for p in config_paths),
            )

    command: Optional[DistutilsCommand] = dist.get_command_obj("install", create=True)
    assert command is not None
    install_cmd = cast(distutils_install_command, command)

    # NOTE: setting user or home makes finalize_options() create the user
    # base / home directory as a side effect. A side-effect-free scheme
    # class would be preferable.
    assert not (user and prefix), f"user={user} prefix={prefix}"
    assert not (home and prefix), f"home={home} prefix={prefix}"
    install_cmd.user = user or install_cmd.user
    if user or home:
        install_cmd.prefix = ""
    install_cmd.prefix = prefix or install_cmd.prefix
    install_cmd.home = home or install_cmd.home
    install_cmd.root = root or install_cmd.root
    install_cmd.finalize_options()

    scheme = {key: getattr(install_cmd, "install_" + key) for key in SCHEME_KEYS}

    # An install_lib given in a config file takes precedence over both
    # purelib and platlib. install_cmd.install_lib is *always* populated
    # after finalize_options(), so the parsed options are consulted to tell
    # whether it was explicitly requested.
    if "install_lib" in dist.get_option_dict("install"):
        scheme["purelib"] = install_cmd.install_lib
        scheme["platlib"] = install_cmd.install_lib

    if running_under_virtualenv():
        if home:
            header_base = home
        elif user:
            header_base = install_cmd.install_userbase
        else:
            header_base = install_cmd.prefix
        python_dir = f"python{get_major_minor_version()}"
        scheme["headers"] = os.path.join(
            header_base, "include", "site", python_dir, dist_name
        )

        if root is not None:
            # Re-base the headers path under root: strip the drive and the
            # leading separator before joining.
            absolute_headers = os.path.abspath(scheme["headers"])
            path_no_drive = os.path.splitdrive(absolute_headers)[1]
            scheme["headers"] = os.path.join(root, path_no_drive[1:])

    return scheme
|
||||
|
||||
|
||||
def get_scheme(
    dist_name: str,
    user: bool = False,
    home: Optional[str] = None,
    root: Optional[str] = None,
    isolated: bool = False,
    prefix: Optional[str] = None,
) -> Scheme:
    """
    Build the ``Scheme`` matching the given installation options.

    Background on the available schemes is in the distutils documentation:
    https://docs.python.org/3/install/index.html#alternate-installation

    :param dist_name: the name of the package to retrieve the scheme for, used
        in the headers scheme path
    :param user: indicates to use the "user" scheme
    :param home: indicates to use the "home" scheme and provides the base
        directory for the same
    :param root: root under which other directories are re-based
    :param isolated: equivalent to --no-user-cfg, i.e. do not consider
        ~/.pydistutils.cfg (posix) or ~/pydistutils.cfg (non-posix) for
        scheme paths
    :param prefix: indicates to use the "prefix" scheme and provides the
        base directory for the same
    """
    paths = distutils_scheme(dist_name, user, home, root, isolated, prefix)
    wanted = ("platlib", "purelib", "headers", "scripts", "data")
    return Scheme(**{key: paths[key] for key in wanted})
|
||||
|
||||
|
||||
def get_bin_prefix() -> str:
    """Return the directory for executables under the current ``sys.prefix``.

    On Windows this is ``<prefix>/Scripts`` if that exists, else
    ``<prefix>/bin``. For a macOS interpreter living under
    ``/System/Library/`` it is ``/usr/local/bin``. Everywhere else it is
    ``<prefix>/bin``.
    """
    # Old virtualenv versions can leave '..' components in sys.prefix, so
    # normalise it first.
    prefix = os.path.normpath(sys.prefix)
    fallback_bin = os.path.join(prefix, "bin")

    if WINDOWS:
        scripts_dir = os.path.join(prefix, "Scripts")
        # buildout uses 'bin' on Windows too?
        return scripts_dir if os.path.exists(scripts_dir) else fallback_bin

    # Standard macOS framework installs are forced to /usr/local/bin.
    if sys.platform.startswith("darwin") and prefix.startswith("/System/Library/"):
        return "/usr/local/bin"
    return fallback_bin
|
||||
|
||||
|
||||
def get_purelib() -> str:
    """Return distutils' library directory for pure-Python modules."""
    pure_dir = get_python_lib(plat_specific=False)
    return pure_dir
|
||||
|
||||
|
||||
def get_platlib() -> str:
    """Return distutils' library directory for platform-specific modules."""
    plat_dir = get_python_lib(plat_specific=True)
    return plat_dir
|
||||
|
|
@ -0,0 +1,213 @@
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
import sysconfig
|
||||
import typing
|
||||
|
||||
from pip._internal.exceptions import InvalidSchemeCombination, UserInstallationInvalid
|
||||
from pip._internal.models.scheme import SCHEME_KEYS, Scheme
|
||||
from pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
from .base import change_root, get_major_minor_version, is_osx_framework
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Notes on _infer_* functions.
|
||||
# Unfortunately ``get_default_scheme()`` didn't exist before 3.10, so there's no
|
||||
# way to ask things like "what is the '_prefix' scheme on this platform". These
|
||||
# functions try to answer that with some heuristics while accounting for ad-hoc
|
||||
# platforms not covered by CPython's default sysconfig implementation. If the
|
||||
# ad-hoc implementation does not fully implement sysconfig, we'll fall back to
|
||||
# a POSIX scheme.
|
||||
|
||||
_AVAILABLE_SCHEMES = set(sysconfig.get_scheme_names())
|
||||
|
||||
_PREFERRED_SCHEME_API = getattr(sysconfig, "get_preferred_scheme", None)
|
||||
|
||||
|
||||
def _should_use_osx_framework_prefix() -> bool:
    """Decide whether Apple's ``osx_framework_library`` scheme applies.

    The Python shipped with Apple's Command Line Tools uses this special
    scheme when both of these hold:

    * the interpreter is a framework build, and
    * the install targets the system prefix.

    ``pip install --prefix`` is deliberately not considered here. Such an
    install is not into the system prefix and should use ``posix_prefix``,
    yet ``_infer_prefix()`` will still report ``osx_framework_library``.
    The stdlib replacement, ``sysconfig.get_default_scheme()``, has no
    ``prefix`` input either, so Apple presumably could not switch schemes
    there. Answering the same way keeps behaviour consistent between the
    stdlib implementation and ours; ``get_scheme()`` handles the custom
    prefix case afterwards.
    """
    if "osx_framework_library" not in _AVAILABLE_SCHEMES:
        return False
    if running_under_virtualenv():
        return False
    return is_osx_framework()
|
||||
|
||||
|
||||
def _infer_prefix() -> str:
    """Pick the name of the prefix scheme for the current platform.

    When ``sysconfig.get_preferred_scheme`` exists, it is asked directly.
    Otherwise the following are tried in order:

    * ``osx_framework_library``, for the Python distributed with Apple's
      Command Line Tools when not inside a virtual environment.
    * Implementation + OS, used by PyPy on Windows (``pypy_nt``).
    * Implementation alone, used by PyPy on POSIX (``pypy``).
    * OS + "prefix", used by CPython on POSIX (``posix_prefix``).
    * The bare OS name, used by CPython on Windows (``nt``).

    ``posix_prefix`` is returned when nothing above is available.
    """
    if _PREFERRED_SCHEME_API:
        return _PREFERRED_SCHEME_API("prefix")
    if _should_use_osx_framework_prefix():
        return "osx_framework_library"

    implementation = sys.implementation.name
    candidates = (
        f"{implementation}_{os.name}",
        implementation,
        f"{os.name}_prefix",
        os.name,  # On Windows, the prefix scheme is just called "nt".
    )
    for scheme_name in candidates:
        if scheme_name in _AVAILABLE_SCHEMES:
            return scheme_name
    return "posix_prefix"
|
||||
|
||||
|
||||
def _infer_user() -> str:
    """Pick the name of the user scheme for the current platform.

    :raises UserInstallationInvalid: if neither the platform-specific user
        scheme nor ``posix_user`` is available.
    """
    if _PREFERRED_SCHEME_API:
        return _PREFERRED_SCHEME_API("user")

    framework_build = is_osx_framework() and not running_under_virtualenv()
    candidate = "osx_framework_user" if framework_build else f"{os.name}_user"
    if candidate in _AVAILABLE_SCHEMES:
        return candidate
    if "posix_user" in _AVAILABLE_SCHEMES:
        return "posix_user"
    # User scheme unavailable.
    raise UserInstallationInvalid()
|
||||
|
||||
|
||||
def _infer_home() -> str:
    """Pick the name of the home scheme for the current platform.

    Falls back to ``posix_home`` when ``<os.name>_home`` is not available.
    """
    if _PREFERRED_SCHEME_API:
        return _PREFERRED_SCHEME_API("home")
    candidate = f"{os.name}_home"
    return candidate if candidate in _AVAILABLE_SCHEMES else "posix_home"
|
||||
|
||||
|
||||
# Update these keys if the user sets a custom home.
|
||||
_HOME_KEYS = [
|
||||
"installed_base",
|
||||
"base",
|
||||
"installed_platbase",
|
||||
"platbase",
|
||||
"prefix",
|
||||
"exec_prefix",
|
||||
]
|
||||
if sysconfig.get_config_var("userbase") is not None:
|
||||
_HOME_KEYS.append("userbase")
|
||||
|
||||
|
||||
def get_scheme(
    dist_name: str,
    user: bool = False,
    home: typing.Optional[str] = None,
    root: typing.Optional[str] = None,
    isolated: bool = False,
    prefix: typing.Optional[str] = None,
) -> Scheme:
    """
    Build the ``Scheme`` matching the given installation options.

    :param dist_name: the name of the package to retrieve the scheme for, used
        in the headers scheme path
    :param user: indicates to use the "user" scheme
    :param home: indicates to use the "home" scheme
    :param root: root under which other directories are re-based
    :param isolated: ignored, but kept for distutils compatibility (where
        this controls whether the user-site pydistutils.cfg is honored)
    :param prefix: indicates to use the "prefix" scheme and provides the
        base directory for the same
    :raises InvalidSchemeCombination: if ``prefix`` is combined with
        ``user`` or ``home``
    """
    for option_value, option_flag in ((user, "--user"), (home, "--home")):
        if option_value and prefix:
            raise InvalidSchemeCombination(option_flag, "--prefix")

    if home is not None:
        scheme_name = _infer_home()
    elif user:
        scheme_name = _infer_user()
    else:
        scheme_name = _infer_prefix()

    # Special case: a custom prefix must use posix_prefix rather than
    # osx_framework_library. The _should_use_osx_framework_prefix()
    # docstring explains why.
    if scheme_name == "osx_framework_library" and prefix is not None:
        scheme_name = "posix_prefix"

    # home wins over prefix; with neither, sysconfig's own values are used.
    base_override = home if home is not None else prefix
    if base_override is None:
        variables = {}
    else:
        variables = dict.fromkeys(_HOME_KEYS, base_override)

    paths = sysconfig.get_paths(scheme=scheme_name, vars=variables)

    # The rules below are arbitrary and exist purely for compatibility.
    # 1. pip has historically used a special header path in virtual
    #    environments.
    # 2. distutils uses 'UNKNOWN' when the distribution name is not known.
    #    That is mirrored only outside virtual environments, because the
    #    historical header path logic of point 1 never did it.
    if running_under_virtualenv():
        base_key = "userbase" if user else "base"
        base = variables.get(base_key, sys.prefix)
        python_xy = f"python{get_major_minor_version()}"
        paths["include"] = os.path.join(base, "include", "site", python_xy)
    elif not dist_name:
        dist_name = "UNKNOWN"

    scheme = Scheme(
        platlib=paths["platlib"],
        purelib=paths["purelib"],
        headers=os.path.join(paths["include"], dist_name),
        scripts=paths["scripts"],
        data=paths["data"],
    )
    if root is not None:
        for key in SCHEME_KEYS:
            setattr(scheme, key, change_root(root, getattr(scheme, key)))
    return scheme
|
||||
|
||||
|
||||
def get_bin_prefix() -> str:
    """Return the scripts directory reported by ``sysconfig``.

    A macOS interpreter whose ``sys.prefix`` is under ``/System/Library/``
    is forced to ``/usr/local/bin`` instead.
    """
    on_macos = sys.platform.startswith("darwin")
    if on_macos and sys.prefix.startswith("/System/Library/"):
        return "/usr/local/bin"
    return sysconfig.get_path("scripts")
|
||||
|
||||
|
||||
def get_purelib() -> str:
    """Return sysconfig's ``purelib`` path for the default scheme."""
    return sysconfig.get_path("purelib")
|
||||
|
||||
|
||||
def get_platlib() -> str:
    """Return sysconfig's ``platlib`` path for the default scheme."""
    return sysconfig.get_path("platlib")
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
import functools
|
||||
import os
|
||||
import site
|
||||
import sys
|
||||
import sysconfig
|
||||
import typing
|
||||
|
||||
from pip._internal.exceptions import InstallationError
|
||||
from pip._internal.utils import appdirs
|
||||
from pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
# Application Directories
|
||||
USER_CACHE_DIR = appdirs.user_cache_dir("pip")
|
||||
|
||||
# FIXME doesn't account for venv linked to global site-packages
|
||||
site_packages: str = sysconfig.get_path("purelib")
|
||||
|
||||
|
||||
def get_major_minor_version() -> str:
    """
    Return the running interpreter's version as "<major>.<minor>", e.g.
    "3.7" or "3.10".
    """
    major, minor = sys.version_info[:2]
    return f"{major}.{minor}"
|
||||
|
||||
|
||||
def change_root(new_root: str, pathname: str) -> str:
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    os.path.join(new_root, pathname). Otherwise 'pathname' is first made
    relative and then joined, which is tricky on DOS/Windows and Mac OS.

    This is borrowed from Python's standard library's distutils module.

    :param new_root: directory to re-base ``pathname`` under.
    :param pathname: path to re-base; may be absolute or relative.
    :returns: the re-based path.
    :raises InstallationError: if ``os.name`` is neither "posix" nor "nt".
    """
    if os.name == "posix":
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        # Drop the leading "/" so that join() does not discard new_root.
        return os.path.join(new_root, pathname[1:])

    if os.name == "nt":
        _drive, path = os.path.splitdrive(pathname)
        # Slice instead of indexing: ``path`` is empty for inputs such as
        # "" or "C:", and path[0] would raise IndexError there.
        if path[:1] == "\\":
            path = path[1:]
        return os.path.join(new_root, path)

    raise InstallationError(
        f"Unknown platform: {os.name}\n"
        "Can not change root path prefix on unknown platform."
    )
|
||||
|
||||
|
||||
def get_src_prefix() -> str:
    """Return the absolute path of the ``src`` directory pip uses.

    Inside a virtual environment this is ``<sys.prefix>/src``; otherwise it
    is ``<cwd>/src``. If the current working directory cannot be
    determined, the process exits with an explanatory message.
    """
    if running_under_virtualenv():
        parent_dir = sys.prefix
    else:
        # FIXME: keep src in cwd for now (it is not a temporary folder)
        try:
            parent_dir = os.getcwd()
        except OSError:
            # In case the current working directory has been renamed or deleted
            sys.exit("The folder you are executing pip from can no longer be found.")

    # Under macOS + virtualenv, sys.prefix is not properly resolved (it
    # looks like /path/to/python/bin/..), hence abspath().
    return os.path.abspath(os.path.join(parent_dir, "src"))
|
||||
|
||||
|
||||
user_site: typing.Optional[str]
try:
    # Prefer getusersitepackages() when it exists: it makes sure the value
    # has been initialised properly.
    user_site = site.getusersitepackages()
except AttributeError:
    user_site = site.USER_SITE
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def is_osx_framework() -> bool:
    """Tell whether sysconfig's ``PYTHONFRAMEWORK`` variable is set.

    The answer is computed once and cached for later calls.
    """
    framework_name = sysconfig.get_config_var("PYTHONFRAMEWORK")
    return bool(framework_name)
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
from typing import List, Optional
|
||||
|
||||
|
||||
def main(args: Optional[List[str]] = None) -> int:
    """Entry point kept for old console scripts that may still reference it.

    The call is forwarded to ``pip._internal.utils.entrypoints._wrapper``.

    For additional details, see https://github.com/pypa/pip/issues/7498.
    """
    # Imported only when this entry point is actually invoked.
    from pip._internal.utils.entrypoints import _wrapper

    exit_code = _wrapper(args)
    return exit_code
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
import contextlib
|
||||
import functools
|
||||
import os
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, List, Optional, Type, cast
|
||||
|
||||
from pip._internal.utils.misc import strtobool
|
||||
|
||||
from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Literal, Protocol
|
||||
else:
|
||||
Protocol = object
|
||||
|
||||
__all__ = [
|
||||
"BaseDistribution",
|
||||
"BaseEnvironment",
|
||||
"FilesystemWheel",
|
||||
"MemoryWheel",
|
||||
"Wheel",
|
||||
"get_default_environment",
|
||||
"get_environment",
|
||||
"get_wheel_distribution",
|
||||
"select_backend",
|
||||
]
|
||||
|
||||
|
||||
def _should_use_importlib_metadata() -> bool:
    """Choose between the ``importlib.metadata`` and ``pkg_resources`` backends.

    pip defaults to ``importlib.metadata`` on Python 3.11+ and to
    ``pkg_resources`` otherwise. Two overrides exist:

    * If the environment variable ``_PIP_USE_IMPORTLIB_METADATA`` is set to a
      value ``strtobool()`` accepts, it decides the matter regardless of
      Python version.
    * On Python 3.11+, Python distributors can patch ``importlib.metadata``
      to add a global constant ``_PIP_USE_IMPORTLIB_METADATA = False``. This
      makes pip use ``pkg_resources`` (unless the user set the aforementioned
      environment variable to *True*).
    """
    try:
        return bool(strtobool(os.environ["_PIP_USE_IMPORTLIB_METADATA"]))
    except (KeyError, ValueError):
        # Unset or unparsable: fall through to the version-based default.
        pass

    if sys.version_info < (3, 11):
        return False

    from importlib import metadata as stdlib_metadata

    distributor_choice = getattr(stdlib_metadata, "_PIP_USE_IMPORTLIB_METADATA", True)
    return bool(distributor_choice)
|
||||
|
||||
|
||||
class Backend(Protocol):
    """Shape of a metadata backend module as returned by ``select_backend()``."""

    # Identifier of the backend.
    NAME: 'Literal["importlib", "pkg_resources"]'
    # Distribution class of the backend.
    Distribution: Type[BaseDistribution]
    # Environment class of the backend.
    Environment: Type[BaseEnvironment]
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def select_backend() -> Backend:
    """Return the metadata backend module to use.

    The choice comes from ``_should_use_importlib_metadata()`` and is cached
    after the first call. The backend module is imported lazily.
    """
    if not _should_use_importlib_metadata():
        from . import pkg_resources

        return cast(Backend, pkg_resources)

    from . import importlib

    return cast(Backend, importlib)
|
||||
|
||||
|
||||
def get_default_environment() -> BaseEnvironment:
    """Return the default representation of the current environment.

    The result is an Environment instance from the selected backend. The
    default instance should be built from ``sys.path`` and may use caching
    to share instance state across calls.
    """
    environment_cls = select_backend().Environment
    return environment_cls.default()
|
||||
|
||||
|
||||
def get_environment(paths: Optional[List[str]]) -> BaseEnvironment:
    """Return a representation of the environment described by ``paths``.

    The result is an Environment instance from the selected backend, based
    on the given import paths. The backend must build a fresh instance
    reflecting the installed distributions at the time of the call.
    """
    environment_cls = select_backend().Environment
    return environment_cls.from_paths(paths)
|
||||
|
||||
|
||||
def get_directory_distribution(directory: str) -> BaseDistribution:
    """Return the distribution metadata found in ``directory``.

    The result is a Distribution instance from the selected backend, based
    on the given on-disk ``.dist-info`` directory.
    """
    distribution_cls = select_backend().Distribution
    return distribution_cls.from_directory(directory)
|
||||
|
||||
|
||||
def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistribution:
    """Return the distribution metadata of the given wheel.

    The result is a Distribution instance from the selected backend, based
    on the wheel's ``.dist-info`` directory.

    :param wheel: The wheel to read metadata from.
    :param canonical_name: Normalized project name of the given wheel.
    """
    distribution_cls = select_backend().Distribution
    return distribution_cls.from_wheel(wheel, canonical_name)
|
||||
|
||||
|
||||
def get_metadata_distribution(
    metadata_contents: bytes,
    filename: str,
    canonical_name: str,
) -> BaseDistribution:
    """Return a distribution built from the contents of a METADATA file.

    The result is a Distribution instance from the selected backend, sourced
    from the data in `metadata_contents`.

    :param metadata_contents: Contents of a METADATA file within a dist, or one served
                              via PEP 658.
    :param filename: Filename for the dist this metadata represents.
    :param canonical_name: Normalized project name of the given dist.
    """
    distribution_cls = select_backend().Distribution
    return distribution_cls.from_metadata_file_contents(
        metadata_contents, filename, canonical_name
    )
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
# Extracted from https://github.com/pfmoore/pkg_metadata
|
||||
|
||||
from email.header import Header, decode_header, make_header
|
||||
from email.message import Message
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
METADATA_FIELDS = [
|
||||
# Name, Multiple-Use
|
||||
("Metadata-Version", False),
|
||||
("Name", False),
|
||||
("Version", False),
|
||||
("Dynamic", True),
|
||||
("Platform", True),
|
||||
("Supported-Platform", True),
|
||||
("Summary", False),
|
||||
("Description", False),
|
||||
("Description-Content-Type", False),
|
||||
("Keywords", False),
|
||||
("Home-page", False),
|
||||
("Download-URL", False),
|
||||
("Author", False),
|
||||
("Author-email", False),
|
||||
("Maintainer", False),
|
||||
("Maintainer-email", False),
|
||||
("License", False),
|
||||
("Classifier", True),
|
||||
("Requires-Dist", True),
|
||||
("Requires-Python", False),
|
||||
("Requires-External", True),
|
||||
("Project-URL", True),
|
||||
("Provides-Extra", True),
|
||||
("Provides-Dist", True),
|
||||
("Obsoletes-Dist", True),
|
||||
]
|
||||
|
||||
|
||||
def json_name(field: str) -> str:
    """Turn a metadata field name into its JSON key form.

    The name is lower-cased and every ``-`` becomes ``_``, e.g.
    ``"Requires-Dist"`` -> ``"requires_dist"``.
    """
    return "_".join(field.lower().split("-"))
|
||||
|
||||
|
||||
def msg_to_json(msg: Message) -> Dict[str, Any]:
    """Convert a Message object into a JSON-compatible dictionary.

    Each field listed in ``METADATA_FIELDS`` that is present in ``msg`` is
    stored under its ``json_name()`` key. Multiple-use fields become lists
    of strings and single-use fields become strings, except ``keywords``,
    which is split into a list. A non-empty message payload is stored under
    ``"description"``.

    :param msg: The parsed metadata message.
    :returns: A dictionary holding the converted fields.
    """

    def sanitise_header(h: Union[Header, str]) -> str:
        """Return header ``h`` as text, picking a charset for unknown-8bit chunks."""
        if isinstance(h, Header):
            chunks = []
            # ``raw`` rather than ``bytes``, so the builtin is not shadowed.
            for raw, encoding in decode_header(h):
                if encoding == "unknown-8bit":
                    try:
                        # See if UTF-8 works
                        raw.decode("utf-8")
                        encoding = "utf-8"
                    except UnicodeDecodeError:
                        # If not, latin1 at least won't fail
                        encoding = "latin1"
                chunks.append((raw, encoding))
            return str(make_header(chunks))
        return str(h)

    result = {}
    for field, multi in METADATA_FIELDS:
        if field not in msg:
            continue
        key = json_name(field)
        if multi:
            value: Union[str, List[str]] = [
                sanitise_header(v) for v in msg.get_all(field)  # type: ignore
            ]
        else:
            value = sanitise_header(msg.get(field))  # type: ignore
            if key == "keywords":
                # Accept both comma-separated and space-separated
                # forms, for better compatibility with old data.
                if "," in value:
                    value = [v.strip() for v in value.split(",")]
                else:
                    value = value.split()
        result[key] = value

    payload = msg.get_payload()
    if payload:
        result["description"] = payload

    return result
|
||||
|
|
@ -0,0 +1,702 @@
|
|||
import csv
|
||||
import email.message
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import zipfile
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Collection,
|
||||
Container,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
NamedTuple,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pip._vendor.packaging.requirements import Requirement
|
||||
from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet
|
||||
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
|
||||
from pip._vendor.packaging.version import LegacyVersion, Version
|
||||
|
||||
from pip._internal.exceptions import NoneMetadataError
|
||||
from pip._internal.locations import site_packages, user_site
|
||||
from pip._internal.models.direct_url import (
|
||||
DIRECT_URL_METADATA_NAME,
|
||||
DirectUrl,
|
||||
DirectUrlValidationError,
|
||||
)
|
||||
from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here.
|
||||
from pip._internal.utils.egg_link import egg_link_path_from_sys_path
|
||||
from pip._internal.utils.misc import is_local, normalize_path
|
||||
from pip._internal.utils.urls import url_to_path
|
||||
|
||||
from ._json import msg_to_json
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Protocol
|
||||
else:
|
||||
Protocol = object
|
||||
|
||||
DistributionVersion = Union[LegacyVersion, Version]
|
||||
|
||||
InfoPath = Union[str, pathlib.PurePath]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseEntryPoint(Protocol):
    """Interface of an entry point: ``name``, ``value`` and ``group`` strings.

    Every property here raises ``NotImplementedError``.
    """

    @property
    def name(self) -> str:
        """Name of the entry point."""
        raise NotImplementedError

    @property
    def value(self) -> str:
        """Value of the entry point."""
        raise NotImplementedError

    @property
    def group(self) -> str:
        """Group of the entry point."""
        raise NotImplementedError
|
||||
|
||||
|
||||
def _convert_installed_files_path(
|
||||
entry: Tuple[str, ...],
|
||||
info: Tuple[str, ...],
|
||||
) -> str:
|
||||
"""Convert a legacy installed-files.txt path into modern RECORD path.
|
||||
|
||||
The legacy format stores paths relative to the info directory, while the
|
||||
modern format stores paths relative to the package root, e.g. the
|
||||
site-packages directory.
|
||||
|
||||
:param entry: Path parts of the installed-files.txt entry.
|
||||
:param info: Path parts of the egg-info directory relative to package root.
|
||||
:returns: The converted entry.
|
||||
|
||||
For best compatibility with symlinks, this does not use ``abspath()`` or
|
||||
``Path.resolve()``, but tries to work with path parts:
|
||||
|
||||
1. While ``entry`` starts with ``..``, remove the equal amounts of parts
|
||||
from ``info``; if ``info`` is empty, start appending ``..`` instead.
|
||||
2. Join the two directly.
|
||||
"""
|
||||
while entry and entry[0] == "..":
|
||||
if not info or info[-1] == "..":
|
||||
info += ("..",)
|
||||
else:
|
||||
info = info[:-1]
|
||||
entry = entry[1:]
|
||||
return str(pathlib.Path(*info, *entry))
|
||||
|
||||
|
||||
class RequiresEntry(NamedTuple):
    # Three string fields describing one requirement entry.
    # NOTE(review): presumably one parsed line of a legacy requires.txt --
    # confirm against the code that builds these tuples.
    requirement: str
    extra: str
    marker: str
|
||||
|
||||
|
||||
class BaseDistribution(Protocol):
|
||||
@classmethod
|
||||
def from_directory(cls, directory: str) -> "BaseDistribution":
|
||||
"""Load the distribution from a metadata directory.
|
||||
|
||||
:param directory: Path to a metadata directory, e.g. ``.dist-info``.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@classmethod
|
||||
def from_metadata_file_contents(
|
||||
cls,
|
||||
metadata_contents: bytes,
|
||||
filename: str,
|
||||
project_name: str,
|
||||
) -> "BaseDistribution":
|
||||
"""Load the distribution from the contents of a METADATA file.
|
||||
|
||||
This is used to implement PEP 658 by generating a "shallow" dist object that can
|
||||
be used for resolution without downloading or building the actual dist yet.
|
||||
|
||||
:param metadata_contents: The contents of a METADATA file.
|
||||
:param filename: File name for the dist with this metadata.
|
||||
:param project_name: Name of the project this dist represents.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@classmethod
|
||||
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
|
||||
"""Load the distribution from a given wheel.
|
||||
|
||||
:param wheel: A concrete wheel definition.
|
||||
:param name: File name of the wheel.
|
||||
|
||||
:raises InvalidWheel: Whenever loading of the wheel causes a
|
||||
:py:exc:`zipfile.BadZipFile` exception to be thrown.
|
||||
:raises UnsupportedWheel: If the wheel is a valid zip, but malformed
|
||||
internally.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"{self.raw_name} {self.version} ({self.location})"
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.raw_name} {self.version}"
|
||||
|
||||
@property
|
||||
def location(self) -> Optional[str]:
|
||||
"""Where the distribution is loaded from.
|
||||
|
||||
A string value is not necessarily a filesystem path, since distributions
|
||||
can be loaded from other sources, e.g. arbitrary zip archives. ``None``
|
||||
means the distribution is created in-memory.
|
||||
|
||||
Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
|
||||
this is a symbolic link, we want to preserve the relative path between
|
||||
it and files in the distribution.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def editable_project_location(self) -> Optional[str]:
|
||||
"""The project location for editable distributions.
|
||||
|
||||
This is the directory where pyproject.toml or setup.py is located.
|
||||
None if the distribution is not installed in editable mode.
|
||||
"""
|
||||
# TODO: this property is relatively costly to compute, memoize it ?
|
||||
direct_url = self.direct_url
|
||||
if direct_url:
|
||||
if direct_url.is_local_editable():
|
||||
return url_to_path(direct_url.url)
|
||||
else:
|
||||
# Search for an .egg-link file by walking sys.path, as it was
|
||||
# done before by dist_is_editable().
|
||||
egg_link_path = egg_link_path_from_sys_path(self.raw_name)
|
||||
if egg_link_path:
|
||||
# TODO: get project location from second line of egg_link file
|
||||
# (https://github.com/pypa/pip/issues/10243)
|
||||
return self.location
|
||||
return None
|
||||
|
||||
@property
|
||||
def installed_location(self) -> Optional[str]:
|
||||
"""The distribution's "installed" location.
|
||||
|
||||
This should generally be a ``site-packages`` directory. This is
|
||||
usually ``dist.location``, except for legacy develop-installed packages,
|
||||
where ``dist.location`` is the source code location, and this is where
|
||||
the ``.egg-link`` file is.
|
||||
|
||||
The returned location is normalized (in particular, with symlinks removed).
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def info_location(self) -> Optional[str]:
|
||||
"""Location of the .[egg|dist]-info directory or file.
|
||||
|
||||
Similarly to ``location``, a string value is not necessarily a
|
||||
filesystem path. ``None`` means the distribution is created in-memory.
|
||||
|
||||
For a modern .dist-info installation on disk, this should be something
|
||||
like ``{location}/{raw_name}-{version}.dist-info``.
|
||||
|
||||
Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
|
||||
this is a symbolic link, we want to preserve the relative path between
|
||||
it and other files in the distribution.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def installed_by_distutils(self) -> bool:
|
||||
"""Whether this distribution is installed with legacy distutils format.
|
||||
|
||||
A distribution installed with "raw" distutils not patched by setuptools
|
||||
uses one single file at ``info_location`` to store metadata. We need to
|
||||
treat this specially on uninstallation.
|
||||
"""
|
||||
info_location = self.info_location
|
||||
if not info_location:
|
||||
return False
|
||||
return pathlib.Path(info_location).is_file()
|
||||
|
||||
@property
|
||||
def installed_as_egg(self) -> bool:
|
||||
"""Whether this distribution is installed as an egg.
|
||||
|
||||
This usually indicates the distribution was installed by (older versions
|
||||
of) easy_install.
|
||||
"""
|
||||
location = self.location
|
||||
if not location:
|
||||
return False
|
||||
return location.endswith(".egg")
|
||||
|
||||
@property
|
||||
def installed_with_setuptools_egg_info(self) -> bool:
|
||||
"""Whether this distribution is installed with the ``.egg-info`` format.
|
||||
|
||||
This usually indicates the distribution was installed with setuptools
|
||||
with an old pip version or with ``single-version-externally-managed``.
|
||||
|
||||
Note that this ensure the metadata store is a directory. distutils can
|
||||
also installs an ``.egg-info``, but as a file, not a directory. This
|
||||
property is *False* for that case. Also see ``installed_by_distutils``.
|
||||
"""
|
||||
info_location = self.info_location
|
||||
if not info_location:
|
||||
return False
|
||||
if not info_location.endswith(".egg-info"):
|
||||
return False
|
||||
return pathlib.Path(info_location).is_dir()
|
||||
|
||||
@property
|
||||
def installed_with_dist_info(self) -> bool:
|
||||
"""Whether this distribution is installed with the "modern format".
|
||||
|
||||
This indicates a "modern" installation, e.g. storing metadata in the
|
||||
``.dist-info`` directory. This applies to installations made by
|
||||
setuptools (but through pip, not directly), or anything using the
|
||||
standardized build backend interface (PEP 517).
|
||||
"""
|
||||
info_location = self.info_location
|
||||
if not info_location:
|
||||
return False
|
||||
if not info_location.endswith(".dist-info"):
|
||||
return False
|
||||
return pathlib.Path(info_location).is_dir()
|
||||
|
||||
@property
|
||||
def canonical_name(self) -> NormalizedName:
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def version(self) -> DistributionVersion:
|
||||
raise NotImplementedError()
|
||||
|
||||
@property
|
||||
def setuptools_filename(self) -> str:
|
||||
"""Convert a project name to its setuptools-compatible filename.
|
||||
|
||||
This is a copy of ``pkg_resources.to_filename()`` for compatibility.
|
||||
"""
|
||||
return self.raw_name.replace("-", "_")
|
||||
|
||||
@property
|
||||
def direct_url(self) -> Optional[DirectUrl]:
|
||||
"""Obtain a DirectUrl from this distribution.
|
||||
|
||||
Returns None if the distribution has no `direct_url.json` metadata,
|
||||
or if `direct_url.json` is invalid.
|
||||
"""
|
||||
try:
|
||||
content = self.read_text(DIRECT_URL_METADATA_NAME)
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
try:
|
||||
return DirectUrl.from_json(content)
|
||||
except (
|
||||
UnicodeDecodeError,
|
||||
json.JSONDecodeError,
|
||||
DirectUrlValidationError,
|
||||
) as e:
|
||||
logger.warning(
|
||||
"Error parsing %s for %s: %s",
|
||||
DIRECT_URL_METADATA_NAME,
|
||||
self.canonical_name,
|
||||
e,
|
||||
)
|
||||
return None
|
||||
|
||||
@property
|
||||
def installer(self) -> str:
|
||||
try:
|
||||
installer_text = self.read_text("INSTALLER")
|
||||
except (OSError, ValueError, NoneMetadataError):
|
||||
return "" # Fail silently if the installer file cannot be read.
|
||||
for line in installer_text.splitlines():
|
||||
cleaned_line = line.strip()
|
||||
if cleaned_line:
|
||||
return cleaned_line
|
||||
return ""
|
||||
|
||||
@property
|
||||
def requested(self) -> bool:
|
||||
return self.is_file("REQUESTED")
|
||||
|
||||
@property
|
||||
def editable(self) -> bool:
|
||||
return bool(self.editable_project_location)
|
||||
|
||||
@property
|
||||
def local(self) -> bool:
|
||||
"""If distribution is installed in the current virtual environment.
|
||||
|
||||
Always True if we're not in a virtualenv.
|
||||
"""
|
||||
if self.installed_location is None:
|
||||
return False
|
||||
return is_local(self.installed_location)
|
||||
|
||||
@property
|
||||
def in_usersite(self) -> bool:
|
||||
if self.installed_location is None or user_site is None:
|
||||
return False
|
||||
return self.installed_location.startswith(normalize_path(user_site))
|
||||
|
||||
@property
|
||||
def in_site_packages(self) -> bool:
|
||||
if self.installed_location is None or site_packages is None:
|
||||
return False
|
||||
return self.installed_location.startswith(normalize_path(site_packages))
|
||||
|
||||
def is_file(self, path: InfoPath) -> bool:
|
||||
"""Check whether an entry in the info directory is a file."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def iter_distutils_script_names(self) -> Iterator[str]:
|
||||
"""Find distutils 'scripts' entries metadata.
|
||||
|
||||
If 'scripts' is supplied in ``setup.py``, distutils records those in the
|
||||
installed distribution's ``scripts`` directory, a file for each script.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def read_text(self, path: InfoPath) -> str:
|
||||
"""Read a file in the info directory.
|
||||
|
||||
:raise FileNotFoundError: If ``path`` does not exist in the directory.
|
||||
:raise NoneMetadataError: If ``path`` exists in the info directory, but
|
||||
cannot be read.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
|
||||
raise NotImplementedError()
|
||||
|
||||
def _metadata_impl(self) -> email.message.Message:
|
||||
raise NotImplementedError()
|
||||
|
||||
@functools.lru_cache(maxsize=1)
|
||||
def _metadata_cached(self) -> email.message.Message:
|
||||
# When we drop python 3.7 support, move this to the metadata property and use
|
||||
# functools.cached_property instead of lru_cache.
|
||||
metadata = self._metadata_impl()
|
||||
self._add_egg_info_requires(metadata)
|
||||
return metadata
|
||||
|
||||
@property
|
||||
def metadata(self) -> email.message.Message:
|
||||
"""Metadata of distribution parsed from e.g. METADATA or PKG-INFO.
|
||||
|
||||
This should return an empty message if the metadata file is unavailable.
|
||||
|
||||
:raises NoneMetadataError: If the metadata file is available, but does
|
||||
not contain valid metadata.
|
||||
"""
|
||||
return self._metadata_cached()
|
||||
|
||||
@property
|
||||
def metadata_dict(self) -> Dict[str, Any]:
|
||||
"""PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO.
|
||||
|
||||
This should return an empty dict if the metadata file is unavailable.
|
||||
|
||||
:raises NoneMetadataError: If the metadata file is available, but does
|
||||
not contain valid metadata.
|
||||
"""
|
||||
return msg_to_json(self.metadata)
|
||||
|
||||
@property
|
||||
def metadata_version(self) -> Optional[str]:
|
||||
"""Value of "Metadata-Version:" in distribution metadata, if available."""
|
||||
return self.metadata.get("Metadata-Version")
|
||||
|
||||
@property
|
||||
def raw_name(self) -> str:
|
||||
"""Value of "Name:" in distribution metadata."""
|
||||
# The metadata should NEVER be missing the Name: key, but if it somehow
|
||||
# does, fall back to the known canonical name.
|
||||
return self.metadata.get("Name", self.canonical_name)
|
||||
|
||||
@property
|
||||
def requires_python(self) -> SpecifierSet:
|
||||
"""Value of "Requires-Python:" in distribution metadata.
|
||||
|
||||
If the key does not exist or contains an invalid value, an empty
|
||||
SpecifierSet should be returned.
|
||||
"""
|
||||
value = self.metadata.get("Requires-Python")
|
||||
if value is None:
|
||||
return SpecifierSet()
|
||||
try:
|
||||
# Convert to str to satisfy the type checker; this can be a Header object.
|
||||
spec = SpecifierSet(str(value))
|
||||
except InvalidSpecifier as e:
|
||||
message = "Package %r has an invalid Requires-Python: %s"
|
||||
logger.warning(message, self.raw_name, e)
|
||||
return SpecifierSet()
|
||||
return spec
|
||||
|
||||
def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
|
||||
"""Dependencies of this distribution.
|
||||
|
||||
For modern .dist-info distributions, this is the collection of
|
||||
"Requires-Dist:" entries in distribution metadata.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def iter_provided_extras(self) -> Iterable[str]:
|
||||
"""Extras provided by this distribution.
|
||||
|
||||
For modern .dist-info distributions, this is the collection of
|
||||
"Provides-Extra:" entries in distribution metadata.
|
||||
|
||||
The return value of this function is not particularly useful other than
|
||||
display purposes due to backward compatibility issues and the extra
|
||||
names being poorly normalized prior to PEP 685. If you want to perform
|
||||
logic operations on extras, use :func:`is_extra_provided` instead.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def is_extra_provided(self, extra: str) -> bool:
|
||||
"""Check whether an extra is provided by this distribution.
|
||||
|
||||
This is needed mostly for compatibility issues with pkg_resources not
|
||||
following the extra normalization rules defined in PEP 685.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def _iter_declared_entries_from_record(self) -> Optional[Iterator[str]]:
|
||||
try:
|
||||
text = self.read_text("RECORD")
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
# This extra Path-str cast normalizes entries.
|
||||
return (str(pathlib.Path(row[0])) for row in csv.reader(text.splitlines()))
|
||||
|
||||
def _iter_declared_entries_from_legacy(self) -> Optional[Iterator[str]]:
|
||||
try:
|
||||
text = self.read_text("installed-files.txt")
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
paths = (p for p in text.splitlines(keepends=False) if p)
|
||||
root = self.location
|
||||
info = self.info_location
|
||||
if root is None or info is None:
|
||||
return paths
|
||||
try:
|
||||
info_rel = pathlib.Path(info).relative_to(root)
|
||||
except ValueError: # info is not relative to root.
|
||||
return paths
|
||||
if not info_rel.parts: # info *is* root.
|
||||
return paths
|
||||
return (
|
||||
_convert_installed_files_path(pathlib.Path(p).parts, info_rel.parts)
|
||||
for p in paths
|
||||
)
|
||||
|
||||
def iter_declared_entries(self) -> Optional[Iterator[str]]:
|
||||
"""Iterate through file entries declared in this distribution.
|
||||
|
||||
For modern .dist-info distributions, this is the files listed in the
|
||||
``RECORD`` metadata file. For legacy setuptools distributions, this
|
||||
comes from ``installed-files.txt``, with entries normalized to be
|
||||
compatible with the format used by ``RECORD``.
|
||||
|
||||
:return: An iterator for listed entries, or None if the distribution
|
||||
contains neither ``RECORD`` nor ``installed-files.txt``.
|
||||
"""
|
||||
return (
|
||||
self._iter_declared_entries_from_record()
|
||||
or self._iter_declared_entries_from_legacy()
|
||||
)
|
||||
|
||||
def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
|
||||
"""Parse a ``requires.txt`` in an egg-info directory.
|
||||
|
||||
This is an INI-ish format where an egg-info stores dependencies. A
|
||||
section name describes extra other environment markers, while each entry
|
||||
is an arbitrary string (not a key-value pair) representing a dependency
|
||||
as a requirement string (no markers).
|
||||
|
||||
There is a construct in ``importlib.metadata`` called ``Sectioned`` that
|
||||
does mostly the same, but the format is currently considered private.
|
||||
"""
|
||||
try:
|
||||
content = self.read_text("requires.txt")
|
||||
except FileNotFoundError:
|
||||
return
|
||||
extra = marker = "" # Section-less entries don't have markers.
|
||||
for line in content.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"): # Comment; ignored.
|
||||
continue
|
||||
if line.startswith("[") and line.endswith("]"): # A section header.
|
||||
extra, _, marker = line.strip("[]").partition(":")
|
||||
continue
|
||||
yield RequiresEntry(requirement=line, extra=extra, marker=marker)
|
||||
|
||||
def _iter_egg_info_extras(self) -> Iterable[str]:
|
||||
"""Get extras from the egg-info directory."""
|
||||
known_extras = {""}
|
||||
for entry in self._iter_requires_txt_entries():
|
||||
extra = canonicalize_name(entry.extra)
|
||||
if extra in known_extras:
|
||||
continue
|
||||
known_extras.add(extra)
|
||||
yield extra
|
||||
|
||||
def _iter_egg_info_dependencies(self) -> Iterable[str]:
|
||||
"""Get distribution dependencies from the egg-info directory.
|
||||
|
||||
To ease parsing, this converts a legacy dependency entry into a PEP 508
|
||||
requirement string. Like ``_iter_requires_txt_entries()``, there is code
|
||||
in ``importlib.metadata`` that does mostly the same, but not do exactly
|
||||
what we need.
|
||||
|
||||
Namely, ``importlib.metadata`` does not normalize the extra name before
|
||||
putting it into the requirement string, which causes marker comparison
|
||||
to fail because the dist-info format do normalize. This is consistent in
|
||||
all currently available PEP 517 backends, although not standardized.
|
||||
"""
|
||||
for entry in self._iter_requires_txt_entries():
|
||||
extra = canonicalize_name(entry.extra)
|
||||
if extra and entry.marker:
|
||||
marker = f'({entry.marker}) and extra == "{extra}"'
|
||||
elif extra:
|
||||
marker = f'extra == "{extra}"'
|
||||
elif entry.marker:
|
||||
marker = entry.marker
|
||||
else:
|
||||
marker = ""
|
||||
if marker:
|
||||
yield f"{entry.requirement} ; {marker}"
|
||||
else:
|
||||
yield entry.requirement
|
||||
|
||||
def _add_egg_info_requires(self, metadata: email.message.Message) -> None:
|
||||
"""Add egg-info requires.txt information to the metadata."""
|
||||
if not metadata.get_all("Requires-Dist"):
|
||||
for dep in self._iter_egg_info_dependencies():
|
||||
metadata["Requires-Dist"] = dep
|
||||
if not metadata.get_all("Provides-Extra"):
|
||||
for extra in self._iter_egg_info_extras():
|
||||
metadata["Provides-Extra"] = extra
|
||||
|
||||
|
||||
class BaseEnvironment:
    """An environment containing distributions to introspect."""

    @classmethod
    def default(cls) -> "BaseEnvironment":
        raise NotImplementedError()

    @classmethod
    def from_paths(cls, paths: Optional[List[str]]) -> "BaseEnvironment":
        raise NotImplementedError()

    def get_distribution(self, name: str) -> Optional["BaseDistribution"]:
        """Given a requirement name, return the installed distributions.

        The name may not be normalized. The implementation must canonicalize
        it for lookup.
        """
        raise NotImplementedError()

    def _iter_distributions(self) -> Iterator["BaseDistribution"]:
        """Iterate through installed distributions.

        This function should be implemented by subclass, but never called
        directly. Use the public ``iter_all_distributions()`` instead, which
        implements additional logic to make sure the distributions are valid.
        """
        raise NotImplementedError()

    def iter_all_distributions(self) -> Iterator[BaseDistribution]:
        """Iterate through all installed distributions without any filtering."""
        for dist in self._iter_distributions():
            # Make sure the distribution actually comes from a valid Python
            # packaging distribution. Pip's AdjacentTempDirectory leaves folders
            # e.g. ``~atplotlib.dist-info`` if cleanup was interrupted. The
            # valid project name pattern is taken from PEP 508.
            project_name_valid = re.match(
                r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$",
                dist.canonical_name,
                flags=re.IGNORECASE,
            )
            if not project_name_valid:
                logger.warning(
                    "Ignoring invalid distribution %s (%s)",
                    dist.canonical_name,
                    dist.location,
                )
                continue
            yield dist

    def iter_installed_distributions(
        self,
        local_only: bool = True,
        skip: Container[str] = stdlib_pkgs,
        include_editables: bool = True,
        editables_only: bool = False,
        user_only: bool = False,
    ) -> Iterator[BaseDistribution]:
        """Return an iterator over installed distributions.

        This is based on ``iter_all_distributions()`` with additional filtering
        options. Note that ``iter_installed_distributions()`` without arguments
        is *not* equal to ``iter_all_distributions()``, since some of the
        configurations exclude packages by default.

        :param local_only: If True (default), only return installations
            local to the current virtualenv, if in a virtualenv.
        :param skip: An iterable of canonicalized project names to ignore;
            defaults to ``stdlib_pkgs``.
        :param include_editables: If False, don't report editables.
        :param editables_only: If True, only report editables.
        :param user_only: If True, only report installations in the user
            site directory.
        """
        # Each enabled option wraps the iterator in one more lazy filter.
        it = self.iter_all_distributions()
        if local_only:
            it = (d for d in it if d.local)
        if not include_editables:
            it = (d for d in it if not d.editable)
        if editables_only:
            it = (d for d in it if d.editable)
        if user_only:
            it = (d for d in it if d.in_usersite)
        return (d for d in it if d.canonical_name not in skip)
|
||||
|
||||
|
||||
class Wheel(Protocol):
    """Interface of a wheel that can be opened as a zip archive."""

    # Identifies where the wheel comes from; implementers set it.
    location: str

    def as_zipfile(self) -> zipfile.ZipFile:
        """Return the wheel opened as a :class:`zipfile.ZipFile`."""
        raise NotImplementedError()
|
||||
|
||||
|
||||
class FilesystemWheel(Wheel):
    """A wheel that is opened from the path stored in ``location``."""

    def __init__(self, location: str) -> None:
        self.location = location

    def as_zipfile(self) -> zipfile.ZipFile:
        """Open ``location`` as a zip archive (ZIP64 allowed)."""
        return zipfile.ZipFile(self.location, allowZip64=True)
|
||||
|
||||
|
||||
class MemoryWheel(Wheel):
    """A wheel whose archive is read from a binary stream.

    ``location`` is only stored; the archive itself is read from ``stream``.
    """

    def __init__(self, location: str, stream: IO[bytes]) -> None:
        self.location = location
        self.stream = stream

    def as_zipfile(self) -> zipfile.ZipFile:
        """Open ``stream`` as a zip archive (ZIP64 allowed)."""
        return zipfile.ZipFile(self.stream, allowZip64=True)
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
from ._dists import Distribution
|
||||
from ._envs import Environment
|
||||
|
||||
__all__ = ["NAME", "Distribution", "Environment"]
|
||||
|
||||
NAME = "importlib"
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
import importlib.metadata
|
||||
from typing import Any, Optional, Protocol, cast
|
||||
|
||||
|
||||
class BadMetadata(ValueError):
    """Raised when a distribution's metadata cannot be used.

    :param dist: The distribution whose metadata is bad.
    :param reason: Short description of the problem; shown by ``str()``.
    """

    def __init__(self, dist: importlib.metadata.Distribution, *, reason: str) -> None:
        self.dist = dist
        self.reason = reason

    def __str__(self) -> str:
        return f"Bad metadata in {self.dist} ({self.reason})"
|
||||
|
||||
|
||||
class BasePath(Protocol):
    """A protocol that various path objects conform to.

    This exists because importlib.metadata uses both ``pathlib.Path`` and
    ``zipfile.Path``, and we need a common base for type hints (Union does not
    work well since ``zipfile.Path`` is too new for our linter setup).

    This is not meant to be exhaustive; it only contains things that are
    present in both classes *that we need*.
    """

    @property
    def name(self) -> str:
        raise NotImplementedError()

    @property
    def parent(self) -> "BasePath":
        raise NotImplementedError()
|
||||
|
||||
|
||||
def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]:
    """Find the path to the distribution's metadata directory.

    HACK: This relies on importlib.metadata's private ``_path`` attribute. Not
    all distributions exist on disk, so importlib.metadata is correct to not
    expose the attribute as public. But pip's code base is old and not as clean,
    so we do this to avoid having to rewrite too many things. Hopefully we can
    eliminate this some day.

    :param d: The distribution to inspect.
    :returns: The value of ``d._path``, or ``None`` if the attribute is absent.
    """
    return getattr(d, "_path", None)
|
||||
|
||||
|
||||
def get_dist_name(dist: importlib.metadata.Distribution) -> str:
    """Get the distribution's project name.

    The ``name`` attribute is only available in Python 3.10 or later. We are
    targeting exactly that, but Mypy does not know this.

    :param dist: The distribution to read the name from.
    :returns: The value of ``dist.name``.
    :raises BadMetadata: If ``dist.name`` is not a string.
    """
    # cast() hides the attribute access from the type checker only.
    name = cast(Any, dist).name
    if not isinstance(name, str):
        raise BadMetadata(dist, reason="invalid metadata entry 'name'")
    return name
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
import email.message
|
||||
import importlib.metadata
|
||||
import os
|
||||
import pathlib
|
||||
import zipfile
|
||||
from typing import (
|
||||
Collection,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
cast,
|
||||
)
|
||||
|
||||
from pip._vendor.packaging.requirements import Requirement
|
||||
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
|
||||
from pip._vendor.packaging.version import parse as parse_version
|
||||
|
||||
from pip._internal.exceptions import InvalidWheel, UnsupportedWheel
|
||||
from pip._internal.metadata.base import (
|
||||
BaseDistribution,
|
||||
BaseEntryPoint,
|
||||
DistributionVersion,
|
||||
InfoPath,
|
||||
Wheel,
|
||||
)
|
||||
from pip._internal.utils.misc import normalize_path
|
||||
from pip._internal.utils.temp_dir import TempDirectory
|
||||
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
|
||||
|
||||
from ._compat import BasePath, get_dist_name
|
||||
|
||||
|
||||
class WheelDistribution(importlib.metadata.Distribution):
    """An ``importlib.metadata.Distribution`` read from a wheel.

    Although ``importlib.metadata.PathDistribution`` accepts ``zipfile.Path``,
    its implementation is too "lazy" for pip's needs (we can't keep the ZipFile
    handle open for the entire lifetime of the distribution object).

    This implementation eagerly reads the entire metadata directory into the
    memory instead, and operates from that.
    """

    def __init__(
        self,
        files: Mapping[pathlib.PurePosixPath, bytes],
        info_location: pathlib.PurePosixPath,
    ) -> None:
        # Maps paths relative to the metadata directory to raw file contents.
        self._files = files
        self.info_location = info_location

    @classmethod
    def from_zipfile(
        cls,
        zf: zipfile.ZipFile,
        name: str,
        location: str,
    ) -> "WheelDistribution":
        """Read every file below the wheel's metadata directory into memory.

        :param zf: The opened wheel archive.
        :param name: Passed to ``parse_wheel`` to locate the metadata directory.
        :param location: Prefix joined with the metadata directory name to
            form ``info_location``.
        """
        info_dir, _ = parse_wheel(zf, name)
        # Pair each archive member with its path relative to the first
        # directory component (the metadata directory).
        paths = (
            (member, pathlib.PurePosixPath(member.split("/", 1)[-1]))
            for member in zf.namelist()
            if member.startswith(f"{info_dir}/")
        )
        files = {
            relpath: read_wheel_metadata_file(zf, fullpath)
            for fullpath, relpath in paths
        }
        info_location = pathlib.PurePosixPath(location, info_dir)
        return cls(files, info_location)

    def iterdir(self, path: InfoPath) -> Iterator[pathlib.PurePosixPath]:
        """Iterate over all stored metadata paths if ``path`` is one of them.

        :raises FileNotFoundError: If ``path`` is not a stored metadata path.
        """
        # Only allow iterating through the metadata directory.
        if pathlib.PurePosixPath(str(path)) in self._files:
            return iter(self._files)
        raise FileNotFoundError(path)

    def read_text(self, filename: str) -> Optional[str]:
        """Return a metadata file decoded as UTF-8, or None if it is absent.

        :raises UnsupportedWheel: If the file is not valid UTF-8.
        """
        try:
            data = self._files[pathlib.PurePosixPath(filename)]
        except KeyError:
            return None
        try:
            text = data.decode("utf-8")
        except UnicodeDecodeError as e:
            wheel = self.info_location.parent
            error = f"Error decoding metadata for {wheel}: {e} in {filename} file"
            # Keep the decoding error attached as the explicit cause.
            raise UnsupportedWheel(error) from e
        return text

    def locate_file(self, path: "os.PathLike[str]") -> pathlib.Path:
        """Abstract in the base class; not meaningful for an in-memory wheel."""
        # The metadata lives in memory only, so there is no file to point to,
        # but the base class API requires the method to be defined.
        raise NotImplementedError
|
||||
|
||||
|
||||
class Distribution(BaseDistribution):
    """``BaseDistribution`` backed by an ``importlib.metadata.Distribution``."""

    def __init__(
        self,
        dist: importlib.metadata.Distribution,
        info_location: Optional[BasePath],
        installed_location: Optional[BasePath],
    ) -> None:
        self._dist = dist
        self._info_location = info_location
        self._installed_location = installed_location

    @classmethod
    def from_directory(cls, directory: str) -> BaseDistribution:
        """Load the distribution from the metadata directory ``directory``."""
        info_location = pathlib.Path(directory)
        dist = importlib.metadata.Distribution.at(info_location)
        return cls(dist, info_location, info_location.parent)

    @classmethod
    def from_metadata_file_contents(
        cls,
        metadata_contents: bytes,
        filename: str,
        project_name: str,
    ) -> BaseDistribution:
        """Build a distribution from raw METADATA bytes.

        ``filename`` and ``project_name`` are accepted for interface
        compatibility but are not used here.
        """
        # Generate temp dir to contain the metadata file, and write the file contents.
        temp_dir = pathlib.Path(
            TempDirectory(kind="metadata", globally_managed=True).path
        )
        metadata_path = temp_dir / "METADATA"
        metadata_path.write_bytes(metadata_contents)
        # Construct dist pointing to the newly created directory.
        dist = importlib.metadata.Distribution.at(metadata_path.parent)
        return cls(dist, metadata_path.parent, None)

    @classmethod
    def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
        """Load the distribution from ``wheel``.

        :raises InvalidWheel: If the archive is not a valid zip file.
        :raises UnsupportedWheel: If reading the wheel metadata fails.
        """
        try:
            with wheel.as_zipfile() as zf:
                dist = WheelDistribution.from_zipfile(zf, name, wheel.location)
        except zipfile.BadZipFile as e:
            raise InvalidWheel(wheel.location, name) from e
        except UnsupportedWheel as e:
            # Re-raise with the wheel name, keeping the original as the cause.
            raise UnsupportedWheel(f"{name} has an invalid wheel, {e}") from e
        return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location))

    @property
    def location(self) -> Optional[str]:
        """Parent of the info location as a string, or None if there is none."""
        if self._info_location is None:
            return None
        return str(self._info_location.parent)

    @property
    def info_location(self) -> Optional[str]:
        """The info location as a string, or None if there is none."""
        if self._info_location is None:
            return None
        return str(self._info_location)

    @property
    def installed_location(self) -> Optional[str]:
        """The installed location passed through ``normalize_path``, or None."""
        if self._installed_location is None:
            return None
        return normalize_path(str(self._installed_location))

    def _get_dist_name_from_location(self) -> Optional[str]:
        """Try to get the name from the metadata directory name.

        This is much faster than reading metadata.
        """
        if self._info_location is None:
            return None
        stem, suffix = os.path.splitext(self._info_location.name)
        if suffix not in (".dist-info", ".egg-info"):
            return None
        # The name is everything before the first "-" of the stem.
        return stem.split("-", 1)[0]

    @property
    def canonical_name(self) -> NormalizedName:
        """Canonicalized name, preferring the directory name over metadata."""
        name = self._get_dist_name_from_location() or get_dist_name(self._dist)
        return canonicalize_name(name)

    @property
    def version(self) -> DistributionVersion:
        return parse_version(self._dist.version)

    def is_file(self, path: InfoPath) -> bool:
        """Whether the wrapped distribution can read ``path`` as text."""
        return self._dist.read_text(str(path)) is not None

    def iter_distutils_script_names(self) -> Iterator[str]:
        """Yield the names of the entries in the info directory's ``scripts``."""
        # A distutils installation is always "flat" (not in e.g. egg form), so
        # if this distribution's info location is NOT a pathlib.Path (but e.g.
        # zipfile.Path), it can never contain any distutils scripts.
        if not isinstance(self._info_location, pathlib.Path):
            return
        for child in self._info_location.joinpath("scripts").iterdir():
            yield child.name

    def read_text(self, path: InfoPath) -> str:
        """Read a file in the info directory.

        :raises FileNotFoundError: If the wrapped distribution returns None
            for ``path``.
        """
        content = self._dist.read_text(str(path))
        if content is None:
            raise FileNotFoundError(path)
        return content

    def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
        # importlib.metadata's EntryPoint structure satisfies BaseEntryPoint.
        return self._dist.entry_points

    def _metadata_impl(self) -> email.message.Message:
        # From Python 3.10+, importlib.metadata declares PackageMetadata as the
        # return type. This protocol is unfortunately a disaster now and misses
        # a ton of fields that we need, including get() and get_payload(). We
        # rely on the implementation that the object is actually a Message now,
        # until upstream can improve the protocol. (python/cpython#94952)
        return cast(email.message.Message, self._dist.metadata)

    def iter_provided_extras(self) -> Iterable[str]:
        """Return the raw "Provides-Extra" values from the metadata."""
        return self.metadata.get_all("Provides-Extra", [])

    def is_extra_provided(self, extra: str) -> bool:
        """Whether ``extra`` matches a provided extra after canonicalization."""
        return any(
            canonicalize_name(provided_extra) == canonicalize_name(extra)
            for provided_extra in self.metadata.get_all("Provides-Extra", [])
        )

    def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
        """Yield "Requires-Dist" requirements that apply for ``extras``.

        A requirement is yielded when it has no marker, when no extras are
        requested and its marker holds with an empty extra, or when its marker
        holds for at least one of the requested extras.
        """
        contexts: Sequence[Dict[str, str]] = [{"extra": e} for e in extras]
        for req_string in self.metadata.get_all("Requires-Dist", []):
            req = Requirement(req_string)
            if not req.marker:
                yield req
            elif not extras and req.marker.evaluate({"extra": ""}):
                yield req
            elif any(req.marker.evaluate(context) for context in contexts):
                yield req
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue