Compare commits

..

No commits in common. "main" and "changeset-release/main" have entirely different histories.

609 changed files with 213336 additions and 3993 deletions

View file

@ -0,0 +1,247 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.
.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.
.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.
.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.
.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.
.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.
.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.
.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170
#>
Param(
[Parameter(Mandatory = $false)]
[String]
$VenvDir,
[Parameter(Mandatory = $false)]
[String]
$Prompt
)
<# Function declarations --------------------------------------------------- #>
<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.
.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.
#>
function global:deactivate ([switch]$NonDestructive) {
# Revert to original values
# The prior prompt:
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
}
# The prior PYTHONHOME:
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
}
# The prior PATH:
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
}
# Just remove the VIRTUAL_ENV altogether:
if (Test-Path -Path Env:VIRTUAL_ENV) {
Remove-Item -Path env:VIRTUAL_ENV
}
# Just remove VIRTUAL_ENV_PROMPT altogether.
if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
Remove-Item -Path env:VIRTUAL_ENV_PROMPT
}
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
}
# Leave deactivate function in the global namespace if requested:
if (-not $NonDestructive) {
Remove-Item -Path function:deactivate
}
}
<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.
If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.
.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
[String]
$ConfigDir
) {
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
# An empty map will be returned if no config file is found.
$pyvenvConfig = @{ }
if ($pyvenvConfigPath) {
Write-Verbose "File exists, parse `key = value` lines"
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
$pyvenvConfigContent | ForEach-Object {
$keyval = $PSItem -split "\s*=\s*", 2
if ($keyval[0] -and $keyval[1]) {
$val = $keyval[1]
# Remove extraneous quotations around a string value.
if ("'""".Contains($val.Substring(0, 1))) {
$val = $val.Substring(1, $val.Length - 2)
}
$pyvenvConfig[$keyval[0]] = $val
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
}
}
}
return $pyvenvConfig
}
<# Begin Activate script --------------------------------------------------- #>
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
Write-Verbose "VenvDir=$VenvDir"
}
# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
$Prompt = $pyvenvCfg['prompt'];
}
else {
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
$Prompt = Split-Path -Path $venvDir -Leaf
}
}
Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"
# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
Write-Verbose "Setting prompt to '$Prompt'"
# Set the prompt to include the env name
# Make sure _OLD_VIRTUAL_PROMPT is global
function global:_OLD_VIRTUAL_PROMPT { "" }
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
function global:prompt {
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
_OLD_VIRTUAL_PROMPT
}
$env:VIRTUAL_ENV_PROMPT = $Prompt
}
# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
Remove-Item -Path Env:PYTHONHOME
}
# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"

View file

@ -0,0 +1,70 @@
# This file must be used with "source bin/activate" *from bash*
# You cannot run it directly
deactivate () {
# reset old environment variables
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
PATH="${_OLD_VIRTUAL_PATH:-}"
export PATH
unset _OLD_VIRTUAL_PATH
fi
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
export PYTHONHOME
unset _OLD_VIRTUAL_PYTHONHOME
fi
# Call hash to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
hash -r 2> /dev/null
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
PS1="${_OLD_VIRTUAL_PS1:-}"
export PS1
unset _OLD_VIRTUAL_PS1
fi
unset VIRTUAL_ENV
unset VIRTUAL_ENV_PROMPT
if [ ! "${1:-}" = "nondestructive" ] ; then
# Self destruct!
unset -f deactivate
fi
}
# unset irrelevant variables
deactivate nondestructive
# on Windows, a path can contain colons and backslashes and has to be converted:
if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
# transform D:\path\to\venv to /d/path/to/venv on MSYS
# and to /cygdrive/d/path/to/venv on Cygwin
export VIRTUAL_ENV=$(cygpath /home/runner/work/k-skill/k-skill/.cache/python-test-venv)
else
# use the path as-is
export VIRTUAL_ENV=/home/runner/work/k-skill/k-skill/.cache/python-test-venv
fi
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/"bin":$PATH"
export PATH
# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
unset PYTHONHOME
fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1:-}"
PS1='(python-test-venv) '"${PS1:-}"
export PS1
VIRTUAL_ENV_PROMPT='(python-test-venv) '
export VIRTUAL_ENV_PROMPT
fi
# Call hash to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
hash -r 2> /dev/null

View file

@ -0,0 +1,27 @@
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.
# Created by Davide Di Blasi <davidedb@gmail.com>.
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
# Unset irrelevant variables.
deactivate nondestructive
setenv VIRTUAL_ENV /home/runner/work/k-skill/k-skill/.cache/python-test-venv
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
set _OLD_VIRTUAL_PROMPT="$prompt"
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
set prompt = '(python-test-venv) '"$prompt"
setenv VIRTUAL_ENV_PROMPT '(python-test-venv) '
endif
alias pydoc python -m pydoc
rehash

View file

@ -0,0 +1,69 @@
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/). You cannot run it directly.
function deactivate -d "Exit virtual environment and return to normal shell environment"
# reset old environment variables
if test -n "$_OLD_VIRTUAL_PATH"
set -gx PATH $_OLD_VIRTUAL_PATH
set -e _OLD_VIRTUAL_PATH
end
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
set -e _OLD_VIRTUAL_PYTHONHOME
end
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
set -e _OLD_FISH_PROMPT_OVERRIDE
# prevents error when using nested fish instances (Issue #93858)
if functions -q _old_fish_prompt
functions -e fish_prompt
functions -c _old_fish_prompt fish_prompt
functions -e _old_fish_prompt
end
end
set -e VIRTUAL_ENV
set -e VIRTUAL_ENV_PROMPT
if test "$argv[1]" != "nondestructive"
# Self-destruct!
functions -e deactivate
end
end
# Unset irrelevant variables.
deactivate nondestructive
set -gx VIRTUAL_ENV /home/runner/work/k-skill/k-skill/.cache/python-test-venv
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH
# Unset PYTHONHOME if set.
if set -q PYTHONHOME
set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
set -e PYTHONHOME
end
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
# fish uses a function instead of an env var to generate the prompt.
# Save the current fish_prompt function as the function _old_fish_prompt.
functions -c fish_prompt _old_fish_prompt
# With the original prompt function renamed, we can override with our own.
function fish_prompt
# Save the return status of the last command.
set -l old_status $status
# Output the venv prompt; color taken from the blue of the Python logo.
printf "%s%s%s" (set_color 4B8BBE) '(python-test-venv) ' (set_color normal)
# Restore the return status of the previous command.
echo "exit $old_status" | .
# Output the original/"old" prompt.
_old_fish_prompt
end
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
set -gx VIRTUAL_ENV_PROMPT '(python-test-venv) '
end

View file

@ -0,0 +1,8 @@
#!/home/runner/work/k-skill/k-skill/.cache/python-test-venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

View file

@ -0,0 +1,8 @@
#!/home/runner/work/k-skill/k-skill/.cache/python-test-venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

View file

@ -0,0 +1,8 @@
#!/home/runner/work/k-skill/k-skill/.cache/python-test-venv/bin/python3
# -*- coding: utf-8 -*-
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
sys.exit(main())

View file

@ -0,0 +1 @@
python3

View file

@ -0,0 +1 @@
/usr/bin/python3

View file

@ -0,0 +1 @@
python3

View file

@ -0,0 +1,123 @@
Metadata-Version: 2.4
Name: beautifulsoup4
Version: 4.14.3
Summary: Screen-scraping library
Project-URL: Download, https://www.crummy.com/software/BeautifulSoup/bs4/download/
Project-URL: Homepage, https://www.crummy.com/software/BeautifulSoup/bs4/
Author-email: Leonard Richardson <leonardr@segfault.org>
License: MIT License
License-File: AUTHORS
License-File: LICENSE
Keywords: HTML,XML,parse,soup
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Text Processing :: Markup :: SGML
Classifier: Topic :: Text Processing :: Markup :: XML
Requires-Python: >=3.7.0
Requires-Dist: soupsieve>=1.6.1
Requires-Dist: typing-extensions>=4.0.0
Provides-Extra: cchardet
Requires-Dist: cchardet; extra == 'cchardet'
Provides-Extra: chardet
Requires-Dist: chardet; extra == 'chardet'
Provides-Extra: charset-normalizer
Requires-Dist: charset-normalizer; extra == 'charset-normalizer'
Provides-Extra: html5lib
Requires-Dist: html5lib; extra == 'html5lib'
Provides-Extra: lxml
Requires-Dist: lxml; extra == 'lxml'
Description-Content-Type: text/markdown
Beautiful Soup is a library that makes it easy to scrape information
from web pages. It sits atop an HTML or XML parser, providing Pythonic
idioms for iterating, searching, and modifying the parse tree.
# Quick start
```
>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
>>> print(soup.prettify())
<html>
<body>
<p>
Some
<b>
bad
<i>
HTML
</i>
</b>
</p>
</body>
</html>
>>> soup.find(string="bad")
'bad'
>>> soup.i
<i>HTML</i>
#
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
#
>>> print(soup.prettify())
<?xml version="1.0" encoding="utf-8"?>
<tag1>
Some
<tag2/>
bad
<tag3>
XML
</tag3>
</tag1>
```
To go beyond the basics, [comprehensive documentation is available](https://www.crummy.com/software/BeautifulSoup/bs4/doc/).
# Links
* [Homepage](https://www.crummy.com/software/BeautifulSoup/bs4/)
* [Documentation](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
* [Discussion group](https://groups.google.com/group/beautifulsoup/)
* [Development](https://code.launchpad.net/beautifulsoup/)
* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/)
* [Complete changelog](https://git.launchpad.net/beautifulsoup/tree/CHANGELOG)
# Note on Python 2 sunsetting
Beautiful Soup's support for Python 2 was discontinued on December 31,
2020: one year after the sunset date for Python 2 itself. From this
point onward, new Beautiful Soup development will exclusively target
Python 3. The final release of Beautiful Soup 4 to support Python 2
was 4.9.3.
# Supporting the project
If you use Beautiful Soup as part of your professional work, please consider a
[Tidelift subscription](https://tidelift.com/subscription/pkg/pypi-beautifulsoup4?utm_source=pypi-beautifulsoup4&utm_medium=referral&utm_campaign=readme).
This will support many of the free software projects your organization
depends on, not just Beautiful Soup.
If you use Beautiful Soup for personal projects, the best way to say
thank you is to read
[Tool Safety](https://www.crummy.com/software/BeautifulSoup/zine/), a zine I
wrote about what Beautiful Soup has taught me about software
development.
# Building the documentation
The bs4/doc/ directory contains full documentation in Sphinx
format. Run `make html` in that directory to create HTML
documentation.
# Running the unit tests
Beautiful Soup supports unit test discovery using Pytest:
```
$ pytest
```

View file

@ -0,0 +1,38 @@
beautifulsoup4-4.14.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
beautifulsoup4-4.14.3.dist-info/METADATA,sha256=Ac93vA8Xp9FtgOcKXFM8ESfVdztimUfJ3WUpVlhKtsY,3812
beautifulsoup4-4.14.3.dist-info/RECORD,,
beautifulsoup4-4.14.3.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
beautifulsoup4-4.14.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
beautifulsoup4-4.14.3.dist-info/licenses/AUTHORS,sha256=uYkjiRjh_aweRnF8tAW2PpJJeickE68NmJwd9siry28,2201
beautifulsoup4-4.14.3.dist-info/licenses/LICENSE,sha256=VbTY1LHlvIbRDvrJG3TIe8t3UmsPW57a-LnNKtxzl7I,1441
bs4/__init__.py,sha256=E7wiVp7oQK0JhdAYxpehZa8drv3W_sJv5oeTFiBfR5o,44386
bs4/__pycache__/__init__.cpython-312.pyc,,
bs4/__pycache__/_deprecation.cpython-312.pyc,,
bs4/__pycache__/_typing.cpython-312.pyc,,
bs4/__pycache__/_warnings.cpython-312.pyc,,
bs4/__pycache__/css.cpython-312.pyc,,
bs4/__pycache__/dammit.cpython-312.pyc,,
bs4/__pycache__/diagnose.cpython-312.pyc,,
bs4/__pycache__/element.cpython-312.pyc,,
bs4/__pycache__/exceptions.cpython-312.pyc,,
bs4/__pycache__/filter.cpython-312.pyc,,
bs4/__pycache__/formatter.cpython-312.pyc,,
bs4/_deprecation.py,sha256=niHJCk37APg8KEuFOa57ZXaxLdBmc_2V6uuaJqu7r30,2408
bs4/_typing.py,sha256=zNcx7R1yCTK8WwtumP28hc7CJ3pMyZXj_VAeYaNXMZA,7549
bs4/_warnings.py,sha256=ZuOETgcnEbZgw2N0nnNXn6wvtrn2ut7AF0d98bvkMFc,4711
bs4/builder/__init__.py,sha256=Rl4qjOXvdyyyjayOFqbkgoUoo81IgoyKD-RwWeVK59g,31194
bs4/builder/__pycache__/__init__.cpython-312.pyc,,
bs4/builder/__pycache__/_html5lib.cpython-312.pyc,,
bs4/builder/__pycache__/_htmlparser.cpython-312.pyc,,
bs4/builder/__pycache__/_lxml.cpython-312.pyc,,
bs4/builder/_html5lib.py,sha256=hL6xUk4_I2i5CMguFoYFlrI26cY4Dut7fOEQrUctHIM,23607
bs4/builder/_htmlparser.py,sha256=CnULPQV2rm4vLojJABpQ7Xm9diddnEZx2Wcz_VTC1Mg,17445
bs4/builder/_lxml.py,sha256=ks1e8boA_nOA2oomAhxeudccR6ThbEE-EllFqHRoPLA,18969
bs4/css.py,sha256=_m_l_4SGWHnY620VJ21j_qQH1RX3p91sYVemgKxaLsM,12713
bs4/dammit.py,sha256=ZJWa9K32X6N2imFHleqUq0ekf592weU1lvULN_WYWYk,57024
bs4/diagnose.py,sha256=at98iuxyOrqec4V8iwkTIbNUqBCsq9Lr3fDAQx2129Y,7846
bs4/element.py,sha256=oXmj7LG_2NpsDK90mq73q0PMK0FjFBIGSeTTJLVwwTc,120237
bs4/exceptions.py,sha256=Q9FOadNe8QRvzDMaKSXe2Wtl8JK_oAZW7mbFZBVP_GE,951
bs4/filter.py,sha256=rw8ZNhTDLEJVCEiSifou5tZR_3zBLeuvAyouY82qU_E,29201
bs4/formatter.py,sha256=uBT0k6W8O5kJ9PCuJYjra97yoUqC-dlM9D_v-oRM0r8,10478
bs4/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

View file

@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: hatchling 1.27.0
Root-Is-Purelib: true
Tag: py3-none-any

View file

@ -0,0 +1,49 @@
Behold, mortal, the origins of Beautiful Soup...
================================================
Leonard Richardson is the primary maintainer.
Aaron DeVore, Isaac Muse and Chris Papademetrious have made
significant contributions to the code base.
Mark Pilgrim provided the encoding detection code that forms the base
of UnicodeDammit.
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
Soup 4 working under Python 3.
Simon Willison wrote soupselect, which was used to make Beautiful Soup
support CSS selectors. Isaac Muse wrote SoupSieve, which made it
possible to _remove_ the CSS selector code from Beautiful Soup.
Sam Ruby helped with a lot of edge cases.
Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
work in solving the nestable tags conundrum.
An incomplete list of people have contributed patches to Beautiful
Soup:
Istvan Albert, Andrew Lin, Anthony Baxter, Oliver Beattie, Andrew
Boyko, Tony Chang, Francisco Canas, "Delong", Zephyr Fang, Fuzzy,
Roman Gaufman, Yoni Gilad, Richie Hindle, Toshihiro Kamiya, Peteris
Krumins, Kent Johnson, Marek Kapolka, Andreas Kostyrka, Roel Kramer,
Ben Last, Robert Leftwich, Stefaan Lippens, "liquider", Staffan
Malmgren, Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon",
Ed Oskiewicz, Martijn Peters, Greg Phillips, Giles Radford, Stefano
Revera, Arthur Rudolph, Marko Samastur, James Salter, Jouni Seppänen,
Alexander Schmolck, Tim Shirley, Geoffrey Sneddon, Ville Skyttä,
"Vikas", Jens Svalgaard, Andy Theyers, Eric Weiser, Glyn Webster, John
Wiseman, Paul Wright, Danny Yoo
An incomplete list of people who made suggestions or found bugs or
found ways to break Beautiful Soup:
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
Summers, Dennis Sutch, Chris Smith, Aaron Swartz, Stuart
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
Sousa Rocha, Yichun Wei, Per Vognsen

View file

@ -0,0 +1,31 @@
Beautiful Soup is made available under the MIT license:
Copyright (c) Leonard Richardson
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Beautiful Soup incorporates code from the html5lib library, which is
also made available under the MIT license. Copyright (c) James Graham
and other contributors
Beautiful Soup has an optional dependency on the soupsieve library,
which is also made available under the MIT license. Copyright (c)
Isaac Muse

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,80 @@
"""Helper functions for deprecation.
This interface is itself unstable and may change without warning. Do
not use these functions yourself, even as a joke. The underscores are
there for a reason. No support will be given.
In particular, most of this will go away without warning once
Beautiful Soup drops support for Python 3.11, since Python 3.12
defines a `@typing.deprecated()
decorator. <https://peps.python.org/pep-0702/>`_
"""
import functools
import warnings
from typing import (
Any,
Callable,
)
def _deprecated_alias(old_name: str, new_name: str, version: str):
"""Alias one attribute name to another for backward compatibility
:meta private:
"""
@property # type:ignore
def alias(self) -> Any:
":meta private:"
warnings.warn(
f"Access to deprecated property {old_name}. (Replaced by {new_name}) -- Deprecated since version {version}.",
DeprecationWarning,
stacklevel=2,
)
return getattr(self, new_name)
@alias.setter
def alias(self, value: str) -> None:
":meta private:"
warnings.warn(
f"Write to deprecated property {old_name}. (Replaced by {new_name}) -- Deprecated since version {version}.",
DeprecationWarning,
stacklevel=2,
)
return setattr(self, new_name, value)
return alias
def _deprecated_function_alias(
old_name: str, new_name: str, version: str
) -> Callable[[Any], Any]:
def alias(self, *args: Any, **kwargs: Any) -> Any:
":meta private:"
warnings.warn(
f"Call to deprecated method {old_name}. (Replaced by {new_name}) -- Deprecated since version {version}.",
DeprecationWarning,
stacklevel=2,
)
return getattr(self, new_name)(*args, **kwargs)
return alias
def _deprecated(replaced_by: str, version: str) -> Callable:
def deprecate(func: Callable) -> Callable:
@functools.wraps(func)
def with_warning(*args: Any, **kwargs: Any) -> Any:
":meta private:"
warnings.warn(
f"Call to deprecated method {func.__name__}. (Replaced by {replaced_by}) -- Deprecated since version {version}.",
DeprecationWarning,
stacklevel=2,
)
return func(*args, **kwargs)
return with_warning
return deprecate

View file

@ -0,0 +1,205 @@
# Custom type aliases used throughout Beautiful Soup to improve readability.
# Notes on improvements to the type system in newer versions of Python
# that can be used once Beautiful Soup drops support for older
# versions:
#
# * ClassVar can be put on class variables now.
# * In 3.10, x|y is an accepted shorthand for Union[x,y].
# * In 3.10, TypeAlias gains capabilities that can be used to
# improve the tree matching types (I don't remember what, exactly).
# * In 3.9 it's possible to specialize the re.Match type,
# e.g. re.Match[str]. In 3.8 there's a typing.re namespace for this,
# but it's removed in 3.12, so to support the widest possible set of
# versions I'm not using it.
from typing_extensions import (
runtime_checkable,
Protocol,
TypeAlias,
)
from typing import (
Any,
Callable,
Dict,
IO,
Iterable,
Mapping,
Optional,
Pattern,
TYPE_CHECKING,
Union,
)
if TYPE_CHECKING:
from bs4.element import (
AttributeValueList,
NamespacedAttribute,
NavigableString,
PageElement,
ResultSet,
Tag,
)
@runtime_checkable
class _RegularExpressionProtocol(Protocol):
"""A protocol object which can accept either Python's built-in
`re.Pattern` objects, or the similar ``Regex`` objects defined by the
third-party ``regex`` package.
"""
def search(
self, string: str, pos: int = ..., endpos: int = ...
) -> Optional[Any]: ...
@property
def pattern(self) -> str: ...
# Aliases for markup in various stages of processing.
#
#: The rawest form of markup: either a string, bytestring, or an open filehandle.
_IncomingMarkup: TypeAlias = Union[str, bytes, IO[str], IO[bytes]]
#: Markup that is in memory but has (potentially) yet to be converted
#: to Unicode.
_RawMarkup: TypeAlias = Union[str, bytes]
# Aliases for character encodings
#
#: A data encoding.
_Encoding: TypeAlias = str
#: One or more data encodings.
_Encodings: TypeAlias = Iterable[_Encoding]
# Aliases for XML namespaces
#
#: The prefix for an XML namespace.
_NamespacePrefix: TypeAlias = str
#: The URL of an XML namespace
_NamespaceURL: TypeAlias = str
#: A mapping of prefixes to namespace URLs.
_NamespaceMapping: TypeAlias = Dict[_NamespacePrefix, _NamespaceURL]
#: A mapping of namespace URLs to prefixes
_InvertedNamespaceMapping: TypeAlias = Dict[_NamespaceURL, _NamespacePrefix]
# Aliases for the attribute values associated with HTML/XML tags.
#
#: The value associated with an HTML or XML attribute. This is the
#: relatively unprocessed value Beautiful Soup expects to come from a
#: `TreeBuilder`.
_RawAttributeValue: TypeAlias = str
#: A dictionary of names to `_RawAttributeValue` objects. This is how
#: Beautiful Soup expects a `TreeBuilder` to represent a tag's
#: attribute values.
_RawAttributeValues: TypeAlias = (
"Mapping[Union[str, NamespacedAttribute], _RawAttributeValue]"
)
#: An attribute value in its final form, as stored in the
# `Tag` class, after it has been processed and (in some cases)
# split into a list of strings.
_AttributeValue: TypeAlias = Union[str, "AttributeValueList"]
#: A dictionary of names to :py:data:`_AttributeValue` objects. This is what
#: a tag's attributes look like after processing.
_AttributeValues: TypeAlias = Dict[str, _AttributeValue]
#: The methods that deal with turning :py:data:`_RawAttributeValue` into
#: :py:data:`_AttributeValue` may be called several times, even after the values
#: are already processed (e.g. when cloning a tag), so they need to
#: be able to acommodate both possibilities.
_RawOrProcessedAttributeValues: TypeAlias = Union[_RawAttributeValues, _AttributeValues]
#: A number of tree manipulation methods can take either a `PageElement` or a
#: normal Python string (which will be converted to a `NavigableString`).
_InsertableElement: TypeAlias = Union["PageElement", str]
# Aliases to represent the many possibilities for matching bits of a
# parse tree.
#
# This is very complicated because we're applying a formal type system
# to some very DWIM code. The types we end up with will be the types
# of the arguments to the SoupStrainer constructor and (more
# familiarly to Beautiful Soup users) the find* methods.
#: A function that takes a PageElement and returns a yes-or-no answer.
_PageElementMatchFunction: TypeAlias = Callable[["PageElement"], bool]
#: A function that takes the raw parsed ingredients of a markup tag
#: and returns a yes-or-no answer.
# Not necessary at the moment.
# _AllowTagCreationFunction:TypeAlias = Callable[[Optional[str], str, Optional[_RawAttributeValues]], bool]
#: A function that takes the raw parsed ingredients of a markup string node
#: and returns a yes-or-no answer.
# Not necessary at the moment.
# _AllowStringCreationFunction:TypeAlias = Callable[[Optional[str]], bool]
#: A function that takes a `Tag` and returns a yes-or-no answer.
#: A `TagNameMatchRule` expects this kind of function, if you're
#: going to pass it a function.
_TagMatchFunction: TypeAlias = Callable[["Tag"], bool]
#: A function that takes a string (or None) and returns a yes-or-no
#: answer. An `AttributeValueMatchRule` expects this kind of function, if
#: you're going to pass it a function.
_NullableStringMatchFunction: TypeAlias = Callable[[Optional[str]], bool]
#: A function that takes a string and returns a yes-or-no answer. A
# `StringMatchRule` expects this kind of function, if you're going to
# pass it a function.
_StringMatchFunction: TypeAlias = Callable[[str], bool]
#: Either a tag name, an attribute value or a string can be matched
#: against a string, bytestring, regular expression, or a boolean.
_BaseStrainable: TypeAlias = Union[str, bytes, Pattern[str], bool]
#: A tag can be matched either with the `_BaseStrainable` options, or
#: using a function that takes the `Tag` as its sole argument.
_BaseStrainableElement: TypeAlias = Union[_BaseStrainable, _TagMatchFunction]
#: A tag's attribute value can be matched either with the
#: `_BaseStrainable` options, or using a function that takes that
#: value as its sole argument.
_BaseStrainableAttribute: TypeAlias = Union[_BaseStrainable, _NullableStringMatchFunction]
#: A tag can be matched using either a single criterion or a list of
#: criteria.
_StrainableElement: TypeAlias = Union[
_BaseStrainableElement, Iterable[_BaseStrainableElement]
]
#: An attribute value can be matched using either a single criterion
#: or a list of criteria.
_StrainableAttribute: TypeAlias = Union[
_BaseStrainableAttribute, Iterable[_BaseStrainableAttribute]
]
#: An string can be matched using the same techniques as
#: an attribute value.
_StrainableString: TypeAlias = _StrainableAttribute
#: A dictionary may be used to match against multiple attribute vlaues at once.
_StrainableAttributes: TypeAlias = Dict[str, _StrainableAttribute]
#: Many Beautiful soup methods return a PageElement or an ResultSet of
#: PageElements. A PageElement is either a Tag or a NavigableString.
#: These convenience aliases make it easier for IDE users to see which methods
#: are available on the objects they're dealing with.
_OneElement: TypeAlias = Union["PageElement", "Tag", "NavigableString"]
_AtMostOneElement: TypeAlias = Optional[_OneElement]
_AtMostOneTag: TypeAlias = Optional["Tag"]
_AtMostOneNavigableString: TypeAlias = Optional["NavigableString"]
_QueryResults: TypeAlias = "ResultSet[_OneElement]"
_SomeTags: TypeAlias = "ResultSet[Tag]"
_SomeNavigableStrings: TypeAlias = "ResultSet[NavigableString]"

View file

@ -0,0 +1,98 @@
"""Define some custom warnings."""
class GuessedAtParserWarning(UserWarning):
"""The warning issued when BeautifulSoup has to guess what parser to
use -- probably because no parser was specified in the constructor.
"""
MESSAGE: str = """No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system ("%(parser)s"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
The code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features="%(parser)s"' to the BeautifulSoup constructor.
"""
class UnusualUsageWarning(UserWarning):
"""A superclass for warnings issued when Beautiful Soup sees
something that is typically the result of a mistake in the calling
code, but might be intentional on the part of the user. If it is
in fact intentional, you can filter the individual warning class
to get rid of the warning. If you don't like Beautiful Soup
second-guessing what you are doing, you can filter the
UnusualUsageWarningclass itself and get rid of these entirely.
"""
class MarkupResemblesLocatorWarning(UnusualUsageWarning):
"""The warning issued when BeautifulSoup is given 'markup' that
actually looks like a resource locator -- a URL or a path to a file
on disk.
"""
#: :meta private:
GENERIC_MESSAGE: str = """
However, if you want to parse some data that happens to look like a %(what)s, then nothing has gone wrong: you are using Beautiful Soup correctly, and this warning is spurious and can be filtered. To make this warning go away, run this code before calling the BeautifulSoup constructor:
from bs4 import MarkupResemblesLocatorWarning
import warnings
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
"""
URL_MESSAGE: str = (
"""The input passed in on this line looks more like a URL than HTML or XML.
If you meant to use Beautiful Soup to parse the web page found at a certain URL, then something has gone wrong. You should use an Python package like 'requests' to fetch the content behind the URL. Once you have the content as a string, you can feed that string into Beautiful Soup."""
+ GENERIC_MESSAGE
)
FILENAME_MESSAGE: str = (
"""The input passed in on this line looks more like a filename than HTML or XML.
If you meant to use Beautiful Soup to parse the contents of a file on disk, then something has gone wrong. You should open the file first, using code like this:
filehandle = open(your filename)
You can then feed the open filehandle into Beautiful Soup instead of using the filename."""
+ GENERIC_MESSAGE
)
class AttributeResemblesVariableWarning(UnusualUsageWarning, SyntaxWarning):
"""The warning issued when Beautiful Soup suspects a provided
attribute name may actually be the misspelled name of a Beautiful
Soup variable. Generally speaking, this is only used in cases like
"_class" where it's very unlikely the user would be referencing an
XML attribute with that name.
"""
MESSAGE: str = """%(original)r is an unusual attribute name and is a common misspelling for %(autocorrect)r.
If you meant %(autocorrect)r, change your code to use it, and this warning will go away.
If you really did mean to check the %(original)r attribute, this warning is spurious and can be filtered. To make it go away, run this code before creating your BeautifulSoup object:
from bs4 import AttributeResemblesVariableWarning
import warnings
warnings.filterwarnings("ignore", category=AttributeResemblesVariableWarning)
"""
class XMLParsedAsHTMLWarning(UnusualUsageWarning):
"""The warning issued when an HTML parser is used to parse
XML that is not (as far as we can tell) XHTML.
"""
MESSAGE: str = """It looks like you're using an HTML parser to parse an XML document.
Assuming this really is an XML document, what you're doing might work, but you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the Python package 'lxml' installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.
If you want or need to use an HTML parser on this document, you can make this warning go away by filtering it. To do that, run this code before calling the BeautifulSoup constructor:
from bs4 import XMLParsedAsHTMLWarning
import warnings
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
"""

View file

@ -0,0 +1,848 @@
from __future__ import annotations
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
from collections import defaultdict
import re
from types import ModuleType
from typing import (
Any,
cast,
Dict,
Iterable,
List,
Optional,
Pattern,
Set,
Tuple,
Type,
TYPE_CHECKING,
)
import warnings
import sys
from bs4.element import (
AttributeDict,
AttributeValueList,
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
RubyParenthesisString,
RubyTextString,
Stylesheet,
Script,
TemplateString,
nonwhitespace_re,
)
# Exceptions were moved to their own module in 4.13. Import here for
# backwards compatibility.
from bs4.exceptions import ParserRejectedMarkup
from bs4._typing import (
_AttributeValues,
_RawAttributeValue,
)
from bs4._warnings import XMLParsedAsHTMLWarning
if TYPE_CHECKING:
from bs4 import BeautifulSoup
from bs4.element import (
NavigableString,
Tag,
)
from bs4._typing import (
_AttributeValue,
_Encoding,
_Encodings,
_RawOrProcessedAttributeValues,
_RawMarkup,
)
__all__ = [
"HTMLTreeBuilder",
"SAXTreeBuilder",
"TreeBuilder",
"TreeBuilderRegistry",
]
# Some useful features for a TreeBuilder to have.
FAST = "fast"
PERMISSIVE = "permissive"
STRICT = "strict"
XML = "xml"
HTML = "html"
HTML_5 = "html5"
__all__ = [
"TreeBuilderRegistry",
"TreeBuilder",
"HTMLTreeBuilder",
"DetectsXMLParsedAsHTML",
"ParserRejectedMarkup", # backwards compatibility only as of 4.13.0
]
class TreeBuilderRegistry(object):
"""A way of looking up TreeBuilder subclasses by their name or by desired
features.
"""
builders_for_feature: Dict[str, List[Type[TreeBuilder]]]
builders: List[Type[TreeBuilder]]
def __init__(self) -> None:
self.builders_for_feature = defaultdict(list)
self.builders = []
def register(self, treebuilder_class: type[TreeBuilder]) -> None:
"""Register a treebuilder based on its advertised features.
:param treebuilder_class: A subclass of `TreeBuilder`. its
`TreeBuilder.features` attribute should list its features.
"""
for feature in treebuilder_class.features:
self.builders_for_feature[feature].insert(0, treebuilder_class)
self.builders.insert(0, treebuilder_class)
def lookup(self, *features: str) -> Optional[Type[TreeBuilder]]:
"""Look up a TreeBuilder subclass with the desired features.
:param features: A list of features to look for. If none are
provided, the most recently registered TreeBuilder subclass
will be used.
:return: A TreeBuilder subclass, or None if there's no
registered subclass with all the requested features.
"""
if len(self.builders) == 0:
# There are no builders at all.
return None
if len(features) == 0:
# They didn't ask for any features. Give them the most
# recently registered builder.
return self.builders[0]
# Go down the list of features in order, and eliminate any builders
# that don't match every feature.
feature_list = list(features)
feature_list.reverse()
candidates = None
candidate_set = None
while len(feature_list) > 0:
feature = feature_list.pop()
we_have_the_feature = self.builders_for_feature.get(feature, [])
if len(we_have_the_feature) > 0:
if candidates is None:
candidates = we_have_the_feature
candidate_set = set(candidates)
elif candidate_set is not None:
# Eliminate any candidates that don't have this feature.
candidate_set = candidate_set.intersection(set(we_have_the_feature))
# The only valid candidates are the ones in candidate_set.
# Go through the original list of candidates and pick the first one
# that's in candidate_set.
if candidate_set is None or candidates is None:
return None
for candidate in candidates:
if candidate in candidate_set:
return candidate
return None
#: The `BeautifulSoup` constructor will take a list of features
#: and use it to look up `TreeBuilder` classes in this registry.
builder_registry: TreeBuilderRegistry = TreeBuilderRegistry()
class TreeBuilder(object):
"""Turn a textual document into a Beautiful Soup object tree.
This is an abstract superclass which smooths out the behavior of
different parser libraries into a single, unified interface.
:param multi_valued_attributes: If this is set to None, the
TreeBuilder will not turn any values for attributes like
'class' into lists. Setting this to a dictionary will
customize this behavior; look at :py:attr:`bs4.builder.HTMLTreeBuilder.DEFAULT_CDATA_LIST_ATTRIBUTES`
for an example.
Internally, these are called "CDATA list attributes", but that
probably doesn't make sense to an end-user, so the argument name
is ``multi_valued_attributes``.
:param preserve_whitespace_tags: A set of tags to treat
the way <pre> tags are treated in HTML. Tags in this set
are immune from pretty-printing; their contents will always be
output as-is.
:param string_containers: A dictionary mapping tag names to
the classes that should be instantiated to contain the textual
contents of those tags. The default is to use NavigableString
for every tag, no matter what the name. You can override the
default by changing :py:attr:`DEFAULT_STRING_CONTAINERS`.
:param store_line_numbers: If the parser keeps track of the line
numbers and positions of the original markup, that information
will, by default, be stored in each corresponding
:py:class:`bs4.element.Tag` object. You can turn this off by
passing store_line_numbers=False; then Tag.sourcepos and
Tag.sourceline will always be None. If the parser you're using
doesn't keep track of this information, then store_line_numbers
is irrelevant.
:param attribute_dict_class: The value of a multi-valued attribute
(such as HTML's 'class') willl be stored in an instance of this
class. The default is Beautiful Soup's built-in
`AttributeValueList`, which is a normal Python list, and you
will probably never need to change it.
"""
USE_DEFAULT: Any = object() #: :meta private:
def __init__(
self,
multi_valued_attributes: Dict[str, Set[str]] = USE_DEFAULT,
preserve_whitespace_tags: Set[str] = USE_DEFAULT,
store_line_numbers: bool = USE_DEFAULT,
string_containers: Dict[str, Type[NavigableString]] = USE_DEFAULT,
empty_element_tags: Set[str] = USE_DEFAULT,
attribute_dict_class: Type[AttributeDict] = AttributeDict,
attribute_value_list_class: Type[AttributeValueList] = AttributeValueList,
):
self.soup = None
if multi_valued_attributes is self.USE_DEFAULT:
multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
self.cdata_list_attributes = multi_valued_attributes
if preserve_whitespace_tags is self.USE_DEFAULT:
preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
self.preserve_whitespace_tags = preserve_whitespace_tags
if empty_element_tags is self.USE_DEFAULT:
self.empty_element_tags = self.DEFAULT_EMPTY_ELEMENT_TAGS
else:
self.empty_element_tags = empty_element_tags
# TODO: store_line_numbers is probably irrelevant now that
# the behavior of sourceline and sourcepos has been made consistent
# everywhere.
if store_line_numbers == self.USE_DEFAULT:
store_line_numbers = self.TRACKS_LINE_NUMBERS
self.store_line_numbers = store_line_numbers
if string_containers == self.USE_DEFAULT:
string_containers = self.DEFAULT_STRING_CONTAINERS
self.string_containers = string_containers
self.attribute_dict_class = attribute_dict_class
self.attribute_value_list_class = attribute_value_list_class
NAME: str = "[Unknown tree builder]"
ALTERNATE_NAMES: Iterable[str] = []
features: Iterable[str] = []
is_xml: bool = False
picklable: bool = False
soup: Optional[BeautifulSoup] #: :meta private:
#: A tag will be considered an empty-element
#: tag when and only when it has no contents.
empty_element_tags: Optional[Set[str]] = None #: :meta private:
cdata_list_attributes: Dict[str, Set[str]] #: :meta private:
preserve_whitespace_tags: Set[str] #: :meta private:
string_containers: Dict[str, Type[NavigableString]] #: :meta private:
tracks_line_numbers: bool #: :meta private:
#: A value for these tag/attribute combinations is a space- or
#: comma-separated list of CDATA, rather than a single CDATA.
DEFAULT_CDATA_LIST_ATTRIBUTES: Dict[str, Set[str]] = defaultdict(set)
#: Whitespace should be preserved inside these tags.
DEFAULT_PRESERVE_WHITESPACE_TAGS: Set[str] = set()
#: The textual contents of tags with these names should be
#: instantiated with some class other than `bs4.element.NavigableString`.
DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = {} # type:ignore
#: By default, tags are treated as empty-element tags if they have
#: no contents--that is, using XML rules. HTMLTreeBuilder
#: defines a different set of DEFAULT_EMPTY_ELEMENT_TAGS based on the
#: HTML 4 and HTML5 standards.
DEFAULT_EMPTY_ELEMENT_TAGS: Optional[Set[str]] = None
#: Most parsers don't keep track of line numbers.
TRACKS_LINE_NUMBERS: bool = False
def initialize_soup(self, soup: BeautifulSoup) -> None:
"""The BeautifulSoup object has been initialized and is now
being associated with the TreeBuilder.
:param soup: A BeautifulSoup object.
"""
self.soup = soup
def reset(self) -> None:
"""Do any work necessary to reset the underlying parser
for a new document.
By default, this does nothing.
"""
pass
def can_be_empty_element(self, tag_name: str) -> bool:
"""Might a tag with this name be an empty-element tag?
The final markup may or may not actually present this tag as
self-closing.
For instance: an HTMLBuilder does not consider a <p> tag to be
an empty-element tag (it's not in
HTMLBuilder.empty_element_tags). This means an empty <p> tag
will be presented as "<p></p>", not "<p/>" or "<p>".
The default implementation has no opinion about which tags are
empty-element tags, so a tag will be presented as an
empty-element tag if and only if it has no children.
"<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
be left alone.
:param tag_name: The name of a markup tag.
"""
if self.empty_element_tags is None:
return True
return tag_name in self.empty_element_tags
def feed(self, markup: _RawMarkup) -> None:
"""Run incoming markup through some parsing process."""
raise NotImplementedError()
def prepare_markup(
self,
markup: _RawMarkup,
user_specified_encoding: Optional[_Encoding] = None,
document_declared_encoding: Optional[_Encoding] = None,
exclude_encodings: Optional[_Encodings] = None,
) -> Iterable[Tuple[_RawMarkup, Optional[_Encoding], Optional[_Encoding], bool]]:
"""Run any preliminary steps necessary to make incoming markup
acceptable to the parser.
:param markup: The markup that's about to be parsed.
:param user_specified_encoding: The user asked to try this encoding
to convert the markup into a Unicode string.
:param document_declared_encoding: The markup itself claims to be
in this encoding. NOTE: This argument is not used by the
calling code and can probably be removed.
:param exclude_encodings: The user asked *not* to try any of
these encodings.
:yield: A series of 4-tuples: (markup, encoding, declared encoding,
has undergone character replacement)
Each 4-tuple represents a strategy that the parser can try
to convert the document to Unicode and parse it. Each
strategy will be tried in turn.
By default, the only strategy is to parse the markup
as-is. See `LXMLTreeBuilderForXML` and
`HTMLParserTreeBuilder` for implementations that take into
account the quirks of particular parsers.
:meta private:
"""
yield markup, None, None, False
def test_fragment_to_document(self, fragment: str) -> str:
"""Wrap an HTML fragment to make it look like a document.
Different parsers do this differently. For instance, lxml
introduces an empty <head> tag, and html5lib
doesn't. Abstracting this away lets us write simple tests
which run HTML fragments through the parser and compare the
results against other HTML fragments.
This method should not be used outside of unit tests.
:param fragment: A fragment of HTML.
:return: A full HTML document.
:meta private:
"""
return fragment
def set_up_substitutions(self, tag: Tag) -> bool:
"""Set up any substitutions that will need to be performed on
a `Tag` when it's output as a string.
By default, this does nothing. See `HTMLTreeBuilder` for a
case where this is used.
:return: Whether or not a substitution was performed.
:meta private:
"""
return False
def _replace_cdata_list_attribute_values(
self, tag_name: str, attrs: _RawOrProcessedAttributeValues
) -> _AttributeValues:
"""When an attribute value is associated with a tag that can
have multiple values for that attribute, convert the string
value to a list of strings.
Basically, replaces class="foo bar" with class=["foo", "bar"]
NOTE: This method modifies its input in place.
:param tag_name: The name of a tag.
:param attrs: A dictionary containing the tag's attributes.
Any appropriate attribute values will be modified in place.
:return: The modified dictionary that was originally passed in.
"""
# First, cast the attrs dict to _AttributeValues. This might
# not be accurate yet, but it will be by the time this method
# returns.
modified_attrs = cast(_AttributeValues, attrs)
if not modified_attrs or not self.cdata_list_attributes:
# Nothing to do.
return modified_attrs
# There is at least a possibility that we need to modify one of
# the attribute values.
universal: Set[str] = self.cdata_list_attributes.get("*", set())
tag_specific = self.cdata_list_attributes.get(tag_name.lower(), None)
for attr in list(modified_attrs.keys()):
modified_value: _AttributeValue
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
original_value: _AttributeValue = modified_attrs[attr]
if isinstance(original_value, _RawAttributeValue):
# This is a _RawAttributeValue (a string) that
# needs to be split and converted to a
# AttributeValueList so it can be an
# _AttributeValue.
modified_value = self.attribute_value_list_class(
nonwhitespace_re.findall(original_value)
)
else:
# html5lib calls setAttributes twice for the
# same tag when rearranging the parse tree. On
# the second call the attribute value here is
# already a list. This can also happen when a
# Tag object is cloned. If this happens, leave
# the value alone rather than trying to split
# it again.
modified_value = original_value
modified_attrs[attr] = modified_value
return modified_attrs
class SAXTreeBuilder(TreeBuilder):
"""A Beautiful Soup treebuilder that listens for SAX events.
This is not currently used for anything, and it will be removed
soon. It was a good idea, but it wasn't properly integrated into the
rest of Beautiful Soup, so there have been long stretches where it
hasn't worked properly.
"""
def __init__(self, *args: Any, **kwargs: Any) -> None:
warnings.warn(
"The SAXTreeBuilder class was deprecated in 4.13.0 and will be removed soon thereafter. It is completely untested and probably doesn't work; do not use it.",
DeprecationWarning,
stacklevel=2,
)
super(SAXTreeBuilder, self).__init__(*args, **kwargs)
def feed(self, markup: _RawMarkup) -> None:
raise NotImplementedError()
def close(self) -> None:
pass
def startElement(self, name: str, attrs: Dict[str, str]) -> None:
attrs = AttributeDict((key[1], value) for key, value in list(attrs.items()))
# print("Start %s, %r" % (name, attrs))
assert self.soup is not None
self.soup.handle_starttag(name, None, None, attrs)
def endElement(self, name: str) -> None:
# print("End %s" % name)
assert self.soup is not None
self.soup.handle_endtag(name)
def startElementNS(
self, nsTuple: Tuple[str, str], nodeName: str, attrs: Dict[str, str]
) -> None:
# Throw away (ns, nodeName) for now.
self.startElement(nodeName, attrs)
def endElementNS(self, nsTuple: Tuple[str, str], nodeName: str) -> None:
# Throw away (ns, nodeName) for now.
self.endElement(nodeName)
# handler.endElementNS((ns, node.nodeName), node.nodeName)
def startPrefixMapping(self, prefix: str, nodeValue: str) -> None:
# Ignore the prefix for now.
pass
def endPrefixMapping(self, prefix: str) -> None:
# Ignore the prefix for now.
# handler.endPrefixMapping(prefix)
pass
def characters(self, content: str) -> None:
assert self.soup is not None
self.soup.handle_data(content)
def startDocument(self) -> None:
pass
def endDocument(self) -> None:
pass
class HTMLTreeBuilder(TreeBuilder):
"""This TreeBuilder knows facts about HTML, such as which tags are treated
specially by the HTML standard.
"""
#: Some HTML tags are defined as having no contents. Beautiful Soup
#: treats these specially.
DEFAULT_EMPTY_ELEMENT_TAGS: Optional[Set[str]] = set(
[
# These are from HTML5.
"area",
"base",
"br",
"col",
"embed",
"hr",
"img",
"input",
"keygen",
"link",
"menuitem",
"meta",
"param",
"source",
"track",
"wbr",
# These are from earlier versions of HTML and are removed in HTML5.
"basefont",
"bgsound",
"command",
"frame",
"image",
"isindex",
"nextid",
"spacer",
]
)
#: The HTML standard defines these tags as block-level elements. Beautiful
#: Soup does not treat these elements differently from other elements,
#: but it may do so eventually, and this information is available if
#: you need to use it.
DEFAULT_BLOCK_ELEMENTS: Set[str] = set(
[
"address",
"article",
"aside",
"blockquote",
"canvas",
"dd",
"div",
"dl",
"dt",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"header",
"hr",
"li",
"main",
"nav",
"noscript",
"ol",
"output",
"p",
"pre",
"section",
"table",
"tfoot",
"ul",
"video",
]
)
#: These HTML tags need special treatment so they can be
#: represented by a string class other than `bs4.element.NavigableString`.
#:
#: For some of these tags, it's because the HTML standard defines
#: an unusual content model for them. I made this list by going
#: through the HTML spec
#: (https://html.spec.whatwg.org/#metadata-content) and looking for
#: "metadata content" elements that can contain strings.
#:
#: The Ruby tags (<rt> and <rp>) are here despite being normal
#: "phrasing content" tags, because the content they contain is
#: qualitatively different from other text in the document, and it
#: can be useful to be able to distinguish it.
#:
#: TODO: Arguably <noscript> could go here but it seems
#: qualitatively different from the other tags.
DEFAULT_STRING_CONTAINERS: Dict[str, Type[bs4.element.NavigableString]] = { # type:ignore
"rt": RubyTextString,
"rp": RubyParenthesisString,
"style": Stylesheet,
"script": Script,
"template": TemplateString,
}
#: The HTML standard defines these attributes as containing a
#: space-separated list of values, not a single value. That is,
#: class="foo bar" means that the 'class' attribute has two values,
#: 'foo' and 'bar', not the single value 'foo bar'. When we
#: encounter one of these attributes, we will parse its value into
#: a list of values if possible. Upon output, the list will be
#: converted back into a string.
DEFAULT_CDATA_LIST_ATTRIBUTES: Dict[str, Set[str]] = {
"*": {"class", "accesskey", "dropzone"},
"a": {"rel", "rev"},
"link": {"rel", "rev"},
"td": {"headers"},
"th": {"headers"},
"form": {"accept-charset"},
"object": {"archive"},
# These are HTML5 specific, as are *.accesskey and *.dropzone above.
"area": {"rel"},
"icon": {"sizes"},
"iframe": {"sandbox"},
"output": {"for"},
}
#: By default, whitespace inside these HTML tags will be
#: preserved rather than being collapsed.
DEFAULT_PRESERVE_WHITESPACE_TAGS: set[str] = set(["pre", "textarea"])
def set_up_substitutions(self, tag: Tag) -> bool:
"""Replace the declared encoding in a <meta> tag with a placeholder,
to be substituted when the tag is output to a string.
An HTML document may come in to Beautiful Soup as one
encoding, but exit in a different encoding, and the <meta> tag
needs to be changed to reflect this.
:return: Whether or not a substitution was performed.
:meta private:
"""
# We are only interested in <meta> tags
if tag.name != "meta":
return False
# TODO: This cast will fail in the (very unlikely) scenario
# that the programmer who instantiates the TreeBuilder
# specifies meta['content'] or meta['charset'] as
# cdata_list_attributes.
content: Optional[str] = cast(Optional[str], tag.get("content"))
charset: Optional[str] = cast(Optional[str], tag.get("charset"))
# But we can accommodate meta['http-equiv'] being made a
# cdata_list_attribute (again, very unlikely) without much
# trouble.
http_equiv: List[str] = tag.get_attribute_list("http-equiv")
# We are interested in <meta> tags that say what encoding the
# document was originally in. This means HTML 5-style <meta>
# tags that provide the "charset" attribute. It also means
# HTML 4-style <meta> tags that provide the "content"
# attribute and have "http-equiv" set to "content-type".
#
# In both cases we will replace the value of the appropriate
# attribute with a standin object that can take on any
# encoding.
substituted = False
if charset is not None:
# HTML 5 style:
# <meta charset="utf8">
tag["charset"] = CharsetMetaAttributeValue(charset)
substituted = True
elif content is not None and any(
x.lower() == "content-type" for x in http_equiv
):
# HTML 4 style:
# <meta http-equiv="content-type" content="text/html; charset=utf8">
tag["content"] = ContentMetaAttributeValue(content)
substituted = True
return substituted
class DetectsXMLParsedAsHTML(object):
"""A mixin class for any class (a TreeBuilder, or some class used by a
TreeBuilder) that's in a position to detect whether an XML
document is being incorrectly parsed as HTML, and issue an
appropriate warning.
This requires being able to observe an incoming processing
instruction that might be an XML declaration, and also able to
observe tags as they're opened. If you can't do that for a given
`TreeBuilder`, there's a less reliable implementation based on
examining the raw markup.
"""
#: Regular expression for seeing if string markup has an <html> tag.
LOOKS_LIKE_HTML: Pattern[str] = re.compile("<[^ +]html", re.I)
#: Regular expression for seeing if byte markup has an <html> tag.
LOOKS_LIKE_HTML_B: Pattern[bytes] = re.compile(b"<[^ +]html", re.I)
#: The start of an XML document string.
XML_PREFIX: str = "<?xml"
#: The start of an XML document bytestring.
XML_PREFIX_B: bytes = b"<?xml"
# This is typed as str, not `ProcessingInstruction`, because this
# check may be run before any Beautiful Soup objects are created.
_first_processing_instruction: Optional[str] #: :meta private:
_root_tag_name: Optional[str] #: :meta private:
@classmethod
def warn_if_markup_looks_like_xml(
cls, markup: Optional[_RawMarkup], stacklevel: int = 3
) -> bool:
"""Perform a check on some markup to see if it looks like XML
that's not XHTML. If so, issue a warning.
This is much less reliable than doing the check while parsing,
but some of the tree builders can't do that.
:param stacklevel: The stacklevel of the code calling this\
function.
:return: True if the markup looks like non-XHTML XML, False
otherwise.
"""
if markup is None:
return False
markup = markup[:500]
if isinstance(markup, bytes):
markup_b: bytes = markup
looks_like_xml = markup_b.startswith(
cls.XML_PREFIX_B
) and not cls.LOOKS_LIKE_HTML_B.search(markup)
else:
markup_s: str = markup
looks_like_xml = markup_s.startswith(
cls.XML_PREFIX
) and not cls.LOOKS_LIKE_HTML.search(markup)
if looks_like_xml:
cls._warn(stacklevel=stacklevel + 2)
return True
return False
@classmethod
def _warn(cls, stacklevel: int = 5) -> None:
"""Issue a warning about XML being parsed as HTML."""
warnings.warn(
XMLParsedAsHTMLWarning.MESSAGE,
XMLParsedAsHTMLWarning,
stacklevel=stacklevel,
)
def _initialize_xml_detector(self) -> None:
"""Call this method before parsing a document."""
self._first_processing_instruction = None
self._root_tag_name = None
def _document_might_be_xml(self, processing_instruction: str) -> None:
"""Call this method when encountering an XML declaration, or a
"processing instruction" that might be an XML declaration.
This helps Beautiful Soup detect potential issues later, if
the XML document turns out to be a non-XHTML document that's
being parsed as XML.
"""
if (
self._first_processing_instruction is not None
or self._root_tag_name is not None
):
# The document has already started. Don't bother checking
# anymore.
return
self._first_processing_instruction = processing_instruction
# We won't know until we encounter the first tag whether or
# not this is actually a problem.
def _root_tag_encountered(self, name: str) -> None:
"""Call this when you encounter the document's root tag.
This is where we actually check whether an XML document is
being incorrectly parsed as HTML, and issue the warning.
"""
if self._root_tag_name is not None:
# This method was incorrectly called multiple times. Do
# nothing.
return
self._root_tag_name = name
if (
name != "html"
and self._first_processing_instruction is not None
and self._first_processing_instruction.lower().startswith("xml ")
):
# We encountered an XML declaration and then a tag other
# than 'html'. This is a reliable indicator that a
# non-XHTML document is being parsed as XML.
self._warn(stacklevel=10)
def register_treebuilders_from(module: ModuleType) -> None:
"""Copy TreeBuilders from the given module into this module."""
this_module = sys.modules[__name__]
for name in module.__all__:
obj = getattr(module, name)
if issubclass(obj, TreeBuilder):
setattr(this_module, name, obj)
this_module.__all__.append(name)
# Register the builder while we're at it.
this_module.builder_registry.register(obj)
# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only
# want to use HTMLParser as a last resort.
from . import _htmlparser # noqa: E402
register_treebuilders_from(_htmlparser)
try:
from . import _html5lib
register_treebuilders_from(_html5lib)
except ImportError:
# They don't have html5lib installed.
pass
try:
from . import _lxml
register_treebuilders_from(_lxml)
except ImportError:
# They don't have lxml installed.
pass

View file

@ -0,0 +1,611 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
__all__ = [
"HTML5TreeBuilder",
]
from typing import (
Any,
cast,
Dict,
Iterable,
Optional,
Sequence,
TYPE_CHECKING,
Tuple,
Union,
)
from typing_extensions import TypeAlias
from bs4._typing import (
_AttributeValue,
_AttributeValues,
_Encoding,
_Encodings,
_NamespaceURL,
_RawMarkup,
)
import warnings
from bs4.builder import (
DetectsXMLParsedAsHTML,
PERMISSIVE,
HTML,
HTML_5,
HTMLTreeBuilder,
)
from bs4.element import (
NamespacedAttribute,
PageElement,
nonwhitespace_re,
)
import html5lib
from html5lib.constants import (
namespaces,
)
from bs4.element import (
Comment,
Doctype,
NavigableString,
Tag,
)
if TYPE_CHECKING:
from bs4 import BeautifulSoup
from html5lib.treebuilders import base as treebuilder_base
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use `html5lib <https://github.com/html5lib/html5lib-python>`_ to
build a tree.
Note that `HTML5TreeBuilder` does not support some common HTML
`TreeBuilder` features. Some of these features could theoretically
be implemented, but at the very least it's quite difficult,
because html5lib moves the parse tree around as it's being built.
Specifically:
* This `TreeBuilder` doesn't use different subclasses of
`NavigableString` (e.g. `Script`) based on the name of the tag
in which the string was found.
* You can't use a `SoupStrainer` to parse only part of a document.
"""
NAME: str = "html5lib"
features: Iterable[str] = [NAME, PERMISSIVE, HTML_5, HTML]
#: html5lib can tell us which line number and position in the
#: original file is the source of an element.
TRACKS_LINE_NUMBERS: bool = True
underlying_builder: "TreeBuilderForHtml5lib" #: :meta private:
user_specified_encoding: Optional[_Encoding]
def prepare_markup(
self,
markup: _RawMarkup,
user_specified_encoding: Optional[_Encoding] = None,
document_declared_encoding: Optional[_Encoding] = None,
exclude_encodings: Optional[_Encodings] = None,
) -> Iterable[Tuple[_RawMarkup, Optional[_Encoding], Optional[_Encoding], bool]]:
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding
# document_declared_encoding and exclude_encodings aren't used
# ATM because the html5lib TreeBuilder doesn't use
# UnicodeDammit.
for variable, name in (
(document_declared_encoding, "document_declared_encoding"),
(exclude_encodings, "exclude_encodings"),
):
if variable:
warnings.warn(
f"You provided a value for {name}, but the html5lib tree builder doesn't support {name}.",
stacklevel=3,
)
# html5lib only parses HTML, so if it's given XML that's worth
# noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup, stacklevel=3)
yield (markup, None, None, False)
# These methods are defined by Beautiful Soup.
def feed(self, markup: _RawMarkup) -> None:
"""Run some incoming markup through some parsing process,
populating the `BeautifulSoup` object in `HTML5TreeBuilder.soup`.
"""
if self.soup is not None and self.soup.parse_only is not None:
warnings.warn(
"You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
stacklevel=4,
)
# self.underlying_builder is probably None now, but it'll be set
# when html5lib calls self.create_treebuilder().
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
assert self.underlying_builder is not None
self.underlying_builder.parser = parser
extra_kwargs = dict()
if not isinstance(markup, str):
# kwargs, specifically override_encoding, will eventually
# be passed in to html5lib's
# HTMLBinaryInputStream.__init__.
extra_kwargs["override_encoding"] = self.user_specified_encoding
doc = parser.parse(markup, **extra_kwargs) # type:ignore
# Set the character encoding detected by the tokenizer.
if isinstance(markup, str):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
original_encoding = parser.tokenizer.stream.charEncoding[0] # type:ignore
# The encoding is an html5lib Encoding object. We want to
# use a string for compatibility with other tree builders.
original_encoding = original_encoding.name
doc.original_encoding = original_encoding
self.underlying_builder.parser = None
def create_treebuilder(
self, namespaceHTMLElements: bool
) -> "TreeBuilderForHtml5lib":
"""Called by html5lib to instantiate the kind of class it
calls a 'TreeBuilder'.
:param namespaceHTMLElements: Whether or not to namespace HTML elements.
:meta private:
"""
self.underlying_builder = TreeBuilderForHtml5lib(
namespaceHTMLElements, self.soup, store_line_numbers=self.store_line_numbers
)
return self.underlying_builder
def test_fragment_to_document(self, fragment: str) -> str:
"""See `TreeBuilder`."""
return "<html><head></head><body>%s</body></html>" % fragment
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
soup: "BeautifulSoup" #: :meta private:
parser: Optional[html5lib.HTMLParser] #: :meta private:
def __init__(
self,
namespaceHTMLElements: bool,
soup: Optional["BeautifulSoup"] = None,
store_line_numbers: bool = True,
**kwargs: Any,
):
if soup:
self.soup = soup
else:
warnings.warn(
"The optionality of the 'soup' argument to the TreeBuilderForHtml5lib constructor is deprecated as of Beautiful Soup 4.13.0: 'soup' is now required. If you can't pass in a BeautifulSoup object here, or you get this warning and it seems mysterious to you, please contact the Beautiful Soup developer team for possible un-deprecation.",
DeprecationWarning,
stacklevel=2,
)
from bs4 import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? Using
# html5lib doesn't cause an infinite loop and is more
# accurate. Best to get rid of this entire section, I think.
self.soup = BeautifulSoup(
"", "html.parser", store_line_numbers=store_line_numbers, **kwargs
)
# TODO: What are **kwargs exactly? Should they be passed in
# here in addition to/instead of being passed to the BeautifulSoup
# constructor?
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
# This will be set later to a real html5lib HTMLParser object,
# which we can use to track the current line number.
self.parser = None
self.store_line_numbers = store_line_numbers
def documentClass(self) -> "Element":
self.soup.reset()
return Element(self.soup, self.soup, None)
def insertDoctype(self, token: Dict[str, Any]) -> None:
name: str = cast(str, token["name"])
publicId: Optional[str] = cast(Optional[str], token["publicId"])
systemId: Optional[str] = cast(Optional[str], token["systemId"])
doctype = Doctype.for_name_and_ids(name, publicId, systemId)
self.soup.object_was_parsed(doctype)
def elementClass(self, name: str, namespace: str) -> "Element":
sourceline: Optional[int] = None
sourcepos: Optional[int] = None
if self.parser is not None and self.store_line_numbers:
# This represents the point immediately after the end of the
# tag. We don't know when the tag started, but we do know
# where it ended -- the character just before this one.
sourceline, sourcepos = self.parser.tokenizer.stream.position() # type:ignore
assert sourcepos is not None
sourcepos = sourcepos - 1
tag = self.soup.new_tag(
name, namespace, sourceline=sourceline, sourcepos=sourcepos
)
return Element(tag, self.soup, namespace)
def commentClass(self, data: str) -> "TextNode":
return TextNode(Comment(data), self.soup)
def fragmentClass(self) -> "Element":
"""This is only used by html5lib HTMLParser.parseFragment(),
which is never used by Beautiful Soup, only by the html5lib
unit tests. Since we don't currently hook into those tests,
the implementation is left blank.
"""
raise NotImplementedError()
def getFragment(self) -> "Element":
"""This is only used by the html5lib unit tests. Since we
don't currently hook into those tests, the implementation is
left blank.
"""
raise NotImplementedError()
def appendChild(self, node: "Element") -> None:
# TODO: This code is not covered by the BS4 tests, and
# apparently not triggered by the html5lib test suite either.
# But it doesn't seem test-specific and there are calls to it
# (or a method with the same name) all over html5lib, so I'm
# leaving the implementation in place rather than replacing it
# with NotImplementedError()
self.soup.append(node.element)
def getDocument(self) -> "BeautifulSoup":
return self.soup
def testSerializer(self, node: "Element") -> None:
"""This is only used by the html5lib unit tests. Since we
don't currently hook into those tests, the implementation is
left blank.
"""
raise NotImplementedError()
class AttrList(object):
"""Represents a Tag's attributes in a way compatible with html5lib."""
element: Tag
attrs: _AttributeValues
def __init__(self, element: Tag):
self.element = element
self.attrs = dict(self.element.attrs)
def __iter__(self) -> Iterable[Tuple[str, _AttributeValue]]:
return list(self.attrs.items()).__iter__()
def __setitem__(self, name: str, value: _AttributeValue) -> None:
# If this attribute is a multi-valued attribute for this element,
# turn its value into a list.
list_attr = self.element.cdata_list_attributes or {}
if name in list_attr.get("*", []) or (
self.element.name in list_attr
and name in list_attr.get(self.element.name, [])
):
# A node that is being cloned may have already undergone
# this procedure. Check for this and skip it.
if not isinstance(value, list):
assert isinstance(value, str)
value = self.element.attribute_value_list_class(
nonwhitespace_re.findall(value)
)
self.element[name] = value
def items(self) -> Iterable[Tuple[str, _AttributeValue]]:
return list(self.attrs.items())
def keys(self) -> Iterable[str]:
return list(self.attrs.keys())
def __len__(self) -> int:
return len(self.attrs)
def __getitem__(self, name: str) -> _AttributeValue:
return self.attrs[name]
def __contains__(self, name: str) -> bool:
return name in list(self.attrs.keys())
class BeautifulSoupNode(treebuilder_base.Node):
# A node can correspond to _either_ a Tag _or_ a NavigableString.
tag: Optional[Tag]
string: Optional[NavigableString]
soup: "BeautifulSoup"
namespace: Optional[_NamespaceURL]
@property
def element(self) -> PageElement:
assert self.tag is not None or self.string is not None
if self.tag is not None:
return self.tag
else:
assert self.string is not None
return self.string
@property
def nodeType(self) -> int:
"""Return the html5lib constant corresponding to the type of
the underlying DOM object.
NOTE: This property is only accessed by the html5lib test
suite, not by Beautiful Soup proper.
"""
raise NotImplementedError()
# TODO-TYPING: typeshed stubs are incorrect about this;
# cloneNode returns a new Node, not None.
def cloneNode(self) -> treebuilder_base.Node: # type:ignore
raise NotImplementedError()
class Element(BeautifulSoupNode):
namespace: Optional[_NamespaceURL]
def __init__(
self, element: Tag, soup: "BeautifulSoup", namespace: Optional[_NamespaceURL]
):
self.tag = element
self.string = None
self.soup = soup
self.namespace = namespace
treebuilder_base.Node.__init__(self, element.name)
def appendChild(self, node: "BeautifulSoupNode") -> None:
string_child: Optional[NavigableString] = None
child: PageElement
if type(node.string) is NavigableString:
# We check for NavigableString *only* because we want to avoid
# joining PreformattedStrings, such as Comments, with nearby strings.
string_child = child = node.string
else:
child = node.element
node.parent = self
if (
child is not None
and child.parent is not None
and not isinstance(child, str)
):
node.element.extract()
if (
string_child is not None
and self.tag is not None and self.tag.contents
and type(self.tag.contents[-1]) is NavigableString
):
# We are appending a string onto another string.
# TODO This has O(n^2) performance, for input like
# "a</a>a</a>a</a>..."
old_element = self.tag.contents[-1]
new_element = self.soup.new_string(old_element + string_child)
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
if isinstance(node, str):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
# Tell Beautiful Soup to act as if it parsed this element
# immediately after the parent's last descendant. (Or
# immediately after the parent, if it has no children.)
if self.tag is not None and self.tag.contents:
most_recent_element = self.tag._last_descendant(False)
elif self.element.next_element is not None:
# Something from further ahead in the parse tree is
# being inserted into this earlier element. This is
# very annoying because it means an expensive search
# for the last element in the tree.
most_recent_element = self.soup._last_descendant()
else:
most_recent_element = self.element
self.soup.object_was_parsed(
child, parent=self.tag, most_recent_element=most_recent_element
)
def getAttributes(self) -> AttrList:
assert self.tag is not None
return AttrList(self.tag)
# An HTML5lib attribute name may either be a single string,
# or a tuple (namespace, name).
_Html5libAttributeName: TypeAlias = Union[str, Tuple[str, str]]
# Now we can define the type this method accepts as a dictionary
# mapping those attribute names to single string values.
_Html5libAttributes: TypeAlias = Dict[_Html5libAttributeName, str]
def setAttributes(self, attributes: Optional[_Html5libAttributes]) -> None:
assert self.tag is not None
if attributes is not None and len(attributes) > 0:
# Replace any namespaced attributes with
# NamespacedAttribute objects.
for name, value in list(attributes.items()):
if isinstance(name, tuple):
new_name = NamespacedAttribute(*name)
del attributes[name]
attributes[new_name] = value
# We can now cast attributes to the type of Dict
# used by Beautiful Soup.
normalized_attributes = cast(_AttributeValues, attributes)
# Values for tags like 'class' came in as single strings;
# replace them with lists of strings as appropriate.
self.soup.builder._replace_cdata_list_attribute_values(
self.name, normalized_attributes
)
# Then set the attributes on the Tag associated with this
# BeautifulSoupNode.
for name, value_or_values in list(normalized_attributes.items()):
self.tag[name] = value_or_values
# The attributes may contain variables that need substitution.
# Call set_up_substitutions manually.
#
# The Tag constructor called this method when the Tag was created,
# but we just set/changed the attributes, so call it again.
self.soup.builder.set_up_substitutions(self.tag)
attributes = property(getAttributes, setAttributes)
def insertText(
self, data: str, insertBefore: Optional["BeautifulSoupNode"] = None
) -> None:
text = TextNode(self.soup.new_string(data), self.soup)
if insertBefore:
self.insertBefore(text, insertBefore)
else:
self.appendChild(text)
def insertBefore(
self, node: "BeautifulSoupNode", refNode: "BeautifulSoupNode"
) -> None:
assert self.tag is not None
index = self.tag.index(refNode.element)
if (
type(node.element) is NavigableString
and self.tag.contents
and type(self.tag.contents[index - 1]) is NavigableString
):
# (See comments in appendChild)
old_node = self.tag.contents[index - 1]
assert type(old_node) is NavigableString
new_str = self.soup.new_string(old_node + node.element)
old_node.replace_with(new_str)
else:
self.tag.insert(index, node.element)
node.parent = self
def removeChild(self, node: "Element") -> None:
node.element.extract()
def reparentChildren(self, newParent: "Element") -> None:
"""Move all of this tag's children into another tag."""
# print("MOVE", self.element.contents)
# print("FROM", self.element)
# print("TO", new_parent.element)
element = self.tag
assert element is not None
new_parent_element = newParent.tag
assert new_parent_element is not None
# Determine what this tag's next_element will be once all the children
# are removed.
final_next_element = element.next_sibling
new_parents_last_descendant = new_parent_element._last_descendant(False, False)
if len(new_parent_element.contents) > 0:
# The new parent already contains children. We will be
# appending this tag's children to the end.
# We can make this assertion since we know new_parent has
# children.
assert new_parents_last_descendant is not None
new_parents_last_child = new_parent_element.contents[-1]
new_parents_last_descendant_next_element = (
new_parents_last_descendant.next_element
)
else:
# The new parent contains no children.
new_parents_last_child = None
new_parents_last_descendant_next_element = new_parent_element.next_element
to_append = element.contents
if len(to_append) > 0:
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
first_child = to_append[0]
if new_parents_last_descendant is not None:
first_child.previous_element = new_parents_last_descendant
else:
first_child.previous_element = new_parent_element
first_child.previous_sibling = new_parents_last_child
if new_parents_last_descendant is not None:
new_parents_last_descendant.next_element = first_child
else:
new_parent_element.next_element = first_child
if new_parents_last_child is not None:
new_parents_last_child.next_sibling = first_child
# Find the very last element being moved. It is now the
# parent's last descendant. It has no .next_sibling and
# its .next_element is whatever the previous last
# descendant had.
last_childs_last_descendant = to_append[-1]._last_descendant(
is_initialized=False, accept_self=True
)
# Since we passed accept_self=True into _last_descendant,
# there's no possibility that the result is None.
assert last_childs_last_descendant is not None
last_childs_last_descendant.next_element = (
new_parents_last_descendant_next_element
)
if new_parents_last_descendant_next_element is not None:
# TODO-COVERAGE: This code has no test coverage and
# I'm not sure how to get html5lib to go through this
# path, but it's just the other side of the previous
# line.
new_parents_last_descendant_next_element.previous_element = (
last_childs_last_descendant
)
last_childs_last_descendant.next_sibling = None
for child in to_append:
child.parent = new_parent_element
new_parent_element.contents.append(child)
# Now that this element has no children, change its .next_element.
element.contents = []
element.next_element = final_next_element
# print("DONE WITH MOVE")
# print("FROM", self.element)
# print("TO", new_parent_element)
# TODO-TYPING: typeshed stubs are incorrect about this;
# hasContent returns a boolean, not None.
def hasContent(self) -> bool: # type:ignore
return self.tag is None or len(self.tag.contents) > 0
# TODO-TYPING: typeshed stubs are incorrect about this;
# cloneNode returns a new Node, not None.
def cloneNode(self) -> treebuilder_base.Node: # type:ignore
assert self.tag is not None
tag = self.soup.new_tag(self.tag.name, self.namespace)
node = Element(tag, self.soup, self.namespace)
for key, value in self.attributes:
node.attributes[key] = value
return node
def getNameTuple(self) -> Tuple[Optional[_NamespaceURL], str]:
if self.namespace is None:
return namespaces["html"], self.name
else:
return self.namespace, self.name
nameTuple = property(getNameTuple)
class TextNode(BeautifulSoupNode):
def __init__(self, element: NavigableString, soup: "BeautifulSoup"):
treebuilder_base.Node.__init__(self, None)
self.tag = None
self.string = element
self.soup = soup

View file

@ -0,0 +1,462 @@
# encoding: utf-8
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
from __future__ import annotations
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
__all__ = [
"HTMLParserTreeBuilder",
]
from html.parser import HTMLParser
from typing import (
Any,
Callable,
cast,
Dict,
Iterable,
List,
Optional,
TYPE_CHECKING,
Tuple,
Type,
Union,
)
from bs4.element import (
AttributeDict,
CData,
Comment,
Declaration,
Doctype,
ProcessingInstruction,
)
from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
DetectsXMLParsedAsHTML,
HTML,
HTMLTreeBuilder,
STRICT,
)
from bs4.exceptions import ParserRejectedMarkup
if TYPE_CHECKING:
from bs4 import BeautifulSoup
from bs4.element import NavigableString
from bs4._typing import (
_Encoding,
_Encodings,
_RawMarkup,
)
HTMLPARSER = "html.parser"
_DuplicateAttributeHandler = Callable[[Dict[str, str], str, str], None]
class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
#: Constant to handle duplicate attributes by ignoring later values
#: and keeping the earlier ones.
REPLACE: str = "replace"
#: Constant to handle duplicate attributes by replacing earlier values
#: with later ones.
IGNORE: str = "ignore"
"""A subclass of the Python standard library's HTMLParser class, which
listens for HTMLParser events and translates them into calls
to Beautiful Soup's tree construction API.
:param on_duplicate_attribute: A strategy for what to do if a
tag includes the same attribute more than once. Accepted
values are: REPLACE (replace earlier values with later
ones, the default), IGNORE (keep the earliest value
encountered), or a callable. A callable must take three
arguments: the dictionary of attributes already processed,
the name of the duplicate attribute, and the most recent value
encountered.
"""
def __init__(
self,
soup: BeautifulSoup,
*args: Any,
on_duplicate_attribute: Union[str, _DuplicateAttributeHandler] = REPLACE,
**kwargs: Any,
):
self.soup = soup
self.on_duplicate_attribute = on_duplicate_attribute
self.attribute_dict_class = soup.builder.attribute_dict_class
HTMLParser.__init__(self, *args, **kwargs)
# Keep a list of empty-element tags that were encountered
# without an explicit closing tag. If we encounter a closing tag
# of this type, we'll associate it with one of those entries.
#
# This isn't a stack because we don't care about the
# order. It's a list of closing tags we've already handled and
# will ignore, assuming they ever show up.
self.already_closed_empty_element = []
self._initialize_xml_detector()
on_duplicate_attribute: Union[str, _DuplicateAttributeHandler]
already_closed_empty_element: List[str]
soup: BeautifulSoup
def error(self, message: str) -> None:
# NOTE: This method is required so long as Python 3.9 is
# supported. The corresponding code is removed from HTMLParser
# in 3.5, but not removed from ParserBase until 3.10.
# https://github.com/python/cpython/issues/76025
#
# The original implementation turned the error into a warning,
# but in every case I discovered, this made HTMLParser
# immediately crash with an error message that was less
# helpful than the warning. The new implementation makes it
# more clear that html.parser just can't parse this
# markup. The 3.10 implementation does the same, though it
# raises AssertionError rather than calling a method. (We
# catch this error and wrap it in a ParserRejectedMarkup.)
raise ParserRejectedMarkup(message)
def handle_startendtag(
self, tag: str, attrs: List[Tuple[str, Optional[str]]]
) -> None:
"""Handle an incoming empty-element tag.
html.parser only calls this method when the markup looks like
<tag/>.
"""
# `handle_empty_element` tells handle_starttag not to close the tag
# just because its name matches a known empty-element tag. We
# know that this is an empty-element tag, and we want to call
# handle_endtag ourselves.
self.handle_starttag(tag, attrs, handle_empty_element=False)
self.handle_endtag(tag)
def handle_starttag(
self,
tag: str,
attrs: List[Tuple[str, Optional[str]]],
handle_empty_element: bool = True,
) -> None:
"""Handle an opening tag, e.g. '<tag>'
:param handle_empty_element: True if this tag is known to be
an empty-element tag (i.e. there is not expected to be any
closing tag).
"""
# TODO: handle namespaces here?
attr_dict: AttributeDict = self.attribute_dict_class()
for key, value in attrs:
# Change None attribute values to the empty string
# for consistency with the other tree builders.
if value is None:
value = ""
if key in attr_dict:
# A single attribute shows up multiple times in this
# tag. How to handle it depends on the
# on_duplicate_attribute setting.
on_dupe = self.on_duplicate_attribute
if on_dupe == self.IGNORE:
pass
elif on_dupe in (None, self.REPLACE):
attr_dict[key] = value
else:
on_dupe = cast(_DuplicateAttributeHandler, on_dupe)
on_dupe(attr_dict, key, value)
else:
attr_dict[key] = value
# print("START", tag)
sourceline: Optional[int]
sourcepos: Optional[int]
if self.soup.builder.store_line_numbers:
sourceline, sourcepos = self.getpos()
else:
sourceline = sourcepos = None
tagObj = self.soup.handle_starttag(
tag, None, None, attr_dict, sourceline=sourceline, sourcepos=sourcepos
)
if tagObj is not None and tagObj.is_empty_element and handle_empty_element:
# Unlike other parsers, html.parser doesn't send separate end tag
# events for empty-element tags. (It's handled in
# handle_startendtag, but only if the original markup looked like
# <tag/>.)
#
# So we need to call handle_endtag() ourselves. Since we
# know the start event is identical to the end event, we
# don't want handle_endtag() to cross off any previous end
# events for tags of this name.
self.handle_endtag(tag, check_already_closed=False)
# But we might encounter an explicit closing tag for this tag
# later on. If so, we want to ignore it.
self.already_closed_empty_element.append(tag)
if self._root_tag_name is None:
self._root_tag_encountered(tag)
def handle_endtag(self, tag: str, check_already_closed: bool = True) -> None:
"""Handle a closing tag, e.g. '</tag>'
:param tag: A tag name.
:param check_already_closed: True if this tag is expected to
be the closing portion of an empty-element tag,
e.g. '<tag></tag>'.
"""
# print("END", tag)
if check_already_closed and tag in self.already_closed_empty_element:
# This is a redundant end tag for an empty-element tag.
# We've already called handle_endtag() for it, so just
# check it off the list.
# print("ALREADY CLOSED", tag)
self.already_closed_empty_element.remove(tag)
else:
self.soup.handle_endtag(tag)
def handle_data(self, data: str) -> None:
"""Handle some textual data that shows up between tags."""
self.soup.handle_data(data)
def handle_charref(self, name: str) -> None:
"""Handle a numeric character reference by converting it to the
corresponding Unicode character and treating it as textual
data.
:param name: Character number, possibly in hexadecimal.
"""
# TODO: This was originally a workaround for a bug in
# HTMLParser. (http://bugs.python.org/issue13633) The bug has
# been fixed, but removing this code still makes some
# Beautiful Soup tests fail. This needs investigation.
real_name:int
if name.startswith("x"):
real_name = int(name.lstrip("x"), 16)
elif name.startswith("X"):
real_name = int(name.lstrip("X"), 16)
else:
real_name = int(name)
data, replacement_added = UnicodeDammit.numeric_character_reference(real_name)
if replacement_added:
self.soup.contains_replacement_characters = True
self.handle_data(data)
def handle_entityref(self, name: str) -> None:
"""Handle a named entity reference by converting it to the
corresponding Unicode character(s) and treating it as textual
data.
:param name: Name of the entity reference.
"""
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
if character is not None:
data = character
else:
# If this were XML, it would be ambiguous whether "&foo"
# was an character entity reference with a missing
# semicolon or the literal string "&foo". Since this is
# HTML, we have a complete list of all character entity references,
# and this one wasn't found, so assume it's the literal string "&foo".
data = "&%s" % name
self.handle_data(data)
def handle_comment(self, data: str) -> None:
"""Handle an HTML comment.
:param data: The text of the comment.
"""
self.soup.endData()
self.soup.handle_data(data)
self.soup.endData(Comment)
def handle_decl(self, decl: str) -> None:
"""Handle a DOCTYPE declaration.
:param data: The text of the declaration.
"""
self.soup.endData()
decl = decl[len("DOCTYPE ") :]
self.soup.handle_data(decl)
self.soup.endData(Doctype)
def unknown_decl(self, data: str) -> None:
"""Handle a declaration of unknown type -- probably a CDATA block.
:param data: The text of the declaration.
"""
cls: Type[NavigableString]
if data.upper().startswith("CDATA["):
cls = CData
data = data[len("CDATA[") :]
else:
cls = Declaration
self.soup.endData()
self.soup.handle_data(data)
self.soup.endData(cls)
def handle_pi(self, data: str) -> None:
"""Handle a processing instruction.
:param data: The text of the instruction.
"""
self.soup.endData()
self.soup.handle_data(data)
self._document_might_be_xml(data)
self.soup.endData(ProcessingInstruction)
class HTMLParserTreeBuilder(HTMLTreeBuilder):
"""A Beautiful soup `bs4.builder.TreeBuilder` that uses the
:py:class:`html.parser.HTMLParser` parser, found in the Python
standard library.
"""
is_xml: bool = False
picklable: bool = True
NAME: str = HTMLPARSER
features: Iterable[str] = [NAME, HTML, STRICT]
parser_args: Tuple[Iterable[Any], Dict[str, Any]]
#: The html.parser knows which line number and position in the
#: original file is the source of an element.
TRACKS_LINE_NUMBERS: bool = True
def __init__(
self,
parser_args: Optional[Iterable[Any]] = None,
parser_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
):
"""Constructor.
:param parser_args: Positional arguments to pass into
the BeautifulSoupHTMLParser constructor, once it's
invoked.
:param parser_kwargs: Keyword arguments to pass into
the BeautifulSoupHTMLParser constructor, once it's
invoked.
:param kwargs: Keyword arguments for the superclass constructor.
"""
# Some keyword arguments will be pulled out of kwargs and placed
# into parser_kwargs.
extra_parser_kwargs = dict()
for arg in ("on_duplicate_attribute",):
if arg in kwargs:
value = kwargs.pop(arg)
extra_parser_kwargs[arg] = value
super(HTMLParserTreeBuilder, self).__init__(**kwargs)
parser_args = parser_args or []
parser_kwargs = parser_kwargs or {}
parser_kwargs.update(extra_parser_kwargs)
parser_kwargs["convert_charrefs"] = False
self.parser_args = (parser_args, parser_kwargs)
def prepare_markup(
self,
markup: _RawMarkup,
user_specified_encoding: Optional[_Encoding] = None,
document_declared_encoding: Optional[_Encoding] = None,
exclude_encodings: Optional[_Encodings] = None,
) -> Iterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]]:
"""Run any preliminary steps necessary to make incoming markup
acceptable to the parser.
:param markup: Some markup -- probably a bytestring.
:param user_specified_encoding: The user asked to try this encoding.
:param document_declared_encoding: The markup itself claims to be
in this encoding.
:param exclude_encodings: The user asked _not_ to try any of
these encodings.
:yield: A series of 4-tuples: (markup, encoding, declared encoding,
has undergone character replacement)
Each 4-tuple represents a strategy for parsing the document.
This TreeBuilder uses Unicode, Dammit to convert the markup
into Unicode, so the ``markup`` element of the tuple will
always be a string.
"""
if isinstance(markup, str):
# Parse Unicode as-is.
yield (markup, None, None, False)
return
# Ask UnicodeDammit to sniff the most likely encoding.
known_definite_encodings: List[_Encoding] = []
if user_specified_encoding:
# This was provided by the end-user; treat it as a known
# definite encoding per the algorithm laid out in the
# HTML5 spec. (See the EncodingDetector class for
# details.)
known_definite_encodings.append(user_specified_encoding)
user_encodings: List[_Encoding] = []
if document_declared_encoding:
# This was found in the document; treat it as a slightly
# lower-priority user encoding.
user_encodings.append(document_declared_encoding)
dammit = UnicodeDammit(
markup,
known_definite_encodings=known_definite_encodings,
user_encodings=user_encodings,
is_html=True,
exclude_encodings=exclude_encodings,
)
if dammit.unicode_markup is None:
# In every case I've seen, Unicode, Dammit is able to
# convert the markup into Unicode, even if it needs to use
# REPLACEMENT CHARACTER. But there is a code path that
# could result in unicode_markup being None, and
# HTMLParser can only parse Unicode, so here we handle
# that code path.
raise ParserRejectedMarkup(
"Could not convert input to Unicode, and html.parser will not accept bytestrings."
)
else:
yield (
dammit.unicode_markup,
dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters,
)
def feed(self, markup: _RawMarkup, _parser_class:type[BeautifulSoupHTMLParser] =BeautifulSoupHTMLParser) -> None:
"""
:param markup: The markup to feed into the parser.
:param _parser_class: An HTMLParser subclass to use. This is only intended for use in unit tests.
"""
args, kwargs = self.parser_args
# HTMLParser.feed will only handle str, but
# BeautifulSoup.markup is allowed to be _RawMarkup, because
# it's set by the yield value of
# TreeBuilder.prepare_markup. Fortunately,
# HTMLParserTreeBuilder.prepare_markup always yields a str
# (UnicodeDammit.unicode_markup).
assert isinstance(markup, str)
# We know BeautifulSoup calls TreeBuilder.initialize_soup
# before calling feed(), so we can assume self.soup
# is set.
assert self.soup is not None
parser = _parser_class(self.soup, *args, **kwargs)
try:
parser.feed(markup)
parser.close()
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.already_closed_empty_element = []

View file

@ -0,0 +1,501 @@
# encoding: utf-8
from __future__ import annotations
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
__all__ = [
"LXMLTreeBuilderForXML",
"LXMLTreeBuilder",
]
from typing import (
Any,
Dict,
Iterable,
List,
Optional,
Set,
Tuple,
Type,
TYPE_CHECKING,
Union,
)
from io import BytesIO
from io import StringIO
from typing_extensions import TypeAlias
from lxml import etree # type:ignore
from bs4.element import (
AttributeDict,
XMLAttributeDict,
Comment,
Doctype,
NamespacedAttribute,
ProcessingInstruction,
XMLProcessingInstruction,
)
from bs4.builder import (
DetectsXMLParsedAsHTML,
FAST,
HTML,
HTMLTreeBuilder,
PERMISSIVE,
TreeBuilder,
XML,
)
from bs4.dammit import EncodingDetector
from bs4.exceptions import ParserRejectedMarkup
if TYPE_CHECKING:
from bs4._typing import (
_Encoding,
_Encodings,
_NamespacePrefix,
_NamespaceURL,
_NamespaceMapping,
_InvertedNamespaceMapping,
_RawMarkup,
)
from bs4 import BeautifulSoup
LXML: str = "lxml"
def _invert(d: dict[Any, Any]) -> dict[Any, Any]:
"Invert a dictionary."
return dict((v, k) for k, v in list(d.items()))
_LXMLParser: TypeAlias = Union[etree.XMLParser, etree.HTMLParser]
_ParserOrParserClass: TypeAlias = Union[
_LXMLParser, Type[etree.XMLParser], Type[etree.HTMLParser]
]
class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS: Type[etree.XMLParser] = etree.XMLParser
is_xml: bool = True
#: Set this to true (probably by passing huge_tree=True into the :
#: BeautifulSoup constructor) to enable the lxml feature "disable security
#: restrictions and support very deep trees and very long text
#: content".
huge_tree: bool
processing_instruction_class: Type[ProcessingInstruction]
NAME: str = "lxml-xml"
ALTERNATE_NAMES: Iterable[str] = ["xml"]
# Well, it's permissive by XML parser standards.
features: Iterable[str] = [NAME, LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE: int = 512
# This namespace mapping is specified in the XML Namespace
# standard.
DEFAULT_NSMAPS: _NamespaceMapping = dict(xml="http://www.w3.org/XML/1998/namespace")
DEFAULT_NSMAPS_INVERTED: _InvertedNamespaceMapping = _invert(DEFAULT_NSMAPS)
nsmaps: List[Optional[_InvertedNamespaceMapping]]
empty_element_tags: Optional[Set[str]]
parser: Any
_default_parser: Optional[etree.XMLParser]
# NOTE: If we parsed Element objects and looked at .sourceline,
# we'd be able to see the line numbers from the original document.
# But instead we build an XMLParser or HTMLParser object to serve
# as the target of parse messages, and those messages don't include
# line numbers.
# See: https://bugs.launchpad.net/lxml/+bug/1846906
def initialize_soup(self, soup: BeautifulSoup) -> None:
"""Let the BeautifulSoup object know about the standard namespace
mapping.
:param soup: A `BeautifulSoup`.
"""
# Beyond this point, self.soup is set, so we can assume (and
# assert) it's not None whenever necessary.
super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
self._register_namespaces(self.DEFAULT_NSMAPS)
def _register_namespaces(self, mapping: Dict[str, str]) -> None:
"""Let the BeautifulSoup object know about namespaces encountered
while parsing the document.
This might be useful later on when creating CSS selectors.
This will track (almost) all namespaces, even ones that were
only in scope for part of the document. If two namespaces have
the same prefix, only the first one encountered will be
tracked. Un-prefixed namespaces are not tracked.
:param mapping: A dictionary mapping namespace prefixes to URIs.
"""
assert self.soup is not None
for key, value in list(mapping.items()):
# This is 'if key' and not 'if key is not None' because we
# don't track un-prefixed namespaces. Soupselect will
# treat an un-prefixed namespace as the default, which
# causes confusion in some cases.
if key and key not in self.soup._namespaces:
# Let the BeautifulSoup object know about a new namespace.
# If there are multiple namespaces defined with the same
# prefix, the first one in the document takes precedence.
self.soup._namespaces[key] = value
def default_parser(self, encoding: Optional[_Encoding]) -> _ParserOrParserClass:
"""Find the default parser for the given encoding.
:return: Either a parser object or a class, which
will be instantiated with default arguments.
"""
if self._default_parser is not None:
return self._default_parser
return self.DEFAULT_PARSER_CLASS(target=self, recover=True, huge_tree=self.huge_tree, encoding=encoding)
def parser_for(self, encoding: Optional[_Encoding]) -> _LXMLParser:
"""Instantiate an appropriate parser for the given encoding.
:param encoding: A string.
:return: A parser object such as an `etree.XMLParser`.
"""
# Use the default parser.
parser = self.default_parser(encoding)
if callable(parser):
# Instantiate the parser with default arguments
parser = parser(target=self, recover=True, huge_tree=self.huge_tree, encoding=encoding)
return parser
def __init__(
self,
parser: Optional[etree.XMLParser] = None,
empty_element_tags: Optional[Set[str]] = None,
huge_tree: bool = False,
**kwargs: Any,
):
# TODO: Issue a warning if parser is present but not a
# callable, since that means there's no way to create new
# parsers for different encodings.
self._default_parser = parser
self.soup = None
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
self.active_namespace_prefixes = [dict(self.DEFAULT_NSMAPS)]
if self.is_xml:
self.processing_instruction_class = XMLProcessingInstruction
else:
self.processing_instruction_class = ProcessingInstruction
if "attribute_dict_class" not in kwargs:
kwargs["attribute_dict_class"] = XMLAttributeDict
self.huge_tree = huge_tree
super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
def _getNsTag(self, tag: str) -> Tuple[Optional[str], str]:
# Split the namespace URL out of a fully-qualified lxml tag
# name. Copied from lxml's src/lxml/sax.py.
if tag[0] == "{" and "}" in tag:
namespace, name = tag[1:].split("}", 1)
return (namespace, name)
return (None, tag)
def prepare_markup(
self,
markup: _RawMarkup,
user_specified_encoding: Optional[_Encoding] = None,
document_declared_encoding: Optional[_Encoding] = None,
exclude_encodings: Optional[_Encodings] = None,
) -> Iterable[
Tuple[Union[str, bytes], Optional[_Encoding], Optional[_Encoding], bool]
]:
"""Run any preliminary steps necessary to make incoming markup
acceptable to the parser.
lxml really wants to get a bytestring and convert it to
Unicode itself. So instead of using UnicodeDammit to convert
the bytestring to Unicode using different encodings, this
implementation uses EncodingDetector to iterate over the
encodings, and tell lxml to try to parse the document as each
one in turn.
:param markup: Some markup -- hopefully a bytestring.
:param user_specified_encoding: The user asked to try this encoding.
:param document_declared_encoding: The markup itself claims to be
in this encoding.
:param exclude_encodings: The user asked _not_ to try any of
these encodings.
:yield: A series of 4-tuples: (markup, encoding, declared encoding,
has undergone character replacement)
Each 4-tuple represents a strategy for converting the
document to Unicode and parsing it. Each strategy will be tried
in turn.
"""
if not self.is_xml:
# We're in HTML mode, so if we're given XML, that's worth
# noting.
DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(markup, stacklevel=3)
if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
# TODO: This is a workaround for
# https://bugs.launchpad.net/lxml/+bug/1948551.
# We can remove it once the upstream issue is fixed.
if len(markup) > 0 and markup[0] == "\N{BYTE ORDER MARK}":
markup = markup[1:]
yield markup, None, document_declared_encoding, False
if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8", document_declared_encoding, False)
# Since the document was Unicode in the first place, there
# is no need to try any more strategies; we know this will
# work.
return
known_definite_encodings: List[_Encoding] = []
if user_specified_encoding:
# This was provided by the end-user; treat it as a known
# definite encoding per the algorithm laid out in the
# HTML5 spec. (See the EncodingDetector class for
# details.)
known_definite_encodings.append(user_specified_encoding)
user_encodings: List[_Encoding] = []
if document_declared_encoding:
# This was found in the document; treat it as a slightly
# lower-priority user encoding.
user_encodings.append(document_declared_encoding)
detector = EncodingDetector(
markup,
known_definite_encodings=known_definite_encodings,
user_encodings=user_encodings,
is_html=not self.is_xml,
exclude_encodings=exclude_encodings,
)
for encoding in detector.encodings:
yield (detector.markup, encoding, document_declared_encoding, False)
def feed(self, markup: _RawMarkup) -> None:
io: Union[BytesIO, StringIO]
if isinstance(markup, bytes):
io = BytesIO(markup)
elif isinstance(markup, str):
io = StringIO(markup)
# initialize_soup is called before feed, so we know this
# is not None.
assert self.soup is not None
# Call feed() at least once, even if the markup is empty,
# or the parser won't be initialized.
data = io.read(self.CHUNK_SIZE)
try:
self.parser = self.parser_for(self.soup.original_encoding)
self.parser.feed(data)
while len(data) != 0:
# Now call feed() on the rest of the data, chunk by chunk.
data = io.read(self.CHUNK_SIZE)
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(e)
def close(self) -> None:
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
def start(
self,
tag: str | bytes,
attrib: Dict[str | bytes, str | bytes],
nsmap: _NamespaceMapping = {},
) -> None:
# This is called by lxml code as a result of calling
# BeautifulSoup.feed(), and we know self.soup is set by the time feed()
# is called.
assert self.soup is not None
assert isinstance(tag, str)
# We need to recreate the attribute dict for three
# reasons. First, for type checking, so we can assert there
# are no bytestrings in the keys or values. Second, because we
# need a mutable dict--lxml might send us an immutable
# dictproxy. Third, so we can handle namespaced attribute
# names by converting the keys to NamespacedAttributes.
new_attrib: Dict[Union[str, NamespacedAttribute], str] = (
self.attribute_dict_class()
)
for k, v in attrib.items():
assert isinstance(k, str)
assert isinstance(v, str)
new_attrib[k] = v
nsprefix: Optional[_NamespacePrefix] = None
namespace: Optional[_NamespaceURL] = None
# Invert each namespace map as it comes in.
if len(nsmap) == 0 and len(self.nsmaps) > 1:
# There are no new namespaces for this tag, but
# non-default namespaces are in play, so we need a
# separate tag stack to know when they end.
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
# First, Let the BeautifulSoup object know about it.
self._register_namespaces(nsmap)
# Then, add it to our running list of inverted namespace
# mappings.
self.nsmaps.append(_invert(nsmap))
# The currently active namespace prefixes have
# changed. Calculate the new mapping so it can be stored
# with all Tag objects created while these prefixes are in
# scope.
current_mapping = dict(self.active_namespace_prefixes[-1])
current_mapping.update(nsmap)
# We should not track un-prefixed namespaces as we can only hold one
# and it will be recognized as the default namespace by soupsieve,
# which may be confusing in some situations.
if "" in current_mapping:
del current_mapping[""]
self.active_namespace_prefixes.append(current_mapping)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/"
)
new_attrib[attribute] = namespace
# Namespaces are in play. Find any attributes that came in
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
final_attrib: AttributeDict = self.attribute_dict_class()
for attr, value in list(new_attrib.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
final_attrib[attr] = value
else:
nsprefix = self._prefix_for_namespace(namespace)
attr = NamespacedAttribute(nsprefix, attr, namespace)
final_attrib[attr] = value
namespace, tag = self._getNsTag(tag)
nsprefix = self._prefix_for_namespace(namespace)
self.soup.handle_starttag(
tag,
namespace,
nsprefix,
final_attrib,
namespaces=self.active_namespace_prefixes[-1],
)
def _prefix_for_namespace(
self, namespace: Optional[_NamespaceURL]
) -> Optional[_NamespacePrefix]:
"""Find the currently active prefix for the given namespace."""
if namespace is None:
return None
for inverted_nsmap in reversed(self.nsmaps):
if inverted_nsmap is not None and namespace in inverted_nsmap:
return inverted_nsmap[namespace]
return None
def end(self, tag: str | bytes) -> None:
assert self.soup is not None
assert isinstance(tag, str)
self.soup.endData()
namespace, tag = self._getNsTag(tag)
nsprefix = None
if namespace is not None:
for inverted_nsmap in reversed(self.nsmaps):
if inverted_nsmap is not None and namespace in inverted_nsmap:
nsprefix = inverted_nsmap[namespace]
break
self.soup.handle_endtag(tag, nsprefix)
if len(self.nsmaps) > 1:
# This tag, or one of its parents, introduced a namespace
# mapping, so pop it off the stack.
out_of_scope_nsmap = self.nsmaps.pop()
if out_of_scope_nsmap is not None:
# This tag introduced a namespace mapping which is no
# longer in scope. Recalculate the currently active
# namespace prefixes.
self.active_namespace_prefixes.pop()
def pi(self, target: str, data: str) -> None:
assert self.soup is not None
self.soup.endData()
data = target + " " + data
self.soup.handle_data(data)
self.soup.endData(self.processing_instruction_class)
def data(self, data: str | bytes) -> None:
assert self.soup is not None
assert isinstance(data, str)
self.soup.handle_data(data)
def doctype(self, name: str, pubid: str, system: str) -> None:
assert self.soup is not None
self.soup.endData()
doctype_string = Doctype._string_for_name_and_ids(name, pubid, system)
self.soup.handle_data(doctype_string)
self.soup.endData(containerClass=Doctype)
def comment(self, text: str | bytes) -> None:
"Handle comments as Comment objects."
assert self.soup is not None
assert isinstance(text, str)
self.soup.endData()
self.soup.handle_data(text)
self.soup.endData(Comment)
def test_fragment_to_document(self, fragment: str) -> str:
"""See `TreeBuilder`."""
return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
NAME: str = LXML
ALTERNATE_NAMES: Iterable[str] = ["lxml-html"]
features: Iterable[str] = list(ALTERNATE_NAMES) + [NAME, HTML, FAST, PERMISSIVE]
is_xml: bool = False
def default_parser(self, encoding: Optional[_Encoding]) -> _ParserOrParserClass:
return etree.HTMLParser
def feed(self, markup: _RawMarkup) -> None:
# We know self.soup is set by the time feed() is called.
assert self.soup is not None
encoding = self.soup.original_encoding
try:
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(e)
def test_fragment_to_document(self, fragment: str) -> str:
"""See `TreeBuilder`."""
return "<html><body>%s</body></html>" % fragment

View file

@ -0,0 +1,339 @@
"""Integration code for CSS selectors using `Soup Sieve <https://facelessuser.github.io/soupsieve/>`_ (pypi: ``soupsieve``).
Acquire a `CSS` object through the `element.Tag.css` attribute of
the starting point of your CSS selector, or (if you want to run a
selector against the entire document) of the `BeautifulSoup` object
itself.
The main advantage of doing this instead of using ``soupsieve``
functions is that you don't need to keep passing the `element.Tag` to be
selected against, since the `CSS` object is permanently scoped to that
`element.Tag`.
"""
from __future__ import annotations
from types import ModuleType
from typing import (
Any,
cast,
Iterable,
Iterator,
MutableSequence,
Optional,
TYPE_CHECKING,
)
import warnings
from bs4._typing import _NamespaceMapping
if TYPE_CHECKING:
from soupsieve import SoupSieve
from bs4 import element
from bs4.element import ResultSet, Tag
soupsieve: Optional[ModuleType]
try:
import soupsieve
except ImportError:
soupsieve = None
warnings.warn(
"The soupsieve package is not installed. CSS selectors cannot be used."
)
class CSS(object):
"""A proxy object against the ``soupsieve`` library, to simplify its
CSS selector API.
You don't need to instantiate this class yourself; instead, use
`element.Tag.css`.
:param tag: All CSS selectors run by this object will use this as
their starting point.
:param api: An optional drop-in replacement for the ``soupsieve`` module,
intended for use in unit tests.
"""
def __init__(self, tag: element.Tag, api: Optional[ModuleType] = None):
if api is None:
api = soupsieve
if api is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
self.api = api
self.tag = tag
def escape(self, ident: str) -> str:
"""Escape a CSS identifier.
This is a simple wrapper around `soupsieve.escape() <https://facelessuser.github.io/soupsieve/api/#soupsieveescape>`_. See the
documentation for that function for more information.
"""
if soupsieve is None:
raise NotImplementedError(
"Cannot escape CSS identifiers because the soupsieve package is not installed."
)
return cast(str, self.api.escape(ident))
def _ns(
self, ns: Optional[_NamespaceMapping], select: str
) -> Optional[_NamespaceMapping]:
"""Normalize a dictionary of namespaces."""
if not isinstance(select, self.api.SoupSieve) and ns is None:
# If the selector is a precompiled pattern, it already has
# a namespace context compiled in, which cannot be
# replaced.
ns = self.tag._namespaces
return ns
def _rs(self, results: MutableSequence[Tag]) -> ResultSet[Tag]:
"""Normalize a list of results to a py:class:`ResultSet`.
A py:class:`ResultSet` is more consistent with the rest of
Beautiful Soup's API, and :py:meth:`ResultSet.__getattr__` has
a helpful error message if you try to treat a list of results
as a single result (a common mistake).
"""
# Import here to avoid circular import
from bs4 import ResultSet
return ResultSet(None, results)
def compile(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
flags: int = 0,
**kwargs: Any,
) -> SoupSieve:
"""Pre-compile a selector and return the compiled object.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.
:param kwargs: Keyword arguments to be passed into Soup Sieve's
`soupsieve.compile() <https://facelessuser.github.io/soupsieve/api/#soupsievecompile>`_ method.
:return: A precompiled selector object.
:rtype: soupsieve.SoupSieve
"""
return self.api.compile(select, self._ns(namespaces, select), flags, **kwargs)
def select_one(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
flags: int = 0,
**kwargs: Any,
) -> element.Tag | None:
"""Perform a CSS selection operation on the current Tag and return the
first result, if any.
This uses the Soup Sieve library. For more information, see
that library's documentation for the `soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
:param kwargs: Keyword arguments to be passed into Soup Sieve's
`soupsieve.select_one() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect_one>`_ method.
"""
return self.api.select_one(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def select(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
limit: int = 0,
flags: int = 0,
**kwargs: Any,
) -> ResultSet[element.Tag]:
"""Perform a CSS selection operation on the current `element.Tag`.
This uses the Soup Sieve library. For more information, see
that library's documentation for the `soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
:param kwargs: Keyword arguments to be passed into Soup Sieve's
`soupsieve.select() <https://facelessuser.github.io/soupsieve/api/#soupsieveselect>`_ method.
"""
if limit is None:
limit = 0
return self._rs(
self.api.select(
select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
)
)
def iselect(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
limit: int = 0,
flags: int = 0,
**kwargs: Any,
) -> Iterator[element.Tag]:
"""Perform a CSS selection operation on the current `element.Tag`.
This uses the Soup Sieve library. For more information, see
that library's documentation for the `soupsieve.iselect()
<https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_
method. It is the same as select(), but it returns a generator
instead of a list.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.
:param kwargs: Keyword arguments to be passed into Soup Sieve's
`soupsieve.iselect() <https://facelessuser.github.io/soupsieve/api/#soupsieveiselect>`_ method.
"""
return self.api.iselect(
select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
)
def closest(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
flags: int = 0,
**kwargs: Any,
) -> Optional[element.Tag]:
"""Find the `element.Tag` closest to this one that matches the given selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the `soupsieve.closest()
<https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.
:param kwargs: Keyword arguments to be passed into Soup Sieve's
`soupsieve.closest() <https://facelessuser.github.io/soupsieve/api/#soupsieveclosest>`_ method.
"""
return self.api.closest(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def match(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
flags: int = 0,
**kwargs: Any,
) -> bool:
"""Check whether or not this `element.Tag` matches the given CSS selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the `soupsieve.match()
<https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
method.
:param: a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.match()
<https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
`soupsieve.match()
<https://facelessuser.github.io/soupsieve/api/#soupsievematch>`_
method.
"""
return cast(
bool,
self.api.match(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
),
)
def filter(
self,
select: str,
namespaces: Optional[_NamespaceMapping] = None,
flags: int = 0,
**kwargs: Any,
) -> ResultSet[element.Tag]:
"""Filter this `element.Tag`'s direct children based on the given CSS selector.
This uses the Soup Sieve library. It works the same way as
passing a `element.Tag` into that library's `soupsieve.filter()
<https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
method. For more information, see the documentation for
`soupsieve.filter()
<https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
`soupsieve.filter()
<https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
`soupsieve.filter()
<https://facelessuser.github.io/soupsieve/api/#soupsievefilter>`_
method.
"""
return self._rs(
self.api.filter(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,268 @@
"""Diagnostic functions, mainly for use when doing tech support."""
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
import cProfile
from io import BytesIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
from typing import (
Any,
IO,
List,
Optional,
Tuple,
TYPE_CHECKING,
)
if TYPE_CHECKING:
from bs4._typing import _IncomingMarkup
import pstats
import random
import tempfile
import time
import traceback
import sys
def diagnose(data: "_IncomingMarkup") -> None:
"""Diagnostic suite for isolating common problems.
:param data: Some markup that needs to be explained.
:return: None; diagnostics are printed to standard output.
"""
print(("Diagnostic running on Beautiful Soup %s" % __version__))
print(("Python version %s" % sys.version))
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in basic_parsers:
for builder in builder_registry.builders:
if name in builder.features:
break
else:
basic_parsers.remove(name)
print(
("I noticed that %s is not installed. Installing it may help." % name)
)
if "lxml" in basic_parsers:
basic_parsers.append("lxml-xml")
try:
from lxml import etree # type:ignore
print(("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION))))
except ImportError:
print("lxml is not installed or couldn't be imported.")
if "html5lib" in basic_parsers:
try:
import html5lib
print(("Found html5lib version %s" % html5lib.__version__))
except ImportError:
print("html5lib is not installed or couldn't be imported.")
if hasattr(data, "read"):
data = data.read()
for parser in basic_parsers:
print(("Trying to parse your markup with %s" % parser))
success = False
try:
soup = BeautifulSoup(data, features=parser)
success = True
except Exception:
print(("%s could not parse the markup." % parser))
traceback.print_exc()
if success:
print(("Here's what %s did with the markup:" % parser))
print((soup.prettify()))
print(("-" * 80))
def lxml_trace(data: "_IncomingMarkup", html: bool = True, **kwargs: Any) -> None:
"""Print out the lxml events that occur during parsing.
This lets you see how lxml parses a document when no Beautiful
Soup code is running. You can use this to determine whether
an lxml-specific problem is in Beautiful Soup's lxml tree builders
or in lxml itself.
:param data: Some markup.
:param html: If True, markup will be parsed with lxml's HTML parser.
if False, lxml's XML parser will be used.
"""
from lxml import etree
recover = kwargs.pop("recover", True)
if isinstance(data, str):
data = data.encode("utf8")
if not isinstance(data, IO):
reader = BytesIO(data)
for event, element in etree.iterparse(reader, html=html, recover=recover, **kwargs):
print(("%s, %4s, %s" % (event, element.tag, element.text)))
class AnnouncingParser(HTMLParser):
"""Subclass of HTMLParser that announces parse events, without doing
anything else.
You can use this to get a picture of how html.parser sees a given
document. The easiest way to do this is to call `htmlparser_trace`.
"""
def _p(self, s: str) -> None:
print(s)
def handle_starttag(
self,
name: str,
attrs: List[Tuple[str, Optional[str]]],
handle_empty_element: bool = True,
) -> None:
self._p(f"{name} {attrs} START")
def handle_endtag(self, name: str, check_already_closed: bool = True) -> None:
self._p("%s END" % name)
def handle_data(self, data: str) -> None:
self._p("%s DATA" % data)
def handle_charref(self, name: str) -> None:
self._p("%s CHARREF" % name)
def handle_entityref(self, name: str) -> None:
self._p("%s ENTITYREF" % name)
def handle_comment(self, data: str) -> None:
self._p("%s COMMENT" % data)
def handle_decl(self, data: str) -> None:
self._p("%s DECL" % data)
def unknown_decl(self, data: str) -> None:
self._p("%s UNKNOWN-DECL" % data)
def handle_pi(self, data: str) -> None:
self._p("%s PI" % data)
def htmlparser_trace(data: str) -> None:
"""Print out the HTMLParser events that occur during parsing.
This lets you see how HTMLParser parses a document when no
Beautiful Soup code is running.
:param data: Some markup.
"""
parser = AnnouncingParser()
parser.feed(data)
_vowels: str = "aeiou"
_consonants: str = "bcdfghjklmnpqrstvwxyz"
def rword(length: int = 5) -> str:
"""Generate a random word-like string.
:meta private:
"""
s = ""
for i in range(length):
if i % 2 == 0:
t = _consonants
else:
t = _vowels
s += random.choice(t)
return s
def rsentence(length: int = 4) -> str:
"""Generate a random sentence-like string.
:meta private:
"""
return " ".join(rword(random.randint(4, 9)) for i in range(length))
def rdoc(num_elements: int = 1000) -> str:
"""Randomly generate an invalid HTML document.
:meta private:
"""
tag_names = ["p", "div", "span", "i", "b", "script", "table"]
elements = []
for i in range(num_elements):
choice = random.randint(0, 3)
if choice == 0:
# New tag.
tag_name = random.choice(tag_names)
elements.append("<%s>" % tag_name)
elif choice == 1:
elements.append(rsentence(random.randint(1, 4)))
elif choice == 2:
# Close a tag.
tag_name = random.choice(tag_names)
elements.append("</%s>" % tag_name)
return "<html>" + "\n".join(elements) + "</html>"
def benchmark_parsers(num_elements: int = 100000) -> None:
"""Very basic head-to-head performance benchmark."""
print(("Comparative parser benchmark on Beautiful Soup %s" % __version__))
data = rdoc(num_elements)
print(("Generated a large invalid HTML document (%d bytes)." % len(data)))
for parser_name in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
try:
a = time.time()
BeautifulSoup(data, parser_name)
b = time.time()
success = True
except Exception:
print(("%s could not parse the markup." % parser_name))
traceback.print_exc()
if success:
print(("BS4+%s parsed the markup in %.2fs." % (parser_name, b - a)))
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
print(("Raw lxml parsed the markup in %.2fs." % (b - a)))
import html5lib
parser = html5lib.HTMLParser()
a = time.time()
parser.parse(data)
b = time.time()
print(("Raw html5lib parsed the markup in %.2fs." % (b - a)))
def profile(num_elements: int = 100000, parser: str = "lxml") -> None:
"""Use Python's profiler on a randomly generated document."""
filehandle = tempfile.NamedTemporaryFile()
filename = filehandle.name
data = rdoc(num_elements)
vars = dict(bs4=bs4, data=data, parser=parser)
cProfile.runctx("bs4.BeautifulSoup(data, parser)", vars, vars, filename)
stats = pstats.Stats(filename)
# stats.strip_dirs()
stats.sort_stats("cumulative")
stats.print_stats("_html5lib|bs4", 50)
# If this file is run as a script, standard input is diagnosed.
if __name__ == "__main__":
diagnose(sys.stdin.read())

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,28 @@
"""Exceptions defined by Beautiful Soup itself."""
from typing import Union
class StopParsing(Exception):
"""Exception raised by a TreeBuilder if it's unable to continue parsing."""
class FeatureNotFound(ValueError):
"""Exception raised by the BeautifulSoup constructor if no parser with the
requested features is found.
"""
class ParserRejectedMarkup(Exception):
"""An Exception to be raised when the underlying parser simply
refuses to parse the given markup.
"""
def __init__(self, message_or_exception: Union[str, Exception]):
"""Explain why the parser rejected the given markup, either
with a textual explanation or another exception.
"""
if isinstance(message_or_exception, Exception):
e = message_or_exception
message_or_exception = "%s: %s" % (e.__class__.__name__, str(e))
super(ParserRejectedMarkup, self).__init__(message_or_exception)

View file

@ -0,0 +1,764 @@
from __future__ import annotations
from collections import defaultdict
import re
from typing import (
Any,
Callable,
cast,
Dict,
Iterator,
Iterable,
List,
Optional,
Sequence,
Type,
Union,
)
import warnings
from bs4._deprecation import _deprecated
from bs4.element import (
AttributeDict,
NavigableString,
PageElement,
ResultSet,
Tag,
)
from bs4._typing import (
_AtMostOneElement,
_AttributeValue,
_NullableStringMatchFunction,
_OneElement,
_PageElementMatchFunction,
_QueryResults,
_RawAttributeValues,
_RegularExpressionProtocol,
_StrainableAttribute,
_StrainableElement,
_StrainableString,
_StringMatchFunction,
_TagMatchFunction,
)
class ElementFilter(object):
"""`ElementFilter` encapsulates the logic necessary to decide:
1. whether a `PageElement` (a `Tag` or a `NavigableString`) matches a
user-specified query.
2. whether a given sequence of markup found during initial parsing
should be turned into a `PageElement` at all, or simply discarded.
The base class is the simplest `ElementFilter`. By default, it
matches everything and allows all markup to become `PageElement`
objects. You can make it more selective by passing in a
user-defined match function, or defining a subclass.
Most users of Beautiful Soup will never need to use
`ElementFilter`, or its more capable subclass
`SoupStrainer`. Instead, they will use methods like
:py:meth:`Tag.find`, which will convert their arguments into
`SoupStrainer` objects and run them against the tree.
However, if you find yourself wanting to treat the arguments to
Beautiful Soup's find_*() methods as first-class objects, those
objects will be `SoupStrainer` objects. You can create them
yourself and then make use of functions like
`ElementFilter.filter()`.
"""
match_function: Optional[_PageElementMatchFunction]
def __init__(self, match_function: Optional[_PageElementMatchFunction] = None):
"""Pass in a match function to easily customize the behavior of
`ElementFilter.match` without needing to subclass.
:param match_function: A function that takes a `PageElement`
and returns `True` if that `PageElement` matches some criteria.
"""
self.match_function = match_function
@property
def includes_everything(self) -> bool:
"""Does this `ElementFilter` obviously include everything? If so,
the filter process can be made much faster.
The `ElementFilter` might turn out to include everything even
if this returns `False`, but it won't include everything in an
obvious way.
The base `ElementFilter` implementation includes things based on
the match function, so includes_everything is only true if
there is no match function.
"""
return not self.match_function
@property
def excludes_everything(self) -> bool:
"""Does this `ElementFilter` obviously exclude everything? If
so, Beautiful Soup will issue a warning if you try to use it
when parsing a document.
The `ElementFilter` might turn out to exclude everything even
if this returns `False`, but it won't exclude everything in an
obvious way.
The base `ElementFilter` implementation excludes things based
on a match function we can't inspect, so excludes_everything
is always false.
"""
return False
def match(self, element: PageElement, _known_rules:bool=False) -> bool:
"""Does the given PageElement match the rules set down by this
ElementFilter?
The base implementation delegates to the function passed in to
the constructor.
:param _known_rules: Defined for compatibility with
SoupStrainer._match(). Used more for consistency than because
we need the performance optimization.
"""
if not _known_rules and self.includes_everything:
return True
if not self.match_function:
return True
return self.match_function(element)
def filter(self, generator: Iterator[PageElement]) -> Iterator[_OneElement]:
"""The most generic search method offered by Beautiful Soup.
Acts like Python's built-in `filter`, using
`ElementFilter.match` as the filtering function.
"""
# If there are no rules at all, don't bother filtering. Let
# anything through.
if self.includes_everything:
yield from generator
while True:
try:
i = next(generator)
except StopIteration:
break
if i:
if self.match(i, _known_rules=True):
yield cast("_OneElement", i)
def find(self, generator: Iterator[PageElement]) -> _AtMostOneElement:
"""A lower-level equivalent of :py:meth:`Tag.find`.
You can pass in your own generator for iterating over
`PageElement` objects. The first one that matches this
`ElementFilter` will be returned.
:param generator: A way of iterating over `PageElement`
objects.
"""
for match in self.filter(generator):
return match
return None
def find_all(
self, generator: Iterator[PageElement], limit: Optional[int] = None
) -> _QueryResults:
"""A lower-level equivalent of :py:meth:`Tag.find_all`.
You can pass in your own generator for iterating over
`PageElement` objects. Only elements that match this
`ElementFilter` will be returned in the :py:class:`ResultSet`.
:param generator: A way of iterating over `PageElement`
objects.
:param limit: Stop looking after finding this many results.
"""
results = []
for match in self.filter(generator):
results.append(match)
if limit is not None and len(results) >= limit:
break
return ResultSet(self, results)
def allow_tag_creation(
self, nsprefix: Optional[str], name: str, attrs: Optional[_RawAttributeValues]
) -> bool:
"""Based on the name and attributes of a tag, see whether this
`ElementFilter` will allow a `Tag` object to even be created.
By default, all tags are parsed. To change this, subclass
`ElementFilter`.
:param name: The name of the prospective tag.
:param attrs: The attributes of the prospective tag.
"""
return True
def allow_string_creation(self, string: str) -> bool:
"""Based on the content of a string, see whether this
`ElementFilter` will allow a `NavigableString` object based on
this string to be added to the parse tree.
By default, all strings are processed into `NavigableString`
objects. To change this, subclass `ElementFilter`.
:param str: The string under consideration.
"""
return True
class MatchRule(object):
"""Each MatchRule encapsulates the logic behind a single argument
passed in to one of the Beautiful Soup find* methods.
"""
string: Optional[str]
pattern: Optional[_RegularExpressionProtocol]
present: Optional[bool]
exclude_everything: Optional[bool]
# TODO-TYPING: All MatchRule objects also have an attribute
# ``function``, but the type of the function depends on the
# subclass.
def __init__(
self,
string: Optional[Union[str, bytes]] = None,
pattern: Optional[_RegularExpressionProtocol] = None,
function: Optional[Callable] = None,
present: Optional[bool] = None,
exclude_everything: Optional[bool] = None
):
if isinstance(string, bytes):
string = string.decode("utf8")
self.string = string
if isinstance(pattern, bytes):
self.pattern = re.compile(pattern.decode("utf8"))
elif isinstance(pattern, str):
self.pattern = re.compile(pattern)
else:
self.pattern = pattern
self.function = function
self.present = present
self.exclude_everything = exclude_everything
values = [
x
for x in (self.string, self.pattern, self.function, self.present, self.exclude_everything)
if x is not None
]
if len(values) == 0:
raise ValueError(
"Either string, pattern, function, present, or exclude_everything must be provided."
)
if len(values) > 1:
raise ValueError(
"At most one of string, pattern, function, present, and exclude_everything must be provided."
)
def _base_match(self, string: Optional[str]) -> Optional[bool]:
"""Run the 'cheap' portion of a match, trying to get an answer without
calling a potentially expensive custom function.
:return: True or False if we have a (positive or negative)
match; None if we need to keep trying.
"""
# self.exclude_everything matches nothing.
if self.exclude_everything:
return False
# self.present==True matches everything except None.
if self.present is True:
return string is not None
# self.present==False matches _only_ None.
if self.present is False:
return string is None
# self.string does an exact string match.
if self.string is not None:
# print(f"{self.string} ?= {string}")
return self.string == string
# self.pattern does a regular expression search.
if self.pattern is not None:
# print(f"{self.pattern} ?~ {string}")
if string is None:
return False
return self.pattern.search(string) is not None
return None
def matches_string(self, string: Optional[str]) -> bool:
_base_result = self._base_match(string)
if _base_result is not None:
# No need to invoke the test function.
return _base_result
if self.function is not None and not self.function(string):
# print(f"{self.function}({string}) == False")
return False
return True
def __repr__(self) -> str:
cls = type(self).__name__
return f"<{cls} string={self.string} pattern={self.pattern} function={self.function} present={self.present}>"
def __eq__(self, other: Any) -> bool:
return (
isinstance(other, MatchRule)
and self.string == other.string
and self.pattern == other.pattern
and self.function == other.function
and self.present == other.present
)
class TagNameMatchRule(MatchRule):
"""A MatchRule implementing the rules for matches against tag name."""
function: Optional[_TagMatchFunction]
def matches_tag(self, tag: Tag) -> bool:
base_value = self._base_match(tag.name)
if base_value is not None:
return base_value
# The only remaining possibility is that the match is determined
# by a function call. Call the function.
function = cast(_TagMatchFunction, self.function)
if function(tag):
return True
return False
class AttributeValueMatchRule(MatchRule):
"""A MatchRule implementing the rules for matches against attribute value."""
function: Optional[_NullableStringMatchFunction]
class StringMatchRule(MatchRule):
"""A MatchRule implementing the rules for matches against a NavigableString."""
function: Optional[_StringMatchFunction]
class SoupStrainer(ElementFilter):
"""The `ElementFilter` subclass used internally by Beautiful Soup.
A `SoupStrainer` encapsulates the logic necessary to perform the
kind of matches supported by methods such as
:py:meth:`Tag.find`. `SoupStrainer` objects are primarily created
internally, but you can create one yourself and pass it in as
``parse_only`` to the `BeautifulSoup` constructor, to parse a
subset of a large document.
Internally, `SoupStrainer` objects work by converting the
constructor arguments into `MatchRule` objects. Incoming
tags/markup are matched against those rules.
:param name: One or more restrictions on the tags found in a document.
:param attrs: A dictionary that maps attribute names to
restrictions on tags that use those attributes.
:param string: One or more restrictions on the strings found in a
document.
:param kwargs: A dictionary that maps attribute names to restrictions
on tags that use those attributes. These restrictions are additive to
any specified in ``attrs``.
"""
name_rules: List[TagNameMatchRule]
attribute_rules: Dict[str, List[AttributeValueMatchRule]]
string_rules: List[StringMatchRule]
def __init__(
self,
name: Optional[_StrainableElement] = None,
attrs: Optional[Dict[str, _StrainableAttribute]] = None,
string: Optional[_StrainableString] = None,
**kwargs: _StrainableAttribute,
):
if string is None and "text" in kwargs:
string = cast(Optional[_StrainableString], kwargs.pop("text"))
warnings.warn(
"As of version 4.11.0, the 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
DeprecationWarning,
stacklevel=2,
)
if name is None and not attrs and not string and not kwargs:
# Special case for backwards compatibility. Instantiating
# a SoupStrainer with no arguments whatsoever gets you one
# that matches all Tags, and only Tags.
self.name_rules = [TagNameMatchRule(present=True)]
else:
self.name_rules = cast(
List[TagNameMatchRule], list(self._make_match_rules(name, TagNameMatchRule))
)
self.attribute_rules = defaultdict(list)
if attrs is None:
attrs = {}
if not isinstance(attrs, dict):
# Passing something other than a dictionary as attrs is
# sugar for matching that thing against the 'class'
# attribute.
attrs = {"class": attrs}
for attrdict in attrs, kwargs:
for attr, value in attrdict.items():
if attr == "class_" and attrdict is kwargs:
# If you pass in 'class_' as part of kwargs, it's
# because class is a Python reserved word. If you
# pass it in as part of the attrs dict, it's
# because you really are looking for an attribute
# called 'class_'.
attr = "class"
if value is None:
value = False
for rule_obj in self._make_match_rules(value, AttributeValueMatchRule):
self.attribute_rules[attr].append(
cast(AttributeValueMatchRule, rule_obj)
)
self.string_rules = cast(
List[StringMatchRule], list(self._make_match_rules(string, StringMatchRule))
)
#: DEPRECATED 4.13.0: You shouldn't need to check this under
#: any name (.string or .text), and if you do, you're probably
#: not taking into account all of the types of values this
#: variable might have. Look at the .string_rules list instead.
self.__string = string
@property
def includes_everything(self) -> bool:
"""Check whether the provided rules will obviously include
everything. (They might include everything even if this returns `False`,
but not in an obvious way.)
"""
return not self.name_rules and not self.string_rules and not self.attribute_rules
@property
def excludes_everything(self) -> bool:
"""Check whether the provided rules will obviously exclude
everything. (They might exclude everything even if this returns `False`,
but not in an obvious way.)
"""
if (self.string_rules and (self.name_rules or self.attribute_rules)):
# This is self-contradictory, so the rules exclude everything.
return True
# If there's a rule that ended up treated as an "exclude everything"
# rule due to creating a logical inconsistency, then the rules
# exclude everything.
if any(x.exclude_everything for x in self.string_rules):
return True
if any(x.exclude_everything for x in self.name_rules):
return True
for ruleset in self.attribute_rules.values():
if any(x.exclude_everything for x in ruleset):
return True
return False
@property
def string(self) -> Optional[_StrainableString]:
":meta private:"
warnings.warn(
"Access to deprecated property string. (Look at .string_rules instead) -- Deprecated since version 4.13.0.",
DeprecationWarning,
stacklevel=2,
)
return self.__string
@property
def text(self) -> Optional[_StrainableString]:
":meta private:"
warnings.warn(
"Access to deprecated property text. (Look at .string_rules instead) -- Deprecated since version 4.13.0.",
DeprecationWarning,
stacklevel=2,
)
return self.__string
def __repr__(self) -> str:
return f"<{self.__class__.__name__} name={self.name_rules} attrs={self.attribute_rules} string={self.string_rules}>"
@classmethod
def _make_match_rules(
cls,
obj: Optional[Union[_StrainableElement, _StrainableAttribute]],
rule_class: Type[MatchRule],
) -> Iterator[MatchRule]:
"""Convert a vaguely-specific 'object' into one or more well-defined
`MatchRule` objects.
:param obj: Some kind of object that corresponds to one or more
matching rules.
:param rule_class: Create instances of this `MatchRule` subclass.
"""
if obj is None:
return
if isinstance(obj, (str, bytes)):
yield rule_class(string=obj)
elif isinstance(obj, bool):
yield rule_class(present=obj)
elif callable(obj):
yield rule_class(function=obj)
elif isinstance(obj, _RegularExpressionProtocol):
yield rule_class(pattern=obj)
elif hasattr(obj, "__iter__"):
if not obj:
# The attribute is being matched against the null set,
# which means it should exclude everything.
yield rule_class(exclude_everything=True)
for o in obj:
if not isinstance(o, (bytes, str)) and hasattr(o, "__iter__"):
# This is almost certainly the user's
# mistake. This list contains another list, which
# opens up the possibility of infinite
# self-reference. In the interests of avoiding
# infinite recursion, we'll treat this as an
# impossible match and issue a rule that excludes
# everything, rather than looking inside.
warnings.warn(
f"Ignoring nested list {o} to avoid the possibility of infinite recursion.",
stacklevel=5,
)
yield rule_class(exclude_everything=True)
continue
for x in cls._make_match_rules(o, rule_class):
yield x
else:
yield rule_class(string=str(obj))
def matches_tag(self, tag: Tag) -> bool:
"""Do the rules of this `SoupStrainer` trigger a match against the
given `Tag`?
If the `SoupStrainer` has any `TagNameMatchRule`, at least one
must match the `Tag` or its `Tag.name`.
If there are any `AttributeValueMatchRule` for a given
attribute, at least one of them must match the attribute
value.
If there are any `StringMatchRule`, at least one must match,
but a `SoupStrainer` that *only* contains `StringMatchRule`
cannot match a `Tag`, only a `NavigableString`.
"""
# If there are no rules at all, let anything through.
#if self.includes_everything:
# return True
# String rules cannot not match a Tag on their own.
if not self.name_rules and not self.attribute_rules:
return False
# Optimization for a very common case where the user is
# searching for a tag with one specific name, and we're
# looking at a tag with a different name.
if (
not tag.prefix
and len(self.name_rules) == 1
and self.name_rules[0].string is not None
and tag.name != self.name_rules[0].string
):
return False
# If there are name rules, at least one must match. It can
# match either the Tag object itself or the prefixed name of
# the tag.
prefixed_name = None
if tag.prefix:
prefixed_name = f"{tag.prefix}:{tag.name}"
if self.name_rules:
name_matches = False
for rule in self.name_rules:
# attrs = " ".join(
# [f"{k}={v}" for k, v in sorted(tag.attrs.items())]
# )
# print(f"Testing <{tag.name} {attrs}>{tag.string}</{tag.name}> against {rule}")
# If the rule contains a function, the function will be called
# with `tag`. It will not be called a second time with
# `prefixed_name`.
if rule.matches_tag(tag) or (
not rule.function and prefixed_name is not None and rule.matches_string(prefixed_name)
):
name_matches = True
break
if not name_matches:
return False
# If there are attribute rules for a given attribute, at least
# one of them must match. If there are rules for multiple
# attributes, each attribute must have at least one match.
for attr, rules in self.attribute_rules.items():
attr_value = tag.get(attr, None)
this_attr_match = self._attribute_match(attr_value, rules)
if not this_attr_match:
return False
# If there are string rules, at least one must match.
if self.string_rules:
_str = tag.string
if _str is None:
return False
if not self.matches_any_string_rule(_str):
return False
return True
def _attribute_match(
self,
attr_value: Optional[_AttributeValue],
rules: Iterable[AttributeValueMatchRule],
) -> bool:
attr_values: Sequence[Optional[str]]
if isinstance(attr_value, list):
attr_values = attr_value
else:
attr_values = [cast(str, attr_value)]
def _match_attribute_value_helper(attr_values: Sequence[Optional[str]]) -> bool:
for rule in rules:
for attr_value in attr_values:
if rule.matches_string(attr_value):
return True
return False
this_attr_match = _match_attribute_value_helper(attr_values)
if not this_attr_match and len(attr_values) != 1:
# Try again but treat the attribute value as a single
# string instead of a list. The result can only be
# different if the list of values contains more or less
# than one item.
# This cast converts Optional[str] to plain str.
#
# We know there can't be any None in the list. Beautiful
# Soup never uses None as a value of a multi-valued
# attribute, and if None is passed in as attr_value, it's
# turned into a list with 1 element, which was excluded by
# the if statement above.
attr_values = cast(Sequence[str], attr_values)
joined_attr_value = " ".join(attr_values)
this_attr_match = _match_attribute_value_helper([joined_attr_value])
return this_attr_match
def allow_tag_creation(
self, nsprefix: Optional[str], name: str, attrs: Optional[_RawAttributeValues]
) -> bool:
"""Based on the name and attributes of a tag, see whether this
`SoupStrainer` will allow a `Tag` object to even be created.
:param name: The name of the prospective tag.
:param attrs: The attributes of the prospective tag.
"""
if self.string_rules:
# A SoupStrainer that has string rules can't be used to
# manage tag creation, because the string rule can't be
# evaluated until after the tag and all of its contents
# have been parsed.
return False
prefixed_name = None
if nsprefix:
prefixed_name = f"{nsprefix}:{name}"
if self.name_rules:
# At least one name rule must match.
name_match = False
for rule in self.name_rules:
for x in name, prefixed_name:
if x is not None:
if rule.matches_string(x):
name_match = True
break
if not name_match:
return False
# For each attribute that has rules, at least one rule must
# match.
if attrs is None:
attrs = AttributeDict()
for attr, rules in self.attribute_rules.items():
attr_value = attrs.get(attr)
if not self._attribute_match(attr_value, rules):
return False
return True
def allow_string_creation(self, string: str) -> bool:
"""Based on the content of a markup string, see whether this
`SoupStrainer` will allow it to be instantiated as a
`NavigableString` object, or whether it should be ignored.
"""
if self.name_rules or self.attribute_rules:
# A SoupStrainer that has name or attribute rules won't
# match any strings; it's designed to match tags with
# certain properties.
return False
if not self.string_rules:
# A SoupStrainer with no string rules will match
# all strings.
return True
if not self.matches_any_string_rule(string):
return False
return True
def matches_any_string_rule(self, string: str) -> bool:
"""See whether the content of a string matches any of
this `SoupStrainer`'s string rules.
"""
if not self.string_rules:
return True
for string_rule in self.string_rules:
if string_rule.matches_string(string):
return True
return False
def match(self, element: PageElement, _known_rules: bool=False) -> bool:
"""Does the given `PageElement` match the rules set down by this
`SoupStrainer`?
The find_* methods rely heavily on this method to find matches.
:param element: A `PageElement`.
:param _known_rules: Set to true in the common case where
we already checked and found at least one rule in this SoupStrainer
that might exclude a PageElement. Without this, we need
to check .includes_everything every time, just to be safe.
:return: `True` if the element matches this `SoupStrainer`'s rules; `False` otherwise.
"""
# If there are no rules at all, let anything through.
if not _known_rules and self.includes_everything:
return True
if isinstance(element, Tag):
return self.matches_tag(element)
assert isinstance(element, NavigableString)
if not (self.name_rules or self.attribute_rules):
# A NavigableString can only match a SoupStrainer that
# does not define any name or attribute rules.
# Then it comes down to the string rules.
return self.matches_any_string_rule(element)
return False
@_deprecated("allow_tag_creation", "4.13.0")
def search_tag(self, name: str, attrs: Optional[_RawAttributeValues]) -> bool:
"""A less elegant version of `allow_tag_creation`. Deprecated as of 4.13.0"""
":meta private:"
return self.allow_tag_creation(None, name, attrs)
@_deprecated("match", "4.13.0")
def search(self, element: PageElement) -> Optional[PageElement]:
"""A less elegant version of match(). Deprecated as of 4.13.0.
:meta private:
"""
return element if self.match(element) else None

View file

@ -0,0 +1,276 @@
from __future__ import annotations
from typing import Callable, Dict, Iterable, Optional, Set, Tuple, TYPE_CHECKING, Union
from typing_extensions import TypeAlias
from bs4.dammit import EntitySubstitution
if TYPE_CHECKING:
from bs4._typing import _AttributeValue
class Formatter(EntitySubstitution):
"""Describes a strategy to use when outputting a parse tree to a string.
Some parts of this strategy come from the distinction between
HTML4, HTML5, and XML. Others are configurable by the user.
Formatters are passed in as the `formatter` argument to methods
like `bs4.element.Tag.encode`. Most people won't need to
think about formatters, and most people who need to think about
them can pass in one of these predefined strings as `formatter`
rather than making a new Formatter object:
For HTML documents:
* 'html' - HTML entity substitution for generic HTML documents. (default)
* 'html5' - HTML entity substitution for HTML5 documents, as
well as some optimizations in the way tags are rendered.
* 'html5-4.12.0' - The version of the 'html5' formatter used prior to
Beautiful Soup 4.13.0.
* 'minimal' - Only make the substitutions necessary to guarantee
valid HTML.
* None - Do not perform any substitution. This will be faster
but may result in invalid markup.
For XML documents:
* 'html' - Entity substitution for XHTML documents.
* 'minimal' - Only make the substitutions necessary to guarantee
valid XML. (default)
* None - Do not perform any substitution. This will be faster
but may result in invalid markup.
"""
#: Constant name denoting HTML markup
HTML: str = "html"
#: Constant name denoting XML markup
XML: str = "xml"
#: Default values for the various constructor options when the
#: markup language is HTML.
HTML_DEFAULTS: Dict[str, Set[str]] = dict(
cdata_containing_tags=set(["script", "style"]),
)
language: Optional[str] #: :meta private:
entity_substitution: Optional[_EntitySubstitutionFunction] #: :meta private:
void_element_close_prefix: str #: :meta private:
cdata_containing_tags: Set[str] #: :meta private:
indent: str #: :meta private:
#: If this is set to true by the constructor, then attributes whose
#: values are sent to the empty string will be treated as HTML
#: boolean attributes. (Attributes whose value is None are always
#: rendered this way.)
empty_attributes_are_booleans: bool
def _default(
self, language: str, value: Optional[Set[str]], kwarg: str
) -> Set[str]:
if value is not None:
return value
if language == self.XML:
# When XML is the markup language in use, all of the
# defaults are the empty list.
return set()
# Otherwise, it depends on what's in HTML_DEFAULTS.
return self.HTML_DEFAULTS[kwarg]
def __init__(
self,
language: Optional[str] = None,
entity_substitution: Optional[_EntitySubstitutionFunction] = None,
void_element_close_prefix: str = "/",
cdata_containing_tags: Optional[Set[str]] = None,
empty_attributes_are_booleans: bool = False,
indent: Union[int,str] = 1,
):
r"""Constructor.
:param language: This should be `Formatter.XML` if you are formatting
XML markup and `Formatter.HTML` if you are formatting HTML markup.
:param entity_substitution: A function to call to replace special
characters with XML/HTML entities. For examples, see
bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
:param void_element_close_prefix: By default, void elements
are represented as <tag/> (XML rules) rather than <tag>
(HTML rules). To get <tag>, pass in the empty string.
:param cdata_containing_tags: The set of tags that are defined
as containing CDATA in this dialect. For example, in HTML,
<script> and <style> tags are defined as containing CDATA,
and their contents should not be formatted.
:param empty_attributes_are_booleans: If this is set to true,
then attributes whose values are sent to the empty string
will be treated as `HTML boolean
attributes<https://dev.w3.org/html5/spec-LC/common-microsyntaxes.html#boolean-attributes>`_. (Attributes
whose value is None are always rendered this way.)
:param indent: If indent is a non-negative integer or string,
then the contents of elements will be indented
appropriately when pretty-printing. An indent level of 0,
negative, or "" will only insert newlines. Using a
positive integer indent indents that many spaces per
level. If indent is a string (such as "\t"), that string
is used to indent each level. The default behavior is to
indent one space per level.
"""
self.language = language or self.HTML
self.entity_substitution = entity_substitution
self.void_element_close_prefix = void_element_close_prefix
self.cdata_containing_tags = self._default(
self.language, cdata_containing_tags, "cdata_containing_tags"
)
self.empty_attributes_are_booleans = empty_attributes_are_booleans
if indent is None:
indent = 0
indent_str: str
if isinstance(indent, int):
if indent < 0:
indent = 0
indent_str = " " * indent
elif isinstance(indent, str):
indent_str = indent
else:
indent_str = " "
self.indent = indent_str
def substitute(self, ns: str) -> str:
"""Process a string that needs to undergo entity substitution.
This may be a string encountered in an attribute value or as
text.
:param ns: A string.
:return: The same string but with certain characters replaced by named
or numeric entities.
"""
if not self.entity_substitution:
return ns
from .element import NavigableString
if (
isinstance(ns, NavigableString)
and ns.parent is not None
and ns.parent.name in self.cdata_containing_tags
):
# Do nothing.
return ns
# Substitute.
return self.entity_substitution(ns)
def attribute_value(self, value: str) -> str:
"""Process the value of an attribute.
:param ns: A string.
:return: A string with certain characters replaced by named
or numeric entities.
"""
return self.substitute(value)
def attributes(
self, tag: bs4.element.Tag # type:ignore
) -> Iterable[Tuple[str, Optional[_AttributeValue]]]:
"""Reorder a tag's attributes however you want.
By default, attributes are sorted alphabetically. This makes
behavior consistent between Python 2 and Python 3, and preserves
backwards compatibility with older versions of Beautiful Soup.
If `empty_attributes_are_booleans` is True, then
attributes whose values are set to the empty string will be
treated as boolean attributes.
"""
if tag.attrs is None:
return []
items: Iterable[Tuple[str, _AttributeValue]] = list(tag.attrs.items())
return sorted(
(k, (None if self.empty_attributes_are_booleans and v == "" else v))
for k, v in items
)
class HTMLFormatter(Formatter):
"""A generic Formatter for HTML."""
REGISTRY: Dict[Optional[str], HTMLFormatter] = {}
def __init__(
self,
entity_substitution: Optional[_EntitySubstitutionFunction] = None,
void_element_close_prefix: str = "/",
cdata_containing_tags: Optional[Set[str]] = None,
empty_attributes_are_booleans: bool = False,
indent: Union[int,str] = 1,
):
super(HTMLFormatter, self).__init__(
self.HTML,
entity_substitution,
void_element_close_prefix,
cdata_containing_tags,
empty_attributes_are_booleans,
indent=indent
)
class XMLFormatter(Formatter):
"""A generic Formatter for XML."""
REGISTRY: Dict[Optional[str], XMLFormatter] = {}
def __init__(
self,
entity_substitution: Optional[_EntitySubstitutionFunction] = None,
void_element_close_prefix: str = "/",
cdata_containing_tags: Optional[Set[str]] = None,
empty_attributes_are_booleans: bool = False,
indent: Union[int,str] = 1,
):
super(XMLFormatter, self).__init__(
self.XML,
entity_substitution,
void_element_close_prefix,
cdata_containing_tags,
empty_attributes_are_booleans,
indent=indent,
)
# Set up aliases for the default formatters.
HTMLFormatter.REGISTRY["html"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_html5,
void_element_close_prefix="",
empty_attributes_are_booleans=True,
)
HTMLFormatter.REGISTRY["html5-4.12"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_html,
void_element_close_prefix="",
empty_attributes_are_booleans=True,
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(entity_substitution=None)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
entity_substitution=EntitySubstitution.substitute_xml
)
XMLFormatter.REGISTRY[None] = XMLFormatter(entity_substitution=None)
# Define type aliases to improve readability.
#
#: A function to call to replace special characters with XML or HTML
#: entities.
_EntitySubstitutionFunction: TypeAlias = Callable[[str], str]
# Many of the output-centered methods take an argument that can either
# be a Formatter object or the name of a Formatter to be looked up.
_FormatterOrName = Union[Formatter, str]

View file

@ -0,0 +1,760 @@
@Switch01
A_Rog
Aakanksha Agrawal
Abhinav Sagar
ABHYUDAY PRATAP SINGH
abs51295
AceGentile
Adam Chainz
Adam Tse
Adam Wentz
admin
Adrien Morison
ahayrapetyan
Ahilya
AinsworthK
Akash Srivastava
Alan Yee
Albert Tugushev
Albert-Guan
albertg
Alberto Sottile
Aleks Bunin
Ales Erjavec
Alethea Flowers
Alex Gaynor
Alex Grönholm
Alex Hedges
Alex Loosley
Alex Morega
Alex Stachowiak
Alexander Shtyrov
Alexandre Conrad
Alexey Popravka
Aleš Erjavec
Alli
Ami Fischman
Ananya Maiti
Anatoly Techtonik
Anders Kaseorg
Andre Aguiar
Andreas Lutro
Andrei Geacar
Andrew Gaul
Andrew Shymanel
Andrey Bienkowski
Andrey Bulgakov
Andrés Delfino
Andy Freeland
Andy Kluger
Ani Hayrapetyan
Aniruddha Basak
Anish Tambe
Anrs Hu
Anthony Sottile
Antoine Musso
Anton Ovchinnikov
Anton Patrushev
Antonio Alvarado Hernandez
Antony Lee
Antti Kaihola
Anubhav Patel
Anudit Nagar
Anuj Godase
AQNOUCH Mohammed
AraHaan
Arindam Choudhury
Armin Ronacher
Artem
Arun Babu Neelicattu
Ashley Manton
Ashwin Ramaswami
atse
Atsushi Odagiri
Avinash Karhana
Avner Cohen
Awit (Ah-Wit) Ghirmai
Baptiste Mispelon
Barney Gale
barneygale
Bartek Ogryczak
Bastian Venthur
Ben Bodenmiller
Ben Darnell
Ben Hoyt
Ben Mares
Ben Rosser
Bence Nagy
Benjamin Peterson
Benjamin VanEvery
Benoit Pierre
Berker Peksag
Bernard
Bernard Tyers
Bernardo B. Marques
Bernhard M. Wiedemann
Bertil Hatt
Bhavam Vidyarthi
Blazej Michalik
Bogdan Opanchuk
BorisZZZ
Brad Erickson
Bradley Ayers
Brandon L. Reiss
Brandt Bucher
Brett Randall
Brett Rosen
Brian Cristante
Brian Rosner
briantracy
BrownTruck
Bruno Oliveira
Bruno Renié
Bruno S
Bstrdsmkr
Buck Golemon
burrows
Bussonnier Matthias
bwoodsend
c22
Caleb Martinez
Calvin Smith
Carl Meyer
Carlos Liam
Carol Willing
Carter Thayer
Cass
Chandrasekhar Atina
Chih-Hsuan Yen
Chris Brinker
Chris Hunt
Chris Jerdonek
Chris Kuehl
Chris McDonough
Chris Pawley
Chris Pryer
Chris Wolfe
Christian Clauss
Christian Heimes
Christian Oudard
Christoph Reiter
Christopher Hunt
Christopher Snyder
cjc7373
Clark Boylan
Claudio Jolowicz
Clay McClure
Cody
Cody Soyland
Colin Watson
Collin Anderson
Connor Osborn
Cooper Lees
Cooper Ry Lees
Cory Benfield
Cory Wright
Craig Kerstiens
Cristian Sorinel
Cristina
Cristina Muñoz
Curtis Doty
cytolentino
Daan De Meyer
Dale
Damian
Damian Quiroga
Damian Shaw
Dan Black
Dan Savilonis
Dan Sully
Dane Hillard
daniel
Daniel Collins
Daniel Hahler
Daniel Holth
Daniel Jost
Daniel Katz
Daniel Shaulov
Daniele Esposti
Daniele Nicolodi
Daniele Procida
Daniil Konovalenko
Danny Hermes
Danny McClanahan
Darren Kavanagh
Dav Clark
Dave Abrahams
Dave Jones
David Aguilar
David Black
David Bordeynik
David Caro
David D Lowe
David Evans
David Hewitt
David Linke
David Poggi
David Pursehouse
David Runge
David Tucker
David Wales
Davidovich
ddelange
Deepak Sharma
Deepyaman Datta
Denise Yu
dependabot[bot]
derwolfe
Desetude
Devesh Kumar Singh
Diego Caraballo
Diego Ramirez
DiegoCaraballo
Dimitri Merejkowsky
Dimitri Papadopoulos
Dirk Stolle
Dmitry Gladkov
Dmitry Volodin
Domen Kožar
Dominic Davis-Foster
Donald Stufft
Dongweiming
doron zarhi
Dos Moonen
Douglas Thor
DrFeathers
Dustin Ingram
Dwayne Bailey
Ed Morley
Edgar Ramírez
Edgar Ramírez Mondragón
Ee Durbin
Efflam Lemaillet
efflamlemaillet
Eitan Adler
ekristina
elainechan
Eli Schwartz
Elisha Hollander
Ellen Marie Dash
Emil Burzo
Emil Styrke
Emmanuel Arias
Endoh Takanao
enoch
Erdinc Mutlu
Eric Cousineau
Eric Gillingham
Eric Hanchrow
Eric Hopper
Erik M. Bray
Erik Rose
Erwin Janssen
Eugene Vereshchagin
everdimension
Federico
Felipe Peter
Felix Yan
fiber-space
Filip Kokosiński
Filipe Laíns
Finn Womack
finnagin
Flavio Amurrio
Florian Briand
Florian Rathgeber
Francesco
Francesco Montesano
Frost Ming
Gabriel Curio
Gabriel de Perthuis
Garry Polley
gavin
gdanielson
Geoffrey Sneddon
George Song
Georgi Valkov
Georgy Pchelkin
ghost
Giftlin Rajaiah
gizmoguy1
gkdoc
Godefroid Chapelle
Gopinath M
GOTO Hayato
gousaiyang
gpiks
Greg Roodt
Greg Ward
Guilherme Espada
Guillaume Seguin
gutsytechster
Guy Rozendorn
Guy Tuval
gzpan123
Hanjun Kim
Hari Charan
Harsh Vardhan
harupy
Harutaka Kawamura
hauntsaninja
Henrich Hartzer
Henry Schreiner
Herbert Pfennig
Holly Stotelmyer
Honnix
Hsiaoming Yang
Hugo Lopes Tavares
Hugo van Kemenade
Hugues Bruant
Hynek Schlawack
Ian Bicking
Ian Cordasco
Ian Lee
Ian Stapleton Cordasco
Ian Wienand
Igor Kuzmitshov
Igor Sobreira
Ilan Schnell
Illia Volochii
Ilya Baryshev
Inada Naoki
Ionel Cristian Mărieș
Ionel Maries Cristian
Itamar Turner-Trauring
Ivan Pozdeev
J. Nick Koston
Jacob Kim
Jacob Walls
Jaime Sanz
jakirkham
Jakub Kuczys
Jakub Stasiak
Jakub Vysoky
Jakub Wilk
James Cleveland
James Curtin
James Firth
James Gerity
James Polley
Jan Pokorný
Jannis Leidel
Jarek Potiuk
jarondl
Jason Curtis
Jason R. Coombs
JasonMo
JasonMo1
Jay Graves
Jean Abou Samra
Jean-Christophe Fillion-Robin
Jeff Barber
Jeff Dairiki
Jeff Widman
Jelmer Vernooij
jenix21
Jeremy Stanley
Jeremy Zafran
Jesse Rittner
Jiashuo Li
Jim Fisher
Jim Garrison
Jiun Bae
Jivan Amara
Joe Bylund
Joe Michelini
John Paton
John T. Wodder II
John-Scott Atlakson
johnthagen
Jon Banafato
Jon Dufresne
Jon Parise
Jonas Nockert
Jonathan Herbert
Joonatan Partanen
Joost Molenaar
Jorge Niedbalski
Joseph Bylund
Joseph Long
Josh Bronson
Josh Hansen
Josh Schneier
Joshua
Juan Luis Cano Rodríguez
Juanjo Bazán
Judah Rand
Julian Berman
Julian Gethmann
Julien Demoor
Jussi Kukkonen
jwg4
Jyrki Pulliainen
Kai Chen
Kai Mueller
Kamal Bin Mustafa
kasium
kaustav haldar
keanemind
Keith Maxwell
Kelsey Hightower
Kenneth Belitzky
Kenneth Reitz
Kevin Burke
Kevin Carter
Kevin Frommelt
Kevin R Patterson
Kexuan Sun
Kit Randel
Klaas van Schelven
KOLANICH
kpinc
Krishna Oza
Kumar McMillan
Kurt McKee
Kyle Persohn
lakshmanaram
Laszlo Kiss-Kollar
Laurent Bristiel
Laurent LAPORTE
Laurie O
Laurie Opperman
layday
Leon Sasson
Lev Givon
Lincoln de Sousa
Lipis
lorddavidiii
Loren Carvalho
Lucas Cimon
Ludovic Gasc
Lukas Geiger
Lukas Juhrich
Luke Macken
Luo Jiebin
luojiebin
luz.paz
László Kiss Kollár
M00nL1ght
Marc Abramowitz
Marc Tamlyn
Marcus Smith
Mariatta
Mark Kohler
Mark Williams
Markus Hametner
Martey Dodoo
Martin Fischer
Martin Häcker
Martin Pavlasek
Masaki
Masklinn
Matej Stuchlik
Mathew Jennings
Mathieu Bridon
Mathieu Kniewallner
Matt Bacchi
Matt Good
Matt Maker
Matt Robenolt
matthew
Matthew Einhorn
Matthew Feickert
Matthew Gilliard
Matthew Iversen
Matthew Treinish
Matthew Trumbell
Matthew Willson
Matthias Bussonnier
mattip
Maurits van Rees
Max W Chase
Maxim Kurnikov
Maxime Rouyrre
mayeut
mbaluna
mdebi
memoselyk
meowmeowcat
Michael
Michael Aquilina
Michael E. Karpeles
Michael Klich
Michael Mintz
Michael Williamson
michaelpacer
Michał Górny
Mickaël Schoentgen
Miguel Araujo Perez
Mihir Singh
Mike
Mike Hendricks
Min RK
MinRK
Miro Hrončok
Monica Baluna
montefra
Monty Taylor
Muha Ajjan
Nadav Wexler
Nahuel Ambrosini
Nate Coraor
Nate Prewitt
Nathan Houghton
Nathaniel J. Smith
Nehal J Wani
Neil Botelho
Nguyễn Gia Phong
Nicholas Serra
Nick Coghlan
Nick Stenning
Nick Timkovich
Nicolas Bock
Nicole Harris
Nikhil Benesch
Nikhil Ladha
Nikita Chepanov
Nikolay Korolev
Nipunn Koorapati
Nitesh Sharma
Niyas Sait
Noah
Noah Gorny
Nowell Strite
NtaleGrey
nvdv
OBITORASU
Ofek Lev
ofrinevo
Oliver Freund
Oliver Jeeves
Oliver Mannion
Oliver Tonnhofer
Olivier Girardot
Olivier Grisel
Ollie Rutherfurd
OMOTO Kenji
Omry Yadan
onlinejudge95
Oren Held
Oscar Benjamin
Oz N Tiram
Pachwenko
Patrick Dubroy
Patrick Jenkins
Patrick Lawson
patricktokeeffe
Patrik Kopkan
Paul Ganssle
Paul Kehrer
Paul Moore
Paul Nasrat
Paul Oswald
Paul van der Linden
Paulus Schoutsen
Pavel Safronov
Pavithra Eswaramoorthy
Pawel Jasinski
Paweł Szramowski
Pekka Klärck
Peter Gessler
Peter Lisák
Peter Waller
petr-tik
Phaneendra Chiruvella
Phil Elson
Phil Freo
Phil Pennock
Phil Whelan
Philip Jägenstedt
Philip Molloy
Philippe Ombredanne
Pi Delport
Pierre-Yves Rofes
Pieter Degroote
pip
Prabakaran Kumaresshan
Prabhjyotsing Surjit Singh Sodhi
Prabhu Marappan
Pradyun Gedam
Prashant Sharma
Pratik Mallya
pre-commit-ci[bot]
Preet Thakkar
Preston Holmes
Przemek Wrzos
Pulkit Goyal
q0w
Qiangning Hong
Qiming Xu
Quentin Lee
Quentin Pradet
R. David Murray
Rafael Caricio
Ralf Schmitt
Razzi Abuissa
rdb
Reece Dunham
Remi Rampin
Rene Dudfield
Riccardo Magliocchetti
Riccardo Schirone
Richard Jones
Richard Si
Ricky Ng-Adam
Rishi
RobberPhex
Robert Collins
Robert McGibbon
Robert Pollak
Robert T. McGibbon
robin elisha robinson
Roey Berman
Rohan Jain
Roman Bogorodskiy
Roman Donchenko
Romuald Brunet
ronaudinho
Ronny Pfannschmidt
Rory McCann
Ross Brattain
Roy Wellington Ⅳ
Ruairidh MacLeod
Russell Keith-Magee
Ryan Shepherd
Ryan Wooden
ryneeverett
Sachi King
Salvatore Rinchiera
sandeepkiran-js
Sander Van Balen
Savio Jomton
schlamar
Scott Kitterman
Sean
seanj
Sebastian Jordan
Sebastian Schaetz
Segev Finer
SeongSoo Cho
Sergey Vasilyev
Seth Michael Larson
Seth Woodworth
Shahar Epstein
Shantanu
shireenrao
Shivansh-007
Shlomi Fish
Shovan Maity
Simeon Visser
Simon Cross
Simon Pichugin
sinoroc
sinscary
snook92
socketubs
Sorin Sbarnea
Srinivas Nyayapati
Stavros Korokithakis
Stefan Scherfke
Stefano Rivera
Stephan Erb
Stephen Rosen
stepshal
Steve (Gadget) Barnes
Steve Barnes
Steve Dower
Steve Kowalik
Steven Myint
Steven Silvester
stonebig
studioj
Stéphane Bidoul
Stéphane Bidoul (ACSONE)
Stéphane Klein
Sumana Harihareswara
Surbhi Sharma
Sviatoslav Sydorenko
Swat009
Sylvain
Takayuki SHIMIZUKAWA
Taneli Hukkinen
tbeswick
Thiago
Thijs Triemstra
Thomas Fenzl
Thomas Grainger
Thomas Guettler
Thomas Johansson
Thomas Kluyver
Thomas Smith
Thomas VINCENT
Tim D. Smith
Tim Gates
Tim Harder
Tim Heap
tim smith
tinruufu
Tobias Hermann
Tom Forbes
Tom Freudenheim
Tom V
Tomas Hrnciar
Tomas Orsava
Tomer Chachamu
Tommi Enenkel | AnB
Tomáš Hrnčiar
Tony Beswick
Tony Narlock
Tony Zhaocheng Tan
TonyBeswick
toonarmycaptain
Toshio Kuratomi
toxinu
Travis Swicegood
Tushar Sadhwani
Tzu-ping Chung
Valentin Haenel
Victor Stinner
victorvpaulo
Vikram - Google
Viktor Szépe
Ville Skyttä
Vinay Sajip
Vincent Philippon
Vinicyus Macedo
Vipul Kumar
Vitaly Babiy
Vladimir Fokow
Vladimir Rutsky
W. Trevor King
Wil Tan
Wilfred Hughes
William Edwards
William ML Leslie
William T Olson
William Woodruff
Wilson Mo
wim glenn
Winson Luk
Wolfgang Maier
Wu Zhenyu
XAMES3
Xavier Fernandez
xoviat
xtreak
YAMAMOTO Takashi
Yen Chi Hsuan
Yeray Diaz Diaz
Yoval P
Yu Jian
Yuan Jing Vincent Yan
Yusuke Hayashi
Zearin
Zhiping Deng
ziebam
Zvezdan Petkovic
Łukasz Langa
Роман Донченко
Семён Марьясин
rekcäH nitraM

View file

@ -0,0 +1,20 @@
Copyright (c) 2008-present The pip developers (see AUTHORS.txt file)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,88 @@
Metadata-Version: 2.1
Name: pip
Version: 24.0
Summary: The PyPA recommended tool for installing Python packages.
Author-email: The pip developers <distutils-sig@python.org>
License: MIT
Project-URL: Homepage, https://pip.pypa.io/
Project-URL: Documentation, https://pip.pypa.io
Project-URL: Source, https://github.com/pypa/pip
Project-URL: Changelog, https://pip.pypa.io/en/stable/news/
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Topic :: Software Development :: Build Tools
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.txt
License-File: AUTHORS.txt
pip - The Python Package Installer
==================================
.. image:: https://img.shields.io/pypi/v/pip.svg
:target: https://pypi.org/project/pip/
:alt: PyPI
.. image:: https://img.shields.io/pypi/pyversions/pip
:target: https://pypi.org/project/pip
:alt: PyPI - Python Version
.. image:: https://readthedocs.org/projects/pip/badge/?version=latest
:target: https://pip.pypa.io/en/latest
:alt: Documentation
pip is the `package installer`_ for Python. You can use pip to install packages from the `Python Package Index`_ and other indexes.
Please take a look at our documentation for how to install and use pip:
* `Installation`_
* `Usage`_
We release updates regularly, with a new version every 3 months. Find more details in our documentation:
* `Release notes`_
* `Release process`_
If you find bugs, need help, or want to talk to the developers, please use our mailing lists or chat rooms:
* `Issue tracking`_
* `Discourse channel`_
* `User IRC`_
If you want to get involved head over to GitHub to get the source code, look at our development documentation and feel free to jump on the developer mailing lists and chat rooms:
* `GitHub page`_
* `Development documentation`_
* `Development IRC`_
Code of Conduct
---------------
Everyone interacting in the pip project's codebases, issue trackers, chat
rooms, and mailing lists is expected to follow the `PSF Code of Conduct`_.
.. _package installer: https://packaging.python.org/guides/tool-recommendations/
.. _Python Package Index: https://pypi.org
.. _Installation: https://pip.pypa.io/en/stable/installation/
.. _Usage: https://pip.pypa.io/en/stable/
.. _Release notes: https://pip.pypa.io/en/stable/news.html
.. _Release process: https://pip.pypa.io/en/latest/development/release-process/
.. _GitHub page: https://github.com/pypa/pip
.. _Development documentation: https://pip.pypa.io/en/latest/development
.. _Issue tracking: https://github.com/pypa/pip/issues
.. _Discourse channel: https://discuss.python.org/c/packaging
.. _User IRC: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa
.. _Development IRC: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa-dev
.. _PSF Code of Conduct: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.42.0)
Root-Is-Purelib: true
Tag: py3-none-any

View file

@ -0,0 +1,4 @@
[console_scripts]
pip = pip._internal.cli.main:main
pip3 = pip._internal.cli.main:main
pip3.12 = pip._internal.cli.main:main

View file

@ -0,0 +1,13 @@
from typing import List, Optional
__version__ = "24.0"
def main(args: Optional[List[str]] = None) -> int:
"""This is an internal API only meant for use by pip's own console scripts.
For additional details, see https://github.com/pypa/pip/issues/7498.
"""
from pip._internal.utils.entrypoints import _wrapper
return _wrapper(args)

View file

@ -0,0 +1,24 @@
import os
import sys
# Remove '' and current working directory from the first entry
# of sys.path, if present to avoid using current directory
# in pip commands check, freeze, install, list and show,
# when invoked as python -m pip <command>
if sys.path[0] in ("", os.getcwd()):
sys.path.pop(0)
# If we are running from a wheel, add the wheel to sys.path
# This allows the usage python pip-*.whl/pip install pip-*.whl
if __package__ == "":
# __file__ is pip-*.whl/pip/__main__.py
# first dirname call strips of '/__main__.py', second strips off '/pip'
# Resulting path is the name of the wheel itself
# Add that to sys.path so we can import pip
path = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, path)
if __name__ == "__main__":
from pip._internal.cli.main import main as _main
sys.exit(_main())

View file

@ -0,0 +1,50 @@
"""Execute exactly this copy of pip, within a different environment.
This file is named as it is, to ensure that this module can't be imported via
an import statement.
"""
# /!\ This version compatibility check section must be Python 2 compatible. /!\
import sys
# Copied from setup.py
PYTHON_REQUIRES = (3, 7)
def version_str(version): # type: ignore
return ".".join(str(v) for v in version)
if sys.version_info[:2] < PYTHON_REQUIRES:
raise SystemExit(
"This version of pip does not support python {} (requires >={}).".format(
version_str(sys.version_info[:2]), version_str(PYTHON_REQUIRES)
)
)
# From here on, we can use Python 3 features, but the syntax must remain
# Python 2 compatible.
import runpy # noqa: E402
from importlib.machinery import PathFinder # noqa: E402
from os.path import dirname # noqa: E402
PIP_SOURCES_ROOT = dirname(dirname(__file__))
class PipImportRedirectingFinder:
@classmethod
def find_spec(self, fullname, path=None, target=None): # type: ignore
if fullname != "pip":
return None
spec = PathFinder.find_spec(fullname, [PIP_SOURCES_ROOT], target)
assert spec, (PIP_SOURCES_ROOT, fullname)
return spec
sys.meta_path.insert(0, PipImportRedirectingFinder())
assert __name__ == "__main__", "Cannot run __pip-runner__.py as a non-main module"
runpy.run_module("pip", run_name="__main__", alter_sys=True)

View file

@ -0,0 +1,18 @@
from typing import List, Optional
from pip._internal.utils import _log
# init_logging() must be called before any call to logging.getLogger()
# which happens at import of most modules.
_log.init_logging()
def main(args: (Optional[List[str]]) = None) -> int:
"""This is preserved for old console scripts that may still be referencing
it.
For additional details, see https://github.com/pypa/pip/issues/7498.
"""
from pip._internal.utils.entrypoints import _wrapper
return _wrapper(args)

View file

@ -0,0 +1,311 @@
"""Build Environment used for isolation during sdist building
"""
import logging
import os
import pathlib
import site
import sys
import textwrap
from collections import OrderedDict
from types import TracebackType
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union
from pip._vendor.certifi import where
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.version import Version
from pip import __file__ as pip_location
from pip._internal.cli.spinners import open_spinner
from pip._internal.locations import get_platlib, get_purelib, get_scheme
from pip._internal.metadata import get_default_environment, get_environment
from pip._internal.utils.subprocess import call_subprocess
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
if TYPE_CHECKING:
from pip._internal.index.package_finder import PackageFinder
logger = logging.getLogger(__name__)
def _dedup(a: str, b: str) -> Union[Tuple[str], Tuple[str, str]]:
return (a, b) if a != b else (a,)
class _Prefix:
def __init__(self, path: str) -> None:
self.path = path
self.setup = False
scheme = get_scheme("", prefix=path)
self.bin_dir = scheme.scripts
self.lib_dirs = _dedup(scheme.purelib, scheme.platlib)
def get_runnable_pip() -> str:
"""Get a file to pass to a Python executable, to run the currently-running pip.
This is used to run a pip subprocess, for installing requirements into the build
environment.
"""
source = pathlib.Path(pip_location).resolve().parent
if not source.is_dir():
# This would happen if someone is using pip from inside a zip file. In that
# case, we can use that directly.
return str(source)
return os.fsdecode(source / "__pip-runner__.py")
def _get_system_sitepackages() -> Set[str]:
"""Get system site packages
Usually from site.getsitepackages,
but fallback on `get_purelib()/get_platlib()` if unavailable
(e.g. in a virtualenv created by virtualenv<20)
Returns normalized set of strings.
"""
if hasattr(site, "getsitepackages"):
system_sites = site.getsitepackages()
else:
# virtualenv < 20 overwrites site.py without getsitepackages
# fallback on get_purelib/get_platlib.
# this is known to miss things, but shouldn't in the cases
# where getsitepackages() has been removed (inside a virtualenv)
system_sites = [get_purelib(), get_platlib()]
return {os.path.normcase(path) for path in system_sites}
class BuildEnvironment:
"""Creates and manages an isolated environment to install build deps"""
def __init__(self) -> None:
temp_dir = TempDirectory(kind=tempdir_kinds.BUILD_ENV, globally_managed=True)
self._prefixes = OrderedDict(
(name, _Prefix(os.path.join(temp_dir.path, name)))
for name in ("normal", "overlay")
)
self._bin_dirs: List[str] = []
self._lib_dirs: List[str] = []
for prefix in reversed(list(self._prefixes.values())):
self._bin_dirs.append(prefix.bin_dir)
self._lib_dirs.extend(prefix.lib_dirs)
# Customize site to:
# - ensure .pth files are honored
# - prevent access to system site packages
system_sites = _get_system_sitepackages()
self._site_dir = os.path.join(temp_dir.path, "site")
if not os.path.exists(self._site_dir):
os.mkdir(self._site_dir)
with open(
os.path.join(self._site_dir, "sitecustomize.py"), "w", encoding="utf-8"
) as fp:
fp.write(
textwrap.dedent(
"""
import os, site, sys
# First, drop system-sites related paths.
original_sys_path = sys.path[:]
known_paths = set()
for path in {system_sites!r}:
site.addsitedir(path, known_paths=known_paths)
system_paths = set(
os.path.normcase(path)
for path in sys.path[len(original_sys_path):]
)
original_sys_path = [
path for path in original_sys_path
if os.path.normcase(path) not in system_paths
]
sys.path = original_sys_path
# Second, add lib directories.
# ensuring .pth file are processed.
for path in {lib_dirs!r}:
assert not path in sys.path
site.addsitedir(path)
"""
).format(system_sites=system_sites, lib_dirs=self._lib_dirs)
)
def __enter__(self) -> None:
self._save_env = {
name: os.environ.get(name, None)
for name in ("PATH", "PYTHONNOUSERSITE", "PYTHONPATH")
}
path = self._bin_dirs[:]
old_path = self._save_env["PATH"]
if old_path:
path.extend(old_path.split(os.pathsep))
pythonpath = [self._site_dir]
os.environ.update(
{
"PATH": os.pathsep.join(path),
"PYTHONNOUSERSITE": "1",
"PYTHONPATH": os.pathsep.join(pythonpath),
}
)
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> None:
for varname, old_value in self._save_env.items():
if old_value is None:
os.environ.pop(varname, None)
else:
os.environ[varname] = old_value
def check_requirements(
self, reqs: Iterable[str]
) -> Tuple[Set[Tuple[str, str]], Set[str]]:
"""Return 2 sets:
- conflicting requirements: set of (installed, wanted) reqs tuples
- missing requirements: set of reqs
"""
missing = set()
conflicting = set()
if reqs:
env = (
get_environment(self._lib_dirs)
if hasattr(self, "_lib_dirs")
else get_default_environment()
)
for req_str in reqs:
req = Requirement(req_str)
# We're explicitly evaluating with an empty extra value, since build
# environments are not provided any mechanism to select specific extras.
if req.marker is not None and not req.marker.evaluate({"extra": ""}):
continue
dist = env.get_distribution(req.name)
if not dist:
missing.add(req_str)
continue
if isinstance(dist.version, Version):
installed_req_str = f"{req.name}=={dist.version}"
else:
installed_req_str = f"{req.name}==={dist.version}"
if not req.specifier.contains(dist.version, prereleases=True):
conflicting.add((installed_req_str, req_str))
# FIXME: Consider direct URL?
return conflicting, missing
def install_requirements(
self,
finder: "PackageFinder",
requirements: Iterable[str],
prefix_as_string: str,
*,
kind: str,
) -> None:
prefix = self._prefixes[prefix_as_string]
assert not prefix.setup
prefix.setup = True
if not requirements:
return
self._install_requirements(
get_runnable_pip(),
finder,
requirements,
prefix,
kind=kind,
)
@staticmethod
def _install_requirements(
pip_runnable: str,
finder: "PackageFinder",
requirements: Iterable[str],
prefix: _Prefix,
*,
kind: str,
) -> None:
args: List[str] = [
sys.executable,
pip_runnable,
"install",
"--ignore-installed",
"--no-user",
"--prefix",
prefix.path,
"--no-warn-script-location",
]
if logger.getEffectiveLevel() <= logging.DEBUG:
args.append("-v")
for format_control in ("no_binary", "only_binary"):
formats = getattr(finder.format_control, format_control)
args.extend(
(
"--" + format_control.replace("_", "-"),
",".join(sorted(formats or {":none:"})),
)
)
index_urls = finder.index_urls
if index_urls:
args.extend(["-i", index_urls[0]])
for extra_index in index_urls[1:]:
args.extend(["--extra-index-url", extra_index])
else:
args.append("--no-index")
for link in finder.find_links:
args.extend(["--find-links", link])
for host in finder.trusted_hosts:
args.extend(["--trusted-host", host])
if finder.allow_all_prereleases:
args.append("--pre")
if finder.prefer_binary:
args.append("--prefer-binary")
args.append("--")
args.extend(requirements)
extra_environ = {"_PIP_STANDALONE_CERT": where()}
with open_spinner(f"Installing {kind}") as spinner:
call_subprocess(
args,
command_desc=f"pip subprocess to install {kind}",
spinner=spinner,
extra_environ=extra_environ,
)
class NoOpBuildEnvironment(BuildEnvironment):
"""A no-op drop-in replacement for BuildEnvironment"""
def __init__(self) -> None:
pass
def __enter__(self) -> None:
pass
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> None:
pass
def cleanup(self) -> None:
pass
def install_requirements(
self,
finder: "PackageFinder",
requirements: Iterable[str],
prefix_as_string: str,
*,
kind: str,
) -> None:
raise NotImplementedError()

View file

@ -0,0 +1,290 @@
"""Cache Management
"""
import hashlib
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
logger = logging.getLogger(__name__)
ORIGIN_JSON_NAME = "origin.json"
def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
return hashlib.sha224(s.encode("ascii")).hexdigest()
class Cache:
"""An abstract class - provides cache directories for data from links
:param cache_dir: The root of the cache.
"""
def __init__(self, cache_dir: str) -> None:
super().__init__()
assert not cache_dir or os.path.isabs(cache_dir)
self.cache_dir = cache_dir or None
def _get_cache_path_parts(self, link: Link) -> List[str]:
"""Get parts of part that must be os.path.joined with cache_dir"""
# We want to generate an url to use as our cache key, we don't want to
# just re-use the URL because it might have other items in the fragment
# and we don't care about those.
key_parts = {"url": link.url_without_fragment}
if link.hash_name is not None and link.hash is not None:
key_parts[link.hash_name] = link.hash
if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment
# Include interpreter name, major and minor version in cache key
# to cope with ill-behaved sdists that build a different wheel
# depending on the python version their setup.py is being run on,
# and don't encode the difference in compatibility tags.
# https://github.com/pypa/pip/issues/7296
key_parts["interpreter_name"] = interpreter_name()
key_parts["interpreter_version"] = interpreter_version()
# Encode our key url with sha224, we'll use this because it has similar
# security properties to sha256, but with a shorter total output (and
# thus less secure). However the differences don't make a lot of
# difference for our use case here.
hashed = _hash_dict(key_parts)
# We want to nest the directories some to prevent having a ton of top
# level directories where we might run out of sub directories on some
# FS.
parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
return parts
def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
can_not_cache = not self.cache_dir or not canonical_package_name or not link
if can_not_cache:
return []
path = self.get_path_for_link(link)
if os.path.isdir(path):
return [(candidate, path) for candidate in os.listdir(path)]
return []
def get_path_for_link(self, link: Link) -> str:
"""Return a directory to store cached items in for link."""
raise NotImplementedError()
def get(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
"""Returns a link to a cached item if it exists, otherwise returns the
passed link.
"""
raise NotImplementedError()
class SimpleWheelCache(Cache):
"""A cache of wheels for future installs."""
def __init__(self, cache_dir: str) -> None:
super().__init__(cache_dir)
def get_path_for_link(self, link: Link) -> str:
"""Return a directory to store cached wheels for link
Because there are M wheels for any one sdist, we provide a directory
to cache them in, and then consult that directory when looking up
cache hits.
We only insert things into the cache if they have plausible version
numbers, so that we don't contaminate the cache with things that were
not unique. E.g. ./package might have dozens of installs done for it
and build a version of 0.0...and if we built and cached a wheel, we'd
end up using the same wheel even if the source has been edited.
:param link: The link of the sdist for which this will cache wheels.
"""
parts = self._get_cache_path_parts(link)
assert self.cache_dir
# Store wheels within the root cache_dir
return os.path.join(self.cache_dir, "wheels", *parts)
def get(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
candidates = []
if not package_name:
return link
canonical_package_name = canonicalize_name(package_name)
for wheel_name, wheel_dir in self._get_candidates(link, canonical_package_name):
try:
wheel = Wheel(wheel_name)
except InvalidWheelFilename:
continue
if canonicalize_name(wheel.name) != canonical_package_name:
logger.debug(
"Ignoring cached wheel %s for %s as it "
"does not match the expected distribution name %s.",
wheel_name,
link,
package_name,
)
continue
if not wheel.supported(supported_tags):
# Built for a different python/arch/etc
continue
candidates.append(
(
wheel.support_index_min(supported_tags),
wheel_name,
wheel_dir,
)
)
if not candidates:
return link
_, wheel_name, wheel_dir = min(candidates)
return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
class EphemWheelCache(SimpleWheelCache):
"""A SimpleWheelCache that creates it's own temporary cache directory"""
def __init__(self) -> None:
self._temp_dir = TempDirectory(
kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
globally_managed=True,
)
super().__init__(self._temp_dir.path)
class CacheEntry:
def __init__(
self,
link: Link,
persistent: bool,
):
self.link = link
self.persistent = persistent
self.origin: Optional[DirectUrl] = None
origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
if origin_direct_url_path.exists():
try:
self.origin = DirectUrl.from_json(
origin_direct_url_path.read_text(encoding="utf-8")
)
except Exception as e:
logger.warning(
"Ignoring invalid cache entry origin file %s for %s (%s)",
origin_direct_url_path,
link.filename,
e,
)
class WheelCache(Cache):
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache
This Cache allows for gracefully degradation, using the ephem wheel cache
when a certain link is not found in the simple wheel cache first.
"""
def __init__(self, cache_dir: str) -> None:
super().__init__(cache_dir)
self._wheel_cache = SimpleWheelCache(cache_dir)
self._ephem_cache = EphemWheelCache()
def get_path_for_link(self, link: Link) -> str:
return self._wheel_cache.get_path_for_link(link)
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)
def get(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
cache_entry = self.get_cache_entry(link, package_name, supported_tags)
if cache_entry is None:
return link
return cache_entry.link
def get_cache_entry(
self,
link: Link,
package_name: Optional[str],
supported_tags: List[Tag],
) -> Optional[CacheEntry]:
"""Returns a CacheEntry with a link to a cached item if it exists or
None. The cache entry indicates if the item was found in the persistent
or ephemeral cache.
"""
retval = self._wheel_cache.get(
link=link,
package_name=package_name,
supported_tags=supported_tags,
)
if retval is not link:
return CacheEntry(retval, persistent=True)
retval = self._ephem_cache.get(
link=link,
package_name=package_name,
supported_tags=supported_tags,
)
if retval is not link:
return CacheEntry(retval, persistent=False)
return None
@staticmethod
def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
if origin_path.exists():
try:
origin = DirectUrl.from_json(origin_path.read_text(encoding="utf-8"))
except Exception as e:
logger.warning(
"Could not read origin file %s in cache entry (%s). "
"Will attempt to overwrite it.",
origin_path,
e,
)
else:
# TODO: use DirectUrl.equivalent when
# https://github.com/pypa/pip/pull/10564 is merged.
if origin.url != download_info.url:
logger.warning(
"Origin URL %s in cache entry %s does not match download URL "
"%s. This is likely a pip bug or a cache corruption issue. "
"Will overwrite it with the new value.",
origin.url,
cache_dir,
download_info.url,
)
origin_path.write_text(download_info.to_json(), encoding="utf-8")

View file

@ -0,0 +1,4 @@
"""Subpackage containing all of pip's command line interface related code
"""
# This file intentionally does not import submodules

View file

@ -0,0 +1,172 @@
"""Logic that powers autocompletion installed by ``pip completion``.
"""
import optparse
import os
import sys
from itertools import chain
from typing import Any, Iterable, List, Optional
from pip._internal.cli.main_parser import create_main_parser
from pip._internal.commands import commands_dict, create_command
from pip._internal.metadata import get_default_environment
def autocomplete() -> None:
"""Entry Point for completion of main and subcommand options."""
# Don't complete if user hasn't sourced bash_completion file.
if "PIP_AUTO_COMPLETE" not in os.environ:
return
cwords = os.environ["COMP_WORDS"].split()[1:]
cword = int(os.environ["COMP_CWORD"])
try:
current = cwords[cword - 1]
except IndexError:
current = ""
parser = create_main_parser()
subcommands = list(commands_dict)
options = []
# subcommand
subcommand_name: Optional[str] = None
for word in cwords:
if word in subcommands:
subcommand_name = word
break
# subcommand options
if subcommand_name is not None:
# special case: 'help' subcommand has no options
if subcommand_name == "help":
sys.exit(1)
# special case: list locally installed dists for show and uninstall
should_list_installed = not current.startswith("-") and subcommand_name in [
"show",
"uninstall",
]
if should_list_installed:
env = get_default_environment()
lc = current.lower()
installed = [
dist.canonical_name
for dist in env.iter_installed_distributions(local_only=True)
if dist.canonical_name.startswith(lc)
and dist.canonical_name not in cwords[1:]
]
# if there are no dists installed, fall back to option completion
if installed:
for dist in installed:
print(dist)
sys.exit(1)
should_list_installables = (
not current.startswith("-") and subcommand_name == "install"
)
if should_list_installables:
for path in auto_complete_paths(current, "path"):
print(path)
sys.exit(1)
subcommand = create_command(subcommand_name)
for opt in subcommand.parser.option_list_all:
if opt.help != optparse.SUPPRESS_HELP:
options += [
(opt_str, opt.nargs) for opt_str in opt._long_opts + opt._short_opts
]
# filter out previously specified options from available options
prev_opts = [x.split("=")[0] for x in cwords[1 : cword - 1]]
options = [(x, v) for (x, v) in options if x not in prev_opts]
# filter options by current input
options = [(k, v) for k, v in options if k.startswith(current)]
# get completion type given cwords and available subcommand options
completion_type = get_path_completion_type(
cwords,
cword,
subcommand.parser.option_list_all,
)
# get completion files and directories if ``completion_type`` is
# ``<file>``, ``<dir>`` or ``<path>``
if completion_type:
paths = auto_complete_paths(current, completion_type)
options = [(path, 0) for path in paths]
for option in options:
opt_label = option[0]
# append '=' to options which require args
if option[1] and option[0][:2] == "--":
opt_label += "="
print(opt_label)
else:
# show main parser options only when necessary
opts = [i.option_list for i in parser.option_groups]
opts.append(parser.option_list)
flattened_opts = chain.from_iterable(opts)
if current.startswith("-"):
for opt in flattened_opts:
if opt.help != optparse.SUPPRESS_HELP:
subcommands += opt._long_opts + opt._short_opts
else:
# get completion type given cwords and all available options
completion_type = get_path_completion_type(cwords, cword, flattened_opts)
if completion_type:
subcommands = list(auto_complete_paths(current, completion_type))
print(" ".join([x for x in subcommands if x.startswith(current)]))
sys.exit(1)
def get_path_completion_type(
cwords: List[str], cword: int, opts: Iterable[Any]
) -> Optional[str]:
"""Get the type of path completion (``file``, ``dir``, ``path`` or None)
:param cwords: same as the environmental variable ``COMP_WORDS``
:param cword: same as the environmental variable ``COMP_CWORD``
:param opts: The available options to check
:return: path completion type (``file``, ``dir``, ``path`` or None)
"""
if cword < 2 or not cwords[cword - 2].startswith("-"):
return None
for opt in opts:
if opt.help == optparse.SUPPRESS_HELP:
continue
for o in str(opt).split("/"):
if cwords[cword - 2].split("=")[0] == o:
if not opt.metavar or any(
x in ("path", "file", "dir") for x in opt.metavar.split("/")
):
return opt.metavar
return None
def auto_complete_paths(current: str, completion_type: str) -> Iterable[str]:
"""If ``completion_type`` is ``file`` or ``path``, list all regular files
and directories starting with ``current``; otherwise only list directories
starting with ``current``.
:param current: The word to be completed
:param completion_type: path completion type(``file``, ``path`` or ``dir``)
:return: A generator of regular files and/or directories
"""
directory, filename = os.path.split(current)
current_path = os.path.abspath(directory)
# Don't complete paths if they can't be accessed
if not os.access(current_path, os.R_OK):
return
filename = os.path.normcase(filename)
# list all files that start with ``filename``
file_list = (
x for x in os.listdir(current_path) if os.path.normcase(x).startswith(filename)
)
for f in file_list:
opt = os.path.join(current_path, f)
comp_file = os.path.normcase(os.path.join(directory, f))
# complete regular files when there is not ``<dir>`` after option
# complete directories when there is ``<file>``, ``<path>`` or
# ``<dir>``after option
if completion_type != "dir" and os.path.isfile(opt):
yield comp_file
elif os.path.isdir(opt):
yield os.path.join(comp_file, "")

View file

@ -0,0 +1,236 @@
"""Base Command class, and related routines"""
import functools
import logging
import logging.config
import optparse
import os
import sys
import traceback
from optparse import Values
from typing import Any, Callable, List, Optional, Tuple
from pip._vendor.rich import traceback as rich_traceback
from pip._internal.cli import cmdoptions
from pip._internal.cli.command_context import CommandContextMixIn
from pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
from pip._internal.cli.status_codes import (
ERROR,
PREVIOUS_BUILD_DIR_ERROR,
UNKNOWN_ERROR,
VIRTUALENV_NOT_FOUND,
)
from pip._internal.exceptions import (
BadCommand,
CommandError,
DiagnosticPipError,
InstallationError,
NetworkConnectionError,
PreviousBuildDirError,
UninstallationError,
)
from pip._internal.utils.filesystem import check_path_owner
from pip._internal.utils.logging import BrokenStdoutLoggingError, setup_logging
from pip._internal.utils.misc import get_prog, normalize_path
from pip._internal.utils.temp_dir import TempDirectoryTypeRegistry as TempDirRegistry
from pip._internal.utils.temp_dir import global_tempdir_manager, tempdir_registry
from pip._internal.utils.virtualenv import running_under_virtualenv
__all__ = ["Command"]
logger = logging.getLogger(__name__)
class Command(CommandContextMixIn):
usage: str = ""
ignore_require_venv: bool = False
def __init__(self, name: str, summary: str, isolated: bool = False) -> None:
super().__init__()
self.name = name
self.summary = summary
self.parser = ConfigOptionParser(
usage=self.usage,
prog=f"{get_prog()} {name}",
formatter=UpdatingDefaultsHelpFormatter(),
add_help_option=False,
name=name,
description=self.__doc__,
isolated=isolated,
)
self.tempdir_registry: Optional[TempDirRegistry] = None
# Commands should add options to this option group
optgroup_name = f"{self.name.capitalize()} Options"
self.cmd_opts = optparse.OptionGroup(self.parser, optgroup_name)
# Add the general options
gen_opts = cmdoptions.make_option_group(
cmdoptions.general_group,
self.parser,
)
self.parser.add_option_group(gen_opts)
self.add_options()
def add_options(self) -> None:
pass
def handle_pip_version_check(self, options: Values) -> None:
"""
This is a no-op so that commands by default do not do the pip version
check.
"""
# Make sure we do the pip version check if the index_group options
# are present.
assert not hasattr(options, "no_index")
def run(self, options: Values, args: List[str]) -> int:
raise NotImplementedError
def parse_args(self, args: List[str]) -> Tuple[Values, List[str]]:
# factored out for testability
return self.parser.parse_args(args)
def main(self, args: List[str]) -> int:
try:
with self.main_context():
return self._main(args)
finally:
logging.shutdown()
def _main(self, args: List[str]) -> int:
# We must initialize this before the tempdir manager, otherwise the
# configuration would not be accessible by the time we clean up the
# tempdir manager.
self.tempdir_registry = self.enter_context(tempdir_registry())
# Intentionally set as early as possible so globally-managed temporary
# directories are available to the rest of the code.
self.enter_context(global_tempdir_manager())
options, args = self.parse_args(args)
# Set verbosity so that it can be used elsewhere.
self.verbosity = options.verbose - options.quiet
level_number = setup_logging(
verbosity=self.verbosity,
no_color=options.no_color,
user_log_file=options.log,
)
always_enabled_features = set(options.features_enabled) & set(
cmdoptions.ALWAYS_ENABLED_FEATURES
)
if always_enabled_features:
logger.warning(
"The following features are always enabled: %s. ",
", ".join(sorted(always_enabled_features)),
)
# Make sure that the --python argument isn't specified after the
# subcommand. We can tell, because if --python was specified,
# we should only reach this point if we're running in the created
# subprocess, which has the _PIP_RUNNING_IN_SUBPROCESS environment
# variable set.
if options.python and "_PIP_RUNNING_IN_SUBPROCESS" not in os.environ:
logger.critical(
"The --python option must be placed before the pip subcommand name"
)
sys.exit(ERROR)
# TODO: Try to get these passing down from the command?
# without resorting to os.environ to hold these.
# This also affects isolated builds and it should.
if options.no_input:
os.environ["PIP_NO_INPUT"] = "1"
if options.exists_action:
os.environ["PIP_EXISTS_ACTION"] = " ".join(options.exists_action)
if options.require_venv and not self.ignore_require_venv:
# If a venv is required check if it can really be found
if not running_under_virtualenv():
logger.critical("Could not find an activated virtualenv (required).")
sys.exit(VIRTUALENV_NOT_FOUND)
if options.cache_dir:
options.cache_dir = normalize_path(options.cache_dir)
if not check_path_owner(options.cache_dir):
logger.warning(
"The directory '%s' or its parent directory is not owned "
"or is not writable by the current user. The cache "
"has been disabled. Check the permissions and owner of "
"that directory. If executing pip with sudo, you should "
"use sudo's -H flag.",
options.cache_dir,
)
options.cache_dir = None
def intercepts_unhandled_exc(
run_func: Callable[..., int]
) -> Callable[..., int]:
@functools.wraps(run_func)
def exc_logging_wrapper(*args: Any) -> int:
try:
status = run_func(*args)
assert isinstance(status, int)
return status
except DiagnosticPipError as exc:
logger.error("%s", exc, extra={"rich": True})
logger.debug("Exception information:", exc_info=True)
return ERROR
except PreviousBuildDirError as exc:
logger.critical(str(exc))
logger.debug("Exception information:", exc_info=True)
return PREVIOUS_BUILD_DIR_ERROR
except (
InstallationError,
UninstallationError,
BadCommand,
NetworkConnectionError,
) as exc:
logger.critical(str(exc))
logger.debug("Exception information:", exc_info=True)
return ERROR
except CommandError as exc:
logger.critical("%s", exc)
logger.debug("Exception information:", exc_info=True)
return ERROR
except BrokenStdoutLoggingError:
# Bypass our logger and write any remaining messages to
# stderr because stdout no longer works.
print("ERROR: Pipe to stdout was broken", file=sys.stderr)
if level_number <= logging.DEBUG:
traceback.print_exc(file=sys.stderr)
return ERROR
except KeyboardInterrupt:
logger.critical("Operation cancelled by user")
logger.debug("Exception information:", exc_info=True)
return ERROR
except BaseException:
logger.critical("Exception:", exc_info=True)
return UNKNOWN_ERROR
return exc_logging_wrapper
try:
if not options.debug_mode:
run = intercepts_unhandled_exc(self.run)
else:
run = self.run
rich_traceback.install(show_locals=True)
return run(options, args)
finally:
self.handle_pip_version_check(options)

View file

@ -0,0 +1,27 @@
from contextlib import ExitStack, contextmanager
from typing import ContextManager, Generator, TypeVar
_T = TypeVar("_T", covariant=True)
class CommandContextMixIn:
def __init__(self) -> None:
super().__init__()
self._in_main_context = False
self._main_context = ExitStack()
@contextmanager
def main_context(self) -> Generator[None, None, None]:
assert not self._in_main_context
self._in_main_context = True
try:
with self._main_context:
yield
finally:
self._in_main_context = False
def enter_context(self, context_provider: ContextManager[_T]) -> _T:
assert self._in_main_context
return self._main_context.enter_context(context_provider)

View file

@ -0,0 +1,79 @@
"""Primary application entrypoint.
"""
import locale
import logging
import os
import sys
import warnings
from typing import List, Optional
from pip._internal.cli.autocompletion import autocomplete
from pip._internal.cli.main_parser import parse_command
from pip._internal.commands import create_command
from pip._internal.exceptions import PipError
from pip._internal.utils import deprecation
logger = logging.getLogger(__name__)
# Do not import and use main() directly! Using it directly is actively
# discouraged by pip's maintainers. The name, location and behavior of
# this function is subject to change, so calling it directly is not
# portable across different pip versions.
# In addition, running pip in-process is unsupported and unsafe. This is
# elaborated in detail at
# https://pip.pypa.io/en/stable/user_guide/#using-pip-from-your-program.
# That document also provides suggestions that should work for nearly
# all users that are considering importing and using main() directly.
# However, we know that certain users will still want to invoke pip
# in-process. If you understand and accept the implications of using pip
# in an unsupported manner, the best approach is to use runpy to avoid
# depending on the exact location of this entry point.
# The following example shows how to use runpy to invoke pip in that
# case:
#
# sys.argv = ["pip", your, args, here]
# runpy.run_module("pip", run_name="__main__")
#
# Note that this will exit the process after running, unlike a direct
# call to main. As it is not safe to do any processing after calling
# main, this should not be an issue in practice.
def main(args: Optional[List[str]] = None) -> int:
if args is None:
args = sys.argv[1:]
# Suppress the pkg_resources deprecation warning
# Note - we use a module of .*pkg_resources to cover
# the normal case (pip._vendor.pkg_resources) and the
# devendored case (a bare pkg_resources)
warnings.filterwarnings(
action="ignore", category=DeprecationWarning, module=".*pkg_resources"
)
# Configure our deprecation warnings to be sent through loggers
deprecation.install_warning_logger()
autocomplete()
try:
cmd_name, cmd_args = parse_command(args)
except PipError as exc:
sys.stderr.write(f"ERROR: {exc}")
sys.stderr.write(os.linesep)
sys.exit(1)
# Needed for locale.getpreferredencoding(False) to work
# in pip._internal.utils.encoding.auto_decode
try:
locale.setlocale(locale.LC_ALL, "")
except locale.Error as e:
# setlocale can apparently crash if locale are uninitialized
logger.debug("Ignoring error %s when setting locale", e)
command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
return command.main(cmd_args)

View file

@ -0,0 +1,134 @@
"""A single place for constructing and exposing the main parser
"""
import os
import subprocess
import sys
from typing import List, Optional, Tuple
from pip._internal.build_env import get_runnable_pip
from pip._internal.cli import cmdoptions
from pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
from pip._internal.commands import commands_dict, get_similar_commands
from pip._internal.exceptions import CommandError
from pip._internal.utils.misc import get_pip_version, get_prog
__all__ = ["create_main_parser", "parse_command"]
def create_main_parser() -> ConfigOptionParser:
"""Creates and returns the main parser for pip's CLI"""
parser = ConfigOptionParser(
usage="\n%prog <command> [options]",
add_help_option=False,
formatter=UpdatingDefaultsHelpFormatter(),
name="global",
prog=get_prog(),
)
parser.disable_interspersed_args()
parser.version = get_pip_version()
# add the general options
gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
parser.add_option_group(gen_opts)
# so the help formatter knows
parser.main = True # type: ignore
# create command listing for description
description = [""] + [
f"{name:27} {command_info.summary}"
for name, command_info in commands_dict.items()
]
parser.description = "\n".join(description)
return parser
def identify_python_interpreter(python: str) -> Optional[str]:
# If the named file exists, use it.
# If it's a directory, assume it's a virtual environment and
# look for the environment's Python executable.
if os.path.exists(python):
if os.path.isdir(python):
# bin/python for Unix, Scripts/python.exe for Windows
# Try both in case of odd cases like cygwin.
for exe in ("bin/python", "Scripts/python.exe"):
py = os.path.join(python, exe)
if os.path.exists(py):
return py
else:
return python
# Could not find the interpreter specified
return None
def parse_command(args: List[str]) -> Tuple[str, List[str]]:
parser = create_main_parser()
# Note: parser calls disable_interspersed_args(), so the result of this
# call is to split the initial args into the general options before the
# subcommand and everything else.
# For example:
# args: ['--timeout=5', 'install', '--user', 'INITools']
# general_options: ['--timeout==5']
# args_else: ['install', '--user', 'INITools']
general_options, args_else = parser.parse_args(args)
# --python
if general_options.python and "_PIP_RUNNING_IN_SUBPROCESS" not in os.environ:
# Re-invoke pip using the specified Python interpreter
interpreter = identify_python_interpreter(general_options.python)
if interpreter is None:
raise CommandError(
f"Could not locate Python interpreter {general_options.python}"
)
pip_cmd = [
interpreter,
get_runnable_pip(),
]
pip_cmd.extend(args)
# Set a flag so the child doesn't re-invoke itself, causing
# an infinite loop.
os.environ["_PIP_RUNNING_IN_SUBPROCESS"] = "1"
returncode = 0
try:
proc = subprocess.run(pip_cmd)
returncode = proc.returncode
except (subprocess.SubprocessError, OSError) as exc:
raise CommandError(f"Failed to run pip under {interpreter}: {exc}")
sys.exit(returncode)
# --version
if general_options.version:
sys.stdout.write(parser.version)
sys.stdout.write(os.linesep)
sys.exit()
# pip || pip help -> print_help()
if not args_else or (args_else[0] == "help" and len(args_else) == 1):
parser.print_help()
sys.exit()
# the subcommand name
cmd_name = args_else[0]
if cmd_name not in commands_dict:
guess = get_similar_commands(cmd_name)
msg = [f'unknown command "{cmd_name}"']
if guess:
msg.append(f'maybe you meant "{guess}"')
raise CommandError(" - ".join(msg))
# all the args without the subcommand
cmd_args = args[:]
cmd_args.remove(cmd_name)
return cmd_name, cmd_args

View file

@ -0,0 +1,294 @@
"""Base option parser setup"""
import logging
import optparse
import shutil
import sys
import textwrap
from contextlib import suppress
from typing import Any, Dict, Generator, List, Tuple
from pip._internal.cli.status_codes import UNKNOWN_ERROR
from pip._internal.configuration import Configuration, ConfigurationError
from pip._internal.utils.misc import redact_auth_from_url, strtobool
logger = logging.getLogger(__name__)
class PrettyHelpFormatter(optparse.IndentedHelpFormatter):
"""A prettier/less verbose help formatter for optparse."""
def __init__(self, *args: Any, **kwargs: Any) -> None:
# help position must be aligned with __init__.parseopts.description
kwargs["max_help_position"] = 30
kwargs["indent_increment"] = 1
kwargs["width"] = shutil.get_terminal_size()[0] - 2
super().__init__(*args, **kwargs)
def format_option_strings(self, option: optparse.Option) -> str:
return self._format_option_strings(option)
def _format_option_strings(
self, option: optparse.Option, mvarfmt: str = " <{}>", optsep: str = ", "
) -> str:
"""
Return a comma-separated list of option strings and metavars.
:param option: tuple of (short opt, long opt), e.g: ('-f', '--format')
:param mvarfmt: metavar format string
:param optsep: separator
"""
opts = []
if option._short_opts:
opts.append(option._short_opts[0])
if option._long_opts:
opts.append(option._long_opts[0])
if len(opts) > 1:
opts.insert(1, optsep)
if option.takes_value():
assert option.dest is not None
metavar = option.metavar or option.dest.lower()
opts.append(mvarfmt.format(metavar.lower()))
return "".join(opts)
def format_heading(self, heading: str) -> str:
if heading == "Options":
return ""
return heading + ":\n"
def format_usage(self, usage: str) -> str:
"""
Ensure there is only one newline between usage and the first heading
if there is no description.
"""
msg = "\nUsage: {}\n".format(self.indent_lines(textwrap.dedent(usage), " "))
return msg
def format_description(self, description: str) -> str:
# leave full control over description to us
if description:
if hasattr(self.parser, "main"):
label = "Commands"
else:
label = "Description"
# some doc strings have initial newlines, some don't
description = description.lstrip("\n")
# some doc strings have final newlines and spaces, some don't
description = description.rstrip()
# dedent, then reindent
description = self.indent_lines(textwrap.dedent(description), " ")
description = f"{label}:\n{description}\n"
return description
else:
return ""
def format_epilog(self, epilog: str) -> str:
# leave full control over epilog to us
if epilog:
return epilog
else:
return ""
def indent_lines(self, text: str, indent: str) -> str:
new_lines = [indent + line for line in text.split("\n")]
return "\n".join(new_lines)
class UpdatingDefaultsHelpFormatter(PrettyHelpFormatter):
"""Custom help formatter for use in ConfigOptionParser.
This is updates the defaults before expanding them, allowing
them to show up correctly in the help listing.
Also redact auth from url type options
"""
def expand_default(self, option: optparse.Option) -> str:
default_values = None
if self.parser is not None:
assert isinstance(self.parser, ConfigOptionParser)
self.parser._update_defaults(self.parser.defaults)
assert option.dest is not None
default_values = self.parser.defaults.get(option.dest)
help_text = super().expand_default(option)
if default_values and option.metavar == "URL":
if isinstance(default_values, str):
default_values = [default_values]
# If its not a list, we should abort and just return the help text
if not isinstance(default_values, list):
default_values = []
for val in default_values:
help_text = help_text.replace(val, redact_auth_from_url(val))
return help_text
class CustomOptionParser(optparse.OptionParser):
def insert_option_group(
self, idx: int, *args: Any, **kwargs: Any
) -> optparse.OptionGroup:
"""Insert an OptionGroup at a given position."""
group = self.add_option_group(*args, **kwargs)
self.option_groups.pop()
self.option_groups.insert(idx, group)
return group
@property
def option_list_all(self) -> List[optparse.Option]:
"""Get a list of all options, including those in option groups."""
res = self.option_list[:]
for i in self.option_groups:
res.extend(i.option_list)
return res
class ConfigOptionParser(CustomOptionParser):
"""Custom option parser which updates its defaults by checking the
configuration files and environmental variables"""
def __init__(
self,
*args: Any,
name: str,
isolated: bool = False,
**kwargs: Any,
) -> None:
self.name = name
self.config = Configuration(isolated)
assert self.name
super().__init__(*args, **kwargs)
def check_default(self, option: optparse.Option, key: str, val: Any) -> Any:
try:
return option.check_value(key, val)
except optparse.OptionValueError as exc:
print(f"An error occurred during configuration: {exc}")
sys.exit(3)
def _get_ordered_configuration_items(
self,
) -> Generator[Tuple[str, Any], None, None]:
# Configuration gives keys in an unordered manner. Order them.
override_order = ["global", self.name, ":env:"]
# Pool the options into different groups
section_items: Dict[str, List[Tuple[str, Any]]] = {
name: [] for name in override_order
}
for section_key, val in self.config.items():
# ignore empty values
if not val:
logger.debug(
"Ignoring configuration key '%s' as it's value is empty.",
section_key,
)
continue
section, key = section_key.split(".", 1)
if section in override_order:
section_items[section].append((key, val))
# Yield each group in their override order
for section in override_order:
for key, val in section_items[section]:
yield key, val
def _update_defaults(self, defaults: Dict[str, Any]) -> Dict[str, Any]:
"""Updates the given defaults with values from the config files and
the environ. Does a little special handling for certain types of
options (lists)."""
# Accumulate complex default state.
self.values = optparse.Values(self.defaults)
late_eval = set()
# Then set the options with those values
for key, val in self._get_ordered_configuration_items():
# '--' because configuration supports only long names
option = self.get_option("--" + key)
# Ignore options not present in this parser. E.g. non-globals put
# in [global] by users that want them to apply to all applicable
# commands.
if option is None:
continue
assert option.dest is not None
if option.action in ("store_true", "store_false"):
try:
val = strtobool(val)
except ValueError:
self.error(
f"{val} is not a valid value for {key} option, "
"please specify a boolean value like yes/no, "
"true/false or 1/0 instead."
)
elif option.action == "count":
with suppress(ValueError):
val = strtobool(val)
with suppress(ValueError):
val = int(val)
if not isinstance(val, int) or val < 0:
self.error(
f"{val} is not a valid value for {key} option, "
"please instead specify either a non-negative integer "
"or a boolean value like yes/no or false/true "
"which is equivalent to 1/0."
)
elif option.action == "append":
val = val.split()
val = [self.check_default(option, key, v) for v in val]
elif option.action == "callback":
assert option.callback is not None
late_eval.add(option.dest)
opt_str = option.get_opt_string()
val = option.convert_value(opt_str, val)
# From take_action
args = option.callback_args or ()
kwargs = option.callback_kwargs or {}
option.callback(option, opt_str, val, self, *args, **kwargs)
else:
val = self.check_default(option, key, val)
defaults[option.dest] = val
for key in late_eval:
defaults[key] = getattr(self.values, key)
self.values = None
return defaults
def get_default_values(self) -> optparse.Values:
"""Overriding to make updating the defaults after instantiation of
the option parser possible, _update_defaults() does the dirty work."""
if not self.process_default_values:
# Old, pre-Optik 1.5 behaviour.
return optparse.Values(self.defaults)
# Load the configuration, or error out in case of an error
try:
self.config.load()
except ConfigurationError as err:
self.exit(UNKNOWN_ERROR, str(err))
defaults = self._update_defaults(self.defaults.copy()) # ours
for option in self._get_all_options():
assert option.dest is not None
default = defaults.get(option.dest)
if isinstance(default, str):
opt_str = option.get_opt_string()
defaults[option.dest] = option.check_value(opt_str, default)
return optparse.Values(defaults)
def error(self, msg: str) -> None:
self.print_usage(sys.stderr)
self.exit(UNKNOWN_ERROR, f"{msg}\n")

View file

@ -0,0 +1,68 @@
import functools
from typing import Callable, Generator, Iterable, Iterator, Optional, Tuple
from pip._vendor.rich.progress import (
BarColumn,
DownloadColumn,
FileSizeColumn,
Progress,
ProgressColumn,
SpinnerColumn,
TextColumn,
TimeElapsedColumn,
TimeRemainingColumn,
TransferSpeedColumn,
)
from pip._internal.utils.logging import get_indentation
DownloadProgressRenderer = Callable[[Iterable[bytes]], Iterator[bytes]]
def _rich_progress_bar(
iterable: Iterable[bytes],
*,
bar_type: str,
size: int,
) -> Generator[bytes, None, None]:
assert bar_type == "on", "This should only be used in the default mode."
if not size:
total = float("inf")
columns: Tuple[ProgressColumn, ...] = (
TextColumn("[progress.description]{task.description}"),
SpinnerColumn("line", speed=1.5),
FileSizeColumn(),
TransferSpeedColumn(),
TimeElapsedColumn(),
)
else:
total = size
columns = (
TextColumn("[progress.description]{task.description}"),
BarColumn(),
DownloadColumn(),
TransferSpeedColumn(),
TextColumn("eta"),
TimeRemainingColumn(),
)
progress = Progress(*columns, refresh_per_second=30)
task_id = progress.add_task(" " * (get_indentation() + 2), total=total)
with progress:
for chunk in iterable:
yield chunk
progress.update(task_id, advance=len(chunk))
def get_download_progress_renderer(
*, bar_type: str, size: Optional[int] = None
) -> DownloadProgressRenderer:
"""Get an object that can be used to render the download progress.
Returns a callable, that takes an iterable to "wrap".
"""
if bar_type == "on":
return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size)
else:
return iter # no-op, when passed an iterator

View file

@ -0,0 +1,505 @@
"""Contains the Command base classes that depend on PipSession.
The classes in this module are in a separate module so the commands not
needing download / PackageFinder capability don't unnecessarily import the
PackageFinder machinery and all its vendored dependencies, etc.
"""
import logging
import os
import sys
from functools import partial
from optparse import Values
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
from pip._internal.cache import WheelCache
from pip._internal.cli import cmdoptions
from pip._internal.cli.base_command import Command
from pip._internal.cli.command_context import CommandContextMixIn
from pip._internal.exceptions import CommandError, PreviousBuildDirError
from pip._internal.index.collector import LinkCollector
from pip._internal.index.package_finder import PackageFinder
from pip._internal.models.selection_prefs import SelectionPreferences
from pip._internal.models.target_python import TargetPython
from pip._internal.network.session import PipSession
from pip._internal.operations.build.build_tracker import BuildTracker
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req.constructors import (
install_req_from_editable,
install_req_from_line,
install_req_from_parsed_requirement,
install_req_from_req_string,
)
from pip._internal.req.req_file import parse_requirements
from pip._internal.req.req_install import InstallRequirement
from pip._internal.resolution.base import BaseResolver
from pip._internal.self_outdated_check import pip_self_version_check
from pip._internal.utils.temp_dir import (
TempDirectory,
TempDirectoryTypeRegistry,
tempdir_kinds,
)
from pip._internal.utils.virtualenv import running_under_virtualenv
if TYPE_CHECKING:
from ssl import SSLContext
logger = logging.getLogger(__name__)
def _create_truststore_ssl_context() -> Optional["SSLContext"]:
if sys.version_info < (3, 10):
raise CommandError("The truststore feature is only available for Python 3.10+")
try:
import ssl
except ImportError:
logger.warning("Disabling truststore since ssl support is missing")
return None
try:
from pip._vendor import truststore
except ImportError as e:
raise CommandError(f"The truststore feature is unavailable: {e}")
return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
class SessionCommandMixin(CommandContextMixIn):
"""
A class mixin for command classes needing _build_session().
"""
def __init__(self) -> None:
super().__init__()
self._session: Optional[PipSession] = None
@classmethod
def _get_index_urls(cls, options: Values) -> Optional[List[str]]:
"""Return a list of index urls from user-provided options."""
index_urls = []
if not getattr(options, "no_index", False):
url = getattr(options, "index_url", None)
if url:
index_urls.append(url)
urls = getattr(options, "extra_index_urls", None)
if urls:
index_urls.extend(urls)
# Return None rather than an empty list
return index_urls or None
def get_default_session(self, options: Values) -> PipSession:
"""Get a default-managed session."""
if self._session is None:
self._session = self.enter_context(self._build_session(options))
# there's no type annotation on requests.Session, so it's
# automatically ContextManager[Any] and self._session becomes Any,
# then https://github.com/python/mypy/issues/7696 kicks in
assert self._session is not None
return self._session
def _build_session(
self,
options: Values,
retries: Optional[int] = None,
timeout: Optional[int] = None,
fallback_to_certifi: bool = False,
) -> PipSession:
cache_dir = options.cache_dir
assert not cache_dir or os.path.isabs(cache_dir)
if "truststore" in options.features_enabled:
try:
ssl_context = _create_truststore_ssl_context()
except Exception:
if not fallback_to_certifi:
raise
ssl_context = None
else:
ssl_context = None
session = PipSession(
cache=os.path.join(cache_dir, "http-v2") if cache_dir else None,
retries=retries if retries is not None else options.retries,
trusted_hosts=options.trusted_hosts,
index_urls=self._get_index_urls(options),
ssl_context=ssl_context,
)
# Handle custom ca-bundles from the user
if options.cert:
session.verify = options.cert
# Handle SSL client certificate
if options.client_cert:
session.cert = options.client_cert
# Handle timeouts
if options.timeout or timeout:
session.timeout = timeout if timeout is not None else options.timeout
# Handle configured proxies
if options.proxy:
session.proxies = {
"http": options.proxy,
"https": options.proxy,
}
# Determine if we can prompt the user for authentication or not
session.auth.prompting = not options.no_input
session.auth.keyring_provider = options.keyring_provider
return session
class IndexGroupCommand(Command, SessionCommandMixin):
"""
Abstract base class for commands with the index_group options.
This also corresponds to the commands that permit the pip version check.
"""
def handle_pip_version_check(self, options: Values) -> None:
"""
Do the pip version check if not disabled.
This overrides the default behavior of not doing the check.
"""
# Make sure the index_group options are present.
assert hasattr(options, "no_index")
if options.disable_pip_version_check or options.no_index:
return
# Otherwise, check if we're using the latest version of pip available.
session = self._build_session(
options,
retries=0,
timeout=min(5, options.timeout),
# This is set to ensure the function does not fail when truststore is
# specified in use-feature but cannot be loaded. This usually raises a
# CommandError and shows a nice user-facing error, but this function is not
# called in that try-except block.
fallback_to_certifi=True,
)
with session:
pip_self_version_check(session, options)
KEEPABLE_TEMPDIR_TYPES = [
tempdir_kinds.BUILD_ENV,
tempdir_kinds.EPHEM_WHEEL_CACHE,
tempdir_kinds.REQ_BUILD,
]
def warn_if_run_as_root() -> None:
"""Output a warning for sudo users on Unix.
In a virtual environment, sudo pip still writes to virtualenv.
On Windows, users may run pip as Administrator without issues.
This warning only applies to Unix root users outside of virtualenv.
"""
if running_under_virtualenv():
return
if not hasattr(os, "getuid"):
return
# On Windows, there are no "system managed" Python packages. Installing as
# Administrator via pip is the correct way of updating system environments.
#
# We choose sys.platform over utils.compat.WINDOWS here to enable Mypy platform
# checks: https://mypy.readthedocs.io/en/stable/common_issues.html
if sys.platform == "win32" or sys.platform == "cygwin":
return
if os.getuid() != 0:
return
logger.warning(
"Running pip as the 'root' user can result in broken permissions and "
"conflicting behaviour with the system package manager. "
"It is recommended to use a virtual environment instead: "
"https://pip.pypa.io/warnings/venv"
)
def with_cleanup(func: Any) -> Any:
"""Decorator for common logic related to managing temporary
directories.
"""
def configure_tempdir_registry(registry: TempDirectoryTypeRegistry) -> None:
for t in KEEPABLE_TEMPDIR_TYPES:
registry.set_delete(t, False)
def wrapper(
self: RequirementCommand, options: Values, args: List[Any]
) -> Optional[int]:
assert self.tempdir_registry is not None
if options.no_clean:
configure_tempdir_registry(self.tempdir_registry)
try:
return func(self, options, args)
except PreviousBuildDirError:
# This kind of conflict can occur when the user passes an explicit
# build directory with a pre-existing folder. In that case we do
# not want to accidentally remove it.
configure_tempdir_registry(self.tempdir_registry)
raise
return wrapper
class RequirementCommand(IndexGroupCommand):
def __init__(self, *args: Any, **kw: Any) -> None:
super().__init__(*args, **kw)
self.cmd_opts.add_option(cmdoptions.no_clean())
@staticmethod
def determine_resolver_variant(options: Values) -> str:
"""Determines which resolver should be used, based on the given options."""
if "legacy-resolver" in options.deprecated_features_enabled:
return "legacy"
return "resolvelib"
@classmethod
def make_requirement_preparer(
cls,
temp_build_dir: TempDirectory,
options: Values,
build_tracker: BuildTracker,
session: PipSession,
finder: PackageFinder,
use_user_site: bool,
download_dir: Optional[str] = None,
verbosity: int = 0,
) -> RequirementPreparer:
"""
Create a RequirementPreparer instance for the given parameters.
"""
temp_build_dir_path = temp_build_dir.path
assert temp_build_dir_path is not None
legacy_resolver = False
resolver_variant = cls.determine_resolver_variant(options)
if resolver_variant == "resolvelib":
lazy_wheel = "fast-deps" in options.features_enabled
if lazy_wheel:
logger.warning(
"pip is using lazily downloaded wheels using HTTP "
"range requests to obtain dependency information. "
"This experimental feature is enabled through "
"--use-feature=fast-deps and it is not ready for "
"production."
)
else:
legacy_resolver = True
lazy_wheel = False
if "fast-deps" in options.features_enabled:
logger.warning(
"fast-deps has no effect when used with the legacy resolver."
)
return RequirementPreparer(
build_dir=temp_build_dir_path,
src_dir=options.src_dir,
download_dir=download_dir,
build_isolation=options.build_isolation,
check_build_deps=options.check_build_deps,
build_tracker=build_tracker,
session=session,
progress_bar=options.progress_bar,
finder=finder,
require_hashes=options.require_hashes,
use_user_site=use_user_site,
lazy_wheel=lazy_wheel,
verbosity=verbosity,
legacy_resolver=legacy_resolver,
)
@classmethod
def make_resolver(
cls,
preparer: RequirementPreparer,
finder: PackageFinder,
options: Values,
wheel_cache: Optional[WheelCache] = None,
use_user_site: bool = False,
ignore_installed: bool = True,
ignore_requires_python: bool = False,
force_reinstall: bool = False,
upgrade_strategy: str = "to-satisfy-only",
use_pep517: Optional[bool] = None,
py_version_info: Optional[Tuple[int, ...]] = None,
) -> BaseResolver:
"""
Create a Resolver instance for the given parameters.
"""
make_install_req = partial(
install_req_from_req_string,
isolated=options.isolated_mode,
use_pep517=use_pep517,
)
resolver_variant = cls.determine_resolver_variant(options)
# The long import name and duplicated invocation is needed to convince
# Mypy into correctly typechecking. Otherwise it would complain the
# "Resolver" class being redefined.
if resolver_variant == "resolvelib":
import pip._internal.resolution.resolvelib.resolver
return pip._internal.resolution.resolvelib.resolver.Resolver(
preparer=preparer,
finder=finder,
wheel_cache=wheel_cache,
make_install_req=make_install_req,
use_user_site=use_user_site,
ignore_dependencies=options.ignore_dependencies,
ignore_installed=ignore_installed,
ignore_requires_python=ignore_requires_python,
force_reinstall=force_reinstall,
upgrade_strategy=upgrade_strategy,
py_version_info=py_version_info,
)
import pip._internal.resolution.legacy.resolver
return pip._internal.resolution.legacy.resolver.Resolver(
preparer=preparer,
finder=finder,
wheel_cache=wheel_cache,
make_install_req=make_install_req,
use_user_site=use_user_site,
ignore_dependencies=options.ignore_dependencies,
ignore_installed=ignore_installed,
ignore_requires_python=ignore_requires_python,
force_reinstall=force_reinstall,
upgrade_strategy=upgrade_strategy,
py_version_info=py_version_info,
)
def get_requirements(
self,
args: List[str],
options: Values,
finder: PackageFinder,
session: PipSession,
) -> List[InstallRequirement]:
"""
Parse command-line arguments into the corresponding requirements.
"""
requirements: List[InstallRequirement] = []
for filename in options.constraints:
for parsed_req in parse_requirements(
filename,
constraint=True,
finder=finder,
options=options,
session=session,
):
req_to_add = install_req_from_parsed_requirement(
parsed_req,
isolated=options.isolated_mode,
user_supplied=False,
)
requirements.append(req_to_add)
for req in args:
req_to_add = install_req_from_line(
req,
comes_from=None,
isolated=options.isolated_mode,
use_pep517=options.use_pep517,
user_supplied=True,
config_settings=getattr(options, "config_settings", None),
)
requirements.append(req_to_add)
for req in options.editables:
req_to_add = install_req_from_editable(
req,
user_supplied=True,
isolated=options.isolated_mode,
use_pep517=options.use_pep517,
config_settings=getattr(options, "config_settings", None),
)
requirements.append(req_to_add)
# NOTE: options.require_hashes may be set if --require-hashes is True
for filename in options.requirements:
for parsed_req in parse_requirements(
filename, finder=finder, options=options, session=session
):
req_to_add = install_req_from_parsed_requirement(
parsed_req,
isolated=options.isolated_mode,
use_pep517=options.use_pep517,
user_supplied=True,
config_settings=parsed_req.options.get("config_settings")
if parsed_req.options
else None,
)
requirements.append(req_to_add)
# If any requirement has hash options, enable hash checking.
if any(req.has_hash_options for req in requirements):
options.require_hashes = True
if not (args or options.editables or options.requirements):
opts = {"name": self.name}
if options.find_links:
raise CommandError(
"You must give at least one requirement to {name} "
'(maybe you meant "pip {name} {links}"?)'.format(
**dict(opts, links=" ".join(options.find_links))
)
)
else:
raise CommandError(
"You must give at least one requirement to {name} "
'(see "pip help {name}")'.format(**opts)
)
return requirements
@staticmethod
def trace_basic_info(finder: PackageFinder) -> None:
"""
Trace basic information about the provided objects.
"""
# Display where finder is looking for packages
search_scope = finder.search_scope
locations = search_scope.get_formatted_locations()
if locations:
logger.info(locations)
def _build_package_finder(
self,
options: Values,
session: PipSession,
target_python: Optional[TargetPython] = None,
ignore_requires_python: Optional[bool] = None,
) -> PackageFinder:
"""
Create a package finder appropriate to this requirement command.
:param ignore_requires_python: Whether to ignore incompatible
"Requires-Python" values in links. Defaults to False.
"""
link_collector = LinkCollector.create(session, options=options)
selection_prefs = SelectionPreferences(
allow_yanked=True,
format_control=options.format_control,
allow_all_prereleases=options.pre,
prefer_binary=options.prefer_binary,
ignore_requires_python=ignore_requires_python,
)
return PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
target_python=target_python,
)

View file

@ -0,0 +1,159 @@
import contextlib
import itertools
import logging
import sys
import time
from typing import IO, Generator, Optional
from pip._internal.utils.compat import WINDOWS
from pip._internal.utils.logging import get_indentation
logger = logging.getLogger(__name__)
class SpinnerInterface:
def spin(self) -> None:
raise NotImplementedError()
def finish(self, final_status: str) -> None:
raise NotImplementedError()
class InteractiveSpinner(SpinnerInterface):
def __init__(
self,
message: str,
file: Optional[IO[str]] = None,
spin_chars: str = "-\\|/",
# Empirically, 8 updates/second looks nice
min_update_interval_seconds: float = 0.125,
):
self._message = message
if file is None:
file = sys.stdout
self._file = file
self._rate_limiter = RateLimiter(min_update_interval_seconds)
self._finished = False
self._spin_cycle = itertools.cycle(spin_chars)
self._file.write(" " * get_indentation() + self._message + " ... ")
self._width = 0
def _write(self, status: str) -> None:
assert not self._finished
# Erase what we wrote before by backspacing to the beginning, writing
# spaces to overwrite the old text, and then backspacing again
backup = "\b" * self._width
self._file.write(backup + " " * self._width + backup)
# Now we have a blank slate to add our status
self._file.write(status)
self._width = len(status)
self._file.flush()
self._rate_limiter.reset()
def spin(self) -> None:
if self._finished:
return
if not self._rate_limiter.ready():
return
self._write(next(self._spin_cycle))
def finish(self, final_status: str) -> None:
if self._finished:
return
self._write(final_status)
self._file.write("\n")
self._file.flush()
self._finished = True
# Used for dumb terminals, non-interactive installs (no tty), etc.
# We still print updates occasionally (once every 60 seconds by default) to
# act as a keep-alive for systems like Travis-CI that take lack-of-output as
# an indication that a task has frozen.
class NonInteractiveSpinner(SpinnerInterface):
def __init__(self, message: str, min_update_interval_seconds: float = 60.0) -> None:
self._message = message
self._finished = False
self._rate_limiter = RateLimiter(min_update_interval_seconds)
self._update("started")
def _update(self, status: str) -> None:
assert not self._finished
self._rate_limiter.reset()
logger.info("%s: %s", self._message, status)
def spin(self) -> None:
if self._finished:
return
if not self._rate_limiter.ready():
return
self._update("still running...")
def finish(self, final_status: str) -> None:
if self._finished:
return
self._update(f"finished with status '{final_status}'")
self._finished = True
class RateLimiter:
def __init__(self, min_update_interval_seconds: float) -> None:
self._min_update_interval_seconds = min_update_interval_seconds
self._last_update: float = 0
def ready(self) -> bool:
now = time.time()
delta = now - self._last_update
return delta >= self._min_update_interval_seconds
def reset(self) -> None:
self._last_update = time.time()
@contextlib.contextmanager
def open_spinner(message: str) -> Generator[SpinnerInterface, None, None]:
# Interactive spinner goes directly to sys.stdout rather than being routed
# through the logging system, but it acts like it has level INFO,
# i.e. it's only displayed if we're at level INFO or better.
# Non-interactive spinner goes through the logging system, so it is always
# in sync with logging configuration.
if sys.stdout.isatty() and logger.getEffectiveLevel() <= logging.INFO:
spinner: SpinnerInterface = InteractiveSpinner(message)
else:
spinner = NonInteractiveSpinner(message)
try:
with hidden_cursor(sys.stdout):
yield spinner
except KeyboardInterrupt:
spinner.finish("canceled")
raise
except Exception:
spinner.finish("error")
raise
else:
spinner.finish("done")
HIDE_CURSOR = "\x1b[?25l"
SHOW_CURSOR = "\x1b[?25h"
@contextlib.contextmanager
def hidden_cursor(file: IO[str]) -> Generator[None, None, None]:
# The Windows terminal does not support the hide/show cursor ANSI codes,
# even via colorama. So don't even try.
if WINDOWS:
yield
# We don't want to clutter the output with control characters if we're
# writing to a file, or if the user is running with --quiet.
# See https://github.com/pypa/pip/issues/3418
elif not file.isatty() or logger.getEffectiveLevel() > logging.INFO:
yield
else:
file.write(HIDE_CURSOR)
try:
yield
finally:
file.write(SHOW_CURSOR)

View file

@ -0,0 +1,6 @@
SUCCESS = 0
ERROR = 1
UNKNOWN_ERROR = 2
VIRTUALENV_NOT_FOUND = 3
PREVIOUS_BUILD_DIR_ERROR = 4
NO_MATCHES_FOUND = 23

View file

@ -0,0 +1,132 @@
"""
Package containing all pip commands
"""
import importlib
from collections import namedtuple
from typing import Any, Dict, Optional
from pip._internal.cli.base_command import Command
CommandInfo = namedtuple("CommandInfo", "module_path, class_name, summary")
# This dictionary does a bunch of heavy lifting for help output:
# - Enables avoiding additional (costly) imports for presenting `--help`.
# - The ordering matters for help display.
#
# Even though the module path starts with the same "pip._internal.commands"
# prefix, the full path makes testing easier (specifically when modifying
# `commands_dict` in test setup / teardown).
commands_dict: Dict[str, CommandInfo] = {
"install": CommandInfo(
"pip._internal.commands.install",
"InstallCommand",
"Install packages.",
),
"download": CommandInfo(
"pip._internal.commands.download",
"DownloadCommand",
"Download packages.",
),
"uninstall": CommandInfo(
"pip._internal.commands.uninstall",
"UninstallCommand",
"Uninstall packages.",
),
"freeze": CommandInfo(
"pip._internal.commands.freeze",
"FreezeCommand",
"Output installed packages in requirements format.",
),
"inspect": CommandInfo(
"pip._internal.commands.inspect",
"InspectCommand",
"Inspect the python environment.",
),
"list": CommandInfo(
"pip._internal.commands.list",
"ListCommand",
"List installed packages.",
),
"show": CommandInfo(
"pip._internal.commands.show",
"ShowCommand",
"Show information about installed packages.",
),
"check": CommandInfo(
"pip._internal.commands.check",
"CheckCommand",
"Verify installed packages have compatible dependencies.",
),
"config": CommandInfo(
"pip._internal.commands.configuration",
"ConfigurationCommand",
"Manage local and global configuration.",
),
"search": CommandInfo(
"pip._internal.commands.search",
"SearchCommand",
"Search PyPI for packages.",
),
"cache": CommandInfo(
"pip._internal.commands.cache",
"CacheCommand",
"Inspect and manage pip's wheel cache.",
),
"index": CommandInfo(
"pip._internal.commands.index",
"IndexCommand",
"Inspect information available from package indexes.",
),
"wheel": CommandInfo(
"pip._internal.commands.wheel",
"WheelCommand",
"Build wheels from your requirements.",
),
"hash": CommandInfo(
"pip._internal.commands.hash",
"HashCommand",
"Compute hashes of package archives.",
),
"completion": CommandInfo(
"pip._internal.commands.completion",
"CompletionCommand",
"A helper command used for command completion.",
),
"debug": CommandInfo(
"pip._internal.commands.debug",
"DebugCommand",
"Show information useful for debugging.",
),
"help": CommandInfo(
"pip._internal.commands.help",
"HelpCommand",
"Show help for commands.",
),
}
def create_command(name: str, **kwargs: Any) -> Command:
"""
Create an instance of the Command class with the given name.
"""
module_path, class_name, summary = commands_dict[name]
module = importlib.import_module(module_path)
command_class = getattr(module, class_name)
command = command_class(name=name, summary=summary, **kwargs)
return command
def get_similar_commands(name: str) -> Optional[str]:
"""Command name auto-correct."""
from difflib import get_close_matches
name = name.lower()
close_commands = get_close_matches(name, commands_dict.keys())
if close_commands:
return close_commands[0]
else:
return None

View file

@ -0,0 +1,225 @@
import os
import textwrap
from optparse import Values
from typing import Any, List
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.exceptions import CommandError, PipError
from pip._internal.utils import filesystem
from pip._internal.utils.logging import getLogger
logger = getLogger(__name__)
class CacheCommand(Command):
"""
Inspect and manage pip's wheel cache.
Subcommands:
- dir: Show the cache directory.
- info: Show information about the cache.
- list: List filenames of packages stored in the cache.
- remove: Remove one or more package from the cache.
- purge: Remove all items from the cache.
``<pattern>`` can be a glob expression or a package name.
"""
ignore_require_venv = True
usage = """
%prog dir
%prog info
%prog list [<pattern>] [--format=[human, abspath]]
%prog remove <pattern>
%prog purge
"""
def add_options(self) -> None:
self.cmd_opts.add_option(
"--format",
action="store",
dest="list_format",
default="human",
choices=("human", "abspath"),
help="Select the output format among: human (default) or abspath",
)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
handlers = {
"dir": self.get_cache_dir,
"info": self.get_cache_info,
"list": self.list_cache_items,
"remove": self.remove_cache_items,
"purge": self.purge_cache,
}
if not options.cache_dir:
logger.error("pip cache commands can not function since cache is disabled.")
return ERROR
# Determine action
if not args or args[0] not in handlers:
logger.error(
"Need an action (%s) to perform.",
", ".join(sorted(handlers)),
)
return ERROR
action = args[0]
# Error handling happens here, not in the action-handlers.
try:
handlers[action](options, args[1:])
except PipError as e:
logger.error(e.args[0])
return ERROR
return SUCCESS
def get_cache_dir(self, options: Values, args: List[Any]) -> None:
if args:
raise CommandError("Too many arguments")
logger.info(options.cache_dir)
def get_cache_info(self, options: Values, args: List[Any]) -> None:
if args:
raise CommandError("Too many arguments")
num_http_files = len(self._find_http_files(options))
num_packages = len(self._find_wheels(options, "*"))
http_cache_location = self._cache_dir(options, "http-v2")
old_http_cache_location = self._cache_dir(options, "http")
wheels_cache_location = self._cache_dir(options, "wheels")
http_cache_size = filesystem.format_size(
filesystem.directory_size(http_cache_location)
+ filesystem.directory_size(old_http_cache_location)
)
wheels_cache_size = filesystem.format_directory_size(wheels_cache_location)
message = (
textwrap.dedent(
"""
Package index page cache location (pip v23.3+): {http_cache_location}
Package index page cache location (older pips): {old_http_cache_location}
Package index page cache size: {http_cache_size}
Number of HTTP files: {num_http_files}
Locally built wheels location: {wheels_cache_location}
Locally built wheels size: {wheels_cache_size}
Number of locally built wheels: {package_count}
""" # noqa: E501
)
.format(
http_cache_location=http_cache_location,
old_http_cache_location=old_http_cache_location,
http_cache_size=http_cache_size,
num_http_files=num_http_files,
wheels_cache_location=wheels_cache_location,
package_count=num_packages,
wheels_cache_size=wheels_cache_size,
)
.strip()
)
logger.info(message)
def list_cache_items(self, options: Values, args: List[Any]) -> None:
if len(args) > 1:
raise CommandError("Too many arguments")
if args:
pattern = args[0]
else:
pattern = "*"
files = self._find_wheels(options, pattern)
if options.list_format == "human":
self.format_for_human(files)
else:
self.format_for_abspath(files)
def format_for_human(self, files: List[str]) -> None:
if not files:
logger.info("No locally built wheels cached.")
return
results = []
for filename in files:
wheel = os.path.basename(filename)
size = filesystem.format_file_size(filename)
results.append(f" - {wheel} ({size})")
logger.info("Cache contents:\n")
logger.info("\n".join(sorted(results)))
def format_for_abspath(self, files: List[str]) -> None:
if files:
logger.info("\n".join(sorted(files)))
def remove_cache_items(self, options: Values, args: List[Any]) -> None:
if len(args) > 1:
raise CommandError("Too many arguments")
if not args:
raise CommandError("Please provide a pattern")
files = self._find_wheels(options, args[0])
no_matching_msg = "No matching packages"
if args[0] == "*":
# Only fetch http files if no specific pattern given
files += self._find_http_files(options)
else:
# Add the pattern to the log message
no_matching_msg += f' for pattern "{args[0]}"'
if not files:
logger.warning(no_matching_msg)
for filename in files:
os.unlink(filename)
logger.verbose("Removed %s", filename)
logger.info("Files removed: %s", len(files))
def purge_cache(self, options: Values, args: List[Any]) -> None:
if args:
raise CommandError("Too many arguments")
return self.remove_cache_items(options, ["*"])
def _cache_dir(self, options: Values, subdir: str) -> str:
return os.path.join(options.cache_dir, subdir)
def _find_http_files(self, options: Values) -> List[str]:
old_http_dir = self._cache_dir(options, "http")
new_http_dir = self._cache_dir(options, "http-v2")
return filesystem.find_files(old_http_dir, "*") + filesystem.find_files(
new_http_dir, "*"
)
def _find_wheels(self, options: Values, pattern: str) -> List[str]:
wheel_dir = self._cache_dir(options, "wheels")
# The wheel filename format, as specified in PEP 427, is:
# {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
#
# Additionally, non-alphanumeric values in the distribution are
# normalized to underscores (_), meaning hyphens can never occur
# before `-{version}`.
#
# Given that information:
# - If the pattern we're given contains a hyphen (-), the user is
# providing at least the version. Thus, we can just append `*.whl`
# to match the rest of it.
# - If the pattern we're given doesn't contain a hyphen (-), the
# user is only providing the name. Thus, we append `-*.whl` to
# match the hyphen before the version, followed by anything else.
#
# PEP 427: https://www.python.org/dev/peps/pep-0427/
pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl")
return filesystem.find_files(wheel_dir, pattern)

View file

@ -0,0 +1,54 @@
import logging
from optparse import Values
from typing import List
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.operations.check import (
check_package_set,
create_package_set_from_installed,
warn_legacy_versions_and_specifiers,
)
from pip._internal.utils.misc import write_output
logger = logging.getLogger(__name__)
class CheckCommand(Command):
"""Verify installed packages have compatible dependencies."""
usage = """
%prog [options]"""
def run(self, options: Values, args: List[str]) -> int:
package_set, parsing_probs = create_package_set_from_installed()
warn_legacy_versions_and_specifiers(package_set)
missing, conflicting = check_package_set(package_set)
for project_name in missing:
version = package_set[project_name].version
for dependency in missing[project_name]:
write_output(
"%s %s requires %s, which is not installed.",
project_name,
version,
dependency[0],
)
for project_name in conflicting:
version = package_set[project_name].version
for dep_name, dep_version, req in conflicting[project_name]:
write_output(
"%s %s has requirement %s, but you have %s %s.",
project_name,
version,
req,
dep_name,
dep_version,
)
if missing or conflicting or parsing_probs:
return ERROR
else:
write_output("No broken requirements found.")
return SUCCESS

View file

@ -0,0 +1,130 @@
import sys
import textwrap
from optparse import Values
from typing import List
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.utils.misc import get_prog
BASE_COMPLETION = """
# pip {shell} completion start{script}# pip {shell} completion end
"""
COMPLETION_SCRIPTS = {
"bash": """
_pip_completion()
{{
COMPREPLY=( $( COMP_WORDS="${{COMP_WORDS[*]}}" \\
COMP_CWORD=$COMP_CWORD \\
PIP_AUTO_COMPLETE=1 $1 2>/dev/null ) )
}}
complete -o default -F _pip_completion {prog}
""",
"zsh": """
#compdef -P pip[0-9.]#
__pip() {{
compadd $( COMP_WORDS="$words[*]" \\
COMP_CWORD=$((CURRENT-1)) \\
PIP_AUTO_COMPLETE=1 $words[1] 2>/dev/null )
}}
if [[ $zsh_eval_context[-1] == loadautofunc ]]; then
# autoload from fpath, call function directly
__pip "$@"
else
# eval/source/. command, register function for later
compdef __pip -P 'pip[0-9.]#'
fi
""",
"fish": """
function __fish_complete_pip
set -lx COMP_WORDS (commandline -o) ""
set -lx COMP_CWORD ( \\
math (contains -i -- (commandline -t) $COMP_WORDS)-1 \\
)
set -lx PIP_AUTO_COMPLETE 1
string split \\ -- (eval $COMP_WORDS[1])
end
complete -fa "(__fish_complete_pip)" -c {prog}
""",
"powershell": """
if ((Test-Path Function:\\TabExpansion) -and -not `
(Test-Path Function:\\_pip_completeBackup)) {{
Rename-Item Function:\\TabExpansion _pip_completeBackup
}}
function TabExpansion($line, $lastWord) {{
$lastBlock = [regex]::Split($line, '[|;]')[-1].TrimStart()
if ($lastBlock.StartsWith("{prog} ")) {{
$Env:COMP_WORDS=$lastBlock
$Env:COMP_CWORD=$lastBlock.Split().Length - 1
$Env:PIP_AUTO_COMPLETE=1
(& {prog}).Split()
Remove-Item Env:COMP_WORDS
Remove-Item Env:COMP_CWORD
Remove-Item Env:PIP_AUTO_COMPLETE
}}
elseif (Test-Path Function:\\_pip_completeBackup) {{
# Fall back on existing tab expansion
_pip_completeBackup $line $lastWord
}}
}}
""",
}
class CompletionCommand(Command):
"""A helper command to be used for command completion."""
ignore_require_venv = True
def add_options(self) -> None:
self.cmd_opts.add_option(
"--bash",
"-b",
action="store_const",
const="bash",
dest="shell",
help="Emit completion code for bash",
)
self.cmd_opts.add_option(
"--zsh",
"-z",
action="store_const",
const="zsh",
dest="shell",
help="Emit completion code for zsh",
)
self.cmd_opts.add_option(
"--fish",
"-f",
action="store_const",
const="fish",
dest="shell",
help="Emit completion code for fish",
)
self.cmd_opts.add_option(
"--powershell",
"-p",
action="store_const",
const="powershell",
dest="shell",
help="Emit completion code for powershell",
)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
"""Prints the completion code of the given shell"""
shells = COMPLETION_SCRIPTS.keys()
shell_options = ["--" + shell for shell in sorted(shells)]
if options.shell in shells:
script = textwrap.dedent(
COMPLETION_SCRIPTS.get(options.shell, "").format(prog=get_prog())
)
print(BASE_COMPLETION.format(script=script, shell=options.shell))
return SUCCESS
else:
sys.stderr.write(
"ERROR: You must pass {}\n".format(" or ".join(shell_options))
)
return SUCCESS

View file

@ -0,0 +1,280 @@
import logging
import os
import subprocess
from optparse import Values
from typing import Any, List, Optional
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.configuration import (
Configuration,
Kind,
get_configuration_files,
kinds,
)
from pip._internal.exceptions import PipError
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import get_prog, write_output
logger = logging.getLogger(__name__)
class ConfigurationCommand(Command):
"""
Manage local and global configuration.
Subcommands:
- list: List the active configuration (or from the file specified)
- edit: Edit the configuration file in an editor
- get: Get the value associated with command.option
- set: Set the command.option=value
- unset: Unset the value associated with command.option
- debug: List the configuration files and values defined under them
Configuration keys should be dot separated command and option name,
with the special prefix "global" affecting any command. For example,
"pip config set global.index-url https://example.org/" would configure
the index url for all commands, but "pip config set download.timeout 10"
would configure a 10 second timeout only for "pip download" commands.
If none of --user, --global and --site are passed, a virtual
environment configuration file is used if one is active and the file
exists. Otherwise, all modifications happen to the user file by
default.
"""
ignore_require_venv = True
usage = """
%prog [<file-option>] list
%prog [<file-option>] [--editor <editor-path>] edit
%prog [<file-option>] get command.option
%prog [<file-option>] set command.option value
%prog [<file-option>] unset command.option
%prog [<file-option>] debug
"""
def add_options(self) -> None:
self.cmd_opts.add_option(
"--editor",
dest="editor",
action="store",
default=None,
help=(
"Editor to use to edit the file. Uses VISUAL or EDITOR "
"environment variables if not provided."
),
)
self.cmd_opts.add_option(
"--global",
dest="global_file",
action="store_true",
default=False,
help="Use the system-wide configuration file only",
)
self.cmd_opts.add_option(
"--user",
dest="user_file",
action="store_true",
default=False,
help="Use the user configuration file only",
)
self.cmd_opts.add_option(
"--site",
dest="site_file",
action="store_true",
default=False,
help="Use the current environment configuration file only",
)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
handlers = {
"list": self.list_values,
"edit": self.open_in_editor,
"get": self.get_name,
"set": self.set_name_value,
"unset": self.unset_name,
"debug": self.list_config_values,
}
# Determine action
if not args or args[0] not in handlers:
logger.error(
"Need an action (%s) to perform.",
", ".join(sorted(handlers)),
)
return ERROR
action = args[0]
# Determine which configuration files are to be loaded
# Depends on whether the command is modifying.
try:
load_only = self._determine_file(
options, need_value=(action in ["get", "set", "unset", "edit"])
)
except PipError as e:
logger.error(e.args[0])
return ERROR
# Load a new configuration
self.configuration = Configuration(
isolated=options.isolated_mode, load_only=load_only
)
self.configuration.load()
# Error handling happens here, not in the action-handlers.
try:
handlers[action](options, args[1:])
except PipError as e:
logger.error(e.args[0])
return ERROR
return SUCCESS
def _determine_file(self, options: Values, need_value: bool) -> Optional[Kind]:
file_options = [
key
for key, value in (
(kinds.USER, options.user_file),
(kinds.GLOBAL, options.global_file),
(kinds.SITE, options.site_file),
)
if value
]
if not file_options:
if not need_value:
return None
# Default to user, unless there's a site file.
elif any(
os.path.exists(site_config_file)
for site_config_file in get_configuration_files()[kinds.SITE]
):
return kinds.SITE
else:
return kinds.USER
elif len(file_options) == 1:
return file_options[0]
raise PipError(
"Need exactly one file to operate upon "
"(--user, --site, --global) to perform."
)
def list_values(self, options: Values, args: List[str]) -> None:
self._get_n_args(args, "list", n=0)
for key, value in sorted(self.configuration.items()):
write_output("%s=%r", key, value)
def get_name(self, options: Values, args: List[str]) -> None:
key = self._get_n_args(args, "get [name]", n=1)
value = self.configuration.get_value(key)
write_output("%s", value)
def set_name_value(self, options: Values, args: List[str]) -> None:
key, value = self._get_n_args(args, "set [name] [value]", n=2)
self.configuration.set_value(key, value)
self._save_configuration()
def unset_name(self, options: Values, args: List[str]) -> None:
key = self._get_n_args(args, "unset [name]", n=1)
self.configuration.unset_value(key)
self._save_configuration()
def list_config_values(self, options: Values, args: List[str]) -> None:
"""List config key-value pairs across different config files"""
self._get_n_args(args, "debug", n=0)
self.print_env_var_values()
# Iterate over config files and print if they exist, and the
# key-value pairs present in them if they do
for variant, files in sorted(self.configuration.iter_config_files()):
write_output("%s:", variant)
for fname in files:
with indent_log():
file_exists = os.path.exists(fname)
write_output("%s, exists: %r", fname, file_exists)
if file_exists:
self.print_config_file_values(variant)
def print_config_file_values(self, variant: Kind) -> None:
"""Get key-value pairs from the file of a variant"""
for name, value in self.configuration.get_values_in_config(variant).items():
with indent_log():
write_output("%s: %s", name, value)
def print_env_var_values(self) -> None:
"""Get key-values pairs present as environment variables"""
write_output("%s:", "env_var")
with indent_log():
for key, value in sorted(self.configuration.get_environ_vars()):
env_var = f"PIP_{key.upper()}"
write_output("%s=%r", env_var, value)
def open_in_editor(self, options: Values, args: List[str]) -> None:
editor = self._determine_editor(options)
fname = self.configuration.get_file_to_edit()
if fname is None:
raise PipError("Could not determine appropriate file.")
elif '"' in fname:
# This shouldn't happen, unless we see a username like that.
# If that happens, we'd appreciate a pull request fixing this.
raise PipError(
f'Can not open an editor for a file name containing "\n{fname}'
)
try:
subprocess.check_call(f'{editor} "{fname}"', shell=True)
except FileNotFoundError as e:
if not e.filename:
e.filename = editor
raise
except subprocess.CalledProcessError as e:
raise PipError(f"Editor Subprocess exited with exit code {e.returncode}")
def _get_n_args(self, args: List[str], example: str, n: int) -> Any:
"""Helper to make sure the command got the right number of arguments"""
if len(args) != n:
msg = (
f"Got unexpected number of arguments, expected {n}. "
f'(example: "{get_prog()} config {example}")'
)
raise PipError(msg)
if n == 1:
return args[0]
else:
return args
def _save_configuration(self) -> None:
# We successfully ran a modifying command. Need to save the
# configuration.
try:
self.configuration.save()
except Exception:
logger.exception(
"Unable to save configuration. Please report this as a bug."
)
raise PipError("Internal Error.")
def _determine_editor(self, options: Values) -> str:
if options.editor is not None:
return options.editor
elif "VISUAL" in os.environ:
return os.environ["VISUAL"]
elif "EDITOR" in os.environ:
return os.environ["EDITOR"]
else:
raise PipError("Could not determine editor to use.")

View file

@ -0,0 +1,201 @@
import importlib.resources
import locale
import logging
import os
import sys
from optparse import Values
from types import ModuleType
from typing import Any, Dict, List, Optional
import pip._vendor
from pip._vendor.certifi import where
from pip._vendor.packaging.version import parse as parse_version
from pip._internal.cli import cmdoptions
from pip._internal.cli.base_command import Command
from pip._internal.cli.cmdoptions import make_target_python
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.configuration import Configuration
from pip._internal.metadata import get_environment
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import get_pip_version
logger = logging.getLogger(__name__)
def show_value(name: str, value: Any) -> None:
logger.info("%s: %s", name, value)
def show_sys_implementation() -> None:
logger.info("sys.implementation:")
implementation_name = sys.implementation.name
with indent_log():
show_value("name", implementation_name)
def create_vendor_txt_map() -> Dict[str, str]:
with importlib.resources.open_text("pip._vendor", "vendor.txt") as f:
# Purge non version specifying lines.
# Also, remove any space prefix or suffixes (including comments).
lines = [
line.strip().split(" ", 1)[0] for line in f.readlines() if "==" in line
]
# Transform into "module" -> version dict.
return dict(line.split("==", 1) for line in lines)
def get_module_from_module_name(module_name: str) -> Optional[ModuleType]:
# Module name can be uppercase in vendor.txt for some reason...
module_name = module_name.lower().replace("-", "_")
# PATCH: setuptools is actually only pkg_resources.
if module_name == "setuptools":
module_name = "pkg_resources"
try:
__import__(f"pip._vendor.{module_name}", globals(), locals(), level=0)
return getattr(pip._vendor, module_name)
except ImportError:
# We allow 'truststore' to fail to import due
# to being unavailable on Python 3.9 and earlier.
if module_name == "truststore" and sys.version_info < (3, 10):
return None
raise
def get_vendor_version_from_module(module_name: str) -> Optional[str]:
module = get_module_from_module_name(module_name)
version = getattr(module, "__version__", None)
if module and not version:
# Try to find version in debundled module info.
assert module.__file__ is not None
env = get_environment([os.path.dirname(module.__file__)])
dist = env.get_distribution(module_name)
if dist:
version = str(dist.version)
return version
def show_actual_vendor_versions(vendor_txt_versions: Dict[str, str]) -> None:
"""Log the actual version and print extra info if there is
a conflict or if the actual version could not be imported.
"""
for module_name, expected_version in vendor_txt_versions.items():
extra_message = ""
actual_version = get_vendor_version_from_module(module_name)
if not actual_version:
extra_message = (
" (Unable to locate actual module version, using"
" vendor.txt specified version)"
)
actual_version = expected_version
elif parse_version(actual_version) != parse_version(expected_version):
extra_message = (
" (CONFLICT: vendor.txt suggests version should"
f" be {expected_version})"
)
logger.info("%s==%s%s", module_name, actual_version, extra_message)
def show_vendor_versions() -> None:
logger.info("vendored library versions:")
vendor_txt_versions = create_vendor_txt_map()
with indent_log():
show_actual_vendor_versions(vendor_txt_versions)
def show_tags(options: Values) -> None:
tag_limit = 10
target_python = make_target_python(options)
tags = target_python.get_sorted_tags()
# Display the target options that were explicitly provided.
formatted_target = target_python.format_given()
suffix = ""
if formatted_target:
suffix = f" (target: {formatted_target})"
msg = f"Compatible tags: {len(tags)}{suffix}"
logger.info(msg)
if options.verbose < 1 and len(tags) > tag_limit:
tags_limited = True
tags = tags[:tag_limit]
else:
tags_limited = False
with indent_log():
for tag in tags:
logger.info(str(tag))
if tags_limited:
msg = f"...\n[First {tag_limit} tags shown. Pass --verbose to show all.]"
logger.info(msg)
def ca_bundle_info(config: Configuration) -> str:
levels = {key.split(".", 1)[0] for key, _ in config.items()}
if not levels:
return "Not specified"
levels_that_override_global = ["install", "wheel", "download"]
global_overriding_level = [
level for level in levels if level in levels_that_override_global
]
if not global_overriding_level:
return "global"
if "global" in levels:
levels.remove("global")
return ", ".join(levels)
class DebugCommand(Command):
"""
Display debug information.
"""
usage = """
%prog <options>"""
ignore_require_venv = True
def add_options(self) -> None:
cmdoptions.add_target_python_options(self.cmd_opts)
self.parser.insert_option_group(0, self.cmd_opts)
self.parser.config.load()
def run(self, options: Values, args: List[str]) -> int:
logger.warning(
"This command is only meant for debugging. "
"Do not use this with automation for parsing and getting these "
"details, since the output and options of this command may "
"change without notice."
)
show_value("pip version", get_pip_version())
show_value("sys.version", sys.version)
show_value("sys.executable", sys.executable)
show_value("sys.getdefaultencoding", sys.getdefaultencoding())
show_value("sys.getfilesystemencoding", sys.getfilesystemencoding())
show_value(
"locale.getpreferredencoding",
locale.getpreferredencoding(),
)
show_value("sys.platform", sys.platform)
show_sys_implementation()
show_value("'cert' config value", ca_bundle_info(self.parser.config))
show_value("REQUESTS_CA_BUNDLE", os.environ.get("REQUESTS_CA_BUNDLE"))
show_value("CURL_CA_BUNDLE", os.environ.get("CURL_CA_BUNDLE"))
show_value("pip._vendor.certifi.where()", where())
show_value("pip._vendor.DEBUNDLED", pip._vendor.DEBUNDLED)
show_vendor_versions()
show_tags(options)
return SUCCESS

View file

@ -0,0 +1,147 @@
import logging
import os
from optparse import Values
from typing import List
from pip._internal.cli import cmdoptions
from pip._internal.cli.cmdoptions import make_target_python
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.operations.build.build_tracker import get_build_tracker
from pip._internal.req.req_install import check_legacy_setup_py_options
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
from pip._internal.utils.temp_dir import TempDirectory
logger = logging.getLogger(__name__)
class DownloadCommand(RequirementCommand):
"""
Download packages from:
- PyPI (and other indexes) using requirement specifiers.
- VCS project urls.
- Local project directories.
- Local or remote source archives.
pip also supports downloading from "requirements files", which provide
an easy way to specify a whole environment to be downloaded.
"""
usage = """
%prog [options] <requirement specifier> [package-index-options] ...
%prog [options] -r <requirements file> [package-index-options] ...
%prog [options] <vcs project url> ...
%prog [options] <local project path> ...
%prog [options] <archive url/path> ..."""
def add_options(self) -> None:
self.cmd_opts.add_option(cmdoptions.constraints())
self.cmd_opts.add_option(cmdoptions.requirements())
self.cmd_opts.add_option(cmdoptions.no_deps())
self.cmd_opts.add_option(cmdoptions.global_options())
self.cmd_opts.add_option(cmdoptions.no_binary())
self.cmd_opts.add_option(cmdoptions.only_binary())
self.cmd_opts.add_option(cmdoptions.prefer_binary())
self.cmd_opts.add_option(cmdoptions.src())
self.cmd_opts.add_option(cmdoptions.pre())
self.cmd_opts.add_option(cmdoptions.require_hashes())
self.cmd_opts.add_option(cmdoptions.progress_bar())
self.cmd_opts.add_option(cmdoptions.no_build_isolation())
self.cmd_opts.add_option(cmdoptions.use_pep517())
self.cmd_opts.add_option(cmdoptions.no_use_pep517())
self.cmd_opts.add_option(cmdoptions.check_build_deps())
self.cmd_opts.add_option(cmdoptions.ignore_requires_python())
self.cmd_opts.add_option(
"-d",
"--dest",
"--destination-dir",
"--destination-directory",
dest="download_dir",
metavar="dir",
default=os.curdir,
help="Download packages into <dir>.",
)
cmdoptions.add_target_python_options(self.cmd_opts)
index_opts = cmdoptions.make_option_group(
cmdoptions.index_group,
self.parser,
)
self.parser.insert_option_group(0, index_opts)
self.parser.insert_option_group(0, self.cmd_opts)
@with_cleanup
def run(self, options: Values, args: List[str]) -> int:
options.ignore_installed = True
# editable doesn't really make sense for `pip download`, but the bowels
# of the RequirementSet code require that property.
options.editables = []
cmdoptions.check_dist_restriction(options)
options.download_dir = normalize_path(options.download_dir)
ensure_dir(options.download_dir)
session = self.get_default_session(options)
target_python = make_target_python(options)
finder = self._build_package_finder(
options=options,
session=session,
target_python=target_python,
ignore_requires_python=options.ignore_requires_python,
)
build_tracker = self.enter_context(get_build_tracker())
directory = TempDirectory(
delete=not options.no_clean,
kind="download",
globally_managed=True,
)
reqs = self.get_requirements(args, options, finder, session)
check_legacy_setup_py_options(options, reqs)
preparer = self.make_requirement_preparer(
temp_build_dir=directory,
options=options,
build_tracker=build_tracker,
session=session,
finder=finder,
download_dir=options.download_dir,
use_user_site=False,
verbosity=self.verbosity,
)
resolver = self.make_resolver(
preparer=preparer,
finder=finder,
options=options,
ignore_requires_python=options.ignore_requires_python,
use_pep517=options.use_pep517,
py_version_info=options.python_version,
)
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
downloaded: List[str] = []
for req in requirement_set.requirements.values():
if req.satisfied_by is None:
assert req.name is not None
preparer.save_linked_requirement(req)
downloaded.append(req.name)
preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
requirement_set.warn_legacy_versions_and_specifiers()
if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))
return SUCCESS

View file

@ -0,0 +1,109 @@
import sys
from optparse import Values
from typing import AbstractSet, List
from pip._internal.cli import cmdoptions
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.operations.freeze import freeze
from pip._internal.utils.compat import stdlib_pkgs
def _should_suppress_build_backends() -> bool:
return sys.version_info < (3, 12)
def _dev_pkgs() -> AbstractSet[str]:
pkgs = {"pip"}
if _should_suppress_build_backends():
pkgs |= {"setuptools", "distribute", "wheel"}
pkgs |= {"setuptools", "distribute", "wheel", "pkg-resources"}
return pkgs
class FreezeCommand(Command):
"""
Output installed packages in requirements format.
packages are listed in a case-insensitive sorted order.
"""
usage = """
%prog [options]"""
log_streams = ("ext://sys.stderr", "ext://sys.stderr")
def add_options(self) -> None:
self.cmd_opts.add_option(
"-r",
"--requirement",
dest="requirements",
action="append",
default=[],
metavar="file",
help=(
"Use the order in the given requirements file and its "
"comments when generating output. This option can be "
"used multiple times."
),
)
self.cmd_opts.add_option(
"-l",
"--local",
dest="local",
action="store_true",
default=False,
help=(
"If in a virtualenv that has global access, do not output "
"globally-installed packages."
),
)
self.cmd_opts.add_option(
"--user",
dest="user",
action="store_true",
default=False,
help="Only output packages installed in user-site.",
)
self.cmd_opts.add_option(cmdoptions.list_path())
self.cmd_opts.add_option(
"--all",
dest="freeze_all",
action="store_true",
help=(
"Do not skip these packages in the output:"
" {}".format(", ".join(_dev_pkgs()))
),
)
self.cmd_opts.add_option(
"--exclude-editable",
dest="exclude_editable",
action="store_true",
help="Exclude editable package from output.",
)
self.cmd_opts.add_option(cmdoptions.list_exclude())
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
skip = set(stdlib_pkgs)
if not options.freeze_all:
skip.update(_dev_pkgs())
if options.excludes:
skip.update(options.excludes)
cmdoptions.check_list_path_option(options)
for line in freeze(
requirement=options.requirements,
local_only=options.local,
user_only=options.user,
paths=options.path,
isolated=options.isolated_mode,
skip=skip,
exclude_editable=options.exclude_editable,
):
sys.stdout.write(line + "\n")
return SUCCESS

View file

@ -0,0 +1,59 @@
import hashlib
import logging
import sys
from optparse import Values
from typing import List
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.utils.hashes import FAVORITE_HASH, STRONG_HASHES
from pip._internal.utils.misc import read_chunks, write_output
logger = logging.getLogger(__name__)
class HashCommand(Command):
"""
Compute a hash of a local package archive.
These can be used with --hash in a requirements file to do repeatable
installs.
"""
usage = "%prog [options] <file> ..."
ignore_require_venv = True
def add_options(self) -> None:
self.cmd_opts.add_option(
"-a",
"--algorithm",
dest="algorithm",
choices=STRONG_HASHES,
action="store",
default=FAVORITE_HASH,
help="The hash algorithm to use: one of {}".format(
", ".join(STRONG_HASHES)
),
)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
if not args:
self.parser.print_usage(sys.stderr)
return ERROR
algorithm = options.algorithm
for path in args:
write_output(
"%s:\n--hash=%s:%s", path, algorithm, _hash_of_file(path, algorithm)
)
return SUCCESS
def _hash_of_file(path: str, algorithm: str) -> str:
"""Return the hash digest of a file."""
with open(path, "rb") as archive:
hash = hashlib.new(algorithm)
for chunk in read_chunks(archive):
hash.update(chunk)
return hash.hexdigest()

View file

@ -0,0 +1,41 @@
from optparse import Values
from typing import List
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.exceptions import CommandError
class HelpCommand(Command):
"""Show help for commands"""
usage = """
%prog <command>"""
ignore_require_venv = True
def run(self, options: Values, args: List[str]) -> int:
from pip._internal.commands import (
commands_dict,
create_command,
get_similar_commands,
)
try:
# 'pip help' with no args is handled by pip.__init__.parseopt()
cmd_name = args[0] # the command we need help for
except IndexError:
return SUCCESS
if cmd_name not in commands_dict:
guess = get_similar_commands(cmd_name)
msg = [f'unknown command "{cmd_name}"']
if guess:
msg.append(f'maybe you meant "{guess}"')
raise CommandError(" - ".join(msg))
command = create_command(cmd_name)
command.parser.print_help()
return SUCCESS

View file

@ -0,0 +1,139 @@
import logging
from optparse import Values
from typing import Any, Iterable, List, Optional, Union
from pip._vendor.packaging.version import LegacyVersion, Version
from pip._internal.cli import cmdoptions
from pip._internal.cli.req_command import IndexGroupCommand
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.commands.search import print_dist_installation_info
from pip._internal.exceptions import CommandError, DistributionNotFound, PipError
from pip._internal.index.collector import LinkCollector
from pip._internal.index.package_finder import PackageFinder
from pip._internal.models.selection_prefs import SelectionPreferences
from pip._internal.models.target_python import TargetPython
from pip._internal.network.session import PipSession
from pip._internal.utils.misc import write_output
logger = logging.getLogger(__name__)
class IndexCommand(IndexGroupCommand):
"""
Inspect information available from package indexes.
"""
ignore_require_venv = True
usage = """
%prog versions <package>
"""
def add_options(self) -> None:
cmdoptions.add_target_python_options(self.cmd_opts)
self.cmd_opts.add_option(cmdoptions.ignore_requires_python())
self.cmd_opts.add_option(cmdoptions.pre())
self.cmd_opts.add_option(cmdoptions.no_binary())
self.cmd_opts.add_option(cmdoptions.only_binary())
index_opts = cmdoptions.make_option_group(
cmdoptions.index_group,
self.parser,
)
self.parser.insert_option_group(0, index_opts)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
handlers = {
"versions": self.get_available_package_versions,
}
logger.warning(
"pip index is currently an experimental command. "
"It may be removed/changed in a future release "
"without prior warning."
)
# Determine action
if not args or args[0] not in handlers:
logger.error(
"Need an action (%s) to perform.",
", ".join(sorted(handlers)),
)
return ERROR
action = args[0]
# Error handling happens here, not in the action-handlers.
try:
handlers[action](options, args[1:])
except PipError as e:
logger.error(e.args[0])
return ERROR
return SUCCESS
def _build_package_finder(
self,
options: Values,
session: PipSession,
target_python: Optional[TargetPython] = None,
ignore_requires_python: Optional[bool] = None,
) -> PackageFinder:
"""
Create a package finder appropriate to the index command.
"""
link_collector = LinkCollector.create(session, options=options)
# Pass allow_yanked=False to ignore yanked versions.
selection_prefs = SelectionPreferences(
allow_yanked=False,
allow_all_prereleases=options.pre,
ignore_requires_python=ignore_requires_python,
)
return PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
target_python=target_python,
)
def get_available_package_versions(self, options: Values, args: List[Any]) -> None:
if len(args) != 1:
raise CommandError("You need to specify exactly one argument")
target_python = cmdoptions.make_target_python(options)
query = args[0]
with self._build_session(options) as session:
finder = self._build_package_finder(
options=options,
session=session,
target_python=target_python,
ignore_requires_python=options.ignore_requires_python,
)
versions: Iterable[Union[LegacyVersion, Version]] = (
candidate.version for candidate in finder.find_all_candidates(query)
)
if not options.pre:
# Remove prereleases
versions = (
version for version in versions if not version.is_prerelease
)
versions = set(versions)
if not versions:
raise DistributionNotFound(
f"No matching distribution found for {query}"
)
formatted_versions = [str(ver) for ver in sorted(versions, reverse=True)]
latest = formatted_versions[0]
write_output(f"{query} ({latest})")
write_output("Available versions: {}".format(", ".join(formatted_versions)))
print_dist_installation_info(query, latest)

View file

@ -0,0 +1,92 @@
import logging
from optparse import Values
from typing import Any, Dict, List
from pip._vendor.packaging.markers import default_environment
from pip._vendor.rich import print_json
from pip import __version__
from pip._internal.cli import cmdoptions
from pip._internal.cli.req_command import Command
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.metadata import BaseDistribution, get_environment
from pip._internal.utils.compat import stdlib_pkgs
from pip._internal.utils.urls import path_to_url
logger = logging.getLogger(__name__)
class InspectCommand(Command):
"""
Inspect the content of a Python environment and produce a report in JSON format.
"""
ignore_require_venv = True
usage = """
%prog [options]"""
def add_options(self) -> None:
self.cmd_opts.add_option(
"--local",
action="store_true",
default=False,
help=(
"If in a virtualenv that has global access, do not list "
"globally-installed packages."
),
)
self.cmd_opts.add_option(
"--user",
dest="user",
action="store_true",
default=False,
help="Only output packages installed in user-site.",
)
self.cmd_opts.add_option(cmdoptions.list_path())
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
cmdoptions.check_list_path_option(options)
dists = get_environment(options.path).iter_installed_distributions(
local_only=options.local,
user_only=options.user,
skip=set(stdlib_pkgs),
)
output = {
"version": "1",
"pip_version": __version__,
"installed": [self._dist_to_dict(dist) for dist in dists],
"environment": default_environment(),
# TODO tags? scheme?
}
print_json(data=output)
return SUCCESS
def _dist_to_dict(self, dist: BaseDistribution) -> Dict[str, Any]:
res: Dict[str, Any] = {
"metadata": dist.metadata_dict,
"metadata_location": dist.info_location,
}
# direct_url. Note that we don't have download_info (as in the installation
# report) since it is not recorded in installed metadata.
direct_url = dist.direct_url
if direct_url is not None:
res["direct_url"] = direct_url.to_dict()
else:
# Emulate direct_url for legacy editable installs.
editable_project_location = dist.editable_project_location
if editable_project_location is not None:
res["direct_url"] = {
"url": path_to_url(editable_project_location),
"dir_info": {
"editable": True,
},
}
# installer
installer = dist.installer
if dist.installer:
res["installer"] = installer
# requested
if dist.installed_with_dist_info:
res["requested"] = dist.requested
return res

View file

@ -0,0 +1,774 @@
import errno
import json
import operator
import os
import shutil
import site
from optparse import SUPPRESS_HELP, Values
from typing import List, Optional
from pip._vendor.rich import print_json
from pip._internal.cache import WheelCache
from pip._internal.cli import cmdoptions
from pip._internal.cli.cmdoptions import make_target_python
from pip._internal.cli.req_command import (
RequirementCommand,
warn_if_run_as_root,
with_cleanup,
)
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.exceptions import CommandError, InstallationError
from pip._internal.locations import get_scheme
from pip._internal.metadata import get_environment
from pip._internal.models.installation_report import InstallationReport
from pip._internal.operations.build.build_tracker import get_build_tracker
from pip._internal.operations.check import ConflictDetails, check_install_conflicts
from pip._internal.req import install_given_reqs
from pip._internal.req.req_install import (
InstallRequirement,
check_legacy_setup_py_options,
)
from pip._internal.utils.compat import WINDOWS
from pip._internal.utils.filesystem import test_writable_dir
from pip._internal.utils.logging import getLogger
from pip._internal.utils.misc import (
check_externally_managed,
ensure_dir,
get_pip_version,
protect_pip_from_modification_on_windows,
write_output,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.virtualenv import (
running_under_virtualenv,
virtualenv_no_global,
)
from pip._internal.wheel_builder import build, should_build_for_install_command
logger = getLogger(__name__)
class InstallCommand(RequirementCommand):
"""
Install packages from:
- PyPI (and other indexes) using requirement specifiers.
- VCS project urls.
- Local project directories.
- Local or remote source archives.
pip also supports installing from "requirements files", which provide
an easy way to specify a whole environment to be installed.
"""
usage = """
%prog [options] <requirement specifier> [package-index-options] ...
%prog [options] -r <requirements file> [package-index-options] ...
%prog [options] [-e] <vcs project url> ...
%prog [options] [-e] <local project path> ...
%prog [options] <archive url/path> ..."""
def add_options(self) -> None:
self.cmd_opts.add_option(cmdoptions.requirements())
self.cmd_opts.add_option(cmdoptions.constraints())
self.cmd_opts.add_option(cmdoptions.no_deps())
self.cmd_opts.add_option(cmdoptions.pre())
self.cmd_opts.add_option(cmdoptions.editable())
self.cmd_opts.add_option(
"--dry-run",
action="store_true",
dest="dry_run",
default=False,
help=(
"Don't actually install anything, just print what would be. "
"Can be used in combination with --ignore-installed "
"to 'resolve' the requirements."
),
)
self.cmd_opts.add_option(
"-t",
"--target",
dest="target_dir",
metavar="dir",
default=None,
help=(
"Install packages into <dir>. "
"By default this will not replace existing files/folders in "
"<dir>. Use --upgrade to replace existing packages in <dir> "
"with new versions."
),
)
cmdoptions.add_target_python_options(self.cmd_opts)
self.cmd_opts.add_option(
"--user",
dest="use_user_site",
action="store_true",
help=(
"Install to the Python user install directory for your "
"platform. Typically ~/.local/, or %APPDATA%\\Python on "
"Windows. (See the Python documentation for site.USER_BASE "
"for full details.)"
),
)
self.cmd_opts.add_option(
"--no-user",
dest="use_user_site",
action="store_false",
help=SUPPRESS_HELP,
)
self.cmd_opts.add_option(
"--root",
dest="root_path",
metavar="dir",
default=None,
help="Install everything relative to this alternate root directory.",
)
self.cmd_opts.add_option(
"--prefix",
dest="prefix_path",
metavar="dir",
default=None,
help=(
"Installation prefix where lib, bin and other top-level "
"folders are placed. Note that the resulting installation may "
"contain scripts and other resources which reference the "
"Python interpreter of pip, and not that of ``--prefix``. "
"See also the ``--python`` option if the intention is to "
"install packages into another (possibly pip-free) "
"environment."
),
)
self.cmd_opts.add_option(cmdoptions.src())
self.cmd_opts.add_option(
"-U",
"--upgrade",
dest="upgrade",
action="store_true",
help=(
"Upgrade all specified packages to the newest available "
"version. The handling of dependencies depends on the "
"upgrade-strategy used."
),
)
self.cmd_opts.add_option(
"--upgrade-strategy",
dest="upgrade_strategy",
default="only-if-needed",
choices=["only-if-needed", "eager"],
help=(
"Determines how dependency upgrading should be handled "
"[default: %default]. "
'"eager" - dependencies are upgraded regardless of '
"whether the currently installed version satisfies the "
"requirements of the upgraded package(s). "
'"only-if-needed" - are upgraded only when they do not '
"satisfy the requirements of the upgraded package(s)."
),
)
self.cmd_opts.add_option(
"--force-reinstall",
dest="force_reinstall",
action="store_true",
help="Reinstall all packages even if they are already up-to-date.",
)
self.cmd_opts.add_option(
"-I",
"--ignore-installed",
dest="ignore_installed",
action="store_true",
help=(
"Ignore the installed packages, overwriting them. "
"This can break your system if the existing package "
"is of a different version or was installed "
"with a different package manager!"
),
)
self.cmd_opts.add_option(cmdoptions.ignore_requires_python())
self.cmd_opts.add_option(cmdoptions.no_build_isolation())
self.cmd_opts.add_option(cmdoptions.use_pep517())
self.cmd_opts.add_option(cmdoptions.no_use_pep517())
self.cmd_opts.add_option(cmdoptions.check_build_deps())
self.cmd_opts.add_option(cmdoptions.override_externally_managed())
self.cmd_opts.add_option(cmdoptions.config_settings())
self.cmd_opts.add_option(cmdoptions.global_options())
self.cmd_opts.add_option(
"--compile",
action="store_true",
dest="compile",
default=True,
help="Compile Python source files to bytecode",
)
self.cmd_opts.add_option(
"--no-compile",
action="store_false",
dest="compile",
help="Do not compile Python source files to bytecode",
)
self.cmd_opts.add_option(
"--no-warn-script-location",
action="store_false",
dest="warn_script_location",
default=True,
help="Do not warn when installing scripts outside PATH",
)
self.cmd_opts.add_option(
"--no-warn-conflicts",
action="store_false",
dest="warn_about_conflicts",
default=True,
help="Do not warn about broken dependencies",
)
self.cmd_opts.add_option(cmdoptions.no_binary())
self.cmd_opts.add_option(cmdoptions.only_binary())
self.cmd_opts.add_option(cmdoptions.prefer_binary())
self.cmd_opts.add_option(cmdoptions.require_hashes())
self.cmd_opts.add_option(cmdoptions.progress_bar())
self.cmd_opts.add_option(cmdoptions.root_user_action())
index_opts = cmdoptions.make_option_group(
cmdoptions.index_group,
self.parser,
)
self.parser.insert_option_group(0, index_opts)
self.parser.insert_option_group(0, self.cmd_opts)
self.cmd_opts.add_option(
"--report",
dest="json_report_file",
metavar="file",
default=None,
help=(
"Generate a JSON file describing what pip did to install "
"the provided requirements. "
"Can be used in combination with --dry-run and --ignore-installed "
"to 'resolve' the requirements. "
"When - is used as file name it writes to stdout. "
"When writing to stdout, please combine with the --quiet option "
"to avoid mixing pip logging output with JSON output."
),
)
@with_cleanup
def run(self, options: Values, args: List[str]) -> int:
if options.use_user_site and options.target_dir is not None:
raise CommandError("Can not combine '--user' and '--target'")
# Check whether the environment we're installing into is externally
# managed, as specified in PEP 668. Specifying --root, --target, or
# --prefix disables the check, since there's no reliable way to locate
# the EXTERNALLY-MANAGED file for those cases. An exception is also
# made specifically for "--dry-run --report" for convenience.
installing_into_current_environment = (
not (options.dry_run and options.json_report_file)
and options.root_path is None
and options.target_dir is None
and options.prefix_path is None
)
if (
installing_into_current_environment
and not options.override_externally_managed
):
check_externally_managed()
upgrade_strategy = "to-satisfy-only"
if options.upgrade:
upgrade_strategy = options.upgrade_strategy
cmdoptions.check_dist_restriction(options, check_target=True)
logger.verbose("Using %s", get_pip_version())
options.use_user_site = decide_user_install(
options.use_user_site,
prefix_path=options.prefix_path,
target_dir=options.target_dir,
root_path=options.root_path,
isolated_mode=options.isolated_mode,
)
target_temp_dir: Optional[TempDirectory] = None
target_temp_dir_path: Optional[str] = None
if options.target_dir:
options.ignore_installed = True
options.target_dir = os.path.abspath(options.target_dir)
if (
# fmt: off
os.path.exists(options.target_dir) and
not os.path.isdir(options.target_dir)
# fmt: on
):
raise CommandError(
"Target path exists but is not a directory, will not continue."
)
# Create a target directory for using with the target option
target_temp_dir = TempDirectory(kind="target")
target_temp_dir_path = target_temp_dir.path
self.enter_context(target_temp_dir)
global_options = options.global_options or []
session = self.get_default_session(options)
target_python = make_target_python(options)
finder = self._build_package_finder(
options=options,
session=session,
target_python=target_python,
ignore_requires_python=options.ignore_requires_python,
)
build_tracker = self.enter_context(get_build_tracker())
directory = TempDirectory(
delete=not options.no_clean,
kind="install",
globally_managed=True,
)
try:
reqs = self.get_requirements(args, options, finder, session)
check_legacy_setup_py_options(options, reqs)
wheel_cache = WheelCache(options.cache_dir)
# Only when installing is it permitted to use PEP 660.
# In other circumstances (pip wheel, pip download) we generate
# regular (i.e. non editable) metadata and wheels.
for req in reqs:
req.permit_editable_wheels = True
preparer = self.make_requirement_preparer(
temp_build_dir=directory,
options=options,
build_tracker=build_tracker,
session=session,
finder=finder,
use_user_site=options.use_user_site,
verbosity=self.verbosity,
)
resolver = self.make_resolver(
preparer=preparer,
finder=finder,
options=options,
wheel_cache=wheel_cache,
use_user_site=options.use_user_site,
ignore_installed=options.ignore_installed,
ignore_requires_python=options.ignore_requires_python,
force_reinstall=options.force_reinstall,
upgrade_strategy=upgrade_strategy,
use_pep517=options.use_pep517,
)
self.trace_basic_info(finder)
requirement_set = resolver.resolve(
reqs, check_supported_wheels=not options.target_dir
)
if options.json_report_file:
report = InstallationReport(requirement_set.requirements_to_install)
if options.json_report_file == "-":
print_json(data=report.to_dict())
else:
with open(options.json_report_file, "w", encoding="utf-8") as f:
json.dump(report.to_dict(), f, indent=2, ensure_ascii=False)
if options.dry_run:
# In non dry-run mode, the legacy versions and specifiers check
# will be done as part of conflict detection.
requirement_set.warn_legacy_versions_and_specifiers()
would_install_items = sorted(
(r.metadata["name"], r.metadata["version"])
for r in requirement_set.requirements_to_install
)
if would_install_items:
write_output(
"Would install %s",
" ".join("-".join(item) for item in would_install_items),
)
return SUCCESS
try:
pip_req = requirement_set.get_requirement("pip")
except KeyError:
modifying_pip = False
else:
# If we're not replacing an already installed pip,
# we're not modifying it.
modifying_pip = pip_req.satisfied_by is None
protect_pip_from_modification_on_windows(modifying_pip=modifying_pip)
reqs_to_build = [
r
for r in requirement_set.requirements.values()
if should_build_for_install_command(r)
]
_, build_failures = build(
reqs_to_build,
wheel_cache=wheel_cache,
verify=True,
build_options=[],
global_options=global_options,
)
if build_failures:
raise InstallationError(
"Could not build wheels for {}, which is required to "
"install pyproject.toml-based projects".format(
", ".join(r.name for r in build_failures) # type: ignore
)
)
to_install = resolver.get_installation_order(requirement_set)
# Check for conflicts in the package set we're installing.
conflicts: Optional[ConflictDetails] = None
should_warn_about_conflicts = (
not options.ignore_dependencies and options.warn_about_conflicts
)
if should_warn_about_conflicts:
conflicts = self._determine_conflicts(to_install)
# Don't warn about script install locations if
# --target or --prefix has been specified
warn_script_location = options.warn_script_location
if options.target_dir or options.prefix_path:
warn_script_location = False
installed = install_given_reqs(
to_install,
global_options,
root=options.root_path,
home=target_temp_dir_path,
prefix=options.prefix_path,
warn_script_location=warn_script_location,
use_user_site=options.use_user_site,
pycompile=options.compile,
)
lib_locations = get_lib_location_guesses(
user=options.use_user_site,
home=target_temp_dir_path,
root=options.root_path,
prefix=options.prefix_path,
isolated=options.isolated_mode,
)
env = get_environment(lib_locations)
installed.sort(key=operator.attrgetter("name"))
items = []
for result in installed:
item = result.name
try:
installed_dist = env.get_distribution(item)
if installed_dist is not None:
item = f"{item}-{installed_dist.version}"
except Exception:
pass
items.append(item)
if conflicts is not None:
self._warn_about_conflicts(
conflicts,
resolver_variant=self.determine_resolver_variant(options),
)
installed_desc = " ".join(items)
if installed_desc:
write_output(
"Successfully installed %s",
installed_desc,
)
except OSError as error:
show_traceback = self.verbosity >= 1
message = create_os_error_message(
error,
show_traceback,
options.use_user_site,
)
logger.error(message, exc_info=show_traceback)
return ERROR
if options.target_dir:
assert target_temp_dir
self._handle_target_dir(
options.target_dir, target_temp_dir, options.upgrade
)
if options.root_user_action == "warn":
warn_if_run_as_root()
return SUCCESS
def _handle_target_dir(
self, target_dir: str, target_temp_dir: TempDirectory, upgrade: bool
) -> None:
ensure_dir(target_dir)
# Checking both purelib and platlib directories for installed
# packages to be moved to target directory
lib_dir_list = []
# Checking both purelib and platlib directories for installed
# packages to be moved to target directory
scheme = get_scheme("", home=target_temp_dir.path)
purelib_dir = scheme.purelib
platlib_dir = scheme.platlib
data_dir = scheme.data
if os.path.exists(purelib_dir):
lib_dir_list.append(purelib_dir)
if os.path.exists(platlib_dir) and platlib_dir != purelib_dir:
lib_dir_list.append(platlib_dir)
if os.path.exists(data_dir):
lib_dir_list.append(data_dir)
for lib_dir in lib_dir_list:
for item in os.listdir(lib_dir):
if lib_dir == data_dir:
ddir = os.path.join(data_dir, item)
if any(s.startswith(ddir) for s in lib_dir_list[:-1]):
continue
target_item_dir = os.path.join(target_dir, item)
if os.path.exists(target_item_dir):
if not upgrade:
logger.warning(
"Target directory %s already exists. Specify "
"--upgrade to force replacement.",
target_item_dir,
)
continue
if os.path.islink(target_item_dir):
logger.warning(
"Target directory %s already exists and is "
"a link. pip will not automatically replace "
"links, please remove if replacement is "
"desired.",
target_item_dir,
)
continue
if os.path.isdir(target_item_dir):
shutil.rmtree(target_item_dir)
else:
os.remove(target_item_dir)
shutil.move(os.path.join(lib_dir, item), target_item_dir)
def _determine_conflicts(
self, to_install: List[InstallRequirement]
) -> Optional[ConflictDetails]:
try:
return check_install_conflicts(to_install)
except Exception:
logger.exception(
"Error while checking for conflicts. Please file an issue on "
"pip's issue tracker: https://github.com/pypa/pip/issues/new"
)
return None
def _warn_about_conflicts(
self, conflict_details: ConflictDetails, resolver_variant: str
) -> None:
package_set, (missing, conflicting) = conflict_details
if not missing and not conflicting:
return
parts: List[str] = []
if resolver_variant == "legacy":
parts.append(
"pip's legacy dependency resolver does not consider dependency "
"conflicts when selecting packages. This behaviour is the "
"source of the following dependency conflicts."
)
else:
assert resolver_variant == "resolvelib"
parts.append(
"pip's dependency resolver does not currently take into account "
"all the packages that are installed. This behaviour is the "
"source of the following dependency conflicts."
)
# NOTE: There is some duplication here, with commands/check.py
for project_name in missing:
version = package_set[project_name][0]
for dependency in missing[project_name]:
message = (
f"{project_name} {version} requires {dependency[1]}, "
"which is not installed."
)
parts.append(message)
for project_name in conflicting:
version = package_set[project_name][0]
for dep_name, dep_version, req in conflicting[project_name]:
message = (
"{name} {version} requires {requirement}, but {you} have "
"{dep_name} {dep_version} which is incompatible."
).format(
name=project_name,
version=version,
requirement=req,
dep_name=dep_name,
dep_version=dep_version,
you=("you" if resolver_variant == "resolvelib" else "you'll"),
)
parts.append(message)
logger.critical("\n".join(parts))
def get_lib_location_guesses(
user: bool = False,
home: Optional[str] = None,
root: Optional[str] = None,
isolated: bool = False,
prefix: Optional[str] = None,
) -> List[str]:
scheme = get_scheme(
"",
user=user,
home=home,
root=root,
isolated=isolated,
prefix=prefix,
)
return [scheme.purelib, scheme.platlib]
def site_packages_writable(root: Optional[str], isolated: bool) -> bool:
return all(
test_writable_dir(d)
for d in set(get_lib_location_guesses(root=root, isolated=isolated))
)
def decide_user_install(
use_user_site: Optional[bool],
prefix_path: Optional[str] = None,
target_dir: Optional[str] = None,
root_path: Optional[str] = None,
isolated_mode: bool = False,
) -> bool:
"""Determine whether to do a user install based on the input options.
If use_user_site is False, no additional checks are done.
If use_user_site is True, it is checked for compatibility with other
options.
If use_user_site is None, the default behaviour depends on the environment,
which is provided by the other arguments.
"""
# In some cases (config from tox), use_user_site can be set to an integer
# rather than a bool, which 'use_user_site is False' wouldn't catch.
if (use_user_site is not None) and (not use_user_site):
logger.debug("Non-user install by explicit request")
return False
if use_user_site:
if prefix_path:
raise CommandError(
"Can not combine '--user' and '--prefix' as they imply "
"different installation locations"
)
if virtualenv_no_global():
raise InstallationError(
"Can not perform a '--user' install. User site-packages "
"are not visible in this virtualenv."
)
logger.debug("User install by explicit request")
return True
# If we are here, user installs have not been explicitly requested/avoided
assert use_user_site is None
# user install incompatible with --prefix/--target
if prefix_path or target_dir:
logger.debug("Non-user install due to --prefix or --target option")
return False
# If user installs are not enabled, choose a non-user install
if not site.ENABLE_USER_SITE:
logger.debug("Non-user install because user site-packages disabled")
return False
# If we have permission for a non-user install, do that,
# otherwise do a user install.
if site_packages_writable(root=root_path, isolated=isolated_mode):
logger.debug("Non-user install because site-packages writeable")
return False
logger.info(
"Defaulting to user installation because normal site-packages "
"is not writeable"
)
return True
def create_os_error_message(
error: OSError, show_traceback: bool, using_user_site: bool
) -> str:
"""Format an error message for an OSError
It may occur anytime during the execution of the install command.
"""
parts = []
# Mention the error if we are not going to show a traceback
parts.append("Could not install packages due to an OSError")
if not show_traceback:
parts.append(": ")
parts.append(str(error))
else:
parts.append(".")
# Spilt the error indication from a helper message (if any)
parts[-1] += "\n"
# Suggest useful actions to the user:
# (1) using user site-packages or (2) verifying the permissions
if error.errno == errno.EACCES:
user_option_part = "Consider using the `--user` option"
permissions_part = "Check the permissions"
if not running_under_virtualenv() and not using_user_site:
parts.extend(
[
user_option_part,
" or ",
permissions_part.lower(),
]
)
else:
parts.append(permissions_part)
parts.append(".\n")
# Suggest the user to enable Long Paths if path length is
# more than 260
if (
WINDOWS
and error.errno == errno.ENOENT
and error.filename
and len(error.filename) > 260
):
parts.append(
"HINT: This error might have occurred since "
"this system does not have Windows Long Path "
"support enabled. You can find information on "
"how to enable this at "
"https://pip.pypa.io/warnings/enable-long-paths\n"
)
return "".join(parts).strip() + "\n"

View file

@ -0,0 +1,370 @@
import json
import logging
from optparse import Values
from typing import TYPE_CHECKING, Generator, List, Optional, Sequence, Tuple, cast
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.cli import cmdoptions
from pip._internal.cli.req_command import IndexGroupCommand
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.exceptions import CommandError
from pip._internal.index.collector import LinkCollector
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution, get_environment
from pip._internal.models.selection_prefs import SelectionPreferences
from pip._internal.network.session import PipSession
from pip._internal.utils.compat import stdlib_pkgs
from pip._internal.utils.misc import tabulate, write_output
if TYPE_CHECKING:
from pip._internal.metadata.base import DistributionVersion
class _DistWithLatestInfo(BaseDistribution):
"""Give the distribution object a couple of extra fields.
These will be populated during ``get_outdated()``. This is dirty but
makes the rest of the code much cleaner.
"""
latest_version: DistributionVersion
latest_filetype: str
_ProcessedDists = Sequence[_DistWithLatestInfo]
from pip._vendor.packaging.version import parse
logger = logging.getLogger(__name__)
class ListCommand(IndexGroupCommand):
"""
List installed packages, including editables.
Packages are listed in a case-insensitive sorted order.
"""
ignore_require_venv = True
usage = """
%prog [options]"""
def add_options(self) -> None:
self.cmd_opts.add_option(
"-o",
"--outdated",
action="store_true",
default=False,
help="List outdated packages",
)
self.cmd_opts.add_option(
"-u",
"--uptodate",
action="store_true",
default=False,
help="List uptodate packages",
)
self.cmd_opts.add_option(
"-e",
"--editable",
action="store_true",
default=False,
help="List editable projects.",
)
self.cmd_opts.add_option(
"-l",
"--local",
action="store_true",
default=False,
help=(
"If in a virtualenv that has global access, do not list "
"globally-installed packages."
),
)
self.cmd_opts.add_option(
"--user",
dest="user",
action="store_true",
default=False,
help="Only output packages installed in user-site.",
)
self.cmd_opts.add_option(cmdoptions.list_path())
self.cmd_opts.add_option(
"--pre",
action="store_true",
default=False,
help=(
"Include pre-release and development versions. By default, "
"pip only finds stable versions."
),
)
self.cmd_opts.add_option(
"--format",
action="store",
dest="list_format",
default="columns",
choices=("columns", "freeze", "json"),
help=(
"Select the output format among: columns (default), freeze, or json. "
"The 'freeze' format cannot be used with the --outdated option."
),
)
self.cmd_opts.add_option(
"--not-required",
action="store_true",
dest="not_required",
help="List packages that are not dependencies of installed packages.",
)
self.cmd_opts.add_option(
"--exclude-editable",
action="store_false",
dest="include_editable",
help="Exclude editable package from output.",
)
self.cmd_opts.add_option(
"--include-editable",
action="store_true",
dest="include_editable",
help="Include editable package from output.",
default=True,
)
self.cmd_opts.add_option(cmdoptions.list_exclude())
index_opts = cmdoptions.make_option_group(cmdoptions.index_group, self.parser)
self.parser.insert_option_group(0, index_opts)
self.parser.insert_option_group(0, self.cmd_opts)
def _build_package_finder(
self, options: Values, session: PipSession
) -> PackageFinder:
"""
Create a package finder appropriate to this list command.
"""
link_collector = LinkCollector.create(session, options=options)
# Pass allow_yanked=False to ignore yanked versions.
selection_prefs = SelectionPreferences(
allow_yanked=False,
allow_all_prereleases=options.pre,
)
return PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
)
def run(self, options: Values, args: List[str]) -> int:
if options.outdated and options.uptodate:
raise CommandError("Options --outdated and --uptodate cannot be combined.")
if options.outdated and options.list_format == "freeze":
raise CommandError(
"List format 'freeze' cannot be used with the --outdated option."
)
cmdoptions.check_list_path_option(options)
skip = set(stdlib_pkgs)
if options.excludes:
skip.update(canonicalize_name(n) for n in options.excludes)
packages: "_ProcessedDists" = [
cast("_DistWithLatestInfo", d)
for d in get_environment(options.path).iter_installed_distributions(
local_only=options.local,
user_only=options.user,
editables_only=options.editable,
include_editables=options.include_editable,
skip=skip,
)
]
# get_not_required must be called firstly in order to find and
# filter out all dependencies correctly. Otherwise a package
# can't be identified as requirement because some parent packages
# could be filtered out before.
if options.not_required:
packages = self.get_not_required(packages, options)
if options.outdated:
packages = self.get_outdated(packages, options)
elif options.uptodate:
packages = self.get_uptodate(packages, options)
self.output_package_listing(packages, options)
return SUCCESS
def get_outdated(
self, packages: "_ProcessedDists", options: Values
) -> "_ProcessedDists":
return [
dist
for dist in self.iter_packages_latest_infos(packages, options)
if parse(str(dist.latest_version)) > parse(str(dist.version))
]
def get_uptodate(
self, packages: "_ProcessedDists", options: Values
) -> "_ProcessedDists":
return [
dist
for dist in self.iter_packages_latest_infos(packages, options)
if parse(str(dist.latest_version)) == parse(str(dist.version))
]
def get_not_required(
self, packages: "_ProcessedDists", options: Values
) -> "_ProcessedDists":
dep_keys = {
canonicalize_name(dep.name)
for dist in packages
for dep in (dist.iter_dependencies() or ())
}
# Create a set to remove duplicate packages, and cast it to a list
# to keep the return type consistent with get_outdated and
# get_uptodate
return list({pkg for pkg in packages if pkg.canonical_name not in dep_keys})
def iter_packages_latest_infos(
self, packages: "_ProcessedDists", options: Values
) -> Generator["_DistWithLatestInfo", None, None]:
with self._build_session(options) as session:
finder = self._build_package_finder(options, session)
def latest_info(
dist: "_DistWithLatestInfo",
) -> Optional["_DistWithLatestInfo"]:
all_candidates = finder.find_all_candidates(dist.canonical_name)
if not options.pre:
# Remove prereleases
all_candidates = [
candidate
for candidate in all_candidates
if not candidate.version.is_prerelease
]
evaluator = finder.make_candidate_evaluator(
project_name=dist.canonical_name,
)
best_candidate = evaluator.sort_best_candidate(all_candidates)
if best_candidate is None:
return None
remote_version = best_candidate.version
if best_candidate.link.is_wheel:
typ = "wheel"
else:
typ = "sdist"
dist.latest_version = remote_version
dist.latest_filetype = typ
return dist
for dist in map(latest_info, packages):
if dist is not None:
yield dist
def output_package_listing(
self, packages: "_ProcessedDists", options: Values
) -> None:
packages = sorted(
packages,
key=lambda dist: dist.canonical_name,
)
if options.list_format == "columns" and packages:
data, header = format_for_columns(packages, options)
self.output_package_listing_columns(data, header)
elif options.list_format == "freeze":
for dist in packages:
if options.verbose >= 1:
write_output(
"%s==%s (%s)", dist.raw_name, dist.version, dist.location
)
else:
write_output("%s==%s", dist.raw_name, dist.version)
elif options.list_format == "json":
write_output(format_for_json(packages, options))
def output_package_listing_columns(
self, data: List[List[str]], header: List[str]
) -> None:
# insert the header first: we need to know the size of column names
if len(data) > 0:
data.insert(0, header)
pkg_strings, sizes = tabulate(data)
# Create and add a separator.
if len(data) > 0:
pkg_strings.insert(1, " ".join("-" * x for x in sizes))
for val in pkg_strings:
write_output(val)
def format_for_columns(
pkgs: "_ProcessedDists", options: Values
) -> Tuple[List[List[str]], List[str]]:
"""
Convert the package data into something usable
by output_package_listing_columns.
"""
header = ["Package", "Version"]
running_outdated = options.outdated
if running_outdated:
header.extend(["Latest", "Type"])
has_editables = any(x.editable for x in pkgs)
if has_editables:
header.append("Editable project location")
if options.verbose >= 1:
header.append("Location")
if options.verbose >= 1:
header.append("Installer")
data = []
for proj in pkgs:
# if we're working on the 'outdated' list, separate out the
# latest_version and type
row = [proj.raw_name, str(proj.version)]
if running_outdated:
row.append(str(proj.latest_version))
row.append(proj.latest_filetype)
if has_editables:
row.append(proj.editable_project_location or "")
if options.verbose >= 1:
row.append(proj.location or "")
if options.verbose >= 1:
row.append(proj.installer)
data.append(row)
return data, header
def format_for_json(packages: "_ProcessedDists", options: Values) -> str:
data = []
for dist in packages:
info = {
"name": dist.raw_name,
"version": str(dist.version),
}
if options.verbose >= 1:
info["location"] = dist.location or ""
info["installer"] = dist.installer
if options.outdated:
info["latest_version"] = str(dist.latest_version)
info["latest_filetype"] = dist.latest_filetype
editable_project_location = dist.editable_project_location
if editable_project_location:
info["editable_project_location"] = editable_project_location
data.append(info)
return json.dumps(data)

View file

@ -0,0 +1,174 @@
import logging
import shutil
import sys
import textwrap
import xmlrpc.client
from collections import OrderedDict
from optparse import Values
from typing import TYPE_CHECKING, Dict, List, Optional
from pip._vendor.packaging.version import parse as parse_version
from pip._internal.cli.base_command import Command
from pip._internal.cli.req_command import SessionCommandMixin
from pip._internal.cli.status_codes import NO_MATCHES_FOUND, SUCCESS
from pip._internal.exceptions import CommandError
from pip._internal.metadata import get_default_environment
from pip._internal.models.index import PyPI
from pip._internal.network.xmlrpc import PipXmlrpcTransport
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import write_output
if TYPE_CHECKING:
from typing import TypedDict
class TransformedHit(TypedDict):
name: str
summary: str
versions: List[str]
logger = logging.getLogger(__name__)
class SearchCommand(Command, SessionCommandMixin):
"""Search for PyPI packages whose name or summary contains <query>."""
usage = """
%prog [options] <query>"""
ignore_require_venv = True
def add_options(self) -> None:
self.cmd_opts.add_option(
"-i",
"--index",
dest="index",
metavar="URL",
default=PyPI.pypi_url,
help="Base URL of Python Package Index (default %default)",
)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
if not args:
raise CommandError("Missing required argument (search query).")
query = args
pypi_hits = self.search(query, options)
hits = transform_hits(pypi_hits)
terminal_width = None
if sys.stdout.isatty():
terminal_width = shutil.get_terminal_size()[0]
print_results(hits, terminal_width=terminal_width)
if pypi_hits:
return SUCCESS
return NO_MATCHES_FOUND
def search(self, query: List[str], options: Values) -> List[Dict[str, str]]:
index_url = options.index
session = self.get_default_session(options)
transport = PipXmlrpcTransport(index_url, session)
pypi = xmlrpc.client.ServerProxy(index_url, transport)
try:
hits = pypi.search({"name": query, "summary": query}, "or")
except xmlrpc.client.Fault as fault:
message = "XMLRPC request failed [code: {code}]\n{string}".format(
code=fault.faultCode,
string=fault.faultString,
)
raise CommandError(message)
assert isinstance(hits, list)
return hits
def transform_hits(hits: List[Dict[str, str]]) -> List["TransformedHit"]:
"""
The list from pypi is really a list of versions. We want a list of
packages with the list of versions stored inline. This converts the
list from pypi into one we can use.
"""
packages: Dict[str, "TransformedHit"] = OrderedDict()
for hit in hits:
name = hit["name"]
summary = hit["summary"]
version = hit["version"]
if name not in packages.keys():
packages[name] = {
"name": name,
"summary": summary,
"versions": [version],
}
else:
packages[name]["versions"].append(version)
# if this is the highest version, replace summary and score
if version == highest_version(packages[name]["versions"]):
packages[name]["summary"] = summary
return list(packages.values())
def print_dist_installation_info(name: str, latest: str) -> None:
env = get_default_environment()
dist = env.get_distribution(name)
if dist is not None:
with indent_log():
if dist.version == latest:
write_output("INSTALLED: %s (latest)", dist.version)
else:
write_output("INSTALLED: %s", dist.version)
if parse_version(latest).pre:
write_output(
"LATEST: %s (pre-release; install"
" with `pip install --pre`)",
latest,
)
else:
write_output("LATEST: %s", latest)
def print_results(
hits: List["TransformedHit"],
name_column_width: Optional[int] = None,
terminal_width: Optional[int] = None,
) -> None:
if not hits:
return
if name_column_width is None:
name_column_width = (
max(
[
len(hit["name"]) + len(highest_version(hit.get("versions", ["-"])))
for hit in hits
]
)
+ 4
)
for hit in hits:
name = hit["name"]
summary = hit["summary"] or ""
latest = highest_version(hit.get("versions", ["-"]))
if terminal_width is not None:
target_width = terminal_width - name_column_width - 5
if target_width > 10:
# wrap and indent summary to fit terminal
summary_lines = textwrap.wrap(summary, target_width)
summary = ("\n" + " " * (name_column_width + 3)).join(summary_lines)
name_latest = f"{name} ({latest})"
line = f"{name_latest:{name_column_width}} - {summary}"
try:
write_output(line)
print_dist_installation_info(name, latest)
except UnicodeEncodeError:
pass
def highest_version(versions: List[str]) -> str:
return max(versions, key=parse_version)

View file

@ -0,0 +1,189 @@
import logging
from optparse import Values
from typing import Generator, Iterable, Iterator, List, NamedTuple, Optional
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.cli.base_command import Command
from pip._internal.cli.status_codes import ERROR, SUCCESS
from pip._internal.metadata import BaseDistribution, get_default_environment
from pip._internal.utils.misc import write_output
logger = logging.getLogger(__name__)
class ShowCommand(Command):
"""
Show information about one or more installed packages.
The output is in RFC-compliant mail header format.
"""
usage = """
%prog [options] <package> ..."""
ignore_require_venv = True
def add_options(self) -> None:
self.cmd_opts.add_option(
"-f",
"--files",
dest="files",
action="store_true",
default=False,
help="Show the full list of installed files for each package.",
)
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
if not args:
logger.warning("ERROR: Please provide a package name or names.")
return ERROR
query = args
results = search_packages_info(query)
if not print_results(
results, list_files=options.files, verbose=options.verbose
):
return ERROR
return SUCCESS
class _PackageInfo(NamedTuple):
name: str
version: str
location: str
editable_project_location: Optional[str]
requires: List[str]
required_by: List[str]
installer: str
metadata_version: str
classifiers: List[str]
summary: str
homepage: str
project_urls: List[str]
author: str
author_email: str
license: str
entry_points: List[str]
files: Optional[List[str]]
def search_packages_info(query: List[str]) -> Generator[_PackageInfo, None, None]:
"""
Gather details from installed distributions. Print distribution name,
version, location, and installed files. Installed files requires a
pip generated 'installed-files.txt' in the distributions '.egg-info'
directory.
"""
env = get_default_environment()
installed = {dist.canonical_name: dist for dist in env.iter_all_distributions()}
query_names = [canonicalize_name(name) for name in query]
missing = sorted(
[name for name, pkg in zip(query, query_names) if pkg not in installed]
)
if missing:
logger.warning("Package(s) not found: %s", ", ".join(missing))
def _get_requiring_packages(current_dist: BaseDistribution) -> Iterator[str]:
return (
dist.metadata["Name"] or "UNKNOWN"
for dist in installed.values()
if current_dist.canonical_name
in {canonicalize_name(d.name) for d in dist.iter_dependencies()}
)
for query_name in query_names:
try:
dist = installed[query_name]
except KeyError:
continue
requires = sorted((req.name for req in dist.iter_dependencies()), key=str.lower)
required_by = sorted(_get_requiring_packages(dist), key=str.lower)
try:
entry_points_text = dist.read_text("entry_points.txt")
entry_points = entry_points_text.splitlines(keepends=False)
except FileNotFoundError:
entry_points = []
files_iter = dist.iter_declared_entries()
if files_iter is None:
files: Optional[List[str]] = None
else:
files = sorted(files_iter)
metadata = dist.metadata
yield _PackageInfo(
name=dist.raw_name,
version=str(dist.version),
location=dist.location or "",
editable_project_location=dist.editable_project_location,
requires=requires,
required_by=required_by,
installer=dist.installer,
metadata_version=dist.metadata_version or "",
classifiers=metadata.get_all("Classifier", []),
summary=metadata.get("Summary", ""),
homepage=metadata.get("Home-page", ""),
project_urls=metadata.get_all("Project-URL", []),
author=metadata.get("Author", ""),
author_email=metadata.get("Author-email", ""),
license=metadata.get("License", ""),
entry_points=entry_points,
files=files,
)
def print_results(
distributions: Iterable[_PackageInfo],
list_files: bool,
verbose: bool,
) -> bool:
"""
Print the information from installed distributions found.
"""
results_printed = False
for i, dist in enumerate(distributions):
results_printed = True
if i > 0:
write_output("---")
write_output("Name: %s", dist.name)
write_output("Version: %s", dist.version)
write_output("Summary: %s", dist.summary)
write_output("Home-page: %s", dist.homepage)
write_output("Author: %s", dist.author)
write_output("Author-email: %s", dist.author_email)
write_output("License: %s", dist.license)
write_output("Location: %s", dist.location)
if dist.editable_project_location is not None:
write_output(
"Editable project location: %s", dist.editable_project_location
)
write_output("Requires: %s", ", ".join(dist.requires))
write_output("Required-by: %s", ", ".join(dist.required_by))
if verbose:
write_output("Metadata-Version: %s", dist.metadata_version)
write_output("Installer: %s", dist.installer)
write_output("Classifiers:")
for classifier in dist.classifiers:
write_output(" %s", classifier)
write_output("Entry-points:")
for entry in dist.entry_points:
write_output(" %s", entry.strip())
write_output("Project-URLs:")
for project_url in dist.project_urls:
write_output(" %s", project_url)
if list_files:
write_output("Files:")
if dist.files is None:
write_output("Cannot locate RECORD or installed-files.txt")
else:
for line in dist.files:
write_output(" %s", line.strip())
return results_printed

View file

@ -0,0 +1,113 @@
import logging
from optparse import Values
from typing import List
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.cli import cmdoptions
from pip._internal.cli.base_command import Command
from pip._internal.cli.req_command import SessionCommandMixin, warn_if_run_as_root
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.exceptions import InstallationError
from pip._internal.req import parse_requirements
from pip._internal.req.constructors import (
install_req_from_line,
install_req_from_parsed_requirement,
)
from pip._internal.utils.misc import (
check_externally_managed,
protect_pip_from_modification_on_windows,
)
logger = logging.getLogger(__name__)
class UninstallCommand(Command, SessionCommandMixin):
"""
Uninstall packages.
pip is able to uninstall most installed packages. Known exceptions are:
- Pure distutils packages installed with ``python setup.py install``, which
leave behind no metadata to determine what files were installed.
- Script wrappers installed by ``python setup.py develop``.
"""
usage = """
%prog [options] <package> ...
%prog [options] -r <requirements file> ..."""
def add_options(self) -> None:
self.cmd_opts.add_option(
"-r",
"--requirement",
dest="requirements",
action="append",
default=[],
metavar="file",
help=(
"Uninstall all the packages listed in the given requirements "
"file. This option can be used multiple times."
),
)
self.cmd_opts.add_option(
"-y",
"--yes",
dest="yes",
action="store_true",
help="Don't ask for confirmation of uninstall deletions.",
)
self.cmd_opts.add_option(cmdoptions.root_user_action())
self.cmd_opts.add_option(cmdoptions.override_externally_managed())
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
session = self.get_default_session(options)
reqs_to_uninstall = {}
for name in args:
req = install_req_from_line(
name,
isolated=options.isolated_mode,
)
if req.name:
reqs_to_uninstall[canonicalize_name(req.name)] = req
else:
logger.warning(
"Invalid requirement: %r ignored -"
" the uninstall command expects named"
" requirements.",
name,
)
for filename in options.requirements:
for parsed_req in parse_requirements(
filename, options=options, session=session
):
req = install_req_from_parsed_requirement(
parsed_req, isolated=options.isolated_mode
)
if req.name:
reqs_to_uninstall[canonicalize_name(req.name)] = req
if not reqs_to_uninstall:
raise InstallationError(
f"You must give at least one requirement to {self.name} (see "
f'"pip help {self.name}")'
)
if not options.override_externally_managed:
check_externally_managed()
protect_pip_from_modification_on_windows(
modifying_pip="pip" in reqs_to_uninstall
)
for req in reqs_to_uninstall.values():
uninstall_pathset = req.uninstall(
auto_confirm=options.yes,
verbose=self.verbosity > 0,
)
if uninstall_pathset:
uninstall_pathset.commit()
if options.root_user_action == "warn":
warn_if_run_as_root()
return SUCCESS

View file

@ -0,0 +1,183 @@
import logging
import os
import shutil
from optparse import Values
from typing import List
from pip._internal.cache import WheelCache
from pip._internal.cli import cmdoptions
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.exceptions import CommandError
from pip._internal.operations.build.build_tracker import get_build_tracker
from pip._internal.req.req_install import (
InstallRequirement,
check_legacy_setup_py_options,
)
from pip._internal.utils.misc import ensure_dir, normalize_path
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.wheel_builder import build, should_build_for_wheel_command
logger = logging.getLogger(__name__)
class WheelCommand(RequirementCommand):
"""
Build Wheel archives for your requirements and dependencies.
Wheel is a built-package format, and offers the advantage of not
recompiling your software during every install. For more details, see the
wheel docs: https://wheel.readthedocs.io/en/latest/
'pip wheel' uses the build system interface as described here:
https://pip.pypa.io/en/stable/reference/build-system/
"""
usage = """
%prog [options] <requirement specifier> ...
%prog [options] -r <requirements file> ...
%prog [options] [-e] <vcs project url> ...
%prog [options] [-e] <local project path> ...
%prog [options] <archive url/path> ..."""
def add_options(self) -> None:
self.cmd_opts.add_option(
"-w",
"--wheel-dir",
dest="wheel_dir",
metavar="dir",
default=os.curdir,
help=(
"Build wheels into <dir>, where the default is the "
"current working directory."
),
)
self.cmd_opts.add_option(cmdoptions.no_binary())
self.cmd_opts.add_option(cmdoptions.only_binary())
self.cmd_opts.add_option(cmdoptions.prefer_binary())
self.cmd_opts.add_option(cmdoptions.no_build_isolation())
self.cmd_opts.add_option(cmdoptions.use_pep517())
self.cmd_opts.add_option(cmdoptions.no_use_pep517())
self.cmd_opts.add_option(cmdoptions.check_build_deps())
self.cmd_opts.add_option(cmdoptions.constraints())
self.cmd_opts.add_option(cmdoptions.editable())
self.cmd_opts.add_option(cmdoptions.requirements())
self.cmd_opts.add_option(cmdoptions.src())
self.cmd_opts.add_option(cmdoptions.ignore_requires_python())
self.cmd_opts.add_option(cmdoptions.no_deps())
self.cmd_opts.add_option(cmdoptions.progress_bar())
self.cmd_opts.add_option(
"--no-verify",
dest="no_verify",
action="store_true",
default=False,
help="Don't verify if built wheel is valid.",
)
self.cmd_opts.add_option(cmdoptions.config_settings())
self.cmd_opts.add_option(cmdoptions.build_options())
self.cmd_opts.add_option(cmdoptions.global_options())
self.cmd_opts.add_option(
"--pre",
action="store_true",
default=False,
help=(
"Include pre-release and development versions. By default, "
"pip only finds stable versions."
),
)
self.cmd_opts.add_option(cmdoptions.require_hashes())
index_opts = cmdoptions.make_option_group(
cmdoptions.index_group,
self.parser,
)
self.parser.insert_option_group(0, index_opts)
self.parser.insert_option_group(0, self.cmd_opts)
@with_cleanup
def run(self, options: Values, args: List[str]) -> int:
session = self.get_default_session(options)
finder = self._build_package_finder(options, session)
options.wheel_dir = normalize_path(options.wheel_dir)
ensure_dir(options.wheel_dir)
build_tracker = self.enter_context(get_build_tracker())
directory = TempDirectory(
delete=not options.no_clean,
kind="wheel",
globally_managed=True,
)
reqs = self.get_requirements(args, options, finder, session)
check_legacy_setup_py_options(options, reqs)
wheel_cache = WheelCache(options.cache_dir)
preparer = self.make_requirement_preparer(
temp_build_dir=directory,
options=options,
build_tracker=build_tracker,
session=session,
finder=finder,
download_dir=options.wheel_dir,
use_user_site=False,
verbosity=self.verbosity,
)
resolver = self.make_resolver(
preparer=preparer,
finder=finder,
options=options,
wheel_cache=wheel_cache,
ignore_requires_python=options.ignore_requires_python,
use_pep517=options.use_pep517,
)
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
reqs_to_build: List[InstallRequirement] = []
for req in requirement_set.requirements.values():
if req.is_wheel:
preparer.save_linked_requirement(req)
elif should_build_for_wheel_command(req):
reqs_to_build.append(req)
preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
requirement_set.warn_legacy_versions_and_specifiers()
# build wheels
build_successes, build_failures = build(
reqs_to_build,
wheel_cache=wheel_cache,
verify=(not options.no_verify),
build_options=options.build_options or [],
global_options=options.global_options or [],
)
for req in build_successes:
assert req.link and req.link.is_wheel
assert req.local_file_path
# copy from cache to target directory
try:
shutil.copy(req.local_file_path, options.wheel_dir)
except OSError as e:
logger.warning(
"Building wheel for %s failed: %s",
req.name,
e,
)
build_failures.append(req)
if len(build_failures) != 0:
raise CommandError("Failed to build one or more wheels")
return SUCCESS

View file

@ -0,0 +1,383 @@
"""Configuration management setup
Some terminology:
- name
As written in config files.
- value
Value associated with a name
- key
Name combined with it's section (section.name)
- variant
A single word describing where the configuration key-value pair came from
"""
import configparser
import locale
import os
import sys
from typing import Any, Dict, Iterable, List, NewType, Optional, Tuple
from pip._internal.exceptions import (
ConfigurationError,
ConfigurationFileCouldNotBeLoaded,
)
from pip._internal.utils import appdirs
from pip._internal.utils.compat import WINDOWS
from pip._internal.utils.logging import getLogger
from pip._internal.utils.misc import ensure_dir, enum
RawConfigParser = configparser.RawConfigParser # Shorthand
Kind = NewType("Kind", str)
CONFIG_BASENAME = "pip.ini" if WINDOWS else "pip.conf"
ENV_NAMES_IGNORED = "version", "help"
# The kinds of configurations there are.
kinds = enum(
USER="user", # User Specific
GLOBAL="global", # System Wide
SITE="site", # [Virtual] Environment Specific
ENV="env", # from PIP_CONFIG_FILE
ENV_VAR="env-var", # from Environment Variables
)
OVERRIDE_ORDER = kinds.GLOBAL, kinds.USER, kinds.SITE, kinds.ENV, kinds.ENV_VAR
VALID_LOAD_ONLY = kinds.USER, kinds.GLOBAL, kinds.SITE
logger = getLogger(__name__)
# NOTE: Maybe use the optionx attribute to normalize keynames.
def _normalize_name(name: str) -> str:
"""Make a name consistent regardless of source (environment or file)"""
name = name.lower().replace("_", "-")
if name.startswith("--"):
name = name[2:] # only prefer long opts
return name
def _disassemble_key(name: str) -> List[str]:
if "." not in name:
error_message = (
"Key does not contain dot separated section and key. "
f"Perhaps you wanted to use 'global.{name}' instead?"
)
raise ConfigurationError(error_message)
return name.split(".", 1)
def get_configuration_files() -> Dict[Kind, List[str]]:
global_config_files = [
os.path.join(path, CONFIG_BASENAME) for path in appdirs.site_config_dirs("pip")
]
site_config_file = os.path.join(sys.prefix, CONFIG_BASENAME)
legacy_config_file = os.path.join(
os.path.expanduser("~"),
"pip" if WINDOWS else ".pip",
CONFIG_BASENAME,
)
new_config_file = os.path.join(appdirs.user_config_dir("pip"), CONFIG_BASENAME)
return {
kinds.GLOBAL: global_config_files,
kinds.SITE: [site_config_file],
kinds.USER: [legacy_config_file, new_config_file],
}
class Configuration:
"""Handles management of configuration.
Provides an interface to accessing and managing configuration files.
This class converts provides an API that takes "section.key-name" style
keys and stores the value associated with it as "key-name" under the
section "section".
This allows for a clean interface wherein the both the section and the
key-name are preserved in an easy to manage form in the configuration files
and the data stored is also nice.
"""
def __init__(self, isolated: bool, load_only: Optional[Kind] = None) -> None:
super().__init__()
if load_only is not None and load_only not in VALID_LOAD_ONLY:
raise ConfigurationError(
"Got invalid value for load_only - should be one of {}".format(
", ".join(map(repr, VALID_LOAD_ONLY))
)
)
self.isolated = isolated
self.load_only = load_only
# Because we keep track of where we got the data from
self._parsers: Dict[Kind, List[Tuple[str, RawConfigParser]]] = {
variant: [] for variant in OVERRIDE_ORDER
}
self._config: Dict[Kind, Dict[str, Any]] = {
variant: {} for variant in OVERRIDE_ORDER
}
self._modified_parsers: List[Tuple[str, RawConfigParser]] = []
def load(self) -> None:
"""Loads configuration from configuration files and environment"""
self._load_config_files()
if not self.isolated:
self._load_environment_vars()
def get_file_to_edit(self) -> Optional[str]:
"""Returns the file with highest priority in configuration"""
assert self.load_only is not None, "Need to be specified a file to be editing"
try:
return self._get_parser_to_modify()[0]
except IndexError:
return None
def items(self) -> Iterable[Tuple[str, Any]]:
"""Returns key-value pairs like dict.items() representing the loaded
configuration
"""
return self._dictionary.items()
def get_value(self, key: str) -> Any:
"""Get a value from the configuration."""
orig_key = key
key = _normalize_name(key)
try:
return self._dictionary[key]
except KeyError:
# disassembling triggers a more useful error message than simply
# "No such key" in the case that the key isn't in the form command.option
_disassemble_key(key)
raise ConfigurationError(f"No such key - {orig_key}")
def set_value(self, key: str, value: Any) -> None:
"""Modify a value in the configuration."""
key = _normalize_name(key)
self._ensure_have_load_only()
assert self.load_only
fname, parser = self._get_parser_to_modify()
if parser is not None:
section, name = _disassemble_key(key)
# Modify the parser and the configuration
if not parser.has_section(section):
parser.add_section(section)
parser.set(section, name, value)
self._config[self.load_only][key] = value
self._mark_as_modified(fname, parser)
def unset_value(self, key: str) -> None:
"""Unset a value in the configuration."""
orig_key = key
key = _normalize_name(key)
self._ensure_have_load_only()
assert self.load_only
if key not in self._config[self.load_only]:
raise ConfigurationError(f"No such key - {orig_key}")
fname, parser = self._get_parser_to_modify()
if parser is not None:
section, name = _disassemble_key(key)
if not (
parser.has_section(section) and parser.remove_option(section, name)
):
# The option was not removed.
raise ConfigurationError(
"Fatal Internal error [id=1]. Please report as a bug."
)
# The section may be empty after the option was removed.
if not parser.items(section):
parser.remove_section(section)
self._mark_as_modified(fname, parser)
del self._config[self.load_only][key]
def save(self) -> None:
"""Save the current in-memory state."""
self._ensure_have_load_only()
for fname, parser in self._modified_parsers:
logger.info("Writing to %s", fname)
# Ensure directory exists.
ensure_dir(os.path.dirname(fname))
# Ensure directory's permission(need to be writeable)
try:
with open(fname, "w") as f:
parser.write(f)
except OSError as error:
raise ConfigurationError(
f"An error occurred while writing to the configuration file "
f"{fname}: {error}"
)
#
# Private routines
#
def _ensure_have_load_only(self) -> None:
if self.load_only is None:
raise ConfigurationError("Needed a specific file to be modifying.")
logger.debug("Will be working with %s variant only", self.load_only)
@property
def _dictionary(self) -> Dict[str, Any]:
"""A dictionary representing the loaded configuration."""
# NOTE: Dictionaries are not populated if not loaded. So, conditionals
# are not needed here.
retval = {}
for variant in OVERRIDE_ORDER:
retval.update(self._config[variant])
return retval
def _load_config_files(self) -> None:
"""Loads configuration from configuration files"""
config_files = dict(self.iter_config_files())
if config_files[kinds.ENV][0:1] == [os.devnull]:
logger.debug(
"Skipping loading configuration files due to "
"environment's PIP_CONFIG_FILE being os.devnull"
)
return
for variant, files in config_files.items():
for fname in files:
# If there's specific variant set in `load_only`, load only
# that variant, not the others.
if self.load_only is not None and variant != self.load_only:
logger.debug("Skipping file '%s' (variant: %s)", fname, variant)
continue
parser = self._load_file(variant, fname)
# Keeping track of the parsers used
self._parsers[variant].append((fname, parser))
def _load_file(self, variant: Kind, fname: str) -> RawConfigParser:
logger.verbose("For variant '%s', will try loading '%s'", variant, fname)
parser = self._construct_parser(fname)
for section in parser.sections():
items = parser.items(section)
self._config[variant].update(self._normalized_keys(section, items))
return parser
def _construct_parser(self, fname: str) -> RawConfigParser:
parser = configparser.RawConfigParser()
# If there is no such file, don't bother reading it but create the
# parser anyway, to hold the data.
# Doing this is useful when modifying and saving files, where we don't
# need to construct a parser.
if os.path.exists(fname):
locale_encoding = locale.getpreferredencoding(False)
try:
parser.read(fname, encoding=locale_encoding)
except UnicodeDecodeError:
# See https://github.com/pypa/pip/issues/4963
raise ConfigurationFileCouldNotBeLoaded(
reason=f"contains invalid {locale_encoding} characters",
fname=fname,
)
except configparser.Error as error:
# See https://github.com/pypa/pip/issues/4893
raise ConfigurationFileCouldNotBeLoaded(error=error)
return parser
def _load_environment_vars(self) -> None:
"""Loads configuration from environment variables"""
self._config[kinds.ENV_VAR].update(
self._normalized_keys(":env:", self.get_environ_vars())
)
def _normalized_keys(
self, section: str, items: Iterable[Tuple[str, Any]]
) -> Dict[str, Any]:
"""Normalizes items to construct a dictionary with normalized keys.
This routine is where the names become keys and are made the same
regardless of source - configuration files or environment.
"""
normalized = {}
for name, val in items:
key = section + "." + _normalize_name(name)
normalized[key] = val
return normalized
def get_environ_vars(self) -> Iterable[Tuple[str, str]]:
"""Returns a generator with all environmental vars with prefix PIP_"""
for key, val in os.environ.items():
if key.startswith("PIP_"):
name = key[4:].lower()
if name not in ENV_NAMES_IGNORED:
yield name, val
# XXX: This is patched in the tests.
def iter_config_files(self) -> Iterable[Tuple[Kind, List[str]]]:
"""Yields variant and configuration files associated with it.
This should be treated like items of a dictionary. The order
here doesn't affect what gets overridden. That is controlled
by OVERRIDE_ORDER. However this does control the order they are
displayed to the user. It's probably most ergononmic to display
things in the same order as OVERRIDE_ORDER
"""
# SMELL: Move the conditions out of this function
env_config_file = os.environ.get("PIP_CONFIG_FILE", None)
config_files = get_configuration_files()
yield kinds.GLOBAL, config_files[kinds.GLOBAL]
# per-user config is not loaded when env_config_file exists
should_load_user_config = not self.isolated and not (
env_config_file and os.path.exists(env_config_file)
)
if should_load_user_config:
# The legacy config file is overridden by the new config file
yield kinds.USER, config_files[kinds.USER]
# virtualenv config
yield kinds.SITE, config_files[kinds.SITE]
if env_config_file is not None:
yield kinds.ENV, [env_config_file]
else:
yield kinds.ENV, []
def get_values_in_config(self, variant: Kind) -> Dict[str, Any]:
"""Get values present in a config file"""
return self._config[variant]
def _get_parser_to_modify(self) -> Tuple[str, RawConfigParser]:
# Determine which parser to modify
assert self.load_only
parsers = self._parsers[self.load_only]
if not parsers:
# This should not happen if everything works correctly.
raise ConfigurationError(
"Fatal Internal error [id=2]. Please report as a bug."
)
# Use the highest priority parser.
return parsers[-1]
# XXX: This is patched in the tests.
def _mark_as_modified(self, fname: str, parser: RawConfigParser) -> None:
file_parser_tuple = (fname, parser)
if file_parser_tuple not in self._modified_parsers:
self._modified_parsers.append(file_parser_tuple)
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._dictionary!r})"

View file

@ -0,0 +1,21 @@
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.distributions.sdist import SourceDistribution
from pip._internal.distributions.wheel import WheelDistribution
from pip._internal.req.req_install import InstallRequirement
def make_distribution_for_install_requirement(
install_req: InstallRequirement,
) -> AbstractDistribution:
"""Returns a Distribution for the given InstallRequirement"""
# Editable requirements will always be source distributions. They use the
# legacy logic until we create a modern standard for them.
if install_req.editable:
return SourceDistribution(install_req)
# If it's a wheel, it's a WheelDistribution
if install_req.is_wheel:
return WheelDistribution(install_req)
# Otherwise, a SourceDistribution
return SourceDistribution(install_req)

View file

@ -0,0 +1,51 @@
import abc
from typing import Optional
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata.base import BaseDistribution
from pip._internal.req import InstallRequirement
class AbstractDistribution(metaclass=abc.ABCMeta):
"""A base class for handling installable artifacts.
The requirements for anything installable are as follows:
- we must be able to determine the requirement name
(or we can't correctly handle the non-upgrade case).
- for packages with setup requirements, we must also be able
to determine their requirements without installing additional
packages (for the same reason as run-time dependencies)
- we must be able to create a Distribution object exposing the
above metadata.
- if we need to do work in the build tracker, we must be able to generate a unique
string to identify the requirement in the build tracker.
"""
def __init__(self, req: InstallRequirement) -> None:
super().__init__()
self.req = req
@abc.abstractproperty
def build_tracker_id(self) -> Optional[str]:
"""A string that uniquely identifies this requirement to the build tracker.
If None, then this dist has no work to do in the build tracker, and
``.prepare_distribution_metadata()`` will not be called."""
raise NotImplementedError()
@abc.abstractmethod
def get_metadata_distribution(self) -> BaseDistribution:
raise NotImplementedError()
@abc.abstractmethod
def prepare_distribution_metadata(
self,
finder: PackageFinder,
build_isolation: bool,
check_build_deps: bool,
) -> None:
raise NotImplementedError()

View file

@ -0,0 +1,29 @@
from typing import Optional
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
class InstalledDistribution(AbstractDistribution):
"""Represents an installed package.
This does not need any preparation as the required information has already
been computed.
"""
@property
def build_tracker_id(self) -> Optional[str]:
return None
def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.satisfied_by is not None, "not actually installed"
return self.req.satisfied_by
def prepare_distribution_metadata(
self,
finder: PackageFinder,
build_isolation: bool,
check_build_deps: bool,
) -> None:
pass

View file

@ -0,0 +1,156 @@
import logging
from typing import Iterable, Optional, Set, Tuple
from pip._internal.build_env import BuildEnvironment
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.exceptions import InstallationError
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
from pip._internal.utils.subprocess import runner_with_spinner_message
logger = logging.getLogger(__name__)
class SourceDistribution(AbstractDistribution):
"""Represents a source distribution.
The preparation step for these needs metadata for the packages to be
generated, either using PEP 517 or using the legacy `setup.py egg_info`.
"""
@property
def build_tracker_id(self) -> Optional[str]:
"""Identify this requirement uniquely by its link."""
assert self.req.link
return self.req.link.url_without_fragment
def get_metadata_distribution(self) -> BaseDistribution:
return self.req.get_dist()
def prepare_distribution_metadata(
self,
finder: PackageFinder,
build_isolation: bool,
check_build_deps: bool,
) -> None:
# Load pyproject.toml, to determine whether PEP 517 is to be used
self.req.load_pyproject_toml()
# Set up the build isolation, if this requirement should be isolated
should_isolate = self.req.use_pep517 and build_isolation
if should_isolate:
# Setup an isolated environment and install the build backend static
# requirements in it.
self._prepare_build_backend(finder)
# Check that if the requirement is editable, it either supports PEP 660 or
# has a setup.py or a setup.cfg. This cannot be done earlier because we need
# to setup the build backend to verify it supports build_editable, nor can
# it be done later, because we want to avoid installing build requirements
# needlessly. Doing it here also works around setuptools generating
# UNKNOWN.egg-info when running get_requires_for_build_wheel on a directory
# without setup.py nor setup.cfg.
self.req.isolated_editable_sanity_check()
# Install the dynamic build requirements.
self._install_build_reqs(finder)
# Check if the current environment provides build dependencies
should_check_deps = self.req.use_pep517 and check_build_deps
if should_check_deps:
pyproject_requires = self.req.pyproject_requires
assert pyproject_requires is not None
conflicting, missing = self.req.build_env.check_requirements(
pyproject_requires
)
if conflicting:
self._raise_conflicts("the backend dependencies", conflicting)
if missing:
self._raise_missing_reqs(missing)
self.req.prepare_metadata()
def _prepare_build_backend(self, finder: PackageFinder) -> None:
# Isolate in a BuildEnvironment and install the build-time
# requirements.
pyproject_requires = self.req.pyproject_requires
assert pyproject_requires is not None
self.req.build_env = BuildEnvironment()
self.req.build_env.install_requirements(
finder, pyproject_requires, "overlay", kind="build dependencies"
)
conflicting, missing = self.req.build_env.check_requirements(
self.req.requirements_to_check
)
if conflicting:
self._raise_conflicts("PEP 517/518 supported requirements", conflicting)
if missing:
logger.warning(
"Missing build requirements in pyproject.toml for %s.",
self.req,
)
logger.warning(
"The project does not specify a build backend, and "
"pip cannot fall back to setuptools without %s.",
" and ".join(map(repr, sorted(missing))),
)
def _get_build_requires_wheel(self) -> Iterable[str]:
with self.req.build_env:
runner = runner_with_spinner_message("Getting requirements to build wheel")
backend = self.req.pep517_backend
assert backend is not None
with backend.subprocess_runner(runner):
return backend.get_requires_for_build_wheel()
def _get_build_requires_editable(self) -> Iterable[str]:
with self.req.build_env:
runner = runner_with_spinner_message(
"Getting requirements to build editable"
)
backend = self.req.pep517_backend
assert backend is not None
with backend.subprocess_runner(runner):
return backend.get_requires_for_build_editable()
def _install_build_reqs(self, finder: PackageFinder) -> None:
# Install any extra build dependencies that the backend requests.
# This must be done in a second pass, as the pyproject.toml
# dependencies must be installed before we can call the backend.
if (
self.req.editable
and self.req.permit_editable_wheels
and self.req.supports_pyproject_editable()
):
build_reqs = self._get_build_requires_editable()
else:
build_reqs = self._get_build_requires_wheel()
conflicting, missing = self.req.build_env.check_requirements(build_reqs)
if conflicting:
self._raise_conflicts("the backend dependencies", conflicting)
self.req.build_env.install_requirements(
finder, missing, "normal", kind="backend dependencies"
)
def _raise_conflicts(
self, conflicting_with: str, conflicting_reqs: Set[Tuple[str, str]]
) -> None:
format_string = (
"Some build dependencies for {requirement} "
"conflict with {conflicting_with}: {description}."
)
error_message = format_string.format(
requirement=self.req,
conflicting_with=conflicting_with,
description=", ".join(
f"{installed} is incompatible with {wanted}"
for installed, wanted in sorted(conflicting_reqs)
),
)
raise InstallationError(error_message)
def _raise_missing_reqs(self, missing: Set[str]) -> None:
format_string = (
"Some build dependencies for {requirement} are missing: {missing}."
)
error_message = format_string.format(
requirement=self.req, missing=", ".join(map(repr, sorted(missing)))
)
raise InstallationError(error_message)

View file

@ -0,0 +1,40 @@
from typing import Optional
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import (
BaseDistribution,
FilesystemWheel,
get_wheel_distribution,
)
class WheelDistribution(AbstractDistribution):
"""Represents a wheel distribution.
This does not need any preparation as wheels can be directly unpacked.
"""
@property
def build_tracker_id(self) -> Optional[str]:
return None
def get_metadata_distribution(self) -> BaseDistribution:
"""Loads the metadata from the wheel file into memory and returns a
Distribution that uses it, not relying on the wheel file or
requirement.
"""
assert self.req.local_file_path, "Set as part of preparation during download"
assert self.req.name, "Wheels are never unnamed"
wheel = FilesystemWheel(self.req.local_file_path)
return get_wheel_distribution(wheel, canonicalize_name(self.req.name))
def prepare_distribution_metadata(
self,
finder: PackageFinder,
build_isolation: bool,
check_build_deps: bool,
) -> None:
pass

View file

@ -0,0 +1,728 @@
"""Exceptions used throughout package.
This module MUST NOT try to import from anything within `pip._internal` to
operate. This is expected to be importable from any/all files within the
subpackage and, thus, should not depend on them.
"""
import configparser
import contextlib
import locale
import logging
import pathlib
import re
import sys
from itertools import chain, groupby, repeat
from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union
from pip._vendor.requests.models import Request, Response
from pip._vendor.rich.console import Console, ConsoleOptions, RenderResult
from pip._vendor.rich.markup import escape
from pip._vendor.rich.text import Text
if TYPE_CHECKING:
from hashlib import _Hash
from typing import Literal
from pip._internal.metadata import BaseDistribution
from pip._internal.req.req_install import InstallRequirement
logger = logging.getLogger(__name__)
#
# Scaffolding
#
def _is_kebab_case(s: str) -> bool:
return re.match(r"^[a-z]+(-[a-z]+)*$", s) is not None
def _prefix_with_indent(
s: Union[Text, str],
console: Console,
*,
prefix: str,
indent: str,
) -> Text:
if isinstance(s, Text):
text = s
else:
text = console.render_str(s)
return console.render_str(prefix, overflow="ignore") + console.render_str(
f"\n{indent}", overflow="ignore"
).join(text.split(allow_blank=True))
class PipError(Exception):
"""The base pip error."""
class DiagnosticPipError(PipError):
"""An error, that presents diagnostic information to the user.
This contains a bunch of logic, to enable pretty presentation of our error
messages. Each error gets a unique reference. Each error can also include
additional context, a hint and/or a note -- which are presented with the
main error message in a consistent style.
This is adapted from the error output styling in `sphinx-theme-builder`.
"""
reference: str
def __init__(
self,
*,
kind: 'Literal["error", "warning"]' = "error",
reference: Optional[str] = None,
message: Union[str, Text],
context: Optional[Union[str, Text]],
hint_stmt: Optional[Union[str, Text]],
note_stmt: Optional[Union[str, Text]] = None,
link: Optional[str] = None,
) -> None:
# Ensure a proper reference is provided.
if reference is None:
assert hasattr(self, "reference"), "error reference not provided!"
reference = self.reference
assert _is_kebab_case(reference), "error reference must be kebab-case!"
self.kind = kind
self.reference = reference
self.message = message
self.context = context
self.note_stmt = note_stmt
self.hint_stmt = hint_stmt
self.link = link
super().__init__(f"<{self.__class__.__name__}: {self.reference}>")
def __repr__(self) -> str:
return (
f"<{self.__class__.__name__}("
f"reference={self.reference!r}, "
f"message={self.message!r}, "
f"context={self.context!r}, "
f"note_stmt={self.note_stmt!r}, "
f"hint_stmt={self.hint_stmt!r}"
")>"
)
def __rich_console__(
self,
console: Console,
options: ConsoleOptions,
) -> RenderResult:
colour = "red" if self.kind == "error" else "yellow"
yield f"[{colour} bold]{self.kind}[/]: [bold]{self.reference}[/]"
yield ""
if not options.ascii_only:
# Present the main message, with relevant context indented.
if self.context is not None:
yield _prefix_with_indent(
self.message,
console,
prefix=f"[{colour}]×[/] ",
indent=f"[{colour}]│[/] ",
)
yield _prefix_with_indent(
self.context,
console,
prefix=f"[{colour}]╰─>[/] ",
indent=f"[{colour}] [/] ",
)
else:
yield _prefix_with_indent(
self.message,
console,
prefix="[red]×[/] ",
indent=" ",
)
else:
yield self.message
if self.context is not None:
yield ""
yield self.context
if self.note_stmt is not None or self.hint_stmt is not None:
yield ""
if self.note_stmt is not None:
yield _prefix_with_indent(
self.note_stmt,
console,
prefix="[magenta bold]note[/]: ",
indent=" ",
)
if self.hint_stmt is not None:
yield _prefix_with_indent(
self.hint_stmt,
console,
prefix="[cyan bold]hint[/]: ",
indent=" ",
)
if self.link is not None:
yield ""
yield f"Link: {self.link}"
#
# Actual Errors
#
class ConfigurationError(PipError):
"""General exception in configuration"""
class InstallationError(PipError):
"""General exception during installation"""
class UninstallationError(PipError):
"""General exception during uninstallation"""
class MissingPyProjectBuildRequires(DiagnosticPipError):
"""Raised when pyproject.toml has `build-system`, but no `build-system.requires`."""
reference = "missing-pyproject-build-system-requires"
def __init__(self, *, package: str) -> None:
super().__init__(
message=f"Can not process {escape(package)}",
context=Text(
"This package has an invalid pyproject.toml file.\n"
"The [build-system] table is missing the mandatory `requires` key."
),
note_stmt="This is an issue with the package mentioned above, not pip.",
hint_stmt=Text("See PEP 518 for the detailed specification."),
)
class InvalidPyProjectBuildRequires(DiagnosticPipError):
"""Raised when pyproject.toml an invalid `build-system.requires`."""
reference = "invalid-pyproject-build-system-requires"
def __init__(self, *, package: str, reason: str) -> None:
super().__init__(
message=f"Can not process {escape(package)}",
context=Text(
"This package has an invalid `build-system.requires` key in "
f"pyproject.toml.\n{reason}"
),
note_stmt="This is an issue with the package mentioned above, not pip.",
hint_stmt=Text("See PEP 518 for the detailed specification."),
)
class NoneMetadataError(PipError):
"""Raised when accessing a Distribution's "METADATA" or "PKG-INFO".
This signifies an inconsistency, when the Distribution claims to have
the metadata file (if not, raise ``FileNotFoundError`` instead), but is
not actually able to produce its content. This may be due to permission
errors.
"""
def __init__(
self,
dist: "BaseDistribution",
metadata_name: str,
) -> None:
"""
:param dist: A Distribution object.
:param metadata_name: The name of the metadata being accessed
(can be "METADATA" or "PKG-INFO").
"""
self.dist = dist
self.metadata_name = metadata_name
def __str__(self) -> str:
# Use `dist` in the error message because its stringification
# includes more information, like the version and location.
return f"None {self.metadata_name} metadata found for distribution: {self.dist}"
class UserInstallationInvalid(InstallationError):
"""A --user install is requested on an environment without user site."""
def __str__(self) -> str:
return "User base directory is not specified"
class InvalidSchemeCombination(InstallationError):
def __str__(self) -> str:
before = ", ".join(str(a) for a in self.args[:-1])
return f"Cannot set {before} and {self.args[-1]} together"
class DistributionNotFound(InstallationError):
"""Raised when a distribution cannot be found to satisfy a requirement"""
class RequirementsFileParseError(InstallationError):
"""Raised when a general error occurs parsing a requirements file line."""
class BestVersionAlreadyInstalled(PipError):
"""Raised when the most up-to-date version of a package is already
installed."""
class BadCommand(PipError):
"""Raised when virtualenv or a command is not found"""
class CommandError(PipError):
"""Raised when there is an error in command-line arguments"""
class PreviousBuildDirError(PipError):
"""Raised when there's a previous conflicting build directory"""
class NetworkConnectionError(PipError):
"""HTTP connection error"""
def __init__(
self,
error_msg: str,
response: Optional[Response] = None,
request: Optional[Request] = None,
) -> None:
"""
Initialize NetworkConnectionError with `request` and `response`
objects.
"""
self.response = response
self.request = request
self.error_msg = error_msg
if (
self.response is not None
and not self.request
and hasattr(response, "request")
):
self.request = self.response.request
super().__init__(error_msg, response, request)
def __str__(self) -> str:
return str(self.error_msg)
class InvalidWheelFilename(InstallationError):
"""Invalid wheel filename."""
class UnsupportedWheel(InstallationError):
"""Unsupported wheel."""
class InvalidWheel(InstallationError):
"""Invalid (e.g. corrupt) wheel."""
def __init__(self, location: str, name: str):
self.location = location
self.name = name
def __str__(self) -> str:
return f"Wheel '{self.name}' located at {self.location} is invalid."
class MetadataInconsistent(InstallationError):
"""Built metadata contains inconsistent information.
This is raised when the metadata contains values (e.g. name and version)
that do not match the information previously obtained from sdist filename,
user-supplied ``#egg=`` value, or an install requirement name.
"""
def __init__(
self, ireq: "InstallRequirement", field: str, f_val: str, m_val: str
) -> None:
self.ireq = ireq
self.field = field
self.f_val = f_val
self.m_val = m_val
def __str__(self) -> str:
return (
f"Requested {self.ireq} has inconsistent {self.field}: "
f"expected {self.f_val!r}, but metadata has {self.m_val!r}"
)
class InstallationSubprocessError(DiagnosticPipError, InstallationError):
"""A subprocess call failed."""
reference = "subprocess-exited-with-error"
def __init__(
self,
*,
command_description: str,
exit_code: int,
output_lines: Optional[List[str]],
) -> None:
if output_lines is None:
output_prompt = Text("See above for output.")
else:
output_prompt = (
Text.from_markup(f"[red][{len(output_lines)} lines of output][/]\n")
+ Text("".join(output_lines))
+ Text.from_markup(R"[red]\[end of output][/]")
)
super().__init__(
message=(
f"[green]{escape(command_description)}[/] did not run successfully.\n"
f"exit code: {exit_code}"
),
context=output_prompt,
hint_stmt=None,
note_stmt=(
"This error originates from a subprocess, and is likely not a "
"problem with pip."
),
)
self.command_description = command_description
self.exit_code = exit_code
def __str__(self) -> str:
return f"{self.command_description} exited with {self.exit_code}"
class MetadataGenerationFailed(InstallationSubprocessError, InstallationError):
reference = "metadata-generation-failed"
def __init__(
self,
*,
package_details: str,
) -> None:
super(InstallationSubprocessError, self).__init__(
message="Encountered error while generating package metadata.",
context=escape(package_details),
hint_stmt="See above for details.",
note_stmt="This is an issue with the package mentioned above, not pip.",
)
def __str__(self) -> str:
return "metadata generation failed"
class HashErrors(InstallationError):
"""Multiple HashError instances rolled into one for reporting"""
def __init__(self) -> None:
self.errors: List["HashError"] = []
def append(self, error: "HashError") -> None:
self.errors.append(error)
def __str__(self) -> str:
lines = []
self.errors.sort(key=lambda e: e.order)
for cls, errors_of_cls in groupby(self.errors, lambda e: e.__class__):
lines.append(cls.head)
lines.extend(e.body() for e in errors_of_cls)
if lines:
return "\n".join(lines)
return ""
def __bool__(self) -> bool:
return bool(self.errors)
class HashError(InstallationError):
"""
A failure to verify a package against known-good hashes
:cvar order: An int sorting hash exception classes by difficulty of
recovery (lower being harder), so the user doesn't bother fretting
about unpinned packages when he has deeper issues, like VCS
dependencies, to deal with. Also keeps error reports in a
deterministic order.
:cvar head: A section heading for display above potentially many
exceptions of this kind
:ivar req: The InstallRequirement that triggered this error. This is
pasted on after the exception is instantiated, because it's not
typically available earlier.
"""
req: Optional["InstallRequirement"] = None
head = ""
order: int = -1
def body(self) -> str:
"""Return a summary of me for display under the heading.
This default implementation simply prints a description of the
triggering requirement.
:param req: The InstallRequirement that provoked this error, with
its link already populated by the resolver's _populate_link().
"""
return f" {self._requirement_name()}"
def __str__(self) -> str:
return f"{self.head}\n{self.body()}"
def _requirement_name(self) -> str:
"""Return a description of the requirement that triggered me.
This default implementation returns long description of the req, with
line numbers
"""
return str(self.req) if self.req else "unknown package"
class VcsHashUnsupported(HashError):
"""A hash was provided for a version-control-system-based requirement, but
we don't have a method for hashing those."""
order = 0
head = (
"Can't verify hashes for these requirements because we don't "
"have a way to hash version control repositories:"
)
class DirectoryUrlHashUnsupported(HashError):
"""A hash was provided for a version-control-system-based requirement, but
we don't have a method for hashing those."""
order = 1
head = (
"Can't verify hashes for these file:// requirements because they "
"point to directories:"
)
class HashMissing(HashError):
"""A hash was needed for a requirement but is absent."""
order = 2
head = (
"Hashes are required in --require-hashes mode, but they are "
"missing from some requirements. Here is a list of those "
"requirements along with the hashes their downloaded archives "
"actually had. Add lines like these to your requirements files to "
"prevent tampering. (If you did not enable --require-hashes "
"manually, note that it turns on automatically when any package "
"has a hash.)"
)
def __init__(self, gotten_hash: str) -> None:
"""
:param gotten_hash: The hash of the (possibly malicious) archive we
just downloaded
"""
self.gotten_hash = gotten_hash
def body(self) -> str:
# Dodge circular import.
from pip._internal.utils.hashes import FAVORITE_HASH
package = None
if self.req:
# In the case of URL-based requirements, display the original URL
# seen in the requirements file rather than the package name,
# so the output can be directly copied into the requirements file.
package = (
self.req.original_link
if self.req.is_direct
# In case someone feeds something downright stupid
# to InstallRequirement's constructor.
else getattr(self.req, "req", None)
)
return " {} --hash={}:{}".format(
package or "unknown package", FAVORITE_HASH, self.gotten_hash
)
class HashUnpinned(HashError):
"""A requirement had a hash specified but was not pinned to a specific
version."""
order = 3
head = (
"In --require-hashes mode, all requirements must have their "
"versions pinned with ==. These do not:"
)
class HashMismatch(HashError):
"""
Distribution file hash values don't match.
:ivar package_name: The name of the package that triggered the hash
mismatch. Feel free to write to this after the exception is raise to
improve its error message.
"""
order = 4
head = (
"THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS "
"FILE. If you have updated the package versions, please update "
"the hashes. Otherwise, examine the package contents carefully; "
"someone may have tampered with them."
)
def __init__(self, allowed: Dict[str, List[str]], gots: Dict[str, "_Hash"]) -> None:
"""
:param allowed: A dict of algorithm names pointing to lists of allowed
hex digests
:param gots: A dict of algorithm names pointing to hashes we
actually got from the files under suspicion
"""
self.allowed = allowed
self.gots = gots
def body(self) -> str:
return f" {self._requirement_name()}:\n{self._hash_comparison()}"
def _hash_comparison(self) -> str:
"""
Return a comparison of actual and expected hash values.
Example::
Expected sha256 abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde
or 123451234512345123451234512345123451234512345
Got bcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdef
"""
def hash_then_or(hash_name: str) -> "chain[str]":
# For now, all the decent hashes have 6-char names, so we can get
# away with hard-coding space literals.
return chain([hash_name], repeat(" or"))
lines: List[str] = []
for hash_name, expecteds in self.allowed.items():
prefix = hash_then_or(hash_name)
lines.extend((f" Expected {next(prefix)} {e}") for e in expecteds)
lines.append(
f" Got {self.gots[hash_name].hexdigest()}\n"
)
return "\n".join(lines)
class UnsupportedPythonVersion(InstallationError):
"""Unsupported python version according to Requires-Python package
metadata."""
class ConfigurationFileCouldNotBeLoaded(ConfigurationError):
"""When there are errors while loading a configuration file"""
def __init__(
self,
reason: str = "could not be loaded",
fname: Optional[str] = None,
error: Optional[configparser.Error] = None,
) -> None:
super().__init__(error)
self.reason = reason
self.fname = fname
self.error = error
def __str__(self) -> str:
if self.fname is not None:
message_part = f" in {self.fname}."
else:
assert self.error is not None
message_part = f".\n{self.error}\n"
return f"Configuration file {self.reason}{message_part}"
_DEFAULT_EXTERNALLY_MANAGED_ERROR = f"""\
The Python environment under {sys.prefix} is managed externally, and may not be
manipulated by the user. Please use specific tooling from the distributor of
the Python installation to interact with this environment instead.
"""
class ExternallyManagedEnvironment(DiagnosticPipError):
"""The current environment is externally managed.
This is raised when the current environment is externally managed, as
defined by `PEP 668`_. The ``EXTERNALLY-MANAGED`` configuration is checked
and displayed when the error is bubbled up to the user.
:param error: The error message read from ``EXTERNALLY-MANAGED``.
"""
reference = "externally-managed-environment"
def __init__(self, error: Optional[str]) -> None:
if error is None:
context = Text(_DEFAULT_EXTERNALLY_MANAGED_ERROR)
else:
context = Text(error)
super().__init__(
message="This environment is externally managed",
context=context,
note_stmt=(
"If you believe this is a mistake, please contact your "
"Python installation or OS distribution provider. "
"You can override this, at the risk of breaking your Python "
"installation or OS, by passing --break-system-packages."
),
hint_stmt=Text("See PEP 668 for the detailed specification."),
)
@staticmethod
def _iter_externally_managed_error_keys() -> Iterator[str]:
# LC_MESSAGES is in POSIX, but not the C standard. The most common
# platform that does not implement this category is Windows, where
# using other categories for console message localization is equally
# unreliable, so we fall back to the locale-less vendor message. This
# can always be re-evaluated when a vendor proposes a new alternative.
try:
category = locale.LC_MESSAGES
except AttributeError:
lang: Optional[str] = None
else:
lang, _ = locale.getlocale(category)
if lang is not None:
yield f"Error-{lang}"
for sep in ("-", "_"):
before, found, _ = lang.partition(sep)
if not found:
continue
yield f"Error-{before}"
yield "Error"
@classmethod
def from_config(
cls,
config: Union[pathlib.Path, str],
) -> "ExternallyManagedEnvironment":
parser = configparser.ConfigParser(interpolation=None)
try:
parser.read(config, encoding="utf-8")
section = parser["externally-managed"]
for key in cls._iter_externally_managed_error_keys():
with contextlib.suppress(KeyError):
return cls(section[key])
except KeyError:
pass
except (OSError, UnicodeDecodeError, configparser.ParsingError):
from pip._internal.utils._log import VERBOSE
exc_info = logger.isEnabledFor(VERBOSE)
logger.warning("Failed to read %s", config, exc_info=exc_info)
return cls(None)

View file

@ -0,0 +1,2 @@
"""Index interaction code
"""

View file

@ -0,0 +1,507 @@
"""
The main purpose of this module is to expose LinkCollector.collect_sources().
"""
import collections
import email.message
import functools
import itertools
import json
import logging
import os
import urllib.parse
import urllib.request
from html.parser import HTMLParser
from optparse import Values
from typing import (
TYPE_CHECKING,
Callable,
Dict,
Iterable,
List,
MutableMapping,
NamedTuple,
Optional,
Sequence,
Tuple,
Union,
)
from pip._vendor import requests
from pip._vendor.requests import Response
from pip._vendor.requests.exceptions import RetryError, SSLError
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.models.link import Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs
from .sources import CandidatesFromPage, LinkSource, build_source
if TYPE_CHECKING:
from typing import Protocol
else:
Protocol = object
logger = logging.getLogger(__name__)
ResponseHeaders = MutableMapping[str, str]
def _match_vcs_scheme(url: str) -> Optional[str]:
"""Look for VCS schemes in the URL.
Returns the matched VCS scheme, or None if there's no match.
"""
for scheme in vcs.schemes:
if url.lower().startswith(scheme) and url[len(scheme)] in "+:":
return scheme
return None
class _NotAPIContent(Exception):
def __init__(self, content_type: str, request_desc: str) -> None:
super().__init__(content_type, request_desc)
self.content_type = content_type
self.request_desc = request_desc
def _ensure_api_header(response: Response) -> None:
"""
Check the Content-Type header to ensure the response contains a Simple
API Response.
Raises `_NotAPIContent` if the content type is not a valid content-type.
"""
content_type = response.headers.get("Content-Type", "Unknown")
content_type_l = content_type.lower()
if content_type_l.startswith(
(
"text/html",
"application/vnd.pypi.simple.v1+html",
"application/vnd.pypi.simple.v1+json",
)
):
return
raise _NotAPIContent(content_type, response.request.method)
class _NotHTTP(Exception):
pass
def _ensure_api_response(url: str, session: PipSession) -> None:
"""
Send a HEAD request to the URL, and ensure the response contains a simple
API Response.
Raises `_NotHTTP` if the URL is not available for a HEAD request, or
`_NotAPIContent` if the content type is not a valid content type.
"""
scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)
if scheme not in {"http", "https"}:
raise _NotHTTP()
resp = session.head(url, allow_redirects=True)
raise_for_status(resp)
_ensure_api_header(resp)
def _get_simple_response(url: str, session: PipSession) -> Response:
"""Access an Simple API response with GET, and return the response.
This consists of three parts:
1. If the URL looks suspiciously like an archive, send a HEAD first to
check the Content-Type is HTML or Simple API, to avoid downloading a
large file. Raise `_NotHTTP` if the content type cannot be determined, or
`_NotAPIContent` if it is not HTML or a Simple API.
2. Actually perform the request. Raise HTTP exceptions on network failures.
3. Check the Content-Type header to make sure we got a Simple API response,
and raise `_NotAPIContent` otherwise.
"""
if is_archive_file(Link(url).filename):
_ensure_api_response(url, session=session)
logger.debug("Getting page %s", redact_auth_from_url(url))
resp = session.get(
url,
headers={
"Accept": ", ".join(
[
"application/vnd.pypi.simple.v1+json",
"application/vnd.pypi.simple.v1+html; q=0.1",
"text/html; q=0.01",
]
),
# We don't want to blindly returned cached data for
# /simple/, because authors generally expecting that
# twine upload && pip install will function, but if
# they've done a pip install in the last ~10 minutes
# it won't. Thus by setting this to zero we will not
# blindly use any cached data, however the benefit of
# using max-age=0 instead of no-cache, is that we will
# still support conditional requests, so we will still
# minimize traffic sent in cases where the page hasn't
# changed at all, we will just always incur the round
# trip for the conditional GET now instead of only
# once per 10 minutes.
# For more information, please see pypa/pip#5670.
"Cache-Control": "max-age=0",
},
)
raise_for_status(resp)
# The check for archives above only works if the url ends with
# something that looks like an archive. However that is not a
# requirement of an url. Unless we issue a HEAD request on every
# url we cannot know ahead of time for sure if something is a
# Simple API response or not. However we can check after we've
# downloaded it.
_ensure_api_header(resp)
logger.debug(
"Fetched page %s as %s",
redact_auth_from_url(url),
resp.headers.get("Content-Type", "Unknown"),
)
return resp
def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
"""Determine if we have any encoding information in our headers."""
if headers and "Content-Type" in headers:
m = email.message.Message()
m["content-type"] = headers["Content-Type"]
charset = m.get_param("charset")
if charset:
return str(charset)
return None
class CacheablePageContent:
def __init__(self, page: "IndexContent") -> None:
assert page.cache_link_parsing
self.page = page
def __eq__(self, other: object) -> bool:
return isinstance(other, type(self)) and self.page.url == other.page.url
def __hash__(self) -> int:
return hash(self.page.url)
class ParseLinks(Protocol):
def __call__(self, page: "IndexContent") -> Iterable[Link]:
...
def with_cached_index_content(fn: ParseLinks) -> ParseLinks:
"""
Given a function that parses an Iterable[Link] from an IndexContent, cache the
function's result (keyed by CacheablePageContent), unless the IndexContent
`page` has `page.cache_link_parsing == False`.
"""
@functools.lru_cache(maxsize=None)
def wrapper(cacheable_page: CacheablePageContent) -> List[Link]:
return list(fn(cacheable_page.page))
@functools.wraps(fn)
def wrapper_wrapper(page: "IndexContent") -> List[Link]:
if page.cache_link_parsing:
return wrapper(CacheablePageContent(page))
return list(fn(page))
return wrapper_wrapper
@with_cached_index_content
def parse_links(page: "IndexContent") -> Iterable[Link]:
"""
Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
"""
content_type_l = page.content_type.lower()
if content_type_l.startswith("application/vnd.pypi.simple.v1+json"):
data = json.loads(page.content)
for file in data.get("files", []):
link = Link.from_json(file, page.url)
if link is None:
continue
yield link
return
parser = HTMLLinkParser(page.url)
encoding = page.encoding or "utf-8"
parser.feed(page.content.decode(encoding))
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
class IndexContent:
"""Represents one response (or page), along with its URL"""
def __init__(
self,
content: bytes,
content_type: str,
encoding: Optional[str],
url: str,
cache_link_parsing: bool = True,
) -> None:
"""
:param encoding: the encoding to decode the given content.
:param url: the URL from which the HTML was downloaded.
:param cache_link_parsing: whether links parsed from this page's url
should be cached. PyPI index urls should
have this set to False, for example.
"""
self.content = content
self.content_type = content_type
self.encoding = encoding
self.url = url
self.cache_link_parsing = cache_link_parsing
def __str__(self) -> str:
return redact_auth_from_url(self.url)
class HTMLLinkParser(HTMLParser):
"""
HTMLParser that keeps the first base HREF and a list of all anchor
elements' attributes.
"""
def __init__(self, url: str) -> None:
super().__init__(convert_charrefs=True)
self.url: str = url
self.base_url: Optional[str] = None
self.anchors: List[Dict[str, Optional[str]]] = []
def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
if tag == "base" and self.base_url is None:
href = self.get_href(attrs)
if href is not None:
self.base_url = href
elif tag == "a":
self.anchors.append(dict(attrs))
def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]:
for name, value in attrs:
if name == "href":
return value
return None
def _handle_get_simple_fail(
link: Link,
reason: Union[str, Exception],
meth: Optional[Callable[..., None]] = None,
) -> None:
if meth is None:
meth = logger.debug
meth("Could not fetch URL %s: %s - skipping", link, reason)
def _make_index_content(
response: Response, cache_link_parsing: bool = True
) -> IndexContent:
encoding = _get_encoding_from_headers(response.headers)
return IndexContent(
response.content,
response.headers["Content-Type"],
encoding=encoding,
url=response.url,
cache_link_parsing=cache_link_parsing,
)
def _get_index_content(link: Link, *, session: PipSession) -> Optional["IndexContent"]:
url = link.url.split("#", 1)[0]
# Check for VCS schemes that do not support lookup as web pages.
vcs_scheme = _match_vcs_scheme(url)
if vcs_scheme:
logger.warning(
"Cannot look at %s URL %s because it does not support lookup as web pages.",
vcs_scheme,
link,
)
return None
# Tack index.html onto file:// URLs that point to directories
scheme, _, path, _, _, _ = urllib.parse.urlparse(url)
if scheme == "file" and os.path.isdir(urllib.request.url2pathname(path)):
# add trailing slash if not present so urljoin doesn't trim
# final segment
if not url.endswith("/"):
url += "/"
# TODO: In the future, it would be nice if pip supported PEP 691
# style responses in the file:// URLs, however there's no
# standard file extension for application/vnd.pypi.simple.v1+json
# so we'll need to come up with something on our own.
url = urllib.parse.urljoin(url, "index.html")
logger.debug(" file: URL is directory, getting %s", url)
try:
resp = _get_simple_response(url, session=session)
except _NotHTTP:
logger.warning(
"Skipping page %s because it looks like an archive, and cannot "
"be checked by a HTTP HEAD request.",
link,
)
except _NotAPIContent as exc:
logger.warning(
"Skipping page %s because the %s request got Content-Type: %s. "
"The only supported Content-Types are application/vnd.pypi.simple.v1+json, "
"application/vnd.pypi.simple.v1+html, and text/html",
link,
exc.request_desc,
exc.content_type,
)
except NetworkConnectionError as exc:
_handle_get_simple_fail(link, exc)
except RetryError as exc:
_handle_get_simple_fail(link, exc)
except SSLError as exc:
reason = "There was a problem confirming the ssl certificate: "
reason += str(exc)
_handle_get_simple_fail(link, reason, meth=logger.info)
except requests.ConnectionError as exc:
_handle_get_simple_fail(link, f"connection error: {exc}")
except requests.Timeout:
_handle_get_simple_fail(link, "timed out")
else:
return _make_index_content(resp, cache_link_parsing=link.cache_link_parsing)
return None
class CollectedSources(NamedTuple):
find_links: Sequence[Optional[LinkSource]]
index_urls: Sequence[Optional[LinkSource]]
class LinkCollector:
"""
Responsible for collecting Link objects from all configured locations,
making network requests as needed.
The class's main method is its collect_sources() method.
"""
def __init__(
self,
session: PipSession,
search_scope: SearchScope,
) -> None:
self.search_scope = search_scope
self.session = session
@classmethod
def create(
cls,
session: PipSession,
options: Values,
suppress_no_index: bool = False,
) -> "LinkCollector":
"""
:param session: The Session to use to make requests.
:param suppress_no_index: Whether to ignore the --no-index option
when constructing the SearchScope object.
"""
index_urls = [options.index_url] + options.extra_index_urls
if options.no_index and not suppress_no_index:
logger.debug(
"Ignoring indexes: %s",
",".join(redact_auth_from_url(url) for url in index_urls),
)
index_urls = []
# Make sure find_links is a list before passing to create().
find_links = options.find_links or []
search_scope = SearchScope.create(
find_links=find_links,
index_urls=index_urls,
no_index=options.no_index,
)
link_collector = LinkCollector(
session=session,
search_scope=search_scope,
)
return link_collector
@property
def find_links(self) -> List[str]:
return self.search_scope.find_links
def fetch_response(self, location: Link) -> Optional[IndexContent]:
"""
Fetch an HTML page containing package links.
"""
return _get_index_content(location, session=self.session)
def collect_sources(
self,
project_name: str,
candidates_from_page: CandidatesFromPage,
) -> CollectedSources:
# The OrderedDict calls deduplicate sources by URL.
index_url_sources = collections.OrderedDict(
build_source(
loc,
candidates_from_page=candidates_from_page,
page_validator=self.session.is_secure_origin,
expand_dir=False,
cache_link_parsing=False,
project_name=project_name,
)
for loc in self.search_scope.get_index_urls_locations(project_name)
).values()
find_links_sources = collections.OrderedDict(
build_source(
loc,
candidates_from_page=candidates_from_page,
page_validator=self.session.is_secure_origin,
expand_dir=True,
cache_link_parsing=True,
project_name=project_name,
)
for loc in self.find_links
).values()
if logger.isEnabledFor(logging.DEBUG):
lines = [
f"* {s.link}"
for s in itertools.chain(find_links_sources, index_url_sources)
if s is not None and s.link is not None
]
lines = [
f"{len(lines)} location(s) to search "
f"for versions of {project_name}:"
] + lines
logger.debug("\n".join(lines))
return CollectedSources(
find_links=list(find_links_sources),
index_urls=list(index_url_sources),
)

View file

@ -0,0 +1,285 @@
import logging
import mimetypes
import os
from collections import defaultdict
from typing import Callable, Dict, Iterable, List, Optional, Tuple
from pip._vendor.packaging.utils import (
InvalidSdistFilename,
InvalidVersion,
InvalidWheelFilename,
canonicalize_name,
parse_sdist_filename,
parse_wheel_filename,
)
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._internal.vcs import is_url
logger = logging.getLogger(__name__)
FoundCandidates = Iterable[InstallationCandidate]
FoundLinks = Iterable[Link]
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
PageValidator = Callable[[Link], bool]
class LinkSource:
@property
def link(self) -> Optional[Link]:
"""Returns the underlying link, if there's one."""
raise NotImplementedError()
def page_candidates(self) -> FoundCandidates:
"""Candidates found by parsing an archive listing HTML file."""
raise NotImplementedError()
def file_links(self) -> FoundLinks:
"""Links found by specifying archives directly."""
raise NotImplementedError()
def _is_html_file(file_url: str) -> bool:
return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
class _FlatDirectoryToUrls:
"""Scans directory and caches results"""
def __init__(self, path: str) -> None:
self._path = path
self._page_candidates: List[str] = []
self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
self._scanned_directory = False
def _scan_directory(self) -> None:
"""Scans directory once and populates both page_candidates
and project_name_to_urls at the same time
"""
for entry in os.scandir(self._path):
url = path_to_url(entry.path)
if _is_html_file(url):
self._page_candidates.append(url)
continue
# File must have a valid wheel or sdist name,
# otherwise not worth considering as a package
try:
project_filename = parse_wheel_filename(entry.name)[0]
except (InvalidWheelFilename, InvalidVersion):
try:
project_filename = parse_sdist_filename(entry.name)[0]
except (InvalidSdistFilename, InvalidVersion):
continue
self._project_name_to_urls[project_filename].append(url)
self._scanned_directory = True
@property
def page_candidates(self) -> List[str]:
if not self._scanned_directory:
self._scan_directory()
return self._page_candidates
@property
def project_name_to_urls(self) -> Dict[str, List[str]]:
if not self._scanned_directory:
self._scan_directory()
return self._project_name_to_urls
class _FlatDirectorySource(LinkSource):
"""Link source specified by ``--find-links=<path-to-dir>``.
This looks the content of the directory, and returns:
* ``page_candidates``: Links listed on each HTML file in the directory.
* ``file_candidates``: Archives in the directory.
"""
_paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}
def __init__(
self,
candidates_from_page: CandidatesFromPage,
path: str,
project_name: str,
) -> None:
self._candidates_from_page = candidates_from_page
self._project_name = canonicalize_name(project_name)
# Get existing instance of _FlatDirectoryToUrls if it exists
if path in self._paths_to_urls:
self._path_to_urls = self._paths_to_urls[path]
else:
self._path_to_urls = _FlatDirectoryToUrls(path=path)
self._paths_to_urls[path] = self._path_to_urls
@property
def link(self) -> Optional[Link]:
return None
def page_candidates(self) -> FoundCandidates:
for url in self._path_to_urls.page_candidates:
yield from self._candidates_from_page(Link(url))
def file_links(self) -> FoundLinks:
for url in self._path_to_urls.project_name_to_urls[self._project_name]:
yield Link(url)
class _LocalFileSource(LinkSource):
"""``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.
If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
the option, it is converted to a URL first. This returns:
* ``page_candidates``: Links listed on an HTML file.
* ``file_candidates``: The non-HTML file.
"""
def __init__(
self,
candidates_from_page: CandidatesFromPage,
link: Link,
) -> None:
self._candidates_from_page = candidates_from_page
self._link = link
@property
def link(self) -> Optional[Link]:
return self._link
def page_candidates(self) -> FoundCandidates:
if not _is_html_file(self._link.url):
return
yield from self._candidates_from_page(self._link)
def file_links(self) -> FoundLinks:
if _is_html_file(self._link.url):
return
yield self._link
class _RemoteFileSource(LinkSource):
"""``--find-links=<url>`` or ``--[extra-]index-url=<url>``.
This returns:
* ``page_candidates``: Links listed on an HTML file.
* ``file_candidates``: The non-HTML file.
"""
def __init__(
self,
candidates_from_page: CandidatesFromPage,
page_validator: PageValidator,
link: Link,
) -> None:
self._candidates_from_page = candidates_from_page
self._page_validator = page_validator
self._link = link
@property
def link(self) -> Optional[Link]:
return self._link
def page_candidates(self) -> FoundCandidates:
if not self._page_validator(self._link):
return
yield from self._candidates_from_page(self._link)
def file_links(self) -> FoundLinks:
yield self._link
class _IndexDirectorySource(LinkSource):
"""``--[extra-]index-url=<path-to-directory>``.
This is treated like a remote URL; ``candidates_from_page`` contains logic
for this by appending ``index.html`` to the link.
"""
def __init__(
self,
candidates_from_page: CandidatesFromPage,
link: Link,
) -> None:
self._candidates_from_page = candidates_from_page
self._link = link
@property
def link(self) -> Optional[Link]:
return self._link
def page_candidates(self) -> FoundCandidates:
yield from self._candidates_from_page(self._link)
def file_links(self) -> FoundLinks:
return ()
def build_source(
location: str,
*,
candidates_from_page: CandidatesFromPage,
page_validator: PageValidator,
expand_dir: bool,
cache_link_parsing: bool,
project_name: str,
) -> Tuple[Optional[str], Optional[LinkSource]]:
path: Optional[str] = None
url: Optional[str] = None
if os.path.exists(location): # Is a local path.
url = path_to_url(location)
path = location
elif location.startswith("file:"): # A file: URL.
url = location
path = url_to_path(location)
elif is_url(location):
url = location
if url is None:
msg = (
"Location '%s' is ignored: "
"it is either a non-existing path or lacks a specific scheme."
)
logger.warning(msg, location)
return (None, None)
if path is None:
source: LinkSource = _RemoteFileSource(
candidates_from_page=candidates_from_page,
page_validator=page_validator,
link=Link(url, cache_link_parsing=cache_link_parsing),
)
return (url, source)
if os.path.isdir(path):
if expand_dir:
source = _FlatDirectorySource(
candidates_from_page=candidates_from_page,
path=path,
project_name=project_name,
)
else:
source = _IndexDirectorySource(
candidates_from_page=candidates_from_page,
link=Link(url, cache_link_parsing=cache_link_parsing),
)
return (url, source)
elif os.path.isfile(path):
source = _LocalFileSource(
candidates_from_page=candidates_from_page,
link=Link(url, cache_link_parsing=cache_link_parsing),
)
return (url, source)
logger.warning(
"Location '%s' is ignored: it is neither a file nor a directory.",
location,
)
return (url, None)

View file

@ -0,0 +1,467 @@
import functools
import logging
import os
import pathlib
import sys
import sysconfig
from typing import Any, Dict, Generator, Optional, Tuple
from pip._internal.models.scheme import SCHEME_KEYS, Scheme
from pip._internal.utils.compat import WINDOWS
from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.virtualenv import running_under_virtualenv
from . import _sysconfig
from .base import (
USER_CACHE_DIR,
get_major_minor_version,
get_src_prefix,
is_osx_framework,
site_packages,
user_site,
)
__all__ = [
"USER_CACHE_DIR",
"get_bin_prefix",
"get_bin_user",
"get_major_minor_version",
"get_platlib",
"get_purelib",
"get_scheme",
"get_src_prefix",
"site_packages",
"user_site",
]
logger = logging.getLogger(__name__)
_PLATLIBDIR: str = getattr(sys, "platlibdir", "lib")
_USE_SYSCONFIG_DEFAULT = sys.version_info >= (3, 10)
def _should_use_sysconfig() -> bool:
"""This function determines the value of _USE_SYSCONFIG.
By default, pip uses sysconfig on Python 3.10+.
But Python distributors can override this decision by setting:
sysconfig._PIP_USE_SYSCONFIG = True / False
Rationale in https://github.com/pypa/pip/issues/10647
This is a function for testability, but should be constant during any one
run.
"""
return bool(getattr(sysconfig, "_PIP_USE_SYSCONFIG", _USE_SYSCONFIG_DEFAULT))
_USE_SYSCONFIG = _should_use_sysconfig()
if not _USE_SYSCONFIG:
# Import distutils lazily to avoid deprecation warnings,
# but import it soon enough that it is in memory and available during
# a pip reinstall.
from . import _distutils
# Be noisy about incompatibilities if this platforms "should" be using
# sysconfig, but is explicitly opting out and using distutils instead.
if _USE_SYSCONFIG_DEFAULT and not _USE_SYSCONFIG:
_MISMATCH_LEVEL = logging.WARNING
else:
_MISMATCH_LEVEL = logging.DEBUG
def _looks_like_bpo_44860() -> bool:
"""The resolution to bpo-44860 will change this incorrect platlib.
See <https://bugs.python.org/issue44860>.
"""
from distutils.command.install import INSTALL_SCHEMES
try:
unix_user_platlib = INSTALL_SCHEMES["unix_user"]["platlib"]
except KeyError:
return False
return unix_user_platlib == "$usersite"
def _looks_like_red_hat_patched_platlib_purelib(scheme: Dict[str, str]) -> bool:
platlib = scheme["platlib"]
if "/$platlibdir/" in platlib:
platlib = platlib.replace("/$platlibdir/", f"/{_PLATLIBDIR}/")
if "/lib64/" not in platlib:
return False
unpatched = platlib.replace("/lib64/", "/lib/")
return unpatched.replace("$platbase/", "$base/") == scheme["purelib"]
@functools.lru_cache(maxsize=None)
def _looks_like_red_hat_lib() -> bool:
"""Red Hat patches platlib in unix_prefix and unix_home, but not purelib.
This is the only way I can see to tell a Red Hat-patched Python.
"""
from distutils.command.install import INSTALL_SCHEMES
return all(
k in INSTALL_SCHEMES
and _looks_like_red_hat_patched_platlib_purelib(INSTALL_SCHEMES[k])
for k in ("unix_prefix", "unix_home")
)
@functools.lru_cache(maxsize=None)
def _looks_like_debian_scheme() -> bool:
"""Debian adds two additional schemes."""
from distutils.command.install import INSTALL_SCHEMES
return "deb_system" in INSTALL_SCHEMES and "unix_local" in INSTALL_SCHEMES
@functools.lru_cache(maxsize=None)
def _looks_like_red_hat_scheme() -> bool:
"""Red Hat patches ``sys.prefix`` and ``sys.exec_prefix``.
Red Hat's ``00251-change-user-install-location.patch`` changes the install
command's ``prefix`` and ``exec_prefix`` to append ``"/local"``. This is
(fortunately?) done quite unconditionally, so we create a default command
object without any configuration to detect this.
"""
from distutils.command.install import install
from distutils.dist import Distribution
cmd: Any = install(Distribution())
cmd.finalize_options()
return (
cmd.exec_prefix == f"{os.path.normpath(sys.exec_prefix)}/local"
and cmd.prefix == f"{os.path.normpath(sys.prefix)}/local"
)
@functools.lru_cache(maxsize=None)
def _looks_like_slackware_scheme() -> bool:
"""Slackware patches sysconfig but fails to patch distutils and site.
Slackware changes sysconfig's user scheme to use ``"lib64"`` for the lib
path, but does not do the same to the site module.
"""
if user_site is None: # User-site not available.
return False
try:
paths = sysconfig.get_paths(scheme="posix_user", expand=False)
except KeyError: # User-site not available.
return False
return "/lib64/" in paths["purelib"] and "/lib64/" not in user_site
@functools.lru_cache(maxsize=None)
def _looks_like_msys2_mingw_scheme() -> bool:
"""MSYS2 patches distutils and sysconfig to use a UNIX-like scheme.
However, MSYS2 incorrectly patches sysconfig ``nt`` scheme. The fix is
likely going to be included in their 3.10 release, so we ignore the warning.
See msys2/MINGW-packages#9319.
MSYS2 MINGW's patch uses lowercase ``"lib"`` instead of the usual uppercase,
and is missing the final ``"site-packages"``.
"""
paths = sysconfig.get_paths("nt", expand=False)
return all(
"Lib" not in p and "lib" in p and not p.endswith("site-packages")
for p in (paths[key] for key in ("platlib", "purelib"))
)
def _fix_abiflags(parts: Tuple[str]) -> Generator[str, None, None]:
ldversion = sysconfig.get_config_var("LDVERSION")
abiflags = getattr(sys, "abiflags", None)
# LDVERSION does not end with sys.abiflags. Just return the path unchanged.
if not ldversion or not abiflags or not ldversion.endswith(abiflags):
yield from parts
return
# Strip sys.abiflags from LDVERSION-based path components.
for part in parts:
if part.endswith(ldversion):
part = part[: (0 - len(abiflags))]
yield part
@functools.lru_cache(maxsize=None)
def _warn_mismatched(old: pathlib.Path, new: pathlib.Path, *, key: str) -> None:
issue_url = "https://github.com/pypa/pip/issues/10151"
message = (
"Value for %s does not match. Please report this to <%s>"
"\ndistutils: %s"
"\nsysconfig: %s"
)
logger.log(_MISMATCH_LEVEL, message, key, issue_url, old, new)
def _warn_if_mismatch(old: pathlib.Path, new: pathlib.Path, *, key: str) -> bool:
if old == new:
return False
_warn_mismatched(old, new, key=key)
return True
@functools.lru_cache(maxsize=None)
def _log_context(
*,
user: bool = False,
home: Optional[str] = None,
root: Optional[str] = None,
prefix: Optional[str] = None,
) -> None:
parts = [
"Additional context:",
"user = %r",
"home = %r",
"root = %r",
"prefix = %r",
]
logger.log(_MISMATCH_LEVEL, "\n".join(parts), user, home, root, prefix)
def get_scheme(
dist_name: str,
user: bool = False,
home: Optional[str] = None,
root: Optional[str] = None,
isolated: bool = False,
prefix: Optional[str] = None,
) -> Scheme:
new = _sysconfig.get_scheme(
dist_name,
user=user,
home=home,
root=root,
isolated=isolated,
prefix=prefix,
)
if _USE_SYSCONFIG:
return new
old = _distutils.get_scheme(
dist_name,
user=user,
home=home,
root=root,
isolated=isolated,
prefix=prefix,
)
warning_contexts = []
for k in SCHEME_KEYS:
old_v = pathlib.Path(getattr(old, k))
new_v = pathlib.Path(getattr(new, k))
if old_v == new_v:
continue
# distutils incorrectly put PyPy packages under ``site-packages/python``
# in the ``posix_home`` scheme, but PyPy devs said they expect the
# directory name to be ``pypy`` instead. So we treat this as a bug fix
# and not warn about it. See bpo-43307 and python/cpython#24628.
skip_pypy_special_case = (
sys.implementation.name == "pypy"
and home is not None
and k in ("platlib", "purelib")
and old_v.parent == new_v.parent
and old_v.name.startswith("python")
and new_v.name.startswith("pypy")
)
if skip_pypy_special_case:
continue
# sysconfig's ``osx_framework_user`` does not include ``pythonX.Y`` in
# the ``include`` value, but distutils's ``headers`` does. We'll let
# CPython decide whether this is a bug or feature. See bpo-43948.
skip_osx_framework_user_special_case = (
user
and is_osx_framework()
and k == "headers"
and old_v.parent.parent == new_v.parent
and old_v.parent.name.startswith("python")
)
if skip_osx_framework_user_special_case:
continue
# On Red Hat and derived Linux distributions, distutils is patched to
# use "lib64" instead of "lib" for platlib.
if k == "platlib" and _looks_like_red_hat_lib():
continue
# On Python 3.9+, sysconfig's posix_user scheme sets platlib against
# sys.platlibdir, but distutils's unix_user incorrectly coninutes
# using the same $usersite for both platlib and purelib. This creates a
# mismatch when sys.platlibdir is not "lib".
skip_bpo_44860 = (
user
and k == "platlib"
and not WINDOWS
and sys.version_info >= (3, 9)
and _PLATLIBDIR != "lib"
and _looks_like_bpo_44860()
)
if skip_bpo_44860:
continue
# Slackware incorrectly patches posix_user to use lib64 instead of lib,
# but not usersite to match the location.
skip_slackware_user_scheme = (
user
and k in ("platlib", "purelib")
and not WINDOWS
and _looks_like_slackware_scheme()
)
if skip_slackware_user_scheme:
continue
# Both Debian and Red Hat patch Python to place the system site under
# /usr/local instead of /usr. Debian also places lib in dist-packages
# instead of site-packages, but the /usr/local check should cover it.
skip_linux_system_special_case = (
not (user or home or prefix or running_under_virtualenv())
and old_v.parts[1:3] == ("usr", "local")
and len(new_v.parts) > 1
and new_v.parts[1] == "usr"
and (len(new_v.parts) < 3 or new_v.parts[2] != "local")
and (_looks_like_red_hat_scheme() or _looks_like_debian_scheme())
)
if skip_linux_system_special_case:
continue
# On Python 3.7 and earlier, sysconfig does not include sys.abiflags in
# the "pythonX.Y" part of the path, but distutils does.
skip_sysconfig_abiflag_bug = (
sys.version_info < (3, 8)
and not WINDOWS
and k in ("headers", "platlib", "purelib")
and tuple(_fix_abiflags(old_v.parts)) == new_v.parts
)
if skip_sysconfig_abiflag_bug:
continue
# MSYS2 MINGW's sysconfig patch does not include the "site-packages"
# part of the path. This is incorrect and will be fixed in MSYS.
skip_msys2_mingw_bug = (
WINDOWS and k in ("platlib", "purelib") and _looks_like_msys2_mingw_scheme()
)
if skip_msys2_mingw_bug:
continue
# CPython's POSIX install script invokes pip (via ensurepip) against the
# interpreter located in the source tree, not the install site. This
# triggers special logic in sysconfig that's not present in distutils.
# https://github.com/python/cpython/blob/8c21941ddaf/Lib/sysconfig.py#L178-L194
skip_cpython_build = (
sysconfig.is_python_build(check_home=True)
and not WINDOWS
and k in ("headers", "include", "platinclude")
)
if skip_cpython_build:
continue
warning_contexts.append((old_v, new_v, f"scheme.{k}"))
if not warning_contexts:
return old
# Check if this path mismatch is caused by distutils config files. Those
# files will no longer work once we switch to sysconfig, so this raises a
# deprecation message for them.
default_old = _distutils.distutils_scheme(
dist_name,
user,
home,
root,
isolated,
prefix,
ignore_config_files=True,
)
if any(default_old[k] != getattr(old, k) for k in SCHEME_KEYS):
deprecated(
reason=(
"Configuring installation scheme with distutils config files "
"is deprecated and will no longer work in the near future. If you "
"are using a Homebrew or Linuxbrew Python, please see discussion "
"at https://github.com/Homebrew/homebrew-core/issues/76621"
),
replacement=None,
gone_in=None,
)
return old
# Post warnings about this mismatch so user can report them back.
for old_v, new_v, key in warning_contexts:
_warn_mismatched(old_v, new_v, key=key)
_log_context(user=user, home=home, root=root, prefix=prefix)
return old
def get_bin_prefix() -> str:
new = _sysconfig.get_bin_prefix()
if _USE_SYSCONFIG:
return new
old = _distutils.get_bin_prefix()
if _warn_if_mismatch(pathlib.Path(old), pathlib.Path(new), key="bin_prefix"):
_log_context()
return old
def get_bin_user() -> str:
return _sysconfig.get_scheme("", user=True).scripts
def _looks_like_deb_system_dist_packages(value: str) -> bool:
"""Check if the value is Debian's APT-controlled dist-packages.
Debian's ``distutils.sysconfig.get_python_lib()`` implementation returns the
default package path controlled by APT, but does not patch ``sysconfig`` to
do the same. This is similar to the bug worked around in ``get_scheme()``,
but here the default is ``deb_system`` instead of ``unix_local``. Ultimately
we can't do anything about this Debian bug, and this detection allows us to
skip the warning when needed.
"""
if not _looks_like_debian_scheme():
return False
if value == "/usr/lib/python3/dist-packages":
return True
return False
def get_purelib() -> str:
"""Return the default pure-Python lib location."""
new = _sysconfig.get_purelib()
if _USE_SYSCONFIG:
return new
old = _distutils.get_purelib()
if _looks_like_deb_system_dist_packages(old):
return old
if _warn_if_mismatch(pathlib.Path(old), pathlib.Path(new), key="purelib"):
_log_context()
return old
def get_platlib() -> str:
"""Return the default platform-shared lib location."""
new = _sysconfig.get_platlib()
if _USE_SYSCONFIG:
return new
from . import _distutils
old = _distutils.get_platlib()
if _looks_like_deb_system_dist_packages(old):
return old
if _warn_if_mismatch(pathlib.Path(old), pathlib.Path(new), key="platlib"):
_log_context()
return old

View file

@ -0,0 +1,172 @@
"""Locations where we look for configs, install stuff, etc"""
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
# If pip's going to use distutils, it should not be using the copy that setuptools
# might have injected into the environment. This is done by removing the injected
# shim, if it's injected.
#
# See https://github.com/pypa/pip/issues/8761 for the original discussion and
# rationale for why this is done within pip.
try:
__import__("_distutils_hack").remove_shim()
except (ImportError, AttributeError):
pass
import logging
import os
import sys
from distutils.cmd import Command as DistutilsCommand
from distutils.command.install import SCHEME_KEYS
from distutils.command.install import install as distutils_install_command
from distutils.sysconfig import get_python_lib
from typing import Dict, List, Optional, Union, cast
from pip._internal.models.scheme import Scheme
from pip._internal.utils.compat import WINDOWS
from pip._internal.utils.virtualenv import running_under_virtualenv
from .base import get_major_minor_version
logger = logging.getLogger(__name__)
def distutils_scheme(
dist_name: str,
user: bool = False,
home: Optional[str] = None,
root: Optional[str] = None,
isolated: bool = False,
prefix: Optional[str] = None,
*,
ignore_config_files: bool = False,
) -> Dict[str, str]:
"""
Return a distutils install scheme
"""
from distutils.dist import Distribution
dist_args: Dict[str, Union[str, List[str]]] = {"name": dist_name}
if isolated:
dist_args["script_args"] = ["--no-user-cfg"]
d = Distribution(dist_args)
if not ignore_config_files:
try:
d.parse_config_files()
except UnicodeDecodeError:
paths = d.find_config_files()
logger.warning(
"Ignore distutils configs in %s due to encoding errors.",
", ".join(os.path.basename(p) for p in paths),
)
obj: Optional[DistutilsCommand] = None
obj = d.get_command_obj("install", create=True)
assert obj is not None
i = cast(distutils_install_command, obj)
# NOTE: setting user or home has the side-effect of creating the home dir
# or user base for installations during finalize_options()
# ideally, we'd prefer a scheme class that has no side-effects.
assert not (user and prefix), f"user={user} prefix={prefix}"
assert not (home and prefix), f"home={home} prefix={prefix}"
i.user = user or i.user
if user or home:
i.prefix = ""
i.prefix = prefix or i.prefix
i.home = home or i.home
i.root = root or i.root
i.finalize_options()
scheme = {}
for key in SCHEME_KEYS:
scheme[key] = getattr(i, "install_" + key)
# install_lib specified in setup.cfg should install *everything*
# into there (i.e. it takes precedence over both purelib and
# platlib). Note, i.install_lib is *always* set after
# finalize_options(); we only want to override here if the user
# has explicitly requested it hence going back to the config
if "install_lib" in d.get_option_dict("install"):
scheme.update({"purelib": i.install_lib, "platlib": i.install_lib})
if running_under_virtualenv():
if home:
prefix = home
elif user:
prefix = i.install_userbase
else:
prefix = i.prefix
scheme["headers"] = os.path.join(
prefix,
"include",
"site",
f"python{get_major_minor_version()}",
dist_name,
)
if root is not None:
path_no_drive = os.path.splitdrive(os.path.abspath(scheme["headers"]))[1]
scheme["headers"] = os.path.join(root, path_no_drive[1:])
return scheme
def get_scheme(
dist_name: str,
user: bool = False,
home: Optional[str] = None,
root: Optional[str] = None,
isolated: bool = False,
prefix: Optional[str] = None,
) -> Scheme:
"""
Get the "scheme" corresponding to the input parameters. The distutils
documentation provides the context for the available schemes:
https://docs.python.org/3/install/index.html#alternate-installation
:param dist_name: the name of the package to retrieve the scheme for, used
in the headers scheme path
:param user: indicates to use the "user" scheme
:param home: indicates to use the "home" scheme and provides the base
directory for the same
:param root: root under which other directories are re-based
:param isolated: equivalent to --no-user-cfg, i.e. do not consider
~/.pydistutils.cfg (posix) or ~/pydistutils.cfg (non-posix) for
scheme paths
:param prefix: indicates to use the "prefix" scheme and provides the
base directory for the same
"""
scheme = distutils_scheme(dist_name, user, home, root, isolated, prefix)
return Scheme(
platlib=scheme["platlib"],
purelib=scheme["purelib"],
headers=scheme["headers"],
scripts=scheme["scripts"],
data=scheme["data"],
)
def get_bin_prefix() -> str:
# XXX: In old virtualenv versions, sys.prefix can contain '..' components,
# so we need to call normpath to eliminate them.
prefix = os.path.normpath(sys.prefix)
if WINDOWS:
bin_py = os.path.join(prefix, "Scripts")
# buildout uses 'bin' on Windows too?
if not os.path.exists(bin_py):
bin_py = os.path.join(prefix, "bin")
return bin_py
# Forcing to use /usr/local/bin for standard macOS framework installs
# Also log to ~/Library/Logs/ for use with the Console.app log viewer
if sys.platform[:6] == "darwin" and prefix[:16] == "/System/Library/":
return "/usr/local/bin"
return os.path.join(prefix, "bin")
def get_purelib() -> str:
return get_python_lib(plat_specific=False)
def get_platlib() -> str:
return get_python_lib(plat_specific=True)

View file

@ -0,0 +1,213 @@
import logging
import os
import sys
import sysconfig
import typing
from pip._internal.exceptions import InvalidSchemeCombination, UserInstallationInvalid
from pip._internal.models.scheme import SCHEME_KEYS, Scheme
from pip._internal.utils.virtualenv import running_under_virtualenv
from .base import change_root, get_major_minor_version, is_osx_framework
logger = logging.getLogger(__name__)
# Notes on _infer_* functions.
# Unfortunately ``get_default_scheme()`` didn't exist before 3.10, so there's no
# way to ask things like "what is the '_prefix' scheme on this platform". These
# functions try to answer that with some heuristics while accounting for ad-hoc
# platforms not covered by CPython's default sysconfig implementation. If the
# ad-hoc implementation does not fully implement sysconfig, we'll fall back to
# a POSIX scheme.
_AVAILABLE_SCHEMES = set(sysconfig.get_scheme_names())
_PREFERRED_SCHEME_API = getattr(sysconfig, "get_preferred_scheme", None)
def _should_use_osx_framework_prefix() -> bool:
"""Check for Apple's ``osx_framework_library`` scheme.
Python distributed by Apple's Command Line Tools has this special scheme
that's used when:
* This is a framework build.
* We are installing into the system prefix.
This does not account for ``pip install --prefix`` (also means we're not
installing to the system prefix), which should use ``posix_prefix``, but
logic here means ``_infer_prefix()`` outputs ``osx_framework_library``. But
since ``prefix`` is not available for ``sysconfig.get_default_scheme()``,
which is the stdlib replacement for ``_infer_prefix()``, presumably Apple
wouldn't be able to magically switch between ``osx_framework_library`` and
``posix_prefix``. ``_infer_prefix()`` returning ``osx_framework_library``
means its behavior is consistent whether we use the stdlib implementation
or our own, and we deal with this special case in ``get_scheme()`` instead.
"""
return (
"osx_framework_library" in _AVAILABLE_SCHEMES
and not running_under_virtualenv()
and is_osx_framework()
)
def _infer_prefix() -> str:
"""Try to find a prefix scheme for the current platform.
This tries:
* A special ``osx_framework_library`` for Python distributed by Apple's
Command Line Tools, when not running in a virtual environment.
* Implementation + OS, used by PyPy on Windows (``pypy_nt``).
* Implementation without OS, used by PyPy on POSIX (``pypy``).
* OS + "prefix", used by CPython on POSIX (``posix_prefix``).
* Just the OS name, used by CPython on Windows (``nt``).
If none of the above works, fall back to ``posix_prefix``.
"""
if _PREFERRED_SCHEME_API:
return _PREFERRED_SCHEME_API("prefix")
if _should_use_osx_framework_prefix():
return "osx_framework_library"
implementation_suffixed = f"{sys.implementation.name}_{os.name}"
if implementation_suffixed in _AVAILABLE_SCHEMES:
return implementation_suffixed
if sys.implementation.name in _AVAILABLE_SCHEMES:
return sys.implementation.name
suffixed = f"{os.name}_prefix"
if suffixed in _AVAILABLE_SCHEMES:
return suffixed
if os.name in _AVAILABLE_SCHEMES: # On Windows, prefx is just called "nt".
return os.name
return "posix_prefix"
def _infer_user() -> str:
"""Try to find a user scheme for the current platform."""
if _PREFERRED_SCHEME_API:
return _PREFERRED_SCHEME_API("user")
if is_osx_framework() and not running_under_virtualenv():
suffixed = "osx_framework_user"
else:
suffixed = f"{os.name}_user"
if suffixed in _AVAILABLE_SCHEMES:
return suffixed
if "posix_user" not in _AVAILABLE_SCHEMES: # User scheme unavailable.
raise UserInstallationInvalid()
return "posix_user"
def _infer_home() -> str:
"""Try to find a home for the current platform."""
if _PREFERRED_SCHEME_API:
return _PREFERRED_SCHEME_API("home")
suffixed = f"{os.name}_home"
if suffixed in _AVAILABLE_SCHEMES:
return suffixed
return "posix_home"
# Update these keys if the user sets a custom home.
_HOME_KEYS = [
"installed_base",
"base",
"installed_platbase",
"platbase",
"prefix",
"exec_prefix",
]
if sysconfig.get_config_var("userbase") is not None:
_HOME_KEYS.append("userbase")
def get_scheme(
dist_name: str,
user: bool = False,
home: typing.Optional[str] = None,
root: typing.Optional[str] = None,
isolated: bool = False,
prefix: typing.Optional[str] = None,
) -> Scheme:
"""
Get the "scheme" corresponding to the input parameters.
:param dist_name: the name of the package to retrieve the scheme for, used
in the headers scheme path
:param user: indicates to use the "user" scheme
:param home: indicates to use the "home" scheme
:param root: root under which other directories are re-based
:param isolated: ignored, but kept for distutils compatibility (where
this controls whether the user-site pydistutils.cfg is honored)
:param prefix: indicates to use the "prefix" scheme and provides the
base directory for the same
"""
if user and prefix:
raise InvalidSchemeCombination("--user", "--prefix")
if home and prefix:
raise InvalidSchemeCombination("--home", "--prefix")
if home is not None:
scheme_name = _infer_home()
elif user:
scheme_name = _infer_user()
else:
scheme_name = _infer_prefix()
# Special case: When installing into a custom prefix, use posix_prefix
# instead of osx_framework_library. See _should_use_osx_framework_prefix()
# docstring for details.
if prefix is not None and scheme_name == "osx_framework_library":
scheme_name = "posix_prefix"
if home is not None:
variables = {k: home for k in _HOME_KEYS}
elif prefix is not None:
variables = {k: prefix for k in _HOME_KEYS}
else:
variables = {}
paths = sysconfig.get_paths(scheme=scheme_name, vars=variables)
# Logic here is very arbitrary, we're doing it for compatibility, don't ask.
# 1. Pip historically uses a special header path in virtual environments.
# 2. If the distribution name is not known, distutils uses 'UNKNOWN'. We
# only do the same when not running in a virtual environment because
# pip's historical header path logic (see point 1) did not do this.
if running_under_virtualenv():
if user:
base = variables.get("userbase", sys.prefix)
else:
base = variables.get("base", sys.prefix)
python_xy = f"python{get_major_minor_version()}"
paths["include"] = os.path.join(base, "include", "site", python_xy)
elif not dist_name:
dist_name = "UNKNOWN"
scheme = Scheme(
platlib=paths["platlib"],
purelib=paths["purelib"],
headers=os.path.join(paths["include"], dist_name),
scripts=paths["scripts"],
data=paths["data"],
)
if root is not None:
for key in SCHEME_KEYS:
value = change_root(root, getattr(scheme, key))
setattr(scheme, key, value)
return scheme
def get_bin_prefix() -> str:
# Forcing to use /usr/local/bin for standard macOS framework installs.
if sys.platform[:6] == "darwin" and sys.prefix[:16] == "/System/Library/":
return "/usr/local/bin"
return sysconfig.get_paths()["scripts"]
def get_purelib() -> str:
return sysconfig.get_paths()["purelib"]
def get_platlib() -> str:
return sysconfig.get_paths()["platlib"]

View file

@ -0,0 +1,81 @@
import functools
import os
import site
import sys
import sysconfig
import typing
from pip._internal.exceptions import InstallationError
from pip._internal.utils import appdirs
from pip._internal.utils.virtualenv import running_under_virtualenv
# Application Directories
USER_CACHE_DIR = appdirs.user_cache_dir("pip")
# FIXME doesn't account for venv linked to global site-packages
site_packages: str = sysconfig.get_path("purelib")
def get_major_minor_version() -> str:
"""
Return the major-minor version of the current Python as a string, e.g.
"3.7" or "3.10".
"""
return "{}.{}".format(*sys.version_info)
def change_root(new_root: str, pathname: str) -> str:
"""Return 'pathname' with 'new_root' prepended.
If 'pathname' is relative, this is equivalent to os.path.join(new_root, pathname).
Otherwise, it requires making 'pathname' relative and then joining the
two, which is tricky on DOS/Windows and Mac OS.
This is borrowed from Python's standard library's distutils module.
"""
if os.name == "posix":
if not os.path.isabs(pathname):
return os.path.join(new_root, pathname)
else:
return os.path.join(new_root, pathname[1:])
elif os.name == "nt":
(drive, path) = os.path.splitdrive(pathname)
if path[0] == "\\":
path = path[1:]
return os.path.join(new_root, path)
else:
raise InstallationError(
f"Unknown platform: {os.name}\n"
"Can not change root path prefix on unknown platform."
)
def get_src_prefix() -> str:
if running_under_virtualenv():
src_prefix = os.path.join(sys.prefix, "src")
else:
# FIXME: keep src in cwd for now (it is not a temporary folder)
try:
src_prefix = os.path.join(os.getcwd(), "src")
except OSError:
# In case the current working directory has been renamed or deleted
sys.exit("The folder you are executing pip from can no longer be found.")
# under macOS + virtualenv sys.prefix is not properly resolved
# it is something like /path/to/python/bin/..
return os.path.abspath(src_prefix)
try:
# Use getusersitepackages if this is present, as it ensures that the
# value is initialised properly.
user_site: typing.Optional[str] = site.getusersitepackages()
except AttributeError:
user_site = site.USER_SITE
@functools.lru_cache(maxsize=None)
def is_osx_framework() -> bool:
return bool(sysconfig.get_config_var("PYTHONFRAMEWORK"))

View file

@ -0,0 +1,12 @@
from typing import List, Optional
def main(args: Optional[List[str]] = None) -> int:
"""This is preserved for old console scripts that may still be referencing
it.
For additional details, see https://github.com/pypa/pip/issues/7498.
"""
from pip._internal.utils.entrypoints import _wrapper
return _wrapper(args)

View file

@ -0,0 +1,128 @@
import contextlib
import functools
import os
import sys
from typing import TYPE_CHECKING, List, Optional, Type, cast
from pip._internal.utils.misc import strtobool
from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
if TYPE_CHECKING:
from typing import Literal, Protocol
else:
Protocol = object
__all__ = [
"BaseDistribution",
"BaseEnvironment",
"FilesystemWheel",
"MemoryWheel",
"Wheel",
"get_default_environment",
"get_environment",
"get_wheel_distribution",
"select_backend",
]
def _should_use_importlib_metadata() -> bool:
"""Whether to use the ``importlib.metadata`` or ``pkg_resources`` backend.
By default, pip uses ``importlib.metadata`` on Python 3.11+, and
``pkg_resourcess`` otherwise. This can be overridden by a couple of ways:
* If environment variable ``_PIP_USE_IMPORTLIB_METADATA`` is set, it
dictates whether ``importlib.metadata`` is used, regardless of Python
version.
* On Python 3.11+, Python distributors can patch ``importlib.metadata``
to add a global constant ``_PIP_USE_IMPORTLIB_METADATA = False``. This
makes pip use ``pkg_resources`` (unless the user set the aforementioned
environment variable to *True*).
"""
with contextlib.suppress(KeyError, ValueError):
return bool(strtobool(os.environ["_PIP_USE_IMPORTLIB_METADATA"]))
if sys.version_info < (3, 11):
return False
import importlib.metadata
return bool(getattr(importlib.metadata, "_PIP_USE_IMPORTLIB_METADATA", True))
class Backend(Protocol):
NAME: 'Literal["importlib", "pkg_resources"]'
Distribution: Type[BaseDistribution]
Environment: Type[BaseEnvironment]
@functools.lru_cache(maxsize=None)
def select_backend() -> Backend:
if _should_use_importlib_metadata():
from . import importlib
return cast(Backend, importlib)
from . import pkg_resources
return cast(Backend, pkg_resources)
def get_default_environment() -> BaseEnvironment:
"""Get the default representation for the current environment.
This returns an Environment instance from the chosen backend. The default
Environment instance should be built from ``sys.path`` and may use caching
to share instance state accorss calls.
"""
return select_backend().Environment.default()
def get_environment(paths: Optional[List[str]]) -> BaseEnvironment:
"""Get a representation of the environment specified by ``paths``.
This returns an Environment instance from the chosen backend based on the
given import paths. The backend must build a fresh instance representing
the state of installed distributions when this function is called.
"""
return select_backend().Environment.from_paths(paths)
def get_directory_distribution(directory: str) -> BaseDistribution:
"""Get the distribution metadata representation in the specified directory.
This returns a Distribution instance from the chosen backend based on
the given on-disk ``.dist-info`` directory.
"""
return select_backend().Distribution.from_directory(directory)
def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistribution:
"""Get the representation of the specified wheel's distribution metadata.
This returns a Distribution instance from the chosen backend based on
the given wheel's ``.dist-info`` directory.
:param canonical_name: Normalized project name of the given wheel.
"""
return select_backend().Distribution.from_wheel(wheel, canonical_name)
def get_metadata_distribution(
metadata_contents: bytes,
filename: str,
canonical_name: str,
) -> BaseDistribution:
"""Get the dist representation of the specified METADATA file contents.
This returns a Distribution instance from the chosen backend sourced from the data
in `metadata_contents`.
:param metadata_contents: Contents of a METADATA file within a dist, or one served
via PEP 658.
:param filename: Filename for the dist this metadata represents.
:param canonical_name: Normalized project name of the given dist.
"""
return select_backend().Distribution.from_metadata_file_contents(
metadata_contents,
filename,
canonical_name,
)

View file

@ -0,0 +1,84 @@
# Extracted from https://github.com/pfmoore/pkg_metadata
from email.header import Header, decode_header, make_header
from email.message import Message
from typing import Any, Dict, List, Union
METADATA_FIELDS = [
# Name, Multiple-Use
("Metadata-Version", False),
("Name", False),
("Version", False),
("Dynamic", True),
("Platform", True),
("Supported-Platform", True),
("Summary", False),
("Description", False),
("Description-Content-Type", False),
("Keywords", False),
("Home-page", False),
("Download-URL", False),
("Author", False),
("Author-email", False),
("Maintainer", False),
("Maintainer-email", False),
("License", False),
("Classifier", True),
("Requires-Dist", True),
("Requires-Python", False),
("Requires-External", True),
("Project-URL", True),
("Provides-Extra", True),
("Provides-Dist", True),
("Obsoletes-Dist", True),
]
def json_name(field: str) -> str:
return field.lower().replace("-", "_")
def msg_to_json(msg: Message) -> Dict[str, Any]:
"""Convert a Message object into a JSON-compatible dictionary."""
def sanitise_header(h: Union[Header, str]) -> str:
if isinstance(h, Header):
chunks = []
for bytes, encoding in decode_header(h):
if encoding == "unknown-8bit":
try:
# See if UTF-8 works
bytes.decode("utf-8")
encoding = "utf-8"
except UnicodeDecodeError:
# If not, latin1 at least won't fail
encoding = "latin1"
chunks.append((bytes, encoding))
return str(make_header(chunks))
return str(h)
result = {}
for field, multi in METADATA_FIELDS:
if field not in msg:
continue
key = json_name(field)
if multi:
value: Union[str, List[str]] = [
sanitise_header(v) for v in msg.get_all(field) # type: ignore
]
else:
value = sanitise_header(msg.get(field)) # type: ignore
if key == "keywords":
# Accept both comma-separated and space-separated
# forms, for better compatibility with old data.
if "," in value:
value = [v.strip() for v in value.split(",")]
else:
value = value.split()
result[key] = value
payload = msg.get_payload()
if payload:
result["description"] = payload
return result

View file

@ -0,0 +1,702 @@
import csv
import email.message
import functools
import json
import logging
import pathlib
import re
import zipfile
from typing import (
IO,
TYPE_CHECKING,
Any,
Collection,
Container,
Dict,
Iterable,
Iterator,
List,
NamedTuple,
Optional,
Tuple,
Union,
)
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import LegacyVersion, Version
from pip._internal.exceptions import NoneMetadataError
from pip._internal.locations import site_packages, user_site
from pip._internal.models.direct_url import (
DIRECT_URL_METADATA_NAME,
DirectUrl,
DirectUrlValidationError,
)
from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here.
from pip._internal.utils.egg_link import egg_link_path_from_sys_path
from pip._internal.utils.misc import is_local, normalize_path
from pip._internal.utils.urls import url_to_path
from ._json import msg_to_json
if TYPE_CHECKING:
from typing import Protocol
else:
Protocol = object
DistributionVersion = Union[LegacyVersion, Version]
InfoPath = Union[str, pathlib.PurePath]
logger = logging.getLogger(__name__)
class BaseEntryPoint(Protocol):
@property
def name(self) -> str:
raise NotImplementedError()
@property
def value(self) -> str:
raise NotImplementedError()
@property
def group(self) -> str:
raise NotImplementedError()
def _convert_installed_files_path(
entry: Tuple[str, ...],
info: Tuple[str, ...],
) -> str:
"""Convert a legacy installed-files.txt path into modern RECORD path.
The legacy format stores paths relative to the info directory, while the
modern format stores paths relative to the package root, e.g. the
site-packages directory.
:param entry: Path parts of the installed-files.txt entry.
:param info: Path parts of the egg-info directory relative to package root.
:returns: The converted entry.
For best compatibility with symlinks, this does not use ``abspath()`` or
``Path.resolve()``, but tries to work with path parts:
1. While ``entry`` starts with ``..``, remove the equal amounts of parts
from ``info``; if ``info`` is empty, start appending ``..`` instead.
2. Join the two directly.
"""
while entry and entry[0] == "..":
if not info or info[-1] == "..":
info += ("..",)
else:
info = info[:-1]
entry = entry[1:]
return str(pathlib.Path(*info, *entry))
class RequiresEntry(NamedTuple):
requirement: str
extra: str
marker: str
class BaseDistribution(Protocol):
@classmethod
def from_directory(cls, directory: str) -> "BaseDistribution":
"""Load the distribution from a metadata directory.
:param directory: Path to a metadata directory, e.g. ``.dist-info``.
"""
raise NotImplementedError()
@classmethod
def from_metadata_file_contents(
cls,
metadata_contents: bytes,
filename: str,
project_name: str,
) -> "BaseDistribution":
"""Load the distribution from the contents of a METADATA file.
This is used to implement PEP 658 by generating a "shallow" dist object that can
be used for resolution without downloading or building the actual dist yet.
:param metadata_contents: The contents of a METADATA file.
:param filename: File name for the dist with this metadata.
:param project_name: Name of the project this dist represents.
"""
raise NotImplementedError()
@classmethod
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
"""Load the distribution from a given wheel.
:param wheel: A concrete wheel definition.
:param name: File name of the wheel.
:raises InvalidWheel: Whenever loading of the wheel causes a
:py:exc:`zipfile.BadZipFile` exception to be thrown.
:raises UnsupportedWheel: If the wheel is a valid zip, but malformed
internally.
"""
raise NotImplementedError()
def __repr__(self) -> str:
return f"{self.raw_name} {self.version} ({self.location})"
def __str__(self) -> str:
return f"{self.raw_name} {self.version}"
@property
def location(self) -> Optional[str]:
"""Where the distribution is loaded from.
A string value is not necessarily a filesystem path, since distributions
can be loaded from other sources, e.g. arbitrary zip archives. ``None``
means the distribution is created in-memory.
Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
this is a symbolic link, we want to preserve the relative path between
it and files in the distribution.
"""
raise NotImplementedError()
@property
def editable_project_location(self) -> Optional[str]:
"""The project location for editable distributions.
This is the directory where pyproject.toml or setup.py is located.
None if the distribution is not installed in editable mode.
"""
# TODO: this property is relatively costly to compute, memoize it ?
direct_url = self.direct_url
if direct_url:
if direct_url.is_local_editable():
return url_to_path(direct_url.url)
else:
# Search for an .egg-link file by walking sys.path, as it was
# done before by dist_is_editable().
egg_link_path = egg_link_path_from_sys_path(self.raw_name)
if egg_link_path:
# TODO: get project location from second line of egg_link file
# (https://github.com/pypa/pip/issues/10243)
return self.location
return None
@property
def installed_location(self) -> Optional[str]:
"""The distribution's "installed" location.
This should generally be a ``site-packages`` directory. This is
usually ``dist.location``, except for legacy develop-installed packages,
where ``dist.location`` is the source code location, and this is where
the ``.egg-link`` file is.
The returned location is normalized (in particular, with symlinks removed).
"""
raise NotImplementedError()
@property
def info_location(self) -> Optional[str]:
"""Location of the .[egg|dist]-info directory or file.
Similarly to ``location``, a string value is not necessarily a
filesystem path. ``None`` means the distribution is created in-memory.
For a modern .dist-info installation on disk, this should be something
like ``{location}/{raw_name}-{version}.dist-info``.
Do not canonicalize this value with e.g. ``pathlib.Path.resolve()``. If
this is a symbolic link, we want to preserve the relative path between
it and other files in the distribution.
"""
raise NotImplementedError()
@property
def installed_by_distutils(self) -> bool:
"""Whether this distribution is installed with legacy distutils format.
A distribution installed with "raw" distutils not patched by setuptools
uses one single file at ``info_location`` to store metadata. We need to
treat this specially on uninstallation.
"""
info_location = self.info_location
if not info_location:
return False
return pathlib.Path(info_location).is_file()
@property
def installed_as_egg(self) -> bool:
"""Whether this distribution is installed as an egg.
This usually indicates the distribution was installed by (older versions
of) easy_install.
"""
location = self.location
if not location:
return False
return location.endswith(".egg")
@property
def installed_with_setuptools_egg_info(self) -> bool:
"""Whether this distribution is installed with the ``.egg-info`` format.
This usually indicates the distribution was installed with setuptools
with an old pip version or with ``single-version-externally-managed``.
Note that this ensure the metadata store is a directory. distutils can
also installs an ``.egg-info``, but as a file, not a directory. This
property is *False* for that case. Also see ``installed_by_distutils``.
"""
info_location = self.info_location
if not info_location:
return False
if not info_location.endswith(".egg-info"):
return False
return pathlib.Path(info_location).is_dir()
@property
def installed_with_dist_info(self) -> bool:
"""Whether this distribution is installed with the "modern format".
This indicates a "modern" installation, e.g. storing metadata in the
``.dist-info`` directory. This applies to installations made by
setuptools (but through pip, not directly), or anything using the
standardized build backend interface (PEP 517).
"""
info_location = self.info_location
if not info_location:
return False
if not info_location.endswith(".dist-info"):
return False
return pathlib.Path(info_location).is_dir()
@property
def canonical_name(self) -> NormalizedName:
raise NotImplementedError()
@property
def version(self) -> DistributionVersion:
raise NotImplementedError()
@property
def setuptools_filename(self) -> str:
"""Convert a project name to its setuptools-compatible filename.
This is a copy of ``pkg_resources.to_filename()`` for compatibility.
"""
return self.raw_name.replace("-", "_")
@property
def direct_url(self) -> Optional[DirectUrl]:
"""Obtain a DirectUrl from this distribution.
Returns None if the distribution has no `direct_url.json` metadata,
or if `direct_url.json` is invalid.
"""
try:
content = self.read_text(DIRECT_URL_METADATA_NAME)
except FileNotFoundError:
return None
try:
return DirectUrl.from_json(content)
except (
UnicodeDecodeError,
json.JSONDecodeError,
DirectUrlValidationError,
) as e:
logger.warning(
"Error parsing %s for %s: %s",
DIRECT_URL_METADATA_NAME,
self.canonical_name,
e,
)
return None
@property
def installer(self) -> str:
try:
installer_text = self.read_text("INSTALLER")
except (OSError, ValueError, NoneMetadataError):
return "" # Fail silently if the installer file cannot be read.
for line in installer_text.splitlines():
cleaned_line = line.strip()
if cleaned_line:
return cleaned_line
return ""
@property
def requested(self) -> bool:
return self.is_file("REQUESTED")
@property
def editable(self) -> bool:
return bool(self.editable_project_location)
@property
def local(self) -> bool:
"""If distribution is installed in the current virtual environment.
Always True if we're not in a virtualenv.
"""
if self.installed_location is None:
return False
return is_local(self.installed_location)
@property
def in_usersite(self) -> bool:
if self.installed_location is None or user_site is None:
return False
return self.installed_location.startswith(normalize_path(user_site))
@property
def in_site_packages(self) -> bool:
if self.installed_location is None or site_packages is None:
return False
return self.installed_location.startswith(normalize_path(site_packages))
def is_file(self, path: InfoPath) -> bool:
"""Check whether an entry in the info directory is a file."""
raise NotImplementedError()
def iter_distutils_script_names(self) -> Iterator[str]:
"""Find distutils 'scripts' entries metadata.
If 'scripts' is supplied in ``setup.py``, distutils records those in the
installed distribution's ``scripts`` directory, a file for each script.
"""
raise NotImplementedError()
def read_text(self, path: InfoPath) -> str:
"""Read a file in the info directory.
:raise FileNotFoundError: If ``path`` does not exist in the directory.
:raise NoneMetadataError: If ``path`` exists in the info directory, but
cannot be read.
"""
raise NotImplementedError()
def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
raise NotImplementedError()
def _metadata_impl(self) -> email.message.Message:
raise NotImplementedError()
@functools.lru_cache(maxsize=1)
def _metadata_cached(self) -> email.message.Message:
# When we drop python 3.7 support, move this to the metadata property and use
# functools.cached_property instead of lru_cache.
metadata = self._metadata_impl()
self._add_egg_info_requires(metadata)
return metadata
@property
def metadata(self) -> email.message.Message:
"""Metadata of distribution parsed from e.g. METADATA or PKG-INFO.
This should return an empty message if the metadata file is unavailable.
:raises NoneMetadataError: If the metadata file is available, but does
not contain valid metadata.
"""
return self._metadata_cached()
@property
def metadata_dict(self) -> Dict[str, Any]:
"""PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO.
This should return an empty dict if the metadata file is unavailable.
:raises NoneMetadataError: If the metadata file is available, but does
not contain valid metadata.
"""
return msg_to_json(self.metadata)
@property
def metadata_version(self) -> Optional[str]:
"""Value of "Metadata-Version:" in distribution metadata, if available."""
return self.metadata.get("Metadata-Version")
@property
def raw_name(self) -> str:
"""Value of "Name:" in distribution metadata."""
# The metadata should NEVER be missing the Name: key, but if it somehow
# does, fall back to the known canonical name.
return self.metadata.get("Name", self.canonical_name)
@property
def requires_python(self) -> SpecifierSet:
"""Value of "Requires-Python:" in distribution metadata.
If the key does not exist or contains an invalid value, an empty
SpecifierSet should be returned.
"""
value = self.metadata.get("Requires-Python")
if value is None:
return SpecifierSet()
try:
# Convert to str to satisfy the type checker; this can be a Header object.
spec = SpecifierSet(str(value))
except InvalidSpecifier as e:
message = "Package %r has an invalid Requires-Python: %s"
logger.warning(message, self.raw_name, e)
return SpecifierSet()
return spec
def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
"""Dependencies of this distribution.
For modern .dist-info distributions, this is the collection of
"Requires-Dist:" entries in distribution metadata.
"""
raise NotImplementedError()
def iter_provided_extras(self) -> Iterable[str]:
"""Extras provided by this distribution.
For modern .dist-info distributions, this is the collection of
"Provides-Extra:" entries in distribution metadata.
The return value of this function is not particularly useful other than
display purposes due to backward compatibility issues and the extra
names being poorly normalized prior to PEP 685. If you want to perform
logic operations on extras, use :func:`is_extra_provided` instead.
"""
raise NotImplementedError()
def is_extra_provided(self, extra: str) -> bool:
"""Check whether an extra is provided by this distribution.
This is needed mostly for compatibility issues with pkg_resources not
following the extra normalization rules defined in PEP 685.
"""
raise NotImplementedError()
def _iter_declared_entries_from_record(self) -> Optional[Iterator[str]]:
try:
text = self.read_text("RECORD")
except FileNotFoundError:
return None
# This extra Path-str cast normalizes entries.
return (str(pathlib.Path(row[0])) for row in csv.reader(text.splitlines()))
def _iter_declared_entries_from_legacy(self) -> Optional[Iterator[str]]:
try:
text = self.read_text("installed-files.txt")
except FileNotFoundError:
return None
paths = (p for p in text.splitlines(keepends=False) if p)
root = self.location
info = self.info_location
if root is None or info is None:
return paths
try:
info_rel = pathlib.Path(info).relative_to(root)
except ValueError: # info is not relative to root.
return paths
if not info_rel.parts: # info *is* root.
return paths
return (
_convert_installed_files_path(pathlib.Path(p).parts, info_rel.parts)
for p in paths
)
def iter_declared_entries(self) -> Optional[Iterator[str]]:
"""Iterate through file entries declared in this distribution.
For modern .dist-info distributions, this is the files listed in the
``RECORD`` metadata file. For legacy setuptools distributions, this
comes from ``installed-files.txt``, with entries normalized to be
compatible with the format used by ``RECORD``.
:return: An iterator for listed entries, or None if the distribution
contains neither ``RECORD`` nor ``installed-files.txt``.
"""
return (
self._iter_declared_entries_from_record()
or self._iter_declared_entries_from_legacy()
)
def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
"""Parse a ``requires.txt`` in an egg-info directory.
This is an INI-ish format where an egg-info stores dependencies. A
section name describes extra other environment markers, while each entry
is an arbitrary string (not a key-value pair) representing a dependency
as a requirement string (no markers).
There is a construct in ``importlib.metadata`` called ``Sectioned`` that
does mostly the same, but the format is currently considered private.
"""
try:
content = self.read_text("requires.txt")
except FileNotFoundError:
return
extra = marker = "" # Section-less entries don't have markers.
for line in content.splitlines():
line = line.strip()
if not line or line.startswith("#"): # Comment; ignored.
continue
if line.startswith("[") and line.endswith("]"): # A section header.
extra, _, marker = line.strip("[]").partition(":")
continue
yield RequiresEntry(requirement=line, extra=extra, marker=marker)
def _iter_egg_info_extras(self) -> Iterable[str]:
"""Get extras from the egg-info directory."""
known_extras = {""}
for entry in self._iter_requires_txt_entries():
extra = canonicalize_name(entry.extra)
if extra in known_extras:
continue
known_extras.add(extra)
yield extra
def _iter_egg_info_dependencies(self) -> Iterable[str]:
"""Get distribution dependencies from the egg-info directory.
To ease parsing, this converts a legacy dependency entry into a PEP 508
requirement string. Like ``_iter_requires_txt_entries()``, there is code
in ``importlib.metadata`` that does mostly the same, but not do exactly
what we need.
Namely, ``importlib.metadata`` does not normalize the extra name before
putting it into the requirement string, which causes marker comparison
to fail because the dist-info format do normalize. This is consistent in
all currently available PEP 517 backends, although not standardized.
"""
for entry in self._iter_requires_txt_entries():
extra = canonicalize_name(entry.extra)
if extra and entry.marker:
marker = f'({entry.marker}) and extra == "{extra}"'
elif extra:
marker = f'extra == "{extra}"'
elif entry.marker:
marker = entry.marker
else:
marker = ""
if marker:
yield f"{entry.requirement} ; {marker}"
else:
yield entry.requirement
def _add_egg_info_requires(self, metadata: email.message.Message) -> None:
"""Add egg-info requires.txt information to the metadata."""
if not metadata.get_all("Requires-Dist"):
for dep in self._iter_egg_info_dependencies():
metadata["Requires-Dist"] = dep
if not metadata.get_all("Provides-Extra"):
for extra in self._iter_egg_info_extras():
metadata["Provides-Extra"] = extra
class BaseEnvironment:
"""An environment containing distributions to introspect."""
@classmethod
def default(cls) -> "BaseEnvironment":
raise NotImplementedError()
@classmethod
def from_paths(cls, paths: Optional[List[str]]) -> "BaseEnvironment":
raise NotImplementedError()
def get_distribution(self, name: str) -> Optional["BaseDistribution"]:
"""Given a requirement name, return the installed distributions.
The name may not be normalized. The implementation must canonicalize
it for lookup.
"""
raise NotImplementedError()
def _iter_distributions(self) -> Iterator["BaseDistribution"]:
"""Iterate through installed distributions.
This function should be implemented by subclass, but never called
directly. Use the public ``iter_distribution()`` instead, which
implements additional logic to make sure the distributions are valid.
"""
raise NotImplementedError()
def iter_all_distributions(self) -> Iterator[BaseDistribution]:
"""Iterate through all installed distributions without any filtering."""
for dist in self._iter_distributions():
# Make sure the distribution actually comes from a valid Python
# packaging distribution. Pip's AdjacentTempDirectory leaves folders
# e.g. ``~atplotlib.dist-info`` if cleanup was interrupted. The
# valid project name pattern is taken from PEP 508.
project_name_valid = re.match(
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$",
dist.canonical_name,
flags=re.IGNORECASE,
)
if not project_name_valid:
logger.warning(
"Ignoring invalid distribution %s (%s)",
dist.canonical_name,
dist.location,
)
continue
yield dist
def iter_installed_distributions(
self,
local_only: bool = True,
skip: Container[str] = stdlib_pkgs,
include_editables: bool = True,
editables_only: bool = False,
user_only: bool = False,
) -> Iterator[BaseDistribution]:
"""Return a list of installed distributions.
This is based on ``iter_all_distributions()`` with additional filtering
options. Note that ``iter_installed_distributions()`` without arguments
is *not* equal to ``iter_all_distributions()``, since some of the
configurations exclude packages by default.
:param local_only: If True (default), only return installations
local to the current virtualenv, if in a virtualenv.
:param skip: An iterable of canonicalized project names to ignore;
defaults to ``stdlib_pkgs``.
:param include_editables: If False, don't report editables.
:param editables_only: If True, only report editables.
:param user_only: If True, only report installations in the user
site directory.
"""
it = self.iter_all_distributions()
if local_only:
it = (d for d in it if d.local)
if not include_editables:
it = (d for d in it if not d.editable)
if editables_only:
it = (d for d in it if d.editable)
if user_only:
it = (d for d in it if d.in_usersite)
return (d for d in it if d.canonical_name not in skip)
class Wheel(Protocol):
location: str
def as_zipfile(self) -> zipfile.ZipFile:
raise NotImplementedError()
class FilesystemWheel(Wheel):
def __init__(self, location: str) -> None:
self.location = location
def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.location, allowZip64=True)
class MemoryWheel(Wheel):
def __init__(self, location: str, stream: IO[bytes]) -> None:
self.location = location
self.stream = stream
def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.stream, allowZip64=True)

View file

@ -0,0 +1,6 @@
from ._dists import Distribution
from ._envs import Environment
__all__ = ["NAME", "Distribution", "Environment"]
NAME = "importlib"

View file

@ -0,0 +1,55 @@
import importlib.metadata
from typing import Any, Optional, Protocol, cast
class BadMetadata(ValueError):
def __init__(self, dist: importlib.metadata.Distribution, *, reason: str) -> None:
self.dist = dist
self.reason = reason
def __str__(self) -> str:
return f"Bad metadata in {self.dist} ({self.reason})"
class BasePath(Protocol):
"""A protocol that various path objects conform.
This exists because importlib.metadata uses both ``pathlib.Path`` and
``zipfile.Path``, and we need a common base for type hints (Union does not
work well since ``zipfile.Path`` is too new for our linter setup).
This does not mean to be exhaustive, but only contains things that present
in both classes *that we need*.
"""
@property
def name(self) -> str:
raise NotImplementedError()
@property
def parent(self) -> "BasePath":
raise NotImplementedError()
def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]:
"""Find the path to the distribution's metadata directory.
HACK: This relies on importlib.metadata's private ``_path`` attribute. Not
all distributions exist on disk, so importlib.metadata is correct to not
expose the attribute as public. But pip's code base is old and not as clean,
so we do this to avoid having to rewrite too many things. Hopefully we can
eliminate this some day.
"""
return getattr(d, "_path", None)
def get_dist_name(dist: importlib.metadata.Distribution) -> str:
"""Get the distribution's project name.
The ``name`` attribute is only available in Python 3.10 or later. We are
targeting exactly that, but Mypy does not know this.
"""
name = cast(Any, dist).name
if not isinstance(name, str):
raise BadMetadata(dist, reason="invalid metadata entry 'name'")
return name

View file

@ -0,0 +1,227 @@
import email.message
import importlib.metadata
import os
import pathlib
import zipfile
from typing import (
Collection,
Dict,
Iterable,
Iterator,
Mapping,
Optional,
Sequence,
cast,
)
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import parse as parse_version
from pip._internal.exceptions import InvalidWheel, UnsupportedWheel
from pip._internal.metadata.base import (
BaseDistribution,
BaseEntryPoint,
DistributionVersion,
InfoPath,
Wheel,
)
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
from ._compat import BasePath, get_dist_name
class WheelDistribution(importlib.metadata.Distribution):
"""An ``importlib.metadata.Distribution`` read from a wheel.
Although ``importlib.metadata.PathDistribution`` accepts ``zipfile.Path``,
its implementation is too "lazy" for pip's needs (we can't keep the ZipFile
handle open for the entire lifetime of the distribution object).
This implementation eagerly reads the entire metadata directory into the
memory instead, and operates from that.
"""
def __init__(
self,
files: Mapping[pathlib.PurePosixPath, bytes],
info_location: pathlib.PurePosixPath,
) -> None:
self._files = files
self.info_location = info_location
@classmethod
def from_zipfile(
cls,
zf: zipfile.ZipFile,
name: str,
location: str,
) -> "WheelDistribution":
info_dir, _ = parse_wheel(zf, name)
paths = (
(name, pathlib.PurePosixPath(name.split("/", 1)[-1]))
for name in zf.namelist()
if name.startswith(f"{info_dir}/")
)
files = {
relpath: read_wheel_metadata_file(zf, fullpath)
for fullpath, relpath in paths
}
info_location = pathlib.PurePosixPath(location, info_dir)
return cls(files, info_location)
def iterdir(self, path: InfoPath) -> Iterator[pathlib.PurePosixPath]:
# Only allow iterating through the metadata directory.
if pathlib.PurePosixPath(str(path)) in self._files:
return iter(self._files)
raise FileNotFoundError(path)
def read_text(self, filename: str) -> Optional[str]:
try:
data = self._files[pathlib.PurePosixPath(filename)]
except KeyError:
return None
try:
text = data.decode("utf-8")
except UnicodeDecodeError as e:
wheel = self.info_location.parent
error = f"Error decoding metadata for {wheel}: {e} in {filename} file"
raise UnsupportedWheel(error)
return text
class Distribution(BaseDistribution):
def __init__(
self,
dist: importlib.metadata.Distribution,
info_location: Optional[BasePath],
installed_location: Optional[BasePath],
) -> None:
self._dist = dist
self._info_location = info_location
self._installed_location = installed_location
@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
info_location = pathlib.Path(directory)
dist = importlib.metadata.Distribution.at(info_location)
return cls(dist, info_location, info_location.parent)
@classmethod
def from_metadata_file_contents(
cls,
metadata_contents: bytes,
filename: str,
project_name: str,
) -> BaseDistribution:
# Generate temp dir to contain the metadata file, and write the file contents.
temp_dir = pathlib.Path(
TempDirectory(kind="metadata", globally_managed=True).path
)
metadata_path = temp_dir / "METADATA"
metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
return cls(dist, metadata_path.parent, None)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
with wheel.as_zipfile() as zf:
dist = WheelDistribution.from_zipfile(zf, name, wheel.location)
except zipfile.BadZipFile as e:
raise InvalidWheel(wheel.location, name) from e
except UnsupportedWheel as e:
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location))
@property
def location(self) -> Optional[str]:
if self._info_location is None:
return None
return str(self._info_location.parent)
@property
def info_location(self) -> Optional[str]:
if self._info_location is None:
return None
return str(self._info_location)
@property
def installed_location(self) -> Optional[str]:
if self._installed_location is None:
return None
return normalize_path(str(self._installed_location))
def _get_dist_name_from_location(self) -> Optional[str]:
"""Try to get the name from the metadata directory name.
This is much faster than reading metadata.
"""
if self._info_location is None:
return None
stem, suffix = os.path.splitext(self._info_location.name)
if suffix not in (".dist-info", ".egg-info"):
return None
return stem.split("-", 1)[0]
@property
def canonical_name(self) -> NormalizedName:
name = self._get_dist_name_from_location() or get_dist_name(self._dist)
return canonicalize_name(name)
@property
def version(self) -> DistributionVersion:
return parse_version(self._dist.version)
def is_file(self, path: InfoPath) -> bool:
return self._dist.read_text(str(path)) is not None
def iter_distutils_script_names(self) -> Iterator[str]:
# A distutils installation is always "flat" (not in e.g. egg form), so
# if this distribution's info location is NOT a pathlib.Path (but e.g.
# zipfile.Path), it can never contain any distutils scripts.
if not isinstance(self._info_location, pathlib.Path):
return
for child in self._info_location.joinpath("scripts").iterdir():
yield child.name
def read_text(self, path: InfoPath) -> str:
content = self._dist.read_text(str(path))
if content is None:
raise FileNotFoundError(path)
return content
def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
# importlib.metadata's EntryPoint structure sasitfies BaseEntryPoint.
return self._dist.entry_points
def _metadata_impl(self) -> email.message.Message:
# From Python 3.10+, importlib.metadata declares PackageMetadata as the
# return type. This protocol is unfortunately a disaster now and misses
# a ton of fields that we need, including get() and get_payload(). We
# rely on the implementation that the object is actually a Message now,
# until upstream can improve the protocol. (python/cpython#94952)
return cast(email.message.Message, self._dist.metadata)
def iter_provided_extras(self) -> Iterable[str]:
return self.metadata.get_all("Provides-Extra", [])
def is_extra_provided(self, extra: str) -> bool:
return any(
canonicalize_name(provided_extra) == canonicalize_name(extra)
for provided_extra in self.metadata.get_all("Provides-Extra", [])
)
def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
contexts: Sequence[Dict[str, str]] = [{"extra": e} for e in extras]
for req_string in self.metadata.get_all("Requires-Dist", []):
req = Requirement(req_string)
if not req.marker:
yield req
elif not extras and req.marker.evaluate({"extra": ""}):
yield req
elif any(req.marker.evaluate(context) for context in contexts):
yield req

Some files were not shown because too many files have changed in this diff Show more