Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dev:

doc8:
@echo "-> Run doc8 validation"
@${ACTIVATE} pip install doc8 pbr setuptools
@${ACTIVATE} doc8 --config pyproject.toml --ignore-path docs/_build --max-line-length 100 docs/ *.rst

valid:
Expand Down
6 changes: 5 additions & 1 deletion src/python_inspector/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,11 @@ def resolve_dependencies(
async def gather_pypi_data():
async def get_pypi_data(package):
data = await get_pypi_data_from_purl(
package, repos=repos, environment=environment, prefer_source=prefer_source
package,
repos=repos,
environment=environment,
prefer_source=prefer_source,
index_urls=list(repos_by_url.keys()),
)

if verbose:
Expand Down
90 changes: 83 additions & 7 deletions src/python_inspector/package_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
from urllib.parse import urlparse, urlunparse

from typing import Dict
from typing import List
from typing import Optional
Expand All @@ -27,7 +30,11 @@


async def get_pypi_data_from_purl(
purl: str, environment: Environment, repos: List[PypiSimpleRepository], prefer_source: bool
purl: str,
environment: Environment,
repos: List[PypiSimpleRepository],
prefer_source: bool,
index_urls: List[str],
) -> Optional[PackageData]:
"""
Generate `Package` object from the `purl` string of pypi type
Expand All @@ -43,7 +50,22 @@ async def get_pypi_data_from_purl(
version = parsed_purl.version
if not version:
raise Exception("Version is not specified in the purl")
base_path = "https://pypi.org/pypi"

# Todo: address the case where several index URLs are passed
if index_urls:
# Backward compatibility: If pypi.org is passed as index url, always resolve against it.
# When multiple index URLs are supported and the todo above is fixed, then this hack can be removed.
if "https://pypi.org/simple" in index_urls:
index_url = None
else:
index_url = index_urls[0]
else:
index_url = None

base_path = (
index_url.removesuffix("/simple") + "/pypi" if index_url else "https://pypi.org/pypi"
)

api_url = f"{base_path}/{name}/{version}/json"

from python_inspector.utils import get_response_async
Expand All @@ -62,10 +84,32 @@ async def get_pypi_data_from_purl(
sdist_url = await get_sdist_download_url(
purl=parsed_purl, repos=repos, python_version=python_version
)

def canonicalize_url(url: str):
    """
    Return ``url`` with its path component normalized.

    ``.`` and ``..`` segments are resolved and repeated slashes are
    collapsed, so that two spellings of the same distribution URL compare
    equal. The scheme, network location, params, query and fragment are
    returned unchanged. A URL with an empty path is returned with an empty
    path.
    """
    # URL paths always use "/" as separator, so use the POSIX flavour of
    # normpath explicitly instead of the host OS one. This gives the same
    # result on every platform and needs no backslash fix-up on Windows.
    import posixpath

    # Parse the URL into its components
    parsed = urlparse(url)

    # normpath("") returns ".", which would turn "https://host" into
    # "https://host/.". Leave an empty path alone.
    if not parsed.path:
        return urlunparse(parsed)

    # Resolve "." and ".." segments and collapse duplicate slashes
    canonical_path = posixpath.normpath(parsed.path)

    # Rebuild the URL with the canonicalized path
    return urlunparse(parsed._replace(path=canonical_path))

if sdist_url:
valid_distribution_urls.append(sdist_url)

valid_distribution_urls = [url for url in valid_distribution_urls if url]
valid_distribution_urls = list(map(canonicalize_url, valid_distribution_urls))

# if prefer_source is True then only source distribution is used
# in case of no source distribution available then wheel is used
Expand All @@ -81,28 +125,60 @@ async def get_pypi_data_from_purl(
]
wheel_url = choose_single_wheel(wheel_urls)
if wheel_url:
valid_distribution_urls.insert(0, wheel_url)
valid_distribution_urls.insert(0, canonicalize_url(wheel_url))

urls = {url.get("url"): url for url in response.get("urls") or []}

# Sanitize all URLs that are relative and canonicalize them
urls_sanitized = {}
for url in urls:
value = urls.get(url)

# remove the URL anchor fragment
url_parsed = urlparse(url)
url = urlunparse(url_parsed._replace(fragment=""))

if url.startswith("https"):
url_sanitized = canonicalize_url(url)
else:
url_sanitized = canonicalize_url(base_path + url)

urls_sanitized[url_sanitized] = value

def remove_credentials_from_url(url: str):
    """
    Return ``url`` with any ``user:password@`` userinfo removed from its
    network location.

    The host (as reported by ``urlparse``) and an explicit port are kept;
    every other component of the URL is returned unchanged.
    """
    # Parse the URL into its components
    parsed = urlparse(url)

    # ``hostname`` is None when the URL has no host (e.g. a relative URL);
    # fall back to an empty netloc so the concatenation below cannot fail.
    new_netloc = parsed.hostname or ""

    # ``hostname`` strips the brackets of an IPv6 literal. They must be put
    # back, otherwise "[::1]:8080" would be rebuilt as the invalid "::1:8080".
    if ":" in new_netloc:
        new_netloc = f"[{new_netloc}]"

    # Compare against None so that an explicit port 0 is not dropped.
    if parsed.port is not None:
        new_netloc += f":{parsed.port}"

    # Replace the old netloc with the new one that has no credentials.
    return urlunparse(parsed._replace(netloc=new_netloc))

# iterate over the valid distribution urls and return the first
# one that is matching.
for dist_url in valid_distribution_urls:
if dist_url not in urls:
if dist_url not in urls_sanitized:
continue

url_data = urls.get(dist_url)
url_data = urls_sanitized.get(dist_url)
digests = url_data.get("digests") or {}

return PackageData(
primary_language="Python",
description=get_description(info),
homepage_url=homepage_url,
api_data_url=api_url,
api_data_url=remove_credentials_from_url(api_url),
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
license_expression=info.get("license_expression"),
declared_license=get_declared_license(info),
download_url=dist_url,
download_url=remove_credentials_from_url(dist_url),
size=url_data.get("size"),
md5=digests.get("md5") or url_data.get("md5_digest"),
sha256=digests.get("sha256"),
Expand Down
226 changes: 111 additions & 115 deletions tests/data/azure-devops.req-310-expected.json

Large diffs are not rendered by default.

226 changes: 111 additions & 115 deletions tests/data/azure-devops.req-312-expected.json

Large diffs are not rendered by default.

226 changes: 111 additions & 115 deletions tests/data/azure-devops.req-313-expected.json

Large diffs are not rendered by default.

226 changes: 111 additions & 115 deletions tests/data/azure-devops.req-314-expected.json

Large diffs are not rendered by default.

67 changes: 34 additions & 33 deletions tests/data/azure-devops.req-38-expected.json

Large diffs are not rendered by default.

108 changes: 50 additions & 58 deletions tests/data/example-requirements-ignore-errors-expected.json

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions tests/data/hash-requirements.txt-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@
"type": "pypi",
"namespace": null,
"name": "certifi",
"version": "2026.1.4",
"version": "2026.4.22",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Python package for providing Mozilla's CA Bundle.\nCertifi: Python SSL Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. Look to upstream projects\nfor methods to use alternate trust.",
"release_date": "2026-01-04T02:42:40",
"release_date": "2026-04-22T11:26:09",
"parties": [
{
"type": "person",
Expand All @@ -201,11 +201,11 @@
"Programming Language :: Python :: 3.9"
],
"homepage_url": "https://github.com/certifi/python-certifi",
"download_url": "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl",
"size": 152900,
"download_url": "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl",
"size": 135707,
"sha1": null,
"md5": "1dab98768140ad2d8dbc9be8f14a2af9",
"sha256": "9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c",
"md5": "a524df3261ff972bbe811eb7307a79ed",
"sha256": "3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/certifi/python-certifi",
Expand All @@ -225,9 +225,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/certifi/2026.1.4/json",
"api_data_url": "https://pypi.org/pypi/certifi/2026.4.22/json",
"datasource_id": null,
"purl": "pkg:pypi/certifi@2026.1.4"
"purl": "pkg:pypi/certifi@2026.4.22"
},
{
"type": "pypi",
Expand Down Expand Up @@ -514,7 +514,7 @@
"dependencies": []
},
{
"package": "pkg:pypi/certifi@2026.1.4",
"package": "pkg:pypi/certifi@2026.4.22",
"dependencies": []
},
{
Expand All @@ -528,7 +528,7 @@
{
"package": "pkg:pypi/requests@2.25.1",
"dependencies": [
"pkg:pypi/certifi@2026.1.4",
"pkg:pypi/certifi@2026.4.22",
"pkg:pypi/chardet@4.0.0",
"pkg:pypi/idna@2.10",
"pkg:pypi/urllib3@1.26.20"
Expand Down
12 changes: 6 additions & 6 deletions tests/data/resolved_deps/autobahn-310-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"dependencies": [
"pkg:pypi/cryptography@43.0.3",
"pkg:pypi/hyperlink@21.0.0",
"pkg:pypi/setuptools@80.9.0",
"pkg:pypi/setuptools@82.0.1",
"pkg:pypi/txaio@23.6.1"
]
},
Expand All @@ -24,19 +24,19 @@
{
"package": "pkg:pypi/hyperlink@21.0.0",
"dependencies": [
"pkg:pypi/idna@3.11"
"pkg:pypi/idna@3.13"
]
},
{
"package": "pkg:pypi/idna@3.11",
"package": "pkg:pypi/idna@3.13",
"dependencies": []
},
{
"package": "pkg:pypi/pycparser@2.23",
"dependencies": []
},
{
"package": "pkg:pypi/setuptools@80.9.0",
"package": "pkg:pypi/setuptools@82.0.1",
"dependencies": []
},
{
Expand All @@ -49,9 +49,9 @@
"pkg:pypi/cffi@2.0.0",
"pkg:pypi/cryptography@43.0.3",
"pkg:pypi/hyperlink@21.0.0",
"pkg:pypi/idna@3.11",
"pkg:pypi/idna@3.13",
"pkg:pypi/pycparser@2.23",
"pkg:pypi/setuptools@80.9.0",
"pkg:pypi/setuptools@82.0.1",
"pkg:pypi/txaio@23.6.1"
]
]
12 changes: 6 additions & 6 deletions tests/data/resolved_deps/flask-310-expected.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
[
[
{
"package": "pkg:pypi/click@8.3.1",
"package": "pkg:pypi/click@8.3.3",
"dependencies": []
},
{
"package": "pkg:pypi/flask@2.1.2",
"dependencies": [
"pkg:pypi/click@8.3.1",
"pkg:pypi/click@8.3.3",
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.8"
]
},
{
Expand All @@ -28,18 +28,18 @@
"dependencies": []
},
{
"package": "pkg:pypi/werkzeug@3.1.5",
"package": "pkg:pypi/werkzeug@3.1.8",
"dependencies": [
"pkg:pypi/markupsafe@3.0.3"
]
}
],
[
"pkg:pypi/click@8.3.1",
"pkg:pypi/click@8.3.3",
"pkg:pypi/flask@2.1.2",
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/markupsafe@3.0.3",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.8"
]
]
12 changes: 6 additions & 6 deletions tests/data/resolved_deps/flask-310-win-expected.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[
[
{
"package": "pkg:pypi/click@8.3.1",
"package": "pkg:pypi/click@8.3.3",
"dependencies": [
"pkg:pypi/colorama@0.4.6"
]
Expand All @@ -13,10 +13,10 @@
{
"package": "pkg:pypi/flask@2.1.2",
"dependencies": [
"pkg:pypi/click@8.3.1",
"pkg:pypi/click@8.3.3",
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.8"
]
},
{
Expand All @@ -34,19 +34,19 @@
"dependencies": []
},
{
"package": "pkg:pypi/werkzeug@3.1.5",
"package": "pkg:pypi/werkzeug@3.1.8",
"dependencies": [
"pkg:pypi/markupsafe@3.0.3"
]
}
],
[
"pkg:pypi/click@8.3.1",
"pkg:pypi/click@8.3.3",
"pkg:pypi/colorama@0.4.6",
"pkg:pypi/flask@2.1.2",
"pkg:pypi/itsdangerous@2.2.0",
"pkg:pypi/jinja2@3.1.6",
"pkg:pypi/markupsafe@3.0.3",
"pkg:pypi/werkzeug@3.1.5"
"pkg:pypi/werkzeug@3.1.8"
]
]
Loading
Loading