Coverage for packages / griffelib / src / griffe / _internal / finder.py: 95.59%
247 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-11 11:48 +0100
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-11 11:48 +0100
1# This module contains the code allowing to find modules.
2#
3# Note: It might be possible to replace a good part of this module's logic
4# with utilities from `importlib` (however the util in question is private):
5#
6# ```pycon
7# >>> from importlib.util import _find_spec
8# >>> _find_spec("griffe.agents", _find_spec("griffe", None).submodule_search_locations)
9# ModuleSpec(
10# name='griffe.agents',
11# loader=<_frozen_importlib_external.SourceFileLoader object at 0x7fa5f34e8110>,
12# origin='/media/data/dev/griffelib/packages/griffe/src/griffe/agents/__init__.py',
13# submodule_search_locations=['/media/data/dev/griffelib/packages/griffe/src/griffe/agents'],
14# )
15# ```
17from __future__ import annotations
19import ast
20import os
21import re
22import sys
23from collections import defaultdict
24from contextlib import suppress
25from dataclasses import dataclass
26from itertools import chain
27from pathlib import Path
28from typing import TYPE_CHECKING, ClassVar
30from griffe._internal.exceptions import UnhandledEditableModuleError
31from griffe._internal.logger import logger
33if TYPE_CHECKING:
34 from collections.abc import Iterator, Sequence
35 from re import Pattern
37 from griffe._internal.models import Module
# File-name patterns of the helper modules that editable-install backends drop
# next to their `.pth` files. Each list is matched against a bare file name.
_editable_editables_patterns = [re.compile(pat) for pat in (r"^__editables_\w+\.py$", r"^_editable_impl_\w+\.py$")]
_editable_setuptools_patterns = [re.compile(pat) for pat in (r"^__editable__\w+\.py$",)]
# The dot before `py` is escaped so that only a literal `.py` extension matches
# (an unescaped dot would also accept names such as `_pkg_editable_py`).
_editable_scikit_build_core_patterns = [re.compile(pat) for pat in (r"^_\w+_editable\.py$",)]
_editable_meson_python_patterns = [re.compile(pat) for pat in (r"^_\w+_editable_loader\.py$",)]

NamePartsType = tuple[str, ...]
"""Type alias for the parts of a module name."""
NamePartsAndPathType = tuple[NamePartsType, Path]
"""Type alias for the parts of a module name and its path."""
def _match_pattern(string: str, patterns: Sequence[Pattern]) -> bool:
    """Tell whether the string matches at least one of the compiled patterns.

    Parameters:
        string: The string to test.
        patterns: The compiled patterns, each tried with `match` (anchored at the start).

    Returns:
        True as soon as one pattern matches, False when none does (or when there are no patterns).
    """
    for candidate in patterns:
        if candidate.match(string):
            return True
    return False
@dataclass
class Package:
    """Lightweight record describing a regular package located by the finder.

    Parameters:
        name: The package name.
        path: The package path.
        stubs: An optional path to the related stubs file (.pyi).
    """

    name: str
    """Name of the package."""
    path: Path
    """Path of the package (the finder passes a module file or an `__init__` file)."""
    stubs: Path | None = None
    """Path of the package stubs file, if any."""
@dataclass
class NamespacePackage:
    """Lightweight record describing a namespace package located by the finder.

    Parameters:
        name: The package name.
        path: The package paths.
    """

    name: str
    """Name of the namespace package."""
    path: list[Path]
    """Folders that make up the namespace package."""
class ModuleFinder:
    """The Griffe finder, allowing to find modules on the file system.

    The module finder is generally not used directly.
    Each [`GriffeLoader`][griffe.GriffeLoader] instance creates its own module finder instance.
    The finder can be configured when instantiating the loader
    thanks to the [loader][griffe.GriffeLoader]'s `search_paths` parameter.
    """

    accepted_py_module_extensions: ClassVar[list[str]] = [".py", ".pyc", ".pyo", ".pyd", ".pyi", ".so"]
    """List of extensions supported by the finder."""
    extensions_set: ClassVar[set[str]] = set(accepted_py_module_extensions)
    """Set of extensions supported by the finder."""

    def __init__(self, search_paths: Sequence[str | Path] | None = None) -> None:
        """Initialize the finder.

        Parameters:
            search_paths: Optional paths to search into.
                When not given (or empty), `sys.path` is used.
        """
        # Cache of directory listings, filled lazily by `_contents`.
        self._paths_contents: dict[Path, list[Path]] = {}
        self.search_paths: list[Path] = []
        """The finder search paths."""

        # Optimization: pre-compute Paths to relieve CPU when joining paths.
        for path in search_paths or sys.path:
            self.append_search_path(Path(path))

        # Extra directories to scan for a given module name
        # (filled from `.pth` files, see `_extend_from_pth_files`).
        self._always_scan_for: dict[str, list[Path]] = defaultdict(list)
        self._extend_from_pth_files()

    def append_search_path(self, path: Path) -> None:
        """Append a search path.

        The path will be resolved (absolute, normalized).
        The path won't be appended if it is already in the search paths list.

        Parameters:
            path: The path to append.
        """
        self._append_search_path(path.resolve())

    def _append_search_path(self, path: Path) -> None:
        """Append a path as is (not resolved), unless it is already in the search paths."""
        if path not in self.search_paths:
            self.search_paths.append(path)

    def insert_search_path(self, position: int, path: Path) -> None:
        """Insert a search path at the given position.

        The path will be resolved (absolute, normalized).
        The path won't be inserted if it is already in the search paths list.

        Parameters:
            position: The insert position in the list.
            path: The path to insert.
        """
        path = path.resolve()
        if path not in self.search_paths:
            self.search_paths.insert(position, path)

    def find_spec(
        self,
        module: str | Path,
        *,
        try_relative_path: bool = True,
        find_stubs_package: bool = False,
    ) -> tuple[str, Package | NamespacePackage]:
        """Find the top-level parent module of a module.

        If a Path is passed, only try to find the module as a file path.
        If a string is passed, first try to find the module as a file path,
        then look into the search paths.

        Parameters:
            module: The module name or path.
            try_relative_path: Whether to try finding the module as a relative path,
                when the given module is not already a path.
            find_stubs_package: Whether to search for stubs-only package.
                If both the package and its stubs are found, they'll be merged together.
                If only the stubs are found, they'll be used as the package itself.

        Raises:
            FileNotFoundError: When a Path was passed and the module could not be found:

                - the directory has no `__init__.py` file in it
                - the path does not exist

            ModuleNotFoundError: When a string was passed and the module could not be found:

                - no `module/__init__.py`
                - no `module.py`
                - no `module.pth`
                - no `module` directory (namespace packages)
                - or unsupported .pth file

        Returns:
            The name of the module, and an instance representing its (namespace) package.
        """
        module_path: Path | list[Path]
        if isinstance(module, Path):
            module_name, module_path = self._module_name_path(module)
            top_module_name = self._top_module_name(module_path)
        elif try_relative_path:
            try:
                module_name, module_path = self._module_name_path(Path(module))
            except FileNotFoundError:
                # Not a path on disk: treat the string as a dotted module name.
                module_name = module
                top_module_name = module.split(".", 1)[0]
            else:
                top_module_name = self._top_module_name(module_path)
        else:
            module_name = module
            top_module_name = module.split(".", 1)[0]

        # Only search for actual package, let exceptions bubble up.
        if not find_stubs_package:
            return module_name, self.find_package(top_module_name)

        # Search for both package and stubs-only package.
        try:
            package = self.find_package(top_module_name)
        except ModuleNotFoundError:
            package = None
        try:
            stubs = self.find_package(top_module_name + "-stubs")
        except ModuleNotFoundError:
            stubs = None

        # None found, raise error.
        if package is None and stubs is None:
            raise ModuleNotFoundError(top_module_name)

        # Both found, assemble them to be merged later.
        if package and stubs:
            if isinstance(package, Package) and isinstance(stubs, Package):
                package.stubs = stubs.path
            elif isinstance(package, NamespacePackage) and isinstance(stubs, NamespacePackage):
                package.path += stubs.path
            return module_name, package

        # Return either one.
        return module_name, package or stubs  # ty:ignore[invalid-return-type]

    def find_package(self, module_name: str) -> Package | NamespacePackage:
        """Find a package or namespace package.

        Parameters:
            module_name: The module name.

        Raises:
            ModuleNotFoundError: When the module cannot be found.

        Returns:
            A package or namespace package wrapper.
        """
        filepaths = [
            Path(module_name),
            # TODO: Handle .py[cod] and .so files?
            # This would be needed for package that are composed
            # solely of a file with such an extension.
            Path(f"{module_name}.py"),
        ]

        # Stubs-only packages are looked up as `name-stubs` but reported as `name`.
        real_module_name = module_name
        real_module_name = real_module_name.removesuffix("-stubs")
        namespace_dirs = []
        for path in self.search_paths:
            path_contents = self._contents(path)
            if path_contents:
                for choice in filepaths:
                    abs_path = path / choice
                    if abs_path in path_contents:
                        # A suffix means we matched the `name.py` candidate: single-file module.
                        if abs_path.suffix:
                            stubs = abs_path.with_suffix(".pyi")
                            return Package(real_module_name, abs_path, stubs if stubs.exists() else None)
                        init_module = abs_path / "__init__.py"
                        if init_module.exists() and not _is_pkg_style_namespace(init_module):
                            stubs = init_module.with_suffix(".pyi")
                            return Package(real_module_name, init_module, stubs if stubs.exists() else None)
                        init_module = abs_path / "__init__.pyi"
                        if init_module.exists():
                            # Stubs package.
                            return Package(real_module_name, init_module, None)
                        # No usable `__init__` module: remember the folder as a namespace portion
                        # and keep looking in the remaining search paths.
                        namespace_dirs.append(abs_path)

        if namespace_dirs:
            return NamespacePackage(module_name, namespace_dirs)

        raise ModuleNotFoundError(module_name)

    def iter_submodules(
        self,
        path: Path | list[Path],
        seen: set | None = None,
    ) -> Iterator[NamePartsAndPathType]:
        """Iterate on a module's submodules, if any.

        Parameters:
            path: The module path.
            seen: If not none, this set is used to skip some files.
                The goal is to replicate the behavior of Python by
                only using the first packages (with `__init__` modules)
                of the same name found in different namespace packages.
                As soon as we find an `__init__` module, we add its parent
                path to the `seen` set, which will be reused when scanning
                the next namespace packages.

        Yields:
            name_parts (tuple[str, ...]): The parts of a submodule name.
            filepath (Path): A submodule filepath.
        """
        if isinstance(path, list):
            # We never enter this condition again in recursive calls,
            # so we just have to set `seen` once regardless of its value.
            seen = set()
            for path_elem in path:
                yield from self.iter_submodules(path_elem, seen)
            return

        if path.stem == "__init__":
            path = path.parent
        # Optimization: just check if the file name ends with .py[icod]/.so
        # (to distinguish it from a directory), not if it's an actual file.
        elif path.suffix in self.extensions_set:
            return

        # `seen` is only set when we scan a list of paths (namespace package).
        # `skip` is used to prevent yielding modules
        # of a regular subpackage that we already yielded
        # from another part of the namespace.
        skip = set(seen or ())

        for subpath in self._filter_py_modules(path):
            rel_subpath = subpath.relative_to(path)
            if rel_subpath.parent in skip:
                logger.debug("Skip %s, another module took precedence", subpath)
                continue
            py_file = rel_subpath.suffix == ".py"
            stem = rel_subpath.stem
            if not py_file:
                # `.py[cod]` and `.so` files look like `name.cpython-38-x86_64-linux-gnu.ext`.
                stem = stem.split(".", 1)[0]
            if stem == "__init__":
                # Optimization: since it's a relative path, if it has only one part
                # and is named __init__, it means it's the starting path
                # (no need to compare it against starting path).
                if len(rel_subpath.parts) == 1:
                    continue
                yield rel_subpath.parts[:-1], subpath
                if seen is not None:
                    seen.add(rel_subpath.parent)
            elif py_file:
                yield rel_subpath.with_suffix("").parts, subpath
            else:
                yield rel_subpath.with_name(stem).parts, subpath

    def submodules(self, module: Module) -> list[NamePartsAndPathType]:
        """Return the list of a module's submodules.

        Parameters:
            module: The parent module.

        Returns:
            A list of tuples containing the parts of the submodule name and its path.
        """
        return sorted(
            chain(
                self.iter_submodules(module.filepath),
                self.iter_submodules(self._always_scan_for[module.name]),
            ),
            key=_module_depth,
        )

    def _module_name_path(self, path: Path) -> tuple[str, Path]:
        """Return the module name and module path corresponding to a file system path.

        Parameters:
            path: A path to a directory or to a file.

        Raises:
            FileNotFoundError: When the path does not exist.

        Returns:
            The module name and the (absolute) path of the module:
            for a directory, the first existing `__init__` file among the accepted extensions,
            or the directory itself when there is none.
        """
        # Always return absolute paths to avoid working-directory-dependent issues.
        path = path.absolute()
        if path.is_dir():
            for ext in self.accepted_py_module_extensions:
                module_path = path / f"__init__{ext}"
                if module_path.exists():
                    return path.name, module_path
            return path.name, path
        if path.exists():
            if path.stem == "__init__":
                return path.parent.name, path
            return path.stem, path
        # Name the offending path: this error can reach users who passed a `Path` to `find_spec`.
        raise FileNotFoundError(f"No module or package found at path: {path}")

    def _contents(self, path: Path) -> list[Path]:
        """Return the (cached) listing of a directory, or an empty list if it cannot be listed."""
        if path not in self._paths_contents:
            try:
                self._paths_contents[path] = list(path.iterdir())
            except (FileNotFoundError, NotADirectoryError):
                self._paths_contents[path] = []
        return self._paths_contents[path]

    def _extend_from_pth_files(self) -> None:
        """Extend the search paths with the directories listed in `.pth` files.

        Note that `self.search_paths` is appended to while it is being iterated,
        so directories added here are scanned for `.pth` files as well.
        """
        for path in self.search_paths:
            for item in self._contents(path):
                if item.suffix == ".pth":
                    for directory in _handle_pth_file(item):
                        if scan := directory.always_scan_for:
                            self._always_scan_for[scan].append(directory.path.joinpath(scan))
                        self.append_search_path(directory.path)

    def _filter_py_modules(self, path: Path) -> Iterator[Path]:
        """Walk a directory (following links) and yield files having a supported module extension."""
        for root, dirs, files in os.walk(path, topdown=True, followlinks=True):
            # Optimization: modify dirs in-place to exclude `__pycache__` directories.
            dirs[:] = [dirname for dirname in dirs if dirname != "__pycache__"]
            for relfile in files:
                if os.path.splitext(relfile)[1] in self.extensions_set:  # noqa: PTH122
                    yield Path(root, relfile)

    def _top_module_name(self, path: Path) -> str:
        """Return the name of the top-level module that contains the given path.

        When no search path contains it, the parent of the top-most package folder
        is inserted at the beginning of the search paths.
        """
        # First find if a parent is in search paths.
        parent_path = path if path.is_dir() else path.parent
        # Always resolve parent path to compare for relativeness against resolved search paths.
        parent_path = parent_path.resolve()
        for search_path in self.search_paths:
            # ValueError: not relative to this search path.
            # IndexError: the path *is* the search path (no parts left).
            with suppress(ValueError, IndexError):
                rel_path = parent_path.relative_to(search_path.resolve())
                return rel_path.parts[0]
        # If not, get the highest directory with an `__init__` module,
        # add its parent to search paths and return it.
        while parent_path.parent != parent_path and (parent_path.parent / "__init__.py").exists():
            parent_path = parent_path.parent
        self.insert_search_path(0, parent_path.parent)
        return parent_path.name
# Statements that mark an `__init__` module as a pkg_resources-style or pkgutil-style namespace package.
# The dots before `declare_namespace` / `extend_path` are escaped to match a literal attribute access only.
_re_pkgresources = re.compile(r"(?:__import__\([\"']pkg_resources[\"']\)\.declare_namespace\(__name__\))")
_re_pkgutil = re.compile(r"(?:__path__ = __import__\([\"']pkgutil[\"']\)\.extend_path\(__path__, __name__\))")
# A `.pth` line made of a single `import name` statement (no dotted names, no extra statements).
_re_import_line = re.compile(r"^import[ \t]+\w+$")
# TODO: For more robustness, we should load and minify the AST
# to search for particular call statements.
def _is_pkg_style_namespace(init_module: Path) -> bool:
    """Tell whether an `__init__` module declares a pkg_resources- or pkgutil-style namespace.

    Parameters:
        init_module: The path of the `__init__` module to read.

    Returns:
        True when the module source contains one of the two namespace declarations.
    """
    source = init_module.read_text(encoding="utf-8-sig")
    if _re_pkgresources.search(source):
        return True
    return _re_pkgutil.search(source) is not None
def _module_depth(name_parts_and_path: NamePartsAndPathType) -> int:
    """Return the depth of a submodule, i.e. the number of parts in its name (used as a sort key)."""
    name_parts = name_parts_and_path[0]
    return len(name_parts)
@dataclass
class _SP:
    """A search path extracted from a `.pth` file or from an editable-install module."""

    # The directory to add to the search paths.
    path: Path
    # Name to always scan for under `path`; empty when there is none.
    always_scan_for: str = ""
def _handle_pth_file(path: Path) -> list[_SP]:
    """Extract the additional search paths declared by a `.pth` file.

    Parameters:
        path: The path of the `.pth` file.

    Returns:
        The search paths found in the file. The list is empty when the file cannot be decoded as UTF-8.
        When an `import name` line points at a supported editable-install module,
        only the search paths extracted from that module are returned.
    """
    # Support for .pth files pointing to directories.
    # From https://docs.python.org/3/library/site.html:
    # A path configuration file is a file whose name has the form name.pth
    # and exists in one of the four directories mentioned above;
    # its contents are additional items (one per line) to be added to sys.path.
    # Non-existing items are never added to sys.path,
    # and no check is made that the item refers to a directory rather than a file.
    # No item is added to sys.path more than once.
    # Blank lines and lines beginning with # are skipped.
    # Lines starting with import (followed by space or tab) are executed.
    directories: list[_SP] = []
    try:
        # It turns out PyTorch recommends its users to use `.pth` as the extension
        # when saving models on the disk. These model files are not encoded in UTF8.
        # If UTF8 decoding fails, we skip the .pth file.
        text = path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError:
        return directories
    for line in text.strip().replace(";", "\n").splitlines(keepends=False):
        line = line.strip()  # noqa: PLW2901
        if _re_import_line.match(line):
            editable_module = path.parent / f"{line[len('import') :].lstrip()}.py"
            # Unsupported editable modules are ignored: fall through to the path check below.
            with suppress(UnhandledEditableModuleError):
                return _handle_editable_module(editable_module)
        if line and not line.startswith("#"):
            # Like the `site` module, interpret relative items relatively to the directory
            # containing the `.pth` file, not to the current working directory.
            # Joining with an absolute item yields that item unchanged.
            directory = path.parent / line
            if directory.exists():
                directories.append(_SP(directory))
    return directories
def _handle_editable_module(path: Path) -> list[_SP]:
    """Extract search paths from the helper module of an editable installation.

    The layout is chosen from the file name: 'editables' and 'scikit-build-core',
    'setuptools', or 'meson-python'.

    Parameters:
        path: The path of the editable module (the module imported by a `.pth` file).

    Raises:
        UnhandledEditableModuleError: When the module does not exist, when its name matches
            no supported layout, or when its contents do not have the expected shape.

    Returns:
        The search paths declared by the editable module.
    """
    if _match_pattern(path.name, (*_editable_editables_patterns, *_editable_scikit_build_core_patterns)):
        # Support for how 'editables' write these files:
        # example line: `F.map_module('pkg', '/data/dev/pkg/src/pkg/__init__.py')`.
        # And how 'scikit-build-core' writes these files:
        # example line: `install({'pkg': '/data/dev/pkg/src/pkg/__init__.py'}, {'cmake_example': ...}, None, False, True)`.
        try:
            editable_lines = path.read_text(encoding="utf-8-sig").strip().splitlines(keepends=False)
            # The mapped path is the second single-quoted string of the last line.
            # An empty file or an unexpected last line raises IndexError.
            new_path = Path(editable_lines[-1].split("'")[3])
        except (FileNotFoundError, IndexError) as error:
            raise UnhandledEditableModuleError(path) from error
        if new_path.name.startswith("__init__"):
            return [_SP(new_path.parent.parent)]
        return [_SP(new_path)]
    if _match_pattern(path.name, _editable_setuptools_patterns):
        # Support for how 'setuptools' writes these files:
        # example line: `MAPPING = {'pkg': '/data/dev/pkg/src/pkg', 'pkg2': '/data/dev/pkg/src/pkg2'}`.
        # with annotation: `MAPPING: dict[str, str] = {...}`.
        try:
            parsed_module = ast.parse(path.read_text(encoding="utf8"))
        except FileNotFoundError as error:
            raise UnhandledEditableModuleError(path) from error
        for node in parsed_module.body:
            if isinstance(node, ast.Assign):
                target = node.targets[0]
            elif isinstance(node, ast.AnnAssign):
                target = node.target
            else:
                continue
            if isinstance(target, ast.Name) and target.id == "MAPPING" and isinstance(node.value, ast.Dict):  # ty:ignore[unresolved-attribute]
                # Only string constants can be turned into paths.
                return [
                    _SP(Path(cst.value).parent)
                    for cst in node.value.values  # ty:ignore[unresolved-attribute]
                    if isinstance(cst, ast.Constant) and isinstance(cst.value, str)
                ]
    if _match_pattern(path.name, _editable_meson_python_patterns):
        # Support for how 'meson-python' writes these files:
        # example line: `install({'package', 'module1'}, '/media/data/dev/griffe/build/cp311', ["path"], False)`.
        # Compiled modules then found in the cp311 folder, under src/package.
        try:
            parsed_module = ast.parse(path.read_text(encoding="utf8"))
        except FileNotFoundError as error:
            raise UnhandledEditableModuleError(path) from error
        for node in parsed_module.body:
            if (
                isinstance(node, ast.Expr)
                and isinstance(node.value, ast.Call)
                and isinstance(node.value.func, ast.Name)
                and node.value.func.id == "install"
                # Guard against `install(...)` calls with fewer than two positional arguments.
                and len(node.value.args) > 1
                and isinstance(node.value.args[1], ast.Constant)
                and isinstance(node.value.args[1].value, str)
            ):
                build_path = Path(node.value.args[1].value, "src")
                # NOTE: What if there are multiple packages?
                try:
                    pkg_name = next(build_path.iterdir()).name
                except (FileNotFoundError, NotADirectoryError, StopIteration) as error:
                    # Missing or empty build folder: nothing to scan for.
                    raise UnhandledEditableModuleError(path) from error
                return [_SP(build_path, always_scan_for=pkg_name)]
    raise UnhandledEditableModuleError(path)