Coverage for src/_griffe/finder.py: 94.59%
250 statements
« prev ^ index » next coverage.py v7.6.2, created at 2024-10-12 01:34 +0200
« prev ^ index » next coverage.py v7.6.2, created at 2024-10-12 01:34 +0200
1# This module contains the code allowing to find modules.
2#
3# Note: It might be possible to replace a good part of this module's logic
4# with utilities from `importlib` (however the util in question is private):
5#
6# ```pycon
7# >>> from importlib.util import _find_spec
8# >>> _find_spec("griffe.agents", _find_spec("griffe", None).submodule_search_locations)
9# ModuleSpec(
10# name='griffe.agents',
11# loader=<_frozen_importlib_external.SourceFileLoader object at 0x7fa5f34e8110>,
12# origin='/media/data/dev/griffe/src/griffe/agents/__init__.py',
13# submodule_search_locations=['/media/data/dev/griffe/src/griffe/agents'],
14# )
15# ```
17from __future__ import annotations
19import ast
20import os
21import re
22import sys
23from collections import defaultdict
24from contextlib import suppress
25from dataclasses import dataclass
26from itertools import chain
27from pathlib import Path
28from typing import TYPE_CHECKING, ClassVar
30from _griffe.exceptions import UnhandledEditableModuleError
31from _griffe.logger import logger
33if TYPE_CHECKING:
34 from collections.abc import Iterator, Sequence
35 from re import Pattern
37 from _griffe.models import Module
# File-name patterns of the helper modules that build backends drop next to
# their `.pth` files for editable installs. Every pattern is anchored and
# escapes the dot of the `.py` extension so that only real Python file names match.
_editable_editables_patterns = [re.compile(pat) for pat in (r"^__editables_\w+\.py$", r"^_editable_impl_\w+\.py$")]
_editable_setuptools_patterns = [re.compile(pat) for pat in (r"^__editable__\w+\.py$",)]
_editable_scikit_build_core_patterns = [re.compile(pat) for pat in (r"^_\w+_editable\.py$",)]
_editable_meson_python_patterns = [re.compile(pat) for pat in (r"^_\w+_editable_loader\.py$",)]

NamePartsType = tuple[str, ...]
"""Type alias for the parts of a module name."""
NamePartsAndPathType = tuple[NamePartsType, Path]
"""Type alias for the parts of a module name and its path."""
def _match_pattern(string: str, patterns: Sequence[Pattern]) -> bool:
    """Tell whether `string` matches at least one of the compiled `patterns`.

    Matching uses `Pattern.match`, so each pattern is tried at the start of the string.
    """
    for candidate in patterns:
        if candidate.match(string):
            return True
    return False
@dataclass
class Package:
    """Lightweight record describing a regular package located by the finder.

    Parameters:
        name: The package name.
        path: The path of the package.
        stubs: The path of the matching stubs file (.pyi), if any.
    """

    name: str
    """Package name."""
    path: Path
    """Package folder path."""
    stubs: Path | None = None
    """Package stubs file."""
@dataclass
class NamespacePackage:
    """Lightweight record describing a namespace package located by the finder.

    Parameters:
        name: The namespace package name.
        path: The folders that make up the namespace package.
    """

    name: str
    """Namespace package name."""
    path: list[Path]
    """Namespace package folder paths."""
class ModuleFinder:
    """The Griffe finder, allowing to find modules on the file system.

    The module finder is generally not used directly.
    Each [`GriffeLoader`][griffe.GriffeLoader] instance creates its own module finder instance.
    The finder can be configured when instantiating the loader
    thanks to the [loader][griffe.GriffeLoader]'s `search_paths` parameter.
    """

    accepted_py_module_extensions: ClassVar[list[str]] = [".py", ".pyc", ".pyo", ".pyd", ".pyi", ".so"]
    """List of extensions supported by the finder."""
    extensions_set: ClassVar[set[str]] = set(accepted_py_module_extensions)
    """Set of extensions supported by the finder."""

    def __init__(self, search_paths: Sequence[str | Path] | None = None) -> None:
        """Initialize the finder.

        Parameters:
            search_paths: Optional paths to search into.
                When empty or `None`, `sys.path` is used instead.
        """
        # Cache of directory listings, keyed by directory (see `_contents`).
        self._paths_contents: dict[Path, list[Path]] = {}
        self.search_paths: list[Path] = []
        """The finder search paths."""

        # Optimization: pre-compute Paths to relieve CPU when joining paths.
        for path in search_paths or sys.path:
            self.append_search_path(Path(path))

        # Extra directories to scan for a given package name (filled from `.pth` files).
        self._always_scan_for: dict[str, list[Path]] = defaultdict(list)
        self._extend_from_pth_files()

    def append_search_path(self, path: Path) -> None:
        """Append a search path.

        The path will be resolved (absolute, normalized).
        The path won't be appended if it is already in the search paths list.

        Parameters:
            path: The path to append.
        """
        path = path.resolve()
        if path not in self.search_paths:
            self.search_paths.append(path)

    def insert_search_path(self, position: int, path: Path) -> None:
        """Insert a search path at the given position.

        The path will be resolved (absolute, normalized).
        The path won't be inserted if it is already in the search paths list.

        Parameters:
            position: The insert position in the list.
            path: The path to insert.
        """
        path = path.resolve()
        if path not in self.search_paths:
            self.search_paths.insert(position, path)

    def find_spec(
        self,
        module: str | Path,
        *,
        try_relative_path: bool = True,
        find_stubs_package: bool = False,
    ) -> tuple[str, Package | NamespacePackage]:
        """Find the top-level parent module of a module.

        If a Path is passed, only try to find the module as a file path.
        If a string is passed, first try to find the module as a file path,
        then look into the search paths.

        Parameters:
            module: The module name or path.
            try_relative_path: Whether to try finding the module as a relative path,
                when the given module is not already a path.
            find_stubs_package: Whether to search for stubs-only package.
                If both the package and its stubs are found, they'll be merged together.
                If only the stubs are found, they'll be used as the package itself.

        Raises:
            FileNotFoundError: When a Path was passed and the module could not be found:

                - the directory has no `__init__.py` file in it
                - the path does not exist

            ModuleNotFoundError: When a string was passed and the module could not be found:

                - no `module/__init__.py`
                - no `module.py`
                - no `module.pth`
                - no `module` directory (namespace packages)
                - or unsupported .pth file

        Returns:
            The name of the module, and an instance representing its (namespace) package.
        """
        module_path: Path | list[Path]
        if isinstance(module, Path):
            module_name, module_path = self._module_name_path(module)
            top_module_name = self._top_module_name(module_path)
        elif try_relative_path:
            try:
                module_name, module_path = self._module_name_path(Path(module))
            except FileNotFoundError:
                # Not a path on disk: treat the string as a dotted module name.
                module_name = module
                top_module_name = module.split(".", 1)[0]
            else:
                top_module_name = self._top_module_name(module_path)
        else:
            module_name = module
            top_module_name = module.split(".", 1)[0]

        # Only search for actual package, let exceptions bubble up.
        if not find_stubs_package:
            return module_name, self.find_package(top_module_name)

        # Search for both package and stubs-only package.
        try:
            package = self.find_package(top_module_name)
        except ModuleNotFoundError:
            package = None
        try:
            stubs = self.find_package(top_module_name + "-stubs")
        except ModuleNotFoundError:
            stubs = None

        # None found, raise error.
        if package is None and stubs is None:
            raise ModuleNotFoundError(top_module_name)

        # Both found, assemble them to be merged later.
        if package and stubs:
            if isinstance(package, Package) and isinstance(stubs, Package):
                package.stubs = stubs.path
            elif isinstance(package, NamespacePackage) and isinstance(stubs, NamespacePackage):
                package.path += stubs.path
            return module_name, package

        # Return either one.
        return module_name, package or stubs  # type: ignore[return-value]

    def find_package(self, module_name: str) -> Package | NamespacePackage:
        """Find a package or namespace package.

        Search paths are visited in order. The first regular package or
        single-file module found wins; directories without an `__init__`
        module are accumulated as namespace package portions.

        Parameters:
            module_name: The module name.

        Raises:
            ModuleNotFoundError: When the module cannot be found.

        Returns:
            A package or namespace package wrapper.
        """
        filepaths = [
            Path(module_name),
            # TODO: Handle .py[cod] and .so files?
            # This would be needed for package that are composed
            # solely of a file with such an extension.
            Path(f"{module_name}.py"),
        ]

        real_module_name = module_name
        if real_module_name.endswith("-stubs"):
            real_module_name = real_module_name[:-6]
        namespace_dirs = []
        for path in self.search_paths:
            path_contents = self._contents(path)
            if path_contents:
                for choice in filepaths:
                    abs_path = path / choice
                    if abs_path in path_contents:
                        if abs_path.suffix:
                            stubs = abs_path.with_suffix(".pyi")
                            return Package(real_module_name, abs_path, stubs if stubs.exists() else None)
                        init_module = abs_path / "__init__.py"
                        if init_module.exists() and not _is_pkg_style_namespace(init_module):
                            stubs = init_module.with_suffix(".pyi")
                            return Package(real_module_name, init_module, stubs if stubs.exists() else None)
                        init_module = abs_path / "__init__.pyi"
                        if init_module.exists():
                            # Stubs package
                            return Package(real_module_name, init_module, None)
                        # Only directories can be namespace package portions:
                        # an extension-less regular file that happens to carry
                        # the module's name must not be reported as a package.
                        if abs_path.is_dir():
                            namespace_dirs.append(abs_path)

        if namespace_dirs:
            # NOTE(review): the namespace package keeps `module_name` as is (including
            # a possible `-stubs` suffix), unlike regular packages above — confirm intended.
            return NamespacePackage(module_name, namespace_dirs)

        raise ModuleNotFoundError(module_name)

    def iter_submodules(
        self,
        path: Path | list[Path],
        seen: set | None = None,
    ) -> Iterator[NamePartsAndPathType]:
        """Iterate on a module's submodules, if any.

        Parameters:
            path: The module path.
            seen: If not none, this set is used to skip some files.
                The goal is to replicate the behavior of Python by
                only using the first packages (with `__init__` modules)
                of the same name found in different namespace packages.
                As soon as we find an `__init__` module, we add its parent
                path to the `seen` set, which will be reused when scanning
                the next namespace packages.

        Yields:
            name_parts (tuple[str, ...]): The parts of a submodule name.
            filepath (Path): A submodule filepath.
        """
        if isinstance(path, list):
            # We never enter this condition again in recursive calls,
            # so we just have to set `seen` once regardless of its value.
            seen = set()
            for path_elem in path:
                yield from self.iter_submodules(path_elem, seen)
            return

        if path.stem == "__init__":
            path = path.parent
        # Optimization: just check if the file name ends with .py[icod]/.so
        # (to distinguish it from a directory), not if it's an actual file.
        elif path.suffix in self.extensions_set:
            return

        # `seen` is only set when we scan a list of paths (namespace package).
        # `skip` is used to prevent yielding modules
        # of a regular subpackage that we already yielded
        # from another part of the namespace.
        # It is a snapshot: packages discovered during this scan are added to
        # `seen` only, so their own modules are still yielded below.
        skip = set(seen or ())

        for subpath in self._filter_py_modules(path):
            rel_subpath = subpath.relative_to(path)
            if rel_subpath.parent in skip:
                logger.debug("Skip %s, another module took precedence", subpath)
                continue
            py_file = rel_subpath.suffix == ".py"
            stem = rel_subpath.stem
            if not py_file:
                # .py[cod] and .so files look like `name.cpython-38-x86_64-linux-gnu.ext`
                stem = stem.split(".", 1)[0]
            if stem == "__init__":
                # Optimization: since it's a relative path, if it has only one part
                # and is named __init__, it means it's the starting path
                # (no need to compare it against starting path).
                if len(rel_subpath.parts) == 1:
                    continue
                yield rel_subpath.parts[:-1], subpath
                if seen is not None:
                    seen.add(rel_subpath.parent)
            elif py_file:
                yield rel_subpath.with_suffix("").parts, subpath
            else:
                yield rel_subpath.with_name(stem).parts, subpath

    def submodules(self, module: Module) -> list[NamePartsAndPathType]:
        """Return the list of a module's submodules.

        Parameters:
            module: The parent module.

        Returns:
            A list of tuples containing the parts of the submodule name and its path,
            sorted by increasing depth (number of name parts).
        """
        return sorted(
            chain(
                self.iter_submodules(module.filepath),
                self.iter_submodules(self._always_scan_for[module.name]),
            ),
            key=_module_depth,
        )

    def _module_name_path(self, path: Path) -> tuple[str, Path]:
        """Return the module name and the absolute module path for a file system path.

        Raises:
            FileNotFoundError: When the path does not exist.
        """
        # Always return absolute paths to avoid working-directory-dependent issues.
        path = path.absolute()
        if path.is_dir():
            for ext in self.accepted_py_module_extensions:
                module_path = path / f"__init__{ext}"
                if module_path.exists():
                    return path.name, module_path
            # No `__init__` module: return the directory itself.
            return path.name, path
        if path.exists():
            if path.stem == "__init__":
                return path.parent.name, path
            return path.stem, path
        # Carry the path in the exception to make failures easier to diagnose.
        raise FileNotFoundError(str(path))

    def _contents(self, path: Path) -> list[Path]:
        """Return the (cached) listing of a directory, empty when it cannot be listed."""
        if path not in self._paths_contents:
            try:
                self._paths_contents[path] = list(path.iterdir())
            except (FileNotFoundError, NotADirectoryError):
                self._paths_contents[path] = []
        return self._paths_contents[path]

    def _append_search_path(self, path: Path) -> None:
        """Append a search path without resolving it first."""
        if path not in self.search_paths:
            self.search_paths.append(path)

    def _extend_from_pth_files(self) -> None:
        """Extend the search paths with the directories declared in `.pth` files."""
        # The list grows while we iterate on it, so the paths appended here
        # are visited (and scanned for `.pth` files) as well.
        for path in self.search_paths:
            for item in self._contents(path):
                if item.suffix == ".pth":
                    for directory in _handle_pth_file(item):
                        if scan := directory.always_scan_for:
                            self._always_scan_for[scan].append(directory.path.joinpath(scan))
                        self.append_search_path(directory.path)

    def _filter_py_modules(self, path: Path) -> Iterator[Path]:
        """Walk a directory and yield the files with a supported module extension."""
        for root, dirs, files in os.walk(path, topdown=True):
            # Optimization: modify dirs in-place to exclude `__pycache__` directories.
            dirs[:] = [dirname for dirname in dirs if dirname != "__pycache__"]
            for relfile in files:
                if os.path.splitext(relfile)[1] in self.extensions_set:  # noqa: PTH122
                    yield Path(root, relfile)

    def _top_module_name(self, path: Path) -> str:
        """Return the name of the top-level module containing the given path."""
        # First find if a parent is in search paths.
        parent_path = path if path.is_dir() else path.parent
        # Always resolve parent path to compare for relativeness against resolved search paths.
        parent_path = parent_path.resolve()
        for search_path in self.search_paths:
            # ValueError: not relative to this search path.
            # IndexError: the parent path *is* the search path (no parts).
            with suppress(ValueError, IndexError):
                rel_path = parent_path.relative_to(search_path.resolve())
                return rel_path.parts[0]
        # If not, get the highest directory with an `__init__` module,
        # add its parent to search paths and return it.
        while parent_path.parent != parent_path and (parent_path.parent / "__init__.py").exists():
            parent_path = parent_path.parent
        self.insert_search_path(0, parent_path.parent)
        return parent_path.name
# Markers of "pkg_resources-style" and "pkgutil-style" namespace packages.
# The dot before the method name is escaped so that only a real attribute access matches.
_re_pkgresources = re.compile(r"(?:__import__\([\"']pkg_resources[\"']\)\.declare_namespace\(__name__\))")
_re_pkgutil = re.compile(r"(?:__path__ = __import__\([\"']pkgutil[\"']\)\.extend_path\(__path__, __name__\))")
_re_import_line = re.compile(r"^import[ \t]+\w+$")


# TODO: For more robustness, we should load and minify the AST
# to search for particular call statements.
def _is_pkg_style_namespace(init_module: Path) -> bool:
    """Tell whether an `__init__` module declares a pkg_resources/pkgutil-style namespace package.

    Parameters:
        init_module: The path of the `__init__` module to inspect.

    Returns:
        True when one of the namespace declaration markers is found in the file.
    """
    # The markers are pure ASCII, so undecodable bytes elsewhere in the file
    # can safely be replaced instead of aborting with a `UnicodeDecodeError`.
    code = init_module.read_text(encoding="utf8", errors="replace")
    return bool(_re_pkgresources.search(code) or _re_pkgutil.search(code))
def _module_depth(name_parts_and_path: NamePartsAndPathType) -> int:
    """Return the depth of a submodule, i.e. the number of parts in its name."""
    name_parts = name_parts_and_path[0]
    return len(name_parts)
@dataclass
class _SP:
    """A search path obtained from a `.pth` file or an editable-install module."""

    # Directory to add to the search paths.
    path: Path
    # Name of a package to always scan for under `path` (empty when not applicable).
    always_scan_for: str = ""
def _handle_pth_file(path: Path) -> list[_SP]:
    """Collect the search paths declared by a `.pth` file.

    Parameters:
        path: The path of the `.pth` file.

    Returns:
        The search paths found in the file. The list is empty when the file
        cannot be read or decoded as UTF8. When an `import` line points to
        a supported editable-install module, only the search paths obtained
        from that module are returned.
    """
    # Support for .pth files pointing to directories.
    # From https://docs.python.org/3/library/site.html:
    # A path configuration file is a file whose name has the form name.pth
    # and exists in one of the four directories mentioned above;
    # its contents are additional items (one per line) to be added to sys.path.
    # Non-existing items are never added to sys.path,
    # and no check is made that the item refers to a directory rather than a file.
    # No item is added to sys.path more than once.
    # Blank lines and lines beginning with # are skipped.
    # Lines starting with import (followed by space or tab) are executed.
    directories: list[_SP] = []
    try:
        # It turns out PyTorch recommends its users to use `.pth` as the extension
        # when saving models on the disk. These model files are not encoded in UTF8.
        # If UTF8 decoding fails, we skip the .pth file.
        # Entries that cannot be read at all (directories named `*.pth`,
        # permission issues...) are skipped as well instead of aborting the search.
        text = path.read_text(encoding="utf8")
    except (UnicodeDecodeError, OSError):
        return directories
    for line in text.strip().replace(";", "\n").splitlines(keepends=False):
        line = line.strip()  # noqa: PLW2901
        if _re_import_line.match(line):
            editable_module = path.parent / f"{line[len('import'):].lstrip()}.py"
            with suppress(UnhandledEditableModuleError):
                return _handle_editable_module(editable_module)
        if line and not line.startswith("#"):
            # Relative items are relative to the directory containing the .pth file,
            # not to the current working directory. Joining an absolute item
            # to the parent directory yields the absolute item unchanged.
            directory = path.parent / line
            if directory.exists():
                directories.append(_SP(directory))
    return directories
def _handle_editable_module(path: Path) -> list[_SP]:
    """Extract search paths from the helper module of an editable installation.

    The format is recognized from the module file name; the formats written by
    'editables', 'scikit-build-core', 'setuptools' and 'meson-python' are supported.

    Parameters:
        path: The path of the editable module (the Python file imported by a `.pth` file).

    Raises:
        UnhandledEditableModuleError: When the module is missing, has an unknown
            file name, or does not contain the expected statements.

    Returns:
        The search paths declared by the editable module.
    """
    if _match_pattern(path.name, (*_editable_editables_patterns, *_editable_scikit_build_core_patterns)):
        # Support for how 'editables' write these files:
        # example line: `F.map_module('griffe', '/media/data/dev/griffe/src/griffe/__init__.py')`.
        # And how 'scikit-build-core' writes these files:
        # example line: `install({'griffe': '/media/data/dev/griffe/src/griffe/__init__.py'}, {'cmake_example': ...}, None, False, True)`.
        try:
            editable_lines = path.read_text(encoding="utf8").strip().splitlines(keepends=False)
        except FileNotFoundError as error:
            raise UnhandledEditableModuleError(path) from error
        try:
            # The path is the second single-quoted string of the last line.
            new_path = Path(editable_lines[-1].split("'")[3])
        except IndexError as error:
            # Empty file, or last line not shaped like the examples above.
            raise UnhandledEditableModuleError(path) from error
        if new_path.name.startswith("__init__"):
            return [_SP(new_path.parent.parent)]
        return [_SP(new_path)]
    if _match_pattern(path.name, _editable_setuptools_patterns):
        # Support for how 'setuptools' writes these files:
        # example line: `MAPPING = {'griffe': '/media/data/dev/griffe/src/griffe', 'briffe': '/media/data/dev/griffe/src/briffe'}`.
        # with annotation: `MAPPING: dict[str, str] = {...}`.
        try:
            parsed_module = ast.parse(path.read_text(encoding="utf8"))
        except FileNotFoundError as error:
            raise UnhandledEditableModuleError(path) from error
        for node in parsed_module.body:
            if isinstance(node, ast.Assign):
                target = node.targets[0]
            elif isinstance(node, ast.AnnAssign):
                target = node.target
            else:
                continue
            if isinstance(target, ast.Name) and target.id == "MAPPING" and isinstance(node.value, ast.Dict):  # type: ignore[attr-defined]
                return [_SP(Path(cst.value).parent) for cst in node.value.values if isinstance(cst, ast.Constant)]  # type: ignore[attr-defined]
    if _match_pattern(path.name, _editable_meson_python_patterns):
        # Support for how 'meson-python' writes these files:
        # example line: `install({'package', 'module1'}, '/media/data/dev/griffe/build/cp311', ["path"], False)`.
        # Compiled modules then found in the cp311 folder, under src/package.
        try:
            parsed_module = ast.parse(path.read_text(encoding="utf8"))
        except FileNotFoundError as error:
            raise UnhandledEditableModuleError(path) from error
        for node in parsed_module.body:
            if (
                isinstance(node, ast.Expr)
                and isinstance(node.value, ast.Call)
                and isinstance(node.value.func, ast.Name)
                and node.value.func.id == "install"
                # Guard against `install(...)` calls with fewer than two positional arguments.
                and len(node.value.args) > 1
                and isinstance(node.value.args[1], ast.Constant)
            ):
                build_path = Path(node.value.args[1].value, "src")
                # NOTE: What if there are multiple packages?
                try:
                    pkg_name = next(build_path.iterdir()).name
                except (OSError, StopIteration) as error:
                    # Missing, unreadable or empty build directory.
                    raise UnhandledEditableModuleError(path) from error
                return [_SP(build_path, always_scan_for=pkg_name)]
    raise UnhandledEditableModuleError(path)