Coverage for packages / griffelib / src / griffe / _internal / finder.py: 95.59%

247 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-11 11:48 +0100

1# This module contains the code allowing to find modules. 

2# 

3# Note: It might be possible to replace a good part of this module's logic 

4# with utilities from `importlib` (however the util in question is private): 

5# 

6# ```pycon 

7# >>> from importlib.util import _find_spec 

8# >>> _find_spec("griffe.agents", _find_spec("griffe", None).submodule_search_locations) 

9# ModuleSpec( 

10# name='griffe.agents', 

11# loader=<_frozen_importlib_external.SourceFileLoader object at 0x7fa5f34e8110>, 

12# origin='/media/data/dev/griffelib/packages/griffe/src/griffe/agents/__init__.py', 

13# submodule_search_locations=['/media/data/dev/griffelib/packages/griffe/src/griffe/agents'], 

14# ) 

15# ``` 

16 

17from __future__ import annotations 

18 

19import ast 

20import os 

21import re 

22import sys 

23from collections import defaultdict 

24from contextlib import suppress 

25from dataclasses import dataclass 

26from itertools import chain 

27from pathlib import Path 

28from typing import TYPE_CHECKING, ClassVar 

29 

30from griffe._internal.exceptions import UnhandledEditableModuleError 

31from griffe._internal.logger import logger 

32 

33if TYPE_CHECKING: 

34 from collections.abc import Iterator, Sequence 

35 from re import Pattern 

36 

37 from griffe._internal.models import Module 

38 

39 

# File-name patterns used to recognize the helper modules that build backends
# generate for editable installs. They are matched against a bare file name
# (not a full path), so every `.py` extension dot is escaped to match literally.
_editable_editables_patterns = [re.compile(pat) for pat in (r"^__editables_\w+\.py$", r"^_editable_impl_\w+\.py$")]
_editable_setuptools_patterns = [re.compile(pat) for pat in (r"^__editable__\w+\.py$",)]
_editable_scikit_build_core_patterns = [re.compile(pat) for pat in (r"^_\w+_editable\.py$",)]
_editable_meson_python_patterns = [re.compile(pat) for pat in (r"^_\w+_editable_loader\.py$",)]

NamePartsType = tuple[str, ...]
"""Type alias for the parts of a module name."""
NamePartsAndPathType = tuple[NamePartsType, Path]
"""Type alias for the parts of a module name and its path."""

49 

50 

def _match_pattern(string: str, patterns: Sequence[Pattern]) -> bool:
    """Tell whether `string` matches at least one of the compiled `patterns`.

    Matching is anchored at the start of the string (`Pattern.match`).
    """
    for candidate in patterns:
        if candidate.match(string):
            return True
    return False

53 

54 

@dataclass
class Package:
    """Lightweight record describing a regular package located by the finder.

    Parameters:
        name: The package name.
        path: The path of the package.
        stubs: The path of the matching stubs file (.pyi), when there is one.
    """

    name: str
    """Package name."""
    path: Path
    """Package folder path."""
    stubs: Path | None = None
    """Package stubs file."""

71 

72 

@dataclass
class NamespacePackage:
    """Lightweight record describing a namespace package located by the finder.

    Parameters:
        name: The package name.
        path: The folders that make up the namespace package.
    """

    name: str
    """Namespace package name."""
    path: list[Path]
    """Namespace package folder paths."""

86 

87 

class ModuleFinder:
    """The Griffe finder, allowing to find modules on the file system.

    The module finder is generally not used directly.
    Each [`GriffeLoader`][griffe.GriffeLoader] instance creates its own module finder instance.
    The finder can be configured when instantiating the loader
    thanks to the [loader][griffe.GriffeLoader]'s `search_paths` parameter.
    """

    accepted_py_module_extensions: ClassVar[list[str]] = [".py", ".pyc", ".pyo", ".pyd", ".pyi", ".so"]
    """List of extensions supported by the finder."""
    extensions_set: ClassVar[set[str]] = set(accepted_py_module_extensions)
    """Set of extensions supported by the finder."""

    def __init__(self, search_paths: Sequence[str | Path] | None = None) -> None:
        """Initialize the finder.

        Parameters:
            search_paths: Optional paths to search into.
                When empty or `None`, `sys.path` is used.
        """
        self._paths_contents: dict[Path, list[Path]] = {}
        self.search_paths: list[Path] = []
        """The finder search paths."""

        # Optimization: pre-compute Paths to relieve CPU when joining paths.
        for path in search_paths or sys.path:
            self.append_search_path(Path(path))

        self._always_scan_for: dict[str, list[Path]] = defaultdict(list)
        self._extend_from_pth_files()

    def append_search_path(self, path: Path) -> None:
        """Append a search path.

        The path will be resolved (absolute, normalized).
        The path won't be appended if it is already in the search paths list.

        Parameters:
            path: The path to append.
        """
        self._append_search_path(path.resolve())

    def _append_search_path(self, path: Path) -> None:
        """Append `path` as-is (no resolution) unless it is already registered."""
        if path not in self.search_paths:
            self.search_paths.append(path)

    def insert_search_path(self, position: int, path: Path) -> None:
        """Insert a search path at the given position.

        The path will be resolved (absolute, normalized).
        The path won't be inserted if it is already in the search paths list.

        Parameters:
            position: The insert position in the list.
            path: The path to insert.
        """
        path = path.resolve()
        if path not in self.search_paths:
            self.search_paths.insert(position, path)

    def find_spec(
        self,
        module: str | Path,
        *,
        try_relative_path: bool = True,
        find_stubs_package: bool = False,
    ) -> tuple[str, Package | NamespacePackage]:
        """Find the top-level parent module of a module.

        If a Path is passed, only try to find the module as a file path.
        If a string is passed, first try to find the module as a file path,
        then look into the search paths.

        Parameters:
            module: The module name or path.
            try_relative_path: Whether to try finding the module as a relative path,
                when the given module is not already a path.
            find_stubs_package: Whether to search for stubs-only package.
                If both the package and its stubs are found, they'll be merged together.
                If only the stubs are found, they'll be used as the package itself.

        Raises:
            FileNotFoundError: When a Path was passed and the module could not be found:

                - the directory has no `__init__.py` file in it
                - the path does not exist

            ModuleNotFoundError: When a string was passed and the module could not be found:

                - no `module/__init__.py`
                - no `module.py`
                - no `module.pth`
                - no `module` directory (namespace packages)
                - or unsupported .pth file

        Returns:
            The name of the module, and an instance representing its (namespace) package.
        """
        module_path: Path | list[Path]
        if isinstance(module, Path):
            module_name, module_path = self._module_name_path(module)
            top_module_name = self._top_module_name(module_path)
        elif try_relative_path:
            try:
                module_name, module_path = self._module_name_path(Path(module))
            except FileNotFoundError:
                # Not a path on disk: treat the string as a dotted module name.
                module_name = module
                top_module_name = module.split(".", 1)[0]
            else:
                top_module_name = self._top_module_name(module_path)
        else:
            module_name = module
            top_module_name = module.split(".", 1)[0]

        # Only search for actual package, let exceptions bubble up.
        if not find_stubs_package:
            return module_name, self.find_package(top_module_name)

        # Search for both package and stubs-only package.
        try:
            package = self.find_package(top_module_name)
        except ModuleNotFoundError:
            package = None
        try:
            stubs = self.find_package(top_module_name + "-stubs")
        except ModuleNotFoundError:
            stubs = None

        # None found, raise error.
        if package is None and stubs is None:
            raise ModuleNotFoundError(top_module_name)

        # Both found, assemble them to be merged later.
        # When their kinds differ (one regular, one namespace),
        # the package is returned untouched.
        if package and stubs:
            if isinstance(package, Package) and isinstance(stubs, Package):
                package.stubs = stubs.path
            elif isinstance(package, NamespacePackage) and isinstance(stubs, NamespacePackage):
                package.path += stubs.path
            return module_name, package

        # Return either one.
        return module_name, package or stubs  # ty:ignore[invalid-return-type]

    def find_package(self, module_name: str) -> Package | NamespacePackage:
        """Find a package or namespace package.

        Each search path is checked, in order, for a `module_name` entry
        and then for a `module_name.py` file. The first regular package or module
        found is returned immediately. Directories without a usable `__init__`
        module are accumulated and returned together as a namespace package
        if nothing else is found.

        Parameters:
            module_name: The module name.

        Raises:
            ModuleNotFoundError: When the module cannot be found.

        Returns:
            A package or namespace package wrapper.
        """
        filepaths = [
            Path(module_name),
            # TODO: Handle .py[cod] and .so files?
            # This would be needed for package that are composed
            # solely of a file with such an extension.
            Path(f"{module_name}.py"),
        ]

        # A stubs-only package (`name-stubs`) is reported under the real package name.
        real_module_name = module_name.removesuffix("-stubs")
        namespace_dirs = []
        for path in self.search_paths:
            path_contents = self._contents(path)
            if path_contents:
                for choice in filepaths:
                    abs_path = path / choice
                    if abs_path in path_contents:
                        if abs_path.suffix:
                            stubs = abs_path.with_suffix(".pyi")
                            return Package(real_module_name, abs_path, stubs if stubs.exists() else None)
                        init_module = abs_path / "__init__.py"
                        if init_module.exists() and not _is_pkg_style_namespace(init_module):
                            stubs = init_module.with_suffix(".pyi")
                            return Package(real_module_name, init_module, stubs if stubs.exists() else None)
                        init_module = abs_path / "__init__.pyi"
                        if init_module.exists():
                            # Stubs package.
                            return Package(real_module_name, init_module, None)
                        # Only directories can be namespace package portions:
                        # a regular file that merely shares the module name
                        # (and has no extension) must not be reported as a package.
                        if abs_path.is_dir():
                            namespace_dirs.append(abs_path)

        if namespace_dirs:
            return NamespacePackage(module_name, namespace_dirs)

        raise ModuleNotFoundError(module_name)

    def iter_submodules(
        self,
        path: Path | list[Path],
        seen: set | None = None,
    ) -> Iterator[NamePartsAndPathType]:
        """Iterate on a module's submodules, if any.

        Parameters:
            path: The module path.
            seen: If not none, this set is used to skip some files.
                The goal is to replicate the behavior of Python by
                only using the first packages (with `__init__` modules)
                of the same name found in different namespace packages.
                As soon as we find an `__init__` module, we add its parent
                path to the `seen` set, which will be reused when scanning
                the next namespace packages.

        Yields:
            name_parts (tuple[str, ...]): The parts of a submodule name.
            filepath (Path): A submodule filepath.
        """
        if isinstance(path, list):
            # We never enter this condition again in recursive calls,
            # so we just have to set `seen` once regardless of its value.
            seen = set()
            for path_elem in path:
                yield from self.iter_submodules(path_elem, seen)
            return

        if path.stem == "__init__":
            path = path.parent
        # Optimization: just check if the file name ends with .py[icod]/.so
        # (to distinguish it from a directory), not if it's an actual file.
        elif path.suffix in self.extensions_set:
            return

        # `seen` is only set when we scan a list of paths (namespace package).
        # `skip` is used to prevent yielding modules
        # of a regular subpackage that we already yielded
        # from another part of the namespace.
        # It is a snapshot: packages discovered during this scan are added
        # to `seen` (for the next portions), not to `skip`.
        skip = set(seen or ())

        for subpath in self._filter_py_modules(path):
            rel_subpath = subpath.relative_to(path)
            if rel_subpath.parent in skip:
                logger.debug("Skip %s, another module took precedence", subpath)
                continue
            py_file = rel_subpath.suffix == ".py"
            stem = rel_subpath.stem
            if not py_file:
                # `.py[cod]` and `.so` files look like `name.cpython-38-x86_64-linux-gnu.ext`.
                stem = stem.split(".", 1)[0]
            if stem == "__init__":
                # Optimization: since it's a relative path, if it has only one part
                # and is named __init__, it means it's the starting path
                # (no need to compare it against starting path).
                if len(rel_subpath.parts) == 1:
                    continue
                yield rel_subpath.parts[:-1], subpath
                if seen is not None:
                    seen.add(rel_subpath.parent)
            elif py_file:
                yield rel_subpath.with_suffix("").parts, subpath
            else:
                yield rel_subpath.with_name(stem).parts, subpath

    def submodules(self, module: Module) -> list[NamePartsAndPathType]:
        """Return the list of a module's submodules.

        Submodules are sorted by depth (number of name parts), shallowest first.

        Parameters:
            module: The parent module.

        Returns:
            A list of tuples containing the parts of the submodule name and its path.
        """
        return sorted(
            chain(
                self.iter_submodules(module.filepath),
                self.iter_submodules(self._always_scan_for[module.name]),
            ),
            key=_module_depth,
        )

    def _module_name_path(self, path: Path) -> tuple[str, Path]:
        """Return the module name and absolute module path for a file system path.

        For a directory, the first existing `__init__` module (in the order of
        `accepted_py_module_extensions`) is returned, or the directory itself
        when it has none. For an existing file, the file itself is returned.

        Raises:
            FileNotFoundError: When the path does not exist.
        """
        # Always return absolute paths to avoid working-directory-dependent issues.
        path = path.absolute()
        if path.is_dir():
            for ext in self.accepted_py_module_extensions:
                module_path = path / f"__init__{ext}"
                if module_path.exists():
                    return path.name, module_path
            return path.name, path
        if path.exists():
            if path.stem == "__init__":
                return path.parent.name, path
            return path.stem, path
        raise FileNotFoundError

    def _contents(self, path: Path) -> list[Path]:
        """Return the (cached) direct entries of a search path.

        Search paths that are missing, unreadable, or not directories
        are treated as empty.
        """
        if path not in self._paths_contents:
            try:
                self._paths_contents[path] = list(path.iterdir())
            except (FileNotFoundError, PermissionError, NotADirectoryError):
                # Same set of errors the import system ignores when listing a path entry:
                # an unreadable directory must not abort the whole search.
                self._paths_contents[path] = []
        return self._paths_contents[path]

    def _extend_from_pth_files(self) -> None:
        """Extend the search paths with the directories referenced by `.pth` files."""
        # NOTE: `search_paths` may grow while it is being iterated:
        # paths appended here are visited by this same loop later on.
        for path in self.search_paths:
            for item in self._contents(path):
                if item.suffix == ".pth":
                    for directory in _handle_pth_file(item):
                        if scan := directory.always_scan_for:
                            self._always_scan_for[scan].append(directory.path.joinpath(scan))
                        self.append_search_path(directory.path)

    def _filter_py_modules(self, path: Path) -> Iterator[Path]:
        """Walk `path` recursively and yield files with a supported module extension."""
        for root, dirs, files in os.walk(path, topdown=True, followlinks=True):
            # Optimization: modify dirs in-place to exclude `__pycache__` directories.
            dirs[:] = [dirname for dirname in dirs if dirname != "__pycache__"]
            for relfile in files:
                if os.path.splitext(relfile)[1] in self.extensions_set:  # noqa: PTH122
                    yield Path(root, relfile)

    def _top_module_name(self, path: Path) -> str:
        """Return the name of the top-level module containing `path`.

        When no search path contains `path`, the parent of the top-level
        package directory is inserted at the front of the search paths.
        """
        # First find if a parent is in search paths.
        parent_path = path if path.is_dir() else path.parent
        # Always resolve parent path to compare for relativeness against resolved search paths.
        parent_path = parent_path.resolve()
        for search_path in self.search_paths:
            # `ValueError`: not relative to this search path.
            # `IndexError`: the parent *is* the search path (no parts).
            with suppress(ValueError, IndexError):
                rel_path = parent_path.relative_to(search_path.resolve())
                return rel_path.parts[0]
        # If not, get the highest directory with an `__init__` module,
        # add its parent to search paths and return it.
        while parent_path.parent != parent_path and (parent_path.parent / "__init__.py").exists():
            parent_path = parent_path.parent
        self.insert_search_path(0, parent_path.parent)
        return parent_path.name

416 

417 

# The dot before the method name is escaped so that it only matches a real attribute access.
_re_pkgresources = re.compile(r"(?:__import__\([\"']pkg_resources[\"']\)\.declare_namespace\(__name__\))")
_re_pkgutil = re.compile(r"(?:__path__ = __import__\([\"']pkgutil[\"']\)\.extend_path\(__path__, __name__\))")
_re_import_line = re.compile(r"^import[ \t]+\w+$")


# TODO: For more robustness, we should load and minify the AST
# to search for particular call statements.
def _is_pkg_style_namespace(init_module: Path) -> bool:
    """Tell whether an `__init__` module declares a pkg_resources/pkgutil-style namespace package.

    Parameters:
        init_module: The path of the `__init__` module to inspect.

    Returns:
        True when the module source contains one of the two one-line namespace declarations.
    """
    # The searched patterns are pure ASCII, so bytes that are not valid UTF-8
    # (e.g. a module declaring another source encoding) are replaced
    # instead of aborting the whole package search with a decoding error.
    code = init_module.read_text(encoding="utf-8-sig", errors="replace")
    return bool(_re_pkgresources.search(code) or _re_pkgutil.search(code))

428 

429 

def _module_depth(name_parts_and_path: NamePartsAndPathType) -> int:
    """Sort key: the depth of a submodule, i.e. how many parts its name has."""
    name_parts = name_parts_and_path[0]
    return len(name_parts)

432 

433 

@dataclass
class _SP:
    """A search path produced while processing a `.pth` file."""

    # Directory to add to the finder's search paths.
    path: Path
    # Name of a package that must always be scanned under `path`; empty when there is none.
    always_scan_for: str = ""

438 

439 

def _handle_pth_file(path: Path) -> list[_SP]:
    """Collect the search paths declared by a `.pth` file.

    Parameters:
        path: The path of the `.pth` file.

    Returns:
        The search paths found: existing paths listed in the file,
        plus the paths exposed by supported editable-install modules
        imported from it. Unreadable or non-UTF-8 files yield an empty list.
    """
    # Support for .pth files pointing to directories.
    # From https://docs.python.org/3/library/site.html:
    # A path configuration file is a file whose name has the form name.pth
    # and exists in one of the four directories mentioned above;
    # its contents are additional items (one per line) to be added to sys.path.
    # Non-existing items are never added to sys.path,
    # and no check is made that the item refers to a directory rather than a file.
    # No item is added to sys.path more than once.
    # Blank lines and lines beginning with # are skipped.
    # Lines starting with import (followed by space or tab) are executed.
    directories: list[_SP] = []
    try:
        # It turns out PyTorch recommends its users to use `.pth` as the extension
        # when saving models on the disk. These model files are not encoded in UTF8.
        # If UTF8 decoding fails, we skip the .pth file.
        # Entries that cannot be read at all (directory named `*.pth`,
        # missing permissions, ...) are skipped as well, like `site` does.
        text = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        return directories
    for line in text.strip().replace(";", "\n").splitlines(keepends=False):
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        if _re_import_line.match(entry):
            editable_module = path.parent / f"{entry[len('import') :].lstrip()}.py"
            # Keep what was collected so far and keep processing the remaining lines:
            # each line of a .pth file is handled independently.
            with suppress(UnhandledEditableModuleError):
                directories.extend(_handle_editable_module(editable_module))
            continue
        # Relative items are relative to the directory containing the .pth file,
        # not to the current working directory (absolute items are left unchanged by the join).
        candidate = path.parent / entry
        if os.path.exists(candidate):  # noqa: PTH110
            directories.append(_SP(candidate))
    return directories

468 

469 

def _parse_editable_module(path: Path) -> ast.Module:
    """Read and parse an editable module, raising `UnhandledEditableModuleError` if that fails."""
    try:
        return ast.parse(path.read_text(encoding="utf8"))
    except (OSError, SyntaxError, ValueError) as error:
        # `ValueError` covers decoding errors and sources that cannot be compiled.
        raise UnhandledEditableModuleError(path) from error


def _handle_editable_module(path: Path) -> list[_SP]:
    """Extract search paths from the module an editable install's `.pth` file imports.

    The build backend is recognized from the module file name
    ('editables', 'scikit-build-core', 'setuptools' or 'meson-python').

    Parameters:
        path: The path of the editable module.

    Raises:
        UnhandledEditableModuleError: When the file name matches no supported backend,
            or when the module is missing, unreadable, or not laid out as expected.

    Returns:
        The search paths exposed by the editable module.
    """
    if _match_pattern(path.name, (*_editable_editables_patterns, *_editable_scikit_build_core_patterns)):
        # Support for how 'editables' write these files:
        # example line: `F.map_module('pkg', '/data/dev/pkg/src/pkg/__init__.py')`.
        # And how 'scikit-build-core' writes these files:
        # example line: `install({'pkg': '/data/dev/pkg/src/pkg/__init__.py'}, {'cmake_example': ...}, None, False, True)`.
        try:
            editable_lines = path.read_text(encoding="utf-8-sig").strip().splitlines(keepends=False)
            # The target path is the second quoted string of the last line.
            new_path = Path(editable_lines[-1].split("'")[3])
        except (OSError, UnicodeDecodeError, IndexError) as error:
            raise UnhandledEditableModuleError(path) from error
        if new_path.name.startswith("__init__"):
            return [_SP(new_path.parent.parent)]
        return [_SP(new_path)]
    if _match_pattern(path.name, _editable_setuptools_patterns):
        # Support for how 'setuptools' writes these files:
        # example line: `MAPPING = {'pkg': '/data/dev/pkg/src/pkg', 'pkg2': '/data/dev/pkg/src/pkg2'}`.
        # with annotation: `MAPPING: dict[str, str] = {...}`.
        parsed_module = _parse_editable_module(path)
        for node in parsed_module.body:
            if isinstance(node, ast.Assign):
                target = node.targets[0]
            elif isinstance(node, ast.AnnAssign):
                target = node.target
            else:
                continue
            if isinstance(target, ast.Name) and target.id == "MAPPING" and isinstance(node.value, ast.Dict):  # ty:ignore[unresolved-attribute]
                return [
                    _SP(Path(cst.value).parent)
                    for cst in node.value.values  # ty:ignore[unresolved-attribute]
                    if isinstance(cst, ast.Constant) and isinstance(cst.value, str)
                ]
    if _match_pattern(path.name, _editable_meson_python_patterns):
        # Support for how 'meson-python' writes these files:
        # example line: `install({'package', 'module1'}, '/media/data/dev/griffe/build/cp311', ["path"], False)`.
        # Compiled modules then found in the cp311 folder, under src/package.
        parsed_module = _parse_editable_module(path)
        for node in parsed_module.body:
            if (
                isinstance(node, ast.Expr)
                and isinstance(node.value, ast.Call)
                and isinstance(node.value.func, ast.Name)
                and node.value.func.id == "install"
                and len(node.value.args) > 1
                and isinstance(node.value.args[1], ast.Constant)
            ):
                build_path = Path(node.value.args[1].value, "src")  # ty:ignore[invalid-argument-type]
                # NOTE: What if there are multiple packages?
                try:
                    pkg_name = next(build_path.iterdir()).name
                except (OSError, StopIteration) as error:
                    # Missing or empty build directory: nothing to scan.
                    raise UnhandledEditableModuleError(path) from error
                return [_SP(build_path, always_scan_for=pkg_name)]
    raise UnhandledEditableModuleError(path)