Coverage for src/mkdocs_autorefs/_internal/plugin.py: 74.90%

191 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-24 16:00 +0100

1# This module contains the "mkdocs-autorefs" plugin. 

2# 

3# After each page is processed by the Markdown converter, this plugin stores the absolute URL of every HTML anchor

4# it finds to later be able to fix unresolved references. 

5# 

6# Once every page has been rendered and all identifiers and their URLs collected, 

7# the plugin fixes unresolved references in the HTML content of the pages. 

8 

9from __future__ import annotations 

10 

11import contextlib 

12import functools 

13import logging 

14from collections import defaultdict 

15from pathlib import PurePosixPath as URL # noqa: N814 

16from typing import TYPE_CHECKING, Any, Callable, Literal 

17from urllib.parse import urlsplit 

18from warnings import warn 

19 

20from mkdocs.config.base import Config 

21from mkdocs.config.config_options import Choice, Type 

22from mkdocs.plugins import BasePlugin, event_priority 

23from mkdocs.structure.pages import Page 

24 

25from mkdocs_autorefs._internal.backlinks import Backlink, BacklinkCrumb 

26from mkdocs_autorefs._internal.references import AutorefsExtension, fix_refs, relative_url 

27 

28if TYPE_CHECKING: 

29 from collections.abc import Sequence 

30 

31 from jinja2.environment import Environment 

32 from mkdocs.config.defaults import MkDocsConfig 

33 from mkdocs.structure.files import Files 

34 from mkdocs.structure.nav import Section 

35 from mkdocs.structure.toc import AnchorLink 

36 

# Module-level logger: use the plugin logger helper from MkDocs when it can be imported.
try:
    from mkdocs.plugins import get_plugin_logger

    _log = get_plugin_logger(__name__)
except ImportError:
    # TODO: Remove once support for MkDocs <1.5 is dropped.
    # The helper is unavailable: fall back to a standard logger named under "mkdocs.plugins.".
    _log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]

44 

45 

class AutorefsConfig(Config):
    """Configuration options for the `autorefs` plugin."""

    # Consulted when several URLs are registered for one identifier.
    resolve_closest: bool = Type(bool, default=False)  # type: ignore[assignment]
    """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier.

    By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page".

    For example, if you link to identifier `hello` from page `foo/bar/`,
    and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages,
    autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`.

    If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs.
    If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found.

    When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL.
    """

    # The "auto" value is turned into `True` or "external" by the plugin's `on_config` hook.
    link_titles: bool | Literal["auto", "external"] = Choice((True, False, "auto", "external"), default="auto")  # type: ignore[assignment]
    """Whether to set titles on links.

    Such title attributes are displayed as tooltips when hovering over the links.

    - `"auto"`: autorefs will detect whether the instant preview feature of Material for MkDocs is enabled,
        and set titles on external links when it is, all links if it is not.
    - `"external"`: autorefs will set titles on external links only.
    - `True`: autorefs will set titles on all links.
    - `False`: autorefs will not set any title attributes on links.

    Titles are only set when they are different from the link's text.
    Titles are constructed from the linked heading's original title,
    optionally appending the identifier for API objects.
    """

    # The "auto" value is turned into a boolean by the plugin's `on_config` hook: tags are kept only
    # when the theme is named "material" AND its "content.tooltips" feature is enabled.
    # An explicit `True` or `False` is used as is.
    strip_title_tags: bool | Literal["auto"] = Choice((True, False, "auto"), default="auto")  # type: ignore[assignment]
    """Whether to strip HTML tags from link titles.

    Some themes support HTML in link titles, but others do not.

    - `"auto"`: strip tags unless the Material for MkDocs theme is detected.
    """

87 

88 

class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    This plugin defines the following event hooks:

    - `on_config`, to configure itself
    - `on_page_markdown`, to set the current page in order for Markdown extension to use it
    - `on_page_content`, to map the anchors of each page's table of contents to URLs
    - `on_env`, to apply cross-references once all pages have been rendered

    Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    for more information about its plugin system.
    """

    scan_toc: bool = True
    """Whether to scan the table of contents for identifiers to map to URLs."""
    record_backlinks: bool = False
    """Whether to record backlinks."""
    current_page: Page | None = None
    """The current page being processed."""
    # YORE: Bump 2: Remove block.
    legacy_refs: bool = True
    """Whether to support legacy references."""

111 

def __init__(self) -> None:
    """Create the plugin with empty URL, title and backlink registries."""
    super().__init__()

    # Identifiers are resolved through three URL registries:
    #
    # - "primary" URLs: the identifier links to a specific anchor on a page.
    # - "secondary" URLs: the identifier is an alias pointing to the same anchor
    #   as another identifier's primary URL. Primary URLs using these aliases
    #   as identifiers may or may not be rendered later.
    # - "absolute" URLs: the identifier links to an external resource. These are
    #   typically registered by mkdocstrings when loading object inventories.
    #
    # For example, mkdocstrings registers a primary URL for each heading rendered in a page,
    # then a secondary URL for each alias of this heading's identifier.
    #
    # Primary and secondary URLs are tracked separately so that primary ones take precedence:
    # a single primary URL with one or more secondary URLs must resolve to the primary URL,
    # without logging any warning.
    #
    # Resolution order (implemented in `_get_item_url`):
    #
    # 1. One primary URL for the identifier: use it.
    # 2. Several primary URLs: use the first one, or the closest one if configured as such.
    # 3. No primary URL but one secondary URL: use it.
    # 4. Several secondary URLs: use the first one, or the closest one if configured as such.
    # 5. No secondary URL either: try absolute URLs
    #    (typically registered by loading inventories in mkdocstrings).
    self._primary_url_map: dict[str, list[str]] = {}
    self._secondary_url_map: dict[str, list[str]] = {}
    self._abs_url_map: dict[str, str] = {}

    # Titles and pages are keyed by anchor URL; backlinks by identifier, then by backlink type.
    self._title_map: dict[str, str] = {}
    self._backlink_page_map: dict[str, Page] = {}
    self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))

    # YORE: Bump 2: Remove line.
    self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
    # YORE: Bump 2: Remove line.
    self._url_to_page: dict[str, Page] = {}

    # Effective link options; `on_config` overwrites them from the user configuration.
    self._link_titles: bool | Literal["external"] = True
    self._strip_title_tags: bool = False

154 

155 # ----------------------------------------------------------------------- # 

156 # MkDocs Hooks # 

157 # ----------------------------------------------------------------------- # 

def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
    """Register our Markdown extension and resolve the `"auto"` options.

    Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
    In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.AutorefsExtension]
    and add it to the list of Markdown extensions used by `mkdocs`.
    We also compute the effective `link_titles` and `strip_title_tags` values,
    resolving `"auto"` from the configured theme and its features.

    Arguments:
        config: The MkDocs config object.

    Returns:
        The modified config.
    """

    def material_has(feature: str) -> bool:
        # The theme features are only looked up once the theme is known to be "material".
        theme = config.theme
        return getattr(theme, "name", None) == "material" and feature in theme.get("features", ())

    _log.debug("Adding AutorefsExtension to the list")
    config.markdown_extensions.append(AutorefsExtension(self))  # type: ignore[arg-type]

    # YORE: Bump 2: Remove block.
    # mkdocstrings still uses the `page` attribute as a string.
    # Fortunately, it does so in f-strings, so we can simply patch the `__str__` method
    # to render the URL.
    Page.__str__ = lambda page: page.url  # type: ignore[method-assign,attr-defined]

    # With instant previews enabled, titles are only set on external links.
    link_titles = self.config.link_titles
    if link_titles == "auto":
        link_titles = "external" if material_has("navigation.instant.preview") else True
    self._link_titles = link_titles

    # HTML tags are kept in titles only when the Material tooltips feature is enabled.
    strip_title_tags = self.config.strip_title_tags
    if strip_title_tags == "auto":
        strip_title_tags = not material_has("content.tooltips")
    self._strip_title_tags = strip_title_tags

    return config

203 

def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
    """Record the page about to be converted as the current page.

    The Markdown is not modified: this hook only stores a reference to the page,
    which is used during Markdown conversion by the anchor scanner tree processor.

    Arguments:
        markdown: Input Markdown.
        page: The related MkDocs page instance.
        kwargs: Additional arguments passed by MkDocs.

    Returns:
        The same Markdown, untouched.
    """
    page_url = page.url
    # YORE: Bump 2: Remove line.
    self._url_to_page[page_url] = page
    self.current_page = page
    return markdown

220 

def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
    """Map anchors to URLs.

    Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
    When `scan_toc` is enabled, every anchor found in the page's table of contents
    is mapped to its URL. This mapping is used later to fix unresolved references
    of the form `[title][identifier]` or `[identifier][]`.

    Arguments:
        html: HTML converted from Markdown.
        page: The related MkDocs page instance.
        kwargs: Additional arguments passed by MkDocs.

    Returns:
        The same HTML. We only use this hook to map anchors to URLs.
    """
    self.current_page = page
    if not self.scan_toc:
        return html

    # Collect `std`-domain URLs.
    _log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
    for toc_entry in page.toc.items:
        self.map_urls(page, toc_entry)
    return html

244 

@event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:  # noqa: ARG002
    """Apply cross-references and collect backlinks.

    Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
    In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
    Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
    It uses the native Markdown syntax so it's easy to remember and use.

    We log a warning for each reference that we couldn't map to an URL.

    Backlinks are collected in the same pass, for performance reasons
    (we don't want to run the regular expression on each page twice).

    Arguments:
        env: The MkDocs environment.
        config: The MkDocs config object.
        files: The list of files in the MkDocs project.

    Returns:
        The unmodified environment.
    """
    for file in files:
        # Only pages that were rendered to non-empty content are processed.
        if not file.page or not file.page.content:
            continue
        page = file.page
        _log.debug("Applying cross-refs in page %s", page.file.src_path)

        # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)

        backlink_recorder = None
        if self.record_backlinks:
            backlink_recorder = functools.partial(self._record_backlink, page_url=page.url)

        # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
        page.content, unmapped = fix_refs(
            page.content,
            url_mapper,
            record_backlink=backlink_recorder,
            link_titles=self._link_titles,
            strip_title_tags=self._strip_title_tags,
            _legacy_refs=self.legacy_refs,
        )

        # Skip building the messages when there is nothing to report or warnings are disabled.
        if not unmapped or not _log.isEnabledFor(logging.WARNING):
            continue
        for ref, context in unmapped:
            message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
            _log.warning(
                f"{file.page.file.src_path}: {message}Could not find cross-reference target '{ref}'",
            )

    return env

298 

299 # ----------------------------------------------------------------------- # 

300 # Utilities # 

301 # ----------------------------------------------------------------------- # 

def map_urls(self, page: Page, anchor: AnchorLink) -> None:
    """Register an anchor and, recursively, all of its children as primary URLs.

    The anchor is registered before its children, so `self._primary_url_map`
    is populated by side-effect in document order.

    Arguments:
        page: The page containing the anchors.
        anchor: The anchor to process and to recurse on.
    """
    # YORE: Bump 2: Remove block.
    if isinstance(page, str):
        # Legacy callers pass the page URL: use the matching page, or else the current one.
        page = self._url_to_page.get(page, self.current_page)

    self.register_anchor(page, anchor.id, title=anchor.title, primary=True)
    for sub_anchor in anchor.children:
        self.map_urls(page, sub_anchor)

321 

322 def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None: 

323 """Record a backlink. 

324 

325 Arguments: 

326 identifier: The target identifier. 

327 backlink_type: The type of backlink. 

328 backlink_anchor: The backlink target anchor. 

329 page_url: The URL of the page containing the backlink. 

330 """ 

331 # When we record backlinks, all identifiers have been registered. 

332 # If an identifier is not found in the primary or secondary URL maps, it's an absolute URL, 

333 # meaning it comes from an external source (typically an object inventory), 

334 # and we don't need to record backlinks for it. 

335 if identifier in self._primary_url_map or identifier in self._secondary_url_map: 

336 self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}") 

337 

def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[Backlink]]:
    """Return the backlinks to the given identifiers, relative to the given URL.

    Arguments:
        *identifiers: The identifiers to get backlinks for. Duplicates are ignored.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary of backlinks, with the type of reference as key and a set of backlinks as value.
        Each backlink is a tuple of (URL, title) tuples forming navigation breadcrumbs.
    """
    by_type: dict[str, set[Backlink]] = defaultdict(set)
    for identifier in set(identifiers):
        # `.get` is used so that unknown identifiers are not inserted into the backlinks registry.
        recorded = self._backlinks.get(identifier)
        if not recorded:
            continue
        for kind, urls in recorded.items():
            for url in urls:
                by_type[kind].add(self._crumbs(from_url, url))
    return by_type

356 

def _crumbs(self, from_url: str, backlink_url: str) -> Backlink:
    """Build the breadcrumbs leading to a backlink.

    Arguments:
        from_url: The URL of the page where the backlink is rendered; crumb URLs are made relative to it.
        backlink_url: The URL (with anchor) of the backlink. It must be a key of `self._backlink_page_map`.

    Returns:
        A backlink whose crumbs go from the outermost ancestor of the backlink's page
        down to the page, then to the backlink anchor itself.
    """
    origin_page: Page = self._backlink_page_map[backlink_url]
    anchor_title = self._title_map.get(backlink_url, "")

    # Crumbs are collected innermost first, and reversed at the end.
    trail: list[BacklinkCrumb] = [
        BacklinkCrumb(anchor_title, relative_url(from_url, backlink_url)),
        BacklinkCrumb(origin_page.title, relative_url(from_url, origin_page.url + "#")),
    ]

    node: Page | Section = origin_page
    while node.parent:
        node = node.parent
        # Ancestors without a `url` attribute get an empty URL.
        href = getattr(node, "url", "")
        if href:
            href = relative_url(from_url, href + "#")
        trail.append(BacklinkCrumb(node.title, href))

    return Backlink(tuple(reversed(trail)))

371 

def register_anchor(
    self,
    page: Page,
    identifier: str,
    anchor: str | None = None,
    *,
    title: str | None = None,
    primary: bool = True,
) -> None:
    """Register that an anchor corresponding to an identifier was encountered when rendering the page.

    The URL `<page URL>#<anchor>` is appended to the identifier's primary or secondary URLs
    unless it is already listed. The first title registered for a URL is kept,
    and the page is remembered for that URL when backlinks are recorded.

    Arguments:
        page: The page where the anchor was found.
        identifier: The identifier to register.
        anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
        title: The title of the anchor (optional).
        primary: Whether this anchor is the primary one for the identifier.
    """
    # YORE: Bump 2: Remove block.
    if isinstance(page, str):
        # Legacy callers pass the page URL: use the matching page, or else the current one.
        page = self._url_to_page.get(page, self.current_page)

    url = f"{page.url}#{anchor or identifier}"

    registry = self._primary_url_map if primary else self._secondary_url_map
    known_urls = registry.setdefault(identifier, [])
    if url not in known_urls:
        known_urls.append(url)

    if title:
        self._title_map.setdefault(url, title)
    if self.record_backlinks:
        self._backlink_page_map.setdefault(url, page)

408 

def register_url(self, identifier: str, url: str) -> None:
    """Register an absolute URL for an identifier.

    The identifier will be turned into a link to this URL.
    A URL previously registered for the same identifier is replaced.

    Arguments:
        identifier: The new identifier.
        url: The absolute URL (including anchor, if needed) where this item can be found.
    """
    absolute_urls = self._abs_url_map
    absolute_urls[identifier] = url

417 

418 @staticmethod 

419 def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str: 

420 """Return the closest URL to the current page. 

421 

422 Arguments: 

423 from_url: The URL of the base page, from which we link towards the targeted pages. 

424 urls: A list of URLs to choose from. 

425 qualifier: The type of URLs we are choosing from. 

426 

427 Returns: 

428 The closest URL to the current page. 

429 """ 

430 base_url = URL(from_url) 

431 

432 while True: 

433 if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]: 

434 break 

435 base_url = base_url.parent 

436 if not base_url.name: 

437 break 

438 

439 if not candidates: 

440 _log.warning( 

441 "Could not find closest %s URL (from %s, candidates: %s). " 

442 "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).", 

443 qualifier, 

444 from_url, 

445 urls, 

446 ) 

447 return urls[0] 

448 

449 winner = candidates[0] if len(candidates) == 1 else min(candidates, key=lambda c: c.count("/")) 

450 _log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls) 

451 return winner 

452 

453 def _get_urls(self, identifier: str) -> tuple[list[str], str]: 

454 try: 

455 return self._primary_url_map[identifier], "primary" 

456 except KeyError: 

457 return self._secondary_url_map[identifier], "secondary" 

458 

459 def _get_item_url( 

460 self, 

461 identifier: str, 

462 from_url: str | None = None, 

463 # YORE: Bump 2: Remove line. 

464 fallback: Callable[[str], Sequence[str]] | None = None, 

465 ) -> str: 

466 try: 

467 urls, qualifier = self._get_urls(identifier) 

468 except KeyError: 

469 # YORE: Bump 2: Replace block with line 2. 

470 if identifier in self._abs_url_map: 

471 return self._abs_url_map[identifier] 

472 if fallback: 

473 new_identifiers = fallback(identifier) 

474 for new_identifier in new_identifiers: 

475 with contextlib.suppress(KeyError): 

476 url = self._get_item_url(new_identifier) 

477 self._secondary_url_map[identifier] = [url] 

478 return url 

479 raise 

480 

481 if len(urls) > 1: 

482 if (self.config.resolve_closest or qualifier == "secondary") and from_url is not None: 

483 return self._get_closest_url(from_url, urls, qualifier) 

484 _log.warning( 

485 "Multiple %s URLs found for '%s': %s. " 

486 "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).", 

487 qualifier, 

488 identifier, 

489 urls, 

490 ) 

491 return urls[0] 

492 

def get_item_url(
    self,
    identifier: str,
    from_url: str | None = None,
    # YORE: Bump 2: Remove line.
    fallback: Callable[[str], Sequence[str]] | None = None,
) -> tuple[str, str | None]:
    """Return the URL and title of an identifier, if it's present anywhere.

    Arguments:
        identifier: The anchor (without '#').
        from_url: The URL of the base page, from which we link towards the targeted pages.
        fallback: A function returning alternative identifiers to try (deprecated).

    Returns:
        A tuple `(url, title)`. The title is the one registered for the resolved URL,
        or `None` when there is none. When `from_url` is given and the resolved URL
        has neither scheme nor network location, the URL is made relative to `from_url`.
    """
    # YORE: Bump 2: Replace `, fallback` with `` within line.
    resolved = self._get_item_url(identifier, from_url, fallback)
    # Titles are keyed by the resolved URL, before it is made relative.
    title = self._title_map.get(resolved) or None

    if from_url is None:
        return resolved, title

    scheme, netloc = urlsplit(resolved)[:2]
    if scheme or netloc:
        # External URL: returned as is.
        return resolved, title
    return relative_url(from_url, resolved), title

517 

518 # YORE: Bump 2: Remove block. 

519 # ----------------------------------------------------------------------- # 

520 # Deprecated API # 

521 # ----------------------------------------------------------------------- # 

@property
def get_fallback_anchor(self) -> Callable[[str], tuple[str, ...]] | None:
    """The (deprecated) function returning fallback anchors, or `None` when unset."""
    return self._get_fallback_anchor

# YORE: Bump 2: Remove block.
@get_fallback_anchor.setter
def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) -> None:
    """Store the fallback anchors function, warning about the deprecation when one is set."""
    # The value is stored first, so it is kept even if the warning is turned into an error.
    self._get_fallback_anchor = value
    if value is None:
        return
    warn(
        "Setting a fallback anchor function is deprecated and will be removed in a future release.",
        DeprecationWarning,
        stacklevel=2,
    )

537 )