Coverage for src/mkdocs_autorefs/_internal/plugin.py: 74.90%

# This module contains the "mkdocs-autorefs" plugin.
#
# After each page is processed by the Markdown converter, this plugin stores the absolute URL
# of every HTML anchor it finds, to later be able to fix unresolved references.
#
# Once every page has been rendered and all identifiers and their URLs collected,
# the plugin fixes unresolved references in the HTML content of the pages.

9from __future__ import annotations

11import contextlib

12import functools

13import logging

14from collections import defaultdict

15from pathlib import PurePosixPath as URL # noqa: N814

16from typing import TYPE_CHECKING, Any, Callable, Literal

17from urllib.parse import urlsplit

18from warnings import warn

20from mkdocs.config.base import Config

21from mkdocs.config.config_options import Choice, Type

22from mkdocs.plugins import BasePlugin, event_priority

23from mkdocs.structure.pages import Page

25from mkdocs_autorefs._internal.backlinks import Backlink, BacklinkCrumb

26from mkdocs_autorefs._internal.references import AutorefsExtension, fix_refs, relative_url

28if TYPE_CHECKING:

29 from collections.abc import Sequence

31 from jinja2.environment import Environment

32 from mkdocs.config.defaults import MkDocsConfig

33 from mkdocs.structure.files import Files

34 from mkdocs.structure.nav import Section

35 from mkdocs.structure.toc import AnchorLink

# Module-level logger.
# `get_plugin_logger` is imported lazily inside the `try` because it may be missing
# from the installed MkDocs; in that case we fall back to a plain `logging` logger
# registered under the same `mkdocs.plugins.` namespace.
try:
    from mkdocs.plugins import get_plugin_logger

    _log = get_plugin_logger(__name__)
except ImportError:
    # TODO: Remove once support for MkDocs <1.5 is dropped.
    _log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]

class AutorefsConfig(Config):
    """Configuration options for the `autorefs` plugin."""

    resolve_closest: bool = Type(bool, default=False)  # type: ignore[assignment]
    """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier.

    By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page".

    For example, if you link to identifier `hello` from page `foo/bar/`,
    and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages,
    autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`.

    If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs.
    If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found.

    When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL.
    """

    link_titles: bool | Literal["auto", "external"] = Choice((True, False, "auto", "external"), default="auto")  # type: ignore[assignment]
    """Whether to set titles on links.

    Such title attributes are displayed as tooltips when hovering over the links.

    - `"auto"`: autorefs will detect whether the instant preview feature of Material for MkDocs is enabled,
      and set titles on external links when it is, all links if it is not.
    - `"external"`: autorefs will set titles on external links only.
    - `True`: autorefs will set titles on all links.
    - `False`: autorefs will not set any title attributes on links.

    Titles are only set when they are different from the link's text.
    Titles are constructed from the linked heading's original title,
    optionally appending the identifier for API objects.
    """

    strip_title_tags: bool | Literal["auto"] = Choice((True, False, "auto"), default="auto")  # type: ignore[assignment]
    """Whether to strip HTML tags from link titles.

    Some themes support HTML in link titles, but others do not.

    - `"auto"`: strip tags unless the Material for MkDocs theme is detected
      with its `content.tooltips` feature enabled.
    - `True`: always strip tags.
    - `False`: never strip tags.
    """

class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    This plugin defines the following event hooks:

    - `on_config`, to configure itself
    - `on_page_markdown`, to set the current page in order for Markdown extension to use it
    - `on_page_content`, to map the anchors of each page's table of contents to their URLs
    - `on_env`, to apply cross-references once all pages have been rendered

    Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    for more information about its plugin system.
    """

    scan_toc: bool = True
    """Whether to scan the table of contents for identifiers to map to URLs."""
    record_backlinks: bool = False
    """Whether to record backlinks."""
    current_page: Page | None = None
    """The current page being processed."""
    # YORE: Bump 2: Remove block.
    legacy_refs: bool = True
    """Whether to support legacy references."""

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()

        # The plugin uses three URL maps, one for "primary" URLs, one for "secondary" URLs,
        # and one for "absolute" URLs.
        #
        # - A primary URL is an identifier that links to a specific anchor on a page.
        # - A secondary URL is an alias of an identifier that links to the same anchor as the identifier's primary URL.
        #   Primary URLs with these aliases as identifiers may or may not be rendered later.
        # - An absolute URL is an identifier that links to an external resource.
        #   These URLs are typically registered by mkdocstrings when loading object inventories.
        #
        # For example, mkdocstrings registers a primary URL for each heading rendered in a page.
        # Then, for each alias of this heading's identifier, it registers a secondary URL.
        #
        # We need to keep track of whether an identifier is primary or secondary,
        # to give it precedence when resolving cross-references.
        # We wouldn't want to log a warning if there is a single primary URL and one or more secondary URLs,
        # instead we want to use the primary URL without any warning.
        #
        # - A single primary URL mapped to an identifier? Use it.
        # - Multiple primary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
        # - No primary URL mapped to an identifier, but a secondary URL mapped? Use it.
        # - Multiple secondary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
        # - No secondary URL mapped to an identifier? Try using absolute URLs
        #   (typically registered by loading inventories in mkdocstrings).
        #
        # This logic unfolds in `_get_item_url`.
        self._primary_url_map: dict[str, list[str]] = {}
        self._secondary_url_map: dict[str, list[str]] = {}
        self._title_map: dict[str, str] = {}
        self._backlink_page_map: dict[str, Page] = {}
        self._abs_url_map: dict[str, str] = {}
        self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
        # YORE: Bump 2: Remove line.
        self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
        # YORE: Bump 2: Remove line.
        self._url_to_page: dict[str, Page] = {}

        # Effective settings, resolved from the user configuration in `on_config`.
        self._link_titles: bool | Literal["external"] = True
        self._strip_title_tags: bool = False

    # ----------------------------------------------------------------------- #
    # MkDocs Hooks                                                            #
    # ----------------------------------------------------------------------- #
    def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
        """Instantiate our Markdown extension.

        Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
        In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.AutorefsExtension]
        and add it to the list of Markdown extensions used by `mkdocs`.
        We also resolve the `"auto"` values of the `link_titles` and `strip_title_tags` options
        by inspecting the configured theme.

        Arguments:
            config: The MkDocs config object.

        Returns:
            The modified config.
        """
        _log.debug("Adding AutorefsExtension to the list")
        config.markdown_extensions.append(AutorefsExtension(self))  # type: ignore[arg-type]

        # YORE: Bump 2: Remove block.
        # mkdocstrings still uses the `page` attribute as a string.
        # Fortunately, it does so in f-strings, so we can simply patch the `__str__` method
        # to render the URL.
        Page.__str__ = lambda page: page.url  # type: ignore[method-assign,attr-defined]

        if self.config.link_titles == "auto":
            # With instant previews enabled, only external links get a title.
            instant_preview = self._material_feature_enabled(config, "navigation.instant.preview")
            self._link_titles = "external" if instant_preview else True
        else:
            self._link_titles = self.config.link_titles

        if self.config.strip_title_tags == "auto":
            # Tags are kept only when Material's `content.tooltips` feature is enabled.
            self._strip_title_tags = not self._material_feature_enabled(config, "content.tooltips")
        else:
            self._strip_title_tags = self.config.strip_title_tags

        return config

    def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Remember which page is the current one.

        Arguments:
            markdown: Input Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same Markdown. We only use this hook to keep a reference to the current page,
            used during Markdown conversion by the anchor scanner tree processor.
        """
        # YORE: Bump 2: Remove line.
        self._url_to_page[page.url] = page
        self.current_page = page
        return markdown

    def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Map anchors to URLs.

        Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
        In this hook, we map the IDs of every anchor found in the table of contents to the anchors absolute URLs.
        This mapping will be used later to fix unresolved reference of the form `[title][identifier]` or
        `[identifier][]`.

        Arguments:
            html: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same HTML. We only use this hook to map anchors to URLs.
        """
        self.current_page = page
        # Collect `std`-domain URLs.
        if self.scan_toc:
            _log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
            for item in page.toc.items:
                self.map_urls(page, item)
        return html

    @event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
    def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:  # noqa: ARG002
        """Apply cross-references and collect backlinks.

        Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
        In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
        Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
        It uses the native Markdown syntax so it's easy to remember and use.

        We log a warning for each reference that we couldn't map to an URL.

        We also collect backlinks at the same time. We fix cross-refs and collect backlinks in a single pass
        for performance reasons (we don't want to run the regular expression on each page twice).

        Arguments:
            env: The MkDocs environment.
            config: The MkDocs config object.
            files: The list of files in the MkDocs project.

        Returns:
            The unmodified environment.
        """
        for file in files:
            page = file.page
            # Skip files that are not pages, and pages without rendered content.
            if not (page and page.content):
                continue

            _log.debug("Applying cross-refs in page %s", page.file.src_path)

            # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
            url_mapper = functools.partial(
                self.get_item_url,
                from_url=page.url,
                fallback=self.get_fallback_anchor,
            )
            backlink_recorder = (
                functools.partial(self._record_backlink, page_url=page.url) if self.record_backlinks else None
            )
            # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
            page.content, unmapped = fix_refs(
                page.content,
                url_mapper,
                record_backlink=backlink_recorder,
                link_titles=self._link_titles,
                strip_title_tags=self._strip_title_tags,
                _legacy_refs=self.legacy_refs,
            )

            if unmapped and _log.isEnabledFor(logging.WARNING):
                for ref, context in unmapped:
                    # The origin prefix is only available when the reference carries a context.
                    message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
                    _log.warning(
                        "%s: %sCould not find cross-reference target '%s'",
                        page.file.src_path,
                        message,
                        ref,
                    )

        return env

    # ----------------------------------------------------------------------- #
    # Utilities                                                               #
    # ----------------------------------------------------------------------- #
    @staticmethod
    def _material_feature_enabled(config: MkDocsConfig, feature: str) -> bool:
        """Tell whether the Material for MkDocs theme is used with the given feature enabled.

        Arguments:
            config: The MkDocs config object.
            feature: The name of the theme feature to look for.

        Returns:
            True when the theme is named `material` and lists the feature, false otherwise.
        """
        # The theme's features are only queried once the theme name matched.
        return getattr(config.theme, "name", None) == "material" and feature in config.theme.get("features", ())

    def map_urls(self, page: Page, anchor: AnchorLink) -> None:
        """Recurse on every anchor to map its ID to its absolute URL.

        This method populates `self._primary_url_map` by side-effect.

        Arguments:
            page: The page containing the anchors.
            anchor: The anchor to process and to recurse on.
        """
        # YORE: Bump 2: Remove block.
        if isinstance(page, str):
            try:
                page = self._url_to_page[page]
            except KeyError:
                page = self.current_page

        self.register_anchor(page, anchor.id, title=anchor.title, primary=True)
        for child in anchor.children:
            self.map_urls(page, child)

    def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
        """Record a backlink.

        Arguments:
            identifier: The target identifier.
            backlink_type: The type of backlink.
            backlink_anchor: The backlink target anchor.
            page_url: The URL of the page containing the backlink.
        """
        # When we record backlinks, all identifiers have been registered.
        # If an identifier is not found in the primary or secondary URL maps, it's an absolute URL,
        # meaning it comes from an external source (typically an object inventory),
        # and we don't need to record backlinks for it.
        if identifier in self._primary_url_map or identifier in self._secondary_url_map:
            self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")

    def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[Backlink]]:
        """Return the backlinks to an identifier relative to the given URL.

        Arguments:
            *identifiers: The identifiers to get backlinks for.
            from_url: The URL of the page where backlinks are rendered.

        Returns:
            A dictionary of backlinks, with the type of reference as key and a set of backlinks as value.
            Each backlink holds a tuple of (title, URL) crumbs forming navigation breadcrumbs.
        """
        relative_backlinks: dict[str, set[Backlink]] = defaultdict(set)
        for identifier in set(identifiers):
            # `.get` avoids creating empty entries in the `_backlinks` defaultdict.
            backlinks = self._backlinks.get(identifier, {})
            for backlink_type, backlink_urls in backlinks.items():
                for backlink_url in backlink_urls:
                    relative_backlinks[backlink_type].add(self._crumbs(from_url, backlink_url))
        return relative_backlinks

    def _crumbs(self, from_url: str, backlink_url: str) -> Backlink:
        """Build the navigation breadcrumbs leading to a backlink.

        Arguments:
            from_url: The URL of the page where backlinks are rendered.
            backlink_url: The URL (with anchor) of the backlink.

        Returns:
            A backlink whose crumbs go from the outermost navigation parent
            down to the backlink anchor itself, with URLs relative to `from_url`.

        Raises:
            KeyError: When no page was recorded for the backlink URL.
        """
        backlink_page: Page = self._backlink_page_map[backlink_url]
        backlink_title = self._title_map.get(backlink_url, "")
        # Crumbs are collected innermost first, then reversed on return.
        crumbs: list[BacklinkCrumb] = [
            BacklinkCrumb(backlink_title, relative_url(from_url, backlink_url)),
            BacklinkCrumb(backlink_page.title, relative_url(from_url, backlink_page.url + "#")),
        ]
        page: Page | Section = backlink_page
        while page.parent:
            page = page.parent
            # Parents without a `url` attribute still get a crumb, with an empty URL.
            if url := getattr(page, "url", ""):
                url = relative_url(from_url, url + "#")
            crumbs.append(BacklinkCrumb(page.title, url))
        return Backlink(tuple(reversed(crumbs)))

    def register_anchor(
        self,
        page: Page,
        identifier: str,
        anchor: str | None = None,
        *,
        title: str | None = None,
        primary: bool = True,
    ) -> None:
        """Register that an anchor corresponding to an identifier was encountered when rendering the page.

        Arguments:
            page: The page where the anchor was found.
            identifier: The identifier to register.
            anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
            title: The title of the anchor (optional).
            primary: Whether this anchor is the primary one for the identifier.
        """
        # YORE: Bump 2: Remove block.
        if isinstance(page, str):
            try:
                page = self._url_to_page[page]
            except KeyError:
                page = self.current_page

        url = f"{page.url}#{anchor or identifier}"
        url_map = self._primary_url_map if primary else self._secondary_url_map
        # Keep registration order, without duplicates.
        urls = url_map.setdefault(identifier, [])
        if url not in urls:
            urls.append(url)
        # The first title and the first page registered for a URL win.
        if title and url not in self._title_map:
            self._title_map[url] = title
        if self.record_backlinks and url not in self._backlink_page_map:
            self._backlink_page_map[url] = page

    def register_url(self, identifier: str, url: str) -> None:
        """Register that the identifier should be turned into a link to this URL.

        Arguments:
            identifier: The new identifier.
            url: The absolute URL (including anchor, if needed) where this item can be found.
        """
        self._abs_url_map[identifier] = url

    @staticmethod
    def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
        """Return the closest URL to the current page.

        Arguments:
            from_url: The URL of the base page, from which we link towards the targeted pages.
            urls: A list of URLs to choose from.
            qualifier: The type of URLs we are choosing from.

        Returns:
            The closest URL to the current page.
        """
        base_url = URL(from_url)

        # Walk up from the current page until some candidate URLs live under the base URL.
        while True:
            if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]:
                break
            base_url = base_url.parent
            if not base_url.name:
                break

        if not candidates:
            _log.warning(
                "Could not find closest %s URL (from %s, candidates: %s). "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                qualifier,
                from_url,
                urls,
            )
            return urls[0]

        # Fewest path separators wins; `min` returns the first of equally close candidates.
        winner = min(candidates, key=lambda c: c.count("/"))
        _log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
        return winner

    def _get_urls(self, identifier: str) -> tuple[list[str], str]:
        """Return the URLs registered for an identifier, with their qualifier.

        Arguments:
            identifier: The identifier to look up.

        Returns:
            The list of URLs and either `"primary"` or `"secondary"`, primary URLs taking precedence.

        Raises:
            KeyError: When the identifier has neither primary nor secondary URLs.
        """
        try:
            return self._primary_url_map[identifier], "primary"
        except KeyError:
            return self._secondary_url_map[identifier], "secondary"

    def _get_item_url(
        self,
        identifier: str,
        from_url: str | None = None,
        # YORE: Bump 2: Remove line.
        fallback: Callable[[str], Sequence[str]] | None = None,
    ) -> str:
        """Return the (non-relative) URL registered for an identifier.

        Primary URLs are tried first, then secondary ones, then absolute ones,
        and finally the identifiers returned by the deprecated fallback, if any.

        Arguments:
            identifier: The identifier to resolve.
            from_url: The URL of the base page, used to pick the closest URL when several are registered.
            fallback: A callable returning alternative identifiers to try (deprecated).

        Returns:
            The URL for the identifier.

        Raises:
            KeyError: When no URL could be found for the identifier.
        """
        try:
            urls, qualifier = self._get_urls(identifier)
        except KeyError:
            # YORE: Bump 2: Replace block with line 2.
            if identifier in self._abs_url_map:
                return self._abs_url_map[identifier]
            if fallback:
                new_identifiers = fallback(identifier)
                for new_identifier in new_identifiers:
                    with contextlib.suppress(KeyError):
                        url = self._get_item_url(new_identifier)
                        self._secondary_url_map[identifier] = [url]
                        return url
            raise

        if len(urls) > 1:
            if (self.config.resolve_closest or qualifier == "secondary") and from_url is not None:
                return self._get_closest_url(from_url, urls, qualifier)
            _log.warning(
                "Multiple %s URLs found for '%s': %s. "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                qualifier,
                identifier,
                urls,
            )
        return urls[0]

    def get_item_url(
        self,
        identifier: str,
        from_url: str | None = None,
        # YORE: Bump 2: Remove line.
        fallback: Callable[[str], Sequence[str]] | None = None,
    ) -> tuple[str, str | None]:
        """Return a site-relative URL with anchor to the identifier, if it's present anywhere.

        Arguments:
            identifier: The anchor (without '#').
            from_url: The URL of the base page, from which we link towards the targeted pages.
            fallback: A callable returning alternative identifiers to try (deprecated).

        Returns:
            A tuple of the URL and the title registered for it (or `None`).
            The URL is made relative to `from_url` when `from_url` is given
            and the URL has neither a scheme nor a network location.

        Raises:
            KeyError: When no URL could be found for the identifier.
        """
        # YORE: Bump 2: Replace `, fallback` with `` within line.
        url = self._get_item_url(identifier, from_url, fallback)
        # Titles are keyed by the URL as registered, so look it up before making the URL relative.
        title = self._title_map.get(url) or None
        if from_url is not None:
            parsed = urlsplit(url)
            if not parsed.scheme and not parsed.netloc:
                url = relative_url(from_url, url)
        return url, title

    # YORE: Bump 2: Remove block.
    # ----------------------------------------------------------------------- #
    # Deprecated API                                                          #
    # ----------------------------------------------------------------------- #
    @property
    def get_fallback_anchor(self) -> Callable[[str], tuple[str, ...]] | None:
        """Fallback anchors getter."""
        return self._get_fallback_anchor

    # YORE: Bump 2: Remove block.
    @get_fallback_anchor.setter
    def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) -> None:
        """Fallback anchors setter."""
        self._get_fallback_anchor = value
        if value is not None:
            warn(
                "Setting a fallback anchor function is deprecated and will be removed in a future release.",
                DeprecationWarning,
                stacklevel=2,
            )