Coverage for src/mkdocs_autorefs/_internal/plugin.py: 74.90%
191 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-24 16:00 +0100
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-24 16:00 +0100
# This module contains the "mkdocs-autorefs" plugin.
#
# After each page is processed by the Markdown converter, this plugin stores the absolute URL
# of every HTML anchor it finds, to later be able to fix unresolved references.
#
# Once every page has been rendered and all identifiers and their URLs collected,
# the plugin fixes unresolved references in the HTML content of the pages.
9from __future__ import annotations
11import contextlib
12import functools
13import logging
14from collections import defaultdict
15from pathlib import PurePosixPath as URL # noqa: N814
16from typing import TYPE_CHECKING, Any, Callable, Literal
17from urllib.parse import urlsplit
18from warnings import warn
20from mkdocs.config.base import Config
21from mkdocs.config.config_options import Choice, Type
22from mkdocs.plugins import BasePlugin, event_priority
23from mkdocs.structure.pages import Page
25from mkdocs_autorefs._internal.backlinks import Backlink, BacklinkCrumb
26from mkdocs_autorefs._internal.references import AutorefsExtension, fix_refs, relative_url
28if TYPE_CHECKING:
29 from collections.abc import Sequence
31 from jinja2.environment import Environment
32 from mkdocs.config.defaults import MkDocsConfig
33 from mkdocs.structure.files import Files
34 from mkdocs.structure.nav import Section
35 from mkdocs.structure.toc import AnchorLink
# Module-level logger: use the MkDocs plugin logger factory when it can be imported,
# otherwise fall back to a standard-library logger named under the `mkdocs.plugins` namespace.
try:
    from mkdocs.plugins import get_plugin_logger

    _log = get_plugin_logger(__name__)
except ImportError:
    # TODO: Remove once support for MkDocs <1.5 is dropped.
    _log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]
class AutorefsConfig(Config):
    """Configuration options for the `autorefs` plugin."""

    # Consulted in `AutorefsPlugin._get_item_url` when an identifier maps to several URLs.
    # Note that several *secondary* URLs are resolved to the closest one regardless of this option,
    # as long as the URL of the page containing the reference is known.
    resolve_closest: bool = Type(bool, default=False)  # type: ignore[assignment]
    """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier.

    By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page".

    For example, if you link to identifier `hello` from page `foo/bar/`,
    and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages,
    autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`.

    If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs.
    If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found.

    When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL.
    """

    # The `"auto"` value is resolved once, in `AutorefsPlugin.on_config`, into either `"external"`
    # (Material theme with the `navigation.instant.preview` feature) or `True` (anything else).
    link_titles: bool | Literal["auto", "external"] = Choice((True, False, "auto", "external"), default="auto")  # type: ignore[assignment]
    """Whether to set titles on links.

    Such title attributes are displayed as tooltips when hovering over the links.

    - `"auto"`: autorefs will detect whether the instant preview feature of Material for MkDocs is enabled,
        and set titles on external links when it is, all links if it is not.
    - `"external"`: autorefs will set titles on external links only.
    - `True`: autorefs will set titles on all links.
    - `False`: autorefs will not set any title attributes on links.

    Titles are only set when they are different from the link's text.
    Titles are constructed from the linked heading's original title,
    optionally appending the identifier for API objects.
    """

    # The `"auto"` value is resolved once, in `AutorefsPlugin.on_config`: tags are kept only when the theme
    # is Material *and* its `content.tooltips` feature is enabled; in every other case they are stripped.
    strip_title_tags: bool | Literal["auto"] = Choice((True, False, "auto"), default="auto")  # type: ignore[assignment]
    """Whether to strip HTML tags from link titles.

    Some themes support HTML in link titles, but others do not.

    - `"auto"`: strip tags unless the Material for MkDocs theme is detected.
    """
class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    This plugin defines the following event hooks:

    - `on_config`, to configure itself
    - `on_page_markdown`, to set the current page in order for the Markdown extension to use it
    - `on_page_content`, to map the anchors found in each page's table of contents to their URLs
    - `on_env`, to apply cross-references once all pages have been rendered

    Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    for more information about its plugin system.
    """

    # When true, `on_page_content` walks each page's table of contents and registers every anchor.
    scan_toc: bool = True
    """Whether to scan the table of contents for identifiers to map to URLs."""
    # When true, `on_env` hands a backlink recorder to `fix_refs`,
    # and `register_anchor` remembers which page each anchor URL belongs to.
    record_backlinks: bool = False
    """Whether to record backlinks."""
    # Updated by both `on_page_markdown` and `on_page_content`.
    current_page: Page | None = None
    """The current page being processed."""
    # YORE: Bump 2: Remove block.
    # Forwarded to `fix_refs` as `_legacy_refs` in `on_env`.
    legacy_refs: bool = True
    """Whether to support legacy references."""
def __init__(self) -> None:
    """Create the empty registries that the plugin fills while MkDocs builds the site."""
    super().__init__()

    # Identifiers are resolved to URLs through three maps:
    #
    # - primary URLs: an identifier pointing to a specific anchor on a page;
    # - secondary URLs: an alias of an identifier, pointing to the same anchor as that identifier's
    #   primary URL (primary URLs using these aliases as identifiers may or may not be rendered later);
    # - absolute URLs: an identifier pointing to an external resource, typically registered
    #   by mkdocstrings when loading object inventories.
    #
    # For example, mkdocstrings registers a primary URL for each heading rendered in a page,
    # then a secondary URL for each alias of this heading's identifier.
    #
    # Primary and secondary URLs are kept apart so that primary ones take precedence:
    # a single primary URL accompanied by any number of secondary URLs must resolve
    # to the primary URL without logging any warning.
    #
    # Lookup order, as implemented in `_get_item_url`:
    #
    # 1. Primary URLs. A single one is used as is. Among several, the closest one is used when
    #    `resolve_closest` is enabled and the origin page is known, otherwise the first one (with a warning).
    # 2. Secondary URLs. A single one is used as is. Among several, the closest one is used when
    #    the origin page is known, otherwise the first one (with a warning).
    # 3. Absolute URLs (typically registered by loading inventories in mkdocstrings).
    self._primary_url_map: dict[str, list[str]] = {}
    self._secondary_url_map: dict[str, list[str]] = {}
    self._abs_url_map: dict[str, str] = {}

    # Anchor URL -> title of the anchor, and anchor URL -> page holding the anchor.
    self._title_map: dict[str, str] = {}
    self._backlink_page_map: dict[str, Page] = {}
    # Identifier -> backlink type -> URLs (with anchor) of the places referencing the identifier.
    self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))

    # YORE: Bump 2: Remove line.
    self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
    # YORE: Bump 2: Remove line.
    self._url_to_page: dict[str, Page] = {}

    # Effective values of the `link_titles` and `strip_title_tags` options,
    # settled in `on_config` once the theme is known.
    self._link_titles: bool | Literal["external"] = True
    self._strip_title_tags: bool = False
155 # ----------------------------------------------------------------------- #
156 # MkDocs Hooks #
157 # ----------------------------------------------------------------------- #
def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
    """Register our Markdown extension and settle the `"auto"` options.

    Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
    In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.AutorefsExtension]
    and add it to the list of Markdown extensions used by `mkdocs`.
    We also compute the effective values of the `link_titles` and `strip_title_tags` options,
    which may depend on the theme and its enabled features.

    Arguments:
        config: The MkDocs config object.

    Returns:
        The modified config.
    """
    _log.debug("Adding AutorefsExtension to the list")
    config.markdown_extensions.append(AutorefsExtension(self))  # type: ignore[arg-type]

    # YORE: Bump 2: Remove block.
    # mkdocstrings still uses the `page` attribute as a string.
    # Fortunately, it does so in f-strings, so we can simply patch the `__str__` method
    # to render the URL.
    Page.__str__ = lambda page: page.url  # type: ignore[method-assign,attr-defined]

    def material_feature_enabled(feature: str) -> bool:
        """Tell whether the theme is named `material` and lists the given feature."""
        # The features are only looked up once the theme name has matched.
        return getattr(config.theme, "name", None) == "material" and feature in config.theme.get("features", ())

    if self.config.link_titles == "auto":
        # With instant previews enabled, titles are only set on external links.
        self._link_titles = "external" if material_feature_enabled("navigation.instant.preview") else True
    else:
        self._link_titles = self.config.link_titles

    if self.config.strip_title_tags == "auto":
        # Tags are kept only when the theme is Material with its tooltips feature enabled.
        self._strip_title_tags = not material_feature_enabled("content.tooltips")
    else:
        self._strip_title_tags = self.config.strip_title_tags

    return config
def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
    """Remember which page is the current one.

    The page is stored in `current_page`, and also indexed by its URL
    so that legacy callers passing a page URL instead of a page object can be served.

    Arguments:
        markdown: Input Markdown.
        page: The related MkDocs page instance.
        kwargs: Additional arguments passed by MkDocs.

    Returns:
        The same Markdown. We only use this hook to keep a reference to the current page URL,
        used during Markdown conversion by the anchor scanner tree processor.
    """
    # YORE: Bump 2: Remove line.
    self._url_to_page[page.url] = page
    self.current_page = page
    return markdown
def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
    """Register the URL of every anchor listed in the page's table of contents.

    Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
    The identifiers collected here are used later to fix unresolved references of the form
    `[title][identifier]` or `[identifier][]`. Nothing is collected when `scan_toc` is false.

    Arguments:
        html: HTML converted from Markdown.
        page: The related MkDocs page instance.
        kwargs: Additional arguments passed by MkDocs.

    Returns:
        The same HTML. We only use this hook to map anchors to URLs.
    """
    self.current_page = page
    if not self.scan_toc:
        return html

    # Collect `std`-domain URLs.
    _log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
    for toc_entry in page.toc.items:
        self.map_urls(page, toc_entry)
    return html
@event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:  # noqa: ARG002
    """Apply cross-references and collect backlinks.

    Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
    Every page that has content gets its unresolved references of the form `[title][identifier]`
    or `[identifier][]` fixed, which lets users of `autorefs` cross-reference objects
    from their documentation strings with the native Markdown syntax.

    A warning is logged for each reference that could not be mapped to a URL.

    Backlinks are collected during the same pass (when `record_backlinks` is enabled),
    so that the regular expression does not have to run on each page twice.

    Arguments:
        env: The MkDocs environment.
        config: The MkDocs config object.
        files: The list of files in the MkDocs project.

    Returns:
        The unmodified environment.
    """
    for file in files:
        # Files without a page, or whose page has no content, have nothing to fix.
        if not (file.page and file.page.content):
            continue
        _log.debug("Applying cross-refs in page %s", file.page.file.src_path)

        # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
        url_mapper = functools.partial(
            self.get_item_url,
            from_url=file.page.url,
            fallback=self.get_fallback_anchor,
        )
        backlink_recorder = (
            functools.partial(self._record_backlink, page_url=file.page.url) if self.record_backlinks else None
        )
        # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
        file.page.content, unmapped = fix_refs(
            file.page.content,
            url_mapper,
            record_backlink=backlink_recorder,
            link_titles=self._link_titles,
            strip_title_tags=self._strip_title_tags,
            _legacy_refs=self.legacy_refs,
        )

        # Skip building the messages when there is nothing to report or warnings are disabled.
        if not unmapped or not _log.isEnabledFor(logging.WARNING):
            continue
        for ref, context in unmapped:
            message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
            _log.warning(
                f"{file.page.file.src_path}: {message}Could not find cross-reference target '{ref}'",
            )

    return env
299 # ----------------------------------------------------------------------- #
300 # Utilities #
301 # ----------------------------------------------------------------------- #
302 def map_urls(self, page: Page, anchor: AnchorLink) -> None:
303 """Recurse on every anchor to map its ID to its absolute URL.
305 This method populates `self._primary_url_map` by side-effect.
307 Arguments:
308 page: The page containing the anchors.
309 anchor: The anchor to process and to recurse on.
310 """
311 # YORE: Bump 2: Remove block.
312 if isinstance(page, str):
313 try:
314 page = self._url_to_page[page]
315 except KeyError:
316 page = self.current_page
318 self.register_anchor(page, anchor.id, title=anchor.title, primary=True)
319 for child in anchor.children:
320 self.map_urls(page, child)
322 def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
323 """Record a backlink.
325 Arguments:
326 identifier: The target identifier.
327 backlink_type: The type of backlink.
328 backlink_anchor: The backlink target anchor.
329 page_url: The URL of the page containing the backlink.
330 """
331 # When we record backlinks, all identifiers have been registered.
332 # If an identifier is not found in the primary or secondary URL maps, it's an absolute URL,
333 # meaning it comes from an external source (typically an object inventory),
334 # and we don't need to record backlinks for it.
335 if identifier in self._primary_url_map or identifier in self._secondary_url_map:
336 self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")
def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[Backlink]]:
    """Return the backlinks to the given identifiers, relative to the given URL.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary of backlinks, with the type of reference as key and a set of backlinks as value.
        Each backlink wraps a tuple of navigation breadcrumbs,
        each breadcrumb being built from a title and a URL relative to `from_url`.
    """
    grouped: dict[str, set[Backlink]] = defaultdict(set)
    # Duplicate identifiers are only processed once.
    for identifier in set(identifiers):
        recorded = self._backlinks.get(identifier)
        if not recorded:
            continue
        for kind, urls in recorded.items():
            for url in urls:
                grouped[kind].add(self._crumbs(from_url, url))
    return grouped
def _crumbs(self, from_url: str, backlink_url: str) -> Backlink:
    """Build the navigation breadcrumbs leading to a backlink.

    Arguments:
        from_url: The URL of the page where the backlink is rendered.
        backlink_url: The URL (with anchor) of the place holding the reference.

    Returns:
        A backlink whose breadcrumbs go from the outermost parent of the backlink's page
        down to the backlink anchor itself, every URL being relative to `from_url`.

    Raises:
        KeyError: When no page was recorded for `backlink_url`.
    """
    source_page: Page = self._backlink_page_map[backlink_url]
    anchor_title = self._title_map.get(backlink_url, "")
    # Breadcrumbs are collected innermost first, and reversed at the end.
    trail: list[BacklinkCrumb] = [
        BacklinkCrumb(anchor_title, relative_url(from_url, backlink_url)),
        BacklinkCrumb(source_page.title, relative_url(from_url, source_page.url + "#")),
    ]
    node: Page | Section = source_page
    while node.parent:
        node = node.parent
        # Parents without a (non-empty) URL keep an empty one.
        url = getattr(node, "url", "")
        if url:
            url = relative_url(from_url, url + "#")
        trail.append(BacklinkCrumb(node.title, url))
    return Backlink(tuple(reversed(trail)))
def register_anchor(
    self,
    page: Page,
    identifier: str,
    anchor: str | None = None,
    *,
    title: str | None = None,
    primary: bool = True,
) -> None:
    """Record that an anchor matching an identifier was met while rendering a page.

    Arguments:
        page: The page where the anchor was found.
        identifier: The identifier to register.
        anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
        title: The title of the anchor (optional).
        primary: Whether this anchor is the primary one for the identifier.
    """
    # YORE: Bump 2: Remove block.
    if isinstance(page, str):
        # Legacy callers pass the page URL: fall back to the current page when the URL is unknown.
        page = self._url_to_page.get(page, self.current_page)

    url = f"{page.url}#{anchor or identifier}"
    registry = self._primary_url_map if primary else self._secondary_url_map
    # Each URL is listed once per identifier, in order of first registration.
    known_urls = registry.setdefault(identifier, [])
    if url not in known_urls:
        known_urls.append(url)
    # The first non-empty title registered for a URL wins.
    if title:
        self._title_map.setdefault(url, title)
    # Likewise, the first page registered for a URL wins.
    if self.record_backlinks:
        self._backlink_page_map.setdefault(url, page)
def register_url(self, identifier: str, url: str) -> None:
    """Register that the identifier should be turned into a link to this URL.

    The URL is stored in the map of absolute URLs, which is only consulted
    when the identifier has neither a primary nor a secondary URL.
    Registering the same identifier again overwrites the previous URL.

    Arguments:
        identifier: The new identifier.
        url: The absolute URL (including anchor, if needed) where this item can be found.
    """
    self._abs_url_map[identifier] = url
418 @staticmethod
419 def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
420 """Return the closest URL to the current page.
422 Arguments:
423 from_url: The URL of the base page, from which we link towards the targeted pages.
424 urls: A list of URLs to choose from.
425 qualifier: The type of URLs we are choosing from.
427 Returns:
428 The closest URL to the current page.
429 """
430 base_url = URL(from_url)
432 while True:
433 if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]:
434 break
435 base_url = base_url.parent
436 if not base_url.name:
437 break
439 if not candidates:
440 _log.warning(
441 "Could not find closest %s URL (from %s, candidates: %s). "
442 "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
443 qualifier,
444 from_url,
445 urls,
446 )
447 return urls[0]
449 winner = candidates[0] if len(candidates) == 1 else min(candidates, key=lambda c: c.count("/"))
450 _log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
451 return winner
453 def _get_urls(self, identifier: str) -> tuple[list[str], str]:
454 try:
455 return self._primary_url_map[identifier], "primary"
456 except KeyError:
457 return self._secondary_url_map[identifier], "secondary"
459 def _get_item_url(
460 self,
461 identifier: str,
462 from_url: str | None = None,
463 # YORE: Bump 2: Remove line.
464 fallback: Callable[[str], Sequence[str]] | None = None,
465 ) -> str:
466 try:
467 urls, qualifier = self._get_urls(identifier)
468 except KeyError:
469 # YORE: Bump 2: Replace block with line 2.
470 if identifier in self._abs_url_map:
471 return self._abs_url_map[identifier]
472 if fallback:
473 new_identifiers = fallback(identifier)
474 for new_identifier in new_identifiers:
475 with contextlib.suppress(KeyError):
476 url = self._get_item_url(new_identifier)
477 self._secondary_url_map[identifier] = [url]
478 return url
479 raise
481 if len(urls) > 1:
482 if (self.config.resolve_closest or qualifier == "secondary") and from_url is not None:
483 return self._get_closest_url(from_url, urls, qualifier)
484 _log.warning(
485 "Multiple %s URLs found for '%s': %s. "
486 "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
487 qualifier,
488 identifier,
489 urls,
490 )
491 return urls[0]
493 def get_item_url(
494 self,
495 identifier: str,
496 from_url: str | None = None,
497 # YORE: Bump 2: Remove line.
498 fallback: Callable[[str], Sequence[str]] | None = None,
499 ) -> tuple[str, str | None]:
500 """Return a site-relative URL with anchor to the identifier, if it's present anywhere.
502 Arguments:
503 identifier: The anchor (without '#').
504 from_url: The URL of the base page, from which we link towards the targeted pages.
506 Returns:
507 A site-relative URL.
508 """
509 # YORE: Bump 2: Replace `, fallback` with `` within line.
510 url = self._get_item_url(identifier, from_url, fallback)
511 title = self._title_map.get(url) or None
512 if from_url is not None:
513 parsed = urlsplit(url)
514 if not parsed.scheme and not parsed.netloc:
515 url = relative_url(from_url, url)
516 return url, title
518 # YORE: Bump 2: Remove block.
519 # ----------------------------------------------------------------------- #
520 # Deprecated API #
521 # ----------------------------------------------------------------------- #
@property
def get_fallback_anchor(self) -> Callable[[str], tuple[str, ...]] | None:
    """Return the legacy fallback function, or `None` when none was set."""
    return self._get_fallback_anchor

# YORE: Bump 2: Remove block.
@get_fallback_anchor.setter
def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) -> None:
    """Store the legacy fallback function, warning about its deprecation unless it is `None`."""
    self._get_fallback_anchor = value
    if value is None:
        # Resetting the fallback is not worth a warning.
        return
    warn(
        "Setting a fallback anchor function is deprecated and will be removed in a future release.",
        DeprecationWarning,
        stacklevel=2,
    )