Coverage for src/mkdocs_autorefs/plugin.py: 76.55%
111 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-10 16:33 +0100
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-10 16:33 +0100
"""This module contains the "mkdocs-autorefs" plugin.

After each page is processed by the Markdown converter, this plugin stores the absolute URL of every HTML anchor
it finds, to later be able to fix unresolved references.
It stores them during the [`on_page_content` event hook](https://www.mkdocs.org/user-guide/plugins/#on_page_content).

Just before writing the final HTML to disk, during the
[`on_post_page` event hook](https://www.mkdocs.org/user-guide/plugins/#on_post_page),
this plugin searches for references of the form `[identifier][]` or `[title][identifier]` that were not resolved,
and fixes them using the previously stored identifier-URL mapping.
"""
13from __future__ import annotations
15import contextlib
16import functools
17import logging
18from pathlib import PurePosixPath as URL # noqa: N814
19from typing import TYPE_CHECKING, Any, Callable
20from urllib.parse import urlsplit
22from mkdocs.config.base import Config
23from mkdocs.config.config_options import Type
24from mkdocs.plugins import BasePlugin
25from mkdocs.structure.pages import Page
27from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
29if TYPE_CHECKING:
30 from collections.abc import Sequence
32 from mkdocs.config.defaults import MkDocsConfig
33 from mkdocs.structure.pages import Page
34 from mkdocs.structure.toc import AnchorLink
# Module-level logger used by everything in this file.
#
# Only the import is guarded: keeping the `get_plugin_logger(...)` call out of the
# `try` body ensures that an unrelated `ImportError` raised while building the logger
# is not silently mistaken for "this MkDocs version has no `get_plugin_logger`".
try:
    from mkdocs.plugins import get_plugin_logger
except ImportError:
    # TODO: remove once support for MkDocs <1.5 is dropped
    log = logging.getLogger(f"mkdocs.plugins.{__name__}")
else:
    log = get_plugin_logger(__name__)  # type: ignore[assignment]
class AutorefsConfig(Config):
    """Options accepted by the `autorefs` plugin in the MkDocs configuration."""

    resolve_closest = Type(bool, default=False)
    """Whether an autoref is resolved to the closest URL when an identifier maps to several URLs.

    "Closest" combines two criteria: the URL must be relative to the current page,
    and it must be at the shortest distance from the current page.

    For example, when linking to identifier `hello` from page `foo/bar/`,
    with the identifier found in pages `foo/`, `foo/baz/` and `foo/bar/baz/qux/`,
    autorefs resolves to `foo/bar/baz/qux`, the only URL relative to `foo/bar/`.

    When several URLs are equally close, the first of them is used.
    When no URL is close to the current page, a warning is logged and the first URL found is used.

    When false and an identifier maps to several URLs, a warning is logged and the first URL is used.
    """
class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    The plugin implements these event hooks:

    - `on_config`
    - `on_page_markdown`
    - `on_page_content`
    - `on_post_page`

    The [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    explains how its plugin system works.
    """

    # When true, `on_page_content` registers every anchor of the page's table of contents.
    scan_toc: bool = True
    # URL of the page being converted; updated by `on_page_markdown`.
    current_page: str | None = None
    # Forwarded to `fix_refs` as `_legacy_refs` in `on_post_page`.
    legacy_refs: bool = True

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()

        # Three URL maps are maintained: "primary", "secondary" and "absolute".
        #
        # - Primary: the identifier links to a specific anchor on a page.
        # - Secondary: the identifier is an alias pointing to the same anchor
        #   as another identifier's primary URL. Primary URLs using these aliases
        #   as identifiers may or may not be rendered later.
        # - Absolute: the identifier links to an external resource. Such URLs are
        #   typically registered by mkdocstrings when loading object inventories.
        #
        # For example, mkdocstrings registers a primary URL for each heading rendered
        # in a page, then a secondary URL for each alias of that heading's identifier.
        #
        # Distinguishing primary from secondary lets primary URLs take precedence:
        # a single primary URL accompanied by one or more secondary URLs must be
        # used silently rather than trigger a "multiple URLs" warning.
        #
        # Resolution order (implemented in `_get_item_url`):
        #
        # 1. One primary URL for the identifier: use it.
        # 2. Several primary URLs: use the first, or the closest if configured so.
        # 3. No primary URL but one secondary URL: use it.
        # 4. Several secondary URLs: use the first, or the closest if configured so.
        # 5. No secondary URL either: try the absolute URLs
        #    (typically registered by loading inventories in mkdocstrings).
        self._primary_url_map: dict[str, list[str]] = {}
        self._secondary_url_map: dict[str, list[str]] = {}
        self._abs_url_map: dict[str, str] = {}

        self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None

    def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
        """Record that an anchor for the given identifier was seen while rendering a page.

        Registering the same page/anchor pair twice for an identifier has no further effect.

        Arguments:
            page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
            identifier: The identifier to register.
            anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
            primary: Whether this anchor is the primary one for the identifier.
        """
        target = f"{page}#{anchor or identifier}"
        if primary:
            registry = self._primary_url_map
        else:
            registry = self._secondary_url_map
        known_targets = registry.setdefault(identifier, [])
        if target not in known_targets:
            known_targets.append(target)

    def register_url(self, identifier: str, url: str) -> None:
        """Record that the identifier must be turned into a link to the given URL.

        A URL previously registered for the same identifier is overwritten.

        Arguments:
            identifier: The new identifier.
            url: The absolute URL (including anchor, if needed) where this item can be found.
        """
        self._abs_url_map[identifier] = url

    @staticmethod
    def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
        """Pick, among several URLs, the one closest to the current page.

        The search starts at the current page and widens to each parent path in turn,
        stopping at the first level under which at least one URL is found.

        Arguments:
            from_url: The URL of the base page, from which we link towards the targeted pages.
            urls: A list of URLs to choose from.
            qualifier: The type of URLs we are choosing from.

        Returns:
            The closest URL to the current page, or the first URL of the list
            (after logging a warning) when none is close.
        """
        scope = URL(from_url)
        nearby = [url for url in urls if URL(url).is_relative_to(scope)]
        while not nearby:
            # Nothing found under this scope: widen it by one level.
            scope = scope.parent
            if not scope.name:
                # The widened scope has no name left: give up without testing it.
                break
            nearby = [url for url in urls if URL(url).is_relative_to(scope)]

        if not nearby:
            log.warning(
                "Could not find closest %s URL (from %s, candidates: %s). "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                qualifier,
                from_url,
                urls,
            )
            return urls[0]

        if len(nearby) == 1:
            winner = nearby[0]
        else:
            # Fewest slashes wins; `min` keeps the first URL in case of a tie.
            winner = min(nearby, key=lambda candidate: candidate.count("/"))
        log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
        return winner

    def _get_urls(self, identifier: str) -> tuple[list[str], str]:
        """Return the URLs registered for an identifier, along with their kind.

        Primary URLs take precedence over secondary ones.

        Arguments:
            identifier: The identifier to look up.

        Returns:
            The list of URLs, and `"primary"` or `"secondary"`.

        Raises:
            KeyError: When the identifier has neither primary nor secondary URLs.
        """
        if identifier in self._primary_url_map:
            return self._primary_url_map[identifier], "primary"
        return self._secondary_url_map[identifier], "secondary"

    def _get_item_url(
        self,
        identifier: str,
        fallback: Callable[[str], Sequence[str]] | None = None,
        from_url: str | None = None,
    ) -> str:
        """Resolve an identifier to a registered URL.

        Arguments:
            identifier: The identifier to resolve.
            fallback: An optional function suggesting alternative identifiers to try on failure.
            from_url: The URL of the page we link from, used to select the closest URL.

        Returns:
            The URL registered for the identifier (or for one of its fallback identifiers).

        Raises:
            KeyError: When neither the identifier nor any fallback identifier can be resolved.
        """
        try:
            urls, qualifier = self._get_urls(identifier)
        except KeyError:
            # Neither primary nor secondary: try absolute URLs, then fallback identifiers.
            if identifier in self._abs_url_map:
                return self._abs_url_map[identifier]
            if fallback:
                for alternative in fallback(identifier):
                    with contextlib.suppress(KeyError):
                        resolved = self._get_item_url(alternative)
                        # Remember the result so that the next lookup succeeds directly.
                        self._secondary_url_map[identifier] = [resolved]
                        return resolved
            raise

        if len(urls) <= 1:
            return urls[0]

        if self.config.resolve_closest and from_url is not None:
            return self._get_closest_url(from_url, urls, qualifier)

        log.warning(
            "Multiple %s URLs found for '%s': %s. "
            "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
            qualifier,
            identifier,
            urls,
        )
        return urls[0]

    def get_item_url(
        self,
        identifier: str,
        from_url: str | None = None,
        fallback: Callable[[str], Sequence[str]] | None = None,
    ) -> str:
        """Return a site-relative URL with anchor to the identifier, if it's present anywhere.

        When `from_url` is given and the resolved URL has neither scheme nor network location,
        the URL is made relative to `from_url`. Otherwise it is returned as registered.

        Arguments:
            identifier: The anchor (without '#').
            from_url: The URL of the base page, from which we link towards the targeted pages.
            fallback: An optional function to suggest alternative anchors to try on failure.

        Returns:
            A site-relative URL.
        """
        resolved = self._get_item_url(identifier, fallback, from_url)
        if from_url is None:
            return resolved
        parts = urlsplit(resolved)
        if parts.scheme or parts.netloc:
            return resolved
        return relative_url(from_url, resolved)

    def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
        """Instantiate our Markdown extension.

        Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
        An [`AutorefsExtension`][mkdocs_autorefs.references.AutorefsExtension] bound to this plugin
        is created and appended to the list of Markdown extensions used by `mkdocs`.

        Arguments:
            config: The MkDocs config object.

        Returns:
            The modified config.
        """
        log.debug("Adding AutorefsExtension to the list")
        extensions = config["markdown_extensions"]
        extensions.append(AutorefsExtension(self))
        return config

    def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Remember which page is the current one.

        Arguments:
            markdown: Input Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The Markdown, unchanged. This hook only keeps a reference to the current page URL,
            used during Markdown conversion by the anchor scanner tree processor.
        """
        self.current_page = page.url
        return markdown

    def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Map anchors to URLs.

        Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
        Unless `scan_toc` is false, the ID of every anchor found in the page's table of contents
        is mapped to the anchor's URL. The mapping is used later to fix unresolved references
        of the form `[title][identifier]` or `[identifier][]`.

        Arguments:
            html: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The HTML, unchanged. This hook only maps anchors to URLs.
        """
        if not self.scan_toc:
            return html

        log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
        for toc_entry in page.toc.items:
            self.map_urls(page.url, toc_entry)
        return html

    def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
        """Register an anchor and, recursively, all of its children.

        This method populates `self._primary_url_map` by side-effect.

        Arguments:
            base_url: The base URL to use as a prefix for each anchor's relative URL.
            anchor: The anchor to process and to recurse on.
        """
        self.register_anchor(base_url, anchor.id, primary=True)
        for sub_anchor in anchor.children:
            self.map_urls(base_url, sub_anchor)

    def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Fix cross-references.

        Hook for the [`on_post_page` event](https://www.mkdocs.org/user-guide/plugins/#on_post_page).
        Unresolved references of the form `[title][identifier]` or `[identifier][]` are fixed here,
        which lets users of `autorefs` cross-reference objects from their documentation strings
        with the native Markdown syntax.

        A warning is logged for each reference that could not be mapped to a URL.
        Identifiers that do not look legitimate are ignored, since documentation can contain strings
        matching our [`AUTO_REF_RE`][mkdocs_autorefs.references.AUTO_REF_RE] regular expression
        that were not meant to reference anything. Currently, references whose identifier
        contains a space or a slash are ignored.

        Arguments:
            output: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            Modified HTML.
        """
        log.debug("Fixing references in page %s", page.file.src_path)

        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)

        # Skip building the messages when there is nothing to report or warnings are disabled.
        if not unmapped or not log.isEnabledFor(logging.WARNING):
            return fixed_output

        for ref, context in unmapped:
            if context:
                message = f"from {context.filepath}:{context.lineno}: ({context.origin}) "
            else:
                message = ""
            log.warning(f"{page.file.src_path}: {message}Could not find cross-reference target '{ref}'")

        return fixed_output