Coverage for src/mkdocs_autorefs/plugin.py: 82.17%
114 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-01 20:28 +0200
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-01 20:28 +0200
1"""This module contains the "mkdocs-autorefs" plugin.
3After each page is processed by the Markdown converter, this plugin stores the absolute URL of every HTML anchor
4it finds, so that unresolved references can be fixed later.
5It stores them during the [`on_page_content` event hook](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
7Just before writing the final HTML to the disc, during the
8[`on_post_page` event hook](https://www.mkdocs.org/user-guide/plugins/#on_post_page),
9this plugin searches for references of the form `[identifier][]` or `[title][identifier]` that were not resolved,
10and fixes them using the previously stored identifier-URL mapping.
11"""
13from __future__ import annotations
15import contextlib
16import functools
17import logging
18import sys
19from typing import TYPE_CHECKING, Any, Callable, Sequence
20from urllib.parse import urlsplit
22from mkdocs.config.base import Config
23from mkdocs.config.config_options import Type
24from mkdocs.plugins import BasePlugin
25from mkdocs.structure.pages import Page
27from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
29if TYPE_CHECKING:
30 from mkdocs.config.defaults import MkDocsConfig
31 from mkdocs.structure.pages import Page
32 from mkdocs.structure.toc import AnchorLink
# Module-level logger. MkDocs 1.5+ exposes `get_plugin_logger`; when that import
# fails we fall back to a standard library logger under the "mkdocs.plugins" namespace.
try:
    from mkdocs.plugins import get_plugin_logger

    log = get_plugin_logger(__name__)
except ImportError:
    # TODO: remove once support for MkDocs <1.5 is dropped
    log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]
# YORE: EOL 3.8: Remove block.
if sys.version_info < (3, 9):
    from pathlib import PurePosixPath

    class URL(PurePosixPath):
        """`PurePosixPath` subclass providing `is_relative_to` on Python versions older than 3.9."""

        def is_relative_to(self, *args: Any) -> bool:
            """Return whether this path is relative to the path described by `args`."""
            try:
                self.relative_to(*args)
            except ValueError:
                # `relative_to` signals "not a sub-path" by raising `ValueError`.
                return False
            else:
                return True

else:
    from pathlib import PurePosixPath as URL  # noqa: N814
class AutorefsConfig(Config):
    """Configuration options for the `autorefs` plugin.

    Options are declared as class attributes; each one is documented by the
    string literal that immediately follows it.
    """

    resolve_closest = Type(bool, default=False)
    """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier.

    By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page".

    For example, if you link to identifier `hello` from page `foo/bar/`,
    and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages,
    autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`.

    If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs.
    If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found.

    When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL.
    """
class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    This plugin defines the following event hooks:

    - `on_config`
    - `on_page_markdown`
    - `on_page_content`
    - `on_post_page`

    Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    for more information about its plugin system.
    """

    # Whether `on_page_content` registers the anchors found in each page's table of contents.
    scan_toc: bool = True
    # URL of the page currently being converted; set by `on_page_markdown`.
    current_page: str | None = None
    # Forwarded to `fix_refs` as `_legacy_refs` in `on_post_page`.
    legacy_refs: bool = True

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()
        # Identifier -> every "page#anchor" URL registered for it (see `register_anchor`).
        self._url_map: dict[str, list[str]] = {}
        # Identifier -> a single URL registered as-is (see `register_url`).
        self._abs_url_map: dict[str, str] = {}
        # Optional callable suggesting alternative identifiers; used as `fallback` in `on_post_page`.
        self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None

    def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
        """Register that an anchor corresponding to an identifier was encountered when rendering the page.

        The same `page#anchor` URL is never stored twice for a given identifier.

        Arguments:
            page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
            identifier: The HTML anchor (without '#') as a string.
            anchor: The anchor to use in the registered URL instead of the identifier, when given.
        """
        page_anchor = f"{page}#{anchor or identifier}"
        known_urls = self._url_map.setdefault(identifier, [])
        if page_anchor not in known_urls:
            known_urls.append(page_anchor)

    def register_url(self, identifier: str, url: str) -> None:
        """Register that the identifier should be turned into a link to this URL.

        Arguments:
            identifier: The new identifier.
            url: The absolute URL (including anchor, if needed) where this item can be found.
        """
        self._abs_url_map[identifier] = url

    @staticmethod
    def _get_closest_url(from_url: str, urls: list[str]) -> str:
        """Return the closest URL to the current page.

        Arguments:
            from_url: The URL of the base page, from which we link towards the targeted pages.
            urls: A list of URLs to choose from.

        Returns:
            The closest URL to the current page, or the first URL of the list
            (after logging a warning) when no candidate is found.
        """
        base_url = URL(from_url)

        # Walk up from the current page, one path component at a time, until at least one URL
        # is relative to the current base. The walk ends as soon as the parent has an empty
        # name: that parent is not tried as a base.
        while True:
            if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]:
                break
            base_url = base_url.parent
            if not base_url.name:
                break

        if not candidates:
            log.warning(
                "Could not find closest URL (from %s, candidates: %s). "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                from_url,
                urls,
            )
            return urls[0]

        # Fewest slashes wins; `min` returns the first candidate among equally short ones.
        winner = min(candidates, key=lambda candidate: candidate.count("/"))
        log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
        return winner

    def _get_item_url(
        self,
        identifier: str,
        fallback: Callable[[str], Sequence[str]] | None = None,
        from_url: str | None = None,
    ) -> str:
        """Look up the URL registered for an identifier, without making it relative.

        Lookup order: anchors registered with `register_anchor`, then URLs registered
        with `register_url`, then the identifiers suggested by `fallback`.

        Arguments:
            identifier: The identifier to resolve.
            fallback: An optional function suggesting alternative identifiers to try on failure.
            from_url: The URL of the page we link from, used to pick the closest URL
                when several are registered and `resolve_closest` is enabled.

        Returns:
            The URL registered for the identifier.

        Raises:
            KeyError: When neither the identifier nor any fallback identifier is known.
        """
        try:
            urls = self._url_map[identifier]
        except KeyError:
            if identifier in self._abs_url_map:
                return self._abs_url_map[identifier]
            if fallback:
                new_identifiers = fallback(identifier)
                for new_identifier in new_identifiers:
                    with contextlib.suppress(KeyError):
                        url = self._get_item_url(new_identifier)
                        # Remember the result so later lookups of `identifier` succeed directly.
                        self._url_map[identifier] = [url]
                        return url
            raise

        if len(urls) > 1:
            if self.config.resolve_closest and from_url is not None:
                return self._get_closest_url(from_url, urls)
            log.warning(
                "Multiple URLs found for '%s': %s. "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                identifier,
                urls,
            )
        return urls[0]

    def get_item_url(
        self,
        identifier: str,
        from_url: str | None = None,
        fallback: Callable[[str], Sequence[str]] | None = None,
    ) -> str:
        """Return a URL with anchor to the identifier, if it's present anywhere.

        Arguments:
            identifier: The anchor (without '#').
            from_url: The URL of the base page, from which we link towards the targeted pages.
            fallback: An optional function to suggest alternative anchors to try on failure.

        Returns:
            The URL registered for the identifier. When `from_url` is given and the URL has
            neither a scheme nor a network location, the result of `relative_url(from_url, url)`
            is returned instead.

        Raises:
            KeyError: When the identifier cannot be resolved.
        """
        url = self._get_item_url(identifier, fallback, from_url)
        if from_url is not None:
            parsed = urlsplit(url)
            # URLs with a scheme or a network location are returned untouched.
            if not parsed.scheme and not parsed.netloc:
                return relative_url(from_url, url)
        return url

    def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
        """Instantiate our Markdown extension.

        Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
        In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.references.AutorefsExtension]
        and add it to the list of Markdown extensions used by `mkdocs`.

        Arguments:
            config: The MkDocs config object.

        Returns:
            The modified config.
        """
        log.debug("Adding AutorefsExtension to the list")
        config["markdown_extensions"].append(AutorefsExtension(self))
        return config

    def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Remember which page is the current one.

        Arguments:
            markdown: Input Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same Markdown. We only use this hook to keep a reference to the current page URL,
            used during Markdown conversion by the anchor scanner tree processor.
        """
        self.current_page = page.url
        return markdown

    def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Map anchors to URLs.

        Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
        In this hook, we map the IDs of every anchor found in the table of contents to the anchors absolute URLs.
        This mapping will be used later to fix unresolved reference of the form `[title][identifier]` or
        `[identifier][]`.

        Nothing is mapped when `scan_toc` is false.

        Arguments:
            html: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same HTML. We only use this hook to map anchors to URLs.
        """
        if self.scan_toc:
            log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
            for item in page.toc.items:
                self.map_urls(page.url, item)
        return html

    def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
        """Recurse on every anchor to map its ID to its absolute URL.

        This method populates `self._url_map` by side-effect, through `register_anchor`.

        Arguments:
            base_url: The base URL to use as a prefix for each anchor's relative URL.
            anchor: The anchor to process and to recurse on.
        """
        self.register_anchor(base_url, anchor.id)
        for child in anchor.children:
            self.map_urls(base_url, child)

    def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Fix cross-references.

        Hook for the [`on_post_page` event](https://www.mkdocs.org/user-guide/plugins/#on_post_page).
        In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
        Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
        It uses the native Markdown syntax so it's easy to remember and use.

        We log a warning for each reference that we couldn't map to an URL, but try to be smart and ignore identifiers
        that do not look legitimate (sometimes documentation can contain strings matching
        our [`AUTO_REF_RE`][mkdocs_autorefs.references.AUTO_REF_RE] regular expression that did not intend to reference anything).
        We currently ignore references when their identifier contains a space or a slash.

        Arguments:
            output: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            Modified HTML.
        """
        log.debug("Fixing references in page %s", page.file.src_path)

        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)

        # Skip building the messages altogether when warnings would be discarded anyway.
        if unmapped and log.isEnabledFor(logging.WARNING):
            for ref, context in unmapped:
                message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
                log.warning(
                    "%s: %sCould not find cross-reference target '%s'",
                    page.file.src_path,
                    message,
                    ref,
                )

        return fixed_output