Coverage for src/mkdocs_autorefs/plugin.py: 76.55%

111 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-10 16:33 +0100

1"""This module contains the "mkdocs-autorefs" plugin. 

2 

3After each page is processed by the Markdown converter, this plugin stores absolute URLs of every HTML anchor 

4it finds to later be able to fix unresolved references. 

5It stores them during the [`on_page_content` event hook](https://www.mkdocs.org/user-guide/plugins/#on_page_content). 

6 

7Just before writing the final HTML to the disc, during the 

8[`on_post_page` event hook](https://www.mkdocs.org/user-guide/plugins/#on_post_page), 

9this plugin searches for references of the form `[identifier][]` or `[title][identifier]` that were not resolved, 

10and fixes them using the previously stored identifier-URL mapping. 

11""" 

12 

13from __future__ import annotations 

14 

15import contextlib 

16import functools 

17import logging 

18from pathlib import PurePosixPath as URL # noqa: N814 

19from typing import TYPE_CHECKING, Any, Callable 

20from urllib.parse import urlsplit 

21 

22from mkdocs.config.base import Config 

23from mkdocs.config.config_options import Type 

24from mkdocs.plugins import BasePlugin 

25from mkdocs.structure.pages import Page 

26 

27from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url 

28 

29if TYPE_CHECKING: 

30 from collections.abc import Sequence 

31 

32 from mkdocs.config.defaults import MkDocsConfig 

33 from mkdocs.structure.pages import Page 

34 from mkdocs.structure.toc import AnchorLink 

35 

# Module-level logger used by everything below.
# `get_plugin_logger` is imported inside the `try` so that an `ImportError` (raised when the
# installed MkDocs does not provide it) falls through to a standard-library logger
# created under the `mkdocs.plugins.` namespace instead.
try:
    from mkdocs.plugins import get_plugin_logger

    log = get_plugin_logger(__name__)
except ImportError:
    # TODO: remove once support for MkDocs <1.5 is dropped
    log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]

43 

44 

class AutorefsConfig(Config):
    """Configuration options for the `autorefs` plugin.

    Options are declared as class attributes. The plugin reads them through its `config`
    attribute (for example `self.config.resolve_closest`).
    """

    # Boolean option, disabled by default.
    resolve_closest = Type(bool, default=False)
    """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier.

    By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page".

    For example, if you link to identifier `hello` from page `foo/bar/`,
    and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages,
    autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`.

    If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs.
    If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found.

    When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL.
    """

62 

63 

class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    This plugin defines the following event hooks:

    - `on_config`
    - `on_page_markdown`
    - `on_page_content`
    - `on_post_page`

    Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    for more information about its plugin system.
    """

    # When true, `on_page_content` registers every table-of-contents anchor of the page as a primary URL.
    scan_toc: bool = True
    # URL of the page currently being converted; updated by `on_page_markdown`.
    current_page: str | None = None
    # Forwarded to `fix_refs` as its `_legacy_refs` argument in `on_post_page`.
    legacy_refs: bool = True

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()

        # The plugin uses three URL maps, one for "primary" URLs, one for "secondary" URLs,
        # and one for "absolute" URLs.
        #
        # - A primary URL is an identifier that links to a specific anchor on a page.
        # - A secondary URL is an alias of an identifier that links to the same anchor as the identifier's primary URL.
        #   Primary URLs with these aliases as identifiers may or may not be rendered later.
        # - An absolute URL is an identifier that links to an external resource.
        #   These URLs are typically registered by mkdocstrings when loading object inventories.
        #
        # For example, mkdocstrings registers a primary URL for each heading rendered in a page.
        # Then, for each alias of this heading's identifier, it registers a secondary URL.
        #
        # We need to keep track of whether an identifier is primary or secondary,
        # to give it precedence when resolving cross-references.
        # We wouldn't want to log a warning if there is a single primary URL and one or more secondary URLs,
        # instead we want to use the primary URL without any warning.
        #
        # - A single primary URL mapped to an identifier? Use it.
        # - Multiple primary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
        # - No primary URL mapped to an identifier, but a secondary URL mapped? Use it.
        # - Multiple secondary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
        # - No secondary URL mapped to an identifier? Try using absolute URLs
        #   (typically registered by loading inventories in mkdocstrings).
        #
        # This logic unfolds in `_get_item_url`.
        self._primary_url_map: dict[str, list[str]] = {}
        self._secondary_url_map: dict[str, list[str]] = {}
        self._abs_url_map: dict[str, str] = {}

        # Optional callable suggesting alternative identifiers for an unknown one.
        # It is handed to `get_item_url` as `fallback` in `on_post_page`; nothing in this class sets it.
        self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None

    def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
        """Register that an anchor corresponding to an identifier was encountered when rendering the page.

        The URL `page#anchor` is appended to the list of URLs known for the identifier,
        unless that exact URL is already in the list.

        Arguments:
            page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
            identifier: The identifier to register.
            anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
            primary: Whether this anchor is the primary one for the identifier.
        """
        page_anchor = f"{page}#{anchor or identifier}"
        url_map = self._primary_url_map if primary else self._secondary_url_map
        # `setdefault` creates the list on first registration and returns the existing one afterwards.
        registered = url_map.setdefault(identifier, [])
        if page_anchor not in registered:
            registered.append(page_anchor)

    def register_url(self, identifier: str, url: str) -> None:
        """Register that the identifier should be turned into a link to this URL.

        A later registration for the same identifier replaces the earlier one.

        Arguments:
            identifier: The new identifier.
            url: The absolute URL (including anchor, if needed) where this item can be found.
        """
        self._abs_url_map[identifier] = url

    @staticmethod
    def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
        """Return the closest URL to the current page.

        Starting from `from_url`, the base is moved up one path segment at a time until
        at least one of `urls` is located under it. Among the URLs under that base,
        the one containing the fewest `/` characters wins (the first one on ties).

        Arguments:
            from_url: The URL of the base page, from which we link towards the targeted pages.
            urls: A list of URLs to choose from. Must not be empty.
            qualifier: The type of URLs we are choosing from (only used in the warning message).

        Returns:
            The closest URL to the current page, or the first of `urls` (with a warning) when none is close.
        """
        base_url = URL(from_url)

        while True:
            if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]:
                break
            base_url = base_url.parent
            # Stop before testing a parent that has no name (the top of the path):
            # at that point nothing is considered close to the current page.
            if not base_url.name:
                break

        if not candidates:
            log.warning(
                "Could not find closest %s URL (from %s, candidates: %s). "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                qualifier,
                from_url,
                urls,
            )
            return urls[0]

        # `min` returns the first minimal item, so a single candidate or a tie yields the earliest URL.
        winner = min(candidates, key=lambda candidate: candidate.count("/"))
        log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
        return winner

    def _get_urls(self, identifier: str) -> tuple[list[str], str]:
        """Return the URLs registered for an identifier, with their kind.

        Primary URLs take precedence over secondary ones.

        Arguments:
            identifier: The identifier to look up.

        Returns:
            The list of URLs, and the qualifier `"primary"` or `"secondary"`.

        Raises:
            KeyError: When the identifier is in neither the primary nor the secondary map.
        """
        try:
            return self._primary_url_map[identifier], "primary"
        except KeyError:
            return self._secondary_url_map[identifier], "secondary"

    def _get_item_url(
        self,
        identifier: str,
        fallback: Callable[[str], Sequence[str]] | None = None,
        from_url: str | None = None,
    ) -> str:
        """Resolve an identifier to a URL, without making it relative to a page.

        Lookup order: primary URLs, secondary URLs, absolute URLs, then the identifiers
        suggested by `fallback`.

        Arguments:
            identifier: The identifier to resolve.
            fallback: An optional function to suggest alternative identifiers to try on failure.
            from_url: The URL of the page we link from, used to pick the closest URL
                when several are registered and `resolve_closest` is enabled.

        Returns:
            The URL registered for the identifier (or for one of its fallback identifiers).

        Raises:
            KeyError: When neither the identifier nor any fallback identifier is known.
        """
        try:
            urls, qualifier = self._get_urls(identifier)
        except KeyError:
            if identifier in self._abs_url_map:
                return self._abs_url_map[identifier]
            if fallback:
                for new_identifier in fallback(identifier):
                    # An unknown fallback identifier raises `KeyError`: ignore it and try the next one.
                    with contextlib.suppress(KeyError):
                        # NOTE(review): no `from_url` is passed here, so `resolve_closest`
                        # is not applied to fallback identifiers - confirm this is intended.
                        url = self._get_item_url(new_identifier)
                        # Remember the result as a secondary URL so the next lookup succeeds directly.
                        self._secondary_url_map[identifier] = [url]
                        return url
            # Nothing matched: propagate the original `KeyError` to the caller.
            raise

        if len(urls) > 1:
            if self.config.resolve_closest and from_url is not None:
                return self._get_closest_url(from_url, urls, qualifier)
            log.warning(
                "Multiple %s URLs found for '%s': %s. "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                qualifier,
                identifier,
                urls,
            )
        return urls[0]

    def get_item_url(
        self,
        identifier: str,
        from_url: str | None = None,
        fallback: Callable[[str], Sequence[str]] | None = None,
    ) -> str:
        """Return a site-relative URL with anchor to the identifier, if it's present anywhere.

        Arguments:
            identifier: The anchor (without '#').
            from_url: The URL of the base page, from which we link towards the targeted pages.
            fallback: An optional function to suggest alternative anchors to try on failure.

        Returns:
            A site-relative URL. When `from_url` is given and the resolved URL has neither scheme
            nor network location, the URL is made relative to `from_url`.

        Raises:
            KeyError: When the identifier cannot be resolved.
        """
        url = self._get_item_url(identifier, fallback, from_url)
        if from_url is not None:
            parsed = urlsplit(url)
            # URLs with a scheme or a network location point outside the site: leave them untouched.
            if not parsed.scheme and not parsed.netloc:
                return relative_url(from_url, url)
        return url

    def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
        """Instantiate our Markdown extension.

        Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
        In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.references.AutorefsExtension]
        and add it to the list of Markdown extensions used by `mkdocs`.

        Arguments:
            config: The MkDocs config object.

        Returns:
            The modified config.
        """
        log.debug("Adding AutorefsExtension to the list")
        config["markdown_extensions"].append(AutorefsExtension(self))
        return config

    def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Remember which page is the current one.

        Arguments:
            markdown: Input Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same Markdown. We only use this hook to keep a reference to the current page URL,
            used during Markdown conversion by the anchor scanner tree processor.
        """
        self.current_page = page.url
        return markdown

    def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Map anchors to URLs.

        Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
        In this hook, we map the ID of every anchor found in the table of contents to its URL
        (the page URL followed by `#` and the ID). Nothing is mapped when `scan_toc` is false.
        This mapping will be used later to fix unresolved reference of the form `[title][identifier]` or
        `[identifier][]`.

        Arguments:
            html: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same HTML. We only use this hook to map anchors to URLs.
        """
        if self.scan_toc:
            log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
            for item in page.toc.items:
                self.map_urls(page.url, item)
        return html

    def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
        """Recurse on every anchor to map its ID to its URL.

        This method populates `self._primary_url_map` by side-effect.

        Arguments:
            base_url: The base URL to use as a prefix for each anchor's relative URL.
            anchor: The anchor to process and to recurse on.
        """
        self.register_anchor(base_url, anchor.id, primary=True)
        for child in anchor.children:
            self.map_urls(base_url, child)

    def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Fix cross-references.

        Hook for the [`on_post_page` event](https://www.mkdocs.org/user-guide/plugins/#on_post_page).
        In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
        Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
        It uses the native Markdown syntax so it's easy to remember and use.

        We log a warning for each reference that we couldn't map to an URL, but try to be smart and ignore identifiers
        that do not look legitimate (sometimes documentation can contain strings matching
        our [`AUTO_REF_RE`][mkdocs_autorefs.references.AUTO_REF_RE] regular expression that did not intend to reference anything).
        We currently ignore references when their identifier contains a space or a slash.

        Arguments:
            output: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            Modified HTML.
        """
        log.debug("Fixing references in page %s", page.file.src_path)

        # Bind the current page and the fallback so that `fix_refs` only has to supply the identifier.
        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)

        # Skip building the messages altogether when warnings are not going to be emitted.
        if unmapped and log.isEnabledFor(logging.WARNING):
            for ref, context in unmapped:
                # The origin details are only available when a context was attached to the reference.
                message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
                log.warning(f"{page.file.src_path}: {message}Could not find cross-reference target '{ref}'")

        return fixed_output

334 log.warning(f"{page.file.src_path}: {message}Could not find cross-reference target '{ref}'") 

335 

336 return fixed_output