Coverage for src/mkdocs_autorefs/plugin.py: 82.17%

114 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-01 20:28 +0200

1"""This module contains the "mkdocs-autorefs" plugin. 

2 

3After each page is processed by the Markdown converter, this plugin stores absolute URLs of every HTML anchor 

4it finds to later be able to fix unresolved references. 

5It stores them during the [`on_page_content` event hook](https://www.mkdocs.org/user-guide/plugins/#on_page_content). 

6 

7Just before writing the final HTML to the disc, during the 

8[`on_post_page` event hook](https://www.mkdocs.org/user-guide/plugins/#on_post_page), 

9this plugin searches for references of the form `[identifier][]` or `[title][identifier]` that were not resolved, 

10and fixes them using the previously stored identifier-URL mapping. 

11""" 

12 

13from __future__ import annotations 

14 

15import contextlib 

16import functools 

17import logging 

18import sys 

19from typing import TYPE_CHECKING, Any, Callable, Sequence 

20from urllib.parse import urlsplit 

21 

22from mkdocs.config.base import Config 

23from mkdocs.config.config_options import Type 

24from mkdocs.plugins import BasePlugin 

25from mkdocs.structure.pages import Page 

26 

27from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url 

28 

29if TYPE_CHECKING: 

30 from mkdocs.config.defaults import MkDocsConfig 

31 from mkdocs.structure.pages import Page 

32 from mkdocs.structure.toc import AnchorLink 

33 

# Module-level logger. Use MkDocs' `get_plugin_logger` helper when the installed
# MkDocs exposes it; if importing it fails, fall back to a standard library
# logger named under the "mkdocs.plugins" namespace.
try:
    from mkdocs.plugins import get_plugin_logger

    log = get_plugin_logger(__name__)
except ImportError:
    # TODO: remove once support for MkDocs <1.5 is dropped
    log = logging.getLogger(f"mkdocs.plugins.{__name__}")  # type: ignore[assignment]

41 

42 

# YORE: EOL 3.8: Remove block.
if sys.version_info < (3, 9):
    from pathlib import PurePosixPath

    class URL(PurePosixPath):
        """POSIX-style path providing `is_relative_to` on Python 3.8.

        `PurePath.is_relative_to` only exists in pathlib from Python 3.9 onwards,
        so it is backported here on top of `relative_to`.
        """

        def is_relative_to(self, *args: Any) -> bool:
            """Tell whether this path is relative to the path described by `args`.

            Arguments:
                *args: Path segments forwarded unchanged to `relative_to`.

            Returns:
                True when `relative_to` succeeds, false when it raises `ValueError`.
            """
            try:
                self.relative_to(*args)
            except ValueError:
                is_relative = False
            else:
                is_relative = True
            return is_relative
else:
    # Python 3.9+ already ships `is_relative_to`: use the stock class under the `URL` name.
    from pathlib import PurePosixPath as URL  # noqa: N814

56 

57 

class AutorefsConfig(Config):
    """Configuration options for the `autorefs` plugin.

    Each option is declared as a class attribute holding an MkDocs config option,
    followed by a string literal documenting it.
    """

    # Boolean option, disabled (`False`) by default.
    resolve_closest = Type(bool, default=False)
    """Whether to resolve an autoref to the closest URL when multiple URLs are found for an identifier.

    By closest, we mean a combination of "relative to the current page" and "shortest distance from the current page".

    For example, if you link to identifier `hello` from page `foo/bar/`,
    and the identifier is found in `foo/`, `foo/baz/` and `foo/bar/baz/qux/` pages,
    autorefs will resolve to `foo/bar/baz/qux`, which is the only URL relative to `foo/bar/`.

    If multiple URLs are equally close, autorefs will resolve to the first of these equally close URLs.
    If autorefs cannot find any URL that is close to the current page, it will log a warning and resolve to the first URL found.

    When false and multiple URLs are found for an identifier, autorefs will log a warning and resolve to the first URL.
    """

75 

76 

class AutorefsPlugin(BasePlugin[AutorefsConfig]):
    """The `autorefs` plugin for `mkdocs`.

    This plugin defines the following event hooks:

    - `on_config`
    - `on_page_markdown`
    - `on_page_content`
    - `on_post_page`

    Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs`
    for more information about its plugin system.
    """

    # When true, `on_page_content` registers every anchor of the page's table of contents.
    scan_toc: bool = True
    # URL of the page currently being converted; set by `on_page_markdown`.
    current_page: str | None = None
    # Forwarded to `fix_refs` as `_legacy_refs` (presumably toggles support for the older
    # reference syntax -- confirm in `mkdocs_autorefs.references`).
    legacy_refs: bool = True

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()
        # identifier -> every "page#anchor" URL registered for it, in registration order.
        self._url_map: dict[str, list[str]] = {}
        # identifier -> a single URL registered through `register_url`.
        self._abs_url_map: dict[str, str] = {}
        # Optional callable suggesting alternative identifiers for unresolved ones.
        self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None

    def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
        """Register that an anchor corresponding to an identifier was encountered when rendering the page.

        Registering the same page/anchor pair twice for an identifier has no further effect.

        Arguments:
            page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
            identifier: The identifier to register.
            anchor: The HTML anchor (without '#') to link to.
                When omitted (or empty), the identifier itself is used as the anchor.
        """
        page_anchor = f"{page}#{anchor or identifier}"
        known_urls = self._url_map.setdefault(identifier, [])
        if page_anchor not in known_urls:
            known_urls.append(page_anchor)

    def register_url(self, identifier: str, url: str) -> None:
        """Register that the identifier should be turned into a link to this URL.

        A later registration for the same identifier replaces the previous one.

        Arguments:
            identifier: The new identifier.
            url: The absolute URL (including anchor, if needed) where this item can be found.
        """
        self._abs_url_map[identifier] = url

    @staticmethod
    def _get_closest_url(from_url: str, urls: list[str]) -> str:
        """Return the closest URL to the current page.

        Starting from the current page and walking up through its parents,
        select the URLs that are relative to the first base matching at least one of them.
        Among these, the URL with the fewest slashes wins (the first one on ties).

        Arguments:
            from_url: The URL of the base page, from which we link towards the targeted pages.
            urls: A list of URLs to choose from.

        Returns:
            The closest URL to the current page, or the first of `urls`
            (after logging a warning) when none is relative to the page or its parents.
        """
        base_url = URL(from_url)

        while True:
            if candidates := [url for url in urls if URL(url).is_relative_to(base_url)]:
                break
            base_url = base_url.parent
            # A base without a name means we walked past the top-most path segment.
            if not base_url.name:
                break

        if not candidates:
            log.warning(
                "Could not find closest URL (from %s, candidates: %s). "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                from_url,
                urls,
            )
            return urls[0]

        # `min` returns the first minimal item, so ties resolve to the earliest candidate.
        winner = min(candidates, key=lambda candidate: candidate.count("/"))
        log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
        return winner

    def _get_item_url(
        self,
        identifier: str,
        fallback: Callable[[str], Sequence[str]] | None = None,
        from_url: str | None = None,
    ) -> str:
        """Look up the URL registered for an identifier, without making it relative.

        Anchors registered with `register_anchor` take precedence over URLs registered
        with `register_url`. When neither knows the identifier, the identifiers suggested
        by `fallback` are tried in order, and the first one that resolves is cached
        for the original identifier.

        Arguments:
            identifier: The identifier to resolve.
            fallback: An optional function to suggest alternative identifiers to try on failure.
            from_url: The URL of the page we link from, used to pick the closest URL
                when several are registered and the `resolve_closest` option is enabled.

        Returns:
            The URL registered for the identifier.

        Raises:
            KeyError: When the identifier (and every fallback identifier) is unknown.
        """
        try:
            urls = self._url_map[identifier]
        except KeyError:
            if identifier in self._abs_url_map:
                return self._abs_url_map[identifier]
            if fallback:
                for new_identifier in fallback(identifier):
                    # An unknown alternative raises `KeyError`: skip it and try the next one.
                    with contextlib.suppress(KeyError):
                        url = self._get_item_url(new_identifier)
                        self._url_map[identifier] = [url]
                        return url
            # Nothing matched: propagate the original `KeyError`.
            raise

        if len(urls) > 1:
            if self.config.resolve_closest and from_url is not None:
                return self._get_closest_url(from_url, urls)
            log.warning(
                "Multiple URLs found for '%s': %s. "
                "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
                identifier,
                urls,
            )
        return urls[0]

    def get_item_url(
        self,
        identifier: str,
        from_url: str | None = None,
        fallback: Callable[[str], Sequence[str]] | None = None,
    ) -> str:
        """Return a site-relative URL with anchor to the identifier, if it's present anywhere.

        Arguments:
            identifier: The anchor (without '#').
            from_url: The URL of the base page, from which we link towards the targeted pages.
            fallback: An optional function to suggest alternative anchors to try on failure.

        Returns:
            A site-relative URL. When `from_url` is given and the URL found has neither
            a scheme nor a network location, it is made relative to `from_url`.

        Raises:
            KeyError: When no URL could be found for the identifier.
        """
        url = self._get_item_url(identifier, fallback, from_url)
        if from_url is not None:
            parsed = urlsplit(url)
            # URLs with a scheme or a network location point outside the site: keep them as is.
            if not parsed.scheme and not parsed.netloc:
                return relative_url(from_url, url)
        return url

    def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
        """Instantiate our Markdown extension.

        Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config).
        In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.references.AutorefsExtension]
        and add it to the list of Markdown extensions used by `mkdocs`.

        Arguments:
            config: The MkDocs config object.

        Returns:
            The modified config.
        """
        log.debug("Adding AutorefsExtension to the list")
        config["markdown_extensions"].append(AutorefsExtension(self))
        return config

    def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Remember which page is the current one.

        Arguments:
            markdown: Input Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same Markdown. We only use this hook to keep a reference to the current page URL,
            used during Markdown conversion by the anchor scanner tree processor.
        """
        self.current_page = page.url
        return markdown

    def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Map anchors to URLs.

        Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
        In this hook, we map the ID of every anchor found in the table of contents to the anchor's URL.
        This mapping will be used later to fix unresolved references of the form `[title][identifier]` or
        `[identifier][]`. Nothing is mapped when `scan_toc` is false.

        Arguments:
            html: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            The same HTML. We only use this hook to map anchors to URLs.
        """
        if self.scan_toc:
            log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
            for item in page.toc.items:
                self.map_urls(page.url, item)
        return html

    def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
        """Recurse on every anchor to map its ID to its URL.

        This method populates the internal identifier-URL mapping (`self._url_map`)
        by side-effect, through `register_anchor`.

        Arguments:
            base_url: The base URL to use as a prefix for each anchor's relative URL.
            anchor: The anchor to process and to recurse on.
        """
        self.register_anchor(base_url, anchor.id)
        for child in anchor.children:
            self.map_urls(base_url, child)

    def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str:  # noqa: ARG002
        """Fix cross-references.

        Hook for the [`on_post_page` event](https://www.mkdocs.org/user-guide/plugins/#on_post_page).
        In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
        Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings.
        It uses the native Markdown syntax so it's easy to remember and use.

        We log a warning for each reference that we couldn't map to an URL, but try to be smart and ignore identifiers
        that do not look legitimate (sometimes documentation can contain strings matching
        our [`AUTO_REF_RE`][mkdocs_autorefs.references.AUTO_REF_RE] regular expression that did not intend to reference anything).
        We currently ignore references when their identifier contains a space or a slash.

        Arguments:
            output: HTML converted from Markdown.
            page: The related MkDocs page instance.
            kwargs: Additional arguments passed by MkDocs.

        Returns:
            Modified HTML.
        """
        log.debug("Fixing references in page %s", page.file.src_path)

        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)

        # Skip building the messages entirely when warnings would be discarded anyway.
        if unmapped and log.isEnabledFor(logging.WARNING):
            for ref, context in unmapped:
                message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
                # Lazy %-style arguments, consistent with the other log calls of this module.
                log.warning(
                    "%s: %sCould not find cross-reference target '%s'",
                    page.file.src_path,
                    message,
                    ref,
                )

        return fixed_output