Coverage for src/mkdocstrings/extension.py: 77.36%

123 statements  

« prev     ^ index     » next       coverage.py v7.6.2, created at 2024-10-12 18:59 +0200

1"""This module holds the code of the Markdown extension responsible for matching "autodoc" instructions. 

2 

3The extension is composed of a Markdown [block processor](https://python-markdown.github.io/extensions/api/#blockparser) 

4that matches indented blocks starting with a line like `::: identifier`. 

5 

6For each of these blocks, it uses a [handler][mkdocstrings.handlers.base.BaseHandler] to collect documentation about 

7the given identifier and render it with Jinja templates. 

8 

9Both the collection and rendering process can be configured by adding YAML configuration under the "autodoc" 

10instruction: 

11 

12```yaml 

13::: some.identifier 

14 handler: python 

15 options: 

16 option1: value1 

17 option2: 

18 - value2a 

19 - value2b 

20 option_x: etc 

21``` 

22""" 

23 

24from __future__ import annotations 

25 

26import re 

27from collections import ChainMap 

28from typing import TYPE_CHECKING, Any 

29from xml.etree.ElementTree import Element 

30 

31import yaml 

32from jinja2.exceptions import TemplateNotFound 

33from markdown.blockprocessors import BlockProcessor 

34from markdown.extensions import Extension 

35from markdown.treeprocessors import Treeprocessor 

36from mkdocs.exceptions import PluginError 

37 

38from mkdocstrings.handlers.base import BaseHandler, CollectionError, CollectorItem, Handlers 

39from mkdocstrings.loggers import get_logger 

40 

41if TYPE_CHECKING: 

42 from collections.abc import MutableSequence 

43 

44 from markdown import Markdown 

45 from markdown.blockparser import BlockParser 

46 from mkdocs_autorefs.plugin import AutorefsPlugin 

47 

48 

49log = get_logger(__name__) 

50 

51 

class AutoDocProcessor(BlockProcessor):
    """Our "autodoc" Markdown block processor.

    It has a [`test` method][mkdocstrings.extension.AutoDocProcessor.test] that tells if a block matches a criterion,
    and a [`run` method][mkdocstrings.extension.AutoDocProcessor.run] that processes it.

    It also has utility methods that make it easy to get handlers and their configuration,
    which is useful when processing a matched block.
    """

    # Matches a `::: identifier` line, optionally prefixed by a Markdown heading marker (`#` to `######`).
    regex = re.compile(r"^(?P<heading>#{1,6} *|)::: ?(?P<name>.+?) *$", flags=re.MULTILINE)

    def __init__(
        self,
        parser: BlockParser,
        md: Markdown,
        config: dict,
        handlers: Handlers,
        autorefs: AutorefsPlugin,
    ) -> None:
        """Initialize the object.

        Arguments:
            parser: A `markdown.blockparser.BlockParser` instance.
            md: A `markdown.Markdown` instance.
            config: The [configuration][mkdocstrings.plugin.PluginConfig] of the `mkdocstrings` plugin.
            handlers: The handlers container.
            autorefs: The autorefs plugin instance.
        """
        super().__init__(parser=parser)
        self.md = md
        self._config = config
        self._handlers = handlers
        self._autorefs = autorefs
        # Names of the handlers whose rendering environment was already updated by this processor.
        self._updated_envs: set = set()

    def test(self, parent: Element, block: str) -> bool:  # noqa: ARG002
        """Match our autodoc instructions.

        Arguments:
            parent: The parent element in the XML tree.
            block: The block to be tested.

        Returns:
            Whether this block should be processed or not.
        """
        return bool(self.regex.search(block))

    def run(self, parent: Element, blocks: MutableSequence[str]) -> None:
        """Run code on the matched blocks.

        The identifier and configuration lines are retrieved from a matched block
        and used to collect and render an object.

        Arguments:
            parent: The parent element in the XML tree.
            blocks: The rest of the blocks to be processed.
        """
        block = blocks.pop(0)
        match = self.regex.search(block)

        if match:
            if match.start() > 0:
                # Whatever precedes the instruction is regular Markdown: hand it back to the parser.
                self.parser.parseBlocks(parent, [block[: match.start()]])
            # removes the first line
            block = block[match.end() :]

        block, the_rest = self.detab(block)

        # `detab` strips `self.tab_length` leading spaces, so the look-ahead below must expect
        # exactly the same indentation rather than a hard-coded number of spaces.
        indent = " " * self.tab_length
        if not block and blocks and blocks[0].startswith((f"{indent}handler:", f"{indent}options:")):
            # YAML options were separated from the `:::` line by a blank line.
            block = blocks.pop(0)

        if match:
            identifier = match["name"]
            heading_level = match["heading"].count("#")
            log.debug(f"Matched '::: {identifier}'")

            html, handler, data = self._process_block(identifier, block, heading_level)
            el = Element("div", {"class": "mkdocstrings"})
            # The final HTML is inserted as opaque to subsequent processing, and only revealed at the end.
            el.text = self.md.htmlStash.store(html)
            # We need to duplicate the headings directly, just so 'toc' can pick them up,
            # otherwise they wouldn't appear in the final table of contents.
            # These headings are generated by the `BaseHandler.do_heading` method (Jinja filter),
            # which runs in the inner Markdown conversion layer, and not in the outer one where we are now.
            headings = handler.get_headings()
            el.extend(headings)
            # These duplicated headings will later be removed by our `_HeadingsPostProcessor` processor,
            # which runs right after 'toc' (see `MkdocstringsExtension.extendMarkdown`).

            page = self._autorefs.current_page
            if page is not None:
                for heading in headings:
                    rendered_anchor = heading.attrib["id"]
                    self._autorefs.register_anchor(page, rendered_anchor)

                    if "data-role" not in heading.attrib:
                        # Only headings carrying a role are recorded in the inventory.
                        continue

                    self._handlers.inventory.register(
                        name=rendered_anchor,
                        domain=handler.domain,
                        role=heading.attrib["data-role"],
                        priority=1,  # register with standard priority
                        uri=f"{page}#{rendered_anchor}",
                    )

                    # also register other anchors for this object in the inventory
                    try:
                        data_object = handler.collect(rendered_anchor, handler.fallback_config)
                    except CollectionError:
                        continue
                    for anchor in handler.get_anchors(data_object):
                        if anchor not in self._handlers.inventory:
                            self._handlers.inventory.register(
                                name=anchor,
                                domain=handler.domain,
                                role=heading.attrib["data-role"],
                                priority=2,  # register with lower priority
                                uri=f"{page}#{rendered_anchor}",
                            )

            parent.append(el)

        if the_rest:
            # This block contained unindented line(s) after the first indented
            # line. Insert these lines as the first block of the master blocks
            # list for future processing.
            blocks.insert(0, the_rest)

    def _process_block(
        self,
        identifier: str,
        yaml_block: str,
        heading_level: int = 0,
    ) -> tuple[str, BaseHandler, CollectorItem]:
        """Process an autodoc block.

        Arguments:
            identifier: The identifier of the object to collect and render.
            yaml_block: The YAML configuration.
            heading_level: Suggested level of the heading to insert (0 to ignore).

        Raises:
            PluginError: When something wrong happened during collection.
            TemplateNotFound: When a template used for rendering could not be found.

        Returns:
            Rendered HTML, the handler that was used, and the collected item.
        """
        # An empty YAML block loads as `None`: fall back to an empty configuration.
        config = yaml.safe_load(yaml_block) or {}
        handler_name = self._handlers.get_handler_name(config)

        log.debug(f"Using handler '{handler_name}'")
        handler_config = self._handlers.get_handler_config(handler_name)
        handler = self._handlers.get_handler(handler_name, handler_config)

        # A bare `options:` key loads as `None`; `or {}` keeps the `ChainMap` lookups below working.
        global_options = handler_config.get("options") or {}
        local_options = config.get("options") or {}
        # Options given under the instruction shadow the handler-wide ones.
        options = ChainMap(local_options, global_options)

        if heading_level:
            # Heading level obtained from Markdown (`##`) takes precedence.
            options = ChainMap({"heading_level": heading_level}, options)

        log.debug("Collecting data")
        try:
            data: CollectorItem = handler.collect(identifier, options)
        except CollectionError as exception:
            log.error(str(exception))  # noqa: TRY400
            raise PluginError(f"Could not collect '{identifier}'") from exception

        if handler_name not in self._updated_envs:  # We haven't seen this handler before on this document.
            log.debug("Updating handler's rendering env")
            handler._update_env(self.md, self._config)
            self._updated_envs.add(handler_name)

        log.debug("Rendering templates")
        try:
            rendered = handler.render(data, options)
        except TemplateNotFound as exc:
            theme_name = self._config["theme_name"]
            log.error(  # noqa: TRY400
                f"Template '{exc.name}' not found for '{handler_name}' handler and theme '{theme_name}'.",
            )
            raise

        return rendered, handler, data

180 

181 def _process_block( 

182 self, 

183 identifier: str, 

184 yaml_block: str, 

185 heading_level: int = 0, 

186 ) -> tuple[str, BaseHandler, CollectorItem]: 

187 """Process an autodoc block. 

188 

189 Arguments: 

190 identifier: The identifier of the object to collect and render. 

191 yaml_block: The YAML configuration. 

192 heading_level: Suggested level of the heading to insert (0 to ignore). 

193 

194 Raises: 

195 PluginError: When something wrong happened during collection. 

196 TemplateNotFound: When a template used for rendering could not be found. 

197 

198 Returns: 

199 Rendered HTML, the handler that was used, and the collected item. 

200 """ 

201 config = yaml.safe_load(yaml_block) or {} 

202 handler_name = self._handlers.get_handler_name(config) 

203 

204 log.debug(f"Using handler '{handler_name}'") 

205 handler_config = self._handlers.get_handler_config(handler_name) 

206 handler = self._handlers.get_handler(handler_name, handler_config) 

207 

208 global_options = handler_config.get("options", {}) 

209 local_options = config.get("options", {}) 

210 options = ChainMap(local_options, global_options) 

211 

212 if heading_level: 212 ↛ 214line 212 didn't jump to line 214 because the condition on line 212 was never true

213 # Heading level obtained from Markdown (`##`) takes precedence. 

214 options = ChainMap({"heading_level": heading_level}, options) 

215 

216 log.debug("Collecting data") 

217 try: 

218 data: CollectorItem = handler.collect(identifier, options) 

219 except CollectionError as exception: 

220 log.error(str(exception)) # noqa: TRY400 

221 raise PluginError(f"Could not collect '{identifier}'") from exception 

222 

223 if handler_name not in self._updated_envs: # We haven't seen this handler before on this document. 

224 log.debug("Updating handler's rendering env") 

225 handler._update_env(self.md, self._config) 

226 self._updated_envs.add(handler_name) 

227 

228 log.debug("Rendering templates") 

229 try: 

230 rendered = handler.render(data, options) 

231 except TemplateNotFound as exc: 

232 theme_name = self._config["theme_name"] 

233 log.error( # noqa: TRY400 

234 f"Template '{exc.name}' not found for '{handler_name}' handler and theme '{theme_name}'.", 

235 ) 

236 raise 

237 

238 return rendered, handler, data 

239 

240 

class _HeadingsPostProcessor(Treeprocessor):
    """Tree processor that removes the `div.mkdocstrings` wrappers and the duplicated headings they contain.

    Only the wrapper's own text (the placeholder of the stashed HTML) and its tail are kept:
    they are re-attached to the neighbouring node so that they serialize at the same position.
    """

    def run(self, root: Element) -> None:
        """Strip the wrappers from the whole tree.

        Arguments:
            root: The root element of the tree; it is modified in place.
        """
        self._remove_duplicated_headings(root)

    def _remove_duplicated_headings(self, parent: Element) -> None:
        """Recursively remove the wrappers found under `parent`, preserving their text and tail.

        Arguments:
            parent: The element whose children are inspected; it is modified in place.
        """
        carry_text = ""
        for el in reversed(parent):  # Reversed mainly for the ability to mutate during iteration.
            if el.tag == "div" and el.get("class") == "mkdocstrings":
                # Delete the duplicated headings along with their container, but keep the text (i.e. the actual HTML).
                # The tail belongs to the removed element too: carry it along, otherwise any text
                # that follows the container in the document would be dropped with it.
                carry_text = (el.text or "") + (el.tail or "") + carry_text
                parent.remove(el)
            else:
                if carry_text:
                    # The carried text came right after this element: append it to its tail.
                    el.tail = (el.tail or "") + carry_text
                    carry_text = ""
                self._remove_duplicated_headings(el)

        if carry_text:
            # No preceding sibling was left to receive the text: it becomes the parent's leading text.
            parent.text = (parent.text or "") + carry_text

260 

261 

class _TocLabelsTreeProcessor(Treeprocessor):
    """Tree processor that renames table-of-contents tokens after their `data-toc-label` value."""

    def run(self, root: Element) -> None:  # noqa: ARG002
        """Apply the label overrides to `self.md.toc_tokens`; the tree itself is not touched.

        Arguments:
            root: The root element of the tree (unused).
        """
        self._override_toc_labels(self.md.toc_tokens)  # type: ignore[attr-defined]

    def _override_toc_labels(self, tokens: list[dict[str, Any]]) -> None:
        """Replace the name of every token, nested ones included, by its custom label when it has one.

        Arguments:
            tokens: The tokens to update in place; nested tokens are read from their `children` key.
        """
        # Explicit work list instead of recursion: each token is visited once, children included.
        pending = list(tokens)
        while pending:
            entry = pending.pop()
            custom_label = entry.get("data-toc-label")
            if custom_label and entry["name"] != custom_label:
                entry["name"] = custom_label
            pending.extend(entry["children"])

271 

272 

class MkdocstringsExtension(Extension):
    """Our Markdown extension.

    It cannot work outside of `mkdocstrings`.
    """

    def __init__(self, config: dict, handlers: Handlers, autorefs: AutorefsPlugin, **kwargs: Any) -> None:
        """Initialize the object.

        Arguments:
            config: The configuration items from `mkdocs` and `mkdocstrings` that must be passed to the block processor
                when instantiated in [`extendMarkdown`][mkdocstrings.extension.MkdocstringsExtension.extendMarkdown].
            handlers: The handlers container.
            autorefs: The autorefs plugin instance.
            **kwargs: Keyword arguments used by `markdown.extensions.Extension`.
        """
        super().__init__(**kwargs)
        self._config = config
        self._handlers = handlers
        self._autorefs = autorefs

    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802 (casing: parent method's name)
        """Register the extension.

        Add an instance of our [`AutoDocProcessor`][mkdocstrings.extension.AutoDocProcessor] to the Markdown parser,
        along with the two tree processors that post-process headings and table-of-contents labels.

        Arguments:
            md: A `markdown.Markdown` instance.
        """
        autodoc_processor = AutoDocProcessor(md.parser, md, self._config, self._handlers, self._autorefs)
        md.parser.blockprocessors.register(
            autodoc_processor,
            "mkdocstrings",
            priority=75,  # Right before markdown.blockprocessors.HashHeaderProcessor
        )

        # Both tree processors share the same priority; they are registered in this order.
        post_processors = (
            ("mkdocstrings_post_headings", _HeadingsPostProcessor),
            ("mkdocstrings_post_toc_labels", _TocLabelsTreeProcessor),
        )
        for registered_name, processor_class in post_processors:
            md.treeprocessors.register(
                processor_class(md),
                registered_name,
                priority=4,  # Right after 'toc'.
            )