Coverage for src/mkdocstrings/extension.py: 77.36%

"""Markdown extension that recognizes "autodoc" instructions and renders them.

The extension registers a Markdown [block processor](https://python-markdown.github.io/extensions/api/#blockparser)
which matches blocks starting with a line of the form `::: identifier`, optionally followed by indented lines.

For every matched block, a [handler][mkdocstrings.handlers.base.BaseHandler] collects documentation about
the identifier and renders it with Jinja templates.

Collection and rendering can both be tuned with YAML configuration written under the "autodoc" instruction:

```yaml
::: some.identifier
    handler: python
    options:
      option1: value1
      option2:
      - value2a
      - value2b
      option_x: etc
```
"""

from __future__ import annotations

import re
from collections import ChainMap
from typing import TYPE_CHECKING, Any
from xml.etree.ElementTree import Element

import yaml
from jinja2.exceptions import TemplateNotFound
from markdown.blockprocessors import BlockProcessor
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor
from mkdocs.exceptions import PluginError

from mkdocstrings.handlers.base import BaseHandler, CollectionError, CollectorItem, Handlers
from mkdocstrings.loggers import get_logger

if TYPE_CHECKING:
    from collections.abc import MutableSequence

    from markdown import Markdown
    from markdown.blockparser import BlockParser
    from mkdocs_autorefs.plugin import AutorefsPlugin


# Module-level logger shared by every class in this file.
log = get_logger(__name__)

class AutoDocProcessor(BlockProcessor):
    """Our "autodoc" Markdown block processor.

    It has a [`test` method][mkdocstrings.extension.AutoDocProcessor.test] that tells if a block matches a criterion,
    and a [`run` method][mkdocstrings.extension.AutoDocProcessor.run] that processes it.

    Processing a block means collecting data for the matched identifier with a handler,
    rendering it to HTML, and registering the generated headings' anchors.
    """

    # Optional Markdown heading prefix (`#` to `######`), then `:::`, then the identifier.
    regex = re.compile(r"^(?P<heading>#{1,6} *|)::: ?(?P<name>.+?) *$", flags=re.MULTILINE)

    def __init__(
        self,
        parser: BlockParser,
        md: Markdown,
        config: dict,
        handlers: Handlers,
        autorefs: AutorefsPlugin,
    ) -> None:
        """Initialize the object.

        Arguments:
            parser: A `markdown.blockparser.BlockParser` instance.
            md: A `markdown.Markdown` instance.
            config: The [configuration][mkdocstrings.plugin.PluginConfig] of the `mkdocstrings` plugin.
            handlers: The handlers container.
            autorefs: The autorefs plugin instance.
        """
        super().__init__(parser=parser)
        self.md = md
        self._config = config
        self._handlers = handlers
        self._autorefs = autorefs
        # Names of the handlers whose rendering environment was already updated by this processor.
        self._updated_envs: set[str] = set()

    def test(self, parent: Element, block: str) -> bool:  # noqa: ARG002
        """Match our autodoc instructions.

        Arguments:
            parent: The parent element in the XML tree.
            block: The block to be tested.

        Returns:
            Whether this block should be processed or not.
        """
        return bool(self.regex.search(block))

    def run(self, parent: Element, blocks: MutableSequence[str]) -> None:
        """Run code on the matched blocks.

        The identifier and configuration lines are retrieved from a matched block
        and used to collect and render an object. The rendered HTML is stashed and
        wrapped in a `div.mkdocstrings` element appended to `parent`.

        Arguments:
            parent: The parent element in the XML tree.
            blocks: The rest of the blocks to be processed.
        """
        block = blocks.pop(0)
        match = self.regex.search(block)

        if match:
            if match.start() > 0:
                # Text preceding the `:::` line is regular Markdown: parse it normally.
                self.parser.parseBlocks(parent, [block[: match.start()]])
            # removes the first line
            block = block[match.end() :]

        block, the_rest = self.detab(block)

        # Build the expected prefix from the same tab length `detab` relies on
        # (inherited from `BlockProcessor`), instead of hard-coding the indentation.
        indent = " " * self.tab_length
        option_prefixes = (f"{indent}handler:", f"{indent}options:")
        if not block and blocks and blocks[0].startswith(option_prefixes):
            # YAML options were separated from the `:::` line by a blank line.
            block = blocks.pop(0)

        if match:
            identifier = match["name"]
            heading_level = match["heading"].count("#")
            log.debug(f"Matched '::: {identifier}'")

            html, handler, _ = self._process_block(identifier, block, heading_level)
            el = Element("div", {"class": "mkdocstrings"})
            # The final HTML is inserted as opaque to subsequent processing, and only revealed at the end.
            el.text = self.md.htmlStash.store(html)
            # We need to duplicate the headings directly, just so 'toc' can pick them up,
            # otherwise they wouldn't appear in the final table of contents.
            # These headings are generated by the `BaseHandler.do_heading` method (Jinja filter),
            # which runs in the inner Markdown conversion layer, and not in the outer one where we are now.
            headings = handler.get_headings()
            el.extend(headings)
            # These duplicated headings will later be removed by our `_HeadingsPostProcessor` processor,
            # which runs right after 'toc' (see `MkdocstringsExtension.extendMarkdown`).

            page = self._autorefs.current_page
            if page is not None:
                for heading in headings:
                    rendered_anchor = heading.attrib["id"]
                    self._autorefs.register_anchor(page, rendered_anchor)

                    if "data-role" in heading.attrib:
                        self._handlers.inventory.register(
                            name=rendered_anchor,
                            domain=handler.domain,
                            role=heading.attrib["data-role"],
                            priority=1,  # register with standard priority
                            uri=f"{page}#{rendered_anchor}",
                        )

                        # also register other anchors for this object in the inventory
                        try:
                            data_object = handler.collect(rendered_anchor, handler.fallback_config)
                        except CollectionError:
                            # Nothing more to register for this heading: move on to the next one.
                            continue
                        for anchor in handler.get_anchors(data_object):
                            if anchor not in self._handlers.inventory:
                                self._handlers.inventory.register(
                                    name=anchor,
                                    domain=handler.domain,
                                    role=heading.attrib["data-role"],
                                    priority=2,  # register with lower priority
                                    uri=f"{page}#{rendered_anchor}",
                                )

            parent.append(el)

        if the_rest:
            # This block contained unindented line(s) after the first indented
            # line. Insert these lines as the first block of the master blocks
            # list for future processing.
            blocks.insert(0, the_rest)

    def _process_block(
        self,
        identifier: str,
        yaml_block: str,
        heading_level: int = 0,
    ) -> tuple[str, BaseHandler, CollectorItem]:
        """Process an autodoc block.

        Arguments:
            identifier: The identifier of the object to collect and render.
            yaml_block: The YAML configuration.
            heading_level: Suggested level of the heading to insert (0 to ignore).

        Raises:
            PluginError: When something wrong happened during collection.
            TemplateNotFound: When a template used for rendering could not be found.

        Returns:
            Rendered HTML, the handler that was used, and the collected item.
        """
        # An empty YAML block loads as `None`: fall back to an empty configuration.
        config = yaml.safe_load(yaml_block) or {}
        handler_name = self._handlers.get_handler_name(config)

        log.debug(f"Using handler '{handler_name}'")
        handler_config = self._handlers.get_handler_config(handler_name)
        handler = self._handlers.get_handler(handler_name, handler_config)

        # Options written under the instruction shadow the handler-wide ones.
        global_options = handler_config.get("options", {})
        local_options = config.get("options", {})
        options = ChainMap(local_options, global_options)

        if heading_level:
            # Heading level obtained from Markdown (`##`) takes precedence.
            options = ChainMap({"heading_level": heading_level}, options)

        log.debug("Collecting data")
        try:
            data: CollectorItem = handler.collect(identifier, options)
        except CollectionError as exception:
            log.error(str(exception))  # noqa: TRY400
            raise PluginError(f"Could not collect '{identifier}'") from exception

        if handler_name not in self._updated_envs:  # We haven't seen this handler before on this document.
            log.debug("Updating handler's rendering env")
            handler._update_env(self.md, self._config)
            self._updated_envs.add(handler_name)

        log.debug("Rendering templates")
        try:
            rendered = handler.render(data, options)
        except TemplateNotFound as exc:
            theme_name = self._config["theme_name"]
            log.error(  # noqa: TRY400
                f"Template '{exc.name}' not found for '{handler_name}' handler and theme '{theme_name}'.",
            )
            raise

        return rendered, handler, data

239

240

class _HeadingsPostProcessor(Treeprocessor):
    """Tree processor that strips `div.mkdocstrings` containers while keeping their text."""

    def run(self, root: Element) -> None:
        """Remove the duplicated headings from the whole tree, starting at `root`."""
        self._remove_duplicated_headings(root)

    def _remove_duplicated_headings(self, parent: Element) -> None:
        """Recursively drop `div.mkdocstrings` children of `parent`, re-attaching their text.

        The text of each removed container is moved to the tail of the closest preceding
        sibling that is kept, or to the text of `parent` when there is no such sibling.

        Arguments:
            parent: The element whose children are inspected.
        """
        pending = ""
        # Walking backwards lets us remove the current child without disturbing the iteration.
        for child in reversed(parent):
            if child.tag == "div" and child.get("class") == "mkdocstrings":
                # Drop the container (and the duplicated headings inside it), keep its text.
                pending = (child.text or "") + pending
                parent.remove(child)
                continue

            if pending:
                # Text carried from removed containers lands right after this kept sibling.
                child.tail = (child.tail or "") + pending
                pending = ""
            self._remove_duplicated_headings(child)

        if pending:
            # No kept sibling came before the removed containers: attach to the parent itself.
            parent.text = (parent.text or "") + pending

260

261

class _TocLabelsTreeProcessor(Treeprocessor):
    """Tree processor that renames table-of-contents entries carrying a `data-toc-label`."""

    def run(self, root: Element) -> None:  # noqa: ARG002
        """Apply label overrides to the TOC tokens stored on the Markdown instance."""
        toc_tokens = self.md.toc_tokens  # type: ignore[attr-defined]
        self._override_toc_labels(toc_tokens)

    def _override_toc_labels(self, tokens: list[dict[str, Any]]) -> None:
        """Replace the name of each token by its `data-toc-label`, recursing into children.

        Arguments:
            tokens: The TOC tokens to update in place.
        """
        for entry in tokens:
            custom_label = entry.get("data-toc-label")
            if custom_label and entry["name"] != custom_label:
                entry["name"] = custom_label
            self._override_toc_labels(entry["children"])

271

272

class MkdocstringsExtension(Extension):
    """Our Markdown extension.

    It cannot work outside of `mkdocstrings`.
    """

    def __init__(self, config: dict, handlers: Handlers, autorefs: AutorefsPlugin, **kwargs: Any) -> None:
        """Initialize the object.

        Arguments:
            config: The configuration items from `mkdocs` and `mkdocstrings` that must be passed to the block processor
                when instantiated in [`extendMarkdown`][mkdocstrings.extension.MkdocstringsExtension.extendMarkdown].
            handlers: The handlers container.
            autorefs: The autorefs plugin instance.
            **kwargs: Keyword arguments used by `markdown.extensions.Extension`.
        """
        super().__init__(**kwargs)
        self._config = config
        self._handlers = handlers
        self._autorefs = autorefs

    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802 (casing: parent method's name)
        """Register the extension.

        Add an instance of our [`AutoDocProcessor`][mkdocstrings.extension.AutoDocProcessor] to the Markdown parser,
        then register the two tree processors defined in this module.

        Arguments:
            md: A `markdown.Markdown` instance.
        """
        autodoc_processor = AutoDocProcessor(md.parser, md, self._config, self._handlers, self._autorefs)
        # Priority 75: right before markdown.blockprocessors.HashHeaderProcessor.
        md.parser.blockprocessors.register(autodoc_processor, "mkdocstrings", priority=75)

        # Both tree processors are registered with priority 4, i.e. right after 'toc'.
        tree_processors = (
            (_HeadingsPostProcessor, "mkdocstrings_post_headings"),
            (_TocLabelsTreeProcessor, "mkdocstrings_post_toc_labels"),
        )
        for processor_class, registered_name in tree_processors:
            md.treeprocessors.register(processor_class(md), registered_name, priority=4)