Coverage for src/mkdocstrings_handlers/c/handler.py: 85.59%
318 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-08-01 19:41 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-08-01 19:41 +0200
1"""This module implements a handler for the C language."""
3from __future__ import annotations
5import re
6from dataclasses import dataclass
7from enum import Enum
8from io import StringIO
9from pathlib import Path
10from typing import TYPE_CHECKING, Any, ClassVar, Mapping, MutableMapping, Protocol
12from mkdocstrings.handlers.base import BaseHandler, CollectionError, CollectorItem
13from mkdocstrings.loggers import get_logger
14from pycparser import CParser, c_ast
16if TYPE_CHECKING:
17 from markdown import Markdown
18 from pycparser.c_ast import FileAST
# Module-level logger, obtained through mkdocstrings' logging helper.
logger = get_logger(__name__)
@dataclass
class Comment:
    """A comment lifted out of a C source file.

    Attributes:
        text: The raw comment text, comment delimiters included.
        last_line_number: The 1-based number of the line the comment ends on.
    """

    text: str
    last_line_number: int
@dataclass
class Macro:
    """A preprocessor directive lifted out of a C source file.

    Attributes:
        text: The directive text with surrounding spaces stripped.
        line_number: The 1-based number of the line the directive is on.
    """

    text: str
    line_number: int
# A single pycparser instance, reused for every collected file.
_C_PARSER = CParser()

# --- Patterns used while stripping comments from the source ---
# A statement ending in `;` followed by a trailing `//` or `/* */` comment.
_END_COMMENT = re.compile(r'^(?!").+; *((\/\/.*)|(\/\*.*\*\/))$')
# A preprocessor directive followed by a trailing `//` or `/* */` comment.
_END_COMMENT_MACRO = re.compile(r"#.+ *((\/\/.*)|(\/\*.*\*\/))$")

# --- Patterns used while parsing a docstring ---
# `// text` on a single line.
_SINGLE_COMMENT = re.compile(r" *\/\/ ?(.*)")
# `/* text */` on a single line.
_SINGLE_COMMENT_ALT = re.compile(r"\/\* *(.+) *\*\/")
# A block comment opened by `/*` or `/*!` on its own line; group 1 is the body.
_FULL_DOC = re.compile(r"\/\*!?\n((.|\n)+)\n *\*\/")
# A ` * text` description line inside a block comment.
_DESC = re.compile(r" *\* *(.+)")
# A ` * @name[opt] rest` directive line inside a block comment.
_DIRECTIVE = re.compile(r" *\* *@(\w+)(\[.+\])? *(.+)")
# The `name description` body of an `@param` directive.
_PARAM_BODY = re.compile(r"(\w+) *(.+)")

# --- Pattern used while collecting macros ---
# `#define NAME value`, where the value is an optional run of word characters.
_DEFINE = re.compile(r"# *define (\w+) *(\w*)")
def extract_comments(code: str) -> tuple[list[Comment], str]:
    """Extract comments from the source code.

    The input is scanned line by line. Every comment found is recorded together
    with the 1-based number of the line it ends on, and is blanked out of the
    returned source so that line numbers stay aligned with the input.

    Parameters:
        code: The source code to extract comments from.

    Returns:
        A tuple containing a list of comments and the source code with comments removed.

    Raises:
        CollectionError: If a multiline comment is closed without having been
            opened, or is still open when the input ends.
    """
    comments: list[Comment] = []
    extracted: list[str] = []
    # Lines of the multiline comment currently being read; None when outside one.
    # A fresh list per comment avoids the stale-position problem of reusing a
    # StringIO after `truncate(0)` (which pads later writes with NUL characters).
    pending: list[str] | None = None

    for index, line in enumerate(code.split("\n")):
        lineno = index + 1
        content = line.strip(" ")

        if pending is not None:
            # Inside a multiline comment nothing else can start. Checking this
            # first keeps commented-out code (e.g. `int x; // note`) from being
            # mistaken for live code with a trailing comment.
            # The raw line is stored so the comment's indentation is preserved.
            pending.append(line)
            if content.endswith("*/"):
                comments.append(Comment("\n".join(pending), lineno))
                extracted.extend("" for _ in pending)  # preserve line count
                pending = None
            continue

        if content.startswith("//") or (content.startswith("/*") and content.endswith("*/")):
            # single line comment
            comments.append(Comment(content, lineno))
            extracted.append("")  # preserve line count
        elif match := (_END_COMMENT_MACRO.match(line) or _END_COMMENT.match(line)):
            # Comment at the end of a preprocessor directive or of a statement.
            # Group 1 spans the whole comment for both `//` and `/* */` styles.
            comments.append(Comment(match.group(1), lineno))
            extracted.append(line[: match.start(1)])
        elif content.startswith("/*"):
            # start of multiline comment
            pending = [content]
        elif content.endswith("*/"):
            raise CollectionError("Found close to multiline comment without a start!")
        else:
            # not a comment
            extracted.append(line)

    if pending is not None:
        raise CollectionError("Unterminated comment!")

    return comments, "\n".join(extracted)
def extract_macros(code: str) -> tuple[list[Macro], str]:
    """Extract macros from the source code.

    Every line starting with `#` (ignoring leading spaces) is treated as a
    preprocessor directive. A directive whose line ends with a backslash
    continues on the following line(s); all of its lines are joined with
    newlines into a single `Macro` whose line number is that of its first line.
    Directive lines are replaced by empty lines in the returned source so that
    line numbers stay aligned with the input.

    Parameters:
        code: The source code to extract macros from.

    Returns:
        A tuple containing a list of macros and the source code with macros removed.
    """
    extracted: list[str] = []
    macros: list[Macro] = []

    # Stripped lines of the directive currently being read (empty when idle).
    pending: list[str] = []
    start_line = -1

    for index, line in enumerate(code.split("\n")):
        content = line.strip(" ")
        continuing = bool(pending)

        if not continuing and not content.startswith("#"):
            extracted.append(line)
            continue

        extracted.append("")  # preserve line count

        if not continuing:
            start_line = index + 1
        pending.append(content)

        if content.endswith("\\"):
            # The directive continues on the next line: emit nothing yet.
            continue

        macros.append(Macro("\n".join(pending), start_line))
        pending = []

    if pending:
        # The input ended right after a line continuation: keep what was read.
        macros.append(Macro("\n".join(pending), start_line))

    return macros, "\n".join(extracted)
class InOut(str, Enum):
    """Direction of a documented parameter, as given by `@param[in]` / `@param[out]`."""

    # No direction annotation (or an unrecognised one) was given.
    UNSPECIFIED = "unspecified"
    # The parameter was annotated with `[in]`.
    IN = "in"
    # The parameter was annotated with `[out]`.
    OUT = "out"
@dataclass
class Param:
    """A parameter documented by an `@param` directive.

    Attributes:
        name: The parameter name.
        desc: The description that follows the name.
        in_out: The direction given in the directive.
    """

    name: str
    desc: str
    in_out: InOut
@dataclass
class Docstring:
    """The structured content of a documentation comment.

    Attributes:
        desc: The free-text description.
        params: The documented parameters, if any were parsed.
        ret: The text of the return directive, if there was one.
    """

    desc: str
    params: list[Param] | None = None
    ret: str | None = None
def parse_docstring(content: str) -> Docstring:
    """Parse a docstring.

    Three comment shapes are recognised, tried in this order: a `// text`
    comment, a one-line `/* text */` comment, and a block comment whose body is
    a run of ` * text` description lines optionally followed by `@param` and
    `@return`/`@returns` directive lines. The directive section starts at the
    first line containing an `@`. Lines holding nothing but the ` *` gutter are
    skipped.

    Parameters:
        content: The content of the docstring.

    Returns:
        A parsed docstring.

    Raises:
        CollectionError: If the comment matches none of the recognised shapes,
            a line has invalid syntax, an unknown directive is used, or more
            than one return directive is present.
    """
    single = _SINGLE_COMMENT.match(content)
    if single:
        return Docstring(single.group(1))

    single_alt = _SINGLE_COMMENT_ALT.match(content)
    if single_alt:
        return Docstring(single_alt.group(1))

    full = _FULL_DOC.match(content)
    if not full:
        raise CollectionError(f"Could not parse docstring! {content}")

    lines = full.group(1).split("\n")

    def is_blank(doc_line: str) -> bool:
        # A visually empty line of a block comment: nothing, or just the `*` gutter.
        return doc_line.strip() in {"", "*"}

    # Index of the first directive line. Defaulting to len(lines) yields an
    # empty directive section when the comment is description-only.
    first_directive = next(
        (index for index, doc_line in enumerate(lines) if "@" in doc_line),
        len(lines),
    )

    desc_parts: list[str] = []
    for doc_line in lines[:first_directive]:
        if is_blank(doc_line):
            continue

        match = _DESC.match(doc_line)
        if not match:
            raise CollectionError(f"Invalid docstring syntax: {doc_line}")

        desc_parts.append(match.group(1))

    params: list[Param] = []
    returns: str | None = None

    for directive in lines[first_directive:]:
        if is_blank(directive):
            continue

        match = _DIRECTIVE.match(directive)
        if not match:
            raise CollectionError(f"Invalid docstring syntax: {directive}")

        name = match.group(1)

        if name == "param":
            in_out_str = match.group(2)

            if in_out_str == "[in]":
                in_out = InOut.IN
            elif in_out_str == "[out]":
                in_out = InOut.OUT
            else:
                in_out = InOut.UNSPECIFIED

            body = _PARAM_BODY.match(match.group(3))
            if not body:
                # Report the offending text, not the (always None) match object.
                raise CollectionError(f"Invalid @param body: {match.group(3)}")

            params.append(Param(body.group(1), body.group(2), in_out))
        elif name in {"return", "returns"}:
            if returns:
                raise CollectionError("Multiple @returns found!")
            returns = match.group(3)
        else:
            raise CollectionError(f"Invalid directive in docstring: {name}")

    # Each description line is followed by a single space, as callers have
    # always received it.
    return Docstring("".join(f"{part} " for part in desc_parts), params, returns)
@dataclass
class DocMacro:
    """A documented `#define`.

    Attributes:
        name: The macro name.
        content: The value the macro is defined to, or None when there is none.
        doc: The parsed documentation comment, if one was attached.
    """

    name: str
    content: str | None
    doc: Docstring | None
@dataclass
class DocType:
    """A documented typedef.

    Attributes:
        name: The name introduced by the typedef.
        tp: The type the name stands for.
        doc: The parsed documentation comment, if one was attached.
        quals: The qualifiers of the typedef declaration.
    """

    name: str
    tp: TypeRef
    doc: Docstring | None
    quals: list[str]
@dataclass
class DocGlobalVar:
    """A documented global variable.

    Attributes:
        name: The variable name.
        tp: The variable's type.
        doc: The parsed documentation comment, if one was attached.
        quals: The qualifiers of the declaration.
    """

    name: str
    tp: TypeRef
    doc: Docstring | None
    quals: list[str]
@dataclass
class FuncParam:
    """A single parameter of a function declaration.

    Attributes:
        name: The parameter name as declared.
        tp: The parameter's type.
    """

    name: str
    tp: TypeRef
@dataclass
class DocFunc:
    """A documented function declaration.

    Attributes:
        name: The function name.
        args: The declared parameters, in order.
        ret: The return type.
        doc: The parsed documentation comment, if one was attached.
    """

    name: str
    args: list[FuncParam]
    ret: TypeRef
    doc: Docstring | None
@dataclass
class CodeDoc:
    """Everything collected from one C source file.

    Attributes:
        macros: The documented `#define`s.
        functions: The function declarations.
        global_vars: The global variable declarations.
        typedefs: The typedefs, keyed by the name they introduce.
    """

    macros: list[DocMacro]
    functions: list[DocFunc]
    global_vars: list[DocGlobalVar]
    typedefs: dict[str, DocType]
class TypeDecl(str, Enum):
    """The kind of declarator a `TypeRef` represents."""

    # A plain named type.
    NORMAL = "normal"
    # A pointer to another type.
    POINTER = "pointer"
    # An array of another type.
    ARRAY = "array"
    # A function type (return type plus parameter types).
    FUNCTION = "function"
@dataclass
class TypeRef:
    """A (possibly nested) description of a C type.

    Attributes:
        name: The type name for a plain type; otherwise the wrapped type
            (pointee, array element, or function return type).
        decl: The kind of declarator this reference represents.
        quals: The qualifiers attached at this level.
        params: The parameter types; only set for function types.
    """

    name: TypeRef | str
    decl: TypeDecl
    quals: list[str]
    params: list[TypeRef] | None = None  # only in functions
class SupportsQualsAndType(Protocol):
    """Structural type for AST nodes that expose `quals` and a nested `type`."""

    # Qualifiers attached to the node.
    quals: list[str]
    # The wrapped node: another such node, or a terminal type node.
    type: SupportsQualsAndType | c_ast.TypeDecl | c_ast.IdentifierType
def ast_to_decl(node: SupportsQualsAndType, types: dict[str, DocType]) -> TypeRef:
    """Convert a pycparser AST node to a TypeRef.

    Pointer and array declarators are unwrapped recursively. A plain type whose
    name is a known typedef resolves to that typedef's `TypeRef`.

    Parameters:
        node: The declarator node to convert (`TypeDecl`, `PtrDecl`,
            `ArrayDecl` or `FuncDecl`).
        types: The typedefs collected so far, keyed by name.

    Returns:
        The `TypeRef` describing the node.

    Raises:
        CollectionError: If the node, or the base type of a `TypeDecl`, is of
            an unsupported kind.
    """
    if isinstance(node, c_ast.TypeDecl):
        base = node.type

        if isinstance(base, c_ast.IdentifierType):
            # Multi-word types such as `unsigned int` carry several names.
            name = " ".join(base.names)
        elif isinstance(base, (c_ast.Struct, c_ast.Union, c_ast.Enum)):
            keyword = type(base).__name__.lower()
            # Anonymous aggregates have no tag name.
            name = f"{keyword} {base.name}" if base.name else keyword
        else:
            raise CollectionError(f"Unsupported base type: {type(base).__name__}")

        existing = types.get(name)
        if existing:
            return existing.tp

        return TypeRef(name, TypeDecl.NORMAL, node.quals)

    if isinstance(node, c_ast.PtrDecl):
        return TypeRef(ast_to_decl(node.type, types), TypeDecl.POINTER, node.quals)

    if isinstance(node, c_ast.ArrayDecl):
        return TypeRef(ast_to_decl(node.type, types), TypeDecl.ARRAY, node.quals)

    if not isinstance(node, c_ast.FuncDecl):
        raise CollectionError(f"Unsupported declarator: {type(node).__name__}")

    # `args` is None for a declaration with an empty parameter list: `void f()`.
    declared = node.args.params if node.args is not None else []
    params = [
        # The `...` of a variadic function has no type of its own.
        TypeRef("...", TypeDecl.NORMAL, [])
        if isinstance(decl, c_ast.EllipsisParam)
        else ast_to_decl(decl.type, types)
        for decl in declared
    ]

    return TypeRef(ast_to_decl(node.type, types), TypeDecl.FUNCTION, [], params)
def _tp_ref_format_char(ref: TypeRef, char: str, qualname: str) -> str:
    """Render the type wrapped by `ref` followed by the declarator suffix `char`.

    When `ref` carries qualifiers, they are prepended and the whole expression
    is wrapped in parentheses.
    """
    # `ref.name` is expected to be the wrapped TypeRef here.
    inner = tp_ref_to_str(ref.name, qualname)  # type: ignore[arg-type]
    if not ref.quals:
        return f"{inner}{char}"

    qualifiers = " ".join(ref.quals)
    return f"({qualifiers} {inner}{char})"
def tp_ref_to_str(ref: TypeRef, qualname: str) -> str:
    """Render a TypeRef as C-like source text.

    Parameters:
        ref: The TypeRef to convert.
        qualname: The name of the type; used as the declared name when
            rendering a function type.

    Returns:
        The string representation of the TypeRef.
    """
    if ref.decl == TypeDecl.NORMAL:
        if not ref.quals:
            return ref.name  # type: ignore[return-value]
        return f"{' '.join(ref.quals)} {ref.name}"

    # Pointers and arrays render as the wrapped type plus a suffix.
    for kind, suffix in ((TypeDecl.POINTER, "*"), (TypeDecl.ARRAY, "[]")):
        if ref.decl == kind:
            return _tp_ref_format_char(ref, suffix, qualname)

    # Anything else is treated as a function type.
    rendered_params: list[str] = []
    for param in ref.params:  # type: ignore[union-attr]
        rendered_params.append(tp_ref_to_str(param, qualname))

    if isinstance(ref.name, TypeRef):
        ret = tp_ref_to_str(ref.name, qualname)
    else:
        ret = ref.name

    return f"{ret} (*{qualname})({', '.join(rendered_params)})"
def typedef_to_str(decl: DocType) -> str:
    """Render the type behind a typedef as a string.

    Parameters:
        decl: The typedef to convert.

    Returns:
        The string representation of the typedef's type, using the typedef's
        own name as the declared name.
    """
    aliased, alias = decl.tp, decl.name
    return tp_ref_to_str(aliased, alias)
def desc(doc: Docstring | None) -> str:
    """Return the description of a docstring, or a placeholder when there is none.

    Parameters:
        doc: The docstring to get the description from.

    Returns:
        The description.
    """
    return doc.desc if doc else "No description specified."
def lookup_type_html(data: CodeDoc, tp: TypeRef, *, name: str | None = None) -> str:
    """Render a type as HTML, linking to its typedef when one matches.

    Parameters:
        data: The parsed C source file.
        tp: The type to lookup.
        name: The name of the type.

    Returns:
        The HTML representation of the type.
    """
    aliases = [alias for alias, doctype in data.typedefs.items() if doctype.tp == tp]

    if aliases:
        # When several typedefs share the type, the last one declared wins.
        alias = aliases[-1]
        inner = f'<a href="#type-{alias}">{alias}</a>'
    else:
        inner = tp_ref_to_str(tp, name or "unknown")

    return f"<code>{inner}</code>"
class CHandler(BaseHandler):
    """The C handler class."""

    name: str = "c"
    """The handler's name."""

    domain: str = "c"
    """The cross-documentation domain/language for this handler."""

    enable_inventory: bool = False
    """Whether this handler is interested in enabling the creation of the `objects.inv` Sphinx inventory file."""

    fallback_theme = "material"
    """The theme to fallback to."""

    fallback_config: ClassVar[dict] = {"fallback": True}
    """The configuration used to collect item during autorefs fallback."""

    default_config: ClassVar[dict] = {
        "show_root_heading": False,
        "show_root_toc_entry": True,
        "show_symbol_type_heading": True,
        "show_symbol_type_toc_entry": True,
        "heading_level": 2,
    }
    """The default configuration options.

    Option | Type | Description | Default
    ------ | ---- | ----------- | -------
    **`show_root_heading`** | `bool` | Show the heading of the object at the root of the documentation tree. | `False`
    **`show_root_toc_entry`** | `bool` | If the root heading is not shown, at least add a ToC entry for it. | `True`
    **`heading_level`** | `int` | The initial heading level to use. | `2`
    """

    @staticmethod
    def _pop_comment(comments: dict[int, Comment], lineno: int) -> Comment | None:
        """Remove and return the comment ending on `lineno` or on the line just above it."""
        for candidate in (lineno, lineno - 1):
            if candidate in comments:
                return comments.pop(candidate)
        return None

    def collect(self, identifier: str, config: MutableMapping[str, Any]) -> CollectorItem:
        """Collect the documented items of a C source file.

        The file is read, its comments and preprocessor directives are stripped
        out, and what remains is parsed with pycparser. Each typedef, function
        declaration, global variable and `#define` is paired with the comment
        that ends on its own line or on the line just above it.

        Parameters:
            identifier: The path of the C source file to read.
            config: All configuration options for this handler either defined globally in `mkdocs.yml` or
                locally overridden in an identifier block by the user.

        Returns:
            A `CodeDoc` holding the macros, functions, global variables and typedefs found.

        Raises:
            CollectionError: If called with the fallback configuration, or if
                the comments of the file cannot be extracted or parsed.
        """
        if config.get("fallback", False):
            raise CollectionError("Not loading additional headers during fallback")

        source = Path(identifier).read_text(encoding="utf-8")
        comments_list, source = extract_comments(source)
        macros_list, source = extract_macros(source)
        code: FileAST = _C_PARSER.parse(source)

        # Comments are looked up by the line they end on, and consumed once used.
        comments: dict[int, Comment] = {comment.last_line_number: comment for comment in comments_list}
        types: dict[str, DocType] = {}
        global_vars: list[DocGlobalVar] = []
        funcs: list[DocFunc] = []

        for node in code.ext:
            if not isinstance(node, (c_ast.Typedef, c_ast.Decl)):
                continue

            raw_doc = self._pop_comment(comments, node.coord.line)
            docstring: Docstring | None = parse_docstring(raw_doc.text) if raw_doc else None

            if isinstance(node, c_ast.Typedef):
                types[node.name] = DocType(node.name, ast_to_decl(node.type, types), docstring, node.quals)

            elif type(node) is c_ast.Decl:  # we dont want the subclasses
                if isinstance(node.type, c_ast.FuncDecl):
                    ref = ast_to_decl(node.type, types)

                    # `args` is None for an empty parameter list: `void f()`.
                    arg_list = node.type.args
                    declared = arg_list.params if arg_list is not None else []
                    params: list[FuncParam] = [
                        FuncParam(
                            # The `...` of a variadic function has no name attribute.
                            "..." if isinstance(param, c_ast.EllipsisParam) else param.name,
                            param_ref,
                        )
                        for param_ref, param in zip(ref.params or [], declared)
                    ]

                    funcs.append(DocFunc(node.name, params, ref.name, docstring))  # type: ignore[arg-type]
                else:
                    global_vars.append(
                        DocGlobalVar(
                            node.name,
                            ast_to_decl(node.type, types),
                            docstring,
                            node.quals,
                        ),
                    )

        macros: list[DocMacro] = []

        for macro in macros_list:
            match = _DEFINE.match(macro.text)

            # Directives other than `#define` are not documented.
            if not match:
                continue

            raw_doc = self._pop_comment(comments, macro.line_number)
            docstring = parse_docstring(raw_doc.text) if raw_doc else None
            macros.append(DocMacro(match.group(1).rstrip(" "), match.group(2) or None, docstring))

        return CodeDoc(macros, funcs, global_vars, types)

    # def get_templates_dir(self, handler: str | None = None) -> Path:
    #     return Path.cwd()

    def render(self, data: CodeDoc, config: Mapping[str, Any]) -> str:
        """Render a template using provided data and configuration options.

        Parameters:
            data: The data to render that was collected above in `collect()`.
            config: All configuration options for this handler either defined globally in `mkdocs.yml` or
                locally overridden in an identifier block by the user.

        Returns:
            The rendered template as HTML.
        """
        # User-provided options take precedence over the defaults.
        final_config = {**self.default_config, **config}
        heading_level = final_config["heading_level"]
        template = self.env.get_template("header.html.jinja")
        return template.render(
            config=final_config,
            header=data,
            heading_level=heading_level,
            root=True,
        )

    def update_env(self, md: Markdown, config: dict) -> None:
        """Update the Jinja environment with any custom settings/filters/options for this handler.

        Parameters:
            md: The Markdown instance. Useful to add functions able to convert Markdown into the environment filters.
            config: Configuration options for `mkdocs` and `mkdocstrings`, read from `mkdocs.yml`. See the source code
                of [mkdocstrings.plugin.MkdocstringsPlugin.on_config][] to see what's in this dictionary.
        """
        super().update_env(md, config)  # Add some mkdocstrings default filters such as highlight and convert_markdown
        self.env.trim_blocks = True
        self.env.lstrip_blocks = True
        self.env.keep_trailing_newline = False
        self.env.filters["typedef_to_str"] = typedef_to_str
        self.env.filters["lookup_type_html"] = lookup_type_html
        self.env.filters["zip"] = zip
def get_handler(
    theme: str,
    custom_templates: str | None = None,
    config_file_path: str | None = None,  # noqa: ARG001
    **config: Any,  # noqa: ARG001
) -> CHandler:
    """Build the `CHandler` instance used by mkdocstrings.

    Parameters:
        theme: The theme to use when rendering contents.
        custom_templates: Directory containing custom templates.
        config_file_path: The MkDocs configuration file path (currently unused).
        **config: Configuration passed to the handler (currently unused).

    Returns:
        An instance of the handler.
    """
    # `config_file_path` is not forwarded: doing so would require overriding
    # the handler's `__init__` method.
    handler = CHandler(handler="c", theme=theme, custom_templates=custom_templates)
    return handler