Coverage for src/mkdocstrings_handlers/c/handler.py: 85.59%

1"""This module implements a handler for the C language."""

3from __future__ import annotations

5import re

6from dataclasses import dataclass

7from enum import Enum

8from io import StringIO

9from pathlib import Path

10from typing import TYPE_CHECKING, Any, ClassVar, Mapping, MutableMapping, Protocol

12from mkdocstrings.handlers.base import BaseHandler, CollectionError, CollectorItem

13from mkdocstrings.loggers import get_logger

14from pycparser import CParser, c_ast

16if TYPE_CHECKING:

17 from markdown import Markdown

18 from pycparser.c_ast import FileAST

# Module-level logger obtained from mkdocstrings' logging helper.
# NOTE(review): nothing in the visible part of this module logs through it yet.
logger = get_logger(__name__)

@dataclass
class Comment:
    """One comment lifted out of the C source by `extract_comments`."""

    # Comment text as found in the source, comment delimiters included.
    text: str
    # 1-based number of the line on which the comment ends.
    last_line_number: int

@dataclass
class Macro:
    """One preprocessor directive lifted out of the C source by `extract_macros`."""

    # Directive text with surrounding spaces stripped.
    text: str
    # 1-based number of the line on which the directive was found.
    line_number: int

# Single pycparser instance, created at import time and used by the handler
# to parse the source once comments and macros have been blanked out.
_C_PARSER = CParser()

# --- Patterns applied to raw source lines ------------------------------------

# A line (not starting with `"`) that contains `;` followed by a trailing
# `// ...` or `/* ... */` comment. Group 1 is the whole trailing comment.
_END_COMMENT = re.compile(r'^(?!").+; *((\/\/.*)|(\/\*.*\*\/))$')
# A preprocessor line followed by a trailing comment. Group 1 is the whole
# comment; group 2 is set only for `//` comments, group 3 only for `/* */`.
_END_COMMENT_MACRO = re.compile(r"#.+ *((\/\/.*)|(\/\*.*\*\/))$")
# `#define NAME VALUE`. Group 1 is the name, group 2 the (possibly empty)
# run of word characters that follows it.
_DEFINE = re.compile(r"# *define (\w+) *(\w*)")

# --- Patterns applied to extracted comment text ------------------------------

# `// text` comment; group 1 is the text after the marker.
_SINGLE_COMMENT = re.compile(r" *\/\/ ?(.*)")
# `/* text */` on one line; group 1 is the text between the delimiters.
_SINGLE_COMMENT_ALT = re.compile(r"\/\* *(.+) *\*\/")
# Block comment opened by `/*` or `/*!` on its own line; group 1 is the body
# between the opening and closing lines.
_FULL_DOC = re.compile(r"\/\*!?\n((.|\n)+)\n *\*\/")
# One `* text` body line of a block comment; group 1 is the text.
_DESC = re.compile(r" *\* *(.+)")
# One `* @name[dir] rest` directive line; group 1 is the directive name,
# group 2 the optional bracketed part (brackets included), group 3 the rest.
_DIRECTIVE = re.compile(r" *\* *@(\w+)(\[.+\])? *(.+)")
# Body of an `@param` directive; group 1 is the parameter name, group 2 its
# description.
_PARAM_BODY = re.compile(r"(\w+) *(.+)")

def extract_comments(code: str) -> tuple[list[Comment], str]:
    """Extract comments from the source code.

    Every comment is replaced by blank text so that the returned source has
    exactly as many lines as the input and line numbers stay meaningful.

    Parameters:
        code: The source code to extract comments from.

    Returns:
        A tuple containing a list of comments and the source code with comments removed.

    Raises:
        CollectionError: If a multiline comment is closed without having been
            opened, or is still open at the end of the source.
    """
    comments: list[Comment] = []
    extracted: list[str] = []
    in_comment = False
    # Lines of the multiline comment being read. A plain list is used instead
    # of a StringIO: `StringIO.truncate(0)` keeps the write position, so a
    # reused buffer pads every later comment with NUL characters.
    pending: list[str] = []

    for index, line in enumerate(code.split("\n")):
        content = line.strip(" ")
        lineno = index + 1

        if content.startswith("//") or (content.startswith("/*") and content.endswith("*/")):
            # single line comment
            comments.append(Comment(content, lineno))
            extracted.append("")  # preserve line count
        elif match := _END_COMMENT_MACRO.match(line):
            # comment at end of preprocessor directive; group 1 covers both
            # the `//` and the `/* */` form (group 2 is None for the latter)
            comments.append(Comment(match.group(1), lineno))
            extracted.append(line[: match.start(1)])
        elif match := _END_COMMENT.match(line):
            # comment at end of line
            comments.append(Comment(match.group(1), lineno))
            extracted.append(line[: match.start(1)])
        elif content.startswith("/*"):
            # start of multiline comment
            in_comment = True
            pending.append(content)
        elif content.endswith("*/"):
            # end of multiline comment
            if not in_comment:
                raise CollectionError("Found close to multiline comment without a start!")

            in_comment = False
            pending.append(line)
            comments.append(Comment("\n".join(pending), lineno))
            # one blank line per consumed comment line preserves line count
            extracted.extend("" for _ in pending)
            pending = []
        elif in_comment:
            # we want to preserve the indentation
            # here, so use line instead of content
            pending.append(line)
        else:
            # not a comment
            extracted.append(line)

    if in_comment:
        raise CollectionError("Unterminated comment!")

    return comments, "\n".join(extracted)

111

112

def extract_macros(code: str) -> tuple[list[Macro], str]:
    """Extract macros from the source code.

    A preprocessor directive is any line whose first non-space character is
    `#`. A directive ending in a backslash continues on the following line(s);
    the whole run is reported as one macro whose text is the stripped lines
    joined with newlines and whose line number is that of its first line.
    Every consumed line is replaced by an empty line so the returned source
    keeps its line count.

    Parameters:
        code: The source code to extract macros from.

    Returns:
        A tuple containing a list of macros and the source code with macros removed.
    """
    extracted: list[str] = []
    macros: list[Macro] = []

    # Stripped lines of the directive being accumulated; empty when the
    # previous line did not end in a continuation backslash.
    pending: list[str] = []
    start_line = -1

    for index, line in enumerate(code.split("\n")):
        content = line.strip(" ")

        if not pending and not content.startswith("#"):
            extracted.append(line)
            continue

        extracted.append("")  # preserve line count

        if not pending:
            start_line = index + 1
        pending.append(content)

        if content.endswith("\\"):
            # directive continues on the next line
            continue

        macros.append(Macro("\n".join(pending), start_line))
        pending = []

    if pending:
        # The source ended right after a continuation backslash: keep what
        # was read rather than silently dropping the directive.
        macros.append(Macro("\n".join(pending), start_line))

    return macros, "\n".join(extracted)

162

163

class InOut(str, Enum):
    """Direction tag attached to a documented parameter (`@param[in]` / `@param[out]`)."""

    # No (recognised) direction was given on the directive.
    UNSPECIFIED = "unspecified"
    # Directive was written as `@param[in]`.
    IN = "in"
    # Directive was written as `@param[out]`.
    OUT = "out"

170

171

@dataclass
class Param:
    """Documentation of one parameter, taken from an `@param` directive."""

    # Parameter name as written in the directive.
    name: str
    # Free-text description following the name.
    desc: str
    # Direction given in brackets after `@param`, if any.
    in_out: InOut

179

180

@dataclass
class Docstring:
    """Result of parsing one documentation comment."""

    # Description text of the comment.
    desc: str
    # Documented parameters; left as None when the parser does not supply any.
    params: list[Param] | None = None
    # Text of the `@return` / `@returns` directive, or None when absent.
    ret: str | None = None

188

189

def parse_docstring(content: str) -> Docstring:
    """Parse a docstring.

    Three comment shapes are recognised, tried in this order: a `// text`
    comment, a one-line `/* text */` comment, and a block comment whose body
    lines start with `*`. In a block comment every line before the first line
    containing `@` is description text; that line and all following lines must
    be `@param`, `@return` or `@returns` directives.

    Parameters:
        content: The content of the docstring.

    Returns:
        A parsed docstring.

    Raises:
        CollectionError: If the comment matches none of the recognised shapes,
            a body line is malformed, an unknown directive is used, or more
            than one return directive is present.
    """
    single = _SINGLE_COMMENT.match(content)
    if single:
        return Docstring(single.group(1))

    single_alt = _SINGLE_COMMENT_ALT.match(content)
    if single_alt:
        return Docstring(single_alt.group(1))

    full = _FULL_DOC.match(content)
    if not full:
        raise CollectionError(f"Could not parse docstring! {content}")

    lines = full.group(1).split("\n")

    # Index of the first directive line. When the comment has no directive at
    # all, this is len(lines) so that the directive slice below is empty
    # (a -1 sentinel would wrongly select the last description line).
    first_directive = next(
        (index for index, text in enumerate(lines) if "@" in text),
        len(lines),
    )

    desc_parts: list[str] = []
    for text in lines[:first_directive]:
        match = _DESC.match(text)
        if not match:
            raise CollectionError(f"Invalid docstring syntax: {text}")
        desc_parts.append(match.group(1))

    params: list[Param] = []
    returns: str | None = None

    for directive in lines[first_directive:]:
        match = _DIRECTIVE.match(directive)
        if not match:
            raise CollectionError(f"Invalid docstring syntax: {directive}")

        name = match.group(1)

        if name == "param":
            in_out_str = match.group(2)

            if in_out_str == "[in]":
                in_out = InOut.IN
            elif in_out_str == "[out]":
                in_out = InOut.OUT
            else:
                in_out = InOut.UNSPECIFIED

            body = _PARAM_BODY.match(match.group(3))
            if not body:
                # report the offending text, not the (None) failed match
                raise CollectionError(f"Invalid @param body: {match.group(3)}")

            params.append(Param(body.group(1), body.group(2), in_out))
        elif name in {"return", "returns"}:
            if returns:
                raise CollectionError("Multiple @returns found!")
            returns = match.group(3)
        else:
            raise CollectionError(f"Invalid directive in docstring: {name}")

    # Each description line is followed by a single space, including the last.
    description = "".join(f"{part} " for part in desc_parts)
    return Docstring(description, params, returns)

264

265

@dataclass
class DocMacro:
    """A `#define` macro together with its documentation."""

    # Name of the macro.
    name: str
    # Value text captured after the name, or None when nothing was captured.
    content: str | None
    # Parsed documentation comment attached to the macro, if one was found.
    doc: Docstring | None

273

274

@dataclass
class DocType:
    """A typedef together with its documentation."""

    # Name introduced by the typedef.
    name: str
    # The type the name stands for.
    tp: TypeRef
    # Parsed documentation comment attached to the typedef, if one was found.
    doc: Docstring | None
    # Qualifiers recorded on the typedef node.
    quals: list[str]

283

284

@dataclass
class DocGlobalVar:
    """A file-scope variable declaration together with its documentation."""

    # Name of the variable.
    name: str
    # Declared type of the variable.
    tp: TypeRef
    # Parsed documentation comment attached to the variable, if one was found.
    doc: Docstring | None
    # Qualifiers recorded on the declaration node.
    quals: list[str]

293

294

@dataclass
class FuncParam:
    """One parameter as it appears in a C function declaration."""

    # Name of the parameter in the declaration.
    name: str
    # Declared type of the parameter.
    tp: TypeRef

301

302

@dataclass
class DocFunc:
    """A function declaration together with its documentation."""

    # Name of the function.
    name: str
    # Declared parameters, in declaration order.
    args: list[FuncParam]
    # Return type of the function.
    ret: TypeRef
    # Parsed documentation comment attached to the function, if one was found.
    doc: Docstring | None

311

312

@dataclass
class CodeDoc:
    """Everything collected from one C source file."""

    # Documented `#define` macros.
    macros: list[DocMacro]
    # Function declarations.
    functions: list[DocFunc]
    # File-scope variable declarations.
    global_vars: list[DocGlobalVar]
    # Typedefs, keyed by the name each one introduces.
    typedefs: dict[str, DocType]

321

322

class TypeDecl(str, Enum):
    """Shape of a `TypeRef`: a plain named type or a derived one."""

    # Plain named type.
    NORMAL = "normal"
    # Pointer to another type.
    POINTER = "pointer"
    # Array of another type.
    ARRAY = "array"
    # Function type (return type plus parameter types).
    FUNCTION = "function"

330

331

@dataclass
class TypeRef:
    """A (possibly nested) description of a C type."""

    # For a NORMAL type, the type's name as a string. For pointer and array
    # types, the TypeRef being pointed to / contained. For function types,
    # the return type.
    name: TypeRef | str
    # Which kind of type this reference describes.
    decl: TypeDecl
    # Qualifiers attached at this level of the type.
    quals: list[str]
    # Parameter types; only populated for function types.
    params: list[TypeRef] | None = None

340

341

class SupportsQualsAndType(Protocol):
    """Structural type for AST nodes exposing both `quals` and `type`."""

    # Qualifiers attached to the node.
    quals: list[str]
    # The wrapped node: another qualifier-carrying node, a type declaration,
    # or an identifier type.
    type: SupportsQualsAndType | c_ast.TypeDecl | c_ast.IdentifierType

347

348

def ast_to_decl(node: SupportsQualsAndType, types: dict[str, DocType]) -> TypeRef:
    """Convert a pycparser AST node to a TypeRef.

    Parameters:
        node: A pycparser type node: `TypeDecl`, `PtrDecl`, `ArrayDecl`, or
            (anything else is treated as) `FuncDecl`.
        types: Typedefs collected so far, keyed by name. A plain type whose
            name is a known typedef resolves to that typedef's `TypeRef`.

    Returns:
        The TypeRef describing the node.
    """
    if isinstance(node, c_ast.TypeDecl):
        inner = node.type

        if isinstance(inner, c_ast.IdentifierType):
            # `names` holds every word of the specifier (e.g. ["unsigned",
            # "int"]); keep them all rather than only the first one.
            name = " ".join(inner.names)
        else:
            # struct / union / enum specifiers have a single, optional `name`
            # instead of a `names` list.
            keyword = type(inner).__name__.lower()
            name = f"{keyword} {inner.name}" if inner.name else keyword

        existing = types.get(name)
        if existing:
            return existing.tp

        return TypeRef(name, TypeDecl.NORMAL, node.quals)

    if isinstance(node, c_ast.PtrDecl):
        return TypeRef(ast_to_decl(node.type, types), TypeDecl.POINTER, node.quals)

    if isinstance(node, c_ast.ArrayDecl):
        # pycparser's ArrayDecl has no `quals` attribute; the qualifiers
        # written inside the brackets are exposed as `dim_quals`.
        array_quals = list(getattr(node, "dim_quals", None) or [])
        return TypeRef(ast_to_decl(node.type, types), TypeDecl.ARRAY, array_quals)

    # Anything else is handled as a FuncDecl. `args` is None when the
    # declaration has an empty parameter list, e.g. `int f();`.
    args = node.args  # type: ignore[attr-defined]
    declared = args.params if args is not None else []
    return TypeRef(
        ast_to_decl(node.type, types),
        TypeDecl.FUNCTION,
        [],
        [ast_to_decl(decl.type, types) for decl in declared],
    )

375

376

def _tp_ref_format_char(ref: TypeRef, char: str, qualname: str) -> str:
    """Render the type wrapped by `ref` and append `char` to it.

    When `ref` carries qualifiers, they are placed in front and the whole
    expression is wrapped in parentheses.
    """
    # `ref.name` is the wrapped TypeRef for pointer/array references.
    inner = tp_ref_to_str(ref.name, qualname)  # type: ignore[arg-type]
    if not ref.quals:
        return f"{inner}{char}"

    qualifiers = " ".join(ref.quals)
    return f"({qualifiers} {inner}{char})"

383

384

def tp_ref_to_str(ref: TypeRef, qualname: str) -> str:
    """Render a TypeRef as C-like source text.

    Parameters:
        ref: The TypeRef to convert.
        qualname: Name placed inside `(*...)` when a function type is rendered.

    Returns:
        The string representation of the TypeRef.
    """
    kind = ref.decl

    if kind == TypeDecl.NORMAL:
        if not ref.quals:
            return ref.name  # type: ignore[return-value]
        return f"{' '.join(ref.quals)} {ref.name}"

    # Pointer and array types differ only in the suffix that gets appended.
    if kind == TypeDecl.POINTER:
        return _tp_ref_format_char(ref, "*", qualname)
    if kind == TypeDecl.ARRAY:
        return _tp_ref_format_char(ref, "[]", qualname)

    # Remaining case: function type, rendered in function-pointer notation.
    rendered_params = ", ".join(
        tp_ref_to_str(param, qualname) for param in ref.params  # type: ignore[union-attr]
    )
    return_type = ref.name
    if isinstance(return_type, TypeRef):
        return_type = tp_ref_to_str(return_type, qualname)

    return f"{return_type} (*{qualname})({rendered_params})"

414

415

def typedef_to_str(decl: DocType) -> str:
    """Render the type behind a typedef, using the typedef's own name as the qualified name.

    Parameters:
        decl: The typedef to convert.

    Returns:
        The string representation of the typedef.
    """
    underlying, alias = decl.tp, decl.name
    return tp_ref_to_str(underlying, alias)

426

427

def desc(doc: Docstring | None) -> str:
    """Return the description of a docstring, or a placeholder when there is none.

    Parameters:
        doc: The docstring to get the description from.

    Returns:
        The description.
    """
    return doc.desc if doc else "No description specified."

441

442

def lookup_type_html(data: CodeDoc, tp: TypeRef, *, name: str | None = None) -> str:
    """Render a type as an HTML `<code>` element.

    If the type equals the target of a typedef in `data`, the element contains
    a link to that typedef's anchor (the last matching typedef wins).
    Otherwise it contains the type's textual form.

    Parameters:
        data: The parsed C source file.
        tp: The type to lookup.
        name: The name of the type.

    Returns:
        The HTML representation of the type.
    """
    aliases = [alias for alias, doctype in data.typedefs.items() if doctype.tp == tp]

    if aliases:
        alias = aliases[-1]
        inner = f'<a href="#type-{alias}">{alias}</a>'
    else:
        inner = tp_ref_to_str(tp, name or "unknown")

    return f"<code>{inner}</code>"

461

462

class CHandler(BaseHandler):
    """The C handler class."""

    name: str = "c"
    """The handler's name."""

    domain: str = "c"
    """The cross-documentation domain/language for this handler."""

    enable_inventory: bool = False
    """Whether this handler is interested in enabling the creation of the `objects.inv` Sphinx inventory file."""

    fallback_theme = "material"
    """The theme to fallback to."""

    fallback_config: ClassVar[dict] = {"fallback": True}
    """The configuration used to collect item during autorefs fallback."""

    default_config: ClassVar[dict] = {
        "show_root_heading": False,
        "show_root_toc_entry": True,
        "show_symbol_type_heading": True,
        "show_symbol_type_toc_entry": True,
        "heading_level": 2,
    }
    """The default configuration options.

    Option | Type | Description | Default
    ------ | ---- | ----------- | -------
    **`show_root_heading`** | `bool` | Show the heading of the object at the root of the documentation tree. | `False`
    **`show_root_toc_entry`** | `bool` | If the root heading is not shown, at least add a ToC entry for it. | `True`
    **`show_symbol_type_heading`** | `bool` | Option forwarded to the templates. | `True`
    **`show_symbol_type_toc_entry`** | `bool` | Option forwarded to the templates. | `True`
    **`heading_level`** | `int` | The initial heading level to use. | `2`
    """

    @staticmethod
    def _pop_docstring(comments: dict[int, Comment], lineno: int) -> Docstring | None:
        """Remove and parse the comment documenting the symbol declared at `lineno`.

        A comment ending on the declaration's own line takes precedence over
        one ending on the line directly above it. The comment is removed from
        `comments` so that it cannot be attached to a second symbol.

        Parameters:
            comments: Remaining comments, keyed by the line they end on.
            lineno: Line number of the symbol being documented.

        Returns:
            The parsed docstring, or None when no comment is attached.
        """
        for candidate in (lineno, lineno - 1):
            raw_doc = comments.pop(candidate, None)
            if raw_doc is not None:
                return parse_docstring(raw_doc.text)
        return None

    def collect(self, identifier: str, config: MutableMapping[str, Any]) -> CollectorItem:
        """Read a C source file and collect its macros, typedefs, functions and global variables.

        The file is read as UTF-8, comments and preprocessor directives are
        stripped (keeping line numbers intact), and the remainder is parsed
        with pycparser. Each top-level typedef and declaration is paired with
        the comment ending on its line or on the line above; `#define` macros
        are paired the same way afterwards.

        Parameters:
            identifier: Path of the C file to document, as written in the markdown document.
            config: All configuration options for this handler either defined globally in `mkdocs.yml` or
                locally overridden in an identifier block by the user. Only the `fallback` key is read here.

        Returns:
            A `CodeDoc` holding everything that was collected.

        Raises:
            CollectionError: When called during autorefs fallback, or when a comment cannot be parsed.
        """
        if config.get("fallback", False):
            raise CollectionError("Not loading additional headers during fallback")

        source = Path(identifier).read_text(encoding="utf-8")
        comments_list, source = extract_comments(source)
        macros_list, source = extract_macros(source)
        code: FileAST = _C_PARSER.parse(source)

        comments: dict[int, Comment] = {comment.last_line_number: comment for comment in comments_list}
        types: dict[str, DocType] = {}
        global_vars: list[DocGlobalVar] = []
        funcs: list[DocFunc] = []

        for node in code.ext:
            if not isinstance(node, (c_ast.Typedef, c_ast.Decl)):
                continue

            docstring = self._pop_docstring(comments, node.coord.line)

            if isinstance(node, c_ast.Typedef):
                types[node.name] = DocType(node.name, ast_to_decl(node.type, types), docstring, node.quals)
            elif type(node) is c_ast.Decl:  # we dont want the subclasses
                if isinstance(node.type, c_ast.FuncDecl):
                    ref = ast_to_decl(node.type, types)
                    # `args` is None for a declaration with an empty
                    # parameter list such as `int f();`.
                    args = node.type.args
                    declared = args.params if args is not None else []
                    params = [
                        FuncParam(param.name, param_ref)
                        for param_ref, param in zip(ref.params or [], declared)
                    ]
                    # For a function TypeRef, `name` holds the return type.
                    funcs.append(DocFunc(node.name, params, ref.name, docstring))  # type: ignore[arg-type]
                else:
                    global_vars.append(
                        DocGlobalVar(
                            node.name,
                            ast_to_decl(node.type, types),
                            docstring,
                            node.quals,
                        ),
                    )

        macros: list[DocMacro] = []

        for macro in macros_list:
            match = _DEFINE.match(macro.text)

            # directives other than `#define` are not documented
            if not match:
                continue

            docstring = self._pop_docstring(comments, macro.line_number)
            macros.append(DocMacro(match.group(1), match.group(2) or None, docstring))

        return CodeDoc(macros, funcs, global_vars, types)

    def render(self, data: CodeDoc, config: Mapping[str, Any]) -> str:
        """Render a template using provided data and configuration options.

        Parameters:
            data: The data to render that was collected above in `collect()`.
            config: All configuration options for this handler either defined globally in `mkdocs.yml` or
                locally overridden in an identifier block by the user.

        Returns:
            The rendered template as HTML.
        """
        # user-supplied options override the defaults
        final_config = {**self.default_config, **config}
        template = self.env.get_template("header.html.jinja")
        return template.render(
            config=final_config,
            header=data,
            heading_level=final_config["heading_level"],
            root=True,
        )

    def update_env(self, md: Markdown, config: dict) -> None:
        """Update the Jinja environment with any custom settings/filters/options for this handler.

        Parameters:
            md: The Markdown instance. Useful to add functions able to convert Markdown into the environment filters.
            config: Configuration options for `mkdocs` and `mkdocstrings`, read from `mkdocs.yml`. See the source code
                of [mkdocstrings.plugin.MkdocstringsPlugin.on_config][] to see what's in this dictionary.
        """
        super().update_env(md, config)  # Add some mkdocstrings default filters such as highlight and convert_markdown
        self.env.trim_blocks = True
        self.env.lstrip_blocks = True
        self.env.keep_trailing_newline = False
        # filters made available to the templates
        self.env.filters["typedef_to_str"] = typedef_to_str
        self.env.filters["lookup_type_html"] = lookup_type_html
        self.env.filters["zip"] = zip

629

630

def get_handler(
    theme: str,
    custom_templates: str | None = None,
    config_file_path: str | None = None,  # noqa: ARG001
    **config: Any,  # noqa: ARG001
) -> CHandler:
    """Build the `CHandler` instance used by mkdocstrings.

    Parameters:
        theme: The theme to use when rendering contents.
        custom_templates: Directory containing custom templates.
        config_file_path: The MkDocs configuration file path.
        **config: Configuration passed to the handler.

    Returns:
        An instance of the handler.
    """
    # `config_file_path` and `**config` are accepted but not forwarded:
    # passing them on would require CHandler to override `__init__`.
    handler_kwargs = {
        "handler": "c",
        "theme": theme,
        "custom_templates": custom_templates,
    }
    return CHandler(**handler_kwargs)