Coverage for src/mkdocstrings_handlers/c/handler.py: 85.59% (318 statements)

coverage.py v7.6.0, created at 2024-08-01 19:41 +0200

1"""This module implements a handler for the C language.""" 

2 

3from __future__ import annotations 

4 

5import re 

6from dataclasses import dataclass 

7from enum import Enum 

8from io import StringIO 

9from pathlib import Path 

10from typing import TYPE_CHECKING, Any, ClassVar, Mapping, MutableMapping, Protocol 

11 

12from mkdocstrings.handlers.base import BaseHandler, CollectionError, CollectorItem 

13from mkdocstrings.loggers import get_logger 

14from pycparser import CParser, c_ast 

15 

16if TYPE_CHECKING: 

17 from markdown import Markdown 

18 from pycparser.c_ast import FileAST 

19 

20 

21logger = get_logger(__name__) 

22 

23 

24@dataclass 

25class Comment: 

26 """A comment extracted from the source code.""" 

27 

28 text: str 

29 last_line_number: int 

30 

31 

32@dataclass 

33class Macro: 

34 """A macro extracted from the source code.""" 

35 

36 text: str 

37 line_number: int 

38 

39 

40_C_PARSER = CParser() 

41_END_COMMENT = re.compile(r'^(?!").+; *((\/\/.*)|(\/\*.*\*\/))$') 

42_END_COMMENT_MACRO = re.compile(r"#.+ *((\/\/.*)|(\/\*.*\*\/))$") 

43_SINGLE_COMMENT = re.compile(r" *\/\/ ?(.*)") 

44_SINGLE_COMMENT_ALT = re.compile(r"\/\* *(.+) *\*\/") 

45_FULL_DOC = re.compile(r"\/\*!?\n((.|\n)+)\n *\*\/") 

46_DESC = re.compile(r" *\* *(.+)") 

47_DIRECTIVE = re.compile(r" *\* *@(\w+)(\[.+\])? *(.+)") 

48_PARAM_BODY = re.compile(r"(\w+) *(.+)") 

49_DEFINE = re.compile(r"# *define (\w+) *(\w*)") 

50 

51 
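
# Illustrative sketch, for documentation only (not used by the handler): what a
# few of the regular expressions above match. The sample strings are hypothetical.
def _example_regexes() -> None:
    assert _SINGLE_COMMENT.match("// the counter").group(1) == "the counter"
    assert _END_COMMENT.match("int x = 0; // counter").group(1) == "// counter"
    assert _DEFINE.match("#define MAX 64").groups() == ("MAX", "64")
    assert _DIRECTIVE.match(" * @param[in] buf the buffer").groups() == (
        "param",
        "[in]",
        "buf the buffer",
    )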

def extract_comments(code: str) -> tuple[list[Comment], str]:
    """Extract comments from the source code.

    Parameters:
        code: The source code to extract comments from.

    Returns:
        A tuple containing a list of comments and the source code with comments removed.
    """
    comments: list[Comment] = []
    extracted: list[str] = []
    in_comment: bool = False
    buffer = StringIO()

    for index, line in enumerate(code.split("\n")):
        content = line.lstrip(" ").rstrip(" ")
        if content.startswith("//") or (content.startswith("/*") and content.endswith("*/")):
            # single line comment
            comments.append(Comment(content, index + 1))
            extracted.append("")  # preserve line count
        elif match := _END_COMMENT_MACRO.match(line):
            # comment at end of preprocessor directive
            comments.append(Comment(match.group(2), index + 1))
            extracted.append(line[: match.start(2)])
            continue
        elif match := _END_COMMENT.match(line):
            # comment at end of line
            comments.append(Comment(match.group(1), index + 1))
            extracted.append(line[: match.start(1)])
            continue
        elif content.startswith("/*"):
            # start of multiline comment
            in_comment = True
            buffer.write(content + "\n")
        elif content.endswith("*/"):
            # end of multiline comment
            if not in_comment:  # coverage: 88 ↛ 89 (condition was never true)
                raise CollectionError("Found close to multiline comment without a start!")

            in_comment = False
            buffer.write(line)
            bufval = buffer.getvalue()
            comments.append(Comment(bufval, index + 1))
            buffer.seek(0)  # rewind before truncating so the next comment starts at position 0
            buffer.truncate(0)

            for _ in range(bufval.count("\n") + 1):
                extracted.append("")  # preserve line count
        elif in_comment:
            # we want to preserve the indentation
            # here, so use line instead of content
            buffer.write(line + "\n")
        else:
            # not a comment
            extracted.append(line)

    if in_comment:  # coverage: 107 ↛ 108 (condition was never true)
        raise CollectionError("Unterminated comment!")

    return comments, "\n".join(extracted)
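
# Illustrative sketch, for documentation only (not used by the handler): extracting
# comments from a hypothetical snippet. Single-line and trailing comments are
# recorded individually; a multiline comment is buffered into one entry keyed by
# its last line, and the stripped source keeps its line count.
def _example_extract_comments() -> None:
    code = "int x = 0; // counter\n/* docs\n   for y */\nint y;"
    comments, stripped = extract_comments(code)
    assert [c.text for c in comments] == ["// counter", "/* docs\n   for y */"]
    assert [c.last_line_number for c in comments] == [1, 3]
    assert stripped.count("\n") == code.count("\n")  # line numbers stay valid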

def extract_macros(code: str) -> tuple[list[Macro], str]:
    """Extract macros from the source code.

    Parameters:
        code: The source code to extract macros from.

    Returns:
        A tuple containing a list of macros and the source code with macros removed.
    """
    extracted: list[str] = []
    macros: list[Macro] = []

    # buffer variables
    next_is_macro: bool = False
    buffer = StringIO()
    start_line = -1

    for index, line in enumerate(code.split("\n")):
        content = line.lstrip(" ").rstrip(" ")

        if (not content.startswith("#")) and (not next_is_macro):
            extracted.append(line)
            continue

        extracted.append("")

        if next_is_macro:  # coverage: 139 ↛ 140 (condition was never true)
            next_is_macro = False
            buffer.write("\n" + content)

        if content.endswith("\\"):  # coverage: 143 ↛ 144 (condition was never true)
            if not next_is_macro:
                # start of macro
                start_line = index + 1
                buffer.write(content)

            next_is_macro = True

        bufval = buffer.getvalue()
        if (not next_is_macro) and (bufval):  # coverage: 152 ↛ 154 (condition was never true)
            # multiline macro has ended
            macros.append(Macro(bufval, start_line))
            start_line = -1
            buffer.seek(0)  # rewind before truncating so the next macro starts at position 0
            buffer.truncate(0)
        else:
            # single line macro
            macros.append(Macro(content, index + 1))

    return macros, "\n".join(extracted)
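
# Illustrative sketch, for documentation only (not used by the handler):
# preprocessor directives are pulled out before parsing, since pycparser expects
# already-preprocessed C. The directive line is blanked out so line numbers in
# the remaining source stay valid. The snippet is hypothetical.
def _example_extract_macros() -> None:
    code = "#define MAX_SLOTS 8\nint table[MAX_SLOTS];"
    macros, stripped = extract_macros(code)
    assert [m.text for m in macros] == ["#define MAX_SLOTS 8"]
    assert macros[0].line_number == 1
    assert stripped == "\nint table[MAX_SLOTS];"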

class InOut(str, Enum):
    """Enumeration for parameter direction."""

    UNSPECIFIED = "unspecified"
    IN = "in"
    OUT = "out"


@dataclass
class Param:
    """A parameter in a function signature."""

    name: str
    desc: str
    in_out: InOut


@dataclass
class Docstring:
    """A parsed docstring."""

    desc: str
    params: list[Param] | None = None
    ret: str | None = None

def parse_docstring(content: str) -> Docstring:
    """Parse a docstring.

    Parameters:
        content: The content of the docstring.

    Returns:
        A parsed docstring.
    """
    single = _SINGLE_COMMENT.match(content)

    if single:
        return Docstring(single.group(1))

    single_alt = _SINGLE_COMMENT_ALT.match(content)
    if single_alt:  # coverage: 205 ↛ 206 (condition was never true)
        return Docstring(single_alt.group(1))

    full = _FULL_DOC.match(content)
    if not full:  # coverage: 209 ↛ 210 (condition was never true)
        raise CollectionError(f"Could not parse docstring! {content}")

    text = full.group(1)
    desc = StringIO()
    split = text.split("\n")
    start_index = -1
    params: list[Param] = []
    returns: str | None = None

    for index, i in enumerate(split):  # coverage: 219 ↛ 230 (loop always exited via break)
        if "@" in i:
            start_index = index
            break

        match = _DESC.match(i)
        if not match:  # coverage: 225 ↛ 226 (condition was never true)
            raise CollectionError(f"Invalid docstring syntax: {i}")

        desc.write(match.group(1) + " ")

    for directive in split[start_index:]:
        match = _DIRECTIVE.match(directive)

        if not match:  # coverage: 233 ↛ 234 (condition was never true)
            raise CollectionError(f"Invalid docstring syntax: {directive}")

        name = match.group(1)

        if name == "param":  # coverage: 238 ↛ 256 (condition was always true)
            in_out_str = match.group(2)

            if in_out_str == "[in]":
                in_out = InOut.IN
            elif in_out_str == "[out]":
                in_out = InOut.OUT
            else:
                in_out = InOut.UNSPECIFIED

            body = _PARAM_BODY.match(match.group(3))

            if not body:  # coverage: 250 ↛ 251 (condition was never true)
                raise CollectionError(f"Invalid @param body: {body}")

            name = body.group(1)
            param_desc = body.group(2)
            params.append(Param(name, param_desc, in_out))
        elif name in {"return", "returns"}:
            if returns:
                raise CollectionError("Multiple @returns found!")
            returns = match.group(3)
        else:
            raise CollectionError(f"Invalid directive in docstring: {name}")

    return Docstring(desc.getvalue(), params, returns)
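
# Illustrative sketch, for documentation only (not used by the handler): parsing a
# Doxygen-style block comment as produced by `extract_comments`. The sample
# docstring is hypothetical.
def _example_parse_docstring() -> None:
    doc = parse_docstring(
        "/*!\n"
        " * Add two numbers.\n"
        " * @param[in] a first operand\n"
        " * @param[in] b second operand\n"
        " * @returns the sum\n"
        " */"
    )
    assert doc.desc.strip() == "Add two numbers."
    assert doc.params == [
        Param("a", "first operand", InOut.IN),
        Param("b", "second operand", InOut.IN),
    ]
    assert doc.ret == "the sum"

    # Plain single-line comments become a bare description.
    assert parse_docstring("// the version marker").desc == "the version marker"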

@dataclass
class DocMacro:
    """A parsed macro."""

    name: str
    content: str | None
    doc: Docstring | None


@dataclass
class DocType:
    """A parsed typedef."""

    name: str
    tp: TypeRef
    doc: Docstring | None
    quals: list[str]


@dataclass
class DocGlobalVar:
    """A parsed global variable."""

    name: str
    tp: TypeRef
    doc: Docstring | None
    quals: list[str]


@dataclass
class FuncParam:
    """A parameter in a function signature."""

    name: str
    tp: TypeRef


@dataclass
class DocFunc:
    """A parsed function."""

    name: str
    args: list[FuncParam]
    ret: TypeRef
    doc: Docstring | None


@dataclass
class CodeDoc:
    """A parsed C source file."""

    macros: list[DocMacro]
    functions: list[DocFunc]
    global_vars: list[DocGlobalVar]
    typedefs: dict[str, DocType]


class TypeDecl(str, Enum):
    """Enumeration for type declarations."""

    NORMAL = "normal"
    POINTER = "pointer"
    ARRAY = "array"
    FUNCTION = "function"


@dataclass
class TypeRef:
    """A reference to a type in C."""

    name: TypeRef | str
    decl: TypeDecl
    quals: list[str]
    params: list[TypeRef] | None = None  # only in functions


class SupportsQualsAndType(Protocol):
    """A protocol for types that can have qualifiers and a type."""

    quals: list[str]
    type: SupportsQualsAndType | c_ast.TypeDecl | c_ast.IdentifierType

def ast_to_decl(node: SupportsQualsAndType, types: dict[str, DocType]) -> TypeRef:
    """Convert a pycparser AST node to a TypeRef."""
    if isinstance(node, c_ast.TypeDecl):
        # assert isinstance(node.type, c_ast.IdentifierType)
        name = node.type.names[0]
        existing = types.get(name)

        if existing:
            return existing.tp

        return TypeRef(name, TypeDecl.NORMAL, node.quals)

    if isinstance(node, c_ast.PtrDecl):
        # assert not isinstance(node.type, c_ast.IdentifierType)
        return TypeRef(ast_to_decl(node.type, types), TypeDecl.POINTER, node.quals)

    if isinstance(node, c_ast.ArrayDecl):  # coverage: 365 ↛ 366 (condition was never true)
        return TypeRef(ast_to_decl(node.type, types), TypeDecl.ARRAY, node.quals)

    # assert isinstance(node, c_ast.FuncDecl), f"expected a FuncDecl, got {node}"
    return TypeRef(
        ast_to_decl(node.type, types),
        TypeDecl.FUNCTION,
        [],
        [ast_to_decl(decl.type, types) for decl in node.args.params],  # type: ignore[attr-defined]
    )
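
# Illustrative sketch, for documentation only (not used by the handler): converting
# the pycparser declaration for a hypothetical `int *x;` into the nested TypeRef
# representation used by this handler.
def _example_ast_to_decl() -> None:
    ast = _C_PARSER.parse("int *x;")
    decl = ast.ext[0]  # c_ast.Decl for `x`
    ref = ast_to_decl(decl.type, {})
    assert ref == TypeRef(TypeRef("int", TypeDecl.NORMAL, []), TypeDecl.POINTER, [])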

def _tp_ref_format_char(ref: TypeRef, char: str, qualname: str) -> str:
    # assert isinstance(ref.name, TypeRef)
    content = tp_ref_to_str(ref.name, qualname)  # type: ignore[arg-type]
    if ref.quals:  # coverage: 380 ↛ 381 (condition was never true)
        return f"({' '.join(ref.quals)} {content}{char})"
    return f"{content}{char}"


def tp_ref_to_str(ref: TypeRef, qualname: str) -> str:
    """Convert a TypeRef to a string.

    Parameters:
        ref: The TypeRef to convert.
        qualname: The declared name, used when rendering function pointer types.

    Returns:
        The string representation of the TypeRef.
    """
    if ref.decl == TypeDecl.NORMAL:
        if ref.quals:
            return f"{' '.join(ref.quals)} {ref.name}"

        return ref.name  # type: ignore[return-value]

    if ref.decl == TypeDecl.POINTER:  # coverage: 401 ↛ 404 (condition was always true)
        return _tp_ref_format_char(ref, "*", qualname)

    if ref.decl == TypeDecl.ARRAY:
        return _tp_ref_format_char(ref, "[]", qualname)

    # assert ref.decl == TypeDecl.FUNCTION
    # assert ref.params is not None

    params: list[str] = [tp_ref_to_str(i, qualname) for i in ref.params]  # type: ignore[union-attr]
    ret = tp_ref_to_str(ref.name, qualname) if isinstance(ref.name, TypeRef) else ref.name

    return f"{ret} (*{qualname})({', '.join(params)})"


def typedef_to_str(decl: DocType) -> str:
    """Convert a typedef to a string.

    Parameters:
        decl: The typedef to convert.

    Returns:
        The string representation of the typedef.
    """
    return tp_ref_to_str(decl.tp, decl.name)
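
# Illustrative sketch, for documentation only (not used by the handler): rendering
# hand-built TypeRefs. The `adder` typedef and the types involved are hypothetical.
def _example_tp_ref_to_str() -> None:
    const_char_ptr = TypeRef(TypeRef("char", TypeDecl.NORMAL, ["const"]), TypeDecl.POINTER, [])
    assert tp_ref_to_str(const_char_ptr, "s") == "const char*"

    adder = DocType(
        "adder",
        TypeRef(
            TypeRef("int", TypeDecl.NORMAL, []),
            TypeDecl.FUNCTION,
            [],
            [TypeRef("int", TypeDecl.NORMAL, []), TypeRef("int", TypeDecl.NORMAL, [])],
        ),
        None,
        [],
    )
    assert typedef_to_str(adder) == "int (*adder)(int, int)"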

def desc(doc: Docstring | None) -> str:
    """Get the description from a docstring.

    Parameters:
        doc: The docstring to get the description from.

    Returns:
        The description.
    """
    if not doc:
        return "No description specified."

    return doc.desc

def lookup_type_html(data: CodeDoc, tp: TypeRef, *, name: str | None = None) -> str:
    """Look up a type and return an HTML representation.

    Parameters:
        data: The parsed C source file.
        tp: The type to look up.
        name: The name of the type.

    Returns:
        The HTML representation of the type.
    """
    tp_str = ""

    for type_name, doctype in data.typedefs.items():
        if doctype.tp == tp:
            tp_str = f'<a href="#type-{type_name}">{type_name}</a>'

    return f'<code>{tp_str or tp_ref_to_str(tp, name or "unknown")}</code>'
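
# Illustrative sketch, for documentation only (not used by the handler): a type
# that matches a known typedef is rendered as a link to that typedef's anchor,
# otherwise it is rendered literally. The `u8` typedef is hypothetical.
def _example_lookup_type_html() -> None:
    u8 = TypeRef("unsigned char", TypeDecl.NORMAL, [])
    data = CodeDoc([], [], [], {"u8": DocType("u8", u8, None, [])})
    assert lookup_type_html(data, u8) == '<code><a href="#type-u8">u8</a></code>'
    assert lookup_type_html(data, TypeRef("int", TypeDecl.NORMAL, [])) == "<code>int</code>"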

class CHandler(BaseHandler):
    """The C handler class."""

    name: str = "c"
    """The handler's name."""

    domain: str = "c"
    """The cross-documentation domain/language for this handler."""

    enable_inventory: bool = False
    """Whether this handler is interested in enabling the creation of the `objects.inv` Sphinx inventory file."""

    fallback_theme = "material"
    """The theme to fall back to."""

    fallback_config: ClassVar[dict] = {"fallback": True}
    """The configuration used to collect items during autorefs fallback."""

    default_config: ClassVar[dict] = {
        "show_root_heading": False,
        "show_root_toc_entry": True,
        "show_symbol_type_heading": True,
        "show_symbol_type_toc_entry": True,
        "heading_level": 2,
    }
    """The default configuration options.

    Option | Type | Description | Default
    ------ | ---- | ----------- | -------
    **`show_root_heading`** | `bool` | Show the heading of the object at the root of the documentation tree. | `False`
    **`show_root_toc_entry`** | `bool` | If the root heading is not shown, at least add a ToC entry for it. | `True`
    **`show_symbol_type_heading`** | `bool` | Show the symbol type in headings. | `True`
    **`show_symbol_type_toc_entry`** | `bool` | Show the symbol type in the Table of Contents. | `True`
    **`heading_level`** | `int` | The initial heading level to use. | `2`
    """

    def collect(self, identifier: str, config: MutableMapping[str, Any]) -> CollectorItem:
        """Collect data given an identifier and selection configuration.

        For this handler, the identifier is the path to a C header file: the file is read,
        its comments and preprocessor macros are extracted, and the remaining source is
        parsed with pycparser.

        Parameters:
            identifier: An identifier found in a Markdown document, for which to collect data.
                For this handler, it is the path of the C header file to document.
            config: All configuration options for this handler, either defined globally in `mkdocs.yml` or
                locally overridden in an identifier block by the user.

        Returns:
            A `CodeDoc` containing the collected macros, functions, global variables and typedefs,
            ready to be fed to the `render` method.
        """
        if config.get("fallback", False):  # coverage: 513 ↛ 514 (condition was never true)
            raise CollectionError("Not loading additional headers during fallback")

        source = Path(identifier).read_text(encoding="utf-8")
        comments_list, source = extract_comments(source)
        macros_list, source = extract_macros(source)
        code: FileAST = _C_PARSER.parse(source)

        comments: dict[int, Comment] = {comment.last_line_number: comment for comment in comments_list}
        types: dict[str, DocType] = {}
        global_vars: list[DocGlobalVar] = []
        funcs: list[DocFunc] = []

        for node in code.ext:
            if not isinstance(node, (c_ast.Typedef, c_ast.Decl)):  # coverage: 527 ↛ 528 (condition was never true)
                continue

            # assert node.coord, "node.coord is None"
            lineno = node.coord.line

            raw_doc: Comment | None = None
            if lineno in comments:
                raw_doc = comments.pop(lineno)
            elif (lineno - 1) in comments:  # coverage: 536 ↛ 539 (condition was always true)
                raw_doc = comments.pop(lineno - 1)

            docstring: Docstring | None = None

            if raw_doc:  # coverage: 541 ↛ 544 (condition was always true)
                docstring = parse_docstring(raw_doc.text)

            if isinstance(node, c_ast.Typedef):
                types[node.name] = DocType(node.name, ast_to_decl(node.type, types), docstring, node.quals)

            elif type(node) is c_ast.Decl:  # we don't want the subclasses; coverage: 547 ↛ 526 (condition was always true)
                if isinstance(node.type, c_ast.FuncDecl):
                    ref = ast_to_decl(node.type, types)
                    # assert ref.decl is TypeDecl.FUNCTION, "decl is not TypeDecl.FUNCTION"
                    # assert ref.params is not None, "function typeref does not have parameters"
                    params: list[FuncParam] = []

                    for param_ref, param in zip(ref.params, node.type.args.params):  # type: ignore[arg-type]
                        params.append(FuncParam(param.name, param_ref))

                    funcs.append(DocFunc(node.name, params, ref.name, docstring))  # type: ignore[arg-type]
                else:
                    global_vars.append(
                        DocGlobalVar(
                            node.name,
                            ast_to_decl(node.type, types),
                            docstring,
                            node.quals,
                        ),
                    )

        macros: list[DocMacro] = []

        for macro in macros_list:
            match = _DEFINE.match(macro.text)

            if not match:
                continue

            lineno = macro.line_number

            raw_doc = None

            if lineno in comments:
                raw_doc = comments.pop(lineno)
            elif (lineno - 1) in comments:  # coverage: 582 ↛ 585 (condition was always true)
                raw_doc = comments.pop(lineno - 1)

            docstring = parse_docstring(raw_doc.text) if raw_doc else None
            macros.append(DocMacro(match.group(1).rstrip(" "), match.group(2) or None, docstring))

        return CodeDoc(macros, funcs, global_vars, types)

    # def get_templates_dir(self, handler: str | None = None) -> Path:
    #     return Path.cwd()

    def render(self, data: CodeDoc, config: Mapping[str, Any]) -> str:
        """Render a template using provided data and configuration options.

        Parameters:
            data: The data to render that was collected above in `collect()`.
            config: All configuration options for this handler either defined globally in `mkdocs.yml` or
                locally overridden in an identifier block by the user.

        Returns:
            The rendered template as HTML.
        """
        final_config = {**self.default_config, **config}
        heading_level = final_config["heading_level"]
        template = self.env.get_template("header.html.jinja")
        return template.render(
            config=final_config,
            header=data,
            heading_level=heading_level,
            root=True,
        )

    def update_env(self, md: Markdown, config: dict) -> None:
        """Update the Jinja environment with any custom settings/filters/options for this handler.

        Parameters:
            md: The Markdown instance. Useful to add functions able to convert Markdown into the environment filters.
            config: Configuration options for `mkdocs` and `mkdocstrings`, read from `mkdocs.yml`. See the source code
                of [mkdocstrings.plugin.MkdocstringsPlugin.on_config][] to see what's in this dictionary.
        """
        super().update_env(md, config)  # Add some mkdocstrings default filters such as highlight and convert_markdown
        self.env.trim_blocks = True
        self.env.lstrip_blocks = True
        self.env.keep_trailing_newline = False
        self.env.filters["typedef_to_str"] = typedef_to_str
        self.env.filters["lookup_type_html"] = lookup_type_html
        self.env.filters["zip"] = zip
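
# Illustrative sketch, for documentation only (not used by the handler): the
# association `CHandler.collect` relies on between comments and AST nodes.
# Comments are keyed by their last line number, and a declaration picks up the
# comment ending on its own line or on the line just above. The header text is
# hypothetical.
def _example_comment_association() -> None:
    header = "// The global counter.\nint counter;\n"
    comments_list, source = extract_comments(header)
    macros_list, source = extract_macros(source)
    ast = _C_PARSER.parse(source)

    comments = {comment.last_line_number: comment for comment in comments_list}
    decl = ast.ext[0]  # c_ast.Decl for `counter`, reported on line 2
    assert decl.coord.line == 2
    assert comments[decl.coord.line - 1].text == "// The global counter."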

def get_handler(
    theme: str,
    custom_templates: str | None = None,
    config_file_path: str | None = None,  # noqa: ARG001
    **config: Any,  # noqa: ARG001
) -> CHandler:
    """Simply return an instance of `CHandler`.

    Parameters:
        theme: The theme to use when rendering contents.
        custom_templates: Directory containing custom templates.
        config_file_path: The MkDocs configuration file path.
        **config: Configuration passed to the handler.

    Returns:
        An instance of the handler.
    """
    return CHandler(
        handler="c",
        theme=theme,
        custom_templates=custom_templates,
        # To pass the following argument,
        # you'll need to override the handler's __init__ method.
        # config_file_path=config_file_path,
    )
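

# Illustrative sketch, for documentation only (not used by the handler): obtaining
# the handler the way mkdocstrings would, then collecting a small, hypothetical
# header written to a temporary file. Rendering is left out here because it needs
# the Jinja environment configured by `update_env`.
def _example_get_handler() -> None:
    import tempfile

    with tempfile.NamedTemporaryFile("w", suffix=".h", delete=False) as file:
        file.write("// A version marker.\n#define VERSION 1\n")
        path = file.name

    handler = get_handler(theme="material")
    data = handler.collect(path, {})
    assert data.macros[0].name == "VERSION"
    assert data.macros[0].content == "1"
    assert data.macros[0].doc is not None
    assert data.macros[0].doc.desc == "A version marker."
    Path(path).unlink()  # clean up the temporary header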