Coverage for src/griffe/_internal/docstrings/sphinx.py: 94.92%

244 statements  

« prev     ^ index     » next       coverage.py v7.10.2, created at 2025-08-14 23:10 +0200

1# This module defines functions to parse Sphinx docstrings into structured data. 

2 

3# Credits to Patrick Lannigan ([@plannigan](https://github.com/plannigan)) 

4# who originally added the parser in the [pytkdocs project](https://github.com/mkdocstrings/pytkdocs). 

5# See https://github.com/mkdocstrings/pytkdocs/pull/71. 

6 

7from __future__ import annotations 

8 

9from contextlib import suppress 

10from dataclasses import dataclass, field 

11from typing import TYPE_CHECKING, Any, Callable 

12 

13from griffe._internal.docstrings.models import ( 

14 DocstringAttribute, 

15 DocstringParameter, 

16 DocstringRaise, 

17 DocstringReturn, 

18 DocstringSection, 

19 DocstringSectionAttributes, 

20 DocstringSectionParameters, 

21 DocstringSectionRaises, 

22 DocstringSectionReturns, 

23 DocstringSectionText, 

24) 

25from griffe._internal.docstrings.utils import docstring_warning, parse_docstring_annotation 

26 

27if TYPE_CHECKING: 

28 from griffe._internal.expressions import Expr 

29 from griffe._internal.models import Docstring 

30 

31 

# TODO: Examples: from the documentation, we're not sure there is a standard format for examples

# Directive names (the word right after the leading colon) grouped by the kind of field they introduce.
# Directives describing a parameter.
_PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})
# Directives giving the type of a parameter.
_PARAM_TYPE_NAMES = frozenset({"type"})
# Directives describing an attribute.
_ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})
# Directives giving the type of an attribute.
_ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})
# Directives describing the returned value.
_RETURN_NAMES = frozenset({"returns", "return"})
# Directives giving the type of the returned value.
_RETURN_TYPE_NAMES = frozenset({"rtype"})
# Directives describing a raised exception.
_EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})

40 

41 

@dataclass(frozen=True)
class _FieldType:
    """Pair a set of directive names with the function that reads such directives."""

    # Directive names, without the leading colon.
    names: frozenset[str]
    # Function called with the docstring, the current line index and the parsed values;
    # it returns a line index.
    reader: Callable[[Docstring, int, _ParsedValues], int]

    def matches(self, line: str) -> bool:
        """Tell whether a line starts with one of this field type's directives.

        Parameters:
            line: Line to check against.

        Returns:
            True if the line starts with `:<name>` for any of the names, False otherwise.
        """
        # `str.startswith` accepts a tuple of prefixes; an empty tuple never matches.
        prefixes = tuple(f":{name}" for name in self.names)
        return line.startswith(prefixes)

59 

60 

@dataclass
class _ParsedDirective:
    """Result of parsing one directive (and its continuation lines) from a docstring."""

    # The directive line with its continuation lines merged in.
    line: str
    # Line index the reader should hand back to the main parsing loop.
    next_index: int
    # Pieces of the text found between the first two colons, split on single spaces.
    directive_parts: list[str]
    # Text found after the second colon.
    value: str
    # Set when the line could not be split into a directive and a value.
    invalid: bool = False

70 

71 

@dataclass
class _ParsedValues:
    """Accumulator for everything read from a docstring before sections are built."""

    # Lines that did not match any directive.
    description: list[str] = field(default_factory=list)
    # Documented parameters, keyed by name.
    parameters: dict[str, DocstringParameter] = field(default_factory=dict)
    # Parameter types given through separate type directives, keyed by parameter name.
    param_types: dict[str, str | Expr] = field(default_factory=dict)
    # Documented attributes, keyed by name.
    attributes: dict[str, DocstringAttribute] = field(default_factory=dict)
    # Attribute types given through separate type directives, keyed by attribute name.
    attribute_types: dict[str, str] = field(default_factory=dict)
    # Documented exceptions, in order of appearance.
    exceptions: list[DocstringRaise] = field(default_factory=list)
    # Documented return value, if any.
    return_value: DocstringReturn | None = None
    # Return type given through a separate type directive, if any.
    return_type: str | None = None

84 

85 

def parse_sphinx(
    docstring: Docstring,
    *,
    warn_unknown_params: bool = True,
    warnings: bool = True,
    **options: Any,
) -> list[DocstringSection]:
    """Parse a Sphinx-style docstring.

    Each line is checked against the known field types, in order. A matching line
    is handed to the reader of the first matching field type; any other line is
    kept as part of the free-text description.

    Parameters:
        docstring: The docstring to parse.
        warn_unknown_params: Warn about documented parameters not appearing in the signature.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options.

    Returns:
        A list of docstring sections.
    """
    # Every reader receives the same keyword options.
    reader_options = {
        "warn_unknown_params": warn_unknown_params,
        "warnings": warnings,
        **options,
    }
    collected = _ParsedValues()
    lines = docstring.lines

    index = 0
    while index < len(lines):
        current = lines[index]
        handler = next((candidate for candidate in _field_types if candidate.matches(current)), None)
        if handler is None:
            collected.description.append(current)
        else:
            # The reader returns the index of the last line it consumed.
            # https://github.com/python/mypy/issues/5485
            index = handler.reader(docstring, index, collected, **reader_options)
        index += 1

    return _parsed_values_to_sections(collected)

128 

129 

def _read_parameter(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warn_unknown_params: bool = True,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a parameter directive and record it in the parsed values.

    Accepted forms are `:param name: description` and `:param type name: description`.
    With more than one word between the directive and the name, the type is ignored
    and a warning is logged. Duplicate entries are reported and skipped.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warn_unknown_params: Warn about documented parameters not appearing in the signature.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset, warnings=warnings)
    next_index = directive.next_index
    if directive.invalid:
        return next_index

    parts = directive.directive_parts
    part_count = len(parts)

    if part_count < 2:  # noqa: PLR2004
        # Directive without a parameter name.
        if warnings:
            docstring_warning(docstring, 0, f"Failed to parse field directive from '{directive.line}'")
        return next_index

    # The name is always the last word of the directive.
    name = parts[-1]
    directive_type = None
    if part_count == 3:  # noqa: PLR2004
        directive_type = parse_docstring_annotation(parts[1], docstring)
    elif part_count > 3:  # noqa: PLR2004
        # Ignoring type info, only a type with a single word is valid
        # https://www.sphinx-doc.org/en/master/usage/domains/python.html#info-field-lists
        if warnings:
            docstring_warning(docstring, 0, f"Failed to parse field directive from '{directive.line}'")

    if name in parsed_values.parameters:
        if warnings:
            docstring_warning(docstring, 0, f"Duplicate parameter entry for '{name}'")
        return next_index

    if warnings and warn_unknown_params:
        with suppress(AttributeError):  # For Parameters sections in objects without parameters.
            params = docstring.parent.parameters  # type: ignore[union-attr]
            if name not in params:
                message = f"Parameter '{name}' does not appear in the function signature"
                starred_name = next(
                    (candidate for candidate in (f"*{name}", f"**{name}") if candidate in params),
                    None,
                )
                if starred_name is not None:
                    message += f". Did you mean '{starred_name}'?"
                docstring_warning(docstring, 0, message)

    annotation = _determine_param_annotation(docstring, name, directive_type, parsed_values, warnings=warnings)
    default = _determine_param_default(docstring, name)
    parsed_values.parameters[name] = DocstringParameter(
        name=name,
        annotation=annotation,
        description=directive.value,
        value=default,
    )
    return next_index

191 

192 

def _determine_param_default(docstring: Docstring, name: str) -> str | None:
    """Fetch the default value of a parameter from the docstring's parent object.

    Parameters:
        docstring: The docstring being parsed.
        name: The parameter name (leading whitespace is ignored).

    Returns:
        The `default` of the matching parent parameter, or None when the lookup
        raises `AttributeError` or `KeyError`.
    """
    with suppress(AttributeError, KeyError):
        return docstring.parent.parameters[name.lstrip()].default  # type: ignore[union-attr]
    return None

198 

199 

def _determine_param_annotation(
    docstring: Docstring,
    name: str,
    directive_type: str | Expr | None,
    parsed_values: _ParsedValues,
    *,
    warnings: bool = True,
) -> Any:
    """Pick the annotation of a parameter.

    Annotation precedence:

    - in-line directive type
    - "type" directive type
    - signature annotation
    - none

    Parameters:
        docstring: The docstring being parsed.
        name: The parameter name.
        directive_type: The type written in-line in the parameter directive, if any.
        parsed_values: Values parsed so far (read for previously seen "type" directives).
        warnings: Whether to log warnings at all.

    Returns:
        The selected annotation, or None when none is available.
    """
    type_from_directive = parsed_values.param_types.get(name)

    if directive_type is not None:
        # Both an in-line type and a "type" directive were given: the in-line one wins.
        if warnings and type_from_directive is not None:
            docstring_warning(docstring, 0, f"Duplicate parameter information for '{name}'")
        return directive_type

    if type_from_directive is not None:
        return type_from_directive

    try:
        return docstring.parent.parameters[name.lstrip()].annotation  # type: ignore[union-attr]
    except (AttributeError, KeyError):
        if warnings:
            docstring_warning(docstring, 0, f"No matching parameter for '{name}'")
    return None

233 

234 

def _read_parameter_type(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a `:type <name>: <annotation>` directive and record the parameter type.

    The type is stored in `parsed_values.param_types`. If the parameter was already
    read and has no annotation yet, the type is assigned to it; if it already has
    one, a duplicate-information warning is logged (only when `warnings` is true).

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    parsed_directive = _parse_directive(docstring, offset, warnings=warnings)
    if parsed_directive.invalid:
        return parsed_directive.next_index

    # "int or str" style descriptions are rewritten with "|" before being parsed.
    param_type_str = _consolidate_descriptive_type(parsed_directive.value.strip())
    param_type = parse_docstring_annotation(param_type_str, docstring)

    # Exactly two words are expected: the directive itself and the parameter name.
    if len(parsed_directive.directive_parts) != 2:  # noqa: PLR2004
        if warnings:
            docstring_warning(docstring, 0, f"Failed to get parameter name from '{parsed_directive.line}'")
        return parsed_directive.next_index
    param_name = parsed_directive.directive_parts[1]

    parsed_values.param_types[param_name] = param_type
    param = parsed_values.parameters.get(param_name)
    if param is not None:
        if param.annotation is None:
            param.annotation = param_type
        elif warnings:
            # Honor `warnings=False` here too, like every other warning of this parser.
            docstring_warning(docstring, 0, f"Duplicate parameter information for '{param_name}'")
    return parsed_directive.next_index

264 

265 

def _read_attribute(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read an attribute directive (`:var name: description`) and record it.

    Annotation precedence:

    - "vartype" directive type
    - annotation in the parent
    - none

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset, warnings=warnings)
    next_index = directive.next_index
    if directive.invalid:
        return next_index

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        if warnings:
            docstring_warning(docstring, 0, f"Failed to parse field directive from '{directive.line}'")
        return next_index
    name = parts[1]

    annotation: str | Expr | None = parsed_values.attribute_types.get(name)
    if annotation is None:
        # try to use the annotation from the parent
        with suppress(AttributeError, KeyError, TypeError):
            # Use subscript syntax to fetch annotation from inherited members too.
            annotation = docstring.parent[name].annotation  # type: ignore[index]

    if name not in parsed_values.attributes:
        parsed_values.attributes[name] = DocstringAttribute(
            name=name,
            annotation=annotation,
            description=directive.value,
        )
    elif warnings:
        docstring_warning(docstring, 0, f"Duplicate attribute entry for '{name}'")

    return next_index

311 

312 

def _read_attribute_type(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a `:vartype <name>: <type>` directive and record the attribute type.

    The type is stored in `parsed_values.attribute_types`. If the attribute was
    already read and has no annotation, the type is assigned to it; otherwise a
    duplicate-information warning is logged.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset, warnings=warnings)
    next_index = directive.next_index
    if directive.invalid:
        return next_index

    attribute_type = _consolidate_descriptive_type(directive.value.strip())

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        if warnings:
            docstring_warning(docstring, 0, f"Failed to get attribute name from '{directive.line}'")
        return next_index
    attribute_name = parts[1]

    parsed_values.attribute_types[attribute_name] = attribute_type

    known_attribute = parsed_values.attributes.get(attribute_name)
    if known_attribute is None:
        return next_index
    if known_attribute.annotation is None:
        known_attribute.annotation = attribute_type
    elif warnings:
        docstring_warning(docstring, 0, f"Duplicate attribute information for '{attribute_name}'")
    return next_index

341 

342 

def _read_exception(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read an exception directive (`:raises ExceptionType: description`) and record it.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset, warnings=warnings)
    next_index = directive.next_index
    if directive.invalid:
        return next_index

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        # Exactly two words are expected: the directive itself and the exception type.
        if warnings:
            docstring_warning(docstring, 0, f"Failed to parse exception directive from '{directive.line}'")
        return next_index

    raised = DocstringRaise(annotation=parts[1], description=directive.value)
    parsed_values.exceptions.append(raised)
    return next_index

362 

363 

def _read_return(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warn_missing_types: bool = True,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a return directive (`:returns: description`) and record the return value.

    Annotation precedence:

    - "rtype" directive type
    - signature annotation
    - None

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warn_missing_types: Warn when no return type or annotation can be found.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset, warnings=warnings)
    if directive.invalid:
        return directive.next_index

    annotation: str | Expr | None = parsed_values.return_type
    if annotation is None:
        try:
            annotation = docstring.parent.annotation  # type: ignore[union-attr]
        except AttributeError:
            # No parent annotation to fall back on: the annotation stays None.
            if warnings and warn_missing_types:
                docstring_warning(docstring, 0, f"No return type or annotation at '{directive.line}'")

    # TODO: maybe support names
    parsed_values.return_value = DocstringReturn(
        name="",
        annotation=annotation,
        description=directive.value,
    )
    return directive.next_index

396 

397 

def _read_return_type(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warnings: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a `:rtype: <type>` directive and record the return type.

    The type is stored in `parsed_values.return_type` and also written onto the
    return value if one was already read.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        parsed_values: Accumulator updated in place.
        warnings: Whether to log warnings at all.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset, warnings=warnings)
    if not directive.invalid:
        declared_type = _consolidate_descriptive_type(directive.value.strip())
        parsed_values.return_type = declared_type
        already_read = parsed_values.return_value
        if already_read is not None:
            already_read.annotation = declared_type
    return directive.next_index

417 

418 

def _parsed_values_to_sections(parsed_values: _ParsedValues) -> list[DocstringSection]:
    """Turn the accumulated values into docstring sections.

    A text section always comes first, followed by the parameters, attributes,
    returns and raises sections, each only when it has content.

    Parameters:
        parsed_values: The values collected while reading the docstring.

    Returns:
        The list of sections.
    """
    description = "\n".join(_strip_blank_lines(parsed_values.description))
    sections: list[DocstringSection] = [DocstringSectionText(description)]

    if parsed_values.parameters:
        sections.append(DocstringSectionParameters([*parsed_values.parameters.values()]))
    if parsed_values.attributes:
        sections.append(DocstringSectionAttributes([*parsed_values.attributes.values()]))

    returned = parsed_values.return_value
    if returned is not None:
        sections.append(DocstringSectionReturns([returned]))
    if parsed_values.exceptions:
        sections.append(DocstringSectionRaises(parsed_values.exceptions))

    return sections

433 

434 

def _parse_directive(docstring: Docstring, offset: int, *, warnings: bool = True) -> _ParsedDirective:
    """Split a directive (with its continuation lines) into directive words and value.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line on which the directive starts.
        warnings: Whether to log warnings at all.

    Returns:
        The parsed directive, flagged `invalid` when the merged line does not
        contain two colons.
    """
    line, next_index = _consolidate_continuation_lines(docstring.lines, offset)

    # Expected shape is "<ignored>:<directive words>:<value>".
    pieces = line.split(":", 2)
    if len(pieces) < 3:  # noqa: PLR2004
        if warnings:
            docstring_warning(docstring, 0, f"Failed to get ':directive: value' pair from '{line}'")
        return _ParsedDirective(line, next_index, [], "", invalid=True)

    directive_words = pieces[1].split(" ")
    return _ParsedDirective(line, next_index, directive_words, pieces[2].strip())

446 

447 

def _consolidate_continuation_lines(lines: list[str], offset: int) -> tuple[str, int]:
    """Merge a line with the lines that follow it, up to the next line starting with a colon.

    Parameters:
        lines: All the lines of the docstring.
        offset: Index of the first line of the block.

    Returns:
        A tuple of the merged text (lines left-stripped and joined with single
        spaces) and the index of the last line that was merged.
    """
    merged = [lines[offset].lstrip()]
    last_index = offset

    # start processing after first item
    for position in range(offset + 1, len(lines)):
        following = lines[position]
        if following.startswith(":"):
            break
        merged.append(following.lstrip())
        last_index = position

    return " ".join(merged).rstrip("\n"), last_index

459 

460 

def _consolidate_descriptive_type(descriptive_type: str) -> str:
    """Rewrite a descriptive type such as `int or str` using `|` separators.

    Parameters:
        descriptive_type: The type as written in the docstring.

    Returns:
        The same text with every ` or ` replaced by ` | `.
    """
    alternatives = descriptive_type.split(" or ")
    return " | ".join(alternatives)

463 

464 

def _strip_blank_lines(lines: list[str]) -> list[str]:
    """Drop blank lines from the start and the end of a list of lines.

    A line is blank when it is empty or made of whitespace only. Blank lines
    located between non-blank lines are kept.

    Parameters:
        lines: The lines to trim.

    Returns:
        The given list itself when it is empty, otherwise a new list without
        leading and trailing blank lines (empty when every line is blank).
    """
    if not lines:
        return lines

    content_positions = [position for position, text in enumerate(lines) if text and not text.isspace()]
    if not content_positions:
        return []

    first, last = content_positions[0], content_positions[-1]
    return lines[first : last + 1]

481 

482 

# Field types are tried in this order and matching is prefix-based,
# so `vartype` has to be listed before `var`.
_field_types = [
    _FieldType(names, reader)
    for names, reader in (
        (_PARAM_TYPE_NAMES, _read_parameter_type),
        (_PARAM_NAMES, _read_parameter),
        (_ATTRIBUTE_TYPE_NAMES, _read_attribute_type),
        (_ATTRIBUTE_NAMES, _read_attribute),
        (_EXCEPTION_NAMES, _read_exception),
        (_RETURN_NAMES, _read_return),
        (_RETURN_TYPE_NAMES, _read_return_type),
    )
]