Coverage for src/_griffe/docstrings/sphinx.py: 98.15%

228 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-15 16:47 +0200

1# This module defines functions to parse Sphinx docstrings into structured data. 

2 

3# Credits to Patrick Lannigan ([@plannigan](https://github.com/plannigan)) 

4# who originally added the parser in the [pytkdocs project](https://github.com/mkdocstrings/pytkdocs). 

5# See https://github.com/mkdocstrings/pytkdocs/pull/71. 

6 

7from __future__ import annotations 

8 

9from contextlib import suppress 

10from dataclasses import dataclass, field 

11from typing import TYPE_CHECKING, Any, Callable 

12 

13from _griffe.docstrings.models import ( 

14 DocstringAttribute, 

15 DocstringParameter, 

16 DocstringRaise, 

17 DocstringReturn, 

18 DocstringSection, 

19 DocstringSectionAttributes, 

20 DocstringSectionParameters, 

21 DocstringSectionRaises, 

22 DocstringSectionReturns, 

23 DocstringSectionText, 

24) 

25from _griffe.docstrings.utils import docstring_warning 

26 

27if TYPE_CHECKING: 

28 from _griffe.expressions import Expr 

29 from _griffe.models import Docstring 

30 

31 

32# TODO: Examples: from the documentation, we're not sure there is a standard format for examples 

33_PARAM_NAMES = frozenset(("param", "parameter", "arg", "argument", "key", "keyword")) 

34_PARAM_TYPE_NAMES = frozenset(("type",)) 

35_ATTRIBUTE_NAMES = frozenset(("var", "ivar", "cvar")) 

36_ATTRIBUTE_TYPE_NAMES = frozenset(("vartype",)) 

37_RETURN_NAMES = frozenset(("returns", "return")) 

38_RETURN_TYPE_NAMES = frozenset(("rtype",)) 

39_EXCEPTION_NAMES = frozenset(("raises", "raise", "except", "exception")) 

40 

41 

42@dataclass(frozen=True) 

43class _FieldType: 

44 """Maps directive names to parser functions.""" 

45 

46 names: frozenset[str] 

47 reader: Callable[[Docstring, int, _ParsedValues], int] 

48 

49 def matches(self, line: str) -> bool: 

50 """Check if a line matches the field type. 

51 

52 Parameters: 

53 line: Line to check against 

54 

55 Returns: 

56 True if the line matches the field type, False otherwise. 

57 """ 

58 return any(line.startswith(f":{name}") for name in self.names) 

59 

60 

61@dataclass 

62class _ParsedDirective: 

63 """Directive information that has been parsed from a docstring.""" 

64 

65 line: str 

66 next_index: int 

67 directive_parts: list[str] 

68 value: str 

69 invalid: bool = False 

70 

71 

72@dataclass 

73class _ParsedValues: 

74 """Values parsed from the docstring to be used to produce sections.""" 

75 

76 description: list[str] = field(default_factory=list) 

77 parameters: dict[str, DocstringParameter] = field(default_factory=dict) 

78 param_types: dict[str, str] = field(default_factory=dict) 

79 attributes: dict[str, DocstringAttribute] = field(default_factory=dict) 

80 attribute_types: dict[str, str] = field(default_factory=dict) 

81 exceptions: list[DocstringRaise] = field(default_factory=list) 

82 return_value: DocstringReturn | None = None 

83 return_type: str | None = None 

84 

85 

def parse_sphinx(docstring: Docstring, *, warn_unknown_params: bool = True, **options: Any) -> list[DocstringSection]:
    """Parse a Sphinx-style docstring.

    Each line is tested against the known field types, in order. The first
    matching field type reads the directive; lines matching none of them
    are collected as description text.

    Parameters:
        docstring: The docstring to parse.
        warn_unknown_params: Warn about documented parameters not appearing in the signature.
        **options: Additional parsing options.

    Returns:
        A list of docstring sections.
    """
    collected = _ParsedValues()
    reader_options = {"warn_unknown_params": warn_unknown_params, **options}

    lines = docstring.lines
    index = 0
    while index < len(lines):
        line = lines[index]
        field_type = next((candidate for candidate in _field_types if candidate.matches(line)), None)
        if field_type is None:
            collected.description.append(line)
        else:
            # The reader hands back the index to resume from, one is added below.
            # https://github.com/python/mypy/issues/5485
            index = field_type.reader(docstring, index, collected, **reader_options)
        index += 1

    return _parsed_values_to_sections(collected)

120 

121 

def _read_parameter(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    *,
    warn_unknown_params: bool = True,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a parameter directive and record the parameter.

    The directive holds either a name only, or a type followed by a name.
    Nothing is recorded when the directive is malformed or when the
    parameter was already documented.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the parameter.
        warn_unknown_params: Warn when the parameter is not in the parent's parameters.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    resume_at = directive.next_index
    if directive.invalid:
        return resume_at

    parts = directive.directive_parts
    if len(parts) == 2:  # noqa: PLR2004
        # no type info
        inline_type, name = None, parts[1]
    elif len(parts) == 3:  # noqa: PLR2004
        inline_type, name = parts[1], parts[2]
    else:
        docstring_warning(docstring, 0, f"Failed to parse field directive from '{directive.line}'")
        return resume_at

    if name in parsed_values.parameters:
        docstring_warning(docstring, 0, f"Duplicate parameter entry for '{name}'")
        return resume_at

    if warn_unknown_params:
        with suppress(AttributeError):  # for parameters sections in objects without parameters
            signature_params = docstring.parent.parameters  # type: ignore[union-attr]
            if name not in signature_params:
                warning = f"Parameter '{name}' does not appear in the function signature"
                # Suggest the first starred spelling found among the parameters.
                suggestion = next(
                    (starred for starred in (f"*{name}", f"**{name}") if starred in signature_params),
                    None,
                )
                if suggestion is not None:
                    warning += f". Did you mean '{suggestion}'?"
                docstring_warning(docstring, 0, warning)

    parsed_values.parameters[name] = DocstringParameter(
        name=name,
        annotation=_determine_param_annotation(docstring, name, inline_type, parsed_values),
        description=directive.value,
        value=_determine_param_default(docstring, name),
    )
    return resume_at

171 

172 

173def _determine_param_default(docstring: Docstring, name: str) -> str | None: 

174 try: 

175 return docstring.parent.parameters[name.lstrip()].default # type: ignore[union-attr] 

176 except (AttributeError, KeyError): 

177 return None 

178 

179 

180def _determine_param_annotation( 

181 docstring: Docstring, 

182 name: str, 

183 directive_type: str | None, 

184 parsed_values: _ParsedValues, 

185) -> Any: 

186 # Annotation precedence: 

187 # - in-line directive type 

188 # - "type" directive type 

189 # - signature annotation 

190 # - none 

191 annotation: str | Expr | None = None 

192 

193 parsed_param_type = parsed_values.param_types.get(name) 

194 if parsed_param_type is not None: 

195 annotation = parsed_param_type 

196 

197 if directive_type is not None: 

198 annotation = directive_type 

199 

200 if directive_type is not None and parsed_param_type is not None: 

201 docstring_warning(docstring, 0, f"Duplicate parameter information for '{name}'") 

202 

203 if annotation is None: 

204 try: 

205 annotation = docstring.parent.parameters[name.lstrip()].annotation # type: ignore[union-attr] 

206 except (AttributeError, KeyError): 

207 docstring_warning(docstring, 0, f"No matching parameter for '{name}'") 

208 

209 return annotation 

210 

211 

def _read_parameter_type(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a parameter type directive and record the type.

    The type is stored under the parameter name. If the parameter was
    already read and has no annotation yet, the type is applied to it;
    if it already has one, a warning is emitted instead.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the type.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    if directive.invalid:
        return directive.next_index
    declared_type = _consolidate_descriptive_type(directive.value.strip())

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        docstring_warning(docstring, 0, f"Failed to get parameter name from '{directive.line}'")
        return directive.next_index

    target = parts[1]
    parsed_values.param_types[target] = declared_type

    known_param = parsed_values.parameters.get(target)
    if known_param is None:
        return directive.next_index

    if known_param.annotation is None:
        known_param.annotation = declared_type
    else:
        docstring_warning(docstring, 0, f"Duplicate parameter information for '{target}'")
    return directive.next_index

237 

238 

def _read_attribute(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read an attribute directive and record the attribute.

    Annotation precedence:

    - "vartype" directive type
    - annotation in the parent
    - none

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the attribute.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    if directive.invalid:
        return directive.next_index

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        docstring_warning(docstring, 0, f"Failed to parse field directive from '{directive.line}'")
        return directive.next_index
    name = parts[1]

    annotation: str | Expr | None = parsed_values.attribute_types.get(name)
    if annotation is None:
        # try to use the annotation from the parent
        with suppress(AttributeError, KeyError):
            annotation = docstring.parent.attributes[name].annotation  # type: ignore[union-attr]

    if name in parsed_values.attributes:
        docstring_warning(docstring, 0, f"Duplicate attribute entry for '{name}'")
    else:
        parsed_values.attributes[name] = DocstringAttribute(
            name=name,
            annotation=annotation,
            description=directive.value,
        )

    return directive.next_index

279 

280 

def _read_attribute_type(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read an attribute type directive and record the type.

    The type is stored under the attribute name. If the attribute was
    already read and has no annotation yet, the type is applied to it;
    if it already has one, a warning is emitted instead.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the type.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    if directive.invalid:
        return directive.next_index
    declared_type = _consolidate_descriptive_type(directive.value.strip())

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        docstring_warning(docstring, 0, f"Failed to get attribute name from '{directive.line}'")
        return directive.next_index

    target = parts[1]
    parsed_values.attribute_types[target] = declared_type

    known_attribute = parsed_values.attributes.get(target)
    if known_attribute is None:
        return directive.next_index

    if known_attribute.annotation is None:
        known_attribute.annotation = declared_type
    else:
        docstring_warning(docstring, 0, f"Duplicate attribute information for '{target}'")
    return directive.next_index

306 

307 

def _read_exception(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read an exception directive and record the raised exception.

    The directive must hold exactly the directive name and the exception
    type; otherwise a warning is emitted and nothing is recorded.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the exception.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    if directive.invalid:
        return directive.next_index

    parts = directive.directive_parts
    if len(parts) != 2:  # noqa: PLR2004
        docstring_warning(docstring, 0, f"Failed to parse exception directive from '{directive.line}'")
        return directive.next_index

    raised = DocstringRaise(annotation=parts[1], description=directive.value)
    parsed_values.exceptions.append(raised)
    return directive.next_index

325 

326 

def _read_return(docstring: Docstring, offset: int, parsed_values: _ParsedValues, **options: Any) -> int:  # noqa: ARG001
    """Read a return directive and record the return value.

    Annotation precedence:

    - "rtype" directive type
    - signature annotation
    - None

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the return value.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    if directive.invalid:
        return directive.next_index

    annotation: str | Expr | None = parsed_values.return_type
    if annotation is None:
        try:
            annotation = docstring.parent.annotation  # type: ignore[union-attr]
        except AttributeError:
            # The annotation stays None when the parent provides none.
            docstring_warning(docstring, 0, f"No return type or annotation at '{directive.line}'")

    # TODO: maybe support names
    parsed_values.return_value = DocstringReturn(name="", annotation=annotation, description=directive.value)

    return directive.next_index

350 

351 

def _read_return_type(
    docstring: Docstring,
    offset: int,
    parsed_values: _ParsedValues,
    **options: Any,  # noqa: ARG001
) -> int:
    """Read a return type directive and record the type.

    When a return value was already read, its annotation is replaced by
    this type.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.
        parsed_values: Accumulator receiving the type.
        **options: Additional parsing options (unused).

    Returns:
        The line index reported by the directive parser.
    """
    directive = _parse_directive(docstring, offset)
    if directive.invalid:
        return directive.next_index

    declared_type = _consolidate_descriptive_type(directive.value.strip())
    parsed_values.return_type = declared_type

    documented_return = parsed_values.return_value
    if documented_return is not None:
        documented_return.annotation = declared_type

    return directive.next_index

369 

370 

def _parsed_values_to_sections(parsed_values: _ParsedValues) -> list[DocstringSection]:
    """Build the list of docstring sections from the accumulated values.

    The text section always comes first, even when empty. Parameters,
    attributes, returns and raises sections follow in that order, each
    one only when something was recorded for it.

    Parameters:
        parsed_values: Values accumulated while reading the docstring.

    Returns:
        The docstring sections.
    """
    description = "\n".join(_strip_blank_lines(parsed_values.description))
    sections: list[DocstringSection] = [DocstringSectionText(description)]

    if parsed_values.parameters:
        sections.append(DocstringSectionParameters(list(parsed_values.parameters.values())))
    if parsed_values.attributes:
        sections.append(DocstringSectionAttributes(list(parsed_values.attributes.values())))
    if parsed_values.return_value is not None:
        sections.append(DocstringSectionReturns([parsed_values.return_value]))
    if parsed_values.exceptions:
        sections.append(DocstringSectionRaises(parsed_values.exceptions))

    return sections

385 

386 

def _parse_directive(docstring: Docstring, offset: int) -> _ParsedDirective:
    """Split a directive entry into its directive parts and its value.

    The entry starting at `offset` is first merged with its continuation
    lines, then split on its first two colons: the text between them is
    the directive, the text after them is the value.

    Parameters:
        docstring: The docstring being parsed.
        offset: Index of the line where the directive starts.

    Returns:
        The parsed directive. It is flagged invalid (with no parts and an
        empty value) when the entry does not contain two colons.
    """
    line, next_index = _consolidate_continuation_lines(docstring.lines, offset)
    try:
        _, directive, value = line.split(":", 2)
    except ValueError:
        docstring_warning(docstring, 0, f"Failed to get ':directive: value' pair from '{line}'")
        return _ParsedDirective(line, next_index, [], "", invalid=True)

    # Split on runs of whitespace rather than on single spaces, so repeated
    # or trailing spaces (`:param  foo:`, `:param foo :`) do not produce
    # empty parts that would be mistaken for a type or a name.
    return _ParsedDirective(line, next_index, directive.split(), value.strip())

397 

398 

399def _consolidate_continuation_lines(lines: list[str], offset: int) -> tuple[str, int]: 

400 curr_line_index = offset 

401 block = [lines[curr_line_index].lstrip()] 

402 

403 # start processing after first item 

404 curr_line_index += 1 

405 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"): 

406 block.append(lines[curr_line_index].lstrip()) 

407 curr_line_index += 1 

408 

409 return " ".join(block).rstrip("\n"), curr_line_index - 1 

410 

411 

412def _consolidate_descriptive_type(descriptive_type: str) -> str: 

413 return descriptive_type.replace(" or ", " | ") 

414 

415 

416def _strip_blank_lines(lines: list[str]) -> list[str]: 

417 if not lines: 

418 return lines 

419 

420 # remove blank lines from the start and end 

421 content_found = False 

422 initial_content = 0 

423 final_content = 0 

424 for index, line in enumerate(lines): 

425 if not line or line.isspace(): 

426 if not content_found: 

427 initial_content += 1 

428 else: 

429 content_found = True 

430 final_content = index 

431 return lines[initial_content : final_content + 1] 

432 

433 

# Field types are tried in this order and the first match wins. Matching is a
# prefix test, and ":vartype" also starts with ":var", so the attribute type
# entry has to come before the attribute entry.
_field_types = [
    _FieldType(names, reader)
    for names, reader in (
        (_PARAM_TYPE_NAMES, _read_parameter_type),
        (_PARAM_NAMES, _read_parameter),
        (_ATTRIBUTE_TYPE_NAMES, _read_attribute_type),
        (_ATTRIBUTE_NAMES, _read_attribute),
        (_EXCEPTION_NAMES, _read_exception),
        (_RETURN_NAMES, _read_return),
        (_RETURN_TYPE_NAMES, _read_return_type),
    )
]