Coverage for src/pytkdocs/parsers/docstrings/restructured_text.py: 99.44%

260 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-09 17:28 +0100

1"""This module defines functions and classes to parse docstrings into structured data.""" 

2 

3from collections import defaultdict 

4from dataclasses import dataclass, field 

5from inspect import Signature 

6from typing import Any, Callable, Optional, cast 

7 

8from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty 

9 

10try: 

11 from typing import TypedDict 

12except ImportError: 

13 from typing_extensions import TypedDict 

14 

15 

# TODO: Examples: from the documentation, I'm not sure there is a standard format for examples

# Directive names (the word right after the leading colon) grouped by the kind of
# information they carry. Each group is consumed by one reader of the parser.
PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})
PARAM_TYPE_NAMES = frozenset({"type"})
ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})
ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})
RETURN_NAMES = frozenset({"returns", "return"})
RETURN_TYPE_NAMES = frozenset({"rtype"})
EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})

24 

25 

@dataclass(frozen=True)
class FieldType:
    """Associates a group of directive names with the function that reads them."""

    # Directive names, without the leading colon.
    names: frozenset[str]
    # Called with the docstring lines and the index of the matching line;
    # returns an index into those lines.
    reader: Callable[[list[str], int], int]

    def matches(self, line: str) -> bool:
        """Tell whether a line begins with one of this field type's directives.

        The comparison is a plain prefix test on the unmodified line: a line
        matches when it starts with a colon immediately followed by one of the names.

        Args:
            line: Line to check against

        Returns:
            True if the line matches the field type, False otherwise.
        """
        prefixes = tuple(f":{name}" for name in self.names)
        return line.startswith(prefixes)

43 

44 

class AttributesDict(TypedDict):
    """Shape of the details stored for a single attribute."""

    # Text documenting the attribute.
    docstring: str
    # TODO: Not positive this is correct
    annotation: type

50 

51 

class ParseContext:
    """Typed stand-in for the plain context dictionary handed to the parser."""

    obj: Any  # I think this might be pytkdocs.Object & subclasses
    attributes: defaultdict[str, AttributesDict]
    signature: Optional[Signature]
    # Not sure of the real type yet. Maybe Optional[Union[Literal[Signature.empty], str, Type]]
    annotation: Any

    # This might be better as the obj & optional attributes
    def __init__(self, context: dict):
        """Build the typed context from the raw dictionary.

        Args:
            context: Context of parsing operation. Must contain an "obj" key;
                an "attributes" mapping is optional.
        """
        documented_object = context["obj"]
        known_attributes = context.get("attributes")

        self.obj = documented_object

        # Unknown attribute names resolve to an empty dict instead of raising.
        self.attributes = defaultdict(cast(Callable[[], AttributesDict], dict))
        if known_attributes is not None:
            self.attributes.update(known_attributes)

        # Both fall back to a neutral value when the object does not carry them.
        self.signature = getattr(documented_object, "signature", None)
        self.annotation = getattr(documented_object, "type", empty)

76 

77 

@dataclass
class ParsedDirective:
    """Result of splitting one docstring field into its directive and value."""

    # The field text the directive was parsed from.
    line: str
    # Index the calling reader hands back to the parsing loop.
    next_index: int
    # The directive text split into its individual parts.
    directive_parts: list[str]
    # The text following the directive.
    value: str
    # Set when the text could not be split into a directive and a value.
    invalid: bool = False

87 

88 

@dataclass
class ParsedValues:
    """Accumulator for everything read from a docstring before sections are built."""

    # Lines that did not match any directive.
    description: list[str] = field(default_factory=list)
    # Parameters and standalone parameter types, both keyed by parameter name.
    parameters: dict[str, Parameter] = field(default_factory=dict)
    param_types: dict[str, str] = field(default_factory=dict)
    # Attributes and standalone attribute types, both keyed by attribute name.
    attributes: dict[str, Attribute] = field(default_factory=dict)
    attribute_types: dict[str, str] = field(default_factory=dict)
    # Documented exceptions, in the order they were read.
    exceptions: list[AnnotatedObject] = field(default_factory=list)
    # Return description and standalone return type; None until one is read.
    return_value: Optional[AnnotatedObject] = None
    return_type: Optional[str] = None

101 

102 

class RestructuredText(Parser):
    """A reStructuredText docstrings parser.

    Lines starting with a known directive (for example ``:param``, ``:type``,
    ``:var``, ``:vartype``, ``:raises``, ``:returns`` or ``:rtype``) are read
    into structured values; every other line is kept as description text.
    """

    def __init__(self, **kwargs: Any) -> None:  # noqa: ARG002
        """Initialize the object.

        Arguments:
            **kwargs: Accepted but not used by this parser.
        """
        super().__init__()
        # Placeholder state: both are rebuilt at the start of every `parse_sections` call.
        self._typed_context = ParseContext({"obj": None})
        self._parsed_values: ParsedValues = ParsedValues()
        # Ordering is significant so that directives like ":vartype" are checked before ":var"
        self.field_types = [
            FieldType(PARAM_TYPE_NAMES, self._read_parameter_type),
            FieldType(PARAM_NAMES, self._read_parameter),
            FieldType(ATTRIBUTE_TYPE_NAMES, self._read_attribute_type),
            FieldType(ATTRIBUTE_NAMES, self._read_attribute),
            FieldType(EXCEPTION_NAMES, self._read_exception),
            FieldType(RETURN_NAMES, self._read_return),
            FieldType(RETURN_TYPE_NAMES, self._read_return_type),
        ]

    def parse_sections(self, docstring: str) -> list[Section]:  # noqa: D102
        # Start from a clean slate so state from a previous docstring cannot leak in.
        self._typed_context = ParseContext(self.context)
        self._parsed_values = ParsedValues()

        lines = docstring.split("\n")
        curr_line_index = 0

        while curr_line_index < len(lines):
            line = lines[curr_line_index]
            for field_type in self.field_types:
                if field_type.matches(line):
                    # https://github.com/python/mypy/issues/5485
                    # The reader returns the index of the last line it consumed.
                    curr_line_index = field_type.reader(lines, curr_line_index)
                    break
            else:
                # No directive matched: the line belongs to the free-form description.
                self._parsed_values.description.append(line)

            curr_line_index += 1

        return self._parsed_values_to_sections()

    def _read_parameter(self, lines: list[str], start_index: int) -> int:
        """Parse a parameter value.

        Accepts both ``:param name: text`` and ``:param type name: text``.
        A parameter that was already read is reported as an error and ignored.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        directive_type = None
        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            # no type info
            name = parsed_directive.directive_parts[1]
        elif len(parsed_directive.directive_parts) == 3:  # noqa: PLR2004
            directive_type = parsed_directive.directive_parts[1]
            name = parsed_directive.directive_parts[2]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        if name in self._parsed_values.parameters:
            # Reported through `self.error` like every other problem in this parser.
            self.error(f"Duplicate parameter entry for '{name}'")
            return parsed_directive.next_index

        annotation = self._determine_param_annotation(name, directive_type)
        default, kind = self._determine_param_details(name)

        self._parsed_values.parameters[name] = Parameter(
            name=name,
            annotation=annotation,
            description=parsed_directive.value,
            default=default,
            kind=kind,
        )

        return parsed_directive.next_index

    def _determine_param_details(self, name: str) -> tuple[Any, Any]:
        """Look up the default value and kind of a parameter in the signature.

        Arguments:
            name: The documented parameter name; leading ``*`` are ignored for the lookup.

        Returns:
            A ``(default, kind)`` tuple. Each item is `empty` when no signature is
            available, the parameter is not in it, or (for the default) none is set.
        """
        default = empty
        kind = empty

        if self._typed_context.signature is not None:
            param_signature = self._typed_context.signature.parameters.get(name.lstrip("*"))
            # an error for param_signature being none is already reported by _determine_param_annotation()
            if param_signature is not None:
                if param_signature.default is not empty:
                    default = param_signature.default
                kind = param_signature.kind  # type: ignore[assignment]

        return default, kind

    def _determine_param_annotation(self, name: str, directive_type: Optional[str]) -> Any:
        """Choose the annotation to attach to a parameter.

        Records an error when both an in-line type and a "type" directive exist for
        the parameter, and when the signature has no parameter with that name.

        Arguments:
            name: The documented parameter name; leading ``*`` are ignored for the lookup.
            directive_type: The type given in-line in the directive, if any.

        Returns:
            The selected annotation, or `empty` when none is known.
        """
        # Annotation precedence:
        # - signature annotation
        # - in-line directive type
        # - "type" directive type
        # - empty
        # The candidates are applied from lowest to highest precedence below.
        annotation = empty

        parsed_param_type = self._parsed_values.param_types.get(name)
        if parsed_param_type is not None:
            annotation = parsed_param_type  # type: ignore[assignment]

        if directive_type is not None:
            annotation = directive_type  # type: ignore[assignment]

        if directive_type is not None and parsed_param_type is not None:
            self.error(f"Duplicate parameter information for '{name}'")

        if self._typed_context.signature is not None:
            try:
                param_signature = self._typed_context.signature.parameters[name.lstrip("*")]
            except KeyError:
                self.error(f"No matching parameter for '{name}'")
            else:
                if param_signature.annotation is not empty:
                    annotation = param_signature.annotation

        return annotation

    def _read_parameter_type(self, lines: list[str], start_index: int) -> int:
        """Parse a parameter type.

        The type is remembered for a parameter read later, and applied right away
        to an already-read parameter that has no annotation yet.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        param_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            param_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get parameter name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.param_types[param_name] = param_type
        param = self._parsed_values.parameters.get(param_name)
        if param is not None:
            if param.annotation is empty:
                param.annotation = param_type
            else:
                self.error(f"Duplicate parameter information for '{param_name}'")
        return parsed_directive.next_index

    def _read_attribute(self, lines: list[str], start_index: int) -> int:
        """Parse an attribute value.

        An attribute that was already read is reported as an error and ignored.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        annotation = empty

        # Annotation precedence:
        # - external context type TODO: spend time understanding where this comes from
        # - "vartype" directive type
        # - empty
        # The candidates are applied from lowest to highest precedence below.

        parsed_attribute_type = self._parsed_values.attribute_types.get(name)
        if parsed_attribute_type is not None:
            annotation = parsed_attribute_type  # type: ignore[assignment]

        context_attribute_annotation = self._typed_context.attributes[name].get("annotation")
        if context_attribute_annotation is not None:
            annotation = context_attribute_annotation  # type: ignore[assignment]

        if name in self._parsed_values.attributes:
            # Reported through `self.error` like every other problem in this parser.
            self.error(f"Duplicate attribute entry for '{name}'")
        else:
            self._parsed_values.attributes[name] = Attribute(
                name=name,
                annotation=annotation,
                description=parsed_directive.value,
            )

        return parsed_directive.next_index

    def _read_attribute_type(self, lines: list[str], start_index: int) -> int:
        """Parse an attribute type.

        The type is remembered for an attribute read later, and applied right away
        to an already-read attribute that has no annotation yet.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        attribute_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            attribute_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get attribute name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.attribute_types[attribute_name] = attribute_type
        attribute = self._parsed_values.attributes.get(attribute_name)
        if attribute is not None:
            if attribute.annotation is empty:
                attribute.annotation = attribute_type
            else:
                self.error(f"Duplicate attribute information for '{attribute_name}'")
        return parsed_directive.next_index

    def _read_exception(self, lines: list[str], start_index: int) -> int:
        """Parse an exception value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            ex_type = parsed_directive.directive_parts[1]
            self._parsed_values.exceptions.append(AnnotatedObject(ex_type, parsed_directive.value))
        else:
            self.error(f"Failed to parse exception directive from '{parsed_directive.line}'")

        return parsed_directive.next_index

    def _read_return(self, lines: list[str], start_index: int) -> int:
        """Parse a return value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        annotation = empty
        # Annotation precedence:
        # - signature annotation
        # - "rtype" directive type
        # - external context type TODO: spend time understanding where this comes from
        # - empty
        if self._typed_context.signature is not None and self._typed_context.signature.return_annotation is not empty:
            annotation = self._typed_context.signature.return_annotation
        elif self._parsed_values.return_type is not None:
            annotation = self._parsed_values.return_type  # type: ignore[assignment]
        else:
            annotation = self._typed_context.annotation

        self._parsed_values.return_value = AnnotatedObject(annotation, parsed_directive.value)

        return parsed_directive.next_index

    def _read_return_type(self, lines: list[str], start_index: int) -> int:
        """Parse a return type value.

        The type is remembered for a return value read later, and applied right
        away to an already-read return value that has no annotation yet.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        return_type = _consolidate_descriptive_type(parsed_directive.value.strip())
        self._parsed_values.return_type = return_type
        return_value = self._parsed_values.return_value
        if return_value is not None:
            if return_value.annotation is empty:
                return_value.annotation = return_type
            else:
                self.error("Duplicate type information for return")

        return parsed_directive.next_index

    def _parsed_values_to_sections(self) -> list[Section]:
        """Turn the accumulated values into sections.

        Returns:
            A markdown section holding the description (always present), followed by
            the parameters, attributes, return and exceptions sections, each only
            when something was read for it.
        """
        markdown_text = "\n".join(_strip_blank_lines(self._parsed_values.description))
        result = [Section(Section.Type.MARKDOWN, markdown_text)]
        if self._parsed_values.parameters:
            param_values = list(self._parsed_values.parameters.values())
            result.append(Section(Section.Type.PARAMETERS, param_values))
        if self._parsed_values.attributes:
            attribute_values = list(self._parsed_values.attributes.values())
            result.append(Section(Section.Type.ATTRIBUTES, attribute_values))
        if self._parsed_values.return_value is not None:
            result.append(Section(Section.Type.RETURN, self._parsed_values.return_value))
        if self._parsed_values.exceptions:
            result.append(Section(Section.Type.EXCEPTIONS, self._parsed_values.exceptions))
        return result

    def _parse_directive(self, lines: list[str], start_index: int) -> ParsedDirective:
        """Split a ``:directive: value`` field into its parts.

        Continuation lines are folded into the field first. A field that cannot be
        split into a directive and a value is reported as an error.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            The parsed directive; its `invalid` flag is set when splitting failed.
        """
        line, next_index = _consolidate_continuation_lines(lines, start_index)
        try:
            _, directive, value = line.split(":", 2)
        except ValueError:
            self.error(f"Failed to get ':directive: value' pair from '{line}'")
            return ParsedDirective(line, next_index, [], "", invalid=True)

        value = value.strip()
        # Split on any run of whitespace: splitting on a single space would turn
        # repeated or trailing spaces into empty parts that the readers would then
        # mistake for an (empty) name or type.
        return ParsedDirective(line, next_index, directive.split(), value)

437 

438 

439def _consolidate_continuation_lines(lines: list[str], start_index: int) -> tuple[str, int]: 

440 """Convert a docstring field into a single line if a line continuation exists. 

441 

442 Arguments: 

443 lines: The docstring lines. 

444 start_index: The line number to start at. 

445 

446 Returns: 

447 A tuple containing the continued lines as a single string and the index at which to continue parsing. 

448 """ 

449 curr_line_index = start_index 

450 block = [lines[curr_line_index].lstrip()] 

451 

452 # start processing after first item 

453 curr_line_index += 1 

454 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"): 

455 block.append(lines[curr_line_index].lstrip()) 

456 curr_line_index += 1 

457 

458 return " ".join(block).rstrip("\n"), curr_line_index - 1 

459 

460 

461def _consolidate_descriptive_type(descriptive_type: str) -> str: 

462 """Convert type descriptions with "or" into respective type signature. 

463 

464 "x or None" or "None or x" -> "Optional[x]" 

465 "x or x" or "x or y[ or z [...]]" -> "Union[x, y, ...]" 

466 

467 Args: 

468 descriptive_type: Descriptions of an item's type. 

469 

470 Returns: 

471 Type signature for descriptive type. 

472 """ 

473 types = descriptive_type.split("or") 

474 if len(types) == 1: 

475 return descriptive_type 

476 types = [pt.strip() for pt in types] 

477 if len(types) == 2: # noqa: PLR2004 

478 if types[0] == "None": 

479 return f"Optional[{types[1]}]" 

480 if types[1] == "None": 

481 return f"Optional[{types[0]}]" 

482 return f"Union[{','.join(types)}]" 

483 

484 

485def _strip_blank_lines(lines: list[str]) -> list[str]: 

486 """Remove lines with no text or only whitespace characters from the start and end of the list. 

487 

488 Args: 

489 lines: Lines to be stripped. 

490 

491 Returns: 

492 A list with the same contents, with any blank lines at the start or end removed. 

493 """ 

494 if not lines: 

495 return lines 

496 

497 # remove blank lines from the start and end 

498 content_found = False 

499 initial_content = 0 

500 final_content = 0 

501 for index, line in enumerate(lines): 

502 if line == "" or line.isspace(): 

503 if not content_found: 

504 initial_content += 1 

505 else: 

506 content_found = True 

507 final_content = index 

508 return lines[initial_content : final_content + 1]