Coverage for src/pytkdocs/parsers/docstrings/restructured_text.py: 100.00%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

264 statements  

1"""This module defines functions and classes to parse docstrings into structured data.""" 

2from collections import defaultdict 

3from dataclasses import dataclass, field 

4from inspect import Signature 

5from typing import Any, Callable, DefaultDict, Dict, FrozenSet, List, Optional, Tuple, Type, Union, cast # noqa: WPS235 

6 

7from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty 

8 

9try: 

10 from typing import TypedDict # type: ignore 

11except ImportError: 

12 from typing_extensions import TypedDict # noqa: WPS440 # type: ignore 

13try: 

14 from typing import Literal # type: ignore 

15except ImportError: 

16 # https://github.com/python/mypy/issues/8520 

17 from typing_extensions import Literal # type: ignore # noqa: WPS440 

18 

19 

# TODO: Examples: from the documentation, I'm not sure there is a standard format for examples

# Field names (written between colons in a docstring, e.g. ":param x:") grouped by
# the kind of information they carry.

# Parameters and their types.
PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})
PARAM_TYPE_NAMES = frozenset({"type"})

# Attributes and their types.
ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})
ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})

# Return value and its type.
RETURN_NAMES = frozenset({"returns", "return"})
RETURN_TYPE_NAMES = frozenset({"rtype"})

# Raised exceptions.
EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})

28 

29 

@dataclass(frozen=True)
class FieldType:
    """
    Maps directive names to parser functions.

    Attributes:
        names: Directive names (without the leading colon) handled by `reader`.
        reader: Callable taking the docstring lines and the index of the matching line,
            and returning an index from which parsing continues.
    """

    names: FrozenSet[str]
    reader: Callable[[List[str], int], int]

    def matches(self, line: str) -> bool:
        """
        Check if a line matches the field type.

        A line matches when it starts with `:<name>` for one of the names and the name
        is complete, i.e. it is followed by whitespace, by a colon, or by the end of the line.
        Requiring that delimiter prevents a short name from claiming a longer directive
        (for example `var` matching `:vartype foo:`).

        Args:
            line: Line to check against

        Returns:
            True if the line matches the field type, False otherwise.
        """
        for name in self.names:
            marker = f":{name}"
            if not line.startswith(marker):
                continue
            remainder = line[len(marker) :]
            # An empty remainder is still reported as a match so that an incomplete
            # directive reaches the reader instead of silently becoming description text.
            if not remainder or remainder[0] == ":" or remainder[0].isspace():
                return True
        return False

48 

49 

class AttributesDict(TypedDict):
    """Shape of the per-attribute details stored in the parsing context."""

    # Docstring text of the attribute.
    docstring: str
    # Annotation of the attribute.
    # TODO: Not positive this is correct
    annotation: Type

55 

56 

class ParseContext:
    """Attribute-based, typed wrapper around the context dictionary."""

    # Presumably a pytkdocs Object or one of its subclasses -- TODO confirm
    obj: Any
    attributes: DefaultDict[str, AttributesDict]
    signature: Optional[Signature]
    # Real type not settled yet. Maybe Optional[Union[Literal[Signature.empty],str,Type]]
    annotation: Any

    # This might be better as the obj & optional attributes
    def __init__(self, context: Dict):
        """
        Initialize the object.

        The `"obj"` key is required; `"attributes"` is optional. The signature and
        annotation are read from the `signature` and `type` attributes of the object
        when it has them.

        Args:
            context: Context of parsing operation.
        """
        documented_object = context["obj"]

        # Missing attribute names resolve to an empty details mapping.
        known_attributes: DefaultDict[str, AttributesDict] = defaultdict(cast(Callable[[], AttributesDict], dict))
        supplied_attributes = context.get("attributes")
        if supplied_attributes is not None:
            known_attributes.update(supplied_attributes)

        self.obj = documented_object
        self.attributes = known_attributes
        self.signature = getattr(documented_object, "signature", None)
        self.annotation = getattr(documented_object, "type", empty)

82 

83 

@dataclass
class ParsedDirective:
    """Result of splitting one docstring directive into its components."""

    # The directive text after continuation lines were merged into a single line.
    line: str
    # Index the reader hands back to the parsing loop.
    next_index: int
    # Space-separated pieces found between the first two colons.
    directive_parts: List[str]
    # Text following the directive.
    value: str
    # Set when the line could not be split into a directive and a value.
    invalid: bool = False

93 

94 

@dataclass
class ParsedValues:
    """Accumulator for everything read from a docstring before sections are built."""

    # Lines that did not match any field directive.
    description: List[str] = field(default_factory=list)
    # Parameters keyed by name, with separately declared types kept alongside.
    parameters: Dict[str, Parameter] = field(default_factory=dict)
    param_types: Dict[str, str] = field(default_factory=dict)
    # Attributes keyed by name, with separately declared types kept alongside.
    attributes: Dict[str, Attribute] = field(default_factory=dict)
    attribute_types: Dict[str, str] = field(default_factory=dict)
    # Documented exceptions, in the order they were read.
    exceptions: List[AnnotatedObject] = field(default_factory=list)
    # Return value and separately declared return type, when present.
    return_value: Optional[AnnotatedObject] = None
    return_type: Optional[str] = None

107 

108 

class RestructuredText(Parser):
    """A reStructuredText docstrings parser."""

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()
        self._typed_context = ParseContext({"obj": None})
        self._parsed_values: ParsedValues = ParsedValues()
        # Ordering is significant so that directives like ":vartype" are checked before ":var"
        self.field_types = [
            FieldType(PARAM_TYPE_NAMES, self._read_parameter_type),
            FieldType(PARAM_NAMES, self._read_parameter),
            FieldType(ATTRIBUTE_TYPE_NAMES, self._read_attribute_type),
            FieldType(ATTRIBUTE_NAMES, self._read_attribute),
            FieldType(EXCEPTION_NAMES, self._read_exception),
            FieldType(RETURN_NAMES, self._read_return),
            FieldType(RETURN_TYPE_NAMES, self._read_return_type),
        ]

    def parse_sections(self, docstring: str) -> List[Section]:
        """
        Parse a docstring into a list of sections.

        Each line is offered to the field types in order; the first one that matches
        reads the directive (including its continuation lines). Lines matching no field
        type are collected as description text.

        Arguments:
            docstring: The docstring to parse.

        Returns:
            The sections built from the parsed values.
        """
        # Start from a clean state so results of a previous call do not leak into this one.
        self._typed_context = ParseContext(self.context)
        self._parsed_values = ParsedValues()

        lines = docstring.split("\n")
        curr_line_index = 0

        while curr_line_index < len(lines):
            line = lines[curr_line_index]
            for field_type in self.field_types:
                if field_type.matches(line):
                    # https://github.com/python/mypy/issues/5485
                    # The reader returns the index of the last line it consumed.
                    curr_line_index = field_type.reader(lines, curr_line_index)  # type: ignore
                    break
            else:
                self._parsed_values.description.append(line)

            curr_line_index += 1

        return self._parsed_values_to_sections()

    def _read_parameter(self, lines: List[str], start_index: int) -> int:
        """
        Parse a parameter value.

        Accepts `:param name: description` and `:param type name: description`.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        directive_type = None
        if len(parsed_directive.directive_parts) == 2:
            # no type info
            name = parsed_directive.directive_parts[1]
        elif len(parsed_directive.directive_parts) == 3:
            directive_type = parsed_directive.directive_parts[1]
            name = parsed_directive.directive_parts[2]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        if name in self._parsed_values.parameters:
            # NOTE(review): this appends to `self.errors` directly while other branches call
            # `self.error()` -- confirm whether the difference is intentional.
            self.errors.append(f"Duplicate parameter entry for '{name}'")
            return parsed_directive.next_index

        annotation = self._determine_param_annotation(name, directive_type)
        default, kind = self._determine_param_details(name)

        self._parsed_values.parameters[name] = Parameter(
            name=name,
            annotation=annotation,
            description=parsed_directive.value,
            default=default,
            kind=kind,
        )

        return parsed_directive.next_index

    def _determine_param_details(self, name: str) -> Tuple[Any, Any]:
        """
        Look up the default value and kind of a parameter in the signature.

        Arguments:
            name: The parameter name; leading `*` characters are ignored for the lookup.

        Returns:
            A tuple `(default, kind)`; either item is `empty` when it could not be determined.
        """
        default = empty
        kind = empty

        if self._typed_context.signature is not None:
            param_signature = self._typed_context.signature.parameters.get(name.lstrip("*"))
            # an error for param_signature being none is already reported by _determine_param_annotation()
            if param_signature is not None:
                if param_signature.default is not empty:
                    default = param_signature.default
                kind = param_signature.kind  # type: ignore[assignment]

        return default, kind

    def _determine_param_annotation(self, name: str, directive_type: Optional[str]) -> Any:
        """
        Choose the annotation of a parameter from the available sources.

        Reports an error when both an in-line type and a "type" directive exist for the
        parameter, and when a signature is available but has no parameter of that name.

        Arguments:
            name: The parameter name; leading `*` characters are ignored for the signature lookup.
            directive_type: Type given in-line in the parameter directive, if any.

        Returns:
            The selected annotation, or `empty` when no source provides one.
        """
        # Annotation precedence:
        # - signature annotation
        # - in-line directive type
        # - "type" directive type
        # - empty
        annotation = empty

        parsed_param_type = self._parsed_values.param_types.get(name)
        if parsed_param_type is not None:
            annotation = parsed_param_type  # type: ignore[assignment]

        if directive_type is not None:
            annotation = directive_type  # type: ignore[assignment]

        if directive_type is not None and parsed_param_type is not None:
            self.error(f"Duplicate parameter information for '{name}'")

        if self._typed_context.signature is not None:
            try:
                param_signature = self._typed_context.signature.parameters[name.lstrip("*")]
            except KeyError:
                self.error(f"No matching parameter for '{name}'")
            else:
                if param_signature.annotation is not empty:
                    annotation = param_signature.annotation

        return annotation

    def _read_parameter_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse a parameter type.

        The type is recorded for later use and, when the parameter was already read
        without an annotation, applied to it immediately.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        param_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:
            param_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get parameter name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.param_types[param_name] = param_type
        param = self._parsed_values.parameters.get(param_name)
        if param is not None:
            if param.annotation is empty:
                param.annotation = param_type
            else:
                self.error(f"Duplicate parameter information for '{param_name}'")
        return parsed_directive.next_index

    def _read_attribute(self, lines: List[str], start_index: int) -> int:
        """
        Parse an attribute value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:
            name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        annotation = empty

        # Annotation precedence:
        # - external context type TODO: spend time understanding where this comes from
        # - "vartype" directive type
        # - empty

        parsed_attribute_type = self._parsed_values.attribute_types.get(name)
        if parsed_attribute_type is not None:
            annotation = parsed_attribute_type  # type: ignore[assignment]

        context_attribute_annotation = self._typed_context.attributes[name].get("annotation")
        if context_attribute_annotation is not None:
            annotation = context_attribute_annotation

        if name in self._parsed_values.attributes:
            # NOTE(review): appends to `self.errors` directly instead of calling `self.error()`
            # like the other branches -- confirm whether the difference is intentional.
            self.errors.append(f"Duplicate attribute entry for '{name}'")
        else:
            self._parsed_values.attributes[name] = Attribute(
                name=name,
                annotation=annotation,
                description=parsed_directive.value,
            )

        return parsed_directive.next_index

    def _read_attribute_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse an attribute type.

        The type is recorded for later use and, when the attribute was already read
        without an annotation, applied to it immediately.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        attribute_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:
            attribute_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get attribute name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.attribute_types[attribute_name] = attribute_type
        attribute = self._parsed_values.attributes.get(attribute_name)
        if attribute is not None:
            if attribute.annotation is empty:
                attribute.annotation = attribute_type
            else:
                self.error(f"Duplicate attribute information for '{attribute_name}'")
        return parsed_directive.next_index

    def _read_exception(self, lines: List[str], start_index: int) -> int:
        """
        Parse an exception value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:
            ex_type = parsed_directive.directive_parts[1]
            self._parsed_values.exceptions.append(AnnotatedObject(ex_type, parsed_directive.value))
        else:
            self.error(f"Failed to parse exception directive from '{parsed_directive.line}'")

        return parsed_directive.next_index

    def _read_return(self, lines: List[str], start_index: int) -> int:
        """
        Parse a return value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        annotation = empty
        # Annotation precedence:
        # - signature annotation
        # - "rtype" directive type
        # - external context type TODO: spend time understanding where this comes from
        # - empty
        if self._typed_context.signature is not None and self._typed_context.signature.return_annotation is not empty:
            annotation = self._typed_context.signature.return_annotation
        elif self._parsed_values.return_type is not None:
            annotation = self._parsed_values.return_type  # type: ignore[assignment]
        else:
            annotation = self._typed_context.annotation

        self._parsed_values.return_value = AnnotatedObject(annotation, parsed_directive.value)

        return parsed_directive.next_index

    def _read_return_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse a return type value.

        The type is recorded for later use and, when the return value was already read
        without an annotation, applied to it immediately.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        return_type = _consolidate_descriptive_type(parsed_directive.value.strip())
        self._parsed_values.return_type = return_type
        return_value = self._parsed_values.return_value
        if return_value is not None:
            if return_value.annotation is empty:
                return_value.annotation = return_type
            else:
                self.error("Duplicate type information for return")

        return parsed_directive.next_index

    def _parsed_values_to_sections(self) -> List[Section]:
        """
        Build the sections from the values parsed so far.

        Returns:
            A markdown section holding the description, followed by the parameters,
            attributes, return and exceptions sections for which values were parsed.
        """
        markdown_text = "\n".join(_strip_blank_lines(self._parsed_values.description))
        result = [Section(Section.Type.MARKDOWN, markdown_text)]
        if self._parsed_values.parameters:
            param_values = list(self._parsed_values.parameters.values())
            result.append(Section(Section.Type.PARAMETERS, param_values))
        if self._parsed_values.attributes:
            attribute_values = list(self._parsed_values.attributes.values())
            result.append(Section(Section.Type.ATTRIBUTES, attribute_values))
        if self._parsed_values.return_value is not None:
            result.append(Section(Section.Type.RETURN, self._parsed_values.return_value))
        if self._parsed_values.exceptions:
            result.append(Section(Section.Type.EXCEPTIONS, self._parsed_values.exceptions))
        return result

    def _parse_directive(self, lines: List[str], start_index: int) -> ParsedDirective:
        """
        Split a directive into its parts and its value.

        Continuation lines are merged first, then the text is split on the first two
        colons into the directive (between the colons) and the value (after them).

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            The parsed directive; it is flagged invalid (and an error is reported) when
            the line does not contain two colons.
        """
        line, next_index = _consolidate_continuation_lines(lines, start_index)
        try:
            _, directive, value = line.split(":", 2)
        except ValueError:
            self.error(f"Failed to get ':directive: value' pair from '{line}'")
            return ParsedDirective(line, next_index, [], "", invalid=True)

        value = value.strip()
        # Split on any run of whitespace: splitting on a single space would turn repeated
        # or trailing spaces into empty parts that get mistaken for a type or a name.
        return ParsedDirective(line, next_index, directive.split(), value)

450 

451 

452def _consolidate_continuation_lines(lines: List[str], start_index: int) -> Tuple[str, int]: 

453 """ 

454 Convert a docstring field into a single line if a line continuation exists. 

455 

456 Arguments: 

457 lines: The docstring lines. 

458 start_index: The line number to start at. 

459 

460 Returns: 

461 A tuple containing the continued lines as a single string and the index at which to continue parsing. 

462 """ 

463 curr_line_index = start_index 

464 block = [lines[curr_line_index].lstrip()] 

465 

466 # start processing after first item 

467 curr_line_index += 1 

468 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"): 

469 block.append(lines[curr_line_index].lstrip()) 

470 curr_line_index += 1 

471 

472 return " ".join(block).rstrip("\n"), curr_line_index - 1 

473 

474 

475def _consolidate_descriptive_type(descriptive_type: str) -> str: 

476 """ 

477 Convert type descriptions with "or" into respective type signature. 

478 

479 "x or None" or "None or x" -> "Optional[x]" 

480 "x or x" or "x or y[ or z [...]]" -> "Union[x, y, ...]" 

481 

482 Args: 

483 descriptive_type: Descriptions of an item's type. 

484 

485 Returns: 

486 Type signature for descriptive type. 

487 """ 

488 types = descriptive_type.split("or") 

489 if len(types) == 1: 

490 return descriptive_type 

491 types = [pt.strip() for pt in types] 

492 if len(types) == 2: 

493 if types[0] == "None": 

494 return f"Optional[{types[1]}]" 

495 if types[1] == "None": 

496 return f"Optional[{types[0]}]" 

497 return f"Union[{','.join(types)}]" 

498 

499 

500def _strip_blank_lines(lines: List[str]) -> List[str]: 

501 """ 

502 Remove lines with no text or only whitespace characters from the start and end of the list. 

503 

504 Args: 

505 lines: Lines to be stripped. 

506 

507 Returns: 

508 A list with the same contents, with any blank lines at the start or end removed. 

509 """ 

510 if not lines: 

511 return lines 

512 

513 # remove blank lines from the start and end 

514 content_found = False 

515 initial_content = 0 

516 final_content = 0 

517 for index, line in enumerate(lines): 

518 if line == "" or line.isspace(): 

519 if not content_found: 

520 initial_content += 1 

521 else: 

522 content_found = True 

523 final_content = index 

524 return lines[initial_content : final_content + 1]