Coverage for src/pytkdocs/parsers/docstrings/restructured_text.py

1"""This module defines functions and classes to parse docstrings into structured data."""

2from collections import defaultdict

3from dataclasses import dataclass, field

4from inspect import Signature

5from typing import Any, Callable, DefaultDict, Dict, FrozenSet, List, Optional, Tuple, Type, Union, cast # noqa: WPS235

7from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty

9try:

10 from typing import TypedDict # type: ignore

11except ImportError:

12 from typing_extensions import TypedDict # noqa: WPS440 # type: ignore

13try:

14 from typing import Literal # type: ignore

15except ImportError:

16 # https://github.com/python/mypy/issues/8520

17 from typing_extensions import Literal # type: ignore # noqa: WPS440

# TODO: Examples -- from the documentation, it is unclear whether a standard format for examples exists.

# Directive names (written without the surrounding colons), grouped by what each directive documents.
PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})
PARAM_TYPE_NAMES = frozenset({"type"})
ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})
ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})
RETURN_NAMES = frozenset({"returns", "return"})
RETURN_TYPE_NAMES = frozenset({"rtype"})
EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})

@dataclass(frozen=True)
class FieldType:
    """Associate a set of directive names with the function that reads them."""

    # Directive names, without the leading colon, that this field type handles.
    names: FrozenSet[str]
    # Called with the docstring lines and the index of the matching line; returns a line index.
    reader: Callable[[List[str], int], int]

    def matches(self, line: str) -> bool:
        """
        Tell whether a line begins with one of this field type's directives.

        Args:
            line: Line to check against.

        Returns:
            True if the line starts with ":" followed by any of the names, False otherwise.
        """
        # str.startswith accepts a tuple of candidate prefixes; an empty tuple never matches.
        prefixes = tuple(f":{name}" for name in self.names)
        return line.startswith(prefixes)

class AttributesDict(TypedDict):
    """Shape of one attribute entry: its docstring and its annotation."""

    docstring: str
    annotation: Type  # TODO: confirm that `Type` is the right annotation here

class ParseContext:
    """Typed view over the context dictionary given to the parser."""

    obj: Any  # possibly pytkdocs Object and its subclasses -- TODO confirm
    attributes: DefaultDict[str, AttributesDict]
    signature: Optional[Signature]
    # Real type not settled yet. Maybe Optional[Union[Literal[Signature.empty], str, Type]].
    annotation: Any

    # This might be better expressed as the obj plus optional attributes.
    def __init__(self, context: Dict):
        """
        Build the typed context from a raw context dictionary.

        Args:
            context: Context of the parsing operation. It must contain an "obj" key;
                an "attributes" mapping is optional.
        """
        documented_object = context["obj"]
        known_attributes = context.get("attributes")

        self.obj = documented_object
        # Unknown attribute names resolve to an empty dict instead of raising KeyError.
        self.attributes = defaultdict(cast(Callable[[], AttributesDict], dict))
        if known_attributes is not None:
            self.attributes.update(known_attributes)

        # Both lookups fall back to a default when the object lacks the attribute.
        self.signature = getattr(documented_object, "signature", None)
        self.annotation = getattr(documented_object, "type", empty)

@dataclass
class ParsedDirective:
    """Outcome of parsing a single directive out of a docstring."""

    # The directive text the parts below were extracted from.
    line: str
    # Line index reported back to the caller so parsing can carry on from there.
    next_index: int
    # Pieces of the text found between the first two colons.
    directive_parts: List[str]
    # Text found after the second colon.
    value: str
    # Set when the line could not be split into a directive and a value.
    invalid: bool = False

@dataclass
class ParsedValues:
    """Accumulates what has been read from a docstring before sections are built."""

    # Lines that matched no directive.
    description: List[str] = field(default_factory=list)
    # Parameters and their separately declared types, both keyed by parameter name.
    parameters: Dict[str, Parameter] = field(default_factory=dict)
    param_types: Dict[str, str] = field(default_factory=dict)
    # Attributes and their separately declared types, both keyed by attribute name.
    attributes: Dict[str, Attribute] = field(default_factory=dict)
    attribute_types: Dict[str, str] = field(default_factory=dict)
    exceptions: List[AnnotatedObject] = field(default_factory=list)
    # Both stay None until the matching directive is read.
    return_value: Optional[AnnotatedObject] = None
    return_type: Optional[str] = None

107

108

class RestructuredText(Parser):
    """A reStructuredText docstrings parser."""

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()
        self._typed_context = ParseContext({"obj": None})
        self._parsed_values: ParsedValues = ParsedValues()
        # Ordering is significant so that directives like ":vartype" are checked before ":var"
        # (matching is done by prefix, see `FieldType.matches`).
        self.field_types = [
            FieldType(PARAM_TYPE_NAMES, self._read_parameter_type),
            FieldType(PARAM_NAMES, self._read_parameter),
            FieldType(ATTRIBUTE_TYPE_NAMES, self._read_attribute_type),
            FieldType(ATTRIBUTE_NAMES, self._read_attribute),
            FieldType(EXCEPTION_NAMES, self._read_exception),
            FieldType(RETURN_NAMES, self._read_return),
            FieldType(RETURN_TYPE_NAMES, self._read_return_type),
        ]

    def parse_sections(self, docstring: str) -> List[Section]:
        """
        Parse a docstring into a list of sections.

        Lines that start with a known directive are handed to the matching reader;
        every other line becomes part of the description.

        Arguments:
            docstring: The docstring to parse.

        Returns:
            The sections built from the parsed values.
        """
        # Start from a clean state on every call so results do not leak between docstrings.
        self._typed_context = ParseContext(self.context)
        self._parsed_values = ParsedValues()

        lines = docstring.split("\n")
        curr_line_index = 0

        while curr_line_index < len(lines):
            line = lines[curr_line_index]
            for field_type in self.field_types:
                if field_type.matches(line):
                    # The reader returns the index of the last line it consumed.
                    # https://github.com/python/mypy/issues/5485
                    curr_line_index = field_type.reader(lines, curr_line_index)  # type: ignore
                    break
            else:
                # No directive matched: the line belongs to the free-text description.
                self._parsed_values.description.append(line)

            curr_line_index += 1

        return self._parsed_values_to_sections()

    def _read_parameter(self, lines: List[str], start_index: int) -> int:
        """
        Parse a parameter value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        directive_type = None
        if len(parsed_directive.directive_parts) == 2:
            # no type info
            name = parsed_directive.directive_parts[1]
        elif len(parsed_directive.directive_parts) == 3:
            # in-line type info: "<directive> <type> <name>"
            directive_type = parsed_directive.directive_parts[1]
            name = parsed_directive.directive_parts[2]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        if name in self._parsed_values.parameters:
            # The first entry wins; later duplicates are only reported.
            self.error(f"Duplicate parameter entry for '{name}'")
            return parsed_directive.next_index

        annotation = self._determine_param_annotation(name, directive_type)
        default, kind = self._determine_param_details(name)

        self._parsed_values.parameters[name] = Parameter(
            name=name,
            annotation=annotation,
            description=parsed_directive.value,
            default=default,
            kind=kind,
        )

        return parsed_directive.next_index

    def _determine_param_details(self, name: str) -> Tuple[Any, Any]:
        """
        Look up the default value and kind of a parameter in the signature.

        Arguments:
            name: The parameter name; leading "*" characters are ignored for the lookup.

        Returns:
            A tuple (default, kind); each item is `empty` when it cannot be determined.
        """
        default = empty
        kind = empty

        if self._typed_context.signature is not None:
            param_signature = self._typed_context.signature.parameters.get(name.lstrip("*"))
            # an error for param_signature being none is already reported by _determine_param_annotation()
            if param_signature is not None:
                if param_signature.default is not empty:
                    default = param_signature.default
                kind = param_signature.kind  # type: ignore[assignment]

        return default, kind

    def _determine_param_annotation(self, name: str, directive_type: Optional[str]) -> Any:
        """
        Choose the annotation of a parameter from the available sources.

        Reports an error when both an in-line type and a "type" directive exist for the
        parameter, or when a signature is available but has no such parameter.

        Arguments:
            name: The parameter name; leading "*" characters are ignored for the signature lookup.
            directive_type: The type given in-line in the parameter directive, if any.

        Returns:
            The chosen annotation, or `empty` when no source provides one.
        """
        # Annotation precedence:
        # - signature annotation
        # - in-line directive type
        # - "type" directive type
        # - empty
        # Sources are applied from lowest to highest precedence, each overriding the previous.
        annotation = empty

        parsed_param_type = self._parsed_values.param_types.get(name)
        if parsed_param_type is not None:
            annotation = parsed_param_type  # type: ignore[assignment]

        if directive_type is not None:
            annotation = directive_type  # type: ignore[assignment]

        if directive_type is not None and parsed_param_type is not None:
            self.error(f"Duplicate parameter information for '{name}'")

        if self._typed_context.signature is not None:
            try:
                param_signature = self._typed_context.signature.parameters[name.lstrip("*")]
            except KeyError:
                self.error(f"No matching parameter for '{name}'")
            else:
                if param_signature.annotation is not empty:
                    annotation = param_signature.annotation

        return annotation

    def _read_parameter_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse a parameter type.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        param_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:
            param_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get parameter name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.param_types[param_name] = param_type
        # The parameter itself may already have been read; fill in its type if it is still missing.
        param = self._parsed_values.parameters.get(param_name)
        if param is not None:
            if param.annotation is empty:
                param.annotation = param_type
            else:
                self.error(f"Duplicate parameter information for '{param_name}'")
        return parsed_directive.next_index

    def _read_attribute(self, lines: List[str], start_index: int) -> int:
        """
        Parse an attribute value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:
            name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        annotation = empty

        # Annotation precedence:
        # - external context type TODO: spend time understanding where this comes from
        # - "vartype" directive type
        # - empty

        parsed_attribute_type = self._parsed_values.attribute_types.get(name)
        if parsed_attribute_type is not None:
            annotation = parsed_attribute_type  # type: ignore[assignment]

        # Use .get() rather than indexing: indexing the defaultdict would insert an empty
        # entry for every attribute name that is merely looked up.
        context_attribute = self._typed_context.attributes.get(name)
        if context_attribute is not None:
            context_attribute_annotation = context_attribute.get("annotation")
            if context_attribute_annotation is not None:
                annotation = context_attribute_annotation

        if name in self._parsed_values.attributes:
            # The first entry wins; later duplicates are only reported.
            self.error(f"Duplicate attribute entry for '{name}'")
        else:
            self._parsed_values.attributes[name] = Attribute(
                name=name,
                annotation=annotation,
                description=parsed_directive.value,
            )

        return parsed_directive.next_index

    def _read_attribute_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse an attribute type.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        attribute_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:
            attribute_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get attribute name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.attribute_types[attribute_name] = attribute_type
        # The attribute itself may already have been read; fill in its type if it is still missing.
        attribute = self._parsed_values.attributes.get(attribute_name)
        if attribute is not None:
            if attribute.annotation is empty:
                attribute.annotation = attribute_type
            else:
                self.error(f"Duplicate attribute information for '{attribute_name}'")
        return parsed_directive.next_index

    def _read_exception(self, lines: List[str], start_index: int) -> int:
        """
        Parse an exception value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:
            ex_type = parsed_directive.directive_parts[1]
            self._parsed_values.exceptions.append(AnnotatedObject(ex_type, parsed_directive.value))
        else:
            self.error(f"Failed to parse exception directive from '{parsed_directive.line}'")

        return parsed_directive.next_index

    def _read_return(self, lines: List[str], start_index: int) -> int:
        """
        Parse a return value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        annotation = empty
        # Annotation precedence:
        # - signature annotation
        # - "rtype" directive type
        # - external context type TODO: spend time understanding where this comes from
        # - empty
        if self._typed_context.signature is not None and self._typed_context.signature.return_annotation is not empty:
            annotation = self._typed_context.signature.return_annotation
        elif self._parsed_values.return_type is not None:
            annotation = self._parsed_values.return_type  # type: ignore[assignment]
        else:
            annotation = self._typed_context.annotation

        self._parsed_values.return_value = AnnotatedObject(annotation, parsed_directive.value)

        return parsed_directive.next_index

    def _read_return_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse a return type value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        return_type = _consolidate_descriptive_type(parsed_directive.value.strip())
        self._parsed_values.return_type = return_type
        # The return value may already have been read; fill in its type if it is still missing.
        return_value = self._parsed_values.return_value
        if return_value is not None:
            if return_value.annotation is empty:
                return_value.annotation = return_type
            else:
                self.error("Duplicate type information for return")

        return parsed_directive.next_index

    def _parsed_values_to_sections(self) -> List[Section]:
        """
        Turn the accumulated parsed values into sections.

        Returns:
            A markdown section (always present), followed by the parameters, attributes,
            return and exceptions sections for which something was parsed.
        """
        markdown_text = "\n".join(_strip_blank_lines(self._parsed_values.description))
        result = [Section(Section.Type.MARKDOWN, markdown_text)]
        if self._parsed_values.parameters:
            param_values = list(self._parsed_values.parameters.values())
            result.append(Section(Section.Type.PARAMETERS, param_values))
        if self._parsed_values.attributes:
            attribute_values = list(self._parsed_values.attributes.values())
            result.append(Section(Section.Type.ATTRIBUTES, attribute_values))
        if self._parsed_values.return_value is not None:
            result.append(Section(Section.Type.RETURN, self._parsed_values.return_value))
        if self._parsed_values.exceptions:
            result.append(Section(Section.Type.EXCEPTIONS, self._parsed_values.exceptions))
        return result

    def _parse_directive(self, lines: List[str], start_index: int) -> ParsedDirective:
        """
        Split a directive, including its continuation lines, into its parts and its value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            The parsed directive; it is flagged as invalid (and an error is reported)
            when the text does not have the ":directive: value" shape.
        """
        line, next_index = _consolidate_continuation_lines(lines, start_index)
        try:
            # Expected shape is ":directive: value": the text before the first colon is discarded.
            _, directive, value = line.split(":", 2)
        except ValueError:
            self.error(f"Failed to get ':directive: value' pair from '{line}'")
            return ParsedDirective(line, next_index, [], "", invalid=True)

        value = value.strip()
        # Split on any run of whitespace so repeated spaces do not produce empty parts.
        return ParsedDirective(line, next_index, directive.split(), value)

450

451

452def _consolidate_continuation_lines(lines: List[str], start_index: int) -> Tuple[str, int]:

453 """

454 Convert a docstring field into a single line if a line continuation exists.

455

456 Arguments:

457 lines: The docstring lines.

458 start_index: The line number to start at.

459

460 Returns:

461 A tuple containing the continued lines as a single string and the index at which to continue parsing.

462 """

463 curr_line_index = start_index

464 block = [lines[curr_line_index].lstrip()]

465

466 # start processing after first item

467 curr_line_index += 1

468 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"):

469 block.append(lines[curr_line_index].lstrip())

470 curr_line_index += 1

471

472 return " ".join(block).rstrip("\n"), curr_line_index - 1

473

474

475def _consolidate_descriptive_type(descriptive_type: str) -> str:

476 """

477 Convert type descriptions with "or" into respective type signature.

478

479 "x or None" or "None or x" -> "Optional[x]"

480 "x or x" or "x or y[ or z [...]]" -> "Union[x, y, ...]"

481

482 Args:

483 descriptive_type: Descriptions of an item's type.

484

485 Returns:

486 Type signature for descriptive type.

487 """

488 types = descriptive_type.split("or")

489 if len(types) == 1:

490 return descriptive_type

491 types = [pt.strip() for pt in types]

492 if len(types) == 2:

493 if types[0] == "None":

494 return f"Optional[{types[1]}]"

495 if types[1] == "None":

496 return f"Optional[{types[0]}]"

497 return f"Union[{','.join(types)}]"

498

499

500def _strip_blank_lines(lines: List[str]) -> List[str]:

501 """

502 Remove lines with no text or only whitespace characters from the start and end of the list.

503

504 Args:

505 lines: Lines to be stripped.

506

507 Returns:

508 A list with the same contents, with any blank lines at the start or end removed.

509 """

510 if not lines:

511 return lines

512

513 # remove blank lines from the start and end

514 content_found = False

515 initial_content = 0

516 final_content = 0

517 for index, line in enumerate(lines):

518 if line == "" or line.isspace():

519 if not content_found:

520 initial_content += 1

521 else:

522 content_found = True

523 final_content = index

524 return lines[initial_content : final_content + 1]

Coverage for src/pytkdocs/parsers/docstrings/restructured_text.py: 100.00%

264 statements