Coverage for src/pytkdocs/parsers/docstrings/restructured

1"""This module defines functions and classes to parse docstrings into structured data."""

3from collections import defaultdict

4from dataclasses import dataclass, field

5from inspect import Signature

6from typing import Any, Callable, Optional, cast

8from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty

10try:

11 from typing import TypedDict

12except ImportError:

13 from typing_extensions import TypedDict

16# TODO: Examples: from the documentation, I'm not sure there is a standard format for examples

# Directive keywords, grouped by the kind of field they introduce.
# Each group is immutable so it can be shared safely between parser instances.

# Keywords that document a parameter, e.g. ":param name: description".
PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})

# Keywords that give the type of a parameter, e.g. ":type name: int".
PARAM_TYPE_NAMES = frozenset({"type"})

# Keywords that document an attribute, e.g. ":var name: description".
ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})

# Keywords that give the type of an attribute, e.g. ":vartype name: int".
ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})

# Keywords that document the return value.
RETURN_NAMES = frozenset({"returns", "return"})

# Keywords that give the type of the return value.
RETURN_TYPE_NAMES = frozenset({"rtype"})

# Keywords that document a raised exception.
EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})

@dataclass(frozen=True)
class FieldType:
    """Associate a group of directive keywords with the function that reads them."""

    # Directive keywords (without the leading colon) handled by `reader`.
    names: frozenset[str]
    # Called with the docstring lines and the index of the matching line; returns an index.
    reader: Callable[[list[str], int], int]

    def matches(self, line: str) -> bool:
        """Tell whether a line begins with one of this field type's directives.

        The test is a plain prefix comparison against ":<name>", so the line
        must start with the colon (no leading whitespace) and a longer keyword
        sharing the same prefix also matches.

        Args:
            line: The docstring line to test.

        Returns:
            True when the line starts with ":<name>" for any of the names, False otherwise.
        """
        prefixes = tuple(f":{name}" for name in self.names)
        return line.startswith(prefixes)

class AttributesDict(TypedDict):
    """Shape of the per-attribute details supplied through the parsing context."""

    # Documentation text attached to the attribute.
    docstring: str
    # Annotation of the attribute.
    # NOTE(review): the original author was not sure `type` is the right annotation here - confirm.
    annotation: type

class ParseContext:
    """Typed view over the context dictionary handed to the parser."""

    # The documented object. NOTE(review): presumably a pytkdocs Object or subclass - confirm.
    obj: Any
    # Attribute details keyed by attribute name; unknown names yield an empty dict.
    attributes: defaultdict[str, AttributesDict]
    # Value of the object's `signature` attribute, or None when it has none.
    signature: Optional[Signature]
    # Value of the object's `type` attribute, or `empty` when it has none.
    # The exact type is not settled yet. Maybe Optional[Union[Literal[Signature.empty], str, Type]].
    annotation: Any

    # This might be better expressed as the obj plus optional attributes.
    def __init__(self, context: dict):
        """Build the typed context from the raw context dictionary.

        Args:
            context: Context of the parsing operation. It must contain an "obj"
                key and may contain an "attributes" mapping.
        """
        documented = context["obj"]
        known_attributes = defaultdict(cast(Callable[[], AttributesDict], dict))
        supplied = context.get("attributes")
        if supplied is not None:
            known_attributes.update(supplied)

        self.obj = documented
        self.attributes = known_attributes
        # Both lookups fall back to a default, so `obj` may be None.
        self.signature = getattr(documented, "signature", None)
        self.annotation = getattr(documented, "type", empty)

@dataclass
class ParsedDirective:
    """Result of splitting one ":directive: value" field of a docstring."""

    # The complete field text the directive was read from.
    line: str
    # Index returned to the caller as the place to continue parsing from.
    next_index: int
    # Pieces of the text found between the first two colons.
    directive_parts: list[str]
    # Text that follows the second colon.
    value: str
    # True when the field could not be split into a directive and a value.
    invalid: bool = False

@dataclass
class ParsedValues:
    """Accumulator for everything read from a docstring before sections are built."""

    # Lines that are not part of any directive.
    description: list[str] = field(default_factory=list)
    # Documented parameters, keyed by parameter name.
    parameters: dict[str, Parameter] = field(default_factory=dict)
    # Types given for parameters, keyed by parameter name.
    param_types: dict[str, str] = field(default_factory=dict)
    # Documented attributes, keyed by attribute name.
    attributes: dict[str, Attribute] = field(default_factory=dict)
    # Types given for attributes, keyed by attribute name.
    attribute_types: dict[str, str] = field(default_factory=dict)
    # Documented exceptions, in the order they were read.
    exceptions: list[AnnotatedObject] = field(default_factory=list)
    # Documented return value, if one has been read.
    return_value: Optional[AnnotatedObject] = None
    # Type given for the return value, if one has been read.
    return_type: Optional[str] = None

101

102

class RestructuredText(Parser):
    """A reStructuredText docstrings parser.

    Lines starting with a known field directive (parameters, parameter types,
    attributes, attribute types, exceptions, return value, return type) are
    read into structured values; every other line is kept as description text.
    """

    def __init__(self, **kwargs: Any) -> None:  # noqa: ARG002
        """Initialize the object.

        Args:
            **kwargs: Accepted but not used by this parser.
        """
        super().__init__()
        self._typed_context = ParseContext({"obj": None})
        self._parsed_values: ParsedValues = ParsedValues()
        # Ordering is significant so that directives like ":vartype" are checked before ":var"
        self.field_types = [
            FieldType(PARAM_TYPE_NAMES, self._read_parameter_type),
            FieldType(PARAM_NAMES, self._read_parameter),
            FieldType(ATTRIBUTE_TYPE_NAMES, self._read_attribute_type),
            FieldType(ATTRIBUTE_NAMES, self._read_attribute),
            FieldType(EXCEPTION_NAMES, self._read_exception),
            FieldType(RETURN_NAMES, self._read_return),
            FieldType(RETURN_TYPE_NAMES, self._read_return_type),
        ]

    def parse_sections(self, docstring: str) -> list[Section]:
        """Parse a docstring into sections.

        Arguments:
            docstring: The docstring to parse.

        Returns:
            The description as a Markdown section, followed by the parameters,
            attributes, return and exceptions sections that were found.
        """
        # State is rebuilt on every call so that results never leak between docstrings.
        self._typed_context = ParseContext(self.context)
        self._parsed_values = ParsedValues()

        lines = docstring.split("\n")
        curr_line_index = 0

        while curr_line_index < len(lines):
            line = lines[curr_line_index]
            for field_type in self.field_types:
                if field_type.matches(line):
                    # https://github.com/python/mypy/issues/5485
                    # Readers return the index of the last line they consumed.
                    curr_line_index = field_type.reader(lines, curr_line_index)
                    break
            else:
                # No directive matched: the line belongs to the description.
                self._parsed_values.description.append(line)

            curr_line_index += 1

        return self._parsed_values_to_sections()

    def _read_parameter(self, lines: list[str], start_index: int) -> int:
        """Parse a parameter value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        directive_type = None
        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            # no type info
            name = parsed_directive.directive_parts[1]
        elif len(parsed_directive.directive_parts) == 3:  # noqa: PLR2004
            # in-line type: ":param <type> <name>:"
            directive_type = parsed_directive.directive_parts[1]
            name = parsed_directive.directive_parts[2]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        if name in self._parsed_values.parameters:
            # NOTE(review): appended to self.errors directly, while other problems are reported
            # through self.error() - confirm whether the difference is intentional.
            self.errors.append(f"Duplicate parameter entry for '{name}'")
            return parsed_directive.next_index

        annotation = self._determine_param_annotation(name, directive_type)
        default, kind = self._determine_param_details(name)

        self._parsed_values.parameters[name] = Parameter(
            name=name,
            annotation=annotation,
            description=parsed_directive.value,
            default=default,
            kind=kind,
        )

        return parsed_directive.next_index

    def _determine_param_details(self, name: str) -> tuple[Any, Any]:
        """Look up the default value and kind of a parameter in the signature.

        Arguments:
            name: The documented parameter name; leading "*" characters are ignored for the lookup.

        Returns:
            A tuple (default, kind). Each item is `empty` when it cannot be determined.
        """
        default = empty
        kind = empty

        if self._typed_context.signature is not None:
            param_signature = self._typed_context.signature.parameters.get(name.lstrip("*"))
            # an error for param_signature being none is already reported by _determine_param_annotation()
            if param_signature is not None:
                if param_signature.default is not empty:
                    default = param_signature.default
                kind = param_signature.kind  # type: ignore[assignment]

        return default, kind

    def _determine_param_annotation(self, name: str, directive_type: Optional[str]) -> Any:
        """Choose the annotation of a parameter from the available sources.

        Reports an error when both an in-line type and a "type" directive exist,
        and when the signature has no parameter with that name.

        Arguments:
            name: The documented parameter name; leading "*" characters are ignored for the lookup.
            directive_type: The type written in-line in the directive, if any.

        Returns:
            The selected annotation, or `empty` when no source provides one.
        """
        # Annotation precedence:
        # - signature annotation
        # - in-line directive type
        # - "type" directive type
        # - empty
        # The sources are applied from lowest to highest precedence, each overriding the previous one.
        annotation = empty

        parsed_param_type = self._parsed_values.param_types.get(name)
        if parsed_param_type is not None:
            annotation = parsed_param_type  # type: ignore[assignment]

        if directive_type is not None:
            annotation = directive_type  # type: ignore[assignment]

        if directive_type is not None and parsed_param_type is not None:
            self.error(f"Duplicate parameter information for '{name}'")

        if self._typed_context.signature is not None:
            try:
                param_signature = self._typed_context.signature.parameters[name.lstrip("*")]
            except KeyError:
                self.error(f"No matching parameter for '{name}'")
            else:
                if param_signature.annotation is not empty:
                    annotation = param_signature.annotation

        return annotation

    def _read_parameter_type(self, lines: list[str], start_index: int) -> int:
        """Parse a parameter type.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        param_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            param_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get parameter name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        # Remember the type in case the parameter itself is documented later on.
        self._parsed_values.param_types[param_name] = param_type
        param = self._parsed_values.parameters.get(param_name)
        if param is not None:
            # The parameter was documented before its type: fill in the missing annotation.
            if param.annotation is empty:
                param.annotation = param_type
            else:
                self.error(f"Duplicate parameter information for '{param_name}'")
        return parsed_directive.next_index

    def _read_attribute(self, lines: list[str], start_index: int) -> int:
        """Parse an attribute value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        annotation = empty

        # Annotation precedence:
        # - external context type TODO: spend time understanding where this comes from
        # - "vartype" directive type
        # - empty

        parsed_attribute_type = self._parsed_values.attribute_types.get(name)
        if parsed_attribute_type is not None:
            annotation = parsed_attribute_type  # type: ignore[assignment]

        # Use .get() rather than indexing: indexing the defaultdict would insert an
        # empty entry for every attribute name that is merely looked up.
        context_attribute = self._typed_context.attributes.get(name)
        context_attribute_annotation = None if context_attribute is None else context_attribute.get("annotation")
        if context_attribute_annotation is not None:
            annotation = context_attribute_annotation  # type: ignore[assignment]

        if name in self._parsed_values.attributes:
            # NOTE(review): appended to self.errors directly, while other problems are reported
            # through self.error() - confirm whether the difference is intentional.
            self.errors.append(f"Duplicate attribute entry for '{name}'")
        else:
            self._parsed_values.attributes[name] = Attribute(
                name=name,
                annotation=annotation,
                description=parsed_directive.value,
            )

        return parsed_directive.next_index

    def _read_attribute_type(self, lines: list[str], start_index: int) -> int:
        """Parse an attribute type.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        attribute_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            attribute_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get attribute name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        # Remember the type in case the attribute itself is documented later on.
        self._parsed_values.attribute_types[attribute_name] = attribute_type
        attribute = self._parsed_values.attributes.get(attribute_name)
        if attribute is not None:
            # The attribute was documented before its type: fill in the missing annotation.
            if attribute.annotation is empty:
                attribute.annotation = attribute_type
            else:
                self.error(f"Duplicate attribute information for '{attribute_name}'")
        return parsed_directive.next_index

    def _read_exception(self, lines: list[str], start_index: int) -> int:
        """Parse an exception value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            ex_type = parsed_directive.directive_parts[1]
            self._parsed_values.exceptions.append(AnnotatedObject(ex_type, parsed_directive.value))
        else:
            self.error(f"Failed to parse exception directive from '{parsed_directive.line}'")

        return parsed_directive.next_index

    def _read_return(self, lines: list[str], start_index: int) -> int:
        """Parse a return value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        annotation = empty
        # Annotation precedence:
        # - signature annotation
        # - "rtype" directive type
        # - external context type TODO: spend time understanding where this comes from
        # - empty
        if self._typed_context.signature is not None and self._typed_context.signature.return_annotation is not empty:
            annotation = self._typed_context.signature.return_annotation
        elif self._parsed_values.return_type is not None:
            annotation = self._parsed_values.return_type  # type: ignore[assignment]
        else:
            annotation = self._typed_context.annotation

        self._parsed_values.return_value = AnnotatedObject(annotation, parsed_directive.value)

        return parsed_directive.next_index

    def _read_return_type(self, lines: list[str], start_index: int) -> int:
        """Parse a return type value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        return_type = _consolidate_descriptive_type(parsed_directive.value.strip())
        # Remember the type in case the return value itself is documented later on.
        self._parsed_values.return_type = return_type
        return_value = self._parsed_values.return_value
        if return_value is not None:
            # The return value was documented before its type: fill in the missing annotation.
            if return_value.annotation is empty:
                return_value.annotation = return_type
            else:
                self.error("Duplicate type information for return")

        return parsed_directive.next_index

    def _parsed_values_to_sections(self) -> list[Section]:
        """Build the list of sections from the values parsed so far.

        Returns:
            A Markdown section holding the description (always present, blank
            lines trimmed from both ends), followed by the parameters,
            attributes, return and exceptions sections that have content.
        """
        markdown_text = "\n".join(_strip_blank_lines(self._parsed_values.description))
        result = [Section(Section.Type.MARKDOWN, markdown_text)]
        if self._parsed_values.parameters:
            param_values = list(self._parsed_values.parameters.values())
            result.append(Section(Section.Type.PARAMETERS, param_values))
        if self._parsed_values.attributes:
            attribute_values = list(self._parsed_values.attributes.values())
            result.append(Section(Section.Type.ATTRIBUTES, attribute_values))
        if self._parsed_values.return_value is not None:
            result.append(Section(Section.Type.RETURN, self._parsed_values.return_value))
        if self._parsed_values.exceptions:
            result.append(Section(Section.Type.EXCEPTIONS, self._parsed_values.exceptions))
        return result

    def _parse_directive(self, lines: list[str], start_index: int) -> ParsedDirective:
        """Split a field into its directive parts and its value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            The parsed directive. It is flagged as invalid, and an error is
            reported, when the field has no ":directive: value" shape.
        """
        line, next_index = _consolidate_continuation_lines(lines, start_index)
        try:
            # Only the first two colons delimit the directive; later ones belong to the value.
            _, directive, value = line.split(":", 2)
        except ValueError:
            self.error(f"Failed to get ':directive: value' pair from '{line}'")
            return ParsedDirective(line, next_index, [], "", invalid=True)

        value = value.strip()
        # Split on any run of whitespace: splitting on a single space would turn
        # ":param  name:" into ["param", "", "name"] and yield an empty type.
        return ParsedDirective(line, next_index, directive.split(), value)

437

438

439def _consolidate_continuation_lines(lines: list[str], start_index: int) -> tuple[str, int]:

440 """Convert a docstring field into a single line if a line continuation exists.

441

442 Arguments:

443 lines: The docstring lines.

444 start_index: The line number to start at.

445

446 Returns:

447 A tuple containing the continued lines as a single string and the index at which to continue parsing.

448 """

449 curr_line_index = start_index

450 block = [lines[curr_line_index].lstrip()]

451

452 # start processing after first item

453 curr_line_index += 1

454 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"):

455 block.append(lines[curr_line_index].lstrip())

456 curr_line_index += 1

457

458 return " ".join(block).rstrip("\n"), curr_line_index - 1

459

460

461def _consolidate_descriptive_type(descriptive_type: str) -> str:

462 """Convert type descriptions with "or" into respective type signature.

463

464 "x or None" or "None or x" -> "Optional[x]"

465 "x or x" or "x or y[ or z [...]]" -> "Union[x, y, ...]"

466

467 Args:

468 descriptive_type: Descriptions of an item's type.

469

470 Returns:

471 Type signature for descriptive type.

472 """

473 types = descriptive_type.split("or")

474 if len(types) == 1:

475 return descriptive_type

476 types = [pt.strip() for pt in types]

477 if len(types) == 2: # noqa: PLR2004

478 if types[0] == "None":

479 return f"Optional[{types[1]}]"

480 if types[1] == "None":

481 return f"Optional[{types[0]}]"

482 return f"Union[{','.join(types)}]"

483

484

485def _strip_blank_lines(lines: list[str]) -> list[str]:

486 """Remove lines with no text or only whitespace characters from the start and end of the list.

487

488 Args:

489 lines: Lines to be stripped.

490

491 Returns:

492 A list with the same contents, with any blank lines at the start or end removed.

493 """

494 if not lines:

495 return lines

496

497 # remove blank lines from the start and end

498 content_found = False

499 initial_content = 0

500 final_content = 0

501 for index, line in enumerate(lines):

502 if line == "" or line.isspace():

503 if not content_found:

504 initial_content += 1

505 else:

506 content_found = True

507 final_content = index

508 return lines[initial_content : final_content + 1]

Coverage for src/pytkdocs/parsers/docstrings/restructured_text.py: 99.44%

260 statements