Coverage for src/pytkdocs/parsers/docstrings/restructured_text.py: 100.00%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""This module defines functions and classes to parse docstrings into structured data."""
2from collections import defaultdict
3from dataclasses import dataclass, field
4from inspect import Signature
5from typing import Any, Callable, DefaultDict, Dict, FrozenSet, List, Optional, Tuple, Type, Union, cast # noqa: WPS235
7from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty
9try:
10 from typing import TypedDict # type: ignore
11except ImportError:
12 from typing_extensions import TypedDict # noqa: WPS440 # type: ignore
13try:
14 from typing import Literal # type: ignore
15except ImportError:
16 # https://github.com/python/mypy/issues/8520
17 from typing_extensions import Literal # type: ignore # noqa: WPS440
20# TODO: Examples: from the documentation, I'm not sure there is a standard format for examples
# Directive names recognized by the reStructuredText parser, grouped by the kind of
# information they carry. A docstring line is matched against these names prefixed with ":".

# Parameter descriptions and their separate type directive.
PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})
PARAM_TYPE_NAMES = frozenset({"type"})

# Attribute descriptions and their separate type directive.
ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})
ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})

# Return value description and its separate type directive.
RETURN_NAMES = frozenset({"returns", "return"})
RETURN_TYPE_NAMES = frozenset({"rtype"})

# Raised exceptions.
EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})
@dataclass(frozen=True)
class FieldType:
    """Associate a set of directive names with the function that reads them."""

    # Directive names, without the leading colon.
    names: FrozenSet[str]
    # Called with the docstring lines and the index of the matching line; returns an index.
    reader: Callable[[List[str], int], int]

    def matches(self, line: str) -> bool:
        """
        Tell whether a line begins with one of this field type's directives.

        The comparison is a plain prefix test against ":" followed by each name,
        so the line must start with the colon (no leading whitespace is skipped).

        Args:
            line: Line to check against

        Returns:
            True if the line matches the field type, False otherwise.
        """
        prefixes = tuple(f":{name}" for name in self.names)
        return line.startswith(prefixes)
class AttributesDict(TypedDict):
    """
    Attribute details.

    Describes the shape of the per-attribute entries held in `ParseContext.attributes`.
    """

    # Presumably the documentation text attached to the attribute - confirm against the caller.
    docstring: str
    # Annotation recorded for the attribute; read by the parser when it resolves attribute types.
    annotation: Type  # TODO: Not positive this is correct
class ParseContext:
    """Typed view over the loosely-typed context dictionary handed to the parser."""

    obj: Any  # I think this might be pytkdocs.Object & subclasses
    attributes: DefaultDict[str, AttributesDict]
    signature: Optional[Signature]
    # Not sure real type yet. Maybe Optional[Union[Literal[Signature.empty],str,Type]]
    annotation: Any

    # This might be better as the obj & optional attributes
    def __init__(self, context: Dict):
        """
        Build the typed context from a raw context dictionary.

        The "obj" key is required; "attributes" is optional. The signature and
        annotation are looked up on the object and fall back to `None` and
        `empty` respectively when the object does not provide them.

        Args:
            context: Context of parsing operation.
        """
        target = context["obj"]

        # Unknown attribute names resolve to a fresh empty dict on first access.
        attribute_map: DefaultDict[str, AttributesDict] = defaultdict(cast(Callable[[], AttributesDict], dict))
        known_attributes = context.get("attributes")
        if known_attributes is not None:
            attribute_map.update(known_attributes)

        self.obj = target
        self.attributes = attribute_map
        self.signature = getattr(target, "signature", None)
        self.annotation = getattr(target, "type", empty)
@dataclass
class ParsedDirective:
    """
    Directive information that has been parsed from a docstring.

    Produced for a single ``:directive: value`` field after its continuation lines
    have been merged into one line.
    """

    # The full directive text, with continuation lines merged into a single line.
    line: str
    # Index handed back to the parsing loop once this directive has been processed.
    next_index: int
    # Words found between the first two colons, e.g. the directive name followed by its arguments.
    directive_parts: List[str]
    # Text after the second colon, with surrounding whitespace removed.
    value: str
    # True when the line could not be split into a ':directive: value' pair.
    invalid: bool = False
@dataclass
class ParsedValues:
    """
    Values parsed from the docstring to be used to produce sections.

    Acts as the accumulator for one parsing run: readers fill it in while the
    docstring is scanned, and the sections are built from it afterwards.
    """

    # Lines that did not match any directive; they become the markdown description.
    description: List[str] = field(default_factory=list)
    # Documented parameters, keyed by parameter name.
    parameters: Dict[str, Parameter] = field(default_factory=dict)
    # Types given by separate parameter-type directives, keyed by parameter name.
    param_types: Dict[str, str] = field(default_factory=dict)
    # Documented attributes, keyed by attribute name.
    attributes: Dict[str, Attribute] = field(default_factory=dict)
    # Types given by separate attribute-type directives, keyed by attribute name.
    attribute_types: Dict[str, str] = field(default_factory=dict)
    # Documented exceptions, in the order they were encountered.
    exceptions: List[AnnotatedObject] = field(default_factory=list)
    # Documented return value, if a return directive was found.
    return_value: Optional[AnnotatedObject] = None
    # Type given by a separate return-type directive, if one was found.
    return_type: Optional[str] = None
class RestructuredText(Parser):
    """
    A reStructuredText docstrings parser.

    Lines that start with a recognized field directive are handed to the matching
    reader method, which records what it finds in `self._parsed_values`. Every other
    line is kept as free-form description text. Problems are reported through
    `self.error` and never abort parsing.
    """

    def __init__(self) -> None:
        """Initialize the object."""
        super().__init__()
        self._typed_context = ParseContext({"obj": None})
        self._parsed_values: ParsedValues = ParsedValues()
        # Ordering is significant so that directives like ":vartype" are checked before ":var"
        self.field_types = [
            FieldType(PARAM_TYPE_NAMES, self._read_parameter_type),
            FieldType(PARAM_NAMES, self._read_parameter),
            FieldType(ATTRIBUTE_TYPE_NAMES, self._read_attribute_type),
            FieldType(ATTRIBUTE_NAMES, self._read_attribute),
            FieldType(EXCEPTION_NAMES, self._read_exception),
            FieldType(RETURN_NAMES, self._read_return),
            FieldType(RETURN_TYPE_NAMES, self._read_return_type),
        ]

    def parse_sections(self, docstring: str) -> List[Section]:
        """
        Parse a docstring into a list of sections.

        Arguments:
            docstring: The docstring to parse.

        Returns:
            The markdown description section followed by any parameters, attributes,
            return and exceptions sections that were found.
        """
        # Start every run from a clean state so results never leak between docstrings.
        self._typed_context = ParseContext(self.context)
        self._parsed_values = ParsedValues()

        lines = docstring.split("\n")
        curr_line_index = 0

        while curr_line_index < len(lines):
            line = lines[curr_line_index]
            for field_type in self.field_types:
                if field_type.matches(line):
                    # The reader may consume continuation lines; it returns the index
                    # of the last line it used.
                    # https://github.com/python/mypy/issues/5485
                    curr_line_index = field_type.reader(lines, curr_line_index)  # type: ignore
                    break
            else:
                # No directive matched: the line belongs to the description.
                self._parsed_values.description.append(line)

            curr_line_index += 1

        return self._parsed_values_to_sections()

    def _read_parameter(self, lines: List[str], start_index: int) -> int:
        """
        Parse a parameter value.

        Accepts both ``:param name: description`` and ``:param type name: description``.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        directive_type = None
        if len(parsed_directive.directive_parts) == 2:
            # no type info
            name = parsed_directive.directive_parts[1]
        elif len(parsed_directive.directive_parts) == 3:
            directive_type = parsed_directive.directive_parts[1]
            name = parsed_directive.directive_parts[2]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        if name in self._parsed_values.parameters:
            # Reported through self.error, like every other problem in this class.
            self.error(f"Duplicate parameter entry for '{name}'")
            return parsed_directive.next_index

        annotation = self._determine_param_annotation(name, directive_type)
        default, kind = self._determine_param_details(name)

        self._parsed_values.parameters[name] = Parameter(
            name=name,
            annotation=annotation,
            description=parsed_directive.value,
            default=default,
            kind=kind,
        )

        return parsed_directive.next_index

    def _determine_param_details(self, name: str) -> Tuple[Any, Any]:
        """
        Look up the default value and kind of a parameter in the signature.

        Arguments:
            name: The documented parameter name; leading "*" characters are ignored for the lookup.

        Returns:
            A tuple of the default value and the parameter kind, each left as `empty`
            when the signature does not provide it.
        """
        default = empty
        kind = empty

        if self._typed_context.signature is not None:
            param_signature = self._typed_context.signature.parameters.get(name.lstrip("*"))
            # an error for param_signature being none is already reported by _determine_param_annotation()
            if param_signature is not None:
                if param_signature.default is not empty:
                    default = param_signature.default
                kind = param_signature.kind  # type: ignore[assignment]

        return default, kind

    def _determine_param_annotation(self, name: str, directive_type: Optional[str]) -> Any:
        """
        Work out the annotation to record for a parameter.

        Reports an error when both an in-line type and a "type" directive exist for the
        parameter, and when the signature has no parameter with that name.

        Arguments:
            name: The documented parameter name; leading "*" characters are ignored for the signature lookup.
            directive_type: The type written in-line in the parameter directive, if any.

        Returns:
            The chosen annotation, or `empty` when no source provides one.
        """
        # Annotation precedence:
        # - signature annotation
        # - in-line directive type
        # - "type" directive type
        # - empty
        annotation = empty

        parsed_param_type = self._parsed_values.param_types.get(name)
        if parsed_param_type is not None:
            annotation = parsed_param_type  # type: ignore[assignment]

        if directive_type is not None:
            annotation = directive_type  # type: ignore[assignment]

        if directive_type is not None and parsed_param_type is not None:
            self.error(f"Duplicate parameter information for '{name}'")

        if self._typed_context.signature is not None:
            try:
                param_signature = self._typed_context.signature.parameters[name.lstrip("*")]
            except KeyError:
                self.error(f"No matching parameter for '{name}'")
            else:
                if param_signature.annotation is not empty:
                    annotation = param_signature.annotation

        return annotation

    def _read_parameter_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse a parameter type.

        The type is remembered for a parameter directive that may follow, and applied
        right away to an already-parsed parameter that has no annotation yet.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        param_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:
            param_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get parameter name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.param_types[param_name] = param_type
        param = self._parsed_values.parameters.get(param_name)
        if param is not None:
            if param.annotation is empty:
                param.annotation = param_type
            else:
                self.error(f"Duplicate parameter information for '{param_name}'")
        return parsed_directive.next_index

    def _read_attribute(self, lines: List[str], start_index: int) -> int:
        """
        Parse an attribute value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:
            name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        annotation = empty

        # Annotation precedence:
        # - external context type TODO: spend time understanding where this comes from
        # - "vartype" directive type
        # - empty

        parsed_attribute_type = self._parsed_values.attribute_types.get(name)
        if parsed_attribute_type is not None:
            annotation = parsed_attribute_type  # type: ignore[assignment]

        context_attribute_annotation = self._typed_context.attributes[name].get("annotation")
        if context_attribute_annotation is not None:
            annotation = context_attribute_annotation

        if name in self._parsed_values.attributes:
            # Reported through self.error, like every other problem in this class.
            self.error(f"Duplicate attribute entry for '{name}'")
        else:
            self._parsed_values.attributes[name] = Attribute(
                name=name,
                annotation=annotation,
                description=parsed_directive.value,
            )

        return parsed_directive.next_index

    def _read_attribute_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse an attribute type.

        The type is remembered for an attribute directive that may follow, and applied
        right away to an already-parsed attribute that has no annotation yet.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        attribute_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:
            attribute_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get attribute name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        self._parsed_values.attribute_types[attribute_name] = attribute_type
        attribute = self._parsed_values.attributes.get(attribute_name)
        if attribute is not None:
            if attribute.annotation is empty:
                attribute.annotation = attribute_type
            else:
                self.error(f"Duplicate attribute information for '{attribute_name}'")
        return parsed_directive.next_index

    def _read_exception(self, lines: List[str], start_index: int) -> int:
        """
        Parse an exceptions value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:
            ex_type = parsed_directive.directive_parts[1]
            self._parsed_values.exceptions.append(AnnotatedObject(ex_type, parsed_directive.value))
        else:
            self.error(f"Failed to parse exception directive from '{parsed_directive.line}'")

        return parsed_directive.next_index

    def _read_return(self, lines: List[str], start_index: int) -> int:
        """
        Parse a return value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        annotation = empty
        # Annotation precedence:
        # - signature annotation
        # - "rtype" directive type
        # - external context type TODO: spend time understanding where this comes from
        # - empty
        if self._typed_context.signature is not None and self._typed_context.signature.return_annotation is not empty:
            annotation = self._typed_context.signature.return_annotation
        elif self._parsed_values.return_type is not None:
            annotation = self._parsed_values.return_type  # type: ignore[assignment]
        else:
            annotation = self._typed_context.annotation

        self._parsed_values.return_value = AnnotatedObject(annotation, parsed_directive.value)

        return parsed_directive.next_index

    def _read_return_type(self, lines: List[str], start_index: int) -> int:
        """
        Parse a return type value.

        The type is remembered for a return directive that may follow, and applied
        right away to an already-parsed return value that has no annotation yet.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        return_type = _consolidate_descriptive_type(parsed_directive.value.strip())
        self._parsed_values.return_type = return_type
        return_value = self._parsed_values.return_value
        if return_value is not None:
            if return_value.annotation is empty:
                return_value.annotation = return_type
            else:
                self.error("Duplicate type information for return")

        return parsed_directive.next_index

    def _parsed_values_to_sections(self) -> List[Section]:
        """
        Turn the accumulated parsed values into sections.

        Returns:
            The markdown section (always present), followed by the parameters, attributes,
            return and exceptions sections, each included only when it has content.
        """
        markdown_text = "\n".join(_strip_blank_lines(self._parsed_values.description))
        result = [Section(Section.Type.MARKDOWN, markdown_text)]
        if self._parsed_values.parameters:
            param_values = list(self._parsed_values.parameters.values())
            result.append(Section(Section.Type.PARAMETERS, param_values))
        if self._parsed_values.attributes:
            attribute_values = list(self._parsed_values.attributes.values())
            result.append(Section(Section.Type.ATTRIBUTES, attribute_values))
        if self._parsed_values.return_value is not None:
            result.append(Section(Section.Type.RETURN, self._parsed_values.return_value))
        if self._parsed_values.exceptions:
            result.append(Section(Section.Type.EXCEPTIONS, self._parsed_values.exceptions))
        return result

    def _parse_directive(self, lines: List[str], start_index: int) -> ParsedDirective:
        """
        Split a directive (including its continuation lines) into its parts.

        Arguments:
            lines: The docstring lines.
            start_index: The line number the directive starts at.

        Returns:
            The parsed directive; it is flagged invalid (and an error is reported) when the
            text does not contain a ':directive: value' pair.
        """
        line, next_index = _consolidate_continuation_lines(lines, start_index)
        try:
            _, directive, value = line.split(":", 2)
        except ValueError:
            self.error(f"Failed to get ':directive: value' pair from '{line}'")
            return ParsedDirective(line, next_index, [], "", invalid=True)

        value = value.strip()
        # Split on any run of whitespace so doubled or trailing spaces in the directive
        # do not produce empty parts that would be mistaken for a type or a name.
        return ParsedDirective(line, next_index, directive.split(), value)
452def _consolidate_continuation_lines(lines: List[str], start_index: int) -> Tuple[str, int]:
453 """
454 Convert a docstring field into a single line if a line continuation exists.
456 Arguments:
457 lines: The docstring lines.
458 start_index: The line number to start at.
460 Returns:
461 A tuple containing the continued lines as a single string and the index at which to continue parsing.
462 """
463 curr_line_index = start_index
464 block = [lines[curr_line_index].lstrip()]
466 # start processing after first item
467 curr_line_index += 1
468 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"):
469 block.append(lines[curr_line_index].lstrip())
470 curr_line_index += 1
472 return " ".join(block).rstrip("\n"), curr_line_index - 1
475def _consolidate_descriptive_type(descriptive_type: str) -> str:
476 """
477 Convert type descriptions with "or" into respective type signature.
479 "x or None" or "None or x" -> "Optional[x]"
480 "x or x" or "x or y[ or z [...]]" -> "Union[x, y, ...]"
482 Args:
483 descriptive_type: Descriptions of an item's type.
485 Returns:
486 Type signature for descriptive type.
487 """
488 types = descriptive_type.split("or")
489 if len(types) == 1:
490 return descriptive_type
491 types = [pt.strip() for pt in types]
492 if len(types) == 2:
493 if types[0] == "None":
494 return f"Optional[{types[1]}]"
495 if types[1] == "None":
496 return f"Optional[{types[0]}]"
497 return f"Union[{','.join(types)}]"
500def _strip_blank_lines(lines: List[str]) -> List[str]:
501 """
502 Remove lines with no text or only whitespace characters from the start and end of the list.
504 Args:
505 lines: Lines to be stripped.
507 Returns:
508 A list with the same contents, with any blank lines at the start or end removed.
509 """
510 if not lines:
511 return lines
513 # remove blank lines from the start and end
514 content_found = False
515 initial_content = 0
516 final_content = 0
517 for index, line in enumerate(lines):
518 if line == "" or line.isspace():
519 if not content_found:
520 initial_content += 1
521 else:
522 content_found = True
523 final_content = index
524 return lines[initial_content : final_content + 1]