Coverage for src/pytkdocs/parsers/docstrings/ 99.44%
260 statements
« prev ^ index » next v7.6.12, created at 2025-03-09 17:28 +0100
« prev ^ index » next v7.6.12, created at 2025-03-09 17:28 +0100
1"""This module defines functions and classes to parse docstrings into structured data."""
3from collections import defaultdict
4from dataclasses import dataclass, field
5from inspect import Signature
6from typing import Any, Callable, Optional, cast
8from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty
11 from typing import TypedDict
12except ImportError:
13 from typing_extensions import TypedDict
16# TODO: Examples: from the documentation, I'm not sure there is a standard format for examples
# Directive names recognised at the start of a docstring line (written as ":name ..."),
# grouped by the kind of information the directive carries.
PARAM_NAMES = frozenset({"param", "parameter", "arg", "argument", "key", "keyword"})
PARAM_TYPE_NAMES = frozenset({"type"})
ATTRIBUTE_NAMES = frozenset({"var", "ivar", "cvar"})
ATTRIBUTE_TYPE_NAMES = frozenset({"vartype"})
RETURN_NAMES = frozenset({"returns", "return"})
RETURN_TYPE_NAMES = frozenset({"rtype"})
EXCEPTION_NAMES = frozenset({"raises", "raise", "except", "exception"})
# The class is instantiated positionally (`FieldType(names, reader)`) and defines no
# `__init__`, so it must be a dataclass. Nothing in this module mutates an instance,
# hence `frozen=True`.
@dataclass(frozen=True)
class FieldType:
    """Maps directive names to parser functions.

    Attributes:
        names: Directive names, without the leading colon, handled by `reader`.
        reader: Callable that receives the docstring lines and the index of the matching
            line, and returns the index at which to continue parsing.
    """

    names: frozenset[str]
    reader: Callable[[list[str], int], int]

    def matches(self, line: str) -> bool:
        """Check if a line matches the field type.

        The check is a plain prefix test on ":<name>", so a short name also matches
        longer directives that start with it (":var" matches ":vartype ..."). The line
        is not stripped first: a directive preceded by whitespace does not match.

        Args:
            line: Line to check against

        Returns:
            True if the line matches the field type, False otherwise.
        """
        # `str.startswith` accepts a tuple of prefixes; an empty tuple never matches.
        return line.startswith(tuple(f":{name}" for name in self.names))
class AttributesDict(TypedDict):
    """Details about a single attribute, as stored in the parsing context."""

    # Keys of each per-attribute mapping.
    docstring: str
    annotation: type  # TODO: Not positive this is correct
class ParseContext:
    """Typed replacement for context dictionary."""

    obj: Any  # I think this might be pytkdocs.Object & subclasses
    attributes: defaultdict[str, AttributesDict]
    signature: Optional[Signature]
    # Not sure real type yet. Maybe Optional[Union[Literal[Signature.empty],str,Type]]
    annotation: Any

    # This might be better as the obj & optional attributes
    def __init__(self, context: dict):
        """Build the typed context from the raw context dictionary.

        Args:
            context: Context of parsing operation. Must contain an "obj" key; an optional
                "attributes" key supplies per-attribute details.
        """
        documented_obj = context["obj"]

        # Unknown attribute names resolve to an empty mapping instead of raising.
        known_attributes = defaultdict(cast(Callable[[], AttributesDict], dict))
        supplied_attributes = context.get("attributes")
        if supplied_attributes is not None:
            known_attributes.update(supplied_attributes)

        self.obj = documented_obj
        self.attributes = known_attributes
        # Both lookups tolerate `obj` being None or lacking the attribute.
        self.signature = getattr(documented_obj, "signature", None)
        self.annotation = getattr(documented_obj, "type", empty)
# Instances are built positionally and the class has no `__init__`, so it must be a
# dataclass. Instances are never mutated in this module, hence `frozen=True`.
@dataclass(frozen=True)
class ParsedDirective:
    """Directive information that has been parsed from a docstring.

    Attributes:
        line: The directive line, with any continuation lines merged into it.
        next_index: Index the reader should return so that parsing can continue.
        directive_parts: The text between the first two colons, split into words
            (for example `["param", "int", "name"]`).
        value: The stripped text following the second colon.
        invalid: True when the line could not be split into a directive and a value.
    """

    line: str
    next_index: int
    directive_parts: list[str]
    value: str
    invalid: bool = False
# `field(default_factory=...)` only has meaning inside a dataclass, and the class is
# instantiated with no arguments, so the decorator is required. Instances are mutated
# while parsing, so the dataclass is not frozen.
@dataclass
class ParsedValues:
    """Values parsed from the docstring to be used to produce sections.

    Attributes:
        description: Lines that did not match any directive.
        parameters: Documented parameters, keyed by name.
        param_types: Types given through parameter-type directives, keyed by parameter name.
        attributes: Documented attributes, keyed by name.
        attribute_types: Types given through attribute-type directives, keyed by attribute name.
        exceptions: Documented exceptions, in order of appearance.
        return_value: Documented return value, if any.
        return_type: Type given through a return-type directive, if any.
    """

    description: list[str] = field(default_factory=list)
    parameters: dict[str, Parameter] = field(default_factory=dict)
    param_types: dict[str, str] = field(default_factory=dict)
    attributes: dict[str, Attribute] = field(default_factory=dict)
    attribute_types: dict[str, str] = field(default_factory=dict)
    exceptions: list[AnnotatedObject] = field(default_factory=list)
    return_value: Optional[AnnotatedObject] = None
    return_type: Optional[str] = None
class RestructuredText(Parser):
    """A reStructuredText docstrings parser.

    Lines starting with a known directive (parameters, parameter types, attributes,
    attribute types, exceptions, return value, return type) are handed to a dedicated
    reader. Every other line is collected as part of the Markdown description.
    """

    def __init__(self, **kwargs: Any) -> None:  # noqa: ARG002
        """Initialize the object.

        Arguments:
            **kwargs: Accepted but not used by this parser.
        """
        super().__init__()
        self._typed_context = ParseContext({"obj": None})
        self._parsed_values: ParsedValues = ParsedValues()
        # Ordering is significant so that directives like ":vartype" are checked before ":var"
        self.field_types = [
            FieldType(PARAM_TYPE_NAMES, self._read_parameter_type),
            FieldType(PARAM_NAMES, self._read_parameter),
            FieldType(ATTRIBUTE_TYPE_NAMES, self._read_attribute_type),
            FieldType(ATTRIBUTE_NAMES, self._read_attribute),
            FieldType(EXCEPTION_NAMES, self._read_exception),
            FieldType(RETURN_NAMES, self._read_return),
            FieldType(RETURN_TYPE_NAMES, self._read_return_type),
        ]

    def parse_sections(self, docstring: str) -> list[Section]:
        """Parse a docstring into sections.

        Arguments:
            docstring: The docstring to parse.

        Returns:
            The Markdown description section, followed by the parameters, attributes,
            return and exceptions sections for which at least one directive was read.
        """
        # Reset per-call state so results of a previous call do not leak into this one.
        self._typed_context = ParseContext(self.context)
        self._parsed_values = ParsedValues()

        lines = docstring.split("\n")
        curr_line_index = 0

        while curr_line_index < len(lines):
            line = lines[curr_line_index]
            for field_type in self.field_types:
                if field_type.matches(line):
                    # The reader may consume continuation lines; it returns the index of
                    # the last line it consumed, and the increment below moves past it.
                    curr_line_index = field_type.reader(lines, curr_line_index)
                    break
            else:
                # No directive matched: the line belongs to the free-text description.
                self._parsed_values.description.append(line)

            curr_line_index += 1

        return self._parsed_values_to_sections()

    def _read_parameter(self, lines: list[str], start_index: int) -> int:
        """Parse a parameter value.

        Accepts both ":param name: description" and ":param type name: description".

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        directive_type = None
        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            # no type info
            name = parsed_directive.directive_parts[1]
        elif len(parsed_directive.directive_parts) == 3:  # noqa: PLR2004
            directive_type = parsed_directive.directive_parts[1]
            name = parsed_directive.directive_parts[2]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        if name in self._parsed_values.parameters:
            # NOTE(review): this appends to `self.errors` directly while other failures go
            # through `self.error(...)`; confirm whether the difference is intentional.
            self.errors.append(f"Duplicate parameter entry for '{name}'")
            return parsed_directive.next_index

        annotation = self._determine_param_annotation(name, directive_type)
        default, kind = self._determine_param_details(name)

        self._parsed_values.parameters[name] = Parameter(
            name=name,
            annotation=annotation,
            description=parsed_directive.value,
            default=default,
            kind=kind,
        )

        return parsed_directive.next_index

    def _determine_param_details(self, name: str) -> tuple[Any, Any]:
        """Look up the default value and kind of a parameter in the signature.

        Arguments:
            name: The parameter name as written in the docstring (leading `*` allowed).

        Returns:
            A `(default, kind)` tuple; each item is `empty` when it cannot be determined.
        """
        default = empty
        kind = empty

        if self._typed_context.signature is not None:
            # Variadic parameters may be documented as "*args" / "**kwargs".
            param_signature = self._typed_context.signature.parameters.get(name.lstrip("*"))
            # an error for param_signature being none is already reported by _determine_param_annotation()
            if param_signature is not None:
                if param_signature.default is not empty:
                    default = param_signature.default
                kind = param_signature.kind  # type: ignore[assignment]

        return default, kind

    def _determine_param_annotation(self, name: str, directive_type: Optional[str]) -> Any:
        """Choose the annotation of a parameter from the available sources.

        Reports an error when both an in-line type and a separate type directive exist,
        and when a signature is available but has no parameter of that name.

        Arguments:
            name: The parameter name as written in the docstring (leading `*` allowed).
            directive_type: The type given in-line in the parameter directive, if any.

        Returns:
            The selected annotation, or `empty` when no source provides one.
        """
        # Annotation precedence:
        # - signature annotation
        # - in-line directive type
        # - "type" directive type
        # - empty
        annotation = empty

        # Sources are applied from lowest to highest precedence, each overriding the last.
        parsed_param_type = self._parsed_values.param_types.get(name)
        if parsed_param_type is not None:
            annotation = parsed_param_type  # type: ignore[assignment]

        if directive_type is not None:
            annotation = directive_type  # type: ignore[assignment]

        if directive_type is not None and parsed_param_type is not None:
            self.error(f"Duplicate parameter information for '{name}'")

        if self._typed_context.signature is not None:
            try:
                param_signature = self._typed_context.signature.parameters[name.lstrip("*")]
            except KeyError:
                self.error(f"No matching parameter for '{name}'")
            else:
                if param_signature.annotation is not empty:
                    annotation = param_signature.annotation

        return annotation

    def _read_parameter_type(self, lines: list[str], start_index: int) -> int:
        """Parse a parameter type.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        param_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            param_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get parameter name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        # Remember the type for a parameter directive that may come later...
        self._parsed_values.param_types[param_name] = param_type
        # ...and apply it to a parameter that was already read, unless it has a type.
        param = self._parsed_values.parameters.get(param_name)
        if param is not None:
            if param.annotation is empty:
                param.annotation = param_type
            else:
                self.error(f"Duplicate parameter information for '{param_name}'")
        return parsed_directive.next_index

    def _read_attribute(self, lines: list[str], start_index: int) -> int:
        """Parse an attribute value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to parse field directive from '{parsed_directive.line}'")
            return parsed_directive.next_index

        annotation = empty

        # Annotation precedence:
        # - external context type TODO: spend time understanding where this comes from
        # - "vartype" directive type
        # - empty

        parsed_attribute_type = self._parsed_values.attribute_types.get(name)
        if parsed_attribute_type is not None:
            annotation = parsed_attribute_type  # type: ignore[assignment]

        # `attributes` is a defaultdict: an unknown name yields an empty mapping, so
        # `.get` returns None instead of raising.
        context_attribute_annotation = self._typed_context.attributes[name].get("annotation")
        if context_attribute_annotation is not None:
            annotation = context_attribute_annotation  # type: ignore[assignment]

        if name in self._parsed_values.attributes:
            # NOTE(review): appended directly rather than through `self.error(...)`;
            # confirm whether the difference is intentional.
            self.errors.append(f"Duplicate attribute entry for '{name}'")
        else:
            self._parsed_values.attributes[name] = Attribute(
                name=name,
                annotation=annotation,
                description=parsed_directive.value,
            )

        return parsed_directive.next_index

    def _read_attribute_type(self, lines: list[str], start_index: int) -> int:
        """Parse an attribute type.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index
        attribute_type = _consolidate_descriptive_type(parsed_directive.value.strip())

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            attribute_name = parsed_directive.directive_parts[1]
        else:
            self.error(f"Failed to get attribute name from '{parsed_directive.line}'")
            return parsed_directive.next_index

        # Remember the type for an attribute directive that may come later, and apply it
        # to an attribute that was already read, unless it has a type.
        self._parsed_values.attribute_types[attribute_name] = attribute_type
        attribute = self._parsed_values.attributes.get(attribute_name)
        if attribute is not None:
            if attribute.annotation is empty:
                attribute.annotation = attribute_type
            else:
                self.error(f"Duplicate attribute information for '{attribute_name}'")
        return parsed_directive.next_index

    def _read_exception(self, lines: list[str], start_index: int) -> int:
        """Parse an exception value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        if len(parsed_directive.directive_parts) == 2:  # noqa: PLR2004
            ex_type = parsed_directive.directive_parts[1]
            self._parsed_values.exceptions.append(AnnotatedObject(ex_type, parsed_directive.value))
        else:
            self.error(f"Failed to parse exception directive from '{parsed_directive.line}'")

        return parsed_directive.next_index

    def _read_return(self, lines: list[str], start_index: int) -> int:
        """Parse a return value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        # Annotation precedence:
        # - signature annotation
        # - "rtype" directive type
        # - external context type TODO: spend time understanding where this comes from
        # - empty
        if self._typed_context.signature is not None and self._typed_context.signature.return_annotation is not empty:
            annotation = self._typed_context.signature.return_annotation
        elif self._parsed_values.return_type is not None:
            annotation = self._parsed_values.return_type  # type: ignore[assignment]
        else:
            annotation = self._typed_context.annotation

        self._parsed_values.return_value = AnnotatedObject(annotation, parsed_directive.value)

        return parsed_directive.next_index

    def _read_return_type(self, lines: list[str], start_index: int) -> int:
        """Parse a return type value.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            Index at which to continue parsing.
        """
        parsed_directive = self._parse_directive(lines, start_index)
        if parsed_directive.invalid:
            return parsed_directive.next_index

        return_type = _consolidate_descriptive_type(parsed_directive.value.strip())
        # Remember the type for a return directive that may come later, and apply it to a
        # return value that was already read, unless it has a type.
        self._parsed_values.return_type = return_type
        return_value = self._parsed_values.return_value
        if return_value is not None:
            if return_value.annotation is empty:
                return_value.annotation = return_type
            else:
                self.error("Duplicate type information for return")

        return parsed_directive.next_index

    def _parsed_values_to_sections(self) -> list[Section]:
        """Convert the collected values into sections.

        Returns:
            The Markdown section (always present), then the parameters, attributes,
            return and exceptions sections that have content.
        """
        markdown_text = "\n".join(_strip_blank_lines(self._parsed_values.description))
        result = [Section(Section.Type.MARKDOWN, markdown_text)]
        if self._parsed_values.parameters:
            param_values = list(self._parsed_values.parameters.values())
            result.append(Section(Section.Type.PARAMETERS, param_values))
        if self._parsed_values.attributes:
            attribute_values = list(self._parsed_values.attributes.values())
            result.append(Section(Section.Type.ATTRIBUTES, attribute_values))
        if self._parsed_values.return_value is not None:
            result.append(Section(Section.Type.RETURN, self._parsed_values.return_value))
        if self._parsed_values.exceptions:
            result.append(Section(Section.Type.EXCEPTIONS, self._parsed_values.exceptions))
        return result

    def _parse_directive(self, lines: list[str], start_index: int) -> ParsedDirective:
        """Split a ":directive: value" line, merged with its continuation lines, into parts.

        Arguments:
            lines: The docstring lines.
            start_index: The line number to start at.

        Returns:
            The parsed directive. It is flagged as invalid, after an error is reported,
            when the line does not contain two colons.
        """
        line, next_index = _consolidate_continuation_lines(lines, start_index)
        try:
            _, directive, value = line.split(":", 2)
        except ValueError:
            self.error(f"Failed to get ':directive: value' pair from '{line}'")
            return ParsedDirective(line, next_index, [], "", invalid=True)

        value = value.strip()
        # Split on any run of whitespace: splitting on a single space would turn
        # ":param  name:" into ["param", "", "name"] and read "" as the type.
        return ParsedDirective(line, next_index, directive.split(), value)
439def _consolidate_continuation_lines(lines: list[str], start_index: int) -> tuple[str, int]:
440 """Convert a docstring field into a single line if a line continuation exists.
442 Arguments:
443 lines: The docstring lines.
444 start_index: The line number to start at.
446 Returns:
447 A tuple containing the continued lines as a single string and the index at which to continue parsing.
448 """
449 curr_line_index = start_index
450 block = [lines[curr_line_index].lstrip()]
452 # start processing after first item
453 curr_line_index += 1
454 while curr_line_index < len(lines) and not lines[curr_line_index].startswith(":"):
455 block.append(lines[curr_line_index].lstrip())
456 curr_line_index += 1
458 return " ".join(block).rstrip("\n"), curr_line_index - 1
461def _consolidate_descriptive_type(descriptive_type: str) -> str:
462 """Convert type descriptions with "or" into respective type signature.
464 "x or None" or "None or x" -> "Optional[x]"
465 "x or x" or "x or y[ or z [...]]" -> "Union[x, y, ...]"
467 Args:
468 descriptive_type: Descriptions of an item's type.
470 Returns:
471 Type signature for descriptive type.
472 """
473 types = descriptive_type.split("or")
474 if len(types) == 1:
475 return descriptive_type
476 types = [pt.strip() for pt in types]
477 if len(types) == 2: # noqa: PLR2004
478 if types[0] == "None":
479 return f"Optional[{types[1]}]"
480 if types[1] == "None":
481 return f"Optional[{types[0]}]"
482 return f"Union[{','.join(types)}]"
485def _strip_blank_lines(lines: list[str]) -> list[str]:
486 """Remove lines with no text or only whitespace characters from the start and end of the list.
488 Args:
489 lines: Lines to be stripped.
491 Returns:
492 A list with the same contents, with any blank lines at the start or end removed.
493 """
494 if not lines:
495 return lines
497 # remove blank lines from the start and end
498 content_found = False
499 initial_content = 0
500 final_content = 0
501 for index, line in enumerate(lines):
502 if line == "" or line.isspace():
503 if not content_found:
504 initial_content += 1
505 else:
506 content_found = True
507 final_content = index
508 return lines[initial_content : final_content + 1]