Coverage for src/pytkdocs/parsers/docstrings/google.py: 91.82%

265 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-09 18:24 +0100

1"""This module defines functions and classes to parse docstrings into structured data.""" 

2 

3import inspect 

4import re 

5from re import Pattern 

6from typing import Any, Optional 

7 

8from pytkdocs.parsers.docstrings.base import AnnotatedObject, Attribute, Parameter, Parser, Section, empty 

9 

# Maps every recognized section title (lower-cased, trailing colon included)
# to the type of section it introduces. Several aliases share one type; the
# groups below are expanded in order, so the resulting mapping keeps the
# aliases grouped by section type.
SECTIONS_TITLES = {
    title: section_type
    for section_type, titles in (
        (Section.Type.PARAMETERS, ("args:", "arguments:", "params:", "parameters:")),
        (Section.Type.KEYWORD_ARGS, ("keyword args:", "keyword arguments:")),
        (Section.Type.EXCEPTIONS, ("raise:", "raises:", "except:", "exceptions:")),
        (Section.Type.RETURN, ("return:", "returns:")),
        (Section.Type.YIELD, ("yield:", "yields:")),
        (Section.Type.EXAMPLES, ("example:", "examples:")),
        (Section.Type.ATTRIBUTES, ("attribute:", "attributes:")),
    )
    for title in titles
}

30 

RE_GOOGLE_STYLE_ADMONITION: Pattern = re.compile(r"^(?P<indent>\s*)(?P<type>[\w-]+):((?:\s+)(?P<title>.+))?$")
"""Regular expression to match a line starting an admonition, of the form `TYPE: [TITLE]`.

The named groups are `indent` (leading whitespace), `type` (word characters and dashes)
and `title` (optional, `None` when the line ends right after the colon).
"""
RE_DOCTEST_BLANKLINE: Pattern = re.compile(r"^\s*<BLANKLINE>\s*$")
"""Regular expression to match a line made only of `<BLANKLINE>`, surrounding blanks allowed."""
RE_DOCTEST_FLAGS: Pattern = re.compile(r"(\s*#\s*doctest:.+)$")
"""Regular expression to match trailing doctest flags of the form `# doctest: +FLAG`,
including the whitespace that precedes them.
"""

37 

38 

class Google(Parser):
    """A Google-style docstrings parser.

    A docstring is split into free-form Markdown sections and structured sections.
    A structured section starts on a line that is exactly one of the titles listed
    in `SECTIONS_TITLES` (compared case-insensitively) and is read by the matching
    `read_*_section` method.
    """

    def __init__(self, replace_admonitions: bool = True, trim_doctest_flags: bool = True, **kwargs: Any) -> None:  # noqa: FBT001, FBT002, ARG002
        """Initialize the object.

        Arguments:
            replace_admonitions: Whether to replace admonitions by their Markdown equivalent.
            trim_doctest_flags: Whether to remove doctest flags.
            **kwargs: Accepted for call compatibility; not used by this parser.
        """
        super().__init__()
        self.replace_admonitions = replace_admonitions
        self.trim_doctest_flags = trim_doctest_flags
        # Dispatch table: section type -> method reading the block that follows the title.
        self.section_reader = {
            Section.Type.PARAMETERS: self.read_parameters_section,
            Section.Type.KEYWORD_ARGS: self.read_keyword_arguments_section,
            Section.Type.EXCEPTIONS: self.read_exceptions_section,
            Section.Type.EXAMPLES: self.read_examples_section,
            Section.Type.ATTRIBUTES: self.read_attributes_section,
            Section.Type.RETURN: self.read_return_section,
            Section.Type.YIELD: self.read_yield_section,
        }

    def parse_sections(self, docstring: str) -> list[Section]:
        """Parse a docstring as a list of sections.

        Before parsing, missing context entries are filled in: `signature` and
        `annotation` are read from `self.context["obj"]` (attributes `signature`
        and `type`), and `attributes` defaults to an empty dictionary.

        Arguments:
            docstring: The docstring to parse.

        Returns:
            The sections found in the docstring, in order of appearance.
        """
        if "signature" not in self.context:
            self.context["signature"] = getattr(self.context["obj"], "signature", None)
        if "annotation" not in self.context:
            self.context["annotation"] = getattr(self.context["obj"], "type", empty)
        if "attributes" not in self.context:
            self.context["attributes"] = {}

        sections = []
        current_section = []

        in_code_block = False

        lines = docstring.split("\n")
        i = 0

        while i < len(lines):
            line_lower = lines[i].lower()

            if in_code_block:
                # Inside a fenced code block everything is kept verbatim,
                # section titles and admonitions included.
                if line_lower.lstrip(" ").startswith("```"):
                    in_code_block = False
                current_section.append(lines[i])

            elif line_lower in SECTIONS_TITLES:
                # Flush the Markdown accumulated so far, unless it is made of empty strings only.
                if current_section:
                    if any(current_section):
                        sections.append(Section(Section.Type.MARKDOWN, "\n".join(current_section)))
                    current_section = []
                section_reader = self.section_reader[SECTIONS_TITLES[line_lower]]
                # The reader returns the index of the last line it consumed.
                section, i = section_reader(lines, i + 1)
                if section:
                    sections.append(section)

            elif line_lower.lstrip(" ").startswith("```"):
                in_code_block = True
                current_section.append(lines[i])

            else:
                if self.replace_admonitions and i + 1 < len(lines):
                    match = RE_GOOGLE_STYLE_ADMONITION.match(lines[i])
                    if match:
                        groups = match.groupdict()
                        indent = groups["indent"]
                        # Only a `TYPE: [TITLE]` line followed by a line indented
                        # four more spaces is rewritten as an admonition.
                        if lines[i + 1].startswith(indent + " " * 4):
                            lines[i] = f"{indent}!!! {groups['type'].lower()}"
                            if groups["title"]:
                                lines[i] += f' "{groups["title"]}"'
                current_section.append(lines[i])

            i += 1

        if current_section:
            sections.append(Section(Section.Type.MARKDOWN, "\n".join(current_section)))

        return sections

    def read_block_items(self, lines: list[str], start_index: int) -> tuple[list[str], int]:
        """Parse an indented block as a list of items.

        The first indentation level is used as a reference to determine if the next lines are new items
        or continuation lines.

        Arguments:
            lines: The block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing the list of concatenated lines and the index of the last line consumed.
        """
        if start_index >= len(lines):
            return [], start_index

        i = start_index
        items: list[str] = []

        # Skip first empty lines, without running past the end of the docstring.
        while i < len(lines) and is_empty_line(lines[i]):
            i += 1

        if i == len(lines):
            # Only blank lines were left: there is no block to read.
            return [], i - 1

        # Get initial indent.
        indent = len(lines[i]) - len(lines[i].lstrip())

        if indent == 0:
            # First non-empty line was not indented, abort.
            return [], i - 1

        # Start processing first item.
        current_item = [lines[i][indent:]]
        i += 1

        # Loop on next lines.
        while i < len(lines):
            line = lines[i]

            if is_empty_line(line):
                # Blank line (possibly whitespace-only): preserve it in the current item.
                # This is tested first so that trailing whitespace on an otherwise
                # empty line is never mistaken for a new item or a continuation.
                current_item.append("")

            elif line.startswith(indent * 2 * " "):
                # Continuation line.
                current_item.append(line[indent * 2 :])

            elif line.startswith((indent + 1) * " "):
                # Indent between initial and continuation: append but add error.
                cont_indent = len(line) - len(line.lstrip())
                current_item.append(line[cont_indent:])
                self.error(
                    f"Confusing indentation for continuation line {i + 1} in docstring, "
                    f"should be {indent} * 2 = {indent * 2} spaces, not {cont_indent}",
                )

            elif line.startswith(indent * " "):
                # Indent equal to initial one: new item.
                items.append("\n".join(current_item))
                current_item = [line[indent:]]

            else:
                # Indent lower than initial one: end of section.
                break

            i += 1

        if current_item:
            # Only the last item has its trailing newlines removed.
            items.append("\n".join(current_item).rstrip("\n"))

        return items, i - 1

    def read_block(self, lines: list[str], start_index: int) -> tuple[str, int]:
        """Parse an indented block.

        Arguments:
            lines: The block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing the dedented block as a single string (empty when there is no block)
            and the index of the last line consumed.
        """
        if start_index >= len(lines):
            return "", start_index

        i = start_index
        block: list[str] = []

        # Skip first empty lines, without running past the end of the docstring.
        while i < len(lines) and is_empty_line(lines[i]):
            i += 1

        if i == len(lines):
            # Only blank lines were left: there is no block to read.
            return "", i - 1

        # Get initial indent.
        indent = len(lines[i]) - len(lines[i].lstrip())

        if indent == 0:
            # First non-empty line was not indented, abort.
            return "", i - 1

        # Start processing first line.
        block.append(lines[i].lstrip())
        i += 1

        # Loop on next lines: the block extends as long as lines are blank
        # or indented at least as much as the first one.
        while i < len(lines) and (lines[i].startswith(indent * " ") or is_empty_line(lines[i])):
            block.append(lines[i][indent:])
            i += 1

        return "\n".join(block).rstrip("\n"), i - 1

    def _parse_parameters_section(self, lines: list[str], start_index: int) -> tuple[list[Parameter], int]:
        """Parse a "parameters" or "keyword args" section.

        Each item is expected to look like `name: description` or `name (type): description`.
        Items without a colon are reported with `self.error` and skipped.

        Arguments:
            lines: The parameters block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing the list of parsed parameters (possibly empty)
            and the index of the last line consumed.
        """
        parameters = []
        annotation: Any
        block, i = self.read_block_items(lines, start_index)

        for param_line in block:
            # Check that there is a 'name: description' pair in the item.
            name_with_type, separator, description = param_line.partition(":")
            if not separator:
                self.error(f"Failed to get 'name: description' pair from '{param_line}'")
                continue

            # Setting defaults.
            default = empty
            annotation = empty
            kind = None
            # Can only get description from docstring - keep if no type was given.
            description = description.lstrip()

            # If we have managed to find a type in the docstring use this.
            if " " in name_with_type:
                name, type_ = name_with_type.split(" ", 1)
                annotation = type_.strip("()").removesuffix(", optional")
            # Otherwise try to use the signature as `annotation` would still be empty.
            else:
                name = name_with_type

            # Check in the signature to get extra details.
            try:
                signature_param = self.context["signature"].parameters[name.lstrip("*")]
            except (AttributeError, KeyError):
                # No signature at all, or the parameter is not part of it.
                if annotation is empty:
                    self.error(f"No type annotation for parameter '{name}'")
            else:
                if annotation is empty:
                    annotation = signature_param.annotation
                # If signature_param.X are empty it doesn't matter as defaults are empty anyway.
                default = signature_param.default
                kind = signature_param.kind

            parameters.append(
                Parameter(name=name, annotation=annotation, description=description, default=default, kind=kind),
            )

        return parameters, i

    def read_parameters_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse a "parameters" section.

        Arguments:
            lines: The parameters block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        parameters, i = self._parse_parameters_section(lines, start_index)

        if parameters:
            return Section(Section.Type.PARAMETERS, parameters), i

        self.error(f"Empty parameters section at line {start_index}")
        return None, i

    def read_keyword_arguments_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse a "keyword arguments" section.

        Every parameter read from this section is marked as keyword-only,
        whatever kind the signature reports for it.

        Arguments:
            lines: The parameters block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        parameters, i = self._parse_parameters_section(lines, start_index)
        for parameter in parameters:
            parameter.kind = inspect.Parameter.KEYWORD_ONLY

        if parameters:
            return Section(Section.Type.KEYWORD_ARGS, parameters), i

        self.error(f"Empty keyword arguments section at line {start_index}")
        return None, i

    def read_attributes_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse an "attributes" section.

        Each item is expected to look like `name: description` or `name (type): description`.
        When no type is written in the docstring, the annotation is looked up
        in `self.context["attributes"]`.

        Arguments:
            lines: The attributes block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        attributes = []
        block, i = self.read_block_items(lines, start_index)

        for attr_line in block:
            name_with_type, separator, description = attr_line.partition(":")
            if not separator:
                self.error(f"Failed to get 'name: description' pair from '{attr_line}'")
                continue

            description = description.lstrip()

            if " " in name_with_type:
                name, annotation = name_with_type.split(" ", 1)
                annotation = annotation.strip("()").removesuffix(", optional")
            else:
                name = name_with_type
                annotation = self.context["attributes"].get(name, {}).get("annotation", empty)

            attributes.append(Attribute(name=name, annotation=annotation, description=description))

        if attributes:
            return Section(Section.Type.ATTRIBUTES, attributes), i

        self.error(f"Empty attributes section at line {start_index}")
        return None, i

    def read_exceptions_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse an "exceptions" section.

        Each item is expected to look like `ExceptionType: description`
        (colon followed by a space). Other items are reported and skipped.

        Arguments:
            lines: The exceptions block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        exceptions = []
        block, i = self.read_block_items(lines, start_index)

        for exception_line in block:
            annotation, separator, description = exception_line.partition(": ")
            if separator:
                exceptions.append(AnnotatedObject(annotation, description.lstrip(" ")))
            else:
                self.error(f"Failed to get 'exception: description' pair from '{exception_line}'")

        if exceptions:
            return Section(Section.Type.EXCEPTIONS, exceptions), i

        self.error(f"Empty exceptions section at line {start_index}")
        return None, i

    def _read_annotated_section(
        self,
        lines: list[str],
        start_index: int,
        section_type: Any,
        label: str,
    ) -> tuple[Optional[Section], int]:
        """Parse a section made of a single `[type: ]description` block.

        Shared implementation of the "returns" and "yields" sections.

        Arguments:
            lines: The block lines.
            start_index: The line number to start at.
            section_type: The `Section.Type` of the section to build.
            label: The word naming the section in error messages ("return" or "yield").

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        text, i = self.read_block(lines, start_index)

        # Early exit if there is no text in the section.
        if not text:
            self.error(f"Empty {label} section at line {start_index}")
            return None, i

        # First try to get the annotation and description from the docstring.
        type_, separator, rest = text.partition(":")
        if separator:
            annotation = type_.lstrip()
            description = rest.lstrip()
        else:
            description = text
            annotation = self.context["annotation"]
            # If there was no annotation in the docstring then move to signature.
            if annotation is empty and self.context["signature"]:
                annotation = self.context["signature"].return_annotation

        # There was no type in the docstring and no annotation.
        if annotation is empty:
            self.error(f"No {label} type/annotation in docstring/signature")

        return Section(section_type, AnnotatedObject(annotation, description)), i

    def read_return_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse a "returns" section.

        Arguments:
            lines: The return block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        return self._read_annotated_section(lines, start_index, Section.Type.RETURN, "return")

    def read_yield_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse a "yields" section.

        Arguments:
            lines: The yield block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        return self._read_annotated_section(lines, start_index, Section.Type.YIELD, "yield")

    def read_examples_section(self, lines: list[str], start_index: int) -> tuple[Optional[Section], int]:
        """Parse an "examples" section.

        The block is split into sub-sections: runs of text (including fenced code blocks)
        become Markdown sub-sections, and runs of lines starting at a `>>>` prompt and
        ending at the next blank line become example sub-sections.

        Arguments:
            lines: The examples block lines.
            start_index: The line number to start at.

        Returns:
            A tuple containing a `Section` (or `None`) and the index at which to continue parsing.
        """
        text, i = self.read_block(lines, start_index)

        # Early exit if there is no text in the examples section. Without it,
        # splitting the empty text would produce a single empty Markdown
        # sub-section and the error below could never be reported.
        if not text:
            self.error(f"Empty examples section at line {start_index}")
            return None, i

        sub_sections = []
        in_code_example = False
        in_code_block = False
        current_text: list[str] = []
        current_example: list[str] = []

        for line in text.split("\n"):
            if is_empty_line(line):
                if in_code_example:
                    # A blank line ends the current doctest example.
                    if current_example:
                        sub_sections.append((Section.Type.EXAMPLES, "\n".join(current_example)))
                        current_example = []
                    in_code_example = False
                else:
                    current_text.append(line)

            elif in_code_example:
                if self.trim_doctest_flags:
                    line = RE_DOCTEST_FLAGS.sub("", line)  # noqa: PLW2901
                    line = RE_DOCTEST_BLANKLINE.sub("", line)  # noqa: PLW2901
                current_example.append(line)

            elif line.startswith("```"):
                in_code_block = not in_code_block
                current_text.append(line)

            elif in_code_block:
                # Prompts inside a fenced code block are plain text.
                current_text.append(line)

            elif line.startswith(">>>"):
                # A prompt starts a new example: flush the text accumulated so far.
                if current_text:
                    sub_sections.append((Section.Type.MARKDOWN, "\n".join(current_text)))
                    current_text = []
                in_code_example = True

                if self.trim_doctest_flags:
                    line = RE_DOCTEST_FLAGS.sub("", line)  # noqa: PLW2901
                current_example.append(line)

            else:
                current_text.append(line)

        # Flush whatever is still pending once the block is exhausted.
        if current_text:
            sub_sections.append((Section.Type.MARKDOWN, "\n".join(current_text)))
        elif current_example:
            sub_sections.append((Section.Type.EXAMPLES, "\n".join(current_example)))

        if sub_sections:
            return Section(Section.Type.EXAMPLES, sub_sections), i

        self.error(f"Empty examples section at line {start_index}")
        return None, i

526 

527 

def is_empty_line(line: str) -> bool:
    """Check whether a line holds no visible content.

    Arguments:
        line: The line to check.

    Returns:
        True if the line is empty or composed of blanks only, False otherwise.
    """
    stripped = line.strip()
    return len(stripped) == 0