Coverage for packages / griffelib / src / griffe / _internal / docstrings / auto.py: 38.24%

52 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-11 11:48 +0100

# This module defines functions to parse docstrings by guessing their style.

2 

3from __future__ import annotations 

4 

5import re 

6from typing import TYPE_CHECKING, Literal, TypedDict 

7 

8from griffe._internal.enumerations import Parser 

9 

10if TYPE_CHECKING: 

11 from griffe._internal.docstrings.google import GoogleOptions 

12 from griffe._internal.docstrings.models import DocstringSection 

13 from griffe._internal.docstrings.numpy import NumpyOptions 

14 from griffe._internal.docstrings.parsers import DocstringStyle 

15 from griffe._internal.docstrings.sphinx import SphinxOptions 

16 from griffe._internal.models import Docstring 

17 

18 

# Order in which styles are tried when the caller does not provide `style_order`.
#
# This is not our preferred order, but the safest order for proper detection
# using heuristics. Indeed, Google style sections sometimes appear in otherwise
# plain markup docstrings, which could lead to false positives. Same for Numpy
# sections, whose syntax is regular rST markup, and which can therefore appear
# in plain markup docstrings too, even more often than Google sections.
_default_style_order = [Parser.sphinx, Parser.google, Parser.numpy]

25 

26 

# "heuristics": look for style-specific markers with regular expressions.
# "max_sections": run every candidate parser and keep the one yielding the most sections.
DocstringDetectionMethod = Literal["heuristics", "max_sections"]
"""The supported methods to infer docstring styles."""

29 

30 

# Detection heuristics, keyed by parser.
#
# Each value is a `(template, names)` pair: `template` is a regular expression
# containing a `{0}` placeholder, and `names` lists the section titles / field
# names substituted into it (one pattern per name) by `infer_docstring_style`.
# Every template starts with `\n`, so a marker on the very first line of a
# docstring is not matched.
_patterns = {
    # A title followed by a colon (optionally with trailing text on the same line),
    # then a following line that is indented and non-empty.
    Parser.google: (
        r"\n[ \t]*{0}:([ \t]+.+)?\n[ \t]+.+",
        [
            "args",
            "arguments",
            "params",
            "parameters",
            "keyword args",
            "keyword arguments",
            "other args",
            "other arguments",
            "other params",
            "other parameters",
            "raises",
            "exceptions",
            "returns",
            "yields",
            "receives",
            "examples",
            "attributes",
            "functions",
            "methods",
            "classes",
            "modules",
            "warns",
            "warnings",
        ],
    ),
    # A title alone on its line, underlined on the next line by three or more dashes.
    Parser.numpy: (
        r"\n[ \t]*{0}\n[ \t]*---+\n",
        [
            "deprecated",
            "parameters",
            "other parameters",
            "returns",
            "yields",
            "receives",
            "raises",
            "warns",
            # NOTE(review): "examples" is left disabled here, presumably to limit
            # false positives in plain markup docstrings - confirm before re-enabling.
            # "examples",
            "attributes",
            "functions",
            "methods",
            "classes",
            "modules",
        ],
    ),
    # A `:field:` marker, optionally carrying extra words before the closing colon
    # (e.g. `:param int x:`) and optional text after it, ending with a newline.
    Parser.sphinx: (
        r"\n[ \t]*:{0}([ \t]+\w+)*:([ \t]+.+)?\n",
        [
            "param",
            "parameter",
            "arg",
            "argument",
            "key",
            "keyword",
            "type",
            "var",
            "ivar",
            "cvar",
            "vartype",
            "returns",
            "return",
            "rtype",
            "raises",
            "raise",
            "except",
            "exception",
        ],
    ),
}

103 

104 

class PerStyleOptions(TypedDict, total=False):
    """Per-style options for docstring parsing.

    Every key is optional (`total=False`): only the styles that need
    specific options have to be listed.
    """

    google: GoogleOptions
    """Options for Google-style docstrings."""
    numpy: NumpyOptions
    """Options for Numpy-style docstrings."""
    sphinx: SphinxOptions
    """Options for Sphinx-style docstrings."""

114 

115 

def infer_docstring_style(
    docstring: Docstring,
    *,
    method: DocstringDetectionMethod = "heuristics",
    style_order: list[Parser] | list[DocstringStyle] | None = None,
    default: Parser | DocstringStyle | None = None,
    per_style_options: PerStyleOptions | None = None,
) -> tuple[Parser | None, list[DocstringSection] | None]:
    """Infer the parser to use for the docstring.

    The 'heuristics' method uses regular expressions. The 'max_sections' method
    parses the docstring with all parsers specified in `style_order` and returns
    the one that parsed the most sections.

    If heuristics fail, the `default` parser is returned. If multiple parsers
    parsed the same number of sections, `style_order` is used to decide which
    one to return. The `default` parser is never used with the 'max_sections' method.

    Per-style options are only used by the 'max_sections' method, where each
    candidate parser receives the options registered for its style.

    Parameters:
        docstring: The docstring to parse.
        method: The method to use to infer the parser.
        style_order: The order of the styles to try when inferring the parser.
            When missing or empty, the module's default order is used.
        default: The default parser to use if the inference fails.
        per_style_options: Additional parsing options per style.

    Returns:
        The inferred parser, and optionally parsed sections (when method is 'max_sections').

    Raises:
        ValueError: When `method` is neither 'heuristics' nor 'max_sections'.
    """
    from griffe._internal.docstrings.parsers import parsers  # noqa: PLC0415

    per_style_options = per_style_options or {}  # ty:ignore[invalid-assignment]

    # Accept plain style names as well as `Parser` members.
    style_order = [Parser(style) if isinstance(style, str) else style for style in style_order or _default_style_order]

    if method == "heuristics":
        text = docstring.value
        flags = re.IGNORECASE | re.MULTILINE
        for style in style_order:
            template, names = _patterns[style]
            # `re.search` reuses the module-level pattern cache, and the generator
            # stops at the first matching name instead of compiling them all.
            if any(re.search(template.format(name), text, flags) for name in names):
                return style, None
        # No style matched: fall back to `default`, normalized to a `Parser` member.
        if default is None or isinstance(default, Parser):
            return default, None
        return Parser(default), None

    if method == "max_sections":
        style_sections = {
            style: parsers[style](docstring, **per_style_options.get(style, {}))  # ty:ignore[possibly-missing-attribute]
            for style in style_order
        }
        # `max` returns the first maximal item, so ties go to the earliest style in `style_order`.
        best_style = max(style_order, key=lambda style: len(style_sections[style]))
        return best_style, style_sections[best_style]

    raise ValueError(f"Invalid method '{method}'.")

173 

174 

class AutoOptions(TypedDict, total=False):
    """Options for Auto-style docstrings.

    The keys mirror the keyword-only parameters of `parse_auto`;
    every key is optional (`total=False`).
    """

    method: DocstringDetectionMethod
    """The method to use to infer the parser."""
    style_order: list[Parser] | list[DocstringStyle] | None
    """The order of styles to try when inferring the parser."""
    default: Parser | DocstringStyle | None
    """The default parser to use if the inference fails."""
    per_style_options: PerStyleOptions | None
    """Additional parsing options per style."""

186 

187 

def parse_auto(
    docstring: Docstring,
    *,
    method: DocstringDetectionMethod = "heuristics",
    style_order: list[Parser] | list[DocstringStyle] | None = None,
    default: Parser | DocstringStyle | None = None,
    per_style_options: PerStyleOptions | None = None,
) -> list[DocstringSection]:
    """Parse a docstring by automatically detecting the style it uses.

    The style is first inferred with
    [`infer_docstring_style`][griffe.infer_docstring_style] (see it for more
    information on the available parameters). When the inference already
    produced sections (the 'max_sections' method), they are returned as is;
    otherwise the docstring is parsed with the inferred style and the options
    registered for that style.

    Parameters:
        docstring: The docstring to parse.
        method: The method to use to infer the parser.
        style_order: The order of the styles to try when inferring the parser.
        default: The default parser to use if the inference fails.
        per_style_options: Additional parsing options per style.

    Returns:
        A list of docstring sections.
    """
    from griffe._internal.docstrings.parsers import parse  # noqa: PLC0415

    per_style_options = per_style_options or {}  # ty:ignore[invalid-assignment]

    style, sections = infer_docstring_style(
        docstring,
        method=method,
        style_order=style_order,
        default=default,
        per_style_options=per_style_options,
    )
    if sections is not None:
        # The docstring was already parsed while inferring the style: reuse the result.
        return sections
    return parse(docstring, style, **per_style_options.get(style, {}))  # ty:ignore[no-matching-overload, possibly-missing-attribute]