Coverage for src/griffe/_internal/docstrings/parsers.py: 27.96%

61 statements  

« prev     ^ index     » next       coverage.py v7.10.2, created at 2025-08-11 13:44 +0200

1# This module imports all the defined parsers 

2# and provides a generic function to parse docstrings. 

3 

4from __future__ import annotations 

5 

6import re 

7from typing import TYPE_CHECKING, Any, Callable, Literal 

8from warnings import warn 

9 

10from griffe._internal.docstrings.google import parse_google 

11from griffe._internal.docstrings.models import DocstringSection, DocstringSectionText 

12from griffe._internal.docstrings.numpy import parse_numpy 

13from griffe._internal.docstrings.sphinx import parse_sphinx 

14from griffe._internal.enumerations import Parser 

15 

16if TYPE_CHECKING: 

17 from griffe._internal.models import Docstring 

18 

19 

# Order in which styles are tried when the caller does not supply one.
# It is deliberately not the "preferred" order but the safest one for
# heuristic detection: Google-style section headers sometimes show up in
# docstrings that are otherwise plain markup, and Numpy sections are regular
# rST markup that appears in plain docstrings even more often. Trying Sphinx
# first, then Google, then Numpy limits those false positives.
_default_style_order = [
    Parser.sphinx,
    Parser.google,
    Parser.numpy,
]

26 

27 

DocstringStyle = Literal[
    "google",
    "numpy",
    "sphinx",
    "auto",
]
"""The supported docstring styles (literal values of the Parser enumeration)."""

DocstringDetectionMethod = Literal[
    "heuristics",
    "max_sections",
]
"""The supported methods to infer docstring styles."""

32 

# Heuristic detection data: for each style, a regular expression template and
# the list of section/field names substituted for `{0}` in that template.
#
# The templates are anchored with `^` and `$` rather than with literal
# newlines, so that a section header on the very first line of a docstring,
# or a field on its very last line (no trailing newline), is still detected.
# These anchors are per-line only because the patterns are compiled with
# `re.MULTILINE`; they must always be compiled with that flag.
_patterns = {
    # Google: "<Name>:" on its own line (optionally followed by text),
    # then at least one indented, non-empty line.
    Parser.google: (
        r"^[ \t]*{0}:([ \t]+.+)?\n[ \t]+.+",
        [
            "args",
            "arguments",
            "params",
            "parameters",
            "keyword args",
            "keyword arguments",
            "other args",
            "other arguments",
            "other params",
            "other parameters",
            "raises",
            "exceptions",
            "returns",
            "yields",
            "receives",
            "examples",
            "attributes",
            "functions",
            "methods",
            "classes",
            "modules",
            "warns",
            "warnings",
        ],
    ),
    # Numpy: "<Name>" on its own line, underlined by three or more dashes.
    Parser.numpy: (
        r"^[ \t]*{0}\n[ \t]*---+$",
        [
            "deprecated",
            "parameters",
            "other parameters",
            "returns",
            "yields",
            "receives",
            "raises",
            "warns",
            # "examples",
            "attributes",
            "functions",
            "methods",
            "classes",
            "modules",
        ],
    ),
    # Sphinx: ":<name> [word ...]:" field, optionally followed by text.
    Parser.sphinx: (
        r"^[ \t]*:{0}([ \t]+\w+)*:([ \t]+.+)?$",
        [
            "param",
            "parameter",
            "arg",
            "argument",
            "key",
            "keyword",
            "type",
            "var",
            "ivar",
            "cvar",
            "vartype",
            "returns",
            "return",
            "rtype",
            "raises",
            "raise",
            "except",
            "exception",
        ],
    ),
}

105 

106 

def infer_docstring_style(
    docstring: Docstring,
    *,
    method: DocstringDetectionMethod = "heuristics",
    style_order: list[Parser] | list[DocstringStyle] | None = None,
    default: Parser | DocstringStyle | None = None,
    per_style_options: dict[Parser | DocstringStyle, dict[str, Any]] | None = None,
    **options: Any,
) -> tuple[Parser | None, list[DocstringSection] | None]:
    """Guess which parser matches the style of a docstring.

    [:octicons-heart-fill-24:{ .pulse } Sponsors only](../../../insiders/index.md){ .insiders } —
    [:octicons-tag-24: Insiders 1.3.0](../../../insiders/changelog.md#1.3.0).

    Two detection methods are available:

    - `"heuristics"`: the styles of `style_order` are tried in turn, and the
      first one for which a regular expression finds a known section or field
      name in the docstring text is returned. When no style matches,
      `default` is returned (converted to a `Parser` if given as a string).
    - `"max_sections"`: the docstring is parsed with every parser listed in
      `style_order`, and the style that produced the most sections is returned
      together with those sections. Ties are resolved in favor of the style
      that comes first in `style_order`. `default` is never used here.

    When `style_order` is empty or not given, the module's default order is used.
    Per-style options are passed to the parsers run by the `"max_sections"` method.

    Note carried over from the previous documentation: non-Insiders versions are
    described as returning `default` if specified, else the first parser in
    `style_order`, else `None`. That fallback is not what this function does.

    Parameters:
        docstring: The docstring to parse.
        method: The method to use to infer the parser.
        style_order: The order of the styles to try when inferring the parser.
        default: The default parser to use if the inference fails.
        per_style_options: Additional parsing options per style.
        **options: Deprecated. Use `per_style_options` instead.

    Returns:
        The inferred parser (possibly `None`), and the parsed sections
        when the method is 'max_sections' (`None` otherwise).

    Raises:
        ValueError: When both `options` and `per_style_options` are given,
            or when `method` is not a supported detection method.
    """
    # YORE: Bump 2: Replace block with `per_style_options = per_style_options or {}`.
    if options and per_style_options:
        raise ValueError("Cannot use both `options` and `per_style_options`.")
    if options:
        warn("`**options` is deprecated. Use `per_style_options` instead.", DeprecationWarning, stacklevel=2)
        # The same legacy options are applied to every concrete style.
        per_style_options = {"google": options, "numpy": options, "sphinx": options}
    elif not per_style_options:
        per_style_options = {}

    # Accept both enumeration members and their string values.
    candidates = [Parser(item) if isinstance(item, str) else item for item in style_order or _default_style_order]

    if method == "heuristics":
        flags = re.IGNORECASE | re.MULTILINE
        for candidate in candidates:
            template, names = _patterns[candidate]
            if any(re.search(template.format(name), docstring.value, flags) for name in names):
                return candidate, None
        # Nothing matched: fall back to the caller's default, as a `Parser` or `None`.
        if default is None or isinstance(default, Parser):
            return default, None
        return Parser(default), None

    if method != "max_sections":
        raise ValueError(f"Invalid method '{method}'.")

    sections_by_style = {
        candidate: parsers[candidate](docstring, **per_style_options.get(candidate, {}))
        for candidate in candidates
    }
    # `max` returns the first maximal item, so ties follow the order of `candidates`.
    winner = max(candidates, key=lambda candidate: len(sections_by_style[candidate]))
    return winner, sections_by_style[winner]

178 

179 

def parse_auto(
    docstring: Docstring,
    *,
    method: DocstringDetectionMethod = "heuristics",
    style_order: list[Parser] | list[DocstringStyle] | None = None,
    default: Parser | DocstringStyle | None = None,
    per_style_options: dict[Parser | DocstringStyle, dict[str, Any]] | None = None,
    **options: Any,
) -> list[DocstringSection]:
    """Detect the style of a docstring, then parse it accordingly.

    [:octicons-heart-fill-24:{ .pulse } Sponsors only](../../../insiders/index.md){ .insiders } —
    [:octicons-tag-24: Insiders 1.3.0](../../../insiders/changelog.md#1.3.0).

    The detection is delegated to
    [`infer_docstring_style`][griffe.infer_docstring_style], which documents
    the available parameters in more detail. If the detection already produced
    sections, they are returned as is; otherwise the docstring is parsed with
    the detected style and the options registered for that style.

    Parameters:
        docstring: The docstring to parse.
        method: The method to use to infer the parser.
        style_order: The order of the styles to try when inferring the parser.
        default: The default parser to use if the inference fails.
        per_style_options: Additional parsing options per style.
        **options: Deprecated. Use `per_style_options` instead.

    Returns:
        A list of docstring sections.

    Raises:
        ValueError: When both `options` and `per_style_options` are given.
    """
    # YORE: Bump 2: Replace block with `per_style_options = per_style_options or {}`.
    if options and per_style_options:
        raise ValueError("Cannot use both `options` and `per_style_options`.")
    if options:
        warn("`**options` are deprecated. Use `per_style_options` instead.", DeprecationWarning, stacklevel=2)
        # The same legacy options are applied to every concrete style.
        per_style_options = {"google": options, "numpy": options, "sphinx": options}
    elif not per_style_options:
        per_style_options = {}

    # Key the options by enumeration member so lookups by detected style work.
    normalized_options = {Parser(key): value for key, value in per_style_options.items()}

    style, sections = infer_docstring_style(
        docstring,
        method=method,
        style_order=style_order,
        default=default,
        per_style_options=normalized_options,
    )
    if sections is not None:
        return sections
    return parse(docstring, style, **normalized_options.get(style, {}))  # type: ignore[arg-type]

229 

230 

# Registry mapping each member of the `Parser` enumeration to the function
# that parses docstrings of that style. `parse` looks parsers up here.
parsers: dict[Parser, Callable[[Docstring], list[DocstringSection]]] = {
    Parser.auto: parse_auto,
    Parser.google: parse_google,
    Parser.sphinx: parse_sphinx,
    Parser.numpy: parse_numpy,
}

237 

238 

def parse(
    docstring: Docstring,
    parser: DocstringStyle | Parser | None,
    **options: Any,
) -> list[DocstringSection]:
    """Parse a docstring with the requested parser.

    Parameters:
        docstring: The docstring to parse.
        parser: The docstring parser to use, as a `Parser` member or its
            string value. If falsy (for example `None`), the docstring is not
            parsed and a single text section holding its value is returned.
        **options: The options accepted by the parser.

    Returns:
        A list of docstring sections.
    """
    # No parser requested: wrap the raw text in a single text section.
    if not parser:
        return [DocstringSectionText(docstring.value)]

    # Accept plain strings by converting them to the enumeration first.
    resolved = parser if isinstance(parser, Parser) else Parser(parser)
    return parsers[resolved](docstring, **options)