Coverage for src / lilbee / languages.py: 100%

2 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-16 08:27 +0000

1"""Language data for tree-sitter code chunking. 

2 

3tree-sitter-language-pack provides only ``get_parser(language_name)`` — it has no 

4extension-to-language mapping or per-language definition-type metadata. These dicts 

5must be maintained manually when adding support for new languages. 

6""" 

7 

8# Extension -> tree-sitter language name. 

9# Languages without DEFINITION_TYPES entries fall back to token-based chunking. 

10EXT_TO_LANG: dict[str, str] = { 

11 # Systems / compiled 

12 ".c": "c", 

13 ".h": "c", 

14 ".cpp": "cpp", 

15 ".cxx": "cpp", 

16 ".cc": "cpp", 

17 ".hpp": "cpp", 

18 ".hxx": "cpp", 

19 ".cs": "csharp", 

20 ".d": "d", 

21 ".go": "go", 

22 ".java": "java", 

23 ".kt": "kotlin", 

24 ".kts": "kotlin", 

25 ".m": "objc", 

26 ".rs": "rust", 

27 ".scala": "scala", 

28 ".swift": "swift", 

29 ".zig": "zig", 

30 ".v": "v", 

31 ".odin": "odin", 

32 ".hare": "hare", 

33 ".nim": "nim", 

34 ".ada": "ada", 

35 ".adb": "ada", 

36 ".ads": "ada", 

37 ".f90": "fortran", 

38 ".f95": "fortran", 

39 ".f03": "fortran", 

40 ".f": "fortran", 

41 ".pas": "pascal", 

42 ".cobol": "cobol", 

43 ".cob": "cobol", 

44 ".cbl": "cobol", 

45 ".vhdl": "vhdl", 

46 ".vhd": "vhdl", 

47 ".sv": "verilog", 

48 ".svh": "verilog", 

49 ".verilog": "verilog", 

50 # Scripting / dynamic 

51 ".py": "python", 

52 ".pyi": "python", 

53 ".js": "javascript", 

54 ".jsx": "javascript", 

55 ".mjs": "javascript", 

56 ".cjs": "javascript", 

57 ".ts": "typescript", 

58 ".tsx": "tsx", 

59 ".rb": "ruby", 

60 ".php": "php", 

61 ".lua": "lua", 

62 ".luau": "luau", 

63 ".pl": "perl", 

64 ".pm": "perl", 

65 ".r": "r", 

66 ".R": "r", 

67 ".jl": "julia", 

68 ".ex": "elixir", 

69 ".exs": "elixir", 

70 ".erl": "erlang", 

71 ".hrl": "erlang", 

72 ".clj": "clojure", 

73 ".cljs": "clojure", 

74 ".cljc": "clojure", 

75 ".ml": "ocaml", 

76 ".mli": "ocaml_interface", 

77 ".hs": "haskell", 

78 ".fs": "fsharp", 

79 ".fsi": "fsharp_signature", 

80 ".fsx": "fsharp", 

81 ".elm": "elm", 

82 ".purs": "purescript", 

83 ".rkt": "racket", 

84 ".scm": "scheme", 

85 ".el": "elisp", 

86 ".lisp": "commonlisp", 

87 ".cl": "commonlisp", 

88 ".fnl": "fennel", 

89 ".janet": "janet", 

90 ".dart": "dart", 

91 ".gd": "gdscript", 

92 ".gleam": "gleam", 

93 ".groovy": "groovy", 

94 ".gradle": "groovy", 

95 ".tcl": "tcl", 

96 ".fish": "fish", 

97 ".ps1": "powershell", 

98 ".psm1": "powershell", 

99 ".psd1": "powershell", 

100 ".matlab": "matlab", 

101 ".pony": "pony", 

102 ".hack": "hack", 

103 ".hx": "haxe", 

104 ".squirrel": "squirrel", 

105 ".nut": "squirrel", 

106 ".nix": "nix", 

107 ".star": "starlark", 

108 ".bzl": "starlark", 

109 ".smali": "smali", 

110 # Shell 

111 ".sh": "bash", 

112 ".bash": "bash", 

113 ".zsh": "bash", 

114 # Web / markup 

115 ".html": "html", 

116 ".htm": "html", 

117 ".xml": "xml", 

118 ".xsl": "xml", 

119 ".xslt": "xml", 

120 ".css": "css", 

121 ".scss": "scss", 

122 ".vue": "vue", 

123 ".svelte": "svelte", 

124 ".astro": "astro", 

125 ".twig": "twig", 

126 ".md": "markdown", 

127 ".markdown": "markdown", 

128 # Functional / blockchain / smart contracts 

129 ".sol": "solidity", 

130 ".cairo": "cairo", 

131 ".fc": "func", 

132 ".clar": "clarity", 

133 ".rego": "rego", 

134 # Data / config 

135 ".json": "json", 

136 ".jsonnet": "jsonnet", 

137 ".libsonnet": "jsonnet", 

138 ".yaml": "yaml", 

139 ".yml": "yaml", 

140 ".toml": "toml", 

141 ".ini": "ini", 

142 ".cfg": "ini", 

143 ".properties": "properties", 

144 ".ron": "ron", 

145 ".kdl": "kdl", 

146 ".hcl": "hcl", 

147 ".tf": "terraform", 

148 ".tfvars": "terraform", 

149 ".graphql": "graphql", 

150 ".gql": "graphql", 

151 ".proto": "proto", 

152 ".thrift": "thrift", 

153 ".capnp": "capnp", 

154 ".smithy": "smithy", 

155 ".prisma": "prisma", 

156 ".beancount": "beancount", 

157 ".sql": "sql", 

158 ".sparql": "sparql", 

159 # Build / CI 

160 ".cmake": "cmake", 

161 ".ninja": "ninja", 

162 ".meson": "meson", 

163 ".gn": "gn", 

164 ".pp": "puppet", 

165 ".tex": "latex", 

166 ".bib": "bibtex", 

167 ".typst": "typst", 

168 # HDL / embedded 

169 ".cuda": "cuda", 

170 ".cu": "cuda", 

171 ".glsl": "glsl", 

172 ".hlsl": "hlsl", 

173 ".wgsl": "wgsl", 

174 ".ispc": "ispc", 

175 ".s": "asm", 

176 ".asm": "asm", 

177 ".ll": "llvm", 

178 ".lds": "linkerscript", 

179 ".wat": "wat", 

180 ".wast": "wast", 

181 # Docker / infra 

182 ".dockerfile": "dockerfile", 

183 ".bicep": "bicep", 

184} 

185 

186# AST node types that represent extractable definitions, per language. 

187DEFINITION_TYPES: dict[str, frozenset[str]] = { 

188 "python": frozenset( 

189 { 

190 "function_definition", 

191 "class_definition", 

192 "decorated_definition", 

193 } 

194 ), 

195 "javascript": frozenset( 

196 { 

197 "function_declaration", 

198 "class_declaration", 

199 "export_statement", 

200 "lexical_declaration", 

201 } 

202 ), 

203 "typescript": frozenset( 

204 { 

205 "function_declaration", 

206 "class_declaration", 

207 "export_statement", 

208 "lexical_declaration", 

209 "interface_declaration", 

210 "type_alias_declaration", 

211 } 

212 ), 

213 "go": frozenset( 

214 { 

215 "function_declaration", 

216 "method_declaration", 

217 "type_declaration", 

218 } 

219 ), 

220 "rust": frozenset( 

221 { 

222 "function_item", 

223 "impl_item", 

224 "struct_item", 

225 "enum_item", 

226 "trait_item", 

227 } 

228 ), 

229 "java": frozenset( 

230 { 

231 "class_declaration", 

232 "method_declaration", 

233 "interface_declaration", 

234 } 

235 ), 

236 "c": frozenset({"function_definition", "struct_specifier"}), 

237 "cpp": frozenset( 

238 { 

239 "function_definition", 

240 "class_specifier", 

241 "struct_specifier", 

242 } 

243 ), 

244 "ruby": frozenset({"method", "class", "module", "singleton_method"}), 

245 "php": frozenset({"function_definition", "class_declaration", "method_declaration"}), 

246 "csharp": frozenset({"method_declaration", "class_declaration", "interface_declaration"}), 

247 "bash": frozenset({"function_definition"}), 

248 "kotlin": frozenset({"function_declaration", "class_declaration", "object_declaration"}), 

249 "swift": frozenset({"function_declaration", "class_declaration", "protocol_declaration"}), 

250 "scala": frozenset( 

251 {"function_definition", "class_definition", "object_definition", "trait_definition"} 

252 ), 

253 "lua": frozenset({"function_declaration", "function_definition_statement"}), 

254 "elixir": frozenset({"call"}), 

255 "haskell": frozenset({"function", "type_alias", "newtype", "adt"}), 

256 "dart": frozenset({"function_signature", "class_definition", "method_signature"}), 

257 "ocaml": frozenset({"let_binding", "type_definition", "module_binding"}), 

258 "erlang": frozenset({"function_clause"}), 

259 "clojure": frozenset({"list_lit"}), 

260 "elm": frozenset({"function_declaration_left", "type_alias_declaration", "type_declaration"}), 

261 "julia": frozenset({"function_definition", "struct_definition", "module_definition"}), 

262 "r": frozenset({"function_definition"}), 

263 "perl": frozenset({"function_definition"}), 

264 "groovy": frozenset({"function_definition", "class_definition", "method_declaration"}), 

265 "fortran": frozenset({"function", "subroutine", "module"}), 

266 "pascal": frozenset({"function_declaration", "procedure_declaration"}), 

267 "d": frozenset({"function_declaration", "class_declaration", "struct_declaration"}), 

268 "nim": frozenset({"proc_declaration", "func_declaration", "type_section"}), 

269 "zig": frozenset({"function_declaration"}), 

270 "v": frozenset({"function_declaration", "struct_declaration"}), 

271 "odin": frozenset({"procedure_declaration"}), 

272 "solidity": frozenset({"function_definition", "contract_declaration"}), 

273 "terraform": frozenset({"block"}), 

274 "sql": frozenset({"create_function_statement", "create_table_statement"}), 

275 "objc": frozenset({"function_definition", "class_interface", "class_implementation"}), 

276 "cuda": frozenset({"function_definition", "struct_specifier"}), 

277 "fsharp": frozenset({"function_or_value_defn", "type_definition", "module_defn"}), 

278}