Coverage for src / lilbee / languages.py: 100%
2 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 08:27 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 08:27 +0000
1"""Language data for tree-sitter code chunking.
3tree-sitter-language-pack provides only ``get_parser(language_name)`` — it has no
4extension-to-language mapping or per-language definition-type metadata. These dicts
5must be maintained manually when adding support for new languages.
6"""
# File extension -> tree-sitter language name.
#
# Keys include the leading dot. Both ".r" and ".R" are listed, which suggests
# callers look suffixes up without case-folding -- TODO confirm against callers.
# Languages without DEFINITION_TYPES entries fall back to token-based chunking.
EXT_TO_LANG: dict[str, str] = {
    # Systems / compiled
    ".c": "c",
    ".h": "c",
    ".cpp": "cpp",
    ".cxx": "cpp",
    ".cc": "cpp",
    ".hpp": "cpp",
    ".hxx": "cpp",
    ".cs": "csharp",
    ".d": "d",
    ".go": "go",
    ".java": "java",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".m": "objc",
    ".rs": "rust",
    ".scala": "scala",
    ".swift": "swift",
    ".zig": "zig",
    ".v": "v",
    ".odin": "odin",
    ".hare": "hare",
    ".nim": "nim",
    ".ada": "ada",
    ".adb": "ada",
    ".ads": "ada",
    ".f90": "fortran",
    ".f95": "fortran",
    ".f03": "fortran",
    ".f": "fortran",
    ".pas": "pascal",
    ".cobol": "cobol",
    ".cob": "cobol",
    ".cbl": "cobol",
    # Hardware description languages
    ".vhdl": "vhdl",
    ".vhd": "vhdl",
    ".sv": "verilog",
    ".svh": "verilog",
    ".verilog": "verilog",
    # Scripting / dynamic
    ".py": "python",
    ".pyi": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    # TypeScript counterparts of .mjs / .cjs.
    ".mts": "typescript",
    ".cts": "typescript",
    ".tsx": "tsx",
    ".rb": "ruby",
    ".php": "php",
    ".lua": "lua",
    ".luau": "luau",
    ".pl": "perl",
    ".pm": "perl",
    ".r": "r",
    ".R": "r",
    ".jl": "julia",
    ".ex": "elixir",
    ".exs": "elixir",
    ".erl": "erlang",
    ".hrl": "erlang",
    ".clj": "clojure",
    ".cljs": "clojure",
    ".cljc": "clojure",
    ".ml": "ocaml",
    ".mli": "ocaml_interface",
    ".hs": "haskell",
    ".fs": "fsharp",
    ".fsi": "fsharp_signature",
    ".fsx": "fsharp",
    ".elm": "elm",
    ".purs": "purescript",
    ".rkt": "racket",
    ".scm": "scheme",
    ".el": "elisp",
    ".lisp": "commonlisp",
    ".cl": "commonlisp",
    ".fnl": "fennel",
    ".janet": "janet",
    ".dart": "dart",
    ".gd": "gdscript",
    ".gleam": "gleam",
    ".groovy": "groovy",
    ".gradle": "groovy",
    ".tcl": "tcl",
    ".fish": "fish",
    ".ps1": "powershell",
    ".psm1": "powershell",
    ".psd1": "powershell",
    ".matlab": "matlab",
    ".pony": "pony",
    ".hack": "hack",
    ".hx": "haxe",
    ".squirrel": "squirrel",
    ".nut": "squirrel",
    ".nix": "nix",
    ".star": "starlark",
    ".bzl": "starlark",
    ".smali": "smali",
    # Shell
    ".sh": "bash",
    ".bash": "bash",
    ".zsh": "bash",
    # Web / markup
    ".html": "html",
    ".htm": "html",
    ".xml": "xml",
    ".xsl": "xml",
    ".xslt": "xml",
    ".css": "css",
    ".scss": "scss",
    ".vue": "vue",
    ".svelte": "svelte",
    ".astro": "astro",
    ".twig": "twig",
    ".md": "markdown",
    ".markdown": "markdown",
    # Blockchain / smart contracts / policy
    ".sol": "solidity",
    ".cairo": "cairo",
    ".fc": "func",
    ".clar": "clarity",
    ".rego": "rego",
    # Data / config
    ".json": "json",
    ".jsonnet": "jsonnet",
    ".libsonnet": "jsonnet",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".toml": "toml",
    ".ini": "ini",
    ".cfg": "ini",
    ".properties": "properties",
    ".ron": "ron",
    ".kdl": "kdl",
    ".hcl": "hcl",
    ".tf": "terraform",
    ".tfvars": "terraform",
    ".graphql": "graphql",
    ".gql": "graphql",
    ".proto": "proto",
    ".thrift": "thrift",
    ".capnp": "capnp",
    ".smithy": "smithy",
    ".prisma": "prisma",
    ".beancount": "beancount",
    ".sql": "sql",
    ".sparql": "sparql",
    # Build / CI
    ".cmake": "cmake",
    ".ninja": "ninja",
    ".meson": "meson",
    ".gn": "gn",
    ".pp": "puppet",
    # Documents / typesetting
    ".tex": "latex",
    ".bib": "bibtex",
    ".typst": "typst",
    # ".typ" is the suffix Typst source files actually use.
    ".typ": "typst",
    # GPU / shaders / low-level
    ".cuda": "cuda",
    ".cu": "cuda",
    ".glsl": "glsl",
    ".hlsl": "hlsl",
    ".wgsl": "wgsl",
    ".ispc": "ispc",
    ".s": "asm",
    ".asm": "asm",
    ".ll": "llvm",
    ".lds": "linkerscript",
    ".wat": "wat",
    ".wast": "wast",
    # Docker / infra
    ".dockerfile": "dockerfile",
    ".bicep": "bicep",
}
# AST node types that represent extractable definitions, per language.
#
# Keys are tree-sitter language names (the values of EXT_TO_LANG); each value is
# the set of node-type names treated as a definition for that language. The
# node-type strings come from the individual tree-sitter grammars and must be
# kept in sync with them by hand.
DEFINITION_TYPES: dict[str, frozenset[str]] = {
    "python": frozenset(
        {
            "function_definition",
            "class_definition",
            "decorated_definition",
        }
    ),
    "javascript": frozenset(
        {
            "function_declaration",
            "class_declaration",
            "export_statement",
            "lexical_declaration",
        }
    ),
    "typescript": frozenset(
        {
            "function_declaration",
            "class_declaration",
            "export_statement",
            "lexical_declaration",
            "interface_declaration",
            "type_alias_declaration",
        }
    ),
    # EXT_TO_LANG maps ".tsx" to the separate "tsx" grammar. Without an entry
    # here TSX files would skip AST chunking even though the tsx grammar uses
    # the same declaration node types as typescript.
    "tsx": frozenset(
        {
            "function_declaration",
            "class_declaration",
            "export_statement",
            "lexical_declaration",
            "interface_declaration",
            "type_alias_declaration",
        }
    ),
    "go": frozenset(
        {
            "function_declaration",
            "method_declaration",
            "type_declaration",
        }
    ),
    "rust": frozenset(
        {
            "function_item",
            "impl_item",
            "struct_item",
            "enum_item",
            "trait_item",
        }
    ),
    "java": frozenset(
        {
            "class_declaration",
            "method_declaration",
            "interface_declaration",
        }
    ),
    "c": frozenset({"function_definition", "struct_specifier"}),
    "cpp": frozenset(
        {
            "function_definition",
            "class_specifier",
            "struct_specifier",
        }
    ),
    "ruby": frozenset({"method", "class", "module", "singleton_method"}),
    "php": frozenset({"function_definition", "class_declaration", "method_declaration"}),
    "csharp": frozenset({"method_declaration", "class_declaration", "interface_declaration"}),
    "bash": frozenset({"function_definition"}),
    "kotlin": frozenset({"function_declaration", "class_declaration", "object_declaration"}),
    "swift": frozenset({"function_declaration", "class_declaration", "protocol_declaration"}),
    "scala": frozenset(
        {"function_definition", "class_definition", "object_definition", "trait_definition"}
    ),
    "lua": frozenset({"function_declaration", "function_definition_statement"}),
    # NOTE(review): "call" (elixir) and "list_lit" (clojure) are generic node
    # types rather than definition-specific ones; presumably the consumer
    # narrows them further -- confirm against the chunker.
    "elixir": frozenset({"call"}),
    "haskell": frozenset({"function", "type_alias", "newtype", "adt"}),
    "dart": frozenset({"function_signature", "class_definition", "method_signature"}),
    "ocaml": frozenset({"let_binding", "type_definition", "module_binding"}),
    "erlang": frozenset({"function_clause"}),
    "clojure": frozenset({"list_lit"}),
    "elm": frozenset({"function_declaration_left", "type_alias_declaration", "type_declaration"}),
    "julia": frozenset({"function_definition", "struct_definition", "module_definition"}),
    "r": frozenset({"function_definition"}),
    "perl": frozenset({"function_definition"}),
    "groovy": frozenset({"function_definition", "class_definition", "method_declaration"}),
    "fortran": frozenset({"function", "subroutine", "module"}),
    "pascal": frozenset({"function_declaration", "procedure_declaration"}),
    "d": frozenset({"function_declaration", "class_declaration", "struct_declaration"}),
    "nim": frozenset({"proc_declaration", "func_declaration", "type_section"}),
    "zig": frozenset({"function_declaration"}),
    "v": frozenset({"function_declaration", "struct_declaration"}),
    "odin": frozenset({"procedure_declaration"}),
    "solidity": frozenset({"function_definition", "contract_declaration"}),
    "terraform": frozenset({"block"}),
    "sql": frozenset({"create_function_statement", "create_table_statement"}),
    "objc": frozenset({"function_definition", "class_interface", "class_implementation"}),
    "cuda": frozenset({"function_definition", "struct_specifier"}),
    "fsharp": frozenset({"function_or_value_defn", "type_definition", "module_defn"}),
}