diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..d3a8b5b6 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,39 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{json,toml,yml,gyp}] +indent_style = space +indent_size = 2 + +[*.js] +indent_style = space +indent_size = 2 + +[*.rs] +indent_style = space +indent_size = 4 + +[*.{c,cc,h}] +indent_style = space +indent_size = 4 + +[*.{py,pyi}] +indent_style = space +indent_size = 4 + +[*.swift] +indent_style = space +indent_size = 4 + +[*.go] +indent_style = tab +indent_size = 8 + +[Makefile] +indent_style = tab +indent_size = 8 diff --git a/.gitattributes b/.gitattributes index 1491f7e1..9f71c8f1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,10 +1,10 @@ -/src/** linguist-vendored -/examples/* linguist-vendored +* text eol=lf -src/grammar.json linguist-generated -src/node-types.json linguist-generated +src/*.json linguist-generated src/parser.c linguist-generated +src/tree_sitter/* linguist-generated -src/grammar.json -diff -src/node-types.json -diff -src/parser.c -diff +bindings/** linguist-generated +binding.gyp linguist-generated +setup.py linguist-generated +Makefile linguist-generated diff --git a/.npmignore b/.npmignore index 0f438b55..ac723165 100644 --- a/.npmignore +++ b/.npmignore @@ -1,6 +1,17 @@ -/test -/examples -/build -/script -/target +bindings/c +bindings/go +bindings/python bindings/rust +bindings/swift +Cargo.toml +Makefile +examples +pyproject.toml +setup.py +test +.editorconfig +.github +.gitignore +.gitattributes +.gitmodules +.npmignore diff --git a/Cargo.toml b/Cargo.toml index b43ffb0a..374d5bc9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,8 +3,8 @@ name = "tree-sitter-python" description = "Python grammar for tree-sitter" version = "0.20.4" authors = [ - "Max Brunsfeld ", - "Douglas Creager ", + "Max Brunsfeld ", + "Douglas Creager ", ] license = "MIT" readme = "bindings/rust/README.md" @@ -21,7 +21,7 @@ include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"] path = "bindings/rust/lib.rs" [dependencies] -tree-sitter = "~0.20.10" +tree-sitter = "0.21.0" [build-dependencies] -cc = "~1.0" +cc = "1.0.88" diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..d54bd5a7 --- /dev/null +++ b/Makefile @@ -0,0 +1,94 @@ +VERSION := 0.0.1 + +LANGUAGE_NAME := tree-sitter-python + +# repository +SRC_DIR := src + +PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null) + +ifeq ($(PARSER_URL),) + PARSER_URL := $(subst .git,,$(PARSER_REPO_URL)) +ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),) + PARSER_URL := $(subst :,/,$(PARSER_URL)) + PARSER_URL := $(subst git@,https://,$(PARSER_URL)) +endif +endif + +# ABI versioning +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# object files +OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c)) + +# flags +ARFLAGS := rcs +override CFLAGS += -I$(SRC_DIR) -std=c11 + +# OS-specific bits +ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED := $(LINKSHARED)-dynamiclib -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS), + endif + LINKSHARED := 
$(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks +else ifneq ($(filter $(shell uname),Linux FreeBSD NetBSD DragonFly),) + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED := $(LINKSHARED)-shared -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS) + endif + LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR) +else ifeq ($(OS),Windows_NT) + $(error "Windows is not supported") +endif +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc + +$(SRC_DIR)/%.o: $(SRC_DIR)/%.c + $(CC) -c $^ -o $@ + +lib$(LANGUAGE_NAME).a: $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) + $(CC) -fPIC $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ + +$(LANGUAGE_NAME).pc: + sed > $@ bindings/c/$(LANGUAGE_NAME).pc.in \ + -e 's|@URL@|$(PARSER_URL)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|;' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|;' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' \ + -e 's|@REQUIRES@|$(REQUIRES)|' \ + -e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' + +install: all + install -Dm644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h + install -Dm644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + install -Dm755 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a + install -Dm755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) + +clean: + $(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) + +.PHONY: all install clean diff --git a/Package.swift b/Package.swift index a5e4524d..495eca79 100644 --- a/Package.swift +++ b/Package.swift @@ -3,6 +3,7 @@ import PackageDescription let package = Package( name: "TreeSitterPython", + platforms: [.macOS(.v10_13), .iOS(.v11)], products: [ .library(name: "TreeSitterPython", targets: ["TreeSitterPython"]), ], @@ -11,18 +12,32 @@ let package = Package( .target(name: "TreeSitterPython", path: ".", exclude: [ - "binding.gyp", - "bindings", "Cargo.toml", - "corpus", + "Makefile", + "binding.gyp", + "bindings/c", + "bindings/go", + "bindings/node", + "bindings/python", + "bindings/rust", + "examples", "grammar.js", - "LICENSE", "package.json", - "README.md", + "package-lock.json", + "pyproject.toml", + "setup.py", + "test", + "types", + ".editorconfig", + ".github", + ".gitignore", + ".gitattributes", + ".gitmodules", + ".npmignore", ], sources: [ "src/parser.c", - "src/scanner.c", + // NOTE: if your language has an external scanner, add it here. 
], resources: [ .copy("queries") diff --git a/binding.gyp b/binding.gyp index d76793ab..a2d04f2d 100644 --- a/binding.gyp +++ b/binding.gyp @@ -4,15 +4,18 @@ "target_name": "tree_sitter_python_binding", "include_dirs": [ " #include "nan.h" +#include using namespace v8; -extern "C" TSLanguage * tree_sitter_python(); +typedef struct TSLanguage TSLanguage; + +extern "C" const TSLanguage *tree_sitter_python(void); namespace { NAN_METHOD(New) {} void Init(Local exports, Local module) { - Local tpl = Nan::New(New); - tpl->SetClassName(Nan::New("Language").ToLocalChecked()); - tpl->InstanceTemplate()->SetInternalFieldCount(1); + Local tpl = Nan::New(New); + tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + tpl->InstanceTemplate()->SetInternalFieldCount(1); - Local constructor = Nan::GetFunction(tpl).ToLocalChecked(); - Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); - Nan::SetInternalFieldPointer(instance, 0, tree_sitter_python()); + Local constructor = Nan::GetFunction(tpl).ToLocalChecked(); + Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(instance, 0, (void *)tree_sitter_python()); - Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("python").ToLocalChecked()); - Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); + Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("python").ToLocalChecked()); + Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); } -NODE_MODULE(tree_sitter_python_binding, Init) +NODE_MODULE_CONTEXT_AWARE(tree_sitter_python_binding, Init) -} // namespace +} // namespace diff --git a/bindings/node/index.js b/bindings/node/index.js index e0f77d80..2fd841d9 100644 --- a/bindings/node/index.js +++ b/bindings/node/index.js @@ -1,13 +1,13 @@ try { module.exports = require("../../build/Release/tree_sitter_python_binding"); } catch (error1) { - if (error1.code !== 'MODULE_NOT_FOUND') { + if (error1.code !== "MODULE_NOT_FOUND") { throw error1; } try { module.exports = require("../../build/Debug/tree_sitter_python_binding"); } catch (error2) { - if (error2.code !== 'MODULE_NOT_FOUND') { + if (error2.code !== "MODULE_NOT_FOUND") { throw error2; } throw error1 diff --git a/bindings/python/build/lib/tree_sitter_python/__init__.py b/bindings/python/build/lib/tree_sitter_python/__init__.py new file mode 100644 index 00000000..7d4a8665 --- /dev/null +++ b/bindings/python/build/lib/tree_sitter_python/__init__.py @@ -0,0 +1,5 @@ +from ._tree_sitter_python import lib as _lib, ffi as _ffi + +def language(): + """Get the tree-sitter language for this grammar.""" + return int(_ffi.cast("uintptr_t", _lib.tree_sitter_python())) diff --git a/bindings/python/tree_sitter_python/__init__.py b/bindings/python/tree_sitter_python/__init__.py new file mode 100644 index 00000000..3c892c31 --- /dev/null +++ b/bindings/python/tree_sitter_python/__init__.py @@ -0,0 +1,3 @@ +"Python grammar for tree-sitter" + +from ._binding import language diff --git a/bindings/python/tree_sitter_python/__init__.pyi b/bindings/python/tree_sitter_python/__init__.pyi new file mode 100644 index 00000000..5416666f --- /dev/null +++ b/bindings/python/tree_sitter_python/__init__.pyi @@ -0,0 +1 @@ +def language() -> int: ... 
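The new `bindings/python` package exposes the grammar through a single `language()` function that returns the address of the generated `TSLanguage` as an `int` (see `binding.c` in the next file diff). A minimal usage sketch, assuming the optional `tree-sitter~=0.21` runtime declared later in `pyproject.toml` is installed; the exact `Language` constructor signature varies between py-tree-sitter releases, so treat this as illustrative rather than the packaged API:

```python
# Illustrative only: assumes py-tree-sitter ~0.21 is installed
# (the optional "core" dependency declared in pyproject.toml).
import tree_sitter_python as tspython
from tree_sitter import Language, Parser

# language() returns the TSLanguage pointer as an integer.
# Some py-tree-sitter versions also expect a name: Language(tspython.language(), "python")
PY_LANGUAGE = Language(tspython.language())

parser = Parser()
parser.set_language(PY_LANGUAGE)

tree = parser.parse(b"def double(x):\n    return x * 2\n")
assert not tree.root_node.has_error
```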
diff --git a/bindings/python/tree_sitter_python/binding.c b/bindings/python/tree_sitter_python/binding.c new file mode 100644 index 00000000..b3fd7bbc --- /dev/null +++ b/bindings/python/tree_sitter_python/binding.c @@ -0,0 +1,27 @@ +#include + +typedef struct TSLanguage TSLanguage; + +extern const TSLanguage *tree_sitter_python(void); + +static PyObject* _binding_language(PyObject *self, PyObject *args) { + return PyLong_FromVoidPtr((void *)tree_sitter_python()); +} + +static PyMethodDef methods[] = { + {"language", _binding_language, METH_NOARGS, + "Get the tree-sitter language for this grammar."}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_binding", + .m_doc = NULL, + .m_size = -1, + .m_methods = methods +}; + +PyMODINIT_FUNC PyInit__binding(void) { + return PyModule_Create(&module); +} diff --git a/bindings/python/tree_sitter_python/py.typed b/bindings/python/tree_sitter_python/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index 8851fed1..a92e388f 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -2,11 +2,8 @@ fn main() { let src_dir = std::path::Path::new("src"); let mut c_config = cc::Build::new(); + c_config.flag_if_supported("-Wno-unused-value"); c_config.include(src_dir); - c_config - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable") - .flag_if_supported("-Wno-trigraphs"); let parser_path = src_dir.join("parser.c"); c_config.file(&parser_path); @@ -14,6 +11,6 @@ fn main() { c_config.file(&scanner_path); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); - c_config.compile("parser"); + c_config.compile("tree-sitter-python"); println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); } diff --git a/bindings/rust/lib.rs b/bindings/rust/lib.rs index 71bc80bd..123b697d 100644 --- a/bindings/rust/lib.rs +++ b/bindings/rust/lib.rs @@ -1,27 +1,17 @@ -// -*- coding: utf-8 -*- -// ------------------------------------------------------------------------------------------------ -// Copyright © 2020, tree-sitter-python authors. -// See the LICENSE file in this repo for license details. -// ------------------------------------------------------------------------------------------------ - -//! This crate provides a Python grammar for the [tree-sitter][] parsing library. +//! This crate provides Python language support for the [tree-sitter][] parsing library. //! -//! Typically, you will use the [language][language func] function to add this grammar to a +//! Typically, you will use the [language][language func] function to add this language to a //! tree-sitter [Parser][], and then use the parser to parse some code: //! //! ``` -//! use tree_sitter::Parser; -//! //! let code = r#" //! def double(x): //! return x * 2 //! "#; -//! let mut parser = Parser::new(); -//! parser.set_language(tree_sitter_python::language()).expect("Error loading Python grammar"); -//! let parsed = parser.parse(code, None); -//! # let parsed = parsed.unwrap(); -//! # let root = parsed.root_node(); -//! # assert!(!root.has_error()); +//! let mut parser = tree_sitter::Parser::new(); +//! parser.set_language(&tree_sitter_python::language()).expect("Error loading Python grammar"); +//! let tree = parser.parse(code, None).unwrap(); +//! assert!(!tree.root_node().has_error()); //! ``` //! //! 
[Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html @@ -35,7 +25,7 @@ extern "C" { fn tree_sitter_python() -> Language; } -/// Returns the tree-sitter [Language][] for this grammar. +/// Get the tree-sitter [Language][] for this grammar. /// /// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html pub fn language() -> Language { @@ -45,24 +35,21 @@ pub fn language() -> Language { /// The source of the Python tree-sitter grammar description. pub const GRAMMAR: &str = include_str!("../../grammar.js"); -/// The syntax highlighting query for this language. -pub const HIGHLIGHT_QUERY: &str = include_str!("../../queries/highlights.scm"); - /// The content of the [`node-types.json`][] file for this grammar. /// /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); -/// The symbol tagging query for this language. -pub const TAGGING_QUERY: &str = include_str!("../../queries/tags.scm"); +pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); +pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); #[cfg(test)] mod tests { #[test] - fn can_load_grammar() { + fn test_can_load_grammar() { let mut parser = tree_sitter::Parser::new(); parser - .set_language(super::language()) + .set_language(&super::language()) .expect("Error loading Python grammar"); } } diff --git a/bindings/swift/TreeSitterPython/python.h b/bindings/swift/TreeSitterPython/python.h index 6f2548a2..747317a1 100644 --- a/bindings/swift/TreeSitterPython/python.h +++ b/bindings/swift/TreeSitterPython/python.h @@ -7,7 +7,7 @@ typedef struct TSLanguage TSLanguage; extern "C" { #endif -extern TSLanguage *tree_sitter_python(); +extern const TSLanguage *tree_sitter_python(void); #ifdef __cplusplus } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a8eca355 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "tree-sitter-python" +description = "Python grammar for tree-sitter" +version = "0.0.1" +keywords = ["parsing", "incremental", "python"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Topic :: Software Development :: Compilers", + "Topic :: Text Processing :: Linguistic", +] +requires-python = ">=3.8" +license.file = "LICENSE" +readme = "README.md" + +[project.optional-dependencies] +core = ["tree-sitter~=0.21"] + +[tool.cibuildwheel] +build = "cp38-*" +build-frontend = "build" diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..65b70b47 --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +from os.path import isdir, join +from platform import system + +from setuptools import Extension, find_packages, setup +from setuptools.command.build import build +from wheel.bdist_wheel import bdist_wheel + + +class Build(build): + def run(self): + if isdir("queries"): + dest = join(self.build_lib, "tree_sitter_python", "queries") + self.copy_tree("queries", dest) + super().run() + + +class BdistWheel(bdist_wheel): + def get_tag(self): + python, abi, platform = super().get_tag() + if python.startswith("cp"): + python, abi = "cp38", "abi3" + return python, abi, platform + + +setup( + packages=find_packages("bindings/python"), + package_dir={"": "bindings/python"}, + package_data={ + "tree_sitter_python": 
["*.pyi", "py.typed"], + "tree_sitter_python.queries": ["*.scm"], + }, + ext_package="tree_sitter_python", + ext_modules=[ + Extension( + name="_binding", + sources=[ + "bindings/python/tree_sitter_python/binding.c", + "src/parser.c", + "src/scanner.c", + ], + extra_compile_args=( + ["-std=c11"] if system() != 'Windows' else [] + ), + define_macros=[ + ("Py_LIMITED_API", "0x03080000"), + ("PY_SSIZE_T_CLEAN", None) + ], + include_dirs=["src"], + py_limited_api=True, + ) + ], + cmdclass={ + "build": Build, + "bdist_wheel": BdistWheel + }, + zip_safe=False, +) diff --git a/src/scanner.c b/src/scanner.c index 8c15505f..4022fccd 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -378,7 +378,7 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff } size += delimiter_count; - int iter = 1; + uint32_t iter = 1; for (; iter < scanner->indents.size && size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) { buffer[size++] = (char)*array_get(&scanner->indents, iter); } diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h index 43358fcc..45969242 100644 --- a/src/tree_sitter/array.h +++ b/src/tree_sitter/array.h @@ -13,6 +13,16 @@ extern "C" { #include #include +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + #define Array(T) \ struct { \ T *contents; \ @@ -265,6 +275,14 @@ static inline void _array__splice(Array *self, size_t element_size, /// parameter by reference in order to work with the generic sorting function above. #define compare_int(a, b) ((int)*(a) - (int)(b)) +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + #ifdef __cplusplus } #endif diff --git a/types/dsl.d.ts b/types/dsl.d.ts new file mode 100644 index 00000000..63f9ed49 --- /dev/null +++ b/types/dsl.d.ts @@ -0,0 +1,379 @@ +type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string }; +type BlankRule = { type: 'BLANK' }; +type ChoiceRule = { type: 'CHOICE'; members: Rule[] }; +type FieldRule = { type: 'FIELD'; name: string; content: Rule }; +type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule }; +type PatternRule = { type: 'PATTERN'; value: string }; +type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number }; +type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number }; +type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number }; +type PrecRule = { type: 'PREC'; content: Rule; value: number }; +type Repeat1Rule = { type: 'REPEAT1'; content: Rule }; +type RepeatRule = { type: 'REPEAT'; content: Rule }; +type SeqRule = { type: 'SEQ'; members: Rule[] }; +type StringRule = { type: 'STRING'; value: string }; +type SymbolRule = { type: 'SYMBOL'; name: Name }; +type TokenRule = { type: 'TOKEN'; content: Rule }; + +type Rule = + | AliasRule + | BlankRule + | ChoiceRule + | FieldRule + | ImmediateTokenRule + | PatternRule + | PrecDynamicRule + | PrecLeftRule + | PrecRightRule + | PrecRule + | Repeat1Rule + | RepeatRule + | SeqRule + | StringRule + | SymbolRule + | TokenRule; + +type RuleOrLiteral = Rule | RegExp | string; + +type GrammarSymbols = { + [name in RuleName]: SymbolRule; +} & + Record>; + +type RuleBuilder = ( + $: GrammarSymbols, + previous: Rule, +) => RuleOrLiteral; + 
+type RuleBuilders< + RuleName extends string, + BaseGrammarRuleName extends string +> = { + [name in RuleName]: RuleBuilder; + }; + +interface Grammar< + RuleName extends string, + BaseGrammarRuleName extends string = never, + Rules extends RuleBuilders = RuleBuilders< + RuleName, + BaseGrammarRuleName + > +> { + /** + * Name of the grammar language. + */ + name: string; + + /** Mapping of grammar rule names to rule builder functions. */ + rules: Rules; + + /** + * An array of arrays of precedence names or rules. Each inner array represents + * a *descending* ordering. Names/rules listed earlier in one of these arrays + * have higher precedence than any names/rules listed later in the same array. + * + * Using rules is just a shorthand way for using a name then calling prec() + * with that name. It is just a convenience. + */ + precedences?: ( + $: GrammarSymbols, + previous: Rule[][], + ) => RuleOrLiteral[][], + + /** + * An array of arrays of rule names. Each inner array represents a set of + * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ + * in the grammar. When these conflicts occur at runtime, Tree-sitter will + * use the GLR algorithm to explore all of the possible interpretations. If + * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree + * whose corresponding rule has the highest total _dynamic precedence_. + * + * @param $ grammar rules + */ + conflicts?: ( + $: GrammarSymbols, + previous: Rule[][], + ) => RuleOrLiteral[][]; + + /** + * An array of token names which can be returned by an _external scanner_. + * External scanners allow you to write custom C code which runs during the + * lexing process in order to handle lexical rules (e.g. Python's indentation + * tokens) that cannot be described by regular expressions. + * + * @param $ grammar rules + * @param previous array of externals from the base schema, if any + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + */ + externals?: ( + $: Record>, + previous: Rule[], + ) => RuleOrLiteral[]; + + /** + * An array of tokens that may appear anywhere in the language. This + * is often used for whitespace and comments. The default value of + * extras is to accept whitespace. To control whitespace explicitly, + * specify extras: `$ => []` in your grammar. + * + * @param $ grammar rules + */ + extras?: ( + $: GrammarSymbols, + ) => RuleOrLiteral[]; + + /** + * An array of rules that should be automatically removed from the + * grammar by replacing all of their usages with a copy of their definition. + * This is useful for rules that are used in multiple places but for which + * you don't want to create syntax tree nodes at runtime. + * + * @param $ grammar rules + */ + inline?: ( + $: GrammarSymbols, + previous: Rule[], + ) => RuleOrLiteral[]; + + /** + * A list of hidden rule names that should be considered supertypes in the + * generated node types file. + * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + */ + supertypes?: ( + $: GrammarSymbols, + previous: Rule[], + ) => RuleOrLiteral[]; + + /** + * The name of a token that will match keywords for the purpose of the + * keyword extraction optimization. + * + * @param $ grammar rules + * + * @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction + */ + word?: ($: GrammarSymbols) => RuleOrLiteral; +} + +type GrammarSchema = { + [K in keyof Grammar]: K extends 'rules' + ? 
Record + : Grammar[K]; +}; + +/** + * Causes the given rule to appear with an alternative name in the syntax tree. + * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an + * anonymous node, as if the rule had been written as the simple string. + * + * @param rule rule that will be aliased + * @param name target name for the alias + */ +declare function alias(rule: RuleOrLiteral, name: string): AliasRule; + +/** + * Causes the given rule to appear as an alternative named node, for instance + * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named + * node called `bar`. + * + * @param rule rule that will be aliased + * @param symbol target symbol for the alias + */ +declare function alias( + rule: RuleOrLiteral, + symbol: SymbolRule, +): AliasRule; + +/** + * Creates a blank rule, matching nothing. + */ +declare function blank(): BlankRule; + +/** + * Assigns a field name to the child node(s) matched by the given rule. + * In the resulting syntax tree, you can then use that field name to + * access specific children. + * + * @param name name of the field + * @param rule rule the field should match + */ +declare function field(name: string, rule: RuleOrLiteral): FieldRule; + +/** + * Creates a rule that matches one of a set of possible rules. The order + * of the arguments does not matter. This is analogous to the `|` (pipe) + * operator in EBNF notation. + * + * @param options possible rule choices + */ +declare function choice(...options: RuleOrLiteral[]): ChoiceRule; + +/** + * Creates a rule that matches zero or one occurrence of a given rule. + * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. + * + * @param value rule to be made optional + */ +declare function optional(rule: RuleOrLiteral): ChoiceRule; + +/** + * Marks the given rule with a precedence which will be used to resolve LR(1) + * conflicts at parser-generation time. When two rules overlap in a way that + * represents either a true ambiguity or a _local_ ambiguity given one token + * of lookahead, Tree-sitter will try to resolve the conflict by matching the + * rule with the higher precedence. + * + * Precedence values can either be strings or numbers. When comparing rules + * with numerical precedence, higher numbers indicate higher precedences. To + * compare rules with string precedence, Tree-sitter uses the grammar's `precedences` + * field. + * + * rules is zero. This works similarly to the precedence directives in Yacc grammars. + * + * @param value precedence weight + * @param rule rule being weighted + * + * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ +declare const prec: { + (value: String | number, rule: RuleOrLiteral): PrecRule; + + /** + * Marks the given rule as left-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a left-associative rule, Tree-sitter + * will prefer matching a rule that ends _earlier_. This works similarly to + * associativity directives in Yacc grammars. 
+ * + * @param value (optional) precedence weight + * @param rule rule to mark as left-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + left(rule: RuleOrLiteral): PrecLeftRule; + left(value: String | number, rule: RuleOrLiteral): PrecLeftRule; + + /** + * Marks the given rule as right-associative (and optionally applies a + * numerical precedence). When an LR(1) conflict arises in which all of the + * rules have the same numerical precedence, Tree-sitter will consult the + * rules' associativity. If there is a right-associative rule, Tree-sitter + * will prefer matching a rule that ends _later_. This works similarly to + * associativity directives in Yacc grammars. + * + * @param value (optional) precedence weight + * @param rule rule to mark as right-associative + * + * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html + */ + right(rule: RuleOrLiteral): PrecRightRule; + right(value: String | number, rule: RuleOrLiteral): PrecRightRule; + + /** + * Marks the given rule with a numerical precedence which will be used to + * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. + * This is only necessary when handling a conflict dynamically using the + * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: + * multiple rules correctly match a given piece of code. In that event, + * Tree-sitter compares the total dynamic precedence associated with each + * rule, and selects the one with the highest total. This is similar to + * dynamic precedence directives in Bison grammars. + * + * @param value precedence weight + * @param rule rule being weighted + * + * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html + */ + dynamic(value: String | number, rule: RuleOrLiteral): PrecDynamicRule; +}; + +/** + * Creates a rule that matches _zero-or-more_ occurrences of a given rule. + * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This + * rule is implemented in terms of `repeat1` but is included because it + * is very commonly used. + * + * @param rule rule to repeat, zero or more times + */ +declare function repeat(rule: RuleOrLiteral): RepeatRule; + +/** + * Creates a rule that matches one-or-more occurrences of a given rule. + * + * @param rule rule to repeat, one or more times + */ +declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; + +/** + * Creates a rule that matches any number of other rules, one after another. + * It is analogous to simply writing multiple symbols next to each other + * in EBNF notation. + * + * @param rules ordered rules that comprise the sequence + */ +declare function seq(...rules: RuleOrLiteral[]): SeqRule; + +/** + * Creates a symbol rule, representing another rule in the grammar by name. + * + * @param name name of the target rule + */ +declare function sym(name: Name): SymbolRule; + +/** + * Marks the given rule as producing only a single token. Tree-sitter's + * default is to treat each String or RegExp literal in the grammar as a + * separate token. Each token is matched separately by the lexer and + * returned as its own leaf node in the tree. The token function allows + * you to express a complex rule using the DSL functions (rather + * than as a single regular expression) but still have Tree-sitter treat + * it as a single token. 
+ *
+ * @param rule rule to represent as a single token
+ */
+declare const token: {
+  (rule: RuleOrLiteral): TokenRule;
+
+  /**
+   * Marks the given rule as producing an immediate token. This allows
+   * the parser to produce a different token based on whether or not
+   * there are `extras` preceding the token's main content. When there
+   * are _no_ leading `extras`, an immediate token is preferred over a
+   * normal token which would otherwise match.
+   *
+   * @param rule rule to represent as an immediate token
+   */
+  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
+};
+
+/**
+ * Creates a new language grammar with the provided schema.
+ *
+ * @param options grammar options
+ */
+declare function grammar<RuleName extends string>(
+  options: Grammar<RuleName>,
+): GrammarSchema<RuleName>;
+
+/**
+ * Extends an existing language grammar with the provided options,
+ * creating a new language.
+ *
+ * @param baseGrammar base grammar schema to extend from
+ * @param options grammar options for the new extended language
+ */
+declare function grammar<
+  BaseGrammarRuleName extends string,
+  RuleName extends string
+>(
+  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
+  options: Grammar<RuleName, BaseGrammarRuleName>,
+): GrammarSchema<BaseGrammarRuleName | RuleName>;
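Because the custom `Build` step in `setup.py` copies `queries/` into the built package (and `package_data` ships the `*.scm` files under `tree_sitter_python.queries`), the bundled queries can be read back at runtime. A hedged continuation of the earlier sketch; the resource path reflects the layout produced by that build step:

```python
# Illustrative only: continues the sketch above and assumes the wheel was
# built by the custom Build command, which places queries/*.scm under
# tree_sitter_python/queries/ inside the installed package.
from importlib.resources import files  # Python 3.9+; use the importlib_resources backport on 3.8

highlights_src = (files("tree_sitter_python") / "queries" / "highlights.scm").read_text()

query = PY_LANGUAGE.query(highlights_src)
for node, capture_name in query.captures(tree.root_node):
    print(capture_name, node.type, node.start_point, node.end_point)
```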