Skip to content

Commit

Permalink
Merge pull request #23 from nexB/tree-sitter-pin-dependency
Browse files (browse the repository at this point in the history)
Pin tree-sitter dependencies
  • Loading branch information
keshav-space authored May 16, 2024
2 parents 01ae97f + 4816392 commit 857df51
Show file tree
Hide file tree
Showing 6 changed files with 452 additions and 404 deletions.
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
parameters:
job_name: ubuntu20_cpython
image_name: ubuntu-20.04
python_versions: ['3.8', '3.9', '3.10', '3.11', '3.12']
python_versions: ['3.9', '3.10', '3.11', '3.12']
test_suites:
all: |
sudo apt-get install universal-ctags gettext
Expand All @@ -21,7 +21,7 @@ jobs:
parameters:
job_name: ubuntu22_cpython
image_name: ubuntu-22.04
python_versions: ['3.8', '3.9', '3.10', '3.11', '3.12']
python_versions: ['3.9', '3.10', '3.11', '3.12']
test_suites:
all: |
sudo apt-get install universal-ctags gettext
Expand Down
18 changes: 9 additions & 9 deletions setup.cfg
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,15 +45,15 @@ install_requires =
plugincode
commoncode
typecode
tree-sitter
tree-sitter-bash
tree-sitter-c
tree-sitter-cpp
tree-sitter-go
tree-sitter-java
tree-sitter-javascript
tree-sitter-python
tree-sitter-rust
tree-sitter==0.22.0
tree-sitter-bash==0.21.0
tree-sitter-c==0.21.1
tree-sitter-cpp==0.22.0
tree-sitter-go==0.21.0
tree-sitter-java==0.21.0
tree-sitter-javascript==0.21.2
tree-sitter-python==0.21.0
tree-sitter-rust==0.21.2
pygments

[options.packages.find]
Expand Down
72 changes: 52 additions & 20 deletions src/source_inspector/symbols_tree_sitter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,13 +96,13 @@ def collect_symbols_and_strings(location):
symbols, strings = [], []

if parser_result := get_parser(location):
parser, string_id = parser_result
parser, language_info = parser_result

with open(location, "rb") as f:
source = f.read()

tree = parser.parse(source)
traverse(tree.root_node, symbols, strings, string_id)
traverse(tree.root_node, symbols, strings, language_info)

return symbols, strings

Expand All @@ -118,42 +118,74 @@ def get_parser(location):
if not language or language not in TS_LANGUAGE_WHEELS:
return

wheel = TS_LANGUAGE_WHEELS[language]["wheel"]
string_id = TS_LANGUAGE_WHEELS[language]["string_id"]
language_info = TS_LANGUAGE_WHEELS[language]
wheel = language_info["wheel"]

try:
grammar = importlib.import_module(wheel)
except ModuleNotFoundError:
raise TreeSitterWheelNotInstalled(f"{wheel} package is not installed")

LANGUAGE = Language(grammar.language(), language)
LANGUAGE = Language(grammar.language())
parser = Parser()
parser.set_language(LANGUAGE)

return parser, string_id
return parser, language_info


def traverse(node, symbols, strings, string_id, depth=0):
def traverse(node, symbols, strings, language_info, depth=0):
"""Recursively traverse the parse tree node to collect symbols and strings."""
if node.type == "identifier":
if source_symbol:=node.text.decode():
if node.type in language_info.get("identifiers"):
if source_symbol := node.text.decode():
symbols.append(source_symbol)
elif node.type == string_id:
if source_string:=node.text.decode("unicode_escape").replace('"', ""):
elif node.type in language_info.get("string_literals"):
if source_string := node.text.decode():
strings.append(source_string)
for child in node.children:
traverse(child, symbols, strings, string_id, depth + 1)
traverse(child, symbols, strings, language_info, depth + 1)


TS_LANGUAGE_WHEELS = {
"Bash": {"wheel": "tree_sitter_bash", "string_id": "raw_string"},
"C": {"wheel": "tree_sitter_c", "string_id": "string_literal"},
"C++": {"wheel": "tree_sitter_cpp", "string_id": "string_literal"},
"Go": {"wheel": "tree_sitter_go", "string_id": "raw_string_literal"},
"Java": {"wheel": "tree_sitter_java", "string_id": "string_literal"},
"JavaScript": {"wheel": "tree_sitter_javascript", "string_id": "string"},
"Python": {"wheel": "tree_sitter_python", "string_id": "string"},
"Rust": {"wheel": "tree_sitter_rust", "string_id": "raw_string_literal"},
"Bash": {
"wheel": "tree_sitter_bash",
"identifiers": ["identifier"],
"string_literals": ["string_literal"],
},
"C": {
"wheel": "tree_sitter_c",
"identifiers": ["identifier"],
"string_literals": ["string_literal"],
},
"C++": {
"wheel": "tree_sitter_cpp",
"identifiers": ["identifier"],
"string_literals": ["string_literal"],
},
"Go": {
"wheel": "tree_sitter_go",
"identifiers": ["identifier"],
"string_literals": ["raw_string_literal"],
},
"Java": {
"wheel": "tree_sitter_java",
"identifiers": ["identifier"],
"string_literals": ["string_literal"],
},
"JavaScript": {
"wheel": "tree_sitter_javascript",
"identifiers": ["identifier"],
"string_literals": ["string_literal"],
},
"Python": {
"wheel": "tree_sitter_python",
"identifiers": ["identifier"],
"string_literals": ["string_literal"],
},
"Rust": {
"wheel": "tree_sitter_rust",
"identifiers": ["identifier"],
"string_literals": ["raw_string_literal"],
},
}


Expand Down
13 changes: 13 additions & 0 deletions tests/data/symbols_tree_sitter/greet.sh
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

# Define a global variable
GREETING="Hello"

# Define a function to greet the user
greet_user() {
local username=$1
echo "$GREETING, $username! Welcome to the Bash scripting tutorial."
}

# Call the function with an argument
greet_user "Alice"
Loading

0 comments on commit 857df51

Please sign in to comment.