Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: use orjson for writing compile_commands #118

Closed
wants to merge 11 commits into from
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ Howdy, Bazel user 🤠. Let's get you set up fast with some awesome tooling for

There's a bunch of text here but only because we're trying to spell things out and make them easy. If you have issues, let us know; we'd love your help making things even better and more complete—and we'd love to help you!

This rule optionally uses the `orjson` pip package to significantly speed up JSON processing. You can run the following before invoking the refresh command to make sure `orjson` is available:

```shell
pip show orjson >/dev/null || pip install orjson --user
```

### First, add this tool to your Bazel setup.

#### If you have a MODULE.bazel file and are using the new [bzlmod](https://bazel.build/external/migration) system
Expand Down
50 changes: 40 additions & 10 deletions refresh.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,11 @@ def _get_headers(compile_action, source_path: str):
cache_last_modified = os.path.getmtime(cache_file_path) # Do before opening just as a basic hedge against concurrent write, even though we won't handle the concurrent delete case perfectly.
try:
with open(cache_file_path) as cache_file:
action_key, cached_headers = json.load(cache_file)
try:
from orjson import loads
action_key, cached_headers = loads(cache_file.read())
except ImportError:
action_key, cached_headers = json.load(cache_file)
except json.JSONDecodeError:
# Corrupted cache, which can happen if, for example, the user kills the program, since writes aren't atomic.
# But if it is the result of a bug, we want to print it before it's overwritten, so it can be reported
Expand Down Expand Up @@ -584,16 +588,31 @@ def _get_headers(compile_action, source_path: str):
if output_file and should_cache:
os.makedirs(os.path.dirname(cache_file_path), exist_ok=True)
with open(cache_file_path, 'w') as cache_file:
json.dump((compile_action.actionKey, list(headers)), cache_file)
cache = (compile_action.actionKey, list(headers))
try:
from orjson import dumps
cache_file.write(dumps(cache))
except ImportError:
json.dump(cache, cache_file)
elif not headers and cached_headers: # If we failed to get headers, we'll fall back on a stale cache.
headers = set(cached_headers)

if {exclude_headers} == "external":
headers = {header for header in headers if _file_is_in_main_workspace_and_not_external(header)}

return headers

_get_headers.has_logged = False

def _cache_compile_action(compile_action, cache_file_path, headers):
cache = (compile_action.actionKey, list(headers))
try:
from orjson import dumps
with open(cache_file_path, 'wb') as cache_file:
cache_file.write(dumps(cache))
except ImportError:
with open(cache_file_path, 'w') as cache_file:
json.dump(cache, cache_file)

def _get_files(compile_action):
"""Gets the ({source files}, {header files}) clangd should be told the command applies to."""
Expand Down Expand Up @@ -1305,6 +1324,24 @@ def _ensure_cwd_is_workspace_root():
# Although this can fail (OSError/FileNotFoundError/PermissionError/NotADirectoryError), there's no easy way to recover, so we'll happily crash.
os.chdir(workspace_root)

def _write_compile_commands(compile_command_entries: typing.List[str]):
file_name = 'compile_commands.json'
try:
# orjson is much faster than the standard library's json module (1.9 seconds vs 6.6 seconds for a ~140 MB file).
from orjson import dumps, OPT_INDENT_2
with open(file_name, 'wb') as output_file:
output_file.write(dumps(
compile_command_entries,
option=OPT_INDENT_2
))
except ImportError:
with open(file_name, 'w') as output_file:
json.dump(
compile_command_entries,
output_file,
indent=2, # Yay, human readability!
check_circular=False # For speed.
)

def main():
_ensure_cwd_is_workspace_root()
Expand All @@ -1326,11 +1363,4 @@ def main():
There should be actionable warnings, above, that led to this.""")
sys.exit(1)

# Chain output into compile_commands.json
with open('compile_commands.json', 'w') as output_file:
json.dump(
compile_command_entries,
output_file,
indent=2, # Yay, human readability!
check_circular=False # For speed.
)
_write_compile_commands(compile_command_entries)