Skip to content

Commit

Permalink
refactor: group compute cache id logic into 1 function
Browse files Browse the repository at this point in the history
This commit groups the cache identifier computation logic
into a single function: compute_cache_identifier.

Initially, the cache id computation was split into two functions:
compute_ruleset_cache_identifier and compute_cache_identifier.
  • Loading branch information
fariss authored and mr-tz committed Jul 3, 2024
1 parent d985150 commit 077d4bb
Showing 1 changed file with 2 additions and 41 deletions.
43 changes: 2 additions & 41 deletions capa/rules/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@


def compute_cache_identifier(rule_content: List[bytes]) -> CacheIdentifier:
# this is not a development environment, only use rule contents in
# computing the cache identifier
hash = hashlib.sha256()

# note that this changes with each release,
Expand Down Expand Up @@ -111,44 +109,6 @@ def get_ruleset_content(ruleset: capa.rules.RuleSet) -> List[bytes]:
def compute_ruleset_cache_identifier(ruleset: capa.rules.RuleSet) -> CacheIdentifier:
rule_contents = get_ruleset_content(ruleset)

try:
if capa.rules.utils.is_dev_environment():
modified_files = capa.rules.utils.get_modified_files()
commit_hash = capa.rules.utils.get_git_commit_hash()

if modified_files or commit_hash:
hash = hashlib.sha256()
hash.update(capa.version.__version__.encode("utf-8"))
hash.update(b"\x00")

for file in modified_files:
try:
file_content = file.read_bytes()
logger.debug("found modified source file %s", file)
hash.update(file_content)
hash.update(b"\x00")
except FileNotFoundError as e:
logger.error("modified file not found: %s", file)
logger.error("%s", e)

if commit_hash:
hash.update(commit_hash.encode("ascii"))
hash.update(b"\x00")

# include the hash of the rule contents
rule_hashes = sorted([hashlib.sha256(buf).hexdigest() for buf in rule_contents])
for rule_hash in rule_hashes:
hash.update(rule_hash.encode("ascii"))
hash.update(b"\x00")

logger.debug(
"developer environment detected, ruleset cache will be auto-generated upon each source modification"
)
return hash.hexdigest()
except Exception as e:
logger.warning("failed to compute ruleset cache identifier in developer mode: %s", str(e))
logger.warning("falling back to default cache identifier based on rules contents")

return compute_cache_identifier(rule_contents)


Expand Down Expand Up @@ -215,7 +175,8 @@ def generate_rule_cache(rules_dir: Path, cache_dir: Path) -> bool:
logger.error("%s", str(e))
return False

id = capa.rules.cache.compute_ruleset_cache_identifier(rules)
content = capa.rules.cache.get_ruleset_content(rules)
id = capa.rules.cache.compute_cache_identifier(content)
path = capa.rules.cache.get_cache_path(cache_dir, id)

assert path.exists()
Expand Down

0 comments on commit 077d4bb

Please sign in to comment.