diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 969443e8c..7d706d6e5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -57,7 +57,7 @@ When we make a significant decision in how we maintain the project and what we c we will document it in the [capa issues tracker](https://github.com/mandiant/capa/issues). This is the best place review our discussions about what/how/why we do things in the project. If you have a question, check to see if it is documented there. -If it is *not* documented there, or you can't find an answer, please open a issue. +If it is *not* documented there, or you can't find an answer, please open an issue. We'll link to existing issues when appropriate to keep discussions in one place. ## How Can I Contribute? diff --git a/.github/pyinstaller/hooks/hook-vivisect.py b/.github/pyinstaller/hooks/hook-vivisect.py index 8038b7146..680ba380e 100644 --- a/.github/pyinstaller/hooks/hook-vivisect.py +++ b/.github/pyinstaller/hooks/hook-vivisect.py @@ -24,7 +24,7 @@ "pyqtwebengine", # the above are imported by these viv modules. # so really, we'd want to exclude these submodules of viv. - # but i dont think this works. + # but i don't think this works. "vqt", "vdb.qt", "envi.qt", diff --git a/README.md b/README.md index 9c387cdcb..a50c90a25 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ function @ 0x4011C0 ... ``` -Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capabilty extraction. +Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capability extraction. In order to use this, you first submit your sample to CAPE for analysis, and then run capa against the generated report (JSON). 
Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary: diff --git a/capa/capabilities/dynamic.py b/capa/capabilities/dynamic.py index 23bfde4ac..9cc8e2f45 100644 --- a/capa/capabilities/dynamic.py +++ b/capa/capabilities/dynamic.py @@ -65,7 +65,7 @@ def find_thread_capabilities( features: FeatureSet = collections.defaultdict(set) # matches found at the call scope. - # might be found at different calls, thats ok. + # might be found at different calls, that's ok. call_matches: MatchResults = collections.defaultdict(list) for ch in extractor.get_calls(ph, th): @@ -103,11 +103,11 @@ def find_process_capabilities( process_features: FeatureSet = collections.defaultdict(set) # matches found at the basic threads. - # might be found at different threads, thats ok. + # might be found at different threads, that's ok. thread_matches: MatchResults = collections.defaultdict(list) # matches found at the call scope. - # might be found at different calls, thats ok. + # might be found at different calls, that's ok. call_matches: MatchResults = collections.defaultdict(list) for th in extractor.get_threads(ph): diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py index a522a29da..8b213fdb6 100644 --- a/capa/capabilities/static.py +++ b/capa/capabilities/static.py @@ -66,7 +66,7 @@ def find_basic_block_capabilities( features: FeatureSet = collections.defaultdict(set) # matches found at the instruction scope. - # might be found at different instructions, thats ok. + # might be found at different instructions, that's ok. insn_matches: MatchResults = collections.defaultdict(list) for insn in extractor.get_instructions(f, bb): @@ -106,11 +106,11 @@ def find_code_capabilities( function_features: FeatureSet = collections.defaultdict(set) # matches found at the basic block scope. - # might be found at different basic blocks, thats ok. + # might be found at different basic blocks, that's ok. 
bb_matches: MatchResults = collections.defaultdict(list) # matches found at the instruction scope. - # might be found at different instructions, thats ok. + # might be found at different instructions, that's ok. insn_matches: MatchResults = collections.defaultdict(list) for bb in extractor.get_basic_blocks(fh): diff --git a/capa/features/address.py b/capa/features/address.py index e589c0a1c..45c3a600f 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -93,7 +93,7 @@ def __lt__(self, other): class DynamicCallAddress(Address): - """addesses a call in a dynamic execution trace""" + """addresses a call in a dynamic execution trace""" def __init__(self, thread: ThreadAddress, id: int): assert id >= 0 diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index 34f671268..002117fc6 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -75,7 +75,7 @@ class BBHandle: @dataclass class InsnHandle: - """reference to a instruction recognized by a feature extractor. + """reference to an instruction recognized by a feature extractor. Attributes: address: the address of the instruction address. diff --git a/capa/features/extractors/binja/find_binja_api.py b/capa/features/extractors/binja/find_binja_api.py index e97dfc0d1..7412259f2 100644 --- a/capa/features/extractors/binja/find_binja_api.py +++ b/capa/features/extractors/binja/find_binja_api.py @@ -11,7 +11,7 @@ # When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because # we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try # to find out the path of the binaryninja module that has been installed. 
-# Note, including the binaryninja module in the `pyintaller.spec` would not work, since the binaryninja module tries to +# Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to # find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the # binaryninja module is extracted by the PyInstaller. code = r""" diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 79db9272d..c90a31b5c 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -46,7 +46,7 @@ class FlexibleModel(BaseModel): # use this type to indicate that we won't model this data. -# because its not relevant to our use in capa. +# because it's not relevant to our use in capa. # # while its nice to have full coverage of the data shape, # it can easily change and break our parsing. @@ -356,8 +356,8 @@ class Behavior(ExactModel): anomaly: List[str] encryptedbuffers: List[EncryptedBuffer] # these are small objects that describe atomic events, - # like file move, registery access. - # we'll detect the same with our API call analyis. + # like file move, registry access. + # we'll detect the same with our API call analysis. 
enhanced: Skip = None diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 6c99b171a..e43332e63 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -206,7 +206,7 @@ def _parse(self): 15: OS.AROS, 16: OS.FENIXOS, 17: OS.CLOUD, - # 53: "SORTFIX", # i can't find any reference to this OS, i dont think it exists + # 53: "SORTFIX", # i can't find any reference to this OS, i don't think it exists # 64: "ARM_AEABI", # not an OS # 97: "ARM", # not an OS # 255: "STANDALONE", # not an OS diff --git a/capa/features/extractors/ghidra/helpers.py b/capa/features/extractors/ghidra/helpers.py index e6bee6643..22e0ed6d4 100644 --- a/capa/features/extractors/ghidra/helpers.py +++ b/capa/features/extractors/ghidra/helpers.py @@ -260,7 +260,7 @@ def dereference_ptr(insn: ghidra.program.database.code.InstructionDB): if thfunc and thfunc.isThunk(): return handle_thunk(to_deref) else: - # if it doesn't poin to a thunk, it's usually a jmp to a label + # if it doesn't point to a thunk, it's usually a jmp to a label return to_deref if not dat: return to_deref diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index f57ef4bc1..329bc94d0 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -113,7 +113,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if f.vw.metadata["Format"] == "elf": if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running - # this code everytime the call is made, thus preventing the computational overhead. + # this code every time the call is made, thus preventing the computational overhead. try: fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin) except Exception: @@ -598,7 +598,7 @@ def extract_op_number_features( if f.vw.probeMemory(v, 1, envi.memory.MM_READ): # this is a valid address - # assume its not also a constant. 
+ # assume it's not also a constant. return if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP: diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index 7f1f319a6..5dfa3fc71 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -382,7 +382,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: address=Address.from_capa(addr), feature=feature_from_capa(feature), ) # type: ignore - # Mypy is unable to recognise `basic_block` as a argument due to alias + # Mypy is unable to recognise `basic_block` as an argument due to alias for feature, addr in extractor.extract_basic_block_features(f, bb) ] @@ -419,7 +419,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: features=tuple(ffeatures), basic_blocks=basic_blocks, ) # type: ignore - # Mypy is unable to recognise `basic_blocks` as a argument due to alias + # Mypy is unable to recognise `basic_blocks` as an argument due to alias ) features = StaticFeatures( @@ -427,7 +427,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: file=tuple(file_features), functions=tuple(function_features), ) # type: ignore - # Mypy is unable to recognise `global_` as a argument due to alias + # Mypy is unable to recognise `global_` as an argument due to alias freeze = Freeze( version=CURRENT_VERSION, @@ -437,7 +437,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: extractor=Extractor(name=extractor.__class__.__name__), features=features, ) # type: ignore - # Mypy is unable to recognise `base_address` as a argument due to alias + # Mypy is unable to recognise `base_address` as an argument due to alias return freeze.model_dump_json() @@ -532,7 +532,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: file=tuple(file_features), processes=tuple(process_features), ) # type: ignore - # Mypy is unable to recognise `global_` as a argument due to alias + # Mypy is unable to 
recognise `global_` as an argument due to alias # workaround around mypy issue: https://github.com/python/mypy/issues/1424 get_base_addr = getattr(extractor, "get_base_addr", None) @@ -546,7 +546,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: extractor=Extractor(name=extractor.__class__.__name__), features=features, ) # type: ignore - # Mypy is unable to recognise `base_address` as a argument due to alias + # Mypy is unable to recognise `base_address` as an argument due to alias return freeze.model_dump_json() diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index dde5d9cde..b3d01f08c 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -132,7 +132,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.file.Import): assert isinstance(f.value, str) return ImportFeature(import_=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `import_` as a argument due to alias + # Mypy is unable to recognise `import_` as an argument due to alias elif isinstance(f, capa.features.file.Section): assert isinstance(f.value, str) @@ -141,7 +141,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.file.FunctionName): assert isinstance(f.value, str) return FunctionNameFeature(function_name=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `function_name` as a argument due to alias + # Mypy is unable to recognise `function_name` as an argument due to alias # must come before check for String due to inheritance elif isinstance(f, capa.features.common.Substring): @@ -160,7 +160,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.common.Class): assert isinstance(f.value, str) return ClassFeature(class_=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise 
`class_` as a argument due to alias + # Mypy is unable to recognise `class_` as an argument due to alias elif isinstance(f, capa.features.common.Namespace): assert isinstance(f.value, str) @@ -197,12 +197,12 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.insn.OperandNumber): assert isinstance(f.value, int) return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `operand_number` as a argument due to alias + # Mypy is unable to recognise `operand_number` as an argument due to alias elif isinstance(f, capa.features.insn.OperandOffset): assert isinstance(f.value, int) return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `operand_offset` as a argument due to alias + # Mypy is unable to recognise `operand_offset` as an argument due to alias else: raise NotImplementedError(f"feature_from_capa({type(f)}) not implemented") diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md index 4bebb68cf..30a5695b7 100644 --- a/capa/ghidra/README.md +++ b/capa/ghidra/README.md @@ -27,7 +27,7 @@ Comments are added at the beginning of matched functions indicating matched capa ### Bookmarks -Bookmarks are added to functions that matched a capabilitiy that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window. +Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index 017beeb57..b93c31a8c 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -764,7 +764,7 @@ def load_features_from_yaml(self, rule_text, update_preview=False): node = self.make_child_node_from_feature(parent, parse_yaml_line(line.strip())) - # append our new node in case its a parent for another node + # append our new node in case it's a parent for another node if node: stack.append(node) diff --git a/capa/main.py b/capa/main.py index 40e697cdf..1bb53801b 100644 --- a/capa/main.py +++ b/capa/main.py @@ -317,7 +317,7 @@ def install_common_args(parser, wanted=None): # Library code should *not* call these functions. # # These main routines may raise `ShouldExitError` to indicate the program -# ...should exit. Its a tiny step away from doing `sys.exit()` directly. +# ...should exit. It's a tiny step away from doing `sys.exit()` directly. # I'm not sure if we should just do that. In the meantime, programs should # handle `ShouldExitError` and pass the status code to `sys.exit()`. # diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index a15507c97..67d0b03ea 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -866,7 +866,7 @@ def rec(statement): # matches a namespace, so take precedence and don't even check rule names. deps.update(r.name for r in namespaces[statement.value]) else: - # not a namespace, assume its a rule name. + # not a namespace, assume it's a rule name. assert isinstance(statement.value, str) deps.add(statement.value) @@ -1216,7 +1216,7 @@ def get_rules_and_dependencies(rules: List[Rule], rule_name: str) -> Iterator[Ru """ from the given collection of rules, select a rule and its dependencies (transitively). """ - # we evaluate `rules` multiple times, so if its a generator, realize it into a list. + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. 
rules = list(rules) namespaces = index_rules_by_namespace(rules) rules_by_name = {rule.name: rule for rule in rules} @@ -1255,7 +1255,7 @@ def ensure_rule_dependencies_are_met(rules: List[Rule]) -> None: raises: InvalidRule: if a dependency is not met. """ - # we evaluate `rules` multiple times, so if its a generator, realize it into a list. + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. rules = list(rules) namespaces = index_rules_by_namespace(rules) rules_by_name = {rule.name: rule for rule in rules} @@ -1302,7 +1302,7 @@ def topologically_order_rules(rules: List[Rule]) -> List[Rule]: assumes that the rule dependency graph is a DAG. """ - # we evaluate `rules` multiple times, so if its a generator, realize it into a list. + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. rules = list(rules) namespaces = index_rules_by_namespace(rules) rules_by_name = {rule.name: rule for rule in rules} @@ -1463,7 +1463,7 @@ def rec(rule_name: str, node: Union[Feature, Statement]): # # they're global, so if they match at one location in a file, # they'll match at every location in a file. - # so thats not helpful to decide how to downselect. + # so that's not helpful to decide how to downselect. # # and, a global rule will never be the sole selector in a rule. pass @@ -1533,7 +1533,7 @@ def rec(rule_name: str, node: Union[Feature, Statement]): rec(rule_name, root) # if a rule has a hard feature, - # dont consider it easy, and therefore, + # don't consider it easy, and therefore, # don't index any of its features. # # otherwise, its an easy rule, and index its features diff --git a/doc/release.md b/doc/release.md index 6381a5817..ef740ec41 100644 --- a/doc/release.md +++ b/doc/release.md @@ -1,7 +1,7 @@ # Release checklist - [ ] Ensure all [milestoned issues/PRs](https://github.com/mandiant/capa/milestones) are addressed, or reassign to a new milestone. 
-- [ ] Add the `dont merge` label to all PRs that are close to be ready to merge (or merge them if they are ready) in [capa](https://github.com/mandiant/capa/pulls) and [capa-rules](https://github.com/mandiant/capa-rules/pulls). +- [ ] Add the `dont merge` label to all PRs that are close to being ready to merge (or merge them if they are ready) in [capa](https://github.com/mandiant/capa/pulls) and [capa-rules](https://github.com/mandiant/capa-rules/pulls). - [ ] Ensure the [CI workflow succeeds in master](https://github.com/mandiant/capa/actions/workflows/tests.yml?query=branch%3Amaster). - [ ] Ensure that `python scripts/lint.py rules/ --thorough` succeeds (only `missing examples` offenses are allowed in the nursery). You can [manually trigger a thorough lint](https://github.com/mandiant/capa-rules/actions/workflows/tests.yml) in CI via the "Run workflow" option. - [ ] Review changes