Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

viv: insn: string: handle viv bug around substrings #1273

Merged
merged 6 commits into from
Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
- render: fix verbose rendering of scopes #1263 @williballenthin
- rules: better detect invalid rules #1282 @williballenthin
- show-features: better render strings with embedded whitespace #1267 @williballenthin
- handle vivisect bug around strings at instruction level, use min length 4 #1271 @williballenthin @mr-tz
- extractor: guard against invalid "calls from" features #1177 @mr-tz

### capa explorer IDA Pro plugin
Expand Down
16 changes: 12 additions & 4 deletions capa/features/extractors/viv/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,12 @@ def read_string(vw, offset: int) -> str:
pass
else:
if alen > 0:
return read_memory(vw, offset, alen).decode("utf-8")
buf = read_memory(vw, offset, alen)
if b"\x00" in buf:
# account for bug #1271.
# remove when vivisect is fixed.
buf = buf.partition(b"\x00")[0]
Comment on lines +285 to +288
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should really never be the case, but viv is broken

return buf.decode("utf-8")

try:
ulen = vw.detectUnicode(offset)
Expand All @@ -300,7 +305,9 @@ def read_string(vw, offset: int) -> str:
# vivisect seems to mis-detect the end unicode strings
# off by two, too short
ulen += 2
return read_memory(vw, offset, ulen).decode("utf-16")
# partition to account for bug #1271.
# remove when vivisect is fixed.
return read_memory(vw, offset, ulen).decode("utf-16").partition("\x00")[0]
Comment on lines +308 to +310
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we're not really able to guess at the correct string length. this has an edge case where we have UTF16 | NULL | junk and when we try to decode as UTF16 then it fails due to the junk. we'll fail to extract some valid strings in this case, which is unfortunate.


raise ValueError("not a string", offset)

Expand Down Expand Up @@ -665,11 +672,12 @@ def extract_op_string_features(

for v in derefs(f.vw, v):
try:
s = read_string(f.vw, v)
s = read_string(f.vw, v).rstrip("\x00")
except ValueError:
continue
else:
yield String(s.rstrip("\x00")), ih.address
if len(s) > 4:
yield String(s), ih.address


def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
Expand Down
7 changes: 7 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ def fixup_viv(path, extractor):
if "3b13b" in path:
# vivisect only recognizes calling thunk function at 0x10001573
extractor.vw.makeFunction(0x10006860)
if "294b8d" in path:
# see vivisect/#561
extractor.vw.makeFunction(0x404970)


@lru_cache(maxsize=1)
Expand Down Expand Up @@ -277,6 +280,8 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_")
elif name.startswith("bf7a9c"):
return os.path.join(CD, "data", "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_")
elif name.startswith("294b8d"):
return os.path.join(CD, "data", "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_")
else:
raise ValueError("unexpected sample fixture: %s" % name)

Expand Down Expand Up @@ -627,6 +632,8 @@ def parametrize(params, values, **kwargs):
("mimikatz", "function=0x40105D", capa.features.common.String("ACR > "), True),
("mimikatz", "function=0x40105D", capa.features.common.String("nope"), False),
("773290...", "function=0x140001140", capa.features.common.String(r"%s:\\OfficePackagesForWDAG"), True),
# overlapping string, see #1271
("294b8d...", "function=0x404970,bb=0x404970,insn=0x40499F", capa.features.common.String("\r\n\x00:ht"), False),
# insn/regex
("pma16-01", "function=0x4021B0", capa.features.common.Regex("HTTP/1.0"), True),
("pma16-01", "function=0x402F40", capa.features.common.Regex("www.practicalmalwareanalysis.com"), True),
Expand Down