forked from pypi/warehouse
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Event-based Malware check (pypi#7249)
* requirements: Introduce yara * [WIP] malware/check: SetupPatternCheck In progress. Introduces SetupPatternCheck, an implementation of an event-based check that scans the `setup.py`s of release files for suspicious patterns. * malware/checks: Give MalwareCheckBase.run/scan args, kwargs * malware: Add check preparation Fiddle with the check/run signature a bit more. * malware/checks: Unpack file path correctly * docker-compose: Override FILES_BACKEND for worker The worker needs to be able to see the "files" virtual host during development so that malware checks can fetch their underlying release files. * [WIP] malware/checks: setup.py extraction * malware/checks: setup_patterns: Fix enum, seek * malware/checks: setup_patterns: Apply YARA rules Each rule match becomes a verdict. * malware/checks: setup_patterns: Prefer get over filter * warehouse/{admin,malware}: Consistent enum names Also enforce uniqueness for enum values. * warehouse/{admin,malware}: More enum changes * tests: Update admin, malware tests * tests: Fix enum, more test fixes * tests: Add prepare tests * malware/changes: base: Unpack id correctly * tests: Begin adding SetupPatternCheck tests * malware/checks: setup_patterns: Fix enum * tests: More SetupPatternCheck tests * warehouse/malware: setup_patterns: Fix enums * tests: More SetupPatternCheck tests * tests: Add license header * malware/checks: setup_patterns: Add TODO * tests: More SetupPatternCheck tests * tests: More SetupPatternCheck tests * tests: Complete extraction tests for SetupPatternCheck * tests: Fix test * malware/checks: Add docstring for prepare * malware/checks: blacken * malware/checks: Document, expand YARA rules * tests, warehouse: Restructure utilities * malware: Order some enums, reduce SetupPatternCheck verdicts * malware/models: Add missing __lt__ * malware/checks: Always embed the model object in the prepared arguments Use it instead of performing a DB request in the check itself. 
* malware/checks: Avoid raw bytes * malware/changes: Remove unused import * tests: Fixup malware tests * warehouse/malware: blacken * tests: Fill in malware coverage * tests, warehouse: Add a benign verdict for SetupPatternCheck * tests: blacken
- Loading branch information
Showing
24 changed files
with
863 additions
and
92 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,5 +55,6 @@ typeguard | |
webauthn | ||
whitenoise | ||
WTForms>=2.0.0 | ||
yara-python | ||
zope.sqlalchemy | ||
zxcvbn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pretend | ||
import pytest | ||
import yara | ||
|
||
from warehouse.malware.checks.setup_patterns import check as c | ||
from warehouse.malware.models import ( | ||
MalwareCheckState, | ||
VerdictClassification, | ||
VerdictConfidence, | ||
) | ||
|
||
from .....common.db.malware import MalwareCheckFactory | ||
from .....common.db.packaging import FileFactory | ||
|
||
|
||
def test_initializes(db_session):
    """Constructing the check binds it to its model row and loads YARA rules."""
    model = MalwareCheckFactory.create(
        name="SetupPatternCheck", state=MalwareCheckState.Enabled
    )

    pattern_check = c.SetupPatternCheck(db_session)

    # The check's id must be the id of the model row created above.
    assert pattern_check.id == model.id
    assert isinstance(pattern_check._yara_rules, yara.Rules)
|
||
|
||
@pytest.mark.parametrize(
    ("obj", "file_url"),
    [
        (None, pretend.stub()),
        (pretend.stub(), None),
    ],
)
def test_scan_missing_kwargs(db_session, obj, file_url):
    """Scanning with ``obj`` or ``file_url`` set to None records no verdicts."""
    MalwareCheckFactory.create(
        name="SetupPatternCheck", state=MalwareCheckState.Enabled
    )
    scanner = c.SetupPatternCheck(db_session)

    scanner.scan(obj=obj, file_url=file_url)

    assert scanner._verdicts == []
|
||
|
||
def test_scan_non_sdist(db_session):
    """Scanning a ``bdist_wheel`` file records no verdicts."""
    MalwareCheckFactory.create(
        name="SetupPatternCheck", state=MalwareCheckState.Enabled
    )
    scanner = c.SetupPatternCheck(db_session)
    wheel = FileFactory.create(packagetype="bdist_wheel")

    scanner.scan(obj=wheel, file_url=pretend.stub())

    assert scanner._verdicts == []
|
||
|
||
def test_scan_no_setup_contents(db_session, monkeypatch):
    """An sdist with no extractable setup.py gets one Indeterminate verdict."""
    # Stub out the download and make extraction report "nothing found".
    fake_fetch = pretend.call_recorder(lambda *a: pretend.stub())
    fake_extract = pretend.call_recorder(lambda *a: None)
    monkeypatch.setattr(c, "fetch_url_content", fake_fetch)
    monkeypatch.setattr(c, "extract_file_content", fake_extract)

    MalwareCheckFactory.create(
        name="SetupPatternCheck", state=MalwareCheckState.Enabled
    )
    scanner = c.SetupPatternCheck(db_session)
    sdist = FileFactory.create(packagetype="sdist")

    scanner.scan(obj=sdist, file_url=pretend.stub())

    assert len(scanner._verdicts) == 1
    verdict = scanner._verdicts[0]
    assert verdict.check_id == scanner.id
    assert verdict.file_id == sdist.id
    assert verdict.classification == VerdictClassification.Indeterminate
    assert verdict.confidence == VerdictConfidence.High
    assert (
        verdict.message == "sdist does not contain a suitable setup.py for analysis"
    )
|
||
|
||
def test_scan_benign_contents(db_session, monkeypatch):
    """A setup.py with no matching pattern gets one low-confidence Benign verdict."""
    # Stub out the download and hand back harmless setup.py bytes.
    fake_fetch = pretend.call_recorder(lambda *a: pretend.stub())
    fake_extract = pretend.call_recorder(lambda *a: b"this is a benign string")
    monkeypatch.setattr(c, "fetch_url_content", fake_fetch)
    monkeypatch.setattr(c, "extract_file_content", fake_extract)

    MalwareCheckFactory.create(
        name="SetupPatternCheck", state=MalwareCheckState.Enabled
    )
    scanner = c.SetupPatternCheck(db_session)
    sdist = FileFactory.create(packagetype="sdist")

    scanner.scan(obj=sdist, file_url=pretend.stub())

    assert len(scanner._verdicts) == 1
    verdict = scanner._verdicts[0]
    assert verdict.check_id == scanner.id
    assert verdict.file_id == sdist.id
    assert verdict.classification == VerdictClassification.Benign
    assert verdict.confidence == VerdictConfidence.Low
    assert verdict.message == "No malicious patterns found in setup.py"
|
||
|
||
def test_scan_matched_content(db_session, monkeypatch):
    """A setup.py calling ``os.system`` gets one high-confidence Threat verdict."""
    # Stub out the download and hand back setup.py bytes that spawn a process.
    fake_fetch = pretend.call_recorder(lambda *a: pretend.stub())
    fake_extract = pretend.call_recorder(
        lambda *a: b"this looks suspicious: os.system('cat /etc/passwd')"
    )
    monkeypatch.setattr(c, "fetch_url_content", fake_fetch)
    monkeypatch.setattr(c, "extract_file_content", fake_extract)

    MalwareCheckFactory.create(
        name="SetupPatternCheck", state=MalwareCheckState.Enabled
    )
    scanner = c.SetupPatternCheck(db_session)
    sdist = FileFactory.create(packagetype="sdist")

    scanner.scan(obj=sdist, file_url=pretend.stub())

    assert len(scanner._verdicts) == 1
    verdict = scanner._verdicts[0]
    assert verdict.check_id == scanner.id
    assert verdict.file_id == sdist.id
    assert verdict.classification == VerdictClassification.Threat
    assert verdict.confidence == VerdictConfidence.High
    # The verdict message is expected to equal this rule-name string.
    assert verdict.message == "process_spawn_in_setup"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import io | ||
import tarfile | ||
import zipfile | ||
|
||
import pretend | ||
|
||
from warehouse.malware.checks import utils | ||
|
||
|
||
def test_fetch_url_content(monkeypatch):
    """``fetch_url_content`` GETs the URL, checks status, and buffers the body."""
    response = pretend.stub(
        raise_for_status=pretend.call_recorder(lambda: None), content=b"fake content"
    )
    requests = pretend.stub(get=pretend.call_recorder(lambda url: response))

    # Swap the ``requests`` name inside the utils module for the stub above.
    monkeypatch.setattr(utils, "requests", requests)

    # Named ``fetched`` rather than ``io`` so the module-level ``io`` import
    # is not shadowed inside this test.
    fetched = utils.fetch_url_content("hxxp://fake_url.com")

    assert requests.get.calls == [pretend.call("hxxp://fake_url.com")]
    assert response.raise_for_status.calls == [pretend.call()]
    assert fetched.getvalue() == b"fake content"
|
||
|
||
def test_extract_file_contents_zip():
    """``setup.py`` one directory deep in a zip archive is returned as bytes."""
    archive = io.BytesIO()
    entries = (
        ("toplevelgetsskipped", b"nothing to see here"),
        ("foo/setup.py", b"these are some contents"),
    )
    with zipfile.ZipFile(archive, mode="w") as zf:
        for arcname, payload in entries:
            zf.writestr(arcname, payload)
    # Rewind so the extractor reads the archive from the start.
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted == b"these are some contents"
|
||
|
||
def test_extract_file_contents_zip_no_file():
    """A zip archive without a ``setup.py`` member yields ``None``."""
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode="w") as zf:
        zf.writestr("foo/notsetup.py", b"these are some contents")
    # Rewind so the extractor reads the archive from the start.
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted is None
|
||
|
||
def test_extract_file_contents_tar():
    """``setup.py`` one directory deep in a gzipped tarball is returned as bytes."""
    archive = io.BytesIO()
    entries = (
        ("foo/setup.py", b"these are some contents"),
        ("toplevelgetsskipped", b"nothing to see here"),
    )
    with tarfile.open(fileobj=archive, mode="w:gz") as tf:
        for arcname, payload in entries:
            info = tarfile.TarInfo(name=arcname)
            # addfile() copies exactly ``size`` bytes, so it must be set.
            info.size = len(payload)
            tf.addfile(info, fileobj=io.BytesIO(payload))
    # Rewind so the extractor reads the archive from the start.
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted == b"these are some contents"
|
||
|
||
def test_extract_file_contents_tar_empty():
    """Bytes that are not a valid archive yield ``None``."""
    not_an_archive = io.BytesIO(b"invalid tar contents")

    extracted = utils.extract_file_content(not_an_archive, "setup.py")

    assert extracted is None
|
||
|
||
def test_extract_file_contents_tar_no_file():
    """A gzipped tarball without a ``setup.py`` member yields ``None``."""
    archive = io.BytesIO()
    entries = (
        ("foo/notsetup.py", b"these are some contents"),
        ("toplevelgetsskipped", b"nothing to see here"),
    )
    with tarfile.open(fileobj=archive, mode="w:gz") as tf:
        for arcname, payload in entries:
            info = tarfile.TarInfo(name=arcname)
            # addfile() copies exactly ``size`` bytes, so it must be set.
            info.size = len(payload)
            tf.addfile(info, fileobj=io.BytesIO(payload))
    # Rewind so the extractor reads the archive from the start.
    archive.seek(0)

    extracted = utils.extract_file_content(archive, "setup.py")

    assert extracted is None
Oops, something went wrong.