
Commit

Merge branch 'master' into paulway_improve_topological_sort
PaulWay authored Feb 6, 2023
2 parents 2574f19 + da20d33 commit 65dec4a
Showing 353 changed files with 1,600 additions and 1,197 deletions.
12 changes: 10 additions & 2 deletions docs/api_index.rst
@@ -30,6 +30,14 @@ insights.core.dr
:members:
:exclude-members: requires, optional, metadata, group, tags

insights.core.exceptions
------------------------

.. automodule:: insights.core.exceptions
:members:
:show-inheritance:
:undoc-members:

insights.core.filters
---------------------

@@ -74,8 +82,8 @@ insights.parsers
----------------

.. automodule:: insights.parsers
:members: ParseException, SkipException, calc_offset, get_active_lines,
keyword_search, optlist_to_dict, parse_delimited_table,
:members: calc_offset, get_active_lines, keyword_search,
optlist_to_dict, parse_delimited_table,
parse_fixed_table, split_kv_pairs, unsplit_lines
:show-inheritance:
:undoc-members:
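The api_index change above drops ParseException and SkipException from the insights.parsers member list and documents the new insights.core.exceptions module instead. A minimal import sketch under that assumption: SkipComponent, InvalidArchive, InvalidContentType, and CalledProcessError are imported from insights.core.exceptions elsewhere in this commit, and ParseException is assumed to live there as well.

```python
# Sketch only: import the relocated exception classes from their new home.
from insights.core.exceptions import (
    CalledProcessError,
    InvalidArchive,
    InvalidContentType,
    ParseException,   # assumed to be exported here alongside the others
    SkipComponent,
)
```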
8 changes: 3 additions & 5 deletions docs/exception_model.rst
@@ -82,7 +82,7 @@ any exceptions in the data (“dirty parser”). This allows rules that don’t
exceptions to rely on only the first parser, and those rules will not run if valid data
is not present. If the dirty parser identifies errors in the data then it will save
information regarding the errors for use by rules. If no errors are found in the data
then the dirty parser will raise :py:class:`insights.core.exceptions.SkipException`
then the dirty parser will raise :py:class:`insights.core.exceptions.SkipComponent`
to indicate to the engine that it should be removed from the dependency hierarchy.

Other Exceptions from Parsers
@@ -99,15 +99,13 @@ types aren’t important and such checks may limit expressiveness and flexibilit
Parsers should not use the assert statement in place of error handling code.
Asserts are for debugging purposes only.
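A brief, hedged illustration of that guidance, assuming ParseException is importable from the new insights.core.exceptions module:

```python
# Sketch only: raise a parser exception instead of asserting on input data.
from insights.core.exceptions import ParseException


def parse_count(line):
    # Avoid: assert line.isdigit(), "bad input"  (asserts are for debugging only)
    if not line.isdigit():
        raise ParseException("unexpected value: %s" % line)
    return int(line)
```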

SkipComponent and SkipException
===============================
SkipComponent
=============

Any component may raise `SkipComponent` to signal to the engine that
nothing is wrong but that the component should be taken out of dependency
resolution. This is useful if a component's dependencies are met but it's
still unable to produce a meaningful result.
:py:class:`insights.core.exceptions.SkipException` is a specialization of this for the
dirty parser use case above, but it's treated the same as `SkipComponent`.
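A minimal sketch of this pattern, using a hypothetical dirty parser over Specs.messages; only the SkipComponent import path comes from this commit:

```python
from insights.core import Parser
from insights.core.exceptions import SkipComponent
from insights.core.plugins import parser
from insights.specs import Specs


@parser(Specs.messages)
class MessagesErrors(Parser):
    """Hypothetical dirty parser that keeps only lines that look like errors."""

    def parse_content(self, content):
        self.errors = [line for line in content if "ERROR" in line]
        if not self.errors:
            # The data is fine, so take this component out of dependency
            # resolution; rules that depend on it simply will not run.
            raise SkipComponent("no errors found")
```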

Exception Recognition by the Insights Engine
============================================
3 changes: 3 additions & 0 deletions docs/shared_parsers_catalog/blacklisted.rst
@@ -0,0 +1,3 @@
.. automodule:: insights.parsers.blacklisted
:members:
:show-inheritance:
15 changes: 10 additions & 5 deletions insights/__init__.py
@@ -30,10 +30,10 @@
from insights.core import (CommandParser, ContainerParser, FileListing, IniConfigFile, JSONParser, LegacyItemAccess, # noqa: F401
LogFileOutput, Parser, Scannable, SysconfigOptions, Syslog, XMLParser, YAMLParser, dr, # noqa: F401
taglang)
from insights.core.archives import COMPRESSION_TYPES, InvalidArchive, InvalidContentType, extract
from insights.core.archives import COMPRESSION_TYPES, extract
from insights.core.context import (ClusterArchiveContext, ExecutionContext, HostContext, # noqa: F401
HostArchiveContext, SerializedArchiveContext)
from insights.core.exceptions import SkipComponent # noqa: F401
from insights.core.exceptions import InvalidArchive, InvalidContentType, SkipComponent # noqa: F401
from insights.core.filters import add_filter, apply_filters, get_filters # noqa: F401
from insights.core.hydration import create_context, initialize_broker # noqa: F401
from insights.core.plugins import (combiner, condition, datasource, fact, incident, make_fail, make_fingerprint, # noqa: F401
@@ -266,9 +266,8 @@ def _load_context(path):
return dr.get_component(path)


def run(component=None, root=None, print_summary=False,
context=None, inventory=None, print_component=None):

def run(component=None, root=None, print_summary=False, context=None, inventory=None, print_component=None,
store_skips=False):
args = None
formatters = None

@@ -293,6 +292,8 @@ def run(component=None, root=None, print_summary=False,
p.add_argument("--context", help="Execution Context. Defaults to HostContext if an archive isn't passed.")
p.add_argument("--no-load-default", help="Don't load the default plugins.", action="store_true")
p.add_argument("--parallel", help="Execute rules in parallel.", action="store_true")
p.add_argument("--show-skips", help="Capture skips in the broker for troubleshooting.", action="store_true",
default=False)
p.add_argument("--tags", help="Expression to select rules by tag.")

class Args(object):
@@ -385,6 +386,10 @@ class Args(object):
graph = dr.COMPONENTS[dr.GROUPS.single]

broker = dr.Broker()
if args:
broker.store_skips = args.show_skips
else:
broker.store_skips = store_skips

if args and args.bare:
ctx = ExecutionContext() # dummy context that no spec depend on. needed for filters to work
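The run() changes above add a store_skips keyword and a matching --show-skips CLI switch so the broker can record skipped components for troubleshooting. A hedged usage sketch; where the recorded skips are exposed on the broker is an assumption, since only the flag wiring appears in this diff.

```python
# Sketch only: enable skip tracking when driving insights programmatically.
from insights import run
from insights.parsers.hostname import Hostname

broker = run(Hostname, store_skips=True)
# The attribute holding the recorded skips is assumed, not shown in this commit.
print(getattr(broker, "skips", "no skips recorded"))
```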
59 changes: 44 additions & 15 deletions insights/client/apps/malware_detection/__init__.py
@@ -7,7 +7,7 @@
import logging
from glob import glob
from datetime import datetime
from tempfile import NamedTemporaryFile
from tempfile import NamedTemporaryFile, gettempdir
try:
# python 2
from urllib import quote as urlencode
@@ -21,7 +21,8 @@
from insights.client.utilities import (
generate_machine_id, write_data_to_file, get_time
)
from insights.util.subproc import call, CalledProcessError
from insights.core.exceptions import CalledProcessError
from insights.util.subproc import call

logger = logging.getLogger(__name__)
MIN_YARA_VERSION = "4.1.0"
@@ -186,6 +187,7 @@ def __init__(self, insights_config):
self.add_metadata = self._get_config_option('add_metadata', False)

self.matches = 0
self.potential_matches = 0

def run(self):
# Start the scans and record the time they were started
@@ -201,7 +203,11 @@ def run(self):

# Write a message to user informing them if there were matches or not and what to do next
if self.matches == 0:
logger.info("No rule matches found.\n")
if self.potential_matches == 0:
logger.info("No rule matches found.\n")
else:
logger.info("Rule matches potentially found but problems encountered parsing them, so no match data to upload.")
logger.info("Please contact support.\n")
else:
logger.info("Found %d rule match%s.", self.matches, 'es' if self.matches > 1 else '')
if not self.test_scan:
@@ -604,11 +610,12 @@ def _get_rules(self):
# However it can happen that the rules file isn't removed for some reason, so remove any existing
# rules files before beginning a new scan, otherwise they may show up as matches in the scan results.
old_rules_files = sum([glob(os.path.join(path, rules))
for path in ('/tmp', '/var/tmp')
for path in ('/tmp', '/var/tmp', '/usr/tmp', gettempdir())
for rules in ('.tmpmdsigs*', 'tmp_malware-detection-client_rules.*')], [])
for old_rules_file in old_rules_files:
logger.debug("Removing old rules file %s", old_rules_file)
os.remove(old_rules_file)
if os.path.exists(old_rules_file):
logger.debug("Removing old rules file %s", old_rules_file)
os.remove(old_rules_file)

self.rules_location = self._get_config_option('rules_location', '')

@@ -741,8 +748,16 @@ def scan_filesystem(self):
return False

# Exclude the rules file and insights-client log files, unless they are things we specifically want to scan
if self.rules_file not in self.scan_fsobjects:
self.filesystem_scan_exclude_list.append(self.rules_file)
# Get a list of potential rules file locations, eg /tmp, /var/tmp, /usr/tmp and gettempdir()
# eg customers may have /tmp linked to /var/tmp, so both must be checked when excluding the downloaded rules
rules_file_name = os.path.basename(self.rules_file)
potential_tmp_dirs = set([gettempdir(), '/tmp', '/var/tmp', '/usr/tmp'])
potential_rules_files = set(list(map(lambda d: os.path.join(d, rules_file_name), potential_tmp_dirs)) + [self.rules_file])
rules_files = list(filter(lambda f: os.path.isfile(f), potential_rules_files))
for rules_file in rules_files:
if rules_file not in self.scan_fsobjects:
self.filesystem_scan_exclude_list.append(rules_file)
logger.debug("Excluding rules file: %s", rules_file)
insights_log_files = glob(constants.default_log_file + '*')
self.filesystem_scan_exclude_list.extend(list(set(insights_log_files) - set(self.scan_fsobjects)))

@@ -795,7 +810,12 @@ def scan_filesystem(self):
logger.debug("Unable to scan %s: %s", toplevel_dir, cpe.output.strip())
continue

self.parse_scan_output(output.strip())
try:
self.parse_scan_output(output.strip())
except Exception as e:
self.potential_matches += 1
logger.exception("Rule match(es) potentially found in %s but problems encountered parsing the results: %s. Skipping ...",
toplevel_dir, str(e))

dir_scan_end = time.time()
logger.info("Scan time for %s: %d seconds", toplevel_dir, (dir_scan_end - dir_scan_start))
@@ -862,7 +882,12 @@ def scan_processes(self):
logger.debug("Unable to scan process %s: %s", scan_pid, cpe.output.strip())
continue

self.parse_scan_output(output)
try:
self.parse_scan_output(output)
except Exception as e:
self.potential_matches += 1
logger.exception("Rule match(es) potentially found in process %s but problems encountered parsing the results: %s. Skipping ...",
scan_pid, str(e))

pid_scan_end = time.time()
logger.info("Scan time for process %s: %d seconds", scan_pid, (pid_scan_end - pid_scan_start))
@@ -969,11 +994,15 @@ def skip_string_data_lines(string_data_lines):
rule_match['matches'] = [rule_match_dict]

if self.add_metadata:
# Add extra data to each rule match, beyond what yara provides
# Eg, for files: line numbers & context, checksums; for processes: process name
# TODO: find more pythonic ways of doing this stuff instead of using system commands
metadata_func = self._add_file_metadata if source_type == 'file' else self._add_process_metadata
metadata_func(rule_match['matches'])
try:
# Add extra data to each rule match, beyond what yara provides
# Eg, for files: line numbers & context, checksums; for processes: process name
# TODO: find more pythonic ways of doing this stuff instead of using system commands
metadata_func = self._add_file_metadata if source_type == 'file' else self._add_process_metadata
metadata_func(rule_match['matches'])
except Exception as e:
logger.error("Error adding metadata to rule match %s in %s %s: %s. Skipping ...",
rule_name, source_type, source, str(e))

self.matches += 1
logger.info("Matched rule %s in %s %s", rule_name, source_type, source)
11 changes: 11 additions & 0 deletions insights/client/data_collector.py
@@ -15,6 +15,7 @@
from subprocess import Popen, PIPE, STDOUT
from tempfile import NamedTemporaryFile

from insights.core.blacklist import BLACKLISTED_SPECS
from insights.util import mangle
from ..contrib.soscleaner import SOSCleaner
from .utilities import _expand_paths, get_version_info, systemd_notify_init_thread, get_tags
@@ -132,6 +133,10 @@ def _write_blacklist_report(self, blacklist_report):
self.archive.add_metadata_to_archive(
json.dumps(blacklist_report), '/blacklist_report')

if BLACKLISTED_SPECS:
self.archive.add_metadata_to_archive(
json.dumps({"specs": BLACKLISTED_SPECS}), '/blacklisted_specs.txt')

def _write_egg_release(self):
logger.debug("Writing egg release to archive...")
egg_release = ''
@@ -327,11 +332,13 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report):
'insights_commands', mangle.mangle_command(c['command']))
if c['command'] in rm_commands or c.get('symbolic_name') in rm_commands:
logger.warn("WARNING: Skipping command %s", c['command'])
BLACKLISTED_SPECS.append(c['symbolic_name'])
elif self.mountpoint == "/" or c.get("image"):
cmd_specs = self._parse_command_spec(c, conf['pre_commands'])
for s in cmd_specs:
if s['command'] in rm_commands:
logger.warn("WARNING: Skipping command %s", s['command'])
BLACKLISTED_SPECS.append(s['symbolic_name'])
continue
cmd_spec = InsightsCommand(self.config, s, self.mountpoint)
self.archive.add_to_archive(cmd_spec)
@@ -343,12 +350,14 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report):
for f in conf['files']:
if f['file'] in rm_files or f.get('symbolic_name') in rm_files:
logger.warn("WARNING: Skipping file %s", f['file'])
BLACKLISTED_SPECS.append(f['symbolic_name'])
else:
file_specs = self._parse_file_spec(f)
for s in file_specs:
# filter files post-wildcard parsing
if s['file'] in rm_conf.get('files', []):
logger.warn("WARNING: Skipping file %s", s['file'])
BLACKLISTED_SPECS.append(s['symbolic_name'])
else:
file_spec = InsightsFile(s, self.mountpoint)
self.archive.add_to_archive(file_spec)
@@ -361,11 +370,13 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report):
if g.get('symbolic_name') in rm_files:
# ignore glob via symbolic name
logger.warn("WARNING: Skipping file %s", g['glob'])
BLACKLISTED_SPECS.append(g['symbolic_name'])
else:
glob_specs = self._parse_glob_spec(g)
for g in glob_specs:
if g['file'] in rm_files:
logger.warn("WARNING: Skipping file %s", g['file'])
BLACKLISTED_SPECS.append(g['symbolic_name'])
else:
glob_spec = InsightsFile(g, self.mountpoint)
self.archive.add_to_archive(glob_spec)
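Each skipped command, file, or glob spec above is recorded in BLACKLISTED_SPECS by its symbolic name, and _write_blacklist_report serializes the list into the archive as /blacklisted_specs.txt. A hedged illustration of that payload, with hypothetical spec names:

```python
import json

# Mirrors json.dumps({"specs": BLACKLISTED_SPECS}) from the change above.
BLACKLISTED_SPECS = ["installed_rpms", "ps_aux"]  # hypothetical symbolic names
print(json.dumps({"specs": BLACKLISTED_SPECS}))
# {"specs": ["installed_rpms", "ps_aux"]}
```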
54 changes: 49 additions & 5 deletions insights/collect.py
@@ -9,6 +9,7 @@
"""
from __future__ import print_function
import argparse
import json
import logging
import os
import sys
@@ -17,11 +18,13 @@

from datetime import datetime

from insights import apply_configs, apply_default_enabled, dr, get_pool
from insights.core import blacklist, filters
from insights import apply_configs, apply_default_enabled, get_pool
from insights.core import blacklist, dr, filters
from insights.core.blacklist import BLACKLISTED_SPECS
from insights.core.exceptions import CalledProcessError
from insights.core.serde import Hydration
from insights.util import fs
from insights.util.subproc import call, CalledProcessError
from insights.util.subproc import call

SAFE_ENV = {
"PATH": os.path.pathsep.join([
@@ -203,8 +206,8 @@
- name: insights.components.virtualization.IsBareMetal
enabled: true
# needed for the 'pre-check' of the 'ss' spec
- name: insights.parsers.lsmod
# needed for the 'pre-check' of the 'ss' spec and the 'modinfo_filtered_modules' spec
- name: insights.parsers.lsmod.LsMod
enabled: true
# needed for the 'pre-check' of the 'is_satellite_server' spec
@@ -401,6 +404,7 @@ def collect(manifest=default_manifest, tmp_path=None, compress=False, rm_conf=No
log.warning('WARNING: Unknown component in blacklist: %s' % component)
else:
dr.set_enabled(component, enabled=False)
BLACKLISTED_SPECS.append(component.split('.')[-1])
log.warning('WARNING: Skipping component: %s', component)

to_persist = get_to_persist(client.get("persist", set()))
@@ -437,6 +441,11 @@ def collect(manifest=default_manifest, tmp_path=None, compress=False, rm_conf=No
broker.add_observer(h.make_persister(to_persist))
dr.run_all(broker=broker, pool=pool)

if BLACKLISTED_SPECS:
_write_out_blacklisted_specs(output_path)
# Delete the list so the specs aren't written again by the client.
del BLACKLISTED_SPECS[:]

collect_errors = _parse_broker_exceptions(broker, EXCEPTIONS_TO_REPORT)

if compress:
Expand Down Expand Up @@ -472,6 +481,41 @@ def _parse_broker_exceptions(broker, exceptions_to_report):
return errors


def _write_out_blacklisted_specs(output_path):
"""
Write out the blacklisted specs to blacklisted_specs.txt, and create
a meta-data file for this file. That way it can be loaded when the
archive is processed.
Args:
output_path (str): Path of the output directory.
"""
if os.path.exists(os.path.join(output_path, "meta_data")):
output_path_root = os.path.join(output_path, "data")
else:
output_path_root = output_path

with open(os.path.join(output_path_root, "blacklisted_specs.txt"), "w") as of:
json.dump({"specs": BLACKLISTED_SPECS}, of)

doc = {
"name": "insights.specs.Specs.blacklisted_specs",
"exec_time": 0.0,
"errors": [],
"results": {
"type": "insights.core.spec_factory.DatasourceProvider",
"object": {
"relative_path": "blacklisted_specs.txt"
}
},
"ser_time": 0.0
}

meta_path = os.path.join(os.path.join(output_path, "meta_data"), "insights.specs.Specs.blacklisted_specs")
with open(meta_path, "w") as of:
json.dump(doc, of)
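
Per the function above, a collection directory that already has a meta_data directory ends up with the spec data under data/ plus a serde metadata stub pointing at it. A hedged sketch of reading both artifacts back; output_path and the spec names are hypothetical:

```python
import json
import os

output_path = "/tmp/insights-collection"  # hypothetical collection directory

with open(os.path.join(output_path, "data", "blacklisted_specs.txt")) as f:
    print(json.load(f)["specs"])  # e.g. ["installed_rpms", "ps_aux"]

meta = os.path.join(output_path, "meta_data", "insights.specs.Specs.blacklisted_specs")
with open(meta) as f:
    print(json.load(f)["results"]["object"]["relative_path"])  # "blacklisted_specs.txt"
```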


def main():
# Remove command line args so that they are not parsed by any called modules
# The main fxn is only invoked as a cli, if calling from another cli then