Skip to content

Commit

Permalink
Moved preprocess object to attribute container and clean up log2timel…
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Jul 16, 2016
1 parent 921cb68 commit 1a10c9a
Show file tree
Hide file tree
Showing 18 changed files with 165 additions and 885 deletions.
1 change: 1 addition & 0 deletions plaso/containers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from plaso.containers import errors
from plaso.containers import event_sources
from plaso.containers import events
from plaso.containers import preprocess
from plaso.containers import reports
from plaso.containers import sessions
from plaso.containers import tasks
72 changes: 72 additions & 0 deletions plaso/containers/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# -*- coding: utf-8 -*-
"""The attribute container object definitions."""

from plaso.containers import interface
from plaso.containers import manager


class PreprocessObject(interface.AttributeContainer):
  """Attribute container holding the information gained from preprocessing.

  Attributes:
    collection_information (dict[str, object]): collection information.
    zone (str): time zone.
  """
  CONTAINER_TYPE = u'preprocess'

  def __init__(self):
    """Initializes the preprocess object."""
    super(PreprocessObject, self).__init__()
    # Lazily built cache of user identifier to username, see GetUserMappings.
    self._user_mappings = None
    self.collection_information = {}
    self.zone = u'UTC'

  def GetPathAttributes(self):
    """Retrieves the path attributes.

    Returns:
      dict[str, str]: path attributes e.g. {'SystemRoot': 'C:\\Windows'}
    """
    # TODO: improve this to only return known environment variables.
    # Note that this is the live attribute dictionary of the object itself,
    # not a copy.
    return vars(self)

  def GetUserMappings(self):
    """Retrieves mappings of user identifiers to usernames.

    The mappings are built from the optional "users" attribute and cached
    on the object once at least one mapping was found.

    Returns:
      dict[str, str]: mapping of SIDs or UIDs to usernames.
    """
    mappings = self._user_mappings
    if mappings is None:
      mappings = self._user_mappings = {}

    # A non-empty cache is reused as-is; an empty one is rebuilt on each call.
    if mappings:
      return mappings

    for user_entry in getattr(self, u'users', []):
      # A present "sid" key takes precedence over "uid", even when its value
      # is empty; entries without a usable identifier are skipped.
      identifier_key = u'sid' if u'sid' in user_entry else u'uid'
      identifier = user_entry.get(identifier_key, u'')
      if not identifier:
        continue

      # Fall back to the identifier itself when no name is available.
      mappings[identifier] = user_entry.get(u'name', identifier)

    return mappings

  def GetUsernameById(self, user_identifier):
    """Retrieves the username for a specific user identifier.

    Args:
      user_identifier (str): user identifier, either a SID or UID.

    Returns:
      str: username if available, otherwise '-'.
    """
    return self.GetUserMappings().get(user_identifier, u'-')


# Register the preprocess container class with the attribute containers manager.
manager.AttributeContainersManager.RegisterAttributeContainer(PreprocessObject)
10 changes: 5 additions & 5 deletions plaso/engine/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
codepage of the source data.
"""

from plaso.lib import event
from plaso.containers import preprocess
from plaso.lib import py2to3

import pytz # pylint: disable=wrong-import-order
Expand All @@ -32,7 +32,7 @@ def __init__(self, pre_obj=None):
if pre_obj:
self._pre_obj = pre_obj
else:
self._pre_obj = event.PreprocessObject()
self._pre_obj = preprocess.PreprocessObject()

self._default_codepage = u'cp1252'
self._default_timezone = pytz.timezone(u'UTC')
Expand Down Expand Up @@ -66,8 +66,8 @@ def platform(self, value):

@property
def timezone(self):
"""The timezone object."""
return getattr(self._pre_obj, u'zone', self._default_timezone)
"""datetime.tzinfo: timezone."""
return self._default_timezone

@property
def users(self):
Expand Down Expand Up @@ -228,7 +228,7 @@ def SetDefaultTimezone(self, timezone):
"""Sets the default timezone.
Args:
timezone: the default timezone.
timezone (datetime.tzinfo): default timezone.
"""
# TODO: check if value is sane.
self._default_timezone = timezone
Expand Down
38 changes: 14 additions & 24 deletions plaso/frontend/extraction_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@

from plaso import hashers # pylint: disable=unused-import
from plaso import parsers # pylint: disable=unused-import
from plaso.containers import preprocess
from plaso.containers import sessions
from plaso.engine import single_process
from plaso.engine import utils as engine_utils
from plaso.frontend import frontend
from plaso.lib import definitions
from plaso.lib import errors
from plaso.lib import event
from plaso.multi_processing import engine as multi_process_engine
from plaso.hashers import manager as hashers_manager
from plaso.parsers import manager as parsers_manager
Expand Down Expand Up @@ -232,7 +232,7 @@ def _PreprocessSources(self, source_path_specs, source_type):
except IOError as exception:
logging.error(u'Unable to preprocess with error: {0:s}'.format(
exception))
return event.PreprocessObject()
return preprocess.PreprocessObject()

logging.debug(u'Preprocessing done.')

Expand All @@ -241,7 +241,7 @@ def _PreprocessSources(self, source_path_specs, source_type):
preprocess_object = getattr(self._engine.knowledge_base, u'_pre_obj', None)

if not preprocess_object:
preprocess_object = event.PreprocessObject()
preprocess_object = preprocess.PreprocessObject()

return preprocess_object

Expand Down Expand Up @@ -277,40 +277,30 @@ def _PreprocessSetCollectionInformation(self, preprocess_object):

preprocess_object.collection_information = collection_information

def _PreprocessSetTimezone(self, preprocess_object, timezone=pytz.UTC):
"""Sets the timezone as part of the preprocessing.
def _SetDefaultTimezone(self, preprocess_object, timezone=pytz.UTC):
"""Sets the default timezone.
Args:
preprocess_object: a preprocess object (instance of PreprocessObject).
timezone: optional preferred timezone.
preprocess_object (PreprocessObject): preprocess object.
timezone (Optional[datetime.tzinfo]): timezone.
"""
if not timezone:
timezone = pytz.UTC

default_timezone = timezone
if hasattr(preprocess_object, u'time_zone_str'):
logging.info(u'Setting timezone to: {0:s}'.format(
preprocess_object.time_zone_str))

try:
preprocess_object.zone = pytz.timezone(preprocess_object.time_zone_str)
default_timezone = pytz.timezone(preprocess_object.time_zone_str)

except pytz.UnknownTimeZoneError:
if not timezone:
logging.warning(u'timezone was not properly set, defaulting to UTC')
timezone = pytz.UTC
else:
logging.warning((
u'Unable to automatically configure timezone falling back '
u'to preferred timezone value: {0:s}').format(timezone))
preprocess_object.zone = timezone

else:
# TODO: shouldn't the user to be able to always override the timezone
# detection? Or do we need an input sanitization function.
preprocess_object.zone = timezone
logging.warning(
u'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
preprocess_object.time_zone_str, timezone.zone))

if not getattr(preprocess_object, u'zone', None):
preprocess_object.zone = timezone
self._engine.knowledge_base.SetDefaultTimezone(default_timezone)

def DisableProfiling(self):
"""Disabled profiling."""
Expand Down Expand Up @@ -480,7 +470,7 @@ def ProcessSources(
hasher_names_string=hasher_names_string):
self._hasher_names.append(hasher_name)

self._PreprocessSetTimezone(preprocess_object, timezone=timezone)
self._SetDefaultTimezone(preprocess_object, timezone=timezone)

if filter_file:
path_attributes = self._engine.knowledge_base.GetPathAttributes()
Expand Down
16 changes: 2 additions & 14 deletions plaso/frontend/psort.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ def _ProcessStorage(
output_module (OutputModule): output module.
storage_file (StorageFile): storage file.
analysis_plugins (list[AnalysisPlugin]): analysis plugins that should
be run.
be run.
event_queue_producers (list[ItemQueueProducer]): event queue producers.
command_line_arguments (Optional[str]): command line arguments.
deduplicate_events (Optional[bool]): True if events should be
deduplicated.
deduplicated.
preferred_encoding (Optional[str]): preferred encoding.
time_slice (Optional[TimeSlice]): slice of time to output.
use_time_slicer (Optional[bool]): True if the 'time slicer' should be
Expand Down Expand Up @@ -241,17 +241,6 @@ def _ProcessStorage(
session_completion = session.CreateSessionCompletion()
storage_file.WriteSessionCompletion(session_completion)

for information in storage_file.GetStorageInformation():
if getattr(information, u'counter', None):
total = information.counter.get(u'total')
if total:
counter[u'Stored Events'] += total

if self._filter_object and not counter[u'Limited By']:
counter[u'Filter By Date'] = (
counter[u'Stored Events'] - counter[u'Events Included'] -
counter[u'Events Filtered Out'])

return counter

def _SetAnalysisPluginProcessInformation(
Expand All @@ -273,7 +262,6 @@ def _SetAnalysisPluginProcessInformation(
pre_obj.collection_information[u'method'] = u'Running Analysis Plugins'
pre_obj.collection_information[u'plugins'] = analysis_plugin_names
pre_obj.collection_information[u'time_of_run'] = time_of_run
pre_obj.counter = collections.Counter()

# TODO: fix docstring, function does not create the pre_obj the call to
# storage does. Likely refactor this functionality into the storage API.
Expand Down
120 changes: 0 additions & 120 deletions plaso/lib/event.py

This file was deleted.

Loading

0 comments on commit 1a10c9a

Please sign in to comment.