Skip to content

Commit

Permalink
fixes to filebeat's clean-processed-folder
Browse files Browse the repository at this point in the history
  • Loading branch information
mmguero committed May 21, 2024
1 parent dd840ee commit e148cc6
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 19 deletions.
25 changes: 12 additions & 13 deletions filebeat/scripts/clean-processed-folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
import fcntl
import magic
import json
import pprint
import re
from subprocess import Popen, PIPE
from subprocess import Popen, PIPE, DEVNULL
from malcolm_utils import LoadFileIfJson, deep_get

lockFilename = os.path.join(gettempdir(), '{}.lock'.format(os.path.basename(__file__)))
cleanLogSeconds = int(os.getenv('LOG_CLEANUP_MINUTES', "30")) * 60
Expand Down Expand Up @@ -53,22 +53,21 @@ def checkFile(filename, filebeatReg=None, checkLogs=True, checkArchives=True):
if fileStatInfo:
fileFound = any(
(
(entry['FileStateOS'])
and (entry['FileStateOS']['device'] == fileStatInfo.st_dev)
and (entry['FileStateOS']['inode'] == fileStatInfo.st_ino)
(deep_get(entry, ['v', 'FileStateOS', 'device']) == fileStatInfo.st_dev)
and (deep_get(entry, ['v', 'FileStateOS', 'inode']) == fileStatInfo.st_ino)
)
for entry in filebeatReg
)
if fileFound:
# found a file in the filebeat registry, so leave it alone!
# we only want to delete files that filebeat has forgotten
# print "{} is found in registry!".format(filename)
# print(f"{filename} is found in registry!")
return
# else:
# print "{} is NOT found in registry!".format(filename)
# print(f"{filename} is NOT found in registry!")

# now see if the file is in use by any other process in the system
fuserProcess = Popen(["fuser", "-s", filename], stdout=PIPE)
fuserProcess = Popen(["fuser", "-s", filename], stdout=PIPE, stderr=DEVNULL)
fuserProcess.communicate()
fuserExitCode = fuserProcess.wait()
if fuserExitCode != 0:
Expand All @@ -88,15 +87,15 @@ def checkFile(filename, filebeatReg=None, checkLogs=True, checkArchives=True):

if (cleanSeconds > 0) and (lastUseTime >= cleanSeconds):
# this is a closed file that is old, so delete it
print('removing old file "{}" ({}, used {} seconds ago)'.format(filename, fileType, lastUseTime))
print(f'removing old file "{filename}" ({fileType}, used {lastUseTime} seconds ago)')
silentRemove(filename)

except FileNotFoundError:
# file's already gone, oh well
pass

except Exception as e:
print("{} for '{}': {}".format(type(e).__name__, filename, e))
print(f"{type(e).__name__} for '{filename}': {e}")


def pruneFiles():
Expand All @@ -120,7 +119,7 @@ def pruneFiles():
fbReg = None
if os.path.isfile(fbRegFilename):
with open(fbRegFilename) as f:
fbReg = json.load(f)
fbReg = LoadFileIfJson(f, attemptLines=True)

# see if the files we found are in use and old enough to be pruned
for file in zeekFoundFiles:
Expand All @@ -132,7 +131,7 @@ def pruneFiles():
for current in os.listdir(zeekCurrentDir):
currentFileSpec = os.path.join(zeekCurrentDir, current)
if os.path.islink(currentFileSpec) and not os.path.exists(currentFileSpec):
print('removing dead symlink "{}"'.format(currentFileSpec))
print(f'removing dead symlink "{currentFileSpec}"')
silentRemove(currentFileSpec)

# clean up any old and empty directories in Zeek processed/ directory
Expand All @@ -150,7 +149,7 @@ def pruneFiles():
if dirAge >= cleanDirSeconds:
try:
os.rmdir(dirToRm)
print('removed empty directory "{}" (used {} seconds ago)'.format(dirToRm, dirAge))
print(f'removed empty directory "{dirToRm}" (used {dirAge} seconds ago)')
except OSError:
pass

Expand Down
31 changes: 25 additions & 6 deletions scripts/malcolm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,12 +379,31 @@ def LoadStrIfJson(jsonStr):

###################################################################################################
# attempt to decode a file (given by handle) as JSON, returning the object if it decodes and
# None otherwise
def LoadFileIfJson(fileHandle):
    """Decode the contents of an open file handle as a single JSON document.

    Args:
        fileHandle: readable file object positioned at the start of the JSON
            text, or None.

    Returns:
        The decoded object, or None when no handle was given or the contents
        are not valid JSON.
    """
    # a missing handle is treated like undecodable input rather than an error
    if fileHandle is None:
        return None

    try:
        return json.load(fileHandle)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass
        return None
# None otherwise. Also, if attemptLines=True, attempt to handle cases of a file containing
# individual lines of valid JSON.
def LoadFileIfJson(fileHandle, attemptLines=False):
    """Decode the contents of an open file handle as JSON.

    The stream is first parsed as one JSON document. If that fails and
    attemptLines is true, the handle is rewound to where the first attempt
    began and every line is parsed on its own (newline-delimited JSON); lines
    that are not valid JSON are skipped.

    Args:
        fileHandle: readable file object, or None. The line-by-line fallback
            additionally needs the handle to support seek().
        attemptLines: when true, fall back to line-by-line parsing if the
            contents are not a single valid JSON document.

    Returns:
        The decoded document when the whole stream is valid JSON; a list of
        the decoded lines when the fallback was used and at least one line
        decoded; None otherwise (including when fileHandle is None).
    """
    if fileHandle is None:
        return None

    startPos = 0
    if attemptLines:
        # remember where this read begins so the fallback re-reads exactly the
        # data the first attempt saw, not whatever precedes it in the file
        try:
            startPos = fileHandle.tell()
        except (AttributeError, OSError, ValueError):
            # position unavailable: fall back to the start of the stream
            startPos = 0

    try:
        # a successful parse is returned as-is, even when the document is the
        # JSON literal null (None); that is not a decode failure
        return json.load(fileHandle)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass
        if not attemptLines:
            return None

    fileHandle.seek(startPos)
    decodedLines = []
    for line in fileHandle:
        try:
            decodedLines.append(json.loads(line))
        except ValueError:
            # blank or malformed line: skip it and keep whatever else decodes
            continue

    # an empty result means nothing in the file was JSON at all
    return decodedLines or None


###################################################################################################
Expand Down

0 comments on commit e148cc6

Please sign in to comment.