diff --git a/scripts/install.py b/scripts/install.py index 2d810ca7a..799af4b0f 100755 --- a/scripts/install.py +++ b/scripts/install.py @@ -33,6 +33,7 @@ from malcolm_common import ( AskForString, + BoundPathReplacer, ChooseMultiple, ChooseOne, DetermineYamlFileFormat, @@ -42,8 +43,10 @@ DOCKER_INSTALL_URLS, DotEnvDynamic, DownloadToFile, + DumpYaml, HOMEBREW_INSTALL_URLS, KubernetesDynamic, + LoadYaml, MalcolmCfgRunOnceFile, MalcolmPath, OrchestrationFramework, @@ -75,6 +78,7 @@ MALCOLM_PCAP_DIR, MALCOLM_LOGS_DIR, deep_get, + deep_set, eprint, flatten, LoadFileIfJson, @@ -753,8 +757,6 @@ def tweak_malcolm_runtime(self, malcolm_install_path): ) else: restartMode = 'no' - if restartMode == 'no': - restartMode = '"no"' if malcolmProfile == PROFILE_MALCOLM: nginxSSL = InstallerYesOrNo('Require encrypted HTTPS connections?', default=args.nginxSSL) @@ -2157,360 +2159,196 @@ def tweak_malcolm_runtime(self, malcolm_install_path): # save off owner of original files composeFileStat = os.stat(composeFile) origUid, origGuid = composeFileStat[4], composeFileStat[5] - composeFileHandle = fileinput.FileInput(composeFile, inplace=True, backup=None) try: - sectionIndents = defaultdict(lambda: ' ') - currentSection = None - currentService = None - networkWritten = False - - for line in composeFileHandle: - line = line.rstrip("\n") - skipLine = False - sectionStartLine = False - serviceStartLine = False - - # it would be cleaner to use something like PyYAML to do this, but I want to have as few dependencies - # as possible so we're going to do it janky instead. Also, as of right now pyyaml doesn't preserve - # comments, which is a big deal for this complicated docker-compose file. There is - # https://pypi.org/project/ruamel.yaml to possibly consider if we're comfortable with the dependency. - - # determine which section of the compose file we are in (e.g., services, networks, volumes, etc.) 
- sectionMatch = re.match(r'^([^\s#]+):\s*(#.*)?$', line) - if sectionMatch is not None: - currentSection = sectionMatch.group(1) - sectionStartLine = True - currentService = None - - # determine indentation for each compose file section (assumes YML file is consistently indented) - if (currentSection is not None) and (currentSection not in sectionIndents): - indentMatch = re.search(r'^(\s+)\S+\s*:\s*$', line) - if indentMatch is not None: - sectionIndents[currentSection] = indentMatch.group(1) - - # determine which service we're currently processing in the YML file - if currentSection == 'services': - serviceMatch = re.search(fr'^{sectionIndents[currentSection]}(\S+)\s*:\s*$', line) - if serviceMatch is not None: - currentService = serviceMatch.group(1).lower() - serviceStartLine = True - - if (currentSection == 'services') and (not serviceStartLine) and (currentService is not None): - # down in the individual services sections of the compose file - - if re.match(r'^\s*restart\s*:.*$', line): - # whether or not to restart services automatically (on boot, etc.) - line = f"{sectionIndents[currentSection] * 2}restart: {restartMode}" - - elif re.match(r'^\s*image\s*:.*$', line): - # use architecture-specific images - imageLineSpit = line.rstrip().split(":") + # load the docker-compose file + data = LoadYaml(composeFile) + + if 'services' in data: + + # stuff for all services + for service in data['services']: + + # whether or not to restart services automatically (on boot, etc.) 
+ deep_set( + data, + ['services', service, 'restart'], + restartMode, + ) + + # use architecture-specific images + image = deep_get( + data, + ['services', service, 'image'], + ) + if image: + imageLineSpit = image.rstrip().split(":") imageLineSpit[-1] = imageLineSpit[-1].split("-", 1)[0] + args.imageArch - line = ":".join(imageLineSpit) - - elif (currentService == 'arkime') or (currentService == 'arkime-live'): - # stuff specifically in the arkime section - if re.match(r'^\s*-.+:/data/pcap(:.+)?\s*$', line): - # Arkime's reference to the PCAP directory - line = ReplaceBindMountLocation( - line, - pcapDir, - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'filebeat': - # stuff specifically in the filebeat section - if re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): - # set bind IP based on whether it should be externally exposed or not - line = re.sub( - r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', - fr"\g<1>{'0.0.0.0' if filebeatTcpOpen else '127.0.0.1'}:\g<3>", - line, - ) - - elif re.match(r'^\s*-.+:/suricata(:.+)?\s*$', line): - # filebeat's reference to the suricata-logs directory - line = ReplaceBindMountLocation( - line, - suricataLogDir, - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/zeek(:.+)?\s*$', line): - # filebeat's reference to the zeek-logs directory - line = ReplaceBindMountLocation( - line, - zeekLogDir, - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'file-monitor': - # stuff specifically in the file-monitor section - if re.match(r'^\s*-.+:/zeek/extract_files(:.+)?\s*$', line): - # file-monitor's reference to the zeek-logs/extract_files directory - line = ReplaceBindMountLocation( - line, - os.path.join(zeekLogDir, 'extract_files'), - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/zeek/logs(:.+)?\s*$', line): - # zeek's reference to the zeek-logs/current directory - line = ReplaceBindMountLocation( - line, - os.path.join(zeekLogDir, 'current'), - 
sectionIndents[currentSection] * 3, - ) - - elif currentService == 'logstash': - # stuff specifically in the logstash section - if re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): - # set bind IP based on whether it should be externally exposed or not - line = re.sub( - r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', - fr"\g<1>{'0.0.0.0' if logstashOpen else '127.0.0.1'}:\g<3>", - line, - ) - - elif currentService == 'opensearch': - # stuff specifically in the opensearch section - if re.match(r'^\s*-.+:/usr/share/opensearch/data(:.+)?\s*$', line): - # OpenSearch indexes directory - line = ReplaceBindMountLocation( - line, - indexDir, - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/opt/opensearch/backup(:.+)?\s*$', line): - # OpenSearch backup directory - line = ReplaceBindMountLocation( - line, - indexSnapshotDir, - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'pcap-capture': - # stuff specifically in the pcap-capture section - if re.match(r'^\s*-.+:/pcap(:.+)?\s*$', line): - # pcap-capture's reference to the PCAP directory - line = ReplaceBindMountLocation( - line, - os.path.join(pcapDir, 'upload'), - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'pcap-monitor': - # stuff specifically in the pcap-monitor section - if re.match(r'^\s*-.+:/pcap(:.+)?\s*$', line): - # pcap-monitor's reference to the PCAP directory - line = ReplaceBindMountLocation( - line, - pcapDir, - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/zeek(:.+)?\s*$', line): - # pcap-monitor's reference to the zeek-logs directory - line = ReplaceBindMountLocation( - line, - zeekLogDir, - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'suricata': - # stuff specifically in the suricata section - if re.match(r'^\s*-.+:/data/pcap(:.+)?\s*$', line): - # Suricata's reference to the PCAP directory - line = ReplaceBindMountLocation( - line, - pcapDir, - sectionIndents[currentSection] * 3, - ) - - elif 
re.match(r'^\s*-.+:/var/log/suricata(:.+)?\s*$', line): - # suricata's reference to the suricata-logs directory - line = ReplaceBindMountLocation( - line, - suricataLogDir, - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'suricata-live': - # stuff specifically in the suricata-live section - if re.match(r'^\s*-.+:/var/log/suricata(:.+)?\s*$', line): - # suricata-live's reference to the suricata-logs directory - line = ReplaceBindMountLocation( - line, - suricataLogDir, - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'upload': - # stuff specifically in the upload section - if re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): - # set bind IP based on whether it should be externally exposed or not - line = re.sub( - r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', - fr"\g<1>{'0.0.0.0' if sftpOpen else '127.0.0.1'}:\g<3>", - line, - ) - - elif re.match(r'^\s*-.+:/var/www/upload/server/php/chroot/files(:.+)?\s*$', line): - # upload's reference to the PCAP directory - line = ReplaceBindMountLocation( - line, - os.path.join(pcapDir, 'upload'), - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'zeek': - # stuff specifically in the zeek section - if re.match(r'^\s*-.+:/pcap(:.+)?\s*$', line): - # Zeek's reference to the PCAP directory - line = ReplaceBindMountLocation( - line, - pcapDir, - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/zeek/upload(:.+)?\s*$', line): - # zeek's reference to the zeek-logs/upload directory - line = ReplaceBindMountLocation( - line, - os.path.join(zeekLogDir, 'upload'), - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/zeek/extract_files(:.+)?\s*$', line): - # zeek's reference to the zeek-logs/extract_files directory - line = ReplaceBindMountLocation( - line, - os.path.join(zeekLogDir, 'extract_files'), - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'zeek-live': - # stuff specifically in the zeek-live section - if 
re.match(r'^\s*-.+:/zeek/live(:.+)?\s*$', line): - # zeek-live's reference to the zeek-logs/live directory - line = ReplaceBindMountLocation( - line, - os.path.join(zeekLogDir, 'live'), - sectionIndents[currentSection] * 3, - ) - - elif re.match(r'^\s*-.+:/zeek/extract_files(:.+)?\s*$', line): - # zeek-lives's reference to the zeek-logs/extract_files directory - line = ReplaceBindMountLocation( - line, - os.path.join(zeekLogDir, 'extract_files'), - sectionIndents[currentSection] * 3, - ) - - elif currentService == 'nginx-proxy': - # stuff specifically in the nginx-proxy section - - if re.match(r'^\s*test\s*:', line): - # set nginx-proxy health check based on whether they're using HTTPS or not - line = re.sub( - r'https?://localhost:\d+', - fr"{'https' if nginxSSL else 'http'}://localhost:443", - line, - ) - - elif re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): - # set bind IPs and ports based on whether it should be externally exposed or not - line = re.sub( - r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', - fr"\g<1>{'0.0.0.0' if nginxSSL and (((not '9200:9200' in line) and (not '5601:5601' in line)) or opensearchOpen) else '127.0.0.1'}:\g<3>", - line, - ) - if nginxSSL is False: - if ':443:' in line: - line = line.replace(':443:', ':80:') - if ':9200:' in line: - line = line.replace(':9200:', ':9201:') - else: - if ':80:' in line: - line = line.replace(':80:', ':443:') - if ':9201:' in line: - line = line.replace(':9201:', ':9200:') - - elif 'traefik.' in line: - # enable/disable/configure traefik labels if applicable - - # Traefik enabled vs. disabled - if 'traefik.enable' in line: - line = re.sub( - r'(#\s*)?(traefik\.enable\s*:\s*)(\S+)', - fr"\g<2>{TrueOrFalseQuote(behindReverseProxy and traefikLabels)}", - line, - ) - else: - line = re.sub( - r'(#\s*)?(traefik\..*)', - fr"{'' if traefikLabels else '# '}\g<2>", - line, - ) - - if 'traefik.http.' in line and '.osmalcolm.' 
in line: - # OpenSearch router enabled/disabled/host value - line = re.sub( - r'(#\s*)?(traefik\..*)', - fr"{'' if behindReverseProxy and traefikLabels and opensearchOpen else '# '}\g<2>", - line, - ) - if ('.rule') in line: - line = re.sub( - r'(traefik\.http\.routers\.osmalcolm\.rule\s*:\s*)(\S+)', - fr"\g<1>'Host(`{traefikOpenSearchHost}`)'", - line, - ) - - if 'traefik.http.routers.malcolm.rule' in line: - # Malcolm interface router host value - line = re.sub( - r'(traefik\.http\.routers\.malcolm\.rule\s*:\s*)(\S+)', - fr"\g<1>'Host(`{traefikHost}`)'", - line, - ) - - elif 'traefik.http.routers.' in line and '.entrypoints' in line: - # Malcolm routers entrypoints - line = re.sub( - r'(traefik\.[\w\.]+\s*:\s*)(\S+)', - fr"\g<1>'{traefikEntrypoint}'", - line, - ) - - elif 'traefik.http.routers.' in line and '.certresolver' in line: - # Malcolm routers resolvers - line = re.sub( - r'(traefik\.[\w\.]+\s*:\s*)(\S+)', - fr"\g<1>'{traefikResolver}'", - line, - ) - - elif currentSection == 'networks': - # re-write the network definition from scratch - if not sectionStartLine: - if not networkWritten: - print(f"{sectionIndents[currentSection]}default:") - print( - f"{sectionIndents[currentSection] * 2}external: {'true' if (len(dockerNetworkExternalName) > 0) else 'false'}" - ) - if len(dockerNetworkExternalName) > 0: - print(f"{sectionIndents[currentSection] * 2}name: {dockerNetworkExternalName}") - networkWritten = True - # we already re-wrote the network stuff, anything else is superfluous - skipLine = True - - if not skipLine: - print(line) + deep_set(data, ['services', service, 'image'], ":".join(imageLineSpit)) + + # stuff for specific services + boundPathsToAdjust = ( + BoundPathReplacer("arkime", "/data/pcap", pcapDir), + BoundPathReplacer("arkime-live", "/data/pcap", pcapDir), + BoundPathReplacer("filebeat", "/suricata", suricataLogDir), + BoundPathReplacer("filebeat", "/zeek", zeekLogDir), + BoundPathReplacer( + "file-monitor", "/zeek/extract_files", 
os.path.join(zeekLogDir, 'extract_files') + ), + BoundPathReplacer("file-monitor", "/zeek/logs", os.path.join(zeekLogDir, 'current')), + BoundPathReplacer("opensearch", "/usr/share/opensearch/data", indexDir), + BoundPathReplacer("opensearch", "/opt/opensearch/backup", indexSnapshotDir), + BoundPathReplacer("pcap-capture", "/pcap", os.path.join(pcapDir, 'upload')), + BoundPathReplacer("pcap-monitor", "/pcap", pcapDir), + BoundPathReplacer("pcap-monitor", "/zeek", zeekLogDir), + BoundPathReplacer("suricata", "/data/pcap", pcapDir), + BoundPathReplacer("suricata", "/var/log/suricata", suricataLogDir), + BoundPathReplacer("suricata-live", "/var/log/suricata", suricataLogDir), + BoundPathReplacer( + "upload", "/var/www/upload/server/php/chroot/files", os.path.join(pcapDir, 'upload') + ), + BoundPathReplacer("zeek", "/pcap", pcapDir), + BoundPathReplacer("zeek", "/zeek/upload", os.path.join(zeekLogDir, 'upload')), + BoundPathReplacer("zeek", "/zeek/extract_files", os.path.join(zeekLogDir, 'extract_files')), + BoundPathReplacer("zeek-live", "/zeek/live", os.path.join(zeekLogDir, 'live')), + BoundPathReplacer( + "zeek-live", "/zeek/extract_files", os.path.join(zeekLogDir, 'extract_files') + ), + ) + + # filebeat + if 'filebeat' in data['services']: + # TODO + pass + # if re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): + # # set bind IP based on whether it should be externally exposed or not + # line = re.sub( + # r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', + # fr"\g<1>{'0.0.0.0' if filebeatTcpOpen else '127.0.0.1'}:\g<3>", + # line, + # ) + + # logstash + if 'logstash' in data['services']: + # TODO + pass + # if re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): + # # set bind IP based on whether it should be externally exposed or not + # line = re.sub( + # r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', + # fr"\g<1>{'0.0.0.0' if logstashOpen else '127.0.0.1'}:\g<3>", + # line, + # ) + + # upload + if 'upload' in data['services']: + # TODO + pass + # if 
re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): + # # set bind IP based on whether it should be externally exposed or not + # line = re.sub( + # r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', + # fr"\g<1>{'0.0.0.0' if sftpOpen else '127.0.0.1'}:\g<3>", + # line, + # ) + + # nginx-proxy + if 'nginx-proxy' in data['services']: + # TODO + pass + # if re.match(r'^\s*test\s*:', line): + # # set nginx-proxy health check based on whether they're using HTTPS or not + # line = re.sub( + # r'https?://localhost:\d+', + # fr"{'https' if nginxSSL else 'http'}://localhost:443", + # line, + # ) + + # elif re.match(r'^[\s#]*-\s*"([\d\.]+:)?\d+:\d+"\s*$', line): + # # set bind IPs and ports based on whether it should be externally exposed or not + # line = re.sub( + # r'^([\s#]*-\s*")([\d\.]+:)?(\d+:\d+"\s*)$', + # fr"\g<1>{'0.0.0.0' if nginxSSL and (((not '9200:9200' in line) and (not '5601:5601' in line)) or opensearchOpen) else '127.0.0.1'}:\g<3>", + # line, + # ) + # if nginxSSL is False: + # if ':443:' in line: + # line = line.replace(':443:', ':80:') + # if ':9200:' in line: + # line = line.replace(':9200:', ':9201:') + # else: + # if ':80:' in line: + # line = line.replace(':80:', ':443:') + # if ':9201:' in line: + # line = line.replace(':9201:', ':9200:') + + # elif 'traefik.' in line: + # # enable/disable/configure traefik labels if applicable + + # # Traefik enabled vs. disabled + # if 'traefik.enable' in line: + # line = re.sub( + # r'(#\s*)?(traefik\.enable\s*:\s*)(\S+)', + # fr"\g<2>{TrueOrFalseQuote(behindReverseProxy and traefikLabels)}", + # line, + # ) + # else: + # line = re.sub( + # r'(#\s*)?(traefik\..*)', + # fr"{'' if traefikLabels else '# '}\g<2>", + # line, + # ) + + # if 'traefik.http.' in line and '.osmalcolm.' 
in line:
+                        #         # OpenSearch router enabled/disabled/host value
+                        #         line = re.sub(
+                        #             r'(#\s*)?(traefik\..*)',
+                        #             fr"{'' if behindReverseProxy and traefikLabels and opensearchOpen else '# '}\g<2>",
+                        #             line,
+                        #         )
+                        #         if ('.rule') in line:
+                        #             line = re.sub(
+                        #                 r'(traefik\.http\.routers\.osmalcolm\.rule\s*:\s*)(\S+)',
+                        #                 fr"\g<1>'Host(`{traefikOpenSearchHost}`)'",
+                        #                 line,
+                        #             )
+
+                        #     if 'traefik.http.routers.malcolm.rule' in line:
+                        #         # Malcolm interface router host value
+                        #         line = re.sub(
+                        #             r'(traefik\.http\.routers\.malcolm\.rule\s*:\s*)(\S+)',
+                        #             fr"\g<1>'Host(`{traefikHost}`)'",
+                        #             line,
+                        #         )
+
+                        #     elif 'traefik.http.routers.' in line and '.entrypoints' in line:
+                        #         # Malcolm routers entrypoints
+                        #         line = re.sub(
+                        #             r'(traefik\.[\w\.]+\s*:\s*)(\S+)',
+                        #             fr"\g<1>'{traefikEntrypoint}'",
+                        #             line,
+                        #         )
+
+                        #     elif 'traefik.http.routers.' in line and '.certresolver' in line:
+                        #         # Malcolm routers resolvers
+                        #         line = re.sub(
+                        #             r'(traefik\.[\w\.]+\s*:\s*)(\S+)',
+                        #             fr"\g<1>'{traefikResolver}'",
+                        #             line,
+                        #         )
+
+                # re-write the network definition from scratch
+                if 'networks' in data:
+                    del data['networks']
+                networkDef = {'external': len(dockerNetworkExternalName) > 0}
+                if len(dockerNetworkExternalName) > 0:
+                    networkDef['name'] = dockerNetworkExternalName  # must be "=": "x[k]: v" only annotates, stores nothing
+                data['networks'] = {}
+                data['networks']['default'] = networkDef
+
+                # write the docker-compose file back out
+                DumpYaml(data, composeFile)
             finally:
-                composeFileHandle.close()
                 # restore ownership
                 os.chown(composeFile, origUid, origGuid)
 
diff --git a/scripts/malcolm_common.py b/scripts/malcolm_common.py
index 61e23aa20..40fb6b290 100644
--- a/scripts/malcolm_common.py
+++ b/scripts/malcolm_common.py
@@ -63,6 +63,17 @@
 PLATFORM_LINUX_FEDORA = 'fedora'
 PLATFORM_LINUX_UBUNTU = 'ubuntu'
 
+###################################################################################################
+YAML_VERSION = (1, 1)
+
+
+class NullRepresenter:
+    """Callable representer that renders None as an empty scalar carrying the YAML null tag."""
+
+    def __call__(self, repr, data):
+        ret_val = repr.represent_scalar(u'tag:yaml.org,2002:null', u'')
+        return ret_val
+
 
 def DialogInit():
     global Dialog
@@ -98,6 +109,12 @@ class UserInterfaceMode(IntFlag):
     rename=False,
 )
 
+BoundPathReplacer = namedtuple(
+    "BoundPathReplacer",
+    ["service", "container_dir", "host_dir"],
+    rename=False,
+)
+
 # URLS for figuring things out if something goes wrong
 DOCKER_INSTALL_URLS = defaultdict(lambda: 'https://docs.docker.com/install/')
 DOCKER_INSTALL_URLS[PLATFORM_WINDOWS] = [
@@ -129,6 +146,7 @@ class OrchestrationFramework(Flag):
 
 ##################################################################################################
 def ReplaceBindMountLocation(line, location, linePrefix):
+    # TODO: switch to ruamel
     if os.path.isdir(location):
         volumeParts = line.strip().lstrip('-').lstrip().split(':')
         volumeParts[0] = location
@@ -699,6 +717,68 @@ def DetermineYamlFileFormat(inputFileName):
     return result
 
 
+###################################################################################################
+def LoadYaml(inputFileName):
+    """Load a YAML file with the round-trip ('rt') loader so it can be modified and dumped again.
+
+    Returns the loaded document, or None when inputFileName is empty, is not an existing
+    file, or YAMLDynamic() returns a falsy value.
+    """
+    result = None
+    if inputFileName and os.path.isfile(inputFileName):
+        if yamlImported := YAMLDynamic():
+            with open(inputFileName, 'r') as f:
+                inYaml = yamlImported.YAML(typ='rt')
+                inYaml.boolean_representation = ['false', 'true']
+                inYaml.emitter.alt_null = None
+                inYaml.preserve_quotes = True
+                inYaml.representer.ignore_aliases = lambda *args: True
+                inYaml.width = 4096
+                result = inYaml.load(f)
+    return result
+
+
+###################################################################################################
+def PopLine(fileName, count=1):
+    """Remove the first `count` lines from fileName in place and return them.
+
+    Returns the single line as a string when count is 1, otherwise a list of lines. Each
+    line keeps its trailing newline; reading past the end of the file yields ''.
+    """
+    result = []
+    with open(fileName, 'r+') as f:
+        for _ in range(count):
+            result.append(f.readline())
+        # read what is left, then overwrite the file from the start with just that remainder
+        data = f.read()
+        f.seek(0)
+        f.write(data)
+        f.truncate()
+    return result if (len(result) != 1) else result[0]
+
+
+###################################################################################################
+def DumpYaml(data, outputFileName):
+    """Write data to outputFileName as YAML, then strip the two-line version header.
+
+    Does nothing when data is None or YAMLDynamic() returns a falsy value.
+    """
+    if data is not None:
+        if yamlImported := YAMLDynamic():
+            with open(outputFileName, 'w') as outfile:
+                outYaml = yamlImported.YAML(typ='rt')
+                outYaml.boolean_representation = ['false', 'true']
+                outYaml.preserve_quotes = False
+                outYaml.representer.ignore_aliases = lambda *args: True
+                outYaml.representer.add_representer(type(None), NullRepresenter())
+                outYaml.version = YAML_VERSION
+                outYaml.width = 4096
+                outYaml.dump(data, outfile)
+            # ruamel puts the YAML version header (2 lines) at the top, which docker-compose
+            # doesn't like, so we need to remove it
+            PopLine(outputFileName, 2)
+
+
 ###################################################################################################
 # download to file
 def DownloadToFile(url, local_filename, debug=False):