diff --git a/worker.py b/worker.py index 5c839ea..19fedce 100644 --- a/worker.py +++ b/worker.py @@ -189,136 +189,144 @@ def get_device_group_info(nebula_connection_object, device_group_to_get_info): if __name__ == "__main__": - # read config file and config envvars at startup, order preference is envvar>config file>default value (if exists) - if os.path.exists("config/conf.json"): - print("reading config file") - auth_file = json.load(open("config/conf.json")) - else: - print("config file not found - skipping reading it and checking if needed params are given from envvars") - auth_file = {} - print("reading config variables") - nebula_manager_auth_user = get_conf_setting("nebula_manager_auth_user", auth_file, None) - nebula_manager_auth_password = get_conf_setting("nebula_manager_auth_password", auth_file, None) - nebula_manager_host = get_conf_setting("nebula_manager_host", auth_file, "127.0.0.1") - nebula_manager_port = int(get_conf_setting("nebula_manager_port", auth_file, "80")) - nebula_manager_protocol = get_conf_setting("nebula_manager_protocol", auth_file, "http") - nebula_manager_request_timeout = int(get_conf_setting("nebula_manager_request_timeout", auth_file, "60")) - nebula_manager_check_in_time = int(get_conf_setting("nebula_manager_check_in_time", auth_file, "30")) - registry_auth_user = get_conf_setting("registry_auth_user", auth_file, None) - registry_auth_password = get_conf_setting("registry_auth_password", auth_file, None) - registry_host = get_conf_setting("registry_host", auth_file, "https://index.docker.io/v1/") - max_restart_wait_in_seconds = int(get_conf_setting("max_restart_wait_in_seconds", auth_file, 0)) - device_group = get_conf_setting("device_group", auth_file) - - # get number of cpu cores on host - cpu_cores = get_number_of_cpu_cores() - - # work against docker socket - docker_socket = DockerFunctions() - - # ensure default "nebula" named network exists - docker_socket.create_docker_network("nebula", "bridge") - - # login to the docker registry - if no registry login details are configured will just print a message stating that - docker_socket.registry_login(registry_host=registry_host, registry_user=registry_auth_user, - registry_pass=registry_auth_password) - - # login to the nebula manager - nebula_connection = Nebula(username=nebula_manager_auth_user, password=nebula_manager_auth_password, - host=nebula_manager_host, port=nebula_manager_port, protocol=nebula_manager_protocol, - request_timeout=nebula_manager_request_timeout) - - # make sure the nebula manager connects properly try: - print("checking nebula manager connection") - api_check = nebula_connection.check_api() - if api_check["status_code"] == 200 and api_check["reply"]["api_available"] is True: - print("nebula manager connection ok") + # read config file/envvars at startup, order preference is envvar>config file>default value (if exists) + if os.path.exists("config/conf.json"): + print("reading config file") + auth_file = json.load(open("config/conf.json")) else: - print("nebula manager initial connection check failure, dropping container") - except Exception as e: - print >> sys.stderr, e - print("error confirming connection to nebula manager - please check connection & authentication params and " - "that the manager is online") - os._exit(2) - - # stop all nebula managed containers on start to ensure a clean slate to work on - print("stopping all preexisting nebula manager containers in order to ensure a clean slate on boot") - stop_containers({"app_name": ""}) + print("config file not found - skipping reading it and checking if needed params are given from envvars") + auth_file = {} + print("reading config variables") + nebula_manager_auth_user = get_conf_setting("nebula_manager_auth_user", auth_file, None) + nebula_manager_auth_password = get_conf_setting("nebula_manager_auth_password", auth_file, None) + nebula_manager_host = get_conf_setting("nebula_manager_host", auth_file, "127.0.0.1") + nebula_manager_port = int(get_conf_setting("nebula_manager_port", auth_file, "80")) + nebula_manager_protocol = get_conf_setting("nebula_manager_protocol", auth_file, "http") + nebula_manager_request_timeout = int(get_conf_setting("nebula_manager_request_timeout", auth_file, "60")) + nebula_manager_check_in_time = int(get_conf_setting("nebula_manager_check_in_time", auth_file, "30")) + registry_auth_user = get_conf_setting("registry_auth_user", auth_file, None) + registry_auth_password = get_conf_setting("registry_auth_password", auth_file, None) + registry_host = get_conf_setting("registry_host", auth_file, "https://index.docker.io/v1/") + max_restart_wait_in_seconds = int(get_conf_setting("max_restart_wait_in_seconds", auth_file, 0)) + device_group = get_conf_setting("device_group", auth_file) + + # get number of cpu cores on host + cpu_cores = get_number_of_cpu_cores() + + # work against docker socket + docker_socket = DockerFunctions() + + # ensure default "nebula" named network exists + docker_socket.create_docker_network("nebula", "bridge") + + # login to the docker registry - if no registry login details are configured will just print a message stating + # that + docker_socket.registry_login(registry_host=registry_host, registry_user=registry_auth_user, + registry_pass=registry_auth_password) + + # login to the nebula manager + nebula_connection = Nebula(username=nebula_manager_auth_user, password=nebula_manager_auth_password, + host=nebula_manager_host, port=nebula_manager_port, protocol=nebula_manager_protocol, + request_timeout=nebula_manager_request_timeout) + + # make sure the nebula manager connects properly + try: + print("checking nebula manager connection") + api_check = nebula_connection.check_api() + if api_check["status_code"] == 200 and api_check["reply"]["api_available"] is True: + print("nebula manager connection ok") + else: + print("nebula manager initial connection check failure, dropping container") + os._exit(2) + except Exception as e: + print >> sys.stderr, e + print("error confirming connection to nebula manager - please check connection & authentication params and " + "that the manager is online") + os._exit(2) + + # stop all nebula managed containers on start to ensure a clean slate to work on + print("stopping all preexisting nebula manager containers in order to ensure a clean slate on boot") + stop_containers({"app_name": ""}) + + # get the initial device_group configuration and store it in memory + local_device_group_info = get_device_group_info(nebula_connection, device_group) - # get the initial device_group configuration and store it in memory - local_device_group_info = get_device_group_info(nebula_connection, device_group) + # make sure the device_group exists in the nebula cluster + while local_device_group_info["status_code"] == 403 and \ + local_device_group_info["reply"]["device_group_exists"] is False: + print("device_group " + device_group + " doesn't exist in nebula cluster, waiting for it to be created") + local_device_group_info = get_device_group_info(nebula_connection, device_group) + time.sleep(nebula_manager_check_in_time) + + # start all apps that are set to running on boot + for nebula_app in local_device_group_info["reply"]["apps"]: + if nebula_app["running"] is True: + print("initial start of " + nebula_app["app_name"] + " app") + start_containers(nebula_app) + print("completed initial start of " + nebula_app["app_name"] + " app") + + # open a thread which is in charge of restarting any containers which healthcheck shows them as unhealthy + print("starting work container health checking thread") + Thread(target=restart_unhealthy_containers).start() + + # loop forever + print("starting device_group " + device_group + " /info check loop, configured to check for changes every " + + str(nebula_manager_check_in_time) + " seconds") + while True: - # make sure the device_group exists in the nebula cluster - while local_device_group_info["status_code"] == 403 and \ - local_device_group_info["reply"]["device_group_exists"] is False: - print("device_group " + device_group + " doesn't exist in nebula cluster, waiting for it to be created") - local_device_group_info = get_device_group_info(nebula_connection, device_group) - time.sleep(nebula_manager_check_in_time) - - # start all apps that are set to running on boot - for nebula_app in local_device_group_info["reply"]["apps"]: - if nebula_app["running"] is True: - print("initial start of " + nebula_app["app_name"] + " app") - start_containers(nebula_app) - print("completed initial start of " + nebula_app["app_name"] + " app") - - # open a thread which is in charge of restarting any containers which healthcheck shows them as unhealthy - print("starting work container health checking thread") - Thread(target=restart_unhealthy_containers).start() - - # loop forever - print("starting device_group " + device_group + " /info check loop, configured to check for changes every " - + str(nebula_manager_check_in_time) + " seconds") - while True: - - # wait the configurable time before checking the device_group info page again - time.sleep(nebula_manager_check_in_time) - - monotonic_id_increase = False - - # get the device_group configuration - remote_device_group_info = get_device_group_info(nebula_connection, device_group) - - # logic that checks if the each app_id was increased and updates the app containers if the answer is yes - # the logic also starts containers of newly added apps to the device_group - for remote_nebula_app in remote_device_group_info["reply"]["apps"]: - if remote_nebula_app["app_name"] in local_device_group_info["reply"]["apps_list"]: - local_app_index = local_device_group_info["reply"]["apps_list"].index(remote_nebula_app["app_name"]) - if remote_nebula_app["app_id"] > local_device_group_info["reply"]["apps"][local_app_index]["app_id"]: + # wait the configurable time before checking the device_group info page again + time.sleep(nebula_manager_check_in_time) + + monotonic_id_increase = False + + # get the device_group configuration + remote_device_group_info = get_device_group_info(nebula_connection, device_group) + + # logic that checks if the each app_id was increased and updates the app containers if the answer is yes + # the logic also starts containers of newly added apps to the device_group + for remote_nebula_app in remote_device_group_info["reply"]["apps"]: + if remote_nebula_app["app_name"] in local_device_group_info["reply"]["apps_list"]: + local_app_index = local_device_group_info["reply"]["apps_list"].index(remote_nebula_app["app_name"]) + if remote_nebula_app["app_id"] > local_device_group_info["reply"]["apps"][local_app_index]["app_id"]: + monotonic_id_increase = True + if remote_nebula_app["running"] is False: + print("stopping app " + remote_nebula_app["app_name"] + + " do to changes in the app configuration") + stop_containers(remote_nebula_app) + elif remote_nebula_app["rolling_restart"] is True and \ + local_device_group_info["reply"]["apps"][local_app_index]["running"] is True: + print("rolling app " + remote_nebula_app["app_name"] + + " do to changes in the app configuration") + roll_containers(remote_nebula_app) + else: + print("restarting app " + remote_nebula_app["app_name"] + + " do to changes in the app configuration") + restart_containers(remote_nebula_app) + else: + print("restarting app " + remote_nebula_app["app_name"] + " do to changes in the app configuration") monotonic_id_increase = True - if remote_nebula_app["running"] is False: - print("stopping app " + remote_nebula_app["app_name"] + - " do to changes in the app configuration") - stop_containers(remote_nebula_app) - elif remote_nebula_app["rolling_restart"] is True and \ - local_device_group_info["reply"]["apps"][local_app_index]["running"] is True: - print("rolling app " + remote_nebula_app["app_name"] + - " do to changes in the app configuration") - roll_containers(remote_nebula_app) - else: - print("restarting app " + remote_nebula_app["app_name"] + + restart_containers(remote_nebula_app) + + # logic that removes containers of apps that was removed from the device_group + if remote_device_group_info["reply"]["device_group_id"] > local_device_group_info["reply"]["device_group_id"]: + monotonic_id_increase = True + for local_nebula_app in local_device_group_info["reply"]["apps"]: + if local_nebula_app["app_name"] not in remote_device_group_info["reply"]["apps_list"]: + print("removing app " + local_nebula_app["app_name"] + " do to changes in the app configuration") - restart_containers(remote_nebula_app) - else: - print("restarting app " + remote_nebula_app["app_name"] + " do to changes in the app configuration") + stop_containers(local_nebula_app) + + # logic that runs image pruning if prune_id increased + if remote_device_group_info["reply"]["prune_id"] > local_device_group_info["reply"]["prune_id"]: + print("pruning images do to changes in the app configuration") monotonic_id_increase = True - restart_containers(remote_nebula_app) - - # logic that removes containers of apps that was removed from the device_group - if remote_device_group_info["reply"]["device_group_id"] > local_device_group_info["reply"]["device_group_id"]: - monotonic_id_increase = True - for local_nebula_app in local_device_group_info["reply"]["apps"]: - if local_nebula_app["app_name"] not in remote_device_group_info["reply"]["apps_list"]: - print("removing app " + local_nebula_app["app_name"] + " do to changes in the app configuration") - stop_containers(local_nebula_app) - - # logic that runs image pruning if prune_id increased - if remote_device_group_info["reply"]["prune_id"] > local_device_group_info["reply"]["prune_id"]: - print("pruning images do to changes in the app configuration") - monotonic_id_increase = True - prune_images() - - # set the in memory device_group info to be the one recently received if any id increased - if monotonic_id_increase is True: - local_device_group_info = remote_device_group_info + prune_images() + + # set the in memory device_group info to be the one recently received if any id increased + if monotonic_id_increase is True: + local_device_group_info = remote_device_group_info + except Exception as e: + print >> sys.stderr, e + print("failed main loop - exiting") + os._exit(2)