Skip to content

Commit

Permalink
More resilient cleanup node
Browse files Browse the repository at this point in the history
  • Loading branch information
lferran committed Dec 21, 2023
1 parent 0f2fc26 commit 093514a
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 48 deletions.
118 changes: 71 additions & 47 deletions nucliadb/nucliadb/standalone/introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import asyncio
import os
import tarfile
import tempfile
from collections.abc import AsyncGenerator

import pkg_resources
Expand All @@ -32,51 +35,72 @@


async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
    """Build an introspection tarball for ``app`` and stream it out in chunks.

    The archive is written as ``introspect.tar.gz`` inside a temporary
    directory that is removed once the generator finishes. It always contains
    the installed dependencies; when ``app`` has a ``settings`` attribute the
    settings are added too, and the log files are added when
    ``settings.log_output_type == "file"``.

    Yields:
        Consecutive ``bytes`` chunks of the finished tar file.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        tar_file = os.path.join(temp_dir, "introspect.tar.gz")
        with tarfile.open(tar_file, mode="w:gz") as tar:
            await add_dependencies(temp_dir, tar)
            if hasattr(app, "settings"):
                # Work on a copy: add_settings() blanks the secret fields.
                settings: Settings = app.settings.copy()
                await add_settings(temp_dir, tar, settings)
                if settings.log_output_type == "file":
                    await add_logs(tar)

        # The archive is closed (and therefore complete on disk) before any
        # byte is streamed; streaming happens even when there are no settings.
        async for chunk in stream_out_tar(tar_file):
            yield chunk


async def stream_out_tar(tar_file: str) -> AsyncGenerator[bytes, None]:
    """Yield the contents of ``tar_file`` in pieces of up to ``CHUNK_SIZE`` bytes.

    Every read is handed to the event loop's default executor so the blocking
    file I/O does not run on the event loop thread.

    Args:
        tar_file: Path of the file to stream.

    Yields:
        Non-empty ``bytes`` chunks, in file order, until end of file.
    """
    # get_running_loop() is the documented way to obtain the loop from inside
    # a coroutine.
    loop = asyncio.get_running_loop()
    with open(tar_file, "rb") as f:
        # An empty read marks end of file and ends the loop.
        while chunk := await loop.run_in_executor(None, f.read, CHUNK_SIZE):
            yield chunk


async def add_dependencies(temp_dir: str, tar: tarfile.TarFile):
    """Add the installed-dependencies listing to ``tar`` without blocking the loop.

    Runs ``_add_dependencies_to_tar`` in the event loop's default executor.

    Args:
        temp_dir: Directory where the intermediate file is written.
        tar: Open, writable archive that receives the file.
    """
    # get_running_loop() is the documented way to obtain the loop from inside
    # a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_dependencies_to_tar, temp_dir, tar)


def _add_dependencies_to_tar(temp_dir: str, tar: tarfile.TarFile):
    """Write ``dependencies.txt`` into ``temp_dir`` and add it to ``tar``.

    The file holds one ``<key>==<version>`` line for every distribution in
    ``pkg_resources.working_set``, sorted by key.
    """
    listing_path = os.path.join(temp_dir, "dependencies.txt")
    with open(listing_path, "w") as out:
        ordered = sorted(pkg_resources.working_set, key=lambda dist: dist.key)
        out.writelines([f"{dist.key}=={dist.version}\n" for dist in ordered])
    # Add only after the file is closed so its full content is on disk.
    tar.add(listing_path, arcname="dependencies.txt")


async def add_settings(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
    """Add the redacted settings to ``tar`` without blocking the loop.

    Runs ``_add_settings_to_tar`` in the event loop's default executor. That
    helper sets the secret fields of ``settings`` to ``None`` in place, so
    callers should pass an object they are happy to have modified.

    Args:
        temp_dir: Directory where the intermediate file is written.
        tar: Open, writable archive that receives the file.
        settings: Settings object to serialise.
    """
    # get_running_loop() is the documented way to obtain the loop from inside
    # a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_settings_to_tar, temp_dir, tar, settings)


def _add_settings_to_tar(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
    """Write a redacted ``settings.json`` into ``temp_dir`` and add it to ``tar``.

    ``settings`` is modified in place: the sensitive fields listed below are
    set to ``None`` before the object is serialised.
    """
    # Blank out sensitive values so they never reach the archive.
    for secret_field in (
        "nua_api_key",
        "jwk_key",
        "gcs_base64_creds",
        "s3_client_secret",
    ):
        setattr(settings, secret_field, None)

    destination = os.path.join(temp_dir, "settings.json")
    with open(destination, "w") as handle:
        handle.write(settings.json(indent=4))
    tar.add(destination, arcname="settings.json")


async def add_logs(tar: tarfile.TarFile):
    """Add the log files to ``tar`` without blocking the loop.

    Runs ``_add_logs_to_tar`` in the event loop's default executor.

    Args:
        tar: Open, writable archive that receives the log files.
    """
    # get_running_loop() is the documented way to obtain the loop from inside
    # a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_logs_to_tar, tar)


def _add_logs_to_tar(tar: tarfile.TarFile):
    """Add the access, error and info log files to ``tar`` under ``logs/``.

    Paths come from a freshly built ``LogSettings`` and are resolved with
    ``os.path.realpath`` before being added.
    """
    log_settings = LogSettings()
    # Same order as the archive layout: access, error, info.
    for kind in ("access", "error", "info"):
        resolved = os.path.realpath(getattr(log_settings, f"{kind}_log"))
        tar.add(resolved, arcname=f"logs/{kind}.log")
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ async def test_labelsets_service(nucliadb_manager) -> None:
with open(os.path.join(extracted_tar, "dependencies.txt")) as f:
dependencies = f.read()
assert "nucliadb" in dependencies
assert "nucliadb_models" in dependencies
assert "nucliadb-models" in dependencies

# Check settings
assert os.path.exists(os.path.join(extracted_tar, "settings.json"))
Expand Down

0 comments on commit 093514a

Please sign in to comment.