✨ (⚠️ devops) 🗃️ Is922 resource tracking/1. version of regular scraping (#4380)
matusdrobuliak66 authored Jun 22, 2023
1 parent b537b67 commit c20bf3e
Showing 41 changed files with 1,578 additions and 277 deletions.
@@ -0,0 +1,57 @@
"""adding resource tracker container table
Revision ID: 6e91067932f2
Revises: 52cf00912ad9
Create Date: 2023-06-21 14:12:40.292816+00:00
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "6e91067932f2"
down_revision = "52cf00912ad9"
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "resource_tracker_container",
        sa.Column("container_id", sa.String(), nullable=False),
        sa.Column("image", sa.String(), nullable=False),
        sa.Column("user_id", sa.BigInteger(), nullable=False),
        sa.Column("product_name", sa.String(), nullable=False),
        sa.Column(
            "service_settings_reservation_nano_cpus", sa.BigInteger(), nullable=True
        ),
        sa.Column(
            "service_settings_reservation_memory_bytes", sa.BigInteger(), nullable=True
        ),
        sa.Column(
            "service_settings_reservation_additional_info",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column("container_cpu_usage_seconds_total", sa.Float(), nullable=False),
        sa.Column("prometheus_created", sa.DateTime(timezone=True), nullable=False),
        sa.Column(
            "prometheus_last_scraped", sa.DateTime(timezone=True), nullable=False
        ),
        sa.Column(
            "modified",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
    )
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("resource_tracker_container")
    # ### end Alembic commands ###
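
For orientation (this is not part of the commit): a migration like the one above is normally applied or rolled back through Alembic. A minimal sketch using Alembic's standard Python API follows; the alembic.ini location is an assumption and may differ from how this repository actually drives its migrations.

from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")  # assumed path to the Alembic configuration
command.upgrade(alembic_cfg, "6e91067932f2")  # apply up to and including this revision
command.downgrade(alembic_cfg, "52cf00912ad9")  # revert to the previous revision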
@@ -0,0 +1,74 @@
""" resource_tracker_container table
- Table where we store the resource usage of each container that
we scrape via resource-usage-tracker service
"""

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB

from ._common import column_modified_datetime
from .base import metadata

resource_tracker_container = sa.Table(
    "resource_tracker_container",
    metadata,
    sa.Column(
        "container_id",
        sa.String,
        nullable=False,
        doc="Refers to container id scraped via Prometheus",
    ),
    sa.Column(
        "image",
        sa.String,
        nullable=False,
        doc="image label scraped via Prometheus (taken from container labels), ex. registry.osparc.io/simcore/services/dynamic/jupyter-smash:3.0.9",
    ),
    sa.Column(
        "user_id",
        sa.BigInteger,
        nullable=False,
        doc="user_id label scraped via Prometheus (taken from container labels)",
    ),
    sa.Column(
        "product_name",
        sa.String,
        nullable=False,
        doc="product_name label scraped via Prometheus (taken from container labels)",
    ),
    sa.Column(
        "service_settings_reservation_nano_cpus",
        sa.BigInteger,
        nullable=True,
        doc="CPU resource allocated to a container, ex. 500000000 means that the container is allocated 0.5 CPU shares",
    ),
    sa.Column(
        "service_settings_reservation_memory_bytes",
        sa.BigInteger,
        nullable=True,
        doc="memory limit in bytes scraped via Prometheus",
    ),
    sa.Column(
        "service_settings_reservation_additional_info",
        JSONB,
        nullable=False,
        doc="stores additional information about the reservation settings",
    ),
    sa.Column("container_cpu_usage_seconds_total", sa.Float, nullable=False),
    sa.Column(
        "prometheus_created",
        sa.DateTime(timezone=True),
        nullable=False,
        doc="First container creation timestamp (UTC timestamp)",
    ),
    sa.Column(
        "prometheus_last_scraped",
        sa.DateTime(timezone=True),
        nullable=False,
        doc="Last Prometheus scrape timestamp (UTC timestamp)",
    ),
    column_modified_datetime(timezone=True),
    # ---------------------------
    sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
)
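
To make the intended use of the table concrete, here is a minimal sketch of how a scraper could record one container sample through SQLAlchemy's async engine. None of this code is in the commit: the import path of the table module, the DSN and every value are illustrative assumptions.

import asyncio
import datetime

import sqlalchemy as sa
from sqlalchemy.ext.asyncio import create_async_engine

# assumed import path for the table object defined above
from simcore_postgres_database.models.resource_tracker import resource_tracker_container


async def record_sample(dsn: str) -> None:
    engine = create_async_engine(dsn)
    now = datetime.datetime.now(datetime.timezone.utc)
    async with engine.begin() as conn:
        await conn.execute(
            sa.insert(resource_tracker_container).values(
                container_id="c0ffee",  # hypothetical container id
                image="registry.osparc.io/simcore/services/dynamic/jupyter-smash:3.0.9",
                user_id=1,
                product_name="osparc",
                service_settings_reservation_nano_cpus=500_000_000,  # 0.5 CPU
                service_settings_reservation_memory_bytes=2 * 1024**3,  # 2 GiB
                service_settings_reservation_additional_info={},
                container_cpu_usage_seconds_total=12.5,
                prometheus_created=now,
                prometheus_last_scraped=now,
            )
        )
    await engine.dispose()


if __name__ == "__main__":
    asyncio.run(record_sample("postgresql+asyncpg://test:test@localhost/test"))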
58 changes: 58 additions & 0 deletions packages/service-library/src/servicelib/db_async_engine.py
@@ -0,0 +1,58 @@
import logging

from fastapi import FastAPI
from settings_library.postgres import PostgresSettings
from simcore_postgres_database.utils_aiosqlalchemy import (
    get_pg_engine_stateinfo,
    raise_if_migration_not_ready,
)
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from tenacity import retry

from .retry_policies import PostgresRetryPolicyUponInitialization

logger = logging.getLogger(__name__)


@retry(**PostgresRetryPolicyUponInitialization(logger).kwargs)
async def connect_to_db(app: FastAPI, cfg: PostgresSettings) -> None:
    logger.debug("Connecting db ...")

    engine: AsyncEngine = create_async_engine(
        cfg.dsn_with_async_sqlalchemy,
        pool_size=cfg.POSTGRES_MINSIZE,
        max_overflow=cfg.POSTGRES_MAXSIZE - cfg.POSTGRES_MINSIZE,
        connect_args={
            "server_settings": {"application_name": cfg.POSTGRES_CLIENT_NAME}
        },
        pool_pre_ping=True,  # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
        future=True,  # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
    )

    logger.debug("Connected to %s", engine.url)  # pylint: disable=no-member

    logger.debug("Checking db migration...")
    try:
        await raise_if_migration_not_ready(engine)
    except Exception:
        # NOTE: engine must be closed because retry will create a new engine
        await engine.dispose()
        raise

    logger.debug("Migration up-to-date")

    app.state.engine = engine

    logger.debug(
        "Setup engine: %s",
        await get_pg_engine_stateinfo(engine),
    )


async def close_db_connection(app: FastAPI) -> None:
    logger.debug("Disconnecting db ...")

    if engine := app.state.engine:
        await engine.dispose()

    logger.debug("Disconnected from %s", engine.url)  # pylint: disable=no-member
5 changes: 3 additions & 2 deletions services/catalog/src/simcore_service_catalog/core/events.py
@@ -3,8 +3,9 @@

from fastapi import FastAPI
from models_library.basic_types import BootModeEnum
from servicelib.db_async_engine import close_db_connection, connect_to_db

from ..db.events import close_db_connection, connect_to_db, setup_default_product
from ..db.events import setup_default_product
from ..services.director import close_director, setup_director
from ..services.remote_debug import setup_remote_debugging
from .background_tasks import start_registry_sync_task, stop_registry_sync_task
@@ -23,7 +24,7 @@ async def start_app() -> None:

    # setup connection to pg db
    if app.state.settings.CATALOG_POSTGRES:
        await connect_to_db(app)
        await connect_to_db(app, app.state.settings.CATALOG_POSTGRES)
        await setup_default_product(app)

    if app.state.settings.CATALOG_DIRECTOR:
53 changes: 0 additions & 53 deletions services/catalog/src/simcore_service_catalog/db/events.py
@@ -1,65 +1,12 @@
import logging

from fastapi import FastAPI
from servicelib.retry_policies import PostgresRetryPolicyUponInitialization
from settings_library.postgres import PostgresSettings
from simcore_postgres_database.utils_aiosqlalchemy import (
    get_pg_engine_stateinfo,
    raise_if_migration_not_ready,
)
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from tenacity import retry

from .repositories.products import ProductsRepository

logger = logging.getLogger(__name__)


@retry(**PostgresRetryPolicyUponInitialization(logger).kwargs)
async def connect_to_db(app: FastAPI) -> None:
    logger.debug("Connecting db ...")
    cfg: PostgresSettings = app.state.settings.CATALOG_POSTGRES

    engine: AsyncEngine = create_async_engine(
        cfg.dsn_with_async_sqlalchemy,
        pool_size=cfg.POSTGRES_MINSIZE,
        max_overflow=cfg.POSTGRES_MAXSIZE - cfg.POSTGRES_MINSIZE,
        connect_args={
            "server_settings": {"application_name": cfg.POSTGRES_CLIENT_NAME}
        },
        pool_pre_ping=True,  # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
        future=True,  # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
    )

    logger.debug("Connected to %s", engine.url)  # pylint: disable=no-member

    logger.debug("Checking db migration...")
    try:
        await raise_if_migration_not_ready(engine)
    except Exception:
        # NOTE: engine must be closed because retry will create a new engine
        await engine.dispose()
        raise

    logger.debug("Migration up-to-date")

    app.state.engine = engine

    logger.debug(
        "Setup engine: %s",
        await get_pg_engine_stateinfo(engine),
    )


async def close_db_connection(app: FastAPI) -> None:
    logger.debug("Disconnecting db ...")

    if engine := app.state.engine:
        await engine.dispose()

    logger.debug("Disconnected from %s", engine.url)  # pylint: disable=no-member


async def setup_default_product(app: FastAPI):
    repo = ProductsRepository(db_engine=app.state.engine)
    app.state.default_product_name = await repo.get_default_product_name()
10 changes: 10 additions & 0 deletions services/docker-compose.devel.yml
@@ -124,6 +124,16 @@ services:
- SC_BOOT_MODE=debug-ptvsd
- RESOURCE_USAGE_TRACKER_LOGLEVEL=DEBUG
- DEBUG=true
- LOG_FORMAT_LOCAL_DEV_ENABLED=${LOG_FORMAT_LOCAL_DEV_ENABLED}
- POSTGRES_DB=${POSTGRES_DB}
- POSTGRES_ENDPOINT=${POSTGRES_ENDPOINT}
- POSTGRES_HOST=${POSTGRES_HOST}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- POSTGRES_PORT=${POSTGRES_PORT}
- POSTGRES_USER=${POSTGRES_USER}
- PROMETHEUS_URL=${RESOURCE_USAGE_TRACKER_PROMETHEUS_URL}
- PROMETHEUS_USERNAME=${RESOURCE_USAGE_TRACKER_PROMETHEUS_USERNAME}
- PROMETHEUS_PASSWORD=${RESOURCE_USAGE_TRACKER_PROMETHEUS_PASSWORD}
volumes:
- ./resource-usage-tracker:/devel/services/resource-usage-tracker
- ../packages:/devel/packages
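The RESOURCE_USAGE_TRACKER_PROMETHEUS_* variables above feed the scraper. Purely as an illustration (none of this code is in the commit; the endpoint, credentials and label selector are assumptions, while the metric name mirrors the column stored in the new table), an authenticated instant query against the Prometheus HTTP API could look like this:

import httpx

PROMETHEUS_URL = "https://prometheus.example.com"  # hypothetical endpoint
auth = httpx.BasicAuth("scraper", "secret")  # hypothetical credentials

response = httpx.get(
    f"{PROMETHEUS_URL}/api/v1/query",
    params={"query": 'container_cpu_usage_seconds_total{image!=""}'},
    auth=auth,
    timeout=10,
)
response.raise_for_status()
for sample in response.json()["data"]["result"]:
    print(sample["metric"].get("id"), sample["value"])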
6 changes: 3 additions & 3 deletions services/docker-compose.yml
@@ -194,9 +194,9 @@ services:
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- POSTGRES_PORT=${POSTGRES_PORT}
- POSTGRES_USER=${POSTGRES_USER}
- PROMETHEUS_URL=${PROMETHEUS_URL}
- PROMETHEUS_USERNAME=${PROMETHEUS_USERNAME}
- PROMETHEUS_PASSWORD=${PROMETHEUS_PASSWORD}
- PROMETHEUS_URL=${RESOURCE_USAGE_TRACKER_PROMETHEUS_URL}
- PROMETHEUS_USERNAME=${RESOURCE_USAGE_TRACKER_PROMETHEUS_USERNAME}
- PROMETHEUS_PASSWORD=${RESOURCE_USAGE_TRACKER_PROMETHEUS_PASSWORD}
- RESOURCE_USAGE_TRACKER_LOGLEVEL=${LOG_LEVEL:-INFO}

static-webserver:
8 changes: 8 additions & 0 deletions services/resource-usage-tracker/.env-devel
@@ -0,0 +1,8 @@
RESOURCE_USAGE_TRACKER_DEV_FEATURES_ENABLED=1

LOG_LEVEL=DEBUG

POSTGRES_USER=test
POSTGRES_PASSWORD=test
POSTGRES_DB=test
POSTGRES_HOST=localhost
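
These variables line up with the shared PostgresSettings model consumed by the new db_async_engine helper above. A rough sketch of loading them follows; the explicit keyword arguments simply mirror this .env file (in the service they would come from the environment), and relying on the default port 5432 is an assumption.

from settings_library.postgres import PostgresSettings

settings = PostgresSettings(
    POSTGRES_USER="test",
    POSTGRES_PASSWORD="test",
    POSTGRES_DB="test",
    POSTGRES_HOST="localhost",
)
print(settings.dsn_with_async_sqlalchemy)  # e.g. postgresql+asyncpg://test:...@localhost:5432/test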
1 change: 1 addition & 0 deletions services/resource-usage-tracker/requirements/_base.in
@@ -8,6 +8,7 @@
# intra-repo required dependencies
--requirement ../../../packages/models-library/requirements/_base.in
--requirement ../../../packages/settings-library/requirements/_base.in
--requirement ../../../packages/postgres-database/requirements/_base.in
# service-library[fastapi]
--requirement ../../../packages/service-library/requirements/_base.in
--requirement ../../../packages/service-library/requirements/_fastapi.in
