
✨ (⚠️ devops) 🗃️ Is922 resource tracking/1. version of regular scraping #4380

Merged
27 commits
d96b312
migration - creating new table for resource tracking
matusdrobuliak66 Jun 18, 2023
2a298c3
adding new table resource tracking
matusdrobuliak66 Jun 18, 2023
5c5db29
adding needed requirements
matusdrobuliak66 Jun 18, 2023
76867f1
adding service core changes
matusdrobuliak66 Jun 18, 2023
f323544
adding database tests
matusdrobuliak66 Jun 18, 2023
ea7ddf9
adding needed env vars
matusdrobuliak66 Jun 18, 2023
ec1ff30
merge master
matusdrobuliak66 Jun 18, 2023
44a41f1
removing redundant @pytest.mark.testit
matusdrobuliak66 Jun 18, 2023
81703a2
additional minor cleaning
matusdrobuliak66 Jun 18, 2023
7ee22a9
pylint fix
matusdrobuliak66 Jun 18, 2023
bf42426
revision fix
matusdrobuliak66 Jun 18, 2023
364e98e
minor fix
matusdrobuliak66 Jun 18, 2023
77bf46d
removing file that in the end was not used during development
matusdrobuliak66 Jun 19, 2023
e344dc3
Merge branch 'master' into is922-resource-tracking/adding-variables
matusdrobuliak66 Jun 20, 2023
33a374a
Merge branch 'master' into is922-resource-tracking/adding-variables
matusdrobuliak66 Jun 21, 2023
c5c4e4b
@sanderegg review
matusdrobuliak66 Jun 21, 2023
5a5a245
@GitHK review
matusdrobuliak66 Jun 21, 2023
6829742
Merge branch 'master' into is922-resource-tracking/adding-variables
matusdrobuliak66 Jun 21, 2023
910f882
@pcrespov review
matusdrobuliak66 Jun 22, 2023
a0a39ba
Merge branch 'is922-resource-tracking/adding-variables' of github.com…
matusdrobuliak66 Jun 22, 2023
ab1d2b2
Merge branch 'master' into is922-resource-tracking/adding-variables
matusdrobuliak66 Jun 22, 2023
637b03d
revision fix
matusdrobuliak66 Jun 22, 2023
1a1d98a
adding machine fqdn ENV variable, so we can test it also on master
matusdrobuliak66 Jun 22, 2023
b782cac
fix shared db_async_engine
matusdrobuliak66 Jun 22, 2023
950868d
rm not needed dependency
matusdrobuliak66 Jun 22, 2023
c680e50
Merge branch 'master' into is922-resource-tracking/adding-variables
matusdrobuliak66 Jun 22, 2023
99dab0a
Merge branch 'master' into is922-resource-tracking/adding-variables
matusdrobuliak66 Jun 22, 2023
@@ -0,0 +1,57 @@
"""adding resource tracker container table

Revision ID: 6e91067932f2
Revises: 52cf00912ad9
Create Date: 2023-06-21 14:12:40.292816+00:00

"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "6e91067932f2"
down_revision = "52cf00912ad9"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"resource_tracker_container",
sa.Column("container_id", sa.String(), nullable=False),
sa.Column("image", sa.String(), nullable=False),
sa.Column("user_id", sa.BigInteger(), nullable=False),
sa.Column("product_name", sa.String(), nullable=False),
sa.Column(
"service_settings_reservation_nano_cpus", sa.BigInteger(), nullable=True
),
sa.Column(
"service_settings_reservation_memory_bytes", sa.BigInteger(), nullable=True
),
sa.Column(
"service_settings_reservation_additional_info",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
),
sa.Column("container_cpu_usage_seconds_total", sa.Float(), nullable=False),
sa.Column("prometheus_created", sa.DateTime(timezone=True), nullable=False),
sa.Column(
"prometheus_last_scraped", sa.DateTime(timezone=True), nullable=False
),
sa.Column(
"modified",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("resource_tracker_container")
# ### end Alembic commands ###
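
A minimal sketch of how this revision could be applied and rolled back through Alembic's Python API; the `alembic.ini` path is an assumption, and in practice migrations run through the repository's own postgres-database tooling, so this is illustrative only.

```python
# Illustrative only: apply/roll back this revision via Alembic's Python API.
# The "alembic.ini" location is an assumption; simcore normally drives
# migrations through the postgres-database package tooling.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # hypothetical config path

command.upgrade(cfg, "6e91067932f2")    # runs upgrade() above
command.downgrade(cfg, "52cf00912ad9")  # reverts to the previous revision
```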
@@ -0,0 +1,74 @@
""" resource_tracker_container table

- Table where we store the resource usage of each container that
we scrape via resource-usage-tracker service
"""

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB

from ._common import column_modified_datetime
from .base import metadata

resource_tracker_container = sa.Table(
"resource_tracker_container",
metadata,
sa.Column(
"container_id",
sa.String,
nullable=False,
doc="Refers to container id scraped via Prometheus",
),
sa.Column(
"image",
sa.String,
nullable=False,
doc="image label scraped via Prometheus (taken from container labels), ex. registry.osparc.io/simcore/services/dynamic/jupyter-smash:3.0.9",
),
sa.Column(
"user_id",
sa.BigInteger,
nullable=False,
doc="user_id label scraped via Prometheus (taken from container labels)",
),
sa.Column(
"product_name",
sa.String,
nullable=False,
doc="product_name label scraped via Prometheus (taken from container labels)",
),
sa.Column(
"service_settings_reservation_nano_cpus",
sa.BigInteger,
nullable=True,
doc="CPU resource allocated to a container, ex.500000000 means that the container is allocated 0.5 CPU shares",
),
sa.Column(
"service_settings_reservation_memory_bytes",
sa.BigInteger,
nullable=True,
doc="memory limit in bytes scraped via Prometheus",
),
sa.Column(
"service_settings_reservation_additional_info",
JSONB,
nullable=False,
doc="storing additional information about the reservation settings",
),
sa.Column("container_cpu_usage_seconds_total", sa.Float, nullable=False),
sa.Column(
"prometheus_created",
sa.DateTime(timezone=True),
nullable=False,
doc="First container creation timestamp (UTC timestamp)",
),
sa.Column(
"prometheus_last_scraped",
sa.DateTime(timezone=True),
nullable=False,
doc="Last prometheus scraped timestamp (UTC timestamp)",
),
column_modified_datetime(timezone=True),
# ---------------------------
sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
)
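
For orientation, a sketch of how one scraped container sample could be written into this table with SQLAlchemy's async API; the import path, connection URL, and values are illustrative assumptions, not code from this PR.

```python
# Sketch only: insert one scraped container sample into the new table.
# The import path, connection URL, and values are placeholders.
import asyncio
import datetime

import sqlalchemy as sa
from sqlalchemy.ext.asyncio import create_async_engine

# assumed module path for the table defined above
from simcore_postgres_database.models.resource_tracker import resource_tracker_container


async def insert_sample() -> None:
    engine = create_async_engine("postgresql+asyncpg://test:test@localhost/test")
    now = datetime.datetime.now(datetime.timezone.utc)
    async with engine.begin() as conn:
        await conn.execute(
            sa.insert(resource_tracker_container).values(
                container_id="my-container-id",
                image="registry.osparc.io/simcore/services/dynamic/jupyter-smash:3.0.9",
                user_id=1,
                product_name="osparc",
                service_settings_reservation_nano_cpus=500_000_000,
                service_settings_reservation_memory_bytes=1_073_741_824,
                service_settings_reservation_additional_info={},
                container_cpu_usage_seconds_total=12.5,
                prometheus_created=now,
                prometheus_last_scraped=now,
            )
        )
    await engine.dispose()


asyncio.run(insert_sample())
```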
58 changes: 58 additions & 0 deletions packages/service-library/src/servicelib/db_async_engine.py
@@ -0,0 +1,58 @@
import logging

from fastapi import FastAPI
from settings_library.postgres import PostgresSettings
from simcore_postgres_database.utils_aiosqlalchemy import (
get_pg_engine_stateinfo,
raise_if_migration_not_ready,
)
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from tenacity import retry

from .retry_policies import PostgresRetryPolicyUponInitialization

logger = logging.getLogger(__name__)


@retry(**PostgresRetryPolicyUponInitialization(logger).kwargs)
async def connect_to_db(app: FastAPI, cfg: PostgresSettings) -> None:
logger.debug("Connecting db ...")

engine: AsyncEngine = create_async_engine(
cfg.dsn_with_async_sqlalchemy,
pool_size=cfg.POSTGRES_MINSIZE,
max_overflow=cfg.POSTGRES_MAXSIZE - cfg.POSTGRES_MINSIZE,
connect_args={
"server_settings": {"application_name": cfg.POSTGRES_CLIENT_NAME}
},
pool_pre_ping=True, # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
future=True, # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
)

logger.debug("Connected to %s", engine.url) # pylint: disable=no-member

logger.debug("Checking db migration...")
try:
await raise_if_migration_not_ready(engine)
except Exception:
# NOTE: engine must be closed because retry will create a new engine
await engine.dispose()
raise

logger.debug("Migration up-to-date")

app.state.engine = engine

logger.debug(
"Setup engine: %s",
await get_pg_engine_stateinfo(engine),
)


async def close_db_connection(app: FastAPI) -> None:
logger.debug("Disconnecting db ...")

if engine := app.state.engine:
await engine.dispose()

logger.debug("Disconnected from %s", engine.url) # pylint: disable=no-member
5 changes: 3 additions & 2 deletions services/catalog/src/simcore_service_catalog/core/events.py
@@ -3,8 +3,9 @@
 
 from fastapi import FastAPI
 from models_library.basic_types import BootModeEnum
+from servicelib.db_async_engine import close_db_connection, connect_to_db
 
-from ..db.events import close_db_connection, connect_to_db, setup_default_product
+from ..db.events import setup_default_product
 from ..services.director import close_director, setup_director
 from ..services.remote_debug import setup_remote_debugging
 from .background_tasks import start_registry_sync_task, stop_registry_sync_task
@@ -23,7 +24,7 @@ async def start_app() -> None:
 
         # setup connection to pg db
         if app.state.settings.CATALOG_POSTGRES:
-            await connect_to_db(app)
+            await connect_to_db(app, app.state.settings.CATALOG_POSTGRES)
             await setup_default_product(app)
 
         if app.state.settings.CATALOG_DIRECTOR:
53 changes: 0 additions & 53 deletions services/catalog/src/simcore_service_catalog/db/events.py
@@ -1,65 +1,12 @@
import logging

from fastapi import FastAPI
from servicelib.retry_policies import PostgresRetryPolicyUponInitialization
from settings_library.postgres import PostgresSettings
from simcore_postgres_database.utils_aiosqlalchemy import (
get_pg_engine_stateinfo,
raise_if_migration_not_ready,
)
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from tenacity import retry

from .repositories.products import ProductsRepository

logger = logging.getLogger(__name__)


@retry(**PostgresRetryPolicyUponInitialization(logger).kwargs)
async def connect_to_db(app: FastAPI) -> None:
logger.debug("Connecting db ...")
cfg: PostgresSettings = app.state.settings.CATALOG_POSTGRES

engine: AsyncEngine = create_async_engine(
cfg.dsn_with_async_sqlalchemy,
pool_size=cfg.POSTGRES_MINSIZE,
max_overflow=cfg.POSTGRES_MAXSIZE - cfg.POSTGRES_MINSIZE,
connect_args={
"server_settings": {"application_name": cfg.POSTGRES_CLIENT_NAME}
},
pool_pre_ping=True, # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
future=True, # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
)

logger.debug("Connected to %s", engine.url) # pylint: disable=no-member

logger.debug("Checking db migration...")
try:
await raise_if_migration_not_ready(engine)
except Exception:
# NOTE: engine must be closed because retry will create a new engine
await engine.dispose()
raise

logger.debug("Migration up-to-date")

app.state.engine = engine

logger.debug(
"Setup engine: %s",
await get_pg_engine_stateinfo(engine),
)


async def close_db_connection(app: FastAPI) -> None:
logger.debug("Disconnecting db ...")

if engine := app.state.engine:
await engine.dispose()

logger.debug("Disconnected from %s", engine.url) # pylint: disable=no-member


async def setup_default_product(app: FastAPI):
repo = ProductsRepository(db_engine=app.state.engine)
app.state.default_product_name = await repo.get_default_product_name()
10 changes: 10 additions & 0 deletions services/docker-compose.devel.yml
@@ -124,6 +124,16 @@ services:
- SC_BOOT_MODE=debug-ptvsd
- RESOURCE_USAGE_TRACKER_LOGLEVEL=DEBUG
- DEBUG=true
- LOG_FORMAT_LOCAL_DEV_ENABLED=${LOG_FORMAT_LOCAL_DEV_ENABLED}
- POSTGRES_DB=${POSTGRES_DB}
- POSTGRES_ENDPOINT=${POSTGRES_ENDPOINT}
- POSTGRES_HOST=${POSTGRES_HOST}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- POSTGRES_PORT=${POSTGRES_PORT}
- POSTGRES_USER=${POSTGRES_USER}
- PROMETHEUS_URL=${RESOURCE_USAGE_TRACKER_PROMETHEUS_URL}
- PROMETHEUS_USERNAME=${RESOURCE_USAGE_TRACKER_PROMETHEUS_USERNAME}
- PROMETHEUS_PASSWORD=${RESOURCE_USAGE_TRACKER_PROMETHEUS_PASSWORD}
volumes:
- ./resource-usage-tracker:/devel/services/resource-usage-tracker
- ../packages:/devel/packages
6 changes: 3 additions & 3 deletions services/docker-compose.yml
@@ -194,9 +194,9 @@ services:
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
       - POSTGRES_PORT=${POSTGRES_PORT}
       - POSTGRES_USER=${POSTGRES_USER}
-      - PROMETHEUS_URL=${PROMETHEUS_URL}
-      - PROMETHEUS_USERNAME=${PROMETHEUS_USERNAME}
-      - PROMETHEUS_PASSWORD=${PROMETHEUS_PASSWORD}
+      - PROMETHEUS_URL=${RESOURCE_USAGE_TRACKER_PROMETHEUS_URL}
+      - PROMETHEUS_USERNAME=${RESOURCE_USAGE_TRACKER_PROMETHEUS_USERNAME}
+      - PROMETHEUS_PASSWORD=${RESOURCE_USAGE_TRACKER_PROMETHEUS_PASSWORD}
       - RESOURCE_USAGE_TRACKER_LOGLEVEL=${LOG_LEVEL:-INFO}
 
   static-webserver:
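
Inside the container the service still reads the unprefixed `PROMETHEUS_*` variables; only the host-side names gain the `RESOURCE_USAGE_TRACKER_` prefix. A hedged sketch of how such variables could be captured with a pydantic settings class follows; the class and field layout are illustrative, not the service's actual settings.

```python
# Illustrative only: not the resource-usage-tracker's real settings class.
# Reads the PROMETHEUS_* variables that docker-compose passes into the container.
from pydantic import AnyHttpUrl, BaseSettings, SecretStr


class PrometheusSettings(BaseSettings):
    PROMETHEUS_URL: AnyHttpUrl
    PROMETHEUS_USERNAME: str
    PROMETHEUS_PASSWORD: SecretStr


settings = PrometheusSettings()  # values come from the environment
print(settings.PROMETHEUS_URL)
```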
8 changes: 8 additions & 0 deletions services/resource-usage-tracker/.env-devel
@@ -0,0 +1,8 @@
RESOURCE_USAGE_TRACKER_DEV_FEATURES_ENABLED=1

LOG_LEVEL=DEBUG

POSTGRES_USER=test
POSTGRES_PASSWORD=test
POSTGRES_DB=test
POSTGRES_HOST=localhost
1 change: 1 addition & 0 deletions services/resource-usage-tracker/requirements/_base.in
@@ -8,6 +8,7 @@
# intra-repo required dependencies
--requirement ../../../packages/models-library/requirements/_base.in
--requirement ../../../packages/settings-library/requirements/_base.in
--requirement ../../../packages/postgres-database/requirements/_base.in
# service-library[fastapi]
--requirement ../../../packages/service-library/requirements/_base.in
--requirement ../../../packages/service-library/requirements/_fastapi.in