diff --git a/.gitignore b/.gitignore index d1f05ac..083bbe4 100644 --- a/.gitignore +++ b/.gitignore @@ -573,5 +573,6 @@ cython_debug/ duetector-dbcollector.sqlite3* dev-tools/duetector-dbcollector.sqlite3* dev-tools/config.toml +dev-tools/duetector_server* docs/usercases/tracking-mljob-in-kata-containers/cifar-10-batches-py/* docs/usercases/tracking-mljob-in-kata-containers/cifar-10-python.tar.gz diff --git a/README.md b/README.md index d97d75c..2dfd6c0 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,10 @@ docker pull dataucon/duetector:v0.0.1a ## 快速开始 +> 更多文档和例子可以在[这里](./docs/)找到。 + +### 启动探测器 + 使用命令行启动monitor,由于bcc需要root权限,所以我们使用 `sudo` 命令,这将启动所有的探测器,并将探测内容收集到当前目录下的 `duetector-dbcollector.sqlite3`文件中 ```bash @@ -154,7 +158,51 @@ Commands: stop Stop the process. ``` -更多文档和例子可以在[这里](./docs/)找到。 +### 使用Analyzer进行分析 + +我们提供了一个[Analyzer](https://duetector.readthedocs.io/en/latest/analyzer/index.html),它可以对存储中的数据进行查询,在这里我们提供了一个[入门案例](./docs/usercases/simplest-open-count/README.md) + +### 使用Duetector Server + +我们提供了一个Duetector Server,作为外部PIP服务和控制接口 + +使用`duectl-server`可以启动一个Duetector Server,默认将监听`0.0.0.0:8120`,你可以使用`--host`和`--port`来修改它。 + +```bash +$ duectl-server start --help +Usage: duectl-server start [OPTIONS] + + Start duetector server + +Options: + --config TEXT Config file path, default: + ``~/.config/duetector/config.toml``. + --load_env BOOLEAN Weather load env variables, Prefix: ``DUETECTOR_``, + Separator:``__``, e.g. ``DUETECTOR_config__a`` means + ``config.a``, default: True + --workdir TEXT Working directory, default: ``.``. + --host TEXT Host to listen, default: ``0.0.0.0``. + --port INTEGER Port to listen, default: ``8120``. + --workers INTEGER Number of worker processes, default: ``1``. + --help Show this message and exit. 
+``` + +在服务启动后,访问`http://{ip}:{port}/docs`可以查看API文档。 + +同样的,使用`duectl-server-daemon start`可以在后台运行一个Duetector Server,你可以使用`duectl-server-daemon stop`来停止它 + +```bash +$ duectl-server-daemon +Usage: duectl-server-daemon [OPTIONS] COMMAND [ARGS]... + +Options: + --help Show this message and exit. + +Commands: + start Start a background process of command ``duectl-server start``. + status Show status of process. + stop Stop the process. +``` ## API文档与配置文档 diff --git a/README_en.md b/README_en.md index bb598c1..c3e7457 100644 --- a/README_en.md +++ b/README_en.md @@ -83,6 +83,10 @@ For more details on running with docker images see [here](./docs/how-to/run-with ## Quick start +> More documentation and examples can be found [here](./docs/). + +### Start detector + Start monitor using the command line, since bcc requires root privileges, we use the `sudo` command, which will start all probes and collect the probes into the `duetector-dbcollector.sqlite3` file in the current directory ```bash @@ -151,7 +155,51 @@ Commands: stop Stop the process. ``` -More documentation and examples can be found [here](. /docs/). +### Analyzing with analyzer + +We provide an [Analyzer](https://duetector.readthedocs.io/en/latest/analyzer/index.html) that can query the data in storage, here we provide a [user case](./docs/usercases/simplest-open-count/README.md) + +### Using duetector server + +We provide a Duetector Server as an external PIP service and control interface + +A Duetector Server can be started using `duectl-server` and will listen on `0.0.0.0:8120` by default, you can modify it using `--host` and `--port`. + +```bash +$ duectl-server start --help +Usage: duectl-server start [OPTIONS] + + Start duetector server + +Options: + --config TEXT Config file path, default: + ``~/.config/duetector/config.toml``. + --load_env BOOLEAN Weather load env variables, Prefix: ``DUETECTOR_``, + Separator:``__``, e.g. 
``DUETECTOR_config__a`` means + ``config.a``, default: True + --workdir TEXT Working directory, default: ``.``. + --host TEXT Host to listen, default: ``0.0.0.0``. + --port INTEGER Port to listen, default: ``8120``. + --workers INTEGER Number of worker processes, default: ``1``. + --help Show this message and exit. +``` + +After the service has started, visit `http://{ip}:{port}/docs` to see the API documentation. + +Similarly, using `duectl-server-daemon start` you can run a Duetector Server in the background, and you can stop it using `duectl-server-daemon stop` + +```bash +$ duectl-server-daemon +Usage: duectl-server-daemon [OPTIONS] COMMAND [ARGS]... + +Options: + --help Show this message and exit. + +Commands: + start Start a background process of command ``duectl-server start``. + status Show status of process. + stop Stop the process. +``` ## API documentation diff --git a/dev-tools/entrypoint-server.py b/dev-tools/entrypoint-server.py new file mode 100644 index 0000000..01d09ed --- /dev/null +++ b/dev-tools/entrypoint-server.py @@ -0,0 +1,19 @@ +import os + +os.chdir(os.path.dirname(os.path.abspath(__file__))) +os.environ["DUETECTOR_LOG_LEVEL"] = "DEBUG" + +import re +import sys +from pathlib import Path + +from pkg_resources import load_entry_point + +db_file = Path("./duetector-dbcollector.sqlite3") +config_file = Path("./config.toml") + +if __name__ == "__main__": + sys.argv[0] = re.sub(r"(-script\.pyw?|\.exe)?$", "", sys.argv[0]) + sys.argv.append("start") + sys.argv.extend(["--config", config_file.resolve().as_posix()]) + sys.exit(load_entry_point("duetector", "console_scripts", "duectl-server")()) diff --git a/docker/start.sh b/docker/start.sh index f62b5a3..baeba8e 100755 --- a/docker/start.sh +++ b/docker/start.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash mount -t debugfs debugfs /sys/kernel/debug # enable debugfs duectl-daemon start --loglevel=DEBUG +duectl-server-daemon start --loglevel=DEBUG cd /home/application # Config user's local path for pip 
install some scripts diff --git a/docs/source/cli/index.rst b/docs/source/cli/index.rst index 3ab58a3..625b292 100644 --- a/docs/source/cli/index.rst +++ b/docs/source/cli/index.rst @@ -1,9 +1,14 @@ duetector.cli ========================================= -``duectl``: CLI for Start Monitor, generate config. +``duectl``: CLI for starting the monitor and generating config. -``duectl-daemon``: Allow to run as daemon, and run as a service. +``duectl-daemon``: Allows running monitors and server as a daemon. + + +``duectl-server``: CLI for starting the duetector server. + +``duectl-server-daemon``: Allows running the server as a daemon. .. toctree:: @@ -12,3 +17,5 @@ duetector.cli duectl
duectl-daemon + duectl-server + duectl-server-daemon diff --git a/docs/source/cli/server-daemon.rst b/docs/source/cli/server-daemon.rst new file mode 100644 index 0000000..31b0365 --- /dev/null +++ b/docs/source/cli/server-daemon.rst @@ -0,0 +1,3 @@ +.. click:: duetector.cli.server_daemon:cli + :prog: duectl-server-daemon + :nested: full diff --git a/docs/source/cli/server.rst b/docs/source/cli/server.rst new file mode 100644 index 0000000..f25df70 --- /dev/null +++ b/docs/source/cli/server.rst @@ -0,0 +1,3 @@ +.. click:: duetector.cli.server:cli + :prog: duectl-server + :nested: full diff --git a/docs/usercases/simplest-open-count/README.md b/docs/usercases/simplest-open-count/README.md index cc9da9b..3047c5c 100644 --- a/docs/usercases/simplest-open-count/README.md +++ b/docs/usercases/simplest-open-count/README.md @@ -20,8 +20,10 @@ Alternatively, you can run `duetector` in a kata container, this gives you more docker run -it --rm \ --privileged \ -p 8888:8888 \ +-p 8120:8120 \ -v /lib/modules:/lib/modules \ -e DUETECTOR_DAEMON_WORKDIR=/duetector-kata \ +-e DUETECTOR_SERVER_DAEMON_WORKDIR=/duetector-kata \ -v $(pwd)/duetector-kata:/duetector-kata \ -v /sys/kernel/debug:/sys/kernel/debug \ dataucon/duetector @@ -34,6 +36,7 @@ Note: - You can use `--entrypoint bash` to enter the container and run `duetector` manually. - In kata container, you need to mount debugfs manually: `mount -t debugfs debugfs /sys/kernel/debug` - `/lib/modules` contains kernel modules, more details can be found in [run-with-docker](../../how-to/run-with-docker.md). +- `8888` is the port of JupyterLab, `8120` is the port of duetector server. Access `http://localhost:8888` in your browser to use JupyterLab, and access `http://localhost:8120/docs` to see the API docs of duetector server. 
## Use JupyterLab to write some code diff --git a/duetector/analyzer/base.py b/duetector/analyzer/base.py index 2e8416d..52600d5 100644 --- a/duetector/analyzer/base.py +++ b/duetector/analyzer/base.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import List, Optional +from typing import Any, Dict, List, Optional from duetector.analyzer.models import AnalyzerBrief, Tracking from duetector.config import Configuable @@ -42,25 +42,64 @@ def get_all_collector_ids(self) -> List[str]: def query( self, - tracer: Optional[str] = None, - collector_id: Optional[str] = None, + tracers: Optional[List[str]] = None, + collector_ids: Optional[List[str]] = None, start_datetime: Optional[datetime] = None, end_datetime: Optional[datetime] = None, start: int = 0, - limit: int = 20, + limit: int = 0, + columns: Optional[List[str]] = None, + where: Optional[Dict[str, Any]] = None, + distinct: bool = False, + order_by_asc: Optional[List[str]] = None, + order_by_desc: Optional[List[str]] = None, ) -> List[Tracking]: """ - Query tracking data from storage. + Query all tracking records from database. + + Args: + tracers (Optional[List[str]], optional): Tracer's name. Defaults to None, all tracers will be queried. + collector_ids (Optional[List[str]], optional): Collector id. Defaults to None, all collector id will be queried. + start_datetime (Optional[datetime], optional): Start time. Defaults to None. + end_datetime (Optional[datetime], optional): End time. Defaults to None. + start (int, optional): Start index. Defaults to 0. + limit (int, optional): Limit of records. Defaults to 20. ``0`` means no limit. + columns (Optional[List[str]], optional): Columns to query. Defaults to None, all columns will be queried. + where (Optional[Dict[str, Any]], optional): Where clause. Defaults to None. + distinct (bool, optional): Distinct. Defaults to False. + order_by_asc (Optional[List[str]], optional): Order by asc. Defaults to None. 
+ order_by_desc (Optional[List[str]], optional): Order by desc. Defaults to None. + Returns: + List[duetector.analyzer.models.Tracking]: List of tracking records. """ raise NotImplementedError def brief( self, + tracers: Optional[List[str]] = None, + collector_ids: Optional[List[str]] = None, start_datetime: Optional[datetime] = None, end_datetime: Optional[datetime] = None, + with_details: bool = True, + distinct: bool = False, ) -> AnalyzerBrief: """ - Get brief of analyzer. + Get a brief of this analyzer. + + Args: + tracers (Optional[List[str]], optional): + Tracers. Defaults to None, all tracers will be queried. + If a specific tracer is not found, it will be ignored. + collector_ids (Optional[List[str]], optional): + Collector ids. Defaults to None, all collector ids will be queried. + If a specific collector id is not found, it will be ignored. + start_datetime (Optional[datetime], optional): Start time. Defaults to None. + end_datetime (Optional[datetime], optional): End time. Defaults to None. + with_details (bool, optional): With details. Defaults to True. + distinct (bool, optional): Distinct. Defaults to False. + + Returns: + AnalyzerBrief: A brief of this analyzer. """ raise NotImplementedError diff --git a/duetector/analyzer/db.py b/duetector/analyzer/db.py index ceb1a35..5e8a3b2 100644 --- a/duetector/analyzer/db.py +++ b/duetector/analyzer/db.py @@ -12,9 +12,11 @@ class DBAnalyzer(Analyzer): """ A analyzer using database. - As a top model, it will init a ``SessionManager`` and pass it to submodels. + We design this analyzer to be a top module, so it can be used as a standalone tools. - Config scope is ``db_analyzer``. + In this analyzer, we use ``SessionManager`` to manage database session. + + Config scope is ``db_analyzer``. ``db_analyzer.db`` is the scope for ``SessionManager``. 
Example: diff --git a/duetector/cli/daemon.py b/duetector/cli/daemon.py index 00ed638..ccdc7d3 100644 --- a/duetector/cli/daemon.py +++ b/duetector/cli/daemon.py @@ -7,6 +7,7 @@ WORKDIR_ENV = "DUETECTOR_DAEMON_WORKDIR" DEFAULT_WORKDIR = "/tmp/duetector" +APPLICATION = "duetector-daemon" @click.command( @@ -36,7 +37,7 @@ def start(ctx, workdir, loglevel, rotate_log): Example: ``duectl-daemon start -- --config /path/to/config`` """ - cmd = ["duectl", "start"] + cmd = ["duectl", "start", "--config_dump_dir", workdir] cmd_args = ctx.args if cmd_args: cmd.extend(cmd_args) @@ -47,6 +48,7 @@ def start(ctx, workdir, loglevel, rotate_log): f"rotate_log: {rotate_log}" ) Daemon( + application=APPLICATION, cmd=cmd, workdir=workdir, env_dict={"DUETECTOR_LOG_LEVEL": loglevel}, @@ -68,6 +70,7 @@ def status(workdir): """ if Daemon( workdir=workdir, + application=APPLICATION, ).poll(): click.echo("Running") else: @@ -88,6 +91,7 @@ def stop(workdir): """ Daemon( workdir=workdir, + application=APPLICATION, ).stop() click.echo("Daemon stopped.") diff --git a/duetector/cli/server.py b/duetector/cli/server.py new file mode 100644 index 0000000..1d7582e --- /dev/null +++ b/duetector/cli/server.py @@ -0,0 +1,82 @@ +from pathlib import Path + +import click +import uvicorn + +from duetector.config import CONFIG_PATH, ConfigLoader +from duetector.log import logger +from duetector.service.config import CONFIG_PATH_ENV + +SERVER_CONFIG_FILE = "duetector_server_config.toml" +SERVER_ENV_FILE = "duetector_server.env" + + +@click.command() +@click.option( + "--config", + default=CONFIG_PATH, + help=f"Config file path, default: ``{CONFIG_PATH}``.", +) +@click.option( + "--load_env", + default=True, + help=f"Weather load env variables, " + f"Prefix: ``{ConfigLoader.ENV_PREFIX}``, Separator:``{ConfigLoader.ENV_SEP}``, " + f"e.g. 
``{ConfigLoader.ENV_PREFIX}config{ConfigLoader.ENV_SEP}a`` means ``config.a``, " + f"default: True", +) +@click.option( + "--workdir", + default=".", + help=f"Working directory, default: ``.``.", +) +@click.option( + "--host", + default="0.0.0.0", + help=f"Host to listen, default: ``0.0.0.0``.", +) +@click.option( + "--port", + default=8120, + help=f"Port to listen, default: ``8120``.", +) +@click.option( + "--workers", + default=1, + help=f"Number of worker processes, default: ``1``.", +) +def start(config, load_env, workdir, host, port, workers): + """ + Start duetector server + """ + config_loader = ConfigLoader(config, load_env=load_env, dump_when_load=False) + config = config_loader.load_config() + workdir = Path(workdir).expanduser().resolve() + server_config_file = workdir / SERVER_CONFIG_FILE + config_loader.dump_config(config, server_config_file) + + server_env_file = workdir / SERVER_ENV_FILE + logger.info(f"Init server env file {server_env_file}") + server_env_file.write_text(f"{CONFIG_PATH_ENV}={server_config_file.absolute().as_posix()}") + + config = uvicorn.Config( + "duetector.service.app:app", + host=host, + port=port, + workers=workers, + env_file=SERVER_ENV_FILE, + ) + server = uvicorn.Server(config) + server.run() + + +@click.group() +def cli(): + pass + + +cli.add_command(start) + + +if __name__ == "__main__": + cli(["start"]) diff --git a/duetector/cli/server_daemon.py b/duetector/cli/server_daemon.py new file mode 100644 index 0000000..4cfae5c --- /dev/null +++ b/duetector/cli/server_daemon.py @@ -0,0 +1,109 @@ +import os + +import click + +from duetector.log import logger +from duetector.tools.daemon import Daemon + +WORKDIR_ENV = "DUETECTOR_SERVER_DAEMON_WORKDIR" +DEFAULT_WORKDIR = "/tmp/duetector" +APPLICATION = "duetector-server-daemon" + + +@click.command( + context_settings=dict( + ignore_unknown_options=True, + allow_extra_args=True, + ) +) +@click.option( + "--workdir", + default=os.getenv(WORKDIR_ENV, DEFAULT_WORKDIR), + help="Log 
file and pid file will be stored in working directory, default: /tmp/duetector", +) +@click.option("--loglevel", default="INFO", help="Log level, default: INFO") +@click.option( + "--rotate_log", + default=True, + help="Rotate log file when process started, default: True", +) +@click.pass_context +def start(ctx, workdir, loglevel, rotate_log): + """ + Start a background process of command ``duectl-server start``. + + All arguments after ``--`` will be passed to ``duectl-server start``. + + Example: + ``duectl-server-daemon start -- --config /path/to/config`` + """ + cmd = ["duectl-server", "start"] + cmd_args = ctx.args + if cmd_args: + cmd.extend(cmd_args) + logger.info( + f"Start duetector daemon with command: {' '.join(cmd)}, \n" + f"workdir: {workdir}, \n" + f"loglevel: {loglevel}, \n" + f"rotate_log: {rotate_log}" + ) + Daemon( + application=APPLICATION, + cmd=cmd, + workdir=workdir, + env_dict={"DUETECTOR_LOG_LEVEL": loglevel}, + rotate_log=rotate_log, + ).start() + + +@click.command() +@click.option( + "--workdir", + default=os.getenv(WORKDIR_ENV, DEFAULT_WORKDIR), + help="Log file and pid file will be stored in working directory, default: /tmp/duetector", +) +def status(workdir): + """ + Show status of process. + + Determined by the existence of pid file in ``workdir``. + """ + if Daemon( + workdir=workdir, + application=APPLICATION, + ).poll(): + click.echo("Running") + else: + click.echo("Stopped") + + +@click.command() +@click.option( + "--workdir", + default=os.getenv(WORKDIR_ENV, DEFAULT_WORKDIR), + help="Log file and pid file will be stored in working directory, default: /tmp/duetector", +) +def stop(workdir): + """ + Stop the process. + + Determined by the existence of pid file in ``workdir``. 
+ """ + Daemon( + workdir=workdir, + application=APPLICATION, + ).stop() + click.echo("Daemon stopped.") + + +@click.group() +def cli(): + pass + + +cli.add_command(start) +cli.add_command(status) +cli.add_command(stop) + +if __name__ == "__main__": + cli(["start"]) diff --git a/duetector/service/app.py b/duetector/service/app.py new file mode 100644 index 0000000..0a9aeb7 --- /dev/null +++ b/duetector/service/app.py @@ -0,0 +1,13 @@ +from fastapi import FastAPI + +from duetector.__init__ import __version__ +from duetector.service.control.routes import r as cr +from duetector.service.query.routes import r as qr + +app = FastAPI( + title="Duetector", + description="Data Usage Extensible Detector for data usage observability", + version=__version__, +) +app.include_router(qr) +app.include_router(cr) diff --git a/duetector/service/config.py b/duetector/service/config.py new file mode 100644 index 0000000..c352135 --- /dev/null +++ b/duetector/service/config.py @@ -0,0 +1,26 @@ +import os +from typing import Any, Dict + +try: + from functools import cache +except ImportError: + from functools import lru_cache as cache + +from duetector.config import ConfigLoader + +CONFIG_PATH_ENV = "DUETECTOR_SERVER_CONFIG_PATH" + + +@cache +def get_config() -> Dict[str, Any]: + config_path = os.environ.get(CONFIG_PATH_ENV) + if config_path is None: + raise RuntimeError( + f"Environment variable {CONFIG_PATH_ENV} is not set. " + f"Please set it to the path of the config file." 
+ ) + return ConfigLoader( + path=config_path, + load_env=False, + dump_when_load=False, + ).load_config() diff --git a/duetector/service/control/__init__.py b/duetector/service/control/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/duetector/service/control/routes.py b/duetector/service/control/routes.py new file mode 100644 index 0000000..9cac799 --- /dev/null +++ b/duetector/service/control/routes.py @@ -0,0 +1,12 @@ +from fastapi import APIRouter, Depends + +from duetector.service.config import get_config + +r = APIRouter( + prefix="/control", +) + + +@r.get("/") +async def root(config: dict = Depends(get_config)): + return config diff --git a/duetector/service/query/__init__.py b/duetector/service/query/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/duetector/service/query/routes.py b/duetector/service/query/routes.py new file mode 100644 index 0000000..e8671e7 --- /dev/null +++ b/duetector/service/query/routes.py @@ -0,0 +1,12 @@ +from fastapi import APIRouter, Depends + +from duetector.service.config import get_config + +r = APIRouter( + prefix="/query", +) + + +@r.get("/") +async def root(config: dict = Depends(get_config)): + return config diff --git a/duetector/tools/daemon.py b/duetector/tools/daemon.py index f30c3fb..062c0d8 100644 --- a/duetector/tools/daemon.py +++ b/duetector/tools/daemon.py @@ -41,6 +41,7 @@ class Daemon: def __init__( self, workdir: Union[str, Path], + application: str = "daemon", cmd: Optional[List[str]] = None, env_dict: Optional[Dict[str, str]] = None, rotate_log: bool = True, @@ -49,6 +50,8 @@ def __init__( self.workdir: Path = Path(workdir).expanduser().resolve() self.workdir.mkdir(parents=True, exist_ok=True) + self.application: str = application + self.env_dict: Dict[str, str] = os.environ.copy() if env_dict: self.env_dict.update(env_dict) @@ -60,14 +63,14 @@ def pid_file(self): """ Path to pid file. 
""" - return self.workdir / "pid" + return self.workdir / f"{self.application}.pid" @property def log_file(self): """ Path to log file. """ - return self.workdir / "log" + return self.workdir / f"{self.application}.log" @property def pid(self): @@ -85,7 +88,7 @@ def _rotate_log(self): Rotate log file. """ now = datetime.now() - new_log_file = self.log_file.with_suffix(f".{now:%Y%m%d%H%M%S}") + new_log_file = self.log_file.with_name(f"{self.application}-{now:%Y%m%d-%H%M%S}.log") logger.info(f"Rotate log file to {new_log_file}") self.log_file.rename(new_log_file) @@ -97,7 +100,7 @@ def start(self): raise RuntimeError("cmd is empty, nothing to start") if self.pid: - logger.warning("Daemon is already running, try stop first.") + logger.error("Daemon is already running, try stop first.") return if self.rotate_log and self.log_file.exists(): diff --git a/pyproject.toml b/pyproject.toml index 135da34..88fdf08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,9 @@ dependencies = [ "SQLAlchemy>=2", "click", "psutil", + # Following are for web server + "fastapi", + "uvicorn[standard]", ] dynamic = ["version"] classifiers = [ @@ -32,7 +35,8 @@ docs = ["Sphinx<=7.2.4", "sphinx-rtd-theme", "sphinx-click", "autodoc_pydantic"] [project.scripts] duectl = "duetector.cli.main:cli" duectl-daemon = "duetector.cli.daemon:cli" - +duectl-server = "duetector.cli.server:cli" +duectl-server-daemon = "duetector.cli.server_daemon:cli" [[project.authors]] name = "hitsz-ids" diff --git a/tests/test_daemon.py b/tests/test_daemon.py index 48944a0..2aa86ed 100644 --- a/tests/test_daemon.py +++ b/tests/test_daemon.py @@ -4,10 +4,10 @@ @pytest.fixture -def daemon(): +def daemon(tmpdir): yield Daemon( cmd=["sleep", "100"], - workdir="/tmp/duetector", + workdir=tmpdir, env_dict={"DUETECTOR_LOG_LEVEL": "DEBUG"}, )