Skip to content

Commit

Permalink
Adding anonymizeSnippets in reports config (#569)
Browse files Browse the repository at this point in the history
* Adding anonymizeSnippets in reports config

* Updated config processing logic, it is now using pydantic models and validation.

* Updated docs
  • Loading branch information
dristysrivastava authored Sep 25, 2024
1 parent 0f216ea commit b476d75
Show file tree
Hide file tree
Showing 11 changed files with 555 additions and 547 deletions.
3 changes: 2 additions & 1 deletion docs/gh_pages/docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@ Notes:
![Pebblo Reports](../static/img/report-comparision.png)

- `cacheDir`: Sets the directory where pebblo stores metadata, generated reports, and other temporary files. Default value is `~/.pebblo`.
- `anonymizeSnippets`: Flag to anonymize snippets in report and Pebblo local UI. Possible values are 'True' and 'False'. When its value is 'True', snippets in reports and Pebblo local UI will be shown as anonymized and vice versa.
- `outputDir`: Deprecated. Use `cacheDir` instead.

### Classifier

- `mode`: Specifies the mode for the classify API. Possible values are `all`, `entity`, or `topic`. Default value is `all`. When the value is `all`, both entities and topics are classified; when it is `entity`, only entities are classified; when it is `topic`, only topics are classified. It is used for classification in the /classify and /loader/doc APIs.
- `anonymizeSnippets`: Flag to anonymize snippets in report. Possible values are 'True' and 'False'. When its value is 'True', snippets in reports will be shown as anonymized and vice versa.
- `anonymizeSnippets`: Deprecated. Use `anonymizeSnippets` under `reports` instead.

### Storage
This is a beta feature introduced in 0.1.18.
Expand Down
116 changes: 31 additions & 85 deletions pebblo/app/config/config.py
Original file line number Diff line number Diff line change
@@ -1,114 +1,60 @@
import pathlib
import sys
from contextvars import ContextVar
from typing import Optional, Tuple, Union
from typing import Optional, Tuple

import yaml
from pydantic import Field
from pydantic_settings import BaseSettings

from pebblo.app.config.config_validation import validate_config, validate_input
from pebblo.app.enums.common import ClassificationMode, DBStorageTypes, StorageTypes

# Absolute path of the current working directory, captured once at import
# time and used as the fallback location in the config defaults.
dir_path = pathlib.Path.cwd()


class PortConfig(BaseSettings):
    """Host and port settings; defaults to ``localhost`` and ``8000``."""

    host: str = "localhost"
    port: int = 8000


class ReportConfig(BaseSettings):
    """Report settings: output format, renderer and cache directory."""

    format: str = "pdf"
    renderer: str = "xhtml2pdf"
    # Falls back to ``dir_path``, the working directory captured at import.
    cacheDir: str = str(dir_path)


# Default values used by the logging configuration.
DEFAULT_LOGGER_NAME = "pebblo"
DEFAULT_LOG_LEVEL = "INFO"
DEFAULT_LOG_BACKUP_COUNT = 3
DEFAULT_LOG_MAX_FILE_SIZE = 8 * 1024**2  # 8 MiB, expressed in bytes
DEFAULT_LOG_FILE_PATH = "/tmp/logs"
# The log file is named after the logger and lives in the log directory.
DEFAULT_LOG_FILE = DEFAULT_LOG_FILE_PATH + "/" + DEFAULT_LOGGER_NAME + ".log"


class LoggingConfig(BaseSettings):
    """Logging settings; each field defaults to a ``DEFAULT_LOG_*`` constant."""

    level: str = DEFAULT_LOG_LEVEL
    file: str = DEFAULT_LOG_FILE
    maxFileSize: int = DEFAULT_LOG_MAX_FILE_SIZE
    backupCount: int = DEFAULT_LOG_BACKUP_COUNT


class ClassifierConfig(BaseSettings):
    """Classifier settings: classification mode and snippet anonymization flag."""

    mode: str = ClassificationMode.ALL.value
    anonymizeSnippets: bool = True


class StorageConfig(BaseSettings):
    """Storage settings: storage type, database kind, location and name."""

    type: str = StorageTypes.FILE.value
    db: Optional[str] = DBStorageTypes.SQLITE.value
    location: Optional[str] = str(dir_path)
    name: Optional[str] = "pebblo"
    # This is the default value for the current version (0.1.18); it needs to
    # be changed to db in the next version.


class Config(BaseSettings):
    """Top-level config file model: one required settings object per section."""

    daemon: PortConfig
    reports: ReportConfig
    logging: LoggingConfig
    classifier: ClassifierConfig
    storage: StorageConfig

from pebblo.app.config.models import (
ClassifierConfig,
Config,
DaemonConfig,
LoggingConfig,
ReportConfig,
StorageConfig,
)
from pebblo.app.enums.common import ClassificationMode

# Context variable for the server Config object; reads return None until a
# value has been set.
var_server_config: ContextVar[Config] = ContextVar("server_config", default=None)
# Context variable for the server config in dict form; reads return an empty
# dict until a value has been set.
# NOTE(review): the `{}` default is a single dict object handed back to every
# reader that has not set a value -- presumably it is never mutated in place;
# confirm against callers.
var_server_config_dict: ContextVar[dict] = ContextVar("server_config_dict", default={})


def get_default_config_values():
    """Build the built-in default configuration.

    Returns:
        A ``(config_dict, config)`` tuple: the default ``Config`` object,
        preceded by its ``dict()`` representation.
    """
    daemon_defaults = DaemonConfig(host="localhost", port=8000)
    report_defaults = ReportConfig(
        format="pdf", renderer="xhtml2pdf", cacheDir="~/.pebblo"
    )
    logging_defaults = LoggingConfig()
    classifier_defaults = ClassifierConfig(
        mode=ClassificationMode.ALL.value, anonymizeSnippets=False
    )
    # For now the default storage type is FILE; in the next release DB will
    # be the default storage type.
    storage_defaults = StorageConfig(type="file", db=None)

    default_config = Config(
        daemon=daemon_defaults,
        reports=report_defaults,
        logging=logging_defaults,
        classifier=classifier_defaults,
        storage=storage_defaults,
    )
    return default_config.dict(), default_config


# Load the server configuration from the YAML file at `path` and return it as
# a (config_dict, Config) tuple.
# NOTE(review): this span reads like a diff rendering with the +/- markers and
# indentation stripped, so lines from two revisions appear interleaved (e.g.
# both `conf_obj` and `get_default_config_values()` are used, and the IOError
# handler has two consecutive returns). Confirm which lines belong to the
# final function before relying on the flow described here.
def load_config(path: Optional[str]) -> Tuple[dict, Config]:
try:
# If Path does not exist in command, set default config value
conf_obj = Config(
daemon=PortConfig(host="localhost", port=8000),
reports=ReportConfig(
format="pdf", renderer="xhtml2pdf", cacheDir="~/.pebblo"
),
logging=LoggingConfig(),
classifier=ClassifierConfig(
mode=ClassificationMode.ALL.value, anonymizeSnippets=False
),
storage=StorageConfig(type="file", db=None),
# for now, a default storage type is FILE, but in the next release DB will be the default storage type.
)
if not path:
# Setting Default config details
return conf_obj.dict(), conf_obj
# If Path does not exist in command, set default config value
# NOTE(review): the result of this call is discarded (no `return`). If this
# line is the only statement in the `if not path:` branch, execution would
# continue to `open(path)` with an empty path -- confirm a `return` is intended.
get_default_config_values()

# If Path exist, set config value
try:
with open(path, "r") as output:
cred_yaml = yaml.safe_load(output)
cred_yaml = validate_input(cred_yaml)

# Replace missing fields with default values
for key in conf_obj.dict().keys():
if key not in cred_yaml:
cred_yaml[key] = conf_obj.dict()[key]
parsed_config = Config.parse_obj(cred_yaml)
config_dict = parsed_config.dict()
# Store the logging level in upper case before validation.
config_dict["logging"]["level"] = (
config_dict.get("logging").get("level").upper()
)
validate_config(config_dict)
return config_dict, parsed_config
except IOError as err:
# The config file could not be opened or read: report it and use the defaults.
print(f"no config file found at {path}. Error : {err}")
return conf_obj.dict(), conf_obj
return get_default_config_values()

except Exception as err:
# Any other failure is reported and the process exits.
print(f"Error while loading config details, err: {err}")
print("Exiting due to validation error...")
sys.exit()
# NOTE(review): this return follows sys.exit() and looks unreachable -- confirm.
return {}, {}
211 changes: 0 additions & 211 deletions pebblo/app/config/config_validation.py

This file was deleted.

Loading

0 comments on commit b476d75

Please sign in to comment.