Skip to content

Commit

Permalink
Merge pull request #372 from argonne-lcf/main
Browse files Browse the repository at this point in the history
merge changes from main
  • Loading branch information
cms21 authored Aug 29, 2023
2 parents c00a72d + 5034973 commit d90f929
Show file tree
Hide file tree
Showing 30 changed files with 398 additions and 114 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3-slim
FROM python:3.10-slim

WORKDIR /balsam

Expand Down
2 changes: 1 addition & 1 deletion balsam/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from balsam.util import config_root_logger

__version__ = "0.7.0.a21"
__version__ = "0.7.0.a22"
config_root_logger()
2 changes: 2 additions & 0 deletions balsam/_api/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ def acquire_jobs(
max_nodes_per_job: Optional[int] = None,
max_aggregate_nodes: Optional[float] = None,
serial_only: bool = False,
sort_by: Optional[str] = None,
filter_tags: Optional[Dict[str, str]] = None,
states: Set[JobState] = RUNNABLE_STATES,
app_ids: Optional[Set[int]] = None,
Expand All @@ -385,6 +386,7 @@ def acquire_jobs(
max_nodes_per_job=max_nodes_per_job,
max_aggregate_nodes=max_aggregate_nodes,
serial_only=serial_only,
sort_by=sort_by,
filter_tags=filter_tags,
states=states,
app_ids=app_ids,
Expand Down
2 changes: 1 addition & 1 deletion balsam/_api/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
if TYPE_CHECKING:
from balsam.client import RESTClient

FILTER_CHUNK_SIZE = 512
FILTER_CHUNK_SIZE = 500

logger = logging.getLogger(__name__)
T = TypeVar("T", bound=BalsamModel)
Expand Down
2 changes: 1 addition & 1 deletion balsam/_api/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def __repr__(self) -> str:

def __str__(self) -> str:
d = self.display_dict()
return yaml.dump(d, sort_keys=False, indent=4) # type: ignore
return yaml.dump(d, sort_keys=False, indent=4)

def __eq__(self, other: Any) -> bool:
if not isinstance(other, BalsamModel):
Expand Down
10 changes: 5 additions & 5 deletions balsam/_api/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file was auto-generated via /Users/turam/opt/miniconda3/bin/python balsam/schemas/api_generator.py
# [git rev ce4bdce]
# This file was auto-generated via /home/turam/miniconda3/bin/python balsam/schemas/api_generator.py
# [git rev 7634992]
# Do *not* make changes to the API by changing this file!

import datetime
Expand Down Expand Up @@ -765,7 +765,7 @@ class BatchJob(balsam._api.bases.BatchJobBase):
job_mode = Field[balsam.schemas.batchjob.JobMode]()
optional_params = Field[typing.Dict[str, str]]()
filter_tags = Field[typing.Dict[str, str]]()
partitions = Field[typing.Optional[typing.List[balsam.schemas.batchjob.BatchJobPartition]]]()
partitions = Field[Optional[typing.Union[typing.List[balsam.schemas.batchjob.BatchJobPartition], None]]]()
site_id = Field[int]()
project = Field[str]()
queue = Field[str]()
Expand All @@ -786,7 +786,7 @@ def __init__(
queue: str,
optional_params: Optional[typing.Dict[str, str]] = None,
filter_tags: Optional[typing.Dict[str, str]] = None,
partitions: Optional[typing.Optional[typing.List[balsam.schemas.batchjob.BatchJobPartition]]] = None,
partitions: Optional[typing.Union[typing.List[balsam.schemas.batchjob.BatchJobPartition], None]] = None,
**kwargs: Any,
) -> None:
"""
Expand Down Expand Up @@ -918,7 +918,7 @@ def create(
queue: str,
optional_params: Optional[typing.Dict[str, str]] = None,
filter_tags: Optional[typing.Dict[str, str]] = None,
partitions: Optional[typing.Optional[typing.List[balsam.schemas.batchjob.BatchJobPartition]]] = None,
partitions: Optional[typing.Union[typing.List[balsam.schemas.batchjob.BatchJobPartition], None]] = None,
) -> BatchJob:
"""
Create a new BatchJob object and save it to the API in one step.
Expand Down
76 changes: 64 additions & 12 deletions balsam/cmdline/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,72 @@ def ls(site_selector: str, verbose: bool) -> None:


@app.command()
@click.option("-n", "--name", required=True)
@click.option("-n", "--name", "name_selector", default=None)
@click.option("-s", "--site", "site_selector", default="")
def rm(site_selector: str, name: str) -> None:
@click.option("-a", "--all", is_flag=True, default=False)
def rm(site_selector: str, name_selector: str, all: bool) -> None:
"""
Remove Apps
1) Remove named app
balsam app rm -n hello_world
2) Remove all apps across a site
balsam app rm --all --site=123,my_site_folder
3) Filter apps by specific site IDs or Path fragments
balsam app rm -n hello_world --site=123,my_site_folder
"""
client = ClientSettings.load_from_file().build_client()
qs = client.App.objects.all()
qs = filter_by_sites(qs, site_selector)

resolved_app = qs.get(name=name)
resolved_id = resolved_app.id
appstr = f"App(id={resolved_id}, name={resolved_app.name})"
job_count = client.Job.objects.filter(app_id=resolved_id).count()
if job_count == 0:
resolved_app.delete()
click.echo(f"Deleted {appstr}: there were no associated jobs.")
elif click.confirm(f"Really Delete {appstr}?? There are {job_count} Jobs that will be ERASED!"):
resolved_app.delete()
click.echo(f"Deleted App {resolved_id} ({name})")
if all and name_selector is not None:
raise click.BadParameter("Specify app name or --all, but not both")
elif not all and name_selector is None:
raise click.BadParameter("Specify app name with -n or specify --all")
else:
app_list = []

if all and site_selector == "":
raise click.BadParameter("balsam app rm --all requires that you specify --site to remove jobs")
elif all and site_selector != "":
click.echo("THIS WILL DELETE ALL APPS IN SITE! CAUTION!")
app_list = [a.name for a in list(qs)]
num_apps = 0
num_jobs = 0
elif name_selector is not None:
app_list = [name_selector]

if len(app_list) > 0:
for name in app_list:
resolved_app = qs.get(name=name)
resolved_id = resolved_app.id
job_count = client.Job.objects.filter(app_id=resolved_id).count()

if name_selector is not None:
appstr = f"App(id={resolved_id}, name={resolved_app.name}, site={resolved_app.site_id})"
if job_count == 0:
resolved_app.delete()
click.echo(f"Deleted {appstr}: there were no associated jobs.")
elif click.confirm(f"Really Delete {appstr}?? There are {job_count} Jobs that will be ERASED!"):
resolved_app.delete()
click.echo(f"Deleted App {resolved_id} ({name})")
else:
num_apps += 1
num_jobs += job_count

if all:
if click.confirm(
f"Really DELETE {num_apps} apps and {num_jobs} jobs from site {site_selector}?? They will be ERASED!"
):
for name in app_list:
resolved_app = qs.get(name=name)
resolved_app.delete()
click.echo(f"Deleted {num_apps} apps and {num_jobs} jobs from site {site_selector}")
else:
click.echo("Found no apps to Delete")
5 changes: 4 additions & 1 deletion balsam/cmdline/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,9 @@ def ls(

@job.command()
@click.option("-i", "--id", "job_ids", multiple=True, type=int)
@click.option("-t", "--tag", "tags", multiple=True, type=str, callback=validate_tags)
@click.option("-s", "--state", "state", type=str)
def modify(job_ids: List[int], state: JobState) -> None:
def modify(job_ids: List[int], tags: List[str], state: JobState) -> None:
"""
Modify Jobs
Expand All @@ -328,6 +329,8 @@ def modify(job_ids: List[int], state: JobState) -> None:
jobs = client.Job.objects.all()
if job_ids:
jobs = jobs.filter(id=job_ids)
elif tags:
jobs = jobs.filter(tags=tags)
else:
raise click.BadParameter("Provide either list of Job ids or tags to delete")
count = jobs.count()
Expand Down
100 changes: 63 additions & 37 deletions balsam/cmdline/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,12 @@ def submit(


@queue.command()
@click.option("-n", "--num", default=0, type=int)
@click.option("-h", "--history", is_flag=True, default=False)
@click.option("-v", "--verbose", is_flag=True, default=False)
@click.option("--site", "site_selector", default="")
def ls(history: bool, site_selector: str) -> None:
@click.option("--scheduler_id", "scheduler_id", type=int, default=None)
def ls(history: bool, verbose: bool, num: int, site_selector: str, scheduler_id: int) -> None:
"""
List BatchJobs
Expand All @@ -97,49 +100,72 @@ def ls(history: bool, site_selector: str) -> None:
2) View historical BatchJobs at all sites
balsam queue ls --history --site all
3) View verbose record for BatchJob with scheduler id
balsam queue ls --scheduler_id 12345 -v
4) View the last n BatchJobs
balsam queue ls --num n
"""
client = load_client()
BatchJob = client.BatchJob
qs = filter_by_sites(BatchJob.objects.all(), site_selector)
if not history:

if not history and scheduler_id is None and num == 0:
qs = qs.filter(state=["pending_submission", "queued", "running", "pending_deletion"])
if len(qs) == 0:
click.echo("No active batch jobs. Use --history option to list completed batch jobs.")

if scheduler_id is not None:
qs = qs.filter(scheduler_id=scheduler_id)

jobs = [j.display_dict() for j in qs]
sites = {site.id: site for site in client.Site.objects.all()}
for job in jobs:
site = sites[job["site_id"]]
path_str = site.path.as_posix()
if len(path_str) > 27:
path_str = "..." + path_str[-27:]
job["site"] = f"{site.name}"

fields = [
"id",
"site",
"scheduler_id",
"state",
"filter_tags",
"project",
"queue",
"num_nodes",
"wall_time_min",
"job_mode",
]
rows = [[str(j[field]) for field in fields] for j in jobs]

col_widths = [len(f) for f in fields]
for row in rows:
for col_idx, width in enumerate(col_widths):
col_widths[col_idx] = max(width, len(row[col_idx]))

for i, field in enumerate(fields):
fields[i] = field.rjust(col_widths[i] + 1)

print(*fields)
for row in rows:
for i, col in enumerate(row):
row[i] = col.rjust(col_widths[i] + 1)
print(*row)
if not history and num > 0 and scheduler_id is None:
click.echo(f"Displaying records for last {num} Batch Jobs")
jobs = jobs[-num:]

if verbose:
for j in jobs:
click.echo(j)
else:
sites = {site.id: site for site in client.Site.objects.all()}
for job in jobs:
site = sites[job["site_id"]]
path_str = site.path.as_posix()
if len(path_str) > 27:
path_str = "..." + path_str[-27:]
job["site"] = f"{site.name}"

fields = [
"id",
"site",
"scheduler_id",
"state",
"filter_tags",
"project",
"queue",
"num_nodes",
"wall_time_min",
"job_mode",
]
rows = [[str(j[field]) for field in fields] for j in jobs]

col_widths = [len(f) for f in fields]
for row in rows:
for col_idx, width in enumerate(col_widths):
col_widths[col_idx] = max(width, len(row[col_idx]))

for i, field in enumerate(fields):
fields[i] = field.rjust(col_widths[i] + 1)

print(*fields)
for row in rows:
for i, col in enumerate(row):
row[i] = col.rjust(col_widths[i] + 1)
print(*row)


@queue.command()
Expand Down
42 changes: 30 additions & 12 deletions balsam/cmdline/site.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import shutil
import socket
import sys
Expand Down Expand Up @@ -128,23 +129,40 @@ def mv(src: Union[Path, str], dest: Union[Path, str]) -> None:


@site.command()
@click.argument("path", type=click.Path(exists=True, file_okay=False))
def rm(path: Union[str, Path]) -> None:
# @click.argument("path", type=click.Path(exists=True, file_okay=False))
@click.argument("path", type=click.Path())
@click.option("-f", "--force", is_flag=True, default=False)
def rm(path: str, force: bool) -> None:
"""
Remove a balsam site
balsam site rm /path/to/site
"""
cf = SiteConfig(path)
client = cf.client
site = client.Site.objects.get(id=cf.site_id)
jobcount = client.Job.objects.filter(site_id=site.id).count()
warning = f"This will wipe out {jobcount} jobs inside!" if jobcount else ""

if click.confirm(f"Do you really want to destroy {Path(path).name}? {warning}"):
site.delete()
shutil.rmtree(path)
click.echo(f"Deleted site {path}")
if not force:
if os.path.exists(path):
cf = SiteConfig(path)
client = cf.client
site = client.Site.objects.get(id=cf.site_id)
jobcount = client.Job.objects.filter(site_id=site.id).count()
warning = f"This will wipe out {jobcount} jobs inside!" if jobcount else ""

if click.confirm(f"Do you really want to destroy {Path(path).name}? {warning}"):
site.delete()
shutil.rmtree(path)
click.echo(f"Deleted site {path}")
else:
raise click.BadParameter("Path doesn't exist")
else:
client = ClientSettings.load_from_file().build_client()
qs = client.Site.objects.all()
qs = qs.filter(path=path)
if len(qs) > 1:
raise click.BadParameter(f"Path found in {len(qs)} sites")
else:
site_id = qs[0].id
site = client.Site.objects.get(id=site_id)
site.delete()
click.echo("Forced site deletion; check for path to clean up")


@site.command()
Expand Down
12 changes: 5 additions & 7 deletions balsam/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ class LauncherSettings(BaseSettings):
local_app_launcher: Type[AppRun] = Field("balsam.platform.app_run.LocalAppRun")
mpirun_allows_node_packing: bool = False
serial_mode_prefetch_per_rank: int = 64
sort_by: Optional[str] = None
serial_mode_startup_params: Dict[str, str] = {"cpu_affinity": "none"}

@validator("compute_node", pre=True, always=True)
Expand Down Expand Up @@ -235,13 +236,10 @@ def save(self, path: Union[str, Path]) -> None:
fp.write(self.dump_yaml())

def dump_yaml(self) -> str:
return cast(
str,
yaml.dump(
json.loads(self.json()),
sort_keys=False,
indent=4,
),
return yaml.dump(
json.loads(self.json()),
sort_keys=False,
indent=4,
)

@classmethod
Expand Down
Loading

0 comments on commit d90f929

Please sign in to comment.