Serialize datetimes with Z suffix #2058

Merged Sep 12, 2024 · 43 commits (diff below shows changes from 40 commits)
Commits

7af6410  Add BtrixDatetime annotated type with custom serialization (tw4l, Jul 30, 2024)
cb1bfef  Remove manual Z suffixes from frontend (tw4l, Jul 30, 2024)
bb873de  Check that Z suffix is being added in tests (tw4l, Jul 30, 2024)
b5af503  Fix tests (tw4l, Jul 30, 2024)
92c8469  Add comment to trigger CI (tw4l, Jul 30, 2024)
1e49700  Try smarter fix (tw4l, Jul 30, 2024)
72641b3  Remove unused import (tw4l, Jul 30, 2024)
081553d  TEMPORARY: Determine if started or finished is naive (tw4l, Jul 30, 2024)
d3026e0  Fix typo (tw4l, Jul 30, 2024)
d8ce991  Try with started (tw4l, Jul 30, 2024)
4b7faa8  Fix crawl.started type (tw4l, Jul 31, 2024)
ba34293  Fixup (tw4l, Jul 31, 2024)
21933a7  One more try, save datetime without utils function (tw4l, Jul 31, 2024)
563b587  Change CrawlStatus type to BtrixDatetime (tw4l, Jul 31, 2024)
655cf53  Use naive starting time for duration calculation (tw4l, Jul 31, 2024)
c90498e  Whoops, fix for naive started (tw4l, Jul 31, 2024)
4e11c5b  Fix SubscriptionUpdate quotas model after rebase (tw4l, Aug 29, 2024)
f8ae75e  Add comment to help with review (tw4l, Aug 29, 2024)
712aaa3  Fix status.finished type in operator (tw4l, Aug 29, 2024)
1ac6608  Fix type for crawl.started (tw4l, Aug 29, 2024)
abb2c5a  More datetime -> BtrixDatetime conversions (tw4l, Aug 29, 2024)
41e1cdc  Convert remaining datetimes to BtrixDatetimes as needed (tw4l, Aug 29, 2024)
c20fd4d  Reformat and modify utils (tw4l, Aug 29, 2024)
c6f937d  Revert a few typing changes (tw4l, Aug 29, 2024)
2d0d18b  Revert operator changes (tw4l, Aug 29, 2024)
f120b69  Revert util change temporarily, keep print logging (tw4l, Aug 30, 2024)
e0feca4  Fix imports (tw4l, Aug 30, 2024)
260137a  Make datetime returned by from_k8s_date aware (tw4l, Aug 30, 2024)
7bb59d6  Leave microseconds alone (tw4l, Aug 30, 2024)
90e3ce9  Just trying things (tw4l, Aug 30, 2024)
9315d90  Add temp comparison fix with TODO (tw4l, Aug 30, 2024)
b7e6da4  Revert datetime serialization for export, add comment (tw4l, Aug 30, 2024)
a6cf7cc  Try removing slice (tw4l, Aug 30, 2024)
d0a4c96  Fix crawl stats for now, add another TODO (tw4l, Aug 30, 2024)
a75fe89  Fix copy-paste error (tw4l, Aug 31, 2024)
f9ce0db  update backend to use tz-aware conversion from mongodb (ikreymer, Sep 2, 2024)
25d3791  Fix display of workflow last modified date in list (tw4l, Sep 2, 2024)
ee2dddc  Fix display of collections modified date in list (tw4l, Sep 2, 2024)
6a12f63  Remove remaining manual Z suffixes from frontend (tw4l, Sep 2, 2024)
0f917ad  Merge branch 'main' into issue-1922-datetime-serialization (ikreymer, Sep 6, 2024)
f823995  Merge branch 'main' into issue-1922-datetime-serialization (ikreymer, Sep 12, 2024)
0427621  undo (ikreymer, Sep 12, 2024)
298a4e8  ci: update playwright action (ikreymer, Sep 12, 2024)
4 changes: 2 additions & 2 deletions backend/btrixcloud/basecrawls.py
@@ -31,7 +31,7 @@
     CrawlSearchValuesResponse,
 )
 from .pagination import paginated_format, DEFAULT_PAGE_SIZE
-from .utils import dt_now
+from .utils import dt_now, date_to_str
 
 if TYPE_CHECKING:
     from .crawlconfigs import CrawlConfigOps
@@ -494,7 +494,7 @@ async def resolve_signed_urls(
 
             expire_at_str = ""
             if file_.expireAt:
-                expire_at_str = file_.expireAt.isoformat()
+                expire_at_str = date_to_str(file_.expireAt)
 
             out_files.append(
                 CrawlFileOut(
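The swap from .isoformat() to date_to_str() is what actually puts the Z on the wire. A quick standalone sketch (not part of the diff) of the difference for aware and naive datetimes; the naive case, which carries no offset at all, is why the frontend previously appended "Z" by hand:

from datetime import datetime, timezone

aware = datetime(2024, 9, 12, 10, 30, tzinfo=timezone.utc)
naive = datetime(2024, 9, 12, 10, 30)

print(aware.isoformat())                         # 2024-09-12T10:30:00+00:00
print(naive.isoformat())                         # 2024-09-12T10:30:00 (no offset)
print(aware.isoformat().replace("+00:00", "Z"))  # 2024-09-12T10:30:00Z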
8 changes: 4 additions & 4 deletions backend/btrixcloud/crawlmanager.py
@@ -9,7 +9,7 @@
 
 from fastapi import HTTPException
 
-from .utils import dt_now, to_k8s_date
+from .utils import dt_now, date_to_str
 from .k8sapi import K8sAPI
 
 from .models import StorageRef, CrawlConfig, BgJobType
@@ -53,7 +53,7 @@ async def run_profile_browser(
             "idle_timeout": os.environ.get("IDLE_TIMEOUT", "60"),
             "url": url,
             "vnc_password": secrets.token_hex(16),
-            "expire_time": to_k8s_date(dt_now() + timedelta(seconds=30)),
+            "expire_time": date_to_str(dt_now() + timedelta(seconds=30)),
             "crawler_image": crawler_image,
         }
 
@@ -237,12 +237,12 @@ async def ping_profile_browser(self, browserid: str) -> None:
         """return ping profile browser"""
         expire_at = dt_now() + timedelta(seconds=30)
         await self._patch_job(
-            browserid, {"expireTime": to_k8s_date(expire_at)}, "profilejobs"
+            browserid, {"expireTime": date_to_str(expire_at)}, "profilejobs"
         )
 
     async def rollover_restart_crawl(self, crawl_id: str) -> dict:
         """Rolling restart of crawl by updating restartTime field"""
-        update = to_k8s_date(dt_now())
+        update = date_to_str(dt_now())
         return await self._patch_job(crawl_id, {"restartTime": update})
 
     async def scale_crawl(self, crawl_id: str, scale: int = 1) -> dict:
11 changes: 8 additions & 3 deletions backend/btrixcloud/crawls.py
@@ -19,7 +19,12 @@
 import pymongo
 
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
-from .utils import dt_now, parse_jsonl_error_messages, stream_dict_list_as_csv
+from .utils import (
+    dt_now,
+    date_to_str,
+    parse_jsonl_error_messages,
+    stream_dict_list_as_csv,
+)
 from .basecrawls import BaseCrawlOps
 from .crawlmanager import CrawlManager
 from .models import (
@@ -714,8 +719,8 @@ async def get_crawl_stats(
             data["userid"] = str(crawl.userid)
             data["user"] = user_emails.get(crawl.userid)
 
-            data["started"] = str(crawl.started)
-            data["finished"] = str(crawl.finished)
+            data["started"] = date_to_str(crawl.started) if crawl.started else ""
+            data["finished"] = date_to_str(crawl.finished) if crawl.finished else ""
 
             data["duration"] = 0
             duration_seconds = 0
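The new guards matter because started and finished are Optional: str() on a missing value writes the literal string "None" into the exported stats, and str() on a datetime yields a space-separated form with no Z. A small sketch of both pitfalls the guarded form avoids:

from datetime import datetime, timezone

finished = None  # a crawl that is still running
print(str(finished))  # "None" - the old code could export this literal string

started = datetime(2024, 9, 12, 10, 30, tzinfo=timezone.utc)
print(str(started))   # "2024-09-12 10:30:00+00:00" - space-separated, no Z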
1 change: 1 addition & 0 deletions backend/btrixcloud/db.py
@@ -43,6 +43,7 @@ def init_db():
 
     client = motor.motor_asyncio.AsyncIOMotorClient(
         db_url,
+        tz_aware=True,
         uuidRepresentation="standard",
         connectTimeoutMS=120000,
         serverSelectionTimeoutMS=120000,
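This one-line tz_aware=True change affects every datetime read back from MongoDB: BSON stores datetimes as UTC milliseconds either way, but PyMongo (and therefore Motor) hands them back as naive datetimes unless asked otherwise. A minimal sketch with the synchronous PyMongo client, assuming a local MongoDB instance is available:

from datetime import datetime, timezone

from pymongo import MongoClient

naive_client = MongoClient("mongodb://localhost:27017")
aware_client = MongoClient("mongodb://localhost:27017", tz_aware=True)

coll = naive_client.test.dates
coll.drop()
coll.insert_one({"ts": datetime(2024, 9, 12, 10, 30, tzinfo=timezone.utc)})

print(naive_client.test.dates.find_one()["ts"].tzinfo)  # None: naive datetime
print(aware_client.test.dates.find_one()["ts"].tzinfo)  # UTC tzinfo: aware datetime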
4 changes: 2 additions & 2 deletions backend/btrixcloud/operator/bgjobs.py
@@ -4,7 +4,7 @@
 import traceback
 
 from btrixcloud.utils import (
-    from_k8s_date,
+    str_to_date,
     dt_now,
 )
 
@@ -45,7 +45,7 @@ async def finalize_background_job(self, data: MCDecoratorSyncData) -> dict:
 
         finished = None
         if completion_time:
-            finished = from_k8s_date(completion_time)
+            finished = str_to_date(completion_time)
         if not finished:
             finished = dt_now()
 
43 changes: 22 additions & 21 deletions backend/btrixcloud/operator/crawls.py
@@ -32,7 +32,7 @@
     Organization,
 )
 
-from btrixcloud.utils import from_k8s_date, to_k8s_date, dt_now
+from btrixcloud.utils import str_to_date, date_to_str, dt_now
 
 from .baseoperator import BaseOperator, Redis
 from .models import (
@@ -408,8 +408,8 @@ def _qa_configmap_update_needed(self, name, configmap):
        now = dt_now()
        resources = json.loads(configmap["data"]["qa-config.json"])["resources"]
        for resource in resources:
-            expire_at = datetime.fromisoformat(resource["expireAt"])
-            if expire_at <= now:
+            expire_at = str_to_date(resource["expireAt"])
+            if expire_at and expire_at <= now:
                print(f"Refreshing QA configmap for QA run: {name}")
                return True
 
@@ -547,7 +547,7 @@ async def set_state(
        if actual_state:
            status.state = actual_state
        if finished:
-            status.finished = to_k8s_date(finished)
+            status.finished = date_to_str(finished)
 
        if actual_state != state:
            print(
@@ -721,7 +721,7 @@ async def finalize_response(
        # keep parent until ttl expired, if any
        if status.finished:
            ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL)
-            finished = from_k8s_date(status.finished)
+            finished = str_to_date(status.finished)
            if finished and (dt_now() - finished).total_seconds() > ttl >= 0:
                print("CrawlJob expired, deleting: " + crawl.id)
                finalized = True
@@ -798,7 +798,7 @@ async def sync_crawl_state(
                # but not right away in case crawler pod is just restarting.
                # avoids keeping redis pods around while no crawler pods are up
                # (eg. due to resource constraints)
-                last_active_time = from_k8s_date(status.lastActiveTime)
+                last_active_time = str_to_date(status.lastActiveTime)
                if last_active_time and (
                    (dt_now() - last_active_time).total_seconds() > REDIS_TTL
                ):
@@ -816,7 +816,7 @@
 
            # update lastActiveTime if crawler is running
            if crawler_running:
-                status.lastActiveTime = to_k8s_date(dt_now())
+                status.lastActiveTime = date_to_str(dt_now())
 
            file_done = await redis.lpop(self.done_key)
            while file_done:
@@ -965,7 +965,7 @@ async def increment_pod_exec_time(
        if status.state in WAITING_STATES:
            # reset lastUpdatedTime if at least 2 consecutive updates of non-running state
            if status.last_state in WAITING_STATES:
-                status.lastUpdatedTime = to_k8s_date(now)
+                status.lastUpdatedTime = date_to_str(now)
            return
 
        update_start_time = await self.crawl_ops.get_crawl_exec_last_update_time(
@@ -991,7 +991,7 @@
            await self.crawl_ops.inc_crawl_exec_time(
                crawl.db_crawl_id, crawl.is_qa, 0, now
            )
-            status.lastUpdatedTime = to_k8s_date(now)
+            status.lastUpdatedTime = date_to_str(now)
            return
 
        reason = None
@@ -1037,16 +1037,16 @@
                if "running" in cstate:
                    pod_state = "running"
                    state = cstate["running"]
-                    start_time = from_k8s_date(state.get("startedAt"))
+                    start_time = str_to_date(state.get("startedAt"))
                    if update_start_time and start_time and update_start_time > start_time:
                        start_time = update_start_time
 
                    end_time = now
                elif "terminated" in cstate:
                    pod_state = "terminated"
                    state = cstate["terminated"]
-                    start_time = from_k8s_date(state.get("startedAt"))
-                    end_time = from_k8s_date(state.get("finishedAt"))
+                    start_time = str_to_date(state.get("startedAt"))
+                    end_time = str_to_date(state.get("finishedAt"))
                    if update_start_time and start_time and update_start_time > start_time:
                        start_time = update_start_time
 
@@ -1081,16 +1081,17 @@
        await self.crawl_ops.inc_crawl_exec_time(
            crawl.db_crawl_id, crawl.is_qa, exec_time, now
        )
-        status.lastUpdatedTime = to_k8s_date(now)
+        status.lastUpdatedTime = date_to_str(now)
 
-    def should_mark_waiting(self, state, started):
+    def should_mark_waiting(self, state: TYPE_ALL_CRAWL_STATES, started: str) -> bool:
        """Should the crawl be marked as waiting for capacity?"""
        if state in RUNNING_STATES:
            return True
 
        if state == "starting":
-            started = from_k8s_date(started)
-            return (dt_now() - started).total_seconds() > STARTING_TIME_SECS
+            started_dt = str_to_date(started)
+            if started_dt:
+                return (dt_now() - started_dt).total_seconds() > STARTING_TIME_SECS
 
        return False
 
@@ -1183,7 +1184,7 @@ async def log_crashes(self, crawl_id, pod_status: dict[str, PodInfo], redis):
    def get_log_line(self, message, details):
        """get crawler error line for logging"""
        err = {
-            "timestamp": dt_now().isoformat(),
+            "timestamp": date_to_str(dt_now()),
            "logLevel": "error",
            "context": "k8s",
            "message": message,
@@ -1240,7 +1241,7 @@ async def is_crawl_stopping(
        # check timeout if timeout time exceeds elapsed time
        if crawl.timeout:
            elapsed = status.elapsedCrawlTime
-            last_updated_time = from_k8s_date(status.lastUpdatedTime)
+            last_updated_time = str_to_date(status.lastUpdatedTime)
            if last_updated_time:
                elapsed += int((dt_now() - last_updated_time).total_seconds())
 
@@ -1452,11 +1453,11 @@ async def mark_finished(
        ):
            print("already finished, ignoring mark_finished")
            if not status.finished:
-                status.finished = to_k8s_date(finished)
+                status.finished = date_to_str(finished)
 
            return False
 
-        status.finished = to_k8s_date(finished)
+        status.finished = date_to_str(finished)
 
        if state in SUCCESSFUL_STATES:
            await self.inc_crawl_complete_stats(crawl, finished)
@@ -1517,7 +1518,7 @@ async def do_qa_run_finished_tasks(
    async def inc_crawl_complete_stats(self, crawl: CrawlSpec, finished: datetime):
        """Increment Crawl Stats"""
 
-        started = from_k8s_date(crawl.started)
+        started = str_to_date(crawl.started)
        if not started:
            print("Missing crawl start time, unable to increment crawl stats")
            return
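A recurring shape in this file: str_to_date() returns Optional[datetime], so every call site now guards before doing arithmetic (see should_mark_waiting and _qa_configmap_update_needed above). The guard also sidesteps the naive/aware trap that several of the intermediate commits wrestled with: now that dt_now() is timezone-aware, subtracting a naive datetime from it raises TypeError. A standalone sketch of that failure mode, with the helper inlined for the example:

from datetime import datetime, timezone

def dt_now() -> datetime:
    # same behavior as the new utils.dt_now(): aware UTC, no microseconds
    return datetime.now(timezone.utc).replace(microsecond=0)

aware = datetime.fromisoformat("2024-09-12T10:30:00+00:00")
naive = datetime.fromisoformat("2024-09-12T10:30:00")

print((dt_now() - aware).total_seconds())  # fine: both operands are aware
try:
    print((dt_now() - naive).total_seconds())
except TypeError as exc:
    print(exc)  # can't subtract offset-naive and offset-aware datetimes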
6 changes: 3 additions & 3 deletions backend/btrixcloud/operator/cronjobs.py
@@ -4,7 +4,7 @@
 from typing import Optional
 import yaml
 
-from btrixcloud.utils import to_k8s_date, dt_now
+from btrixcloud.utils import date_to_str, dt_now
 from .models import MCDecoratorSyncData, CJS, MCDecoratorSyncResponse
 from .baseoperator import BaseOperator
 
@@ -31,7 +31,7 @@ def get_finished_response(
        """get final response to indicate cronjob created job is finished"""
 
        if not finished:
-            finished = to_k8s_date(dt_now())
+            finished = date_to_str(dt_now())
 
        status = None
        # set status on decorated job to indicate that its finished
@@ -151,7 +151,7 @@ async def sync_cronjob_crawl(
            crawl_id, is_qa=False
        )
        if finished:
-            finished_str = to_k8s_date(finished)
+            finished_str = date_to_str(finished)
            set_status = False
            # mark job as completed
            if not data.object["status"].get("succeeded"):
4 changes: 2 additions & 2 deletions backend/btrixcloud/operator/profiles.py
@@ -1,6 +1,6 @@
 """ Operator handler for ProfileJobs """
 
-from btrixcloud.utils import from_k8s_date, dt_now
+from btrixcloud.utils import str_to_date, dt_now
 
 from btrixcloud.models import StorageRef
 
@@ -23,7 +23,7 @@ async def sync_profile_browsers(self, data: MCSyncData):
        """sync profile browsers"""
        spec = data.parent.get("spec", {})
 
-        expire_time = from_k8s_date(spec.get("expireTime"))
+        expire_time = str_to_date(spec.get("expireTime"))
        browserid = spec.get("id")
 
        if expire_time and dt_now() >= expire_time:
4 changes: 2 additions & 2 deletions backend/btrixcloud/pages.py
@@ -31,7 +31,7 @@
     PageNoteUpdatedResponse,
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
-from .utils import from_k8s_date, str_list_to_bools, dt_now
+from .utils import str_to_date, str_list_to_bools, dt_now
 
 if TYPE_CHECKING:
     from .crawls import CrawlOps
@@ -112,7 +112,7 @@ def _get_page_from_dict(
            loadState=page_dict.get("loadState"),
            status=status,
            mime=page_dict.get("mime", "text/html"),
-            ts=(from_k8s_date(ts) if ts else dt_now()),
+            ts=(str_to_date(ts) if ts else dt_now()),
        )
        p.compute_page_type()
        return p
19 changes: 7 additions & 12 deletions backend/btrixcloud/utils.py
@@ -33,7 +33,7 @@ def default(self, o: Any) -> str:
            return str(o)
 
        if isinstance(o, datetime):
-            return o.isoformat()
+            return date_to_str(o)
 
        return super().default(o)
 
@@ -43,24 +43,19 @@ def get_templates_dir() -> str:
    return os.path.join(os.path.dirname(__file__), "templates")
 
 
-def from_k8s_date(string: str) -> Optional[datetime]:
+def str_to_date(string: str) -> Optional[datetime]:
    """convert k8s date string to datetime"""
-    return datetime.fromisoformat(string[:-1]) if string else None
+    return datetime.fromisoformat(string) if string else None
 
 
-def to_k8s_date(dt_val: datetime) -> str:
-    """convert datetime to string for k8s"""
-    return dt_val.isoformat("T") + "Z"
+def date_to_str(dt_val: datetime) -> str:
+    """convert date to isostring with "Z" """
+    return dt_val.isoformat().replace("+00:00", "Z")
 
 
 def dt_now() -> datetime:
    """get current ts"""
-    return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
-
-
-def ts_now() -> str:
-    """get current ts"""
-    return str(dt_now())
+    return datetime.now(timezone.utc).replace(microsecond=0)
 
 
 def run_once_lock(name) -> bool:
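Taken together, the three helpers keep datetimes timezone-aware end to end: dt_now() produces an aware UTC timestamp, date_to_str() serializes it with a Z, and str_to_date() parses it back without the old string[:-1] slice that silently produced naive datetimes. A sketch of the round trip (note that datetime.fromisoformat() only accepts a trailing "Z" on Python 3.11+; older interpreters need the "+00:00" form):

from datetime import datetime, timezone
from typing import Optional

def dt_now() -> datetime:
    return datetime.now(timezone.utc).replace(microsecond=0)

def date_to_str(dt_val: datetime) -> str:
    return dt_val.isoformat().replace("+00:00", "Z")

def str_to_date(string: str) -> Optional[datetime]:
    return datetime.fromisoformat(string) if string else None

now = dt_now()
s = date_to_str(now)         # e.g. "2024-09-12T10:30:00Z"
assert s.endswith("Z")
assert str_to_date(s) == now  # round-trips to an identical aware datetime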
1 change: 1 addition & 0 deletions backend/test/test_collections.py
@@ -134,6 +134,7 @@ def test_update_collection(
     global modified
     modified = data["modified"]
     assert modified
+    assert modified.endswith("Z")
 
 
 def test_rename_collection(
8 changes: 8 additions & 0 deletions backend/test/test_crawlconfigs.py
@@ -28,6 +28,14 @@ def test_crawl_config_usernames(
     assert data["modifiedByName"]
     assert data["lastStartedByName"]
 
+    created = data["created"]
+    assert created
+    assert created.endswith("Z")
+
+    modified = data["modified"]
+    assert modified
+    assert modified.endswith("Z")
+
 
 def test_add_crawl_config(crawler_auth_headers, default_org_id, sample_crawl_data):
     # Create crawl config
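The models.py side of the change is not among the hunks shown here, but the first commit ("Add BtrixDatetime annotated type with custom serialization") names the mechanism that makes these test assertions pass. A hypothetical reconstruction of such an annotated type with Pydantic v2; the actual type name comes from the commit message, while the serializer details and field names below are assumptions for illustration:

from datetime import datetime, timezone
from typing import Annotated

from pydantic import BaseModel, PlainSerializer

# Serialize datetime fields as ISO 8601 with a trailing Z when dumping to JSON
BtrixDatetime = Annotated[
    datetime,
    PlainSerializer(
        lambda dt: dt.isoformat().replace("+00:00", "Z"),
        return_type=str,
        when_used="json",
    ),
]

class Crawl(BaseModel):
    started: BtrixDatetime

crawl = Crawl(started=datetime(2024, 9, 12, 10, 30, tzinfo=timezone.utc))
print(crawl.model_dump_json())  # {"started":"2024-09-12T10:30:00Z"}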