From c49f3d89cbd9295f496c6add1eb68c3cba31a111 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 26 Aug 2024 14:15:35 -0700 Subject: [PATCH 01/18] add 'numBrowsers' to /api/settings endpoint, part of #2048 --- backend/btrixcloud/main.py | 2 ++ backend/test/test_api.py | 1 + chart/templates/configmap.yaml | 2 ++ 3 files changed, 5 insertions(+) diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index 3678f49e4..cb1610a98 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -111,6 +111,7 @@ class SettingsResponse(BaseModel): defaultPageLoadTimeSeconds: int maxPagesPerCrawl: int + numBrowsers: int maxScale: int billingEnabled: bool @@ -143,6 +144,7 @@ def main() -> None: os.environ.get("DEFAULT_PAGE_LOAD_TIME_SECONDS", 120) ), maxPagesPerCrawl=int(os.environ.get("MAX_PAGES_PER_CRAWL", 0)), + numBrowsers=int(os.environ.get("NUM_BROWSERS", 1)), maxScale=int(os.environ.get("MAX_CRAWL_SCALE", 3)), billingEnabled=is_bool(os.environ.get("BILLING_ENABLED")), signUpUrl=os.environ.get("SIGN_UP_URL", ""), diff --git a/backend/test/test_api.py b/backend/test/test_api.py index 439bfbff3..5c0a1d68b 100644 --- a/backend/test/test_api.py +++ b/backend/test/test_api.py @@ -43,6 +43,7 @@ def test_api_settings(): "jwtTokenLifetime": 86400, "defaultBehaviorTimeSeconds": 300, "maxPagesPerCrawl": 4, + "numBrowsers": 2, "maxScale": 3, "defaultPageLoadTimeSeconds": 120, "billingEnabled": True, diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index fa1c7db64..6026287f7 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -56,6 +56,8 @@ data: MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}" + NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}" + MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}" ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}" From 68de51aee42bbc4f1ddbbb8fc931a9b4803bce32 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 10:09:53 -0700 Subject: [PATCH 02/18] update field --- docs/user-guide/running-crawl.md | 4 +- docs/user-guide/workflow-setup.md | 8 ++-- frontend/src/components/ui/config-details.ts | 2 +- .../crawl-workflows/workflow-editor.ts | 46 +++++++++---------- frontend/src/pages/org/workflow-detail.ts | 14 ++++-- frontend/src/pages/org/workflows-list.ts | 2 +- 6 files changed, 40 insertions(+), 36 deletions(-) diff --git a/docs/user-guide/running-crawl.md b/docs/user-guide/running-crawl.md index ae31f398d..f899f83fd 100644 --- a/docs/user-guide/running-crawl.md +++ b/docs/user-guide/running-crawl.md @@ -23,9 +23,9 @@ If the crawl queue is filled with URLs that should not be crawled, use the _Edit Exclusions added while crawling are applied to the same exclusion table saved in the workflow's settings and will be used the next time the crawl workflow is run unless they are manually removed. -## Changing the Number of Crawler Instances +## Changing the Number of Browser Windows -Like exclusions, the [crawler instance](workflow-setup.md#crawler-instances) scale can also be adjusted while crawling. On the Watch Crawl page, press the _Edit Crawler Instances_ button, and set the desired value. +Like exclusions, the number of [browser windows](workflow-setup.md#browser-windows) can also be adjusted while crawling. On the **Watch Crawl** tab, press the _Edit Browser Windows_ button, and set the desired value. Unlike exclusions, this change will not be applied to future workflow runs. 
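For a sense of how the new field reaches clients once PATCH 01 lands: `numBrowsers` is read from the `NUM_BROWSERS` environment variable (wired to `crawler_browser_instances` in the Helm configmap above) and returned by `GET /api/settings` alongside the existing limits. A minimal sketch of consuming that response — the interface lists only fields visible in this series, and the `fetchSettings` helper is hypothetical, not part of these patches:

```ts
// Sketch: reading numBrowsers from the public settings endpoint.
// Field names mirror backend/test/test_api.py; the helper itself is
// illustrative only.
interface AppSettings {
  jwtTokenLifetime: number;
  defaultBehaviorTimeSeconds: number;
  defaultPageLoadTimeSeconds: number;
  maxPagesPerCrawl: number;
  numBrowsers: number; // browser windows per crawler, from NUM_BROWSERS
  maxScale: number;    // maximum crawl scale, from MAX_CRAWL_SCALE
  billingEnabled: boolean;
}

async function fetchSettings(apiBase = "/api"): Promise<AppSettings> {
  const resp = await fetch(`${apiBase}/settings`);
  if (!resp.ok) throw new Error(`settings request failed: ${resp.status}`);
  return (await resp.json()) as AppSettings;
}
```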
diff --git a/docs/user-guide/workflow-setup.md b/docs/user-guide/workflow-setup.md index 120eea66c..cf8d74ce1 100644 --- a/docs/user-guide/workflow-setup.md +++ b/docs/user-guide/workflow-setup.md @@ -114,10 +114,6 @@ The crawl will be gracefully stopped after this set period of elapsed time. The crawl will be gracefully stopped after reaching this set size in GB. -### Crawler Instances - -Increasing the amount of crawler instances will speed up crawls by using additional browser windows to capture more pages in parallel. This will also increase the amount of traffic sent to the website and may result in a higher chance of getting rate limited. - ### Page Load Timeout Limits amount of elapsed time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded. @@ -146,6 +142,10 @@ Configure the browser used to visit URLs during the crawl. Sets the [_Browser Profile_](browser-profiles.md) to be used for this crawl. +### Browser Windows + +Increasing the number of browser windows will speed up crawls by capturing more pages in parallel. This will also increase the amount of traffic sent to the website and may result in a higher chance of getting rate limited. + ### Crawler Release Channel Sets the release channel of [Browsertrix Crawler](https://github.com/webrecorder/browsertrix-crawler) to be used for this crawl. Crawls started by this workflow will use the latest crawler version from the selected release channel. Generally "Default" will be the most stable, however others may have newer features (or bugs)! diff --git a/frontend/src/components/ui/config-details.ts b/frontend/src/components/ui/config-details.ts index 78fc14cc7..0da562897 100644 --- a/frontend/src/components/ui/config-details.ts +++ b/frontend/src/components/ui/config-details.ts @@ -167,7 +167,7 @@ export class ConfigDetails extends LiteElement { renderSize(crawlConfig?.maxCrawlSize), )} ${this.renderSetting( - msg("Crawler Instances"), + msg("Browser Windows"), crawlConfig?.scale ? 
`${crawlConfig.scale}×` : "", )} diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index 260153bf3..f50778035 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -1254,29 +1254,6 @@ https://archiveweb.page/images/${"logo.svg"}`} `)} ${this.renderHelpTextCol(infoTextStrings["maxCrawlSizeGB"])} - ${inputCol(html` - - this.updateFormState({ - scale: +(e.target as SlCheckbox).value, - })} - > - ${map( - range(this.defaults.maxScale), - (i: number) => - html` ${i + 1}×`, - )} - - `)} - ${this.renderHelpTextCol( - msg(`Increasing parallel crawler instances can speed up crawls, but may - increase the chances of getting rate limited.`), - )} ${this.renderSectionHeading(sectionStrings.perPageLimits)} ${inputCol(html` `)} ${this.renderHelpTextCol(infoTextStrings["browserProfile"])} + ${inputCol(html` + + this.updateFormState({ + scale: +(e.target as SlCheckbox).value, + })} + > + ${map( + range(this.defaults.maxScale), + (i: number) => + html` ${i + 1}×`, + )} + + `)} + ${this.renderHelpTextCol( + msg(`Increasing parallel browser windows can speed up crawls, but may + increase the chances of getting rate limited.`), + )} ${inputCol(html` (this.openDialogName = "scale")} > - - ${msg("Edit Crawler Instances")} + + ${msg("Edit Browser Windows")} `; } if (this.activePanel === "logs") { @@ -675,7 +679,7 @@ export class WorkflowDetail extends LiteElement { (this.openDialogName = "scale")}> - ${msg("Edit Crawler Instances")} + ${msg("Edit Browser Windows")} (this.openDialogName = "exclusions")} @@ -947,7 +951,7 @@ export class WorkflowDetail extends LiteElement { >` : skeleton, )} - ${this.renderDetailItem(msg("Crawler Instances"), () => + ${this.renderDetailItem(msg("Browser Windows"), () => this.workflow ? this.workflow.scale : skeleton, )} @@ -1016,7 +1020,7 @@ export class WorkflowDetail extends LiteElement {
${this.renderExclusions()}
(this.openDialogName = undefined)} @sl-show=${this.showDialog} diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index e2c064ebf..a51473774 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -475,7 +475,7 @@ export class WorkflowsList extends LiteElement { )} > - ${msg("Edit Crawler Instances")} + ${msg("Edit Browser Windows")}
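Throughout this patch the stored workflow field is still `scale` — only the labels change from "Crawler Instances" to "Browser Windows". The conversion from `scale` to a user-facing window count arrives in PATCHes 05 and 10 later in the series as `value * numBrowsers`; a self-contained sketch of that option-building logic, using the names from those patches:

```ts
// Sketch of the browser-window picker options built in PATCH 05/10:
// the value submitted to the backend stays `scale` (1..maxScale), while
// the label becomes the number of browser windows the user will see.
interface ScaleOption {
  value: number; // workflow `scale`
  label: number; // value × numBrowsers, shown in the dialog
}

function buildScaleOptions(maxScale: number, numBrowsers: number): ScaleOption[] {
  const scaleOptions: ScaleOption[] = [];
  for (let value = 1; value <= maxScale; value++) {
    scaleOptions.push({ value, label: value * numBrowsers });
  }
  return scaleOptions;
}

// With the defaults visible in this series (MAX_CRAWL_SCALE=3, NUM_BROWSERS=2):
// [{value: 1, label: 2}, {value: 2, label: 4}, {value: 3, label: 6}]
console.log(buildScaleOptions(3, 2));
```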
From 77aa1b1098927266cee42f3bf651878986c3aadf Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 10:19:21 -0700 Subject: [PATCH 03/18] update docs --- docs/user-guide/crawl-workflows.md | 2 +- docs/user-guide/workflow-setup.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/crawl-workflows.md b/docs/user-guide/crawl-workflows.md index 435621fd7..78bbe3f60 100644 --- a/docs/user-guide/crawl-workflows.md +++ b/docs/user-guide/crawl-workflows.md @@ -34,7 +34,7 @@ Run a crawl workflow by clicking _Run Crawl_ in the actions menu of the workflow While crawling, the **Watch Crawl** section displays a list of queued URLs that will be visited, and streams the current state of the browser windows as they visit pages from the queue. You can [modify the crawl live](./running-crawl.md) by adding URL exclusions or changing the number of crawling instances. -Re-running a crawl workflow can be useful to capture a website as it changes over time, or to run with an updated [crawl scope](workflow-setup.md#scope). +Re-running a crawl workflow can be useful to capture a website as it changes over time, or to run with an updated [crawl scope](workflow-setup.md#crawl-scope). ## Status diff --git a/docs/user-guide/workflow-setup.md b/docs/user-guide/workflow-setup.md index cf8d74ce1..7bcfe7068 100644 --- a/docs/user-guide/workflow-setup.md +++ b/docs/user-guide/workflow-setup.md @@ -6,7 +6,7 @@ Changes to a setting will only apply to subsequent crawls. Crawl settings are shown in the crawl workflow detail **Settings** tab and in the archived item **Crawl Settings** tab. -## Scope +## Crawl Scope Specify the range and depth of your crawl. Different settings will be shown depending on whether you chose _Known URLs_ (crawl type of **URL List**) or _Automated Discovery_ (crawl type of **Seeded Crawl**) when creating a new workflow. From e12f578c3a62d8048365bf9a93e25b360b82aed3 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 10:47:00 -0700 Subject: [PATCH 04/18] update info text --- docs/user-guide/workflow-setup.md | 7 ++++++- .../src/features/crawl-workflows/workflow-editor.ts | 13 ++++++++++--- frontend/src/utils/app.ts | 2 ++ frontend/src/utils/workflow.ts | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/user-guide/workflow-setup.md b/docs/user-guide/workflow-setup.md index 7bcfe7068..6f24bf2e3 100644 --- a/docs/user-guide/workflow-setup.md +++ b/docs/user-guide/workflow-setup.md @@ -144,7 +144,12 @@ Sets the [_Browser Profile_](browser-profiles.md) to be used for this crawl. ### Browser Windows -Increasing the number of browser windows will speed up crawls by capturing more pages in parallel. This will also increase the amount of traffic sent to the website and may result in a higher chance of getting rate limited. +Sets the number of browser windows that are open and visiting pages during a crawl. Increasing the number of browser windows will speed up crawls by capturing more pages in parallel. + +There are some trade-offs: + +- This may result in a higher chance of getting rate limited due to the increase in traffic sent to the website. +- More execution minutes will be used per-crawl. 
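The execution-minutes bullet above is worth quantifying. Per the tooltip wording later in this series ("aggregated time across all browser windows"), active time accrues once per window, so adding windows shortens wall-clock time without reducing the minutes used — a rough sketch, with strict proportionality as an assumption:

```ts
// Rough sketch of the trade-off: execution minutes scale with the
// number of browser windows, even as wall-clock time drops.
function estimateExecutionMinutes(
  activeCrawlMinutes: number, // wall-clock minutes spent actively crawling
  browserWindows: number,     // scale × numBrowsers
): number {
  return activeCrawlMinutes * browserWindows;
}

// 20 active minutes on 2 windows ≈ 40 execution minutes;
// 10 active minutes on 4 windows ≈ the same 40, in half the time.
console.log(estimateExecutionMinutes(20, 2), estimateExecutionMinutes(10, 4));
```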
### Crawler Release Channel diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index f50778035..ea1aacc84 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -1357,14 +1357,21 @@ https://archiveweb.page/images/${"logo.svg"}`} range(this.defaults.maxScale), (i: number) => html` ${i + 1}×${(i + 1) * this.defaults.numBrowsers}`, )} `)} ${this.renderHelpTextCol( - msg(`Increasing parallel browser windows can speed up crawls, but may - increase the chances of getting rate limited.`), + html`${msg( + `Increase the number of open browser windows during a crawl. This will speed up your crawl by effectively running more crawlers at the same time.`, + )} + ${msg("See caveats")}.`, )} ${inputCol(html` { defaultBehaviorTimeSeconds: 0, defaultPageLoadTimeSeconds: 0, maxPagesPerCrawl: 0, + numBrowsers: 1, maxScale: 0, billingEnabled: false, signUpUrl: "", diff --git a/frontend/src/utils/workflow.ts b/frontend/src/utils/workflow.ts index dabf35443..7c48b4592 100644 --- a/frontend/src/utils/workflow.ts +++ b/frontend/src/utils/workflow.ts @@ -87,10 +87,12 @@ export type WorkflowDefaults = { behaviorTimeoutSeconds?: number; pageLoadTimeoutSeconds?: number; maxPagesPerCrawl?: number; + numBrowsers: number; maxScale: number; }; export const appDefaults: WorkflowDefaults = { + numBrowsers: 1, maxScale: DEFAULT_MAX_SCALE, }; @@ -302,6 +304,7 @@ export async function getServerDefaults(): Promise { if (data.maxScale) { defaults.maxScale = data.maxScale; } + defaults.numBrowsers = data.numBrowsers; return defaults; } catch (e) { From a6b3891b106aacf7a2783d97cd2b722927d74388 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 10:49:45 -0700 Subject: [PATCH 05/18] update dialog --- frontend/src/pages/org/workflow-detail.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index a7d878094..1b6cb8059 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1258,11 +1258,14 @@ export class WorkflowDetail extends LiteElement { if (!this.workflow) return; const scaleOptions = []; - for (let value = 1; value <= this.maxScale; value++) { - scaleOptions.push({ - value, - label: `${value}×`, - }); + + if (this.appState.settings) { + for (let value = 1; value <= this.maxScale; value++) { + scaleOptions.push({ + value, + label: value * this.appState.settings.numBrowsers, + }); + } } return html` From c9509df1f2a4120957ef3b0b92db1d15684dfe83 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 10:53:59 -0700 Subject: [PATCH 06/18] update display value --- frontend/src/components/ui/config-details.ts | 4 +++- .../features/crawl-workflows/workflow-editor.ts | 14 ++++++++------ frontend/src/pages/org/workflow-detail.ts | 4 +++- frontend/src/utils/workflow.ts | 3 --- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/frontend/src/components/ui/config-details.ts b/frontend/src/components/ui/config-details.ts index 0da562897..0e6e78860 100644 --- a/frontend/src/components/ui/config-details.ts +++ b/frontend/src/components/ui/config-details.ts @@ -168,7 +168,9 @@ export class ConfigDetails extends LiteElement { )} ${this.renderSetting( msg("Browser Windows"), - crawlConfig?.scale ? `${crawlConfig.scale}×` : "", + crawlConfig?.scale && this.appState.settings + ? 
`${crawlConfig.scale * this.appState.settings.numBrowsers}` + : "", )}

<h4>${sectionStrings.perPageLimits}</h4>

diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index ea1aacc84..cf95e4f0b 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -1353,12 +1353,14 @@ https://archiveweb.page/images/${"logo.svg"}`} scale: +(e.target as SlCheckbox).value, })} > - ${map( - range(this.defaults.maxScale), - (i: number) => - html` ${(i + 1) * this.defaults.numBrowsers}`, + ${when(this.appState.settings?.numBrowsers, (numBrowsers) => + map( + range(this.defaults.maxScale), + (i: number) => + html` ${(i + 1) * numBrowsers}`, + ), )} `)} diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index 1b6cb8059..461349a3e 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -952,7 +952,9 @@ export class WorkflowDetail extends LiteElement { : skeleton, )} ${this.renderDetailItem(msg("Browser Windows"), () => - this.workflow ? this.workflow.scale : skeleton, + this.workflow && this.appState.settings + ? this.workflow.scale * this.appState.settings.numBrowsers + : skeleton, )} `; diff --git a/frontend/src/utils/workflow.ts b/frontend/src/utils/workflow.ts index 7c48b4592..dabf35443 100644 --- a/frontend/src/utils/workflow.ts +++ b/frontend/src/utils/workflow.ts @@ -87,12 +87,10 @@ export type WorkflowDefaults = { behaviorTimeoutSeconds?: number; pageLoadTimeoutSeconds?: number; maxPagesPerCrawl?: number; - numBrowsers: number; maxScale: number; }; export const appDefaults: WorkflowDefaults = { - numBrowsers: 1, maxScale: DEFAULT_MAX_SCALE, }; @@ -304,7 +302,6 @@ export async function getServerDefaults(): Promise { if (data.maxScale) { defaults.maxScale = data.maxScale; } - defaults.numBrowsers = data.numBrowsers; return defaults; } catch (e) { From e6ef0b98fa336a415898c249e976d62b0d6593cd Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 11:02:50 -0700 Subject: [PATCH 07/18] update config --- frontend/src/components/ui/config-details.ts | 12 ++++++------ frontend/src/features/org/usage-history-table.ts | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/src/components/ui/config-details.ts b/frontend/src/components/ui/config-details.ts index 0e6e78860..21d130e87 100644 --- a/frontend/src/components/ui/config-details.ts +++ b/frontend/src/components/ui/config-details.ts @@ -166,12 +166,6 @@ export class ConfigDetails extends LiteElement { msg("Crawl Size Limit"), renderSize(crawlConfig?.maxCrawlSize), )} - ${this.renderSetting( - msg("Browser Windows"), - crawlConfig?.scale && this.appState.settings - ? `${crawlConfig.scale * this.appState.settings.numBrowsers}` - : "", - )}

<h4>${sectionStrings.perPageLimits}</h4>

@@ -234,6 +228,12 @@ export class ConfigDetails extends LiteElement { >`, ), )} + ${this.renderSetting( + msg("Browser Windows"), + crawlConfig?.scale && this.appState.settings + ? `${crawlConfig.scale * this.appState.settings.numBrowsers}` + : "", + )} ${this.renderSetting( msg("Crawler Channel (Exact Crawler Version)"), capitalize(crawlConfig?.crawlerChannel || "default") + diff --git a/frontend/src/features/org/usage-history-table.ts b/frontend/src/features/org/usage-history-table.ts index c9f3c9d69..dbc1df885 100644 --- a/frontend/src/features/org/usage-history-table.ts +++ b/frontend/src/features/org/usage-history-table.ts @@ -51,7 +51,7 @@ export class UsageHistoryTable extends BtrixElement {
   ${msg(
-    "Aggregated time across all crawler instances that the crawler was actively executing a crawl or QA analysis run, i.e. not in a waiting state",
+    "Aggregated time that the crawler was actively executing a crawl or QA analysis run, i.e. not in a waiting state",
   )}
From 6e6b2703dd90b3238284562195aeb10c9cd2536f Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 11:35:01 -0700 Subject: [PATCH 08/18] fix when usage --- frontend/src/pages/org/workflow-detail.ts | 62 ++++++++++------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index 461349a3e..9b9ad061b 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -562,17 +562,15 @@ export class WorkflowDetail extends LiteElement { ${when( - !this.isLoading && this.seeds, - () => html` + !this.isLoading && this.seeds && this.workflow, + (workflow) => html` - this.navTo( - `${this.orgBasePath}/workflows/crawl/${this.workflow!.id}`, - )} + this.navTo(`${this.orgBasePath}/workflows/crawl/${workflow.id}`)} > `, this.renderLoading, @@ -736,36 +734,36 @@ export class WorkflowDetail extends LiteElement { ${this.renderDetailItem( msg("Status"), - () => html` + (workflow) => html` `, )} ${this.renderDetailItem( msg("Total Size"), - () => + (workflow) => html` `, )} - ${this.renderDetailItem(msg("Schedule"), () => - this.workflow!.schedule + ${this.renderDetailItem(msg("Schedule"), (workflow) => + workflow.schedule ? html`
- ${humanizeSchedule(this.workflow!.schedule, { + ${humanizeSchedule(workflow.schedule, { length: "short", })}
` : html`${msg("No Schedule")}`, )} - ${this.renderDetailItem(msg("Created By"), () => + ${this.renderDetailItem(msg("Created By"), (workflow) => msg( - str`${this.workflow!.createdByName} on ${this.dateFormatter.format( - new Date(`${this.workflow!.created}Z`), + str`${workflow.createdByName} on ${this.dateFormatter.format( + new Date(`${workflow.created}Z`), )}`, ), )} @@ -775,7 +773,7 @@ export class WorkflowDetail extends LiteElement { private renderDetailItem( label: string | TemplateResult, - renderContent: () => TemplateResult | string | number, + renderContent: (workflow: Workflow) => TemplateResult | string | number, ) { return html` @@ -1008,13 +1006,13 @@ export class WorkflowDetail extends LiteElement { ` : this.renderInactiveCrawlMessage()} ${when( - isRunning, - () => html` + isRunning && this.workflow, + (workflow) => html`
@@ -1045,12 +1043,10 @@ export class WorkflowDetail extends LiteElement {

${when( - this.workflow?.lastCrawlId, - () => html` + this.workflow?.lastCrawlId && this.workflow, + (workflow) => html` + this.isCrawler && this.workflow, + (workflow) => html` @@ -1576,7 +1570,7 @@ export class WorkflowDetail extends LiteElement { private async runNow(): Promise { try { const data = await this.apiFetch<{ started: string | null }>( - `/orgs/${this.orgId}/crawlconfigs/${this.workflow!.id}/run`, + `/orgs/${this.orgId}/crawlconfigs/${this.workflowId}/run`, { method: "POST", }, From 60b8e6c26ac60bd49fcf698e921f532ff5144927 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 11:43:31 -0700 Subject: [PATCH 09/18] update dialog name --- frontend/src/pages/org/workflow-detail.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index 9b9ad061b..a54dbe367 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1020,7 +1020,7 @@ export class WorkflowDetail extends LiteElement {
${this.renderExclusions()}
(this.openDialogName = undefined)} @sl-show=${this.showDialog} From ebe9413a1f0d1e58a066f2d05cb18633aa0d41bf Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 12:07:10 -0700 Subject: [PATCH 10/18] fix info text --- frontend/src/pages/org/workflow-detail.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index a54dbe367..60d27ebdb 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1266,12 +1266,12 @@ export class WorkflowDetail extends LiteElement { return html`
- + ${msg( + "Change the number of browser windows running in parallel. This change will take effect immediately on the currently running crawl and update crawl workflow settings.", )} - > +

+ ${scaleOptions.map( ({ value, label }) => html` Date: Fri, 30 Aug 2024 12:08:26 -0700 Subject: [PATCH 11/18] update how edit browser disable --- frontend/src/pages/org/workflow-detail.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index 60d27ebdb..f271c3cbe 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -493,7 +493,7 @@ export class WorkflowDetail extends LiteElement { return html`

${this.tabLabels[this.activePanel]}

(this.openDialogName = "scale")} > Date: Fri, 30 Aug 2024 12:16:25 -0700 Subject: [PATCH 12/18] update execution time text --- frontend/src/features/org/usage-history-table.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/features/org/usage-history-table.ts b/frontend/src/features/org/usage-history-table.ts index dbc1df885..0608bb8f6 100644 --- a/frontend/src/features/org/usage-history-table.ts +++ b/frontend/src/features/org/usage-history-table.ts @@ -51,7 +51,7 @@ export class UsageHistoryTable extends BtrixElement {
   ${msg(
-    "Aggregated time that the crawler was actively executing a crawl or QA analysis run, i.e. not in a waiting state",
+    "Aggregated time across all browser windows that the crawler was actively executing a crawl or QA analysis run, i.e. not in a waiting state",
   )}
From ecaf8dedd7fd96ef3383477ea669f0f6de98fd39 Mon Sep 17 00:00:00 2001 From: Henry Wilkinson Date: Fri, 30 Aug 2024 22:27:55 +0100 Subject: [PATCH 13/18] Change verb I think we can use "crawling" here! --- frontend/src/pages/org/workflow-detail.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index f271c3cbe..ad9ee2eea 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1268,7 +1268,7 @@ export class WorkflowDetail extends LiteElement {

   ${msg(
-    "Change the number of browser windows running in parallel. This change will take effect immediately on the currently running crawl and update crawl workflow settings.",
+    "Change the number of browser windows crawling in parallel. This change will take effect immediately on the currently running crawl and update crawl workflow settings.",
   )}
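This copy belongs to the dialog that rescales a crawl that is already running. Judging from the `data.scaled` check in PATCH 16 further along, the frontend expects a `{ scaled: boolean }` response; a sketch of that call follows, where the endpoint path is an assumption — only the response shape is visible in this series:

```ts
// Sketch of applying a new scale to a running crawl. The `scaled`
// response field comes from PATCH 16; the path below is assumed, not
// confirmed by this series.
async function updateCrawlScale(
  orgId: string,
  crawlId: string,
  scale: number, // crawler processes; the UI shows scale × numBrowsers windows
): Promise<boolean> {
  const resp = await fetch(`/api/orgs/${orgId}/crawls/${crawlId}/scale`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ scale }),
  });
  const data = (await resp.json()) as { scaled: boolean };
  return data.scaled;
}
```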

From 0846b43e691f7212758195bb6e6df9c312402908 Mon Sep 17 00:00:00 2001 From: Henry Wilkinson Date: Fri, 30 Aug 2024 22:32:44 +0100 Subject: [PATCH 14/18] Minor docs reword --- docs/user-guide/workflow-setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/workflow-setup.md b/docs/user-guide/workflow-setup.md index 6f24bf2e3..5dae9c82e 100644 --- a/docs/user-guide/workflow-setup.md +++ b/docs/user-guide/workflow-setup.md @@ -144,7 +144,7 @@ Sets the [_Browser Profile_](browser-profiles.md) to be used for this crawl. ### Browser Windows -Sets the number of browser windows that are open and visiting pages during a crawl. Increasing the number of browser windows will speed up crawls by capturing more pages in parallel. +Sets the number of browser windows that are used to visit webpages while crawling. Increasing the number of browser windows will speed up crawls by capturing more pages in parallel. There are some trade-offs: From 123cd763723238406b90ce6dd83ce7018cd1e8c4 Mon Sep 17 00:00:00 2001 From: Henry Wilkinson Date: Fri, 30 Aug 2024 22:42:13 +0100 Subject: [PATCH 15/18] Fix broken ID links @SuaYoo IDK if your version of mkdocs is doing this (I think it was released in a newer version, I'm on 1.6), but it will show you in the terminal if you have broken links! Useful for nailing down stuff like this. --- docs/user-guide/archived-items.md | 2 +- docs/user-guide/overview.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/archived-items.md b/docs/user-guide/archived-items.md index 719587585..2f34fb6ae 100644 --- a/docs/user-guide/archived-items.md +++ b/docs/user-guide/archived-items.md @@ -56,7 +56,7 @@ For more details on navigating web archives within ReplayWeb.page, see the [Repl ### Exporting Files -While crawling, Browsertrix will output one or more WACZ files — the crawler aims to output files in consistently sized chunks, and each [crawler instance](workflow-setup.md#crawler-instances) will output separate WACZ files. +While crawling, Browsertrix will output one or more WACZ files — the crawler aims to output files in consistently sized chunks, and each [crawler instance](workflow-setup.md#browser-windows) will output separate WACZ files. The **WACZ Files** tab lists the individually downloadable WACZ files that make up the archived item as well as their file sizes and backup status. diff --git a/docs/user-guide/overview.md b/docs/user-guide/overview.md index 6194d5aea..71c93ce5f 100644 --- a/docs/user-guide/overview.md +++ b/docs/user-guide/overview.md @@ -21,7 +21,7 @@ The crawling panel lists the number of currently running and waiting crawls, as For organizations with a set execution minute limit, the crawling panel displays a graph of how much execution time has been used and how much is currently remaining. Monthly execution time limits reset on the first of each month at 12:00 AM GMT. ??? Question "How is execution time calculated?" - Execution time is the total runtime of all [_Crawler Instances_](workflow-setup.md/#crawler-instances) during a crawl. For instance, if _Crawler Instances_ scale is set to 2× and each crawler instance uses 2 minutes of active crawling time, execution time for the crawl will be 4 minutes. Like elapsed time, this is tracked as the crawl runs so changing the _Crawler Instances_ scale while a crawl is running may change the amount of execution time used in a given time period. 
+ Execution time is the total runtime of all [_Crawler Instances_](workflow-setup.md/#browser-windows) during a crawl. For instance, if _Crawler Instances_ scale is set to 2× and each crawler instance uses 2 minutes of active crawling time, execution time for the crawl will be 4 minutes. Like elapsed time, this is tracked as the crawl runs so changing the _Crawler Instances_ scale while a crawl is running may change the amount of execution time used in a given time period. ## Collections From 804ea6a90e520fdfbfa28c949880aa65d1ed8f2e Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 14:47:45 -0700 Subject: [PATCH 16/18] update notify message --- frontend/src/pages/org/workflow-detail.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index ad9ee2eea..1d098b6e4 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1343,7 +1343,7 @@ export class WorkflowDetail extends LiteElement { if (data.scaled) { void this.fetchWorkflow(); this.notify({ - message: msg("Updated crawl scale."), + message: msg("Updated number of browser windows."), variant: "success", icon: "check2-circle", }); @@ -1352,7 +1352,9 @@ export class WorkflowDetail extends LiteElement { } } catch { this.notify({ - message: msg("Sorry, couldn't change crawl scale at this time."), + message: msg( + "Sorry, couldn't change number of browser windows at this time.", + ), variant: "danger", icon: "exclamation-octagon", }); From b81711053d9e42eb5e3bf574c0d1d3f7ed1605b4 Mon Sep 17 00:00:00 2001 From: Henry Wilkinson Date: Fri, 30 Aug 2024 23:53:58 +0100 Subject: [PATCH 17/18] Update execution time overview --- docs/user-guide/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/overview.md b/docs/user-guide/overview.md index 71c93ce5f..b67b5d6a0 100644 --- a/docs/user-guide/overview.md +++ b/docs/user-guide/overview.md @@ -21,7 +21,7 @@ The crawling panel lists the number of currently running and waiting crawls, as For organizations with a set execution minute limit, the crawling panel displays a graph of how much execution time has been used and how much is currently remaining. Monthly execution time limits reset on the first of each month at 12:00 AM GMT. ??? Question "How is execution time calculated?" - Execution time is the total runtime of all [_Crawler Instances_](workflow-setup.md/#browser-windows) during a crawl. For instance, if _Crawler Instances_ scale is set to 2× and each crawler instance uses 2 minutes of active crawling time, execution time for the crawl will be 4 minutes. Like elapsed time, this is tracked as the crawl runs so changing the _Crawler Instances_ scale while a crawl is running may change the amount of execution time used in a given time period. + Execution time is the total runtime of scaled by the [_Browser Windows_](workflow-setup.md/#browser-windows) setting increment value during a crawl. Like elapsed time, this is tracked as the crawl runs so changing the amount of _Browser Windows_ while a crawl is running may change the amount of execution time used in a given time period. 
## Collections From 2c2bebea2ec9f650f4a87454fe6e513dcbd47165 Mon Sep 17 00:00:00 2001 From: sua yoo Date: Fri, 30 Aug 2024 18:23:49 -0700 Subject: [PATCH 18/18] update file documentation --- docs/user-guide/archived-items.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/archived-items.md b/docs/user-guide/archived-items.md index 2f34fb6ae..428ce4c38 100644 --- a/docs/user-guide/archived-items.md +++ b/docs/user-guide/archived-items.md @@ -56,7 +56,7 @@ For more details on navigating web archives within ReplayWeb.page, see the [Repl ### Exporting Files -While crawling, Browsertrix will output one or more WACZ files — the crawler aims to output files in consistently sized chunks, and each [crawler instance](workflow-setup.md#browser-windows) will output separate WACZ files. +While crawling, Browsertrix will output one or more WACZ files — the crawler aims to output files in consistently sized chunks, and each crawler will output separate WACZ files. The **WACZ Files** tab lists the individually downloadable WACZ files that make up the archived item as well as their file sizes and backup status.
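Two distinct numbers come out of this series, and PATCH 18's wording ("each crawler will output separate WACZ files") concerns the first of them. A closing sketch, assuming "each crawler" means each crawler process at the workflow's `scale`:

```ts
// Summary sketch of the two user-visible numbers after this series.
const scale = 2;       // crawler processes (the stored workflow value)
const numBrowsers = 2; // browser windows per crawler, from /api/settings

console.log(scale * numBrowsers); // "Browser Windows" shown in the UI: 4
console.log(scale);               // separate WACZ output series: 2
```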