diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py
index 3678f49e4..cb1610a98 100644
--- a/backend/btrixcloud/main.py
+++ b/backend/btrixcloud/main.py
@@ -111,6 +111,7 @@ class SettingsResponse(BaseModel):
defaultPageLoadTimeSeconds: int
maxPagesPerCrawl: int
+ numBrowsers: int
maxScale: int
billingEnabled: bool
@@ -143,6 +144,7 @@ def main() -> None:
os.environ.get("DEFAULT_PAGE_LOAD_TIME_SECONDS", 120)
),
maxPagesPerCrawl=int(os.environ.get("MAX_PAGES_PER_CRAWL", 0)),
+ numBrowsers=int(os.environ.get("NUM_BROWSERS", 1)),
maxScale=int(os.environ.get("MAX_CRAWL_SCALE", 3)),
billingEnabled=is_bool(os.environ.get("BILLING_ENABLED")),
signUpUrl=os.environ.get("SIGN_UP_URL", ""),
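The new `numBrowsers` settings field lets the frontend convert between crawler scale and the browser-window counts shown to users. Below is a minimal sketch of that conversion, assuming browser windows = crawler scale × `numBrowsers`; the helper names are illustrative and not part of this change:

```python
import math
import os

# Assumption for illustration: each unit of crawler scale adds NUM_BROWSERS browser windows.
NUM_BROWSERS = int(os.environ.get("NUM_BROWSERS", 1))


def windows_for_scale(scale: int, num_browsers: int = NUM_BROWSERS) -> int:
    """Browser windows made available by a given crawler scale."""
    return scale * num_browsers


def scale_for_windows(windows: int, num_browsers: int = NUM_BROWSERS) -> int:
    """Smallest crawler scale providing at least the requested number of browser windows."""
    return max(1, math.ceil(windows / num_browsers))


# Example: with NUM_BROWSERS=2, requesting 4 browser windows corresponds to scale 2.
assert windows_for_scale(2, num_browsers=2) == 4
assert scale_for_windows(4, num_browsers=2) == 2
```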
diff --git a/backend/test/test_api.py b/backend/test/test_api.py
index 439bfbff3..5c0a1d68b 100644
--- a/backend/test/test_api.py
+++ b/backend/test/test_api.py
@@ -43,6 +43,7 @@ def test_api_settings():
"jwtTokenLifetime": 86400,
"defaultBehaviorTimeSeconds": 300,
"maxPagesPerCrawl": 4,
+ "numBrowsers": 2,
"maxScale": 3,
"defaultPageLoadTimeSeconds": 120,
"billingEnabled": True,
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index fa1c7db64..6026287f7 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -56,6 +56,8 @@ data:
MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"
+ NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}"
+
MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"
diff --git a/docs/user-guide/archived-items.md b/docs/user-guide/archived-items.md
index 719587585..428ce4c38 100644
--- a/docs/user-guide/archived-items.md
+++ b/docs/user-guide/archived-items.md
@@ -56,7 +56,7 @@ For more details on navigating web archives within ReplayWeb.page, see the [Repl
### Exporting Files
-While crawling, Browsertrix will output one or more WACZ files — the crawler aims to output files in consistently sized chunks, and each [crawler instance](workflow-setup.md#crawler-instances) will output separate WACZ files.
+While crawling, Browsertrix will output one or more WACZ files — the crawler aims to output files in consistently sized chunks, and each crawler will output separate WACZ files.
The **WACZ Files** tab lists the individually downloadable WACZ files that make up the archived item as well as their file sizes and backup status.
diff --git a/docs/user-guide/crawl-workflows.md b/docs/user-guide/crawl-workflows.md
index 435621fd7..78bbe3f60 100644
--- a/docs/user-guide/crawl-workflows.md
+++ b/docs/user-guide/crawl-workflows.md
@@ -34,7 +34,7 @@ Run a crawl workflow by clicking _Run Crawl_ in the actions menu of the workflow
While crawling, the **Watch Crawl** section displays a list of queued URLs that will be visited, and streams the current state of the browser windows as they visit pages from the queue. You can [modify the crawl live](./running-crawl.md) by adding URL exclusions or changing the number of crawling instances.
-Re-running a crawl workflow can be useful to capture a website as it changes over time, or to run with an updated [crawl scope](workflow-setup.md#scope).
+Re-running a crawl workflow can be useful to capture a website as it changes over time, or to run with an updated [crawl scope](workflow-setup.md#crawl-scope).
## Status
diff --git a/docs/user-guide/overview.md b/docs/user-guide/overview.md
index 6194d5aea..b67b5d6a0 100644
--- a/docs/user-guide/overview.md
+++ b/docs/user-guide/overview.md
@@ -21,7 +21,7 @@ The crawling panel lists the number of currently running and waiting crawls, as
For organizations with a set execution minute limit, the crawling panel displays a graph of how much execution time has been used and how much is currently remaining. Monthly execution time limits reset on the first of each month at 12:00 AM GMT.
??? Question "How is execution time calculated?"
- Execution time is the total runtime of all [_Crawler Instances_](workflow-setup.md/#crawler-instances) during a crawl. For instance, if _Crawler Instances_ scale is set to 2× and each crawler instance uses 2 minutes of active crawling time, execution time for the crawl will be 4 minutes. Like elapsed time, this is tracked as the crawl runs so changing the _Crawler Instances_ scale while a crawl is running may change the amount of execution time used in a given time period.
+    Execution time is the total crawl runtime, scaled by the [_Browser Windows_](workflow-setup.md#browser-windows) setting increment value. Like elapsed time, this is tracked as the crawl runs, so changing the number of _Browser Windows_ while a crawl is running may change the amount of execution time used in a given time period.
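+
+    For example, with an increment value of 2, a crawl set to 4 _Browser Windows_ accrues execution time at twice the elapsed rate: 2 minutes of active crawling uses roughly 4 minutes of execution time.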
## Collections
diff --git a/docs/user-guide/running-crawl.md b/docs/user-guide/running-crawl.md
index ae31f398d..f899f83fd 100644
--- a/docs/user-guide/running-crawl.md
+++ b/docs/user-guide/running-crawl.md
@@ -23,9 +23,9 @@ If the crawl queue is filled with URLs that should not be crawled, use the _Edit
Exclusions added while crawling are applied to the same exclusion table saved in the workflow's settings and will be used the next time the crawl workflow is run unless they are manually removed.
-## Changing the Number of Crawler Instances
+## Changing the Number of Browser Windows
-Like exclusions, the [crawler instance](workflow-setup.md#crawler-instances) scale can also be adjusted while crawling. On the Watch Crawl page, press the _Edit Crawler Instances_ button, and set the desired value.
+Like exclusions, the number of [browser windows](workflow-setup.md#browser-windows) can also be adjusted while crawling. On the **Watch Crawl** tab, press the _Edit Browser Windows_ button, and set the desired value.
Unlike exclusions, this change will not be applied to future workflow runs.
diff --git a/docs/user-guide/workflow-setup.md b/docs/user-guide/workflow-setup.md
index 120eea66c..5dae9c82e 100644
--- a/docs/user-guide/workflow-setup.md
+++ b/docs/user-guide/workflow-setup.md
@@ -6,7 +6,7 @@ Changes to a setting will only apply to subsequent crawls.
Crawl settings are shown in the crawl workflow detail **Settings** tab and in the archived item **Crawl Settings** tab.
-## Scope
+## Crawl Scope
Specify the range and depth of your crawl. Different settings will be shown depending on whether you chose _Known URLs_ (crawl type of **URL List**) or _Automated Discovery_ (crawl type of **Seeded Crawl**) when creating a new workflow.
@@ -114,10 +114,6 @@ The crawl will be gracefully stopped after this set period of elapsed time.
The crawl will be gracefully stopped after reaching this set size in GB.
-### Crawler Instances
-
-Increasing the amount of crawler instances will speed up crawls by using additional browser windows to capture more pages in parallel. This will also increase the amount of traffic sent to the website and may result in a higher chance of getting rate limited.
-
### Page Load Timeout
Limits amount of elapsed time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.
@@ -146,6 +142,15 @@ Configure the browser used to visit URLs during the crawl.
Sets the [_Browser Profile_](browser-profiles.md) to be used for this crawl.
+### Browser Windows
+
+Sets the number of browser windows that are used to visit webpages while crawling. Increasing the number of browser windows will speed up crawls by capturing more pages in parallel.
+
+There are some trade-offs:
+
+- Increased traffic sent to the website, which may result in a higher chance of getting rate limited.
+- More execution minutes used per crawl.
+
### Crawler Release Channel
Sets the release channel of [Browsertrix Crawler](https://github.com/webrecorder/browsertrix-crawler) to be used for this crawl. Crawls started by this workflow will use the latest crawler version from the selected release channel. Generally "Default" will be the most stable, however others may have newer features (or bugs)!
diff --git a/frontend/src/components/ui/config-details.ts b/frontend/src/components/ui/config-details.ts
index 78fc14cc7..21d130e87 100644
--- a/frontend/src/components/ui/config-details.ts
+++ b/frontend/src/components/ui/config-details.ts
@@ -166,10 +166,6 @@ export class ConfigDetails extends LiteElement {
msg("Crawl Size Limit"),
renderSize(crawlConfig?.maxCrawlSize),
)}
- ${this.renderSetting(
- msg("Crawler Instances"),
- crawlConfig?.scale ? `${crawlConfig.scale}×` : "",
- )}
${sectionStrings.perPageLimits}
${this.tabLabels[this.activePanel]}