diff --git a/_includes/v21.2/sidebar-data/deploy.json b/_includes/v21.2/sidebar-data/deploy.json index b5360ffa658..f8a4d644a0f 100644 --- a/_includes/v21.2/sidebar-data/deploy.json +++ b/_includes/v21.2/sidebar-data/deploy.json @@ -267,7 +267,7 @@ ] }, { - "title": "Data Domiciling", + "title": "Data Domiciling with CockroachDB", "urls": [ "/${VERSION}/data-domiciling.html" ] diff --git a/_includes/v21.2/sql/multiregion-movr-global.md b/_includes/v21.2/sql/multiregion-movr-global.md new file mode 100644 index 00000000000..a8fc70afc06 --- /dev/null +++ b/_includes/v21.2/sql/multiregion-movr-global.md @@ -0,0 +1,17 @@ +Because the data in `promo_codes` is not updated frequently (a.k.a., "read-mostly"), and needs to be available from any region, the right table locality is [`GLOBAL`](multiregion-overview.html#global-tables). + +{% include copy-clipboard.html %} +~~~ sql +ALTER TABLE promo_codes SET locality GLOBAL; +~~~ + +Next, alter the `user_promo_codes` table to have a foreign key into the global `promo_codes` table. This will enable fast reads of the `promo_codes.code` column from any region in the cluster. + +{% include copy-clipboard.html %} +~~~ sql +ALTER TABLE user_promo_codes + ADD CONSTRAINT user_promo_codes_code_fk + FOREIGN KEY (code) + REFERENCES promo_codes (code) + ON UPDATE CASCADE; +~~~ diff --git a/_includes/v21.2/sql/multiregion-movr-regional-by-row.md b/_includes/v21.2/sql/multiregion-movr-regional-by-row.md new file mode 100644 index 00000000000..edd6a37dc41 --- /dev/null +++ b/_includes/v21.2/sql/multiregion-movr-regional-by-row.md @@ -0,0 +1,103 @@ +All of the tables except `promo_codes` contain rows which are partitioned by region, and updated very frequently. For these tables, the right table locality for optimizing access to their data is [`REGIONAL BY ROW`](multiregion-overview.html#regional-by-row-tables). + +Apply this table locality to the remaining tables. These statements use a `CASE` statement to put data for a given city in the right region and can take around 1 minute to complete for each table. + +- `rides` + + {% include copy-clipboard.html %} + ~~~ sql + ALTER TABLE rides ADD COLUMN region crdb_internal_region AS ( + CASE WHEN city = 'amsterdam' THEN 'europe-west1' + WHEN city = 'paris' THEN 'europe-west1' + WHEN city = 'rome' THEN 'europe-west1' + WHEN city = 'new york' THEN 'us-east1' + WHEN city = 'boston' THEN 'us-east1' + WHEN city = 'washington dc' THEN 'us-east1' + WHEN city = 'san francisco' THEN 'us-west1' + WHEN city = 'seattle' THEN 'us-west1' + WHEN city = 'los angeles' THEN 'us-west1' + END + ) STORED; + ALTER TABLE rides ALTER COLUMN REGION SET NOT NULL; + ALTER TABLE rides SET LOCALITY REGIONAL BY ROW AS "region"; + ~~~ + +- `user_promo_codes` + + {% include copy-clipboard.html %} + ~~~ sql + ALTER TABLE user_promo_codes ADD COLUMN region crdb_internal_region AS ( + CASE WHEN city = 'amsterdam' THEN 'europe-west1' + WHEN city = 'paris' THEN 'europe-west1' + WHEN city = 'rome' THEN 'europe-west1' + WHEN city = 'new york' THEN 'us-east1' + WHEN city = 'boston' THEN 'us-east1' + WHEN city = 'washington dc' THEN 'us-east1' + WHEN city = 'san francisco' THEN 'us-west1' + WHEN city = 'seattle' THEN 'us-west1' + WHEN city = 'los angeles' THEN 'us-west1' + END + ) STORED; + ALTER TABLE user_promo_codes ALTER COLUMN REGION SET NOT NULL; + ALTER TABLE user_promo_codes SET LOCALITY REGIONAL BY ROW AS "region"; + ~~~ + +- `users` + + {% include copy-clipboard.html %} + ~~~ sql + ALTER TABLE users ADD COLUMN region crdb_internal_region AS ( + CASE WHEN city = 'amsterdam' THEN 'europe-west1' + WHEN city = 'paris' THEN 'europe-west1' + WHEN city = 'rome' THEN 'europe-west1' + WHEN city = 'new york' THEN 'us-east1' + WHEN city = 'boston' THEN 'us-east1' + WHEN city = 'washington dc' THEN 'us-east1' + WHEN city = 'san francisco' THEN 'us-west1' + WHEN city = 'seattle' THEN 'us-west1' + WHEN city = 'los angeles' THEN 'us-west1' + END + ) STORED; + ALTER TABLE users ALTER COLUMN REGION SET NOT NULL; + ALTER TABLE users SET LOCALITY REGIONAL BY ROW AS "region"; + ~~~ + +- `vehicle_location_histories` + + {% include copy-clipboard.html %} + ~~~ sql + ALTER TABLE vehicle_location_histories ADD COLUMN region crdb_internal_region AS ( + CASE WHEN city = 'amsterdam' THEN 'europe-west1' + WHEN city = 'paris' THEN 'europe-west1' + WHEN city = 'rome' THEN 'europe-west1' + WHEN city = 'new york' THEN 'us-east1' + WHEN city = 'boston' THEN 'us-east1' + WHEN city = 'washington dc' THEN 'us-east1' + WHEN city = 'san francisco' THEN 'us-west1' + WHEN city = 'seattle' THEN 'us-west1' + WHEN city = 'los angeles' THEN 'us-west1' + END + ) STORED; + ALTER TABLE vehicle_location_histories ALTER COLUMN REGION SET NOT NULL; + ALTER TABLE vehicle_location_histories SET LOCALITY REGIONAL BY ROW AS "region"; + ~~~ + +- `vehicles` + + {% include copy-clipboard.html %} + ~~~ sql + ALTER TABLE vehicles ADD COLUMN region crdb_internal_region AS ( + CASE WHEN city = 'amsterdam' THEN 'europe-west1' + WHEN city = 'paris' THEN 'europe-west1' + WHEN city = 'rome' THEN 'europe-west1' + WHEN city = 'new york' THEN 'us-east1' + WHEN city = 'boston' THEN 'us-east1' + WHEN city = 'washington dc' THEN 'us-east1' + WHEN city = 'san francisco' THEN 'us-west1' + WHEN city = 'seattle' THEN 'us-west1' + WHEN city = 'los angeles' THEN 'us-west1' + END + ) STORED; + ALTER TABLE vehicles ALTER COLUMN REGION SET NOT NULL; + ALTER TABLE vehicles SET LOCALITY REGIONAL BY ROW AS "region"; + ~~~ diff --git a/v21.1/demo-low-latency-multi-region-deployment.md b/v21.1/demo-low-latency-multi-region-deployment.md index b42e33a3687..09570947fea 100644 --- a/v21.1/demo-low-latency-multi-region-deployment.md +++ b/v21.1/demo-low-latency-multi-region-deployment.md @@ -300,14 +300,14 @@ ALTER DATABASE movr ADD REGION "us-west1"; #### Configure GLOBAL tables -As mentioned earlier, all of the tables except `promo_codes` are geographically specific. Because the data in `promo_codes` is not updated frequently (a.k.a., "read-mostly"), and needs to be available from any region, the right table locality is [`GLOBAL`](multiregion-overview.html#global-tables). +All of the tables except `promo_codes` contain rows which are partitioned by region, and updated very frequently. Because the data in `promo_codes` is not updated frequently (a.k.a., "read-mostly"), and needs to be available from any region, the right table locality is [`GLOBAL`](multiregion-overview.html#global-tables). {% include copy-clipboard.html %} ~~~ sql ALTER TABLE promo_codes SET locality GLOBAL; ~~~ -Next, alter the `user_promo_codes` table to have a foreign key into the `promo_codes` table. This step is necessary to modify the MovR schema design to take full advantage of the multi-region features in v21.1+. +Next, alter the `user_promo_codes` table to have a foreign key into the global `promo_codes` table. This will enable fast reads of the `promo_codes.code` column from any region in the cluster. {% include copy-clipboard.html %} ~~~ sql diff --git a/v21.2/data-domiciling.md b/v21.2/data-domiciling.md index 3007b641f55..1e12691a1e5 100644 --- a/v21.2/data-domiciling.md +++ b/v21.2/data-domiciling.md @@ -1,93 +1,275 @@ --- title: Data Domiciling with CockroachDB -summary: Learn how to use CockroachDB's improved multi-region capabilities to implement data domiciling. +summary: Learn how to use CockroachDB's multi-region SQL capabilities and the ALTER DATABASE ... PLACEMENT RESTRICTED statement as part of your data domiciling approach toc: true docs_area: deploy --- As you scale your usage of [multi-region clusters](multiregion-overview.html), you may need to keep certain subsets of data in specific localities. Keeping specific data on servers in specific geographic locations is also known as _data domiciling_. -CockroachDB has basic support for data domiciling in multi-region clusters using the process described on this page. At a high level, this process involves: +CockroachDB has basic support for data domiciling in multi-region clusters using the [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) statement. -1. Creating separate databases per domicile. -1. Adding regions to those databases using the [`ADD REGION`](add-region.html) statement. -1. Making sure your application is adding data meant for a specific domicile to the correct database. +{{site.data.alerts.callout_danger}} +Using CockroachDB as part of your approach to data domiciling has several limitations. For more information, see [Limitations](#limitations). +{{site.data.alerts.end}} ## Overview -The best way to keep a specific data set cordoned off from others in CockroachDB is also the simplest: keep it in a separate database. Data that needs to meet specific domiciling requirements such as "data from EU residents must be domiciled in the EU" is most easily met by creating a separate database only for EU residents' data. +This page has instructions for data domiciling in [multi-region clusters](multiregion-overview.html) using the [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) statement. At a high level, this process involves: + +1. Controlling the placement of specific row or table data using regional tables - specifically, the [`REGIONAL BY ROW`](multiregion-overview.html#regional-by-row-tables), [`REGIONAL BY TABLE`](multiregion-overview.html#regional-tables). +1. Further restricting where the data in those regional tables is stored using the [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) statement, which constrains the replicas for a partition or table to be stored in only the [home regions](set-locality.html#crdb_region) associated with those rows or tables. + +For more information, see the sections below. + +## Prerequisites + +This page assumes you are already familiar with: + +- CockroachDB's [multi-region SQL abstractions](multiregion-overview.html). If you are not using them, the instructions on this page will not apply. +- The fact that CockroachDB stores your data in [a distributed key-value store, which is split into chunks called ranges](architecture/distribution-layer.html#overview). + +## Example + +In the following example, you will go through the process of configuring the [MovR](movr.html) data set using [multi-region SQL statements](multiregion-overview.html). Then, as part of implementing a data domiciling strategy, you will apply restricted replica settings using the [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) statement. Finally, you will verify that the resulting replica placements are as expected using [replication reports](query-replication-reports.html). + +For the purposes of this example, the data domiciling requirement is to configure a multi-region deployment of the [MovR database](movr.html) such that data for EU-based users, vehicles, etc. is being stored on CockroachDB nodes running in EU localities. -In a multi-region setting, you can associate that database with only those regions which should accept and store EU user data. Luckily, CockroachDB supports cross-database [selection queries](selection-queries.html), so you can still join this data with other data sets to keep track of what is happening across your application, while meeting the requirement that data is stored on disk in the allowed localities. Depending on your requirements, you may want to further require that even read queries are only generated from the region in which you are allowed to access the data. +### Step 1. Start a simulated multi-region cluster + +{% include {{page.version.version}}/sql/start-a-multi-region-demo-cluster.md %} + +You now have a cluster running across 9 nodes, with 3 nodes each in the following regions: + +- `us-east1` +- `us-west1` +- `europe-west1` + +You can verify this using the [`SHOW REGIONS`](show-regions.html) statement: + +{% include_cached copy-clipboard.html %} +~~~ sql +SHOW REGIONS; +~~~ + +~~~ + region | zones | database_names | primary_region_of +---------------+---------+----------------+-------------------- + europe-west1 | {b,c,d} | {} | {} + us-east1 | {b,c,d} | {} | {} + us-west1 | {a,b,c} | {} | {} +(3 rows) +~~~ + +### Step 2. Apply multi-region SQL abstractions + +Execute the following statements to set the [database regions](multiregion-overview.html#database-regions). This information is necessary so that CockroachDB can later move data around to optimize access to particular data from particular regions. + +{% include copy-clipboard.html %} +~~~ sql +ALTER DATABASE movr PRIMARY REGION "europe-west1"; +ALTER DATABASE movr ADD REGION "us-east1"; +ALTER DATABASE movr ADD REGION "us-west1"; +~~~ + +{% include {{page.version.version}}/sql/multiregion-movr-global.md %} + +{% include {{page.version.version}}/sql/multiregion-movr-regional-by-row.md %} + +### Step 3. View noncompliant replicas + +Next, run a [replication report](query-replication-reports.html) to see which ranges are still not in compliance with your desired domiciling: that data on EU-based entities (users, etc.) does not leave EU-based nodes. + +On a small demo cluster like this one, the data movement from the previous step should have finished almost instantly; on larger clusters, the rebalancing process may take longer. For more information about the performance considerations of rebalancing data in multi-region clusters, see [Performance considerations](migrate-to-multiregion-sql.html#performance-considerations). + +With the default settings, you should expect some replicas in the cluster to be violating this constraint. This is because [non-voting replicas](architecture/replication-layer.html#non-voting-replicas) are enabled by default in [multi-region clusters](multiregion-overview.html) to enable stale reads of data in [regional tables](regional-tables.html) from outside those tables' [home regions](set-locality.html#crdb_region). For many use cases, this is preferred, but it keeps you from meeting the domiciling requirements for this example. + +{% include_cached copy-clipboard.html %} +~~~ sql +SELECT * FROM system.replication_constraint_stats WHERE violating_ranges > 0; +~~~ + +~~~ + zone_id | subzone_id | type | config | report_id | violation_start | violating_ranges +----------+------------+------------+------------------------+-----------+-------------------------------+------------------- + 52 | 0 | constraint | +region=europe-west1:1 | 1 | 2022-01-19 16:33:49.485535+00 | 10 + 52 | 0 | constraint | +region=us-east1:1 | 1 | 2022-01-19 16:34:49.930886+00 | 58 + 52 | 0 | constraint | +region=us-west1:1 | 1 | 2022-01-19 16:34:49.930886+00 | 61 +~~~ + +Based on this output, you can see that plenty of replicas are out of compliance (see the `violating_ranges` column) for the reason described above: the presence of non-voting replicas in other regions to enable fast stale reads from those regions. {{site.data.alerts.callout_info}} -As of CockroachDB v21.1 and earlier, some metadata about the user data may be stored in system ranges, system tables, etc. This might result in potential "leakage" outside of the desired domicile if your schema includes table names, etc., that may reveal information about their contents (e.g., `SELECT * FROM germany_users_with_iphones_over_35`, to provide a silly example). +The [Replication Reports](query-replication-reports.html) do not consider [non-voting replicas](architecture/replication-layer.html#non-voting-replicas) located outside of a table's home region to be in compliance with the constraints on that table. {{site.data.alerts.end}} -Therefore, make sure to design your schema such that information that must remain domiciled _cannot_ be deduced from the schema design (e.g., primary keys, table names, column names, usernames). +Next, run the query suggested in [the Replication Reports documentation](query-replication-reports.html#find-out-which-of-your-tables-have-a-constraint-violation) that should show which database and table names contain the `violating_ranges`. + +{% include_cached copy-clipboard.html %} +~~~ sql +WITH + partition_violations + AS ( + SELECT + * + FROM + system.replication_constraint_stats + WHERE + violating_ranges > 0 + ), + report + AS ( + SELECT + crdb_internal.zones.zone_id, + crdb_internal.zones.subzone_id, + target, + database_name, + table_name, + index_name, + partition_violations.type, + partition_violations.config, + partition_violations.violation_start, + partition_violations.violating_ranges + FROM + crdb_internal.zones, partition_violations + WHERE + crdb_internal.zones.zone_id + = partition_violations.zone_id + ) +SELECT * FROM report; +~~~ + +~~~ + zone_id | subzone_id | target | database_name | table_name | index_name | type | config | violation_start | violating_ranges +----------+------------+---------------+---------------+------------+------------+------------+------------------------+-------------------------------+------------------- + 52 | 0 | DATABASE movr | movr | NULL | NULL | constraint | +region=europe-west1:1 | 2022-01-19 16:33:49.485535+00 | 16 + 52 | 0 | DATABASE movr | movr | NULL | NULL | constraint | +region=us-west1:1 | 2022-01-19 16:34:49.930886+00 | 78 + 52 | 0 | DATABASE movr | movr | NULL | NULL | constraint | +region=us-east1:1 | 2022-01-19 16:34:49.930886+00 | 78 +~~~ + +This output shows that the `movr` database has ranges out of compliance, which you saw previously. Unfortunately, this output does not contain the table or index names due to a current limitation of the replication reports: non-voting replicas are not associated with any tables or indexes by the reports. -For a complete list of the limitations of the data domiciling approach described here, see the [Limitations](#limitations) section. +### Step 4. Apply stricter replica placement settings -### Step 1. Create separate databases per domiciled data requirement +To ensure that data on EU-based users, vehicles, etc. from [`REGIONAL BY ROW` tables](regional-tables.html#regional-by-row-tables) is stored only on EU-based nodes in the cluster, you must disable the use of [non-voting replicas](architecture/replication-layer.html#non-voting-replicas) on all of the [regional tables](regional-tables.html) in this database. You can do this using the [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) statement. -As mentioned, the best way to keep specific data sets cordoned off from each other in CockroachDB is to keep them in separate databases. To create a separate database for EU-based users, run the following statement: +To use this statement, you must set the `enable_multiregion_placement_policy` [session setting](set-vars.html) or the `sql.defaults.multiregion_placement_policy.enabled` [cluster setting](cluster-settings.html): {% include_cached copy-clipboard.html %} ~~~ sql -CREATE DATABASE eu_users; -USE eu_users; +SET enable_multiregion_placement_policy=on; ~~~ -### Step 2. Add specific regions to each database +~~~ +SET +~~~ -Make sure that CockroachDB is storing the data in the `eu_users` database in European regions using the [`ALTER DATABASE ... SET PRIMARY REGION`](set-primary-region.html) and [`... ADD REGION`](add-region.html) statements: +Next, use the [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) statement to disable non-voting replicas for regional tables: {% include_cached copy-clipboard.html %} ~~~ sql -ALTER DATABASE eu_users SET PRIMARY REGION 'eu-west-1'; -ALTER DATABASE eu_users ADD REGION 'eu-west-2'; -ALTER DATABASE eu_users ADD REGION 'eu-central-1'; +ALTER DATABASE movr PLACEMENT RESTRICTED; ~~~ +~~~ +ALTER DATABASE PLACEMENT +~~~ + +The restricted replica placement settings should start to apply immediately. + {{site.data.alerts.callout_info}} -To be able to add these regions from SQL, you must have started the cluster with these regions using the [`cockroach start --locality`](cockroach-start.html#locality) flag. +[`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) does not affect the replica placement for [global tables](global-tables.html), which are designed to provide fast, up-to-date reads from all [database regions](multiregion-overview.html#database-regions). {{site.data.alerts.end}} -### Step 3. Add domiciled data to the right databases +### Step 5. Verify updated replica placement -You will need to make sure that user data associated with EU users is only added to the `eu_users` database. +Now that you have restricted the placement of non-voting replicas for all [regional tables](regional-tables.html), you can run another [replication report](query-replication-reports.html) to see the effects: -How exactly you will accomplish that is beyond the scope of this document, but you will likely need to add some logic to your application and/or to your load balancing infrastructure to make sure that when your application code is [inserting](insert.html) or [updating](update.html) EU user data, the data only ever updates the `eu_users` database. For example, you can set the target database in your [connection string](connection-parameters.html): +{% include_cached copy-clipboard.html %} +~~~ sql +SELECT * FROM system.replication_constraint_stats WHERE violating_ranges > 0; +~~~ ~~~ -postgres://maxroach:mypassword@region.cockroachlabs.cloud:26257/eu_users?sslmode=verify-full&sslrootcert=certs/app-ca.crt + zone_id | subzone_id | type | config | report_id | violation_start | violating_ranges +----------+------------+------------+--------------------+-----------+-------------------------------+------------------- + 57 | 0 | constraint | +region=us-east1:1 | 1 | 2022-01-19 19:09:00.235247+00 | 1 + 57 | 0 | constraint | +region=us-west1:1 | 1 | 2022-01-19 19:09:00.235247+00 | 1 ~~~ -For a more detailed example showing how to deploy an application and CockroachDB in multiple regions, see [Deploy a Global, Serverless Application](movr-flask-deployment.html#global-application-deployment). - -### Step 4. Query across databases +The output above shows that there are now far fewer replicas that do not meet the data domiciling goal. As described above, [`ALTER DATABASE ... PLACEMENT RESTRICTED`](placement-restricted.html) does not affect the replica placement for [`GLOBAL` tables](global-tables.html), so it's likely that these few replicas are part of a global table. -Storing data on EU users in a separate database is made easier by the fact that CockroachDB supports cross-database [joins](joins.html). For example, to join data from EU and non-EU user tables in a hypothetical application, you might issue a query like the following, which joins users from separate databases on a shared application ID: +To verify that the constraint violating replicas are indeed part of a `GLOBAL` table, run the replication report query from [Step 3](#step-3-view-noncompliant-replicas) again as shown below. This will display the database and table names of these replicas. {% include_cached copy-clipboard.html %} ~~~ sql -SELECT us_users.name, eu_users.name - FROM us_users.users, eu_users.users - WHERE us_users.users.application_id = eu_users.users.application_id ... +WITH + partition_violations + AS ( + SELECT + * + FROM + system.replication_constraint_stats + WHERE + violating_ranges > 0 + ), + report + AS ( + SELECT + crdb_internal.zones.zone_id, + crdb_internal.zones.subzone_id, + target, + database_name, + table_name, + index_name, + partition_violations.type, + partition_violations.config, + partition_violations.violation_start, + partition_violations.violating_ranges + FROM + crdb_internal.zones, partition_violations + WHERE + crdb_internal.zones.zone_id + = partition_violations.zone_id + ) +SELECT * FROM report; ~~~ +~~~ + zone_id | subzone_id | target | database_name | table_name | index_name | type | config | violation_start | violating_ranges +----------+------------+-------------------------------+---------------+-------------+------------+------------+--------------------+-------------------------------+------------------- + 57 | 0 | TABLE movr.public.promo_codes | movr | promo_codes | NULL | constraint | +region=us-east1:1 | 2022-01-19 19:09:00.235247+00 | 1 + 57 | 0 | TABLE movr.public.promo_codes | movr | promo_codes | NULL | constraint | +region=us-west1:1 | 2022-01-19 19:09:00.235247+00 | 1 +~~~ + +As expected, these replicas are part of the `promo_codes` table, which was configured to use the [`GLOBAL`](global-tables.html) [table locality](multiregion-overview.html#table-locality) in [Step 2](#step-2-apply-multi-region-sql-abstractions). + +Now that you have verified that the system is configured to meet the domiciling requirement, it's a good idea to run these replication reports on a regular basis (via automation of some kind) to ensure that the requirement continues to be met. + +{{site.data.alerts.callout_info}} +The steps above are necessary but not sufficient to accomplish a data domiciling solution using CockroachDB. Be sure to review the [limitations of CockroachDB for data domiciling](#limitations) and design your total solution with those limitations in mind. +{{site.data.alerts.end}} + ## Limitations -As noted, there are several limitations to this approach: +Using CockroachDB as part of your approach to data domiciling has several limitations: -- Some metadata about the objects in a schema may be stored in system ranges, system tables, etc. CockroachDB synchronizes system ranges and system tables across nodes. This synchronization does not respect any multi-region settings applied via either the multi-region SQL commands described, or the low-level [zone configs](configure-replication-zones.html) mechanism. This might result in potential "leakage" outside of the desired domicile if your schema includes primary keys, table names, etc., that may reveal information about their contents. -- If you start a node with a [`--locality`](cockroach-start.html#locality) flag that says the node is in region _A_, but the node is actually running in some region _B_, this approach will not work. A CockroachDB node only knows its locality based on the text supplied to the `--locality` flag; it can not ensure that it is actually running in that physical location. -- Cross-region writes are slower than intra-region writes. This may be an issue depending on your application's performance needs, since following the advice in this page would result in having different databases' data stored in different regions. +- When columns are [indexed](indexes.html), a subset of data from the indexed columns may appear in [meta ranges](architecture/distribution-layer.html#meta-ranges) or other system tables. CockroachDB synchronizes these system ranges and system tables across nodes. This synchronization does not respect any multi-region settings applied via either the [multi-region SQL statements](multiregion-overview.html), or the low-level [zone configs](configure-replication-zones.html) mechanism. +- [Zone configs](configure-replication-zones.html) can be used for data placement but these features were historically built for performance, not for domiciling. The replication system's top priority is to prevent the loss of data and it may override the zone configurations if necessary to ensure data durability. For more information, see [Configure Replication Zones](configure-replication-zones.html#types-of-constraints). +- If your [log files](logging-overview.html) are kept in the region where they were generated, there is some cross-region leakage (like the system tables described previously), but the majority of user data that makes it into the logs is going to be homed in that region. If that's not strong enough, you can use the [log redaction functionality](configure-logs.html#redact-logs) to strip all raw data from the logs. You can also limit your log retention entirely. +- If you start a node with a [`--locality`](cockroach-start.html#locality) flag that says the node is in region _A_, but the node is actually running in some region _B_, data domiciling based on the inferred node placement will not work. A CockroachDB node only knows its locality based on the text supplied to the `--locality` flag; it can not ensure that it is actually running in that physical location. ## See also -- [Multi-Region Capabilities Overview](multiregion-overview.html) -- [How to Choose a Multi-Region Configuration](choosing-a-multi-region-configuration.html) -- [When to Use ZONE vs. REGION Survival Goals](when-to-use-zone-vs-region-survival-goals.html) -- [When to Use REGIONAL vs. GLOBAL Tables](when-to-use-regional-vs-global-tables.html) -- [Low Latency Reads and Writes in a Multi-Region Cluster](demo-low-latency-multi-region-deployment.html) +- [Choosing a multi-region configuration](choosing-a-multi-region-configuration.html) +- [Install CockroachDB](install-cockroachdb.html) +- [Migrate to Multi-region SQL](migrate-to-multiregion-sql.html) +- [Multi-region Overview](multiregion-overview.html) +- [Multi-region SQL Performance](demo-low-latency-multi-region-deployment.html) +- [Multi-region overview](multiregion-overview.html) +- [Reads and Writes in CockroachDB](architecture/reads-and-writes-overview.html) +- [When to use REGIONAL vs GLOBAL tables](when-to-use-regional-vs-global-tables.html) +- [When to use ZONE vs REGION survival goals](when-to-use-zone-vs-region-survival-goals.html) +- [When to use `REGIONAL` vs. `GLOBAL` tables](when-to-use-regional-vs-global-tables.html) +- [When to use `ZONE` vs. `REGION` survival goals](when-to-use-zone-vs-region-survival-goals.html) - [`ADD REGION`](add-region.html) diff --git a/v21.2/demo-low-latency-multi-region-deployment.md b/v21.2/demo-low-latency-multi-region-deployment.md index aa69bb5094d..b3e90cf6f47 100644 --- a/v21.2/demo-low-latency-multi-region-deployment.md +++ b/v21.2/demo-low-latency-multi-region-deployment.md @@ -301,131 +301,15 @@ ALTER DATABASE movr ADD REGION "us-west1"; #### Configure GLOBAL tables -As mentioned earlier, all of the tables except `promo_codes` are geographically specific. Because the data in `promo_codes` is not updated frequently (a.k.a., "read-mostly"), and needs to be available from any region, the right table locality is [`GLOBAL`](multiregion-overview.html#global-tables). +As mentioned earlier, all of the tables except `promo_codes` are geographically specific. -{% include copy-clipboard.html %} -~~~ sql -ALTER TABLE promo_codes SET locality GLOBAL; -~~~ - -Next, alter the `user_promo_codes` table to have a foreign key into the `promo_codes` table. This step is necessary to modify the MovR schema design to take full advantage of the multi-region features in v21.1+. - -{% include copy-clipboard.html %} -~~~ sql -ALTER TABLE user_promo_codes - ADD CONSTRAINT user_promo_codes_code_fk - FOREIGN KEY (code) - REFERENCES promo_codes (code) - ON UPDATE CASCADE; -~~~ +{% include {{page.version.version}}/sql/multiregion-movr-global.md %} #### Configure REGIONAL BY ROW tables -All of the tables except `promo_codes` are geographically specific, and updated very frequently. For these tables, the right table locality for optimizing access to their data is [`REGIONAL BY ROW`](multiregion-overview.html#regional-by-row-tables). - -Apply this table locality to the remaining tables. These statements use a `CASE` statement to put data for a given city in the right region and can take around 1 minute to complete for each table. - -- `rides` - - {% include copy-clipboard.html %} - ~~~ sql - ALTER TABLE rides ADD COLUMN region crdb_internal_region AS ( - CASE WHEN city = 'amsterdam' THEN 'europe-west1' - WHEN city = 'paris' THEN 'europe-west1' - WHEN city = 'rome' THEN 'europe-west1' - WHEN city = 'new york' THEN 'us-east1' - WHEN city = 'boston' THEN 'us-east1' - WHEN city = 'washington dc' THEN 'us-east1' - WHEN city = 'san francisco' THEN 'us-west1' - WHEN city = 'seattle' THEN 'us-west1' - WHEN city = 'los angeles' THEN 'us-west1' - END - ) STORED; - ALTER TABLE rides ALTER COLUMN REGION SET NOT NULL; - ALTER TABLE rides SET LOCALITY REGIONAL BY ROW AS "region"; - ~~~ - -- `user_promo_codes` - - {% include copy-clipboard.html %} - ~~~ sql - ALTER TABLE user_promo_codes ADD COLUMN region crdb_internal_region AS ( - CASE WHEN city = 'amsterdam' THEN 'europe-west1' - WHEN city = 'paris' THEN 'europe-west1' - WHEN city = 'rome' THEN 'europe-west1' - WHEN city = 'new york' THEN 'us-east1' - WHEN city = 'boston' THEN 'us-east1' - WHEN city = 'washington dc' THEN 'us-east1' - WHEN city = 'san francisco' THEN 'us-west1' - WHEN city = 'seattle' THEN 'us-west1' - WHEN city = 'los angeles' THEN 'us-west1' - END - ) STORED; - ALTER TABLE user_promo_codes ALTER COLUMN REGION SET NOT NULL; - ALTER TABLE user_promo_codes SET LOCALITY REGIONAL BY ROW AS "region"; - ~~~ - -- `users` - - {% include copy-clipboard.html %} - ~~~ sql - ALTER TABLE users ADD COLUMN region crdb_internal_region AS ( - CASE WHEN city = 'amsterdam' THEN 'europe-west1' - WHEN city = 'paris' THEN 'europe-west1' - WHEN city = 'rome' THEN 'europe-west1' - WHEN city = 'new york' THEN 'us-east1' - WHEN city = 'boston' THEN 'us-east1' - WHEN city = 'washington dc' THEN 'us-east1' - WHEN city = 'san francisco' THEN 'us-west1' - WHEN city = 'seattle' THEN 'us-west1' - WHEN city = 'los angeles' THEN 'us-west1' - END - ) STORED; - ALTER TABLE users ALTER COLUMN REGION SET NOT NULL; - ALTER TABLE users SET LOCALITY REGIONAL BY ROW AS "region"; - ~~~ - -- `vehicle_location_histories` - - {% include copy-clipboard.html %} - ~~~ sql - ALTER TABLE vehicle_location_histories ADD COLUMN region crdb_internal_region AS ( - CASE WHEN city = 'amsterdam' THEN 'europe-west1' - WHEN city = 'paris' THEN 'europe-west1' - WHEN city = 'rome' THEN 'europe-west1' - WHEN city = 'new york' THEN 'us-east1' - WHEN city = 'boston' THEN 'us-east1' - WHEN city = 'washington dc' THEN 'us-east1' - WHEN city = 'san francisco' THEN 'us-west1' - WHEN city = 'seattle' THEN 'us-west1' - WHEN city = 'los angeles' THEN 'us-west1' - END - ) STORED; - ALTER TABLE vehicle_location_histories ALTER COLUMN REGION SET NOT NULL; - ALTER TABLE vehicle_location_histories SET LOCALITY REGIONAL BY ROW AS "region"; - ~~~ - -- `vehicles` - - {% include copy-clipboard.html %} - ~~~ sql - ALTER TABLE vehicles ADD COLUMN region crdb_internal_region AS ( - CASE WHEN city = 'amsterdam' THEN 'europe-west1' - WHEN city = 'paris' THEN 'europe-west1' - WHEN city = 'rome' THEN 'europe-west1' - WHEN city = 'new york' THEN 'us-east1' - WHEN city = 'boston' THEN 'us-east1' - WHEN city = 'washington dc' THEN 'us-east1' - WHEN city = 'san francisco' THEN 'us-west1' - WHEN city = 'seattle' THEN 'us-west1' - WHEN city = 'los angeles' THEN 'us-west1' - END - ) STORED; - ALTER TABLE vehicles ALTER COLUMN REGION SET NOT NULL; - ALTER TABLE vehicles SET LOCALITY REGIONAL BY ROW AS "region"; - ~~~ +{% include {{page.version.version}}/sql/multiregion-movr-regional-by-row.md %} -## Step 7. Re-check service latency +## Step 6. Re-check service latency As the multi-region schema changes complete, you should see changes to the following metrics: