From c40fff2860ff61b054152e463c3c0ee7943c47d5 Mon Sep 17 00:00:00 2001
From: Dwight Hodge <79169168+ddhodge@users.noreply.github.com>
Date: Fri, 6 Sep 2024 20:39:15 -0400
Subject: [PATCH] copy Develop changes to stable (#23812)
---
.../preview/architecture/design-goals.md | 2 +-
docs/content/preview/develop/_index.md | 2 +-
.../configuration/all-flags-yb-master.md | 2 +-
.../configuration/all-flags-yb-tserver.md | 2 +-
.../xcluster-replication-setup.md | 2 +-
docs/content/stable/api/ycql/type_jsonb.md | 2 +-
.../stable/architecture/design-goals.md | 2 +-
docs/content/stable/develop/_index.md | 128 +++---
.../common-patterns/timeseries/_index.md | 45 ---
.../stable/develop/data-modeling/_index.md | 62 +++
.../common-patterns/_index.md | 16 +-
.../common-patterns/keyvalue.md | 7 +-
.../common-patterns/timeseries/_index.md | 53 +++
.../common-patterns/timeseries/data-expiry.md | 7 +-
.../timeseries/global-ordering.md | 1 +
.../timeseries/ordering-by-entity.md | 1 +
.../timeseries/partitioning-by-time.md | 7 +-
.../develop/data-modeling/hot-shards-ysql.md | 129 ++++++
.../develop/data-modeling/partitioning.md | 45 +++
.../data-modeling/primary-keys-ycql.md | 226 +++++++++++
.../data-modeling/primary-keys-ysql.md | 222 +++++++++++
.../data-modeling/secondary-indexes-ycql.md | 179 +++++++++
.../data-modeling/secondary-indexes-ysql.md | 215 ++++++++++
docs/content/stable/develop/learn/_index.md | 211 +++-------
.../develop/learn/data-modeling-ycql.md | 228 -----------
.../develop/learn/data-modeling-ysql.md | 374 ------------------
.../stable/develop/learn/data-types-ycql.md | 208 ----------
.../stable/develop/learn/data-types-ysql.md | 16 -
.../{develop/learn => explore}/sql-nosql.md | 46 +--
.../configuration/all-flags-yb-master.md | 2 +-
.../configuration/all-flags-yb-tserver.md | 2 +-
.../xcluster-replication-setup.md | 2 +-
32 files changed, 1315 insertions(+), 1131 deletions(-)
delete mode 100644 docs/content/stable/develop/common-patterns/timeseries/_index.md
create mode 100644 docs/content/stable/develop/data-modeling/_index.md
rename docs/content/stable/develop/{ => data-modeling}/common-patterns/_index.md (92%)
rename docs/content/stable/develop/{ => data-modeling}/common-patterns/keyvalue.md (94%)
create mode 100644 docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md
rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/data-expiry.md (93%)
rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/global-ordering.md (99%)
rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/ordering-by-entity.md (99%)
rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/partitioning-by-time.md (95%)
create mode 100644 docs/content/stable/develop/data-modeling/hot-shards-ysql.md
create mode 100644 docs/content/stable/develop/data-modeling/partitioning.md
create mode 100644 docs/content/stable/develop/data-modeling/primary-keys-ycql.md
create mode 100644 docs/content/stable/develop/data-modeling/primary-keys-ysql.md
create mode 100644 docs/content/stable/develop/data-modeling/secondary-indexes-ycql.md
create mode 100644 docs/content/stable/develop/data-modeling/secondary-indexes-ysql.md
delete mode 100644 docs/content/stable/develop/learn/data-modeling-ycql.md
delete mode 100644 docs/content/stable/develop/learn/data-modeling-ysql.md
delete mode 100644 docs/content/stable/develop/learn/data-types-ycql.md
delete mode 100644 docs/content/stable/develop/learn/data-types-ysql.md
rename docs/content/stable/{develop/learn => explore}/sql-nosql.md (70%)
diff --git a/docs/content/preview/architecture/design-goals.md b/docs/content/preview/architecture/design-goals.md
index 7d1afa71a34e..2477b434d6e0 100644
--- a/docs/content/preview/architecture/design-goals.md
+++ b/docs/content/preview/architecture/design-goals.md
@@ -78,7 +78,7 @@ YugabyteDB has been designed with operational simplicity in mind, providing feat
## Heterogeneous workload support
-Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](../../tutorials/azure/azure-event-hubs/), [time-series](../../develop/data-modeling/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads.
+Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](/preview/tutorials/azure/azure-event-hubs/), [time-series](../../develop/data-modeling/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads.
## Transaction isolation levels
diff --git a/docs/content/preview/develop/_index.md b/docs/content/preview/develop/_index.md
index 790c42a446a8..ee96a1d598fb 100644
--- a/docs/content/preview/develop/_index.md
+++ b/docs/content/preview/develop/_index.md
@@ -77,5 +77,5 @@ To learn more about how to use browser-based IDEs, see [Cloud-native development
Yugabyte provides multiple step-by-step guides for building scalable and fault-tolerant applications with YugabyteDB using your favorite programming language, services, and frameworks, including Kafka, Gen-AI, and more.
{{}}
-For step-by-step guides for various frameworks, see [Tutorials](../tutorials/).
+For step-by-step guides for various frameworks, see [Tutorials](/preview/tutorials/).
{{}}
diff --git a/docs/content/preview/reference/configuration/all-flags-yb-master.md b/docs/content/preview/reference/configuration/all-flags-yb-master.md
index 0d068dc78e3d..04e45d76b20a 100644
--- a/docs/content/preview/reference/configuration/all-flags-yb-master.md
+++ b/docs/content/preview/reference/configuration/all-flags-yb-master.md
@@ -9,7 +9,7 @@ menu:
type: indexpage
---
-Use the yb-master binary and its flags to configure the [YB-Master](../../../architecture/concepts/yb-master/) server. The yb-master executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-master, refer to the [yb-master](../yb-master/) documentation.
+Use the yb-master binary and its flags to configure the [YB-Master](../../../architecture/yb-master/) server. The yb-master executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-master, refer to the [yb-master](../yb-master/) documentation.
For a list of all YB-TServer flags, see [All YB-TServer flags](../all-flags-yb-tserver/).
diff --git a/docs/content/preview/reference/configuration/all-flags-yb-tserver.md b/docs/content/preview/reference/configuration/all-flags-yb-tserver.md
index 976296932cd6..50a8750f05d0 100644
--- a/docs/content/preview/reference/configuration/all-flags-yb-tserver.md
+++ b/docs/content/preview/reference/configuration/all-flags-yb-tserver.md
@@ -9,7 +9,7 @@ menu:
type: indexpage
---
-Use the yb-tserver binary and its flags to configure the [YB-TServer](../../../architecture/concepts/yb-tserver/) server. The yb-tserver executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-tserver, refer to the [yb-tserver](../yb-tserver/) documentation.
+Use the yb-tserver binary and its flags to configure the [YB-TServer](../../../architecture/yb-tserver/) server. The yb-tserver executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-tserver, refer to the [yb-tserver](../yb-tserver/) documentation.
For a list of all YB-Master flags, see [All YB-Master flags](../all-flags-yb-master/).
diff --git a/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md b/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md
index 308478e174f9..f792f7433579 100644
--- a/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md
+++ b/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md
@@ -139,7 +139,7 @@ A full copy is done by first backing up the data to external storage, and then r
### YSQL tables
-You can add databases containing colocated tables to the xCluster configuration as long as the underlying database is v2.18.1.0 or later. Colocated tables on the source and target should be created with the same colocation ID if they already exist on both the source and target prior to setup. Refer to [xCluster and colocation](../../../../architecture/docdb-sharding/colocated-tables/#xcluster-and-colocation).
+You can add databases containing colocated tables to the xCluster configuration as long as the underlying database is v2.18.1.0 or later. Colocated tables on the source and target should be created with the same colocation ID if they already exist on both the source and target prior to setup. Refer to [xCluster and colocation](../../../../explore/colocation/#xcluster-and-colocation).
If a [full copy](#full-copy-during-xcluster-setup) is required, the entire database is recreated on the target universe from the current database on the source universe. Be sure to keep the set of tables the same at all times on both the source and target universes in these databases by following the steps in [Manage tables and indexes](../xcluster-replication-ddl/).
diff --git a/docs/content/stable/api/ycql/type_jsonb.md b/docs/content/stable/api/ycql/type_jsonb.md
index 41eb97ab5d4b..e594d484c843 100644
--- a/docs/content/stable/api/ycql/type_jsonb.md
+++ b/docs/content/stable/api/ycql/type_jsonb.md
@@ -241,6 +241,6 @@ Note that JSONB upsert only works for JSON objects and not for other data types
## See also
-- [Explore JSON documents](../../../develop/learn/data-types-ycql/#jsonb)
+- [Explore JSON documents](../../../explore/ycql-language/jsonb-ycql)
- [Data types](..#data-types)
- [Secondary indexes with JSONB](../../../explore/ycql-language/indexes-constraints/secondary-indexes-with-jsonb-ycql/)
diff --git a/docs/content/stable/architecture/design-goals.md b/docs/content/stable/architecture/design-goals.md
index cb3463306bd3..777b73cf809f 100644
--- a/docs/content/stable/architecture/design-goals.md
+++ b/docs/content/stable/architecture/design-goals.md
@@ -76,7 +76,7 @@ YugabyteDB has been designed with operational simplicity in mind, providing feat
## Heterogeneous workload support
-Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](/preview/tutorials/azure/azure-event-hubs/), [time-series](../../develop/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads.
+Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](/preview/tutorials/azure/azure-event-hubs/), [time-series](../../develop/data-modeling/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads.
## Transaction isolation levels
diff --git a/docs/content/stable/develop/_index.md b/docs/content/stable/develop/_index.md
index 6b10512223dc..ee96a1d598fb 100644
--- a/docs/content/stable/develop/_index.md
+++ b/docs/content/stable/develop/_index.md
@@ -8,60 +8,74 @@ image: /images/section_icons/index/develop.png
type: indexpage
---
-{{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
- {{}}
-
-{{}}
+## Data modeling
+
+Although YugabyteDB is fully SQL compatible, modeling data for a distributed database is quite different from modeling for a monolithic database like MySQL or PostgreSQL. This is because the table data is distributed across different nodes. You must understand how to model your data for efficient storage and retrieval from a distributed system.
+
+{{}}
+To understand how to model your data for YugabyteDB, see [Distributed data modeling](./data-modeling/).
+{{}}
+
+## Global applications
+
+Today's applications have to cater to users distributed across the globe. Running applications across multiple data centers while providing the best user experience is no trivial task. Yugabyte provides some battle-tested design patterns for your global applications.
+
+{{}}
+To learn more about building global applications, see [Build global applications](./build-global-apps/).
+{{}}
+
+## Multi-cloud applications
+
+A multi-cloud strategy provides the flexibility to use the optimal computing environment for each specific workload, helps avoid vendor lock-in, lets you place data close to the users, and can minimize cost by choosing optimal pricing and performance of various cloud providers. You can also opt for a hybrid model as your path to migration onto the cloud.
+
+{{}}
+To understand how to build a multi-cloud setup with YugabyteDB, see [Build multi-cloud applications](./multi-cloud/).
+{{}}
+
+## Application development
+
+Although building scalable applications on top of YugabyteDB is straightforward, you need to understand certain fundamental concepts like transactions, search, and more to make the best use of them.
+
+{{}}
+To learn how to build applications on top of YugabyteDB, see [Learn app development](./learn/).
+{{}}
+
+## Best practices
+
+Use these best practices to build distributed applications on top of YugabyteDB; this includes a list of techniques that you can adopt to make your application perform its best.
+
+{{}}
+For more details, see [Best practices](./best-practices-ysql).
+{{}}
+
+## Drivers and ORMs
+
+To communicate with YugabyteDB, applications need to use drivers. Applications can also be built using Object-Relational mappings, a technique used to communicate with the database using object-oriented techniques. We've tested various drivers and ORMs in multiple languages with the optimal configurations to get your applications up and running.
+
+{{}}
+For the list of drivers and ORMs with sample code, see [Drivers and ORMs](../drivers-orms/).
+{{}}
+
+## Quality of service
+
+Although YugabyteDB can scale horizontally when needed, it also includes safety measures and settings such as rate-limiting, admission control, transaction priorities, and more, to ensure applications can maintain a high quality of service for all users when the system comes under heavy load.
+
+{{}}
+To learn more about how to use rate-limiting and other features, see [Quality of service](./quality-of-service/).
+{{}}
+
+## Cloud-native development
+
+Cloud-native development refers to building and running applications that fully exploit the advantages of cloud computing without needing to install any software on your development machine. Two prominent tools for cloud-native development environments are Gitpod and GitHub Codespaces. Both provide cloud-based development environments, but they have their own features and use cases.
+
+{{}}
+To learn more about how to use browser-based IDEs, see [Cloud-native development](./gitdev/).
+{{}}
+
+## Tutorials
+
+Yugabyte provides multiple step-by-step guides for building scalable and fault-tolerant applications with YugabyteDB using your favorite programming language, services, and frameworks, including Kafka, Gen-AI, and more.
+
+{{}}
+For step-by-step guides for various frameworks, see [Tutorials](/preview/tutorials/).
+{{}}
diff --git a/docs/content/stable/develop/common-patterns/timeseries/_index.md b/docs/content/stable/develop/common-patterns/timeseries/_index.md
deleted file mode 100644
index d9f6f09a6538..000000000000
--- a/docs/content/stable/develop/common-patterns/timeseries/_index.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-title: Time series data model
-headerTitle: Time series data model
-linkTitle: Time series
-description: Explore the Time series data model
-headcontent: Handle large amounts of data ordered by time
-menu:
- stable:
- identifier: common-patterns-timeseries
- parent: common-patterns
- weight: 100
-type: indexpage
----
-
-Time series data are measurements or events that are tracked and monitored over time. This could be server metrics, application performance monitoring, network data, sensor data, events, clicks, trades in a market, and many other types of analytics data. A time series data model is designed specifically for handling large amounts of data that are ordered by time.
-
-Although YugabyteDB is hash sharded by default, it also supports range sharding, where the data is ordered and split at specific boundaries.
-
-A time series pattern works best for range queries where you need to look up items in a given time range.
-
-You can use the following common patterns to store and retrieve time series data in YugabyteDB in both distributed and ordered manner:
-
-- **Global ordering by time**
-
- In this pattern, all your data is ordered by time across different tablets.
-
- To understand how to efficiently store and retrieve data in this pattern, see [Global ordering by time](./global-ordering).
-
-- **Ordering by time per entity**
-
- In this pattern, the data is ordered by time in a specific entity.
-
- To understand how to distribute the entities effectively and avoid hot shards, see [Ordering by time per entity](./ordering-by-entity).
-
-- **Automatic data expiration**
-
- In some scenarios, you don't want data lying around for a long time as they may not be needed or you have rules in your organization that you cannot store specific data longer than a particular duration. For such cases, you can set a time-to-live value on rows, columns, and the table itself.
-
- For more details, see [Automatic data expiration](./data-expiry).
-
-- **Partitioning**
-
- When you have a lot of data that needs to deleted regularly, you can opt to partition your data. This also has speed advantages in some cases.
-
- For more details, see [Partitioning by time](./partitioning-by-time).
diff --git a/docs/content/stable/develop/data-modeling/_index.md b/docs/content/stable/develop/data-modeling/_index.md
new file mode 100644
index 000000000000..d480c150fb68
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/_index.md
@@ -0,0 +1,62 @@
+---
+title: Distributed data modeling
+linkTitle: Data modeling
+description: Learn to develop YugabyteDB applications
+image: fa-sharp fa-light fa-objects-column
+menu:
+ stable:
+ identifier: data-modeling
+ parent: develop
+ weight: 100
+type: indexpage
+---
+
+Data modeling is the process of defining the structure, organization, and relationships of data in a database. In a distributed SQL database, this process becomes even more crucial due to the complexities introduced by data distribution, replication, and consistency. To fully leverage the benefits offered by YugabyteDB, you need to approach data modeling with a distributed mindset. Data modeling for distributed SQL databases requires a careful balance of theoretical principles and practical considerations.
+
+## Organization
+
+In YugabyteDB, data is stored as rows and columns in tables; tables are organized under schemas and databases.
+
+{{}}
+To understand how to create and manage tables, schemas, and databases, see [Schemas and tables](../../explore/ysql-language-features/databases-schemas-tables).
+{{}}
+
+## Sharding
+
+In YugabyteDB, table data is split into tablets, and distributed across multiple nodes in the cluster. Applications can connect to any node for storing and retrieving data. Because reads and writes can span multiple nodes, it's crucial to consider how table data is sharded and distributed when modeling your data. To design your tables and indexes for fast retrieval and storage in YugabyteDB, you first need to understand the [data distribution](../../explore/going-beyond-sql/data-sharding) schemes: Hash and Range sharding.
+
+{{}}
+To learn more about data distribution schemes, see [Configurable data sharding](../../explore/going-beyond-sql/data-sharding).
+{{}}
+
+## Primary keys
+
+The primary key is the unique identifier for each row in the table. The distribution and ordering of table data depends on the primary key.
+
+{{}}
+To design optimal primary keys for your tables, see [Primary keys](./primary-keys-ysql).
+{{}}
+
+## Secondary indexes
+
+Indexes provide alternate access patterns for queries not involving the primary key of the table. With the help of an index, you can improve the access operations of your queries.
+
+{{}}
+To design optimal indexes for faster lookup, see [Secondary indexes](./secondary-indexes-ysql).
+{{}}
+
+## Hot shards
+
+In distributed systems, a hot-spot or hot-shard refers to a node that is overloaded with queries due to disproportionate traffic compared to other nodes in the cluster.
+
+{{}}
+To understand the hot-shard problem and solutions to overcome the issue, see [Hot shards](./hot-shards-ysql).
+{{}}
+
+## Table partitioning
+
+When the data in tables keeps growing, you can partition the tables for better performance and enhanced data management. Partitioning also makes it easier to drop older data by dropping partitions. In YugabyteDB, you can also use partitioning with Tablespaces to improve latency in multi-region scenarios and adhere to data residency laws like GDPR.
+
+{{}}
+To understand partitioning in YugabyteDB, see [Table partitioning](./partitioning).
+{{}}
diff --git a/docs/content/stable/develop/common-patterns/_index.md b/docs/content/stable/develop/data-modeling/common-patterns/_index.md
similarity index 92%
rename from docs/content/stable/develop/common-patterns/_index.md
rename to docs/content/stable/develop/data-modeling/common-patterns/_index.md
index fdc69cb30b26..1cb61f861a79 100644
--- a/docs/content/stable/develop/common-patterns/_index.md
+++ b/docs/content/stable/develop/data-modeling/common-patterns/_index.md
@@ -8,8 +8,8 @@ image: /images/section_icons/architecture/distributed_acid.png
menu:
stable:
identifier: common-patterns
- parent: develop
- weight: 400
+ parent: data-modeling
+ weight: 600
type: indexpage
showRightNav: true
---
@@ -35,7 +35,9 @@ A good example would be the speed sensor in a car that tracks the speed of a car
An insurance company could use the data to investigate accidents or an automobile company could track various sensors and improve the performance of the car. This could amount to billions of data points.
+{{}}
For more information on storing and retrieving such vast amounts of ordered data in YugabyteDB, see [Time series data model](./timeseries).
+{{}}
## Key-value
@@ -50,13 +52,17 @@ user2.name = "Harry Potter"
user2.country = "UK"
```
-Key-value stores are expected to be some of the fastest storage data models. For more information on using YugabyteDB for key-value stores, see [Key-value data model](./keyvalue).
+Key-value stores are expected to be some of the fastest storage data models.
+
+{{}}
+For more information on using YugabyteDB for key-value stores, see [Key-value data model](./keyvalue).
+{{}}
## Wide-column
In a wide-column data model, the data is organized as rows and columns. Each row is identified by a row `id` or `name` and each column is identified by a column `id` or `name`. Each row can have any number of columns attached to it. You can visualize it as a table-like structure where some of the cells are empty. For example:
-```sql{.nocopy}
+```caddyfile{.nocopy}
| | col-1 | col-2 | col-3 |
| ----- | ----- | ----- | ----- |
| row-1 | a | | c |
@@ -66,7 +72,7 @@ In a wide-column data model, the data is organized as rows and columns. Each row
To retrieve specific cells, you can issue commands similar to the following:
-```sql{.nocopy}
+```python{.nocopy}
get(row-1, col-3) ==> c
get(row-3, col-2) ==> NULL
```
diff --git a/docs/content/stable/develop/common-patterns/keyvalue.md b/docs/content/stable/develop/data-modeling/common-patterns/keyvalue.md
similarity index 94%
rename from docs/content/stable/develop/common-patterns/keyvalue.md
rename to docs/content/stable/develop/data-modeling/common-patterns/keyvalue.md
index 0139def2b3c1..a99aff285714 100644
--- a/docs/content/stable/develop/common-patterns/keyvalue.md
+++ b/docs/content/stable/develop/data-modeling/common-patterns/keyvalue.md
@@ -22,8 +22,8 @@ With these three simple functionalities, key-value stores have carved themselves
YugabyteDB provides several advantages when used as a key-value store:
-- YugabyteDB internally [stores data](../../../architecture/docdb/) as a collection of key-value pairs and therefore automatically excels as a key-value store.
-- Being [distributed by design](../../../architecture/transactions/distributed-txns/), YugabyteDB also naturally acts as a distributed key-value store.
+- YugabyteDB internally [stores data](../../../../architecture/docdb/data-model/) as a collection of key-value pairs and therefore automatically excels as a key-value store.
+- Being [distributed by design](../../../../architecture/transactions/distributed-txns/), YugabyteDB also naturally acts as a distributed key-value store.
- YugabyteDB inherently provides consistency of data because of Raft replication, which is typically not guaranteed by other key-value stores.
## Use cases
@@ -36,7 +36,6 @@ YugabyteDB provides several advantages when used as a key-value store:
1. **Shopping cart** : A user's shopping cart can be represented as a JSON or Hstore and stored under a key (for example, `user1.cart`). Given the strong consistency and resilience offered by YugabyteDB, the cart information will not be lost even in case of disasters.
-
## Simple scenario
Consider a scenario where you want to store multiple details related to users like `id`, `name`, `country`. For this, you could adopt a simple key-value schema where each attribute is a separate key, such as the following where the key contains both the `id` and the attribute name while the value is the value of the attribute, like:
@@ -234,4 +233,4 @@ you will notice that the `name` attribute has been removed for user `id=1`.
## Learn more
- [Hstore](https://www.postgresql.org/docs/11/hstore.html)
-- [Json](../../../explore/ysql-language-features/jsonb-ysql/)
\ No newline at end of file
+- [Json](../../../../explore/ysql-language-features/jsonb-ysql/)
\ No newline at end of file
diff --git a/docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md
new file mode 100644
index 000000000000..a70478bc3c06
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md
@@ -0,0 +1,53 @@
+---
+title: Time series data model
+headerTitle: Time series data model
+linkTitle: Time series
+description: Explore the Time series data model
+headcontent: Handle large amounts of data ordered by time
+menu:
+ stable:
+ identifier: common-patterns-timeseries
+ parent: common-patterns
+ weight: 100
+type: indexpage
+---
+
+Time series data are measurements or events that are tracked and monitored over time. This could be server metrics, application performance monitoring, network data, sensor data, events, clicks, trades in a market, and many other types of analytics data. A time series data model is designed specifically for handling large amounts of data that are ordered by time.
+
+Although YugabyteDB is hash sharded by default, it also supports range sharding, where the data is ordered and split at specific boundaries.
+
+A time series pattern works best for range queries where you need to look up items in a given time range.
+
+You can use the following common patterns to store and retrieve time series data in YugabyteDB in both a distributed and ordered manner:
+
+## Global ordering by time
+
+In this pattern, all your data is ordered by time across different tablets.
+
+{{}}
+To understand how to efficiently store and retrieve data in this pattern, see [Global ordering by time](./global-ordering).
+{{}}
+
+## Ordering by time per entity
+
+In this pattern, the data is ordered by time in a specific entity.
+
+{{}}
+To understand how to distribute the entities effectively and avoid hot shards, see [Ordering by time per entity](./ordering-by-entity).
+{{}}
+
+## Automatic data expiration
+
+In some scenarios, you don't want data lying around for a long time as they may not be needed or you have rules in your organization that you cannot store specific data longer than a particular duration. For such cases, you can set a time-to-live value on rows, columns, and the table itself.
+
+{{}}
+For more details, see [Automatic data expiration](./data-expiry).
+{{}}
+
+## Partitioning
+
+When you have a lot of data that needs to be deleted regularly, you can opt to partition your data. This also has speed advantages in some cases.
+
+{{}}
+For more details, see [Partitioning by time](./partitioning-by-time).
+{{}}
diff --git a/docs/content/stable/develop/common-patterns/timeseries/data-expiry.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/data-expiry.md
similarity index 93%
rename from docs/content/stable/develop/common-patterns/timeseries/data-expiry.md
rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/data-expiry.md
index f9de3570ac1c..adc6990f1506 100644
--- a/docs/content/stable/develop/common-patterns/timeseries/data-expiry.md
+++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/data-expiry.md
@@ -4,6 +4,7 @@ headerTitle: Automatic data expiration
linkTitle: Automatic data expiration
description: Expire data using the `USING TTL` operator
headcontent: Expire data using time-to-live
+badges: ycql
menu:
stable:
identifier: timeseries-automatic-expiration
@@ -15,7 +16,7 @@ type: docs
Consider a scenario where you only need the last few values and the older data is not of any value and can be purged. Typically, this requires setting up a separate background job. Using YugabyteDB however, you can set an expiration value for columns using the `USING TTL` operator.
{{}}
-TTL-based expiration is only available in [YCQL](../../../../api/ycql/).
+TTL-based expiration is only available in [YCQL](/{{}}/api/ycql/).
{{}}
## Setup
@@ -99,8 +100,8 @@ Note that the row is present but the value for the `speed` column is `null`.
Instead of explicitly setting the TTL at the row or column level, you can set a TTL on the table. This also has the benefit of saving space as the TTL value is stored in only one place and not per row or column.
-Define table-level TTL using the [default_time_to_live property](../../../../api/ycql/ddl_create_table/#table-properties-1).
+Define table-level TTL using the [default_time_to_live property](/{{}}/api/ycql/ddl_create_table/#table-properties-1).
## Learn more
-- [TTL for data expiration](../../../learn/ttl-data-expiration-ycql/)
\ No newline at end of file
+- [TTL for data expiration](/{{}}/develop/learn/ttl-data-expiration-ycql/)
\ No newline at end of file
diff --git a/docs/content/stable/develop/common-patterns/timeseries/global-ordering.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/global-ordering.md
similarity index 99%
rename from docs/content/stable/develop/common-patterns/timeseries/global-ordering.md
rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/global-ordering.md
index 8a6e897d4203..92dfa290ff78 100644
--- a/docs/content/stable/develop/common-patterns/timeseries/global-ordering.md
+++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/global-ordering.md
@@ -4,6 +4,7 @@ headerTitle: Global ordering by time
linkTitle: Global ordering by time
description: Distribute your time-ordered data and retrieve data efficiently
headcontent: Distribute time-ordered data and retrieve data efficiently
+badges: ysql
menu:
stable:
identifier: timeseries-global-ordering
diff --git a/docs/content/stable/develop/common-patterns/timeseries/ordering-by-entity.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md
similarity index 99%
rename from docs/content/stable/develop/common-patterns/timeseries/ordering-by-entity.md
rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md
index 7dafc460e358..784617b00660 100644
--- a/docs/content/stable/develop/common-patterns/timeseries/ordering-by-entity.md
+++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md
@@ -4,6 +4,7 @@ headerTitle: Order by time per entity
linkTitle: Ordering by time per entity
description: Keep entity data together using Entity-wise or Bucket-based ordering
headcontent: Keep entity data together in a time series data model
+badges: ysql
menu:
stable:
identifier: timeseries-entity-ordering
diff --git a/docs/content/stable/develop/common-patterns/timeseries/partitioning-by-time.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md
similarity index 95%
rename from docs/content/stable/develop/common-patterns/timeseries/partitioning-by-time.md
rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md
index 7b0d2b36070d..7a1b479b290a 100644
--- a/docs/content/stable/develop/common-patterns/timeseries/partitioning-by-time.md
+++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md
@@ -4,6 +4,9 @@ headerTitle: Partition data by time
linkTitle: Partition data by time
description: Partition data for efficient data management
headcontent: Partition data for efficient data management
+badges: ysql
+aliases:
+ - /preview/common-patterns/timeseries/partitioning-by-time/
menu:
stable:
identifier: timeseries-partition-by-time
@@ -17,7 +20,7 @@ Partitioning refers to splitting what is logically one large table into smaller
The following example describes the advantages of partitions in more detail.
{{}}
-Partitioning is only available in [YSQL](../../../../api/ysql/).
+Partitioning is only available in [YSQL](/{{}}/api/ysql/).
{{}}
## Setup
@@ -140,4 +143,4 @@ Time: 103.214 ms
## Learn more
-- [Table partitioning](../../../../explore/ysql-language-features/advanced-features/partitions/)
+- [Table partitioning](/{{}}/explore/ysql-language-features/advanced-features/partitions/)
diff --git a/docs/content/stable/develop/data-modeling/hot-shards-ysql.md b/docs/content/stable/develop/data-modeling/hot-shards-ysql.md
new file mode 100644
index 000000000000..c0bc7c59b920
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/hot-shards-ysql.md
@@ -0,0 +1,129 @@
+---
+title: Avoiding hotspots in YugabyteDB
+headertitle: Avoiding hotspots
+linkTitle: Hot shards
+badges: ysql
+menu:
+ stable:
+ identifier: data-modeling-hot-shard
+ parent: data-modeling
+ weight: 300
+type: docs
+---
+
+A hot shard is a common problem in data retrieval where a specific node becomes a performance bottleneck due to disproportionately high traffic or workload compared to other nodes in the system. This imbalance can lead to various issues, such as degraded system performance, increased latency, and potential system failures.
+
+This typically happens because of mismatches between query pattern and data distribution. You should be careful when choosing a primary key in the schema design to not accidentally create hotspots in your database.
+
+{{}}
+The hot shard issue can occur both for tables and indexes.
+{{}}
+
+Let us understand the problem and the solution to this via some examples.
+
+
+{{}}
+
+{{}}
+
+
+For illustration, create a census table as follows.
+
+```sql
+CREATE TABLE census(
+ id int,
+ name varchar(255),
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(id ASC)
+)
+```
+
+Add some data to the table as follows.
+
+```sql
+INSERT INTO public.census ( id,name,age,zipcode,employed ) VALUES
+ (1,'Zachary',55,94085,True), (2,'James',56,94085,False), (3,'Kimberly',50,94084,False),
+ (4,'Edward',56,94085,True), (5,'Barry',56,94084,False), (6,'Tyler',45,94084,False),
+ (7,'Nancy',47,94085,False), (8,'Sarah',52,94084,True), (9,'Nancy',59,94084,False),
+ (10,'Diane',51,94083,False), (11,'Ashley',42,94083,False), (12,'Jacqueline',58,94085,False),
+ (13,'Benjamin',49,94084,False), (14,'James',48,94083,False), (15,'Ann',43,94083,False),
+ (16,'Aimee',47,94085,True), (17,'Michael',49,94085,False), (18,'Rebecca',40,94085,False),
+ (19,'Kevin',45,94085,True), (20,'James',45,94084,False), (21,'Sandra',60,94085,False),
+ (22,'Kathleen',40,94085,True), (23,'William',42,94084,False), (24,'James',42,94083,False),
+ (25,'Tyler',50,94085,False), (26,'James',49,94085,True), (27,'Kathleen',55,94083,True),
+ (28,'Zachary',55,94083,True), (29,'Rebecca',41,94085,True), (30,'Jacqueline',49,94085,False),
+ (31,'Diane',48,94083,False), (32,'Sarah',53,94085,True), (33,'Rebecca',55,94083,True),
+ (34,'William',47,94085,False), (35,'William',60,94085,True), (36,'Sarah',53,94085,False),
+ (37,'Ashley',47,94084,True), (38,'Ashley',54,94084,False), (39,'Benjamin',42,94083,False),
+ (40,'Tyler',47,94085,True), (41,'Michael',42,94084,False), (42,'Diane',50,94084,False),
+ (43,'Nancy',51,94085,False), (44,'Rebecca',56,94085,False), (45,'Tyler',41,94085,True);
+```
+
+
+
+## Ordering of columns
+
+Consider a scenario where you want to look up people with a specific name, say `Michael` in `94085`. For this, a good index would be the following:
+
+```sql
+create index idx_zip3 on census(zipcode ASC, name ASC) include(id);
+```
+
+The query would be as follows:
+
+```sql
+select id from census where zipcode=94085 AND name='Michael';
+```
+
+This results in an output similar to the following:
+
+```yaml{.nocopy}
+ id
+----
+ 17
+(1 row)
+```
+
+Now consider a scenario where zip code 94085 is very popular and the target of many queries (say there was an election or a disaster in that area). As the index is distributed based on `zipcode`, everyone in zip code 94085 will end up located in the same tablet; as a result, all the queries will end up reading from that one tablet. In other words, this tablet has become hot. To avoid this, you can distribute the index on name instead of zip code, as follows:
+
+```sql
+drop index if exists idx_zip3;
+create index idx_zip3 on census(name ASC, zipcode ASC) include(id);
+```
+
+This swaps the order of columns in the index. The result is the index being distributed/ordered on name first, and then ordered on zip code. Now when many queries have the same zip code, the queries are handled by different tablets. This is because the names being looked up will be different and will be located on different tablets.
+
+{{}}
+Consider swapping the order of columns to avoid hot shards.
+{{}}
+
+## Distribution on more columns
+
+Suppose you choose to distribute your index based on hash sharding so that all citizens in the same zip code are located in the same tablet. Your index might look like the following:
+
+```sql{.nocopy}
+create index idx_zip4 on census(zipcode HASH, name ASC) include(id);
+```
+
+Now when you look up a specific person in a certain zip code (say, `zipcode=94085 AND name='Michael'`), the lookup is made on just one node. But this node could become hot if there are too many lookups for that zip code.
+
+To fix this, add `name` into the sharding part of the index as follows:
+
+```sql
+create index idx_zip4 on census((zipcode,name) HASH) include(id);
+```
+
+Now the index data for the same zip code would be distributed across multiple tablets, as the `name` column is also part of the sharding scheme.
+
+{{}}
+In the case of hash sharding, consider adding more columns to the sharding part to avoid hot shards.
+{{}}
diff --git a/docs/content/stable/develop/data-modeling/partitioning.md b/docs/content/stable/develop/data-modeling/partitioning.md
new file mode 100644
index 000000000000..0258edb2606f
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/partitioning.md
@@ -0,0 +1,45 @@
+---
+title: Partitioning tables in YugabyteDB
+headertitle: Partitioning tables
+linkTitle: Partitioning tables
+badges: ysql
+menu:
+ stable:
+ identifier: data-modeling-partitions
+ parent: data-modeling
+ weight: 500
+type: docs
+---
+
+
+[Data partitioning](../../../explore/ysql-language-features/advanced-features/partitions) refers to the process of dividing a large table or dataset into smaller physical partitions based on certain criteria or rules. This technique offers several benefits, including improved performance, easier data management, and better use of storage resources. Each partition is internally a table. This scheme is useful for managing large volumes of data and particularly for dropping older data.
+
+### Manage large datasets
+
+You can manage large data volumes by partitioning based on time (say by day, week, month, and so on) to make it easier to drop old data, especially when you want to retain only the recent data.
+
+![Table partitioning](/images/develop/data-modeling/table-partitioning.png)
+
+{{}}
+To understand how large data can be partitioned for easier management, see [Partitioning data by time](../common-patterns/timeseries/partitioning-by-time).
+{{}}
+
+### Place data closer to users
+
+When you want to improve latency for local users when your users are spread across a large geography, partition your data according to where big clusters of users are located, and place their data in regions closer to them using [tablespaces](../../../explore/going-beyond-sql/tablespaces). Users will end up talking to partitions closer to them.
+
+![East and west applications](/images/develop/global-apps/latency-optimized-geo-partition-final.png)
+
+{{}}
+To understand how to partition and place data closer to users for improved latency, see [Latency-optimized geo-partitioning](../../build-global-apps/latency-optimized-geo-partition).
+{{}}
+
+### Adhere to compliance laws
+
+You can partition your data according to the user's citizenship and place their data in the boundaries of their respective nations to be compliant with data residency laws like [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation).
+
+![User data stored within their country's boundaries](/images/develop/global-apps/locality-optimized-geo-partition-goal.png)
+
+{{}}
+To understand how to partition data to be compliant with data residency laws, see [Locality-optimized geo-partitioning](../../build-global-apps/locality-optimized-geo-partition).
+{{}}
diff --git a/docs/content/stable/develop/data-modeling/primary-keys-ycql.md b/docs/content/stable/develop/data-modeling/primary-keys-ycql.md
new file mode 100644
index 000000000000..9ec19ba3bb51
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/primary-keys-ycql.md
@@ -0,0 +1,226 @@
+---
+title: Designing optimal primary keys in YugabyteDB
+headerTitle: Designing optimal primary keys
+linkTitle: Primary keys
+badges: ycql
+menu:
+ stable:
+ identifier: data-modeling-pk-ycql
+ parent: data-modeling
+ weight: 100
+type: docs
+---
+
+{{}}
+
+The primary key is a column or a set of columns that uniquely identifies a row, such as a user ID or order number. You should choose the primary key based on the most common access pattern. Columns of data type [string](../../../explore/ycql-language/data-types/#strings), [number](../../../explore/ycql-language/data-types/#numeric-types), [serial](../../../explore/ysql-language-features/data-types/#serial-pseudotype), or [UUID](../../../explore/ycql-language/data-types/#universally-unique-id-types) make good choices for primary keys.
+
+## Automatically generate the primary key
+
+The best way to uniquely identify a record is to allow the database to assign a unique identifier to the row. YugabyteDB supports multiple schemes for generating identifiers that you can choose based on the needs of your application.
+
+### UUID
+
+A UUID is a 128-bit number represented as a string of 36 characters, including hyphens. For example, `4b6aa2ff-53e6-44f5-8bd0-ef9de90a8095`. YugabyteDB natively supports [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) generation as per [RFC 4122](https://datatracker.ietf.org/doc/html/rfc4122) via the uuid-ossp extension. UUIDs have several advantages:
+
+- The likelihood of generating duplicate UUIDs is extremely low.
+- UUIDs can be independently generated on different nodes in the cluster without any coordination with other systems.
+- The randomness of UUIDs makes it hard to predict the next ID, providing an additional layer of security.
+
+You can add a UUID to your schema as follows:
+
+```sql
+create keyspace if not exists yugabyte;
+use yugabyte;
+
+drop table if exists users;
+create table users (
+ id uuid,
+ name text,
+ primary key (id)
+);
+```
+
+Insert some rows into the table:
+
+```sql
+insert into users (id, name) values (uuid(), 'John Wick');
+insert into users (id, name) values (uuid(), 'Iron Man');
+insert into users (id, name) values (uuid(), 'Harry Potter');
+insert into users (id, name) values (uuid(), 'Jack Sparrow');
+```
+
+Select all the rows from the table:
+
+```cql
+select * from users;
+```
+
+Notice how the generated IDs are totally random.
+
+```cql{.nocopy}
+ id | name
+--------------------------------------+--------------
+ 85a17586-317f-4ef1-b5dd-582a13ccc832 | Harry Potter
+ b431bb80-b20d-42fe-900d-fed295de507a | Jack Sparrow
+ 7abae478-532c-40aa-9f81-42e85750fe01 | John Wick
+ 2a151214-272d-4448-af3e-a343f434fa68 | Iron Man
+
+### TimeUUID
+
+[TimeUUID](../../../api/ycql/type_uuid/) is a special type of UUID that has a time factor integrated into it so the generated UUIDs have an order associated with them. They can be generated using the `now()` function. To do this, first create a table as follows:
+
+```cql
+create keyspace if not exists yugabyte;
+use yugabyte;
+
+drop table if exists users;
+create table users (
+ id timeuuid,
+ name text,
+ primary key(id)
+);
+```
+
+Insert some rows into the table:
+
+```sql
+insert into users (id, name) values (now(), 'John Wick');
+insert into users (id, name) values (now(), 'Iron Man');
+insert into users (id, name) values (now(), 'Harry Potter');
+insert into users (id, name) values (now(), 'Jack Sparrow');
+```
+
+Select all the rows from the table:
+
+```cql
+select * from users;
+```
+
+The generated IDs are not random; each one embeds a time-based component:
+
+```cql{.nocopy}
+ id | name
+--------------------------------------+--------------
+ 19f24446-2904-11ef-917b-6bf61abbc06e | Iron Man
+ 13dc1460-2904-11ef-917b-6bf61abbc06e | John Wick
+ 1a3f8ac6-2904-11ef-917b-6bf61abbc06e | Jack Sparrow
+ 19f29720-2904-11ef-917b-6bf61abbc06e | Harry Potter
+```
+
+## Existing columns as primary keys
+
+To illustrate how to choose existing columns as primary keys, first create a sample census schema.
+
+
+{{}}
+
+{{}}
+{{}}
+
+{{}}
+{{}}
+
+{{}} {{}} {{}}
+{{}}
+
+
+Create a census table as follows:
+
+```sql
+CREATE TABLE census(
+ id int,
+ name varchar,
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(id)
+);
+```
+
+Add some data to the table as follows.
+
+```cql
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (1,'Zachary',55,94085,True);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (2,'James',56,94085,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (3,'Kimberly',50,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (4,'Edward',56,94085,True);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (5,'Barry',56,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (6,'Tyler',45,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (7,'James',47,94085,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (8,'Sarah',52,94084,True);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (9,'James',59,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (10,'Diane',51,94083,False);
+```
+
+
+
+### ID as the primary key
+
+In the `census` table, the most likely way to look up a person is by their `id`, so the primary key has been set to `id`. This means that the data is distributed based on ID. This works well for point lookups on ID. For example, to look up ID 9, you can do the following:
+
+```sql
+select * from census where id=9;
+```
+
+You will see output similar to the following:
+
+```yaml{.nocopy}
+ id | name | age | zipcode | employed
+----+-------+-----+---------+----------
+ 9 | James | 59 | 94084 | False
+```
+
+One row matching ID 9 was quickly fetched with just one request.
+
+### Name as the primary key
+
+Suppose your most common lookup is based on the name. In this case you would make the name column part of the primary key. Because the name alone may not be unique enough to be the primary key (the primary key has to be unique), you can choose a primary key with both name and ID as follows:
+
+```sql
+CREATE TABLE census2(
+ id int,
+ name varchar,
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(name, id)
+) WITH CLUSTERING ORDER BY (id ASC);
+```
+
+```cql
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (1,'Zachary',55,94085,True);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (2,'James',56,94085,False);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (3,'Kimberly',50,94084,False);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (4,'Edward',56,94085,True);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (5,'Barry',56,94084,False);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (6,'Tyler',45,94084,False);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (7,'James',47,94085,False);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (8,'Sarah',52,94084,True);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (9,'James',59,94084,False);
+INSERT INTO census2 (id,name,age,zipcode,employed) VALUES (10,'Diane',51,94083,False);
+```
+
+When specifying the primary key, the `name` column is specified first as the partition key, and `id` second as a clustering column. This ensures that the data is distributed based on `name`, and for all rows with the same name, the `id` is stored sorted in ascending order, guaranteeing that all people with the same name are stored in the same tablet. This allows you to do a fast lookup on `name` even though `(name, id)` is the primary key.
+
+Retrieve all the people with the name James as follows:
+
+```sql
+select * from census2 where name = 'James';
+```
+
+You will see output similar to the following:
+
+```tablegen{.nocopy}
+ name | id | age | zipcode | employed
+-------+----+-----+---------+----------
+ James | 2 | 56 | 94085 | False
+ James | 7 | 47 | 94085 | False
+ James | 9 | 59 | 94084 | False
+(3 rows)
+```
+
+There are 3 people named James, and all of them can be quickly looked up as the data has been distributed by name.
+
+Notice that the rows are ordered by id. This is because you specified `CLUSTERING ORDER BY id ASC` to ensure that the rows with the same name will be stored ordered in the order of the `id` column.
diff --git a/docs/content/stable/develop/data-modeling/primary-keys-ysql.md b/docs/content/stable/develop/data-modeling/primary-keys-ysql.md
new file mode 100644
index 000000000000..06a41c5a1a7d
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/primary-keys-ysql.md
@@ -0,0 +1,222 @@
+---
+title: Designing optimal primary keys in YugabyteDB
+headerTitle: Designing optimal primary keys
+linkTitle: Primary keys
+badges: ysql
+menu:
+ stable:
+ identifier: data-modeling-pk
+ parent: data-modeling
+ weight: 100
+type: docs
+---
+
+{{}}
+
+The primary key is a column or a set of columns that uniquely identifies a row, such as a user ID or order number. You should choose the primary key based on the most common access pattern. Columns of data type [string](../../../explore/ycql-language/data-types/#strings), [number](../../../explore/ycql-language/data-types/#numeric-types), [serial](../../../explore/ysql-language-features/data-types/#serial-pseudotype), or [UUID](../../../explore/ycql-language/data-types/#universally-unique-id-types) make good choices for primary keys.
+
+## Automatically generating the primary key
+
+The best way to uniquely identify a record is to allow the database to assign a unique identifier to the row. YugabyteDB supports multiple schemes for generating identifiers that you can choose based on the needs of your application.
+
+### UUID
+
+A UUID is a 128-bit number represented as a string of 36 characters, including hyphens. For example, `4b6aa2ff-53e6-44f5-8bd0-ef9de90a8095`. YugabyteDB natively supports [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) generation as per [RFC 4122](https://datatracker.ietf.org/doc/html/rfc4122) via the uuid-ossp extension. UUIDs have several advantages:
+
+- The likelihood of generating duplicate UUIDs is extremely low.
+- UUIDs can be independently generated on different nodes in the cluster without any coordination with other systems.
+- The randomness of UUIDs makes it hard to predict the next ID, providing an additional layer of security.
+
+You can add a UUID to your schema as follows:
+
+```sql
+CREATE TABLE users (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ name TEXT
+);
+```
+
+The [DEFAULT](../../../api/ysql/the-sql-language/statements/ddl_create_table/#default) clause ensures that for every row inserted, a UUID is automatically generated and inserted along with the row.
+
+### Serial
+
+[Serial](../../../api/ysql/datatypes/type_serial/) is a special data type in YugabyteDB that creates an auto-incrementing integer column starting with `1`. It is essentially a shorthand for creating a sequence and using it as a default value for a column. You can choose between three types of serial data types depending on the needs of your application:
+
+- **SMALLSERIAL** - An integer column in the range of 1 to 32,767.
+- **SERIAL** - An integer column in the range of 1 to 2,147,483,647.
+- **BIGSERIAL** - An integer column in the range of 1 to 9,223,372,036,854,775,807.
+
+Serial can be used directly in table definitions to simplify the creation of auto-incrementing columns.
+
+```sql
+DROP TABLE IF EXISTS users;
+CREATE TABLE users (
+ id serial,
+ name TEXT,
+ PRIMARY KEY(id)
+);
+```
+
+For each row inserted into the table, an auto-incremented `id` value is automatically inserted along with the row.
+
+### Sequence
+
+A [sequence](../../../api/ysql/the-sql-language/statements/ddl_create_sequence/) is a database object that generates a sequence of unique numbers. Sequences are independent objects that can be associated with one or more tables or columns. Sequences offer more flexibility and control over auto-incrementing behavior. They can be created, managed, and used separately from table definitions. Sequences can be customized with different increment values, start values, minimum and maximum values, and cycle behavior.
+
+```sql
+CREATE SEQUENCE user_id_seq START 100 INCREMENT BY 100 CACHE 10000;
+
+DROP TABLE IF EXISTS users;
+CREATE TABLE users (
+ id INTEGER DEFAULT nextval('user_id_seq'),
+ name TEXT,
+ PRIMARY KEY(id)
+);
+```
+
+For every row inserted, user IDs are automatically generated as 100, 200, 300, and so on.
+
+{{}}
+Use serial for basic use cases and opt for sequences when you need more control over the sequence behavior, need to share a sequence between multiple tables or columns, or require custom incrementing logic.
+{{}}
+
+## Existing columns as primary keys
+
+To illustrate how to choose existing columns as primary keys, first create a sample census schema.
+
+
+{{}}
+
+{{}}
+{{}}
+
+{{}}
+{{}}
+
+{{}} {{}} {{}}
+{{}}
+
+
+Create a census table as follows:
+
+```sql
+CREATE TABLE census(
+ id int,
+ name varchar(255),
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(id ASC)
+)
+```
+
+Add some data to the table as follows.
+
+```sql
+INSERT INTO public.census ( id,name,age,zipcode,employed ) VALUES
+ (1,'Zachary',55,94085,True), (2,'James',56,94085,False), (3,'Kimberly',50,94084,False),
+ (4,'Edward',56,94085,True), (5,'Barry',56,94084,False), (6,'Tyler',45,94084,False),
+ (7,'Nancy',47,94085,False), (8,'Sarah',52,94084,True), (9,'Nancy',59,94084,False),
+ (10,'Diane',51,94083,False), (11,'Ashley',42,94083,False), (12,'Jacqueline',58,94085,False),
+ (13,'Benjamin',49,94084,False), (14,'James',48,94083,False), (15,'Ann',43,94083,False),
+ (16,'Aimee',47,94085,True), (17,'Michael',49,94085,False), (18,'Rebecca',40,94085,False),
+ (19,'Kevin',45,94085,True), (20,'James',45,94084,False), (21,'Sandra',60,94085,False),
+ (22,'Kathleen',40,94085,True), (23,'William',42,94084,False), (24,'James',42,94083,False),
+ (25,'Tyler',50,94085,False), (26,'James',49,94085,True), (27,'Kathleen',55,94083,True),
+ (28,'Zachary',55,94083,True), (29,'Rebecca',41,94085,True), (30,'Jacqueline',49,94085,False),
+ (31,'Diane',48,94083,False), (32,'Sarah',53,94085,True), (33,'Rebecca',55,94083,True),
+ (34,'William',47,94085,False), (35,'William',60,94085,True), (36,'Sarah',53,94085,False),
+ (37,'Ashley',47,94084,True), (38,'Ashley',54,94084,False), (39,'Benjamin',42,94083,False),
+ (40,'Tyler',47,94085,True), (41,'Michael',42,94084,False), (42,'Diane',50,94084,False),
+ (43,'Nancy',51,94085,False), (44,'Rebecca',56,94085,False), (45,'Tyler',41,94085,True);
+```
+
+
+
+### ID as the primary key
+
+In the `census` table, the most likely way to look up a person is by their `id`, so the primary key has been set to `id ASC`. This means that the data is stored in ascending order of ID, ensuring contiguous IDs are mostly located in the same tablet. This works well for point lookups on ID and range scans on IDs. For example, to look up ID 9, you can do the following:
+
+```sql
+select * from census where id=9;
+```
+
+You will see output similar to the following:
+
+```yaml{.nocopy}
+ id | name | age | zipcode | employed
+----+-------+-----+---------+----------
+ 9 | Nancy | 59 | 94084 | f
+```
+
+One row matching ID 9 was quickly fetched with just one request. You can also do a quick range scan.
+
+```sql
+select * from census where id>=5 and id<=15;
+```
+
+You will see an output similar to the following:
+
+```tablegen{.nocopy}
+ id | name | age | zipcode | employed
+----+------------+-----+---------+----------
+ 5 | Barry | 56 | 94084 | f
+ 6 | Tyler | 45 | 94084 | f
+ 7 | Nancy | 47 | 94085 | f
+ 8 | Sarah | 52 | 94084 | t
+ 9 | Nancy | 59 | 94084 | f
+ 10 | Diane | 51 | 94083 | f
+ 11 | Ashley | 42 | 94083 | f
+ 12 | Jacqueline | 58 | 94085 | f
+ 13 | Benjamin | 49 | 94084 | f
+ 14 | James | 48 | 94083 | f
+ 15 | Ann | 43 | 94083 | f
+(11 rows)
+```
+
+11 rows were quickly retrieved as the data is stored sorted on the `id` column. So range scans are also fast.
+
+### Name as the primary key
+
+Suppose your most common lookup is based on the name. In this case you would make the `name` column part of the primary key. Because the name alone may not be unique enough to be the primary key (the primary key has to be unique), you can choose a primary key with both name and ID as follows:
+
+```sql
+CREATE TABLE census2(
+ id int,
+ name varchar(255),
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(name ASC, id ASC)
+);
+
+-- copy the same data into census2
+INSERT INTO census2 SELECT * FROM census;
+```
+
+When specifying the primary key, the `name` column is specified first, and `id` second. This ensures that the data is stored sorted based on `name` first, and for all matching names, the `id` is stored sorted in ascending order, ensuring all people with the same name will be stored in the same tablet. This allows you to do a fast lookup on `name` even though `(name, id)` is the primary key.
+
+Retrieve all the people with the name James as follows:
+
+```sql
+select * from census2 where name = 'James';
+```
+
+You will see output similar to the following:
+
+```tablegen{.nocopy}
+ id | name | age | zipcode | employed
+----+-------+-----+---------+----------
+ 2 | James | 56 | 94085 | f
+ 14 | James | 48 | 94083 | f
+ 20 | James | 45 | 94084 | f
+ 24 | James | 42 | 94083 | f
+ 26 | James | 49 | 94085 | t
+(5 rows)
+```
+
+There are 5 people named James, and all of them can be quickly looked up as the data has been sorted on name.
+
+{{}}
+The primary key was specified with `ASC` order. However, if the queries are going to retrieve data in descending order with `ORDER BY name DESC`, then it is better to match the same ordering in the primary key definition.
+{{}}
diff --git a/docs/content/stable/develop/data-modeling/secondary-indexes-ycql.md b/docs/content/stable/develop/data-modeling/secondary-indexes-ycql.md
new file mode 100644
index 000000000000..0db6c79a13ed
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/secondary-indexes-ycql.md
@@ -0,0 +1,179 @@
+---
+title: Designing secondary indexes in YugabyteDB
+headertitle: Designing secondary indexes
+linkTitle: Secondary indexes
+badges: ycql
+menu:
+ stable:
+ identifier: data-modeling-indexes-ycql
+ parent: data-modeling
+ weight: 200
+type: docs
+---
+
+{{}}
+
+The primary goal of an index is to enhance the performance of data retrieval operations on the data in the tables. Indexes are designed to quickly locate data without having to search every row in a database table and provide fast access for patterns other than that of the primary key of the table. In YugabyteDB, indexes are internally designed just like tables and operate as such. The main difference between a table and an index is that the primary key of the table has to be unique but it need not be unique for an index.
+
+{{}}
+In YugabyteDB, indexes are global and are implemented just like tables. They are split into tablets and distributed across the different nodes in the cluster. The sharding of indexes is based on the primary key of the index and is independent of how the main table is sharded and distributed. Indexes are not colocated with the base table.
+{{}}
+
+To illustrate secondary indexes, first create a sample census schema.
+
+
+{{}}
+
+{{}}
+{{}}
+
+{{}}
+{{}}
+
+{{}} {{}} {{}}
+{{}}
+
+
+Create a census table as follows:
+
+```sql
+create keyspace if not exists yugabyte;
+use yugabyte;
+
+drop table if exists census;
+CREATE TABLE census(
+ id int,
+ name varchar,
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(id)
+) WITH transactions = { 'enabled' : true };
+```
+
+{{}}
+To attach indexes to tables, the tables should be created with transactions enabled.
+{{}}
+
+Add some data to the table as follows.
+
+```sql
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (1,'Zachary',55,94085,True);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (2,'James',56,94085,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (3,'Kimberly',50,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (4,'Edward',56,94085,True);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (5,'Barry',56,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (6,'Tyler',45,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (7,'James',47,94085,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (8,'Sarah',52,94084,True);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (9,'James',59,94084,False);
+INSERT INTO census (id,name,age,zipcode,employed) VALUES (10,'Diane',51,94083,False);
+```
+
+
+
+## Basic index
+
+Suppose you need to look up the data based on the zip codes of the people in the census. You can fetch details with a query similar to the following:
+
+```sql
+select id,name from census where zipcode=94085;
+```
+
+This required a sequential scan of all the rows in the table. This is because the primary key of the table is `id`, and looking up by zip code requires a full scan. To avoid the full scan, create an index on `zipcode` so that the executor can quickly fetch the matching rows by looking at the index.
+
+```sql
+create index idx_zip on census(zipcode);
+```
+
+Now, for a query to get all the people in zip code 94085 as follows:
+
+```sql
+explain select id,name from census where zipcode=94085;
+```
+
+You will see an output like the following:
+
+```yaml{.nocopy}
+ Index Scan using yugabyte.idx_zip on yugabyte.census
+ Key Conditions: (zipcode = 94085)
+```
+
+The same 4 rows were fetched from the table, but much faster. This is because the planner uses the index to execute the query.
+
+## Covering index
+
+In the prior example, to retrieve the rows the index was first looked up, and then more columns (such as `name`) were fetched for the same rows from the table. This additional round trip to the table is needed because the columns are not present in the index. To avoid this, you can store the column along with the index as follows:
+
+```sql
+create index idx_zip2 on census(zipcode) include(name);
+```
+
+Now, for a query to get all people in zip code 94085 as follows:
+
+```sql
+explain select id,name from census where zipcode=94085;
+```
+
+You will see an output like the following:
+
+```yaml{.nocopy}
+ Index Only Scan using yugabyte.idx_zip2 on yugabyte.census
+ Key Conditions: (zipcode = 94085)
+```
+
+This is an index-only scan, which means that all the data required by the query has been fetched from the index, so no additional read from the table was needed.
+
+{{}}
+When an index contains all the columns of the table, it is referred to as a Duplicate index.
+{{}}
+
+## Listing indexes
+
+You can list the indexes associated with a table using the following methods.
+
+### DESC command
+
+The `DESC TABLE` command lists the indexes associated with a table along with the schema details.
+
+```cql
+DESC TABLE census;
+```
+
+The indexes are listed at the end of the output as follows:
+
+```cql{.nocopy}
+CREATE TABLE yugabyte.census (
+ id int PRIMARY KEY,
+ name text,
+ age int,
+ zipcode int,
+ employed boolean
+) WITH default_time_to_live = 0
+ AND transactions = {'enabled': 'true'};
+CREATE INDEX idx_zip2 ON yugabyte.census (zipcode, id) INCLUDE (name)
+ WITH transactions = {'enabled': 'true'};
+CREATE INDEX idx_zip ON yugabyte.census (zipcode, id)
+ WITH transactions = {'enabled': 'true'};
+```
+
+The `DESC INDEX` command gives just the description of the specified index.
+
+```cql
+DESC INDEX idx_zip2;
+```
+
+The output includes the description of just the index as follows:
+
+```cql{.nocopy}
+CREATE INDEX idx_zip2 ON yugabyte.census (zipcode, id) INCLUDE (name)
+ WITH transactions = {'enabled': 'true'};
+```
+
+## Conclusion
+
+While primary keys are essential to ensure data uniqueness and facilitate efficient data distribution, secondary indexes provide the flexibility needed to optimize queries based on non-primary key columns. Using secondary indexes, applications can boost performance and provide a robust and scalable solution for managing large-scale, distributed datasets.
+
+## Learn more
+
+- [Explore indexes and constraints](../../../explore/ycql-language/indexes-constraints/)
diff --git a/docs/content/stable/develop/data-modeling/secondary-indexes-ysql.md b/docs/content/stable/develop/data-modeling/secondary-indexes-ysql.md
new file mode 100644
index 000000000000..f92e614cf2bb
--- /dev/null
+++ b/docs/content/stable/develop/data-modeling/secondary-indexes-ysql.md
@@ -0,0 +1,215 @@
+---
+title: Designing secondary indexes in YugabyteDB
+headertitle: Designing secondary indexes
+linkTitle: Secondary indexes
+badges: ysql
+menu:
+ stable:
+ identifier: data-modeling-indexes
+ parent: data-modeling
+ weight: 200
+type: docs
+---
+
+{{}}
+
+The primary goal of an index is to enhance the performance of data retrieval operations on the data in the tables. Indexes are designed to quickly locate data without having to search every row in a database table and provide fast access for patterns other than that of the primary key of the table. In YugabyteDB, indexes are internally designed just like tables and operate as such. The main difference between a table and an index is that the primary key of the table has to be unique but it need not be unique for an index.
+
+{{}}
+In YugabyteDB, indexes are global and are implemented just like tables. They are split into tablets and distributed across the different nodes in the cluster. The sharding of indexes is based on the primary key of the index and is independent of how the main table is sharded and distributed. Indexes are not colocated with the base table.
+{{}}
+
+To illustrate secondary indexes, first create a sample census schema.
+
+
+{{}}
+
+{{}}
+{{}}
+
+{{}}
+{{}}
+
+{{}} {{}} {{}}
+{{}}
+
+
+Create a census table as follows:
+
+```sql
+CREATE TABLE census(
+ id int,
+ name varchar(255),
+ age int,
+ zipcode int,
+ employed boolean,
+ PRIMARY KEY(id ASC)
+);
+```
+
+Add some data to the table as follows.
+
+```sql
+INSERT INTO public.census ( id,name,age,zipcode,employed ) VALUES
+ (1,'Zachary',55,94085,True), (2,'James',56,94085,False), (3,'Kimberly',50,94084,False),
+ (4,'Edward',56,94085,True), (5,'Barry',56,94084,False), (6,'Tyler',45,94084,False),
+ (7,'Nancy',47,94085,False), (8,'Sarah',52,94084,True), (9,'Nancy',59,94084,False),
+ (10,'Diane',51,94083,False), (11,'Ashley',42,94083,False), (12,'Jacqueline',58,94085,False),
+ (13,'Benjamin',49,94084,False), (14,'James',48,94083,False), (15,'Ann',43,94083,False),
+ (16,'Aimee',47,94085,True), (17,'Michael',49,94085,False), (18,'Rebecca',40,94085,False),
+ (19,'Kevin',45,94085,True), (20,'James',45,94084,False), (21,'Sandra',60,94085,False),
+ (22,'Kathleen',40,94085,True), (23,'William',42,94084,False), (24,'James',42,94083,False),
+ (25,'Tyler',50,94085,False), (26,'James',49,94085,True), (27,'Kathleen',55,94083,True),
+ (28,'Zachary',55,94083,True), (29,'Rebecca',41,94085,True), (30,'Jacqueline',49,94085,False),
+ (31,'Diane',48,94083,False), (32,'Sarah',53,94085,True), (33,'Rebecca',55,94083,True),
+ (34,'William',47,94085,False), (35,'William',60,94085,True), (36,'Sarah',53,94085,False),
+ (37,'Ashley',47,94084,True), (38,'Ashley',54,94084,False), (39,'Benjamin',42,94083,False),
+ (40,'Tyler',47,94085,True), (41,'Michael',42,94084,False), (42,'Diane',50,94084,False),
+ (43,'Nancy',51,94085,False), (44,'Rebecca',56,94085,False), (45,'Tyler',41,94085,True);
+```
+
+
+
+## Basic index
+
+Suppose you need to look up the data based on the zip codes of the people in the census. You can fetch details with a query similar to the following:
+
+```sql
+select id from census where zipcode=94085;
+```
+
+This required a sequential scan of all the rows in the table. This is because the primary key of the table is `id`, and looking up by zip code requires a full scan. To avoid the full scan, create an index on `zipcode` so that the executor can quickly fetch the matching rows by looking at the index.
+
+```sql
+create index idx_zip on census(zipcode ASC);
+```
+
+Now, for a query to get all the people in zip code 94085 as follows:
+
+```sql
+explain (analyze, dist, costs off) select id from census where zipcode=94085;
+```
+
+You will see an output like the following:
+
+```yaml{.nocopy}
+ Index Scan using idx_zip on public.census (actual time=3.273..3.295 rows=23 loops=1)
+ Output: id
+ Index Cond: (census.zipcode = 94085)
+ Storage Table Read Requests: 1
+ Storage Table Read Execution Time: 1.401 ms
+ Storage Table Rows Scanned: 23
+ Storage Index Read Requests: 1
+ Storage Index Read Execution Time: 1.529 ms
+ Storage Index Rows Scanned: 23
+...
+```
+
+The same 23 rows were fetched from the table, but much faster. This is because the planner uses the index to execute the query.
+
+## Covering index
+
+In the prior example, to retrieve the rows the index was first looked up, and then more columns were fetched for the same rows from the table. This additional round trip to the table is needed because the columns are not present in the index. To avoid this, you can store the column along with the index as follows:
+
+```sql
+create index idx_zip2 on census(zipcode ASC) include(id);
+```
+
+Now, for a query to get all people in zip code 94085 as follows:
+
+```sql
+explain (analyze, dist, costs off) select id from census where zipcode=94085;
+```
+
+You will see an output like the following:
+
+```yaml{.nocopy}
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Index Only Scan using idx_zip2 on census (actual time=1.930..1.942 rows=23 loops=1)
+ Index Cond: (zipcode = 94085)
+ Storage Index Read Requests: 1
+ Storage Index Read Execution Time: 1.042 ms
+ Storage Index Rows Scanned: 23
+...
+```
+
+This is an index-only scan, which means that all the data required by the query has been fetched from the index. This is also why there was no entry for Table Read Requests.
+
+When an index contains all the columns of the table, it is referred to as a Duplicate index. Duplicate indexes can be used in multi-region deployments to reduce read latencies.
+
+{{}}
+See [Duplicate indexes](../../../develop/build-global-apps/duplicate-indexes/) for more details.
+{{}}
+
+## Listing indexes
+
+You can list the indexes associated with a table using the following methods.
+
+### \d+ meta command
+
+The `\d+` meta command lists the indexes associated with a table along with the schema details.
+
+```sql
+\d+ census
+```
+
+The indexes are listed at the end of the output as follows:
+
+```yaml{.nocopy}
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+----------+------------------------+-----------+----------+---------+----------+--------------+-------------
+ id | integer | | not null | | plain | |
+ name | character varying(255) | | | | extended | |
+ age | integer | | | | plain | |
+ zipcode | integer | | | | plain | |
+ employed | boolean | | | | plain | |
+Indexes:
+ "census_pkey" PRIMARY KEY, lsm (id ASC)
+ "idx_zip" lsm (zipcode ASC)
+```
+
+### pg_indexes view
+
+You can also fetch more information about indexes using the [pg_indexes](../../../architecture/system-catalog#schema) view.
+
+```sql
+SELECT * FROM pg_indexes WHERE tablename = 'census' ;
+```
+
+This gives an output similar to the following:
+
+```yaml{.nocopy}
+ schemaname | tablename | indexname | tablespace | indexdef
+------------+-----------+-------------+------------+---------------------------------------------------------------------
+ public | census | census_pkey | null | CREATE UNIQUE INDEX census_pkey ON public.census USING lsm (id ASC)
+ public | census | idx_zip | null | CREATE INDEX idx_zip ON public.census USING lsm (zipcode ASC)
+```
+
+## Index usage
+
+It's a good idea to keep track of how well indexes are used by your applications so that you can evaluate and improve your indexes, and drop indexes that are not used. To get the usage statistics of the indexes of a table, you can execute the following command:
+
+```sql
+SELECT * FROM pg_stat_user_indexes WHERE relname = 'census';
+```
+
+This should give an output similar to the following:
+
+```yaml{.nocopy}
+ relid | indexrelid | schemaname | relname | indexrelname | idx_scan | idx_tup_read | idx_tup_fetch
+-------+------------+------------+---------+--------------+----------+--------------+---------------
+ 17227 | 17230 | public | census | census_pkey | 2 | 12 | 0
+ 17227 | 17237 | public | census | idx_zip | 2 | 24 | 0
+```
+
+You can get an idea of how many times the index was scanned and how many tuples were read from the index using this statistic.
+
+## Conclusion
+
+While primary keys are essential to ensure data uniqueness and facilitate efficient data distribution, secondary indexes provide the flexibility needed to optimize queries based on non-primary key columns. Using secondary indexes, applications can boost performance and provide a robust and scalable solution for managing large-scale, distributed datasets.
+
+## Learn more
+
+- [Use Explain Analyze to improve query performance](../../../explore/query-1-performance/explain-analyze)
+- [Explore indexes and constraints](../../../explore/ysql-language-features/indexes-constraints/)
diff --git a/docs/content/stable/develop/learn/_index.md b/docs/content/stable/develop/learn/_index.md
index a33d2913c63c..008655b545b9 100644
--- a/docs/content/stable/develop/learn/_index.md
+++ b/docs/content/stable/develop/learn/_index.md
@@ -11,159 +11,58 @@ menu:
type: indexpage
---
-
+## Transactions
+
+Transactions are a sequence of operations performed as a single logical unit of work. These operations can modify multiple tables or rows. Transactions are important to maintain data integrity when multiple users are modifying the same set of rows across tables. For example, credit and debit transactions in a bank account.
+
+{{}}
+To understand how to use transactions when developing applications, see [Transactions](./transactions/acid-transactions-ysql).
+{{}}
+
+## Text search
+
+YugabyteDB supports advanced text search schemes like similarity search, phonetic search, and full-text search, along with the standard pattern matching using the `LIKE` operator.
+
+{{}}
+To understand how to build advanced search functionalities into your applications, see [Text search](./text-search/).
+{{}}
+
+## Aggregations
+
+When performing analytical operations on your data, it is common to fetch aggregates like min, max, sum, average, and so on.
+
+{{}}
+To understand how to best use aggregates in your applications, see [Aggregations](./aggregations-ycql).
+{{}}
+
+## Batch operations
+
+Sometimes it's better to batch multiple statements into one request to avoid round trips to the server. In [YSQL](/{{}}/api/ysql), this can be done using [Stored Procedures](/{{}}/explore/ysql-language-features/stored-procedures/), which are not supported in the [YCQL](/{{}}/api/ycql) API.
+
+{{}}
+To understand how to best do batch operations in YCQL, see [Batch operations](./batch-operations-ycql).
+{{}}
+
+## Date and time
+
+Although date and time are common concepts, working with dates and times across various time zones can be quite a challenge.
+
+{{}}
+To understand how to use date and time data types effectively in your applications, see [Date and Time](./date-and-time-ysql).
+{{}}
+
+## Strings and text
+
+Text, string, and character data types are probably some of the most commonly used types when designing a schema. YugabyteDB provides an extensive suite of functionality to format and manipulate text data types.
+
+{{}}
+To learn how to use text, string, and character types effectively in applications, see [Strings and text](./strings-and-text-ysql).
+{{}}
+
+## Data expiration
+
+Cleaning up old, unwanted data can be a painful task. YugabyteDB supports Time-to-Live (TTL) functionality in the YCQL API which you can use to automatically purge old data and reduce storage costs.
+
+{{}}
+To understand how to use TTL in your YCQL applications, see [TTL for data expiration](./ttl-data-expiration-ycql).
+{{}}
diff --git a/docs/content/stable/develop/learn/data-modeling-ycql.md b/docs/content/stable/develop/learn/data-modeling-ycql.md
deleted file mode 100644
index 12b9720d2415..000000000000
--- a/docs/content/stable/develop/learn/data-modeling-ycql.md
+++ /dev/null
@@ -1,228 +0,0 @@
----
-title: Data modeling in YCQL
-headerTitle: Data modeling
-linkTitle: Data modeling
-description: Learn data modeling in YCQL and how to identify the patterns used to access data and the types of queries to be performed.
-menu:
- stable:
- identifier: data-modeling-ycql
- parent: learn
- weight: 110
-type: docs
----
-
-{{}}
-
-Data modeling is a process that involves identifying the entities (items to be stored) and the relationships between entities. To create your data model, identify the patterns used to access data and the types of queries to be performed. These two ideas inform the organization and structure of the data, and the design and creation of the database's tables.
-
-This topic documents data modeling with [Yugabyte Cloud Query Language (YCQL)](../../../api/ycql/), YugabyteDB's Cassandra-compatible API.
-
-## Keyspaces, tables, rows, and columns
-
-### Keyspaces
-
-Cassandra keyspaces are a collection of tables. They are analogous to SQL namespaces. Typically, each application creates all its tables in one keyspace.
-
-### Tables
-
-A table is a collection of data. A keyspace most often contains one or more tables. Each table is identified by a name. Tables have a set of columns and contain records (rows) with data. Tables can be created, dropped, and altered at runtime without blocking updates and queries.
-
-### Rows
-
-Each table contains multiple rows of data. A row is a set of columns that is uniquely identifiable among all of the other rows.
-
-### Columns
-
-Each row is composed of one or more columns. A column is a fundamental data element, and does not need to be broken down any further.
-
-As the example of a `users` table which holds information about users of a service.
-
-| user_id | firstname | lastname | address
-| -------- | --------- | -------- | --------
-| 1001 | Sherlock | Holmes | 221b Baker St, London, UK
-| 1003 | Clark | Kent | 344 Clinton Street, Metropolis
-| 1007 | James | Bond |
-
-Note the following about the `users` table:
-
-- Each row in the table has a unique value for the primary key column (`user_id`).
-- Other than the primary key, the `users` table has three other columns - `firstname`, `lastname`, `address` each of which is a string.
-- Some columns may have no data (for example, James Bond's address `address` is unknown). These have `null` values in the database.
-
-Now consider another example of the `books` table that keeps track of authors and the books they have written.
-
-| author | book_title | price | year | genre
-| -------------------- | -------------------- | ------ | ---- | -----
-| William Shakespeare | Hamlet | 6.75 | 1602 | tragedy
-| William Shakespeare | Macbeth | 7.50 | 1606 | tragedy
-| Charles Dickens | Oliver Twist | 9.25 | 1837 | serial novel
-| Charles Dickens | A Tale of Two Cities | 11.40 | 1859 | historical novel
-
-Note the following about the `books` table:
-
-- The primary key for this table consists of two columns - `author` and `book_title`. Each row in the table must have values for these two attributes, and the combination of these values must be unique.
-- Other than the primary key, the table has other columns such as `price`, `year`, `genre`.
-- The columns `author`, `book_title` and `genre` are string, `price` is a float, `year` is an integer.
-
-## Primary key
-
-When creating a table, the primary key of the table must be specified in addition to the table name. The primary key uniquely identifies each row in the table, therefore no two rows can have the same key.
-
-There are two components of primary keys, and they are described below.
-
-### Partition key columns (required)
-
-One or more columns of a table are made the partition key columns. The values of the partition key columns are used to compute an internal hash value. This hash value determines the tablet (or partition) in which the row will be stored. This has two implications:
-
-- Each unique set of partition key values is hashed and distributed across nodes randomly to ensure uniform utilization of the cluster.
-
-- All the data for a unique set of partition key values are always stored on the same node. This matters only if there are clustering key columns, which are described in the next section.
-
-In the case of the `users` table, you can make `user_id` column the only primary key column. This is a good choice for a partition key because our queries do not care about the order of the `user_id`s. If the table is split into a number of tablets (partitions), the data may be assigned as follows.
-
-| tablet | user_id | firstname | lastname | address
-| --------- | -------- | --------- | -------- | --------
-| tablet-22 | 1001 | Sherlock | Holmes | 221b Baker St, London, UK
-| tablet-4 | 1003 | Clark | Kent | 344 Clinton Street, Metropolis
-| tablet-17 | 1007 | James | Bond |
-
-### Clustering key columns (optional)
-
-The clustering columns specify the order in which the column data is sorted and stored on disk for a given unique partition key value. More than one clustering column can be specified, and the columns are sorted in the order they are declared in the clustering column. It is also possible to control the sort order (ascending or descending sort) for these columns. Note that the sort order respects the data type.
-
-In a table that has both partition keys and clustering keys, it is possible for two rows to have the same partition key value and therefore they end up on the same node. However, those rows must have different clustering key values in order to satisfy the primary key requirements. Tables without clustering key columns are said to have *simple primary keys*.
-
-In the case of the `books` table, `author` is a good partition key and `book_title` is a good clustering key. Such a data model would allow easily listing all the books for a given author, as well as look up details of a specific book. This would cause the data to be stored as follows.
-
-| tablet | author | book_title | price | year | genre
-| --------- | -------------------- | -------------------- | ------ | ---- | -----
-| tablet-15 | William Shakespeare | Hamlet | 6.75 | 1602 | tragedy
-| tablet-15 | William Shakespeare | Macbeth | 7.50 | 1606 | tragedy
-| tablet-21 | Charles Dickens | A Tale of Two Cities | 11.40 | 1859 | historical novel
-| tablet-21 | Charles Dickens | Oliver Twist | 9.25 | 1837 | serial novel
-
-Note that if you had made both `author` and `book_title` partition key columns, you would not be able to list all the books for a given author efficiently.
-
-**Note**
-
-- The partition key columns are also often referred to as its *hash columns*. This is because an internal hash function is used to distribute data items across tablets based on their partition key values.
-
-- The clustering key columns are also referred to as its **range columns**. This is because rows with the same partition key are stored on disk in sorted order by the clustering key value.
-
-## Secondary indexes
-
-A database index is a data structure that improves the speed of data retrieval operations on a database table. Typically, databases are very efficient at looking up data by the primary key. A secondary index can be created using one or more columns of a database table, and provides the basis for both rapid random lookups and efficient access of ordered records when querying by those columns. To achieve this, secondary indexes require additional writes and storage space to maintain the index data structure. YugabyteDB's secondary index support is documented in detail [here](../../../api/ycql/ddl_create_index/).
-
-### Benefits of secondary indexes
-
-Secondary indexes can be used to speed up queries and to enforce uniqueness of values in a column.
-
-#### Speed up queries
-
-The predominant use of a secondary index is to make lookups by some column values efficient. Let us take an example of a users table, where `user_id` is the primary key. Suppose we want to lookup `user_id` by the email of the user efficiently. You can achieve this as follows.
-
-```sql
-ycqlsh> CREATE KEYSPACE example;
-```
-
-```sql
-ycqlsh> CREATE TABLE example.users(
- user_id bigint PRIMARY KEY,
- firstname text,
- lastname text,
- email text
- ) WITH transactions = { 'enabled' : true };
-```
-
-```sql
-ycqlsh> CREATE INDEX user_by_email ON example.users (email)
- INCLUDE (firstname, lastname);
-```
-
-Next let us insert some data.
-
-```sql
-ycqlsh> INSERT INTO example.users (user_id, firstname, lastname, email)
- VALUES (1, 'James', 'Bond', 'bond@example.com');
-```
-
-```sql
-ycqlsh> INSERT INTO example.users (user_id, firstname, lastname, email)
- VALUES (2, 'Sherlock', 'Holmes', 'sholmes@example.com');
-```
-
-You can now query the table by the email of a user efficiently as follows.
-
-```sql
-ycqlsh> SELECT * FROM example.users WHERE email='bond@example.com';
-```
-
-Read more about using secondary indexes to speed up queries in this quick guide to YugabyteDB secondary indexes.
-
-### Enforce uniqueness of column values
-
-In some cases, you would need to ensure that duplicate values cannot be inserted in a column of a table. You can achieve this in YugabyteDB by creating a unique secondary index, where the application does not want duplicate values to be inserted into a column.
-
-```sql
-ycqlsh> CREATE KEYSPACE example;
-```
-
-```sql
-ycqlsh> CREATE TABLE example.users(
- user_id bigint PRIMARY KEY,
- firstname text,
- lastname text,
- email text
- ) WITH transactions = { 'enabled' : true };
-```
-
-```sql
-ycqlsh> CREATE UNIQUE INDEX unique_emails ON example.users (email);
-```
-
-Inserts would succeed as long as the email is unique.
-
-```sql
-ycqlsh> INSERT INTO example.users (user_id, firstname, lastname, email)
- VALUES (1, 'James', 'Bond', 'bond@example.com');
-```
-
-```sql
-ycqlsh> INSERT INTO example.users (user_id, firstname, lastname, email)
- VALUES (2, 'Sherlock', 'Holmes', 'sholmes@example.com');
-```
-
-But upon inserting a duplicate email, you get an error.
-
-```sql
-ycqlsh> INSERT INTO example.users (user_id, firstname, lastname, email)
- VALUES (3, 'Fake', 'Bond', 'bond@example.com');
-```
-
-```
-InvalidRequest: Error from server: code=2200 [Invalid query] message="SQL error: Execution Error. Duplicate value disallowed by unique index unique_emails
-```
-
-## Documents
-
-Documents are the most common way for storing, retrieving, and managing semi-structured data. Unlike the traditional relational data model, the document data model is not restricted to a rigid schema of rows and columns. The schema can be changed easily thus helping application developers write business logic faster than ever before. Instead of columns with names and data types that are used in a relational model, a document contains a description of the data type and the value for that description. Each document can have the same or different structure. Even nested document structures are possible where one or more sub-documents are embedded inside a larger document.
-
-Databases commonly support document data management through the use of a JSON data type. [JSON.org](http://www.json.org/) defines JSON (JavaScript Object Notation) to be a lightweight data-interchange format. It’s easy for humans to read and write. it’s easy for machines to parse and generate. JSON has four simple data types:
-
-- string
-- number
-- boolean
-- null (or empty)
-
-In addition, it has two core complex data types.
-
-- Collection of name-value pairs which is realized as an object, hash table, dictionary, or something similar depending on the language.
-- Ordered list of values which is realized as an array, vector, list or sequence depending on the language.
-
-Document data models are best fit for applications requiring a flexible schema and fast data access. For example, nested documents enable applications to store related pieces of information in the same database record in a denormalized manner. As a result, applications can issue fewer queries and updates to complete common operations.
-
-### Comparison with Apache Cassandra’s JSON support
-
-[Apache Cassandra’s JSON](http://cassandra.apache.org/doc/latest/cql/json.html) support can be misleading for many developers. YCQL allows `SELECT` and `INSERT` statements to include the `JSON` keyword. The `SELECT` output will now be available in the JSON format and the `INSERT` inputs can now be specified in the JSON format. However, this “JSON” support is simply an ease-of-use abstraction in the CQL layer that the underlying database engine is unaware of. Since there is no native JSON data type in CQL, the schema doesn’t have any knowledge of the JSON provided by the user. This means the schema definition doesn’t change nor does the schema enforcement. Cassandra developers needing native JSON support previously had no choice but to add a new document database such as MongoDB or Couchbase into their data tier.
-
-With YugabyteDB’s native JSON support using the [`JSONB`](../data-types-ycql/#jsonb) data type, application developers can now benefit from the structured query language of Cassandra and the document data modeling of MongoDB in a single database.
diff --git a/docs/content/stable/develop/learn/data-modeling-ysql.md b/docs/content/stable/develop/learn/data-modeling-ysql.md
deleted file mode 100644
index 7bb382651108..000000000000
--- a/docs/content/stable/develop/learn/data-modeling-ysql.md
+++ /dev/null
@@ -1,374 +0,0 @@
----
-title: YSQL data modeling in YugabyteDB
-headerTitle: Distributed SQL Data modeling
-linkTitle: Data modeling
-description: Learn data modeling in YSQL and how to identify the patterns used to access data and the types of queries to be performed.
-menu:
- stable:
- identifier: data-modeling-ysql
- parent: learn
- weight: 100
-type: docs
----
-
-{{}}
-
-Data modeling involves designing the database schema for efficient storage and access. In a distributed SQL database like YugabyteDB, table data is split into tablets and distributed across multiple nodes in the cluster, allowing applications to connect to any node for storing and retrieving data. Because reads and writes can span multiple nodes, it's crucial to consider how table data is partitioned and distributed when modeling your data.
-
-In YugabyteDB, data is stored as rows and columns in tables, which are organized under schemas and databases.
-
-{{}}
-To understand more about creating and managing tables, schemas, and databases, see [Schemas and tables](../../../explore/ysql-language-features/databases-schemas-tables).
-{{}}
-
-To design your tables and indexes for fast retrieval and storage in YugabyteDB, you first need to understand the two [data distribution](../../../explore/going-beyond-sql/data-sharding) schemes, Hash and Range sharding, in detail.
-
-In YugabyteDB, the sharding and ordering of data in the tables and indexes is governed by the primary key of the table and index respectively.
-
-## Cluster setup
-
-
-{{}}
-
-{{}}
-{{}}
-
-{{}}
-{{}}
-
-{{}} {{}} {{}}
-{{}}
-
-
-## Sample data
-
-For illustration, create a census table as follows.
-
-```sql
-CREATE TABLE census(
- id int,
- name varchar(255),
- age int,
- zipcode int,
- employed boolean,
- PRIMARY KEY(id ASC)
-) SPLIT AT VALUES ((10), (25));
-```
-
-For illustration purposes, the data is being explicitly split into three tablets. Normally this is not needed, as tablets are auto-split.
-
-Add some data to the table as follows.
-
-```sql
-INSERT INTO public.census ( id,name,age,zipcode,employed ) VALUES
- (1,'Zachary',55,94085,True), (2,'James',56,94085,False), (3,'Kimberly',50,94084,False),
- (4,'Edward',56,94085,True), (5,'Barry',56,94084,False), (6,'Tyler',45,94084,False),
- (7,'Nancy',47,94085,False), (8,'Sarah',52,94084,True), (9,'Nancy',59,94084,False),
- (10,'Diane',51,94083,False), (11,'Ashley',42,94083,False), (12,'Jacqueline',58,94085,False),
- (13,'Benjamin',49,94084,False), (14,'James',48,94083,False), (15,'Ann',43,94083,False),
- (16,'Aimee',47,94085,True), (17,'Michael',49,94085,False), (18,'Rebecca',40,94085,False),
- (19,'Kevin',45,94085,True), (20,'James',45,94084,False), (21,'Sandra',60,94085,False),
- (22,'Kathleen',40,94085,True), (23,'William',42,94084,False), (24,'James',42,94083,False),
- (25,'Tyler',50,94085,False), (26,'James',49,94085,True), (27,'Kathleen',55,94083,True),
- (28,'Zachary',55,94083,True), (29,'Rebecca',41,94085,True), (30,'Jacqueline',49,94085,False),
- (31,'Diane',48,94083,False), (32,'Sarah',53,94085,True), (33,'Rebecca',55,94083,True),
- (34,'William',47,94085,False), (35,'William',60,94085,True), (36,'Sarah',53,94085,False),
- (37,'Ashley',47,94084,True), (38,'Ashley',54,94084,False), (39,'Benjamin',42,94083,False),
- (40,'Tyler',47,94085,True), (41,'Michael',42,94084,False), (42,'Diane',50,94084,False),
- (43,'Nancy',51,94085,False), (44,'Rebecca',56,94085,False), (45,'Tyler',41,94085,True);
-```
-
-
-
-{{}}
-To explain the behavior of the queries, the examples use **explain (analyze, dist, costs off)**. In practice, you do not need to do this unless you are trying to optimize performance. For more details, see [Analyze queries](../../../explore/query-1-performance/explain-analyze).
-{{}}
-
-## Primary keys
-
-The Primary key is a column or a set of columns that uniquely identifies a row, such as a user ID or order number. The choice of primary key is very important as it defines how data is distributed and ordered when stored. You should choose the primary key based on the most common access pattern. Columns of data type [string](../../../explore/ysql-language-features/data-types/#strings), [number](../../../explore/ysql-language-features/data-types/#numeric-types), [serial](../../../explore/ysql-language-features/data-types/#serial-pseudotype), or [UUID](../../../api/ysql/datatypes/type_uuid/) make good choices for primary keys.
-
-Always specify the primary key when creating the table, as it could be an expensive operation to define a primary key after the data has been added because the table data has to be re-ordered.
-
-{{}}
-In the absence of an explicit primary key, YugabyteDB automatically inserts an internal **row_id** to be used as the primary key. This **row_id** is not accessible by users.
-{{}}
-
-### ID as the primary key
-
-In the `census` table, the most likely way to look up a person is by their `id`, so the primary key has been set to `id ASC`. This means that the data is stored in ascending order of ID, ensuring contiguous IDs are mostly located in the same tablet. This works well for point lookups on ID and range scans on IDs. For example, to look up ID 9, you can do the following:
-
-```sql
-explain (analyze, dist, costs off) select * from census where id=9;
-```
-
-You will see an output similar to the following:
-
-```yaml{.nocopy}
- QUERY PLAN
-----------------------------------------------------------------------------------
- Index Scan using census_pkey on census (actual time=2.814..2.820 rows=1 loops=1)
- Index Cond: (id = 9)
- Storage Table Read Requests: 1
- Storage Table Read Execution Time: 1.560 ms
- Storage Table Rows Scanned: 1
-...
-```
-
-One row matching ID 9 was fetched with just one request (`Storage Table Read Requests: 1`), as the system knows exactly where to look for that row. Also, only one row was scanned. But if you do a range scan for items across 2 tablets as follows:
-
-```sql
-explain (analyze, dist, costs off) select * from census where id>=5 and id<=15;
-```
-
-You will see an output similar to:
-
-```yaml{.nocopy}
- QUERY PLAN
-----------------------------------------------------------------------------------
- Index Scan using census_pkey on census (actual time=3.456..4.393 rows=11 loops=1)
- Index Cond: ((id >= 5) AND (id <= 15))
- Storage Table Read Requests: 2
- Storage Table Read Execution Time: 3.584 ms
- Storage Table Rows Scanned: 11
-...
-```
-
-Notice how there are two Table Read Requests. This is because the table was split at ID 10. Rows with an ID of 5 through 9 are in one tablet, while rows with ID 10 through 15 are in another, requiring two requests to be made.
-
-### Name as the primary key
-
-Suppose your most common lookup is based on the name. In this case you would make the name column part of the primary key. Because the name alone may not be unique enough to be the primary key (the primary key has to be unique), you can choose a primary key with both name and ID as follows:
-
-```sql
-CREATE TABLE census2(
- id int,
- name varchar(255),
- age int,
- zipcode int,
- employed boolean,
- PRIMARY KEY(name ASC, id ASC)
-) SPLIT AT VALUES (('H'), ('S'));
--- NOTE: Splitting only for demo
-
--- copy the same data into census2
-INSERT INTO census2 SELECT * FROM census;
-```
-
-Note how the `name` column is specified first, and `id` second. This ensures that the data is stored sorted based on `name` first, and for all matching names, the `id` will be stored sorted in ascending order, and all the people with the same name will be in the same tablet. This allows you to do a fast lookup on `name` even though `(name, id)` is the primary key. Retrieve all the people with the name James as follows:
-
-```sql
-explain (analyze, dist, costs off) select * from census2 where name = 'James';
-```
-
-You will see an output similar to the following:
-
-```yaml{.nocopy}
- QUERY PLAN
-------------------------------------------------------------------------------------
- Index Scan using census2_pkey on census2 (actual time=1.489..1.496 rows=5 loops=1)
- Index Cond: ((name)::text = 'James'::text)
- Storage Table Read Requests: 1
- Storage Table Read Execution Time: 1.252 ms
- Storage Table Rows Scanned: 5
-...
-```
-
-There are 5 people named James, and all of them are located in one tablet, requiring only one Table Read Request.
-
-If you do a range query as follows:
-
-```sql
-explain (analyze, dist, costs off) select * from census2 where name >= 'James' and name <='Michael';
-```
-
-You will see an output similar to the following:
-
-```yaml{.nocopy}
- QUERY PLAN
----------------------------------------------------------------------------------------
- Index Scan using census2_pkey on census2 (actual time=2.411..2.430 rows=11 loops=1)
- Index Cond: (((name)::text >= 'James'::text) AND ((name)::text <= 'Michael'::text))
- Storage Table Read Requests: 1
- Storage Table Read Execution Time: 2.010 ms
- Storage Table Rows Scanned: 11
-```
-
-Notice how only one Table Read Request is needed to fetch the results; all the results with names James, Kathleen, Kevin, Kimberly, and Michael are stored in the same tablet.
-
-{{}}
-The primary key was specified with `ASC` order. However, if the queries are going to retrieve data in descending order with `ORDER BY name DESC`, then it is better to match the same ordering in the primary key definition.
-{{}}
-
-## Secondary indexes
-
-The primary goal of an index is to enhance the performance of data retrieval operations on the data in the tables. Indexes are designed to quickly locate data without having to search every row in a database table and provide fast access for patterns other than that of the primary key of the table. In YugabyteDB, indexes are internally designed just like tables and operate as such. The main difference between a table and an index is that the primary key of the table has to be unique but it need not be unique for an index.
-
-Indexes can be created using the [CREATE INDEX](../../../api/ysql/the-sql-language/statements/ddl_create_index) statement, which has the following format:
-
-```sql{.nocopy}
-CREATE INDEX idx_name ON table_name
- ((columns), columns) INCLUDE (columns)
--- [SHARDING] [CLUSTERING] [COVERING]
-```
-
-The columns that are specified in the [CREATE INDEX](../../../api/ysql/the-sql-language/statements/ddl_create_index) statement are of three kinds:
-
-- **Sharding** - These columns determine how the index data is distributed.
-- **Clustering** - These optional columns determine how the index rows matching the same sharding key are ordered.
-- **Covering** - These are optional additional columns that are stored in the index to avoid a trip to the table.
-
-### Basic index
-
-Suppose you also need to look up the data based on the zip codes of the people in the census. You can fetch details with a query similar to the following:
-
-```sql
-explain (analyze, dist, costs off) select id from census where zipcode=94085;
-```
-
-For which you will get a query plan similar to the following:
-
-```yaml{.nocopy}
- QUERY PLAN
-----------------------------------------------------------------------
- Seq Scan on public.census (actual time=4.201..4.206 rows=23 loops=1)
- Output: id
- Remote Filter: (census.zipcode = 94085)
- Storage Table Read Requests: 1
- Storage Table Read Execution Time: 1.928 ms
- Storage Table Rows Scanned: 45
-```
-
-You will quickly notice that this required a sequential scan of all the rows in the table. This is because the primary key of the `census` table is `id`, and looking up by zip code requires a full scan. To avoid the full scan, you need to create an index on `zipcode` so that the executor can quickly fetch the matching rows by looking at the index.
-
-```sql
-create index idx_zip on census(zipcode ASC);
-```
-
-Now, for a query to get all the people in zip code 94085 as follows:
-
-```sql
-explain (analyze, dist, costs off) select id from census where zipcode=94085;
-```
-
-You will see an output like the following:
-
-```yaml{.nocopy}
- QUERY PLAN
--------------------------------------------------------------------------------------------
- Index Scan using idx_zip on public.census (actual time=3.273..3.295 rows=23 loops=1)
- Output: id
- Index Cond: (census.zipcode = 94085)
- Storage Table Read Requests: 1
- Storage Table Read Execution Time: 1.401 ms
- Storage Table Rows Scanned: 23
- Storage Index Read Requests: 1
- Storage Index Read Execution Time: 1.529 ms
- Storage Index Rows Scanned: 23
-...
-```
-
-The index was used to identify all rows matching `zipcode = 94085`. 23 rows were fetched from the index and the corresponding data for the 23 rows were fetched from the table.
-
-### Covering index
-
-In the prior example, to retrieve 23 rows the index was first looked up and then more columns were fetched for the same rows from the table. This additional round trip to the table is needed because the columns are not present in the index. To avoid this, you can store the column along with the index as follows:
-
-```sql
-create index idx_zip2 on census(zipcode ASC) include(id);
-```
-
-Now, for a query to get all the people in zip code 94085 as follows:
-
-```sql
-explain (analyze, dist, costs off) select id from census where zipcode=94085;
-```
-
-You will see an output like the following:
-
-```yaml{.nocopy}
- QUERY PLAN
--------------------------------------------------------------------------------------
- Index Only Scan using idx_zip2 on census (actual time=1.930..1.942 rows=23 loops=1)
- Index Cond: (zipcode = 94085)
- Storage Index Read Requests: 1
- Storage Index Read Execution Time: 1.042 ms
- Storage Index Rows Scanned: 23
-...
-```
-
-This has become an index-only scan, which means that all the data required by the query has been fetched from the index. This is also why there was no entry for Table Read Requests.
-
-## Hot shard or tablet
-
-A hot shard is a common problem in data retrieval where a specific node ends up handling most of the queries because of the query pattern and data distribution scheme.
-
-{{}}
-The hot shard issue can occur both for tables and indexes.
-{{}}
-
-Consider a scenario where you want to look up people with a specific name, say `Michael`, in `94085`. For this, a good index would be the following:
-
-```sql
-create index idx_zip3 on census(zipcode ASC, name ASC) include(id);
-```
-
-The query would be as follows:
-
-```sql
-explain (analyze, dist, costs off) select id from census where zipcode=94085 AND name='Michael';
-```
-
-This results in an output similar to the following:
-
-```yaml{.nocopy}
- QUERY PLAN
-------------------------------------------------------------------------------------
- Index Only Scan using idx_zip3 on census (actual time=1.618..1.620 rows=1 loops=1)
- Index Cond: ((zipcode = 94085) AND (name = 'Michael'::text))
- Heap Fetches: 0
- Storage Index Read Requests: 1
- Storage Index Read Execution Time: 0.970 ms
- Storage Index Rows Scanned: 1
-```
-
-Now consider a scenario where zip code 94085 is very popular and the target of many queries (say there was an election or a disaster in that area). As the index is distributed based on `zipcode`, everyone in zip code 94085 will end up located in the same tablet; as a result, all the queries will end up reading from that one tablet. In other words, this tablet has become hot. To avoid this, you can distribute the index on name instead of zip code, as follows:
-
-```sql
-drop index if exists idx_zip3;
-create index idx_zip3 on census(name ASC, zipcode ASC) include(id);
-```
-
-Notice that we have swapped the order of columns in the index. This results in the index being distributed/ordered on name first and then ordered on zip code. Now when many queries have the same zip code, the queries will be handled by different tablets as the names being looked up will be different and will be located on different tablets.
-
-{{}}
-Consider swapping the order of columns to avoid hot shards.
-{{}}
-
-## Partitioning
-
-[Data partitioning](../../../explore/ysql-language-features/advanced-features/partitions) refers to the process of dividing a large table or dataset into smaller physical partitions based on certain criteria or rules. This technique offers several benefits, including improved performance, easier data management, and better use of storage resources. Each partition is internally a table. This scheme is useful for managing large volumes of data and especially useful for dropping older data.
-
-### Manage large datasets
-
-You can manage large data volumes by partitioning based on time (say by day, week, month, and so on) to make it easier to drop old data, especially when you want to retain only the recent data.
-
-{{}}
-To understand how large data can be partitioned for easier management, see [Partitioning data by time](../../common-patterns/timeseries/partitioning-by-time).
-{{}}
-
-### Place data closer to users
-
-When you want to improve latency for local users when your users are spread across a large geography, partition your data according to where big clusters of users are located, and place their data in regions closer to them using [tablespaces](../../../explore/going-beyond-sql/tablespaces). Users will end up talking to partitions closer to them.
-
-{{}}
-To understand how to partition and place data closer to users for improved latency, see [Latency-optimized geo-partitioning](../../build-global-apps/latency-optimized-geo-partition).
-{{}}
-
-### Adhere to compliance laws
-
-You can partition your data according to the user's citizenship and place their data in the boundaries of their respective nations to be compliant with data residency laws like [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation).
-
-{{}}
-To understand how to partition data to be compliant with data residency laws, see [Locality-optimized geo-partitioning](../../build-global-apps/locality-optimized-geo-partition).
-{{}}
diff --git a/docs/content/stable/develop/learn/data-types-ycql.md b/docs/content/stable/develop/learn/data-types-ycql.md
deleted file mode 100644
index d44f2d2b2996..000000000000
--- a/docs/content/stable/develop/learn/data-types-ycql.md
+++ /dev/null
@@ -1,208 +0,0 @@
----
-title: Data types in YCQL
-headerTitle: Data types in YCQL
-linkTitle: Data types
-description: Learn about the data types in YCQL.
-menu:
- stable:
- identifier: data-types-1-ycql
- parent: learn
- weight: 120
-type: docs
----
-
-{{}}
-
-This topic lists the various data types available in YugabyteDB's [Cassandra-compatible YCQL API](../../../api/ycql/).
-
-## JSONB
-
-There are a number of different serialization formats for JSON data, one of the popular formats being JSONB (JSON Better) to efficiently model document data.
-
-The YCQL API supports the [JSONB data type](../../../api/ycql/type_jsonb/) to parse, store, and query JSON documents natively. This data type is similar in query language syntax and functionality to the one supported by PostgreSQL. JSONB serialization allows for easy search and retrieval of attributes inside the document. This is achieved by storing all the JSON attributes in a sorted order, which allows for efficient binary search of keys. Similarly, arrays are stored such that random access for a particular array index into the serialized JSON document is possible. In addition, [DocDB](../../../architecture/docdb/), YugabyteDB's underlying storage engine, is document-oriented, which makes storing JSON data simpler than would otherwise be possible.
-
-Consider the example of an ecommerce application for an online bookstore. The database for such a bookstore needs to store details of various books, some of which may have custom attributes. The following example shows a JSON document that captures the details of a particular book, Macbeth by William Shakespeare.
-
-```json
-{
- "name":"Macbeth",
- "author":{
- "first_name":"William",
- "last_name":"Shakespeare"
- },
- "year":1623,
- "editors":[
- "John",
- "Elizabeth",
- "Jeff"
- ]
-}
-```
-
-### Create a table
-
-The books table for this bookstore can be modeled as follows. Assume that the ID of each book is an int, but this could be a string or a UUID.
-
-```cql
-ycqlsh> CREATE KEYSPACE store;
-```
-
-```cql
-ycqlsh> CREATE TABLE store.books ( id int PRIMARY KEY, details jsonb );
-```
-
-### Insert data
-
-Next, insert some sample data for a few books into this store as follows:
-
-```cql
-INSERT INTO store.books (id, details) VALUES (1,
- '{ "name" : "Macbeth",
- "author" : {"first_name": "William", "last_name": "Shakespeare"},
- "year" : 1623,
- "editors": ["John", "Elizabeth", "Jeff"] }'
-);
-INSERT INTO store.books (id, details) VALUES (2,
- '{ "name" : "Hamlet",
- "author" : {"first_name": "William", "last_name": "Shakespeare"},
- "year" : 1603,
- "editors": ["Lysa", "Mark", "Robert"] }'
-);
-INSERT INTO store.books (id, details) VALUES (3,
- '{ "name" : "Oliver Twist",
- "author" : {"first_name": "Charles", "last_name": "Dickens"},
- "year" : 1838,
- "genre" : "novel",
- "editors": ["Mark", "Tony", "Britney"] }'
-);
-INSERT INTO store.books (id, details) VALUES (4,
- '{ "name" : "Great Expectations",
- "author" : {"first_name": "Charles", "last_name": "Dickens"},
- "year" : 1950,
- "genre" : "novel",
- "editors": ["Robert", "John", "Melisa"] }'
-);
-INSERT INTO store.books (id, details) VALUES (5,
- '{ "name" : "A Brief History of Time",
- "author" : {"first_name": "Stephen", "last_name": "Hawking"},
- "year" : 1988,
- "genre" : "science",
- "editors": ["Melisa", "Mark", "John"] }'
-);
-```
-
-Note the following about the preceding book details:
-
-- The year attribute for each of the books is interpreted as an integer.
-- The first two books do not have a genre attribute, which the others do.
-- The author attribute is a map.
-- The editors attribute is an array.
-
-### Retrieve a subset of attributes
-
-Running the following `SELECT` query returns all attributes of each book:
-
-```cql
-ycqlsh> SELECT * FROM store.books;
-
-```
-
-The following query retrieves just the ID and name for all the books:
-
-```cql
-ycqlsh> SELECT id, details->>'name' as book_title FROM store.books;
-```
-
-```output
- id | book_title
-----+-------------------------
- 5 | A Brief History of Time
- 1 | Macbeth
- 4 | Great Expectations
- 2 | Hamlet
- 3 | Oliver Twist
-```
-
-### Query by attribute values - string
-
-The name attribute is a string in the book details JSON document. Run the following to query the details of the book named *Hamlet*.
-
-```cql
-ycqlsh> SELECT * FROM store.books WHERE details->>'name'='Hamlet';
-```
-
-```output
- id | details
-----+---------------------------------------------------------------
- 2 | {"author":{"first_name":"William","last_name":"Shakespeare"},
- "editors":["Lysa","Mark","Robert"],
- "name":"Hamlet","year":1603}
-```
-
-Note that you can query by attributes that exist only in some of the documents. For example, you can query for all books that have a genre of novel. Recall that not all books have a genre attribute defined.
-
-```cql
-ycqlsh> SELECT id, details->>'name' as title,
- details->>'genre' as genre
- FROM store.books
- WHERE details->>'genre'='novel';
-```
-
-```output
- id | title | genre
-----+--------------------+-------
- 4 | Great Expectations | novel
- 3 | Oliver Twist | novel
-```
-
-### Query by attribute values - integer
-
-The year attribute is an integer in the book details JSON document. Run the following to query the ID and name of books written after 1900:
-
-```cql
-ycqlsh> SELECT id, details->>'name' as title, details->>'year'
- FROM store.books
- WHERE CAST(details->>'year' AS integer) > 1900;
-```
-
-```output
- id | title | expr
-----+-------------------------+------
- 5 | A Brief History of Time | 1988
- 4 | Great Expectations | 1950
-```
-
-### Query by attribute values - map
-
-The author attribute is a map, which in turn consists of the attributes `first_name` and `last_name`. Fetch the IDs and titles of all books written by William Shakespeare as follows:
-
-```cql
-ycqlsh> SELECT id, details->>'name' as title,
- details->>'author' as author
- FROM store.books
- WHERE details->'author'->>'first_name' = 'William' AND
- details->'author'->>'last_name' = 'Shakespeare';
-```
-
-```output
- id | title | author
-----+---------+----------------------------------------------------
- 1 | Macbeth | {"first_name":"William","last_name":"Shakespeare"}
- 2 | Hamlet | {"first_name":"William","last_name":"Shakespeare"}
-```
-
-### Query by attribute values - array
-
-The editors attribute is an array consisting of the first names of the editors of each of the books. You can query for the book titles where `Mark` is the first entry in the editors list as follows:
-
-```cql
-ycqlsh> SELECT id, details->>'name' as title,
- details->>'editors' as editors FROM store.books
- WHERE details->'editors'->>0 = 'Mark';
-```
-
-```output
- id | title | editors
-----+--------------+---------------------------
- 3 | Oliver Twist | ["Mark","Tony","Britney"]
-```
diff --git a/docs/content/stable/develop/learn/data-types-ysql.md b/docs/content/stable/develop/learn/data-types-ysql.md
deleted file mode 100644
index bce575692242..000000000000
--- a/docs/content/stable/develop/learn/data-types-ysql.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Data types in YSQL
-headerTitle: Data types in YSQL
-linkTitle: Data types
-description: Learn about the data types in YSQL.
-menu:
- stable:
- identifier: data-types-2-ysql
- parent: learn
- weight: 120
-type: docs
-private: true
----
-
-
-{{}}
diff --git a/docs/content/stable/develop/learn/sql-nosql.md b/docs/content/stable/explore/sql-nosql.md
similarity index 70%
rename from docs/content/stable/develop/learn/sql-nosql.md
rename to docs/content/stable/explore/sql-nosql.md
index 0ce0923c4697..340b647c015b 100644
--- a/docs/content/stable/develop/learn/sql-nosql.md
+++ b/docs/content/stable/explore/sql-nosql.md
@@ -6,22 +6,22 @@ description: Learn how YugabyteDB brings the best of SQL and NoSQL together into
menu:
stable:
identifier: sql-nosql
- parent: learn
- weight: 130
+ parent: explore
+ weight: 10
type: docs
---
Most application developers have used SQL, and possibly some NoSQL databases, to build applications. YugabyteDB brings the best of these two databases together into one unified platform to simplify development of scalable cloud services.
-Very often, today's cloud services and applications will start out with just a few requests and a very small amount of data. These can be served by just a few nodes. But if the application becomes popular, they would have to scale out rapidly in order to handle millions of requests and many terabytes of data. YugabyteDB is well suited for these kinds of workloads.
+Very often, cloud services and applications start out with just a few requests and a very small amount of data. These can be served by just a few nodes. But if the application becomes popular, it would have to scale out rapidly to handle millions of requests and many terabytes of data. YugabyteDB is well suited for these kinds of workloads.
## Unifying SQL and NoSQL
-Here are a few different criteria where YugabyteDB brings the best of SQL and NoSQL together into a single database platform.
+YugabyteDB combines database and operational characteristics of SQL and NoSQL together in a single database platform.
### Database characteristics
-These can be loosely defined as the high-level concerns when choosing a database to build an application or a cloud service - such as its data model, the API it supports, its consistency semantics, and so on. The following table contrasts what YugabyteDB offers with SQL and NoSQL databases in general. Note that there are a number of different NoSQL databases each with their own nuanced behavior, and the table is not accurate for all NoSQL databases - it is just meant to give an idea.
+These can be loosely defined as the high-level concerns when choosing a database to build an application or a cloud service - such as its data model, the API it supports, its consistency semantics, and so on. The following table contrasts what YugabyteDB offers with SQL and NoSQL databases in general. Note that there are a number of different NoSQL databases each with their own nuanced behavior, and the table is not accurate for all NoSQL databases - it's just meant to give an idea.
| Characteristic | SQL | NoSQL | YugabyteDB |
| --------------- | ---------------- | ------------------ | ------------------ |
@@ -29,35 +29,35 @@ These can be loosely defined as the high-level concerns when choosing a database
| API | SQL | Various | Fully-relational SQL + Semi-relational SQL |
| Consistency | Strong | Eventual | Strong |
| Transactions | ACID | None | ACID |
-| High write throughput | No | Sometimes | Yes
-| Tunable read latency | No | Yes | Yes
+| High write throughput | No | Sometimes | Yes |
+| Tunable read latency | No | Yes | Yes |
### Operational characteristics
-Operational characteristics can be defined as the runtime concerns that arise when a database is deployed, run, and managed in production. When running a database in production in a cloud-like architecture, a number of operational characteristics become essential. The following table compares the capabilities of YugabyteDB to SQL and NoSQL databases. As with the preceding section, there are a number of NoSQL databases which are different in their own ways and the following table is meant to give a broad idea.
+Operational characteristics are the runtime concerns that arise when a database is deployed, run, and managed in production. When running a database in production in a cloud-like architecture, a number of operational characteristics become essential. The following table compares the capabilities of YugabyteDB to SQL and NoSQL databases. As with the preceding section, there are a number of NoSQL databases which are different in their own ways and the following table is meant to give a broad idea.
-The following table lists some of the important features that YugabyteDB supports, and the API to use to leverage the feature. Note that typically, multiple databases are deployed to achieve the same functionality.
+The following table lists some of the important features that YugabyteDB supports. Note that typically multiple databases are deployed to achieve the same functionality.
| Operational characteristics | SQL | NoSQL | YugabyteDB |
| --------------- | ---------------- | ------------------ | ------------------ |
-| Automatic sharding | No | Sometimes | Yes
-| Linear scalability | No | Yes | Yes
-| Fault tolerance | No - manual setup | Yes - smart client detects failed nodes | Yes - smart client detects failed nodes
-| Data resilience | No | Yes - but rebuilds cause high latencies | Yes - automatic, efficient data rebuilds
-| Geo-distributed | No - manual setup | Sometimes | Yes
-| Low latency reads | No | Yes | Yes
-| Predictable p99 read latency | Yes | No | Yes
-| High data density | No | Sometimes - latencies suffer when densities increase | Yes - predictable latencies at high data densities
-| Tunable reads with timeline consistency | No - manual setup | Sometimes | Yes
-| Read replica support | No - manual setup | No - no asynchronous replication | Yes - synchronous and asynchronous replication options
+| Automatic sharding | No | Sometimes | Yes |
+| Linear scalability | No | Yes | Yes |
+| Fault tolerance | No - manual setup | Yes - smart client detects failed nodes | Yes - smart client detects failed nodes |
+| Data resilience | No | Yes - but rebuilds cause high latencies | Yes - automatic, efficient data rebuilds |
+| Geo-distributed | No - manual setup | Sometimes | Yes |
+| Low latency reads | No | Yes | Yes |
+| Predictable p99 read latency | Yes | No | Yes |
+| High data density | No | Sometimes - latencies suffer when densities increase | Yes - predictable latencies at high data densities |
+| Tunable reads with timeline consistency | No - manual setup | Sometimes | Yes |
+| Read replica support | No - manual setup | No - no asynchronous replication | Yes - synchronous and asynchronous replication options |
## Core features
-Applications and cloud services depend on databases for a variety of built-in features. These can include the ability to perform multi-row transactions, JSON or document support, secondary indexes, automatic data expiration with TTLs, and so on.
+Applications and cloud services depend on databases for a variety of built-in features. These can include the ability to perform multi-row transactions, JSON or document support, secondary indexes, automatic data expiration with time to live (TTL), and so on.
-Here is a table that lists some of the important features that YugabyteDB supports, and which of YugabyteDB's APIs to use in order to achieve these features. Note that typically, multiple databases are deployed in order to achieve these features.
+The following table lists some of the important features that YugabyteDB supports, and which YugabyteDB APIs support these features. Note that typically multiple databases are deployed to achieve these features.
-| Database features | [Yugabyte SQL API](../../../api/ysql/) | [Yugabyte Cloud QL API](../../../api/ycql/) |
+| Database features | [Yugabyte SQL API](../../api/ysql/) | [Yugabyte Cloud QL API](../../api/ycql/) |
| --------------- | ---------------- | ------------------ |
| Multi-row transactions | Yes | Yes |
| Consistent secondary indexes | Yes | Yes |
@@ -65,7 +65,7 @@ Here is a table that lists some of the important features that YugabyteDB suppor
| Secondary Indexes | Yes | Yes |
| Foreign keys | Yes | No |
| JOINs | Yes | No |
-| Automatic data expiry with TTL | No | Yes - table and column level TTL |
+| Automatic data expiry with TTL | No | Yes - table- and column-level TTL |
| Run Apache Spark for AI/ML | No | Yes |
## Linear scalability
diff --git a/docs/content/stable/reference/configuration/all-flags-yb-master.md b/docs/content/stable/reference/configuration/all-flags-yb-master.md
index fff8164ba7dd..e402279d0049 100644
--- a/docs/content/stable/reference/configuration/all-flags-yb-master.md
+++ b/docs/content/stable/reference/configuration/all-flags-yb-master.md
@@ -9,7 +9,7 @@ menu:
type: indexpage
---
-Use the yb-master binary and its flags to configure the [YB-Master](../../../architecture/concepts/yb-master/) server. The yb-master executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-master, refer to the [yb-master](../yb-master/) documentation.
+Use the yb-master binary and its flags to configure the [YB-Master](../../../architecture/yb-master/) server. The yb-master executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-master, refer to the [yb-master](../yb-master/) documentation.
For a list of all YB-TServer flags, see [All YB-TServer flags](../all-flags-yb-tserver/).
diff --git a/docs/content/stable/reference/configuration/all-flags-yb-tserver.md b/docs/content/stable/reference/configuration/all-flags-yb-tserver.md
index 758ed5a3d0fb..abd987f40ace 100644
--- a/docs/content/stable/reference/configuration/all-flags-yb-tserver.md
+++ b/docs/content/stable/reference/configuration/all-flags-yb-tserver.md
@@ -9,7 +9,7 @@ menu:
type: indexpage
---
-Use the yb-tserver binary and its flags to configure the [YB-TServer](../../../architecture/concepts/yb-tserver/) server. The yb-tserver executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-tserver, refer to the [yb-tserver](../yb-tserver/) documentation.
+Use the yb-tserver binary and its flags to configure the [YB-TServer](../../../architecture/yb-tserver/) server. The yb-tserver executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-tserver, refer to the [yb-tserver](../yb-tserver/) documentation.
For a list of all YB-Master flags, see [All YB-Master flags](../all-flags-yb-master/).
diff --git a/docs/content/stable/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md b/docs/content/stable/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md
index 8348c45585f2..d2e51ffa44c9 100644
--- a/docs/content/stable/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md
+++ b/docs/content/stable/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md
@@ -139,7 +139,7 @@ A full copy is done by first backing up the data to external storage, and then r
### YSQL tables
-You can add databases containing colocated tables to the xCluster configuration as long as the underlying database is v2.18.1.0 or later. Colocated tables on the source and target should be created with the same colocation ID if they already exist on both the source and target prior to setup. Refer to [xCluster and colocation](../../../../architecture/docdb-sharding/colocated-tables/#xcluster-and-colocation).
+You can add databases containing colocated tables to the xCluster configuration as long as the underlying database is v2.18.1.0 or later. Colocated tables on the source and target should be created with the same colocation ID if they already exist on both the source and target prior to setup. Refer to [xCluster and colocation](../../../../explore/colocation/#xcluster-and-colocation).
If a [full copy](#full-copy-during-xcluster-setup) is required, the entire database is recreated on the target universe from the current database on the source universe. Be sure to keep the set of tables the same at all times on both the source and target universes in these databases by following the steps in [Manage tables and indexes](../xcluster-replication-ddl/).