From c40fff2860ff61b054152e463c3c0ee7943c47d5 Mon Sep 17 00:00:00 2001 From: Dwight Hodge <79169168+ddhodge@users.noreply.github.com> Date: Fri, 6 Sep 2024 20:39:15 -0400 Subject: [PATCH] copy Develop changes to stable (#23812) --- .../preview/architecture/design-goals.md | 2 +- docs/content/preview/develop/_index.md | 2 +- .../configuration/all-flags-yb-master.md | 2 +- .../configuration/all-flags-yb-tserver.md | 2 +- .../xcluster-replication-setup.md | 2 +- docs/content/stable/api/ycql/type_jsonb.md | 2 +- .../stable/architecture/design-goals.md | 2 +- docs/content/stable/develop/_index.md | 128 +++--- .../common-patterns/timeseries/_index.md | 45 --- .../stable/develop/data-modeling/_index.md | 62 +++ .../common-patterns/_index.md | 16 +- .../common-patterns/keyvalue.md | 7 +- .../common-patterns/timeseries/_index.md | 53 +++ .../common-patterns/timeseries/data-expiry.md | 7 +- .../timeseries/global-ordering.md | 1 + .../timeseries/ordering-by-entity.md | 1 + .../timeseries/partitioning-by-time.md | 7 +- .../develop/data-modeling/hot-shards-ysql.md | 129 ++++++ .../develop/data-modeling/partitioning.md | 45 +++ .../data-modeling/primary-keys-ycql.md | 226 +++++++++++ .../data-modeling/primary-keys-ysql.md | 222 +++++++++++ .../data-modeling/secondary-indexes-ycql.md | 179 +++++++++ .../data-modeling/secondary-indexes-ysql.md | 215 ++++++++++ docs/content/stable/develop/learn/_index.md | 211 +++------- .../develop/learn/data-modeling-ycql.md | 228 ----------- .../develop/learn/data-modeling-ysql.md | 374 ------------------ .../stable/develop/learn/data-types-ycql.md | 208 ---------- .../stable/develop/learn/data-types-ysql.md | 16 - .../{develop/learn => explore}/sql-nosql.md | 46 +-- .../configuration/all-flags-yb-master.md | 2 +- .../configuration/all-flags-yb-tserver.md | 2 +- .../xcluster-replication-setup.md | 2 +- 32 files changed, 1315 insertions(+), 1131 deletions(-) delete mode 100644 docs/content/stable/develop/common-patterns/timeseries/_index.md 
create mode 100644 docs/content/stable/develop/data-modeling/_index.md rename docs/content/stable/develop/{ => data-modeling}/common-patterns/_index.md (92%) rename docs/content/stable/develop/{ => data-modeling}/common-patterns/keyvalue.md (94%) create mode 100644 docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/data-expiry.md (93%) rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/global-ordering.md (99%) rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/ordering-by-entity.md (99%) rename docs/content/stable/develop/{ => data-modeling}/common-patterns/timeseries/partitioning-by-time.md (95%) create mode 100644 docs/content/stable/develop/data-modeling/hot-shards-ysql.md create mode 100644 docs/content/stable/develop/data-modeling/partitioning.md create mode 100644 docs/content/stable/develop/data-modeling/primary-keys-ycql.md create mode 100644 docs/content/stable/develop/data-modeling/primary-keys-ysql.md create mode 100644 docs/content/stable/develop/data-modeling/secondary-indexes-ycql.md create mode 100644 docs/content/stable/develop/data-modeling/secondary-indexes-ysql.md delete mode 100644 docs/content/stable/develop/learn/data-modeling-ycql.md delete mode 100644 docs/content/stable/develop/learn/data-modeling-ysql.md delete mode 100644 docs/content/stable/develop/learn/data-types-ycql.md delete mode 100644 docs/content/stable/develop/learn/data-types-ysql.md rename docs/content/stable/{develop/learn => explore}/sql-nosql.md (70%) diff --git a/docs/content/preview/architecture/design-goals.md b/docs/content/preview/architecture/design-goals.md index 7d1afa71a34e..2477b434d6e0 100644 --- a/docs/content/preview/architecture/design-goals.md +++ b/docs/content/preview/architecture/design-goals.md @@ -78,7 +78,7 @@ YugabyteDB has been designed with operational simplicity in mind, providing 
feat ## Heterogeneous workload support -Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](../../tutorials/azure/azure-event-hubs/), [time-series](../../develop/data-modeling/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads. +Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](/preview/tutorials/azure/azure-event-hubs/), [time-series](../../develop/data-modeling/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads. ## Transaction isolation levels diff --git a/docs/content/preview/develop/_index.md b/docs/content/preview/develop/_index.md index 790c42a446a8..ee96a1d598fb 100644 --- a/docs/content/preview/develop/_index.md +++ b/docs/content/preview/develop/_index.md @@ -77,5 +77,5 @@ To learn more about how to use browser-based IDEs, see [Cloud-native development Yugabyte provides multiple step-by-step guides for building scalable and fault-tolerant applications with YugabyteDB using your favorite programming language, services, and frameworks, including Kafka, Gen-AI, and more. {{}} -For step-by-step guides for various frameworks, see [Tutorials](../tutorials/). +For step-by-step guides for various frameworks, see [Tutorials](/preview/tutorials/). 
{{}} diff --git a/docs/content/preview/reference/configuration/all-flags-yb-master.md b/docs/content/preview/reference/configuration/all-flags-yb-master.md index 0d068dc78e3d..04e45d76b20a 100644 --- a/docs/content/preview/reference/configuration/all-flags-yb-master.md +++ b/docs/content/preview/reference/configuration/all-flags-yb-master.md @@ -9,7 +9,7 @@ menu: type: indexpage --- -Use the yb-master binary and its flags to configure the [YB-Master](../../../architecture/concepts/yb-master/) server. The yb-master executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-master, refer to the [yb-master](../yb-master/) documentation. +Use the yb-master binary and its flags to configure the [YB-Master](../../../architecture/yb-master/) server. The yb-master executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-master, refer to the [yb-master](../yb-master/) documentation. For a list of all YB-TServer flags, see [All YB-TServer flags](../all-flags-yb-tserver/). diff --git a/docs/content/preview/reference/configuration/all-flags-yb-tserver.md b/docs/content/preview/reference/configuration/all-flags-yb-tserver.md index 976296932cd6..50a8750f05d0 100644 --- a/docs/content/preview/reference/configuration/all-flags-yb-tserver.md +++ b/docs/content/preview/reference/configuration/all-flags-yb-tserver.md @@ -9,7 +9,7 @@ menu: type: indexpage --- -Use the yb-tserver binary and its flags to configure the [YB-TServer](../../../architecture/concepts/yb-tserver/) server. The yb-tserver executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-tserver, refer to the [yb-tserver](../yb-tserver/) documentation. 
+Use the yb-tserver binary and its flags to configure the [YB-TServer](../../../architecture/yb-tserver/) server. The yb-tserver executable file is located in the `bin` directory of YugabyteDB home. For more detailed information about flags commonly used for production deployments and instructions on using yb-tserver, refer to the [yb-tserver](../yb-tserver/) documentation. For a list of all YB-Master flags, see [All YB-Master flags](../all-flags-yb-master/). diff --git a/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md b/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md index 308478e174f9..f792f7433579 100644 --- a/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md +++ b/docs/content/preview/yugabyte-platform/manage-deployments/xcluster-replication/xcluster-replication-setup.md @@ -139,7 +139,7 @@ A full copy is done by first backing up the data to external storage, and then r ### YSQL tables -You can add databases containing colocated tables to the xCluster configuration as long as the underlying database is v2.18.1.0 or later. Colocated tables on the source and target should be created with the same colocation ID if they already exist on both the source and target prior to setup. Refer to [xCluster and colocation](../../../../architecture/docdb-sharding/colocated-tables/#xcluster-and-colocation). +You can add databases containing colocated tables to the xCluster configuration as long as the underlying database is v2.18.1.0 or later. Colocated tables on the source and target should be created with the same colocation ID if they already exist on both the source and target prior to setup. Refer to [xCluster and colocation](../../../../explore/colocation/#xcluster-and-colocation). 
If a [full copy](#full-copy-during-xcluster-setup) is required, the entire database is recreated on the target universe from the current database on the source universe. Be sure to keep the set of tables the same at all times on both the source and target universes in these databases by following the steps in [Manage tables and indexes](../xcluster-replication-ddl/). diff --git a/docs/content/stable/api/ycql/type_jsonb.md b/docs/content/stable/api/ycql/type_jsonb.md index 41eb97ab5d4b..e594d484c843 100644 --- a/docs/content/stable/api/ycql/type_jsonb.md +++ b/docs/content/stable/api/ycql/type_jsonb.md @@ -241,6 +241,6 @@ Note that JSONB upsert only works for JSON objects and not for other data types ## See also -- [Explore JSON documents](../../../develop/learn/data-types-ycql/#jsonb) +- [Explore JSON documents](../../../explore/ycql-language/jsonb-ycql) - [Data types](..#data-types) - [Secondary indexes with JSONB](../../../explore/ycql-language/indexes-constraints/secondary-indexes-with-jsonb-ycql/) diff --git a/docs/content/stable/architecture/design-goals.md b/docs/content/stable/architecture/design-goals.md index cb3463306bd3..777b73cf809f 100644 --- a/docs/content/stable/architecture/design-goals.md +++ b/docs/content/stable/architecture/design-goals.md @@ -76,7 +76,7 @@ YugabyteDB has been designed with operational simplicity in mind, providing feat ## Heterogeneous workload support -Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](/preview/tutorials/azure/azure-event-hubs/), [time-series](../../develop/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads. 
+Depending on the use case, the database may need to support diverse workloads, such as [transactional processing](../../benchmark/tpcc/), [analytical queries](../../sample-data/retail-analytics/), [real-time data ingestion](/preview/tutorials/azure/azure-event-hubs/), [time-series](../../develop/data-modeling/common-patterns/timeseries/), and [key-value](../../benchmark/key-value-workload-ycql/) workloads. ## Transaction isolation levels diff --git a/docs/content/stable/develop/_index.md b/docs/content/stable/develop/_index.md index 6b10512223dc..ee96a1d598fb 100644 --- a/docs/content/stable/develop/_index.md +++ b/docs/content/stable/develop/_index.md @@ -8,60 +8,74 @@ image: /images/section_icons/index/develop.png type: indexpage --- -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} +## Data modeling + +Although YugabyteDB is fully SQL compatible, modeling data for a distributed database is quite different from modeling for a monolithic database like MySQL or PostgreSQL. This is because the table data is distributed across different nodes. You must understand how to model your data for efficient storage and retrieval from a distributed system. + +{{}} +To understand how to model your data for YugabyteDB, see [Distributed data modeling](./data-modeling/). +{{}} + +## Global applications + +Today's applications have to cater to users distributed across the globe. Running applications across multiple data centers while providing the best user experience is no trivial task. Yugabyte provides some battle-tested design patterns for your global applications. + +{{}} +To learn more about building global applications, see [Build global applications](./build-global-apps/). 
+{{}} + +## Multi-cloud applications + +A multi-cloud strategy provides the flexibility to use the optimal computing environment for each specific workload, helps avoid vendor lock-in, lets you place data close to the users, and can minimize cost by choosing optimal pricing and performance of various cloud providers. You can also opt for a hybrid model as your path to migration onto the cloud. + +{{}} +To understand how to build a multi-cloud setup with YugabyteDB, see [Build multi-cloud applications](./multi-cloud/). +{{}} + +## Application development + +Although building scalable applications on top of YugabyteDB is straightforward, you need to understand certain fundamental concepts like transactions, search, and more to make the best use of them. + +{{}} +To learn how to build applications on top of YugabyteDB, see [Learn app development](./learn/). +{{}} + +## Best practices + +Use these best practices to build distributed applications on top of YugabyteDB; this includes a list of techniques that you can adopt to make your application perform its best. + +{{}} +For more details, see [Best practices](./best-practices-ysql). +{{}} + +## Drivers and ORMs + +To communicate with YugabyteDB, applications need to use drivers. Applications can also be built using Object-Relational mappings, a technique used to communicate with the database using object-oriented techniques. We've tested various drivers and ORMs in multiple languages with the optimal configurations to get your applications up and running. + +{{}} +For the list of drivers and ORMs with sample code, see [Drivers and ORMs](../drivers-orms/). +{{}} + +## Quality of service + +Although YugabyteDB can scale horizontally when needed, it also includes safety measures and settings such as rate-limiting, admission control, transaction priorities, and more, to ensure applications can maintain a high quality of service for all users when the system comes under heavy load. 
+ +{{}} +To learn more about how to use rate-limiting and other features, see [Quality of service](./quality-of-service/). +{{}} + +## Cloud-native development + +Cloud-native development refers to building and running applications that fully exploit the advantages of cloud computing without needing to install any software on your development machine. Two prominent tools for cloud-native development environments are Gitpod and GitHub Codespaces. Both provide cloud-based development environments, but they have their own features and use cases. + +{{}} +To learn more about how to use browser-based IDEs, see [Cloud-native development](./gitdev/). +{{}} + +## Tutorials + +Yugabyte provides multiple step-by-step guides for building scalable and fault-tolerant applications with YugabyteDB using your favorite programming language, services, and frameworks, including Kafka, Gen-AI, and more. + +{{}} +For step-by-step guides for various frameworks, see [Tutorials](/preview/tutorials/). +{{}} diff --git a/docs/content/stable/develop/common-patterns/timeseries/_index.md b/docs/content/stable/develop/common-patterns/timeseries/_index.md deleted file mode 100644 index d9f6f09a6538..000000000000 --- a/docs/content/stable/develop/common-patterns/timeseries/_index.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Time series data model -headerTitle: Time series data model -linkTitle: Time series -description: Explore the Time series data model -headcontent: Handle large amounts of data ordered by time -menu: - stable: - identifier: common-patterns-timeseries - parent: common-patterns - weight: 100 -type: indexpage ---- - -Time series data are measurements or events that are tracked and monitored over time. This could be server metrics, application performance monitoring, network data, sensor data, events, clicks, trades in a market, and many other types of analytics data. A time series data model is designed specifically for handling large amounts of data that are ordered by time. 
- -Although YugabyteDB is hash sharded by default, it also supports range sharding, where the data is ordered and split at specific boundaries. - -A time series pattern works best for range queries where you need to look up items in a given time range. - -You can use the following common patterns to store and retrieve time series data in YugabyteDB in both distributed and ordered manner: - -- **Global ordering by time** - - In this pattern, all your data is ordered by time across different tablets. - - To understand how to efficiently store and retrieve data in this pattern, see [Global ordering by time](./global-ordering). - -- **Ordering by time per entity** - - In this pattern, the data is ordered by time in a specific entity. - - To understand how to distribute the entities effectively and avoid hot shards, see [Ordering by time per entity](./ordering-by-entity). - -- **Automatic data expiration** - - In some scenarios, you don't want data lying around for a long time as they may not be needed or you have rules in your organization that you cannot store specific data longer than a particular duration. For such cases, you can set a time-to-live value on rows, columns, and the table itself. - - For more details, see [Automatic data expiration](./data-expiry). - -- **Partitioning** - - When you have a lot of data that needs to deleted regularly, you can opt to partition your data. This also has speed advantages in some cases. - - For more details, see [Partitioning by time](./partitioning-by-time). 
diff --git a/docs/content/stable/develop/data-modeling/_index.md b/docs/content/stable/develop/data-modeling/_index.md new file mode 100644 index 000000000000..d480c150fb68 --- /dev/null +++ b/docs/content/stable/develop/data-modeling/_index.md @@ -0,0 +1,62 @@ +--- +title: Distributed Data modeling +linkTitle: Data modeling +description: Learn to develop YugabyteDB applications +image: fa-sharp fa-light fa-objects-column +menu: + stable: + identifier: data-modeling + parent: develop + weight: 100 +type: indexpage +--- + +Data modeling is the process of defining the structure, organization, and relationships of data in a database. In a distributed SQL database, this process becomes even more crucial due to the complexities introduced by data distribution, replication, and consistency. To fully leverage the benefits offered by YugabyteDB, you need to approach data modeling with a distributed mindset. Data modeling for distributed SQL databases requires a careful balance of theoretical principles and practical considerations. + +## Organization + +In YugabyteDB, data is stored as rows and columns in tables; tables are organized under schemas and databases. + +{{}} +To understand how to create and manage tables, schemas, and databases, see [Schemas and tables](../../explore/ysql-language-features/databases-schemas-tables). +{{}} + +## Sharding + +In YugabyteDB, table data is split into tablets, and distributed across multiple nodes in the cluster. Applications can connect to any node for storing and retrieving data. Because reads and writes can span multiple nodes, it's crucial to consider how table data is sharded and distributed when modeling your data. To design your tables and indexes for fast retrieval and storage in YugabyteDB, you first need to understand the [data distribution](../../explore/going-beyond-sql/data-sharding) schemes: Hash and Range sharding. 
+ +{{}} +To learn more about data distribution schemes, see [Configurable data sharding](../../explore/going-beyond-sql/data-sharding). +{{}} + +## Primary keys + +The primary key is the unique identifier for each row in the table. The distribution and ordering of table data depend on the primary key. + +{{}} +To design optimal primary keys for your tables, see [Primary keys](./primary-keys-ysql). +{{}} + +## Secondary indexes + +Indexes provide alternate access patterns for queries not involving the primary key of the table. With the help of an index, you can improve the access operations of your queries. + +{{}} +To design optimal indexes for faster lookup, see [Secondary indexes](./secondary-indexes-ysql). +{{}} + +## Hot shards + +In distributed systems, a hot-spot or hot-shard refers to a node that is overloaded with queries due to disproportionate traffic compared to other nodes in the cluster. + +{{}} +To understand the hot-shard problem and solutions to overcome the issue, see [Hot shards](./hot-shards-ysql). +{{}} + +## Table partitioning + +When the data in tables keeps growing, you can partition the tables for better performance and enhanced data management. Partitioning also makes it easier to drop older data by dropping partitions. In YugabyteDB, you can also use partitioning with Tablespaces to improve latency in multi-region scenarios and adhere to data residency laws like GDPR. + +{{}} +To understand partitioning in YugabyteDB, see [Table partitioning](./partitioning). 
+{{}} diff --git a/docs/content/stable/develop/common-patterns/_index.md b/docs/content/stable/develop/data-modeling/common-patterns/_index.md similarity index 92% rename from docs/content/stable/develop/common-patterns/_index.md rename to docs/content/stable/develop/data-modeling/common-patterns/_index.md index fdc69cb30b26..1cb61f861a79 100644 --- a/docs/content/stable/develop/common-patterns/_index.md +++ b/docs/content/stable/develop/data-modeling/common-patterns/_index.md @@ -8,8 +8,8 @@ image: /images/section_icons/architecture/distributed_acid.png menu: stable: identifier: common-patterns - parent: develop - weight: 400 + parent: data-modeling + weight: 600 type: indexpage showRightNav: true --- @@ -35,7 +35,9 @@ A good example would be the speed sensor in a car that tracks the speed of a car An insurance company could use the data to investigate accidents or an automobile company could track various sensors and improve the performance of the car. This could amount to billions of data points. +{{}} For more information on storing and retrieving such vast amounts of ordered data in YugabyteDB, see [Time series data model](./timeseries). +{{}} ## Key-value @@ -50,13 +52,17 @@ user2.name = "Harry Potter" user2.country = "UK" ``` -Key-value stores are expected to be some of the fastest storage data models. For more information on using YugabyteDB for key-value stores, see [Key-value data model](./keyvalue). +Key-value stores are expected to be some of the fastest storage data models. + +{{}} +For more information on using YugabyteDB for key-value stores, see [Key-value data model](./keyvalue). +{{}} ## Wide-column In a wide-column data model, the data is organized as rows and columns. Each row is identified by a row `id` or `name` and each column is identified by a column `id` or `name`. Each row can have any number of columns attached to it. You can visualize it as a table-like structure where some of the cells are empty. 
For example: -```sql{.nocopy} +```caddyfile{.nocopy} | | col-1 | col-2 | col-3 | | ----- | ----- | ----- | ----- | | row-1 | a | | c | @@ -66,7 +72,7 @@ In a wide-column data model, the data is organized as rows and columns. Each row To retrieve specific cells, you can issue commands similar to the following: -```sql{.nocopy} +```python{.nocopy} get(row-1, col-3) ==> c get(row-3, col-2) ==> NULL ``` diff --git a/docs/content/stable/develop/common-patterns/keyvalue.md b/docs/content/stable/develop/data-modeling/common-patterns/keyvalue.md similarity index 94% rename from docs/content/stable/develop/common-patterns/keyvalue.md rename to docs/content/stable/develop/data-modeling/common-patterns/keyvalue.md index 0139def2b3c1..a99aff285714 100644 --- a/docs/content/stable/develop/common-patterns/keyvalue.md +++ b/docs/content/stable/develop/data-modeling/common-patterns/keyvalue.md @@ -22,8 +22,8 @@ With these three simple functionalities, key-value stores have carved themselves YugabyteDB provides several advantages when used as a key-value store: -- YugabyteDB internally [stores data](../../../architecture/docdb/) as a collection of key-value pairs and therefore automatically excels as a key-value store. -- Being [distributed by design](../../../architecture/transactions/distributed-txns/), YugabyteDB also naturally acts as a distributed key-value store. +- YugabyteDB internally [stores data](../../../../architecture/docdb/data-model/) as a collection of key-value pairs and therefore automatically excels as a key-value store. +- Being [distributed by design](../../../../architecture/transactions/distributed-txns/), YugabyteDB also naturally acts as a distributed key-value store. - YugabyteDB inherently provides consistency of data because of Raft replication, which is typically not guaranteed by other key-value stores. ## Use cases @@ -36,7 +36,6 @@ YugabyteDB provides several advantages when used as a key-value store: 1. 
**Shopping cart** : A user's shopping cart can be represented as a JSON or Hstore and stored under a key (for example, `user1.cart`). Given the strong consistency and resilience offered by YugabyteDB, the cart information will not be lost even in case of disasters. - ## Simple scenario Consider a scenario where you want to store multiple details related to users like `id`, `name`, `country`. For this, you could adopt a simple key-value schema where each attribute is a separate key, such as the following where the key contains both the `id` and the attribute name while the value is the value of the attribute, like: @@ -234,4 +233,4 @@ you will notice that the `name` attribute has been removed for user `id=1`. ## Learn more - [Hstore](https://www.postgresql.org/docs/11/hstore.html) -- [Json](../../../explore/ysql-language-features/jsonb-ysql/) \ No newline at end of file +- [Json](../../../../explore/ysql-language-features/jsonb-ysql/) \ No newline at end of file diff --git a/docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md new file mode 100644 index 000000000000..a70478bc3c06 --- /dev/null +++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/_index.md @@ -0,0 +1,53 @@ +--- +title: Time series data model +headerTitle: Time series data model +linkTitle: Time series +description: Explore the Time series data model +headcontent: Handle large amounts of data ordered by time +menu: + stable: + identifier: common-patterns-timeseries + parent: common-patterns + weight: 100 +type: indexpage +--- + +Time series data are measurements or events that are tracked and monitored over time. This could be server metrics, application performance monitoring, network data, sensor data, events, clicks, trades in a market, and many other types of analytics data. 
A time series data model is designed specifically for handling large amounts of data that are ordered by time. + +Although YugabyteDB is hash sharded by default, it also supports range sharding, where the data is ordered and split at specific boundaries. + +A time series pattern works best for range queries where you need to look up items in a given time range. + +You can use the following common patterns to store and retrieve time series data in YugabyteDB in both a distributed and an ordered manner: + +## Global ordering by time + +In this pattern, all your data is ordered by time across different tablets. + +{{}} +To understand how to efficiently store and retrieve data in this pattern, see [Global ordering by time](./global-ordering). +{{}} + +## Ordering by time per entity + +In this pattern, the data is ordered by time in a specific entity. + +{{}} +To understand how to distribute the entities effectively and avoid hot shards, see [Ordering by time per entity](./ordering-by-entity). +{{}} + +## Automatic data expiration + +In some scenarios, you don't want data lying around for a long time as they may not be needed or you have rules in your organization that you cannot store specific data longer than a particular duration. For such cases, you can set a time-to-live value on rows, columns, and the table itself. + +{{}} +For more details, see [Automatic data expiration](./data-expiry). +{{}} + +## Partitioning + +When you have a lot of data that needs to be deleted regularly, you can opt to partition your data. This also has speed advantages in some cases. + +{{}} +For more details, see [Partitioning by time](./partitioning-by-time). 
+{{}} diff --git a/docs/content/stable/develop/common-patterns/timeseries/data-expiry.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/data-expiry.md similarity index 93% rename from docs/content/stable/develop/common-patterns/timeseries/data-expiry.md rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/data-expiry.md index f9de3570ac1c..adc6990f1506 100644 --- a/docs/content/stable/develop/common-patterns/timeseries/data-expiry.md +++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/data-expiry.md @@ -4,6 +4,7 @@ headerTitle: Automatic data expiration linkTitle: Automatic data expiration description: Expire data using the `USING TTL` operator headcontent: Expire data using time-to-live +badges: ycql menu: stable: identifier: timeseries-automatic-expiration @@ -15,7 +16,7 @@ type: docs Consider a scenario where you only need the last few values and the older data is not of any value and can be purged. Typically, this requires setting up a separate background job. Using YugabyteDB however, you can set an expiration value for columns using the `USING TTL` operator. {{}} -TTL-based expiration is only available in [YCQL](../../../../api/ycql/). +TTL-based expiration is only available in [YCQL](/{{}}/api/ycql/). {{}} ## Setup @@ -99,8 +100,8 @@ Note that the row is present but the value for the `speed` column is `null`. Instead of explicitly setting the TTL at the row or column level, you can set a TTL on the table. This also has the benefit of saving space as the TTL value is stored in only one place and not per row or column. -Define table-level TTL using the [default_time_to_live property](../../../../api/ycql/ddl_create_table/#table-properties-1). +Define table-level TTL using the [default_time_to_live property](/{{}}/api/ycql/ddl_create_table/#table-properties-1). 
## Learn more -- [TTL for data expiration](../../../learn/ttl-data-expiration-ycql/) \ No newline at end of file +- [TTL for data expiration](/{{}}/develop/learn/ttl-data-expiration-ycql/) \ No newline at end of file diff --git a/docs/content/stable/develop/common-patterns/timeseries/global-ordering.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/global-ordering.md similarity index 99% rename from docs/content/stable/develop/common-patterns/timeseries/global-ordering.md rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/global-ordering.md index 8a6e897d4203..92dfa290ff78 100644 --- a/docs/content/stable/develop/common-patterns/timeseries/global-ordering.md +++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/global-ordering.md @@ -4,6 +4,7 @@ headerTitle: Global ordering by time linkTitle: Global ordering by time description: Distribute your time-ordered data and retrieve data efficiently headcontent: Distribute time-ordered data and retrieve data efficiently +badges: ysql menu: stable: identifier: timeseries-global-ordering diff --git a/docs/content/stable/develop/common-patterns/timeseries/ordering-by-entity.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md similarity index 99% rename from docs/content/stable/develop/common-patterns/timeseries/ordering-by-entity.md rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md index 7dafc460e358..784617b00660 100644 --- a/docs/content/stable/develop/common-patterns/timeseries/ordering-by-entity.md +++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/ordering-by-entity.md @@ -4,6 +4,7 @@ headerTitle: Order by time per entity linkTitle: Ordering by time per entity description: Keep entity data together using Entity-wise or Bucket-based ordering headcontent: Keep entity data together in a time series data model +badges: ysql menu: 
stable: identifier: timeseries-entity-ordering diff --git a/docs/content/stable/develop/common-patterns/timeseries/partitioning-by-time.md b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md similarity index 95% rename from docs/content/stable/develop/common-patterns/timeseries/partitioning-by-time.md rename to docs/content/stable/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md index 7b0d2b36070d..7a1b479b290a 100644 --- a/docs/content/stable/develop/common-patterns/timeseries/partitioning-by-time.md +++ b/docs/content/stable/develop/data-modeling/common-patterns/timeseries/partitioning-by-time.md @@ -4,6 +4,9 @@ headerTitle: Partition data by time linkTitle: Partition data by time description: Partition data for efficient data management headcontent: Partition data for efficient data management +badges: ysql +aliases: + - /preview/common-patterns/timeseries/partitioning-by-time/ menu: stable: identifier: timeseries-partition-by-time @@ -17,7 +20,7 @@ Partitioning refers to splitting what is logically one large table into smaller The following example describes the advantages of partitions in more detail. {{}} -Partitioning is only available in [YSQL](../../../../api/ysql/). +Partitioning is only available in [YSQL](/{{}}/api/ysql/). 
{{}} ## Setup @@ -140,4 +143,4 @@ Time: 103.214 ms ## Learn more -- [Table partitioning](../../../../explore/ysql-language-features/advanced-features/partitions/) +- [Table partitioning](/{{}}/explore/ysql-language-features/advanced-features/partitions/) diff --git a/docs/content/stable/develop/data-modeling/hot-shards-ysql.md b/docs/content/stable/develop/data-modeling/hot-shards-ysql.md new file mode 100644 index 000000000000..c0bc7c59b920 --- /dev/null +++ b/docs/content/stable/develop/data-modeling/hot-shards-ysql.md @@ -0,0 +1,129 @@ +--- +title: Avoiding hotspots in YugabyteDB +headerTitle: Avoiding hotspots +linkTitle: Hot shards +badges: ysql +menu: + stable: + identifier: data-modeling-hot-shard + parent: data-modeling + weight: 300 +type: docs +--- + +A hot shard is a common problem in data retrieval where a specific node becomes a performance bottleneck due to disproportionately high traffic or workload compared to other nodes in the system. This imbalance can lead to various issues, such as degraded system performance, increased latency, and potential system failures. + +This typically happens because of a mismatch between the query pattern and the data distribution. Choose the primary key carefully during schema design so that you do not accidentally create hotspots in your database. + +{{}} +The hot shard issue can occur for both tables and indexes. +{{}} + +The following examples illustrate the problem and its solution. + + +{{