From a28daa05217b37f87d546266f81ae5c1d95fbbd0 Mon Sep 17 00:00:00 2001 From: yangj1211 <153493538+yangj1211@users.noreply.github.com> Date: Tue, 30 Apr 2024 11:45:33 +0800 Subject: [PATCH] update doc about Vector (#577) * update doc about vector * update doc about vector * fix --- .../{1.1-vector.md => vector.md} | 84 +++++++++++++++---- docs/MatrixOne/Maintain/mo_ctl.md | 2 +- .../{1.1-Vector => Vector}/arithmetic.md | 0 .../Vector/cosine_distance.md | 60 +++++++++++++ .../cosine_similarity.md | 0 .../{1.1-Vector => Vector}/inner_product.md | 0 .../{1.1-Vector => Vector}/l1_norm.md | 0 .../Vector/l2_distance.md | 42 ++++++++++ .../{1.1-Vector => Vector}/l2_norm.md | 0 .../{1.1-Vector => Vector}/misc.md | 0 .../Vector/normalize_l2.md | 40 +++++++++ .../{1.1-Vector => Vector}/vector_dims.md | 0 mkdocs.yml | 12 +++ 13 files changed, 224 insertions(+), 16 deletions(-) rename docs/MatrixOne/Develop/schema-design/{1.1-vector.md => vector.md} (74%) rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/arithmetic.md (100%) create mode 100644 docs/MatrixOne/Reference/Functions-and-Operators/Vector/cosine_distance.md rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/cosine_similarity.md (100%) rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/inner_product.md (100%) rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/l1_norm.md (100%) create mode 100644 docs/MatrixOne/Reference/Functions-and-Operators/Vector/l2_distance.md rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/l2_norm.md (100%) rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/misc.md (100%) create mode 100644 docs/MatrixOne/Reference/Functions-and-Operators/Vector/normalize_l2.md rename docs/MatrixOne/Reference/Functions-and-Operators/{1.1-Vector => Vector}/vector_dims.md (100%) diff --git a/docs/MatrixOne/Develop/schema-design/1.1-vector.md 
b/docs/MatrixOne/Develop/schema-design/vector.md similarity index 74% rename from docs/MatrixOne/Develop/schema-design/1.1-vector.md rename to docs/MatrixOne/Develop/schema-design/vector.md index f20aa0bdd..4362d75ef 100644 --- a/docs/MatrixOne/Develop/schema-design/1.1-vector.md +++ b/docs/MatrixOne/Develop/schema-design/vector.md @@ -105,19 +105,72 @@ CREATE TABLE t1 ( ); -- Insert some sample data -INSERT INTO t1 (id,b) VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[2,1,1]'), (4, '[7,8,9]'), (5, '[0,0,0]'), (6, '[3,1,2]'); +INSERT INTO t1 (id,b) VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[2,1,1]'), (4, '[7,8,9]'), (5, '[2,2,2]'), (6, '[3,1,2]'); + +mysql> select * from t1; ++------+-----------+ +| id | b | ++------+-----------+ +| 1 | [1, 2, 3] | +| 2 | [4, 5, 6] | +| 3 | [2, 1, 1] | +| 4 | [7, 8, 9] | +| 5 | [2, 2, 2] | +| 6 | [3, 1, 2] | ++------+-----------+ +6 rows in set (0.01 sec) -- Top K Queries using l1_distance -SELECT * FROM t1 ORDER BY l1_norm(b - '[3,1,2]') LIMIT 5; +mysql> SELECT * FROM t1 ORDER BY l1_norm(b - '[3,1,2]') LIMIT 5; ++------+-----------+ +| id | b | ++------+-----------+ +| 6 | [3, 1, 2] | +| 5 | [2, 2, 2] | +| 3 | [2, 1, 1] | +| 1 | [1, 2, 3] | +| 2 | [4, 5, 6] | ++------+-----------+ +5 rows in set (0.00 sec) -- Top K Queries using l2_distance -SELECT * FROM t1 ORDER BY l2_norm(b - '[3,1,2]') LIMIT 5; +mysql> SELECT * FROM t1 ORDER BY l2_distance(b,'[3,1,2]') LIMIT 5; ++------+-----------+ +| id | b | ++------+-----------+ +| 6 | [3, 1, 2] | +| 5 | [2, 2, 2] | +| 3 | [2, 1, 1] | +| 1 | [1, 2, 3] | +| 2 | [4, 5, 6] | ++------+-----------+ +5 rows in set (0.00 sec) -- Top K Queries using cosine similarity -SELECT * FROM t1 ORDER BY cosine_similarity(b, '[3,1,2]') LIMIT 5; +mysql> SELECT * FROM t1 ORDER BY cosine_similarity(b, '[3,1,2]') LIMIT 5; ++------+-----------+ +| id | b | ++------+-----------+ +| 1 | [1, 2, 3] | +| 2 | [4, 5, 6] | +| 4 | [7, 8, 9] | +| 5 | [2, 2, 2] | +| 3 | [2, 1, 1] | ++------+-----------+ +5 rows in set (0.00 
sec) -- Top K Queries using cosine distance -SELECT * FROM t1 ORDER BY 1 - cosine_similarity(b, '[3,1,2]') LIMIT 5; +mysql> SELECT * FROM t1 ORDER BY cosine_distance(b, '[3,1,2]') LIMIT 5; ++------+-----------+ +| id | b | ++------+-----------+ +| 6 | [3, 1, 2] | +| 3 | [2, 1, 1] | +| 5 | [2, 2, 2] | +| 4 | [7, 8, 9] | +| 2 | [4, 5, 6] | ++------+-----------+ +5 rows in set (0.00 sec) ``` These queries demonstrate retrieving the top 5 vectors most similar to the given vector `[3,1,2]` using different distance and similarity measures. With these queries, you can find the data that best matches your target vector based on different measurement criteria. @@ -157,18 +210,19 @@ These queries demonstrate retrieving the top 5 vectors most similar to the given - Currently, MatrixOne Vector type supports float32 and float64 types. - Vector cannot be Primary Key or Unique Key. -- Vector maximum dimension is 65536. - -Certainly, let's refine the original text in English: +- Vector maximum dimension is 65535. 
## Reference For more documentation on vector functions, see: -- [inner_product()](../../Reference/Functions-and-Operators/1.1-Vector/inner_product.md) -- [l1_norm()](../../Reference/Functions-and-Operators/1.1-Vector/l1_norm.md) -- [l2_norm()](../../Reference/Functions-and-Operators/1.1-Vector/l2_norm.md) -- [cosine_similarity()](../../Reference/Functions-and-Operators/1.1-Vector/cosine_similarity.md) -- [vector_dims()](../../Reference/Functions-and-Operators/1.1-Vector/vector_dims.md) -- [Arithemetic Operators](../../Reference/Functions-and-Operators/1.1-Vector/arithmetic.md) -- [Misc Functions](../../Reference/Functions-and-Operators/1.1-Vector/misc.md) +- [inner_product()](../../Reference/Functions-and-Operators/Vector/inner_product.md) +- [l1_norm()](../../Reference/Functions-and-Operators/Vector/l1_norm.md) +- [l2_norm()](../../Reference/Functions-and-Operators/Vector/l2_norm.md) +- [l2_distance()](../../Reference/Functions-and-Operators/Vector/l2_distance.md) +- [cosine_similarity()](../../Reference/Functions-and-Operators/Vector/cosine_similarity.md) +- [cosine_distance()](../../Reference/Functions-and-Operators/Vector/cosine_distance.md) +- [vector_dims()](../../Reference/Functions-and-Operators/Vector/vector_dims.md) +- [normalize_l2()](../../Reference/Functions-and-Operators/Vector/normalize_l2.md) +- [Arithmetic Operators](../../Reference/Functions-and-Operators/Vector/arithmetic.md) +- [Misc Functions](../../Reference/Functions-and-Operators/Vector/misc.md) diff --git a/docs/MatrixOne/Maintain/mo_ctl.md b/docs/MatrixOne/Maintain/mo_ctl.md index 4502cc902..73ba400a8 100644 --- a/docs/MatrixOne/Maintain/mo_ctl.md +++ b/docs/MatrixOne/Maintain/mo_ctl.md @@ -15,7 +15,7 @@ The operating systems that `mo_ctl` has adapted so far are shown in the table be The current function list of `mo_ctl` is shown in the table below. 
| Command | Function | -| ------- | -------- || +| ------- | -------- | | `mo_ctl help` | See a list of statements and functions for the `mo_ctl` tool itself | | `mo_ctl precheck` | Check dependencies required for MatrixOne source code installation, namely golang, gcc, git, MySQL Client | | `mo_ctl deploy` | Download and install and compile the corresponding version of MatrixOne; the default is to install the latest stable version | diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/arithmetic.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/arithmetic.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/arithmetic.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/arithmetic.md diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/Vector/cosine_distance.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/cosine_distance.md new file mode 100644 index 000000000..101c58d88 --- /dev/null +++ b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/cosine_distance.md @@ -0,0 +1,60 @@ +# COSINE_DISTANCE() + +## Description + +The `COSINE_DISTANCE()` function is used to calculate the cosine distance between two vectors. + +Cosine Distance is a measure of the directional difference between two vectors, typically defined as 1 minus the cosine similarity ([Cosine Similarity](cosine_similarity.md)). The value of cosine distance ranges from 0 to 2. A value of 0 indicates that the directions of the two vectors are exactly the same (minimum distance). A value of 2 indicates that the directions of the two vectors are exactly opposite (maximum distance). In text analysis, cosine distance can be used to measure the similarity between documents. Since it only considers the direction of the vectors and not their magnitude, it is fair for comparisons between long and short texts. + +
+ +
+ +## Syntax + +``` +> SELECT COSINE_DISTANCE(vector1, vector2) FROM tbl; +``` + +## Examples + +```sql +drop table if exists vec_table; +create table vec_table(a int, b vecf32(3), c vecf64(3)); +insert into vec_table values(1, "[1,2,3]", "[4,5,6]"); +mysql> select * from vec_table; ++------+-----------+-----------+ +| a | b | c | ++------+-----------+-----------+ +| 1 | [1, 2, 3] | [4, 5, 6] | ++------+-----------+-----------+ +1 row in set (0.01 sec) + +mysql> select cosine_distance(b,c) from vec_table; ++-----------------------+ +| cosine_distance(b, c) | ++-----------------------+ +| 0.0253681538029239 | ++-----------------------+ +1 row in set (0.00 sec) + +mysql> select cosine_distance(b,"[1,2,3]") from vec_table; ++-----------------------------+ +| cosine_distance(b, [1,2,3]) | ++-----------------------------+ +| 0 | ++-----------------------------+ +1 row in set (0.00 sec) + +mysql> select cosine_distance(b,"[-1,-2,-3]") from vec_table; ++--------------------------------+ +| cosine_distance(b, [-1,-2,-3]) | ++--------------------------------+ +| 2 | ++--------------------------------+ +1 row in set (0.00 sec) +``` + +## Constraints + +When using `COSINE_DISTANCE()`, the input vectors must not be zero vectors, because that would require a division by zero, which is mathematically undefined. In practical applications, we generally consider the cosine similarity between a zero vector and any other vector to be zero, because there is no directional similarity between them. 
\ No newline at end of file diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/cosine_similarity.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/cosine_similarity.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/cosine_similarity.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/cosine_similarity.md diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/inner_product.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/inner_product.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/inner_product.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/inner_product.md diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/l1_norm.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/l1_norm.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/l1_norm.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/l1_norm.md diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/Vector/l2_distance.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/l2_distance.md new file mode 100644 index 000000000..f940a427e --- /dev/null +++ b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/l2_distance.md @@ -0,0 +1,42 @@ +# L2_DISTANCE() + +## Description + +The `L2_DISTANCE()` function is used to calculate the Euclidean distance between two vectors. It returns a value of the FLOAT64 type. + +L2 distance, also known as Euclidean distance, is one of the most commonly used methods of measuring distance in vector spaces. It measures the straight-line distance between two points in multidimensional space. L2 distance has many practical applications, including fields such as machine learning, computer vision, and spatial analysis. + +
+ +
+ +## Syntax + +``` +> SELECT L2_DISTANCE(vector, const_vector) FROM tbl; +``` + +## Examples + +```sql +drop table if exists vec_table; +create table vec_table(a int, b vecf32(3), c vecf64(3)); +insert into vec_table values(1, "[1,2,3]", "[4,5,6]"),(2, "[1,1,1]", "[2,2,2]"); +mysql> select * from vec_table; ++------+-----------+-----------+ +| a | b | c | ++------+-----------+-----------+ +| 1 | [1, 2, 3] | [4, 5, 6] | +| 2 | [1, 1, 1] | [2, 2, 2] | ++------+-----------+-----------+ +2 rows in set (0.00 sec) + +mysql> select l2_distance(b,c) from vec_table; ++--------------------+ +| l2_distance(b, c) | ++--------------------+ +| 5.196152422706632 | +| 1.7320508075688772 | ++--------------------+ +2 rows in set (0.00 sec) +``` diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/l2_norm.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/l2_norm.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/l2_norm.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/l2_norm.md diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/misc.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/misc.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/misc.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/misc.md diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/Vector/normalize_l2.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/normalize_l2.md new file mode 100644 index 000000000..80848b239 --- /dev/null +++ b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/normalize_l2.md @@ -0,0 +1,40 @@ +# NORMALIZE_L2() + +## Description + +The `NORMALIZE_L2()` function performs Euclidean normalization (L2 normalization) on a vector. + +The L2 norm is the square root of the sum of the squares of the vector's elements. 
Therefore, the purpose of L2 normalization is to make the length (or norm) of the vector equal to 1; a vector with this property is often referred to as a unit vector. This method of normalization is particularly useful in machine learning, especially when dealing with feature vectors. It can help standardize the scale of features, thereby improving the performance of the algorithm. + +
+ +
+ +## Syntax + +``` +> SELECT NORMALIZE_L2(vector_column) FROM tbl; +``` + +## Examples + +```sql +drop table if exists vec_table; +create table vec_table(a int, b vecf32(3), c vecf64(3)); +insert into vec_table values(1, "[1,2,3]", "[4,5,6]"); +mysql> select * from vec_table; ++------+-----------+-----------+ +| a | b | c | ++------+-----------+-----------+ +| 1 | [1, 2, 3] | [4, 5, 6] | ++------+-----------+-----------+ +1 row in set (0.00 sec) + +mysql> select normalize_l2(b) from vec_table; ++-------------------------------------+ +| normalize_l2(b) | ++-------------------------------------+ +| [0.26726124, 0.5345225, 0.80178374] | ++-------------------------------------+ +1 row in set (0.00 sec) +``` diff --git a/docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/vector_dims.md b/docs/MatrixOne/Reference/Functions-and-Operators/Vector/vector_dims.md similarity index 100% rename from docs/MatrixOne/Reference/Functions-and-Operators/1.1-Vector/vector_dims.md rename to docs/MatrixOne/Reference/Functions-and-Operators/Vector/vector_dims.md diff --git a/mkdocs.yml b/mkdocs.yml index 4b4c52462..187f16f82 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,6 +95,7 @@ nav: - Create View: MatrixOne/Develop/schema-design/create-view.md - Create Temporary Table: MatrixOne/Develop/schema-design/create-temporary-table.md - Create Secondary Index: MatrixOne/Develop/schema-design/create-secondary-index.md + - Vector: MatrixOne/Develop/schema-design/vector.md - Data Integrity: - Data Integrity Constraints Overview: MatrixOne/Develop/schema-design/data-integrity/overview-of-integrity-constraint-types.md - NOT NULL Constraints: MatrixOne/Develop/schema-design/data-integrity/not-null-constraints.md @@ -513,6 +514,17 @@ nav: - REGEXP_SUBSTR(): MatrixOne/Reference/Functions-and-Operators/String/Regular-Expressions/regexp-substr.md - Table: - UNNEST(): MatrixOne/Reference/Functions-and-Operators/Table/unnest.md + - Vector: + - Arithmetic Operators: 
MatrixOne/Reference/Functions-and-Operators/Vector/arithmetic.md + - COSINE_SIMILARITY(): MatrixOne/Reference/Functions-and-Operators/Vector/cosine_similarity.md + - COSINE_DISTANCE(): MatrixOne/Reference/Functions-and-Operators/Vector/cosine_distance.md + - INNER_PRODUCT(): MatrixOne/Reference/Functions-and-Operators/Vector/inner_product.md + - L1_NORM(): MatrixOne/Reference/Functions-and-Operators/Vector/l1_norm.md + - L2_NORM(): MatrixOne/Reference/Functions-and-Operators/Vector/l2_norm.md + - L2_DISTANCE(): MatrixOne/Reference/Functions-and-Operators/Vector/l2_distance.md + - Misc Functions: MatrixOne/Reference/Functions-and-Operators/Vector/misc.md + - VECTOR_DIMS(): MatrixOne/Reference/Functions-and-Operators/Vector/vector_dims.md + - NORMALIZE_L2(): MatrixOne/Reference/Functions-and-Operators/Vector/normalize_l2.md - Window Functions: - DENSE_RANK(): MatrixOne/Reference/Functions-and-Operators/Window-Functions/dense_rank.md - RANK(): MatrixOne/Reference/Functions-and-Operators/Window-Functions/rank.md