diff --git a/docs-2.0/3.ngql-guide/15.full-text-index-statements/1.search-with-text-based-index.md b/docs-2.0/3.ngql-guide/15.full-text-index-statements/1.search-with-text-based-index.md index 42925a0d012..4be371d4e7f 100644 --- a/docs-2.0/3.ngql-guide/15.full-text-index-statements/1.search-with-text-based-index.md +++ b/docs-2.0/3.ngql-guide/15.full-text-index-statements/1.search-with-text-based-index.md @@ -12,18 +12,21 @@ Before using the full-text index, make sure that you have deployed a Elasticsear Before using the full-text index, make sure that you know the [restrictions](../../4.deployment-and-installation/6.deploy-text-based-index/1.text-based-index-restrictions.md). -## Natural language full-text search +## Full Text Queries -A natural language search interprets the search string as a phrase in natural human language. The search is case-sensitive and by default prefixes the string with a match. For example, there are three vertices with the tag `player`. The tag `player` contains the property `name`. The `name` of these three vertices are `Kevin Durant`, `Tim Duncan`, and `David Beckham`. Now that the full-text index of `player.name` is established, only `David Beckham` will be queried when using the prefix search statement `LOOKUP ON player WHERE PREFIX(player.name,"D");`. +Full-text queries enable you to search for parsed text fields, using a parser with strict syntax to return content based on the query string provided. For details, see [Query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-dsl-query-string-query). ## Syntax ### Create full-text indexes ```ngql -CREATE FULLTEXT {TAG | EDGE} INDEX <index_name> ON {<tag_name> | <edge_name>} ([<prop_name_list>]); +CREATE FULLTEXT {TAG | EDGE} INDEX <index_name> ON {<tag_name> | <edge_name>} (<prop_name> [,<prop_name>]...) [ANALYZER="<analyzer_name>"]; ``` +- Composite indexes with multiple properties are supported when creating full-text indexes. +- `<analyzer_name>` is the name of the analyzer. The default value is `standard`. To use other analyzers (e.g. 
[IK Analysis](https://github.com/medcl/elasticsearch-analysis-ik)), you need to make sure that the corresponding analyzer is installed in Elasticsearch in advance. + ### Show full-text indexes ```ngql @@ -48,30 +51,17 @@ DROP FULLTEXT INDEX <index_name>; ### Use query options ```ngql -LOOKUP ON {<tag> | <edge_type>} WHERE <expression> [YIELD <return_list>]; - -<expression> ::= - PREFIX | WILDCARD | REGEXP | FUZZY +LOOKUP ON {<tag> | <edge_type>} WHERE ES_QUERY(<index_name>, "<text>") YIELD <return_list> [| LIMIT [<offset>,] <number_rows>]; <return_list> - <prop_name> [AS <prop_alias>] [, <prop_name> [AS <prop_alias>] ...] + <prop_name> [AS <prop_alias>] [, <prop_name> [AS <prop_alias>] ...] [, id(vertex) [AS <prop_alias>]] [, score() AS <score_alias>] ``` -- PREFIX(schema_name.prop_name, prefix_string, row_limit, timeout) - -- WILDCARD(schema_name.prop_name, wildcard_string, row_limit, timeout) - -- REGEXP(schema_name.prop_name, regexp_string, row_limit, timeout) - -- FUZZY(schema_name.prop_name, fuzzy_string, fuzziness, operator, row_limit, timeout) - - - `fuzziness` (optional): Maximum edit distance allowed for matching. The default value is `AUTO`. For other valid values and more information, see [Elasticsearch document](https://www.elastic.co/guide/en/elasticsearch/reference/6.8/common-options.html#fuzziness). - - - `operator` (optional): Boolean logic used to interpret the text. Valid values are `OR` (default) and `AND`. +- `index_name`: The name of the full-text index. -- `row_limit` (optional): Specifies the number of rows to return. The default value is `100`. +- `text`: Search conditions. For supported syntax, see [Query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax). -- `timeout` (optional): Specifies the timeout time. The default value is `200ms`. +- `score()`: The score calculated by doing N degree expansion for the eligible vertices. The default value is `1.0`. The higher the score, the higher the degree of match. The return value is sorted by default from highest to lowest score. 
For details, see [Search and Scoring in Lucene](https://lucene.apache.org/core/9_6_0/core/org/apache/lucene/search/package-summary.html#package.description). ## Examples @@ -80,99 +70,166 @@ LOOKUP ON { | } WHERE [YIELD ]; nebula> CREATE SPACE IF NOT EXISTS basketballplayer (partition_num=3,replica_factor=1, vid_type=fixed_string(30)); // This example signs in the text service. -nebula> SIGN IN TEXT SERVICE (127.0.0.1:9200, HTTP); +nebula> SIGN IN TEXT SERVICE (192.168.8.100:9200, HTTP); // This example checks the text service status. nebula> SHOW TEXT SEARCH CLIENTS; ++-----------------+-----------------+------+ +| Type | Host | Port | ++-----------------+-----------------+------+ +| "ELASTICSEARCH" | "192.168.8.100" | 9200 | ++-----------------+-----------------+------+ // This example switches the graph space. nebula> USE basketballplayer; // This example adds the listener to the NebulaGraph cluster. -nebula> ADD LISTENER ELASTICSEARCH 192.168.8.5:9789; +nebula> ADD LISTENER ELASTICSEARCH 192.168.8.100:9789; // This example checks the listener status. When the status is `Online`, the listener is ready. nebula> SHOW LISTENER; ++--------+-----------------+------------------------+-------------+ +| PartId | Type | Host | Host Status | ++--------+-----------------+------------------------+-------------+ +| 1 | "ELASTICSEARCH" | ""192.168.8.100":9789" | "ONLINE" | +| 2 | "ELASTICSEARCH" | ""192.168.8.100":9789" | "ONLINE" | +| 3 | "ELASTICSEARCH" | ""192.168.8.100":9789" | "ONLINE" | ++--------+-----------------+------------------------+-------------+ // This example creates the tag. -nebula> CREATE TAG IF NOT EXISTS player(name string, age int); +nebula> CREATE TAG IF NOT EXISTS player(name string, city string); -// This example creates the full-text index. The index name starts with "nebula_". -nebula> CREATE FULLTEXT TAG INDEX nebula_index_1 ON player(name); +// This example creates a single-attribute full-text index. 
+nebula> CREATE FULLTEXT TAG INDEX fulltext_index_1 ON player(name) ANALYZER="standard"; + +// This example creates a multi-attribute full-text index. +nebula> CREATE FULLTEXT TAG INDEX fulltext_index_2 ON player(name,city) ANALYZER="standard"; // This example rebuilds the full-text index. nebula> REBUILD FULLTEXT INDEX; // This example shows the full-text index. nebula> SHOW FULLTEXT INDEXES; -+------------------+-------------+-------------+--------+ -| Name | Schema Type | Schema Name | Fields | -+------------------+-------------+-------------+--------+ -| "nebula_index_1" | "Tag" | "player" | "name" | -+------------------+-------------+-------------+--------+ ++--------------------+-------------+-------------+--------------+------------+ +| Name | Schema Type | Schema Name | Fields | Analyzer | ++--------------------+-------------+-------------+--------------+------------+ +| "fulltext_index_1" | "Tag" | "player" | "name" | "standard" | +| "fulltext_index_2" | "Tag" | "player" | "name, city" | "standard" | ++--------------------+-------------+-------------+--------------+------------+ // This example inserts the test data. 
-nebula> INSERT VERTEX player(name, age) VALUES \ - "Russell Westbrook": ("Russell Westbrook", 30), \ - "Chris Paul": ("Chris Paul", 33),\ - "Boris Diaw": ("Boris Diaw", 36),\ - "David West": ("David West", 38),\ - "Danny Green": ("Danny Green", 31),\ - "Tim Duncan": ("Tim Duncan", 42),\ - "James Harden": ("James Harden", 29),\ - "Tony Parker": ("Tony Parker", 36),\ - "Aron Baynes": ("Aron Baynes", 32),\ - "Ben Simmons": ("Ben Simmons", 22),\ - "Blake Griffin": ("Blake Griffin", 30); +nebula> INSERT VERTEX player(name, city) VALUES \ + "Russell Westbrook": ("Russell Westbrook", "Los Angeles"), \ + "Chris Paul": ("Chris Paul", "Houston"),\ + "Boris Diaw": ("Boris Diaw", "Houston"),\ + "David West": ("David West", "Philadelphia"),\ + "Danny Green": ("Danny Green", "Philadelphia"),\ + "Tim Duncan": ("Tim Duncan", "New York"),\ + "James Harden": ("James Harden", "New York"),\ + "Tony Parker": ("Tony Parker", "Chicago"),\ + "Aron Baynes": ("Aron Baynes", "Chicago"),\ + "Ben Simmons": ("Ben Simmons", "Phoenix"),\ + "Blake Griffin": ("Blake Griffin", "Phoenix"); // These examples run test queries. 
-nebula> LOOKUP ON player WHERE PREFIX(player.name, "B") YIELD id(vertex); +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"Chris") YIELD id(vertex); ++--------------+ +| id(VERTEX) | ++--------------+ +| "Chris Paul" | ++--------------+ + +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"Harden") YIELD properties(vertex); ++----------------------------------------------------------------+ +| properties(VERTEX) | ++----------------------------------------------------------------+ +| {_vid: "James Harden", city: "New York", name: "James Harden"} | ++----------------------------------------------------------------+ + +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"Da*") YIELD properties(vertex); ++------------------------------------------------------------------+ +| properties(VERTEX) | ++------------------------------------------------------------------+ +| {_vid: "David West", city: "Philadelphia", name: "David West"} | +| {_vid: "Danny Green", city: "Philadelphia", name: "Danny Green"} | ++------------------------------------------------------------------+ + +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"*b*") YIELD id(vertex); ++---------------------+ +| id(VERTEX) | ++---------------------+ +| "Russell Westbrook" | +| "Boris Diaw" | +| "Aron Baynes" | +| "Ben Simmons" | +| "Blake Griffin" | ++---------------------+ + +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"*b*") YIELD id(vertex) | LIMIT 2,3; +-----------------+ | id(VERTEX) | +-----------------+ -| "Boris Diaw" | +| "Aron Baynes" | | "Ben Simmons" | | "Blake Griffin" | +-----------------+ -nebula> LOOKUP ON player WHERE WILDCARD(player.name, "*ri*") YIELD player.name, player.age; -+-----------------+-----+ -| name | age | -+-----------------+-----+ -| "Chris Paul" | 33 | -| "Boris Diaw" | 36 | -| "Blake Griffin" | 30 | -+-----------------+-----+ - -nebula> LOOKUP ON player WHERE WILDCARD(player.name, "*ri*") YIELD player.name, player.age | YIELD 
count(*); +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"*b*") YIELD id(vertex) | YIELD count(*); +----------+ | count(*) | +----------+ -| 3 | +| 5 | +----------+ -nebula> LOOKUP ON player WHERE REGEXP(player.name, "R.*") YIELD player.name, player.age; -+---------------------+-----+ -| name | age | -+---------------------+-----+ -| "Russell Westbrook" | 30 | -+---------------------+-----+ - -nebula> LOOKUP ON player WHERE REGEXP(player.name, ".*") YIELD id(vertex); -+---------------------+ -| id(VERTEX) | -+---------------------+ -| "Danny Green" | -| "David West" | -... - -nebula> LOOKUP ON player WHERE FUZZY(player.name, "Tim Dunncan", AUTO, OR) YIELD player.name; -+--------------+ -| name | -+--------------+ -| "Tim Duncan" | -+--------------+ - -// This example drops the full-text index. -nebula> DROP FULLTEXT INDEX nebula_index_1; +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"*b*") YIELD id(vertex), score() AS score; ++---------------------+-------+ +| id(VERTEX) | score | ++---------------------+-------+ +| "Russell Westbrook" | 1.0 | +| "Boris Diaw" | 1.0 | +| "Aron Baynes" | 1.0 | +| "Ben Simmons" | 1.0 | +| "Blake Griffin" | 1.0 | ++---------------------+-------+ + +// For documents containing a word `b`, its score will be multiplied by a weighting factor of 4, while for documents containing a word `c`, the default weighting factor of 1 is used. +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_1,"*b*^4 OR *c*") YIELD id(vertex), score() AS score; ++---------------------+-------+ +| id(VERTEX) | score | ++---------------------+-------+ +| "Russell Westbrook" | 4.0 | +| "Boris Diaw" | 4.0 | +| "Aron Baynes" | 4.0 | +| "Ben Simmons" | 4.0 | +| "Blake Griffin" | 4.0 | +| "Chris Paul" | 1.0 | +| "Tim Duncan" | 1.0 | ++---------------------+-------+ + +// When using a multi-attribute full-text index query, the conditions are matched within all properties of the index. 
+nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_2,"*h*") YIELD properties(vertex); ++------------------------------------------------------------------+ +| properties(VERTEX) | ++------------------------------------------------------------------+ +| {_vid: "Chris Paul", city: "Houston", name: "Chris Paul"} | +| {_vid: "Boris Diaw", city: "Houston", name: "Boris Diaw"} | +| {_vid: "David West", city: "Philadelphia", name: "David West"} | +| {_vid: "James Harden", city: "New York", name: "James Harden"} | +| {_vid: "Tony Parker", city: "Chicago", name: "Tony Parker"} | +| {_vid: "Aron Baynes", city: "Chicago", name: "Aron Baynes"} | +| {_vid: "Ben Simmons", city: "Phoenix", name: "Ben Simmons"} | +| {_vid: "Blake Griffin", city: "Phoenix", name: "Blake Griffin"} | +| {_vid: "Danny Green", city: "Philadelphia", name: "Danny Green"} | ++------------------------------------------------------------------+ + +// When using multi-attribute full-text index queries, you can specify different text for different properties for the query. +nebula> LOOKUP ON player WHERE ES_QUERY(fulltext_index_2,"name:*b* AND city:Houston") YIELD properties(vertex); ++-----------------------------------------------------------+ +| properties(VERTEX) | ++-----------------------------------------------------------+ +| {_vid: "Boris Diaw", city: "Houston", name: "Boris Diaw"} | ++-----------------------------------------------------------+ + +// Delete single-attribute full-text index. 
+nebula> DROP FULLTEXT INDEX fulltext_index_1; ``` diff --git a/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/1.text-based-index-restrictions.md b/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/1.text-based-index-restrictions.md index cee3d2db252..b108ef5340f 100644 --- a/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/1.text-based-index-restrictions.md +++ b/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/1.text-based-index-restrictions.md @@ -2,13 +2,14 @@ !!! caution - This topic introduces the restrictions for full-text indexes. Please read the restrictions very carefully before using the full-text indexes. + - This topic introduces the restrictions for full-text indexes. Please read the restrictions very carefully before using the full-text indexes. + - Version 3.5.0 redoes the full-text index function, which is not compatible with the previous versions, and requires deleting the previous index data and rebuilding the index. For now, full-text search has the following limitations: - Currently, full-text search supports `LOOKUP` statements only. -- The full-text index name must starts with `nebula_` and can contain only numbers, lowercase letters, and underscores. +- The full-text index name can contain only numbers, lowercase letters, and underscores. - If there is a full-text index on the tag/edge type, the tag/edge type cannot be deleted or modified. @@ -16,18 +17,16 @@ For now, full-text search has the following limitations: - Full-text index can not be applied to search multiple tags/edge types. -- Sorting for the returned results of the full-text search is not supported. Data is returned in the order of data insertion. - - Full-text index can not search properties with value `NULL`. - Altering Elasticsearch indexes is not supported at this time. -- The pipe operator is not supported. +- Modifying the analyzer is not supported. 
You have to delete the index data and then specify the analyzer when you rebuild the index. - `WHERE` clauses supports full-text search only working on single terms. - Make sure that you start the Elasticsearch cluster and Nebula Graph at the same time. If not, the data writing on the Elasticsearch cluster can be incomplete. -- It may take a while for Elasticsearch to create indexes. If Nebula Graph warns no index is found, wait for the index to take effect (however, the waiting time is unknown and there is no code to check). +- It may take a while for Elasticsearch to create indexes. If Nebula Graph warns no index is found, you can check the status of the indexing task. - NebulaGraph clusters deployed with K8s do not have native support for the full-text search feature. However, you can manually deploy the feature yourself. diff --git a/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/2.deploy-es.md b/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/2.deploy-es.md index 6a84b25a6a4..17f3c390803 100644 --- a/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/2.deploy-es.md +++ b/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/2.deploy-es.md @@ -10,13 +10,9 @@ Before you start using the full-text index, please make sure that you know the [ To deploy an Elasticsearch cluster, see [Kubernetes Elasticsearch deployment](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-deploy-elasticsearch.html) or [Elasticsearch installation](https://www.elastic.co/guide/en/elasticsearch/reference/7.15/targz.html). -!!! compatibility +!!! note - For NebulaGraph 3.4 and later versions, no additional templates need to be created. - -!!! caution - - The full-text index name must starts with `nebula_`. + To support external network access to Elasticsearch, set `network.host` to `0.0.0.0` in `config/elasticsearch.yml`. You can configure the Elasticsearch to meet your business needs. 
To customize the Elasticsearch, see [Elasticsearch Document](https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html). @@ -33,7 +29,7 @@ SIGN IN TEXT SERVICE (, {HTTP | HTTPS} [,"", " SIGN IN TEXT SERVICE (127.0.0.1:9200, HTTP); +nebula> SIGN IN TEXT SERVICE (192.168.8.100:9200, HTTP); ``` !!! Note @@ -58,13 +54,11 @@ SHOW TEXT SEARCH CLIENTS; ```ngql nebula> SHOW TEXT SEARCH CLIENTS; -+-------------+------+ -| Host | Port | -+-------------+------+ -| "127.0.0.1" | 9200 | -| "127.0.0.1" | 9200 | -| "127.0.0.1" | 9200 | -+-------------+------+ ++-----------------+-----------------+------+ +| Type | Host | Port | ++-----------------+-----------------+------+ +| "ELASTICSEARCH" | "192.168.8.100" | 9200 | ++-----------------+-----------------+------+ ``` ## Sign out to the text search clients diff --git a/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/3.deploy-listener.md b/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/3.deploy-listener.md index b9eeabdb4c5..a8854c52392 100644 --- a/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/3.deploy-listener.md +++ b/docs-2.0/4.deployment-and-installation/6.deploy-text-based-index/3.deploy-listener.md @@ -20,13 +20,13 @@ Full-text index data is written to the Elasticsearch cluster asynchronously. The ## Deployment process -### Step 1: Install the Storage service +### Step 1: Install the Listener service -The Listener process and the storaged process use the same binary file. However, their configuration files and using ports are different. You can install NebulaGraph on all servers that need to deploy a Listener, but only the Storage service can be used. For details, see [Install NebulaGraph by RPM or DEB Package](../2.compile-and-install-nebula-graph/2.install-nebula-graph-by-rpm-or-deb.md). +The Listener service uses the same binary as the storaged service. However, the configuration files are different and the processes use different ports. 
You can install NebulaGraph on all servers that need to deploy a Listener, but only the storaged service can be used. For details, see [Install NebulaGraph by RPM or DEB Package](../2.compile-and-install-nebula-graph/2.install-nebula-graph-by-rpm-or-deb.md). ### Step 2: Prepare the configuration file for the Listener -You have to prepare a corresponding configuration file on the machine that you want to deploy a Listener. The file must be named as `nebula-storaged-listener.conf` and stored in the `etc` directory. A [template](https://github.com/vesoft-inc/nebula-storage/blob/master/conf/nebula-storaged-listener.conf.production) is provided for your reference. Note that the file suffix `.production` should be removed. +In the `etc` directory, rename `nebula-storaged-listener.conf.default` or `nebula-storaged-listener.conf.production` to `nebula-storaged-listener.conf` by removing the suffix, and then modify the configuration content. Most configurations are the same as the configurations of [Storage Service](../../5.configurations-and-logs/1.configurations/4.storage-config.md). This topic only introduces the differences. @@ -35,7 +35,7 @@ Most configurations are the same as the configurations of [Storage Service](../. | `daemonize` | `true` | When set to `true`, the process is a daemon process. | | `pid_file` | `pids/nebula-metad-listener.pid` | The file that records the process ID. | | `meta_server_addrs` | - | IP addresses and ports of all Meta services. Multiple Meta services are separated by commas. | -| `local_ip` | - | The local IP address of the Listener service. | +| `local_ip` | - | The local IP address of the Listener service. Use real IP addresses instead of domain names or loopback IP addresses such as `127.0.0.1`. | | `port` | - | The listening port of the RPC daemon of the Listener service. | | `heartbeat_interval_secs` | `10` | The heartbeat interval of the Meta service. The unit is second (s). 
| | `listener_path` | `data/listener` | The WAL directory of the Listener. Only one directory is allowed. | @@ -46,20 +46,14 @@ Most configurations are the same as the configurations of [Storage Service](../. | `engine_type` | `rocksdb` | The type of the Storage engine, such as `rocksdb`, `memory`, etc. | | `part_type` | `simple` | The type of the part, such as `simple`, `consensus`, etc. | -!!! note - - Use real IP addresses in the configuration file instead of domain names or loopback IP addresses such as `127.0.0.1`. - ### Step 3: Start Listeners To initiate the Listener, navigate to the installation path of the desired cluster and execute the following command: ```bash -./bin/nebula-storaged --flagfile <listener_config_path>/nebula-storaged-listener.conf +./bin/nebula-storaged --flagfile etc/nebula-storaged-listener.conf ``` -`${listener_config_path}` is the path where you store the Listener configuration file. - ### Step 4: Add Listeners to NebulaGraph [Connect to NebulaGraph](../../2.quick-start/3.connect-to-nebula-graph.md) and run [`USE <space>`](../../3.ngql-guide/9.space-statements/2.use-space.md) to enter the graph space that you want to create full-text indexes for. Then run the following statement to add a Listener into NebulaGraph. @@ -75,7 +69,7 @@ ADD LISTENER ELASTICSEARCH <listener_ip:port> [,<listener_ip:port>, ...] Add all Listeners in one statement completely. ```ngql - nebula> ADD LISTENER ELASTICSEARCH 192.168.8.5:9789,192.168.8.6:9789; + nebula> ADD LISTENER ELASTICSEARCH 192.168.8.100:9789,192.168.8.101:9789; ``` ## Show Listeners @@ -86,13 +80,13 @@ Run the `SHOW LISTENER` statement to list all Listeners. 
```ngql nebula> SHOW LISTENER; -+--------+-----------------+-----------------------+----------+ -| PartId | Type | Host | Status | -+--------+-----------------+-----------------------+----------+ -| 1 | "ELASTICSEARCH" | "[192.168.8.5:46780]" | "ONLINE" | -| 2 | "ELASTICSEARCH" | "[192.168.8.5:46780]" | "ONLINE" | -| 3 | "ELASTICSEARCH" | "[192.168.8.5:46780]" | "ONLINE" | -+--------+-----------------+-----------------------+----------+ ++--------+-----------------+------------------------+-------------+ +| PartId | Type | Host | Host Status | ++--------+-----------------+------------------------+-------------+ +| 1 | "ELASTICSEARCH" | ""192.168.8.100":9789" | "ONLINE" | +| 2 | "ELASTICSEARCH" | ""192.168.8.100":9789" | "ONLINE" | +| 3 | "ELASTICSEARCH" | ""192.168.8.100":9789" | "ONLINE" | ++--------+-----------------+------------------------+-------------+ ``` ## Remove Listeners