Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalizer api specs #12983

Merged
merged 2 commits into from
Mar 5, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2495,7 +2495,7 @@
{
"value": "lowercase",
"name": "Lowercase",
"description": "Normalizes token text to lower case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.htm"
"description": "Normalizes token text to lower case. See https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html"
},
{
"value": "nGram_v2",
Expand Down Expand Up @@ -2575,7 +2575,7 @@
{
"value": "uppercase",
"name": "Uppercase",
"description": "Normalizes token text to upper case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html"
"description": "Normalizes token text to upper case. See https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html"
},
{
"value": "word_delimiter",
Expand All @@ -2589,6 +2589,51 @@
"url": "https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search"
}
},
"LexicalNormalizerName": {
"type": "string",
"enum": [
"asciifolding",
"elision",
"lowercase",
"standard",
"uppercase"
],
"x-ms-enum": {
"name": "LexicalNormalizerName",
"modelAsString": true,
"values": [
{
"value": "asciifolding",
"name": "AsciiFolding",
"description": "Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the \"Basic Latin\" Unicode block) into their ASCII equivalents, if such equivalents exist. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html"
},
{
"value": "elision",
"name": "Elision",
"description": "Removes elisions. For example, \"l'avion\" (the plane) will be converted to \"avion\" (plane). See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html"
},
{
"value": "lowercase",
"name": "Lowercase",
"description": "Normalizes token text to lowercase. See https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html"
},
{
"value": "standard",
"name": "Standard",
"description": "Standard normalizer, which consists of lowercase and asciifolding. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseStringFilter.html"
},
{
"value": "uppercase",
"name": "Uppercase",
"description": "Normalizes token text to uppercase. See https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html"
}
]
},
"description": "Defines the names of all text normalizers supported by Azure Cognitive Search.",
"externalDocs": {
"url": "https://aka.ms/azs-normalizers"
}
},
"CharFilterName": {
"type": "string",
"enum": [
Expand Down Expand Up @@ -2873,6 +2918,56 @@
"url": "http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopAnalyzer.html"
}
},
"LexicalNormalizer": {
"properties": {
"@odata.type": {
"type": "string",
"description": "Identifies the concrete type of the normalizer."
},
"name": {
"type": "string",
"externalDocs": {
"url": "https://aka.ms/azs-normalizers"
},
"description": "The name of the normalizer. It must only contain letters, digits, spaces, dashes or underscores, can only start and end with alphanumeric characters, and is limited to 128 characters. It cannot end in '.microsoft' nor '.lucene', nor be named 'asciifolding', 'standard', 'lowercase', 'uppercase', or 'elision'."
}
},
"required": [
"@odata.type",
"name"
],
"description": "Base type for normalizers."
},
"CustomNormalizer": {
"x-ms-discriminator-value": "#Microsoft.Azure.Search.CustomNormalizer",
"allOf": [
{
"$ref": "#/definitions/LexicalNormalizer"
}
],
"properties": {
"tokenFilters": {
"type": "array",
"items": {
"$ref": "#/definitions/TokenFilterName",
"x-nullable": false
},
"description": "A list of token filters used to filter out or modify the input token. For example, you can specify a lowercase filter that converts all characters to lowercase. The filters are run in the order in which they are listed."
},
"charFilters": {
"type": "array",
"items": {
"$ref": "#/definitions/CharFilterName",
"x-nullable": false
},
"description": "A list of character filters used to prepare input text before it is processed. For instance, they can replace certain characters or symbols. The filters are run in the order in which they are listed."
}
},
"description": "Allows you to configure normalization for filterable, sortable, and facetable fields, which by default operate with strict matching. This is a user-defined configuration consisting of at least one or more filters, which modify the token that is stored.",
"externalDocs": {
"url": "https://aka.ms/azs-custom-normalizers"
}
},
"LexicalTokenizer": {
"discriminator": "@odata.type",
"properties": {
Expand Down Expand Up @@ -6522,6 +6617,14 @@
"description": "The name of the analyzer used at indexing time for the field. This option can be used only with searchable fields. It must be set together with searchAnalyzer and it cannot be set together with the analyzer option. This property cannot be set to the name of a language analyzer; use the analyzer property instead if you need a language analyzer. Once the analyzer is chosen, it cannot be changed for the field. Must be null for complex fields.",
"x-nullable": true
},
"normalizer": {
"externalDocs": {
"url": "https://aka.ms/azs-normalizers"
},
"$ref": "#/definitions/LexicalNormalizerName",
"description": "The name of the normalizer to use for the field. This option can be used only with fields with filterable, sortable, or facetable enabled. Once the normalizer is chosen, it cannot be changed for the field. Must be null for complex fields.",
"x-nullable": true
},
"synonymMaps": {
"externalDocs": {
"url": "https://docs.microsoft.com/rest/api/searchservice/Synonym-Map-operations"
Expand Down Expand Up @@ -7007,6 +7110,16 @@
"url": "https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search"
}
},
"normalizers": {
"type": "array",
"items": {
"$ref": "#/definitions/LexicalNormalizer"
},
"description": "The normalizers for the index.",
"externalDocs": {
"url": "https://aka.ms/azs-custom-normalizers"
}
},
"encryptionKey": {
"$ref": "#/definitions/SearchResourceEncryptionKey",
"description": "A description of an encryption key that you create in Azure Key Vault. This key is used to provide an additional level of encryption-at-rest for your data when you want full assurance that no one, not even Microsoft, can decrypt your data in Azure Cognitive Search. Once you have encrypted your data, it will always remain encrypted. Azure Cognitive Search will ignore attempts to set this property to null. You can change this property as needed if you want to rotate your encryption key; Your data will be unaffected. Encryption with customer-managed keys is not available for free search services, and is only available for paid services created on or after January 1, 2019.",
Expand Down