Skip to content

Commit

Permalink
semantic search (Azure#17690)
Browse files Browse the repository at this point in the history
* semantic search
  • Loading branch information
xiangyan99 authored and mccoyp committed Apr 13, 2021
1 parent d91fdd9 commit 3ea99c1
Show file tree
Hide file tree
Showing 14 changed files with 230 additions and 28 deletions.
3 changes: 3 additions & 0 deletions sdk/search/azure-search-documents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## 11.2.0b2 (Unreleased)

### New features

- Added support for semantic search #17638

## 11.2.0b1 (2021-04-06)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ class SearchDocumentsResult(msrest.serialization.Model):
'count': {'key': '@odata\\.count', 'type': 'long'},
'coverage': {'key': '@search\\.coverage', 'type': 'float'},
'facets': {'key': '@search\\.facets', 'type': '{[FacetResult]}'},
'answers': {'key': '@search\\.answers', 'type': '{[AnswerResult]}'},
'answers': {'key': '@search\\.answers', 'type': '[AnswerResult]'},
'next_page_parameters': {'key': '@search\\.nextPageParameters', 'type': 'SearchRequest'},
'results': {'key': 'value', 'type': '[SearchResult]'},
'next_link': {'key': '@odata\\.nextLink', 'type': 'str'},
Expand Down Expand Up @@ -909,7 +909,7 @@ class SearchResult(msrest.serialization.Model):
'score': {'key': '@search\\.score', 'type': 'float'},
'reranker_score': {'key': '@search\\.rerankerScore', 'type': 'float'},
'highlights': {'key': '@search\\.highlights', 'type': '{[str]}'},
'captions': {'key': '@search\\.captions', 'type': '{[CaptionResult]}'},
'captions': {'key': '@search\\.captions', 'type': '[CaptionResult]'},
}

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ class SearchDocumentsResult(msrest.serialization.Model):
'count': {'key': '@odata\\.count', 'type': 'long'},
'coverage': {'key': '@search\\.coverage', 'type': 'float'},
'facets': {'key': '@search\\.facets', 'type': '{[FacetResult]}'},
'answers': {'key': '@search\\.answers', 'type': '{[AnswerResult]}'},
'answers': {'key': '@search\\.answers', 'type': '[AnswerResult]'},
'next_page_parameters': {'key': '@search\\.nextPageParameters', 'type': 'SearchRequest'},
'results': {'key': 'value', 'type': '[SearchResult]'},
'next_link': {'key': '@odata\\.nextLink', 'type': 'str'},
Expand Down Expand Up @@ -991,7 +991,7 @@ class SearchResult(msrest.serialization.Model):
'score': {'key': '@search\\.score', 'type': 'float'},
'reranker_score': {'key': '@search\\.rerankerScore', 'type': 'float'},
'highlights': {'key': '@search\\.highlights', 'type': '{[str]}'},
'captions': {'key': '@search\\.captions', 'type': '{[CaptionResult]}'},
'captions': {'key': '@search\\.captions', 'type': '[CaptionResult]'},
}

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
if TYPE_CHECKING:
# pylint:disable=unused-import,ungrouped-imports
from typing import Any, Union
from ..documents.models import AnswerResult


def convert_search_result(result):
Expand Down Expand Up @@ -83,6 +84,10 @@ def get_count(self):
"""
return self._first_iterator_instance().get_count()

def get_answers(self):
# type: () -> Union[list[AnswerResult], None]
"""Return answers."""
return self._first_iterator_instance().get_answers()

# The pylint error silenced below seems spurious, as the inner wrapper does, in
# fact, become a method of the class when it is applied.
Expand Down Expand Up @@ -141,3 +146,7 @@ def get_coverage(self):
@_ensure_response
def get_count(self):
return self._response.count

@_ensure_response
def get_answers(self):
return self._response.answers
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def odata(statement, **kwargs):
"""Escape an OData query string.
The statement to prepare should include fields to substitute given inside
braces, e.g. `{somevar}` and then pass the corresponing value as a keyword
braces, e.g. `{somevar}` and then pass the corresponding value as a keyword
argument, e.g. `somevar=10`.
:param statement: An OData query string to prepare
Expand Down Expand Up @@ -154,9 +154,9 @@ def search(self, search_text, **kwargs):
:keyword list[str] highlight_fields: The list of field names to use for hit highlights. Only searchable
fields can be used for hit highlighting.
:keyword str highlight_post_tag: A string tag that is appended to hit highlights. Must be set with
highlightPreTag. Default is </em>.
highlightPreTag. Default is </em>.
:keyword str highlight_pre_tag: A string tag that is prepended to hit highlights. Must be set with
highlightPostTag. Default is &lt;em&gt;.
highlightPostTag. Default is <em>.
:keyword float minimum_coverage: A number between 0 and 100 indicating the percentage of the index that
must be covered by a search query in order for the query to be reported as a success. This
parameter can be useful for ensuring search availability even for services with only one
Expand All @@ -169,8 +169,8 @@ def search(self, search_text, **kwargs):
document match score. There can be at most 32 $orderby clauses.
:keyword query_type: A value that specifies the syntax of the search query. The default is
'simple'. Use 'full' if your query uses the Lucene query syntax. Possible values include:
'simple', 'full'.
:paramtype query_type: str or ~search_index_client.models.QueryType
'simple', 'full', "semantic".
:paramtype query_type: str or ~azure.search.documents.models.QueryType
:keyword list[str] scoring_parameters: The list of parameter values to be used in scoring functions (for
example, referencePointParameter) using the format name-values. For example, if the scoring
profile defines a function with a parameter called 'mylocation' the parameter string would be
Expand All @@ -182,7 +182,16 @@ def search(self, search_text, **kwargs):
each fielded search expression take precedence over any field names listed in this parameter.
:keyword search_mode: A value that specifies whether any or all of the search terms must be
matched in order to count the document as a match. Possible values include: 'any', 'all'.
:paramtype search_mode: str or ~search_index_client.models.SearchMode
:paramtype search_mode: str or ~azure.search.documents.models.SearchMode
:keyword query_language: A value that specifies the language of the search query. Possible values
include: "none", "en-us".
:paramtype query_language: str or ~azure.search.documents.models.QueryLanguage
:keyword speller: A value that specifies the type of the speller to use to spell-correct
individual search query terms. Possible values include: "none", "lexicon".
:paramtype speller: str or ~azure.search.documents.models.Speller
:keyword answers: A value that specifies whether answers should be returned as part of the search
response. Possible values include: "none", "extractive".
:paramtype answers: str or ~azure.search.documents.models.Answers
:keyword list[str] select: The list of fields to retrieve. If unspecified, all fields marked as retrievable
in the schema are included.
:keyword int skip: The number of search results to skip. This value cannot be greater than 100,000.
Expand Down Expand Up @@ -234,6 +243,9 @@ def search(self, search_text, **kwargs):
scoring_profile = kwargs.pop("scoring_profile", None)
search_fields = kwargs.pop("search_fields", None)
search_mode = kwargs.pop("search_mode", None)
query_language = kwargs.pop("query_language", None)
speller = kwargs.pop("speller", None)
answers = kwargs.pop("answers", None)
select = kwargs.pop("select", None)
skip = kwargs.pop("skip", None)
top = kwargs.pop("top", None)
Expand All @@ -252,6 +264,9 @@ def search(self, search_text, **kwargs):
scoring_profile=scoring_profile,
search_fields=search_fields,
search_mode=search_mode,
query_language=query_language,
speller=speller,
answers=answers,
select=select if isinstance(select, six.string_types) else None,
skip=skip,
top=top
Expand Down Expand Up @@ -353,7 +368,7 @@ def autocomplete(self, search_text, suggester_name, **kwargs):
:keyword mode: Specifies the mode for Autocomplete. The default is 'oneTerm'. Use
'twoTerms' to get shingles and 'oneTermWithContext' to use the current context while producing
auto-completed terms. Possible values include: 'oneTerm', 'twoTerms', 'oneTermWithContext'.
:paramtype mode: str or ~search_index_client.models.AutocompleteMode
:paramtype mode: str or ~azure.search.documents.models.AutocompleteMode
:keyword str filter: An OData expression that filters the documents used to produce completed terms
for the Autocomplete result.
:keyword bool use_fuzzy_matching: A value indicating whether to use fuzzy matching for the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
from typing import Union
from typing import Union, TYPE_CHECKING

from azure.core.async_paging import AsyncItemPaged, AsyncPageIterator, ReturnType
from .._generated.models import SearchRequest
Expand All @@ -13,6 +13,9 @@
unpack_continuation_token,
)

if TYPE_CHECKING:
# pylint:disable=unused-import,ungrouped-imports
from ...documents.models import AnswerResult

class AsyncSearchItemPaged(AsyncItemPaged[ReturnType]):
def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -62,6 +65,11 @@ async def get_count(self):
"""
return await self._first_iterator_instance().get_count()

async def get_answers(self):
# type: () -> Union[list[AnswerResult], None]
"""Return answers."""
return await self._first_iterator_instance().get_answers()


# The pylint error silenced below seems spurious, as the inner wrapper does, in
# fact, become a method of the class when it is applied.
Expand Down Expand Up @@ -122,3 +130,7 @@ async def get_coverage(self):
@_ensure_response
async def get_count(self):
return self._response.count

@_ensure_response
async def get_answers(self):
return self._response.answers
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ async def search(self, search_text, **kwargs):
:keyword list[str] highlight_fields: The list of field names to use for hit highlights. Only searchable
fields can be used for hit highlighting.
:keyword str highlight_post_tag: A string tag that is appended to hit highlights. Must be set with
highlightPreTag. Default is &lt;/em&gt;.
highlightPreTag. Default is </em>.
:keyword str highlight_pre_tag: A string tag that is prepended to hit highlights. Must be set with
highlightPostTag. Default is &lt;em&gt;.
highlightPostTag. Default is <em>.
:keyword float minimum_coverage: A number between 0 and 100 indicating the percentage of the index that
must be covered by a search query in order for the query to be reported as a success. This
parameter can be useful for ensuring search availability even for services with only one
Expand All @@ -143,8 +143,8 @@ async def search(self, search_text, **kwargs):
document match score. There can be at most 32 $orderby clauses.
:keyword query_type: A value that specifies the syntax of the search query. The default is
'simple'. Use 'full' if your query uses the Lucene query syntax. Possible values include:
'simple', 'full'.
:paramtype query_type: str or ~search_index_client.models.QueryType
'simple', 'full', "semantic".
:paramtype query_type: str or ~azure.search.documents.models.QueryType
:keyword list[str] scoring_parameters: The list of parameter values to be used in scoring functions (for
example, referencePointParameter) using the format name-values. For example, if the scoring
profile defines a function with a parameter called 'mylocation' the parameter string would be
Expand All @@ -156,7 +156,16 @@ async def search(self, search_text, **kwargs):
each fielded search expression take precedence over any field names listed in this parameter.
:keyword search_mode: A value that specifies whether any or all of the search terms must be
matched in order to count the document as a match. Possible values include: 'any', 'all'.
:paramtype search_mode: str or ~search_index_client.models.SearchMode
:paramtype search_mode: str or ~azure.search.documents.models.SearchMode
:keyword query_language: A value that specifies the language of the search query. Possible values
include: "none", "en-us".
:paramtype query_language: str or ~azure.search.documents.models.QueryLanguage
:keyword speller: A value that specifies the type of the speller to use to spell-correct
individual search query terms. Possible values include: "none", "lexicon".
:paramtype speller: str or ~azure.search.documents.models.Speller
:keyword answers: A value that specifies whether answers should be returned as part of the search
response. Possible values include: "none", "extractive".
:paramtype answers: str or ~azure.search.documents.models.Answers
:keyword list[str] select: The list of fields to retrieve. If unspecified, all fields marked as retrievable
in the schema are included.
:keyword int skip: The number of search results to skip. This value cannot be greater than 100,000.
Expand Down Expand Up @@ -208,6 +217,9 @@ async def search(self, search_text, **kwargs):
scoring_profile = kwargs.pop("scoring_profile", None)
search_fields = kwargs.pop("search_fields", None)
search_mode = kwargs.pop("search_mode", None)
query_language = kwargs.pop("query_language", None)
speller = kwargs.pop("speller", None)
answers = kwargs.pop("answers", None)
select = kwargs.pop("select", None)
skip = kwargs.pop("skip", None)
top = kwargs.pop("top", None)
Expand All @@ -226,6 +238,9 @@ async def search(self, search_text, **kwargs):
scoring_profile=scoring_profile,
search_fields=search_fields,
search_mode=search_mode,
query_language=query_language,
speller=speller,
answers=answers,
select=select if isinstance(select, six.string_types) else None,
skip=skip,
top=top
Expand Down Expand Up @@ -326,7 +341,7 @@ async def autocomplete(self, search_text, suggester_name, **kwargs):
:keyword mode: Specifies the mode for Autocomplete. The default is 'oneTerm'. Use
'twoTerms' to get shingles and 'oneTermWithContext' to use the current context while producing
auto-completed terms. Possible values include: 'oneTerm', 'twoTerms', 'oneTermWithContext'.
:paramtype mode: str or ~search_index_client.models.AutocompleteMode
:paramtype mode: str or ~azure.search.documents.models.AutocompleteMode
:keyword str filter: An OData expression that filters the documents used to produce completed terms
for the Autocomplete result.
:keyword bool use_fuzzy_matching: A value indicating whether to use fuzzy matching for the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@ def _from_generated(cls, search_field):
if search_field.fields else None
hidden = not search_field.retrievable if search_field.retrievable is not None else None
try:
normalizers = search_field.normalizers
normalizer = search_field.normalizer
except AttributeError:
normalizers = None
normalizer = None
return cls(
name=search_field.name,
type=search_field.type,
Expand All @@ -255,7 +255,7 @@ def _from_generated(cls, search_field):
analyzer_name=search_field.analyzer,
search_analyzer_name=search_field.search_analyzer,
index_analyzer_name=search_field.index_analyzer,
normalizers=normalizers,
normalizer=normalizer,
synonym_map_names=search_field.synonym_maps,
fields=fields
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,29 @@
#
# --------------------------------------------------------------------------

from .._generated.models import IndexAction, IndexingResult
from .._generated.models import (
Answers,
AnswerResult,
AutocompleteMode,
IndexAction,
IndexingResult,
QueryLanguage,
QueryType,
SearchMode,
Speller,
)
from .._search_client import odata


__all__ = (
"Answers",
"AnswerResult",
"AutocompleteMode",
"IndexAction",
"IndexingResult",
"odata",
"QueryLanguage",
"QueryType",
"SearchMode",
"Speller",
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
# --------------------------------------------------------------------------

"""
FILE: sample_analyze_text.py
FILE: sample_analyze_text_async.py
DESCRIPTION:
This sample demonstrates how to analyze text.
USAGE:
python sample_analyze_text.py
python sample_analyze_text_async.py
Set the environment variables with your own values before running the sample:
1) AZURE_SEARCH_SERVICE_ENDPOINT - the endpoint of your Azure Cognitive Search service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
# --------------------------------------------------------------------------

"""
FILE: sample_authentication.py
FILE: sample_authentication_async.py
DESCRIPTION:
This sample demonstrates how to authenticate with the Azure Cognitive Search
service with an API key. See more details about authentication here:
https://docs.microsoft.com/en-us/azure/search/search-security-api-keys
USAGE:
python sample_authentication.py
python sample_authentication_async.py
Set the environment variables with your own values before running the sample:
1) AZURE_SEARCH_SERVICE_ENDPOINT - the endpoint of your Azure Cognitive Search service
2) AZURE_SEARCH_INDEX_NAME - the name of your search index (e.g. "hotels-sample-index")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
# --------------------------------------------------------------------------

"""
FILE: sample_index_crud_operations.py
FILE: sample_index_crud_operations_async.py
DESCRIPTION:
This sample demonstrates how to get, create, update, or delete an index.
USAGE:
python sample_index_crud_operations.py
python sample_index_crud_operations_async.py
Set the environment variables with your own values before running the sample:
1) AZURE_SEARCH_SERVICE_ENDPOINT - the endpoint of your Azure Cognitive Search service
Expand Down
Loading

0 comments on commit 3ea99c1

Please sign in to comment.