Skip to content

Commit

Permalink
[text analytics] add string-index-type support (#13378)
Browse files Browse the repository at this point in the history
  • Loading branch information
iscai-msft authored Aug 28, 2020
1 parent bd05a04 commit 3891c08
Show file tree
Hide file tree
Showing 450 changed files with 3,274 additions and 1,669 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ class CategorizedEntity(DictMixin):
:ivar subcategory: Entity subcategory, such as Age/Year/TimeRange etc
:vartype subcategory: str
:ivar int offset: The entity text offset from the start of the document.
:ivar int length: The length of the entity text.
Returned in unicode code points.
:ivar int length: The length of the entity text. Returned
in unicode code points.
:ivar confidence_score: Confidence score between 0 and 1 of the extracted
entity.
:vartype confidence_score: float
Expand Down Expand Up @@ -253,7 +255,9 @@ class PiiEntity(DictMixin):
:ivar str subcategory: Entity subcategory, such as Credit Card/EU
Phone number/ABA Routing Numbers, etc.
:ivar int offset: The PII entity text offset from the start of the document.
:ivar int length: The length of the PII entity text.
Returned in unicode code points.
:ivar int length: The length of the PII entity text. Returned
in unicode code points.
:ivar float confidence_score: Confidence score between 0 and 1 of the extracted
entity.
"""
Expand Down Expand Up @@ -636,7 +640,9 @@ class LinkedEntityMatch(DictMixin):
:vartype confidence_score: float
:ivar text: Entity text as appears in the request.
:ivar int offset: The linked entity match text offset from the start of the document.
:ivar int length: The length of the linked entity match text.
Returned in unicode code points.
:ivar int length: The length of the linked entity match text. Returned
in unicode code points.
:vartype text: str
"""

Expand Down Expand Up @@ -738,8 +744,10 @@ class SentenceSentiment(DictMixin):
and 1 for the sentence for all labels.
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The sentence offset from the start of the document.
:ivar int length: The length of the sentence.
:ivar int offset: The sentence offset from the start of the document. Returned
in unicode code points.
:ivar int length: The length of the sentence. Returned
in unicode code points.
:ivar mined_opinions: The list of opinions mined from this sentence.
For example in "The food is good, but the service is bad", we would
mine these two opinions "food is good", "service is bad". Only returned
Expand Down Expand Up @@ -847,8 +855,10 @@ class AspectSentiment(DictMixin):
for 'neutral' will always be 0
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The aspect offset from the start of the document.
:ivar int length: The length of the aspect.
:ivar int offset: The aspect offset from the start of the document. Returned
in unicode code points.
:ivar int length: The length of the aspect. Returned
in unicode code points.
"""

def __init__(self, **kwargs):
Expand Down Expand Up @@ -892,8 +902,10 @@ class OpinionSentiment(DictMixin):
for 'neutral' will always be 0
:vartype confidence_scores:
~azure.ai.textanalytics.SentimentConfidenceScores
:ivar int offset: The opinion offset from the start of the document.
:ivar int length: The length of the opinion.
:ivar int offset: The opinion offset from the start of the document. Returned
in unicode code points.
:ivar int length: The length of the opinion. Returned
in unicode code points.
:ivar bool is_negated: Whether the opinion is negated. For example, in
"The food is not good", the opinion "good" is negated.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def __init__(self, endpoint, credential, **kwargs):
)
self._default_language = kwargs.pop("default_language", "en")
self._default_country_hint = kwargs.pop("default_country_hint", "US")
self._string_code_unit = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"

@distributed_trace
def detect_language( # type: ignore
Expand Down Expand Up @@ -213,6 +214,8 @@ def recognize_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return self._client.entities_recognition_general(
documents=docs,
Expand Down Expand Up @@ -278,6 +281,8 @@ def recognize_pii_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return self._client.entities_recognition_pii(
documents=docs,
Expand Down Expand Up @@ -350,6 +355,8 @@ def recognize_linked_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return self._client.entities_linking(
documents=docs,
Expand Down Expand Up @@ -490,6 +497,8 @@ def analyze_sentiment( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def __init__( # type: ignore
)
self._default_language = kwargs.pop("default_language", "en")
self._default_country_hint = kwargs.pop("default_country_hint", "US")
self._string_code_unit = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"

@distributed_trace_async
async def detect_language( # type: ignore
Expand Down Expand Up @@ -216,6 +217,8 @@ async def recognize_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return await self._client.entities_recognition_general(
documents=docs,
Expand Down Expand Up @@ -280,6 +283,8 @@ async def recognize_pii_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return await self._client.entities_recognition_pii(
documents=docs,
Expand Down Expand Up @@ -351,6 +356,8 @@ async def recognize_linked_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
try:
return await self._client.entities_linking(
documents=docs,
Expand Down Expand Up @@ -489,6 +496,8 @@ async def analyze_sentiment( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=true&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=true&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"statistics":{"documentsCount":3,"validDocumentsCount":3,"erroneousDocumentsCount":0,"transactionsCount":3},"documents":[{"id":"1","sentiment":"neutral","statistics":{"charactersCount":51,"transactionsCount":1},"confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"sentences":[{"sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"offset":0,"length":51,"text":"Microsoft
Expand All @@ -30,21 +30,21 @@ interactions:
recommend you try it."}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- b1e4352f-1e0f-46e3-9f6e-5a82195726b5
- 546ef146-2055-49be-945d-8b4d95870565
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=3
date:
- Wed, 26 Aug 2020 21:20:39 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '91'
- '84'
status:
code: 200
message: OK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"1","sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"sentences":[{"sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"offset":0,"length":51,"text":"Microsoft
Expand All @@ -30,21 +30,21 @@ interactions:
recommend you try it."}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- 36f47b42-b805-4655-9cc9-ed373487b586
- ee67d363-828c-4a5b-92ee-4a943a9aa020
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=3
date:
- Wed, 26 Aug 2020 21:20:35 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '83'
- '95'
status:
code: 200
message: OK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"401","message":"Access denied due to invalid subscription
Expand All @@ -26,7 +26,7 @@ interactions:
content-length:
- '224'
date:
- Wed, 26 Aug 2020 21:20:35 GMT
- Thu, 27 Aug 2020 19:31:56 GMT
status:
code: 401
message: PermissionDenied
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,26 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?model-version=bad&showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?model-version=bad&showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"InvalidRequest","message":"Invalid Request.","innererror":{"code":"ModelVersionIncorrect","message":"Invalid
model version. Possible values are: latest,2019-10-01,2020-04-01"}}}'
headers:
apim-request-id:
- e98c3279-f8c4-49ce-b25c-f51289330fdd
- 600cfe88-8c7b-4017-a50e-ef0c30a546a4
content-type:
- application/json; charset=utf-8
date:
- Wed, 26 Aug 2020 21:20:35 GMT
- Thu, 27 Aug 2020 19:31:56 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '10'
- '4'
status:
code: 400
message: Bad Request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -760,26 +760,26 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"InvalidRequest","message":"Invalid document in request.","innererror":{"code":"InvalidDocumentBatch","message":"Batch
request contains too many records. Max 10 records are permitted."}}}'
headers:
apim-request-id:
- 5bcf6f2d-8a67-4bf7-a552-67c0c0ce9f9b
- e63eddb4-ac2c-4b1d-bfa8-ff78dc65076f
content-type:
- application/json; charset=utf-8
date:
- Wed, 26 Aug 2020 21:20:36 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '13'
- '12'
status:
code: 400
message: Bad Request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -725,18 +725,18 @@ interactions:
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"error":{"code":"InvalidRequest","message":"Invalid document in request.","innererror":{"code":"InvalidDocumentBatch","message":"Batch
request contains too many records. Max 10 records are permitted."}}}'
headers:
apim-request-id:
- 35aa5189-c6e8-46c5-9339-607d86aef6a1
- 22ce0f08-e152-4611-bf63-9cc9ae125568
content-type:
- application/json; charset=utf-8
date:
- Wed, 26 Aug 2020 21:20:39 GMT
- Thu, 27 Aug 2020 19:31:50 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
Expand Down
Loading

0 comments on commit 3891c08

Please sign in to comment.