Merge branch 'main' into amazon_bedrock_converse

deepset-ai · Dec 10, 2024 · 78617a0 · 78617a0
2 parents a16d3d7 + d22deba
commit 78617a0
Show file tree

Hide file tree

Showing 15 changed files with 198 additions and 69 deletions.
diff --git a/.github/workflows/CI_readme_sync.yml b/.github/workflows/CI_readme_sync.yml
@@ -81,6 +81,6 @@ jobs:
           ls tmp
 
       - name: Sync API docs with Haystack docs version ${{ matrix.hs-docs-version }}
-        uses: readmeio/rdme@v8
+        uses: readmeio/rdme@v9
         with:
           rdme: docs ${{ steps.pathfinder.outputs.project_path }}/tmp --key=${{ secrets.README_API_KEY }} --version=${{ matrix.hs-docs-version }}
diff --git a/integrations/anthropic/tests/test_chat_generator.py b/integrations/anthropic/tests/test_chat_generator.py
@@ -188,9 +188,9 @@ def test_default_inference_params(self, chat_messages):
 
         first_reply = replies[0]
         assert isinstance(first_reply, ChatMessage), "First reply is not a ChatMessage instance"
-        assert first_reply.content, "First reply has no content"
+        assert first_reply.text, "First reply has no text"
         assert ChatMessage.is_from(first_reply, ChatRole.ASSISTANT), "First reply is not from the assistant"
-        assert "paris" in first_reply.content.lower(), "First reply does not contain 'paris'"
+        assert "paris" in first_reply.text.lower(), "First reply does not contain 'paris'"
         assert first_reply.meta, "First reply has no metadata"
 
     @pytest.mark.skipif(
@@ -221,9 +221,9 @@ def streaming_callback(chunk: StreamingChunk):
 
         first_reply = replies[0]
         assert isinstance(first_reply, ChatMessage), "First reply is not a ChatMessage instance"
-        assert first_reply.content, "First reply has no content"
+        assert first_reply.text, "First reply has no text"
         assert ChatMessage.is_from(first_reply, ChatRole.ASSISTANT), "First reply is not from the assistant"
-        assert "paris" in first_reply.content.lower(), "First reply does not contain 'paris'"
+        assert "paris" in first_reply.text.lower(), "First reply does not contain 'paris'"
         assert first_reply.meta, "First reply has no metadata"
 
     @pytest.mark.skipif(
@@ -255,11 +255,11 @@ def test_tools_use(self):
 
         first_reply = replies[0]
         assert isinstance(first_reply, ChatMessage), "First reply is not a ChatMessage instance"
-        assert first_reply.content, "First reply has no content"
+        assert first_reply.text, "First reply has no text"
         assert ChatMessage.is_from(first_reply, ChatRole.ASSISTANT), "First reply is not from the assistant"
-        assert "get_stock_price" in first_reply.content.lower(), "First reply does not contain get_stock_price"
+        assert "get_stock_price" in first_reply.text.lower(), "First reply does not contain get_stock_price"
         assert first_reply.meta, "First reply has no metadata"
-        fc_response = json.loads(first_reply.content)
+        fc_response = json.loads(first_reply.text)
         assert "name" in fc_response, "First reply does not contain name of the tool"
         assert "input" in fc_response, "First reply does not contain input of the tool"
 

diff --git a/integrations/anthropic/tests/test_vertex_chat_generator.py b/integrations/anthropic/tests/test_vertex_chat_generator.py
@@ -188,9 +188,9 @@ def test_default_inference_params(self, chat_messages):
 
         first_reply = replies[0]
         assert isinstance(first_reply, ChatMessage), "First reply is not a ChatMessage instance"
-        assert first_reply.content, "First reply has no content"
+        assert first_reply.text, "First reply has no text"
         assert ChatMessage.is_from(first_reply, ChatRole.ASSISTANT), "First reply is not from the assistant"
-        assert "paris" in first_reply.content.lower(), "First reply does not contain 'paris'"
+        assert "paris" in first_reply.text.lower(), "First reply does not contain 'paris'"
         assert first_reply.meta, "First reply has no metadata"
 
     # Anthropic messages API is similar for AnthropicVertex and Anthropic endpoint,

diff --git a/integrations/chroma/pyproject.toml b/integrations/chroma/pyproject.toml
@@ -25,9 +25,8 @@ classifiers = [
 dependencies = [
   "haystack-ai",
   "chromadb>=0.5.17",
-  "typing_extensions>=4.8.0",
-  "tokenizers>=0.13.2,<=0.20.3"  # TODO: remove when Chroma pins tokenizers internally
-]
+  "typing_extensions>=4.8.0"
+  ]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/chroma#readme"

diff --git a/integrations/cohere/CHANGELOG.md b/integrations/cohere/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog
 
+## [integrations/cohere-v2.0.1] - 2024-12-09
+
+### ⚙️ CI
+
+- Adopt uv as installer (#1142)
+
+### 🧹 Chores
+
+- Update ruff linting scripts and settings (#1105)
+- Fix linting/isort (#1215)
+
+### 🌀 Miscellaneous
+
+- Chore: use class methods to create `ChatMessage` (#1222)
+- Chore: use `text` instead of `content` for `ChatMessage` in Cohere and Anthropic (#1237)
+
 ## [integrations/cohere-v2.0.0] - 2024-09-16
 
 ### 🚀 Features
@@ -16,28 +32,49 @@
 
 - Do not retry tests in `hatch run test` command (#954)
 
-### ⚙️ Miscellaneous Tasks
+### ⚙️ CI
 
 - Retry tests to reduce flakiness (#836)
+
+### 🧹 Chores
+
 - Update ruff invocation to include check parameter (#853)
 
-### Docs
+### 🌀 Miscellaneous
 
+- Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
 - Update CohereChatGenerator docstrings (#958)
 - Update CohereGenerator docstrings (#960)
 
 ## [integrations/cohere-v1.1.1] - 2024-06-12
 
+### 🌀 Miscellaneous
+
+- Chore: `CohereGenerator` - remove warning about `generate` API (#805)
+
 ## [integrations/cohere-v1.1.0] - 2024-05-24
 
 ### 🐛 Bug Fixes
 
 - Remove support for generate API (#755)
 
+### 🌀 Miscellaneous
+
+- Chore: change the pydoc renderer class (#718)
+
 ## [integrations/cohere-v1.0.0] - 2024-05-03
 
+### 🌀 Miscellaneous
+
+- Follow up: update Cohere integration to use Cohere SDK v5 (#711)
+
 ## [integrations/cohere-v0.7.0] - 2024-05-02
 
+### 🌀 Miscellaneous
+
+- Chore: add license classifiers (#680)
+- Update Cohere integration to use Cohere SDK v5 (#702)
+
 ## [integrations/cohere-v0.6.0] - 2024-04-08
 
 ### 🚀 Features
@@ -46,36 +83,38 @@
 
 ## [integrations/cohere-v0.5.0] - 2024-03-29
 
+### 🌀 Miscellaneous
+
+- Add the Cohere client name to cohere requests (#362)
+
 ## [integrations/cohere-v0.4.1] - 2024-03-21
 
 ### 🐛 Bug Fixes
 
 - Fix order of API docs (#447)
-
-This PR will also push the docs to Readme
 - Fix tests (#561)
 
-* fix unit tests
-
-* try
-
-* remove flaky check
-
 ### 📚 Documentation
 
 - Update category slug (#442)
 - Review cohere integration (#500)
 - Small consistency improvements (#536)
 - Disable-class-def (#556)
 
-### ⚙️ Miscellaneous Tasks
+### 🧹 Chores
 
 - Update Cohere integration to use new generic callable (de)serializers for their callback handlers (#453)
 - Use `serialize_callable` instead of `serialize_callback_handler` in Cohere (#460)
 
-### Cohere
+### 🌀 Miscellaneous
 
+- Cohere - remove matching error message from tests (#419)
 - Fix linting (#509)
+- Make tests show coverage (#566)
+- Refactor tests (#574)
+- Test: relax test constraints (#591)
+- Remove references to Python 3.7 (#601)
+- Fix: Pin cohere version (#609)
 
 ## [integrations/cohere-v0.4.0] - 2024-02-12
 
@@ -92,32 +131,57 @@ This PR will also push the docs to Readme
 
 - Fix failing `TestCohereChatGenerator.test_from_dict_fail_wo_env_var` test (#393)
 
-## [integrations/cohere-v0.3.0] - 2024-01-25
+### 🌀 Miscellaneous
 
-### 🐛 Bug Fixes
+- Cohere: generate api docs (#321)
+- Fix: update to latest haystack-ai version (#348)
 
-- Fix project urls (#96)
+## [integrations/cohere-v0.3.0] - 2024-01-25
 
+### 🐛 Bug Fixes
 
+- Fix project URLs (#96)
 - Cohere namespace reorg (#271)
 
 ### 🚜 Refactor
 
 - Use `hatch_vcs` to manage integrations versioning (#103)
 
-### ⚙️ Miscellaneous Tasks
+### 🧹 Chores
 
 - [**breaking**] Rename `model_name` to `model` in the Cohere integration (#222)
 - Cohere namespace change (#247)
 
+### 🌀 Miscellaneous
+
+- Cohere: remove unused constant (#91)
+- Change default 'input_type' for CohereTextEmbedder (#99)
+- Change metadata to meta (#152)
+- Add cohere chat generator (#88)
+- Optimize API key reading (#162)
+- Cohere - change metadata to meta (#178)
+
 ## [integrations/cohere-v0.2.0] - 2023-12-11
 
 ### 🚀 Features
 
 - Add support for V3 Embed models to CohereEmbedders (#89)
 
+### 🌀 Miscellaneous
+
+- Cohere: increase version to prepare release (#92)
+
 ## [integrations/cohere-v0.1.1] - 2023-12-07
 
+### 🌀 Miscellaneous
+
+- [cohere] Add text and document embedders (#80)
+- [cohere] fix cohere pypi version badge and add Embedder note (#86)
+
 ## [integrations/cohere-v0.0.1] - 2023-12-04
 
+### 🌀 Miscellaneous
+
+- Add `cohere_haystack` integration package (#75)
+
 <!-- generated by git-cliff -->
diff --git a/...ions/cohere/src/haystack_integrations/components/generators/cohere/chat/chat_generator.py b/...ions/cohere/src/haystack_integrations/components/generators/cohere/chat/chat_generator.py
@@ -136,7 +136,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "CohereChatGenerator":
 
     def _message_to_dict(self, message: ChatMessage) -> Dict[str, str]:
         role = "User" if message.role == ChatRole.USER else "Chatbot"
-        chat_message = {"user_name": role, "text": message.content}
+        chat_message = {"user_name": role, "text": message.text}
         return chat_message
 
     @component.output_types(replies=List[ChatMessage])
@@ -157,7 +157,7 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
         chat_history = [self._message_to_dict(m) for m in messages[:-1]]
         if self.streaming_callback:
             response = self.client.chat_stream(
-                message=messages[-1].content,
+                message=messages[-1].text,
                 model=self.model,
                 chat_history=chat_history,
                 **generation_kwargs,
@@ -190,7 +190,7 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
                 )
         else:
             response = self.client.chat(
-                message=messages[-1].content,
+                message=messages[-1].text,
                 model=self.model,
                 chat_history=chat_history,
                 **generation_kwargs,

diff --git a/integrations/cohere/src/haystack_integrations/components/generators/cohere/generator.py b/integrations/cohere/src/haystack_integrations/components/generators/cohere/generator.py
@@ -67,4 +67,4 @@ def run(self, prompt: str):
         chat_message = ChatMessage.from_user(prompt)
         # Note we have to call super() like this because of the way components are dynamically built with the decorator
         results = super(CohereGenerator, self).run([chat_message])  # noqa
-        return {"replies": [results["replies"][0].content], "meta": [results["replies"][0].meta]}
+        return {"replies": [results["replies"][0].text], "meta": [results["replies"][0].meta]}
diff --git a/integrations/cohere/tests/test_cohere_chat_generator.py b/integrations/cohere/tests/test_cohere_chat_generator.py
@@ -169,7 +169,7 @@ def test_live_run(self):
         results = component.run(chat_messages)
         assert len(results["replies"]) == 1
         message: ChatMessage = results["replies"][0]
-        assert "Paris" in message.content
+        assert "Paris" in message.text
         assert "usage" in message.meta
         assert "prompt_tokens" in message.meta["usage"]
         assert "completion_tokens" in message.meta["usage"]
@@ -205,7 +205,7 @@ def __call__(self, chunk: StreamingChunk) -> None:
 
         assert len(results["replies"]) == 1
         message: ChatMessage = results["replies"][0]
-        assert "Paris" in message.content
+        assert "Paris" in message.text
 
         assert message.meta["finish_reason"] == "COMPLETE"
 
@@ -227,7 +227,7 @@ def test_live_run_with_connector(self):
         results = component.run(chat_messages, generation_kwargs={"connectors": [{"id": "web-search"}]})
         assert len(results["replies"]) == 1
         message: ChatMessage = results["replies"][0]
-        assert "Paris" in message.content
+        assert "Paris" in message.text
         assert message.meta["documents"] is not None
         assert "citations" in message.meta  # Citations might be None
 
@@ -253,7 +253,7 @@ def __call__(self, chunk: StreamingChunk) -> None:
 
         assert len(results["replies"]) == 1
         message: ChatMessage = results["replies"][0]
-        assert "Paris" in message.content
+        assert "Paris" in message.text
 
         assert message.meta["finish_reason"] == "COMPLETE"
 
@@ -291,10 +291,10 @@ def test_tools_use(self):
 
         first_reply = replies[0]
         assert isinstance(first_reply, ChatMessage), "First reply is not a ChatMessage instance"
-        assert first_reply.content, "First reply has no content"
+        assert first_reply.text, "First reply has no text"
         assert ChatMessage.is_from(first_reply, ChatRole.ASSISTANT), "First reply is not from the assistant"
-        assert "get_stock_price" in first_reply.content.lower(), "First reply does not contain get_stock_price"
+        assert "get_stock_price" in first_reply.text.lower(), "First reply does not contain get_stock_price"
         assert first_reply.meta, "First reply has no metadata"
-        fc_response = json.loads(first_reply.content)
+        fc_response = json.loads(first_reply.text)
         assert "name" in fc_response, "First reply does not contain name of the tool"
         assert "parameters" in fc_response, "First reply does not contain parameters of the tool"
diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py
@@ -7,7 +7,8 @@
 from haystack import logging
 from haystack.components.generators.openai_utils import _convert_message_to_openai_format
 from haystack.dataclasses import ChatMessage
-from haystack.tracing import Span, Tracer, tracer
+from haystack.tracing import Span, Tracer
+from haystack.tracing import tracer as proxy_tracer
 from haystack.tracing import utils as tracing_utils
 
 import langfuse
@@ -78,7 +79,7 @@ def set_content_tag(self, key: str, value: Any) -> None:
         :param key: The content tag key.
         :param value: The content tag value.
         """
-        if not tracer.is_content_tracing_enabled:
+        if not proxy_tracer.is_content_tracing_enabled:
             return
         if key.endswith(".input"):
             if "messages" in value:
@@ -126,6 +127,12 @@ def __init__(self, tracer: "langfuse.Langfuse", name: str = "Haystack", public:
         be publicly accessible to anyone with the tracing URL. If set to `False`, the tracing data will be private
         and only accessible to the Langfuse account owner.
         """
+        if not proxy_tracer.is_content_tracing_enabled:
+            logger.warning(
+                "Traces will not be logged to Langfuse because Haystack tracing is disabled. "
+                "To enable, set the HAYSTACK_CONTENT_TRACING_ENABLED environment variable to true "
+                "before importing Haystack."
+            )
         self._tracer = tracer
         self._context: List[LangfuseSpan] = []
         self._name = name

diff --git a/integrations/langfuse/tests/test_tracer.py b/integrations/langfuse/tests/test_tracer.py
@@ -1,4 +1,6 @@
 import datetime
+import logging
+import sys
 from unittest.mock import MagicMock, Mock, patch
 
 from haystack.dataclasses import ChatMessage
@@ -149,3 +151,17 @@ def test_context_is_empty_after_tracing(self):
             pass
 
         assert tracer._context == []
+
+    def test_init_with_tracing_disabled(self, monkeypatch, caplog):
+        # Clear haystack modules because ProxyTracer is initialized whenever haystack is imported
+        modules_to_clear = [name for name in sys.modules if name.startswith('haystack')]
+        for name in modules_to_clear:
+            sys.modules.pop(name, None)
+
+        # Re-import LangfuseTracer and instantiate it with tracing disabled
+        with caplog.at_level(logging.WARNING):
+            monkeypatch.setenv("HAYSTACK_CONTENT_TRACING_ENABLED", "false")
+            from haystack_integrations.tracing.langfuse import LangfuseTracer
+
+            LangfuseTracer(tracer=MockTracer(), name="Haystack", public=False)
+            assert "tracing is disabled" in caplog.text