Skip to content

Commit

Permalink
feat: add File type to preview package (#5873)
Browse files Browse the repository at this point in the history
* add Blob type

* review feedback

* fix tests and naming

* Update add-blob-type-2a9476a39841f54d.yaml

* removed unused import

---------

Co-authored-by: Stefano Fiorucci <[email protected]>
  • Loading branch information
masci and anakin87 authored Oct 4, 2023
1 parent a4beec3 commit c2ec3f5
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 1 deletion.
3 changes: 2 additions & 1 deletion haystack/preview/dataclasses/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from haystack.preview.dataclasses.document import Document
from haystack.preview.dataclasses.answer import ExtractedAnswer, GeneratedAnswer, Answer
from haystack.preview.dataclasses.byte_stream import ByteStream

__all__ = ["Document", "ExtractedAnswer", "GeneratedAnswer", "Answer"]
__all__ = ["Document", "ExtractedAnswer", "GeneratedAnswer", "Answer", "ByteStream"]
37 changes: 37 additions & 0 deletions haystack/preview/dataclasses/byte_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional


@dataclass(frozen=True)
class ByteStream:
    """
    Base data class representing a binary object in the Haystack API.

    :param data: The raw bytes held by this object.
    :param metadata: Arbitrary key/value pairs attached to the data. The field is
        declared with ``hash=False``, so it does not take part in the generated hash.
    """

    data: bytes
    metadata: Dict[str, Any] = field(default_factory=dict, hash=False)

    def to_file(self, destination_path: Path) -> None:
        """
        Write the bytes held by this ByteStream to a file.

        The file is opened in binary write mode (``"wb"``), so any existing content
        at the destination is replaced.
        :param destination_path: The path of the file to write to.
        """
        with open(destination_path, "wb") as fd:
            fd.write(self.data)

    @classmethod
    def from_file_path(cls, filepath: Path, metadata: Optional[Dict[str, Any]] = None) -> "ByteStream":
        """
        Create a ByteStream from the contents read from a file.

        :param filepath: A valid path to a file.
        :param metadata: Optional metadata to attach to the new ByteStream. When
            omitted, the ByteStream gets an empty metadata dictionary.
        :return: A ByteStream holding the full binary content of the file.
        """
        with open(filepath, "rb") as fd:
            content = fd.read()
        # Build a fresh dict per instance when no metadata is given, so instances
        # never share one mutable default.
        return cls(data=content, metadata={} if metadata is None else metadata)

    @classmethod
    def from_string(
        cls, text: str, encoding: str = "utf-8", metadata: Optional[Dict[str, Any]] = None
    ) -> "ByteStream":
        """
        Create a ByteStream encoding a string.

        :param text: The string to encode
        :param encoding: The encoding used to convert the string into bytes
        :param metadata: Optional metadata to attach to the new ByteStream. When
            omitted, the ByteStream gets an empty metadata dictionary.
        :return: A ByteStream holding the encoded bytes of ``text``.
        """
        return cls(data=text.encode(encoding), metadata={} if metadata is None else metadata)
5 changes: 5 additions & 0 deletions releasenotes/notes/add-blob-type-2a9476a39841f54d.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
preview:
- |
Add the ByteStream type for passing raw binary data between components
in a pipeline.
33 changes: 33 additions & 0 deletions test/preview/dataclasses/test_byte_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import io

from haystack.preview.dataclasses import ByteStream

import pytest


@pytest.mark.unit
def test_from_file_path(tmp_path, request):
    """Check that ``ByteStream.from_file_path`` returns the exact bytes stored in a file."""
    test_bytes = "Hello, world!\n".encode()
    test_path = tmp_path / request.node.name
    # Write the fixture outside of any ``assert``: assert statements are removed
    # when Python runs with -O, which would silently skip creating the file.
    with open(test_path, "wb") as fd:
        fd.write(test_bytes)

    b = ByteStream.from_file_path(test_path)
    assert b.data == test_bytes


@pytest.mark.unit
def test_from_string():
    """Check that a string passed to ``ByteStream.from_string`` decodes back unchanged."""
    expected = "Hello, world!"
    stream = ByteStream.from_string(expected)
    decoded = stream.data.decode()
    assert decoded == expected


@pytest.mark.unit
def test_to_file(tmp_path, request):
    """Check that ``ByteStream.to_file`` writes the stream's bytes to the given path."""
    content = "Hello, world!\n"
    target = tmp_path / request.node.name

    stream = ByteStream(content.encode())
    stream.to_file(target)

    # Read the file back in binary mode and compare after decoding.
    with open(target, "rb") as fd:
        written = fd.read()
    assert written.decode() == content

0 comments on commit c2ec3f5

Please sign in to comment.