Skip to content

Commit

Permalink
Solve f bug
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlosFerLo committed Jun 11, 2024
1 parent fe720ba commit 224ec43
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions haystack/components/converters/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,26 @@ def run(
try:
bytestream = get_bytestream_from_source(source)
except Exception as e:
- logger.warning(f"Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
+ logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
continue

try:
file = docx.Document(io.BytesIO(bytestream.data))
except Exception as e:
- logger.warning("Could not read {source} and convert it to a Docx Document, skipping. Error: {error}", source=source, error=e)
+ logger.warning(
+ "Could not read {source} and convert it to a Docx Document, skipping. Error: {error}",
+ source=source,
+ error=e,
+ )
continue

try:
paragraphs = [para.text for para in file.paragraphs]
text = "\n".join(paragraphs)
except Exception as e:
- logger.warning("Could not convert {source} to a Document, skipping it. Error: {error}", source=source, error=e)
+ logger.warning(
+ "Could not convert {source} to a Document, skipping it. Error: {error}", source=source, error=e
+ )
continue

merged_metadata = {**bytestream.meta, **metadata}
Expand Down

0 comments on commit 224ec43

Please sign in to comment.