Skip to content

Commit

Permalink
adding sentence/period change warning
Browse files Browse the repository at this point in the history
  • Loading branch information
davidsbatista committed Dec 12, 2024
1 parent 75952d4 commit ec03550
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion haystack/components/preprocessors/document_splitter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

import warnings
from copy import deepcopy
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple

Expand Down Expand Up @@ -116,6 +116,14 @@ def __init__( # pylint: disable=too-many-positional-arguments
keep_white_spaces=True,
)

if split_by == "sentence":
    # TODO: remove this warning in the next major release.
    # Notify callers that the meaning of `split_by="sentence"` has changed:
    # it now uses the NLTK-based sentence tokenizer instead of splitting on
    # '.', and `split_by="period"` is the way to get the old behaviour.
    msg = (
        "The `split_by='sentence'` no longer splits by '.' and now relies on custom sentence tokenizer "
        "based on NLTK. To achieve the previous behaviour use `split_by='period'`."
    )
    warnings.warn(msg, DeprecationWarning)

def _init_checks(
self,
*,
Expand Down

0 comments on commit ec03550

Please sign in to comment.