Skip to content

Commit

Permalink
Fix some typos (#215)
Browse files Browse the repository at this point in the history
  • Loading branch information
co63oc authored Feb 22, 2024
1 parent 5a4001f commit e710801
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions data_juicer/ops/common/helper_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def strip(document, strip_characters):
emojis).
:param document: document to be processed
:param strip_characters: characters uesd for stripping document
:param strip_characters: characters used for stripping document
:return: stripped document
"""
if not document:
Expand Down Expand Up @@ -76,7 +76,7 @@ def split_on_newline_tab_whitespace(document):
First split on "\\\\n", then on "\\\\t", then on " ".
:param document: document to be split
:return: setence list obtained after splitting document
:return: sentence list obtained after splitting document
"""
sentences = document.split('\n')
sentences = [sentence.split('\t') for sentence in sentences]
Expand Down
4 changes: 2 additions & 2 deletions data_juicer/ops/filter/image_size_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

@OPERATORS.register_module('image_size_filter')
class ImageSizeFilter(Filter):
"""Keep data samples whose image size (in bytes/kb/MB/...) within a
"""Keep data samples whose image size (in Bytes/KB/MB/...) is within a
specific range.
"""

Expand All @@ -24,7 +24,7 @@ def __init__(self,
:param min_size: The min image size to keep samples. set to be "0" by
default for no size constraint
:param max_size: The max image size to keep samples. set to be
"1Tb" by default, an approximate for un-limited case
"1TB" by default, an approximation for the unlimited case
:param any_or_all: keep this sample with 'any' or 'all' strategy of
all images. 'any': keep this sample if any images meet the
condition. 'all': keep this sample only if all images meet the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self,
"""
Initialization method.
:param keep_alphabet: whether to keep alpabet
:param keep_alphabet: whether to keep alphabet
:param keep_number: whether to keep number
:param keep_punc: whether to keep punctuation
:param args: extra args
Expand Down

0 comments on commit e710801

Please sign in to comment.