From 4c2eedb72a4e1799097408ac9ca0071c1b94e7c9 Mon Sep 17 00:00:00 2001 From: Carolyn Wu <87150472+cw118@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:42:30 -0700 Subject: [PATCH] PR #18333: Typofixes for `StringLookup` documentation Imported from GitHub PR https://github.com/keras-team/keras/pull/18333 I was reading the documentation for `StringLookup` and I noticed some small typos/inconsistent formatting (e.g. vocbulary => vocabulary, etc.). Copybara import of the project: -- 77d63e4673e69039f94c3853b01e70d72e74ab79 by Carolyn Wu <87150472+cw118@users.noreply.github.com>: Small typofixes for StringLookup doc Merging this change closes #18333 FUTURE_COPYBARA_INTEGRATE_REVIEW=https://github.com/keras-team/keras/pull/18333 from cw118:stringlookup-typos 77d63e4673e69039f94c3853b01e70d72e74ab79 PiperOrigin-RevId: 557514971 --- keras/layers/preprocessing/string_lookup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keras/layers/preprocessing/string_lookup.py b/keras/layers/preprocessing/string_lookup.py index a4914430d11..5bf7389b853 100644 --- a/keras/layers/preprocessing/string_lookup.py +++ b/keras/layers/preprocessing/string_lookup.py @@ -34,7 +34,7 @@ class StringLookup(index_lookup.IndexLookup): This layer translates a set of arbitrary strings into integer output via a table-based vocabulary lookup. This layer will perform no splitting or - transformation of input strings. For a layer than can split and tokenize + transformation of input strings. For a layer that can split and tokenize natural language, see the `tf.keras.layers.TextVectorization` layer. The vocabulary for the layer must be either supplied on construction or @@ -82,7 +82,7 @@ class StringLookup(index_lookup.IndexLookup): indices. Defaults to `"[UNK]"`. vocabulary: Optional. Either an array of strings or a string path to a text file. If passing an array, can pass a tuple, list, 1D numpy array, - or 1D tensor containing the string vocbulary terms. If passing a file + or 1D tensor containing the string vocabulary terms. If passing a file path, the file should contain one line per term in the vocabulary. If this argument is set, there is no need to `adapt()` the layer. idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, @@ -114,13 +114,13 @@ class StringLookup(index_lookup.IndexLookup): find the value in each token slot. For `"int"` output, any shape of input and output is supported. For all other output modes, currently only output up to rank 2 is supported. - Defaults to `"int"` + Defaults to `"int"`. pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. If True, the output will have its feature axis padded to `max_tokens` even if the number of unique tokens in the vocabulary is less than max_tokens, resulting in a tensor of shape [batch_size, max_tokens] regardless of vocabulary size. Defaults to - False. + `False`. sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a dense `Tensor`. Defaults to `False`.