Skip to content

Commit

Permalink
Fix bug where the string_embed function only replaces one column when repl…
Browse files Browse the repository at this point in the history
…ace=True. (#5497)

* Add a for loop to replace all columns.

* Add a replace test for the string_embed function.
  • Loading branch information
ForJadeForest authored Aug 29, 2022
1 parent 510d96f commit 91c482f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
3 changes: 2 additions & 1 deletion python/friesian/src/bigdl/friesian/feature/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2545,7 +2545,8 @@ def string_embed(
.withColumn(c + "_embds", tolist(c + "_embds"))

if replace:
df = df.drop(c).withColumnRenamed(c + "_embds", c)
for c in cols:
df = df.drop(c).withColumnRenamed(c + "_embds", c)
return FeatureTable(df)


Expand Down
4 changes: 4 additions & 0 deletions python/friesian/test/bigdl/friesian/feature/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,10 @@ def test_string_embed(self):
invalidInputError(text_embeds.select("text1_embds").size() == 3, "size error")
invalidInputError(text_embeds.select("text2_embds").size() == 3, "size error")

text_embeds_replaced = tbl.string_embed(["text1", "text2"], reduce_dim=5, replace=True)
invalidInputError('text1_embds' not in text_embeds_replaced.columns, 'replace failed')
invalidInputError('text2_embds' not in text_embeds_replaced.columns, 'replace failed')

with self.assertRaises(Exception) as context:
text_embeds = tbl.string_embed(["text1"], reduce_dim=1000)
self.assertTrue("must be no less than k=1000" in str(context.exception))
Expand Down

0 comments on commit 91c482f

Please sign in to comment.