Skip to content

Commit

Permalink
Enable CategorifyTransform inference operator to run on int16 types (#1798)
Browse files Browse the repository at this point in the history

* Enable CategorifyTransform cpp op to run on int16 types

* Add test for categorify inference op with different types
  • Loading branch information
oliverholworthy authored Apr 12, 2023
1 parent 2e5a84d commit ae580ad
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
10 changes: 10 additions & 0 deletions cpp/nvtabular/inference/categorify.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ namespace nvtabular
case 'u':
switch (dtype.itemsize())
{
case 2:
insert_int_mapping<uint16_t>(values);
return;
case 4:
insert_int_mapping<uint32_t>(values);
return;
Expand All @@ -104,6 +107,9 @@ namespace nvtabular
case 'i':
switch (dtype.itemsize())
{
case 2:
insert_int_mapping<int16_t>(values);
return;
case 4:
insert_int_mapping<int32_t>(values);
return;
Expand Down Expand Up @@ -198,6 +204,8 @@ namespace nvtabular
case 'u':
switch (itemsize)
{
case 2:
return transform_int<uint16_t>(input);
case 4:
return transform_int<uint32_t>(input);
case 8:
Expand All @@ -207,6 +215,8 @@ namespace nvtabular
case 'i':
switch (itemsize)
{
case 2:
return transform_int<int16_t>(input);
case 4:
return transform_int<int32_t>(input);
case 8:
Expand Down
26 changes: 26 additions & 0 deletions tests/unit/ops/test_categorify.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,3 +695,29 @@ def test_categorify_joint_list(cpu):

assert compare_a == [1, 5, 2, 3]
assert compare_e == [2, 3, 1, 4, 1]


def test_categorify_inference():
    """Check that Categorify's inference operator outputs int64 for every input dtype.

    Builds one random unicode-string column plus signed and unsigned integer
    columns of 16, 32 and 64 bits, fits a Categorify workflow on them, then
    runs the operator returned by ``inference_initialize`` on the raw input
    arrays and asserts each output column has dtype int64.
    """
    num_rows = 100

    # Reinterpret the one-character strings "a" and "z" as their int32 code points.
    a_char, z_char = np.array(["a", "z"]).view("int32")

    # Draw ten random code points per row (upper bound exclusive), then view
    # the int32 buffer as fixed-width ten-character unicode strings.
    random_code_points = np.random.randint(
        low=a_char, high=z_char, size=num_rows * 10, dtype="int32"
    )
    input_tensors = {"unicode_string": random_code_points.view("U10")}

    # One small-valued integer column per dtype, in the same order as before.
    for dtype_name in ("int16", "int32", "int64", "uint16", "uint32", "uint64"):
        input_tensors[f"{dtype_name}_feature"] = np.random.randint(
            0, 10, dtype=dtype_name, size=num_rows
        )

    df = dispatch.make_df(input_tensors)
    cats = df.columns >> nvt.ops.Categorify()

    # Fit the workflow on the generated data before building the inference op.
    nvt.Workflow(cats).fit(nvt.Dataset(df))

    # An empty model config is passed to inference_initialize.
    inference_op = cats.op.inference_initialize(cats.input_columns, {})
    output_tensors = inference_op.transform(cats.input_columns, input_tensors)

    expected_dtype = np.dtype("int64")
    for column_name in input_tensors:
        assert output_tensors[column_name].dtype == expected_dtype

0 comments on commit ae580ad

Please sign in to comment.