From 4ed1b82841d6b553fef7e6863b99061703cacb7b Mon Sep 17 00:00:00 2001
From: Keren Fuentes <kedejesu@microsoft.com>
Date: Tue, 13 Oct 2020 16:49:45 -0700
Subject: [PATCH 1/5] Fix WordTokenizing ONNX export: use Reshape instead of Squeeze

---
 .../Text/WordTokenizing.cs                    |  9 ++++---
 test/Microsoft.ML.Tests/OnnxConversionTest.cs | 27 ++++++++++---------
 2 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
index 1eac17ccaa..53fae2c6e1 100644
--- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
+++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
@@ -404,6 +404,7 @@ public void SaveAsOnnx(OnnxContext ctx)
                 string opType;
                 while (columns.MoveNext())
                 {
+
                     opType = "Tokenizer";
                     var column = columns.Current;
                     var intermediateVar = ctx.AddIntermediateVariable(_type, "TokenizerOutput", true);
@@ -415,10 +416,10 @@ public void SaveAsOnnx(OnnxContext ctx)
                     string[] separators = column.SeparatorsArray.Select(c => c.ToString()).ToArray();
                     tokenizerNode.AddAttribute("separators", separators);
 
-                    opType = "Squeeze";
-                    var squeezeOutput = ctx.AddIntermediateVariable(_type, column.Name);
-                    var squeezeNode = ctx.CreateNode(opType, intermediateVar, squeezeOutput, ctx.GetNodeName(opType), "");
-                    squeezeNode.AddAttribute("axes", new long[] { 1 });
+                    opType = "Reshape";
+                    var shape = ctx.AddInitializer(new long[] { 1, -1 }, new long[] { 2 }, "Shape");
+                    var reshapeOutput = ctx.AddIntermediateVariable(new VectorDataViewType(TextDataViewType.Instance, 1), column.Name);
+                    var reshapeNode = ctx.CreateNode(opType, new[] { intermediateVar, shape }, new[] { reshapeOutput }, ctx.GetNodeName(opType), "");
                 }
             }
         }
diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
index 69dbbe57e5..8bf75f405d 100644
--- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs
+++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -1310,22 +1310,25 @@ public void NgramOnnxConversionTest(
             IEstimator<ITransformer>[] pipelines =
             {
                 mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text", new[] { ' ' })
-                                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
-                                .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
-                                            ngramLength: ngramLength,
-                                            useAllLengths: useAllLength,
-                                            weighting: weighting)),
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
+                .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
+                ngramLength: ngramLength,
+                useAllLengths: useAllLength,
+                weighting: weighting)),
 
                 mlContext.Transforms.Text.TokenizeIntoCharactersAsKeys("Tokens", "Text")
                 .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
-                            ngramLength: ngramLength,
-                            useAllLengths: useAllLength,
-                            weighting: weighting)),
+                ngramLength: ngramLength,
+                useAllLengths: useAllLength,
+                weighting: weighting)),
 
                 mlContext.Transforms.Text.ProduceWordBags("Tokens", "Text",
-                                        ngramLength: ngramLength,
-                                        useAllLengths: useAllLength,
-                                        weighting: weighting)
+                ngramLength: ngramLength,
+                useAllLengths: useAllLength,
+                weighting: weighting),
+
+                mlContext.Transforms.Text.TokenizeIntoWords("Tokens0", "Text")
+                .Append(mlContext.Transforms.Text.ProduceWordBags("Tokens", "Tokens0"))
             };
 
             for (int i = 0; i < pipelines.Length; i++)
@@ -1346,7 +1349,7 @@ public void NgramOnnxConversionTest(
                     var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxFilePath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
                     var onnxTransformer = onnxEstimator.Fit(dataView);
                     var onnxResult = onnxTransformer.Transform(dataView);
-                    var columnName = i == pipelines.Length - 1 ? "Tokens" : "NGrams";
+                    var columnName = i >= pipelines.Length - 2 ? "Tokens" : "NGrams";
                     CompareResults(columnName, columnName, transformedData, onnxResult, 3);
 
                     VBuffer<ReadOnlyMemory<char>> mlNetSlots = default;

From 40c9b39d57be1c4ad5595c5177fa1c696740fe94 Mon Sep 17 00:00:00 2001
From: Keren Fuentes <kedejesu@microsoft.com>
Date: Tue, 13 Oct 2020 16:58:07 -0700
Subject: [PATCH 2/5] Fix cref in ProduceWordBags docs to reference WordBagEstimator

---
 src/Microsoft.ML.Transforms/Text/TextCatalog.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 7b4b554b7c..9ae8888791 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -334,7 +334,7 @@ public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCa
             => new CustomStopWordsRemovingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, stopwords);
 
         /// <summary>
-        /// Create a <see cref="WordHashBagEstimator"/>, which maps the column specified in <paramref name="inputColumnName"/>
+        /// Create a <see cref="WordBagEstimator"/>, which maps the column specified in <paramref name="inputColumnName"/>
         /// to a vector of n-gram counts in a new column named <paramref name="outputColumnName"/>.
         /// </summary>
         /// <remarks>
@@ -363,7 +363,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf
                 outputColumnName, inputColumnName, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting);
 
         /// <summary>
-        /// Create a <see cref="WordHashBagEstimator"/>, which maps the multiple columns specified in <paramref name="inputColumnNames"/>
+        /// Create a <see cref="WordBagEstimator"/>, which maps the multiple columns specified in <paramref name="inputColumnNames"/>
         /// to a vector of n-gram counts in a new column named <paramref name="outputColumnName"/>.
         /// </summary>
         /// <remarks>

From 8571d4925b4f4b0794501ef6301aac5805e9b437 Mon Sep 17 00:00:00 2001
From: Keren Fuentes <kedejesu@microsoft.com>
Date: Wed, 14 Oct 2020 09:24:12 -0700
Subject: [PATCH 3/5] Realign NgramOnnxConversionTest arguments and drop stray blank lines

---
 .../Text/WordTokenizing.cs                    |  1 -
 test/Microsoft.ML.Tests/OnnxConversionTest.cs | 19 +++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
index 53fae2c6e1..3af7a1e471 100644
--- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
+++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
@@ -404,7 +404,6 @@ public void SaveAsOnnx(OnnxContext ctx)
                 string opType;
                 while (columns.MoveNext())
                 {
-
                     opType = "Tokenizer";
                     var column = columns.Current;
                     var intermediateVar = ctx.AddIntermediateVariable(_type, "TokenizerOutput", true);
diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
index 8bf75f405d..136aa78132 100644
--- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs
+++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -1312,20 +1312,19 @@ public void NgramOnnxConversionTest(
                 mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text", new[] { ' ' })
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
                 .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
-                ngramLength: ngramLength,
-                useAllLengths: useAllLength,
-                weighting: weighting)),
+                            ngramLength: ngramLength,
+                            useAllLengths: useAllLength,
+                            weighting: weighting)),
 
                 mlContext.Transforms.Text.TokenizeIntoCharactersAsKeys("Tokens", "Text")
                 .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
-                ngramLength: ngramLength,
-                useAllLengths: useAllLength,
-                weighting: weighting)),
-
+                            ngramLength: ngramLength,
+                            useAllLengths: useAllLength,
+                            weighting: weighting)),
                 mlContext.Transforms.Text.ProduceWordBags("Tokens", "Text",
-                ngramLength: ngramLength,
-                useAllLengths: useAllLength,
-                weighting: weighting),
+                            ngramLength: ngramLength,
+                            useAllLengths: useAllLength,
+                            weighting: weighting),
 
                 mlContext.Transforms.Text.TokenizeIntoWords("Tokens0", "Text")
                 .Append(mlContext.Transforms.Text.ProduceWordBags("Tokens", "Tokens0"))

From b4ebafddb2b52a06ff496690f86ea6057df2a9ea Mon Sep 17 00:00:00 2001
From: Keren Fuentes <kedejesu@microsoft.com>
Date: Wed, 14 Oct 2020 09:25:37 -0700
Subject: [PATCH 4/5] Restore blank line between pipelines in NgramOnnxConversionTest

---
 test/Microsoft.ML.Tests/OnnxConversionTest.cs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
index 136aa78132..68961a0899 100644
--- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs
+++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -1321,6 +1321,7 @@ public void NgramOnnxConversionTest(
                             ngramLength: ngramLength,
                             useAllLengths: useAllLength,
                             weighting: weighting)),
+
                 mlContext.Transforms.Text.ProduceWordBags("Tokens", "Text",
                             ngramLength: ngramLength,
                             useAllLengths: useAllLength,

From 4497de8679c6ff6521e6439f2949aee3613cc174 Mon Sep 17 00:00:00 2001
From: Keren Fuentes <kedejesu@microsoft.com>
Date: Wed, 14 Oct 2020 19:36:31 -0700
Subject: [PATCH 5/5] Restore original indentation of first NgramOnnxConversionTest pipeline

---
 test/Microsoft.ML.Tests/OnnxConversionTest.cs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
index 68961a0899..9f97f21ad4 100644
--- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs
+++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -1310,11 +1310,11 @@ public void NgramOnnxConversionTest(
             IEstimator<ITransformer>[] pipelines =
             {
                 mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text", new[] { ' ' })
-                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
-                .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
-                            ngramLength: ngramLength,
-                            useAllLengths: useAllLength,
-                            weighting: weighting)),
+                                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
+                                .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
+                                            ngramLength: ngramLength,
+                                            useAllLengths: useAllLength,
+                                            weighting: weighting)),
 
                 mlContext.Transforms.Text.TokenizeIntoCharactersAsKeys("Tokens", "Text")
                 .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",