#269 Add support for nested segment redefines.

AbsaOSS · Apr 3, 2020 · 36ac9de · 36ac9de
1 parent c519383
commit 36ac9de
Show file tree

Hide file tree

Showing 3 changed files with 87 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1180,6 +1180,10 @@ For multisegment variable lengths tests:
 ![](performance/images/exp3_multiseg_wide_records_throughput.svg) ![](performance/images/exp3_multiseg_wide_mb_throughput.svg)
 
 ## Changelog
+- #### 2.0.6 To be released soon.
+  - [#151](https://github.com/AbsaOSS/cobrix/issues/151) Added an option (`occurs_mapping`) to define mappings between non-numeric fields and sizes of corresponding OCCURS (Thanks [@tr11](https://github.com/tr11)).
+  - [#269](https://github.com/AbsaOSS/cobrix/issues/269) Added support for deeply nested segment redefines; they no longer have to be defined at the top record level.
+
 - #### 2.0.5 released 23 March 2020.
   - [#239](https://github.com/AbsaOSS/cobrix/issues/69) Added support for generation of debugging fields (`.option("debug", "true")`).
   - [#249](https://github.com/AbsaOSS/cobrix/issues/260) Added support for NATIONAL (`PIC N`) formatted strings (Thanks [@schaloner-kbc](https://github.com/schaloner-kbc)).

diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/CopybookParser.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/CopybookParser.scala
@@ -474,7 +474,12 @@ object CopybookParser {
             g.withUpdatedIsSegmentRedefine(true)
           } else {
             ensureSegmentRedefinesAreIneGroup(g.name, isCurrentFieldASegmentRedefine = false)
-            g
+            // Recurse into nested fields only if no segment redefine has been found so far.
+            if (redefineGroupState == 0) {
+              processGroupFields(g)
+            } else {
+              g
+            }
           }
       }
       group.copy(children = childrenWithSegmentRedefines)(group.parent)

diff --git a/...st/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test10DeepSegmentRedefines.scala b/...st/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test10DeepSegmentRedefines.scala
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.spark.cobol.source.regression
+
+import org.scalatest.FunSuite
+import org.slf4j.{Logger, LoggerFactory}
+import za.co.absa.cobrix.spark.cobol.source.base.{SimpleComparisonBase, SparkTestBase}
+import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture
+
+class Test10DeepSegmentRedefines extends FunSuite with SparkTestBase with BinaryFileFixture with SimpleComparisonBase { // Regression test for issue #269: segment redefines declared deep inside nested groups.
+
+  private implicit val logger: Logger = LoggerFactory.getLogger(this.getClass) // NOTE(review): presumably consumed implicitly by the mixed-in comparison helpers - confirm.
+
+  private val copybook = // SEG1/SEG2/SEG3 sit under NESTED1 > NESTED3 > NESTED4 rather than directly under the 01-level record; ID is in the sibling group NESTED2.
+    """         01  ENTITY.
        02 NESTED1.
           03 NESTED2.
              05  ID                      PIC X(1).
           03 NESTED3.
              04 NESTED4.
                 05  SEG1.
                    10  A                 PIC X(1).
                 05  SEG2 REDEFINES SEG1.
                    10  B                 PIC X(1).
                 05  SEG3 REDEFINES SEG1.
                    10  C                 PIC X(1).
    """
+
+  val binFileContents: Array[Byte] = Array[Byte]( // Four records, each written as 4 header bytes followed by a 2-byte payload.
+    // Record 1: header, then EBCDIC 'A' (0xC1) for ID and 'a' (0x81) for the segment field.
+    0x00, 0x00, 0x02, 0x00, 0xC1.toByte, 0x81.toByte,
+    // Record 2: header, then EBCDIC 'B' (0xC2) for ID and 'b' (0x82) for the segment field.
+    0x00, 0x00, 0x02, 0x00, 0xC2.toByte, 0x82.toByte,
+    // Record 3: header, then EBCDIC 'C' (0xC3) for ID and 'c' (0x83) for the segment field.
+    0x00, 0x00, 0x02, 0x00, 0xC3.toByte, 0x83.toByte,
+    // Record 4: header, then EBCDIC 'D' (0xC4) and 'd' (0x84); 'D' has no entry in the segment id maps below.
+    0x00, 0x00, 0x02, 0x00, 0xC4.toByte, 0x84.toByte
+  )
+
+  test("Test a segment redefines work for deeply nested segment fields") {
+    withTempBinFile("binary_nested1", ".dat", binFileContents) { tmpFileName => // The fixture supplies the file name loaded below; presumably the file holds binFileContents - confirm.
+      val df = spark
+        .read
+        .format("cobol")
+        .option("copybook_contents", copybook)
+        .option("pedantic", "true")
+        .option("is_record_sequence", "true")
+        .option("schema_retention_policy", "collapse_root") // The expected JSON below starts at NESTED1, i.e. without the ENTITY root.
+        .option("segment_field", "ID") // The value of ID is matched against the segment id maps that follow.
+        .option("redefine_segment_id_map:1", "SEG1 => A") // NOTE(review): underscore spelling here, dash spelling on the next two keys - presumably both forms are accepted; confirm this mix is intentional.
+        .option("redefine-segment-id-map:2", "SEG2 => B")
+        .option("redefine-segment-id-map:3", "SEG3 => C")
+        .load(tmpFileName)
+
+      val expected = """[{"NESTED1":{"NESTED2":{"ID":"A"},"NESTED3":{"NESTED4":{"SEG1":{"A":"a"}}}}},{"NESTED1":{"NESTED2":{"ID":"B"},"NESTED3":{"NESTED4":{"SEG2":{"B":"b"}}}}},{"NESTED1":{"NESTED2":{"ID":"C"},"NESTED3":{"NESTED4":{"SEG3":{"C":"c"}}}}},{"NESTED1":{"NESTED2":{"ID":"D"},"NESTED3":{"NESTED4":{}}}}]""" // One object per record, each with only its mapped segment; the 'D' record has an empty NESTED4.
+
+      val actual = df.toJSON.collect().mkString("[", ",", "]") // Join the per-row JSON strings into one JSON array literal.
+
+      assertEqualsMultiline(actual, expected)
+    }
+  }
+
+}