From 74f87e4a51226a907cd4d6a564dd6433774cf52d Mon Sep 17 00:00:00 2001 From: James Baiera Date: Thu, 19 Mar 2020 13:15:01 -0400 Subject: [PATCH] Fix BlockAwareJsonParser skipChildren level accounting. (#1444) The BlockAwareJsonParser is used to set a checkpoint on a JSON stream and later exit the blocks that the cursor is in, returning to the same nesting level of the content that it was created at. If skipChildren is called to skip over an open array or object, the internal level counter will not be updated to account for the array or object that was skipped over. This PR updates the skipChildren method to check the currentToken and conditionally decrement the internal nested level counter if we ended up at the end of an array or object. --- .../json/BlockAwareJsonParser.java | 1 + .../json/BlockAwareJsonParserTest.java | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/mr/src/main/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParser.java b/mr/src/main/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParser.java index 0e3d3c667..5ff97fdbd 100644 --- a/mr/src/main/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParser.java +++ b/mr/src/main/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParser.java @@ -105,6 +105,7 @@ public Token nextToken() { @Override public void skipChildren() { delegate.skipChildren(); + updateLevelBasedOn(delegate.currentToken()); } @Override diff --git a/mr/src/test/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParserTest.java b/mr/src/test/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParserTest.java index 45354ca50..1fa3b5918 100644 --- a/mr/src/test/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParserTest.java +++ b/mr/src/test/java/org/elasticsearch/hadoop/serialization/json/BlockAwareJsonParserTest.java @@ -176,4 +176,34 @@ public void testSkippingAndEncounterEOF() { blockParser.exitBlock(); 
assertThat(parser.currentToken(), nullValue()); } + + /** + * We increment the level of nesting in the parser when getting the next token. If that token starts an array or object, + * the "open" counter is incremented. If we then call `skipChildren` instead of iterating to the end of the object, make + * sure that the open counter is decremented. + */ + @Test + public void testExitBlockAfterSkippingChildren() { + String data = "{\"nested\":{\"array\":[\"test\"],\"scalar\":1}}"; + // ^ ! |-------------^ + // ! = skipChildren + Parser parser = new JacksonJsonParser(data.getBytes(Charset.defaultCharset())); + assertThat(parser.nextToken(), equalTo(Parser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(Parser.Token.FIELD_NAME)); + assertThat(parser.text(), equalTo("nested")); + assertThat(parser.nextToken(), equalTo(Parser.Token.START_OBJECT)); + BlockAwareJsonParser blockAwareJsonParser = new BlockAwareJsonParser(parser); + assertThat(blockAwareJsonParser.getLevel(), equalTo(1)); + assertThat(blockAwareJsonParser.nextToken(), equalTo(Parser.Token.FIELD_NAME)); + assertThat(blockAwareJsonParser.text(), equalTo("array")); + assertThat(blockAwareJsonParser.nextToken(), equalTo(Parser.Token.START_ARRAY)); + assertThat(blockAwareJsonParser.getLevel(), equalTo(2)); + blockAwareJsonParser.skipChildren(); + assertThat(blockAwareJsonParser.currentToken(), equalTo(Parser.Token.END_ARRAY)); + assertThat(blockAwareJsonParser.getLevel(), equalTo(1)); + blockAwareJsonParser.exitBlock(); + assertThat(parser.currentToken(), equalTo(Parser.Token.END_OBJECT)); + assertThat(parser.nextToken(), equalTo(Parser.Token.END_OBJECT)); + assertThat(parser.nextToken(), nullValue()); + } } \ No newline at end of file