Skip to content

Commit

Permalink
Restore jsonPath and fix it (#12325)
Browse files Browse the repository at this point in the history
This restores the Json traversal library. A bug was introduced in the Json path library; this PR fixes it.

In a JSON schema, an enum can be defined without specifying a "type" attribute. This wasn't handled in the previous implementation. We now return the right type from the getType method and process it the same way as an integer/boolean/string type.
  • Loading branch information
benmoriceau authored Apr 28, 2022
1 parent bb2da42 commit e8813ee
Show file tree
Hide file tree
Showing 43 changed files with 1,232 additions and 433 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@
* returning a list for query results. In addition, we provide helper functions that will just
* return a single value (see: {@link JsonPaths#getSingleValue(JsonNode, String)}). These should
* only be used if it is not possible for a query to return more than one value.
*
* Note: This class was originally package private, on the assumption that most uses of JsonPaths
* could be hidden inside other commons libraries (i.e. Jsons and JsonSchemas). It is now public so
* that it can be used from outside this package.
*/
class JsonPaths {
public class JsonPaths {

private static final Logger LOGGER = LoggerFactory.getLogger(JsonPaths.class);

static final String JSON_PATH_START_CHARACTER = "$";
static final String JSON_PATH_LIST_SPLAT = "[*]";
static final String JSON_PATH_FIELD_SEPARATOR = ".";

// set default configurations at start up to match our JSON setup.
static {
Configuration.setDefaults(new Configuration.Defaults() {
Expand Down Expand Up @@ -82,6 +82,18 @@ public Set<Option> options() {
});
}

/**
 * Returns the JsonPath start character ({@code JSON_PATH_START_CHARACTER}), i.e. a path with no
 * field or list segments appended to it yet.
 *
 * @return the starting JsonPath that the append helpers build upon.
 */
public static String empty() {
return JSON_PATH_START_CHARACTER;
}

/**
 * Builds the JsonPath of a field nested under an existing path by joining the two with the field
 * separator.
 *
 * @param jsonPath - path of the parent node.
 * @param field - name of the field to append.
 * @return the parent path, followed by the field separator, followed by the field name.
 */
public static String appendField(final String jsonPath, final String field) {
  return String.join(JSON_PATH_FIELD_SEPARATOR, jsonPath, field);
}

/**
 * Builds the JsonPath that addresses every element of the list found at an existing path by
 * appending the list splat to it.
 *
 * @param jsonPath - path of the list node.
 * @return the given path followed by the list splat.
 */
public static String appendAppendListSplat(final String jsonPath) {
  final StringBuilder splatPath = new StringBuilder();
  splatPath.append(jsonPath);
  splatPath.append(JSON_PATH_LIST_SPLAT);
  return splatPath.toString();
}

/*
* This version of the JsonPath Configuration object allows queries to return to the path of values
* instead of the values that were found.
Expand Down
185 changes: 179 additions & 6 deletions airbyte-commons/src/main/java/io/airbyte/commons/json/JsonSchemas.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,51 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.commons.io.IOs;
import io.airbyte.commons.resources.MoreResources;
import io.airbyte.commons.util.MoreIterators;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.extern.slf4j.Slf4j;

// todo (cgardens) - we need the ability to identify jsonschemas that Airbyte considers invalid for
// a connector (e.g. "not" keyword).
@Slf4j
public class JsonSchemas {

private static final String JSON_SCHEMA_ENUM_KEY = "enum";
private static final String JSON_SCHEMA_TYPE_KEY = "type";
private static final String JSON_SCHEMA_PROPERTIES_KEY = "properties";
private static final String JSON_SCHEMA_ITEMS_KEY = "items";

// all JSONSchema types.
private static final String ARRAY_TYPE = "array";
private static final String OBJECT_TYPE = "object";
private static final String STRING_TYPE = "string";
private static final String NUMBER_TYPE = "number";
private static final String BOOLEAN_TYPE = "boolean";
private static final String NULL_TYPE = "null";
private static final String ONE_OF_TYPE = "oneOf";
private static final String ALL_OF_TYPE = "allOf";
private static final String ANY_OF_TYPE = "anyOf";

private static final String ARRAY_JSON_PATH = "[]";

private static final Set<String> COMPOSITE_KEYWORDS = Set.of(ONE_OF_TYPE, ALL_OF_TYPE, ANY_OF_TYPE);

/**
* JsonSchema supports two ways of declaring type: `type: "string"` and `type: ["null", "string"]`.
* This method will mutate a JsonNode with a type field so that the output type is the array
Expand All @@ -25,16 +61,16 @@ public class JsonSchemas {
* @param jsonNode - a json object with children that contain types.
*/
public static void mutateTypeToArrayStandard(final JsonNode jsonNode) {
if (jsonNode.get("type") != null && !jsonNode.get("type").isArray()) {
final JsonNode type = jsonNode.get("type");
((ObjectNode) jsonNode).putArray("type").add(type);
if (jsonNode.get(JSON_SCHEMA_TYPE_KEY) != null && !jsonNode.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
final JsonNode type = jsonNode.get(JSON_SCHEMA_TYPE_KEY);
((ObjectNode) jsonNode).putArray(JSON_SCHEMA_TYPE_KEY).add(type);
}
}

/*
* JsonReferenceProcessor relies on all the json it consumes being in a file system (not in a jar).
* This method copies all the json configs out of the jar into a temporary directory so that
* JsonReferenceProcessor can find them.
*/
public static <T> Path prepareSchemas(final String resourceDir, final Class<T> klass) {
try {
Expand All @@ -59,4 +95,141 @@ public static <T> Path prepareSchemas(final String resourceDir, final Class<T> k
}
}

/**
 * Traverses a JsonSchema object, calling the consumer at each schema node that is visited.
 *
 * @param jsonSchemaNode - JsonSchema object to traverse.
 * @param consumer - called at each node with the node and the JsonPath to that node.
 */
public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiConsumer<JsonNode, String> consumer) {
  // the traversal starts at the root of the schema, i.e. with the empty JsonPath.
  final String rootPath = JsonPaths.empty();
  traverseJsonSchemaInternal(jsonSchemaNode, rootPath, consumer);
}

/**
 * Traverses a JsonSchema object and, at each node, gives the mapper the chance to contribute a
 * value to the result.
 *
 * @param jsonSchema - JsonSchema object to traverse
 * @param mapper - accepts the current node and the path to that node. If it returns an empty
 *        optional nothing is collected for that node; otherwise the wrapped value is added to the
 *        returned collection.
 * @param <T> - type of objects being collected
 * @return - all items that were collected during the traversal. Typed as a {@link Collection}
 *         because there is no order or uniqueness guarantee, so neither List nor Set make sense.
 */
public static <T> Collection<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema, final BiFunction<JsonNode, String, Optional<T>> mapper) {
  final List<T> collected = new ArrayList<>();
  final BiConsumer<JsonNode, String> collectingConsumer = (node, path) -> {
    final Optional<T> mapped = mapper.apply(node, path);
    mapped.ifPresent(collected::add);
  };
  traverseJsonSchema(jsonSchema, collectingConsumer);
  return collected;
}

/**
 * Traverses a JsonSchema object and returns the path to each node that meets the provided
 * condition. The paths are returned in JsonPath format.
 *
 * @param obj - JsonSchema object to traverse
 * @param predicate - predicate to determine if the path for a node should be collected.
 * @return - set of all paths that were collected during the traversal.
 */
public static Set<String> collectJsonPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
  // keep the path of a node only when the node itself satisfies the predicate.
  final BiFunction<JsonNode, String, Optional<String>> pathIfMatching =
      (node, path) -> predicate.test(node) ? Optional.of(path) : Optional.empty();
  final Collection<String> matchingPaths = traverseJsonSchemaWithCollector(obj, pathIfMatching);
  return new HashSet<>(matchingPaths);
}

/**
 * Recursive, depth-first implementation of
 * {@link JsonSchemas#traverseJsonSchema(JsonNode, BiConsumer)}. Takes the path as an argument so
 * that the path can be passed to the consumer.
 *
 * @param jsonSchemaNode - jsonschema object to traverse.
 * @param path - path from the first call of traverseJsonSchema to the current node.
 * @param consumer - consumer to be called at each node. it accepts the current node and the path to
 *        the node from the root of the object passed at the root level invocation
 * @throws IllegalArgumentException if a visited schema node is not a json object, or if a node of
 *         type object has none of the fields: properties, oneOf, allOf, anyOf.
 */
// todo (cgardens) - replace with easier to understand traversal logic from SecretsHelper.
private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
                                               final String path,
                                               final BiConsumer<JsonNode, String> consumer) {
  if (!jsonSchemaNode.isObject()) {
    throw new IllegalArgumentException(String.format("json schema nodes should always be object nodes. path: %s actual: %s", path, jsonSchemaNode));
  }

  consumer.accept(jsonSchemaNode, path);
  // if type is missing assume object. not official JsonSchema, but it seems to be a common
  // compromise.
  final List<String> nodeTypes = getTypeOrObject(jsonSchemaNode);

  for (final String nodeType : nodeTypes) {
    switch (nodeType) {
      case ARRAY_TYPE -> {
        // "items" is optional in JsonSchema. Without it there is no child schema to descend into,
        // so stop here instead of recursing into a null node (which would throw a
        // NullPointerException).
        if (jsonSchemaNode.has(JSON_SCHEMA_ITEMS_KEY)) {
          final String newPath = JsonPaths.appendAppendListSplat(path);
          traverseJsonSchemaInternal(jsonSchemaNode.get(JSON_SCHEMA_ITEMS_KEY), newPath, consumer);
        }
      }
      case OBJECT_TYPE -> {
        final Optional<String> comboKeyWordOptional = getKeywordIfComposite(jsonSchemaNode);
        if (jsonSchemaNode.has(JSON_SCHEMA_PROPERTIES_KEY)) {
          // visit every declared property; each child path is the current path plus the field name.
          for (final Iterator<Entry<String, JsonNode>> it = jsonSchemaNode.get(JSON_SCHEMA_PROPERTIES_KEY).fields(); it.hasNext();) {
            final Entry<String, JsonNode> child = it.next();
            final String newPath = JsonPaths.appendField(path, child.getKey());
            traverseJsonSchemaInternal(child.getValue(), newPath, consumer);
          }
        } else if (comboKeyWordOptional.isPresent()) {
          // the alternatives of a composite keyword describe the same location, so the path is
          // passed through unchanged.
          for (final JsonNode arrayItem : jsonSchemaNode.get(comboKeyWordOptional.get())) {
            traverseJsonSchemaInternal(arrayItem, path, consumer);
          }
        } else {
          throw new IllegalArgumentException(
              "malformed JsonSchema object type, must have one of the following fields: properties, oneOf, allOf, anyOf in " + jsonSchemaNode);
        }
      }
      default -> {
        // boolean, number, string, null (and any other type): nothing to do after the
        // consumer.accept call above.
      }
    }
  }
}

/**
 * Detects whether the object uses JSONSchema composite functionality (e.g. oneOf, anyOf, allOf)
 * and, if so, reports which keyword it uses.
 *
 * @param node - object to detect use of composite functionality.
 * @return the first keyword of COMPOSITE_KEYWORDS (in that set's iteration order) that is present
 *         on the node; empty if the node uses none of them.
 */
private static Optional<String> getKeywordIfComposite(final JsonNode node) {
  return COMPOSITE_KEYWORDS.stream()
      .filter(node::has)
      .findFirst();
}

/**
 * Same as {@link JsonSchemas#getType(JsonNode)}, except that a node for which no type can be
 * determined is treated as an object.
 *
 * @param jsonNode - json schema node to inspect.
 * @return the types found for the node, or a single-element list containing the object type when
 *         none were found.
 */
public static List<String> getTypeOrObject(final JsonNode jsonNode) {
  final List<String> declaredTypes = getType(jsonNode);
  return declaredTypes.isEmpty() ? List.of(OBJECT_TYPE) : declaredTypes;
}

/**
 * Determines the JsonSchema type(s) of a schema node.
 *
 * @param jsonNode - json schema node to inspect.
 * @return the text of every entry when "type" is an array; a single-element list when "type" is a
 *         scalar; a single-element list containing the string type when there is no "type" but
 *         there is an "enum"; an empty list otherwise.
 */
public static List<String> getType(final JsonNode jsonNode) {
  final JsonNode typeNode = jsonNode.get(JSON_SCHEMA_TYPE_KEY);
  if (typeNode != null) {
    if (!typeNode.isArray()) {
      return List.of(typeNode.asText());
    }
    final List<String> declaredTypes = new ArrayList<>();
    for (final JsonNode declaredType : typeNode) {
      declaredTypes.add(declaredType.asText());
    }
    return declaredTypes;
  }
  // an enum declared without an explicit type is reported as a string.
  return jsonNode.has(JSON_SCHEMA_ENUM_KEY) ? List.of(STRING_TYPE) : Collections.emptyList();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright (c) 2021 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.commons.json;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.mock;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.commons.resources.MoreResources;
import java.io.IOException;
import java.util.function.BiConsumer;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.InOrder;
import org.mockito.Mockito;

/**
 * Tests for {@link JsonSchemas}: normalization of the "type" field and traversal of schemas. The
 * traversal tests verify both which nodes the consumer receives and the order it receives them in.
 */
class JsonSchemasTest {

  @Test
  void testMutateTypeToArrayStandard() {
    // no "type" field: the node must be left untouched.
    final JsonNode expectedWithoutType = Jsons.deserialize("{\"test\":\"abc\"}");
    final JsonNode actualWithoutType = Jsons.clone(expectedWithoutType);
    JsonSchemas.mutateTypeToArrayStandard(actualWithoutType);
    assertEquals(expectedWithoutType, actualWithoutType);

    // "type" is already an array: the node must be left untouched.
    final JsonNode expectedWithArrayType = Jsons.deserialize("{\"test\":\"abc\", \"type\":[\"object\"]}");
    final JsonNode actualWithArrayType = Jsons.clone(expectedWithArrayType);
    JsonSchemas.mutateTypeToArrayStandard(actualWithArrayType);
    assertEquals(expectedWithArrayType, actualWithArrayType);

    // "type" is a scalar: it must be wrapped into a single-element array.
    final JsonNode expectedWithoutArrayType = Jsons.deserialize("{\"test\":\"abc\", \"type\":[\"object\"]}");
    final JsonNode actualWithStringType = Jsons.deserialize("{\"test\":\"abc\", \"type\":\"object\"}");
    JsonSchemas.mutateTypeToArrayStandard(actualWithStringType);
    assertEquals(expectedWithoutArrayType, actualWithStringType);
  }

  // nested objects and arrays of objects are visited depth-first with the matching JsonPath.
  @SuppressWarnings("unchecked")
  @Test
  void testTraverse() throws IOException {
    final JsonNode jsonWithAllTypes = Jsons.deserialize(MoreResources.readResource("json_schemas/json_with_all_types.json"));
    final BiConsumer<JsonNode, String> mock = mock(BiConsumer.class);

    JsonSchemas.traverseJsonSchema(jsonWithAllTypes, mock);
    final InOrder inOrder = Mockito.inOrder(mock);
    inOrder.verify(mock).accept(jsonWithAllTypes, JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("name"), "$.name");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("name").get("properties").get("first"), "$.name.first");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("name").get("properties").get("last"), "$.name.last");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("company"), "$.company");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("pets"), "$.pets");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("pets").get("items"), "$.pets[*]");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("pets").get("items").get("properties").get("type"), "$.pets[*].type");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("pets").get("items").get("properties").get("number"), "$.pets[*].number");
    inOrder.verifyNoMoreInteractions();
  }

  // the same schema template is exercised once per composite keyword; composite alternatives are
  // expected at the same path as the node that declares them.
  @SuppressWarnings("unchecked")
  @ValueSource(strings = {
    "anyOf",
    "oneOf",
    "allOf"
  })
  @ParameterizedTest
  void testTraverseComposite(final String compositeKeyword) throws IOException {
    final String jsonSchemaString = MoreResources.readResource("json_schemas/composite_json_schema.json")
        .replaceAll("<composite-placeholder>", compositeKeyword);
    final JsonNode jsonWithAllTypes = Jsons.deserialize(jsonSchemaString);
    final BiConsumer<JsonNode, String> mock = mock(BiConsumer.class);

    JsonSchemas.traverseJsonSchema(jsonWithAllTypes, mock);

    final InOrder inOrder = Mockito.inOrder(mock);
    inOrder.verify(mock).accept(jsonWithAllTypes, JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(0), JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(1), JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(1).get("properties").get("prop1"), "$.prop1");
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(2), JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(2).get("items"), "$[*]");
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(3).get(compositeKeyword).get(0), JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(3).get(compositeKeyword).get(1), JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(3).get(compositeKeyword).get(1).get("items"), "$[*]");
    inOrder.verifyNoMoreInteractions();
  }

  // a node whose "type" lists several types is traversed once per listed type.
  @SuppressWarnings("unchecked")
  @Test
  void testTraverseMultiType() throws IOException {
    final JsonNode jsonWithAllTypes = Jsons.deserialize(MoreResources.readResource("json_schemas/json_with_array_type_fields.json"));
    final BiConsumer<JsonNode, String> mock = mock(BiConsumer.class);

    JsonSchemas.traverseJsonSchema(jsonWithAllTypes, mock);
    final InOrder inOrder = Mockito.inOrder(mock);
    inOrder.verify(mock).accept(jsonWithAllTypes, JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get("properties").get("company"), "$.company");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("items"), "$[*]");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("items").get("properties").get("user"), "$[*].user");
    inOrder.verifyNoMoreInteractions();
  }

  // multi-type node whose object form is described through a composite keyword.
  @SuppressWarnings("unchecked")
  @Test
  void testTraverseMultiTypeComposite() throws IOException {
    final String compositeKeyword = "anyOf";
    final JsonNode jsonWithAllTypes = Jsons.deserialize(MoreResources.readResource("json_schemas/json_with_array_type_fields_with_composites.json"));
    final BiConsumer<JsonNode, String> mock = mock(BiConsumer.class);

    JsonSchemas.traverseJsonSchema(jsonWithAllTypes, mock);

    final InOrder inOrder = Mockito.inOrder(mock);
    inOrder.verify(mock).accept(jsonWithAllTypes, JsonPaths.empty());
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(0).get("properties").get("company"), "$.company");
    inOrder.verify(mock).accept(jsonWithAllTypes.get(compositeKeyword).get(1).get("properties").get("organization"), "$.organization");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("items"), "$[*]");
    inOrder.verify(mock).accept(jsonWithAllTypes.get("items").get("properties").get("user"), "$[*].user");
    inOrder.verifyNoMoreInteractions();
  }

}
Loading

0 comments on commit e8813ee

Please sign in to comment.