Skip to content

Commit

Permalink
migrate JsonSchemas to use basic path instead of JSONPath (airbytehq#…
Browse files Browse the repository at this point in the history
  • Loading branch information
cgardens authored Jun 22, 2022
1 parent 94abef3 commit f483396
Show file tree
Hide file tree
Showing 7 changed files with 236 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.jayway.jsonpath.spi.json.JsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import com.jayway.jsonpath.spi.mapper.MappingProvider;
import io.airbyte.commons.json.JsonSchemas.FieldNameOrList;
import io.airbyte.commons.util.MoreIterators;
import java.util.Collections;
import java.util.EnumSet;
Expand Down Expand Up @@ -94,6 +95,20 @@ public static String appendAppendListSplat(final String jsonPath) {
return jsonPath + JSON_PATH_LIST_SPLAT;
}

/**
 * Converts a path expressed as a list of {@link FieldNameOrList} elements (the scheme used by
 * {@link JsonSchemas}) into its JSONPath string form.
 *
 * Starting from the empty JSONPath, each element is appended in order: list elements append the
 * list splat, every other element appends its field name.
 *
 * @param jsonSchemaPath - path as described in {@link JsonSchemas}
 * @return path as JSONPath
 */
public static String mapJsonSchemaPathToJsonPath(final List<FieldNameOrList> jsonSchemaPath) {
  String result = empty();
  for (final FieldNameOrList segment : jsonSchemaPath) {
    if (segment.isList()) {
      result = appendAppendListSplat(result);
    } else {
      result = appendField(result, segment.getFieldName());
    }
  }
  return result;
}

/*
* This version of the JsonPath Configuration object allows queries to return to the path of values
* instead of the values that were found.
Expand Down
166 changes: 136 additions & 30 deletions airbyte-commons/src/main/java/io/airbyte/commons/json/JsonSchemas.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Preconditions;
import io.airbyte.commons.io.IOs;
import io.airbyte.commons.resources.MoreResources;
import io.airbyte.commons.util.MoreIterators;
import io.airbyte.commons.util.MoreLists;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
Expand Down Expand Up @@ -95,8 +96,33 @@ public static <T> Path prepareSchemas(final String resourceDir, final Class<T> k
}
}

public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiConsumer<JsonNode, String> consumer) {
traverseJsonSchemaInternal(jsonSchemaNode, JsonPaths.empty(), consumer);
/**
 * Walks a JsonSchema object, invoking the consumer for each node that the traversal visits. The
 * consumer receives the node together with the path that leads to it; the walk starts with an
 * empty path at the node passed in.
 *
 * @param jsonSchema - JsonSchema object to traverse
 * @param consumer - accepts the current node and the path to that node.
 */
public static void traverseJsonSchema(final JsonNode jsonSchema, final BiConsumer<JsonNode, List<FieldNameOrList>> consumer) {
  // the root node has no path elements yet.
  final List<FieldNameOrList> rootPath = new ArrayList<>();
  traverseJsonSchemaInternal(jsonSchema, rootPath, consumer);
}

/**
 * Traverse a JsonSchema object and map each visited node to a value.
 *
 * @param jsonSchema - JsonSchema object to traverse
 * @param mapper - accepts the current node and the path to that node. Whatever it returns is
 *        collected; a {@code null} result is skipped because it is wrapped with
 *        {@link Optional#ofNullable(Object)} before being handed to the filtered collector.
 * @param <T> - type of objects being collected
 * @return - list of all items that were collected during the traversal, as returned by
 *         {@link #traverseJsonSchemaWithFilteredCollector(JsonNode, BiFunction)}.
 */
public static <T> List<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema,
                                                          final BiFunction<JsonNode, List<FieldNameOrList>, T> mapper) {
  // for the sake of code reuse, delegate to the filtered collector: wrap every mapped value in an
  // Optional so that each non-null result is kept.
  final BiFunction<JsonNode, List<FieldNameOrList>, Optional<T>> wrappingMapper =
      (node, path) -> Optional.ofNullable(mapper.apply(node, path));
  return traverseJsonSchemaWithFilteredCollector(jsonSchema, wrappingMapper);
}

/**
Expand All @@ -111,44 +137,45 @@ public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiCon
* Collection } because there is no order or uniqueness guarantee so neither List nor Set
* make sense.
*/
public static <T> Collection<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema, final BiFunction<JsonNode, String, Optional<T>> mapper) {
final List<T> collectors = new ArrayList<>();
traverseJsonSchema(jsonSchema, (node, path) -> mapper.apply(node, path).ifPresent(collectors::add));
return collectors;
public static <T> List<T> traverseJsonSchemaWithFilteredCollector(final JsonNode jsonSchema,
                                                                  final BiFunction<JsonNode, List<FieldNameOrList>, Optional<T>> mapper) {
  final List<T> collected = new ArrayList<>();
  // keep a node's mapped value only when the mapper returned a non-empty Optional for it.
  final BiConsumer<JsonNode, List<FieldNameOrList>> collectIfPresent = (node, path) -> {
    final Optional<T> mapped = mapper.apply(node, path);
    if (mapped.isPresent()) {
      collected.add(mapped.get());
    }
  };
  traverseJsonSchema(jsonSchema, collectIfPresent);
  // hand back an unmodifiable copy so callers cannot mutate the result.
  return collected.stream().toList();
}

/**
* Traverses a JsonSchema object. It returns the path to each node that meet the provided condition.
* The paths are return in JsonPath format
* The paths are returned in JsonPath format. The traversal is depth-first search preorder and values
* are returned in that order.
*
* @param obj - JsonSchema object to traverse
* @param predicate - predicate to determine if the path for a node should be collected.
* @return - collection of all paths that were collected during the traversal.
*/
public static Set<String> collectJsonPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
return new HashSet<>(traverseJsonSchemaWithCollector(obj, (node, path) -> {
public static List<List<FieldNameOrList>> collectPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
return traverseJsonSchemaWithFilteredCollector(obj, (node, path) -> {
if (predicate.test(node)) {
return Optional.of(path);
} else {
return Optional.empty();
}
}));
});
}

/**
* Recursive, depth-first implementation of { @link JsonSchemas#traverseJsonSchema(final JsonNode
* jsonNode, final BiConsumer<JsonNode, List<String>> consumer) }. Takes path as argument so that
* the path can be passsed to the consumer.
* the path can be passed to the consumer.
*
* @param jsonSchemaNode - jsonschema object to traverse.
* @param path - path from the first call of traverseJsonSchema to the current node.
* @param consumer - consumer to be called at each node. it accepts the current node and the path to
* the node from the root of the object passed at the root level invocation
*
*/
// todo (cgardens) - replace with easier to understand traversal logic from SecretsHelper.
private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
final String path,
final BiConsumer<JsonNode, String> consumer) {
final List<FieldNameOrList> path,
final BiConsumer<JsonNode, List<FieldNameOrList>> consumer) {
if (!jsonSchemaNode.isObject()) {
throw new IllegalArgumentException(String.format("json schema nodes should always be object nodes. path: %s actual: %s", path, jsonSchemaNode));
}
Expand All @@ -162,23 +189,20 @@ private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
switch (nodeType) {
// case BOOLEAN_TYPE, NUMBER_TYPE, STRING_TYPE, NULL_TYPE -> do nothing after consumer.accept above.
case ARRAY_TYPE -> {
final String newPath = JsonPaths.appendAppendListSplat(path);
final List<FieldNameOrList> newPath = MoreLists.add(path, FieldNameOrList.list());
// hit every node.
// log.error("array: " + jsonSchemaNode);
traverseJsonSchemaInternal(jsonSchemaNode.get(JSON_SCHEMA_ITEMS_KEY), newPath, consumer);
}
case OBJECT_TYPE -> {
final Optional<String> comboKeyWordOptional = getKeywordIfComposite(jsonSchemaNode);
if (jsonSchemaNode.has(JSON_SCHEMA_PROPERTIES_KEY)) {
for (final Iterator<Entry<String, JsonNode>> it = jsonSchemaNode.get(JSON_SCHEMA_PROPERTIES_KEY).fields(); it.hasNext();) {
final Entry<String, JsonNode> child = it.next();
final String newPath = JsonPaths.appendField(path, child.getKey());
// log.error("obj1: " + jsonSchemaNode);
final List<FieldNameOrList> newPath = MoreLists.add(path, FieldNameOrList.fieldName(child.getKey()));
traverseJsonSchemaInternal(child.getValue(), newPath, consumer);
}
} else if (comboKeyWordOptional.isPresent()) {
for (final JsonNode arrayItem : jsonSchemaNode.get(comboKeyWordOptional.get())) {
// log.error("obj2: " + jsonSchemaNode);
traverseJsonSchemaInternal(arrayItem, path, consumer);
}
} else {
Expand Down Expand Up @@ -206,30 +230,112 @@ private static Optional<String> getKeywordIfComposite(final JsonNode node) {
return Optional.empty();
}

public static List<String> getTypeOrObject(final JsonNode jsonNode) {
final List<String> types = getType(jsonNode);
/**
 * Resolves the type(s) of a JSONSchema node via {@link #getType(JsonNode)}, falling back to a
 * single-element list containing the object type when {@link #getType(JsonNode)} finds none.
 *
 * @param jsonSchema - JSONSchema object
 * @return type of the node.
 */
public static List<String> getTypeOrObject(final JsonNode jsonSchema) {
  final List<String> detectedTypes = getType(jsonSchema);
  return detectedTypes.isEmpty() ? List.of(OBJECT_TYPE) : detectedTypes;
}

public static List<String> getType(final JsonNode jsonNode) {
if (jsonNode.has(JSON_SCHEMA_TYPE_KEY)) {
if (jsonNode.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
return MoreIterators.toList(jsonNode.get(JSON_SCHEMA_TYPE_KEY).iterator())
/**
* Get the type of JSONSchema node. Uses JSONSchema types. Only returns the type of the "top-level"
* node. e.g. if more nodes are nested underneath because it is an object or an array, only the top
* level type is returned.
*
* @param jsonSchema - JSONSchema object
* @return type of the node.
*/
public static List<String> getType(final JsonNode jsonSchema) {
if (jsonSchema.has(JSON_SCHEMA_TYPE_KEY)) {
if (jsonSchema.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
return MoreIterators.toList(jsonSchema.get(JSON_SCHEMA_TYPE_KEY).iterator())
.stream()
.map(JsonNode::asText)
.collect(Collectors.toList());
} else {
return List.of(jsonNode.get(JSON_SCHEMA_TYPE_KEY).asText());
return List.of(jsonSchema.get(JSON_SCHEMA_TYPE_KEY).asText());
}
}
if (jsonNode.has(JSON_SCHEMA_ENUM_KEY)) {
if (jsonSchema.has(JSON_SCHEMA_ENUM_KEY)) {
return List.of(STRING_TYPE);
}
return Collections.emptyList();
}

/**
* Provides a basic scheme for describing the path into a JSON object. Each element in the path is
* either a field name or a list.
*
* This class is helpful in the case where fields can be any UTF-8 string, so the only simple way to
* keep track of the different parts of a path without going crazy with escape characters is to keep
* it in a list with list set aside as a special case.
*
* We prefer using this scheme instead of JSONPath in the tree traversal because, it is easier to
* decompose a path in this scheme than it is in JSONPath. Some callers of the traversal logic want
* to isolate parts of the path easily without the need for complex regex (that would be required if
* we used JSONPath).
*/
public static class FieldNameOrList {

private final String fieldName;
private final boolean isList;

public static FieldNameOrList fieldName(final String fieldName) {
return new FieldNameOrList(fieldName);
}

public static FieldNameOrList list() {
return new FieldNameOrList(null);
}

private FieldNameOrList(final String fieldName) {
isList = fieldName == null;
this.fieldName = fieldName;
}

public String getFieldName() {
Preconditions.checkState(!isList, "cannot return field name, is list node");
return fieldName;
}

public boolean isList() {
return isList;
}

@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (!(o instanceof FieldNameOrList)) {
return false;
}
final FieldNameOrList that = (FieldNameOrList) o;
return isList == that.isList && Objects.equals(fieldName, that.fieldName);
}

@Override
public int hashCode() {
return Objects.hash(fieldName, isList);
}

@Override
public String toString() {
return "FieldNameOrList{" +
"fieldName='" + fieldName + '\'' +
", isList=" + isList +
'}';
}

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,18 @@ public static <T> List<T> concat(final List<T>... lists) {
return Stream.of(lists).flatMap(List::stream).toList();
}

/**
 * Returns a new list holding every element of the given list followed by one extra item. The
 * provided list is left untouched.
 *
 * @param list list to copy and add to
 * @param toAdd item to add
 * @param <T> type of list
 * @return new list with contents of provided list and the added item
 */
public static <T> List<T> add(final List<T> list, final T toAdd) {
  // size the copy up front: all existing elements plus the one being appended.
  final List<T> copy = new ArrayList<>(list.size() + 1);
  copy.addAll(list);
  copy.add(toAdd);
  return copy;
}

}
Loading

0 comments on commit f483396

Please sign in to comment.