Skip to content

Commit

Permalink
feat: Added JSONL support to file-import
Browse files Browse the repository at this point in the history
  • Loading branch information
jruaux committed Mar 18, 2024
1 parent 2fdb8a5 commit 127be6b
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

public enum FileExtension {

CSV, TSV, PSV, FW, JSON, XML, GZ
CSV, TSV, PSV, FW, JSON, JSONL, XML

}
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ private ItemReader<Map<String, Object>> reader(Resource resource) {
return FileUtils.xmlReader(resource, Map.class);
case JSON:
return FileUtils.jsonReader(resource, Map.class);
case JSONL:
return FileUtils.jsonlReader(resource);
default:
throw new UnsupportedOperationException("Unsupported file type: " + type);
}
Expand All @@ -188,46 +190,52 @@ private String delimiter(Resource resource) {
if (delimiter != null) {
return delimiter;
}
switch (FileUtils.extension(resource)) {
case CSV:
return DelimitedLineTokenizer.DELIMITER_COMMA;
case PSV:
return PIPE_DELIMITER;
case TSV:
return DelimitedLineTokenizer.DELIMITER_TAB;
default:
throw new IllegalArgumentException("Unknown file extension for " + resource);
FileExtension extension = FileUtils.extension(resource);
if (extension != null) {
switch (extension) {
case CSV:
return DelimitedLineTokenizer.DELIMITER_COMMA;
case PSV:
return PIPE_DELIMITER;
case TSV:
return DelimitedLineTokenizer.DELIMITER_TAB;
default:
throw new UnsupportedOperationException("Unsupported file extension: " + extension);
}
}
throw new IllegalArgumentException("Unknown file extension for " + resource);
}

private FileType type(Resource resource) {
if (fileType != null) {
return fileType;
}
FileExtension extension = FileUtils.extension(resource);
switch (extension) {
case FW:
return FileType.FIXED;
case JSON:
return FileType.JSON;
case XML:
return FileType.XML;
case CSV:
case PSV:
case TSV:
return FileType.DELIMITED;
default:
throw new UnknownFileTypeException("Unknown file extension: " + extension);
if (extension != null) {
switch (extension) {
case FW:
return FileType.FIXED;
case JSON:
return FileType.JSON;
case JSONL:
return FileType.JSONL;
case XML:
return FileType.XML;
case CSV:
case PSV:
case TSV:
return FileType.DELIMITED;
}
}
throw new UnknownFileTypeException("Unknown file extension: " + extension);
}

@SuppressWarnings({ "unchecked", "rawtypes" })
private <T extends Map<String, Object>> FlatFileItemReader<T> flatFileReader(Resource resource,
AbstractLineTokenizer tokenizer) {
private FlatFileItemReader<Map<String, Object>> flatFileReader(Resource resource, AbstractLineTokenizer tokenizer) {
if (!ObjectUtils.isEmpty(fields)) {
tokenizer.setNames(fields.toArray(new String[0]));
}
FlatFileItemReaderBuilder<T> builder = new FlatFileItemReaderBuilder<>();
FlatFileItemReaderBuilder<Map<String, Object>> builder = new FlatFileItemReaderBuilder<>();
builder.resource(resource);
if (fileOptions.getEncoding() != null) {
builder.encoding(fileOptions.getEncoding());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

public enum FileType {

DELIMITED, FIXED, JSON, XML
DELIMITED, FIXED, JSON, JSONL, XML

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand All @@ -19,6 +20,8 @@
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.mapping.JsonLineMapper;
import org.springframework.batch.item.json.JacksonJsonObjectReader;
import org.springframework.batch.item.json.JsonItemReader;
import org.springframework.batch.item.json.builder.JsonItemReaderBuilder;
Expand Down Expand Up @@ -108,7 +111,11 @@ public static FileExtension extension(Resource resource) {
if (extension == null) {
return null;
}
return FileExtension.valueOf(extension.toUpperCase());
try {
return FileExtension.valueOf(extension.toUpperCase());
} catch (Exception e) {
return null;
}
}

private static String extensionGroup(String file, String group) {
Expand Down Expand Up @@ -147,6 +154,13 @@ public static <T> JsonItemReader<T> jsonReader(Resource resource, Class<? super
return builder.build();
}

/**
 * Creates a line-oriented reader over the given resource in which every
 * line is handed to a {@link JsonLineMapper}.
 *
 * @param resource the resource to read lines from
 * @return a new, not yet opened {@link FlatFileItemReader} configured with
 *         the resource and a {@link JsonLineMapper}
 */
public static FlatFileItemReader<Map<String, Object>> jsonlReader(Resource resource) {
    JsonLineMapper jsonLineMapper = new JsonLineMapper();
    FlatFileItemReader<Map<String, Object>> jsonlItemReader = new FlatFileItemReader<>();
    jsonlItemReader.setResource(resource);
    jsonlItemReader.setLineMapper(jsonLineMapper);
    return jsonlItemReader;
}

public static ObjectMapper objectMapper() {
ObjectMapper mapper = new ObjectMapper();
configureMapper(mapper);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

abstract class AbstractFileTests extends AbstractTestBase {

public static final String BEERS_JSON_URL = "https://storage.googleapis.com/jrx/beers.json";
public static final String BUCKET_URL = "https://storage.googleapis.com/jrx/";
public static final String BEERS_JSON_URL = BUCKET_URL + "beers.json";
public static final String BEERS_JSONL_URL = BUCKET_URL + "beers.jsonl";

private static final String ID = "id";
private static final String KEYSPACE = "beer";
Expand Down Expand Up @@ -128,4 +130,27 @@ void fileImportCSVMultiThreaded(TestInfo info) throws Exception {
}
}

/**
 * Runs a {@code FileImport} of {@code BEERS_JSONL_URL} with an HSET operation
 * keyed on {@code ID} under {@code KEYSPACE}, then verifies that six keys
 * exist, that every key equals {@code KEYSPACE:id} of the hash stored under
 * it, and that beer 1 has the expected name.
 */
// NOTE(review): the unchecked suppression presumably covers the call to
// setOperations(...) - confirm it is still needed.
@SuppressWarnings("unchecked")
@Test
void fileImportJSONL(TestInfo info) throws Exception {
    FileImport executable = new FileImport();
    executable.setRedisClientOptions(redisClientOptions());
    executable.setFiles(BEERS_JSONL_URL);
    HsetBuilder hsetBuilder = new HsetBuilder();
    hsetBuilder.setKeyspace(KEYSPACE);
    hsetBuilder.setKeyFields(ID);
    executable.setOperations(hsetBuilder.build());
    executable.setName(name(info));
    executable.run();
    List<String> keys = commands.keys("*");
    Assertions.assertEquals(6, keys.size());
    for (String key : keys) {
        Map<String, String> map = commands.hgetall(key);
        String id = map.get(ID);
        // Expected value goes first so a failure reports the key that was
        // actually found in Redis as the "actual" side.
        Assertions.assertEquals(KEYSPACE + ":" + id, key);
    }
    Map<String, String> beer1 = commands.hgetall(KEYSPACE + ":1");
    Assertions.assertEquals("Hocus Pocus", beer1.get("name"));
}

}
6 changes: 6 additions & 0 deletions connectors/riot-file/src/test/resources/beers.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"id": "1","brewery_id": "812","name": "Hocus Pocus","abv": "4.5","ibu": "0","srm": "0","upc": "0","filepath": "","descript": "Our take on a classic summer ale. A toast to weeds, rays, and summer haze. A light, crisp ale for mowing lawns, hitting lazy fly balls, and communing with nature, Hocus Pocus is offered up as a summer sacrifice to clodless days.\n\nIts malty sweetness finishes tart and crisp and is best apprediated with a wedge of orange.","add_user": "0","last_mod": "2010-07-22 20:00:20 UTC","style_name": "Light American Wheat Ale or Lager","cat_name": "Other Style"}
{"id": "6","brewery_id": "1385","name": "Winter Warmer","abv": "5.199999809265137","ibu": "0","srm": "0","upc": "0","filepath": "","descript": "","add_user": "0","last_mod": "2010-07-22 20:00:20 UTC","style_name": "Old Ale","cat_name": "British Ale"}
{"id": "8","brewery_id": "1099","name": "Oatmeal Stout","abv": "5","ibu": "0","srm": "0","upc": "0","filepath": "","descript": "","add_user": "0","last_mod": "2010-07-22 20:00:20 UTC","style_name": "American-Style Stout","cat_name": "North American Ale"}
{"id": "10","brewery_id": "545","name": "Chocolate Stout","abv": "0","ibu": "0","srm": "0","upc": "0","filepath": "","descript": "","add_user": "0","last_mod": "2010-07-22 20:00:20 UTC","style_name": "American-Style Stout","cat_name": "North American Ale"}
{"id": "13","brewery_id": "545","name": "The Kidd Lager","abv": "0","ibu": "0","srm": "0","upc": "0","filepath": "","descript": "","add_user": "0","last_mod": "2010-07-22 20:00:20 UTC","style_name": "German-Style Schwarzbier","cat_name": "German Lager"}
{"id": "14","brewery_id": "1099","name": "Imperial Stout","abv": "7","ibu": "0","srm": "0","upc": "0","filepath": "","descript": "","add_user": "0","last_mod": "2010-07-22 20:00:20 UTC","style_name": "American-Style Stout","cat_name": "North American Ale"}

0 comments on commit 127be6b

Please sign in to comment.