Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor PipelinesDataFlowModelParser to take in an InputStream instead of a file path #4289

Merged
merged 2 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import org.opensearch.dataprepper.parser.PipelineTransformer;
import org.opensearch.dataprepper.parser.model.DataPrepperConfiguration;
import org.opensearch.dataprepper.peerforwarder.PeerForwarderProvider;
import org.opensearch.dataprepper.pipeline.parser.PipelineConfigurationFileReader;
import org.opensearch.dataprepper.pipeline.parser.PipelineConfigurationReader;
import org.opensearch.dataprepper.pipeline.parser.PipelinesDataflowModelParser;
import org.opensearch.dataprepper.pipeline.router.RouterFactory;
import org.opensearch.dataprepper.sourcecoordination.SourceCoordinatorFactory;
Expand Down Expand Up @@ -46,9 +48,15 @@ public PipelineTransformer pipelineParser(
}

@Bean
public PipelinesDataflowModelParser pipelinesDataflowModelParser(
public PipelineConfigurationReader pipelineConfigurationReader(
final FileStructurePathProvider fileStructurePathProvider) {
return new PipelinesDataflowModelParser(fileStructurePathProvider.getPipelineConfigFileLocation());
return new PipelineConfigurationFileReader(fileStructurePathProvider.getPipelineConfigFileLocation());
}

/**
 * Exposes the {@link PipelinesDataflowModelParser} bean, built on top of the injected
 * {@link PipelineConfigurationReader}.
 *
 * @param pipelineConfigurationReader reader handed to the parser's constructor
 * @return a new parser instance wrapping the given reader
 */
@Bean
public PipelinesDataflowModelParser pipelinesDataflowModelParser(
        final PipelineConfigurationReader pipelineConfigurationReader) {
    final PipelinesDataflowModelParser dataflowModelParser =
            new PipelinesDataflowModelParser(pipelineConfigurationReader);
    return dataflowModelParser;
}

@Bean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.opensearch.dataprepper.peerforwarder.PeerForwarderProvider;
import org.opensearch.dataprepper.peerforwarder.PeerForwarderReceiveBuffer;
import org.opensearch.dataprepper.pipeline.Pipeline;
import org.opensearch.dataprepper.pipeline.parser.PipelineConfigurationFileReader;
import org.opensearch.dataprepper.pipeline.parser.PipelinesDataflowModelParser;
import org.opensearch.dataprepper.pipeline.router.RouterFactory;
import org.opensearch.dataprepper.plugin.DefaultPluginFactory;
Expand Down Expand Up @@ -115,8 +116,9 @@ void tearDown() {
}

private PipelineTransformer createObjectUnderTest(final String pipelineConfigurationFileLocation) {

final PipelinesDataFlowModel pipelinesDataFlowModel = new PipelinesDataflowModelParser(
pipelineConfigurationFileLocation).parseConfiguration();
new PipelineConfigurationFileReader(pipelineConfigurationFileLocation)).parseConfiguration();
return new PipelineTransformer(pipelinesDataFlowModel, pluginFactory, peerForwarderProvider,
routerFactory, dataPrepperConfiguration, circuitBreakerManager, eventFactory,
acknowledgementSetManager, sourceCoordinatorFactory);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package org.opensearch.dataprepper.pipeline.parser;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.lang.String.format;

public class PipelineConfigurationFileReader implements PipelineConfigurationReader {
private static final Logger LOG = LoggerFactory.getLogger(PipelineConfigurationFileReader.class);
private final String pipelineConfigurationFileLocation;

public PipelineConfigurationFileReader(final String pipelineConfigurationFileLocation) {
this.pipelineConfigurationFileLocation = pipelineConfigurationFileLocation;
}

@Override
public List<InputStream> getPipelineConfigurationInputStreams() {
return getInputStreamsForConfigurationFiles();
}

private List<InputStream> getInputStreamsForConfigurationFiles() {
final File configurationLocation = new File(pipelineConfigurationFileLocation);

if (configurationLocation.isFile()) {
final List<InputStream> inputStreams = Stream.of(configurationLocation).map(this::getInputStreamForFile)
.filter(Objects::nonNull).collect(Collectors.toList());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, I think we should throw an exception since we are looking for a single file.


if (inputStreams.size() != 1) {
throw new ParseException(format("Pipeline configuration file not loadable at %s", configurationLocation.getName()));
}
return inputStreams;
} else if (configurationLocation.isDirectory()) {
FileFilter yamlFilter = pathname -> (pathname.getName().endsWith(".yaml") || pathname.getName().endsWith(".yml"));
List<InputStream> inputStreams = Stream.of(configurationLocation.listFiles(yamlFilter))
.map(this::getInputStreamForFile)
.filter(Objects::nonNull)
.collect(Collectors.toList());

if (inputStreams.isEmpty()) {
LOG.error("Pipelines configuration file not found at {}", pipelineConfigurationFileLocation);
throw new ParseException(
format("Pipelines configuration file not found at %s", pipelineConfigurationFileLocation));
}

return inputStreams;
} else {
LOG.error("Pipelines configuration file not found at {}", pipelineConfigurationFileLocation);
throw new ParseException(format("Pipelines configuration file not found at %s", pipelineConfigurationFileLocation));
}
}

private InputStream getInputStreamForFile(final File pipelineConfigurationFile) {

try {
return new FileInputStream(pipelineConfigurationFile);
} catch (IOException e) {
LOG.warn("Unable to load pipeline configuration file {}", pipelineConfigurationFile.getName());
return null;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.opensearch.dataprepper.pipeline.parser;

import java.io.InputStream;
import java.util.List;

/**
* Source of raw pipeline configurations, exposed as input streams.
*/
public interface PipelineConfigurationReader {

/**
* Provides the pipeline configurations as input streams.
*
* @return a List of InputStream, one for each of the pipeline configurations.
* The caller of this method is responsible for closing these input streams after they are used.
*/
List<InputStream> getPipelineConfigurationInputStreams();
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.lang.String.format;

Expand All @@ -34,14 +29,14 @@ public class PipelinesDataflowModelParser {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory())
.enable(DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY);

private final String pipelineConfigurationFileLocation;
private final PipelineConfigurationReader pipelineConfigurationReader;

public PipelinesDataflowModelParser(final String pipelineConfigurationFileLocation) {
this.pipelineConfigurationFileLocation = pipelineConfigurationFileLocation;
public PipelinesDataflowModelParser(final PipelineConfigurationReader pipelineConfigurationReader) {
this.pipelineConfigurationReader = pipelineConfigurationReader;
}

public PipelinesDataFlowModel parseConfiguration() {
final List<PipelinesDataFlowModel> pipelinesDataFlowModels = parsePipelineConfigurationFiles();
final List<PipelinesDataFlowModel> pipelinesDataFlowModels = parseStreamsToPipelinesDataFlowModel();
return mergePipelinesDataModels(pipelinesDataFlowModels);
}

Expand All @@ -53,35 +48,14 @@ private void validateDataPrepperVersion(final DataPrepperVersion version) {
}
}

private List<PipelinesDataFlowModel> parsePipelineConfigurationFiles() {
final File configurationLocation = new File(pipelineConfigurationFileLocation);

if (configurationLocation.isFile()) {
return Stream.of(configurationLocation).map(this::parsePipelineConfigurationFile)
.filter(Objects::nonNull).collect(Collectors.toList());
} else if (configurationLocation.isDirectory()) {
FileFilter yamlFilter = pathname -> (pathname.getName().endsWith(".yaml") || pathname.getName().endsWith(".yml"));
List<PipelinesDataFlowModel> pipelinesDataFlowModels = Stream.of(configurationLocation.listFiles(yamlFilter))
.map(this::parsePipelineConfigurationFile)
.filter(Objects::nonNull)
.collect(Collectors.toList());

if (pipelinesDataFlowModels.isEmpty()) {
LOG.error("Pipelines configuration file not found at {}", pipelineConfigurationFileLocation);
throw new ParseException(
format("Pipelines configuration file not found at %s", pipelineConfigurationFileLocation));
}

return pipelinesDataFlowModels;
} else {
LOG.error("Pipelines configuration file not found at {}", pipelineConfigurationFileLocation);
throw new ParseException(format("Pipelines configuration file not found at %s", pipelineConfigurationFileLocation));
}
private List<PipelinesDataFlowModel> parseStreamsToPipelinesDataFlowModel() {
return pipelineConfigurationReader.getPipelineConfigurationInputStreams().stream()
.map(this::parseStreamToPipelineDataFlowModel)
.collect(Collectors.toList());
}

private PipelinesDataFlowModel parsePipelineConfigurationFile(final File pipelineConfigurationFile) {
try (final InputStream pipelineConfigurationInputStream = new FileInputStream(pipelineConfigurationFile)) {
LOG.info("Reading pipeline configuration from {}", pipelineConfigurationFile.getName());
private PipelinesDataFlowModel parseStreamToPipelineDataFlowModel(final InputStream configurationInputStream) {
try (final InputStream pipelineConfigurationInputStream = configurationInputStream) {
final PipelinesDataFlowModel pipelinesDataFlowModel = OBJECT_MAPPER.readValue(pipelineConfigurationInputStream,
PipelinesDataFlowModel.class);

Expand All @@ -90,12 +64,7 @@ private PipelinesDataFlowModel parsePipelineConfigurationFile(final File pipelin

return pipelinesDataFlowModel;
} catch (IOException e) {
if (e instanceof FileNotFoundException) {
LOG.warn("Pipeline configuration file {} not found", pipelineConfigurationFile.getName());
return null;
}
LOG.error("Failed to parse the configuration file {}", pipelineConfigurationFileLocation);
throw new ParseException(format("Failed to parse the configuration file %s", pipelineConfigurationFileLocation), e);
throw new ParseException("Failed to parse the configuration", e);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package org.opensearch.dataprepper.pipeline.parser;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;

@ExtendWith(MockitoExtension.class)
public class PipelineConfigurationFileReaderTest {

@TempDir
Path tempDir;

@Test
void getPipelineConfigurationInputStreams_from_directory_with_no_yaml_files_should_throw() {
final PipelineConfigurationReader objectUnderTest =
new PipelineConfigurationFileReader(TestConfigurationProvider.EMPTY_PIPELINE_DIRECTOTRY);


final RuntimeException actualException = assertThrows(RuntimeException.class,
objectUnderTest::getPipelineConfigurationInputStreams);
assertThat(actualException.getMessage(), equalTo(
String.format("Pipelines configuration file not found at %s", TestConfigurationProvider.EMPTY_PIPELINE_DIRECTOTRY)));
}

@Test
void getPipelineConfigurationInputStreams_with_a_configuration_file_which_does_not_exist_should_throw() {
final PipelineConfigurationReader objectUnderTest =
new PipelineConfigurationFileReader("file_does_not_exist.yml");

final RuntimeException actualException = assertThrows(RuntimeException.class,
objectUnderTest::getPipelineConfigurationInputStreams);
assertThat(actualException.getMessage(), equalTo("Pipelines configuration file not found at file_does_not_exist.yml"));
}

@Test
void getPipelineConfigurationInputStreams_with_a_configuration_file_exists_and_is_not_loadable_should_throw() throws IOException {
final String yamlContent = UUID.randomUUID().toString();
final Path file = tempDir.resolve("test-pipeline.yaml");
Files.writeString(file, yamlContent);

file.toFile().setReadable(false, false);

final PipelineConfigurationReader objectUnderTest =
new PipelineConfigurationFileReader(file.toString());

final RuntimeException actualException = assertThrows(RuntimeException.class,
objectUnderTest::getPipelineConfigurationInputStreams);
assertThat(actualException.getMessage(), equalTo("Pipeline configuration file not loadable at test-pipeline.yaml"));
}

@Test
void getPipelineConfigurationInput_streams_from_existing_file() throws IOException {

final String yamlContent = UUID.randomUUID().toString();
final Path file = tempDir.resolve("test-pipeline.yaml");
Files.writeString(file, yamlContent);

final PipelineConfigurationReader objectUnderTest =
new PipelineConfigurationFileReader(file.toString());

final List<InputStream> inputStreams = objectUnderTest.getPipelineConfigurationInputStreams();

assertThat(inputStreams.size(), equalTo(1));

try (final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStreams.get(0), StandardCharsets.UTF_8))) {
final String content = bufferedReader.lines().collect(Collectors.joining(System.lineSeparator()));
assertThat(content, equalTo(yamlContent));
}
}

@Test
void getPipelineConfigurationInput_streams_from_existing_directory() throws IOException {


final String yamlContentPipelineOne = UUID.randomUUID().toString();
final String yamlContentPipelineTwo = UUID.randomUUID().toString();

Files.writeString(tempDir.resolve("test-pipeline-1.yaml"), yamlContentPipelineOne);
Files.writeString(tempDir.resolve("tset-pipeline-2.yml"), yamlContentPipelineTwo);

final PipelineConfigurationReader objectUnderTest =
new PipelineConfigurationFileReader(tempDir.toString());

final List<InputStream> inputStreams = objectUnderTest.getPipelineConfigurationInputStreams();

assertThat(inputStreams.size(), equalTo(2));

try (final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStreams.get(0), StandardCharsets.UTF_8))) {
final String content = bufferedReader.lines().collect(Collectors.joining(System.lineSeparator()));
assertThat(content, equalTo(yamlContentPipelineOne));
}

try (final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStreams.get(1), StandardCharsets.UTF_8))) {
final String content = bufferedReader.lines().collect(Collectors.joining(System.lineSeparator()));
assertThat(content, equalTo(yamlContentPipelineTwo));
}
}
}
Loading
Loading