Skip to content

Commit

Permalink
Move useLegacyVersion to OrcWriterOptions
Browse files Browse the repository at this point in the history
  • Loading branch information
findepi committed Apr 13, 2021
1 parent 4407c99 commit 7098233
Show file tree
Hide file tree
Showing 15 changed files with 31 additions and 25 deletions.
3 changes: 1 addition & 2 deletions lib/trino-orc/src/main/java/io/trino/orc/OrcWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ public OrcWriter(
ColumnMetadata<OrcType> orcTypes,
CompressionKind compression,
OrcWriterOptions options,
boolean writeLegacyVersion,
Map<String, String> userMetadata,
boolean validate,
OrcWriteValidationMode validationMode,
Expand All @@ -162,7 +161,7 @@ public OrcWriter(

this.userMetadata.putAll(requireNonNull(userMetadata, "userMetadata is null"));
this.userMetadata.put(PRESTO_ORC_WRITER_VERSION_METADATA_KEY, PRESTO_ORC_WRITER_VERSION);
this.metadataWriter = new CompressedMetadataWriter(new OrcMetadataWriter(writeLegacyVersion), compression, maxCompressionBufferSize);
this.metadataWriter = new CompressedMetadataWriter(new OrcMetadataWriter(options.isUseLegacyVersion()), compression, maxCompressionBufferSize);
this.stats = requireNonNull(stats, "stats is null");

requireNonNull(columnNames, "columnNames is null");
Expand Down
25 changes: 25 additions & 0 deletions lib/trino-orc/src/main/java/io/trino/orc/OrcWriterOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class OrcWriterOptions
private static final int DEFAULT_ROW_GROUP_MAX_ROW_COUNT = 10_000;
private static final DataSize DEFAULT_DICTIONARY_MAX_MEMORY = DataSize.of(16, MEGABYTE);

private final boolean useLegacyVersion;
private final DataSize stripeMinSize;
private final DataSize stripeMaxSize;
private final int stripeMaxRowCount;
Expand All @@ -51,6 +52,7 @@ public class OrcWriterOptions
public OrcWriterOptions()
{
this(
false,
DEFAULT_STRIPE_MIN_SIZE,
DEFAULT_STRIPE_MAX_SIZE,
DEFAULT_STRIPE_MAX_ROW_COUNT,
Expand All @@ -63,6 +65,7 @@ public OrcWriterOptions()
}

private OrcWriterOptions(
boolean useLegacyVersion,
DataSize stripeMinSize,
DataSize stripeMaxSize,
int stripeMaxRowCount,
Expand All @@ -83,6 +86,7 @@ private OrcWriterOptions(
requireNonNull(bloomFilterColumns, "bloomFilterColumns is null");
checkArgument(bloomFilterFpp > 0.0 && bloomFilterFpp < 1.0, "bloomFilterFpp should be > 0.0 & < 1.0");

this.useLegacyVersion = useLegacyVersion;
this.stripeMinSize = stripeMinSize;
this.stripeMaxSize = stripeMaxSize;
this.stripeMaxRowCount = stripeMaxRowCount;
Expand All @@ -94,6 +98,18 @@ private OrcWriterOptions(
this.bloomFilterFpp = bloomFilterFpp;
}

/**
 * Reports whether the legacy-version flag is set on these options.
 *
 * @return the value of the {@code useLegacyVersion} field
 */
public boolean isUseLegacyVersion()
{
    return this.useLegacyVersion;
}

/**
 * Creates a copy of these options with the legacy-version flag replaced.
 * The copy is produced through a builder seeded from this instance, so
 * this instance itself is not modified.
 *
 * @param useLegacyVersion the flag value the returned options should carry
 * @return a new {@code OrcWriterOptions} built from this one with the given flag
 */
public OrcWriterOptions withUseLegacyVersion(boolean useLegacyVersion)
{
    Builder copy = builderFrom(this);
    copy.setUseLegacyVersion(useLegacyVersion);
    return copy.build();
}

public DataSize getStripeMinSize()
{
return stripeMinSize;
Expand Down Expand Up @@ -230,6 +246,7 @@ public static Builder builderFrom(OrcWriterOptions options)

public static final class Builder
{
private boolean useLegacyVersion;
private DataSize stripeMinSize;
private DataSize stripeMaxSize;
private int stripeMaxRowCount;
Expand All @@ -244,6 +261,7 @@ private Builder(OrcWriterOptions options)
{
requireNonNull(options, "options is null");

this.useLegacyVersion = options.useLegacyVersion;
this.stripeMinSize = options.stripeMinSize;
this.stripeMaxSize = options.stripeMaxSize;
this.stripeMaxRowCount = options.stripeMaxRowCount;
Expand All @@ -255,6 +273,12 @@ private Builder(OrcWriterOptions options)
this.bloomFilterFpp = options.bloomFilterFpp;
}

/**
 * Stores the legacy-version flag on this builder.
 *
 * @param useLegacyVersion the flag value to record
 * @return this builder, so calls can be chained
 */
public Builder setUseLegacyVersion(boolean useLegacyVersion)
{
    this.useLegacyVersion = useLegacyVersion;
    return this;
}

public Builder setStripeMinSize(DataSize stripeMinSize)
{
this.stripeMinSize = stripeMinSize;
Expand Down Expand Up @@ -312,6 +336,7 @@ public Builder setBloomFilterFpp(double bloomFilterFpp)
public OrcWriterOptions build()
{
return new OrcWriterOptions(
useLegacyVersion,
stripeMinSize,
stripeMaxSize,
stripeMaxRowCount,
Expand Down
2 changes: 0 additions & 2 deletions lib/trino-orc/src/test/java/io/trino/orc/OrcTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,6 @@ public static void writeOrcPages(File outputFile, CompressionKind compression, L
OrcType.createRootOrcType(columnNames, types),
compression,
new OrcWriterOptions(),
false,
ImmutableMap.of(),
true,
BOTH,
Expand All @@ -645,7 +644,6 @@ public static void writeOrcColumnTrino(File outputFile, CompressionKind compress
OrcType.createRootOrcType(columnNames, types),
compression,
new OrcWriterOptions(),
false,
ImmutableMap.of(),
true,
BOTH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ public void testWriteOutputStreamsInOrder()
.withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
.withDictionaryMaxMemory(DataSize.of(32, MEGABYTE))
.withBloomFilterColumns(ImmutableSet.copyOf(columnNames)),
false,
ImmutableMap.of(),
true,
validationMode,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ private void write(TempFile tempFile, Type writerType, List<String> data)
.withStripeMaxRowCount(ORC_STRIPE_SIZE)
.withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
.withDictionaryMaxMemory(DataSize.of(32, MEGABYTE)),
false,
ImmutableMap.of(),
true,
BOTH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ public OrcFileWriter(
ColumnMetadata<OrcType> fileColumnOrcTypes,
CompressionKind compression,
OrcWriterOptions options,
boolean writeLegacyVersion,
int[] fileInputColumnIndexes,
Map<String, String> metadata,
Optional<Supplier<OrcDataSource>> validationInputFactory,
Expand Down Expand Up @@ -123,7 +122,6 @@ public OrcFileWriter(
fileColumnOrcTypes,
compression,
options,
writeLegacyVersion,
metadata,
validationInputFactory.isPresent(),
validationMode,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,19 @@ public class OrcFileWriterFactory
private final FileFormatDataSourceStats readStats;
private final OrcWriterStats stats = new OrcWriterStats();
private final OrcWriterOptions orcWriterOptions;
private final boolean writeLegacyVersion;

@Inject
public OrcFileWriterFactory(
HdfsEnvironment hdfsEnvironment,
TypeManager typeManager,
NodeVersion nodeVersion,
OrcWriterConfig orcWriterConfig,
FileFormatDataSourceStats readStats,
OrcWriterConfig config)
{
this(
hdfsEnvironment,
typeManager,
nodeVersion,
requireNonNull(orcWriterConfig, "orcWriterConfig is null").isUseLegacyVersion(),
readStats,
requireNonNull(config, "config is null").toOrcWriterOptions());
}
Expand All @@ -109,14 +106,12 @@ public OrcFileWriterFactory(
HdfsEnvironment hdfsEnvironment,
TypeManager typeManager,
NodeVersion nodeVersion,
boolean writeLegacyVersion,
FileFormatDataSourceStats readStats,
OrcWriterOptions orcWriterOptions)
{
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
this.writeLegacyVersion = writeLegacyVersion;
this.readStats = requireNonNull(readStats, "readStats is null");
this.orcWriterOptions = requireNonNull(orcWriterOptions, "orcWriterOptions is null");
}
Expand Down Expand Up @@ -215,7 +210,6 @@ public Optional<FileWriter> createFileWriter(
.withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session))
.withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session))
.withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)),
writeLegacyVersion,
fileInputColumnIndexes,
ImmutableMap.<String, String>builder()
.put(PRESTO_VERSION_NAME, nodeVersion.toString())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public class OrcWriterConfig
private OrcWriterOptions options = new OrcWriterOptions();

private double defaultBloomFilterFpp = 0.05;
private boolean useLegacyVersion;
private double validationPercentage;
private OrcWriteValidationMode validationMode = OrcWriteValidationMode.BOTH;

Expand Down Expand Up @@ -139,14 +138,14 @@ public OrcWriterConfig setDefaultBloomFilterFpp(double defaultBloomFilterFpp)

public boolean isUseLegacyVersion()
{
return useLegacyVersion;
return options.isUseLegacyVersion();
}

@Config("hive.orc.writer.use-legacy-version-number")
@ConfigDescription("Write ORC files with a version number that is readable by Hive 2.0.0 to 2.2.0")
public OrcWriterConfig setUseLegacyVersion(boolean useLegacyVersion)
{
this.useLegacyVersion = useLegacyVersion;
this.options = options.withUseLegacyVersion(useLegacyVersion);
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ private static OrcWriter createOrcFileWriter(OrcDataSink sink, List<Type> types)
.withMaxStringStatisticsLimit(DataSize.ofBytes(0))
.withStripeMinSize(DataSize.of(64, MEGABYTE))
.withDictionaryMaxMemory(DataSize.of(1, MEGABYTE)),
false,
ImmutableMap.of(),
false,
OrcWriteValidationMode.BOTH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ private static OrcFileWriterFactory getDefaultOrcFileWriterFactory(HdfsEnvironme
hdfsEnvironment,
TYPE_MANAGER,
new NodeVersion("test_version"),
new OrcWriterConfig(),
new FileFormatDataSourceStats(),
new OrcWriterConfig());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ public void testOrcOptimizedWriter(int rowCount, long fileSizePadding)
.withRowsCount(rowCount)
.withSession(session)
.withFileSizePadding(fileSizePadding)
.withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), false, STATS, new OrcWriterOptions()))
.withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), STATS, new OrcWriterOptions()))
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
.isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,6 @@ public PrestoOrcFormatWriter(File targetFile, List<String> columnNames, List<Typ
OrcType.createRootOrcType(columnNames, types),
compressionCodec.getOrcCompressionKind(),
new OrcWriterOptions(),
false,
ImmutableMap.of(),
false,
BOTH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ private void testOrcPredicates(ConnectorSession session)
file.delete();
try {
// Write data
OrcFileWriterFactory writerFactory = new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), false, STATS, new OrcWriterOptions());
OrcFileWriterFactory writerFactory = new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), STATS, new OrcWriterOptions());
FileSplit split = createTestFileTrino(file.getAbsolutePath(), ORC, HiveCompressionCodec.NONE, columnsToWrite, session, NUM_ROWS, writerFactory);

TupleDomain<TestColumn> testingPredicate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ private IcebergFileWriter createOrcWriter(
.withStripeMaxRowCount(getOrcWriterMaxStripeRows(session))
.withDictionaryMaxMemory(getOrcWriterMaxDictionaryMemory(session))
.withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)),
false,
IntStream.range(0, fileColumnNames.size()).toArray(),
ImmutableMap.<String, String>builder()
.put(PRESTO_VERSION_NAME, nodeVersion.toString())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,13 @@ public IcebergOrcFileWriter(
ColumnMetadata<OrcType> fileColumnOrcTypes,
CompressionKind compression,
OrcWriterOptions options,
boolean writeLegacyVersion,
int[] fileInputColumnIndexes,
Map<String, String> metadata,
Optional<Supplier<OrcDataSource>> validationInputFactory,
OrcWriteValidation.OrcWriteValidationMode validationMode,
OrcWriterStats stats)
{
super(orcDataSink, WriterKind.INSERT, NO_ACID_TRANSACTION, false, OptionalInt.empty(), rollbackAction, columnNames, fileColumnTypes, fileColumnOrcTypes, compression, options, writeLegacyVersion, fileInputColumnIndexes, metadata, validationInputFactory, validationMode, stats);
super(orcDataSink, WriterKind.INSERT, NO_ACID_TRANSACTION, false, OptionalInt.empty(), rollbackAction, columnNames, fileColumnTypes, fileColumnOrcTypes, compression, options, fileInputColumnIndexes, metadata, validationInputFactory, validationMode, stats);
this.icebergSchema = requireNonNull(icebergSchema, "icebergSchema is null");
orcColumns = fileColumnOrcTypes;
}
Expand Down

0 comments on commit 7098233

Please sign in to comment.