From 713ca1d37b717ccbe88e502ae96df028e116282f Mon Sep 17 00:00:00 2001 From: Artem Barger Date: Wed, 11 Mar 2020 12:34:55 +0200 Subject: [PATCH] wip/importccl support option to compress output files using gzip Signed-off-by: Artem Barger Release note (sql change): support option to compress output files using gzip --- pkg/ccl/importccl/exportcsv.go | 105 +++++++-- pkg/sql/distsql_physical_planner.go | 16 +- pkg/sql/execinfrapb/processors_bulk_io.pb.go | 218 ++++++++++++------- pkg/sql/execinfrapb/processors_bulk_io.proto | 15 +- pkg/sql/export.go | 35 ++- 5 files changed, 285 insertions(+), 104 deletions(-) diff --git a/pkg/ccl/importccl/exportcsv.go b/pkg/ccl/importccl/exportcsv.go index 3ce721db2499..2c722e72af7b 100644 --- a/pkg/ccl/importccl/exportcsv.go +++ b/pkg/ccl/importccl/exportcsv.go @@ -10,8 +10,10 @@ package importccl import ( "bytes" + "compress/gzip" "context" "fmt" + "io" "strings" "github.com/cockroachdb/cockroach/pkg/ccl/utilccl" @@ -31,6 +33,88 @@ import ( const exportFilePatternPart = "%part%" const exportFilePatternDefault = exportFilePatternPart + ".csv" +// csvExporter data structure to augment the compression +// and csv writer, encapsulating the internals to make +// exporting oblivious for the consumers +type csvExporter struct { + compressor io.WriteCloser + buf *bytes.Buffer + csvWriter *csv.Writer +} + +// Write +func (c *csvExporter) Write(record []string) error { + return c.csvWriter.Write(record) +} + +// Close +func (c *csvExporter) Close() error { + if c.compressor != nil { + return c.compressor.Close() + } + return nil +} + +// Flush +func (c *csvExporter) Flush() { + c.csvWriter.Flush() +} + +// ResetBuffer +func (c *csvExporter) ResetBuffer() { + c.buf.Reset() +} + +func (c *csvExporter) Bytes() []byte { + return c.buf.Bytes() +} + +// Len +func (c *csvExporter) Len() int { + return c.buf.Len() +} + +func (c *csvExporter) FileName(spec execinfrapb.CSVWriterSpec, part string) string { + pattern := exportFilePatternDefault + if spec.NamePattern != "" { + pattern = spec.NamePattern + } + + fileName := strings.Replace(pattern, exportFilePatternPart, part, -1) + // TODO: add suffix based on compressor type + if c.compressor != nil { + fileName += ".gz" + } + return fileName +} + +func newCSVExporter(sp execinfrapb.CSVWriterSpec) *csvExporter { + buf := bytes.NewBuffer([]byte{}) + var exporter *csvExporter + switch sp.CompressionCodec { + case execinfrapb.CSVWriterSpec_gzip: + { + writer := gzip.NewWriter(buf) + exporter = &csvExporter{ + compressor: writer, + buf: buf, + csvWriter: csv.NewWriter(writer), + } + } + default: + { + exporter = &csvExporter{ + buf: buf, + csvWriter: csv.NewWriter(buf), + } + } + } + if sp.Options.Comma != 0 { + exporter.csvWriter.Comma = sp.Options.Comma + } + return exporter +} + func newCSVWriterProcessor( flowCtx *execinfra.FlowCtx, processorID int32, @@ -85,22 +169,15 @@ func (sp *csvWriter) Run(ctx context.Context) { defer tracing.FinishSpan(span) err := func() error { - pattern := exportFilePatternDefault - if sp.spec.NamePattern != "" { - pattern = sp.spec.NamePattern - } - typs := sp.input.OutputTypes() sp.input.Start(ctx) input := execinfra.MakeNoMetadataRowSource(sp.input, sp.output) alloc := &sqlbase.DatumAlloc{} - var buf bytes.Buffer - writer := csv.NewWriter(&buf) - if sp.spec.Options.Comma != 0 { - writer.Comma = sp.spec.Options.Comma - } + writer := newCSVExporter(sp.spec) + defer writer.Close() + nullsAs := "" if sp.spec.Options.NullEncoding != nil { nullsAs = *sp.spec.Options.NullEncoding @@ -114,7 +191,7 @@ func (sp *csvWriter) Run(ctx context.Context) { done := false for { var rows int64 - buf.Reset() + writer.ResetBuffer() for { if sp.spec.ChunkRows > 0 && rows >= sp.spec.ChunkRows { break @@ -160,12 +237,12 @@ func (sp *csvWriter) Run(ctx context.Context) { } defer es.Close() - size := buf.Len() + size := writer.Len() part := fmt.Sprintf("n%d.%d", sp.flowCtx.EvalCtx.NodeID, chunk) chunk++ - filename := strings.Replace(pattern, exportFilePatternPart, part, -1) - if err := es.WriteFile(ctx, filename, bytes.NewReader(buf.Bytes())); err != nil { + filename := writer.FileName(sp.spec, part) + if err := es.WriteFile(ctx, filename, bytes.NewReader(writer.Bytes())); err != nil { return err } res := sqlbase.EncDatumRow{ diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go index 8cb1838d25bc..20f89da651b0 100644 --- a/pkg/sql/distsql_physical_planner.go +++ b/pkg/sql/distsql_physical_planner.go @@ -3219,11 +3219,19 @@ func (dsp *DistSQLPlanner) createPlanForExport( return PhysicalPlan{}, err } + // read value of compression codec, default is no compression + // otherwise lookup from supported set of codec values + compessionCodec := execinfrapb.CSVWriterSpec_none + if codec, exist := execinfrapb.CSVWriterSpec_Compression_value[n.codecName]; exist { + compessionCodec = execinfrapb.CSVWriterSpec_Compression(codec) + } + core := execinfrapb.ProcessorCoreUnion{CSVWriter: &execinfrapb.CSVWriterSpec{ - Destination: n.fileName, - NamePattern: exportFilePatternDefault, - Options: n.csvOpts, - ChunkRows: int64(n.chunkSize), + Destination: n.fileName, + NamePattern: exportFilePatternDefault, + Options: n.csvOpts, + ChunkRows: int64(n.chunkSize), + CompressionCodec: compessionCodec, }} resTypes := make([]types.T, len(sqlbase.ExportColumns)) diff --git a/pkg/sql/execinfrapb/processors_bulk_io.pb.go b/pkg/sql/execinfrapb/processors_bulk_io.pb.go index 574c12f9cf86..d1ac40ec20ba 100644 --- a/pkg/sql/execinfrapb/processors_bulk_io.pb.go +++ b/pkg/sql/execinfrapb/processors_bulk_io.pb.go @@ -71,7 +71,45 @@ func (x *BackfillerSpec_Type) UnmarshalJSON(data []byte) error { return nil } func (BackfillerSpec_Type) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{0, 0} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{0, 0} +} + +// Compression list of the compression codecs which are currently +// supported for CSVWriter spec +type CSVWriterSpec_Compression int32 + +const ( + CSVWriterSpec_none CSVWriterSpec_Compression = 0 + CSVWriterSpec_gzip CSVWriterSpec_Compression = 1 +) + +var CSVWriterSpec_Compression_name = map[int32]string{ + 0: "none", + 1: "gzip", +} +var CSVWriterSpec_Compression_value = map[string]int32{ + "none": 0, + "gzip": 1, +} + +func (x CSVWriterSpec_Compression) Enum() *CSVWriterSpec_Compression { + p := new(CSVWriterSpec_Compression) + *p = x + return p +} +func (x CSVWriterSpec_Compression) String() string { + return proto.EnumName(CSVWriterSpec_Compression_name, int32(x)) +} +func (x *CSVWriterSpec_Compression) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(CSVWriterSpec_Compression_value, data, "CSVWriterSpec_Compression") + if err != nil { + return err + } + *x = CSVWriterSpec_Compression(value) + return nil +} +func (CSVWriterSpec_Compression) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{3, 0} } // BackfillerSpec is the specification for a "schema change backfiller". @@ -107,7 +145,7 @@ func (m *BackfillerSpec) Reset() { *m = BackfillerSpec{} } func (m *BackfillerSpec) String() string { return proto.CompactTextString(m) } func (*BackfillerSpec) ProtoMessage() {} func (*BackfillerSpec) Descriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{0} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{0} } func (m *BackfillerSpec) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -147,7 +185,7 @@ func (m *JobProgress) Reset() { *m = JobProgress{} } func (m *JobProgress) String() string { return proto.CompactTextString(m) } func (*JobProgress) ProtoMessage() {} func (*JobProgress) Descriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{1} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{1} } func (m *JobProgress) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -207,7 +245,7 @@ func (m *ReadImportDataSpec) Reset() { *m = ReadImportDataSpec{} } func (m *ReadImportDataSpec) String() string { return proto.CompactTextString(m) } func (*ReadImportDataSpec) ProtoMessage() {} func (*ReadImportDataSpec) Descriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{2} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{2} } func (m *ReadImportDataSpec) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -245,7 +283,7 @@ func (m *ReadImportDataSpec_ImportTable) Reset() { *m = ReadImportDataSp func (m *ReadImportDataSpec_ImportTable) String() string { return proto.CompactTextString(m) } func (*ReadImportDataSpec_ImportTable) ProtoMessage() {} func (*ReadImportDataSpec_ImportTable) Descriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{2, 0} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{2, 0} } func (m *ReadImportDataSpec_ImportTable) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -281,13 +319,16 @@ type CSVWriterSpec struct { Options roachpb.CSVOptions `protobuf:"bytes,3,opt,name=options" json:"options"` // chunk_rows is num rows to write per file. 0 = no limit. ChunkRows int64 `protobuf:"varint,4,opt,name=chunk_rows,json=chunkRows" json:"chunk_rows"` + // compression code type to be used while epxporting CSV file, + // if it's null (i.e. not defined), no compression will be used. + CompressionCodec CSVWriterSpec_Compression `protobuf:"varint,5,opt,name=compression_codec,json=compressionCodec,enum=cockroach.sql.distsqlrun.CSVWriterSpec_Compression" json:"compression_codec"` } func (m *CSVWriterSpec) Reset() { *m = CSVWriterSpec{} } func (m *CSVWriterSpec) String() string { return proto.CompactTextString(m) } func (*CSVWriterSpec) ProtoMessage() {} func (*CSVWriterSpec) Descriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{3} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{3} } func (m *CSVWriterSpec) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -322,7 +363,7 @@ func (m *BulkRowWriterSpec) Reset() { *m = BulkRowWriterSpec{} } func (m *BulkRowWriterSpec) String() string { return proto.CompactTextString(m) } func (*BulkRowWriterSpec) ProtoMessage() {} func (*BulkRowWriterSpec) Descriptor() ([]byte, []int) { - return fileDescriptor_processors_bulk_io_bc986c92ca21a758, []int{4} + return fileDescriptor_processors_bulk_io_c18227ff0a4c029c, []int{4} } func (m *BulkRowWriterSpec) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -358,6 +399,7 @@ func init() { proto.RegisterType((*CSVWriterSpec)(nil), "cockroach.sql.distsqlrun.CSVWriterSpec") proto.RegisterType((*BulkRowWriterSpec)(nil), "cockroach.sql.distsqlrun.BulkRowWriterSpec") proto.RegisterEnum("cockroach.sql.distsqlrun.BackfillerSpec_Type", BackfillerSpec_Type_name, BackfillerSpec_Type_value) + proto.RegisterEnum("cockroach.sql.distsqlrun.CSVWriterSpec_Compression", CSVWriterSpec_Compression_name, CSVWriterSpec_Compression_value) } func (m *BackfillerSpec) Marshal() (dAtA []byte, err error) { size := m.Size() @@ -656,6 +698,9 @@ func (m *CSVWriterSpec) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x20 i++ i = encodeVarintProcessorsBulkIo(dAtA, i, uint64(m.ChunkRows)) + dAtA[i] = 0x28 + i++ + i = encodeVarintProcessorsBulkIo(dAtA, i, uint64(m.CompressionCodec)) return i, nil } @@ -812,6 +857,7 @@ func (m *CSVWriterSpec) Size() (n int) { l = m.Options.Size() n += 1 + l + sovProcessorsBulkIo(uint64(l)) n += 1 + sovProcessorsBulkIo(uint64(m.ChunkRows)) + n += 1 + sovProcessorsBulkIo(uint64(m.CompressionCodec)) return n } @@ -1928,6 +1974,25 @@ func (m *CSVWriterSpec) Unmarshal(dAtA []byte) error { break } } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field CompressionCodec", wireType) + } + m.CompressionCodec = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProcessorsBulkIo + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.CompressionCodec |= (CSVWriterSpec_Compression(b) & 0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipProcessorsBulkIo(dAtA[iNdEx:]) @@ -2135,74 +2200,77 @@ var ( ) func init() { - proto.RegisterFile("sql/execinfrapb/processors_bulk_io.proto", fileDescriptor_processors_bulk_io_bc986c92ca21a758) + proto.RegisterFile("sql/execinfrapb/processors_bulk_io.proto", fileDescriptor_processors_bulk_io_c18227ff0a4c029c) } -var fileDescriptor_processors_bulk_io_bc986c92ca21a758 = []byte{ - // 1029 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x55, 0x4f, 0x6f, 0xdb, 0xb6, - 0x1b, 0xb6, 0x6c, 0xd9, 0x91, 0x5f, 0xc5, 0x81, 0x4b, 0xf4, 0x57, 0xe8, 0x17, 0x6c, 0x8e, 0xe1, - 0xb5, 0x9d, 0x57, 0xa0, 0x32, 0x16, 0x60, 0x43, 0xb1, 0xad, 0xe8, 0xe6, 0xa4, 0x09, 0xec, 0x61, - 0x4d, 0xa0, 0x64, 0x29, 0xd0, 0x8b, 0x41, 0x49, 0x8c, 0xc3, 0x98, 0x16, 0x15, 0x92, 0x6a, 0xea, - 0x7e, 0x8a, 0x7d, 0xa8, 0x1d, 0x72, 0xec, 0xb1, 0xa7, 0x60, 0x4d, 0xbe, 0x45, 0x4f, 0x83, 0x28, - 0x29, 0x53, 0x56, 0x64, 0x43, 0x76, 0x49, 0x64, 0xbe, 0xef, 0xf3, 0xf0, 0x79, 0xff, 0x12, 0xfa, - 0xf2, 0x84, 0x0d, 0xc8, 0x1b, 0x12, 0xd0, 0xe8, 0x50, 0xe0, 0xd8, 0x1f, 0xc4, 0x82, 0x07, 0x44, - 0x4a, 0x2e, 0xe4, 0xc4, 0x4f, 0xd8, 0x6c, 0x42, 0xb9, 0x1b, 0x0b, 0xae, 0x38, 0x72, 0x02, 0x1e, - 0xcc, 0x04, 0xc7, 0xc1, 0x91, 0x2b, 0x4f, 0x98, 0x1b, 0x52, 0xa9, 0xe4, 0x09, 0x13, 0x49, 0xb4, - 0x7a, 0xef, 0x98, 0xfb, 0x72, 0x90, 0xfe, 0x89, 0x7d, 0xfd, 0x2f, 0x43, 0xac, 0x3a, 0xda, 0x3b, - 0xf6, 0x07, 0x94, 0x3f, 0x3e, 0xe4, 0x62, 0x8e, 0x55, 0x61, 0xf9, 0x2c, 0xbd, 0x55, 0x9e, 0x30, - 0x1f, 0x4b, 0x32, 0x90, 0x4a, 0x24, 0x81, 0x4a, 0x04, 0x09, 0x73, 0xeb, 0x83, 0x7f, 0xd2, 0x84, - 0x25, 0x29, 0xe8, 0x13, 0x45, 0xd9, 0xe0, 0x88, 0x05, 0x03, 0x45, 0xe7, 0x44, 0x2a, 0x3c, 0x8f, - 0x73, 0xcb, 0xdd, 0x29, 0x9f, 0x72, 0xfd, 0x39, 0x48, 0xbf, 0xb2, 0xd3, 0xde, 0xc7, 0x1a, 0xac, - 0x0c, 0x71, 0x30, 0x3b, 0xa4, 0x8c, 0x11, 0xb1, 0x17, 0x93, 0x00, 0x6d, 0x83, 0xa9, 0x16, 0x31, - 0x71, 0x8c, 0xae, 0xd1, 0x5f, 0x59, 0x7f, 0xec, 0xde, 0x14, 0xa2, 0x7b, 0x1d, 0xe7, 0xee, 0x2f, - 0x62, 0x32, 0x34, 0xcf, 0xce, 0xd7, 0x2a, 0x9e, 0x26, 0x40, 0x43, 0xa8, 0x2b, 0xec, 0x33, 0xe2, - 0x54, 0xbb, 0x46, 0xdf, 0x5e, 0x7f, 0xf8, 0x37, 0xa6, 0x3c, 0x54, 0x77, 0x3f, 0xf5, 0xd9, 0x24, - 0x32, 0x10, 0x34, 0x56, 0x5c, 0xe4, 0x14, 0x19, 0x14, 0x3d, 0x87, 0xba, 0x8c, 0x71, 0x24, 0x9d, - 0x5a, 0xb7, 0xd6, 0xb7, 0xd7, 0xbf, 0xba, 0x59, 0x8d, 0xa6, 0xf1, 0x08, 0x0e, 0x53, 0x39, 0x38, - 0x2a, 0x68, 0x34, 0x1a, 0x7d, 0x0d, 0x56, 0x98, 0x08, 0xac, 0x28, 0x8f, 0x1c, 0xb3, 0x6b, 0xf4, - 0x6b, 0xc3, 0xff, 0xa5, 0xe6, 0x8f, 0xe7, 0x6b, 0xad, 0x34, 0x4f, 0xee, 0x66, 0x6e, 0xf4, 0xae, - 0xdc, 0xd0, 0x17, 0x00, 0xc1, 0x51, 0x12, 0xcd, 0x26, 0x92, 0xbe, 0x25, 0x4e, 0x5d, 0x83, 0x32, - 0xce, 0xa6, 0x3e, 0xdf, 0xa3, 0x6f, 0x09, 0xda, 0x81, 0x65, 0xae, 0x8e, 0x88, 0x98, 0x68, 0xb5, - 0xd2, 0x69, 0x68, 0x95, 0xb7, 0x8b, 0xd4, 0xd6, 0x0c, 0xda, 0x26, 0xd1, 0x33, 0xb0, 0x04, 0xc1, - 0xe1, 0x4f, 0x72, 0xe7, 0xd0, 0x59, 0xd2, 0x69, 0xfb, 0xbc, 0x44, 0x96, 0x16, 0xd7, 0x3d, 0x62, - 0x81, 0xbb, 0x5f, 0x14, 0x37, 0xe7, 0xb8, 0x02, 0xf5, 0x1e, 0x81, 0x99, 0x16, 0x02, 0xd9, 0xb0, - 0x34, 0x8a, 0x5e, 0x63, 0x46, 0xc3, 0x76, 0x05, 0x01, 0x34, 0x36, 0x38, 0x4b, 0xe6, 0x51, 0xdb, - 0x40, 0x4d, 0xa8, 0x8f, 0xa2, 0x90, 0xbc, 0x69, 0x57, 0x7b, 0xa7, 0x60, 0x8f, 0xb9, 0xbf, 0x2b, - 0xf8, 0x54, 0x10, 0x29, 0xd1, 0x7d, 0x68, 0x1c, 0x73, 0x7f, 0x42, 0x43, 0x5d, 0xfa, 0xda, 0xb0, - 0x95, 0x52, 0x5f, 0x9c, 0xaf, 0xd5, 0xc7, 0xdc, 0x1f, 0x6d, 0x7a, 0xf5, 0x63, 0xee, 0x8f, 0x42, - 0xd4, 0x87, 0xe5, 0x80, 0x47, 0x4a, 0x50, 0x3f, 0xd1, 0xe9, 0x4c, 0x8b, 0x5b, 0xcd, 0x65, 0x5c, - 0xb3, 0x20, 0x07, 0x4c, 0xc9, 0xb8, 0x72, 0x6a, 0x5d, 0xa3, 0x5f, 0x2f, 0x3a, 0x23, 0x3d, 0xe9, - 0x7d, 0x58, 0x02, 0x94, 0x96, 0x6a, 0x34, 0x8f, 0xb9, 0x50, 0x9b, 0x58, 0x61, 0xdd, 0x79, 0x0f, - 0xc0, 0x96, 0x78, 0x1e, 0x33, 0x92, 0xe5, 0xbc, 0x5a, 0xc2, 0x41, 0x66, 0xd0, 0x49, 0xdf, 0x06, - 0x2b, 0xce, 0x35, 0x3b, 0x0d, 0x9d, 0xa3, 0x07, 0x37, 0xb7, 0x45, 0x29, 0xc0, 0x22, 0x57, 0x05, - 0x18, 0x6d, 0x43, 0x2d, 0x11, 0xd4, 0x59, 0xd2, 0x45, 0xfb, 0xe6, 0x66, 0x8e, 0x4f, 0xa5, 0xba, - 0xbf, 0x0a, 0xfa, 0x3c, 0x52, 0x62, 0xe1, 0xa5, 0x0c, 0xe8, 0x29, 0x34, 0xb2, 0x59, 0x76, 0x2c, - 0xad, 0x67, 0xad, 0xc4, 0x95, 0xcf, 0xbb, 0x3b, 0xda, 0xd9, 0xa2, 0x8c, 0x6c, 0x69, 0xb7, 0x5c, - 0x49, 0x0e, 0x42, 0x07, 0xd0, 0xc8, 0xfb, 0xa7, 0xa9, 0xa5, 0x3c, 0xb9, 0x95, 0x94, 0xac, 0x73, - 0xb4, 0x1a, 0xcd, 0x6b, 0x78, 0x39, 0x1b, 0x7a, 0x06, 0xff, 0x97, 0x33, 0x1a, 0x4f, 0xe6, 0x54, - 0x4a, 0x1a, 0x4d, 0x27, 0x87, 0x5c, 0x10, 0x3a, 0x8d, 0x26, 0x33, 0xb2, 0x90, 0x0e, 0x74, 0x8d, - 0xbe, 0x95, 0x0b, 0xb9, 0x97, 0xba, 0xfd, 0x92, 0x79, 0x6d, 0x65, 0x4e, 0x3f, 0x93, 0x85, 0x44, - 0x8f, 0xa0, 0x75, 0x8a, 0x19, 0x4b, 0x47, 0xe4, 0x05, 0x8e, 0xb8, 0x74, 0xec, 0xd2, 0x18, 0x5c, - 0x37, 0xa1, 0x75, 0xb8, 0x23, 0xf4, 0xf4, 0xed, 0x62, 0x81, 0x19, 0x23, 0x8c, 0xca, 0xb9, 0xd3, - 0x2a, 0x95, 0xf0, 0x53, 0x33, 0x7a, 0x05, 0x20, 0x88, 0x4c, 0xe6, 0x64, 0x12, 0x73, 0xe9, 0xac, - 0xe8, 0xe0, 0xbf, 0xbf, 0x55, 0xf0, 0x9e, 0x86, 0xef, 0xf2, 0x2c, 0x7e, 0xaf, 0x29, 0x8a, 0xdf, - 0xab, 0x09, 0xd8, 0x99, 0xaf, 0xce, 0x0f, 0xfa, 0x11, 0xcc, 0x90, 0xc8, 0x40, 0xb7, 0xf6, 0xed, - 0x26, 0xd4, 0xf0, 0x34, 0x12, 0xdd, 0x07, 0x50, 0x58, 0x4c, 0x89, 0xda, 0xe0, 0x4c, 0x3a, 0xd5, - 0x6e, 0xad, 0xdf, 0xcc, 0xed, 0xa5, 0xf3, 0x55, 0x09, 0x76, 0xa9, 0x20, 0xa8, 0x0d, 0xb5, 0x19, - 0x59, 0xe8, 0x5b, 0x9b, 0x5e, 0xfa, 0x89, 0x5e, 0x40, 0xfd, 0x35, 0x66, 0x49, 0xb1, 0x15, 0x6f, - 0x57, 0xeb, 0x52, 0x44, 0x5e, 0x46, 0xf3, 0x5d, 0xf5, 0x89, 0xb1, 0xfa, 0x2d, 0x58, 0x45, 0x43, - 0x96, 0x6f, 0xac, 0x67, 0x37, 0xde, 0x2d, 0xdf, 0xd8, 0x2c, 0xe3, 0x7e, 0x80, 0x95, 0xeb, 0x09, - 0xfc, 0x37, 0x74, 0xad, 0x84, 0x1e, 0x9b, 0x96, 0xd1, 0xae, 0x8e, 0x4d, 0xab, 0xd6, 0x36, 0xc7, - 0xa6, 0x65, 0xb6, 0xeb, 0x63, 0xd3, 0xaa, 0xb7, 0x1b, 0x63, 0xd3, 0x5a, 0x6e, 0xb7, 0x7a, 0xbf, - 0x1b, 0xd0, 0xda, 0xd8, 0x3b, 0x78, 0x29, 0xa8, 0xca, 0x1f, 0x96, 0x87, 0x60, 0x87, 0x44, 0x2a, - 0x1a, 0x65, 0x7b, 0x58, 0xe7, 0xa4, 0xd8, 0x81, 0x25, 0x03, 0xfa, 0x12, 0x96, 0x23, 0x9c, 0xf6, - 0x04, 0x56, 0x8a, 0x88, 0x6c, 0xc3, 0x5c, 0x39, 0xa6, 0x96, 0xdd, 0xcc, 0x80, 0x9e, 0xc2, 0x12, - 0x8f, 0x53, 0x88, 0xd4, 0x3b, 0xe6, 0xfa, 0xae, 0x2c, 0xe6, 0x6e, 0x63, 0xef, 0x60, 0x27, 0x73, - 0xca, 0x29, 0x0a, 0xcc, 0x5f, 0x1b, 0x5e, 0xf0, 0x53, 0x99, 0x3f, 0x0b, 0xe5, 0x0d, 0xef, 0xf1, - 0x53, 0xd9, 0x7b, 0x09, 0x77, 0x86, 0x09, 0x4b, 0xbf, 0x4b, 0x91, 0x5c, 0xbd, 0x6c, 0xc6, 0x7f, - 0x7e, 0xd9, 0x86, 0x8f, 0xcf, 0x3e, 0x74, 0x2a, 0x67, 0x17, 0x1d, 0xe3, 0xdd, 0x45, 0xc7, 0x78, - 0x7f, 0xd1, 0x31, 0xfe, 0xb8, 0xe8, 0x18, 0xbf, 0x5d, 0x76, 0x2a, 0xef, 0x2e, 0x3b, 0x95, 0xf7, - 0x97, 0x9d, 0xca, 0x2b, 0xbb, 0xf4, 0xdc, 0xff, 0x19, 0x00, 0x00, 0xff, 0xff, 0x9f, 0x6b, 0x78, - 0x71, 0x94, 0x08, 0x00, 0x00, +var fileDescriptor_processors_bulk_io_c18227ff0a4c029c = []byte{ + // 1086 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x56, 0xdd, 0x6e, 0xdb, 0x36, + 0x14, 0xb6, 0x6c, 0xd9, 0x91, 0x8f, 0x92, 0x40, 0x25, 0xba, 0x42, 0x0b, 0x36, 0xc7, 0xf3, 0xda, + 0xce, 0x2b, 0x50, 0x19, 0xcb, 0xb0, 0xa1, 0xd8, 0x56, 0x74, 0x73, 0xd2, 0x16, 0xf1, 0xb0, 0x26, + 0x50, 0xba, 0x16, 0xe8, 0x8d, 0x41, 0x49, 0x8c, 0xc3, 0x9a, 0x16, 0x15, 0x92, 0x6a, 0x9a, 0x3e, + 0xc5, 0x1e, 0xab, 0x77, 0xeb, 0x65, 0xaf, 0x8a, 0x35, 0x7d, 0x8b, 0x5e, 0x0d, 0xa4, 0xa4, 0x54, + 0x59, 0x91, 0x0d, 0xd9, 0x8d, 0x4d, 0xf3, 0x9c, 0xef, 0xe3, 0x77, 0x7e, 0x78, 0x68, 0x18, 0xca, + 0x43, 0x36, 0x22, 0xcf, 0x49, 0x4c, 0xd3, 0x7d, 0x81, 0xb3, 0x68, 0x94, 0x09, 0x1e, 0x13, 0x29, + 0xb9, 0x90, 0xd3, 0x28, 0x67, 0xf3, 0x29, 0xe5, 0x41, 0x26, 0xb8, 0xe2, 0xc8, 0x8f, 0x79, 0x3c, + 0x17, 0x1c, 0xc7, 0x07, 0x81, 0x3c, 0x64, 0x41, 0x42, 0xa5, 0x92, 0x87, 0x4c, 0xe4, 0xe9, 0xda, + 0x95, 0xa7, 0x3c, 0x92, 0x23, 0xfd, 0x91, 0x45, 0xe6, 0xab, 0x40, 0xac, 0xf9, 0xc6, 0x3b, 0x8b, + 0x46, 0x94, 0xdf, 0xdc, 0xe7, 0x62, 0x81, 0x55, 0x65, 0xf9, 0x4c, 0x9f, 0x2a, 0x0f, 0x59, 0x84, + 0x25, 0x19, 0x49, 0x25, 0xf2, 0x58, 0xe5, 0x82, 0x24, 0xa5, 0xf5, 0xda, 0xbf, 0x69, 0xc2, 0x92, + 0x54, 0xf4, 0xb9, 0xa2, 0x6c, 0x74, 0xc0, 0xe2, 0x91, 0xa2, 0x0b, 0x22, 0x15, 0x5e, 0x64, 0xa5, + 0xe5, 0xf2, 0x8c, 0xcf, 0xb8, 0x59, 0x8e, 0xf4, 0xaa, 0xd8, 0x1d, 0xbc, 0x6f, 0xc1, 0xea, 0x18, + 0xc7, 0xf3, 0x7d, 0xca, 0x18, 0x11, 0x7b, 0x19, 0x89, 0xd1, 0x7d, 0xb0, 0xd5, 0x71, 0x46, 0x7c, + 0xab, 0x6f, 0x0d, 0x57, 0x37, 0x6e, 0x06, 0xe7, 0x85, 0x18, 0x9c, 0xc5, 0x05, 0x0f, 0x8f, 0x33, + 0x32, 0xb6, 0x5f, 0xbe, 0x59, 0x6f, 0x84, 0x86, 0x00, 0x8d, 0xa1, 0xad, 0x70, 0xc4, 0x88, 0xdf, + 0xec, 0x5b, 0x43, 0x77, 0xe3, 0xfa, 0x3f, 0x98, 0xca, 0x50, 0x83, 0x87, 0xda, 0x67, 0x8b, 0xc8, + 0x58, 0xd0, 0x4c, 0x71, 0x51, 0x52, 0x14, 0x50, 0x74, 0x17, 0xda, 0x32, 0xc3, 0xa9, 0xf4, 0x5b, + 0xfd, 0xd6, 0xd0, 0xdd, 0xf8, 0xfa, 0x7c, 0x35, 0x86, 0x26, 0x24, 0x38, 0xd1, 0x72, 0x70, 0x5a, + 0xd1, 0x18, 0x34, 0xfa, 0x06, 0x9c, 0x24, 0x17, 0x58, 0x51, 0x9e, 0xfa, 0x76, 0xdf, 0x1a, 0xb6, + 0xc6, 0x9f, 0x68, 0xf3, 0xfb, 0x37, 0xeb, 0x2b, 0x3a, 0x4f, 0xc1, 0x56, 0x69, 0x0c, 0x4f, 0xdd, + 0xd0, 0x97, 0x00, 0xf1, 0x41, 0x9e, 0xce, 0xa7, 0x92, 0xbe, 0x20, 0x7e, 0xdb, 0x80, 0x0a, 0xce, + 0xae, 0xd9, 0xdf, 0xa3, 0x2f, 0x08, 0xda, 0x81, 0x65, 0xae, 0x0e, 0x88, 0x98, 0x1a, 0xb5, 0xd2, + 0xef, 0x18, 0x95, 0x17, 0x8b, 0xd4, 0x35, 0x0c, 0xc6, 0x26, 0xd1, 0x1d, 0x70, 0x04, 0xc1, 0xc9, + 0x2f, 0x72, 0x67, 0xdf, 0x5f, 0x32, 0x69, 0xfb, 0xbc, 0x46, 0xa6, 0x8b, 0x1b, 0x1c, 0xb0, 0x38, + 0x78, 0x58, 0x15, 0xb7, 0xe4, 0x38, 0x05, 0x0d, 0x6e, 0x80, 0xad, 0x0b, 0x81, 0x5c, 0x58, 0xda, + 0x4e, 0x9f, 0x61, 0x46, 0x13, 0xaf, 0x81, 0x00, 0x3a, 0x9b, 0x9c, 0xe5, 0x8b, 0xd4, 0xb3, 0x50, + 0x17, 0xda, 0xdb, 0x69, 0x42, 0x9e, 0x7b, 0xcd, 0xc1, 0x11, 0xb8, 0x13, 0x1e, 0xed, 0x0a, 0x3e, + 0x13, 0x44, 0x4a, 0x74, 0x15, 0x3a, 0x4f, 0x79, 0x34, 0xa5, 0x89, 0x29, 0x7d, 0x6b, 0xbc, 0xa2, + 0xa9, 0x4f, 0xde, 0xac, 0xb7, 0x27, 0x3c, 0xda, 0xde, 0x0a, 0xdb, 0x4f, 0x79, 0xb4, 0x9d, 0xa0, + 0x21, 0x2c, 0xc7, 0x3c, 0x55, 0x82, 0x46, 0xb9, 0x49, 0xa7, 0x2e, 0x6e, 0xb3, 0x94, 0x71, 0xc6, + 0x82, 0x7c, 0xb0, 0x25, 0xe3, 0xca, 0x6f, 0xf5, 0xad, 0x61, 0xbb, 0xea, 0x0c, 0xbd, 0x33, 0x78, + 0xbb, 0x04, 0x48, 0x97, 0x6a, 0x7b, 0x91, 0x71, 0xa1, 0xb6, 0xb0, 0xc2, 0xa6, 0xf3, 0xae, 0x81, + 0x2b, 0xf1, 0x22, 0x63, 0xa4, 0xc8, 0x79, 0xb3, 0x86, 0x83, 0xc2, 0x60, 0x92, 0x7e, 0x1f, 0x9c, + 0xac, 0xd4, 0xec, 0x77, 0x4c, 0x8e, 0xae, 0x9d, 0xdf, 0x16, 0xb5, 0x00, 0xab, 0x5c, 0x55, 0x60, + 0x74, 0x1f, 0x5a, 0xb9, 0xa0, 0xfe, 0x92, 0x29, 0xda, 0x77, 0xe7, 0x73, 0x7c, 0x2c, 0x35, 0xf8, + 0x5d, 0xd0, 0xbb, 0xa9, 0x12, 0xc7, 0xa1, 0x66, 0x40, 0xb7, 0xa1, 0x53, 0xdc, 0x65, 0xdf, 0x31, + 0x7a, 0xd6, 0x6b, 0x5c, 0xe5, 0x7d, 0x0f, 0xb6, 0x77, 0xee, 0x51, 0x46, 0xee, 0x19, 0xb7, 0x52, + 0x49, 0x09, 0x42, 0x8f, 0xa0, 0x53, 0xf6, 0x4f, 0xd7, 0x48, 0xb9, 0x75, 0x21, 0x29, 0x45, 0xe7, + 0x18, 0x35, 0x86, 0xd7, 0x0a, 0x4b, 0x36, 0x74, 0x07, 0x3e, 0x95, 0x73, 0x9a, 0x4d, 0x17, 0x54, + 0x4a, 0x9a, 0xce, 0xa6, 0xfb, 0x5c, 0x10, 0x3a, 0x4b, 0xa7, 0x73, 0x72, 0x2c, 0x7d, 0xe8, 0x5b, + 0x43, 0xa7, 0x14, 0x72, 0x45, 0xbb, 0xfd, 0x56, 0x78, 0xdd, 0x2b, 0x9c, 0x7e, 0x25, 0xc7, 0x12, + 0xdd, 0x80, 0x95, 0x23, 0xcc, 0x98, 0xbe, 0x22, 0x0f, 0x70, 0xca, 0xa5, 0xef, 0xd6, 0xae, 0xc1, + 0x59, 0x13, 0xda, 0x80, 0x4b, 0xc2, 0xdc, 0xbe, 0x5d, 0x2c, 0x30, 0x63, 0x84, 0x51, 0xb9, 0xf0, + 0x57, 0x6a, 0x25, 0xfc, 0xd8, 0x8c, 0x9e, 0x00, 0x08, 0x22, 0xf3, 0x05, 0x99, 0x66, 0x5c, 0xfa, + 0xab, 0x26, 0xf8, 0x1f, 0x2f, 0x14, 0x7c, 0x68, 0xe0, 0xbb, 0xbc, 0x88, 0x3f, 0xec, 0x8a, 0xea, + 0xf7, 0x5a, 0x0e, 0x6e, 0xe1, 0x6b, 0xf2, 0x83, 0x7e, 0x06, 0x3b, 0x21, 0x32, 0x36, 0xad, 0x7d, + 0xb1, 0x1b, 0x6a, 0x85, 0x06, 0x89, 0xae, 0x02, 0x28, 0x2c, 0x66, 0x44, 0x6d, 0x72, 0x26, 0xfd, + 0x66, 0xbf, 0x35, 0xec, 0x96, 0xf6, 0xda, 0xfe, 0x9a, 0x04, 0xb7, 0x56, 0x10, 0xe4, 0x41, 0x6b, + 0x4e, 0x8e, 0xcd, 0xa9, 0xdd, 0x50, 0x2f, 0xd1, 0x03, 0x68, 0x3f, 0xc3, 0x2c, 0xaf, 0xa6, 0xe2, + 0xc5, 0x6a, 0x5d, 0x8b, 0x28, 0x2c, 0x68, 0x7e, 0x68, 0xde, 0xb2, 0xd6, 0xbe, 0x07, 0xa7, 0x6a, + 0xc8, 0xfa, 0x89, 0xed, 0xe2, 0xc4, 0xcb, 0xf5, 0x13, 0xbb, 0x75, 0xdc, 0x4f, 0xb0, 0x7a, 0x36, + 0x81, 0xff, 0x85, 0x6e, 0xd5, 0xd0, 0x13, 0xdb, 0xb1, 0xbc, 0xe6, 0xc4, 0x76, 0x5a, 0x9e, 0x3d, + 0xb1, 0x1d, 0xdb, 0x6b, 0x4f, 0x6c, 0xa7, 0xed, 0x75, 0x26, 0xb6, 0xb3, 0xec, 0xad, 0x0c, 0xfe, + 0x6c, 0xc2, 0xca, 0xe6, 0xde, 0xa3, 0xc7, 0x82, 0xaa, 0xf2, 0x61, 0xb9, 0x0e, 0x6e, 0x42, 0xa4, + 0xa2, 0x69, 0x31, 0x87, 0x4d, 0x4e, 0xaa, 0x19, 0x58, 0x33, 0xa0, 0xaf, 0x60, 0x39, 0xc5, 0xba, + 0x27, 0xb0, 0x52, 0x44, 0x14, 0x13, 0xe6, 0xd4, 0x51, 0x5b, 0x76, 0x0b, 0x03, 0xba, 0x0d, 0x4b, + 0x3c, 0xd3, 0x10, 0x69, 0x66, 0xcc, 0xd9, 0x59, 0x59, 0xdd, 0xbb, 0xcd, 0xbd, 0x47, 0x3b, 0x85, + 0x53, 0x49, 0x51, 0x61, 0x3e, 0x4c, 0x78, 0xc1, 0x8f, 0x64, 0xf9, 0x2c, 0xd4, 0x27, 0x7c, 0xc8, + 0x8f, 0x24, 0xda, 0x87, 0x4b, 0x31, 0x5f, 0x64, 0x7a, 0x5e, 0x50, 0x9e, 0x4e, 0x63, 0x9e, 0x90, + 0xd8, 0xbc, 0x06, 0xab, 0x1b, 0xdf, 0x9e, 0x5f, 0xba, 0x33, 0x81, 0x07, 0x9b, 0x1f, 0x08, 0xca, + 0x03, 0xbc, 0x1a, 0xe7, 0xa6, 0xa6, 0x1c, 0x7c, 0x01, 0x6e, 0xcd, 0x0d, 0x39, 0x60, 0xa7, 0x3c, + 0x25, 0x5e, 0x43, 0xaf, 0x66, 0x2f, 0x68, 0xe6, 0x59, 0x83, 0xc7, 0x70, 0x69, 0x9c, 0x33, 0x2d, + 0xab, 0x96, 0xd4, 0xd3, 0x47, 0xd6, 0xfa, 0xdf, 0x8f, 0xec, 0xf8, 0xe6, 0xcb, 0xb7, 0xbd, 0xc6, + 0xcb, 0x93, 0x9e, 0xf5, 0xea, 0xa4, 0x67, 0xbd, 0x3e, 0xe9, 0x59, 0x7f, 0x9d, 0xf4, 0xac, 0x3f, + 0xde, 0xf5, 0x1a, 0xaf, 0xde, 0xf5, 0x1a, 0xaf, 0xdf, 0xf5, 0x1a, 0x4f, 0xdc, 0xda, 0x3f, 0x8f, + 0xbf, 0x03, 0x00, 0x00, 0xff, 0xff, 0x59, 0x45, 0x3a, 0x8f, 0x1f, 0x09, 0x00, 0x00, } diff --git a/pkg/sql/execinfrapb/processors_bulk_io.proto b/pkg/sql/execinfrapb/processors_bulk_io.proto index 60b155aa04d1..62ce1921d494 100644 --- a/pkg/sql/execinfrapb/processors_bulk_io.proto +++ b/pkg/sql/execinfrapb/processors_bulk_io.proto @@ -65,12 +65,10 @@ message BackfillerSpec { optional util.hlc.Timestamp readAsOf = 7 [(gogoproto.nullable) = false]; } - // JobProgress identifies the job to report progress on. This reporting // happens outside this package. message JobProgress { - optional int64 job_id = 1 [(gogoproto.nullable) = false, - (gogoproto.customname) = "JobID"]; + optional int64 job_id = 1 [(gogoproto.nullable) = false, (gogoproto.customname) = "JobID"]; // contribution is the percent of work of the total this processor will // process. optional float contribution = 2 [(gogoproto.nullable) = false]; @@ -145,6 +143,17 @@ message CSVWriterSpec { optional roachpb.CSVOptions options = 3 [(gogoproto.nullable) = false]; // chunk_rows is num rows to write per file. 0 = no limit. optional int64 chunk_rows = 4 [(gogoproto.nullable) = false]; + + // Compression list of the compression codecs which are currently + // supported for CSVWriter spec + enum Compression { + none = 0; + gzip = 1; + } + + // compression code type to be used while epxporting CSV file, + // if it's null (i.e. not defined), no compression will be used. + optional Compression compression_codec = 5 [(gogoproto.nullable) = false]; } // BulkRowWriterSpec is the specification for a processor that consumes rows and diff --git a/pkg/sql/export.go b/pkg/sql/export.go index 6145f94e8536..d4cc364fe731 100644 --- a/pkg/sql/export.go +++ b/pkg/sql/export.go @@ -12,7 +12,9 @@ package sql import ( "context" + "fmt" "strconv" + "strings" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/opt/exec" @@ -23,6 +25,8 @@ import ( "github.com/pkg/errors" ) +type compressionCodecType uint + type exportNode struct { optColumnsSlot @@ -31,6 +35,7 @@ type exportNode struct { fileName string csvOpts roachpb.CSVOptions chunkSize int + codecName string } func (e *exportNode) startExec(params runParams) error { @@ -50,22 +55,25 @@ func (e *exportNode) Close(ctx context.Context) { } const ( - exportOptionDelimiter = "delimiter" - exportOptionNullAs = "nullas" - exportOptionChunkSize = "chunk_rows" - exportOptionFileName = "filename" + exportOptionDelimiter = "delimiter" + exportOptionNullAs = "nullas" + exportOptionChunkSize = "chunk_rows" + exportOptionFileName = "filename" + exportOptionCompression = "compression" ) var exportOptionExpectValues = map[string]KVStringOptValidate{ - exportOptionChunkSize: KVStringOptRequireValue, - exportOptionDelimiter: KVStringOptRequireValue, - exportOptionFileName: KVStringOptRequireValue, - exportOptionNullAs: KVStringOptRequireValue, + exportOptionChunkSize: KVStringOptRequireValue, + exportOptionDelimiter: KVStringOptRequireValue, + exportOptionFileName: KVStringOptRequireValue, + exportOptionNullAs: KVStringOptRequireValue, + exportOptionCompression: KVStringOptRequireValue, } const exportChunkSizeDefault = 100000 const exportFilePatternPart = "%part%" const exportFilePatternDefault = exportFilePatternPart + ".csv" +const exportCompressionCodec = "gzip" // ConstructExport is part of the exec.Factory interface. func (ef *execFactory) ConstructExport( @@ -117,10 +125,21 @@ func (ef *execFactory) ConstructExport( } } + // Check whenever compression is expected and extract compression codec name in case + // of positive result + codecName := "" + if name, ok := optVals[exportOptionCompression]; ok && len(name) != 0 { + if !strings.EqualFold(name, exportCompressionCodec) { + return nil, pgerror.New(pgcode.InvalidParameterValue, fmt.Sprintf("unsupported compression codec %s", name)) + } + codecName = name + } + return &exportNode{ source: input.(planNode), fileName: string(*fileNameStr), csvOpts: csvOpts, chunkSize: chunkSize, + codecName: codecName, }, nil }