Skip to content

Commit

Permalink
Merge #57724 #57745
Browse files Browse the repository at this point in the history
57724: optbuilder: add ST_Collect/ST_MakeLine to isOrderingSensitive r=rytaft a=otan

Release note (bug fix): Fix a bug where ST_MakeLine and ST_Collect
did not respect ordering when used over a window clause.

57745: cloudimpl: fix short-read on EOF of compressed GCS file r=dt a=dt

Previously the helper for reading GCS files that handled resuming after
errors would return the amount read from Read() calls that did not
return an error, or would move to its retries for those that did return
an error.

However the io.Reader interface allows a Read call to return n > 0 *and*
a non-nil error, and expects the caller to handle the n read bytes before
the error, particularly if err = EOF, where the last Read() call might
read the last X bytes and then return (X, EOF).

This changes the resuming reader to return n and err from the underlying
reader as-is if err is nil or, after this change, if it is EOF. Other
errors are left as is -- they will cause it to return 0 regardless of
n -- a future change may wish to change these as well, however EOF is
a special case in that it is expected.

Interestingly the regular underlying reader does not do this -- it seems
that it returns (n, nil) and then (0, EOF) on the next call, so this bug
wasn't caught before. However a user observed that files stored using
the SDK's native compression functionality were read incorrectly, so
it seems like the decompressing implementation of the underlying reader
does utilize the (n>0, EOF) option in the interface specification. The
included test case thus exercises this change by comparing the content
of /usr/share/dict/words as stored on GCS with and without native gzip
compression enabled (gsutil's -Z option).

Release note (bug fix): Fix a bug that could cause IMPORT to incorrectly read files stored on Google Cloud if uploaded using its compression option (gsutil -Z).

Fixes #56152

Co-authored-by: Oliver Tan <[email protected]>
Co-authored-by: David Taylor <[email protected]>
  • Loading branch information
3 people committed Dec 9, 2020
3 parents b4cc0d0 + ab28f4a + e12b81a commit 044a292
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 3 deletions.
15 changes: 15 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/geospatial
Original file line number Diff line number Diff line change
Expand Up @@ -5014,6 +5014,11 @@ SELECT ST_AsEWKT(ST_MakeLine(g::geometry)) FROM ( VALUES
----
NULL

query T
SELECT ST_AsText(ST_MakeLine(geom ORDER BY geom)) FROM geo_table
----
LINESTRING (1 1, 2 2, 2.22 -2.445)

query T
SELECT ST_Extent(g::geometry) FROM ( VALUES
(NULL),
Expand Down Expand Up @@ -5091,6 +5096,16 @@ SELECT ST_AsEWKT(ST_Collect(g::geometry)) FROM ( VALUES ('GEOMETRYCOLLECTION (PO
----
GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1 1)), GEOMETRYCOLLECTION (POINT (2 2)))

query T
SELECT ST_AsText(ST_Collect(geom ORDER BY geom)) FROM geo_table
----
MULTIPOINT (1 1, 2 2, 2.22 -2.445)

query T
SELECT ST_AsText(ST_MemCollect(geom ORDER BY geom)) FROM geo_table
----
MULTIPOINT (1 1, 2 2, 2.22 -2.445)

query T
SELECT ST_AsEWKT(ST_MemCollect(g::geometry)) FROM ( VALUES (NULL), ('POINT (1 1)'), ('POINT EMPTY'), ('POINT (2 2)')) tbl(g)
----
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/optbuilder/groupby.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ func (a aggregateInfo) isOrderingSensitive() bool {
return true
}
switch a.def.Name {
case "array_agg", "concat_agg", "string_agg", "json_agg", "jsonb_agg", "json_object_agg", "jsonb_object_agg":
case "array_agg", "concat_agg", "string_agg", "json_agg", "jsonb_agg", "json_object_agg", "jsonb_object_agg",
"st_makeline", "st_collect", "st_memcollect":
return true
default:
return false
Expand Down
40 changes: 40 additions & 0 deletions pkg/storage/cloudimpl/cloudimpltests/gcs_storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,43 @@ func TestFileDoesNotExist(t *testing.T) {
require.True(t, errors.Is(err, cloudimpl.ErrFileDoesNotExist))
}
}

func TestCompressedGCS(t *testing.T) {
defer leaktest.AfterTest(t)()

if os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") == "" {
// This test requires valid GS credential file.
skip.IgnoreLint(t, "GOOGLE_APPLICATION_CREDENTIALS env var must be set")
}

user := security.RootUserName()
ctx := context.Background()

// gsutil cp /usr/share/dict/words gs://cockroach-fixtures/words-compressed.txt
gsFile1 := "gs://cockroach-fixtures/words.txt?AUTH=implicit"

// gsutil cp -Z /usr/share/dict/words gs://cockroach-fixtures/words-compressed.txt
gsFile2 := "gs://cockroach-fixtures/words-compressed.txt?AUTH=implicit"

conf1, err := cloudimpl.ExternalStorageConfFromURI(gsFile1, user)
require.NoError(t, err)
conf2, err := cloudimpl.ExternalStorageConfFromURI(gsFile2, user)
require.NoError(t, err)

s1, err := cloudimpl.MakeExternalStorage(ctx, conf1, base.ExternalIODirConfig{}, testSettings, nil, nil, nil)
require.NoError(t, err)
s2, err := cloudimpl.MakeExternalStorage(ctx, conf2, base.ExternalIODirConfig{}, testSettings, nil, nil, nil)
require.NoError(t, err)

reader1, err := s1.ReadFile(context.Background(), "")
require.NoError(t, err)
reader2, err := s2.ReadFile(context.Background(), "")
require.NoError(t, err)

content1, err := ioutil.ReadAll(reader1)
require.NoError(t, err)
content2, err := ioutil.ReadAll(reader2)
require.NoError(t, err)

require.Equal(t, string(content1), string(content2))
}
8 changes: 6 additions & 2 deletions pkg/storage/cloudimpl/gcs_storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,9 @@ func (r *resumingGoogleStorageReader) Read(p []byte) (int, error) {

if lastErr == nil {
n, readErr := r.data.Read(p)
if readErr == nil {
if readErr == nil || readErr == io.EOF {
r.pos += int64(n)
return n, nil
return n, readErr
}
lastErr = readErr
}
Expand All @@ -222,6 +222,10 @@ func (r *resumingGoogleStorageReader) Read(p []byte) (int, error) {
}
}

// NB: Go says Read() callers need to expect n > 0 *and* non-nil error, and do
// something with what was read before the error, but this mostly applies to
// err = EOF case which we handle above, so likely OK that we're discarding n
// here and pretending it was zero.
return 0, lastErr
}

Expand Down

0 comments on commit 044a292

Please sign in to comment.