Skip to content

Commit

Permalink
cli,sdnotify: improve the handling of unix socket directories
Browse files Browse the repository at this point in the history
tldr: This patch makes it easier for users to understand when
CockroachDB has a problem using/creating unix sockets, and
what they can do to avert the situation.

Example, before;
```
$ cockroach start-single-node --insecure --background
...
listen unixgram /data/home/kena/cockroach/this-is-a-very-long-directory-name-the-point-is-to-be-more-than-one-hundred-and-twenty-three-characters/sdnotify2139543236/notify.sock: bind: invalid argument
```

Example, after:
```
$ cockroach start-single-node --insecure --background
ERROR: no suitable directory found for the --background notify socket
HINT: Avoid using --background altogether (preferred), or use a
shorter directory name for --socket-dir, TMPDIR or the current directory.
```

**Context:**

CockroachDB uses unix sockets both to handle `--background` and to
create a postgres-compatible SQL client socket via `--socket-dir`. For
`--background`, the logic was previously hardwired to always use
`os.TempDir()`, i.e. either `$TMPDIR` or `/tmp`.

On Unix, socket objects are commonly limited to 128 bytes;
that's e.g. 104 path characters including final NUL on BSD, 108 on glibc.
When the actual length is larger, creating the socket fails with
a confusing error `bind: invalid argument`.

**The problem:**

Users have reported issues in the following circumstances:

- when setting their CWD/$TMPDIR to a long directory name,
  they expected `--socket-dir` to also influence the socket
  name used for `--background`. Instead, `--background`
  failed with a hard-to-understand error.

- when running tests inside a container/sandbox with a long
  prefix, they were confused that `--background` / `--socket-dir=.`
  did not work properly.

- they use macOS which has very large directory names in $TMPDIR.

To improve user experience, this patch changes the behavior
as described in the release notes below below.

Release note (cli change): CockroachDB now produces a clearer error
when the path specified via `--socket-dir` is too long.

Release note (cli change): When the flag `--background` is specified,
CockroachDB now makes 3 attempts to find a suitable directory to
create the notification socket: the value of `--socket-dir` if
specified, the value of `$TMPDIR` (or /tmp if the env var is empty),
and the current working directory. If none of these directories has a
name short enough, an explanatory error is printed.
  • Loading branch information
knz committed Jul 16, 2022
1 parent 6ea03b9 commit 265667c
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 10 deletions.
12 changes: 11 additions & 1 deletion pkg/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,17 @@ func extraServerFlagInit(cmd *cobra.Command) error {
if serverSocketDir == "" {
serverCfg.SocketFile = ""
} else {
serverCfg.SocketFile = filepath.Join(serverSocketDir, ".s.PGSQL."+serverListenPort)
socketName := ".s.PGSQL." + serverListenPort
// On BSD, binding to a socket is limited to a path length of 104 characters
// (including the NUL terminator). In glibc, this limit is 108 characters.
// Otherwise, the bind operation fails with "invalid parameter".
if len(serverSocketDir) >= 104-1-len(socketName) {
return errors.WithHintf(
errors.Newf("value of --%s is too long: %s", cliflags.SocketDir.Name, serverSocketDir),
"The socket directory name must be shorter than %d characters.",
104-1-len(socketName))
}
serverCfg.SocketFile = filepath.Join(serverSocketDir, socketName)
}
}

Expand Down
65 changes: 65 additions & 0 deletions pkg/cli/interactive_tests/test_socket_name.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#! /usr/bin/env expect -f
#
source [file join [file dirname $argv0] common.tcl]

set longname "this-is-a-very-long-directory-name-the-point-is-to-be-more-than-one-hundred-and-twenty-three-characters/and-we-also-need-to-break-it-into-two-parts"

spawn /bin/bash
send "PS1=':''/# '\r"
eexpect ":/# "

send "mkdir -p $longname\r"
eexpect ":/# "

start_test "Check that the socket-dir flag checks the length of the directory."
send "$argv start-single-node --insecure --socket-dir=$longname\r"
eexpect "value of --socket-dir is too long"
eexpect "socket directory name must be shorter"
eexpect ":/# "
end_test

set crdb [file normalize $argv]
send "export BASEDIR=\$PWD\r"
eexpect ":/# "
send "export PREVTMP=\$TMPDIR\r"
eexpect ":/# "

start_test "Check that --background complains about the directory name if there is no default."
send "cd $longname\r"
eexpect ":/# "
send "export TMPDIR=\$PWD\r"
eexpect ":/# "
send "$crdb start-single-node --insecure --background\r"
eexpect "no suitable directory found for the --background notify socket"
eexpect "use a shorter directory name"
eexpect ":/# "
end_test

start_test "Check that --background can use --socket-name if specified and set to sane default."
send "$crdb start-single-node --insecure --background --socket-dir=\$BASEDIR --pid-file=\$BASEDIR/server_pid\r"
eexpect ":/# "
# check the server is running.
system "$crdb sql --insecure -e 'select 1'"
stop_server $crdb
end_test

start_test "Check that --background can use TMPDIR if specified and set to sane default."
# NB: we use a single-command override of TMPDIR (as opposed to using 'export') so that
# the following test below can reuse the value set above.
send "TMPDIR=\$PREVTMP $crdb start-single-node --insecure --background --pid-file=\$BASEDIR/server_pid\r"
eexpect ":/# "
# check the server is running.
system "$crdb sql --insecure -e 'select 1'"
stop_server $crdb
end_test

start_test "Check that --background can use cwd if TMPDIR is invalid."
# NB: at this point TMPDIR is still long, as per previous test.
send "cd \$BASEDIR\r"
eexpect ":/# "
send "$crdb start-single-node --insecure --background --pid-file=server_pid\r"
eexpect ":/# "
# check the server is running.
system "$crdb sql --insecure -e 'select 1'"
stop_server $crdb
end_test
65 changes: 64 additions & 1 deletion pkg/cli/start_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ import (
"os/signal"
"strings"

"github.com/cockroachdb/cockroach/pkg/cli/cliflags"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/sdnotify"
"github.com/cockroachdb/cockroach/pkg/util/sysutil"
"github.com/cockroachdb/errors"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -67,8 +69,69 @@ func handleSignalDuringShutdown(sig os.Signal) {

const backgroundFlagDefined = true

// findGoodNotifyDir determines a good target directory
// to create the unix socket used to signal successful
// background startup (via sdnotify).
// A directory is "good" if it seems writable and its
// name is short enough.
func findGoodNotifyDir() (string, error) {
goodEnough := func(s string) bool {
if len(s) >= 104-1-len("sdnotify/notify.sock")-10 {
// On BSD, binding to a socket is limited to a path length of 104 characters
// (including the NUL terminator). In glibc, this limit is 108 characters.
// macOS also has a tendency to produce very long temporary directory names.
return false
}
st, err := os.Stat(s)
if err != nil {
return false
}
if !st.IsDir() || st.Mode()&0222 == 0 /* any write bits? */ {
// Note: we're confident the directory is unsuitable if none of the
// write bits are set, however there could be a false positive
// if some write bits are set.
//
// For example, if the process runs as a UID that does not match
// the directory owner UID or GID, and the write mode is 0220 or
// less, the sd socket creation will still fail.
// As such, the mode check here is merely a heuristic. We're
// OK with that: the actual write failure will produce a clear
// error message.
return false
}
return true
}

// Was --socket-dir configured? Try to use that.
if serverSocketDir != "" && goodEnough(serverSocketDir) {
return serverSocketDir, nil
}
// Do we have a temp directory? Try to use that.
if tmpDir := os.TempDir(); goodEnough(tmpDir) {
return tmpDir, nil
}
// Can we perhaps use the current directory?
if cwd, err := os.Getwd(); err == nil && goodEnough(cwd) {
return cwd, nil
}

// Note: we do not attempt to use the configured on-disk store
// directory(ies), because they may point to a filesystem that does
// not support unix sockets.

return "", errors.WithHintf(
errors.Newf("no suitable directory found for the --background notify socket"),
"Avoid using --%s altogether (preferred), or use a shorter directory name "+
"for --socket-dir, TMPDIR or the current directory.", cliflags.Background.Name)
}

func maybeRerunBackground() (bool, error) {
if startBackground {
notifyDir, err := findGoodNotifyDir()
if err != nil {
return true, err
}

args := make([]string, 0, len(os.Args))
foundBackground := false
for _, arg := range os.Args {
Expand All @@ -88,7 +151,7 @@ func maybeRerunBackground() (bool, error) {
// Notify to ourselves that we're restarting.
_ = os.Setenv(backgroundEnvVar, "1")

return true, sdnotify.Exec(cmd)
return true, sdnotify.Exec(cmd, notifyDir)
}
return false, nil
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/util/sdnotify/sdnotify.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ func Ready() error {
// either exited or signaled that it is ready. If the command exits
// with a non-zero status before signaling readiness, returns an
// exec.ExitError.
func Exec(cmd *exec.Cmd) error {
return bgExec(cmd)
func Exec(cmd *exec.Cmd, socketDir string) error {
return bgExec(cmd, socketDir)
}
8 changes: 4 additions & 4 deletions pkg/util/sdnotify/sdnotify_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ func notify(path, msg string) error {
return err
}

func bgExec(cmd *exec.Cmd) error {
l, err := listen()
func bgExec(cmd *exec.Cmd, socketDir string) error {
l, err := listen(socketDir)
if err != nil {
return err
}
Expand Down Expand Up @@ -97,8 +97,8 @@ type listener struct {
conn *net.UnixConn
}

func listen() (listener, error) {
dir, err := ioutil.TempDir("", "sdnotify")
func listen(socketDir string) (listener, error) {
dir, err := ioutil.TempDir(socketDir, "sdnotify")
if err != nil {
return listener{}, err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/util/sdnotify/sdnotify_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
package sdnotify

import (
"os"
"testing"

_ "github.com/cockroachdb/cockroach/pkg/util/log" // for flags
)

func TestSDNotify(t *testing.T) {
l, err := listen()
l, err := listen(os.TempDir())
if err != nil {
t.Fatal(err)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/sdnotify/sdnotify_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ func ready() error {
return nil
}

func bgExec(*exec.Cmd) error {
func bgExec(*exec.Cmd, string) error {
return errors.New("not implemented")
}

0 comments on commit 265667c

Please sign in to comment.