Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for debug logging #726

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ As OpenTelemetry has now reached feature parity with OpenCensus, the migration
from OpenCensus to OpenTelemetry is strongly encouraged.
[OpenTelemetry bridge](https://github.com/open-telemetry/opentelemetry-go/tree/main/bridge/opencensus)
can be leveraged to migrate to OpenTelemetry without the need of replacing the
OpenCensus APIs in this library. Example code is shown below for migrating an
OpenCensus APIs in this library. Example code is shown below for migrating an
application using the OpenTelemetry bridge for traces.

```golang
Expand Down Expand Up @@ -428,6 +428,39 @@ It shouldn't impact database operations.
[Cloud Monitoring]: https://cloud.google.com/monitoring
[Cloud Trace]: https://cloud.google.com/trace

### Debug Logging

The Go Connector supports optional debug logging to help diagnose problems with
the background certificate refresh. To enable it, provide a logger that
implements the `debug.Logger` interface when initializing the Dialer.

For example:

``` go
import (
"context"
"net"

"cloud.google.com/go/cloudsqlconn"
)

type myLogger struct{}

func (l *myLogger) Debugf(format string, args ...interface{}) {
// Log as you like here
}

func connect() {
l := &myLogger{}

d, err := NewDialer(
context.Background(),
cloudsqlconn.WithDebugLogger(l),
)
// use dialer as usual...
}
```

## Support policy

### Major version lifecycle
Expand Down
21 changes: 21 additions & 0 deletions debug/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package debug

// Logger is the interface used for debug logging. By default, it is unused.
type Logger interface {
// Debugf is for reporting information about internal operations.
Debugf(format string, args ...interface{})
}
43 changes: 35 additions & 8 deletions dialer.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"sync/atomic"
"time"

"cloud.google.com/go/cloudsqlconn/debug"
"cloud.google.com/go/cloudsqlconn/errtype"
"cloud.google.com/go/cloudsqlconn/instance"
"cloud.google.com/go/cloudsqlconn/internal/cloudsql"
Expand Down Expand Up @@ -90,8 +91,8 @@ type Dialer struct {
instances map[instance.ConnName]connectionInfoCache
key *rsa.PrivateKey
refreshTimeout time.Duration

sqladmin *sqladmin.Service
sqladmin *sqladmin.Service
logger debug.Logger

// defaultDialConfig holds the constructor level DialOptions, so that it
// can be copied and mutated by the Dial function.
Expand All @@ -114,6 +115,10 @@ var (
errUseIAMTokenSource = errors.New("use WithIAMAuthNTokenSources instead of WithTokenSource be used when IAM AuthN is enabled")
)

type nullLogger struct{}

func (nullLogger) Debugf(_ string, _ ...interface{}) {}

// NewDialer creates a new Dialer.
//
// Initial calls to NewDialer make take longer than normal because generation of an
Expand All @@ -123,6 +128,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
cfg := &dialerConfig{
refreshTimeout: cloudsql.RefreshTimeout,
dialFunc: proxy.Dial,
logger: nullLogger{},
useragents: []string{userAgent},
}
for _, opt := range opts {
Expand Down Expand Up @@ -186,6 +192,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
key: cfg.rsaKey,
refreshTimeout: cfg.refreshTimeout,
sqladmin: client,
logger: cfg.logger,
defaultDialConfig: dc,
dialerID: uuid.New().String(),
iamTokenSource: cfg.iamLoginTokenSource,
Expand Down Expand Up @@ -225,6 +232,7 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
if err != nil {
d.lock.Lock()
defer d.lock.Unlock()
d.logger.Debugf("[%v] Removing connection info from cache", cn.String())
// Stop all background refreshes
i.Close()
delete(d.instances, cn)
Expand All @@ -238,13 +246,14 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
// The TLS handshake will not fail on an expired client certificate. It's
// not until the first read where the client cert error will be surfaced.
// So check that the certificate is valid before proceeding.
if invalidClientCert(tlsConfig) {
if invalidClientCert(cn, d.logger, tlsConfig) {
i.ForceRefresh()
// Block on refreshed connection info
addr, tlsConfig, err = i.ConnectInfo(ctx, cfg.ipType)
if err != nil {
d.lock.Lock()
defer d.lock.Unlock()
d.logger.Debugf("[%v] Removing connection info from cache", cn.String())
// Stop all background refreshes
i.Close()
delete(d.instances, cn)
Expand All @@ -260,8 +269,10 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
if cfg.dialFunc != nil {
f = cfg.dialFunc
}
d.logger.Debugf("[%v] Dialing %v", cn.String(), addr)
conn, err = f(ctx, "tcp", addr)
if err != nil {
d.logger.Debugf("[%v] Dialing %v failed: %v", cn.String(), addr, err)
// refresh the instance info in case it caused the connection failure
i.ForceRefresh()
return nil, errtype.NewDialError("failed to dial", cn.String(), err)
Expand All @@ -276,7 +287,9 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
}

tlsConn := tls.Client(conn, tlsConfig)
if err := tlsConn.HandshakeContext(ctx); err != nil {
err = tlsConn.HandshakeContext(ctx)
if err != nil {
d.logger.Debugf("[%v] TLS handshake failed: %v", cn.String(), err)
// refresh the instance info in case it caused the handshake failure
i.ForceRefresh()
_ = tlsConn.Close() // best effort close attempt
Expand All @@ -296,7 +309,7 @@ func (d *Dialer) Dial(ctx context.Context, icn string, opts ...DialOption) (conn
}), nil
}

func invalidClientCert(c *tls.Config) bool {
func invalidClientCert(cn instance.ConnName, l debug.Logger, c *tls.Config) bool {
// The following conditions should be impossible (no certs, nil leaf), but
// just in case there's an unknown edge case, check assumptions before
// proceeding.
Expand All @@ -306,7 +319,15 @@ func invalidClientCert(c *tls.Config) bool {
if c.Certificates[0].Leaf == nil {
return true
}
return time.Now().After(c.Certificates[0].Leaf.NotAfter)
now := time.Now()
notAfter := c.Certificates[0].Leaf.NotAfter
l.Debugf(
"[%v] Now = %v, Current cert expiration = %v",
cn.String(),
now.UTC().Format(time.RFC3339),
notAfter.Format(time.RFC3339),
)
return now.After(notAfter)
}

// EngineVersion returns the engine type and version for the instance
Expand Down Expand Up @@ -398,8 +419,14 @@ func (d *Dialer) instance(cn instance.ConnName, useIAMAuthN *bool) connectionInf
if useIAMAuthN != nil {
useIAMAuthNDial = *useIAMAuthN
}
i = cloudsql.NewInstance(cn, d.sqladmin, d.key,
d.refreshTimeout, d.iamTokenSource, d.dialerID, useIAMAuthNDial)
d.logger.Debugf("[%v] Connection info added to cache", cn.String())
i = cloudsql.NewInstance(
cn,
d.logger,
d.sqladmin, d.key,
d.refreshTimeout, d.iamTokenSource,
d.dialerID, useIAMAuthNDial,
)
d.instances[cn] = i
}
}
Expand Down
41 changes: 41 additions & 0 deletions internal/cloudsql/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sync"
"time"

"cloud.google.com/go/cloudsqlconn/debug"
"cloud.google.com/go/cloudsqlconn/errtype"
"cloud.google.com/go/cloudsqlconn/instance"
"golang.org/x/oauth2"
Expand Down Expand Up @@ -90,6 +91,7 @@ type Instance struct {
openConns uint64

connName instance.ConnName
logger debug.Logger
key *rsa.PrivateKey

// refreshTimeout sets the maximum duration a refresh cycle can run
Expand Down Expand Up @@ -118,6 +120,7 @@ type Instance struct {
// NewInstance initializes a new Instance given an instance connection name
func NewInstance(
cn instance.ConnName,
l debug.Logger,
client *sqladmin.Service,
key *rsa.PrivateKey,
refreshTimeout time.Duration,
Expand All @@ -128,9 +131,11 @@ func NewInstance(
ctx, cancel := context.WithCancel(context.Background())
i := &Instance{
connName: cn,
logger: l,
key: key,
l: rate.NewLimiter(rate.Every(refreshInterval), refreshBurst),
r: newRefresher(
l,
client,
ts,
dialerID,
Expand Down Expand Up @@ -293,10 +298,18 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {
r.timer = time.AfterFunc(d, func() {
// instance has been closed, don't schedule anything
if err := i.ctx.Err(); err != nil {
i.logger.Debugf(
"[%v] Instance is closed, stopping refresh operations",
i.connName.String(),
)
r.err = err
close(r.ready)
return
}
i.logger.Debugf(
"[%v] Connection info refresh operation started",
i.connName.String(),
)

ctx, cancel := context.WithTimeout(i.ctx, i.refreshTimeout)
defer cancel()
Expand All @@ -314,6 +327,24 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {
r.result, r.err = i.r.performRefresh(
ctx, i.connName, i.key, i.useIAMAuthNDial)
}
switch r.err {
case nil:
i.logger.Debugf(
"[%v] Connection info refresh operation complete",
i.connName.String(),
)
i.logger.Debugf(
"[%v] Current certificate expiration = %v",
i.connName.String(),
r.result.expiry.UTC().Format(time.RFC3339),
)
default:
i.logger.Debugf(
"[%v] Connection info refresh operation failed, err = %v",
i.connName.String(),
r.err,
)
}

close(r.ready)

Expand All @@ -324,6 +355,10 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {

// if failed, scheduled the next refresh immediately
if r.err != nil {
i.logger.Debugf(
"[%v] Connection info refresh operation scheduled immediately",
i.connName.String(),
)
i.next = i.scheduleRefresh(0)
// If the latest refreshOperation is bad, avoid replacing the
// used refreshOperation while it's still valid and potentially
Expand All @@ -341,6 +376,12 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {
// the future
i.cur = r
t := refreshDuration(time.Now(), i.cur.result.expiry)
i.logger.Debugf(
"[%v] Connection info refresh operation scheduled at %v (now + %v)",
i.connName.String(),
time.Now().Add(t).UTC().Format(time.RFC3339),
t.Round(time.Second),
)
i.next = i.scheduleRefresh(t)
})
return r
Expand Down
34 changes: 28 additions & 6 deletions internal/cloudsql/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ import (
"cloud.google.com/go/cloudsqlconn/internal/mock"
)

type nullLogger struct{}

func (nullLogger) Debugf(string, ...interface{}) {}

// genRSAKey generates an RSA key used for test.
func genRSAKey() *rsa.PrivateKey {
key, err := rsa.GenerateKey(rand.Reader, 2048)
Expand Down Expand Up @@ -65,7 +69,10 @@ func TestInstanceEngineVersion(t *testing.T) {
t.Fatalf("%v", err)
}
}()
i := NewInstance(testInstanceConnName(), client, RSAKey, 30*time.Second, nil, "", false)
i := NewInstance(
testInstanceConnName(), nullLogger{}, client,
RSAKey, 30*time.Second, nil, "", false,
)
if err != nil {
t.Fatalf("failed to init instance: %v", err)
}
Expand Down Expand Up @@ -99,7 +106,10 @@ func TestConnectInfo(t *testing.T) {
}
}()

i := NewInstance(testInstanceConnName(), client, RSAKey, 30*time.Second, nil, "", false)
i := NewInstance(
testInstanceConnName(), nullLogger{}, client,
RSAKey, 30*time.Second, nil, "", false,
)

gotAddr, gotTLSCfg, err := i.ConnectInfo(ctx, PublicIP)
if err != nil {
Expand Down Expand Up @@ -164,7 +174,10 @@ func TestConnectInfoAutoIP(t *testing.T) {
}
}()

i := NewInstance(testInstanceConnName(), client, RSAKey, 30*time.Second, nil, "", false)
i := NewInstance(
testInstanceConnName(), nullLogger{}, client,
RSAKey, 30*time.Second, nil, "", false,
)
if err != nil {
t.Fatalf("failed to create mock instance: %v", err)
}
Expand Down Expand Up @@ -193,7 +206,10 @@ func TestConnectInfoErrors(t *testing.T) {
defer cleanup()

// Use a timeout that should fail instantly
i := NewInstance(testInstanceConnName(), client, RSAKey, 0, nil, "", false)
i := NewInstance(
testInstanceConnName(), nullLogger{}, client,
RSAKey, 0, nil, "", false,
)

_, _, err = i.ConnectInfo(ctx, PublicIP)
var wantErr *errtype.DialError
Expand All @@ -218,7 +234,10 @@ func TestClose(t *testing.T) {
defer cleanup()

// Set up an instance and then close it immediately
i := NewInstance(testInstanceConnName(), client, RSAKey, 30, nil, "", false)
i := NewInstance(
testInstanceConnName(), nullLogger{}, client,
RSAKey, 30*time.Second, nil, "", false,
)
i.Close()

_, _, err = i.ConnectInfo(ctx, PublicIP)
Expand Down Expand Up @@ -281,7 +300,10 @@ func TestContextCancelled(t *testing.T) {
defer cleanup()

// Set up an instance and then close it immediately
i := NewInstance(testInstanceConnName(), client, RSAKey, 30, nil, "", false)
i := NewInstance(
testInstanceConnName(), nullLogger{}, client,
RSAKey, 30*time.Second, nil, "", false,
)
if err != nil {
t.Fatalf("failed to initialize Instance: %v", err)
}
Expand Down
Loading
Loading