From 3719137c4d385638ecbb0113e4e18296d3b9d29a Mon Sep 17 00:00:00 2001 From: tg Date: Tue, 26 Mar 2019 21:39:22 +0300 Subject: [PATCH] return all dial errors if dial has failed License: MIT Signed-off-by: Georgij Tolstov --- dial_test.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ go.mod | 1 + go.sum | 4 ++++ package.json | 7 +++++++ swarm_dial.go | 37 ++++++++++++++++++++++--------------- 5 files changed, 82 insertions(+), 15 deletions(-) diff --git a/dial_test.go b/dial_test.go index c9d0d5dd..adb3438a 100644 --- a/dial_test.go +++ b/dial_test.go @@ -2,7 +2,9 @@ package swarm_test import ( "context" + "fmt" "net" + "regexp" "sync" "testing" "time" @@ -480,3 +482,49 @@ func TestDialBackoffClears(t *testing.T) { t.Log("correctly cleared backoff") } } + +func TestDialPeerFailed(t *testing.T) { + t.Parallel() + ctx := context.Background() + + swarms := makeSwarms(ctx, t, 2) + defer closeSwarms(swarms) + testedSwarm, targetSwarm := swarms[0], swarms[1] + + exceptedErrorsCount := 5 + for i := 0; i < exceptedErrorsCount; i++ { + _, silentPeerAddress, silentPeerListener := newSilentPeer(t) + go acceptAndHang(silentPeerListener) + defer silentPeerListener.Close() + + testedSwarm.Peerstore().AddAddr( + targetSwarm.LocalPeer(), + silentPeerAddress, + pstore.PermanentAddrTTL) + } + + _, err := testedSwarm.DialPeer(ctx, targetSwarm.LocalPeer()) + if err == nil { + t.Fatal(err) + } + + // dial_test.go:508: correctly get a combined error: dial attempt failed: 10 errors occurred: + // * --> (/ip4/127.0.0.1/tcp/46485) dial attempt failed: failed to negotiate security protocol: context deadline exceeded + // * --> (/ip4/127.0.0.1/tcp/34881) dial attempt failed: failed to negotiate security protocol: context deadline exceeded + // ... + + errorCountRegexpString := fmt.Sprintf("%d errors occurred", exceptedErrorsCount) + errorCountRegexp := regexp.MustCompile(errorCountRegexpString) + if !errorCountRegexp.MatchString(err.Error()) { + t.Fatalf("can't find total err count: `%s' in `%s'", errorCountRegexpString, err.Error()) + } + + connectErrorsRegexpString := `\* --> \(.+?\) dial attempt failed:.+` + connectErrorsRegexp := regexp.MustCompile(connectErrorsRegexpString) + connectErrors := connectErrorsRegexp.FindAll([]byte(err.Error()), -1) + if len(connectErrors) != exceptedErrorsCount { + t.Fatalf("connectErrors must contain %d errros; "+ + "but `%s' was found in `%s' %d times", + exceptedErrorsCount, connectErrorsRegexpString, err.Error(), len(connectErrors)) + } +} diff --git a/go.mod b/go.mod index 39cf287b..efb39950 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,7 @@ module github.com/libp2p/go-libp2p-swarm require ( + github.com/hashicorp/go-multierror v1.0.0 github.com/ipfs/go-log v0.0.1 github.com/jbenet/goprocess v0.0.0-20160826012719-b497e2f366b8 github.com/libp2p/go-addr-util v0.0.1 diff --git a/go.sum b/go.sum index 957a7833..3f8a4b47 100644 --- a/go.sum +++ b/go.sum @@ -31,6 +31,10 @@ github.com/gxed/hashland/keccakpg v0.0.1 h1:wrk3uMNaMxbXiHibbPO4S0ymqJMm41WiudyF github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= github.com/gxed/hashland/murmur3 v0.0.1 h1:SheiaIt0sda5K+8FLz952/1iWS9zrnKsEJaOJu4ZbSc= github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= diff --git a/package.json b/package.json index a332a349..8bd05c24 100644 --- a/package.json +++ b/package.json @@ -149,6 +149,12 @@ "hash": "QmSVaJe1aRjc78cZARTtf4pqvXERYwihyYhZWoVWceHnsK", "name": "go-libp2p-secio", "version": "2.0.30" + }, + { + "author": "hashicorp", + "hash": "QmfGQp6VVqdPCDyzEM6EGwMY74YPabTSEoQWHUxZuCSWj3", + "name": "go-multierror", + "version": "0.1.0" } ], "gxVersion": "0.9.1", @@ -158,3 +164,4 @@ "releaseCmd": "git commit -a -m \"gx publish $VERSION\"", "version": "3.0.35" } + diff --git a/swarm_dial.go b/swarm_dial.go index 8a3ba491..d4b31552 100644 --- a/swarm_dial.go +++ b/swarm_dial.go @@ -7,6 +7,8 @@ import ( "sync" "time" + "github.com/hashicorp/go-multierror" + logging "github.com/ipfs/go-log" addrutil "github.com/libp2p/go-addr-util" lgbl "github.com/libp2p/go-libp2p-loggables" @@ -358,9 +360,7 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma. // use a single response type instead of errs and conns, reduces complexity *a ton* respch := make(chan dialResult) - - defaultDialFail := inet.ErrNoRemoteAddrs - exitErr := defaultDialFail + var dialErrors *multierror.Error defer s.limiter.clearAllPeerDials(p) @@ -369,16 +369,17 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma. // Check for context cancellations and/or responses first. select { case <-ctx.Done(): - if exitErr == defaultDialFail { - exitErr = ctx.Err() + if dialError := dialErrors.ErrorOrNil(); dialError != nil { + return nil, dialError } - return nil, exitErr + + return nil, ctx.Err() case resp := <-respch: active-- if resp.Err != nil { - log.Infof("got error on dial to %s: %s", resp.Addr, resp.Err) // Errors are normal, lots of dials will fail - exitErr = resp.Err + log.Infof("got error on dial: %s", resp.Err) + dialErrors = multierror.Append(dialErrors, resp.Err) } else if resp.Conn != nil { return resp.Conn, nil } @@ -399,22 +400,28 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma. s.limitedDial(ctx, p, addr, respch) active++ case <-ctx.Done(): - if exitErr == defaultDialFail { - exitErr = ctx.Err() + if dialError := dialErrors.ErrorOrNil(); dialError != nil { + return nil, dialError } - return nil, exitErr + + return nil, ctx.Err() case resp := <-respch: active-- if resp.Err != nil { - log.Infof("got error on dial to %s: %s", resp.Addr, resp.Err) // Errors are normal, lots of dials will fail - exitErr = resp.Err + log.Infof("got error on dial: %s", resp.Err) + dialErrors = multierror.Append(dialErrors, resp.Err) } else if resp.Conn != nil { return resp.Conn, nil } } } - return nil, exitErr + + if dialError := dialErrors.ErrorOrNil(); dialError != nil { + return nil, dialError + } + + return nil, inet.ErrNoRemoteAddrs } // limitedDial will start a dial to the given peer when @@ -443,7 +450,7 @@ func (s *Swarm) dialAddr(ctx context.Context, p peer.ID, addr ma.Multiaddr) (tra connC, err := tpt.Dial(ctx, addr, p) if err != nil { - return nil, fmt.Errorf("%s --> %s dial attempt failed: %s", s.local, p, err) + return nil, fmt.Errorf("%s --> %s (%s) dial attempt failed: %s", s.local, p, addr, err) } // Trust the transport? Yeah... right.