-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handle network errors/stalls #101
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,6 @@ import ( | |
"github.com/filecoin-project/go-data-transfer/registry" | ||
) | ||
|
||
var ChannelRemoveTimeout = 1 * time.Hour | ||
|
||
func (m *manager) OnChannelOpened(chid datatransfer.ChannelID) error { | ||
has, err := m.channels.HasChannel(chid) | ||
if err != nil { | ||
|
@@ -170,12 +168,42 @@ func (m *manager) OnRequestTimedOut(ctx context.Context, chid datatransfer.Chann | |
go func() { | ||
select { | ||
case <-ctx.Done(): | ||
case <-time.After(ChannelRemoveTimeout): | ||
case <-time.After(m.channelRemoveTimeout): | ||
channel, err := m.channels.GetByID(ctx, chid) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happens if a channel times out/disconnects and then the FSM/data-transfer crashes? I think we should have a handler for this state that starts this wait even when the FSM restarts. Does that make sense? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, we can address this when we have a better way to actually track disconnects other than the Message -- see comment above about challenges of essentially pushing state |
||
if err == nil { | ||
if !(channels.IsChannelTerminated(channel.Status()) || | ||
channels.IsChannelCleaningUp(channel.Status())) { | ||
if err := m.channels.Error(chid, datatransfer.ErrRemoved); err != nil { | ||
log.Errorf("failed to cancel timed-out channel: %v", err) | ||
return | ||
} | ||
log.Warnf("channel %+v has ben cancelled because of timeout", chid) | ||
} | ||
} | ||
} | ||
}() | ||
|
||
return nil | ||
} | ||
|
||
func (m *manager) OnRequestDisconnected(ctx context.Context, chid datatransfer.ChannelID) error { | ||
log.Warnf("channel %+v has stalled or disconnected", chid) | ||
|
||
// mark peer disconnected for informational purposes | ||
err := m.channels.Disconnected(chid) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
go func() { | ||
select { | ||
case <-ctx.Done(): | ||
case <-time.After(m.channelRemoveTimeout): | ||
channel, err := m.channels.GetByID(ctx, chid) | ||
if err == nil { | ||
if !(channels.IsChannelTerminated(channel.Status()) || | ||
channels.IsChannelCleaningUp(channel.Status())) { | ||
if err := m.channels.Cancel(chid); err != nil { | ||
if err := m.channels.Error(chid, datatransfer.ErrRemoved); err != nil { | ||
log.Errorf("failed to cancel timed-out channel: %v", err) | ||
return | ||
} | ||
|
@@ -198,7 +226,7 @@ func (m *manager) OnChannelCompleted(chid datatransfer.ChannelID, success bool) | |
if msg != nil { | ||
if err := m.dataTransferNetwork.SendMessage(context.TODO(), chid.Initiator, msg); err != nil { | ||
log.Warnf("failed to send completion message, err : %v", err) | ||
return m.channels.Disconnected(chid) | ||
return m.OnRequestDisconnected(context.TODO(), chid) | ||
} | ||
} | ||
if msg.Accepted() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@hannahhoward For my own curiosity:
What's the advantage of defining:
rather than using
errors.New()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It allows you to use `const` instead of `var` -- there is a danger of a `var` being changed -- technically anyone using the module can do so.