-
Notifications
You must be signed in to change notification settings - Fork 882
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add network restore to support docker live restore container #1135
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,7 +147,7 @@ type initializer struct { | |
} | ||
|
||
// New creates a new instance of network controller. | ||
func New(cfgOptions ...config.Option) (NetworkController, error) { | ||
func New(oldRunningContainers map[string]interface{}, cfgOptions ...config.Option) (NetworkController, map[string]interface{}, error) { | ||
c := &controller{ | ||
id: stringid.GenerateRandomID(), | ||
cfg: config.ParseConfigOptions(cfgOptions...), | ||
|
@@ -157,20 +157,20 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { | |
} | ||
|
||
if err := c.agentInit(c.cfg.Daemon.Bind); err != nil { | ||
return nil, err | ||
return nil, nil, err | ||
} | ||
|
||
if err := c.agentJoin(c.cfg.Daemon.Neighbors); err != nil { | ||
return nil, err | ||
return nil, nil, err | ||
} | ||
|
||
if err := c.initStores(); err != nil { | ||
return nil, err | ||
return nil, nil, err | ||
} | ||
|
||
drvRegistry, err := drvregistry.New(c.getStore(datastore.LocalScope), c.getStore(datastore.GlobalScope), c.RegisterDriver, nil) | ||
if err != nil { | ||
return nil, err | ||
return nil, nil, err | ||
} | ||
|
||
for _, i := range getInitializers() { | ||
|
@@ -183,7 +183,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { | |
} | ||
|
||
if err := drvRegistry.AddDriver(i.ntype, i.fn, dcfg); err != nil { | ||
return nil, err | ||
return nil, nil, err | ||
} | ||
} | ||
c.drvRegistry = drvRegistry | ||
|
@@ -196,15 +196,79 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { | |
} | ||
} | ||
|
||
c.sandboxCleanup() | ||
c.sandboxCleanup(oldRunningContainers) | ||
if err := c.restoreSandbox(oldRunningContainers); err != nil { | ||
log.Errorf("failed to restore sandbox") | ||
} | ||
|
||
c.cleanupLocalEndpoints() | ||
c.networkCleanup() | ||
|
||
if err := c.startExternalKeyListener(); err != nil { | ||
return nil, err | ||
return nil, nil, err | ||
} | ||
restored := make(map[string]interface{}) | ||
for _, sb := range c.sandboxes { | ||
restored[sb.ContainerID()] = true | ||
} | ||
|
||
return c, nil | ||
return c, restored, nil | ||
} | ||
|
||
func (c *controller) restoreSandbox(sbids map[string]interface{}) error { | ||
for id, sb := range c.sandboxes { | ||
log.Infof("restore sandbox %s of container %s", sb.ID(), sb.ContainerID()) | ||
option, ok := sbids[sb.ContainerID()].([]SandboxOption) | ||
if !ok { | ||
log.Errorf("failed to restore sandbox: no restore options passed from daemon") | ||
delete(c.sandboxes, id) | ||
continue | ||
} | ||
err := sb.restoreSandbox(option) | ||
if err != nil { | ||
log.Errorf("failed to restore sandbox %s", sb.ID()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please also print the error. Across the diffs, please follow the convention where log messages start with an upper-case letter and error messages start with a lower-case letter. |
||
delete(c.sandboxes, id) | ||
continue | ||
} | ||
// restore endpoints in this sandbox | ||
// Fixme: if one of the endpoints failed to restore, should we delete this sandbox? | ||
for _, ep := range sb.endpoints { | ||
log.Infof("restore endpoint %s", ep.ID()) | ||
c.watchSvcRecord(ep) | ||
net, err := ep.getNetworkFromStore() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please follow the convention of calling the network variable `n`. |
||
if err != nil { | ||
log.Errorf("Restore sandbox: failed to get endpoint network from store: %v", err) | ||
ep.Delete(true) | ||
continue | ||
} | ||
d, err := net.driver(true) | ||
if err != nil { | ||
log.Errorf("Resore sandbox: failed to get driver of endpoint %s: %v", ep.ID(), err) | ||
ep.Delete(true) | ||
continue | ||
} | ||
options := make(map[string]interface{}) | ||
for key, value := range options { | ||
options[key] = value | ||
} | ||
for key, value := range sb.Labels() { | ||
options[key] = value | ||
} | ||
|
||
err = d.Restore(net.ID(), ep.id, sb.Key(), ep.Interface(), options) | ||
if err != nil { | ||
log.Errorf("Restore sandbox: failed to restore endpoint %s to driver of network %s: %v", ep.Name(), net.Name(), err) | ||
ep.Delete(true) | ||
continue | ||
} | ||
} | ||
if sb.config.useDefaultSandBox { | ||
c.sboxOnce.Do(func() { | ||
c.defOsSbox = sb.osSbox | ||
}) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (c *controller) makeDriverConfig(ntype string) map[string]interface{} { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,9 @@ type Driver interface { | |
// Leave method is invoked when a Sandbox detaches from an endpoint. | ||
Leave(nid, eid string) error | ||
|
||
// Restore reconstruct driver struct | ||
Restore(nid, eid string, sboxKey string, ifInfo InterfaceInfo, options map[string]interface{}) error | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be possible to avoid adding this new method and instead reuse CreateEndpoint(..., restore bool) and Join(..., restore bool). |
||
|
||
// ProgramExternalConnectivity invokes the driver method which does the necessary | ||
// programming to allow the external connectivity dictated by the passed options | ||
ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error | ||
|
@@ -103,6 +106,9 @@ type InterfaceInfo interface { | |
|
||
// AddressIPv6 returns the IPv6 address. | ||
AddressIPv6() *net.IPNet | ||
|
||
// SrcName return the srcName | ||
SrcName() string | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That way this method could be part of |
||
} | ||
|
||
// InterfaceNameInfo provides a go interface for the drivers to assign names | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -376,7 +376,8 @@ func (d *driver) configure(option map[string]interface{}) error { | |
logrus.Warnf("Running modprobe bridge br_netfilter failed with message: %s, error: %v", out, err) | ||
} | ||
} | ||
removeIPChains() | ||
// TODO: need a better way to handle this | ||
// removeIPChains() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this change is meant to avoid network disruption during the reload, but I am worried about getting rid of this call, which lets us start from a clean state after ungraceful daemon shutdowns. I heard the daemon-restart use case is limited to the time needed to update the daemon and is not meant to allow containers to run for a long time without a daemon. If that is the case, maybe we can live with an iptables cleanup during daemon restart. The rules are reconstructed anyway when the driver loads the networks. |
||
natChain, filterChain, isolationChain, err = setupIPChains(config) | ||
if err != nil { | ||
return err | ||
|
@@ -1211,11 +1212,10 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string | |
} | ||
|
||
// Program any required port mapping and store them in the endpoint | ||
endpoint.portMapping, err = network.allocatePorts(endpoint, network.config.DefaultBindingIP, d.config.EnableUserlandProxy) | ||
endpoint.portMapping, err = network.allocatePorts(endpoint, network.config.DefaultBindingIP, d.config.EnableUserlandProxy, false) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if !network.config.EnableICC { | ||
return d.link(network, endpoint, true) | ||
} | ||
|
@@ -1340,6 +1340,82 @@ func (d *driver) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{ | |
return nil | ||
} | ||
|
||
func (d *driver) Restore(nid, eid string, sboxKey string, ifInfo driverapi.InterfaceInfo, options map[string]interface{}) error { | ||
// restore endpoint | ||
if ifInfo == nil { | ||
return errors.New("invalid interface info passed") | ||
} | ||
|
||
// Get the network handler and make sure it exists | ||
d.Lock() | ||
n, ok := d.networks[nid] | ||
d.Unlock() | ||
|
||
if !ok { | ||
return types.NotFoundErrorf("network %s does not exist", nid) | ||
} | ||
if n == nil { | ||
return driverapi.ErrNoNetwork(nid) | ||
} | ||
|
||
// Sanity check | ||
n.Lock() | ||
if n.id != nid { | ||
n.Unlock() | ||
return InvalidNetworkIDError(nid) | ||
} | ||
n.Unlock() | ||
|
||
// Check if endpoint id is good and retrieve correspondent endpoint | ||
ep, err := n.getEndpoint(eid) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// Endpoint with that id exists either on desired or other sandbox | ||
if ep != nil { | ||
return driverapi.ErrEndpointExists(eid) | ||
} | ||
|
||
// Try to convert the options to endpoint configuration | ||
epConfig, err := parseEndpointOptions(options) | ||
if err != nil { | ||
return err | ||
} | ||
endpoint := &bridgeEndpoint{id: eid, config: epConfig} | ||
endpoint.macAddress = ifInfo.MacAddress() | ||
endpoint.addr = ifInfo.Address() | ||
endpoint.addrv6 = ifInfo.AddressIPv6() | ||
endpoint.srcName = ifInfo.SrcName() | ||
|
||
endpoint.containerConfig, err = parseContainerOptions(options) | ||
if err != nil { | ||
return err | ||
} | ||
endpoint.extConnConfig, err = parseConnectivityOptions(options) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if len(endpoint.extConnConfig.PortBindings) > 0 { | ||
endpoint.portMapping, err = n.allocatePorts(endpoint, n.config.DefaultBindingIP, d.config.EnableUserlandProxy, true) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// This is to make sure that all the iptalbes rules are still exist | ||
// If the rule not exist, re-create it | ||
if !n.config.EnableICC { | ||
return d.link(n, endpoint, true) | ||
} | ||
|
||
} | ||
n.Lock() | ||
n.endpoints[eid] = endpoint | ||
n.Unlock() | ||
return nil | ||
} | ||
|
||
func parseEndpointOptions(epOptions map[string]interface{}) (*endpointConfiguration, error) { | ||
if epOptions == nil { | ||
return nil, nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the caller is not using the list of restored containers (docker side changes) other than checking the list length, then we should probably return a maskable error instead of adding the new return list.