Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add network restore to support docker live restore container #1135

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 22 additions & 11 deletions api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func createTestNetwork(t *testing.T, network string) (libnetwork.NetworkControll
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -192,7 +193,8 @@ func TestCreateDeleteNetwork(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -267,7 +269,8 @@ func TestGetNetworksAndEndpoints(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -526,7 +529,8 @@ func TestProcGetServices(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -999,7 +1003,8 @@ func TestDetectGetNetworksInvalidQueryComposition(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1114,7 +1119,8 @@ func TestCreateDeleteEndpoints(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1240,7 +1246,8 @@ func TestJoinLeave(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1691,7 +1698,8 @@ func TestHttpHandlerUninit(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1761,7 +1769,8 @@ func TestHttpHandlerBadBody(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1797,7 +1806,8 @@ func TestEndToEnd(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -2249,7 +2259,8 @@ func TestEndToEndErrorMessage(t *testing.T) {
// Cleanup local datastore file
os.Remove(datastore.DefaultScopes("")[datastore.LocalScope].Client.Address)

c, err := libnetwork.New()
old := make(map[string]interface{})
c, _, err := libnetwork.New(old)
if err != nil {
t.Fatal(err)
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/dnet/dnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ func (d *dnetConnection) dnetDaemon(cfgFile string) error {

cOptions = append(cOptions, config.OptionDriverConfig("bridge", bridgeOption))

controller, err := libnetwork.New(cOptions...)
old := make(map[string]interface{})
controller, _, err := libnetwork.New(old, cOptions...)
if err != nil {
fmt.Println("Error starting dnetDaemon :", err)
return err
Expand Down
4 changes: 4 additions & 0 deletions cmd/ovrouter/ovrouter.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ func (ep *endpoint) SetNames(srcName, dstPrefix string) error {
return nil
}

// SrcName returns the name stored on the endpoint (ep.name).
func (ep *endpoint) SrcName() string {
return ep.name
}

// SetGateway ignores the supplied IP and always returns nil.
func (ep *endpoint) SetGateway(net.IP) error {
return nil
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/readme_test/readme.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ func main() {
driverOptions := options.Generic{}
genericOption := make(map[string]interface{})
genericOption[netlabel.GenericData] = driverOptions
controller, err := libnetwork.New(config.OptionDriverConfig(networkType, genericOption))
old := make(map[string]interface{})
controller, _, err := libnetwork.New(old, config.OptionDriverConfig(networkType, genericOption))
if err != nil {
log.Fatalf("libnetwork.New: %s", err)
}
Expand Down
82 changes: 73 additions & 9 deletions controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ type initializer struct {
}

// New creates a new instance of network controller.
func New(cfgOptions ...config.Option) (NetworkController, error) {
func New(oldRunningContainers map[string]interface{}, cfgOptions ...config.Option) (NetworkController, map[string]interface{}, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the caller is not using the list of restored containers (docker side changes) other than checking the list length, then we should probably return a maskable error instead of adding the new return list.

c := &controller{
id: stringid.GenerateRandomID(),
cfg: config.ParseConfigOptions(cfgOptions...),
Expand All @@ -157,20 +157,20 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
}

if err := c.agentInit(c.cfg.Daemon.Bind); err != nil {
return nil, err
return nil, nil, err
}

if err := c.agentJoin(c.cfg.Daemon.Neighbors); err != nil {
return nil, err
return nil, nil, err
}

if err := c.initStores(); err != nil {
return nil, err
return nil, nil, err
}

drvRegistry, err := drvregistry.New(c.getStore(datastore.LocalScope), c.getStore(datastore.GlobalScope), c.RegisterDriver, nil)
if err != nil {
return nil, err
return nil, nil, err
}

for _, i := range getInitializers() {
Expand All @@ -183,7 +183,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
}

if err := drvRegistry.AddDriver(i.ntype, i.fn, dcfg); err != nil {
return nil, err
return nil, nil, err
}
}
c.drvRegistry = drvRegistry
Expand All @@ -196,15 +196,79 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
}
}

c.sandboxCleanup()
c.sandboxCleanup(oldRunningContainers)
if err := c.restoreSandbox(oldRunningContainers); err != nil {
log.Errorf("failed to restore sandbox")
}

c.cleanupLocalEndpoints()
c.networkCleanup()

if err := c.startExternalKeyListener(); err != nil {
return nil, err
return nil, nil, err
}
restored := make(map[string]interface{})
for _, sb := range c.sandboxes {
restored[sb.ContainerID()] = true
}

return c, nil
return c, restored, nil
}

// restoreSandbox walks the sandboxes already present in c.sandboxes and tries
// to restore each one using the options supplied in sbids, which is keyed by
// container ID and whose values are expected to be of type []SandboxOption.
//
// A sandbox with no usable options, or whose sb.restoreSandbox call fails, is
// logged and removed from c.sandboxes. For every remaining sandbox, each of
// its endpoints is passed to the Restore method of its network's driver; an
// endpoint that fails any step is logged and ep.Delete(true) is called on it.
//
// Failures are only logged: the function always returns nil.
func (c *controller) restoreSandbox(sbids map[string]interface{}) error {
for id, sb := range c.sandboxes {
log.Infof("restore sandbox %s of container %s", sb.ID(), sb.ContainerID())
// ok is false both when the container ID is absent from sbids and when the
// stored value is not a []SandboxOption.
option, ok := sbids[sb.ContainerID()].([]SandboxOption)
if !ok {
log.Errorf("failed to restore sandbox: no restore options passed from daemon")
delete(c.sandboxes, id)
continue
}
err := sb.restoreSandbox(option)
if err != nil {
// NOTE(review): err is not included in this log message.
log.Errorf("failed to restore sandbox %s", sb.ID())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also print the error.

Across the diffs, pls follow the convention where Log message start with upper case and error msg with lower case.

delete(c.sandboxes, id)
continue
}
// restore endpoints in this sandbox
// Fixme: if one of the endpoints failed to restore, should we delete this sandbox?
for _, ep := range sb.endpoints {
log.Infof("restore endpoint %s", ep.ID())
c.watchSvcRecord(ep)
// NOTE(review): the local name net would shadow an imported net package
// for the rest of this loop body, if this file imports one — confirm.
net, err := ep.getNetworkFromStore()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls follow the convention of calling the network variable as n.

if err != nil {
log.Errorf("Restore sandbox: failed to get endpoint network from store: %v", err)
// NOTE(review): the result of ep.Delete is discarded here and below.
ep.Delete(true)
continue
}
d, err := net.driver(true)
if err != nil {
// NOTE(review): "Resore" in the message below looks like a typo for "Restore".
log.Errorf("Resore sandbox: failed to get driver of endpoint %s: %v", ep.ID(), err)
ep.Delete(true)
continue
}
options := make(map[string]interface{})
// NOTE(review): this loop ranges over the map created on the line above, so
// its body never runs; presumably a different source map was intended —
// confirm against the endpoint/driver option handling.
for key, value := range options {
options[key] = value
}
// Copy the sandbox labels into the options passed to the driver.
for key, value := range sb.Labels() {
options[key] = value
}

err = d.Restore(net.ID(), ep.id, sb.Key(), ep.Interface(), options)
if err != nil {
log.Errorf("Restore sandbox: failed to restore endpoint %s to driver of network %s: %v", ep.Name(), net.Name(), err)
ep.Delete(true)
continue
}
}
// For a sandbox configured with useDefaultSandBox, record its osSbox as the
// controller's defOsSbox. sboxOnce is presumably a sync.Once, in which case
// only the first such sandbox is recorded — TODO confirm.
if sb.config.useDefaultSandBox {
c.sboxOnce.Do(func() {
c.defOsSbox = sb.osSbox
})
}
}
return nil
}

func (c *controller) makeDriverConfig(ntype string) map[string]interface{} {
Expand Down
6 changes: 6 additions & 0 deletions driverapi/driverapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ type Driver interface {
// Leave method is invoked when a Sandbox detaches from an endpoint.
Leave(nid, eid string) error

// Restore reconstruct driver struct
Restore(nid, eid string, sboxKey string, ifInfo InterfaceInfo, options map[string]interface{}) error
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to avoid adding this new method and instead reuse CreateEndpoint(..., restore bool) and Join(..., restore bool).


// ProgramExternalConnectivity invokes the driver method which does the necessary
// programming to allow the external connectivity dictated by the passed options
ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error
Expand Down Expand Up @@ -103,6 +106,9 @@ type InterfaceInfo interface {

// AddressIPv6 returns the IPv6 address.
AddressIPv6() *net.IPNet

// SrcName return the srcName
SrcName() string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That way this method could be part of InterfaceNameInfo where the set names method already is.

}

// InterfaceNameInfo provides a go interface for the drivers to assign names
Expand Down
82 changes: 79 additions & 3 deletions drivers/bridge/bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ func (d *driver) configure(option map[string]interface{}) error {
logrus.Warnf("Running modprobe bridge br_netfilter failed with message: %s, error: %v", out, err)
}
}
removeIPChains()
// TODO: need a better way to handle this
// removeIPChains()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this change is for avoiding network disruption during the reload, but I am worried about getting rid of this call, which lets us start from a clean state after ungraceful daemon shutdowns.

I heard the daemon restart use-case is narrowed to only the time needed to update the daemon and not meant to allow containers running for long time w/o a daemon. If that is the case, maybe we can live with an iptables cleanup during daemon restart. The rules are anyway reconstructed at the time the driver loads the networks.

natChain, filterChain, isolationChain, err = setupIPChains(config)
if err != nil {
return err
Expand Down Expand Up @@ -1211,11 +1212,10 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string
}

// Program any required port mapping and store them in the endpoint
endpoint.portMapping, err = network.allocatePorts(endpoint, network.config.DefaultBindingIP, d.config.EnableUserlandProxy)
endpoint.portMapping, err = network.allocatePorts(endpoint, network.config.DefaultBindingIP, d.config.EnableUserlandProxy, false)
if err != nil {
return err
}

if !network.config.EnableICC {
return d.link(network, endpoint, true)
}
Expand Down Expand Up @@ -1340,6 +1340,82 @@ func (d *driver) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{
return nil
}

// Restore rebuilds the bridge driver's in-memory state for endpoint eid on
// network nid without creating a new endpoint.
//
// The endpoint's MAC address, IPv4/IPv6 addresses and source interface name
// are read from ifInfo. Its endpoint, container and connectivity
// configuration are parsed from options. When port bindings are present the
// port mappings are re-allocated with the restore flag set, and, if
// inter-container communication is disabled on the network, the link rules
// are re-applied. On success the endpoint is stored in the network's endpoint
// table.
//
// sboxKey is accepted to satisfy the driver interface and is not used here.
//
// Restore returns an error when ifInfo is nil, when the network is unknown or
// its ID does not match, when an endpoint with the same ID is already
// registered, or when option parsing, port allocation or link programming
// fails. No endpoint is registered in any of those cases.
func (d *driver) Restore(nid, eid string, sboxKey string, ifInfo driverapi.InterfaceInfo, options map[string]interface{}) error {
	if ifInfo == nil {
		return errors.New("invalid interface info passed")
	}

	// Get the network handler and make sure it exists.
	d.Lock()
	n, ok := d.networks[nid]
	d.Unlock()

	if !ok {
		return types.NotFoundErrorf("network %s does not exist", nid)
	}
	if n == nil {
		return driverapi.ErrNoNetwork(nid)
	}

	// Sanity check.
	n.Lock()
	if n.id != nid {
		n.Unlock()
		return InvalidNetworkIDError(nid)
	}
	n.Unlock()

	// Check if the endpoint ID is good and retrieve the corresponding endpoint.
	ep, err := n.getEndpoint(eid)
	if err != nil {
		return err
	}

	// An endpoint with that ID is already known to the driver.
	if ep != nil {
		return driverapi.ErrEndpointExists(eid)
	}

	// Try to convert the options to endpoint configuration.
	epConfig, err := parseEndpointOptions(options)
	if err != nil {
		return err
	}

	endpoint := &bridgeEndpoint{id: eid, config: epConfig}
	endpoint.macAddress = ifInfo.MacAddress()
	endpoint.addr = ifInfo.Address()
	endpoint.addrv6 = ifInfo.AddressIPv6()
	endpoint.srcName = ifInfo.SrcName()

	endpoint.containerConfig, err = parseContainerOptions(options)
	if err != nil {
		return err
	}
	endpoint.extConnConfig, err = parseConnectivityOptions(options)
	if err != nil {
		return err
	}

	// Guard against a nil connectivity configuration (for example when no
	// options were supplied) before looking at the port bindings.
	if endpoint.extConnConfig != nil && len(endpoint.extConnConfig.PortBindings) > 0 {
		endpoint.portMapping, err = n.allocatePorts(endpoint, n.config.DefaultBindingIP, d.config.EnableUserlandProxy, true)
		if err != nil {
			return err
		}

		// Make sure that all the iptables rules still exist; re-create any
		// rule that is missing. Do not return early on success: the endpoint
		// still has to be registered below.
		if !n.config.EnableICC {
			if err := d.link(n, endpoint, true); err != nil {
				return err
			}
		}
	}

	n.Lock()
	n.endpoints[eid] = endpoint
	n.Unlock()
	return nil
}

func parseEndpointOptions(epOptions map[string]interface{}) (*endpointConfiguration, error) {
if epOptions == nil {
return nil, nil
Expand Down
Loading