From c1752378402d9a1ae67647cded2e7d0cdb95d0c5 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 16 Oct 2015 13:38:59 +0100 Subject: [PATCH 01/98] Prototype etcd driver process, replaces etcd watch loop. --- calico/felix/dispatch.py | 28 ++-- calico/felix/endpoint.py | 43 ++--- calico/felix/fetcd.py | 230 +++++++++++++++++--------- calico/felix/ipsets.py | 81 +++++---- calico/felix/profilerules.py | 25 +-- calico/felix/readetcd.py | 312 +++++++++++++++++++++++++++++++++++ calico/felix/splitter.py | 104 ++++++------ 7 files changed, 610 insertions(+), 213 deletions(-) create mode 100644 calico/felix/readetcd.py diff --git a/calico/felix/dispatch.py b/calico/felix/dispatch.py index 97214c9ab9..bdb6d466c0 100644 --- a/calico/felix/dispatch.py +++ b/calico/felix/dispatch.py @@ -48,20 +48,20 @@ def __init__(self, config, ip_version, iptables_updater): self.programmed_leaf_chains = set() self._dirty = False - @actor_message() - def apply_snapshot(self, ifaces): - """ - Replaces all known interfaces with the given snapshot and rewrites the - chain. - - :param set[str] ifaces: The interface - """ - _log.info("Applying dispatch chains snapshot.") - self.ifaces = set(ifaces) # Take a copy. - # Always reprogram the chain, even if it's empty. This makes sure that - # we resync and it stops the iptables layer from marking our chain as - # missing. - self._dirty = True + # @actor_message() + # def apply_snapshot(self, ifaces): + # """ + # Replaces all known interfaces with the given snapshot and rewrites the + # chain. + # + # :param set[str] ifaces: The interface + # """ + # _log.info("Applying dispatch chains snapshot.") + # self.ifaces = set(ifaces) # Take a copy. + # # Always reprogram the chain, even if it's empty. This makes sure that + # # we resync and it stops the iptables layer from marking our chain as + # # missing. + # self._dirty = True @actor_message() def on_endpoint_added(self, iface_name): diff --git a/calico/felix/endpoint.py b/calico/felix/endpoint.py index b6822684db..6e2afcf28e 100644 --- a/calico/felix/endpoint.py +++ b/calico/felix/endpoint.py @@ -86,27 +86,28 @@ def _on_object_started(self, endpoint_id, obj): ep = self.endpoints_by_id.get(endpoint_id) obj.on_endpoint_update(ep, async=True) - @actor_message() - def apply_snapshot(self, endpoints_by_id): - # Tell the dispatch chains about the local endpoints in advance so - # that we don't flap the dispatch chain at start-of-day. - local_iface_name_to_ep_id = {} - for ep_id, ep in endpoints_by_id.iteritems(): - if ep and ep_id.host == self.config.HOSTNAME and ep.get("name"): - local_iface_name_to_ep_id[ep.get("name")] = ep_id - self.dispatch_chains.apply_snapshot(local_iface_name_to_ep_id.keys(), - async=True) - # Then update/create endpoints and work out which endpoints have been - # deleted. - missing_endpoints = set(self.endpoints_by_id.keys()) - for endpoint_id, endpoint in endpoints_by_id.iteritems(): - self.on_endpoint_update(endpoint_id, endpoint, - force_reprogram=True) - missing_endpoints.discard(endpoint_id) - self._maybe_yield() - for endpoint_id in missing_endpoints: - self.on_endpoint_update(endpoint_id, None) - self._maybe_yield() + # @actor_message() + # def apply_snapshot(self, endpoints_by_id): + # # Tell the dispatch chains about the local endpoints in advance so + # # that we don't flap the dispatch chain at start-of-day. 
+ # local_iface_name_to_ep_id = {} + # for ep_id, ep in endpoints_by_id.iteritems(): + # if ep and ep_id.host == self.config.HOSTNAME and ep.get("name"): + # local_iface_name_to_ep_id[ep.get("name")] = ep_id + # self.dispatch_chains.apply_snapshot(local_iface_name_to_ep_id.keys(), + # async=True) + # # Then update/create endpoints and work out which endpoints have been + # # deleted. + # missing_endpoints = set(self.endpoints_by_id.keys()) + # for endpoint_id, endpoint in endpoints_by_id.iteritems(): + # self.on_endpoint_update(endpoint_id, endpoint, + # force_reprogram=True) + # missing_endpoints.discard(endpoint_id) + # self._maybe_yield() + # missing_endpoints.clear() + # for endpoint_id in missing_endpoints: + # self.on_endpoint_update(endpoint_id, None) + # self._maybe_yield() @actor_message() def on_endpoint_update(self, endpoint_id, endpoint, force_reprogram=False): diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 977eeb32a1..991ff62b30 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -25,6 +25,9 @@ import random import json import logging +import socket +import msgpack +import time from calico.monotonic import monotonic_time from etcd import EtcdException, EtcdKeyNotFound @@ -50,6 +53,8 @@ from calico.felix.futils import (intern_dict, intern_list, logging_exceptions, iso_utc_timestamp, IPV4, IPV6) +from pytrie import Trie + _log = logging.getLogger(__name__) @@ -86,6 +91,7 @@ POOL_V4_DIR, ] +trie = Trie() class EtcdAPI(EtcdClientOwner, Actor): """ @@ -378,6 +384,56 @@ def _on_pre_resync(self): _log.info("etcd worker about to wait for begin_polling event") self.begin_polling.wait() + @logging_exceptions + def loop(self): + _log.info("Started %s loop", self) + while not self._stopped: + try: + _log.info("Reconnecting and loading snapshot from etcd...") + self.reconnect(copy_cluster_id=False) + self._on_pre_resync() + try: + os.unlink("/tmp/felix.sck") + except: + pass + update_socket = socket.socket(socket.AF_UNIX, + socket.SOCK_SEQPACKET) + + print "Created socket" + update_socket.bind("/tmp/felix.sck") + print "Bound socket" + update_socket.listen(1) + print "Marked socket for listen" + os.chmod("/tmp/felix.sck", 0777) + print "Chmodded socket" + update_conn, _ = update_socket.accept() + print "Accepted connection on socket" + receive_count = 0 + while True: + data = update_conn.recv(8092) + receive_count += 1 + if receive_count % 1000 == 0: + print "Recieved", receive_count + key, value = msgpack.loads(data) + n = Node() + n.action = "set" if value is not None else "delete" + n.value = value + n.key = key + try: + self.dispatcher.handle_event(n) + except ResyncRequired: + _log.warning("IGNORING RESYNC.") + except EtcdException as e: + # Most likely a timeout or other error in the pre-resync; + # start over. These exceptions have good semantic error text + # so the stack trace would just add log spam. + _log.error("Unexpected IO or etcd error, triggering " + "resync with etcd: %r.", e) + time.sleep(1) # Prevent tight loop due to unexpected error. + except: + _log.exception("Exception reading from socket?") + _log.info("%s.loop() stopped due to self.stop == True", self) + def _load_config(self): """ Loads our configuration from etcd. Does not return @@ -435,79 +491,87 @@ def _load_config(self): self.last_global_config = global_dict.copy() self._config.report_etcd_config(host_dict, global_dict) return - - def _on_snapshot_loaded(self, etcd_snapshot_response): - """ - Loads a snapshot from etcd and passes it to the update splitter. 
- - :raises ResyncRequired: if the Ready flag is not set in the snapshot. - """ - rules_by_id = {} - tags_by_id = {} - endpoints_by_id = {} - ipv4_pools_by_id = {} - self.endpoint_ids_per_host.clear() - self.ipv4_by_hostname.clear() - still_ready = False - for child in etcd_snapshot_response.children: - profile_id, rules = parse_if_rules(child) - if profile_id: - rules_by_id[profile_id] = rules - continue - profile_id, tags = parse_if_tags(child) - if profile_id: - tags_by_id[profile_id] = tags - continue - endpoint_id, endpoint = parse_if_endpoint(self._config, child) - if endpoint_id and endpoint: - endpoints_by_id[endpoint_id] = endpoint - self.endpoint_ids_per_host[endpoint_id.host].add(endpoint_id) - continue - pool_id, pool = parse_if_ipam_v4_pool(child) - if pool_id and pool: - ipv4_pools_by_id[pool_id] = pool - continue - if self._config.IP_IN_IP_ENABLED: - hostname, ip = parse_if_host_ip(child) - if hostname and ip: - self.ipv4_by_hostname[hostname] = ip - continue - - # Double-check the flag hasn't changed since we read it before. - if child.key == READY_KEY: - if child.value == "true": - still_ready = True - else: - _log.warning("Aborting resync because ready flag was" - "unset since we read it.") - raise ResyncRequired() - - if not still_ready: - _log.warn("Aborting resync; ready flag no longer present.") - raise ResyncRequired() - - # We now know exactly which endpoints are on this host, use that to - # clean up any endpoint statuses that should now be gone. - our_endpoints_ids = self.endpoint_ids_per_host[self._config.HOSTNAME] - self.clean_up_endpoint_statuses(our_endpoints_ids) - - # Actually apply the snapshot. This does not return anything, but - # just sends the relevant messages to the relevant threads to make - # all the processing occur. - _log.info("Snapshot parsed, passing to update splitter") - self.splitter.apply_snapshot(rules_by_id, - tags_by_id, - endpoints_by_id, - ipv4_pools_by_id, - async=True) - if self._config.IP_IN_IP_ENABLED: - # We only support IPv4 for host tracking right now so there's not - # much point in going via the splitter. - # FIXME Support IP-in-IP for IPv6. - _log.info("Sending (%d) host IPs to ipset.", - len(self.ipv4_by_hostname)) - self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(), - async=True) + # + # def _on_snapshot_loaded(self, etcd_snapshot_response): + # """ + # Loads a snapshot from etcd and passes it to the update splitter. + # + # :raises ResyncRequired: if the Ready flag is not set in the snapshot. 
+ # """ + # start_time = monotonic_time() + # rules_by_id = {} + # tags_by_id = {} + # endpoints_by_id = {} + # ipv4_pools_by_id = {} + # self.endpoint_ids_per_host.clear() + # self.ipv4_by_hostname.clear() + # still_ready = False + # for child in etcd_snapshot_response.children: + # trie_key = [intern(s.encode("utf8")) for s in + # child.key.split("/")][2:] + # if trie.get(trie_key) == child.modifiedIndex and "host" in trie_key: + # continue + # trie[trie_key] = child.modifiedIndex + # + # profile_id, rules = parse_if_rules(child) + # if profile_id: + # rules_by_id[profile_id] = rules + # continue + # profile_id, tags = parse_if_tags(child) + # if profile_id: + # tags_by_id[profile_id] = tags + # continue + # endpoint_id, endpoint = parse_if_endpoint(self._config, child) + # if endpoint_id and endpoint: + # endpoints_by_id[endpoint_id] = endpoint + # self.endpoint_ids_per_host[endpoint_id.host].add(endpoint_id) + # continue + # pool_id, pool = parse_if_ipam_v4_pool(child) + # if pool_id and pool: + # ipv4_pools_by_id[pool_id] = pool + # continue + # if self._config.IP_IN_IP_ENABLED: + # hostname, ip = parse_if_host_ip(child) + # if hostname and ip: + # self.ipv4_by_hostname[hostname] = ip + # continue + # + # # Double-check the flag hasn't changed since we read it before. + # if child.key == READY_KEY: + # if child.value == "true": + # still_ready = True + # else: + # _log.warning("Aborting resync because ready flag was" + # "unset since we read it.") + # raise ResyncRequired() + # + # if not still_ready: + # _log.warn("Aborting resync; ready flag no longer present.") + # raise ResyncRequired() + # + # # We now know exactly which endpoints are on this host, use that to + # # clean up any endpoint statuses that should now be gone. + # our_endpoints_ids = self.endpoint_ids_per_host[self._config.HOSTNAME] + # self.clean_up_endpoint_statuses(our_endpoints_ids) + # + # # Actually apply the snapshot. This does not return anything, but + # # just sends the relevant messages to the relevant threads to make + # # all the processing occur. + # _log.info("Snapshot parsed in %.2fs, passing to update splitter", + # monotonic_time() - start_time) + # self.splitter.apply_snapshot(rules_by_id, + # tags_by_id, + # endpoints_by_id, + # ipv4_pools_by_id, + # async=True) + # if self._config.IP_IN_IP_ENABLED: + # # We only support IPv4 for host tracking right now so there's not + # # much point in going via the splitter. + # # FIXME Support IP-in-IP for IPv6. 
+ # _log.info("Sending (%d) host IPs to ipset.", + # len(self.ipv4_by_hostname)) + # self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(), + # async=True) def clean_up_endpoint_statuses(self, our_endpoints_ids): """ @@ -567,7 +631,7 @@ def on_endpoint_set(self, response, hostname, orchestrator, combined_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id) _log.debug("Endpoint %s updated", combined_id) - self.endpoint_ids_per_host[combined_id.host].add(combined_id) + #self.endpoint_ids_per_host[combined_id.host].add(combined_id) endpoint = parse_endpoint(self._config, combined_id, response.value) self.splitter.on_endpoint_update(combined_id, endpoint, async=True) @@ -577,9 +641,9 @@ def on_endpoint_delete(self, response, hostname, orchestrator, combined_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id) _log.debug("Endpoint %s deleted", combined_id) - self.endpoint_ids_per_host[combined_id.host].discard(combined_id) - if not self.endpoint_ids_per_host[combined_id.host]: - del self.endpoint_ids_per_host[combined_id.host] + #self.endpoint_ids_per_host[combined_id.host].discard(combined_id) + # if not self.endpoint_ids_per_host[combined_id.host]: + # del self.endpoint_ids_per_host[combined_id.host] self.splitter.on_endpoint_update(combined_id, None, async=True) def on_rules_set(self, response, profile_id): @@ -915,7 +979,7 @@ def parse_endpoint(config, combined_id, raw_json): common.validate_endpoint(config, combined_id, endpoint) except ValidationFailed as e: _log.warning("Validation failed for endpoint %s, treating as " - "missing: %s", combined_id, e.message) + "missing: %s; %r", combined_id, e.message, raw_json) endpoint = None else: _log.debug("Validated endpoint : %s", endpoint) @@ -1028,3 +1092,13 @@ def safe_decode_json(raw_json, log_tag=None): log_tag, raw_json) return None + +class Node(object): + __slots__ = ("key", "value", "action", "current_key", "modifiedIndex") + + def __init__(self): + self.modifiedIndex = None + self.key = None + self.value = None + self.action = None + self.current_key = None \ No newline at end of file diff --git a/calico/felix/ipsets.py b/calico/felix/ipsets.py index bd4aeafd65..be1ca364ff 100644 --- a/calico/felix/ipsets.py +++ b/calico/felix/ipsets.py @@ -120,42 +120,51 @@ def nets_key(self): nets = "ipv4_nets" if self.ip_type == IPV4 else "ipv6_nets" return nets - @actor_message() - def apply_snapshot(self, tags_by_prof_id, endpoints_by_id): - """ - Apply a snapshot read from etcd, replacing existing state. - - :param tags_by_prof_id: A dict mapping security profile ID to a list of - profile tags. - :param endpoints_by_id: A dict mapping EndpointId objects to endpoint - data dicts. - """ - _log.info("Applying tags snapshot. 
%s tags, %s endpoints", - len(tags_by_prof_id), len(endpoints_by_id)) - missing_profile_ids = set(self.tags_by_prof_id.keys()) - for profile_id, tags in tags_by_prof_id.iteritems(): - assert tags is not None - self.on_tags_update(profile_id, tags) - missing_profile_ids.discard(profile_id) - self._maybe_yield() - for profile_id in missing_profile_ids: - self.on_tags_update(profile_id, None) - self._maybe_yield() - del missing_profile_ids - missing_endpoints = set(self.endpoint_data_by_ep_id.keys()) - for endpoint_id, endpoint in endpoints_by_id.iteritems(): - assert endpoint is not None - endpoint_data = self._endpoint_data_from_dict(endpoint_id, - endpoint) - self._on_endpoint_data_update(endpoint_id, endpoint_data) - missing_endpoints.discard(endpoint_id) - self._maybe_yield() - for endpoint_id in missing_endpoints: - self._on_endpoint_data_update(endpoint_id, EMPTY_ENDPOINT_DATA) - self._maybe_yield() - self._force_reprogram = True - _log.info("Tags snapshot applied: %s tags, %s endpoints", - len(tags_by_prof_id), len(endpoints_by_id)) + # @actor_message() + # def apply_snapshot(self, tags_by_prof_id, endpoints_by_id): + # """ + # Apply a snapshot read from etcd, replacing existing state. + # + # :param tags_by_prof_id: A dict mapping security profile ID to a list of + # profile tags. + # :param endpoints_by_id: A dict mapping EndpointId objects to endpoint + # data dicts. + # """ + # _log.info("Applying tags snapshot. %s tags, %s endpoints", + # len(tags_by_prof_id), len(endpoints_by_id)) + # missing_profile_ids = set(self.tags_by_prof_id.keys()) + # for profile_id, tags in tags_by_prof_id.iteritems(): + # assert tags is not None + # self.on_tags_update(profile_id, tags) + # missing_profile_ids.discard(profile_id) + # self._maybe_yield() + # for profile_id in missing_profile_ids: + # self.on_tags_update(profile_id, None) + # self._maybe_yield() + # del missing_profile_ids + # missing_endpoints = set(self.endpoint_data_by_ep_id.keys()) + # for endpoint_id, endpoint in endpoints_by_id.iteritems(): + # assert endpoint is not None + # missing_endpoints.discard(endpoint_id) + # endpoint_data = self.endpoint_data_by_ep_id.get(endpoint_id) + # if endpoint_data: + # profile_ids = set(endpoint.get("profile_ids", [])) + # nets_list = endpoint.get(self.nets_key, []) + # ips = set(map(futils.net_to_ip, nets_list)) + # if (profile_ids == endpoint_data.profile_ids and + # ips == endpoint_data.ip_addresses): + # continue + # endpoint_data = self._endpoint_data_from_dict(endpoint_id, + # endpoint) + # self._on_endpoint_data_update(endpoint_id, endpoint_data) + # self._maybe_yield() + # missing_endpoints.clear() + # for endpoint_id in missing_endpoints: + # self._on_endpoint_data_update(endpoint_id, EMPTY_ENDPOINT_DATA) + # self._maybe_yield() + # self._force_reprogram = True + # _log.info("Tags snapshot applied: %s tags, %s endpoints", + # len(tags_by_prof_id), len(endpoints_by_id)) @actor_message() def cleanup(self): diff --git a/calico/felix/profilerules.py b/calico/felix/profilerules.py index 4e47e5df6b..2246a2cba9 100644 --- a/calico/felix/profilerules.py +++ b/calico/felix/profilerules.py @@ -57,18 +57,19 @@ def _on_object_started(self, profile_id, active_profile): profile_or_none) active_profile.on_profile_update(profile_or_none, async=True) - @actor_message() - def apply_snapshot(self, rules_by_profile_id): - _log.info("Rules manager applying snapshot; %s rules", - len(rules_by_profile_id)) - missing_ids = set(self.rules_by_profile_id.keys()) - for profile_id, profile in 
rules_by_profile_id.iteritems(): - self.on_rules_update(profile_id, profile, - force_reprogram=True) # Skips queue - missing_ids.discard(profile_id) - self._maybe_yield() - for dead_profile_id in missing_ids: - self.on_rules_update(dead_profile_id, None) + # @actor_message() + # def apply_snapshot(self, rules_by_profile_id): + # _log.info("Rules manager applying snapshot; %s rules", + # len(rules_by_profile_id)) + # missing_ids = set(self.rules_by_profile_id.keys()) + # for profile_id, profile in rules_by_profile_id.iteritems(): + # self.on_rules_update(profile_id, profile, + # force_reprogram=True) # Skips queue + # missing_ids.discard(profile_id) + # self._maybe_yield() + # missing_ids.clear() + # for dead_profile_id in missing_ids: + # self.on_rules_update(dead_profile_id, None) @actor_message() def on_rules_update(self, profile_id, profile, force_reprogram=False): diff --git a/calico/felix/readetcd.py b/calico/felix/readetcd.py new file mode 100644 index 0000000000..a1035abd0b --- /dev/null +++ b/calico/felix/readetcd.py @@ -0,0 +1,312 @@ +from Queue import Queue, Empty + +from httplib import HTTPException +import socket +import string +from ijson.backends import yajl2 as ijson +import logging +import urllib3 + +from json import loads +from urllib3 import HTTPConnectionPool +from datrie import Trie +from threading import Thread, Event +import time +from msgpack import dumps +from urllib3.exceptions import ReadTimeoutError + +_log = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s [%(levelname)s][%(process)s/%(thread)d] %(name)s %(lineno)d: %(message)s') +events_processed = 0 +snapshot_events = 0 +watcher_events = 0 +snap_skipped = 0 + + +def report_status(): + while True: + start_tot = events_processed + start_snap = snapshot_events + start_watch = watcher_events + start_skip = snap_skipped + time.sleep(1) + end_tot = events_processed + end_snap = snapshot_events + end_watch = watcher_events + end_skip = snap_skipped + _log.info( + "Events/s: %s Snap: %s, Watch %s, Skip: %s", + end_tot - start_tot, + end_snap - start_snap, + end_watch - start_watch, + end_skip - start_skip + ) + + +x = {u'action': u'set', + u'node': {u'createdIndex': 2095663, u'modifiedIndex': 2095663, + u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "63:4e:60:d9:91:a6", "ipv4_nets": ["1.0.0.174/32"]}', + u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}, + u'prevNode': {u'createdIndex': 2025647, u'modifiedIndex': 2025647, + u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "37:95:03:e2:f3:6c", "ipv4_nets": ["1.0.0.174/32"]}', + u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}} + + +http = HTTPConnectionPool("localhost", 4001, maxsize=2) + + +def watch_etcd(next_index, result_queue, stop_event): + http = HTTPConnectionPool("localhost", 4001, maxsize=2) + try: + while not stop_event.is_set(): + try: + _log.info("About to call http.request...") + resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", + fields={"recursive": "true", "wait": "true", + "waitIndex": next_index}, + timeout=5) + resp_body = loads(resp.data) + except ReadTimeoutError: + _log.exception("Watch read timed out, restarting watch at index %s", + next_index) + continue + except: + _log.exception("Unexpected exception") + raise + else: + node = resp_body["node"] + key = node["key"] + value = node.get("value") + 
modified_index = node["modifiedIndex"] + result_queue.put((modified_index, key, value)) + next_index = modified_index + 1 + finally: + result_queue.put(None) + + +def resync_and_merge(update_sock): + global events_processed, snapshot_events, watcher_events, snap_skipped + hwms = Trie(string.printable) + stop_worker = None + event_hwm = 0 + best_hwm = 0 + first_resync = True + + while True: + if stop_worker: + stop_worker.set() + # Load the recursive get as far as the headers... + #http = HTTPConnectionPool("localhost", 4001, maxsize=1) + resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", + fields={"recursive": "true"}, + timeout=120, + preload_content=False) + + # ASAP, start the background thread to listen for events and queue + # them up... + snapshot_index = int(resp.getheader("x-etcd-index", 1)) + watcher_queue = Queue() + stop_worker = Event() + watcher_thread = Thread(target=watch_etcd, + args=(snapshot_index + 1, + watcher_queue, + stop_worker)) + watcher_thread.daemon = True + watcher_thread.start() + + + + # Then plough through the update incrementally. + deletes_during_snapshot = Trie(string.printable) + try: + parser = ijson.parse(resp) # urllib3 response is file-like. + stack = [] + frame = Node() + count = 0 + for prefix, event, value in parser: + if event == "start_map": + stack.append(frame) + frame = Node() + elif event == "map_key": + frame.current_key = value + elif event in ("string", "number"): + if frame.done: + continue + if frame.current_key == "modifiedIndex": + frame.modifiedIndex = value + if frame.current_key == "key": + frame.key = value + elif frame.current_key == "value": + frame.value = value + if (frame.key is not None and + frame.value is not None and + frame.modifiedIndex is not None): + frame.done = True + # We have all the data for a node. See if it's fresh. + key_parts = frame.key + + # See if the key or its directory has been deleted. + del_hwm = deletes_during_snapshot.longest_prefix_value( + key_parts, + None + ) + + if frame.modifiedIndex <= del_hwm: + # Update to a key that's already been deleted. + _log.debug("Skipping: %s deleted at %s", + key_parts, del_hwm) + snap_skipped += 1 + continue + + # Check if this is a newer version of the node than + # what we've seen before. + try: + hwm = hwms[key_parts] + except KeyError: + hwm = None + if snapshot_index > hwm: + # We have to update the HWM to allow us to spot + # deletions below. + hwms[key_parts] = snapshot_index + if frame.modifiedIndex <= hwm: + snap_skipped += 1 + continue + + # This is a fresh value for the key. + update_sock.sendall(dumps((frame.key, frame.value))) + events_processed += 1 + snapshot_events += 1 + frame.current_key = None + elif event == "end_map": + frame = stack.pop(-1) + if count % 100 == 0: + try: + while True: + try: + (mod, key, val) = watcher_queue.get_nowait() + except TypeError: + print "Queue finished" + break + key_parts = key + if val is None: + # Mark this item as deleted post-snapshot. If this + # is a dir then we'll squash every snapshot update + # to this whole dir. + _log.debug("Storing deletion of %s at %s", + key_parts, mod) + # FIXME: need to add "/" here but that only works for dirs + deletes_during_snapshot[key_parts + "/"] = mod + # Simulate a delete for all the keys under the + # deleted key. 
+ for child_key_parts, child_mod in hwms.items(key_parts + "/"): + del hwms[child_key_parts] + child_key = child_key_parts + #print "Simulating delete of", child_key + update_sock.sendall( + dumps((child_key, None)) + ) + else: + hwms[key_parts] = mod + update_sock.sendall(dumps((key, val))) + events_processed += 1 + watcher_events += 1 + event_hwm = mod + except Empty: + pass + count += 1 + + # Done applying snapshot. If we need to do a snapshot again, we + # can skip any keys that have a modifiedIndex <= to best_hwm. + best_hwm = max(snapshot_index, event_hwm) + + # Only used to resolve deleted during a snapshot so we can throw + # away. + del deletes_during_snapshot + + if not first_resync: + # Find any keys that were deleted while we were down. + _log.info("Scanning for deletions") + # TODO Interleave with processing more watcher keys? + for key_parts, value in hwms.items(): + if value < snapshot_index: + # We didn't see the value during the snapshot or via the + # event queue. It must have been deleted. + del hwms[key_parts] + update_sock.sendall( + dumps((key_parts, None)) + ) + events_processed += 1 + else: + _log.info("First resync, skipping delete check.") + + _log.info("In sync, processing events only") + while True: + try: + mod, key, val = watcher_queue.get() + except TypeError: + print "Queue finished" + break + key_parts = key + if val is None: + # Simulate a delete for all the keys under the + # deleted key. + for child_key, child_mod in hwms.items(key_parts): + del hwms[child_key] + update_sock.sendall( + dumps((child_key, None)) + ) + else: + # At this point, we're using hwms only to track existence + # so we can generate deletions when whole directories are + # deleted. However, we may as well keep the modifiedIndex + # up to date. + hwms[key_parts] = mod + update_sock.sendall(dumps((key, val))) + events_processed += 1 + watcher_events += 1 + event_hwm = mod + best_hwm = mod + _log.warning("Worker stopped, resyncing...") + except (urllib3.exceptions.HTTPError, + HTTPException, + socket.error) as e: + _log.error("Request to etcd failed: %r", e) + finally: + first_resync = False + + +class Node(object): + __slots__ = ("key", "value", "action", "current_key", "modifiedIndex", "done") + + def __init__(self): + self.modifiedIndex = None + self.key = None + self.value = None + self.action = None + self.current_key = None + self.done = False + + +def main(): + global events_processed + t = Thread(target=report_status) + t.daemon = True + t.start() + + update_socket = socket.socket(socket.AF_UNIX, + socket.SOCK_SEQPACKET) + while True: + try: + update_socket.connect("/tmp/felix.sck") + except: + _log.exception("Failed to connect to felix...") + time.sleep(1) + else: + break + + resync_and_merge(update_socket) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/calico/felix/splitter.py b/calico/felix/splitter.py index da56b6901a..707a738a4a 100644 --- a/calico/felix/splitter.py +++ b/calico/felix/splitter.py @@ -50,58 +50,58 @@ def __init__(self, config, ipsets_mgrs, rules_managers, endpoint_managers, self.ipv4_masq_manager = ipv4_masq_manager self._cleanup_scheduled = False - @actor_message() - def apply_snapshot(self, rules_by_prof_id, tags_by_prof_id, - endpoints_by_id, ipv4_pools_by_id): - """ - Replaces the whole cache state with the input. Applies deltas vs the - current active state. - - :param rules_by_prof_id: A dict mapping security profile ID to a list - of profile rules, each of which is a dict. 
- :param tags_by_prof_id: A dict mapping security profile ID to a list of - profile tags. - :param endpoints_by_id: A dict mapping EndpointId objects to endpoint - data dicts. - :param ipv4_pools_by_id: A dict mapping IPAM pool ID to dicts - representing the pool. - """ - # Step 1: fire in data update events to the profile and tag managers - # so they can build their indexes before we activate anything. - _log.info("Applying snapshot. Queueing rules.") - for rules_mgr in self.rules_mgrs: - rules_mgr.apply_snapshot(rules_by_prof_id, async=True) - _log.info("Applying snapshot. Queueing tags/endpoints to ipset mgr.") - for ipset_mgr in self.ipsets_mgrs: - ipset_mgr.apply_snapshot(tags_by_prof_id, endpoints_by_id, - async=True) - - # Step 2: fire in update events into the endpoint manager, which will - # recursively trigger activation of profiles and tags. - _log.info("Applying snapshot. Queueing endpoints->endpoint mgr.") - for ep_mgr in self.endpoint_mgrs: - ep_mgr.apply_snapshot(endpoints_by_id, async=True) - - # Step 3: send update to NAT manager. - _log.info("Applying snapshot. Queueing IPv4 pools -> masq mgr.") - self.ipv4_masq_manager.apply_snapshot(ipv4_pools_by_id, async=True) - - _log.info("Applying snapshot. DONE. %s rules, %s tags, " - "%s endpoints, %s pools", len(rules_by_prof_id), - len(tags_by_prof_id), len(endpoints_by_id), - len(ipv4_pools_by_id)) - - # Since we don't wait for all the above processing to finish, set a - # timer to clean up orphaned ipsets and tables later. If the snapshot - # takes longer than this timer to apply then we might do the cleanup - # before the snapshot is finished. That would cause dropped packets - # until applying the snapshot finishes. - if not self._cleanup_scheduled: - _log.info("No cleanup scheduled, scheduling one.") - gevent.spawn_later(self.config.STARTUP_CLEANUP_DELAY, - functools.partial(self.trigger_cleanup, - async=True)) - self._cleanup_scheduled = True + # @actor_message() + # def apply_snapshot(self, rules_by_prof_id, tags_by_prof_id, + # endpoints_by_id, ipv4_pools_by_id): + # """ + # Replaces the whole cache state with the input. Applies deltas vs the + # current active state. + # + # :param rules_by_prof_id: A dict mapping security profile ID to a list + # of profile rules, each of which is a dict. + # :param tags_by_prof_id: A dict mapping security profile ID to a list of + # profile tags. + # :param endpoints_by_id: A dict mapping EndpointId objects to endpoint + # data dicts. + # :param ipv4_pools_by_id: A dict mapping IPAM pool ID to dicts + # representing the pool. + # """ + # # Step 1: fire in data update events to the profile and tag managers + # # so they can build their indexes before we activate anything. + # _log.info("Applying snapshot. Queueing rules.") + # for rules_mgr in self.rules_mgrs: + # rules_mgr.apply_snapshot(rules_by_prof_id, async=True) + # _log.info("Applying snapshot. Queueing tags/endpoints to ipset mgr.") + # for ipset_mgr in self.ipsets_mgrs: + # ipset_mgr.apply_snapshot(tags_by_prof_id, endpoints_by_id, + # async=True) + # + # # Step 2: fire in update events into the endpoint manager, which will + # # recursively trigger activation of profiles and tags. + # _log.info("Applying snapshot. Queueing endpoints->endpoint mgr.") + # for ep_mgr in self.endpoint_mgrs: + # ep_mgr.apply_snapshot(endpoints_by_id, async=True) + # + # # Step 3: send update to NAT manager. + # _log.info("Applying snapshot. 
Queueing IPv4 pools -> masq mgr.") + # self.ipv4_masq_manager.apply_snapshot(ipv4_pools_by_id, async=True) + # + # _log.info("Applying snapshot. DONE. %s rules, %s tags, " + # "%s endpoints, %s pools", len(rules_by_prof_id), + # len(tags_by_prof_id), len(endpoints_by_id), + # len(ipv4_pools_by_id)) + # + # # Since we don't wait for all the above processing to finish, set a + # # timer to clean up orphaned ipsets and tables later. If the snapshot + # # takes longer than this timer to apply then we might do the cleanup + # # before the snapshot is finished. That would cause dropped packets + # # until applying the snapshot finishes. + # if not self._cleanup_scheduled: + # _log.info("No cleanup scheduled, scheduling one.") + # gevent.spawn_later(self.config.STARTUP_CLEANUP_DELAY, + # functools.partial(self.trigger_cleanup, + # async=True)) + # self._cleanup_scheduled = True @actor_message() def trigger_cleanup(self): From 64771357229869c304411610f952e7a47f69a3aa Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 16 Oct 2015 13:47:08 +0100 Subject: [PATCH 02/98] Move etcd driver into its own package. --- calico/etcddriver/__init__.py | 0 calico/{felix/readetcd.py => etcddriver/__main__.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 calico/etcddriver/__init__.py rename calico/{felix/readetcd.py => etcddriver/__main__.py} (100%) diff --git a/calico/etcddriver/__init__.py b/calico/etcddriver/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/calico/felix/readetcd.py b/calico/etcddriver/__main__.py similarity index 100% rename from calico/felix/readetcd.py rename to calico/etcddriver/__main__.py From 6d592d3568cf3ab321a7ab2c39f8c32d9006c67a Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 16 Oct 2015 15:37:08 +0100 Subject: [PATCH 03/98] Move etcd driver code into own package, Felix now starts the driver. 
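
The wire protocol between the driver and Felix, as it stands after this
patch: Felix binds an AF_UNIX, SOCK_SEQPACKET socket at /tmp/felix.sck,
spawns "python -m calico.etcddriver", accepts a single connection and
then reads msgpack-encoded (key, value) pairs, with a value of None
meaning a deletion.  Because SOCK_SEQPACKET preserves record
boundaries, each recv() returns exactly one msgpack message.  A
condensed sketch of the Felix-side receive loop (read_updates and
handle_update are illustrative names, not part of the patch; the
8092-byte buffer is the prototype's assumption that no single message
exceeds it):

    import msgpack

    def read_updates(update_conn, handle_update):
        while True:
            data = update_conn.recv(8092)
            if not data:
                break  # Driver disconnected; caller should resync.
            key, value = msgpack.loads(data)  # One record per recv().
            handle_update(key, value)         # value None => deletion.
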
--- calico/etcddriver/__main__.py | 290 +----------------------------- calico/etcddriver/driver.py | 326 ++++++++++++++++++++++++++++++++++ calico/felix/fetcd.py | 8 +- 3 files changed, 336 insertions(+), 288 deletions(-) create mode 100644 calico/etcddriver/driver.py diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index a1035abd0b..de9cf098be 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -1,295 +1,13 @@ -from Queue import Queue, Empty - -from httplib import HTTPException import socket -import string -from ijson.backends import yajl2 as ijson -import logging -import urllib3 - -from json import loads -from urllib3 import HTTPConnectionPool -from datrie import Trie -from threading import Thread, Event +from threading import Thread import time -from msgpack import dumps -from urllib3.exceptions import ReadTimeoutError +from calico.etcddriver.driver import report_status, resync_and_merge +import logging _log = logging.getLogger(__name__) -logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s [%(levelname)s][%(process)s/%(thread)d] %(name)s %(lineno)d: %(message)s') -events_processed = 0 -snapshot_events = 0 -watcher_events = 0 -snap_skipped = 0 - - -def report_status(): - while True: - start_tot = events_processed - start_snap = snapshot_events - start_watch = watcher_events - start_skip = snap_skipped - time.sleep(1) - end_tot = events_processed - end_snap = snapshot_events - end_watch = watcher_events - end_skip = snap_skipped - _log.info( - "Events/s: %s Snap: %s, Watch %s, Skip: %s", - end_tot - start_tot, - end_snap - start_snap, - end_watch - start_watch, - end_skip - start_skip - ) - - -x = {u'action': u'set', - u'node': {u'createdIndex': 2095663, u'modifiedIndex': 2095663, - u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "63:4e:60:d9:91:a6", "ipv4_nets": ["1.0.0.174/32"]}', - u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}, - u'prevNode': {u'createdIndex': 2025647, u'modifiedIndex': 2025647, - u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "37:95:03:e2:f3:6c", "ipv4_nets": ["1.0.0.174/32"]}', - u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}} - - -http = HTTPConnectionPool("localhost", 4001, maxsize=2) - - -def watch_etcd(next_index, result_queue, stop_event): - http = HTTPConnectionPool("localhost", 4001, maxsize=2) - try: - while not stop_event.is_set(): - try: - _log.info("About to call http.request...") - resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", - fields={"recursive": "true", "wait": "true", - "waitIndex": next_index}, - timeout=5) - resp_body = loads(resp.data) - except ReadTimeoutError: - _log.exception("Watch read timed out, restarting watch at index %s", - next_index) - continue - except: - _log.exception("Unexpected exception") - raise - else: - node = resp_body["node"] - key = node["key"] - value = node.get("value") - modified_index = node["modifiedIndex"] - result_queue.put((modified_index, key, value)) - next_index = modified_index + 1 - finally: - result_queue.put(None) - - -def resync_and_merge(update_sock): - global events_processed, snapshot_events, watcher_events, snap_skipped - hwms = Trie(string.printable) - stop_worker = None - event_hwm = 0 - best_hwm = 0 - first_resync = True - - while True: - if stop_worker: - stop_worker.set() - # Load the recursive get as far as the 
headers... - #http = HTTPConnectionPool("localhost", 4001, maxsize=1) - resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", - fields={"recursive": "true"}, - timeout=120, - preload_content=False) - - # ASAP, start the background thread to listen for events and queue - # them up... - snapshot_index = int(resp.getheader("x-etcd-index", 1)) - watcher_queue = Queue() - stop_worker = Event() - watcher_thread = Thread(target=watch_etcd, - args=(snapshot_index + 1, - watcher_queue, - stop_worker)) - watcher_thread.daemon = True - watcher_thread.start() - - - - # Then plough through the update incrementally. - deletes_during_snapshot = Trie(string.printable) - try: - parser = ijson.parse(resp) # urllib3 response is file-like. - stack = [] - frame = Node() - count = 0 - for prefix, event, value in parser: - if event == "start_map": - stack.append(frame) - frame = Node() - elif event == "map_key": - frame.current_key = value - elif event in ("string", "number"): - if frame.done: - continue - if frame.current_key == "modifiedIndex": - frame.modifiedIndex = value - if frame.current_key == "key": - frame.key = value - elif frame.current_key == "value": - frame.value = value - if (frame.key is not None and - frame.value is not None and - frame.modifiedIndex is not None): - frame.done = True - # We have all the data for a node. See if it's fresh. - key_parts = frame.key - - # See if the key or its directory has been deleted. - del_hwm = deletes_during_snapshot.longest_prefix_value( - key_parts, - None - ) - - if frame.modifiedIndex <= del_hwm: - # Update to a key that's already been deleted. - _log.debug("Skipping: %s deleted at %s", - key_parts, del_hwm) - snap_skipped += 1 - continue - - # Check if this is a newer version of the node than - # what we've seen before. - try: - hwm = hwms[key_parts] - except KeyError: - hwm = None - if snapshot_index > hwm: - # We have to update the HWM to allow us to spot - # deletions below. - hwms[key_parts] = snapshot_index - if frame.modifiedIndex <= hwm: - snap_skipped += 1 - continue - - # This is a fresh value for the key. - update_sock.sendall(dumps((frame.key, frame.value))) - events_processed += 1 - snapshot_events += 1 - frame.current_key = None - elif event == "end_map": - frame = stack.pop(-1) - if count % 100 == 0: - try: - while True: - try: - (mod, key, val) = watcher_queue.get_nowait() - except TypeError: - print "Queue finished" - break - key_parts = key - if val is None: - # Mark this item as deleted post-snapshot. If this - # is a dir then we'll squash every snapshot update - # to this whole dir. - _log.debug("Storing deletion of %s at %s", - key_parts, mod) - # FIXME: need to add "/" here but that only works for dirs - deletes_during_snapshot[key_parts + "/"] = mod - # Simulate a delete for all the keys under the - # deleted key. - for child_key_parts, child_mod in hwms.items(key_parts + "/"): - del hwms[child_key_parts] - child_key = child_key_parts - #print "Simulating delete of", child_key - update_sock.sendall( - dumps((child_key, None)) - ) - else: - hwms[key_parts] = mod - update_sock.sendall(dumps((key, val))) - events_processed += 1 - watcher_events += 1 - event_hwm = mod - except Empty: - pass - count += 1 - - # Done applying snapshot. If we need to do a snapshot again, we - # can skip any keys that have a modifiedIndex <= to best_hwm. - best_hwm = max(snapshot_index, event_hwm) - - # Only used to resolve deleted during a snapshot so we can throw - # away. 
- del deletes_during_snapshot - - if not first_resync: - # Find any keys that were deleted while we were down. - _log.info("Scanning for deletions") - # TODO Interleave with processing more watcher keys? - for key_parts, value in hwms.items(): - if value < snapshot_index: - # We didn't see the value during the snapshot or via the - # event queue. It must have been deleted. - del hwms[key_parts] - update_sock.sendall( - dumps((key_parts, None)) - ) - events_processed += 1 - else: - _log.info("First resync, skipping delete check.") - - _log.info("In sync, processing events only") - while True: - try: - mod, key, val = watcher_queue.get() - except TypeError: - print "Queue finished" - break - key_parts = key - if val is None: - # Simulate a delete for all the keys under the - # deleted key. - for child_key, child_mod in hwms.items(key_parts): - del hwms[child_key] - update_sock.sendall( - dumps((child_key, None)) - ) - else: - # At this point, we're using hwms only to track existence - # so we can generate deletions when whole directories are - # deleted. However, we may as well keep the modifiedIndex - # up to date. - hwms[key_parts] = mod - update_sock.sendall(dumps((key, val))) - events_processed += 1 - watcher_events += 1 - event_hwm = mod - best_hwm = mod - _log.warning("Worker stopped, resyncing...") - except (urllib3.exceptions.HTTPError, - HTTPException, - socket.error) as e: - _log.error("Request to etcd failed: %r", e) - finally: - first_resync = False - - -class Node(object): - __slots__ = ("key", "value", "action", "current_key", "modifiedIndex", "done") - - def __init__(self): - self.modifiedIndex = None - self.key = None - self.value = None - self.action = None - self.current_key = None - self.done = False def main(): - global events_processed t = Thread(target=report_status) t.daemon = True t.start() @@ -309,4 +27,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py new file mode 100644 index 0000000000..f3707a3c2c --- /dev/null +++ b/calico/etcddriver/driver.py @@ -0,0 +1,326 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.driver +~~~~~~~~~~~~~~~~~~~~~~~~ + +Contains the logic for the etcd driver process, which monitors etcd for +changes and sends them to Felix over a unix socket. + +The driver is responsible for + +* loading the configuration from etcd at start-of-day (Felix needs this before + it can receive further updates) +* handling the initial load of data from etcd +* watching etcd for changes +* doing the above in parallel and merging the result into a consistent + sequence of events +* resolving directory deletions so that if a directory is deleted, it tells + Felix about all the individual keys that are deleted. 
+""" +import logging + +_log = logging.getLogger(__name__) + + +from Queue import Queue, Empty + +from httplib import HTTPException +import socket +import string +from ijson.backends import yajl2 as ijson +import logging +import urllib3 + +from json import loads +from urllib3 import HTTPConnectionPool +from datrie import Trie +from threading import Thread, Event +import time +from msgpack import dumps +from urllib3.exceptions import ReadTimeoutError + +_log = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s [%(levelname)s][%(process)s/%(thread)d] %(name)s %(lineno)d: %(message)s') +events_processed = 0 +snapshot_events = 0 +watcher_events = 0 +snap_skipped = 0 + + +def report_status(): + while True: + start_tot = events_processed + start_snap = snapshot_events + start_watch = watcher_events + start_skip = snap_skipped + time.sleep(1) + end_tot = events_processed + end_snap = snapshot_events + end_watch = watcher_events + end_skip = snap_skipped + _log.info( + "Events/s: %s Snap: %s, Watch %s, Skip: %s", + end_tot - start_tot, + end_snap - start_snap, + end_watch - start_watch, + end_skip - start_skip + ) + + +x = {u'action': u'set', + u'node': {u'createdIndex': 2095663, u'modifiedIndex': 2095663, + u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "63:4e:60:d9:91:a6", "ipv4_nets": ["1.0.0.174/32"]}', + u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}, + u'prevNode': {u'createdIndex': 2025647, u'modifiedIndex': 2025647, + u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "37:95:03:e2:f3:6c", "ipv4_nets": ["1.0.0.174/32"]}', + u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}} + + +http = HTTPConnectionPool("localhost", 4001, maxsize=2) + + +def watch_etcd(next_index, result_queue, stop_event): + http = HTTPConnectionPool("localhost", 4001, maxsize=2) + try: + while not stop_event.is_set(): + try: + _log.info("About to call http.request...") + resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", + fields={"recursive": "true", "wait": "true", + "waitIndex": next_index}, + timeout=5) + resp_body = loads(resp.data) + except ReadTimeoutError: + _log.exception("Watch read timed out, restarting watch at index %s", + next_index) + continue + except: + _log.exception("Unexpected exception") + raise + else: + node = resp_body["node"] + key = node["key"] + value = node.get("value") + modified_index = node["modifiedIndex"] + result_queue.put((modified_index, key, value)) + next_index = modified_index + 1 + finally: + result_queue.put(None) + + +def resync_and_merge(update_sock): + global events_processed, snapshot_events, watcher_events, snap_skipped + hwms = Trie(string.printable) + stop_worker = None + event_hwm = 0 + best_hwm = 0 + first_resync = True + + while True: + if stop_worker: + stop_worker.set() + # Load the recursive get as far as the headers... + #http = HTTPConnectionPool("localhost", 4001, maxsize=1) + resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", + fields={"recursive": "true"}, + timeout=120, + preload_content=False) + + # ASAP, start the background thread to listen for events and queue + # them up... 
+ snapshot_index = int(resp.getheader("x-etcd-index", 1)) + watcher_queue = Queue() + stop_worker = Event() + watcher_thread = Thread(target=watch_etcd, + args=(snapshot_index + 1, + watcher_queue, + stop_worker)) + watcher_thread.daemon = True + watcher_thread.start() + + + + # Then plough through the update incrementally. + deletes_during_snapshot = Trie(string.printable) + try: + parser = ijson.parse(resp) # urllib3 response is file-like. + stack = [] + frame = Node() + count = 0 + for prefix, event, value in parser: + if event == "start_map": + stack.append(frame) + frame = Node() + elif event == "map_key": + frame.current_key = value + elif event in ("string", "number"): + if frame.done: + continue + if frame.current_key == "modifiedIndex": + frame.modifiedIndex = value + if frame.current_key == "key": + frame.key = value + elif frame.current_key == "value": + frame.value = value + if (frame.key is not None and + frame.value is not None and + frame.modifiedIndex is not None): + frame.done = True + # We have all the data for a node. See if it's fresh. + key_parts = frame.key + + # See if the key or its directory has been deleted. + del_hwm = deletes_during_snapshot.longest_prefix_value( + key_parts, + None + ) + + if frame.modifiedIndex <= del_hwm: + # Update to a key that's already been deleted. + _log.debug("Skipping: %s deleted at %s", + key_parts, del_hwm) + snap_skipped += 1 + continue + + # Check if this is a newer version of the node than + # what we've seen before. + try: + hwm = hwms[key_parts] + except KeyError: + hwm = None + if snapshot_index > hwm: + # We have to update the HWM to allow us to spot + # deletions below. + hwms[key_parts] = snapshot_index + if frame.modifiedIndex <= hwm: + snap_skipped += 1 + continue + + # This is a fresh value for the key. + update_sock.sendall(dumps((frame.key, frame.value))) + events_processed += 1 + snapshot_events += 1 + frame.current_key = None + elif event == "end_map": + frame = stack.pop(-1) + if count % 100 == 0: + try: + while True: + try: + (mod, key, val) = watcher_queue.get_nowait() + except TypeError: + print "Queue finished" + break + key_parts = key + if val is None: + # Mark this item as deleted post-snapshot. If this + # is a dir then we'll squash every snapshot update + # to this whole dir. + _log.debug("Storing deletion of %s at %s", + key_parts, mod) + # FIXME: need to add "/" here but that only works for dirs + deletes_during_snapshot[key_parts + "/"] = mod + # Simulate a delete for all the keys under the + # deleted key. + for child_key_parts, child_mod in hwms.items(key_parts + "/"): + del hwms[child_key_parts] + child_key = child_key_parts + #print "Simulating delete of", child_key + update_sock.sendall( + dumps((child_key, None)) + ) + else: + hwms[key_parts] = mod + update_sock.sendall(dumps((key, val))) + events_processed += 1 + watcher_events += 1 + event_hwm = mod + except Empty: + pass + count += 1 + + # Done applying snapshot. If we need to do a snapshot again, we + # can skip any keys that have a modifiedIndex <= to best_hwm. + best_hwm = max(snapshot_index, event_hwm) + + # Only used to resolve deleted during a snapshot so we can throw + # away. + del deletes_during_snapshot + + if not first_resync: + # Find any keys that were deleted while we were down. + _log.info("Scanning for deletions") + # TODO Interleave with processing more watcher keys? + for key_parts, value in hwms.items(): + if value < snapshot_index: + # We didn't see the value during the snapshot or via the + # event queue. 
It must have been deleted. + del hwms[key_parts] + update_sock.sendall( + dumps((key_parts, None)) + ) + events_processed += 1 + else: + _log.info("First resync, skipping delete check.") + + _log.info("In sync, processing events only") + while True: + try: + mod, key, val = watcher_queue.get() + except TypeError: + print "Queue finished" + break + key_parts = key + if val is None: + # Simulate a delete for all the keys under the + # deleted key. + for child_key, child_mod in hwms.items(key_parts): + del hwms[child_key] + update_sock.sendall( + dumps((child_key, None)) + ) + else: + # At this point, we're using hwms only to track existence + # so we can generate deletions when whole directories are + # deleted. However, we may as well keep the modifiedIndex + # up to date. + hwms[key_parts] = mod + update_sock.sendall(dumps((key, val))) + events_processed += 1 + watcher_events += 1 + event_hwm = mod + best_hwm = mod + _log.warning("Worker stopped, resyncing...") + except (urllib3.exceptions.HTTPError, + HTTPException, + socket.error) as e: + _log.error("Request to etcd failed: %r", e) + finally: + first_resync = False + + +class Node(object): + __slots__ = ("key", "value", "action", "current_key", "modifiedIndex", "done") + + def __init__(self): + self.modifiedIndex = None + self.key = None + self.value = None + self.action = None + self.current_key = None + self.done = False + diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 991ff62b30..6e4e268164 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -26,6 +26,7 @@ import json import logging import socket +import subprocess import msgpack import time from calico.monotonic import monotonic_time @@ -404,8 +405,11 @@ def loop(self): print "Bound socket" update_socket.listen(1) print "Marked socket for listen" - os.chmod("/tmp/felix.sck", 0777) - print "Chmodded socket" + + subprocess.Popen([sys.executable, + "-m", + "calico.etcddriver"]) + update_conn, _ = update_socket.accept() print "Accepted connection on socket" receive_count = 0 From 3db781ca163df4f2cf8cda2dd5f0ce875e6f285f Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 16 Oct 2015 18:13:25 +0100 Subject: [PATCH 04/98] Move gevent-specific function to geventutils. Set up logging in driver. --- calico/common.py | 56 +++++++++++++-------------------- calico/etcddriver/__main__.py | 59 ++++++++++++++++++++++++----------- calico/geventutils.py | 53 +++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 54 deletions(-) create mode 100644 calico/geventutils.py diff --git a/calico/common.py b/calico/common.py index 580a7d2bd7..1520697f89 100644 --- a/calico/common.py +++ b/calico/common.py @@ -22,23 +22,24 @@ Calico common utilities. """ import errno -import gevent -import gevent.local -import itertools import logging import logging.handlers -import netaddr -import netaddr.core import os import re import sys from types import StringTypes + +import netaddr +import netaddr.core from netaddr.strategy import eui48 _log = logging.getLogger(__name__) AGENT_TYPE_CALICO = 'Calico agent' -FORMAT_STRING = '%(asctime)s [%(levelname)s][%(process)s/%(tid)d] %(name)s %(lineno)d: %(message)s' + +FORMAT_STRING = '%(asctime)s [%(levelname)s][%(process)s/%(thread)d] %(name)s %(lineno)d: %(message)s' +# Used "tid", which we swap for the greenlet ID, instead of "thread" +FORMAT_STRING_GEVENT = '%(asctime)s [%(levelname)s][%(process)s/%(tid)d] %(name)s %(lineno)d: %(message)s' # This format string deliberately uses two different styles of format # specifier. 
The %()s form is used by the logging module: the {} form is used @@ -89,24 +90,6 @@ VALID_IPAM_POOL_ID_RE = re.compile(r'^[0-9\.:a-fA-F\-]{1,43}$') EXPECTED_IPAM_POOL_KEYS = set(["cidr", "masquerade"]) -tid_storage = gevent.local.local() -tid_counter = itertools.count() -# Ought to do itertools.count(start=1), but python 2.6 does not support it. -tid_counter.next() - -def greenlet_id(): - """ - Returns an integer greenlet ID. - itertools.count() is atomic, if the internet is correct. - http://stackoverflow.com/questions/23547604/python-counter-atomic-increment - """ - try: - tid = tid_storage.tid - except: - tid = tid_counter.next() - tid_storage.tid = tid - return tid - def validate_port(port): """ @@ -183,13 +166,8 @@ def mkdir_p(path): pass else: raise -class GreenletFilter(logging.Filter): - def filter(self, record): - record.tid = greenlet_id() - return True - -def default_logging(): +def default_logging(gevent_in_use=True): """ Sets up the Calico default logging, with default severities. @@ -223,18 +201,22 @@ def default_logging(): root_logger.addHandler(syslog_handler) - file_formatter = logging.Formatter(FORMAT_STRING) + format_string = FORMAT_STRING_GEVENT if gevent_in_use else FORMAT_STRING + file_formatter = logging.Formatter(format_string) stream_handler = logging.StreamHandler(sys.stdout) stream_handler.setLevel(logging.ERROR) stream_handler.setFormatter(file_formatter) - stream_handler.addFilter(GreenletFilter()) + if gevent_in_use: + from geventutils import GreenletFilter + stream_handler.addFilter(GreenletFilter()) root_logger.addHandler(stream_handler) def complete_logging(logfile=None, file_level=logging.DEBUG, syslog_level=logging.ERROR, - stream_level=logging.ERROR): + stream_level=logging.ERROR, + gevent_in_use=True): """ Updates the logging configuration based on learned configuration. @@ -279,9 +261,13 @@ def complete_logging(logfile=None, if logfile and file_level is not None: if not file_handler: mkdir_p(os.path.dirname(logfile)) - formatter = logging.Formatter(FORMAT_STRING) + format_string = (FORMAT_STRING_GEVENT if gevent_in_use + else FORMAT_STRING) + formatter = logging.Formatter(format_string) file_handler = logging.handlers.WatchedFileHandler(logfile) - file_handler.addFilter(GreenletFilter()) + if gevent_in_use: + from geventutils import GreenletFilter + file_handler.addFilter(GreenletFilter()) file_handler.setLevel(file_level) file_handler.setFormatter(formatter) root_logger.addHandler(file_handler) diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index de9cf098be..a165d9f234 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -1,30 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2014, 2015 Metaswitch Networks +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +calico.etcddriver.__main__ +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Main entry point for the etcd driver, responsible for basic logging config +and starting our threads. 
+""" + +import logging import socket from threading import Thread import time + from calico.etcddriver.driver import report_status, resync_and_merge -import logging +from calico.common import default_logging _log = logging.getLogger(__name__) +default_logging(gevent_in_use=False) -def main(): - t = Thread(target=report_status) - t.daemon = True - t.start() - - update_socket = socket.socket(socket.AF_UNIX, - socket.SOCK_SEQPACKET) - while True: - try: - update_socket.connect("/tmp/felix.sck") - except: - _log.exception("Failed to connect to felix...") - time.sleep(1) - else: - break - resync_and_merge(update_socket) +update_socket = socket.socket(socket.AF_UNIX, + socket.SOCK_SEQPACKET) +while True: + try: + update_socket.connect("/tmp/felix.sck") + except: + _log.exception("Failed to connect to felix...") + time.sleep(1) + else: + break +resync_and_merge(update_socket) -if __name__ == "__main__": - main() diff --git a/calico/geventutils.py b/calico/geventutils.py new file mode 100644 index 0000000000..a2bb301cd6 --- /dev/null +++ b/calico/geventutils.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.geventutils +~~~~~~~~~~~~~~~~~~ + +Helper utilities for gevent. +""" +import itertools +import logging + +import gevent +import gevent.local + +_log = logging.getLogger(__name__) + + +tid_storage = gevent.local.local() +tid_counter = itertools.count() +# Ought to do itertools.count(start=1), but python 2.6 does not support it. +tid_counter.next() + + +def greenlet_id(): + """ + Returns an integer greenlet ID. + itertools.count() is atomic, if the internet is correct. + http://stackoverflow.com/questions/23547604/python-counter-atomic-increment + """ + try: + tid = tid_storage.tid + except: + tid = tid_counter.next() + tid_storage.tid = tid + return tid + + +class GreenletFilter(logging.Filter): + def filter(self, record): + record.tid = greenlet_id() + return True \ No newline at end of file From 540790d3e0f1b5d733de9f0210ba766b5b436447 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 19 Oct 2015 11:50:58 +0100 Subject: [PATCH 05/98] Work in progress on productising etcd driver. 
--- calico/etcddriver/__main__.py | 22 +++++++++++----------- calico/etcddriver/driver.py | 16 ++++++++++------ calico/felix/fetcd.py | 28 +++++++++++++++------------- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index a165d9f234..fef56d8709 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -29,23 +29,23 @@ import time from calico.etcddriver.driver import report_status, resync_and_merge -from calico.common import default_logging +from calico.common import default_logging, complete_logging _log = logging.getLogger(__name__) default_logging(gevent_in_use=False) - +complete_logging("/var/log/calico/etcddriver.log", + logging.INFO, + logging.WARNING, + logging.WARNING) update_socket = socket.socket(socket.AF_UNIX, - socket.SOCK_SEQPACKET) -while True: - try: - update_socket.connect("/tmp/felix.sck") - except: - _log.exception("Failed to connect to felix...") - time.sleep(1) - else: - break + socket.SOCK_STREAM) +try: + update_socket.connect("/tmp/felix.sck") +except: + _log.exception("Failed to connect to Felix") + raise resync_and_merge(update_socket) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index f3707a3c2c..f8030d545d 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -30,10 +30,8 @@ * resolving directory deletions so that if a directory is deleted, it tells Felix about all the individual keys that are deleted. """ -import logging - -_log = logging.getLogger(__name__) +import sys from Queue import Queue, Empty @@ -94,9 +92,11 @@ def report_status(): def watch_etcd(next_index, result_queue, stop_event): - http = HTTPConnectionPool("localhost", 4001, maxsize=2) + http = None try: while not stop_event.is_set(): + if not http: + http = HTTPConnectionPool("localhost", 4001, maxsize=1) try: _log.info("About to call http.request...") resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", @@ -107,6 +107,7 @@ def watch_etcd(next_index, result_queue, stop_event): except ReadTimeoutError: _log.exception("Watch read timed out, restarting watch at index %s", next_index) + http = None # Workaround issue where connection isn't properly timed out by urllib3 continue except: _log.exception("Unexpected exception") @@ -152,8 +153,6 @@ def resync_and_merge(update_sock): watcher_thread.daemon = True watcher_thread.start() - - # Then plough through the update incrementally. 
     deletes_during_snapshot = Trie(string.printable)
 
     try:
@@ -305,6 +304,11 @@ def resync_and_merge(update_sock):
             event_hwm = mod
             best_hwm = mod
         _log.warning("Worker stopped, resyncing...")
+    except socket.error as e:
+        if e.errno == 32:
+            # FIXME Magic number
+            _log.error("Broken pipe, exiting")
+            sys.exit(1)
     except (urllib3.exceptions.HTTPError,
             HTTPException,
             socket.error) as e:
         _log.error("Request to etcd failed: %r", e)
diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index 6e4e268164..ee1852ad64 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -398,7 +398,7 @@ def loop(self):
             except:
                 pass
             update_socket = socket.socket(socket.AF_UNIX,
-                                          socket.SOCK_SEQPACKET)
+                                          socket.SOCK_STREAM)
 
             print "Created socket"
             update_socket.bind("/tmp/felix.sck")
@@ -413,20 +413,22 @@ def loop(self):
             update_conn, _ = update_socket.accept()
             print "Accepted connection on socket"
             receive_count = 0
+            unpacker = msgpack.Unpacker()
             while True:
                 data = update_conn.recv(8092)
-                receive_count += 1
-                if receive_count % 1000 == 0:
-                    print "Recieved", receive_count
-                key, value = msgpack.loads(data)
-                n = Node()
-                n.action = "set" if value is not None else "delete"
-                n.value = value
-                n.key = key
-                try:
-                    self.dispatcher.handle_event(n)
-                except ResyncRequired:
-                    _log.warning("IGNORING RESYNC.")
+                unpacker.feed(data)
+                for key, value in unpacker:
+                    receive_count += 1
+                    if receive_count % 1000 == 0:
+                        print "Received", receive_count
+                    n = Node()
+                    n.action = "set" if value is not None else "delete"
+                    n.value = value
+                    n.key = key
+                    try:
+                        self.dispatcher.handle_event(n)
+                    except ResyncRequired:
+                        _log.warning("IGNORING RESYNC.")

From 63a367a0531f1240bc87b34672e2d3c36fee4fb3 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Mon, 19 Oct 2015 15:39:42 +0100
Subject: [PATCH 06/98] Move complex HWM calculations into a dedicated class. General tidy-up.

---
 calico/etcddriver/__main__.py |  13 +-
 calico/etcddriver/driver.py   | 256 +++++++++++++++++-------------------
 calico/etcddriver/hwm.py      | 147 +++++++++++++++++++
 calico/felix/fetcd.py         |   2 +-
 4 files changed, 274 insertions(+), 144 deletions(-)
 create mode 100644 calico/etcddriver/hwm.py

diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py
index fef56d8709..4d819d6b53 100644
--- a/calico/etcddriver/__main__.py
+++ b/calico/etcddriver/__main__.py
@@ -33,11 +33,11 @@
 
 _log = logging.getLogger(__name__)
 
-default_logging(gevent_in_use=False)
 complete_logging("/var/log/calico/etcddriver.log",
                  logging.INFO,
                  logging.WARNING,
-                 logging.WARNING)
+                 logging.INFO,
+                 gevent_in_use=False)
 
 update_socket = socket.socket(socket.AF_UNIX,
                               socket.SOCK_STREAM)
@@ -47,5 +47,12 @@
     _log.exception("Failed to connect to Felix")
     raise
 
-resync_and_merge(update_socket)
+resync_thread = Thread(target=resync_and_merge, args=[update_socket])
+resync_thread.daemon = True
+resync_thread.start()
 
+try:
+    update_socket.recv(1024)
+except:
+    _log.exception("Failed to read from update socket, Felix down?")
+    raise
 
diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index f8030d545d..31a178b6d9 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -31,28 +31,28 @@
   Felix about all the individual keys that are deleted.
""" -import sys - -from Queue import Queue, Empty - from httplib import HTTPException +from json import loads import socket -import string -from ijson.backends import yajl2 as ijson import logging -import urllib3 - -from json import loads -from urllib3 import HTTPConnectionPool -from datrie import Trie +from Queue import Queue, Empty +import sys from threading import Thread, Event import time + +from ijson.backends import yajl2 as ijson from msgpack import dumps +import urllib3 +from urllib3 import HTTPConnectionPool from urllib3.exceptions import ReadTimeoutError +from calico.etcddriver.hwm import HighWaterTracker + _log = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s [%(levelname)s][%(process)s/%(thread)d] %(name)s %(lineno)d: %(message)s') + format='%(asctime)s [%(levelname)s]' + '[%(process)s/%(thread)d] %(name)s %(lineno)d: ' + '%(message)s') events_processed = 0 snapshot_events = 0 watcher_events = 0 @@ -79,35 +79,49 @@ def report_status(): ) -x = {u'action': u'set', - u'node': {u'createdIndex': 2095663, u'modifiedIndex': 2095663, - u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "63:4e:60:d9:91:a6", "ipv4_nets": ["1.0.0.174/32"]}', - u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}, - u'prevNode': {u'createdIndex': 2025647, u'modifiedIndex': 2025647, - u'value': u'{"name": "tap000174", "profile_id": "prof-174", "state": "active", "ipv6_nets": [], "mac": "37:95:03:e2:f3:6c", "ipv4_nets": ["1.0.0.174/32"]}', - u'key': u'/calico/v1/host/host_bloop/workload/orch/endpoint_175/endpoint/endpoint_175'}} - - -http = HTTPConnectionPool("localhost", 4001, maxsize=2) +# etcd response data looks like this: +# {u'action': u'set', +# u'node': {u'createdIndex': 2095663, u'modifiedIndex': 2095663, +# u'value': u'{"name": "tap000174", "profile_id": "prof-174", ' +# u'"state": "active", "ipv6_nets": [], ' +# u'"mac": "63:4e:60:d9:91:a6", "ipv4_nets": ' +# u'["1.0.0.174/32"]}', +# u'key': u'/calico/v1/host/host_bloop/workload/orch/' +# u'endpoint_175/endpoint/endpoint_175'}, +# u'prevNode': {u'createdIndex': 2025647, u'modifiedIndex': 2025647, +# u'value': u'{"name": "tap000174", "profile_id": ' +# u'"prof-174", "state": "active", ' +# u'"ipv6_nets": [], "mac": "37:95:03:e2:f3:6c", ' +# u'"ipv4_nets": ["1.0.0.174/32"]}', +# u'key': u'/calico/v1/host/host_bloop/workload/orch/' +# u'endpoint_175/endpoint/endpoint_175'}} def watch_etcd(next_index, result_queue, stop_event): + _log.info("Watcher thread started") http = None try: while not stop_event.is_set(): if not http: + _log.info("No HTTP pool, creating one...") http = HTTPConnectionPool("localhost", 4001, maxsize=1) try: - _log.info("About to call http.request...") - resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", - fields={"recursive": "true", "wait": "true", - "waitIndex": next_index}, - timeout=5) + _log.debug("Waiting on etcd index %s", next_index) + resp = http.request( + "GET", + "http://localhost:4001/v2/keys/calico/v1", + fields={"recursive": "true", + "wait": "true", + "waitIndex": next_index}, + timeout=90, + ) resp_body = loads(resp.data) except ReadTimeoutError: - _log.exception("Watch read timed out, restarting watch at index %s", - next_index) - http = None # Workaround issue where connection isn't properly timed out by urllib3 + _log.exception("Watch read timed out, restarting watch at " + "index %s", next_index) + # Workaround urllib3 bug #718. 
After a ReadTimeout, the + # connection is incorrectly recycled. + http = None continue except: _log.exception("Unexpected exception") @@ -124,18 +138,20 @@ def watch_etcd(next_index, result_queue, stop_event): def resync_and_merge(update_sock): + _log.info("Resync thread started") global events_processed, snapshot_events, watcher_events, snap_skipped - hwms = Trie(string.printable) - stop_worker = None - event_hwm = 0 - best_hwm = 0 + hwms = HighWaterTracker() + stop_watcher = None first_resync = True while True: - if stop_worker: - stop_worker.set() + if stop_watcher: + _log.info("Watcher was running before, stopping it") + stop_watcher.set() + # Load the recursive get as far as the headers... - #http = HTTPConnectionPool("localhost", 4001, maxsize=1) + _log.info("Loading snapshot headers...") + http = HTTPConnectionPool("localhost", 4001, maxsize=1) resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", fields={"recursive": "true"}, timeout=120, @@ -144,17 +160,19 @@ def resync_and_merge(update_sock): # ASAP, start the background thread to listen for events and queue # them up... snapshot_index = int(resp.getheader("x-etcd-index", 1)) + _log.info("Got snapshot headers, snapshot index is %s; starting " + "watcher...", snapshot_index) watcher_queue = Queue() - stop_worker = Event() + stop_watcher = Event() watcher_thread = Thread(target=watch_etcd, args=(snapshot_index + 1, watcher_queue, - stop_worker)) + stop_watcher)) watcher_thread.daemon = True watcher_thread.start() # Then plough through the update incrementally. - deletes_during_snapshot = Trie(string.printable) + hwms.start_tracking_deletions() try: parser = ijson.parse(resp) # urllib3 response is file-like. stack = [] @@ -179,130 +197,84 @@ def resync_and_merge(update_sock): frame.value is not None and frame.modifiedIndex is not None): frame.done = True - # We have all the data for a node. See if it's fresh. - key_parts = frame.key - - # See if the key or its directory has been deleted. - del_hwm = deletes_during_snapshot.longest_prefix_value( - key_parts, - None - ) - if frame.modifiedIndex <= del_hwm: - # Update to a key that's already been deleted. - _log.debug("Skipping: %s deleted at %s", - key_parts, del_hwm) - snap_skipped += 1 - continue - - # Check if this is a newer version of the node than - # what we've seen before. - try: - hwm = hwms[key_parts] - except KeyError: - hwm = None - if snapshot_index > hwm: - # We have to update the HWM to allow us to spot - # deletions below. - hwms[key_parts] = snapshot_index - if frame.modifiedIndex <= hwm: + old_hwm = hwms.update_hwm(frame.key, snapshot_index) + hwm = frame.modifiedIndex + if hwm > old_hwm: + # This specific key's HWM is newer than the + # previous version we've seen. + update_sock.sendall( + dumps((frame.key, frame.value)) + ) + events_processed += 1 + snapshot_events += 1 + else: snap_skipped += 1 - continue - # This is a fresh value for the key. - update_sock.sendall(dumps((frame.key, frame.value))) - events_processed += 1 - snapshot_events += 1 frame.current_key = None elif event == "end_map": frame = stack.pop(-1) - if count % 100 == 0: - try: - while True: - try: - (mod, key, val) = watcher_queue.get_nowait() - except TypeError: - print "Queue finished" - break - key_parts = key - if val is None: - # Mark this item as deleted post-snapshot. If this - # is a dir then we'll squash every snapshot update - # to this whole dir. 
- _log.debug("Storing deletion of %s at %s", - key_parts, mod) - # FIXME: need to add "/" here but that only works for dirs - deletes_during_snapshot[key_parts + "/"] = mod - # Simulate a delete for all the keys under the - # deleted key. - for child_key_parts, child_mod in hwms.items(key_parts + "/"): - del hwms[child_key_parts] - child_key = child_key_parts - #print "Simulating delete of", child_key - update_sock.sendall( - dumps((child_key, None)) - ) - else: - hwms[key_parts] = mod - update_sock.sendall(dumps((key, val))) - events_processed += 1 - watcher_events += 1 - event_hwm = mod - except Empty: - pass + if count % 100 == 0: # Avoid checking the queue on every loop. + for _ in xrange(1000): # Don't starve the snapshot. + try: + data = watcher_queue.get_nowait() + except Empty: + break + if data is None: + _log.warning("Watcher thread finished") + break + (mod, key, val) = data + if val is None: + # Deletion. + deleted_keys = hwms.store_deletion(key, mod) + for child_key in deleted_keys: + update_sock.sendall( + dumps((child_key, None)) + ) + else: + # Normal update. + hwms.update_hwm(key, mod) + update_sock.sendall(dumps((key, val))) + events_processed += 1 + watcher_events += 1 count += 1 - # Done applying snapshot. If we need to do a snapshot again, we - # can skip any keys that have a modifiedIndex <= to best_hwm. - best_hwm = max(snapshot_index, event_hwm) - - # Only used to resolve deleted during a snapshot so we can throw - # away. - del deletes_during_snapshot + # Save occupancy by throwing away the deletion tracking metadata. + hwms.stop_tracking_deletions() if not first_resync: - # Find any keys that were deleted while we were down. + # Find any keys that were deleted while we were unable to + # keep up with etcd. _log.info("Scanning for deletions") - # TODO Interleave with processing more watcher keys? - for key_parts, value in hwms.items(): - if value < snapshot_index: - # We didn't see the value during the snapshot or via the - # event queue. It must have been deleted. - del hwms[key_parts] - update_sock.sendall( - dumps((key_parts, None)) - ) - events_processed += 1 + deleted_keys = hwms.remove_old_keys(snapshot_index) + for key in deleted_keys: + # We didn't see the value during the snapshot or via the + # event queue. It must have been deleted. + update_sock.sendall(dumps((key, None))) + events_processed += 1 else: _log.info("First resync, skipping delete check.") _log.info("In sync, processing events only") while True: - try: - mod, key, val = watcher_queue.get() - except TypeError: - print "Queue finished" + data = watcher_queue.get() + if data is None: + _log.warning("Watcher thread finished, resyncing...") break - key_parts = key + mod, key, val = data if val is None: - # Simulate a delete for all the keys under the - # deleted key. - for child_key, child_mod in hwms.items(key_parts): - del hwms[child_key] + # Deletion. + deleted_keys = hwms.store_deletion(key, mod) + for child_key in deleted_keys: update_sock.sendall( dumps((child_key, None)) ) else: - # At this point, we're using hwms only to track existence - # so we can generate deletions when whole directories are - # deleted. However, we may as well keep the modifiedIndex - # up to date. - hwms[key_parts] = mod + # Normal update. 
+ hwms.update_hwm(key, mod) update_sock.sendall(dumps((key, val))) events_processed += 1 watcher_events += 1 - event_hwm = mod - best_hwm = mod _log.warning("Worker stopped, resyncing...") except socket.error as e: if e.errno == 32: @@ -313,12 +285,16 @@ def resync_and_merge(update_sock): HTTPException, socket.error) as e: _log.error("Request to etcd failed: %r", e) + except: + _log.exception("Unexpected exception") + raise finally: first_resync = False class Node(object): - __slots__ = ("key", "value", "action", "current_key", "modifiedIndex", "done") + __slots__ = ("key", "value", "action", "current_key", "modifiedIndex", + "done") def __init__(self): self.modifiedIndex = None diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py new file mode 100644 index 0000000000..27a96244fc --- /dev/null +++ b/calico/etcddriver/hwm.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.hwm +~~~~~~~~~~~~~~~~~~~~~ + +The HighWaterTracker is used to resolve the high water mark for each etcd +key when processing a snapshot and event stream in parallel. +""" + +import logging +import re +import string + +from datrie import Trie +import datrie + +_log = logging.getLogger(__name__) + + +TRIE_CHARS = string.ascii_letters + string.digits + "/_-" +TRIE_CHARS_MATCH = re.compile(r'^[%s]+$' % re.escape(TRIE_CHARS)) + + +class HighWaterTracker(object): + """ + Tracks the highest etcd index for which we've seen a particular + etcd key. + """ + def __init__(self): + self._hwms = Trie(TRIE_CHARS) + + # Set to a Trie while we're tracking deletions. None otherwise. + self._deletion_hwms = None + + def start_tracking_deletions(self): + """ + Starts tracking which subtrees have been deleted so that update_hwm + can skip updates to keys that have subsequently been deleted. + + Should be paired with a call to stop_tracking_deletions() to release + the associated tracking data structures. + """ + _log.info("Started tracking deletions") + self._deletion_hwms = Trie(TRIE_CHARS) + + def stop_tracking_deletions(self): + """ + Stops deletion tracking and frees up the associated resources. + + Calling this asserts that subsequent calls to update_hwm() will only + use HWMs after any stored deletes. + """ + _log.info("Stopped tracking deletions") + self._deletion_hwms = None + + def update_hwm(self, key, hwm): + """ + Updates the HWM for a key if the new value is greater than the old. + If deletion tracking is enabled, resolves deletions so that updates + to subtrees that have been deleted are skipped iff the deletion is + after the update in HWM order. + + :return int|NoneType: the old HWM of the key (or the HWM at which it + was deleted) or None if it did not previously exist. + """ + _log.debug("Updating HWM for %s to %s", key, hwm) + key = encode_key(key) + if self._deletion_hwms is not None: + # We're tracking deletions, check that this key hasn't been + # deleted. 
+ del_hwm = self._deletion_hwms.longest_prefix_value(key, None) + if del_hwm > hwm: + _log.debug("Key %s previously deleted, skipping", key) + return del_hwm + try: + old_hwm = self._hwms[key] # Trie doesn't have get(). + except KeyError: + old_hwm = None + if old_hwm < hwm: # Works for None too. + _log.debug("Key %s HWM updated to %s, previous %s", + key, hwm, old_hwm) + self._hwms[key] = hwm + return old_hwm + + def store_deletion(self, key, hwm): + """ + Store that a given key (or directory) was deleted at a given HWM. + :return: List of known keys that were deleted. This will be the + leaves only when a subtree is being deleted. + """ + _log.debug("Key %s deleted", key) + key = encode_key(key) + if self._deletion_hwms is not None: + _log.debug("Tracking deletion in deletions trie") + self._deletion_hwms[key] = hwm + deleted_keys = [] + for child_key, child_mod in self._hwms.items(key): + del self._hwms[child_key] + deleted_keys.append(decode_key(child_key)) + _log.debug("Found %s keys deleted under %s", len(deleted_keys), key) + return deleted_keys + + def remove_old_keys(self, hwm_limit): + """ + Deletes and returns all keys that have HWMs less than hwm_limit. + :return: list of keys that were deleted. + """ + assert not self._deletion_hwms, \ + "Delete tracking incompatible with remove_old_keys()" + _log.info("Removing keys that are older than %s", hwm_limit) + old_keys = [] + state = datrie.State(self._hwms) + state.walk(u"") + it = datrie.Iterator(state) + while it.next(): + value = it.data() + if value < hwm_limit: + old_keys.append(it.key()) + for old_key in old_keys: + del self._hwms[old_key] + _log.info("Deleted %s old keys", len(old_keys)) + return map(decode_key, old_keys) + + +def encode_key(key): + # FIXME May have to be more lenient + assert TRIE_CHARS_MATCH.match(key) + if key[-1] != "/": + key += "/" + return key + + +def decode_key(key): + return key[:-1] diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index ee1852ad64..a1772f58d4 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -415,7 +415,7 @@ def loop(self): receive_count = 0 unpacker = msgpack.Unpacker() while True: - data = update_conn.recv(8092) + data = update_conn.recv(16384) unpacker.feed(data) for key, value in unpacker: receive_count += 1 From 143fa0570a3706c7d7ccd415ee6a96eed1a9cfe8 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 19 Oct 2015 16:50:05 +0100 Subject: [PATCH 07/98] Optimization: avoid trie lookup for most events. --- calico/etcddriver/hwm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py index 27a96244fc..b761a58de5 100644 --- a/calico/etcddriver/hwm.py +++ b/calico/etcddriver/hwm.py @@ -44,6 +44,7 @@ def __init__(self): # Set to a Trie while we're tracking deletions. None otherwise. 
         self._deletion_hwms = None
+        self._latest_deletion = None
 
     def start_tracking_deletions(self):
         """
@@ -55,6 +56,7 @@ def start_tracking_deletions(self):
         """
         _log.info("Started tracking deletions")
         self._deletion_hwms = Trie(TRIE_CHARS)
+        self._latest_deletion = None
 
     def stop_tracking_deletions(self):
         """
@@ -65,6 +67,7 @@ def stop_tracking_deletions(self):
         """
         _log.info("Stopped tracking deletions")
         self._deletion_hwms = None
+        self._latest_deletion = None
 
     def update_hwm(self, key, hwm):
         """
@@ -78,7 +81,10 @@ def update_hwm(self, key, hwm):
         """
         _log.debug("Updating HWM for %s to %s", key, hwm)
         key = encode_key(key)
-        if self._deletion_hwms is not None:
+        if (self._deletion_hwms is not None and
+                # Optimization: avoid expensive lookup if this update comes
+                # after all deletions.
+                hwm < self._latest_deletion):
             # We're tracking deletions, check that this key hasn't been
             # deleted.
             del_hwm = self._deletion_hwms.longest_prefix_value(key, None)
@@ -103,6 +109,7 @@ def store_deletion(self, key, hwm):
         """
         _log.debug("Key %s deleted", key)
         key = encode_key(key)
+        self._latest_deletion = max(hwm, self._latest_deletion)
         if self._deletion_hwms is not None:
             _log.debug("Tracking deletion in deletions trie")
             self._deletion_hwms[key] = hwm

From b744f4df31bc16f11df8f681242ae43b2958ac39 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Tue, 20 Oct 2015 11:39:10 +0100
Subject: [PATCH 08/98] Use /run/felix-driver.sck for socket.

---
 calico/etcddriver/__main__.py |  3 +-
 calico/felix/fetcd.py         | 53 +++++++++++++++++++----------------
 2 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py
index 4d819d6b53..ced63032a7 100644
--- a/calico/etcddriver/__main__.py
+++ b/calico/etcddriver/__main__.py
@@ -27,6 +27,7 @@
 import socket
 from threading import Thread
 import time
+import sys
 
 from calico.etcddriver.driver import report_status, resync_and_merge
 from calico.common import default_logging, complete_logging
@@ -42,7 +43,7 @@
 update_socket = socket.socket(socket.AF_UNIX,
                               socket.SOCK_STREAM)
 try:
-    update_socket.connect("/tmp/felix.sck")
+    update_socket.connect(sys.argv[1])
 except:
     _log.exception("Failed to connect to Felix")
     raise
diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index a1772f58d4..9b73069ddd 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -393,34 +393,14 @@ def loop(self):
                 _log.info("Reconnecting and loading snapshot from etcd...")
                 self.reconnect(copy_cluster_id=False)
                 self._on_pre_resync()
-            try:
-                os.unlink("/tmp/felix.sck")
-            except:
-                pass
-            update_socket = socket.socket(socket.AF_UNIX,
-                                          socket.SOCK_STREAM)
-
-            print "Created socket"
-            update_socket.bind("/tmp/felix.sck")
-            print "Bound socket"
-            update_socket.listen(1)
-            print "Marked socket for listen"
-
-            subprocess.Popen([sys.executable,
-                              "-m",
-                              "calico.etcddriver"])
-
-            update_conn, _ = update_socket.accept()
-            print "Accepted connection on socket"
-            receive_count = 0
+
+                driver_sck = self.start_driver()
                 unpacker = msgpack.Unpacker()
                 while True:
-                    data = update_conn.recv(16384)
+                    data = driver_sck.recv(16384)
                     unpacker.feed(data)
                     for key, value in unpacker:
-                        receive_count += 1
-                        if receive_count % 1000 == 0:
-                            print "Received", receive_count
+                        # TODO stats
                         n = Node()
                         n.action = "set" if value is not None else "delete"
                         n.value = value
@@ -440,6 +420,31 @@ def loop(self):
                 _log.exception("Exception reading from socket?")
         _log.info("%s.loop() stopped due to self.stop == True", self)
 
+    def start_driver(self):
+        _log.info("Creating server 
socket.") + try: + os.unlink("/run/felix-driver.sck") + except: + pass + update_socket = socket.socket(socket.AF_UNIX, + socket.SOCK_STREAM) + update_socket.bind("/run/felix-driver.sck") + update_socket.listen(1) + subprocess.Popen([sys.executable, + "-m", + "calico.etcddriver", + "/run/felix-driver.sck"]) + update_conn, _ = update_socket.accept() + _log.info("Accepted connection on socket") + # No longer need the server socket, remove it. + try: + os.unlink("/run/felix-driver.sck") + except: + _log.exception("Failed to unlink socket") + else: + _log.info("Unlinked server socket") + return update_conn + def _load_config(self): """ Loads our configuration from etcd. Does not return From ffd190f71565cf5e4acc02179af1425b003a8411 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 20 Oct 2015 11:40:02 +0100 Subject: [PATCH 09/98] Experiment: use a BytesIO to avoid calling sendall as often. --- calico/etcddriver/driver.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 31a178b6d9..d1d230b903 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -41,6 +41,7 @@ import time from ijson.backends import yajl2 as ijson +from io import BytesIO from msgpack import dumps import urllib3 from urllib3 import HTTPConnectionPool @@ -174,6 +175,7 @@ def resync_and_merge(update_sock): # Then plough through the update incrementally. hwms.start_tracking_deletions() try: + buf = BytesIO() parser = ijson.parse(resp) # urllib3 response is file-like. stack = [] frame = Node() @@ -203,7 +205,7 @@ def resync_and_merge(update_sock): if hwm > old_hwm: # This specific key's HWM is newer than the # previous version we've seen. - update_sock.sendall( + buf.write( dumps((frame.key, frame.value)) ) events_processed += 1 @@ -215,7 +217,7 @@ def resync_and_merge(update_sock): elif event == "end_map": frame = stack.pop(-1) if count % 100 == 0: # Avoid checking the queue on every loop. - for _ in xrange(1000): # Don't starve the snapshot. + for _ in xrange(100): # Don't starve the snapshot. try: data = watcher_queue.get_nowait() except Empty: @@ -228,15 +230,17 @@ def resync_and_merge(update_sock): # Deletion. deleted_keys = hwms.store_deletion(key, mod) for child_key in deleted_keys: - update_sock.sendall( - dumps((child_key, None)) - ) + buf.write(dumps((child_key, None))) else: # Normal update. hwms.update_hwm(key, mod) - update_sock.sendall(dumps((key, val))) + buf.write(dumps((key, val))) events_processed += 1 watcher_events += 1 + buf_contents = buf.getvalue() + if buf_contents: + update_sock.sendall(buf_contents) + buf = BytesIO() count += 1 # Save occupancy by throwing away the deletion tracking metadata. @@ -250,11 +254,16 @@ def resync_and_merge(update_sock): for key in deleted_keys: # We didn't see the value during the snapshot or via the # event queue. It must have been deleted. - update_sock.sendall(dumps((key, None))) + buf.write(dumps((key, None))) events_processed += 1 else: _log.info("First resync, skipping delete check.") + buf_contents = buf.getvalue() + if buf_contents: + update_sock.sendall(buf_contents) + del buf + _log.info("In sync, processing events only") while True: data = watcher_queue.get() From ee41ab685752c8e48f4c4c987cf2fb313c542b69 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 20 Oct 2015 16:02:24 +0100 Subject: [PATCH 10/98] Refactor etcd driver into a class. Add init message. 
--- calico/etcddriver/__main__.py | 26 +- calico/etcddriver/driver.py | 490 +++++++++++++++++++++++----------- calico/felix/fetcd.py | 18 +- 3 files changed, 362 insertions(+), 172 deletions(-) diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index ced63032a7..9416705928 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -25,35 +25,23 @@ import logging import socket -from threading import Thread -import time import sys -from calico.etcddriver.driver import report_status, resync_and_merge +from calico.etcddriver.driver import EtcdDriver from calico.common import default_logging, complete_logging _log = logging.getLogger(__name__) -complete_logging("/var/log/calico/etcddriver.log", - logging.INFO, - logging.WARNING, - logging.INFO, - gevent_in_use=False) +default_logging(gevent_in_use=False) -update_socket = socket.socket(socket.AF_UNIX, +felix_sck = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: - update_socket.connect(sys.argv[1]) + felix_sck.connect(sys.argv[1]) except: _log.exception("Failed to connect to Felix") raise -resync_thread = Thread(target=resync_and_merge, args=[update_socket]) -resync_thread.daemon = True -resync_thread.start() - -try: - update_socket.recv(1024) -except: - _log.exception("Failed to read from update socket, Felix down?") - raise +driver = EtcdDriver(felix_sck) +driver.start() +driver.join() diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index d1d230b903..2dcf8a214e 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -39,10 +39,11 @@ import sys from threading import Thread, Event import time +from urlparse import urlparse from ijson.backends import yajl2 as ijson from io import BytesIO -from msgpack import dumps +import msgpack import urllib3 from urllib3 import HTTPConnectionPool from urllib3.exceptions import ReadTimeoutError @@ -59,6 +60,23 @@ watcher_events = 0 snap_skipped = 0 +FLUSH_THRESHOLD = 200 + +MSG_KEY_TYPE = "type" + +# Init message Felix -> Driver. +MSG_TYPE_INIT = "init" +MSG_KEY_ETCD_URL = "etcd_url" + +MSG_KEY_LOG_FILE = "log_file" +MSG_KEY_SEV_FILE = "sev_file" +MSG_KEY_SEV_SCREEN = "sev_screen" +MSG_KEY_SEV_SYSLOG = "sev_syslog" + +MSG_TYPE_STATUS = "stat" +MSG_TYPE_CONFIG = "conf" +MSG_TYPE_UPDATE = "upd" + def report_status(): while True: @@ -138,167 +156,324 @@ def watch_etcd(next_index, result_queue, stop_event): result_queue.put(None) -def resync_and_merge(update_sock): - _log.info("Resync thread started") - global events_processed, snapshot_events, watcher_events, snap_skipped - hwms = HighWaterTracker() - stop_watcher = None - first_resync = True +class EtcdDriver(object): + def __init__(self, felix_sck): + self._felix_sck = felix_sck - while True: - if stop_watcher: - _log.info("Watcher was running before, stopping it") - stop_watcher.set() + # Global stop event used to signal to all threads to stop. + self._stop_event = Event() + + self._reader_thread = Thread(target=self._read_from_socket) + self._resync_thread = Thread(target=self._resync_and_merge) + + self._watcher_thread = None # Created on demand + self._watcher_stop_event = None + + # High-water mark cache. Owned by resync thread. + self._hwms = HighWaterTracker() + # Number of pending updates and buffer. Owned by resync thread. + self._updates_pending = 0 + self._buf = BytesIO() + self._first_resync = True + + # Set by the reader thread once the config has been read from Felix. 
+ self._config_loaded = Event() + self._etcd_base_url = None + + def start(self): + self._reader_thread.start() + self._resync_thread.start() + + def join(self): + self._reader_thread.join() + self._resync_thread.join() - # Load the recursive get as far as the headers... + def _read_from_socket(self): + """ + Thread: reader thread. Reads messages from Felix. + + So far, this means reading the init message and then dealing + with the exception if Felix dies. + """ + try: + unpacker = msgpack.Unpacker() + while not self._stop_event.is_set(): + try: + data = self._felix_sck.recv(8092) + except: + _log.exception("Exception reading from Felix.") + raise + if not data: + _log.error("No data read, assuming Felix closed socket") + break + unpacker.feed(data) + for msg in unpacker: + if msg[MSG_KEY_TYPE] == MSG_TYPE_INIT: + self._handle_init(msg) + else: + _log.warning("Unexpected message from Felix") + finally: + _log.error("Reader thread shutting down, triggering stop event") + self._stop_event.set() + + def _handle_init(self, msg): + # OK to dump the msg, it's a one-off. + _log.info("Got init message from Felix %s", msg) + self._etcd_base_url = msg[MSG_KEY_ETCD_URL].rstrip("/") + self._etcd_url_parts = urlparse(self._etcd_base_url) + self._config_loaded.set() + + def _resync_and_merge(self): + """ + Thread: Resync-and-merge thread. Loads the etcd snapshot, merges + it with the events going on concurrently and sends the event stream + to Felix. + """ + _log.info("Resync thread started, waiting for config to be loaded...") + self._config_loaded.wait() + _log.info("Config loaded; continuing.") + while not self._stop_event.is_set(): + # Only has an effect if it's running. Note: stopping the watcher + # is async (and may take a long time for its connection to time + # out). + self._stop_watcher() + # Kick off the request as far as the headers. + resp, snapshot_index = self._start_snapshot_request() + # Before reading from the snapshot, start the watcher thread. + self._start_watcher(snapshot_index) + # Then plough through the update incrementally. + try: + # Incrementally process the snapshot, merging in events from + # the queue. + self._process_snapshot_and_events(resp, snapshot_index) + # Make sure we flush before we wait for events. + self._flush() + self._process_events_only() + except FelixWriteFailed: + _log.exception("Write to Felix failed; shutting down.") + self._stop_event.set() + except WatcherDied: + _log.warning("Watcher died; resyncing.") + except (urllib3.exceptions.HTTPError, + HTTPException, + socket.error) as e: + _log.error("Request to etcd failed: %r; resyncing.", e) + except: + _log.exception("Unexpected exception; shutting down.") + self._stop_event.set() + raise + finally: + self._first_resync = False + + def _start_snapshot_request(self): + """ + Issues the HTTP request to etcd to load the snapshot but only + loads it as far as the headers. + :return: tuple of response and snapshot's etcd index. + :raises HTTPException + :raises HTTPError + :raises socket.error + """ _log.info("Loading snapshot headers...") - http = HTTPConnectionPool("localhost", 4001, maxsize=1) - resp = http.request("GET", "http://localhost:4001/v2/keys/calico/v1", + http = self.get_etcd_connection() + resp = http.request("GET", + self._etcd_base_url + "/v2/keys/calico/v1", fields={"recursive": "true"}, timeout=120, preload_content=False) - - # ASAP, start the background thread to listen for events and queue - # them up... 
snapshot_index = int(resp.getheader("x-etcd-index", 1)) _log.info("Got snapshot headers, snapshot index is %s; starting " "watcher...", snapshot_index) - watcher_queue = Queue() - stop_watcher = Event() - watcher_thread = Thread(target=watch_etcd, - args=(snapshot_index + 1, - watcher_queue, - stop_watcher)) - watcher_thread.daemon = True - watcher_thread.start() - - # Then plough through the update incrementally. - hwms.start_tracking_deletions() - try: - buf = BytesIO() - parser = ijson.parse(resp) # urllib3 response is file-like. - stack = [] - frame = Node() - count = 0 - for prefix, event, value in parser: - if event == "start_map": - stack.append(frame) - frame = Node() - elif event == "map_key": - frame.current_key = value - elif event in ("string", "number"): - if frame.done: - continue - if frame.current_key == "modifiedIndex": - frame.modifiedIndex = value - if frame.current_key == "key": - frame.key = value - elif frame.current_key == "value": - frame.value = value - if (frame.key is not None and - frame.value is not None and - frame.modifiedIndex is not None): - frame.done = True - - old_hwm = hwms.update_hwm(frame.key, snapshot_index) - hwm = frame.modifiedIndex - if hwm > old_hwm: - # This specific key's HWM is newer than the - # previous version we've seen. - buf.write( - dumps((frame.key, frame.value)) - ) - events_processed += 1 - snapshot_events += 1 - else: - snap_skipped += 1 - - frame.current_key = None - elif event == "end_map": - frame = stack.pop(-1) - if count % 100 == 0: # Avoid checking the queue on every loop. - for _ in xrange(100): # Don't starve the snapshot. - try: - data = watcher_queue.get_nowait() - except Empty: - break - if data is None: - _log.warning("Watcher thread finished") - break - (mod, key, val) = data - if val is None: - # Deletion. - deleted_keys = hwms.store_deletion(key, mod) - for child_key in deleted_keys: - buf.write(dumps((child_key, None))) - else: - # Normal update. - hwms.update_hwm(key, mod) - buf.write(dumps((key, val))) - events_processed += 1 - watcher_events += 1 - buf_contents = buf.getvalue() - if buf_contents: - update_sock.sendall(buf_contents) - buf = BytesIO() - count += 1 - - # Save occupancy by throwing away the deletion tracking metadata. - hwms.stop_tracking_deletions() - - if not first_resync: - # Find any keys that were deleted while we were unable to - # keep up with etcd. - _log.info("Scanning for deletions") - deleted_keys = hwms.remove_old_keys(snapshot_index) - for key in deleted_keys: - # We didn't see the value during the snapshot or via the - # event queue. It must have been deleted. - buf.write(dumps((key, None))) - events_processed += 1 - else: - _log.info("First resync, skipping delete check.") - - buf_contents = buf.getvalue() - if buf_contents: - update_sock.sendall(buf_contents) - del buf - - _log.info("In sync, processing events only") - while True: - data = watcher_queue.get() - if data is None: - _log.warning("Watcher thread finished, resyncing...") + return resp, snapshot_index + + def _process_snapshot_and_events(self, etcd_response, snapshot_index): + """ + Processes the etcd snapshot response incrementally while, concurrently, + merging in updates from the watcher thread. + :param etcd_response: file-like object representing the etcd response. + :param snapshot_index: the etcd index of the response. 
+ """ + self._hwms.start_tracking_deletions() + for snap_mod, snap_key, snap_value in parse_snapshot(etcd_response): + old_hwm = self._hwms.update_hwm(snap_key, snapshot_index) + if snap_mod > old_hwm: + # This specific key's HWM is newer than the previous + # version we've seen, send an update. + self._queue_update(snap_key, snap_value) + + # After we process an update from the snapshot, process + # several updates from the watcher queue (if there are + # any). We limit the number to ensure that we always + # finish the snapshot eventually. + for _ in xrange(100): + if not self._watcher_queue or self._watcher_queue.empty(): + # Don't block on the watcher if there's nothing to do. break - mod, key, val = data - if val is None: - # Deletion. - deleted_keys = hwms.store_deletion(key, mod) - for child_key in deleted_keys: - update_sock.sendall( - dumps((child_key, None)) - ) - else: - # Normal update. - hwms.update_hwm(key, mod) - update_sock.sendall(dumps((key, val))) - events_processed += 1 - watcher_events += 1 - _log.warning("Worker stopped, resyncing...") - except socket.error as e: - if e.errno == 32: - # FIXME Magic number - _log.error("Broken pipe, exiting") - sys.exit(1) - except (urllib3.exceptions.HTTPError, - HTTPException, - socket.error) as e: - _log.error("Request to etcd failed: %r", e) - except: - _log.exception("Unexpected exception") - raise - finally: - first_resync = False + try: + self._handle_next_watcher_event() + except WatcherDied: + # Continue processing to ensure that we make + # progress. + _log.warning("Watcher thread died, continuing " + "with snapshot") + break + if self._stop_event.is_set(): + _log.error("Stop event set, exiting") + raise Stopped() + # Save occupancy by throwing away the deletion tracking metadata. + self._hwms.stop_tracking_deletions() + # Scan for deletions that happened before the snapshot. We effectively + # mark all the values seen in the current snapshot above and then this + # sweeps the ones we didn't touch. + self._scan_for_deletions(snapshot_index) + + def _process_events_only(self): + """ + Loops processing the event stream from the watcher thread and feeding + it to etcd. + :raises WatcherDied: + :raises FelixWriteFailed: + :raises Stopped: + """ + _log.info("In sync, now processing events only...") + while not self._stop_event.is_set(): + self._handle_next_watcher_event() + self._flush() + + def _scan_for_deletions(self, snapshot_index): + """ + Scans the high-water mark cache for keys that haven't been seen since + before the snapshot_index and deletes them. + """ + if self._first_resync: + _log.info("First resync: skipping deletion scan") + return + # Find any keys that were deleted while we were unable to + # keep up with etcd. + _log.info("Scanning for deletions") + deleted_keys = self._hwms.remove_old_keys(snapshot_index) + for ev_key in deleted_keys: + # We didn't see the value during the snapshot or via + # the event queue. It must have been deleted. + self._queue_update(ev_key, None) + + def _handle_next_watcher_event(self): + """ + Waits for an event on the watcher queue and sends it to Felix. 
+ :raises Stopped: + :raises WatcherDied: + :raises FelixWriteFailed: + """ + if self._watcher_queue is None: + raise WatcherDied() + while not self._stop_event.is_set(): + try: + event = self._watcher_queue.get(timeout=1) + except Empty(): + pass + else: + break + else: + raise Stopped() + if event is None: + self._watcher_queue = None + raise WatcherDied() + ev_mod, ev_key, ev_val = event + if ev_val is not None: + # Normal update. + self._hwms.update_hwm(ev_key, ev_mod) + self._queue_update(ev_key, ev_val) + else: + # Deletion. + deleted_keys = self._hwms.store_deletion(ev_key, + ev_mod) + for child_key in deleted_keys: + self._queue_update(child_key, None) + + def _start_watcher(self, snapshot_index): + """ + Starts the watcher thread, creating its queue and event in the process. + """ + self._watcher_queue = Queue() + self._watcher_stop_event = Event() + # Note: we pass the queue and event in as arguments so that the thread + # will always access the current queue and event. If it used self.xyz + # to access them then an old thread that is shutting down could access + # a new queue. + self._watcher_thread = Thread(target=watch_etcd, + args=(snapshot_index + 1, + self._watcher_queue, + self._watcher_stop_event)) + self._watcher_thread.daemon = True + self._watcher_thread.start() + + def _stop_watcher(self): + """ + If it's running, signals the watcher thread to stop. + """ + if self._watcher_stop_event is not None: + _log.info("Watcher was running before, stopping it") + self._watcher_stop_event.set() + self._watcher_stop_event = None + + def get_etcd_connection(self): + return HTTPConnectionPool(self._etcd_url_parts.hostname, + self._etcd_url_parts.port or 2379, + maxsize=1) + + def _queue_update(self, key, value): + """ + Queues an update message to Felix. + :raises FelixWriteFailed: + """ + self._buf.write(msgpack.dumps((key, value))) + self._updates_pending += 1 + if self._updates_pending > FLUSH_THRESHOLD: + self._flush() + + def _flush(self): + """ + Flushes the write buffer to Felix. + :raises FelixWriteFailed: + """ + buf_contents = self._buf.getvalue() + if buf_contents: + try: + self._felix_sck.sendall(buf_contents) + except socket.error as e: + _log.exception("Failed to write to Felix socket") + raise FelixWriteFailed(e) + self._buf = BytesIO() + self._updates_pending = 0 + + +def parse_snapshot(resp): + parser = ijson.parse(resp) # urllib3 response is file-like. 
+ stack = [] + frame = Node() + for prefix, event, value in parser: + if event == "start_map": + stack.append(frame) + frame = Node() + elif event == "map_key": + frame.current_key = value + elif event in ("string", "number"): + if frame.done: + continue + if frame.current_key == "modifiedIndex": + frame.modifiedIndex = value + if frame.current_key == "key": + frame.key = value + elif frame.current_key == "value": + frame.value = value + if (frame.key is not None and + frame.value is not None and + frame.modifiedIndex is not None): + frame.done = True + yield frame.modifiedIndex, frame.key, frame.value + frame.current_key = None + elif event == "end_map": + frame = stack.pop(-1) class Node(object): @@ -313,3 +488,14 @@ def __init__(self): self.current_key = None self.done = False + +class WatcherDied(Exception): + pass + + +class Stopped(Exception): + pass + + +class FelixWriteFailed(Exception): + pass \ No newline at end of file diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 9b73069ddd..aaeae4fd68 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -29,6 +29,8 @@ import subprocess import msgpack import time +from calico.etcddriver.driver import MSG_KEY_TYPE, MSG_KEY_ETCD_URL +from calico.etcddriver.driver import MSG_TYPE_INIT from calico.monotonic import monotonic_time from etcd import EtcdException, EtcdKeyNotFound @@ -396,11 +398,19 @@ def loop(self): driver_sck = self.start_driver() unpacker = msgpack.Unpacker() + read_count = 0 + last_time = monotonic_time() while True: data = driver_sck.recv(16384) unpacker.feed(data) for key, value in unpacker: - # TODO stats + read_count += 1 + if read_count % 1000 == 0: + now = monotonic_time() + delta = now - last_time + _log.warn("Processed %s updates from driver " + "%.1f/s", read_count, 1000.0 / delta) + last_time = now n = Node() n.action = "set" if value is not None else "delete" n.value = value @@ -443,6 +453,12 @@ def start_driver(self): _log.exception("Failed to unlink socket") else: _log.info("Unlinked server socket") + + update_conn.sendall(msgpack.dumps({ + MSG_KEY_TYPE: MSG_TYPE_INIT, + MSG_KEY_ETCD_URL: "http://" + self._config.ETCD_ADDR, + })) + return update_conn def _load_config(self): From 0fade39ed9affc7b04e97b24f734e236b3dc2905 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 20 Oct 2015 17:15:55 +0100 Subject: [PATCH 11/98] Move Ready key check and config loading to driver. 
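
Moving the Ready check into the driver means the driver polls etcd's HTTP
API directly rather than relying on Felix and python-etcd. The check
itself is a plain GET on the Ready key, retried until the stored value is
"true". A rough sketch, with the key spelled out inline as an assumption
(the real code imports it from calico.datamodel_v1) and error handling
left to the enclosing resync loop, as in the diff below:

    import json
    import time

    from urllib3 import HTTPConnectionPool

    READY_KEY = "/calico/v1/Ready"  # Assumed value of datamodel_v1.READY_KEY.

    def wait_for_ready(http, base_url):
        # Poll until the data model is marked ready; etcd wraps the
        # stored value in a JSON "node" envelope.
        while True:
            resp = http.request("GET", base_url + "/v2/keys" + READY_KEY,
                                timeout=5, preload_content=True)
            try:
                if json.loads(resp.data)["node"]["value"] == "true":
                    return
            except (TypeError, ValueError, KeyError):
                pass  # Flag missing or malformed; keep waiting.
            time.sleep(1)

    # e.g. wait_for_ready(HTTPConnectionPool("localhost", 4001, maxsize=1),
    #                     "http://localhost:4001")
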
--- calico/etcddriver/__main__.py | 4 +- calico/etcddriver/driver.py | 129 +++++++++++++++++++++++---- calico/felix/config.py | 3 +- calico/felix/felix.py | 7 +- calico/felix/fetcd.py | 158 +++++++++++++++------------------- 5 files changed, 189 insertions(+), 112 deletions(-) diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index 9416705928..bf1dd819be 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -28,14 +28,14 @@ import sys from calico.etcddriver.driver import EtcdDriver -from calico.common import default_logging, complete_logging +from calico.common import default_logging _log = logging.getLogger(__name__) default_logging(gevent_in_use=False) felix_sck = socket.socket(socket.AF_UNIX, - socket.SOCK_STREAM) + socket.SOCK_STREAM) try: felix_sck.connect(sys.argv[1]) except: diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 2dcf8a214e..e78a009cd0 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -33,6 +33,7 @@ from httplib import HTTPException from json import loads +import json import socket import logging from Queue import Queue, Empty @@ -40,6 +41,7 @@ from threading import Thread, Event import time from urlparse import urlparse +from calico.monotonic import monotonic_time from ijson.backends import yajl2 as ijson from io import BytesIO @@ -47,14 +49,12 @@ import urllib3 from urllib3 import HTTPConnectionPool from urllib3.exceptions import ReadTimeoutError +from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, dir_for_per_host_config from calico.etcddriver.hwm import HighWaterTracker _log = logging.getLogger(__name__) -logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s [%(levelname)s]' - '[%(process)s/%(thread)d] %(name)s %(lineno)d: ' - '%(message)s') + events_processed = 0 snapshot_events = 0 watcher_events = 0 @@ -67,7 +67,14 @@ # Init message Felix -> Driver. MSG_TYPE_INIT = "init" MSG_KEY_ETCD_URL = "etcd_url" +MSG_KEY_HOSTNAME = "hostname" + +# Config loaded message Driver -> Felix. +MSG_TYPE_CONFIG_LOADED = "config_loaded" +MSG_KEY_GLOBAL_CONFIG = "global" +MSG_KEY_HOST_CONFIG = "host" +# Config message Felix -> Driver. MSG_KEY_LOG_FILE = "log_file" MSG_KEY_SEV_FILE = "sev_file" MSG_KEY_SEV_SCREEN = "sev_screen" @@ -75,8 +82,10 @@ MSG_TYPE_STATUS = "stat" MSG_TYPE_CONFIG = "conf" -MSG_TYPE_UPDATE = "upd" +MSG_TYPE_UPDATE = "upd" +MSG_KEY_KEY = "k" +MSG_KEY_VALUE = "v" def report_status(): while True: @@ -175,10 +184,12 @@ def __init__(self, felix_sck): self._updates_pending = 0 self._buf = BytesIO() self._first_resync = True + self.resync_http_pool = None # Set by the reader thread once the config has been read from Felix. self._config_loaded = Event() self._etcd_base_url = None + self._hostname = None def start(self): self._reader_thread.start() @@ -221,6 +232,7 @@ def _handle_init(self, msg): _log.info("Got init message from Felix %s", msg) self._etcd_base_url = msg[MSG_KEY_ETCD_URL].rstrip("/") self._etcd_url_parts = urlparse(self._etcd_base_url) + self._hostname = msg[MSG_KEY_HOSTNAME] self._config_loaded.set() def _resync_and_merge(self): @@ -232,17 +244,25 @@ def _resync_and_merge(self): _log.info("Resync thread started, waiting for config to be loaded...") self._config_loaded.wait() _log.info("Config loaded; continuing.") + while not self._stop_event.is_set(): + loop_start = monotonic_time() # Only has an effect if it's running. Note: stopping the watcher # is async (and may take a long time for its connection to time # out). 
self._stop_watcher() - # Kick off the request as far as the headers. - resp, snapshot_index = self._start_snapshot_request() - # Before reading from the snapshot, start the watcher thread. - self._start_watcher(snapshot_index) - # Then plough through the update incrementally. try: + # Start with a fresh HTTP pool just in case it got into a bad + # state. + self.resync_http_pool = self.get_etcd_connection() + # Before we get to the snapshot, Felix needs the configuration. + self._wait_for_ready() + self._preload_config() + # Kick off the snapshot request as far as the headers. + resp, snapshot_index = self._start_snapshot_request() + # Before reading from the snapshot, start the watcher thread. + self._start_watcher(snapshot_index) + # Then plough through the update incrementally. # Incrementally process the snapshot, merging in events from # the queue. self._process_snapshot_and_events(resp, snapshot_index) @@ -258,6 +278,9 @@ def _resync_and_merge(self): HTTPException, socket.error) as e: _log.error("Request to etcd failed: %r; resyncing.", e) + if monotonic_time() - loop_start < 1: + _log.debug("May be tight looping, sleeping...") + time.sleep(1) except: _log.exception("Unexpected exception; shutting down.") self._stop_event.set() @@ -265,6 +288,65 @@ def _resync_and_merge(self): finally: self._first_resync = False + def _wait_for_ready(self): + ready = False + while not ready: + # Read failure here will be handled by outer loop. + resp = self.resync_http_pool.request( + "GET", + self._etcd_base_url + "/v2/keys" + READY_KEY, + timeout=5, + preload_content=True + ) + try: + etcd_resp = json.loads(resp.data) + ready = etcd_resp["node"]["value"] == "true" + except (TypeError, ValueError, KeyError) as e: + _log.warning("Failed to load Ready flag from etcd: %r", e) + time.sleep(1) + + def _preload_config(self): + _log.info("Pre-loading config.") + global_config = self._load_config(CONFIG_DIR) + host_config_dir = dir_for_per_host_config(self._hostname) + host_config = self._load_config(host_config_dir) + self._buf.write(msgpack.dumps( + { + MSG_KEY_TYPE: MSG_TYPE_CONFIG_LOADED, + MSG_KEY_GLOBAL_CONFIG: global_config, + MSG_KEY_HOST_CONFIG: host_config, + } + )) + self._flush() + _log.info("Sent config message to Felix.") + + def _load_config(self, config_dir): + # Read failure here will be handled by outer loop. 
+ resp = self.resync_http_pool.request( + "GET", + self._etcd_base_url + "/v2/keys" + config_dir, + fields={ + "recursive": "true", + }, + timeout=5, + preload_content=True + ) + try: + etcd_resp = json.loads(resp.data) + if etcd_resp.get("errorCode") == 100: # Not found + _log.info("No config found at %s", config_dir) + return {} + config_nodes = etcd_resp["node"]["nodes"] + config = {} + for node in config_nodes: + if "key" in node and "value" in node: + config[node["key"].split("/")[-1]] = node["value"] + except (TypeError, ValueError, KeyError) as e: + _log.warning("Failed to load config from etcd: %r," + "data %r", e, resp.data) + raise ResyncRequired(e) + return config + def _start_snapshot_request(self): """ Issues the HTTP request to etcd to load the snapshot but only @@ -275,12 +357,13 @@ def _start_snapshot_request(self): :raises socket.error """ _log.info("Loading snapshot headers...") - http = self.get_etcd_connection() - resp = http.request("GET", - self._etcd_base_url + "/v2/keys/calico/v1", - fields={"recursive": "true"}, - timeout=120, - preload_content=False) + resp = self.resync_http_pool.request( + "GET", + self._etcd_base_url + "/v2/keys/calico/v1", + fields={"recursive": "true"}, + timeout=120, + preload_content=False + ) snapshot_index = int(resp.getheader("x-etcd-index", 1)) _log.info("Got snapshot headers, snapshot index is %s; starting " "watcher...", snapshot_index) @@ -369,7 +452,7 @@ def _handle_next_watcher_event(self): while not self._stop_event.is_set(): try: event = self._watcher_queue.get(timeout=1) - except Empty(): + except Empty: pass else: break @@ -426,7 +509,11 @@ def _queue_update(self, key, value): Queues an update message to Felix. :raises FelixWriteFailed: """ - self._buf.write(msgpack.dumps((key, value))) + self._buf.write(msgpack.dumps({ + MSG_KEY_TYPE: MSG_TYPE_UPDATE, + MSG_KEY_KEY: key, + MSG_KEY_VALUE: value, + })) self._updates_pending += 1 if self._updates_pending > FLUSH_THRESHOLD: self._flush() @@ -498,4 +585,8 @@ class Stopped(Exception): class FelixWriteFailed(Exception): - pass \ No newline at end of file + pass + + +class ResyncRequired(Exception): + pass diff --git a/calico/felix/config.py b/calico/felix/config.py index 816656640b..a2406b7493 100644 --- a/calico/felix/config.py +++ b/calico/felix/config.py @@ -280,7 +280,8 @@ def _finish_update(self, final=False): common.complete_logging(self.LOGFILE, self.LOGLEVFILE, self.LOGLEVSYS, - self.LOGLEVSCR) + self.LOGLEVSCR, + gevent_in_use=True) if final: # Log configuration - the whole lot of it. diff --git a/calico/felix/felix.py b/calico/felix/felix.py index d219b992e5..7e109e9c50 100644 --- a/calico/felix/felix.py +++ b/calico/felix/felix.py @@ -203,7 +203,7 @@ def dump_top_level_actors(log): def main(): # Initialise the logging with default parameters. - common.default_logging() + common.default_logging(gevent_in_use=True) # Create configuration, reading defaults from file if it exists. parser = optparse.OptionParser() @@ -214,7 +214,7 @@ def main(): try: config = Config(options.config_file) - except Exception: + except Exception as e: # Config loading error, and not just invalid parameters (from optparse) # as they generate a SystemExit. Attempt to open a log file, ignoring # any errors it gets, before we raise the exception. 
@@ -222,7 +222,8 @@ def main(): common.complete_logging("/var/log/calico/felix.log", logging.DEBUG, logging.DEBUG, - logging.DEBUG) + logging.DEBUG, + gevent_in_use=True) except Exception: pass diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index aaeae4fd68..e385bb5ff7 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -29,7 +29,9 @@ import subprocess import msgpack import time -from calico.etcddriver.driver import MSG_KEY_TYPE, MSG_KEY_ETCD_URL +from calico.etcddriver.driver import MSG_KEY_TYPE, MSG_KEY_ETCD_URL, \ + MSG_KEY_HOSTNAME, MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE, \ + MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG from calico.etcddriver.driver import MSG_TYPE_INIT from calico.monotonic import monotonic_time @@ -322,6 +324,9 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): # Register for events when values change. self._register_paths() + self.read_count = 0 + self.last_rate_log_time = monotonic_time() + def _register_paths(self): """ Program the dispatcher with the paths we care about. @@ -375,18 +380,6 @@ def _run(self): self.load_config.wait() self.loop() - def _on_pre_resync(self): - self.wait_for_ready(RETRY_DELAY) - # Always reload the config. This lets us detect if the config has - # changed and restart felix if so. - self._load_config() - if not self.configured.is_set(): - # Unblock anyone who's waiting on the config. - self.configured.set() - if not self.begin_polling.is_set(): - _log.info("etcd worker about to wait for begin_polling event") - self.begin_polling.wait() - @logging_exceptions def loop(self): _log.info("Started %s loop", self) @@ -394,7 +387,6 @@ def loop(self): try: _log.info("Reconnecting and loading snapshot from etcd...") self.reconnect(copy_cluster_id=False) - self._on_pre_resync() driver_sck = self.start_driver() unpacker = msgpack.Unpacker() @@ -403,22 +395,17 @@ def loop(self): while True: data = driver_sck.recv(16384) unpacker.feed(data) - for key, value in unpacker: - read_count += 1 - if read_count % 1000 == 0: - now = monotonic_time() - delta = now - last_time - _log.warn("Processed %s updates from driver " - "%.1f/s", read_count, 1000.0 / delta) - last_time = now - n = Node() - n.action = "set" if value is not None else "delete" - n.value = value - n.key = key - try: - self.dispatcher.handle_event(n) - except ResyncRequired: - _log.warning("IGNORING RESYNC.") + for msg in unpacker: + # Optimization: put update first in the "switch" + # block because it's on the critical path. + msg_type = msg[MSG_KEY_TYPE] + if msg_type == MSG_TYPE_UPDATE: + self._handle_update(msg) + elif msg_type == MSG_TYPE_CONFIG_LOADED: + self._handle_config_loaded(msg) + else: + raise RuntimeError("Unexpected message %s" % msg) + except EtcdException as e: # Most likely a timeout or other error in the pre-resync; # start over. These exceptions have good semantic error text @@ -428,8 +415,61 @@ def loop(self): time.sleep(1) # Prevent tight loop due to unexpected error. 
except: _log.exception("Exception reading from socket?") + raise _log.info("%s.loop() stopped due to self.stop == True", self) + def _handle_update(self, msg): + assert self.configured.is_set() + key = msg[MSG_KEY_KEY] + value = msg[MSG_KEY_VALUE] + self.read_count += 1 + if self.read_count % 1000 == 0: + now = monotonic_time() + delta = now - self.last_rate_log_time + _log.warn("Processed %s updates from driver " + "%.1f/s", self.read_count, 1000.0 / delta) + self.last_rate_log_time = now + n = Node() + n.action = "set" if value is not None else "delete" + n.value = value + n.key = key + try: + self.dispatcher.handle_event(n) + except ResyncRequired: + _log.warning("IGNORING RESYNC.") + + def _handle_config_loaded(self, msg): + global_config = msg[MSG_KEY_GLOBAL_CONFIG] + host_config = msg[MSG_KEY_HOST_CONFIG] + _log.info("Config loaded by driver:\n" + "Global: %s\nPer-host: %s", + global_config, + host_config) + if self.configured.is_set(): + # We've already been configured. We don't yet support + # dynamic config update so instead we check if the config + # has changed and die if it has. + _log.info("Checking configuration for changes...") + if (host_config != self.last_host_config or + global_config != self.last_global_config): + _log.warning("Felix configuration has changed, " + "felix must restart.") + _log.info("Old host config: %s", self.last_host_config) + _log.info("New host config: %s", host_config) + _log.info("Old global config: %s", + self.last_global_config) + _log.info("New global config: %s", global_config) + die_and_restart() + else: + # First time loading the config. Report it to the config + # object. Take copies because report_etcd_config is + # destructive. + self.last_host_config = host_config.copy() + self.last_global_config = global_config.copy() + self._config.report_etcd_config(host_config, + global_config) + self.configured.set() + def start_driver(self): _log.info("Creating server socket.") try: @@ -457,67 +497,11 @@ def start_driver(self): update_conn.sendall(msgpack.dumps({ MSG_KEY_TYPE: MSG_TYPE_INIT, MSG_KEY_ETCD_URL: "http://" + self._config.ETCD_ADDR, + MSG_KEY_HOSTNAME: self._config.HOSTNAME, })) return update_conn - def _load_config(self): - """ - Loads our configuration from etcd. Does not return - until the config is successfully loaded. - - The first call to this method populates the config object. - - Subsequent calls check the config hasn't changed and kill - the process if it has. This allows us to be restarted by - the init daemon in order to pick up the new config. - """ - while True: - try: - global_cfg = self.client.read(CONFIG_DIR, - recursive=True) - global_dict = _build_config_dict(global_cfg) - - try: - host_cfg = self.client.read(self.my_config_dir, - recursive=True) - host_dict = _build_config_dict(host_cfg) - except EtcdKeyNotFound: - # It is not an error for there to be no per-host - # config; default to empty. - _log.info("No configuration overrides for this node") - host_dict = {} - except (EtcdKeyNotFound, EtcdException) as e: - # Note: we don't log the stack trace because it's too - # spammy and adds little. - _log.error("Failed to read config. etcd may be down or " - "the data model may not be ready: %r. Will " - "retry.", e) - gevent.sleep(RETRY_DELAY) - else: - if self.configured.is_set(): - # We've already been configured. We don't yet support - # dynamic config update so instead we check if the config - # has changed and die if it has. 
-                    _log.info("Checking configuration for changes...")
-                    if (host_dict != self.last_host_config or
-                            global_dict != self.last_global_config):
-                        _log.warning("Felix configuration has changed, "
-                                     "felix must restart.")
-                        _log.info("Old host config: %s", self.last_host_config)
-                        _log.info("New host config: %s", host_dict)
-                        _log.info("Old global config: %s",
-                                  self.last_global_config)
-                        _log.info("New global config: %s", global_dict)
-                        die_and_restart()
-                else:
-                    # First time loading the config.  Report it to the config
-                    # object.  Take copies because report_etcd_config is
-                    # destructive.
-                    self.last_host_config = host_dict.copy()
-                    self.last_global_config = global_dict.copy()
-                    self._config.report_etcd_config(host_dict, global_dict)
-                    return
 
 #
 # def _on_snapshot_loaded(self, etcd_snapshot_response):
 #     """

From 652803e0d38a45120537c1daef5b8a41183762db Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Wed, 21 Oct 2015 14:03:32 +0100
Subject: [PATCH 12/98] Pass config back to driver once it's been validated.

---
 calico/etcddriver/__main__.py |  1 +
 calico/etcddriver/driver.py   | 84 +++++++++++++++++------------------
 calico/felix/fetcd.py         | 22 +++++++--
 3 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py
index bf1dd819be..cba1198d7e 100644
--- a/calico/etcddriver/__main__.py
+++ b/calico/etcddriver/__main__.py
@@ -45,3 +45,4 @@
 driver = EtcdDriver(felix_sck)
 driver.start()
 driver.join()
+_log.critical("Driver shutting down.")
diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index e78a009cd0..b8b9742ba7 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -31,35 +31,31 @@
 Felix about all the individual keys that are deleted.
 """
 
+import errno
 from httplib import HTTPException
+from io import BytesIO
 from json import loads
 import json
-import socket
 import logging
 from Queue import Queue, Empty
-import sys
+import socket
 from threading import Thread, Event
 import time
 from urlparse import urlparse
 
-from calico.monotonic import monotonic_time
 from ijson.backends import yajl2 as ijson
-from io import BytesIO
 import msgpack
 import urllib3
 from urllib3 import HTTPConnectionPool
 from urllib3.exceptions import ReadTimeoutError
 
-from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, dir_for_per_host_config
+from calico.common import complete_logging
+from calico.monotonic import monotonic_time
+from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, dir_for_per_host_config
 from calico.etcddriver.hwm import HighWaterTracker
 
 _log = logging.getLogger(__name__)
 
-events_processed = 0
-snapshot_events = 0
-watcher_events = 0
-snap_skipped = 0
-
 FLUSH_THRESHOLD = 200
 
 MSG_KEY_TYPE = "type"
@@ -75,37 +71,18 @@
 MSG_KEY_HOST_CONFIG = "host"
 
 # Config message Felix -> Driver.
+MSG_TYPE_CONFIG = "conf" MSG_KEY_LOG_FILE = "log_file" MSG_KEY_SEV_FILE = "sev_file" MSG_KEY_SEV_SCREEN = "sev_screen" MSG_KEY_SEV_SYSLOG = "sev_syslog" MSG_TYPE_STATUS = "stat" -MSG_TYPE_CONFIG = "conf" MSG_TYPE_UPDATE = "upd" MSG_KEY_KEY = "k" MSG_KEY_VALUE = "v" -def report_status(): - while True: - start_tot = events_processed - start_snap = snapshot_events - start_watch = watcher_events - start_skip = snap_skipped - time.sleep(1) - end_tot = events_processed - end_snap = snapshot_events - end_watch = watcher_events - end_skip = snap_skipped - _log.info( - "Events/s: %s Snap: %s, Watch %s, Skip: %s", - end_tot - start_tot, - end_snap - start_snap, - end_watch - start_watch, - end_skip - start_skip - ) - # etcd response data looks like this: # {u'action': u'set', @@ -172,8 +149,10 @@ def __init__(self, felix_sck): # Global stop event used to signal to all threads to stop. self._stop_event = Event() - self._reader_thread = Thread(target=self._read_from_socket) - self._resync_thread = Thread(target=self._resync_and_merge) + self._reader_thread = Thread(target=self._read_from_socket, + name="reader-thread") + self._resync_thread = Thread(target=self._resync_and_merge, + name="resync-thread") self._watcher_thread = None # Created on demand self._watcher_stop_event = None @@ -196,8 +175,7 @@ def start(self): self._resync_thread.start() def join(self): - self._reader_thread.join() - self._resync_thread.join() + self._stop_event.wait() def _read_from_socket(self): """ @@ -211,16 +189,25 @@ def _read_from_socket(self): while not self._stop_event.is_set(): try: data = self._felix_sck.recv(8092) - except: - _log.exception("Exception reading from Felix.") - raise + except socket.error as e: + if e.errno in (errno.EAGAIN, + errno.EWOULDBLOCK, + errno.EINTR): + _log.debug("Retryable error on read from Felix.") + continue + else: + _log.error("Failed to read from Felix socket: %r", e) + raise if not data: _log.error("No data read, assuming Felix closed socket") break unpacker.feed(data) for msg in unpacker: - if msg[MSG_KEY_TYPE] == MSG_TYPE_INIT: + msg_type = msg[MSG_KEY_TYPE] + if msg_type == MSG_TYPE_INIT: self._handle_init(msg) + elif msg_type == MSG_TYPE_CONFIG: + self._handle_config(msg) else: _log.warning("Unexpected message from Felix") finally: @@ -235,6 +222,14 @@ def _handle_init(self, msg): self._hostname = msg[MSG_KEY_HOSTNAME] self._config_loaded.set() + def _handle_config(self, msg): + complete_logging(msg[MSG_KEY_LOG_FILE], + file_level=msg[MSG_KEY_SEV_FILE], + syslog_level=msg[MSG_KEY_SEV_SYSLOG], + stream_level=msg[MSG_KEY_SEV_SCREEN], + gevent_in_use=False) + _log.info("Received config from Felix: %s", msg) + def _resync_and_merge(self): """ Thread: Resync-and-merge thread. Loads the etcd snapshot, merges @@ -402,7 +397,7 @@ def _process_snapshot_and_events(self, etcd_response, snapshot_index): break if self._stop_event.is_set(): _log.error("Stop event set, exiting") - raise Stopped() + raise DriverShutdown() # Save occupancy by throwing away the deletion tracking metadata. self._hwms.stop_tracking_deletions() # Scan for deletions that happened before the snapshot. We effectively @@ -416,7 +411,7 @@ def _process_events_only(self): it to etcd. 
         :raises WatcherDied:
         :raises FelixWriteFailed:
-        :raises Stopped:
+        :raises DriverShutdown:
         """
         _log.info("In sync, now processing events only...")
         while not self._stop_event.is_set():
@@ -443,7 +438,7 @@ def _scan_for_deletions(self, snapshot_index):
     def _handle_next_watcher_event(self):
         """
         Waits for an event on the watcher queue and sends it to Felix.
-        :raises Stopped:
+        :raises DriverShutdown:
         :raises WatcherDied:
         :raises FelixWriteFailed:
         """
@@ -457,7 +452,7 @@ def _handle_next_watcher_event(self):
             else:
                 break
         else:
-            raise Stopped()
+            raise DriverShutdown()
         if event is None:
             self._watcher_queue = None
             raise WatcherDied()
@@ -486,7 +481,8 @@ def _start_watcher(self, snapshot_index):
         self._watcher_thread = Thread(target=watch_etcd,
                                       args=(snapshot_index + 1,
                                             self._watcher_queue,
-                                            self._watcher_stop_event))
+                                            self._watcher_stop_event),
+                                      name="watcher-thread")
         self._watcher_thread.daemon = True
         self._watcher_thread.start()
@@ -580,7 +576,7 @@ class WatcherDied(Exception):
     pass
 
 
-class Stopped(Exception):
+class DriverShutdown(Exception):
     pass
 
diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index e385bb5ff7..bc6a986624 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -31,7 +31,9 @@
 import time
 from calico.etcddriver.driver import MSG_KEY_TYPE, MSG_KEY_ETCD_URL, \
     MSG_KEY_HOSTNAME, MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE, \
-    MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG
+    MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG, \
+    MSG_TYPE_CONFIG, MSG_KEY_LOG_FILE, MSG_KEY_SEV_FILE, MSG_KEY_SEV_SCREEN, \
+    MSG_KEY_SEV_SYSLOG
 from calico.etcddriver.driver import MSG_TYPE_INIT
 from calico.monotonic import monotonic_time
@@ -402,7 +404,7 @@ def loop(self):
                         if msg_type == MSG_TYPE_UPDATE:
                             self._handle_update(msg)
                         elif msg_type == MSG_TYPE_CONFIG_LOADED:
-                            self._handle_config_loaded(msg)
+                            self._handle_config_loaded(msg, driver_sck)
                         else:
                             raise RuntimeError("Unexpected message %s" % msg)
 
@@ -438,7 +440,7 @@ def _handle_update(self, msg):
         except ResyncRequired:
             _log.warning("IGNORING RESYNC.")
 
-    def _handle_config_loaded(self, msg):
+    def _handle_config_loaded(self, msg, driver_sck):
         global_config = msg[MSG_KEY_GLOBAL_CONFIG]
         host_config = msg[MSG_KEY_HOST_CONFIG]
         _log.info("Config loaded by driver:\n"
@@ -468,6 +470,20 @@ def _handle_config_loaded(self, msg, driver_sck):
             self.last_global_config = global_config.copy()
             self._config.report_etcd_config(host_config,
                                             global_config)
+            # Config now fully resolved, inform the driver.
+            felix_log_file = self._config.LOGFILE
+            if felix_log_file:
+                # FIXME Proper config for driver logfile
+                driver_log_file = felix_log_file + "-driver"
+            else:
+                driver_log_file = None
+            driver_sck.send(msgpack.dumps({
+                MSG_KEY_TYPE: MSG_TYPE_CONFIG,
+                MSG_KEY_LOG_FILE: driver_log_file,
+                MSG_KEY_SEV_FILE: self._config.LOGLEVFILE,
+                MSG_KEY_SEV_SCREEN: self._config.LOGLEVSCR,
+                MSG_KEY_SEV_SYSLOG: self._config.LOGLEVSYS,
+            }))
             self.configured.set()
 
     def start_driver(self):

From 1119d831cda21868afb73ea0320e59c1ccb82f90 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Wed, 21 Oct 2015 14:35:28 +0100
Subject: [PATCH 13/98] Driver now sends status messages to Felix.
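
The status values let Felix track the driver's progress through a resync.
For reference, a minimal sketch of the encoding, assuming the constant
values added below (the helper function is illustrative, not part of the
patch):

    import msgpack

    MSG_KEY_TYPE = "type"      # Constants as defined in driver.py.
    MSG_TYPE_STATUS = "stat"
    MSG_KEY_STATUS = "status"

    def status_msg(status):
        # e.g. status_msg("in-sync") returns bytes ready for sendall().
        return msgpack.dumps({MSG_KEY_TYPE: MSG_TYPE_STATUS,
                              MSG_KEY_STATUS: status})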
--- calico/etcddriver/driver.py | 49 +++++++++++++++++++++++++++++++------ calico/felix/fetcd.py | 8 +++++- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index b8b9742ba7..7c568ff001 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -78,8 +78,12 @@ MSG_KEY_SEV_SYSLOG = "sev_syslog" MSG_TYPE_STATUS = "stat" +MSG_KEY_STATUS = "status" +STATUS_WAIT_FOR_READY = "wait-for-ready" +STATUS_RESYNC = "resync" +STATUS_IN_SYNC = "in-sync" -MSG_TYPE_UPDATE = "upd" +MSG_TYPE_UPDATE = "u" MSG_KEY_KEY = "k" MSG_KEY_VALUE = "v" @@ -165,8 +169,12 @@ def __init__(self, felix_sck): self._first_resync = True self.resync_http_pool = None - # Set by the reader thread once the config has been read from Felix. - self._config_loaded = Event() + # Set by the reader thread once the init message has been received + # from Felix. + self._init_received = Event() + # Set by the reader thread once the logging config has been received + # from Felix. + self._config_received = Event() self._etcd_base_url = None self._hostname = None @@ -215,19 +223,30 @@ def _read_from_socket(self): self._stop_event.set() def _handle_init(self, msg): + """ + Handle init message from Felix. + + Called from the reader thread. + """ # OK to dump the msg, it's a one-off. _log.info("Got init message from Felix %s", msg) self._etcd_base_url = msg[MSG_KEY_ETCD_URL].rstrip("/") self._etcd_url_parts = urlparse(self._etcd_base_url) self._hostname = msg[MSG_KEY_HOSTNAME] - self._config_loaded.set() + self._init_received.set() def _handle_config(self, msg): + """ + Handle config message from Felix. + + Called from the reader thread. + """ complete_logging(msg[MSG_KEY_LOG_FILE], file_level=msg[MSG_KEY_SEV_FILE], syslog_level=msg[MSG_KEY_SEV_SYSLOG], stream_level=msg[MSG_KEY_SEV_SCREEN], gevent_in_use=False) + self._config_received.set() _log.info("Received config from Felix: %s", msg) def _resync_and_merge(self): @@ -237,7 +256,7 @@ def _resync_and_merge(self): to Felix. """ _log.info("Resync thread started, waiting for config to be loaded...") - self._config_loaded.wait() + self._init_received.wait() _log.info("Config loaded; continuing.") while not self._stop_event.is_set(): @@ -251,16 +270,22 @@ def _resync_and_merge(self): # state. self.resync_http_pool = self.get_etcd_connection() # Before we get to the snapshot, Felix needs the configuration. + self._queue_status(STATUS_WAIT_FOR_READY) self._wait_for_ready() self._preload_config() - # Kick off the snapshot request as far as the headers. + # Now (on the first run through) wait for Felix to process the + # config. + self._config_received.wait() + # Kick off the snapshot request as far as the headers. + self._queue_status(STATUS_RESYNC) resp, snapshot_index = self._start_snapshot_request() # Before reading from the snapshot, start the watcher thread. self._start_watcher(snapshot_index) - # Then plough through the update incrementally. # Incrementally process the snapshot, merging in events from # the queue. self._process_snapshot_and_events(resp, snapshot_index) + # We're now in-sync. Tell Felix. + self._queue_status(STATUS_IN_SYNC) # Make sure we flush before we wait for events. 
self._flush() self._process_events_only() @@ -510,6 +535,16 @@ def _queue_update(self, key, value): MSG_KEY_KEY: key, MSG_KEY_VALUE: value, })) + self._maybe_flush() + + def _queue_status(self, status): + self._buf.write(msgpack.dumps({ + MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: status, + })) + self._maybe_flush() + + def _maybe_flush(self): self._updates_pending += 1 if self._updates_pending > FLUSH_THRESHOLD: self._flush() diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index bc6a986624..3ca8a2f2e8 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -33,7 +33,7 @@ MSG_KEY_HOSTNAME, MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE, \ MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG, \ MSG_TYPE_CONFIG, MSG_KEY_LOG_FILE, MSG_KEY_SEV_FILE, MSG_KEY_SEV_SCREEN, \ - MSG_KEY_SEV_SYSLOG + MSG_KEY_SEV_SYSLOG, MSG_KEY_STATUS, MSG_TYPE_STATUS from calico.etcddriver.driver import MSG_TYPE_INIT from calico.monotonic import monotonic_time @@ -405,6 +405,8 @@ def loop(self): self._handle_update(msg) elif msg_type == MSG_TYPE_CONFIG_LOADED: self._handle_config_loaded(msg, driver_sck) + elif msg_type == MSG_TYPE_STATUS: + self._handle_status(msg) else: raise RuntimeError("Unexpected message %s" % msg) @@ -486,6 +488,10 @@ def _handle_config_loaded(self, msg, driver_sck): })) self.configured.set() + def _handle_status(self, msg): + status = msg[MSG_KEY_STATUS] + _log.info("etcd driver status changed to %s", status) + def start_driver(self): _log.info("Creating server socket.") try: From 4360085f94a6534e23b48eea32d0496cf75a4691 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 21 Oct 2015 15:07:00 +0100 Subject: [PATCH 14/98] Minor cleanups: logging and comments. --- calico/etcddriver/driver.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 7c568ff001..41d7c94726 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -153,11 +153,13 @@ def __init__(self, felix_sck): # Global stop event used to signal to all threads to stop. self._stop_event = Event() + # Threads to own the connection from/to Felix. The resync thread + # is responsible for doing resyncs and merging updates from the + # watcher thread (which it manages). self._reader_thread = Thread(target=self._read_from_socket, name="reader-thread") self._resync_thread = Thread(target=self._resync_and_merge, name="resync-thread") - self._watcher_thread = None # Created on demand self._watcher_stop_event = None @@ -167,22 +169,25 @@ def __init__(self, felix_sck): self._updates_pending = 0 self._buf = BytesIO() self._first_resync = True - self.resync_http_pool = None + self._resync_http_pool = None # Set by the reader thread once the init message has been received # from Felix. self._init_received = Event() - # Set by the reader thread once the logging config has been received - # from Felix. - self._config_received = Event() + # Initial config, received in the init message. self._etcd_base_url = None self._hostname = None + # Set by the reader thread once the logging config has been received + # from Felix. Triggers the first resync. 
+ self._config_received = Event() def start(self): + """Starts the driver's reader and resync threads.""" self._reader_thread.start() self._resync_thread.start() def join(self): + """Blocks until the driver stops.""" self._stop_event.wait() def _read_from_socket(self): @@ -268,7 +273,7 @@ def _resync_and_merge(self): try: # Start with a fresh HTTP pool just in case it got into a bad # state. - self.resync_http_pool = self.get_etcd_connection() + self._resync_http_pool = self.get_etcd_connection() # Before we get to the snapshot, Felix needs the configuration. self._queue_status(STATUS_WAIT_FOR_READY) self._wait_for_ready() @@ -309,10 +314,14 @@ def _resync_and_merge(self): self._first_resync = False def _wait_for_ready(self): + """ + Waits for the global Ready flag to be set. We don't load the first + snapshot until that flag is set. + """ ready = False while not ready: # Read failure here will be handled by outer loop. - resp = self.resync_http_pool.request( + resp = self._resync_http_pool.request( "GET", self._etcd_base_url + "/v2/keys" + READY_KEY, timeout=5, @@ -326,6 +335,10 @@ def _wait_for_ready(self): time.sleep(1) def _preload_config(self): + """ + Loads the config for Felix from etcd and sends it to Felix as a + dedicated message. + """ _log.info("Pre-loading config.") global_config = self._load_config(CONFIG_DIR) host_config_dir = dir_for_per_host_config(self._hostname) @@ -341,8 +354,11 @@ def _preload_config(self): _log.info("Sent config message to Felix.") def _load_config(self, config_dir): + """ + Loads all the config keys from the given etcd directory. + """ # Read failure here will be handled by outer loop. - resp = self.resync_http_pool.request( + resp = self._resync_http_pool.request( "GET", self._etcd_base_url + "/v2/keys" + config_dir, fields={ @@ -377,7 +393,7 @@ def _start_snapshot_request(self): :raises socket.error """ _log.info("Loading snapshot headers...") - resp = self.resync_http_pool.request( + resp = self._resync_http_pool.request( "GET", self._etcd_base_url + "/v2/keys/calico/v1", fields={"recursive": "true"}, @@ -459,6 +475,7 @@ def _scan_for_deletions(self, snapshot_index): # We didn't see the value during the snapshot or via # the event queue. It must have been deleted. self._queue_update(ev_key, None) + _log.info("Found %d deleted keys", len(deleted_keys)) def _handle_next_watcher_event(self): """ From 8c5cd41f33eb645628405ebc6f850f45e1fe77c6 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 21 Oct 2015 15:07:15 +0100 Subject: [PATCH 15/98] Handle errors from etcd when loading snapshot. --- calico/etcddriver/driver.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 41d7c94726..cf4b4ac3dc 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -583,6 +583,9 @@ def _flush(self): def parse_snapshot(resp): + if resp.status != 200: + raise ResyncRequired("Read from etcd failed. HTTP status code %s", + resp.status) parser = ijson.parse(resp) # urllib3 response is file-like. 
stack = [] frame = Node() @@ -597,10 +600,13 @@ def parse_snapshot(resp): continue if frame.current_key == "modifiedIndex": frame.modifiedIndex = value - if frame.current_key == "key": + elif frame.current_key == "key": frame.key = value elif frame.current_key == "value": frame.value = value + elif frame.current_key == "errorCode": + raise ResyncRequired("Error from etcd, etcd error code %s", + value) if (frame.key is not None and frame.value is not None and frame.modifiedIndex is not None): From 373a57a948b4a37f96981c2a02ff00e042cfbf46 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 21 Oct 2015 15:48:58 +0100 Subject: [PATCH 16/98] Handle HTTP and etcd errors in watcher loop. --- calico/etcddriver/driver.py | 120 ++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index cf4b4ac3dc..25f3b35e83 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -106,46 +106,6 @@ # u'endpoint_175/endpoint/endpoint_175'}} -def watch_etcd(next_index, result_queue, stop_event): - _log.info("Watcher thread started") - http = None - try: - while not stop_event.is_set(): - if not http: - _log.info("No HTTP pool, creating one...") - http = HTTPConnectionPool("localhost", 4001, maxsize=1) - try: - _log.debug("Waiting on etcd index %s", next_index) - resp = http.request( - "GET", - "http://localhost:4001/v2/keys/calico/v1", - fields={"recursive": "true", - "wait": "true", - "waitIndex": next_index}, - timeout=90, - ) - resp_body = loads(resp.data) - except ReadTimeoutError: - _log.exception("Watch read timed out, restarting watch at " - "index %s", next_index) - # Workaround urllib3 bug #718. After a ReadTimeout, the - # connection is incorrectly recycled. - http = None - continue - except: - _log.exception("Unexpected exception") - raise - else: - node = resp_body["node"] - key = node["key"] - value = node.get("value") - modified_index = node["modifiedIndex"] - result_queue.put((modified_index, key, value)) - next_index = modified_index + 1 - finally: - result_queue.put(None) - - class EtcdDriver(object): def __init__(self, felix_sck): self._felix_sck = felix_sck @@ -520,7 +480,7 @@ def _start_watcher(self, snapshot_index): # will always access the current queue and event. If it used self.xyz # to access them then an old thread that is shutting down could access # a new queue. - self._watcher_thread = Thread(target=watch_etcd, + self._watcher_thread = Thread(target=self.watch_etcd, args=(snapshot_index + 1, self._watcher_queue, self._watcher_stop_event), @@ -581,8 +541,86 @@ def _flush(self): self._buf = BytesIO() self._updates_pending = 0 + def watch_etcd(self, next_index, event_queue, stop_event): + """ + Thread: etcd watcher thread. Watched etcd for changes and + sends them over the queue to the resync thread, which owns + the socket to Felix. + + Note: it is important that we pass the index, queue and event + as parameters to ensure that each watcher thread only touches + the versions of those values that were created for it as + opposed to a later-created watcher thread. + + :param next_index: The etcd index to start watching from. + :param event_queue: Queue of updates back to the resync thread. + :param stop_event: Event used to stop this thread when it is no + longer needed. 
+        """
+        _log.info("Watcher thread started")
+        http = None
+        try:
+            while not stop_event.is_set():
+                if not http:
+                    _log.info("No HTTP pool, creating one...")
+                    http = HTTPConnectionPool("localhost", 4001, maxsize=1)
+                try:
+                    _log.debug("Waiting on etcd index %s", next_index)
+                    resp = http.request(
+                        "GET",
+                        "http://localhost:4001/v2/keys/calico/v1",
+                        fields={"recursive": "true",
+                                "wait": "true",
+                                "waitIndex": next_index},
+                        timeout=90,
+                    )
+                    if resp.status != 200:
+                        _log.warning("etcd watch returned bad HTTP status: %s",
+                                     resp.status)
+                    resp_body = resp.data
+                except ReadTimeoutError:
+                    _log.exception("Watch read timed out, restarting watch at "
+                                   "index %s", next_index)
+                    # Workaround urllib3 bug #718. After a ReadTimeout, the
+                    # connection is incorrectly recycled.
+                    http = None
+                    continue
+                try:
+                    etcd_resp = loads(resp_body)
+                    if "errorCode" in etcd_resp:
+                        _log.error("Error from etcd: %s; triggering a resync.",
+                                   etcd_resp)
+                        break
+                    node = etcd_resp["node"]
+                    key = node["key"]
+                    value = node.get("value")
+                    modified_index = node["modifiedIndex"]
+                except (KeyError, TypeError, ValueError):
+                    _log.exception("Unexpected format for etcd response: %r; "
+                                   "triggering a resync.",
+                                   resp_body)
+                    break
+                else:
+                    event_queue.put((modified_index, key, value))
+                    next_index = modified_index + 1
+        except:
+            _log.exception("Exception finishing watcher thread.")
+            raise
+        finally:
+            # Signal to the resync thread that we've exited.
+            _log.info("Watcher thread finished. Signalling to resync thread.")
+            event_queue.put(None)
+
 
 def parse_snapshot(resp):
+    """
+    Generator: iteratively parses the response to the etcd snapshot.
+
+    Generates tuples of the form (modifiedIndex, key, value) for each
+    leaf encountered in the snapshot.
+
+    :raises ResyncRequired if the snapshot contains an error response.
+    """
     if resp.status != 200:
         raise ResyncRequired("Read from etcd failed. HTTP status code %s",
                              resp.status)

From 67883613c9e3ee62d3d4f48b75dda048c43a81d1 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Wed, 21 Oct 2015 17:38:11 +0100
Subject: [PATCH 17/98] Check cluster ID on etcd responses.

---
 calico/etcddriver/driver.py | 44 +++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index 25f3b35e83..159b9a43a0 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -130,6 +130,7 @@ def __init__(self, felix_sck):
         self._buf = BytesIO()
         self._first_resync = True
         self._resync_http_pool = None
+        self._cluster_id = None
 
         # Set by the reader thread once the init message has been received
         # from Felix.
@@ -287,6 +288,7 @@ def _wait_for_ready(self):
                 timeout=5,
                 preload_content=True
             )
+            self._check_cluster_id(resp)
             try:
                 etcd_resp = json.loads(resp.data)
                 ready = etcd_resp["node"]["value"] == "true"
@@ -327,6 +329,7 @@ def _load_config(self, config_dir):
             timeout=5,
             preload_content=True
         )
+        self._check_cluster_id(resp)
         try:
             etcd_resp = json.loads(resp.data)
             if etcd_resp.get("errorCode") == 100:  # Not found
@@ -351,6 +354,7 @@ def _start_snapshot_request(self):
         :raises HTTPException
         :raises HTTPError
         :raises socket.error
+        :raises DriverShutdown if the etcd cluster ID changes.
""" _log.info("Loading snapshot headers...") resp = self._resync_http_pool.request( @@ -361,10 +365,40 @@ def _start_snapshot_request(self): preload_content=False ) snapshot_index = int(resp.getheader("x-etcd-index", 1)) + self._check_cluster_id(resp) + if not self._cluster_id: + _log.error("Snapshot response did not contain cluster ID, " + "resyncing to avoid inconsistency") + raise ResyncRequired() _log.info("Got snapshot headers, snapshot index is %s; starting " "watcher...", snapshot_index) return resp, snapshot_index + def _check_cluster_id(self, resp): + """ + Checks the x-etcd-cluster-id header for changes since the last call. + + On change, stops the driver and raises DriverShutdown. + :param resp: urllib3 Response object. + """ + cluster_id = resp.getheader("x-etcd-cluster-id") + if cluster_id: + if self._cluster_id: + if self._cluster_id != cluster_id: + _log.error("etcd cluster ID changed from %s to %s. " + "This invalidates our local state so Felix " + "must restart.", self._cluster_id, cluster_id) + self._stop_event.set() + raise DriverShutdown() + else: + _log.info("First successful read from etcd. Cluster ID: %s", + cluster_id) + self._cluster_id = cluster_id + else: + # Missing on certain error responses. + _log.warning("etcd response was missing cluster ID header, unable " + "to check cluster ID") + def _process_snapshot_and_events(self, etcd_response, snapshot_index): """ Processes the etcd snapshot response incrementally while, concurrently, @@ -543,10 +577,12 @@ def _flush(self): def watch_etcd(self, next_index, event_queue, stop_event): """ - Thread: etcd watcher thread. Watched etcd for changes and + Thread: etcd watcher thread. Watches etcd for changes and sends them over the queue to the resync thread, which owns the socket to Felix. + Dies if it receives an error from etcd. + Note: it is important that we pass the index, queue and event as parameters to ensure that each watcher thread only touches the versions of those values that were created for it as @@ -573,11 +609,15 @@ def watch_etcd(self, next_index, event_queue, stop_event): "wait": "true", "waitIndex": next_index}, timeout=90, + # Don't pre-load so we can check the cluster ID before + # we wait for the body. + preload_content=False, ) if resp.status != 200: _log.warning("etcd watch returned bad HTTP status: %s", resp.status) - resp_body = resp.data + self._check_cluster_id(resp) + resp_body = resp.data # Force read inside try block. except ReadTimeoutError: _log.exception("Watch read timed out, restarting watch at " "index %s", next_index) From 59881a8c0d289431390afe6730f8ad11210f40f1 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 21 Oct 2015 17:40:49 +0100 Subject: [PATCH 18/98] Suppress error when reads time out on a watch. --- calico/etcddriver/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 159b9a43a0..93f5e9fe2b 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -619,8 +619,8 @@ def watch_etcd(self, next_index, event_queue, stop_event): self._check_cluster_id(resp) resp_body = resp.data # Force read inside try block. except ReadTimeoutError: - _log.exception("Watch read timed out, restarting watch at " - "index %s", next_index) + _log.debug("Watch read timed out, restarting watch at " + "index %s", next_index) # Workaround urllib3 bug #718. After a ReadTimeout, the # connection is incorrectly recycled. 
                     http = None

From 6eaab109d48c3f3ae93b531806ba9b93696c65ad Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Wed, 21 Oct 2015 17:57:04 +0100
Subject: [PATCH 19/98] Preliminary requirements update.

---
 felix_requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/felix_requirements.txt b/felix_requirements.txt
index 12ebfaf463..700c0e6c6e 100644
--- a/felix_requirements.txt
+++ b/felix_requirements.txt
@@ -3,3 +3,7 @@ greenlet
 netaddr
 python-etcd>=0.4.1
 posix-spawn>=0.2.post6
+PyTrie
+datrie
+ijson
+msgpack-python

From 70dec6973c087e0c9fe0906e26034c0f0068f074 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Thu, 22 Oct 2015 10:54:40 +0100
Subject: [PATCH 20/98] Allow ':' and '.' in etcd keys.

---
 calico/etcddriver/hwm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py
index b761a58de5..e1529216b9 100644
--- a/calico/etcddriver/hwm.py
+++ b/calico/etcddriver/hwm.py
@@ -30,7 +30,7 @@
 _log = logging.getLogger(__name__)
 
 
-TRIE_CHARS = string.ascii_letters + string.digits + "/_-"
+TRIE_CHARS = string.ascii_letters + string.digits + "/_-:."
 TRIE_CHARS_MATCH = re.compile(r'^[%s]+$' % re.escape(TRIE_CHARS))
 

From ad968cf055d8607ac8bcccf9975930c0f2946cc1 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Thu, 22 Oct 2015 13:19:02 +0100
Subject: [PATCH 21/98] Do not handle any updates from driver until begin_polling is set.

---
 calico/felix/fetcd.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index 3ca8a2f2e8..756b254a99 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -402,6 +402,7 @@ def loop(self):
                         # block because it's on the critical path.
                         msg_type = msg[MSG_KEY_TYPE]
                         if msg_type == MSG_TYPE_UPDATE:
+                            self.begin_polling.wait()
                             self._handle_update(msg)
                         elif msg_type == MSG_TYPE_CONFIG_LOADED:
                             self._handle_config_loaded(msg, driver_sck)

From 4ecd913e51e64d6b1fcdcdf7a8508e2e66df0d2c Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Thu, 22 Oct 2015 13:20:17 +0100
Subject: [PATCH 22/98] In driver, trigger resync if ready key is unset.

---
 calico/etcddriver/driver.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index 93f5e9fe2b..eafc354264 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -541,6 +541,9 @@ def _queue_update(self, key, value):
         Queues an update message to Felix.
         :raises FelixWriteFailed:
         """
+        if key == READY_KEY and value != "true":
+            _log.warning("Ready key no longer set to true, triggering resync.")
+            raise ResyncRequired()
         self._buf.write(msgpack.dumps({
             MSG_KEY_TYPE: MSG_TYPE_UPDATE,
             MSG_KEY_KEY: key,

From 8469f744e1083b8a7759a1ba0169c5af6390637f Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Thu, 22 Oct 2015 13:33:44 +0100
Subject: [PATCH 23/98] Pull out protocol constants into new file.
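
Both processes can now import the wire-format constants from the shared
module instead of Felix reaching into driver.py. A rough usage sketch,
assuming the values in protocol.py below (decode_updates is an
illustrative helper, not part of the patch):

    import msgpack
    from calico.etcddriver.protocol import (
        MSG_KEY_TYPE, MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE)

    def decode_updates(unpacker, data):
        # Feed a chunk of socket data into the shared msgpack Unpacker
        # and yield only the update messages as (key, value) pairs.
        unpacker.feed(data)
        for msg in unpacker:
            if msg[MSG_KEY_TYPE] == MSG_TYPE_UPDATE:
                yield msg[MSG_KEY_KEY], msg[MSG_KEY_VALUE]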
--- calico/etcddriver/driver.py | 49 +++++++++------------------------ calico/etcddriver/protocol.py | 51 +++++++++++++++++++++++++++++++++++ calico/felix/fetcd.py | 7 +---- 3 files changed, 64 insertions(+), 43 deletions(-) create mode 100644 calico/etcddriver/protocol.py diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index eafc354264..df7697de8a 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -50,6 +50,7 @@ from urllib3.exceptions import ReadTimeoutError from calico.common import complete_logging +from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, dir_for_per_host_config from calico.etcddriver.hwm import HighWaterTracker @@ -58,35 +59,6 @@ FLUSH_THRESHOLD = 200 -MSG_KEY_TYPE = "type" - -# Init message Felix -> Driver. -MSG_TYPE_INIT = "init" -MSG_KEY_ETCD_URL = "etcd_url" -MSG_KEY_HOSTNAME = "hostname" - -# Config loaded message Driver -> Felix. -MSG_TYPE_CONFIG_LOADED = "config_loaded" -MSG_KEY_GLOBAL_CONFIG = "global" -MSG_KEY_HOST_CONFIG = "host" - -# Config message Felix -> Driver. -MSG_TYPE_CONFIG = "conf" -MSG_KEY_LOG_FILE = "log_file" -MSG_KEY_SEV_FILE = "sev_file" -MSG_KEY_SEV_SCREEN = "sev_screen" -MSG_KEY_SEV_SYSLOG = "sev_syslog" - -MSG_TYPE_STATUS = "stat" -MSG_KEY_STATUS = "status" -STATUS_WAIT_FOR_READY = "wait-for-ready" -STATUS_RESYNC = "resync" -STATUS_IN_SYNC = "in-sync" - -MSG_TYPE_UPDATE = "u" -MSG_KEY_KEY = "k" -MSG_KEY_VALUE = "v" - # etcd response data looks like this: # {u'action': u'set', @@ -412,7 +384,7 @@ def _process_snapshot_and_events(self, etcd_response, snapshot_index): if snap_mod > old_hwm: # This specific key's HWM is newer than the previous # version we've seen, send an update. - self._queue_update(snap_key, snap_value) + self._on_key_updated(snap_key, snap_value) # After we process an update from the snapshot, process # several updates from the watcher queue (if there are @@ -468,7 +440,7 @@ def _scan_for_deletions(self, snapshot_index): for ev_key in deleted_keys: # We didn't see the value during the snapshot or via # the event queue. It must have been deleted. - self._queue_update(ev_key, None) + self._on_key_updated(ev_key, None) _log.info("Found %d deleted keys", len(deleted_keys)) def _handle_next_watcher_event(self): @@ -496,13 +468,13 @@ def _handle_next_watcher_event(self): if ev_val is not None: # Normal update. self._hwms.update_hwm(ev_key, ev_mod) - self._queue_update(ev_key, ev_val) + self._on_key_updated(ev_key, ev_val) else: # Deletion. deleted_keys = self._hwms.store_deletion(ev_key, ev_mod) for child_key in deleted_keys: - self._queue_update(child_key, None) + self._on_key_updated(child_key, None) def _start_watcher(self, snapshot_index): """ @@ -536,14 +508,17 @@ def get_etcd_connection(self): self._etcd_url_parts.port or 2379, maxsize=1) - def _queue_update(self, key, value): + def _on_key_updated(self, key, value): + if key == READY_KEY and value != "true": + _log.warning("Ready key no longer set to true, triggering resync.") + raise ResyncRequired() + self._queue_update_msg(key, value) + + def _queue_update_msg(self, key, value): """ Queues an update message to Felix. 
:raises FelixWriteFailed: """ - if key == READY_KEY and value != "true": - _log.warning("Ready key no longer set to true, triggering resync.") - raise ResyncRequired() self._buf.write(msgpack.dumps({ MSG_KEY_TYPE: MSG_TYPE_UPDATE, MSG_KEY_KEY: key, diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py new file mode 100644 index 0000000000..105314947d --- /dev/null +++ b/calico/etcddriver/protocol.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.protocol +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Protocol constants for Felix <-> Driver protocol. +""" + +MSG_KEY_TYPE = "type" + +# Init message Felix -> Driver. +MSG_TYPE_INIT = "init" +MSG_KEY_ETCD_URL = "etcd_url" +MSG_KEY_HOSTNAME = "hostname" + +# Config loaded message Driver -> Felix. +MSG_TYPE_CONFIG_LOADED = "config_loaded" +MSG_KEY_GLOBAL_CONFIG = "global" +MSG_KEY_HOST_CONFIG = "host" + +# Config message Felix -> Driver. +MSG_TYPE_CONFIG = "conf" +MSG_KEY_LOG_FILE = "log_file" +MSG_KEY_SEV_FILE = "sev_file" +MSG_KEY_SEV_SCREEN = "sev_screen" +MSG_KEY_SEV_SYSLOG = "sev_syslog" + +# Status message Driver -> Felix. +MSG_TYPE_STATUS = "stat" +MSG_KEY_STATUS = "status" +STATUS_WAIT_FOR_READY = "wait-for-ready" +STATUS_RESYNC = "resync" +STATUS_IN_SYNC = "in-sync" + +# Update message Driver -> Felix. +MSG_TYPE_UPDATE = "u" +MSG_KEY_KEY = "k" +MSG_KEY_VALUE = "v" diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 756b254a99..76c452f97f 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -29,12 +29,7 @@ import subprocess import msgpack import time -from calico.etcddriver.driver import MSG_KEY_TYPE, MSG_KEY_ETCD_URL, \ - MSG_KEY_HOSTNAME, MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE, \ - MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG, \ - MSG_TYPE_CONFIG, MSG_KEY_LOG_FILE, MSG_KEY_SEV_FILE, MSG_KEY_SEV_SCREEN, \ - MSG_KEY_SEV_SYSLOG, MSG_KEY_STATUS, MSG_TYPE_STATUS -from calico.etcddriver.driver import MSG_TYPE_INIT +from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time from etcd import EtcdException, EtcdKeyNotFound From 2f417096e22c80b39f65b1098b58d8cfb503b887 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 22 Oct 2015 14:17:39 +0100 Subject: [PATCH 24/98] Cleanup: remove now-unused code. We no longer need to handle directory deletions in Felix because the driver does that for us. 
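
For reference, the driver-side mechanism that replaces the deleted
handlers: when the watcher reports a deletion of a key (which may cover a
whole directory), the HighWaterTracker expands it into the leaf keys
beneath it, so Felix only ever sees per-key deletes. Sketched from the
driver's event handling (hwms is the HighWaterTracker; _on_key_updated
queues the message to Felix):

    # The watcher reported a deletion event (ev_mod, ev_key, None):
    deleted_keys = self._hwms.store_deletion(ev_key, ev_mod)
    for child_key in deleted_keys:
        # Felix receives an ordinary per-key deletion for each leaf.
        self._on_key_updated(child_key, None)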
---
 calico/etcddriver/driver.py     |  34 ++++----
 calico/felix/fetcd.py           | 134 +++++++-------------------------
 calico/felix/test/test_fetcd.py | 121 ----------------------------
 felix_requirements.txt          |   1 -
 4 files changed, 40 insertions(+), 250 deletions(-)

diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index df7697de8a..fe61b5ffed 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -34,8 +34,10 @@
 import errno
 from httplib import HTTPException
 from io import BytesIO
-from json import loads
-import json
+try:
+    import simplejson as json
+except ImportError:
+    import json
 import logging
 from Queue import Queue, Empty
 import socket
@@ -60,24 +62,6 @@
 FLUSH_THRESHOLD = 200
 
 
-# etcd response data looks like this:
-# {u'action': u'set',
-#  u'node': {u'createdIndex': 2095663, u'modifiedIndex': 2095663,
-#            u'value': u'{"name": "tap000174", "profile_id": "prof-174", '
-#                      u'"state": "active", "ipv6_nets": [], '
-#                      u'"mac": "63:4e:60:d9:91:a6", "ipv4_nets": '
-#                      u'["1.0.0.174/32"]}',
-#            u'key': u'/calico/v1/host/host_bloop/workload/orch/'
-#                    u'endpoint_175/endpoint/endpoint_175'},
-#  u'prevNode': {u'createdIndex': 2025647, u'modifiedIndex': 2025647,
-#                u'value': u'{"name": "tap000174", "profile_id": '
-#                          u'"prof-174", "state": "active", '
-#                          u'"ipv6_nets": [], "mac": "37:95:03:e2:f3:6c", '
-#                          u'"ipv4_nets": ["1.0.0.174/32"]}',
-#                u'key': u'/calico/v1/host/host_bloop/workload/orch/'
-#                        u'endpoint_175/endpoint/endpoint_175'}}
-
-
 class EtcdDriver(object):
     def __init__(self, felix_sck):
         self._felix_sck = felix_sck
@@ -509,6 +493,14 @@ def get_etcd_connection(self):
                                   maxsize=1)
 
     def _on_key_updated(self, key, value):
+        """
+        Called when we've worked out that a key has been updated/deleted.
+
+        Does any local processing and sends the update to Felix.
+        :param str key: The etcd key that has changed.
+        :param str|NoneType value: the new value of the key (None indicates
+            deletion).
+        """
         if key == READY_KEY and value != "true":
             _log.warning("Ready key no longer set to true, triggering resync.")
             raise ResyncRequired()
@@ -604,7 +596,7 @@ def watch_etcd(self, next_index, event_queue, stop_event):
                 http = None
                 continue
             try:
-                etcd_resp = loads(resp_body)
+                etcd_resp = json.loads(resp_body)
                 if "errorCode" in etcd_resp:
                     _log.error("Error from etcd: %s; triggering a resync.",
                                etcd_resp)
                     break
diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index 76c452f97f..efc7825a5d 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -19,7 +19,6 @@
 Our API to etcd.  Contains function to synchronize felix with etcd
 as well as reporting our status into etcd.
 """
-from collections import defaultdict
 import functools
 import os
 import random
@@ -39,7 +38,7 @@
 
 from calico import common
 from calico.common import ValidationFailed, validate_ip_addr, canonicalise_ip
-from calico.datamodel_v1 import (VERSION_DIR, READY_KEY, CONFIG_DIR,
+from calico.datamodel_v1 import (VERSION_DIR, CONFIG_DIR,
                                  RULES_KEY_RE, TAGS_KEY_RE,
                                  dir_for_per_host_config,
                                  PROFILE_DIR, HOST_DIR, EndpointId, POLICY_DIR,
@@ -55,8 +54,6 @@
 from calico.felix.futils import (intern_dict, intern_list, logging_exceptions,
                                  iso_utc_timestamp, IPV4, IPV6)
 
-from pytrie import Trie
-
 _log = logging.getLogger(__name__)
 
 
@@ -93,7 +90,6 @@
     POOL_V4_DIR,
 ]
 
-trie = Trie()
 
 class EtcdAPI(EtcdClientOwner, Actor):
     """
@@ -311,10 +307,6 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset):
         # Polling state initialized at poll start time.
self.splitter = None - # Cache of known endpoints, used to resolve deletions of whole - # directory trees. - self.endpoint_ids_per_host = defaultdict(set) - # Next-hop IP addresses of our hosts, if populated in etcd. self.ipv4_by_hostname = {} @@ -337,20 +329,13 @@ def _register_paths(self): # resync. for key in RESYNC_KEYS: reg(key, on_del=self._resync) - reg(READY_KEY, on_set=self.on_ready_flag_set, on_del=self._resync) # Profiles and their contents. - reg(PER_PROFILE_DIR, on_del=self.on_profile_delete) reg(TAGS_KEY, on_set=self.on_tags_set, on_del=self.on_tags_delete) reg(RULES_KEY, on_set=self.on_rules_set, on_del=self.on_rules_delete) # Hosts, workloads and endpoints. - reg(PER_HOST_DIR, on_del=self.on_host_delete) reg(HOST_IP_KEY, on_set=self.on_host_ip_set, on_del=self.on_host_ip_delete) - reg(WORKLOAD_DIR, on_del=self.on_host_delete) - reg(PER_ORCH_DIR, on_del=self.on_orch_delete) - reg(PER_WORKLOAD_DIR, on_del=self.on_workload_delete) - reg(ENDPOINT_DIR, on_del=self.on_workload_delete) reg(PER_ENDPOINT_KEY, on_set=self.on_endpoint_set, on_del=self.on_endpoint_delete) reg(CIDR_V4_KEY, @@ -362,11 +347,11 @@ def _register_paths(self): # explicitly set to the default, say), Felix terminates allowing the # init daemon to restart it. reg(CONFIG_PARAM_KEY, - on_set=self._resync, - on_del=self._resync) + on_set=self._on_config_updated, + on_del=self._on_config_updated) reg(PER_HOST_CONFIG_PARAM_KEY, - on_set=self._resync, - on_del=self._resync) + on_set=self._on_host_config_updated, + on_del=self._on_host_config_updated) @logging_exceptions def _run(self): @@ -387,8 +372,6 @@ def loop(self): driver_sck = self.start_driver() unpacker = msgpack.Unpacker() - read_count = 0 - last_time = monotonic_time() while True: data = driver_sck.recv(16384) unpacker.feed(data) @@ -426,7 +409,7 @@ def _handle_update(self, msg): if self.read_count % 1000 == 0: now = monotonic_time() delta = now - self.last_rate_log_time - _log.warn("Processed %s updates from driver " + _log.info("Processed %s updates from driver " "%.1f/s", self.read_count, 1000.0 / delta) self.last_rate_log_time = now n = Node() @@ -451,7 +434,7 @@ def _handle_config_loaded(self, msg, driver_sck): # has changed and die if it has. _log.info("Checking configuration for changes...") if (host_config != self.last_host_config or - global_config != self.last_global_config): + global_config != self.last_global_config): _log.warning("Felix configuration has changed, " "felix must restart.") _log.info("Old host config: %s", self.last_host_config) @@ -492,7 +475,7 @@ def start_driver(self): _log.info("Creating server socket.") try: os.unlink("/run/felix-driver.sck") - except: + except OSError: pass update_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) @@ -507,7 +490,7 @@ def start_driver(self): # No longer need the server socket, remove it. 
         try:
             os.unlink("/run/felix-driver.sck")
-        except:
+        except OSError:
             _log.exception("Failed to unlink socket")
         else:
             _log.info("Unlinked server socket")
@@ -650,17 +633,12 @@ def _resync(self, response, **kwargs):
         _log.warning("Resync triggered due to change to %s", response.key)
         raise ResyncRequired()
 
-    def on_ready_flag_set(self, response):
-        if response.value != "true":
-            raise ResyncRequired()
-
     def on_endpoint_set(self, response, hostname, orchestrator,
                         workload_id, endpoint_id):
         """Handler for endpoint updates, passes the update to the splitter."""
         combined_id = EndpointId(hostname, orchestrator, workload_id,
                                  endpoint_id)
         _log.debug("Endpoint %s updated", combined_id)
-        #self.endpoint_ids_per_host[combined_id.host].add(combined_id)
         endpoint = parse_endpoint(self._config, combined_id, response.value)
         self.splitter.on_endpoint_update(combined_id, endpoint, async=True)
 
@@ -670,9 +648,6 @@ def on_endpoint_delete(self, response, hostname, orchestrator,
         combined_id = EndpointId(hostname, orchestrator, workload_id,
                                  endpoint_id)
         _log.debug("Endpoint %s deleted", combined_id)
-        #self.endpoint_ids_per_host[combined_id.host].discard(combined_id)
-        # if not self.endpoint_ids_per_host[combined_id.host]:
-        #     del self.endpoint_ids_per_host[combined_id.host]
         self.splitter.on_endpoint_update(combined_id, None, async=True)
 
     def on_rules_set(self, response, profile_id):
@@ -699,30 +674,6 @@ def on_tags_delete(self, response, profile_id):
         _log.debug("Tags for %s deleted", profile_id)
         self.splitter.on_tags_update(profile_id, None, async=True)
 
-    def on_profile_delete(self, response, profile_id):
-        """
-        Handler for a whole profile deletion
-
-        Fakes a tag and rules delete.
-        """
-        # Fake deletes for the rules and tags.
-        _log.debug("Whole profile %s deleted", profile_id)
-        self.splitter.on_rules_update(profile_id, None, async=True)
-        self.splitter.on_tags_update(profile_id, None, async=True)
-
-    def on_host_delete(self, response, hostname):
-        """
-        Handler for deletion of a whole host directory.
-
-        Deletes all the contained endpoints.
-        """
-        ids_on_that_host = self.endpoint_ids_per_host.pop(hostname, set())
-        _log.info("Host %s deleted, removing %d endpoints",
-                  hostname, len(ids_on_that_host))
-        for endpoint_id in ids_on_that_host:
-            self.splitter.on_endpoint_update(endpoint_id, None, async=True)
-        self.on_host_ip_delete(response, hostname)
-
     def on_host_ip_set(self, response, hostname):
         if not self._config.IP_IN_IP_ENABLED:
             _log.debug("Ignoring update to %s because IP-in-IP is disabled",
@@ -747,6 +698,23 @@ def on_host_ip_delete(self, response, hostname):
             self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(),
                                              async=True)
 
+    def _on_config_updated(self, response, config_param):
+        new_value = response.value
+        if self.last_global_config.get(config_param) != new_value:
+            _log.critical("Global config value %s updated. Felix must be "
+                          "restarted.", config_param)
+            die_and_restart()
+
+    def _on_host_config_updated(self, response, hostname, config_param):
+        if hostname != self._config.HOSTNAME:
+            _log.debug("Ignoring config update for host %s", hostname)
+            return
+        new_value = response.value
+        if self.last_host_config.get(config_param) != new_value:
+            _log.critical("Per-host config value %s updated. Felix must be "
+                          "restarted.", config_param)
+            die_and_restart()
+
     def on_ipam_v4_pool_set(self, response, pool_id):
         pool = parse_ipam_pool(pool_id, response.value)
         self.splitter.on_ipam_pool_update(pool_id, pool, async=True)
@@ -754,41 +722,6 @@ def on_ipam_v4_pool_delete(self, response, pool_id):
         self.splitter.on_ipam_pool_update(pool_id, None, async=True)
 
-    def on_orch_delete(self, response, hostname, orchestrator):
-        """
-        Handler for deletion of a whole host orchestrator directory.
-
-        Deletes all the contained endpoints.
-        """
-        _log.info("Orchestrator dir %s/%s deleted, removing contained hosts",
-                  hostname, orchestrator)
-        orchestrator = intern(orchestrator.encode("utf8"))
-        for endpoint_id in list(self.endpoint_ids_per_host[hostname]):
-            if endpoint_id.orchestrator == orchestrator:
-                self.splitter.on_endpoint_update(endpoint_id, None, async=True)
-            self.endpoint_ids_per_host[hostname].discard(endpoint_id)
-        if not self.endpoint_ids_per_host[hostname]:
-            del self.endpoint_ids_per_host[hostname]
-
-    def on_workload_delete(self, response, hostname, orchestrator,
-                           workload_id):
-        """
-        Handler for deletion of a whole workload directory.
-
-        Deletes all the contained endpoints.
-        """
-        _log.debug("Workload dir %s/%s/%s deleted, removing endpoints",
-                   hostname, orchestrator, workload_id)
-        orchestrator = intern(orchestrator.encode("utf8"))
-        workload_id = intern(workload_id.encode("utf8"))
-        for endpoint_id in list(self.endpoint_ids_per_host[hostname]):
-            if (endpoint_id.orchestrator == orchestrator and
-                    endpoint_id.workload == workload_id):
-                self.splitter.on_endpoint_update(endpoint_id, None, async=True)
-            self.endpoint_ids_per_host[hostname].discard(endpoint_id)
-        if not self.endpoint_ids_per_host[hostname]:
-            del self.endpoint_ids_per_host[hostname]
-
 
 class EtcdStatusReporter(EtcdClientOwner, Actor):
     """
@@ -952,19 +885,6 @@ def die_and_restart():
     os._exit(1)
 
 
-def _build_config_dict(cfg_node):
-    """
-    Updates the config dict provided from the given etcd node, which
-    should point at a config directory.
-    """
-    config_dict = {}
-    for child in cfg_node.children:
-        key = child.key.rsplit("/").pop()
-        value = str(child.value)
-        config_dict[key] = value
-    return config_dict
-
-
 # Intern JSON keys as we load them to reduce occupancy.
 FIELDS_TO_INTERN = set([
     # Endpoint dicts.
It doesn't seem worth interning items like the MAC @@ -1130,4 +1050,4 @@ def __init__(self): self.key = None self.value = None self.action = None - self.current_key = None \ No newline at end of file + self.current_key = None diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 094735c05d..789e3fd461 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -206,43 +206,6 @@ def test_load_config(self, m_die, m_build_dict, m_sleep): self.watcher._load_config() m_die.assert_called_once_with() - def test_on_snapshot_loaded(self): - m_response = Mock() - - endpoint_on_host = Mock() - endpoint_on_host.key = ("/calico/v1/host/hostname/workload/" - "orch/wlid/endpoint/epid") - endpoint_on_host.value = ENDPOINT_STR - - bad_endpoint_on_host = Mock() - bad_endpoint_on_host.key = ("/calico/v1/host/hostname/workload/" - "orch/wlid/endpoint/epid2") - bad_endpoint_on_host.value = ENDPOINT_STR[:10] - - endpoint_not_on_host = Mock() - endpoint_not_on_host.key = ("/calico/v1/host/other/workload/" - "orch/wlid/endpoint/epid") - endpoint_not_on_host.value = ENDPOINT_STR - - still_ready = Mock() - still_ready.key = ("/calico/v1/Ready") - still_ready.value = "true" - - m_response.children = [ - endpoint_on_host, - bad_endpoint_on_host, - endpoint_not_on_host, - still_ready, - ] - with patch.object(self.watcher, - "clean_up_endpoint_statuses") as m_clean: - self.watcher._on_snapshot_loaded(m_response) - - # Cleanup should only get the endpoints on our host. - m_clean.assert_called_once_with( - set([EndpointId("hostname", "orch", "wlid", "epid")]) - ) - def test_resync_flag(self): self.watcher.resync_after_current_poll = True self.watcher.next_etcd_index = 1 @@ -284,60 +247,6 @@ def test_endpoint_set_invalid(self): async=True, ) - def test_parent_dir_delete(self): - """ - Test that deletions of parent directories of endpoints are - correctly handled. - """ - # This additional endpoint should be ignored by the deletes below. - self.dispatch("/calico/v1/host/h2/workload/o1/w2/endpoint/e2", - "set", value=ENDPOINT_STR) - for path in ["/calico/v1/host/h1", - "/calico/v1/host/h1/workload", - "/calico/v1/host/h1/workload/o1", - "/calico/v1/host/h1/workload/o1/w1", - "/calico/v1/host/h1/workload/o1/w1/endpoint"]: - # Create endpoints in the cache. - self.dispatch("/calico/v1/host/h1/workload/o1/w1/endpoint/e1", - "set", value=ENDPOINT_STR) - self.dispatch("/calico/v1/host/h1/workload/o1/w1/endpoint/e2", - "set", value=ENDPOINT_STR) - # This endpoint should not get cleaned up if only workload w1 is - # deleted... - self.dispatch("/calico/v1/host/h1/workload/o1/w3/endpoint/e3", - "set", value=ENDPOINT_STR) - - self.assertEqual(self.watcher.endpoint_ids_per_host, { - "h1": set([EndpointId("h1", "o1", "w1", "e1"), - EndpointId("h1", "o1", "w1", "e2"), - EndpointId("h1", "o1", "w3", "e3")]), - "h2": set([EndpointId("h2", "o1", "w2", "e2")]), - }) - self.m_splitter.on_endpoint_update.reset_mock() - # Delete one of its parent dirs, should delete the endpoint. - self.dispatch(path, "delete") - exp_calls = [ - call(EndpointId("h1", "o1", "w1", "e1"), None, async=True), - call(EndpointId("h1", "o1", "w1", "e2"), None, async=True), - ] - if path < "/calico/v1/host/h1/workload/o1/w1": - # Should also delete workload w3. - exp_calls.append(call(EndpointId("h1", "o1", "w3", "e3"), - None, async=True)) - self.m_splitter.on_endpoint_update.assert_has_calls(exp_calls, - any_order=True) - # Cache should be cleaned up. 
- exp_cache = {"h2": set([EndpointId("h2", "o1", "w2", "e2")])} - if path >= "/calico/v1/host/h1/workload/o1/w1": - # Should not have deleted workload w3. Add it in. - exp_cache["h1"] = set([EndpointId("h1", "o1", "w3", "e3")]) - self.assertEqual(self.watcher.endpoint_ids_per_host, exp_cache) - - # Then simulate another delete, should have no effect. - self.m_splitter.on_endpoint_update.reset_mock() - self.dispatch(path, "delete") - self.assertFalse(self.m_splitter.on_endpoint_update.called) - def test_rules_set(self): self.dispatch("/calico/v1/policy/profile/prof1/rules", "set", value=RULES_STR) @@ -380,30 +289,6 @@ def test_tags_set_invalid(self): None, async=True) - def test_dispatch_delete_resync(self): - """ - Test dispatcher is correctly configured to trigger resync for - expected paths. - """ - for key in ["/calico/v1", - "/calico/v1/host", - "/calico/v1/policy", - "/calico/v1/policy/profile", - "/calico/v1/config", - "/calico/v1/config/Foo", - "/calico/v1/Ready",]: - self.assertRaises(ResyncRequired, self.dispatch, key, "delete") - - def test_per_profile_del(self): - """ - Test profile deletion triggers deletion for tags and rules. - """ - self.dispatch("/calico/v1/policy/profile/profA", action="delete") - self.m_splitter.on_tags_update.assert_called_once_with("profA", None, - async=True) - self.m_splitter.on_rules_update.assert_called_once_with("profA", None, - async=True) - def test_tags_del(self): """ Test tag-only deletion. @@ -499,12 +384,6 @@ def test_host_del_clears_ip(self): async=True, ) - def test_config_update_triggers_resync(self): - self.assertRaises(ResyncRequired, self.dispatch, - "/calico/v1/config/Foo", "set", "bar") - self.assertRaises(ResyncRequired, self.dispatch, - "/calico/v1/host/foo/config/Foo", "set", "bar") - @patch("os._exit", autospec=True) @patch("gevent.sleep", autospec=True) def test_die_and_restart(self, m_sleep, m_exit): diff --git a/felix_requirements.txt b/felix_requirements.txt index 700c0e6c6e..ff1ff6148b 100644 --- a/felix_requirements.txt +++ b/felix_requirements.txt @@ -3,7 +3,6 @@ greenlet netaddr python-etcd>=0.4.1 posix-spawn>=0.2.post6 -PyTrie datrie ijson msgpack-python From d253497a90a5b7ddaf5e2314789644ef299bb67f Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 22 Oct 2015 15:24:50 +0100 Subject: [PATCH 25/98] Rename driver handler methods. 
--- calico/felix/fetcd.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index efc7825a5d..ac886a0981 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -381,11 +381,11 @@ def loop(self): msg_type = msg[MSG_KEY_TYPE] if msg_type == MSG_TYPE_UPDATE: self.begin_polling.wait() - self._handle_update(msg) + self._on_update_from_driver(msg) elif msg_type == MSG_TYPE_CONFIG_LOADED: - self._handle_config_loaded(msg, driver_sck) + self._on_config_loaded_from_driver(msg, driver_sck) elif msg_type == MSG_TYPE_STATUS: - self._handle_status(msg) + self._on_status_from_driver(msg) else: raise RuntimeError("Unexpected message %s" % msg) @@ -401,7 +401,7 @@ def loop(self): raise _log.info("%s.loop() stopped due to self.stop == True", self) - def _handle_update(self, msg): + def _on_update_from_driver(self, msg): assert self.configured.is_set() key = msg[MSG_KEY_KEY] value = msg[MSG_KEY_VALUE] @@ -421,7 +421,7 @@ def _handle_update(self, msg): except ResyncRequired: _log.warning("IGNORING RESYNC.") - def _handle_config_loaded(self, msg, driver_sck): + def _on_config_loaded_from_driver(self, msg, driver_sck): global_config = msg[MSG_KEY_GLOBAL_CONFIG] host_config = msg[MSG_KEY_HOST_CONFIG] _log.info("Config loaded by driver:\n" @@ -467,7 +467,7 @@ def _handle_config_loaded(self, msg, driver_sck): })) self.configured.set() - def _handle_status(self, msg): + def _on_status_from_driver(self, msg): status = msg[MSG_KEY_STATUS] _log.info("etcd driver status changed to %s", status) From 2c7b9f9f5ee8ea11e8754789a7f0f869e6d333ae Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 22 Oct 2015 15:25:56 +0100 Subject: [PATCH 26/98] Implement in-sync handling for tag manager. * Plumb through in-sync message. * Defer programming of TagIpsets until we're in sync. --- calico/felix/endpoint.py | 4 ++ calico/felix/fetcd.py | 3 ++ calico/felix/ipsets.py | 93 ++++++++++++------------------------ calico/felix/profilerules.py | 16 ++----- calico/felix/splitter.py | 68 +++++++------------------- 5 files changed, 56 insertions(+), 128 deletions(-) diff --git a/calico/felix/endpoint.py b/calico/felix/endpoint.py index 6e2afcf28e..eab66b7257 100644 --- a/calico/felix/endpoint.py +++ b/calico/felix/endpoint.py @@ -86,6 +86,10 @@ def _on_object_started(self, endpoint_id, obj): ep = self.endpoints_by_id.get(endpoint_id) obj.on_endpoint_update(ep, async=True) + @actor_message() + def on_datamodel_in_sync(self): + _log.error("NOT IMPLEMENTED: EndpointManager.on_datamodel_in_sync()") + # @actor_message() # def apply_snapshot(self, endpoints_by_id): # # Tell the dispatch chains about the local endpoints in advance so diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index ac886a0981..f1d4ab1ddf 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -470,6 +470,9 @@ def _on_config_loaded_from_driver(self, msg, driver_sck): def _on_status_from_driver(self, msg): status = msg[MSG_KEY_STATUS] _log.info("etcd driver status changed to %s", status) + if status == STATUS_IN_SYNC: + self.begin_polling.wait() # Make sure splitter is set. + self.splitter.on_datamodel_in_sync(async=True) def start_driver(self): _log.info("Creating server socket.") diff --git a/calico/felix/ipsets.py b/calico/felix/ipsets.py index be1ca364ff..3be01b2bad 100644 --- a/calico/felix/ipsets.py +++ b/calico/felix/ipsets.py @@ -73,6 +73,7 @@ def __init__(self, ip_type, config): # May include non-live tag IDs. 
self._dirty_tags = set()
 self._force_reprogram = False
+ self._datamodel_in_sync = False

 def _create(self, tag_id):
 active_ipset = TagIpset(futils.uniquely_shorten(tag_id, 16),
@@ -82,11 +83,9 @@ def _create(self, tag_id):

 def _on_object_started(self, tag_id, active_ipset):
 _log.debug("TagIpset actor for %s started", tag_id)
- # Fill the ipset in with its members, this will trigger its first
- # programming, after which it will call us back to tell us it is ready.
- # We can't use self._dirty_tags to defer this in case the set becomes
- # unreferenced before _finish_msg_batch() is called.
- self._update_active_ipset(tag_id)
+ # We defer the update in order to delay updates until we're in-sync
+ # with the datamodel.
+ self._dirty_tags.add(tag_id)

 def _update_active_ipset(self, tag_id):
 """
@@ -96,6 +95,7 @@ def _update_active_ipset(self, tag_id):
 :param tag_id: The ID of the tag, must be an active tag.
 """
 assert self._is_starting_or_live(tag_id)
+ assert self._datamodel_in_sync
 active_ipset = self.objects_by_id[tag_id]
 members = frozenset(self.ip_owners_by_tag.get(tag_id, {}).iterkeys())
 active_ipset.replace_members(members,
@@ -120,51 +120,11 @@ def nets_key(self):
 nets = "ipv4_nets" if self.ip_type == IPV4 else "ipv6_nets"
 return nets

- # @actor_message()
- # def apply_snapshot(self, tags_by_prof_id, endpoints_by_id):
- # """
- # Apply a snapshot read from etcd, replacing existing state.
- #
- # :param tags_by_prof_id: A dict mapping security profile ID to a list of
- # profile tags.
- # :param endpoints_by_id: A dict mapping EndpointId objects to endpoint
- # data dicts.
- # """
- # _log.info("Applying tags snapshot. %s tags, %s endpoints",
- # len(tags_by_prof_id), len(endpoints_by_id))
- # missing_profile_ids = set(self.tags_by_prof_id.keys())
- # for profile_id, tags in tags_by_prof_id.iteritems():
- # assert tags is not None
- # self.on_tags_update(profile_id, tags)
- # missing_profile_ids.discard(profile_id)
- # self._maybe_yield()
- # for profile_id in missing_profile_ids:
- # self.on_tags_update(profile_id, None)
- # self._maybe_yield()
- # del missing_profile_ids
- # missing_endpoints = set(self.endpoint_data_by_ep_id.keys())
- # for endpoint_id, endpoint in endpoints_by_id.iteritems():
- # assert endpoint is not None
- # missing_endpoints.discard(endpoint_id)
- # endpoint_data = self.endpoint_data_by_ep_id.get(endpoint_id)
- # if endpoint_data:
- # profile_ids = set(endpoint.get("profile_ids", []))
- # nets_list = endpoint.get(self.nets_key, [])
- # ips = set(map(futils.net_to_ip, nets_list))
- # if (profile_ids == endpoint_data.profile_ids and
- # ips == endpoint_data.ip_addresses):
- # continue
- # endpoint_data = self._endpoint_data_from_dict(endpoint_id,
- # endpoint)
- # self._on_endpoint_data_update(endpoint_id, endpoint_data)
- # self._maybe_yield()
- # missing_endpoints.clear()
- # for endpoint_id in missing_endpoints:
- # self._on_endpoint_data_update(endpoint_id, EMPTY_ENDPOINT_DATA)
- # self._maybe_yield()
- # self._force_reprogram = True
- # _log.info("Tags snapshot applied: %s tags, %s endpoints",
- # len(tags_by_prof_id), len(endpoints_by_id))
+ @actor_message()
+ def on_datamodel_in_sync(self):
+ if not self._datamodel_in_sync:
+ _log.info("Datamodel now in sync, uncorking updates to TagIpsets")
+ self._datamodel_in_sync = True

 @actor_message()
 def cleanup(self):
@@ -458,8 +418,10 @@ def _finish_msg_batch(self, batch, results):
 operation. It also avoids wasted effort if tags are flapping.
""" super(IpsetManager, self)._finish_msg_batch(batch, results) - self._update_dirty_active_ipsets() - self._force_reprogram = False + if self._datamodel_in_sync: + _log.debug("Datamodel in sync, updating active TagIpsets.") + self._update_dirty_active_ipsets() + self._force_reprogram = False class EndpointData(object): @@ -532,7 +494,7 @@ def __init__(self, ipset, qualifier=None): self._ipset = ipset # Members - which entries should be in the ipset. None means - # "unknown", but this is updated immediately on actor startup. + # "unknown". The first update to this field triggers programming. self.members = None # Members which really are in the ipset; again None means "unknown". self.programmed_members = None @@ -572,7 +534,7 @@ def replace_members(self, members, force_reprogram=False): def _finish_msg_batch(self, batch, results): _log.debug("IpsetActor._finish_msg_batch() called") - if not self.stopped: + if not self.stopped and self.members is not None: self._sync_to_ipset() def _sync_to_ipset(self): @@ -628,19 +590,24 @@ def on_unreferenced(self): # Mark the object as stopped so that we don't accidentally recreate # the ipset in _finish_msg_batch. self.stopped = True - try: - self._ipset.delete() - finally: - self._notify_cleanup_complete() def _finish_msg_batch(self, batch, results): _log.debug("_finish_msg_batch on TagIpset") super(TagIpset, self)._finish_msg_batch(batch, results) - if not self.notified_ready: - # We have created the set, so we are now ready. - _log.debug("TagIpset _finish_msg_batch notifying ready") - self.notified_ready = True - self._notify_ready() + if self.programmed_members is not None: + # We've managed to program the set. + if self.stopped: + # Only clean up if we ever programmed the ipset. + self._ipset.delete() + elif not self.notified_ready: + # Notify that the set is now available for use. 
+ _log.debug("TagIpset _finish_msg_batch notifying ready") + self.notified_ready = True + self._notify_ready() + if self.stopped: + _log.debug("%s stopped, notifying cleanup complete.", self) + self._notify_cleanup_complete() + class Ipset(object): diff --git a/calico/felix/profilerules.py b/calico/felix/profilerules.py index 2246a2cba9..ceccd600f1 100644 --- a/calico/felix/profilerules.py +++ b/calico/felix/profilerules.py @@ -57,19 +57,9 @@ def _on_object_started(self, profile_id, active_profile): profile_or_none) active_profile.on_profile_update(profile_or_none, async=True) - # @actor_message() - # def apply_snapshot(self, rules_by_profile_id): - # _log.info("Rules manager applying snapshot; %s rules", - # len(rules_by_profile_id)) - # missing_ids = set(self.rules_by_profile_id.keys()) - # for profile_id, profile in rules_by_profile_id.iteritems(): - # self.on_rules_update(profile_id, profile, - # force_reprogram=True) # Skips queue - # missing_ids.discard(profile_id) - # self._maybe_yield() - # missing_ids.clear() - # for dead_profile_id in missing_ids: - # self.on_rules_update(dead_profile_id, None) + @actor_message() + def on_datamodel_in_sync(self): + _log.error("NOT IMPLEMENTED: RulesManager.on_datamodel_in_sync()") @actor_message() def on_rules_update(self, profile_id, profile, force_reprogram=False): diff --git a/calico/felix/splitter.py b/calico/felix/splitter.py index 707a738a4a..96b05ae340 100644 --- a/calico/felix/splitter.py +++ b/calico/felix/splitter.py @@ -50,58 +50,22 @@ def __init__(self, config, ipsets_mgrs, rules_managers, endpoint_managers, self.ipv4_masq_manager = ipv4_masq_manager self._cleanup_scheduled = False - # @actor_message() - # def apply_snapshot(self, rules_by_prof_id, tags_by_prof_id, - # endpoints_by_id, ipv4_pools_by_id): - # """ - # Replaces the whole cache state with the input. Applies deltas vs the - # current active state. - # - # :param rules_by_prof_id: A dict mapping security profile ID to a list - # of profile rules, each of which is a dict. - # :param tags_by_prof_id: A dict mapping security profile ID to a list of - # profile tags. - # :param endpoints_by_id: A dict mapping EndpointId objects to endpoint - # data dicts. - # :param ipv4_pools_by_id: A dict mapping IPAM pool ID to dicts - # representing the pool. - # """ - # # Step 1: fire in data update events to the profile and tag managers - # # so they can build their indexes before we activate anything. - # _log.info("Applying snapshot. Queueing rules.") - # for rules_mgr in self.rules_mgrs: - # rules_mgr.apply_snapshot(rules_by_prof_id, async=True) - # _log.info("Applying snapshot. Queueing tags/endpoints to ipset mgr.") - # for ipset_mgr in self.ipsets_mgrs: - # ipset_mgr.apply_snapshot(tags_by_prof_id, endpoints_by_id, - # async=True) - # - # # Step 2: fire in update events into the endpoint manager, which will - # # recursively trigger activation of profiles and tags. - # _log.info("Applying snapshot. Queueing endpoints->endpoint mgr.") - # for ep_mgr in self.endpoint_mgrs: - # ep_mgr.apply_snapshot(endpoints_by_id, async=True) - # - # # Step 3: send update to NAT manager. - # _log.info("Applying snapshot. Queueing IPv4 pools -> masq mgr.") - # self.ipv4_masq_manager.apply_snapshot(ipv4_pools_by_id, async=True) - # - # _log.info("Applying snapshot. DONE. 
%s rules, %s tags, " - # "%s endpoints, %s pools", len(rules_by_prof_id), - # len(tags_by_prof_id), len(endpoints_by_id), - # len(ipv4_pools_by_id)) - # - # # Since we don't wait for all the above processing to finish, set a - # # timer to clean up orphaned ipsets and tables later. If the snapshot - # # takes longer than this timer to apply then we might do the cleanup - # # before the snapshot is finished. That would cause dropped packets - # # until applying the snapshot finishes. - # if not self._cleanup_scheduled: - # _log.info("No cleanup scheduled, scheduling one.") - # gevent.spawn_later(self.config.STARTUP_CLEANUP_DELAY, - # functools.partial(self.trigger_cleanup, - # async=True)) - # self._cleanup_scheduled = True + @actor_message() + def on_datamodel_in_sync(self): + """ + Called when the data-model is known to be in-sync. + """ + for mgr in self.ipsets_mgrs + self.rules_mgrs + self.endpoint_mgrs: + mgr.on_datamodel_in_sync(async=True) + + # Now we're in sync, give the managers some time to get their house in + # order, then trigger the start-of-day cleanup. + if not self._cleanup_scheduled: + _log.info("No cleanup scheduled, scheduling one.") + gevent.spawn_later(self.config.STARTUP_CLEANUP_DELAY, + functools.partial(self.trigger_cleanup, + async=True)) + self._cleanup_scheduled = True @actor_message() def trigger_cleanup(self): From 678c601efab340f4d3b796952e9b874d25dd31f3 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 22 Oct 2015 16:11:18 +0100 Subject: [PATCH 27/98] Implement EndpointManager/DispatchChain in-sync processing. --- calico/felix/dispatch.py | 35 +++++++++++++++++-------------- calico/felix/endpoint.py | 45 +++++++++++++++++++--------------------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/calico/felix/dispatch.py b/calico/felix/dispatch.py index bdb6d466c0..174224943f 100644 --- a/calico/felix/dispatch.py +++ b/calico/felix/dispatch.py @@ -47,21 +47,26 @@ def __init__(self, config, ip_version, iptables_updater): self.ifaces = set() self.programmed_leaf_chains = set() self._dirty = False + self._datamodel_in_sync = False - # @actor_message() - # def apply_snapshot(self, ifaces): - # """ - # Replaces all known interfaces with the given snapshot and rewrites the - # chain. - # - # :param set[str] ifaces: The interface - # """ - # _log.info("Applying dispatch chains snapshot.") - # self.ifaces = set(ifaces) # Take a copy. - # # Always reprogram the chain, even if it's empty. This makes sure that - # # we resync and it stops the iptables layer from marking our chain as - # # missing. - # self._dirty = True + @actor_message() + def apply_snapshot(self, ifaces): + """ + Replaces all known interfaces with the given snapshot and rewrites the + chain. + + :param set[str] ifaces: The interface + """ + _log.info("Applying dispatch chains snapshot.") + self.ifaces = set(ifaces) # Take a copy. + # Always reprogram the chain, even if it's empty. This makes sure that + # we resync and it stops the iptables layer from marking our chain as + # missing. 
+ self._dirty = True
+
+ if not self._datamodel_in_sync:
+ _log.info("Datamodel in sync, unblocking dispatch chain updates")
+ self._datamodel_in_sync = True

 @actor_message()
 def on_endpoint_added(self, iface_name):
@@ -101,7 +106,7 @@ def on_endpoint_removed(self, iface_name):
 self._dirty = True

 def _finish_msg_batch(self, batch, results):
- if self._dirty:
+ if self._dirty and self._datamodel_in_sync:
 _log.debug("Interface mapping changed, reprogramming chains.")
 self._reprogram_chains()
 self._dirty = False
diff --git a/calico/felix/endpoint.py b/calico/felix/endpoint.py
index eab66b7257..485f1f3732 100644
--- a/calico/felix/endpoint.py
+++ b/calico/felix/endpoint.py
@@ -66,6 +66,8 @@ def __init__(self, config, ip_type,
 # increffed.
 self.local_endpoint_ids = set()

+ self._data_model_in_sync = False
+
 def _create(self, combined_id):
 """
 Overrides ReferenceManager._create()
@@ -88,30 +90,25 @@ def _on_object_started(self, endpoint_id, obj):

 @actor_message()
 def on_datamodel_in_sync(self):
- _log.error("NOT IMPLEMENTED: EndpointManager.on_datamodel_in_sync()")
-
- # @actor_message()
- # def apply_snapshot(self, endpoints_by_id):
- # # Tell the dispatch chains about the local endpoints in advance so
- # # that we don't flap the dispatch chain at start-of-day.
- # local_iface_name_to_ep_id = {}
- # for ep_id, ep in endpoints_by_id.iteritems():
- # if ep and ep_id.host == self.config.HOSTNAME and ep.get("name"):
- # local_iface_name_to_ep_id[ep.get("name")] = ep_id
- # self.dispatch_chains.apply_snapshot(local_iface_name_to_ep_id.keys(),
- # async=True)
- # # Then update/create endpoints and work out which endpoints have been
- # # deleted.
- # missing_endpoints = set(self.endpoints_by_id.keys())
- # for endpoint_id, endpoint in endpoints_by_id.iteritems():
- # self.on_endpoint_update(endpoint_id, endpoint,
- # force_reprogram=True)
- # missing_endpoints.discard(endpoint_id)
- # self._maybe_yield()
- # missing_endpoints.clear()
- # for endpoint_id in missing_endpoints:
- # self.on_endpoint_update(endpoint_id, None)
- # self._maybe_yield()
+ if not self._data_model_in_sync:
+ _log.info("%s: First time we've been in-sync with the datamodel, "
+ "sending snapshot to DispatchChains.", self)
+ self._data_model_in_sync = True
+
+ # Tell the dispatch chains about the local endpoints in advance so
+ # that we don't flap the dispatch chain at start-of-day. Note:
+ # the snapshot may contain information that is ahead of the
+ # state that our individual LocalEndpoint actors are sending to the
+ # DispatchChains actor. That is OK! The worst that can happen is
+ # that a LocalEndpoint undoes part of our update and then goes on
+ # to re-apply the update when it catches up to the snapshot.
+ local_ifaces = set()
+ for ep_id, ep in self.endpoints_by_id.iteritems():
+ if (ep and
+ ep_id.host == self.config.HOSTNAME and
+ ep.get("name")):
+ local_ifaces.add(ep.get("name"))
+ self.dispatch_chains.apply_snapshot(local_ifaces, async=True)

 @actor_message()
 def on_endpoint_update(self, endpoint_id, endpoint, force_reprogram=False):
From 3939686d0eb0002a253c554c451491803deb1f8e Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Fri, 23 Oct 2015 11:57:54 +0100
Subject: [PATCH 28/98] Block ProfileRules startup until we're in sync.

* Override _maybe_start() to block startup until we're in sync.
* Retry startup after in-sync message.
* Change IpsetManager to use consistent approach.
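
Sketch of the shared pattern (illustrative only; ExampleManager is a
stand-in, and the real implementations are in the diffs below):

    class ExampleManager(ReferenceManager):
        def __init__(self, *args, **kwargs):
            super(ExampleManager, self).__init__(*args, **kwargs)
            self._datamodel_in_sync = False

        def _maybe_start(self, obj_id):
            # Veto startup until the datamodel is in sync; the base
            # class does the usual liveness/cleanup checks.
            if self._datamodel_in_sync:
                return super(ExampleManager, self)._maybe_start(obj_id)
            _log.info("Delaying startup of %s; not in sync.", obj_id)

        @actor_message()
        def on_datamodel_in_sync(self):
            if not self._datamodel_in_sync:
                self._datamodel_in_sync = True
                self._maybe_start_all()  # Retry vetoed startups.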
--- calico/felix/ipsets.py | 50 +++++++++++++++++++-----------------
 calico/felix/profilerules.py | 25 ++++++++++++++++--
 calico/felix/refcount.py | 21 ++++++++++++---
 3 files changed, 67 insertions(+), 29 deletions(-)

diff --git a/calico/felix/ipsets.py b/calico/felix/ipsets.py
index 3be01b2bad..5065ed97c5 100644
--- a/calico/felix/ipsets.py
+++ b/calico/felix/ipsets.py
@@ -81,11 +81,21 @@ def _create(self, tag_id):
 max_elem=self._config.MAX_IPSET_SIZE)
 return active_ipset

+ def _maybe_start(self, obj_id):
+ if self._datamodel_in_sync:
+ _log.debug("Datamodel is in-sync, deferring to superclass.")
+ return super(IpsetManager, self)._maybe_start(obj_id)
+ else:
+ _log.info("Delaying startup of tag %s because datamodel is "
+ "not in sync.", obj_id)
+
 def _on_object_started(self, tag_id, active_ipset):
 _log.debug("TagIpset actor for %s started", tag_id)
- # We defer the update in order to delay updates until we're in-sync
- # with the datamodel.
- self._dirty_tags.add(tag_id)
+ # Fill the ipset in with its members, this will trigger its first
+ # programming, after which it will call us back to tell us it is ready.
+ # We can't use self._dirty_tags to defer this in case the set becomes
+ # unreferenced before _finish_msg_batch() is called.
+ self._update_active_ipset(tag_id)

 def _update_active_ipset(self, tag_id):
 """
@@ -125,6 +135,7 @@ def on_datamodel_in_sync(self):
 if not self._datamodel_in_sync:
 _log.info("Datamodel now in sync, uncorking updates to TagIpsets")
 self._datamodel_in_sync = True
+ self._maybe_start_all()

 @actor_message()
 def cleanup(self):
@@ -418,10 +429,8 @@ def _finish_msg_batch(self, batch, results):
 operation. It also avoids wasted effort if tags are flapping.
 """
 super(IpsetManager, self)._finish_msg_batch(batch, results)
- if self._datamodel_in_sync:
- _log.debug("Datamodel in sync, updating active TagIpsets.")
- self._update_dirty_active_ipsets()
- self._force_reprogram = False
+ self._update_dirty_active_ipsets()
+ self._force_reprogram = False

 class EndpointData(object):
@@ -494,7 +503,7 @@ def __init__(self, ipset, qualifier=None):
 self._ipset = ipset

 # Members - which entries should be in the ipset. None means
- # "unknown". The first update to this field triggers programming.
+ # "unknown", but this is updated immediately on actor startup.
 self.members = None
 # Members which really are in the ipset; again None means "unknown".
 self.programmed_members = None
@@ -534,7 +543,7 @@ def replace_members(self, members, force_reprogram=False):

 def _finish_msg_batch(self, batch, results):
 _log.debug("IpsetActor._finish_msg_batch() called")
- if not self.stopped and self.members is not None:
+ if not self.stopped:
 self._sync_to_ipset()

 def _sync_to_ipset(self):
@@ -590,24 +599,19 @@ def on_unreferenced(self):
 # Mark the object as stopped so that we don't accidentally recreate
 # the ipset in _finish_msg_batch.
 self.stopped = True
+ try:
+ self._ipset.delete()
+ finally:
+ self._notify_cleanup_complete()

 def _finish_msg_batch(self, batch, results):
 _log.debug("_finish_msg_batch on TagIpset")
 super(TagIpset, self)._finish_msg_batch(batch, results)
- if self.programmed_members is not None:
- # We've managed to program the set.
- if self.stopped:
- # Only clean up if we ever programmed the ipset.
- self._ipset.delete()
- elif not self.notified_ready:
- # Notify that the set is now available for use.
- _log.debug("TagIpset _finish_msg_batch notifying ready") - self.notified_ready = True - self._notify_ready() - if self.stopped: - _log.debug("%s stopped, notifying cleanup complete.", self) - self._notify_cleanup_complete() - + if not self.notified_ready: + # We have created the set, so we are now ready. + _log.debug("TagIpset _finish_msg_batch notifying ready") + self.notified_ready = True + self._notify_ready() class Ipset(object): diff --git a/calico/felix/profilerules.py b/calico/felix/profilerules.py index ceccd600f1..0c5926555a 100644 --- a/calico/felix/profilerules.py +++ b/calico/felix/profilerules.py @@ -44,6 +44,7 @@ def __init__(self, ip_version, iptables_updater, ipset_manager): self.iptables_updater = iptables_updater self.ipset_manager = ipset_manager self.rules_by_profile_id = {} + self._datamodel_in_sync = False def _create(self, profile_id): return ProfileRules(profile_id, @@ -57,9 +58,23 @@ def _on_object_started(self, profile_id, active_profile): profile_or_none) active_profile.on_profile_update(profile_or_none, async=True) + def _maybe_start(self, obj_id, in_sync=False): + in_sync |= self._datamodel_in_sync + if in_sync or obj_id in self.rules_by_profile_id: + _log.debug("Profile %s is in-sync, deferring to superclass.", + obj_id) + return super(RulesManager, self)._maybe_start(obj_id) + else: + _log.info("Delaying startup of profile %s because datamodel is" + "not in sync.", obj_id) + @actor_message() def on_datamodel_in_sync(self): - _log.error("NOT IMPLEMENTED: RulesManager.on_datamodel_in_sync()") + if not self._datamodel_in_sync: + _log.error("%s: datamodel now in sync, unblocking profile startup", + self) + self._datamodel_in_sync = True + self._maybe_start_all() @actor_message() def on_rules_update(self, profile_id, profile, force_reprogram=False): @@ -75,6 +90,12 @@ def on_rules_update(self, profile_id, profile, force_reprogram=False): ap = self.objects_by_id[profile_id] ap.on_profile_update(profile, force_reprogram=force_reprogram, async=True) + elif profile_id in self.objects_by_id: + _log.debug("Checking if the update allows us to start profile %s", + profile_id) + # Pass in_sync=True because we now explicitly know this profile is + # in sync, even if this is a deletion. + self._maybe_start(profile_id, in_sync=True) class ProfileRules(RefCountedActor): @@ -159,7 +180,7 @@ def _finish_msg_batch(self, batch, results): _log.info("%s unreferenced, removing our chains", self) self._delete_chains() self._ipset_refs.discard_all() - self._ipset_refs = None # Break ref cycle. + self._ipset_refs = None # Break ref cycle. self._profile = None self._pending_profile = None finally: diff --git a/calico/felix/refcount.py b/calico/felix/refcount.py index ff75913312..012d80d75a 100644 --- a/calico/felix/refcount.py +++ b/calico/felix/refcount.py @@ -141,10 +141,24 @@ def on_object_cleanup_complete(self, object_id, obj): # May have unblocked start of new object... self._maybe_start(object_id) + def _maybe_start_all(self): + _log.debug("Checking all objects to see if they can be started") + for obj_id in self.objects_by_id: + self._maybe_start(obj_id) + def _maybe_start(self, obj_id): """ Starts the actor with the given ID if it is present and there are no pending cleanups for that ID. + + Subclasses may override this method to place additional + pre-requisites on starting the object. They should call + this implementation if they are happy for the start to + proceed. 
+ + If the subclass chooses to block startup, it must later call + this method (or the convenience method _maybe_start_all()) + when it wants to allow startup to proceed. """ obj = self.objects_by_id.get(obj_id) if (obj and @@ -196,9 +210,8 @@ def _create(self, object_id): raise NotImplementedError() # pragma nocover def _is_starting_or_live(self, obj_id): - return (obj_id in self.objects_by_id - and self.objects_by_id[obj_id].ref_mgmt_state in - (STARTING, LIVE)) + return (obj_id in self.objects_by_id and + self.objects_by_id[obj_id].ref_mgmt_state in (STARTING, LIVE)) class RefHelper(object): @@ -366,4 +379,4 @@ def _notify_cleanup_complete(self): is complete. Notifies the manager. """ _log.debug("Notifying manager that %s is done cleaning up", self) - self._manager.on_object_cleanup_complete(self._id, self, async=True) \ No newline at end of file + self._manager.on_object_cleanup_complete(self._id, self, async=True) From c52d2ea8ff26a4367da9c3261f0cfc562367832d Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 13:56:45 +0100 Subject: [PATCH 29/98] Make Felix's etcd watching thread more cooperative. --- calico/felix/fetcd.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index f1d4ab1ddf..058abbee8b 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -90,6 +90,9 @@ POOL_V4_DIR, ] +# Max number of events from driver process before we yield to another greenlet. +MAX_EVENTS_BEFORE_YIELD = 200 + class EtcdAPI(EtcdClientOwner, Actor): """ @@ -372,6 +375,7 @@ def loop(self): driver_sck = self.start_driver() unpacker = msgpack.Unpacker() + msgs_processed = 0 while True: data = driver_sck.recv(16384) unpacker.feed(data) @@ -388,6 +392,12 @@ def loop(self): self._on_status_from_driver(msg) else: raise RuntimeError("Unexpected message %s" % msg) + msgs_processed += 1 + if msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: + # Yield to ensure that other actors make progress. + # Sleep must be non-zero to work around gevent + # issue where we could be immediately rescheduled. + gevent.sleep(0.000001) except EtcdException as e: # Most likely a timeout or other error in the pre-resync; From 12d914dde85603dffe0cfa02f5afcec5113d1d42 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 14:14:20 +0100 Subject: [PATCH 30/98] Only log tag update message if we actually updated tags. --- calico/felix/ipsets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/calico/felix/ipsets.py b/calico/felix/ipsets.py index 5065ed97c5..e779c128f6 100644 --- a/calico/felix/ipsets.py +++ b/calico/felix/ipsets.py @@ -118,11 +118,14 @@ def _update_dirty_active_ipsets(self): Clears the set of dirty tags as a side-effect. """ + num_updates = 0 for tag_id in self._dirty_tags: if self._is_starting_or_live(tag_id): self._update_active_ipset(tag_id) + num_updates += 1 self._maybe_yield() - _log.info("Sent updates to %s updated tags", len(self._dirty_tags)) + if num_updates > 0: + _log.info("Sent updates to %s updated tags", num_updates) self._dirty_tags.clear() @property From 948be71d263489b5267a75895e0e4d8ae169ad9d Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 15:07:26 +0100 Subject: [PATCH 31/98] Fix up tests, remove now-invalid tests. 
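
The apply_snapshot() code paths are gone, so the snapshot-driven tests are
removed (or commented out pending rework) and the surviving tests prime the
managers via the new in-sync message instead. Roughly, the pattern (a
sketch using the existing test helpers):

    # Unblock behaviour that is now gated on the in-sync flag.
    self.mgr.on_datamodel_in_sync(async=True)
    self.step_mgr()  # Process the message batch.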
--- calico/felix/test/test_fetcd.py | 76 ---------- calico/felix/test/test_ipsets.py | 69 ++------- calico/felix/test/test_splitter.py | 224 ++++++++++++++--------------- calico/test/test_common.py | 13 -- calico/test/test_geventutils.py | 44 ++++++ 5 files changed, 168 insertions(+), 258 deletions(-) create mode 100644 calico/test/test_geventutils.py diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 789e3fd461..ea4bbcb210 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -151,61 +151,6 @@ def setUp(self): self.client = Mock() self.watcher.client = self.client - @patch("gevent.sleep", autospec=True) - @patch("calico.felix.fetcd._build_config_dict", autospec=True) - @patch("calico.felix.fetcd.die_and_restart", autospec=True) - def test_load_config(self, m_die, m_build_dict, m_sleep): - # First call, loads the config. - global_cfg = {"foo": "bar"} - m_build_dict.side_effect = iter([ - # First call, global-only. - global_cfg, - # Second call, no change. - global_cfg, - # Third call, change of config. - {"foo": "baz"}, {"biff": "bop"}]) - self.client.read.side_effect = iter([ - # First time round the loop, fail to read global config, should - # retry. - etcd.EtcdKeyNotFound, - # Then get the global config but there's not host-only config. - None, etcd.EtcdKeyNotFound, - # Twice... - None, etcd.EtcdKeyNotFound, - # Then some host-only config shows up. - None, None]) - - # First call. - self.watcher._load_config() - - m_sleep.assert_called_once_with(5) - self.assertFalse(m_die.called) - - m_report = self.m_config.report_etcd_config - rpd_host_cfg, rpd_global_cfg = m_report.mock_calls[0][1] - self.assertEqual(rpd_host_cfg, {}) - self.assertEqual(rpd_global_cfg, global_cfg) - self.assertTrue(rpd_host_cfg is not self.watcher.last_host_config) - self.assertTrue(rpd_global_cfg is not self.watcher.last_global_config) - self.assertEqual(rpd_host_cfg, self.watcher.last_host_config) - self.assertEqual(rpd_global_cfg, self.watcher.last_global_config) - - self.assertEqual(self.watcher.last_host_config, {}) - self.assertEqual(self.watcher.last_global_config, global_cfg) - self.watcher.configured.set() # Normally done by the caller. - self.client.read.assert_has_calls([ - call("/calico/v1/config", recursive=True), - call("/calico/v1/host/hostname/config", recursive=True), - ]) - - # Second call, no change. - self.watcher._load_config() - self.assertFalse(m_die.called) - - # Third call, should detect the config change and die. - self.watcher._load_config() - m_die.assert_called_once_with() - def test_resync_flag(self): self.watcher.resync_after_current_poll = True self.watcher.next_etcd_index = 1 @@ -213,13 +158,6 @@ def test_resync_flag(self): self.watcher.wait_for_etcd_event) self.assertFalse(self.watcher.resync_after_current_poll) - def test_ready_flag_set(self): - self.dispatch("/calico/v1/Ready", "set", value="true") - self.assertRaises(ResyncRequired, self.dispatch, - "/calico/v1/Ready", "set", value="false") - self.assertRaises(ResyncRequired, self.dispatch, - "/calico/v1/Ready", "set", value="foo") - def test_endpoint_set(self): self.dispatch("/calico/v1/host/h1/workload/o1/w1/endpoint/e1", "set", value=ENDPOINT_STR) @@ -370,20 +308,6 @@ def test_host_ip_invalid(self): async=True, ) - def test_host_del_clears_ip(self): - """ - Test set for the IP of a host. 
- """ - self.dispatch("/calico/v1/host/foo/bird_ip", - action="set", value="10.0.0.1") - self.m_hosts_ipset.reset_mock() - self.dispatch("/calico/v1/host/foo", - action="delete") - self.m_hosts_ipset.replace_members.assert_called_once_with( - [], - async=True, - ) - @patch("os._exit", autospec=True) @patch("gevent.sleep", autospec=True) def test_die_and_restart(self, m_sleep, m_exit): diff --git a/calico/felix/test/test_ipsets.py b/calico/felix/test/test_ipsets.py index f6f6ea2fd1..146f88f29e 100644 --- a/calico/felix/test/test_ipsets.py +++ b/calico/felix/test/test_ipsets.py @@ -310,6 +310,8 @@ def on_ref_acquired(self, tag_id, ipset): @patch("calico.felix.ipsets.list_ipset_names", autospec=True) @patch("calico.felix.futils.check_call", autospec=True) def test_cleanup(self, m_check_call, m_list_ipsets): + # We're testing the in-sync processing + self.mgr.on_datamodel_in_sync(async=True) # Start with a couple ipsets. self.mgr.get_and_incref("foo", callback=self.on_ref_acquired, async=True) @@ -357,63 +359,16 @@ def test_cleanup(self, m_check_call, m_list_ipsets): call(["ipset", "destroy", "felix-v4-baz"]), ])) - def test_apply_snapshot_mainline(self): - self.mgr.apply_snapshot( - {"prof1": ["tag1"], "prof2": ["B"], "prof3": ["B"]}, - {EP_ID_1_1: EP_1_1, - EP_ID_2_1: EP_2_1}, - async=True, - ) - self.mgr.get_and_incref("tag1", - callback=self.on_ref_acquired, - async=True) - self.step_mgr() - self.mgr.on_object_startup_complete("tag1", - self.created_refs["tag1"][0], - async=True) - self.step_mgr() - self.mgr.apply_snapshot( - {"prof1": ["tag1", "tag2"]}, - {EP_ID_1_1: EP_1_1}, - async=True, - ) - self.step_mgr() - self.assertEqual(self.mgr.tags_by_prof_id, - {"prof1": ["tag1", "tag2"]}) - self.assertEqual(self.mgr.endpoint_data_by_ep_id, - {EP_ID_1_1: EP_DATA_1_1}) - ipset = self.acquired_refs["tag1"] - self.assertEqual( - ipset.replace_members.mock_calls, - [ - call(set(['10.0.0.1']), force_reprogram=True, async=True), - call(set(['10.0.0.1']), force_reprogram=True, async=True), - ] - ) - - def test_apply_snapshot_forces_reprogram(self): - # Apply a snapshot but mock the finish call so that we can check that - # apply_snapshot set the flag... - self.mgr.apply_snapshot( - {"prof1": ["A"], "prof2": ["B"]}, - {EP_ID_1_1: EP_1_1, - EP_ID_2_1: EP_2_1}, - async=True, - ) - # noinspection PyUnresolvedReferences - with patch.object(self.mgr, "_finish_msg_batch"): - self.step_actor(self.mgr) - self.assertTrue(self.mgr._force_reprogram) - - def test_finish_msg_batch_clears_reprogram_flag(self): - # Apply a snapshot and step the actor for real, should clear the flag. - self.mgr.apply_snapshot( - {"prof1": ["A"]}, - {EP_ID_1_1: EP_1_1}, - async=True, - ) - self.step_mgr() - self.assertFalse(self.mgr._force_reprogram) + # + # def test_finish_msg_batch_clears_reprogram_flag(self): + # # Apply a snapshot and step the actor for real, should clear the flag. + # self.mgr.apply_snapshot( + # {"prof1": ["A"]}, + # {EP_ID_1_1: EP_1_1}, + # async=True, + # ) + # self.step_mgr() + # self.assertFalse(self.mgr._force_reprogram) def _notify_ready(self, tags): for tag in tags: diff --git a/calico/felix/test/test_splitter.py b/calico/felix/test/test_splitter.py index dfb9b7c39a..ca8ce8c146 100644 --- a/calico/felix/test/test_splitter.py +++ b/calico/felix/test/test_splitter.py @@ -55,118 +55,118 @@ def get_splitter(self): self.iptables_updaters, self.masq_manager ) - - def test_apply_whole_snapshot_clean(self): - """ - Test that a whole snapshot applies cleanly to all managers. 
- """ - # We apply a simple sentinel map. The exact map we use really shouldn't - # matter here. We do, however, use different ones for rules, tags, and - # endpoints. - rules = {'profileA': ['first rule', 'second rule']} - tags = {'profileA': ['first tag', 'second tag']} - endpoints = {'endpointA': 'endpoint object'} - ipv4_pools_by_id = {"10.0.0.1-5": {"cidr": "10.0.0.1/5", - "masquerade": True}} - s = self.get_splitter() - - # Apply the snapshot and let it run. - s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - self.step_actor(s) - - # At this point, each of our managers should have been notified (one - # call to apply_snapshot), but cleanup should not have occurred. - for mgr in self.ipsets_mgrs: - mgr.apply_snapshot.assertCalledOnceWith( - tags, endpoints, async=True - ) - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.rules_mgrs: - mgr.apply_snapshot.assertCalledOnceWith(rules, async=True) - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.endpoint_mgrs: - mgr.apply_snapshot.assertCalledOnceWith(endpoints, async=True) - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.iptables_updaters: - self.assertEqual(mgr.cleanup.call_count, 0) - self.masq_manager.apply_snapshot.assert_called_once_with( - ipv4_pools_by_id, async=True) - - # If we spin the scheduler again, we should begin cleanup. - # Warning: this might be a bit brittle, we may not be waiting long - # enough here, at least on busy machines. - gevent.sleep(0.1) - self.step_actor(s) - - # Confirm that we cleaned up. Cleanup only affects the - # iptables_updaters and the ipsets_managers, so confirm the other - # managers got left alone. - for mgr in self.ipsets_mgrs: - mgr.cleanup.assertCalledOnceWith(async=False) - for mgr in self.rules_mgrs: - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.endpoint_mgrs: - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.iptables_updaters: - mgr.cleanup.assertCalledOnceWith(async=False) - - def test_repeated_snapshots_clean_up_only_once(self): - """ - Test that repeated snapshots only clean up once. - """ - # We apply a simple sentinel map. The exact map we use really shouldn't - # matter here. We do, however, use different ones for rules, tags, and - # endpoints. - rules = {'profileA': ['first rule', 'second rule']} - tags = {'profileA': ['first tag', 'second tag']} - endpoints = {'endpointA': 'endpoint object'} - ipv4_pools_by_id = {} - s = self.get_splitter() - - # Apply three snapshots and let them run. Because of batching logic, - # we should only need to spin the actor once. - s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - self.step_actor(s) - - # At this point, each of our managers should have been notified (one - # call to apply_snapshot), but cleanup should not have occurred. 
- for mgr in self.ipsets_mgrs: - mgr.apply_snapshot.assertCalledWith( - tags, endpoints, async=True - ) - self.assertEqual(mgr.apply_snapshot.call_count, 3) - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.rules_mgrs: - mgr.apply_snapshot.assertCalledWith(rules, async=True) - self.assertEqual(mgr.apply_snapshot.call_count, 3) - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.endpoint_mgrs: - mgr.apply_snapshot.assertCalledWith(endpoints, async=True) - self.assertEqual(mgr.apply_snapshot.call_count, 3) - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.iptables_updaters: - self.assertEqual(mgr.cleanup.call_count, 0) - self.assertEqual(self.masq_manager.apply_snapshot.call_count, 3) - - # If we spin the scheduler again, we should begin cleanup. - # Warning: this might be a bit brittle, we may not be waiting long - # enough here, at least on busy machines. - gevent.sleep(0.1) - self.step_actor(s) - - # Confirm that we cleaned up. Cleanup only affects the - # iptables_updaters and the ipsets_managagers, so confirm the other - # managers got left alone. - for mgr in self.ipsets_mgrs: - mgr.cleanup.assertCalledOnceWith(async=False) - for mgr in self.rules_mgrs: - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.endpoint_mgrs: - self.assertEqual(mgr.cleanup.call_count, 0) - for mgr in self.iptables_updaters: - mgr.cleanup.assertCalledOnceWith(async=False) + # + # def test_apply_whole_snapshot_clean(self): + # """ + # Test that a whole snapshot applies cleanly to all managers. + # """ + # # We apply a simple sentinel map. The exact map we use really shouldn't + # # matter here. We do, however, use different ones for rules, tags, and + # # endpoints. + # rules = {'profileA': ['first rule', 'second rule']} + # tags = {'profileA': ['first tag', 'second tag']} + # endpoints = {'endpointA': 'endpoint object'} + # ipv4_pools_by_id = {"10.0.0.1-5": {"cidr": "10.0.0.1/5", + # "masquerade": True}} + # s = self.get_splitter() + # + # # Apply the snapshot and let it run. + # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) + # self.step_actor(s) + # + # # At this point, each of our managers should have been notified (one + # # call to apply_snapshot), but cleanup should not have occurred. + # for mgr in self.ipsets_mgrs: + # mgr.apply_snapshot.assertCalledOnceWith( + # tags, endpoints, async=True + # ) + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.rules_mgrs: + # mgr.apply_snapshot.assertCalledOnceWith(rules, async=True) + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.endpoint_mgrs: + # mgr.apply_snapshot.assertCalledOnceWith(endpoints, async=True) + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.iptables_updaters: + # self.assertEqual(mgr.cleanup.call_count, 0) + # self.masq_manager.apply_snapshot.assert_called_once_with( + # ipv4_pools_by_id, async=True) + # + # # If we spin the scheduler again, we should begin cleanup. + # # Warning: this might be a bit brittle, we may not be waiting long + # # enough here, at least on busy machines. + # gevent.sleep(0.1) + # self.step_actor(s) + # + # # Confirm that we cleaned up. Cleanup only affects the + # # iptables_updaters and the ipsets_managers, so confirm the other + # # managers got left alone. 
+ # for mgr in self.ipsets_mgrs: + # mgr.cleanup.assertCalledOnceWith(async=False) + # for mgr in self.rules_mgrs: + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.endpoint_mgrs: + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.iptables_updaters: + # mgr.cleanup.assertCalledOnceWith(async=False) + # + # def test_repeated_snapshots_clean_up_only_once(self): + # """ + # Test that repeated snapshots only clean up once. + # """ + # # We apply a simple sentinel map. The exact map we use really shouldn't + # # matter here. We do, however, use different ones for rules, tags, and + # # endpoints. + # rules = {'profileA': ['first rule', 'second rule']} + # tags = {'profileA': ['first tag', 'second tag']} + # endpoints = {'endpointA': 'endpoint object'} + # ipv4_pools_by_id = {} + # s = self.get_splitter() + # + # # Apply three snapshots and let them run. Because of batching logic, + # # we should only need to spin the actor once. + # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) + # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) + # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) + # self.step_actor(s) + # + # # At this point, each of our managers should have been notified (one + # # call to apply_snapshot), but cleanup should not have occurred. + # for mgr in self.ipsets_mgrs: + # mgr.apply_snapshot.assertCalledWith( + # tags, endpoints, async=True + # ) + # self.assertEqual(mgr.apply_snapshot.call_count, 3) + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.rules_mgrs: + # mgr.apply_snapshot.assertCalledWith(rules, async=True) + # self.assertEqual(mgr.apply_snapshot.call_count, 3) + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.endpoint_mgrs: + # mgr.apply_snapshot.assertCalledWith(endpoints, async=True) + # self.assertEqual(mgr.apply_snapshot.call_count, 3) + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.iptables_updaters: + # self.assertEqual(mgr.cleanup.call_count, 0) + # self.assertEqual(self.masq_manager.apply_snapshot.call_count, 3) + # + # # If we spin the scheduler again, we should begin cleanup. + # # Warning: this might be a bit brittle, we may not be waiting long + # # enough here, at least on busy machines. + # gevent.sleep(0.1) + # self.step_actor(s) + # + # # Confirm that we cleaned up. Cleanup only affects the + # # iptables_updaters and the ipsets_managagers, so confirm the other + # # managers got left alone. 
+ # for mgr in self.ipsets_mgrs: + # mgr.cleanup.assertCalledOnceWith(async=False) + # for mgr in self.rules_mgrs: + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.endpoint_mgrs: + # self.assertEqual(mgr.cleanup.call_count, 0) + # for mgr in self.iptables_updaters: + # mgr.cleanup.assertCalledOnceWith(async=False) def test_cleanup_give_up_on_exception(self): """ diff --git a/calico/test/test_common.py b/calico/test/test_common.py index ebcf59e993..901bdbb4f2 100644 --- a/calico/test/test_common.py +++ b/calico/test/test_common.py @@ -686,19 +686,6 @@ def test_validate_tags(self): "Invalid tag"): common.validate_tags(profile_id, ["value", "bad value"]) - def test_greenlet_id(self): - def greenlet_run(): - tid = common.greenlet_id() - return tid - - tid = common.greenlet_id() - child = eventlet.spawn(greenlet_run) - child_tid = child.wait() - new_tid = common.greenlet_id() - - self.assertTrue(child_tid > tid) - self.assertEqual(tid, new_tid) - def test_validate_ipam_pool(self): self.assert_ipam_pool_valid({"cidr": "10/16", "foo": "bar"}, {"cidr": "10.0.0.0/16"}, 4) diff --git a/calico/test/test_geventutils.py b/calico/test/test_geventutils.py new file mode 100644 index 0000000000..03ab3210b2 --- /dev/null +++ b/calico/test/test_geventutils.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +test_geventutils +~~~~~~~~~~~~~~~~ + +Test code for gevent utility functions. +""" + +import logging +import gevent +from calico import geventutils + +from calico.felix.test.base import BaseTestCase + +_log = logging.getLogger(__name__) + + +class TestGreenletUtils(BaseTestCase): + + def test_greenlet_id(self): + def greenlet_run(): + tid = geventutils.greenlet_id() + return tid + + tid = geventutils.greenlet_id() + child = gevent.spawn(greenlet_run) + child_tid = child.get() + new_tid = geventutils.greenlet_id() + + self.assertTrue(child_tid > tid) + self.assertEqual(tid, new_tid) From f46deebc42d19fe274c721b8296f83dd8c93611b Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 15:10:14 +0100 Subject: [PATCH 32/98] Add new deb dependencies. --- debian/control | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 49db4c2863..fa862b1ad4 100644 --- a/debian/control +++ b/debian/control @@ -52,7 +52,11 @@ Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}, - python-etcd (>= 0.4.1+calico.1) + python-etcd (>= 0.4.1+calico.1), + python-ijson (>= 2.2-1), + python-datrie (>= 0.7-1), + libyajl2 (>= 2.0.4-4), + libdatrie1 (>= 0.2.8-1) Description: Project Calico virtual networking for cloud data centers. Project Calico is an open source solution for virtual networking in cloud data centers. Its IP-centric architecture offers numerous From 5b3d47678f83b55fa2f75219146b5459fe78a188 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 16:08:30 +0100 Subject: [PATCH 33/98] Defensively check for process adoption in driver. 
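
If Felix dies, the driver is orphaned and re-parented (normally to init,
PID 1), so polling os.getppid() catches the case where we never see a
socket error. The idea in isolation (illustrative sketch only; the real
loop is in __main__.py below):

    import os
    import time

    def wait_until_orphaned(poll_interval=1):
        """Blocks until this process is adopted by a new parent."""
        original_ppid = os.getppid()
        while True:
            time.sleep(poll_interval)
            ppid = os.getppid()
            if ppid == 1 or ppid != original_ppid:
                return ppid  # Parent changed; assume it died.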
--- calico/etcddriver/__main__.py | 12 +++++++++++-
 calico/etcddriver/driver.py | 22 +++++++++++++++-------
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py
index cba1198d7e..6ada8a6dac 100644
--- a/calico/etcddriver/__main__.py
+++ b/calico/etcddriver/__main__.py
@@ -24,6 +24,7 @@
 """
 import logging
+import os
 import socket
 import sys

@@ -32,6 +33,7 @@

 _log = logging.getLogger(__name__)

+last_ppid = os.getppid()
 default_logging(gevent_in_use=False)

 felix_sck = socket.socket(socket.AF_UNIX,
@@ -44,5 +46,13 @@

 driver = EtcdDriver(felix_sck)
 driver.start()
-driver.join()
+
+while not driver.join(timeout=1):
+ parent_pid = os.getppid()
+ # Defensive, just in case we don't get a socket error, check if the
+ # parent PID has changed, indicating that Felix has died.
+ if parent_pid == 1 or parent_pid != last_ppid:
+ _log.critical("Process adopted, assuming felix has died")
+ driver.stop()
+ break
 _log.critical("Driver shutting down.")
diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index fe61b5ffed..097e100d84 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -103,9 +103,17 @@ def start(self):
 self._reader_thread.start()
 self._resync_thread.start()

- def join(self):
- """Blocks until the driver stops."""
- self._stop_event.wait()
+ def join(self, timeout=None):
+ """
+ Blocks until the driver stops or until timeout expires.
+
+ :returns: True if the driver stopped, False on timeout.
+ """
+ return self._stop_event.wait(timeout=timeout)
+
+ def stop(self):
+ _log.info("Stopping driver")
+ self._stop_event.set()

 def _read_from_socket(self):
 """
@@ -142,7 +150,7 @@ def _read_from_socket(self):
 _log.warning("Unexpected message from Felix")
 finally:
 _log.error("Reader thread shutting down, triggering stop event")
- self._stop_event.set()
+ self.stop()

 def _handle_init(self, msg):
 """
@@ -213,7 +221,7 @@ def _resync_and_merge(self):
 self._process_events_only()
 except FelixWriteFailed:
 _log.exception("Write to Felix failed; shutting down.")
- self._stop_event.set()
+ self.stop()
 except WatcherDied:
 _log.warning("Watcher died; resyncing.")
 except (urllib3.exceptions.HTTPError,
@@ -225,7 +233,7 @@ def _resync_and_merge(self):
 time.sleep(1)
 except:
 _log.exception("Unexpected exception; shutting down.")
- self._stop_event.set()
+ self.stop()
 raise
 finally:
 self._first_resync = False
@@ -344,7 +352,7 @@ def _check_cluster_id(self, resp):
 _log.error("etcd cluster ID changed from %s to %s. "
 "This invalidates our local state so Felix "
 "must restart.", self._cluster_id, cluster_id)
- self._stop_event.set()
+ self.stop()
 raise DriverShutdown()
 else:
 _log.info("First successful read from etcd. Cluster ID: %s",
From 79597209baaa0381e47be7e330a8a64c84e422f9 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Fri, 23 Oct 2015 16:49:34 +0100
Subject: [PATCH 34/98] Defensive: encode keys that use characters not allowed
 in the trie.
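
datrie only supports characters from its configured alphabet, so keys are
%-encoded on the way in, and a trailing "/" is appended so that /foo is
never treated as a prefix of /foobar. Expected round-trips (these mirror
the new unit tests below):

    encode_key("/calico/v1/foo/bar")  # -> u"/calico/v1/foo/bar/"
    encode_key("/%/foo")              # -> u"/%25/foo/"
    encode_key(u"/\u01b1/foo")        # -> u"/%C6%B1/foo/"
    decode_key(u"/%C6%B1/foo/")       # -> u"/\u01b1/foo"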
--- calico/etcddriver/hwm.py | 28 ++++++++++++--- calico/etcddriver/test/__init__.py | 6 ++++ calico/etcddriver/test/test_hwm.py | 56 ++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 calico/etcddriver/test/__init__.py create mode 100644 calico/etcddriver/test/test_hwm.py diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py index e1529216b9..20f0d4e35b 100644 --- a/calico/etcddriver/hwm.py +++ b/calico/etcddriver/hwm.py @@ -26,11 +26,12 @@ from datrie import Trie import datrie +import urllib _log = logging.getLogger(__name__) - -TRIE_CHARS = string.ascii_letters + string.digits + "/_-:." +TRIE_SYMBOLS = "/_-:." +TRIE_CHARS = string.ascii_letters + string.digits + TRIE_SYMBOLS + "%" TRIE_CHARS_MATCH = re.compile(r'^[%s]+$' % re.escape(TRIE_CHARS)) @@ -143,12 +144,31 @@ def remove_old_keys(self, hwm_limit): def encode_key(key): - # FIXME May have to be more lenient - assert TRIE_CHARS_MATCH.match(key) + """ + Encode an etcd key for use in the trie. + + This does three things: + * Encodes any characters that are not supported by the trie using + %-encoding. + * Adds a trailing slash if not present. This prevents /foobar/baz from + being seen as a subtree of /foo/. + * Converts the result to a unicode string, which is what is required + by the trie. + + Since our datamodel specifies the characters that are allowed, the first + operation should be a no-op on most keys but it's better to be tolerant + here than to blow up. + """ if key[-1] != "/": key += "/" + key = unicode(urllib.quote(key.encode("utf8"), safe=TRIE_SYMBOLS)) + assert TRIE_CHARS_MATCH.match(key) return key def decode_key(key): + """ + Reverses the encoding done by encode_key. + """ + key = urllib.unquote(key.encode("utf8")).decode("utf8") return key[:-1] diff --git a/calico/etcddriver/test/__init__.py b/calico/etcddriver/test/__init__.py new file mode 100644 index 0000000000..edc7577a0c --- /dev/null +++ b/calico/etcddriver/test/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Metaswitch Networks 2015. All rights reserved. + +import logging + +_log = logging.getLogger(__name__) + diff --git a/calico/etcddriver/test/test_hwm.py b/calico/etcddriver/test/test_hwm.py new file mode 100644 index 0000000000..64b9bca9bb --- /dev/null +++ b/calico/etcddriver/test/test_hwm.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +test_hwm +~~~~~~~~ + +Tests for high water mark tracking function. 
+""" + +import logging +from unittest import TestCase +from mock import Mock, call, patch +from calico.etcddriver import hwm + +_log = logging.getLogger(__name__) + + +class TestHighWaterTracker(TestCase): + pass + + +class TestKeyEncoding(TestCase): + def test_encode_key(self): + self.assert_enc_dec("/calico/v1/foo/bar", "/calico/v1/foo/bar/") + + self.assert_enc_dec("/:_-./foo", "/:_-./foo/") + self.assert_enc_dec("/:_-.~/foo", "/:_-.%7E/foo/") + self.assert_enc_dec("/%/foo", "/%25/foo/") + self.assert_enc_dec(u"/\u01b1/foo", "/%C6%B1/foo/") + + def assert_enc_dec(self, key, expected_encoding): + encoded = hwm.encode_key(key) + self.assertEqual( + encoded, + expected_encoding, + msg="Expected %r to encode as %r but got %r" % + (key, expected_encoding, encoded)) + decoded = hwm.decode_key(encoded) + self.assertEqual( + decoded, + key, + msg="Expected %r to decode as %r but got %r" % + (encoded, key, decoded)) + From e3314bbd6884e91f63e2e152321c0eeceb340af6 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 17:04:07 +0100 Subject: [PATCH 35/98] Fix that in-sync was logged at error. --- calico/felix/profilerules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/calico/felix/profilerules.py b/calico/felix/profilerules.py index 0c5926555a..d07e31f631 100644 --- a/calico/felix/profilerules.py +++ b/calico/felix/profilerules.py @@ -71,8 +71,8 @@ def _maybe_start(self, obj_id, in_sync=False): @actor_message() def on_datamodel_in_sync(self): if not self._datamodel_in_sync: - _log.error("%s: datamodel now in sync, unblocking profile startup", - self) + _log.info("%s: datamodel now in sync, unblocking profile startup", + self) self._datamodel_in_sync = True self._maybe_start_all() From c9875eac5a55b52375ead74d38e92ccee864ab90 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 23 Oct 2015 17:04:27 +0100 Subject: [PATCH 36/98] Add msgpack dependency. --- debian/control | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/control b/debian/control index fa862b1ad4..b8020b786d 100644 --- a/debian/control +++ b/debian/control @@ -56,7 +56,8 @@ Depends: python-ijson (>= 2.2-1), python-datrie (>= 0.7-1), libyajl2 (>= 2.0.4-4), - libdatrie1 (>= 0.2.8-1) + libdatrie1 (>= 0.2.8-1), + python-msgpack (>= 0.4.2-1) Description: Project Calico virtual networking for cloud data centers. Project Calico is an open source solution for virtual networking in cloud data centers. Its IP-centric architecture offers numerous From 0f08fa1099814f9e3be6bfa91189d6a6c060b93f Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 26 Oct 2015 13:23:10 +0000 Subject: [PATCH 37/98] Add versions to felix_requirements to match built packages. --- felix_requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/felix_requirements.txt b/felix_requirements.txt index ff1ff6148b..260108f1fa 100644 --- a/felix_requirements.txt +++ b/felix_requirements.txt @@ -3,6 +3,6 @@ greenlet netaddr python-etcd>=0.4.1 posix-spawn>=0.2.post6 -datrie -ijson -msgpack-python +datrie>=0.7 +ijson>=2.2 +msgpack-python>=0.3 From 2a7afe4ae44630b1af067d2e6fc48c94afd89ff4 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 26 Oct 2015 13:42:58 +0000 Subject: [PATCH 38/98] Rev build number for upgrade testing. 
--- debian/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/debian/changelog b/debian/changelog index 7b0880a2ef..041d939056 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +calico (1.3.0~~smc.1-1) trusty; urgency=medium + + * Private pre-release build. + + -- Shaun Crampton Mon, 26 Oct 2015 13:41:00 +0100 + calico (1.2.0-1) trusty; urgency=medium * Truncate long output from FailedSystemCall exception. From b2dbd9645e6e91c686e34f7cb7d9f92400f0b686 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 26 Oct 2015 16:03:11 +0000 Subject: [PATCH 39/98] Trigger status cleanup and hosts ipset updates from in-sync message. --- calico/felix/fetcd.py | 113 ++++++-------------------------- calico/felix/test/test_fetcd.py | 9 ++- 2 files changed, 25 insertions(+), 97 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 058abbee8b..f7a858be90 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -293,6 +293,9 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): self._status_reporter = status_reporter self.hosts_ipset = hosts_ipset + # Whether we've been in sync with etcd at some point. + self._been_in_sync = False + # Keep track of the config loaded from etcd so we can spot if it # changes. self.last_global_config = None @@ -482,7 +485,10 @@ def _on_status_from_driver(self, msg): _log.info("etcd driver status changed to %s", status) if status == STATUS_IN_SYNC: self.begin_polling.wait() # Make sure splitter is set. + self._been_in_sync = True self.splitter.on_datamodel_in_sync(async=True) + self._update_hosts_ipset() + self.clean_up_endpoint_statuses() def start_driver(self): _log.info("Creating server socket.") @@ -516,95 +522,10 @@ def start_driver(self): return update_conn - # - # def _on_snapshot_loaded(self, etcd_snapshot_response): - # """ - # Loads a snapshot from etcd and passes it to the update splitter. - # - # :raises ResyncRequired: if the Ready flag is not set in the snapshot. - # """ - # start_time = monotonic_time() - # rules_by_id = {} - # tags_by_id = {} - # endpoints_by_id = {} - # ipv4_pools_by_id = {} - # self.endpoint_ids_per_host.clear() - # self.ipv4_by_hostname.clear() - # still_ready = False - # for child in etcd_snapshot_response.children: - # trie_key = [intern(s.encode("utf8")) for s in - # child.key.split("/")][2:] - # if trie.get(trie_key) == child.modifiedIndex and "host" in trie_key: - # continue - # trie[trie_key] = child.modifiedIndex - # - # profile_id, rules = parse_if_rules(child) - # if profile_id: - # rules_by_id[profile_id] = rules - # continue - # profile_id, tags = parse_if_tags(child) - # if profile_id: - # tags_by_id[profile_id] = tags - # continue - # endpoint_id, endpoint = parse_if_endpoint(self._config, child) - # if endpoint_id and endpoint: - # endpoints_by_id[endpoint_id] = endpoint - # self.endpoint_ids_per_host[endpoint_id.host].add(endpoint_id) - # continue - # pool_id, pool = parse_if_ipam_v4_pool(child) - # if pool_id and pool: - # ipv4_pools_by_id[pool_id] = pool - # continue - # if self._config.IP_IN_IP_ENABLED: - # hostname, ip = parse_if_host_ip(child) - # if hostname and ip: - # self.ipv4_by_hostname[hostname] = ip - # continue - # - # # Double-check the flag hasn't changed since we read it before. 
- # if child.key == READY_KEY: - # if child.value == "true": - # still_ready = True - # else: - # _log.warning("Aborting resync because ready flag was" - # "unset since we read it.") - # raise ResyncRequired() - # - # if not still_ready: - # _log.warn("Aborting resync; ready flag no longer present.") - # raise ResyncRequired() - # - # # We now know exactly which endpoints are on this host, use that to - # # clean up any endpoint statuses that should now be gone. - # our_endpoints_ids = self.endpoint_ids_per_host[self._config.HOSTNAME] - # self.clean_up_endpoint_statuses(our_endpoints_ids) - # - # # Actually apply the snapshot. This does not return anything, but - # # just sends the relevant messages to the relevant threads to make - # # all the processing occur. - # _log.info("Snapshot parsed in %.2fs, passing to update splitter", - # monotonic_time() - start_time) - # self.splitter.apply_snapshot(rules_by_id, - # tags_by_id, - # endpoints_by_id, - # ipv4_pools_by_id, - # async=True) - # if self._config.IP_IN_IP_ENABLED: - # # We only support IPv4 for host tracking right now so there's not - # # much point in going via the splitter. - # # FIXME Support IP-in-IP for IPv6. - # _log.info("Sending (%d) host IPs to ipset.", - # len(self.ipv4_by_hostname)) - # self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(), - # async=True) - - def clean_up_endpoint_statuses(self, our_endpoints_ids): + def clean_up_endpoint_statuses(self): """ Mark any endpoint status reports for non-existent endpoints for cleanup. - - :param set our_endpoints_ids: Set of endpoint IDs for endpoints on - this host. """ if not self._config.REPORT_ENDPOINT_STATUS: _log.debug("Endpoint status reporting disabled, ignoring.") @@ -619,12 +540,11 @@ def clean_up_endpoint_statuses(self, our_endpoints_ids): except EtcdKeyNotFound: _log.info("No endpoint statuses found, nothing to clean up") else: + # Mark all statuses we find as dirty. This will result in any + # unknown endpoints being cleaned up. for node in response.leaves: combined_id = get_endpoint_id_from_key(node.key) - if combined_id and combined_id not in our_endpoints_ids: - # We found an endpoint in our status reporting tree that - # wasn't in the main tree. Mark it as dirty so the status - # reporting thread will clean it up. 
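Aside: the hunk in progress here changes the cleanup strategy. The removed
lines compared each status key against a set of known-live endpoint IDs;
the added lines just below simply mark every status key found under this
host as dirty and leave it to the status-reporting thread to rewrite or
delete each one. A rough sketch of that reconcile-by-marking idea, with all
names hypothetical rather than taken from the code:

    # Mark everything found in etcd, then let the normal write path
    # refresh the reports we still know about and delete the rest.
    def reconcile(found_keys, known_statuses, write_key, delete_key):
        for key in found_keys:
            status = known_statuses.get(key)  # None => endpoint is gone.
            if status is not None:
                write_key(key, status)
            else:
                delete_key(key)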
+ if combined_id: _log.debug("Endpoint %s removed by resync, marking " "status key for cleanup", combined_id) @@ -699,8 +619,7 @@ def on_host_ip_set(self, response, hostname): _log.warning("Invalid IP for hostname %s: %s, treating as " "deletion", hostname, response.value) self.ipv4_by_hostname.pop(hostname, None) - self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(), - async=True) + self._update_hosts_ipset() def on_host_ip_delete(self, response, hostname): if not self._config.IP_IN_IP_ENABLED: @@ -708,8 +627,14 @@ def on_host_ip_delete(self, response, hostname): response.key) return if self.ipv4_by_hostname.pop(hostname, None): - self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(), - async=True) + self._update_hosts_ipset() + + def _update_hosts_ipset(self): + if not self._been_in_sync: + _log.debug("Deferring update to hosts ipset until we're in-sync") + return + self.hosts_ipset.replace_members(self.ipv4_by_hostname.values(), + async=True) def _on_config_updated(self, response, config_param): new_value = response.value diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index ea4bbcb210..fbb1d5015f 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -261,6 +261,7 @@ def test_host_ip_set(self): """ Test set for the IP of a host. """ + self.watcher._been_in_sync = True self.dispatch("/calico/v1/host/foo/bird_ip", action="set", value="10.0.0.1") self.m_hosts_ipset.replace_members.assert_called_once_with( @@ -284,6 +285,7 @@ def test_host_ip_del(self): """ Test set for the IP of a host. """ + self.watcher._been_in_sync = True self.dispatch("/calico/v1/host/foo/bird_ip", action="set", value="10.0.0.1") self.m_hosts_ipset.reset_mock() @@ -298,6 +300,7 @@ def test_host_ip_invalid(self): """ Test set for the IP of a host. """ + self.watcher._been_in_sync = True self.dispatch("/calico/v1/host/foo/bird_ip", action="set", value="10.0.0.1") self.m_hosts_ipset.reset_mock() @@ -345,7 +348,7 @@ def test_clean_up_endpoint_status(self): empty_dir, missing_ep, ] - self.watcher.clean_up_endpoint_statuses(set([ep_id])) + self.watcher.clean_up_endpoint_statuses() # Missing endpoint should have been marked for cleanup. self.m_status_rep.mark_endpoint_dirty.assert_called_once_with( @@ -359,13 +362,13 @@ def test_clean_up_endpoint_status(self): def test_clean_up_endpoint_status_not_found(self): self.m_config.REPORT_ENDPOINT_STATUS = True self.client.read.side_effect = etcd.EtcdKeyNotFound() - self.watcher.clean_up_endpoint_statuses(set()) + self.watcher.clean_up_endpoint_statuses() self.assertFalse(self.m_status_rep.mark_endpoint_dirty.called) def test_clean_up_endpoint_status_disabled(self): self.m_config.REPORT_ENDPOINT_STATUS = False self.client.read.side_effect = self.failureException - self.watcher.clean_up_endpoint_statuses(set()) + self.watcher.clean_up_endpoint_statuses() class TestEtcdReporting(BaseTestCase): From 73ae15ad1c83abb5b114d75b978fc2c812e759a9 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 27 Oct 2015 11:20:08 +0000 Subject: [PATCH 40/98] Fix python-msgpack version. --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index b8020b786d..b1536919f7 100644 --- a/debian/control +++ b/debian/control @@ -57,7 +57,7 @@ Depends: python-datrie (>= 0.7-1), libyajl2 (>= 2.0.4-4), libdatrie1 (>= 0.2.8-1), - python-msgpack (>= 0.4.2-1) + python-msgpack (>= 0.3.0-1ubuntu3) Description: Project Calico virtual networking for cloud data centers. 
Project Calico is an open source solution for virtual networking in cloud data centers. Its IP-centric architecture offers numerous From f8691f41015b1b199d8de9dbe7b6e57a99bf9463 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 27 Oct 2015 12:08:46 +0000 Subject: [PATCH 41/98] Tidy up fetcd.py: * FelixEtcdWatcher no longer an EtcdWatcher subclass. * Move status cleanup to the StatusReporter. * Remove ResyncRequired exception. --- calico/datamodel_v1.py | 6 + calico/etcddriver/driver.py | 12 ++ calico/felix/fetcd.py | 265 +++++++++++++++----------------- calico/felix/test/test_fetcd.py | 98 ++++++------ 4 files changed, 192 insertions(+), 189 deletions(-) diff --git a/calico/datamodel_v1.py b/calico/datamodel_v1.py index 77c51499a4..e9b40720e5 100644 --- a/calico/datamodel_v1.py +++ b/calico/datamodel_v1.py @@ -190,6 +190,12 @@ def path_for_status(self): def __str__(self): return self.__class__.__name__ + ("<%s>" % self.endpoint) + def __repr__(self): + return self.__class__.__name__ + ("(%r,%r,%r,%r)" % (self.host, + self.orchestrator, + self.workload, + self.endpoint)) + def __eq__(self, other): if other is self: return True diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 097e100d84..f303fcf033 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -61,6 +61,18 @@ FLUSH_THRESHOLD = 200 +# TODO: trigger immediate resync if these are deleted? +# RESYNC_KEYS = [ +# VERSION_DIR, +# POLICY_DIR, +# PROFILE_DIR, +# CONFIG_DIR, +# HOST_DIR, +# IPAM_DIR, +# IPAM_V4_DIR, +# POOL_V4_DIR, +# ] + class EtcdDriver(object): def __init__(self, felix_sck): diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index f7a858be90..ed4145c6d8 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -27,7 +27,6 @@ import socket import subprocess import msgpack -import time from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time @@ -41,15 +40,15 @@ from calico.datamodel_v1 import (VERSION_DIR, CONFIG_DIR, RULES_KEY_RE, TAGS_KEY_RE, dir_for_per_host_config, - PROFILE_DIR, HOST_DIR, EndpointId, POLICY_DIR, + PROFILE_DIR, HOST_DIR, EndpointId, HOST_IP_KEY_RE, IPAM_V4_CIDR_KEY_RE, key_for_last_status, key_for_status, FELIX_STATUS_DIR, get_endpoint_id_from_key, dir_for_felix_status, ENDPOINT_STATUS_ERROR, ENDPOINT_STATUS_DOWN, ENDPOINT_STATUS_UP) from calico.etcdutils import ( - EtcdClientOwner, EtcdWatcher, ResyncRequired, - delete_empty_parents) + EtcdClientOwner, delete_empty_parents, PathDispatcher +) from calico.felix.actor import Actor, actor_message from calico.felix.futils import (intern_dict, intern_list, logging_exceptions, iso_utc_timestamp, IPV4, IPV6) @@ -79,17 +78,6 @@ POOL_V4_DIR = IPAM_V4_DIR + "/pool" CIDR_V4_KEY = POOL_V4_DIR + "/" -RESYNC_KEYS = [ - VERSION_DIR, - POLICY_DIR, - PROFILE_DIR, - CONFIG_DIR, - HOST_DIR, - IPAM_DIR, - IPAM_V4_DIR, - POOL_V4_DIR, -] - # Max number of events from driver process before we yield to another greenlet. MAX_EVENTS_BEFORE_YIELD = 200 @@ -268,26 +256,17 @@ def _on_worker_died(self, watch_greenlet): sys.exit(1) -class _FelixEtcdWatcher(EtcdWatcher, gevent.Greenlet): +class _FelixEtcdWatcher(gevent.Greenlet): """ - Greenlet that watches the etcd data model for changes. - - (1) Waits for the load_config event to be triggered. - (2) Connects to etcd and waits for the Ready flag to be set, - indicating the data model is consistent. - (3) Loads the config from etcd and passes it to the config object. - (4) Waits for the begin_polling Event to be triggered. 
- (5) Loads a complete snapshot from etcd and passes it to the - UpdateSplitter. - (6) Watches etcd for changes, sending them incrementally to the - UpdateSplitter. - (On etcd error) starts again from step (5) - - This greenlet is expected to be managed by the EtcdAPI Actor. + Greenlet that communicates with the etcd driver over a socket. + + * Handles initial configuration of the driver. + * Processes the initial config responses. + * Then fans out the stream of updates. """ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): - super(_FelixEtcdWatcher, self).__init__(config.ETCD_ADDR, VERSION_DIR) + super(_FelixEtcdWatcher, self).__init__() self._config = config self._etcd_api = etcd_api self._status_reporter = status_reporter @@ -316,6 +295,12 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): # Next-hop IP addresses of our hosts, if populated in etcd. self.ipv4_by_hostname = {} + # Forces a resync after the current poll if set. Safe to set from + # another thread. Automatically reset to False after the resync is + # triggered. + self.resync_after_current_poll = False # FIXME Periodic resync + self.dispatcher = PathDispatcher() + # Register for events when values change. self._register_paths() @@ -330,11 +315,7 @@ def _register_paths(self): deletion, we have to handle deletes for lots of directories that we otherwise wouldn't care about. """ - reg = self.register_path - # Top-level directories etc. If these go away, stop polling and - # resync. - for key in RESYNC_KEYS: - reg(key, on_del=self._resync) + reg = self.dispatcher.register # Profiles and their contents. reg(TAGS_KEY, on_set=self.on_tags_set, on_del=self.on_tags_delete) reg(RULES_KEY, on_set=self.on_rules_set, on_del=self.on_rules_delete) @@ -361,63 +342,45 @@ def _register_paths(self): @logging_exceptions def _run(self): - """ - Greenlet main loop: loads the initial dump from etcd and then - monitors for changes and feeds them to the splitter. - """ + _log.info("Waiting for load_config event...") self.load_config.wait() - self.loop() - - @logging_exceptions - def loop(self): - _log.info("Started %s loop", self) - while not self._stopped: - try: - _log.info("Reconnecting and loading snapshot from etcd...") - self.reconnect(copy_cluster_id=False) - - driver_sck = self.start_driver() - unpacker = msgpack.Unpacker() - msgs_processed = 0 - while True: - data = driver_sck.recv(16384) - unpacker.feed(data) - for msg in unpacker: - # Optimization: put update first in the "switch" - # block because it's on the critical path. - msg_type = msg[MSG_KEY_TYPE] - if msg_type == MSG_TYPE_UPDATE: - self.begin_polling.wait() - self._on_update_from_driver(msg) - elif msg_type == MSG_TYPE_CONFIG_LOADED: - self._on_config_loaded_from_driver(msg, driver_sck) - elif msg_type == MSG_TYPE_STATUS: - self._on_status_from_driver(msg) - else: - raise RuntimeError("Unexpected message %s" % msg) - msgs_processed += 1 - if msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: - # Yield to ensure that other actors make progress. - # Sleep must be non-zero to work around gevent - # issue where we could be immediately rescheduled. - gevent.sleep(0.000001) - - except EtcdException as e: - # Most likely a timeout or other error in the pre-resync; - # start over. These exceptions have good semantic error text - # so the stack trace would just add log spam. - _log.error("Unexpected IO or etcd error, triggering " - "resync with etcd: %r.", e) - time.sleep(1) # Prevent tight loop due to unexpected error. 
- except: - _log.exception("Exception reading from socket?") - raise + _log.info("...load_config set. Starting driver read %s loop", self) + driver_sck = self.start_driver() + unpacker = msgpack.Unpacker() + msgs_processed = 0 + while True: + data = driver_sck.recv(16384) + unpacker.feed(data) + for msg in unpacker: + # Optimization: put update first in the "switch" + # block because it's on the critical path. + msg_type = msg[MSG_KEY_TYPE] + if msg_type == MSG_TYPE_UPDATE: + self.begin_polling.wait() + self._on_update_from_driver(msg) + elif msg_type == MSG_TYPE_CONFIG_LOADED: + self._on_config_loaded_from_driver(msg, driver_sck) + elif msg_type == MSG_TYPE_STATUS: + self._on_status_from_driver(msg) + else: + raise RuntimeError("Unexpected message %s" % msg) + msgs_processed += 1 + if msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: + # Yield to ensure that other actors make progress. + # Sleep must be non-zero to work around gevent + # issue where we could be immediately rescheduled. + gevent.sleep(0.000001) _log.info("%s.loop() stopped due to self.stop == True", self) def _on_update_from_driver(self, msg): - assert self.configured.is_set() + """ + Called when the driver sends us a key/value pair update. + :param dict msg: The message recived from the driver. + """ + assert self.configured.is_set(), "Received update before config" key = msg[MSG_KEY_KEY] value = msg[MSG_KEY_VALUE] + _log.debug("Update from driver: %s -> %s", key, value) self.read_count += 1 if self.read_count % 1000 == 0: now = monotonic_time() @@ -425,16 +388,24 @@ def _on_update_from_driver(self, msg): _log.info("Processed %s updates from driver " "%.1f/s", self.read_count, 1000.0 / delta) self.last_rate_log_time = now + # Create a fake etcd node object. + # FIXME: avoid creating fake node. n = Node() n.action = "set" if value is not None else "delete" n.value = value n.key = key - try: - self.dispatcher.handle_event(n) - except ResyncRequired: - _log.warning("IGNORING RESYNC.") + # And dispatch it. + self.dispatcher.handle_event(n) def _on_config_loaded_from_driver(self, msg, driver_sck): + """ + Called when we receive a config loaded message from the driver. + + Responds to the driver immediately with a config response. + + If the config has changed since a previous call, triggers Felix + to die. + """ global_config = msg[MSG_KEY_GLOBAL_CONFIG] host_config = msg[MSG_KEY_HOST_CONFIG] _log.info("Config loaded by driver:\n" @@ -467,7 +438,7 @@ def _on_config_loaded_from_driver(self, msg, driver_sck): # Config now fully resolved, inform the driver. felix_log_file = self._config.LOGFILE if felix_log_file: - # FIXME PRoper config for driver logfile + # FIXME Proper config for driver logfile driver_log_file = felix_log_file + "-driver" else: driver_log_file = None @@ -481,16 +452,30 @@ def _on_config_loaded_from_driver(self, msg, driver_sck): self.configured.set() def _on_status_from_driver(self, msg): + """ + Called when we receive a status update from the driver. + + If the status is in-sync, triggers the relevant processing. + :param msg: + :return: + """ status = msg[MSG_KEY_STATUS] _log.info("etcd driver status changed to %s", status) if status == STATUS_IN_SYNC: + # We're now in sync, tell the splitter so that we can complete + # our start-of day cleanup etc. self.begin_polling.wait() # Make sure splitter is set. 
self._been_in_sync = True self.splitter.on_datamodel_in_sync(async=True) + self._status_reporter.clean_up_endpoint_statuses(async=True) self._update_hosts_ipset() - self.clean_up_endpoint_statuses() def start_driver(self): + """ + Starts the driver subprocess, connects to it over the socket + and sends it the init message. + :return: the connected socket to the driver. + """ _log.info("Creating server socket.") try: os.unlink("/run/felix-driver.sck") @@ -522,50 +507,6 @@ def start_driver(self): return update_conn - def clean_up_endpoint_statuses(self): - """ - Mark any endpoint status reports for non-existent endpoints - for cleanup. - """ - if not self._config.REPORT_ENDPOINT_STATUS: - _log.debug("Endpoint status reporting disabled, ignoring.") - return - - our_host_dir = "/".join([FELIX_STATUS_DIR, self._config.HOSTNAME, - "workload"]) - try: - # Grab all the existing status reports. - response = self.client.read(our_host_dir, - recursive=True) - except EtcdKeyNotFound: - _log.info("No endpoint statuses found, nothing to clean up") - else: - # Mark all statuses we find as dirty. This will result in any - # unknown endpoints being cleaned up. - for node in response.leaves: - combined_id = get_endpoint_id_from_key(node.key) - if combined_id: - _log.debug("Endpoint %s removed by resync, marking " - "status key for cleanup", - combined_id) - self._status_reporter.mark_endpoint_dirty(combined_id, - async=True) - elif node.dir: - # This leaf is an empty directory, try to clean it up. - # This is safe even if another thread is adding keys back - # into the directory. - _log.debug("Found empty directory %s, cleaning up", - node.key) - delete_empty_parents(self.client, node.key, our_host_dir) - - def _resync(self, response, **kwargs): - """ - Force a resync. - :raises ResyncRequired: always. - """ - _log.warning("Resync triggered due to change to %s", response.key) - raise ResyncRequired() - def on_endpoint_set(self, response, hostname, orchestrator, workload_id, endpoint_id): """Handler for endpoint updates, passes the update to the splitter.""" @@ -680,6 +621,7 @@ def __init__(self, config): self._newer_dirty_endpoints = set() self._older_dirty_endpoints = set() + self._cleanup_pending = False self._timer_scheduled = False self._reporting_allowed = True @@ -724,6 +666,14 @@ def _mark_endpoint_dirty(self, endpoint_id): _log.debug("Marking endpoint %s dirty", endpoint_id) self._newer_dirty_endpoints.add(endpoint_id) + @actor_message() + def clean_up_endpoint_statuses(self): + """ + Note that we need to do cleanup. We'll then try/retry from + _finish_msg_batch(). + """ + self._cleanup_pending = True + def _finish_msg_batch(self, batch, results): if not self._config.REPORT_ENDPOINT_STATUS: _log.error("StatusReporter called even though status reporting " @@ -733,6 +683,15 @@ def _finish_msg_batch(self, batch, results): self._newer_dirty_endpoints.clear() self._older_dirty_endpoints.clear() return + + if self._cleanup_pending: + try: + self._attempt_cleanup() + except EtcdException as e: + _log.error("Cleanup failed: %r", e) + else: + self._cleanup_pending = False + if self._reporting_allowed: # We're not rate limited, go ahead and do a write to etcd. _log.debug("Status reporting is allowed by rate limit.") @@ -758,8 +717,9 @@ def _finish_msg_batch(self, batch, results): # Reset the rate limit flag. self._reporting_allowed = False - if not self._timer_scheduled and not self._reporting_allowed: - # Schedule a timer to stop our rate limiting. 
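Aside: note the timer arithmetic a few lines below. The retry delay is
jittered by +/- 10% so that many Felix instances restarted together do not
write their status reports to etcd in lockstep. As a standalone sketch
(schedule is a hypothetical stand-in for gevent.spawn_later):

    import random

    def jittered(base_delay):
        # Spread timers over [0.9, 1.1] * base_delay so a whole cluster
        # doesn't fire at exactly the same moment.
        return base_delay * (0.9 + random.random() * 0.2)

    # e.g. schedule(jittered(config.ENDPOINT_REPORT_DELAY), retry_cb)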
+ if not self._timer_scheduled and ((not self._reporting_allowed) or + self._cleanup_pending): + # Schedule a timer to stop our rate limiting or retry cleanup. timeout = self._config.ENDPOINT_REPORT_DELAY timeout *= 0.9 + (random.random() * 0.2) # Jitter by +/- 10%. gevent.spawn_later(timeout, @@ -767,6 +727,33 @@ def _finish_msg_batch(self, batch, results): async=True) self._timer_scheduled = True + def _attempt_cleanup(self): + our_host_dir = "/".join([FELIX_STATUS_DIR, self._config.HOSTNAME, + "workload"]) + try: + # Grab all the existing status reports. + response = self.client.read(our_host_dir, + recursive=True) + except EtcdKeyNotFound: + _log.info("No endpoint statuses found, nothing to clean up") + else: + # Mark all statuses we find as dirty. This will result in any + # unknown endpoints being cleaned up. + for node in response.leaves: + combined_id = get_endpoint_id_from_key(node.key) + if combined_id: + _log.debug("Endpoint %s removed by resync, marking " + "status key for cleanup", + combined_id) + self._mark_endpoint_dirty(combined_id) + elif node.dir: + # This leaf is an empty directory, try to clean it up. + # This is safe even if another thread is adding keys back + # into the directory. + _log.debug("Found empty directory %s, cleaning up", + node.key) + delete_empty_parents(self.client, node.key, our_host_dir) + def _write_endpoint_status_to_etcd(self, ep_id, status): """ Try to actually write the status dict into etcd or delete the key diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index fbb1d5015f..225208f56a 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -25,7 +25,7 @@ from calico.felix.config import Config from calico.felix.futils import IPV4, IPV6 from calico.felix.ipsets import IpsetActor -from calico.felix.fetcd import (_FelixEtcdWatcher, ResyncRequired, EtcdAPI, +from calico.felix.fetcd import (_FelixEtcdWatcher, EtcdAPI, die_and_restart, EtcdStatusReporter, combine_statuses) from calico.felix.splitter import UpdateSplitter from calico.felix.test.base import BaseTestCase, JSONString @@ -151,13 +151,6 @@ def setUp(self): self.client = Mock() self.watcher.client = self.client - def test_resync_flag(self): - self.watcher.resync_after_current_poll = True - self.watcher.next_etcd_index = 1 - self.assertRaises(ResyncRequired, - self.watcher.wait_for_etcd_event) - self.assertFalse(self.watcher.resync_after_current_poll) - def test_endpoint_set(self): self.dispatch("/calico/v1/host/h1/workload/o1/w1/endpoint/e1", "set", value=ENDPOINT_STR) @@ -328,48 +321,6 @@ def dispatch(self, key, action, value=None): m_response.value = value self.watcher.dispatcher.handle_event(m_response) - def test_clean_up_endpoint_status(self): - self.m_config.REPORT_ENDPOINT_STATUS = True - ep_id = EndpointId("hostname", - "openstack", - "workloadid", - "endpointid") - - empty_dir = Mock() - empty_dir.key = ("/calico/felix/v1/host/hostname/workload/" - "openstack/foobar") - empty_dir.dir = True - - missing_ep = Mock() - missing_ep.key = ("/calico/felix/v1/host/hostname/workload/" - "openstack/aworkload/endpoint/anendpoint") - - self.client.read.return_value.leaves = [ - empty_dir, - missing_ep, - ] - self.watcher.clean_up_endpoint_statuses() - - # Missing endpoint should have been marked for cleanup. 
- self.m_status_rep.mark_endpoint_dirty.assert_called_once_with( - EndpointId("hostname", - "openstack", - "aworkload", - "anendpoint"), - async=True - ) - - def test_clean_up_endpoint_status_not_found(self): - self.m_config.REPORT_ENDPOINT_STATUS = True - self.client.read.side_effect = etcd.EtcdKeyNotFound() - self.watcher.clean_up_endpoint_statuses() - self.assertFalse(self.m_status_rep.mark_endpoint_dirty.called) - - def test_clean_up_endpoint_status_disabled(self): - self.m_config.REPORT_ENDPOINT_STATUS = False - self.client.read.side_effect = self.failureException - self.watcher.clean_up_endpoint_statuses() - class TestEtcdReporting(BaseTestCase): def setUp(self): @@ -635,3 +586,50 @@ def assert_combined_status(self, a, b, expected): self.assertEqual(result, expected, "Expected %r and %r to combine to %s but got %r" % (lhs, rhs, expected, result)) + + def test_clean_up_endpoint_status(self): + self.m_config.REPORT_ENDPOINT_STATUS = True + ep_id = EndpointId("foo", + "openstack", + "workloadid", + "endpointid") + + empty_dir = Mock() + empty_dir.key = ("/calico/felix/v1/host/foo/workload/" + "openstack/foobar") + empty_dir.dir = True + + missing_ep = Mock() + missing_ep.key = ("/calico/felix/v1/host/foo/workload/" + "openstack/aworkload/endpoint/anendpoint") + + self.m_client.read.return_value.leaves = [ + empty_dir, + missing_ep, + ] + with patch.object(self.rep, "_mark_endpoint_dirty") as m_mark: + self.rep.clean_up_endpoint_statuses(async=True) + self.step_actor(self.rep) + + # Missing endpoint should have been marked for cleanup. + m_mark.assert_called_once_with( + EndpointId("foo", + "openstack", + "aworkload", + "anendpoint") + ) + + def test_clean_up_endpoint_status_not_found(self): + self.m_config.REPORT_ENDPOINT_STATUS = True + self.m_client.read.side_effect = etcd.EtcdKeyNotFound() + with patch.object(self.rep, "_mark_endpoint_dirty") as m_mark: + self.rep.clean_up_endpoint_statuses(async=True) + self.step_actor(self.rep) + self.assertFalse(m_mark.called) + + def test_clean_up_endpoint_status_disabled(self): + self.m_config.REPORT_ENDPOINT_STATUS = False + self.m_client.read.side_effect = self.failureException + self.rep.clean_up_endpoint_statuses(async=True) + self.step_actor(self.rep) + From 906a0e5b29f8638ff9ae8c934980fb3b8f7d4199 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 27 Oct 2015 14:49:49 +0000 Subject: [PATCH 42/98] Implement periodic resync. --- calico/etcddriver/driver.py | 15 +++++++++++++++ calico/etcddriver/protocol.py | 3 +++ calico/felix/fetcd.py | 20 ++++++++++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index f303fcf033..8725b726b4 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -110,6 +110,10 @@ def __init__(self, felix_sck): # from Felix. Triggers the first resync. self._config_received = Event() + # Flag to request a resync. Set by the reader thread, polled by the + # resync and merge thread. 
+ self._resync_requested = False + def start(self): """Starts the driver's reader and resync threads.""" self._reader_thread.start() @@ -158,6 +162,8 @@ def _read_from_socket(self): self._handle_init(msg) elif msg_type == MSG_TYPE_CONFIG: self._handle_config(msg) + elif msg_type == MSG_TYPE_RESYNC: + self._handle_resync(msg) else: _log.warning("Unexpected message from Felix") finally: @@ -191,6 +197,10 @@ def _handle_config(self, msg): self._config_received.set() _log.info("Received config from Felix: %s", msg) + def _handle_resync(self, msg): + _log.info("Got resync message from felix: %s", msg) + self._resync_requested = True + def _resync_and_merge(self): """ Thread: Resync-and-merge thread. Loads the etcd snapshot, merges @@ -249,6 +259,7 @@ def _resync_and_merge(self): raise finally: self._first_resync = False + self._resync_requested = False def _wait_for_ready(self): """ @@ -453,10 +464,14 @@ def _handle_next_watcher_event(self): :raises DriverShutdown: :raises WatcherDied: :raises FelixWriteFailed: + :raises ResyncRequested: """ if self._watcher_queue is None: raise WatcherDied() while not self._stop_event.is_set(): + if self._resync_requested and self._watcher_stop_event: + _log.info("Resync requested, triggering one.") + self._watcher_stop_event.set() try: event = self._watcher_queue.get(timeout=1) except Empty: diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index 105314947d..2f42c1bd66 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -45,6 +45,9 @@ STATUS_RESYNC = "resync" STATUS_IN_SYNC = "in-sync" +# Force resync message Felix->Driver. +MSG_TYPE_RESYNC = "resync" + # Update message Driver -> Felix. MSG_TYPE_UPDATE = "u" MSG_KEY_KEY = "k" diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index ed4145c6d8..c03d97c27e 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -27,6 +27,7 @@ import socket import subprocess import msgpack +import select from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time @@ -240,7 +241,7 @@ def force_resync(self, reason="unknown"): :param str reason: Optional reason to log out. """ _log.info("Forcing a resync with etcd. Reason: %s.", reason) - self._watcher.resync_after_current_poll = True + self._watcher.resync_requested = True if self._config.REPORT_ENDPOINT_STATUS: _log.info("Endpoint status reporting enabled, marking existing " @@ -298,7 +299,7 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): # Forces a resync after the current poll if set. Safe to set from # another thread. Automatically reset to False after the resync is # triggered. - self.resync_after_current_poll = False # FIXME Periodic resync + self.resync_requested = False self.dispatcher = PathDispatcher() # Register for events when values change. @@ -349,8 +350,12 @@ def _run(self): unpacker = msgpack.Unpacker() msgs_processed = 0 while True: - data = driver_sck.recv(16384) - unpacker.feed(data) + # Use select to impose a timeout on how long we block so that we + # periodically check the resync flag. + read_ready, _, _ = select.select([driver_sck], [], [], 1) + if read_ready: + data = driver_sck.recv(16384) + unpacker.feed(data) for msg in unpacker: # Optimization: put update first in the "switch" # block because it's on the critical path. @@ -370,6 +375,13 @@ def _run(self): # Sleep must be non-zero to work around gevent # issue where we could be immediately rescheduled. 
                    gevent.sleep(0.000001)
+            if self.resync_requested:
+                self.resync_requested = False
+                driver_sck.sendall(
+                    msgpack.dumps({
+                        MSG_KEY_TYPE: MSG_TYPE_RESYNC,
+                    })
+                )
         _log.info("%s.loop() stopped due to self.stop == True", self)

From 5eae9ed4a2cf31079aa76d65b5b960a303a7cc43 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Tue, 27 Oct 2015 14:49:59 +0000
Subject: [PATCH 43/98] Fix incorrect use of send() vs sendall().

---
 calico/felix/fetcd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index c03d97c27e..fa71c91b98 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -454,7 +454,7 @@ def _on_config_loaded_from_driver(self, msg, driver_sck):
             driver_log_file = felix_log_file + "-driver"
         else:
             driver_log_file = None
-        driver_sck.send(msgpack.dumps({
+        driver_sck.sendall(msgpack.dumps({
             MSG_KEY_TYPE: MSG_TYPE_CONFIG,
             MSG_KEY_LOG_FILE: driver_log_file,
             MSG_KEY_SEV_FILE: self._config.LOGLEVFILE,

From e3a946f44c3f257d5cbb84b18801407ff11dd9c3 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Tue, 27 Oct 2015 15:17:13 +0000
Subject: [PATCH 44/98] Make sure status messages are flushed immediately.

---
 calico/etcddriver/driver.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py
index 8725b726b4..b4e40744db 100644
--- a/calico/etcddriver/driver.py
+++ b/calico/etcddriver/driver.py
@@ -223,6 +223,7 @@ def _resync_and_merge(self):
                 self._resync_http_pool = self.get_etcd_connection()
                 # Before we get to the snapshot, Felix needs the configuration.
                 self._queue_status(STATUS_WAIT_FOR_READY)
+                self._flush()  # Send the status message immediately.
                 self._wait_for_ready()
                 self._preload_config()
                 # Now (on the first run through) wait for Felix to process the
@@ -230,6 +231,7 @@ def _resync_and_merge(self):
                 self._config_received.wait()
                 # Kick off the snapshot request as far as the headers.
                 self._queue_status(STATUS_RESYNC)
+                self._flush()  # Send the status message immediately.
                 resp, snapshot_index = self._start_snapshot_request()
                 # Before reading from the snapshot, start the watcher thread.
                 self._start_watcher(snapshot_index)
@@ -238,8 +240,7 @@ def _resync_and_merge(self):
                 self._process_snapshot_and_events(resp, snapshot_index)
                 # We're now in-sync. Tell Felix.
                 self._queue_status(STATUS_IN_SYNC)
-                # Make sure we flush before we wait for events.
-                self._flush()
+                self._flush()  # Send the status message immediately.
                 self._process_events_only()
             except FelixWriteFailed:
                 _log.exception("Write to Felix failed; shutting down.")
@@ -554,11 +555,13 @@ def _queue_update_msg(self, key, value):
         self._maybe_flush()

     def _queue_status(self, status):
+        """
+        Queues the given status to felix as a status message.
+        """
         self._buf.write(msgpack.dumps({
             MSG_KEY_TYPE: MSG_TYPE_STATUS,
             MSG_KEY_STATUS: status,
         }))
-        self._maybe_flush()

     def _maybe_flush(self):
         self._updates_pending += 1

From ed037c8736e0bd4d094cb8732610e14391b38fa2 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Tue, 27 Oct 2015 15:17:49 +0000
Subject: [PATCH 45/98] Ensure start-of-day cleanup only runs once when Felix
 moves to being in sync.
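Aside, before patch 45's diff: patches 42-44 together settle the pattern on
the Felix side of the socket. Block with a bounded select() so that a flag
set from another greenlet is noticed within about a second, and write with
sendall() so a partial send() cannot corrupt the msgpack stream. A
condensed sketch of that receive loop; sock, handle and flags are
hypothetical stand-ins, while the two constants carry the real values from
protocol.py:

    import select
    import msgpack

    MSG_KEY_TYPE = "type"
    MSG_TYPE_RESYNC = "resync"

    def recv_loop(sock, handle, flags):
        unpacker = msgpack.Unpacker()
        while True:
            # Bounded wait: wake at least once a second to poll flags.
            ready, _, _ = select.select([sock], [], [], 1)
            if ready:
                unpacker.feed(sock.recv(16384))
                for msg in unpacker:
                    handle(msg)
            if flags.pop("resync_requested", None):
                # sendall() loops until the whole buffer is written; a
                # bare send() may write only a prefix (the patch 43 bug).
                sock.sendall(msgpack.dumps({MSG_KEY_TYPE: MSG_TYPE_RESYNC}))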
--- calico/etcddriver/driver.py | 1 + calico/felix/fetcd.py | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index b4e40744db..403b1ba9ad 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -473,6 +473,7 @@ def _handle_next_watcher_event(self): if self._resync_requested and self._watcher_stop_event: _log.info("Resync requested, triggering one.") self._watcher_stop_event.set() + raise WatcherDied() try: event = self._watcher_queue.get(timeout=1) except Empty: diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index fa71c91b98..0bf8e53a26 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -473,13 +473,14 @@ def _on_status_from_driver(self, msg): """ status = msg[MSG_KEY_STATUS] _log.info("etcd driver status changed to %s", status) - if status == STATUS_IN_SYNC: - # We're now in sync, tell the splitter so that we can complete - # our start-of day cleanup etc. + if status == STATUS_IN_SYNC and not self._been_in_sync: + # We're now in sync, tell the Actors that need to do start-of-day + # cleanup. self.begin_polling.wait() # Make sure splitter is set. self._been_in_sync = True self.splitter.on_datamodel_in_sync(async=True) - self._status_reporter.clean_up_endpoint_statuses(async=True) + if self._config.REPORT_ENDPOINT_STATUS: + self._status_reporter.clean_up_endpoint_statuses(async=True) self._update_hosts_ipset() def start_driver(self): @@ -688,8 +689,8 @@ def clean_up_endpoint_statuses(self): def _finish_msg_batch(self, batch, results): if not self._config.REPORT_ENDPOINT_STATUS: - _log.error("StatusReporter called even though status reporting " - "disabled. Ignoring.") + _log.warning("StatusReporter called even though status reporting " + "disabled. Ignoring.") self._endpoint_status[IPV4].clear() self._endpoint_status[IPV6].clear() self._newer_dirty_endpoints.clear() From e02cbaaabe2757b7cbbff63764dd6edd7da2879e Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 27 Oct 2015 15:39:05 +0000 Subject: [PATCH 46/98] Trigger resync if the whole /calico/v1 directory disappears. --- calico/etcddriver/driver.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 403b1ba9ad..c5799d074b 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -34,6 +34,8 @@ import errno from httplib import HTTPException from io import BytesIO +from calico.etcdutils import ACTION_MAPPING + try: import simplejson as json except ImportError: @@ -54,25 +56,15 @@ from calico.common import complete_logging from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time -from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, dir_for_per_host_config +from calico.datamodel_v1 import ( + READY_KEY, CONFIG_DIR, dir_for_per_host_config, VERSION_DIR +) from calico.etcddriver.hwm import HighWaterTracker _log = logging.getLogger(__name__) FLUSH_THRESHOLD = 200 -# TODO: trigger immediate resync if these are deleted? 
-# RESYNC_KEYS = [ -# VERSION_DIR, -# POLICY_DIR, -# PROFILE_DIR, -# CONFIG_DIR, -# HOST_DIR, -# IPAM_DIR, -# IPAM_V4_DIR, -# POOL_V4_DIR, -# ] - class EtcdDriver(object): def __init__(self, felix_sck): @@ -642,11 +634,28 @@ def watch_etcd(self, next_index, event_queue, stop_event): break node = etcd_resp["node"] key = node["key"] + action = ACTION_MAPPING[etcd_resp["action"]] + is_dir = node.get("dir", False) value = node.get("value") + if is_dir: + if action != "delete": + # Just ignore sets to directories, we only track + # leaves. + _log.debug("Skipping non-delete to dir %s", key) + continue + else: + if key == VERSION_DIR: + # Special case: if the whole keyspace is + # deleted, that implies the ready flag is gone + # too; resync rather than generating deletes + # for every key. + _log.warning("Whole %s deleted, resyncing", + VERSION_DIR) + break modified_index = node["modifiedIndex"] except (KeyError, TypeError, ValueError): _log.exception("Unexpected format for etcd response: %r;" - "trigering a resync.", + "triggering a resync.", resp_body) break else: From fa5ef318c4a9661c7b0694f69ac3fbc265904881 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 27 Oct 2015 16:56:12 +0000 Subject: [PATCH 47/98] Fix UT broken by variable rename. --- calico/felix/test/test_fetcd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 225208f56a..97b07afa34 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -124,7 +124,7 @@ def test_force_resync(self, m_spawn, m_etcd_watcher): api.force_resync(async=True) self.step_actor(api) m_status_rep.resync.assert_called_once_with(async=True) - self.assertTrue(m_etcd_watcher.return_value.resync_after_current_poll) + self.assertTrue(m_etcd_watcher.return_value.resync_requested) class ExpectedException(Exception): From 6c56d3c8d2056ba5b01d270729f99912bad57781 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 13:10:05 +0000 Subject: [PATCH 48/98] clean up fetcd.py. Add stats, comments and logging. --- calico/felix/fetcd.py | 118 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 97 insertions(+), 21 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 0bf8e53a26..797ad7aa91 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -52,7 +52,7 @@ ) from calico.felix.actor import Actor, actor_message from calico.felix.futils import (intern_dict, intern_list, logging_exceptions, - iso_utc_timestamp, IPV4, IPV6) + iso_utc_timestamp, IPV4, IPV6, StatCounter) _log = logging.getLogger(__name__) @@ -83,6 +83,10 @@ MAX_EVENTS_BEFORE_YIELD = 200 +# Global diagnostic counters. +_stats = StatCounter("Etcd counters") + + class EtcdAPI(EtcdClientOwner, Actor): """ Our API to etcd. @@ -153,6 +157,7 @@ def _periodically_resync(self): "seconds.", sleep_time) gevent.sleep(sleep_time) self.force_resync(reason="periodic resync", async=True) + _stats.increment("Periodic resync") @logging_exceptions def _periodically_report_status(self): @@ -218,7 +223,7 @@ def load_config(self): """ Loads our config from etcd, should only be called once. - :return: an event which is triggered when the config has been loaded. + :return: an Event which is triggered when the config has been loaded. """ self._watcher.load_config.set() return self._watcher.configured @@ -305,6 +310,8 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): # Register for events when values change. 
self._register_paths() + self._driver_process = None + self.read_count = 0 self.last_rate_log_time = monotonic_time() @@ -343,29 +350,44 @@ def _register_paths(self): @logging_exceptions def _run(self): + # Don't do anything until we're told to load the config. _log.info("Waiting for load_config event...") self.load_config.wait() _log.info("...load_config set. Starting driver read %s loop", self) - driver_sck = self.start_driver() + # Start the driver process and wait for it to connect back to our + # socket. + self._driver_sck = self.start_driver() + # Loop reading from the socket and processing messages. unpacker = msgpack.Unpacker() msgs_processed = 0 while True: # Use select to impose a timeout on how long we block so that we - # periodically check the resync flag. - read_ready, _, _ = select.select([driver_sck], [], [], 1) + # periodically check the resync flag below. + read_ready, _, _ = select.select([self._driver_sck], [], [], 1) if read_ready: - data = driver_sck.recv(16384) + # Socket has some data to read so this call shouldn't block. + data = self._driver_sck.recv(16384) + if not data: + # No data indicates an orderly shutdown of the socket, + # which shouldn't happen. + _log.critical("Driver closed the socket. Felix must exit.") + die_and_restart() + # Feed the data into the Unpacker, if it has enough data it + # will then generate some messages. Otherwise we'll loop + # again. unpacker.feed(data) for msg in unpacker: - # Optimization: put update first in the "switch" - # block because it's on the critical path. + # Optimization: put update first in the "switch" block because + # it's on the critical path. msg_type = msg[MSG_KEY_TYPE] if msg_type == MSG_TYPE_UPDATE: - self.begin_polling.wait() + _stats.increment("Update messages from driver") self._on_update_from_driver(msg) elif msg_type == MSG_TYPE_CONFIG_LOADED: - self._on_config_loaded_from_driver(msg, driver_sck) + _stats.increment("Config loaded messages from driver") + self._on_config_loaded_from_driver(msg) elif msg_type == MSG_TYPE_STATUS: + _stats.increment("Status messages from driver") self._on_status_from_driver(msg) else: raise RuntimeError("Unexpected message %s" % msg) @@ -376,23 +398,41 @@ def _run(self): # issue where we could be immediately rescheduled. gevent.sleep(0.000001) if self.resync_requested: + _log.info("Resync requested, sending resync request to driver") self.resync_requested = False - driver_sck.sendall( + self._driver_sck.sendall( msgpack.dumps({ MSG_KEY_TYPE: MSG_TYPE_RESYNC, }) ) + # Check that the driver hasn't died. The recv() call should + # raise an exception when the buffer runs dry but this usually + # gets hit first. + if self._driver_process.poll() is not None: + _log.critical("Driver process died with RC = %s. Felix must " + "exit.", self._driver_process.poll()) + die_and_restart() _log.info("%s.loop() stopped due to self.stop == True", self) def _on_update_from_driver(self, msg): """ Called when the driver sends us a key/value pair update. - :param dict msg: The message recived from the driver. + + After the initial handshake, the stream of events consists + entirely of updates unless something happens to change the + state of the driver. + + :param dict msg: The message received from the driver. """ assert self.configured.is_set(), "Received update before config" + # The driver starts polling immediately, make sure we block until + # everyone else is ready to receive updates. + self.begin_polling.wait() + # Unpack the message. 
key = msg[MSG_KEY_KEY] value = msg[MSG_KEY_VALUE] _log.debug("Update from driver: %s -> %s", key, value) + # Output some very coarse stats. self.read_count += 1 if self.read_count % 1000 == 0: now = monotonic_time() @@ -409,11 +449,15 @@ def _on_update_from_driver(self, msg): # And dispatch it. self.dispatcher.handle_event(n) - def _on_config_loaded_from_driver(self, msg, driver_sck): + def _on_config_loaded_from_driver(self, msg): """ Called when we receive a config loaded message from the driver. - Responds to the driver immediately with a config response. + This message is expected once per resync, when the config is + pre-loaded by the driver. + + On the first call, responds to the driver synchronously with a + config response. If the config has changed since a previous call, triggers Felix to die. @@ -454,7 +498,7 @@ def _on_config_loaded_from_driver(self, msg, driver_sck): driver_log_file = felix_log_file + "-driver" else: driver_log_file = None - driver_sck.sendall(msgpack.dumps({ + self._driver_sck.sendall(msgpack.dumps({ MSG_KEY_TYPE: MSG_TYPE_CONFIG, MSG_KEY_LOG_FILE: driver_log_file, MSG_KEY_SEV_FILE: self._config.LOGLEVFILE, @@ -467,9 +511,19 @@ def _on_status_from_driver(self, msg): """ Called when we receive a status update from the driver. + The driver sends us status messages whenever its status changes. + It moves through these states: + + * wait-for-ready (waiting for the global ready flag to become set) + * resync (resyncing with etcd, processing a snapshot and any + concurrent events) + * in-sync (snapshot processsing complete, now processing only events + from etcd) + + If it falls out of sync with etcd then it moves back into + wait-for-ready state and starts again. + If the status is in-sync, triggers the relevant processing. - :param msg: - :return: """ status = msg[MSG_KEY_STATUS] _log.info("etcd driver status changed to %s", status) @@ -487,6 +541,10 @@ def start_driver(self): """ Starts the driver subprocess, connects to it over the socket and sends it the init message. + + Stores the Popen object in self._driver_process for future + access. + :return: the connected socket to the driver. """ _log.info("Creating server socket.") @@ -498,10 +556,11 @@ def start_driver(self): socket.SOCK_STREAM) update_socket.bind("/run/felix-driver.sck") update_socket.listen(1) - subprocess.Popen([sys.executable, - "-m", - "calico.etcddriver", - "/run/felix-driver.sck"]) + self._driver_process = subprocess.Popen([sys.executable, + "-m", + "calico.etcddriver", + "/run/felix-driver.sck"]) + _log.info("Started etcd driver with PID %s", self._driver_process.pid) update_conn, _ = update_socket.accept() _log.info("Accepted connection on socket") # No longer need the server socket, remove it. 
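Aside: the start_driver() changes above follow the usual recipe for
supervising a helper process over a unix socket: bind and listen before
forking the child, so the child can never connect before the parent is
ready; accept exactly one connection; and keep the Popen handle so the
child's death is detectable via poll(). A condensed sketch under the same
assumptions as the patch:

    import os
    import socket
    import subprocess
    import sys

    def start_helper(sck_path="/run/felix-driver.sck"):
        try:
            os.unlink(sck_path)  # Remove any stale socket from a crash.
        except OSError:
            pass
        server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        server.bind(sck_path)
        server.listen(1)  # Listen *before* spawning the child...
        proc = subprocess.Popen([sys.executable, "-m",
                                 "calico.etcddriver", sck_path])
        conn, _ = server.accept()  # ...so this accept() can't be raced.
        server.close()  # Only one connection is ever needed.
        return proc, conn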
@@ -526,6 +585,7 @@ def on_endpoint_set(self, response, hostname, orchestrator, combined_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id) _log.debug("Endpoint %s updated", combined_id) + _stats.increment("Endpoint created/updated") endpoint = parse_endpoint(self._config, combined_id, response.value) self.splitter.on_endpoint_update(combined_id, endpoint, async=True) @@ -535,11 +595,13 @@ def on_endpoint_delete(self, response, hostname, orchestrator, combined_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id) _log.debug("Endpoint %s deleted", combined_id) + _stats.increment("Endpoint deleted") self.splitter.on_endpoint_update(combined_id, None, async=True) def on_rules_set(self, response, profile_id): """Handler for rules updates, passes the update to the splitter.""" _log.debug("Rules for %s set", profile_id) + _stats.increment("Rules created/updated") rules = parse_rules(profile_id, response.value) profile_id = intern(profile_id.encode("utf8")) self.splitter.on_rules_update(profile_id, rules, async=True) @@ -547,11 +609,13 @@ def on_rules_set(self, response, profile_id): def on_rules_delete(self, response, profile_id): """Handler for rules deletes, passes the update to the splitter.""" _log.debug("Rules for %s deleted", profile_id) + _stats.increment("Rules deleted") self.splitter.on_rules_update(profile_id, None, async=True) def on_tags_set(self, response, profile_id): """Handler for tags updates, passes the update to the splitter.""" _log.debug("Tags for %s set", profile_id) + _stats.increment("Tags created/updated") rules = parse_tags(profile_id, response.value) profile_id = intern(profile_id.encode("utf8")) self.splitter.on_tags_update(profile_id, rules, async=True) @@ -559,6 +623,7 @@ def on_tags_set(self, response, profile_id): def on_tags_delete(self, response, profile_id): """Handler for tags deletes, passes the update to the splitter.""" _log.debug("Tags for %s deleted", profile_id) + _stats.increment("Tags deleted") self.splitter.on_tags_update(profile_id, None, async=True) def on_host_ip_set(self, response, hostname): @@ -566,6 +631,7 @@ def on_host_ip_set(self, response, hostname): _log.debug("Ignoring update to %s because IP-in-IP is disabled", response.key) return + _stats.increment("Host IP created/updated") ip = parse_host_ip(hostname, response.value) if ip: self.ipv4_by_hostname[hostname] = ip @@ -580,6 +646,7 @@ def on_host_ip_delete(self, response, hostname): _log.debug("Ignoring update to %s because IP-in-IP is disabled", response.key) return + _stats.increment("Host IP deleted") if self.ipv4_by_hostname.pop(hostname, None): self._update_hosts_ipset() @@ -596,11 +663,13 @@ def _on_config_updated(self, response, config_param): _log.critical("Global config value %s updated. Felix must be " "restarted.", config_param) die_and_restart() + _stats.increment("Global config (non) updates") def _on_host_config_updated(self, response, hostname, config_param): if hostname != self._config.HOSTNAME: _log.debug("Ignoring config update for host %s", hostname) return + _stats.increment("Per-host config created/updated") new_value = response.value if self.last_host_config.get(config_param) != new_value: _log.critical("Global config value %s updated. 
Felix must be " @@ -608,10 +677,12 @@ def _on_host_config_updated(self, response, hostname, config_param): die_and_restart() def on_ipam_v4_pool_set(self, response, pool_id): + _stats.increment("IPAM pool created/updated") pool = parse_ipam_pool(pool_id, response.value) self.splitter.on_ipam_pool_update(pool_id, pool, async=True) def on_ipam_v4_pool_delete(self, response, pool_id): + _stats.increment("IPAM pool deleted") self.splitter.on_ipam_pool_update(pool_id, None, async=True) @@ -642,8 +713,10 @@ def __init__(self, config): def on_endpoint_status_changed(self, endpoint_id, ip_type, status): assert isinstance(endpoint_id, EndpointId) if status is not None: + _stats.increment("Endpoint status deleted") self._endpoint_status[ip_type][endpoint_id] = status else: + _stats.increment("Endpoint status updated") self._endpoint_status[ip_type].pop(endpoint_id, None) self._mark_endpoint_dirty(endpoint_id) @@ -702,7 +775,9 @@ def _finish_msg_batch(self, batch, results): self._attempt_cleanup() except EtcdException as e: _log.error("Cleanup failed: %r", e) + _stats.increment("Status report cleanup failed") else: + _stats.increment("Status report cleanup done") self._cleanup_pending = False if self._reporting_allowed: @@ -772,6 +847,7 @@ def _write_endpoint_status_to_etcd(self, ep_id, status): Try to actually write the status dict into etcd or delete the key if it is no longer needed. """ + _stats.increment("Per-port status report etcd writes") status_key = ep_id.path_for_status if status: _log.debug("Writing endpoint status %s = %s", ep_id, status) From f3ce0d3b748ff2f42ecf09df10cafb2a414369cb Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 14:00:35 +0000 Subject: [PATCH 49/98] Move Felix socket handling into utility classes. Clean up EtcdEvent class. --- calico/etcddriver/protocol.py | 52 ++++++++++++++++++++ calico/etcdutils.py | 10 ++++ calico/felix/fetcd.py | 90 ++++++++++++----------------------- 3 files changed, 92 insertions(+), 60 deletions(-) diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index 2f42c1bd66..9f34449095 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -18,6 +18,11 @@ Protocol constants for Felix <-> Driver protocol. """ +import logging +import msgpack +import select + +_log = logging.getLogger(__name__) MSG_KEY_TYPE = "type" @@ -52,3 +57,50 @@ MSG_TYPE_UPDATE = "u" MSG_KEY_KEY = "k" MSG_KEY_VALUE = "v" + + +class SocketClosed(Exception): + pass + + +class MessageWriter(object): + def __init__(self, sck): + self._sck = sck + + def send_message(self, msg_type, fields=None): + msg = {MSG_KEY_TYPE: msg_type} + if fields: + msg.update(fields) + self._sck.sendall(msgpack.dumps(msg)) + + +class MessageReader(object): + def __init__(self, sck): + self._sck = sck + self._unpacker = msgpack.Unpacker() + + def new_messages(self, timeout=None): + """ + Generator: generates 0 or more tuples containing message type and + message body (as a dict). + + :param timeout: Maximum time to block waiting on the socket before + giving up. No exception is raised upon timeout but 0 events + are generated. + :raises SocketClosed if the socket is closed. + """ + if timeout is not None: + read_ready, _, _ = select.select([self._sck], [], [], 1) + if not read_ready: + return + data = self._sck.recv(16384) + if not data: + # No data indicates an orderly shutdown of the socket, + # which shouldn't happen. 
+ _log.error("Socket closed by other end.") + raise SocketClosed() + # Feed the data into the Unpacker, if it has enough data it will then + # generate some messages. + self._unpacker.feed(data) + for msg in self._unpacker: + yield msg[MSG_KEY_TYPE], msg diff --git a/calico/etcdutils.py b/calico/etcdutils.py index 2584f4e9ad..cc6efec254 100644 --- a/calico/etcdutils.py +++ b/calico/etcdutils.py @@ -1,4 +1,5 @@ # Copyright (c) Metaswitch Networks 2015. All rights reserved. +from collections import namedtuple import logging import re @@ -54,6 +55,12 @@ def register(self, path, on_set=None, on_del=None): node["delete"] = on_del def handle_event(self, response): + """ + :param EtcdEvent|EtcdResponse: Either a python-etcd response object + for a watch response or an instance of our dedicated EtcdEvent + class, which we use when deserialising an event that came over + the etcd driver socket. + """ _log.debug("etcd event %s for key %s", response.action, response.key) key_parts = response.key.strip("/").split("/") self._handle(key_parts, response, self.handler_root, {}) @@ -80,6 +87,9 @@ def _handle(self, key_parts, response, handler_node, captures): action, response.key, handler_node) +EtcdEvent = namedtuple("EtcdEvent", ["action", "key", "value"]) + + class EtcdClientOwner(object): """ Base class for objects that own an etcd Client. Supports diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 797ad7aa91..a27eac30f8 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -26,8 +26,6 @@ import logging import socket import subprocess -import msgpack -import select from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time @@ -48,8 +46,8 @@ dir_for_felix_status, ENDPOINT_STATUS_ERROR, ENDPOINT_STATUS_DOWN, ENDPOINT_STATUS_UP) from calico.etcdutils import ( - EtcdClientOwner, delete_empty_parents, PathDispatcher -) + EtcdClientOwner, delete_empty_parents, PathDispatcher, + EtcdEvent) from calico.felix.actor import Actor, actor_message from calico.felix.futils import (intern_dict, intern_list, logging_exceptions, iso_utc_timestamp, IPV4, IPV6, StatCounter) @@ -356,30 +354,13 @@ def _run(self): _log.info("...load_config set. Starting driver read %s loop", self) # Start the driver process and wait for it to connect back to our # socket. - self._driver_sck = self.start_driver() + self._msg_reader, self._msg_writer = self.start_driver() # Loop reading from the socket and processing messages. - unpacker = msgpack.Unpacker() msgs_processed = 0 while True: - # Use select to impose a timeout on how long we block so that we - # periodically check the resync flag below. - read_ready, _, _ = select.select([self._driver_sck], [], [], 1) - if read_ready: - # Socket has some data to read so this call shouldn't block. - data = self._driver_sck.recv(16384) - if not data: - # No data indicates an orderly shutdown of the socket, - # which shouldn't happen. - _log.critical("Driver closed the socket. Felix must exit.") - die_and_restart() - # Feed the data into the Unpacker, if it has enough data it - # will then generate some messages. Otherwise we'll loop - # again. - unpacker.feed(data) - for msg in unpacker: + for msg_type, msg in self._msg_reader.new_messages(timeout=1): # Optimization: put update first in the "switch" block because # it's on the critical path. 
- msg_type = msg[MSG_KEY_TYPE] if msg_type == MSG_TYPE_UPDATE: _stats.increment("Update messages from driver") self._on_update_from_driver(msg) @@ -400,11 +381,7 @@ def _run(self): if self.resync_requested: _log.info("Resync requested, sending resync request to driver") self.resync_requested = False - self._driver_sck.sendall( - msgpack.dumps({ - MSG_KEY_TYPE: MSG_TYPE_RESYNC, - }) - ) + self._msg_writer.send_message(MSG_TYPE_RESYNC) # Check that the driver hasn't died. The recv() call should # raise an exception when the buffer runs dry but this usually # gets hit first. @@ -440,13 +417,9 @@ def _on_update_from_driver(self, msg): _log.info("Processed %s updates from driver " "%.1f/s", self.read_count, 1000.0 / delta) self.last_rate_log_time = now - # Create a fake etcd node object. - # FIXME: avoid creating fake node. - n = Node() - n.action = "set" if value is not None else "delete" - n.value = value - n.key = key - # And dispatch it. + # Wrap the update in an EtcdEvent object so we can dispatch it via the + # PathDispatcher. + n = EtcdEvent("set" if value is not None else "delete", key, value) self.dispatcher.handle_event(n) def _on_config_loaded_from_driver(self, msg): @@ -498,13 +471,15 @@ def _on_config_loaded_from_driver(self, msg): driver_log_file = felix_log_file + "-driver" else: driver_log_file = None - self._driver_sck.sendall(msgpack.dumps({ - MSG_KEY_TYPE: MSG_TYPE_CONFIG, - MSG_KEY_LOG_FILE: driver_log_file, - MSG_KEY_SEV_FILE: self._config.LOGLEVFILE, - MSG_KEY_SEV_SCREEN: self._config.LOGLEVSCR, - MSG_KEY_SEV_SYSLOG: self._config.LOGLEVSYS, - })) + self._msg_writer.send_message( + MSG_TYPE_CONFIG, + { + MSG_KEY_LOG_FILE: driver_log_file, + MSG_KEY_SEV_FILE: self._config.LOGLEVFILE, + MSG_KEY_SEV_SCREEN: self._config.LOGLEVSCR, + MSG_KEY_SEV_SYSLOG: self._config.LOGLEVSYS, + } + ) self.configured.set() def _on_status_from_driver(self, msg): @@ -571,13 +546,19 @@ def start_driver(self): else: _log.info("Unlinked server socket") - update_conn.sendall(msgpack.dumps({ - MSG_KEY_TYPE: MSG_TYPE_INIT, - MSG_KEY_ETCD_URL: "http://" + self._config.ETCD_ADDR, - MSG_KEY_HOSTNAME: self._config.HOSTNAME, - })) - - return update_conn + # Wrap the socket in reader/writer objects that simplify using the + # protocol. + reader = MessageReader(update_conn) + writer = MessageWriter(update_conn) + # Give the driver its config. + writer.send_message( + MSG_TYPE_INIT, + { + MSG_KEY_ETCD_URL: "http://" + self._config.ETCD_ADDR, + MSG_KEY_HOSTNAME: self._config.HOSTNAME, + } + ) + return reader, writer def on_endpoint_set(self, response, hostname, orchestrator, workload_id, endpoint_id): @@ -1054,14 +1035,3 @@ def safe_decode_json(raw_json, log_tag=None): _log.warning("Failed to decode JSON for %s: %r. Returning None.", log_tag, raw_json) return None - - -class Node(object): - __slots__ = ("key", "value", "action", "current_key", "modifiedIndex") - - def __init__(self): - self.modifiedIndex = None - self.key = None - self.value = None - self.action = None - self.current_key = None From 15dad50b3ff6691e955bc264c4d32de1c2fb7d9a Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 14:50:37 +0000 Subject: [PATCH 50/98] Refactor driver to use MessageReader/Writer classes. 
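
This replaces the driver's hand-rolled socket/msgpack handling with the
shared MessageReader/MessageWriter classes from
calico/etcddriver/protocol.py.  A minimal sketch of the intended usage
pattern on either end of the socket (illustrative only; the real wiring
is in the diff below, and handle_message is a hypothetical stand-in for
the caller's dispatch logic):

    from calico.etcddriver.protocol import (
        MessageReader, MessageWriter, MSG_TYPE_UPDATE, MSG_KEY_KEY,
        MSG_KEY_VALUE, MSG_TYPE_STATUS, MSG_KEY_STATUS, STATUS_IN_SYNC,
    )

    def pump(sck, handle_message):
        reader = MessageReader(sck)
        writer = MessageWriter(sck)
        # High-volume updates are buffered: flush=False lets the writer
        # coalesce them and flush once its internal threshold is hit.
        writer.send_message(MSG_TYPE_UPDATE,
                            {MSG_KEY_KEY: "/calico/v1/Ready",
                             MSG_KEY_VALUE: "true"},
                            flush=False)
        # Rare control messages are flushed immediately (the default).
        writer.send_message(MSG_TYPE_STATUS,
                            {MSG_KEY_STATUS: STATUS_IN_SYNC})
        # The reader yields (msg_type, msg) tuples; the 1s timeout lets
        # the loop wake up periodically to check for shutdown.
        for msg_type, msg in reader.new_messages(timeout=1):
            handle_message(msg_type, msg)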
--- calico/etcddriver/driver.py | 130 +++++++++++----------------------- calico/etcddriver/protocol.py | 67 +++++++++++++++++- 2 files changed, 106 insertions(+), 91 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index c5799d074b..1a68cef197 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -31,30 +31,34 @@ Felix about all the individual keys that are deleted. """ -import errno from httplib import HTTPException -from io import BytesIO -from calico.etcdutils import ACTION_MAPPING - +import logging +from Queue import Queue, Empty +import socket try: + # simplejson is a faster drop-in replacement. import simplejson as json except ImportError: import json -import logging -from Queue import Queue, Empty -import socket from threading import Thread, Event import time from urlparse import urlparse from ijson.backends import yajl2 as ijson -import msgpack import urllib3 from urllib3 import HTTPConnectionPool from urllib3.exceptions import ReadTimeoutError +from calico.etcddriver.protocol import ( + MessageReader, MSG_TYPE_INIT, MSG_TYPE_CONFIG, MSG_TYPE_RESYNC, + MSG_KEY_ETCD_URL, MSG_KEY_HOSTNAME, MSG_KEY_LOG_FILE, MSG_KEY_SEV_FILE, + MSG_KEY_SEV_SYSLOG, MSG_KEY_SEV_SCREEN, STATUS_WAIT_FOR_READY, + STATUS_RESYNC, STATUS_IN_SYNC, MSG_TYPE_CONFIG_LOADED, + MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG, MSG_TYPE_UPDATE, MSG_KEY_KEY, + MSG_KEY_VALUE, MessageWriter, MSG_TYPE_STATUS, MSG_KEY_STATUS, + WriteFailed) +from calico.etcdutils import ACTION_MAPPING from calico.common import complete_logging -from calico.etcddriver.protocol import * from calico.monotonic import monotonic_time from calico.datamodel_v1 import ( READY_KEY, CONFIG_DIR, dir_for_per_host_config, VERSION_DIR @@ -63,12 +67,12 @@ _log = logging.getLogger(__name__) -FLUSH_THRESHOLD = 200 - class EtcdDriver(object): def __init__(self, felix_sck): - self._felix_sck = felix_sck + # Wrap the socket with our protocol reader/writer objects. + self._msg_reader = MessageReader(felix_sck) + self._msg_writer = MessageWriter(felix_sck) # Global stop event used to signal to all threads to stop. self._stop_event = Event() @@ -85,9 +89,6 @@ def __init__(self, felix_sck): # High-water mark cache. Owned by resync thread. self._hwms = HighWaterTracker() - # Number of pending updates and buffer. Owned by resync thread. - self._updates_pending = 0 - self._buf = BytesIO() self._first_resync = True self._resync_http_pool = None self._cluster_id = None @@ -117,7 +118,7 @@ def join(self, timeout=None): :returns True if the driver stopped, False on timeout. """ - return self._stop_event.wait(timeout=None) + return self._stop_event.wait(timeout=timeout) def stop(self): _log.info("Stopping driver") @@ -131,25 +132,8 @@ def _read_from_socket(self): with the exception if Felix dies. """ try: - unpacker = msgpack.Unpacker() while not self._stop_event.is_set(): - try: - data = self._felix_sck.recv(8092) - except socket.error as e: - if e.errno in (errno.EAGAIN, - errno.EWOULDBLOCK, - errno.EINTR): - _log.debug("Retryable error on read from Felix.") - continue - else: - _log.error("Failed to read from Felix socket: %r", e) - raise - if not data: - _log.error("No data read, assuming Felix closed socket") - break - unpacker.feed(data) - for msg in unpacker: - msg_type = msg[MSG_KEY_TYPE] + for msg_type, msg in self._msg_reader.new_messages(): if msg_type == MSG_TYPE_INIT: self._handle_init(msg) elif msg_type == MSG_TYPE_CONFIG: @@ -214,16 +198,14 @@ def _resync_and_merge(self): # state. 
self._resync_http_pool = self.get_etcd_connection() # Before we get to the snapshot, Felix needs the configuration. - self._queue_status(STATUS_WAIT_FOR_READY) - self._flush() # Send the status message immediately. + self._send_status(STATUS_WAIT_FOR_READY) self._wait_for_ready() self._preload_config() # Now (on the first run through) wait for Felix to process the # config. self._config_received.wait() # Kick off the snapshot request as far as the headers. - self._queue_status(STATUS_RESYNC) - self._flush() # Send the status message immediately. + self._send_status(STATUS_RESYNC) resp, snapshot_index = self._start_snapshot_request() # Before reading from the snapshot, start the watcher thread. self._start_watcher(snapshot_index) @@ -231,10 +213,9 @@ def _resync_and_merge(self): # the queue. self._process_snapshot_and_events(resp, snapshot_index) # We're now in-sync. Tell Felix. - self._queue_status(STATUS_IN_SYNC) - self._flush() # Send the status message immediately. + self._send_status(STATUS_IN_SYNC) self._process_events_only() - except FelixWriteFailed: + except WriteFailed: _log.exception("Write to Felix failed; shutting down.") self.stop() except WatcherDied: @@ -285,14 +266,13 @@ def _preload_config(self): global_config = self._load_config(CONFIG_DIR) host_config_dir = dir_for_per_host_config(self._hostname) host_config = self._load_config(host_config_dir) - self._buf.write(msgpack.dumps( + self._msg_writer.send_message( + MSG_TYPE_CONFIG_LOADED, { - MSG_KEY_TYPE: MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG: global_config, MSG_KEY_HOST_CONFIG: host_config, } - )) - self._flush() + ) _log.info("Sent config message to Felix.") def _load_config(self, config_dir): @@ -431,7 +411,7 @@ def _process_events_only(self): _log.info("In sync, now processing events only...") while not self._stop_event.is_set(): self._handle_next_watcher_event() - self._flush() + self._msg_writer.flush() def _scan_for_deletions(self, snapshot_index): """ @@ -533,48 +513,26 @@ def _on_key_updated(self, key, value): if key == READY_KEY and value != "true": _log.warning("Ready key no longer set to true, triggering resync.") raise ResyncRequired() - self._queue_update_msg(key, value) - - def _queue_update_msg(self, key, value): - """ - Queues an update message to Felix. - :raises FelixWriteFailed: - """ - self._buf.write(msgpack.dumps({ - MSG_KEY_TYPE: MSG_TYPE_UPDATE, - MSG_KEY_KEY: key, - MSG_KEY_VALUE: value, - })) - self._maybe_flush() + self._msg_writer.send_message( + MSG_TYPE_UPDATE, + { + MSG_KEY_KEY: key, + MSG_KEY_VALUE: value, + }, + flush=False + ) - def _queue_status(self, status): + def _send_status(self, status): """ Queues the given status to felix as a status message. """ - self._buf.write(msgpack.dumps({ - MSG_KEY_TYPE: MSG_TYPE_STATUS, - MSG_KEY_STATUS: status, - })) - - def _maybe_flush(self): - self._updates_pending += 1 - if self._updates_pending > FLUSH_THRESHOLD: - self._flush() - - def _flush(self): - """ - Flushes the write buffer to Felix. 
- :raises FelixWriteFailed: - """ - buf_contents = self._buf.getvalue() - if buf_contents: - try: - self._felix_sck.sendall(buf_contents) - except socket.error as e: - _log.exception("Failed to write to Felix socket") - raise FelixWriteFailed(e) - self._buf = BytesIO() - self._updates_pending = 0 + _log.info("Sending status to Felix: %s", status) + self._msg_writer.send_message( + MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: status, + } + ) def watch_etcd(self, next_index, event_queue, stop_event): """ @@ -734,9 +692,5 @@ class DriverShutdown(Exception): pass -class FelixWriteFailed(Exception): - pass - - class ResyncRequired(Exception): pass diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index 9f34449095..3b44ddb370 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -19,6 +19,9 @@ Protocol constants for Felix <-> Driver protocol. """ import logging +import socket +import errno +from io import BytesIO import msgpack import select @@ -59,19 +62,67 @@ MSG_KEY_VALUE = "v" +FLUSH_THRESHOLD = 200 + + class SocketClosed(Exception): pass +class WriteFailed(Exception): + pass + + class MessageWriter(object): + """ + Wrapper around a socket used to write protocol messages. + + Supports buffering a number of messages for subsequent flush(). + """ def __init__(self, sck): self._sck = sck + self._buf = BytesIO() + self._updates_pending = 0 - def send_message(self, msg_type, fields=None): + def send_message(self, msg_type, fields=None, flush=True): + """ + Send a message of the given type with the given fields. + Optionally, flush the data to the socket. + + This method will flush the buffer if it grows too large in any + case. + + :param msg_type: one of the MSG_TYPE_* constants. + :param dict fields: dict mapping MSG_KEY_* constants to values. + :param flush: True to force the data to be written immediately. + """ msg = {MSG_KEY_TYPE: msg_type} if fields: msg.update(fields) - self._sck.sendall(msgpack.dumps(msg)) + self._buf.write(msgpack.dumps(msg)) + if flush: + self.flush() + else: + self._maybe_flush() + + def _maybe_flush(self): + self._updates_pending += 1 + if self._updates_pending > FLUSH_THRESHOLD: + self.flush() + + def flush(self): + """ + Flushes the write buffer to the socket immediately. + """ + buf_contents = self._buf.getvalue() + if buf_contents: + try: + self._sck.sendall(buf_contents) + except socket.error as e: + _log.exception("Failed to write to socket") + raise WriteFailed(e) + self._buf = BytesIO() + self._updates_pending = 0 class MessageReader(object): @@ -93,7 +144,17 @@ def new_messages(self, timeout=None): read_ready, _, _ = select.select([self._sck], [], [], 1) if not read_ready: return - data = self._sck.recv(16384) + try: + data = self._sck.recv(16384) + except socket.error as e: + if e.errno in (errno.EAGAIN, + errno.EWOULDBLOCK, + errno.EINTR): + _log.debug("Retryable error on read.") + return + else: + _log.error("Failed to read from socket: %r", e) + raise if not data: # No data indicates an orderly shutdown of the socket, # which shouldn't happen. From 8bb53c69bd9b2876e17b502f7d70fdb8fec0c4db Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 14:59:12 +0000 Subject: [PATCH 51/98] Fix up imports. 
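
The previous commits left fetcd.py importing the protocol module with a
wildcard; this commit replaces that with explicit imports and regroups
the import blocks.  Illustrative before/after of the convention now
used:

    # Before: star import hides where each name comes from and risks
    # shadowing local definitions.
    from calico.etcddriver.protocol import *

    # After: every constant and class is named at the import site, so
    # readers and linters can trace MSG_TYPE_UPDATE etc. to its source.
    from calico.etcddriver.protocol import (
        MessageReader, MessageWriter, MSG_TYPE_UPDATE, MSG_KEY_KEY,
        MSG_KEY_VALUE,
    )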
--- calico/etcddriver/protocol.py | 3 +++ calico/felix/fetcd.py | 37 +++++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index 3b44ddb370..35ebe58b85 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -66,10 +66,12 @@ class SocketClosed(Exception): + """The socket was unexpectedly closed by the other end.""" pass class WriteFailed(Exception): + """Write to the socket failed.""" pass @@ -114,6 +116,7 @@ def flush(self): """ Flushes the write buffer to the socket immediately. """ + _log.debug("Flushing the buffer to the socket") buf_contents = self._buf.getvalue() if buf_contents: try: diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index a27eac30f8..8ed6f193ce 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -26,8 +26,6 @@ import logging import socket import subprocess -from calico.etcddriver.protocol import * -from calico.monotonic import monotonic_time from etcd import EtcdException, EtcdKeyNotFound import gevent @@ -36,21 +34,30 @@ from calico import common from calico.common import ValidationFailed, validate_ip_addr, canonicalise_ip -from calico.datamodel_v1 import (VERSION_DIR, CONFIG_DIR, - RULES_KEY_RE, TAGS_KEY_RE, - dir_for_per_host_config, - PROFILE_DIR, HOST_DIR, EndpointId, - HOST_IP_KEY_RE, IPAM_V4_CIDR_KEY_RE, - key_for_last_status, key_for_status, - FELIX_STATUS_DIR, get_endpoint_id_from_key, - dir_for_felix_status, ENDPOINT_STATUS_ERROR, - ENDPOINT_STATUS_DOWN, ENDPOINT_STATUS_UP) +from calico.datamodel_v1 import ( + VERSION_DIR, CONFIG_DIR, RULES_KEY_RE, TAGS_KEY_RE, + dir_for_per_host_config, PROFILE_DIR, HOST_DIR, EndpointId, HOST_IP_KEY_RE, + IPAM_V4_CIDR_KEY_RE, key_for_last_status, key_for_status, FELIX_STATUS_DIR, + get_endpoint_id_from_key, dir_for_felix_status, ENDPOINT_STATUS_ERROR, + ENDPOINT_STATUS_DOWN, ENDPOINT_STATUS_UP +) +from calico.etcddriver.protocol import ( + MessageReader, MSG_TYPE_INIT, MSG_TYPE_CONFIG, MSG_TYPE_RESYNC, + MSG_KEY_ETCD_URL, MSG_KEY_HOSTNAME, MSG_KEY_LOG_FILE, MSG_KEY_SEV_FILE, + MSG_KEY_SEV_SYSLOG, MSG_KEY_SEV_SCREEN, STATUS_IN_SYNC, + MSG_TYPE_CONFIG_LOADED, MSG_KEY_GLOBAL_CONFIG, MSG_KEY_HOST_CONFIG, + MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE, MessageWriter, + MSG_TYPE_STATUS, MSG_KEY_STATUS +) from calico.etcdutils import ( - EtcdClientOwner, delete_empty_parents, PathDispatcher, - EtcdEvent) + EtcdClientOwner, delete_empty_parents, PathDispatcher, EtcdEvent +) from calico.felix.actor import Actor, actor_message -from calico.felix.futils import (intern_dict, intern_list, logging_exceptions, - iso_utc_timestamp, IPV4, IPV6, StatCounter) +from calico.felix.futils import ( + intern_dict, intern_list, logging_exceptions, iso_utc_timestamp, IPV4, + IPV6, StatCounter +) +from calico.monotonic import monotonic_time _log = logging.getLogger(__name__) From 67dd8106323aa0a2d3182b301fb4c7bbd504c7fd Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 15:10:15 +0000 Subject: [PATCH 52/98] Add configuration for driver's log file location. 
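
The driver now logs to its own file, defaulting to
/var/log/calico/felix-etcd.log, with matching logrotate stanzas added
for the Debian and RPM packages.  A hedged example of overriding the new
parameter in the Felix config file (assuming it sits in the same section
as the existing logging options; as with LogFilePath, the literal string
"none" disables file logging):

    [log]
    LogFilePath = /var/log/calico/felix.log
    EtcdDriverLogFilePath = /var/log/calico/felix-etcd.log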
--- calico/felix/config.py | 8 ++- calico/felix/fetcd.py | 7 +-- debian/calico-felix.logrotate | 7 +++ docs/source/configuration.rst | 110 +++++++++++++++++----------------- rpm/calico-felix.logrotate | 7 +++ 5 files changed, 78 insertions(+), 61 deletions(-) diff --git a/calico/felix/config.py b/calico/felix/config.py index a2406b7493..14a9fcf860 100644 --- a/calico/felix/config.py +++ b/calico/felix/config.py @@ -185,6 +185,9 @@ def __init__(self, config_path): "an endpoint to the host.", "DROP") self.add_parameter("LogFilePath", "Path to log file", "/var/log/calico/felix.log") + self.add_parameter("EtcdDriverLogFilePath", + "Path to log file for etcd driver", + "/var/log/calico/felix-etcd.log") self.add_parameter("LogSeverityFile", "Log severity for logging to file", "INFO") self.add_parameter("LogSeveritySys", @@ -261,6 +264,7 @@ def _finish_update(self, final=False): self.DEFAULT_INPUT_CHAIN_ACTION = \ self.parameters["DefaultEndpointToHostAction"].value self.LOGFILE = self.parameters["LogFilePath"].value + self.DRIVERLOGFILE = self.parameters["EtcdDriverLogFilePath"].value self.LOGLEVFILE = self.parameters["LogSeverityFile"].value self.LOGLEVSYS = self.parameters["LogSeveritySys"].value self.LOGLEVSCR = self.parameters["LogSeverityScreen"].value @@ -387,10 +391,12 @@ def _validate_cfg(self, final=True): raise ConfigException("Invalid log level", self.parameters["LogSeverityScreen"]) - # Log file may be "None" (the literal string, case insensitive). In + # Log files may be "None" (the literal string, case insensitive). In # this case no log file should be written. if self.LOGFILE.lower() == "none": self.LOGFILE = None + if self.DRIVERLOGFILE.lower() == "none": + self.DRIVERLOGFILE = None if self.METADATA_IP.lower() == "none": # Metadata is not required. diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 8ed6f193ce..9e8cb4de0c 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -472,12 +472,7 @@ def _on_config_loaded_from_driver(self, msg): self._config.report_etcd_config(host_config, global_config) # Config now fully resolved, inform the driver. - felix_log_file = self._config.LOGFILE - if felix_log_file: - # FIXME Proper config for driver logfile - driver_log_file = felix_log_file + "-driver" - else: - driver_log_file = None + driver_log_file = self._config.DRIVERLOGFILE self._msg_writer.send_message( MSG_TYPE_CONFIG, { diff --git a/debian/calico-felix.logrotate b/debian/calico-felix.logrotate index 876a3b57c3..15f7093628 100644 --- a/debian/calico-felix.logrotate +++ b/debian/calico-felix.logrotate @@ -5,3 +5,10 @@ delaycompress minsize 1M } +/var/log/calico/felix-etcd.log { + daily + missingok + compress + delaycompress + minsize 1M +} diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index eda3cca4d2..0b462cf4a9 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -73,60 +73,62 @@ environment variables or etcd is often more convenient. The full list of parameters which can be set is as follows. -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| Setting | Default | Meaning | -+=============================+===========================+===========================================================================================+ -| EtcdAddr | localhost:4001 | The location (IP / hostname and port) of the etcd node or proxy that Felix should connect | -| | | to. 
| -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| DefaultEndpointToHostAction | DROP | By default Calico blocks traffic from endpoints to the host itself by using an iptables | -| | | DROP action. If you want to allow some or all traffic from endpoint to host then set | -| | | this parameter to "RETURN" (which causes the rest of the iptables INPUT chain to be | -| | | processed) or "ACCEPT" (which immediately accepts packets). | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| FelixHostname | socket.gethostname() | The hostname Felix reports to the plugin. Should be used if the hostname Felix | -| | | autodetects is incorrect or does not match what the plugin will expect. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| MetadataAddr | 127.0.0.1 | The IP address or domain name of the server that can answer VM queries for cloud-init | -| | | metadata. In OpenStack, this corresponds to the machine running nova-api (or in Ubuntu, | -| | | nova-api-metadata). A value of 'None' (case insensitive) means that Felix should not set | -| | | up any NAT rule for the metadata path. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| MetadataPort | 8775 | The port of the metadata server. This, combined with global.MetadataAddr (if not 'None'), | -| | | is used to set up a NAT rule, from 169.254.169.254:80 to MetadataAddr:MetadataPort. In | -| | | most cases this should not need to be changed. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| InterfacePrefix | None | The start of the interface name for all interfaces. This is set to "tap" on OpenStack | -| | | by the plugin, but must be set to "veth" on most Docker deployments. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| LogFilePath | /var/log/calico/felix.log | The full path to the felix log. Set to "none" to disable file logging. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| LogSeveritySys | ERROR | The log severity above which logs are sent to the syslog. Valid values are DEBUG, INFO, | -| | | WARNING, ERROR and CRITICAL, or NONE for no logging to syslog (all values case | -| | | insensitive). | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| LogSeverityFile | INFO | The log severity above which logs are sent to the log file. Valid values as for | -| | | LogSeveritySys. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| LogSeverityScreen | ERROR | The log severity above which logs are sent to the stdout. Valid values as for | -| | | LogSeveritySys. 
| -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| StartupCleanupDelay | 30 | Delay, in seconds, before felix does its start-of-day cleanup to remove orphaned iptables | -| | | chains and ipsets. Before the first cleanup, felix operates in "graceful restart" mode, | -| | | during which it preserves any pre-existing chains and ipsets. | -| | | | -| | | In a large deployment you may want to increase this value to give felix more time to | -| | | load the initial snapshot from etcd before cleaning up. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| PeriodicResyncInterval | 3600 | Period, in seconds, at which felix does a full resync with etcd and reprograms | -| | | iptables/ipsets. Set to 0 to disable periodic resync. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| IptablesRefreshInterval | 60 | Period, in seconds, at which felix re-applies all iptables state to ensure that no other | -| | | process has accidentally broken Calico's rules. Set to 0 to disable iptables refresh. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ -| MaxIpsetSize | 1048576 | Maximum size for the ipsets used by Felix to implement tags. Should be set to a number | -| | | that is greater than the maximum number of IP addresses that are ever expected in a tag. | -+-----------------------------+---------------------------+-------------------------------------------------------------------------------------------+ ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| Setting | Default | Meaning | ++=============================+================================+===========================================================================================+ +| EtcdAddr | localhost:4001 | The location (IP / hostname and port) of the etcd node or proxy that Felix should connect | +| | | to. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| DefaultEndpointToHostAction | DROP | By default Calico blocks traffic from endpoints to the host itself by using an iptables | +| | | DROP action. If you want to allow some or all traffic from endpoint to host then set | +| | | this parameter to "RETURN" (which causes the rest of the iptables INPUT chain to be | +| | | processed) or "ACCEPT" (which immediately accepts packets). | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| FelixHostname | socket.gethostname() | The hostname Felix reports to the plugin. Should be used if the hostname Felix | +| | | autodetects is incorrect or does not match what the plugin will expect. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| MetadataAddr | 127.0.0.1 | The IP address or domain name of the server that can answer VM queries for cloud-init | +| | | metadata. 
In OpenStack, this corresponds to the machine running nova-api (or in Ubuntu, | +| | | nova-api-metadata). A value of 'None' (case insensitive) means that Felix should not set | +| | | up any NAT rule for the metadata path. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| MetadataPort | 8775 | The port of the metadata server. This, combined with global.MetadataAddr (if not 'None'), | +| | | is used to set up a NAT rule, from 169.254.169.254:80 to MetadataAddr:MetadataPort. In | +| | | most cases this should not need to be changed. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| InterfacePrefix | None | The start of the interface name for all interfaces. This is set to "tap" on OpenStack | +| | | by the plugin, but must be set to "veth" on most Docker deployments. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| LogFilePath | /var/log/calico/felix.log | The full path to the felix log. Set to "none" to disable file logging. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| EtcdDriverLogFilePath | /var/log/calico/felix-etcd.log | Felix's etcd driver has its own log file. This parameter contains its full path. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| LogSeveritySys | ERROR | The log severity above which logs are sent to the syslog. Valid values are DEBUG, INFO, | +| | | WARNING, ERROR and CRITICAL, or NONE for no logging to syslog (all values case | +| | | insensitive). | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| LogSeverityFile | INFO | The log severity above which logs are sent to the log file. Valid values as for | +| | | LogSeveritySys. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| LogSeverityScreen | ERROR | The log severity above which logs are sent to the stdout. Valid values as for | +| | | LogSeveritySys. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| StartupCleanupDelay | 30 | Delay, in seconds, before felix does its start-of-day cleanup to remove orphaned iptables | +| | | chains and ipsets. Before the first cleanup, felix operates in "graceful restart" mode, | +| | | during which it preserves any pre-existing chains and ipsets. | +| | | | +| | | In a large deployment you may want to increase this value to give felix more time to | +| | | load the initial snapshot from etcd before cleaning up. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| PeriodicResyncInterval | 3600 | Period, in seconds, at which felix does a full resync with etcd and reprograms | +| | | iptables/ipsets. Set to 0 to disable periodic resync. 
| ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| IptablesRefreshInterval | 60 | Period, in seconds, at which felix re-applies all iptables state to ensure that no other | +| | | process has accidentally broken Calico's rules. Set to 0 to disable iptables refresh. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ +| MaxIpsetSize | 1048576 | Maximum size for the ipsets used by Felix to implement tags. Should be set to a number | +| | | that is greater than the maximum number of IP addresses that are ever expected in a tag. | ++-----------------------------+--------------------------------+-------------------------------------------------------------------------------------------+ Environment variables diff --git a/rpm/calico-felix.logrotate b/rpm/calico-felix.logrotate index 876a3b57c3..15f7093628 100644 --- a/rpm/calico-felix.logrotate +++ b/rpm/calico-felix.logrotate @@ -5,3 +5,10 @@ delaycompress minsize 1M } +/var/log/calico/felix-etcd.log { + daily + missingok + compress + delaycompress + minsize 1M +} From 59244e6763b9db791ca6a6fb8a9d13e7fa47778c Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 15:24:42 +0000 Subject: [PATCH 53/98] Set syslog executable name in driver. --- calico/common.py | 8 +++++--- calico/etcddriver/__main__.py | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/calico/common.py b/calico/common.py index 1520697f89..3afed8950d 100644 --- a/calico/common.py +++ b/calico/common.py @@ -161,13 +161,13 @@ def mkdir_p(path): except TypeError: try: os.makedirs(path) - except OSError as exc: # Python >2.5 + except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise -def default_logging(gevent_in_use=True): +def default_logging(gevent_in_use=True, syslog_executable_name=None): """ Sets up the Calico default logging, with default severities. @@ -188,7 +188,7 @@ def default_logging(gevent_in_use=True): root_logger = logging.getLogger() root_logger.setLevel(logging.DEBUG) - executable_name = os.path.basename(sys.argv[0]) + executable_name = syslog_executable_name or os.path.basename(sys.argv[0]) syslog_format = SYSLOG_FORMAT_STRING.format(excname=executable_name) syslog_formatter = logging.Formatter(syslog_format) if os.path.exists("/dev/log"): @@ -388,6 +388,7 @@ def validate_endpoint(config, combined_id, endpoint): if issues: raise ValidationFailed(" ".join(issues)) + def validate_rules(profile_id, rules): """ Ensures that the supplied rules are valid. Once this routine has returned @@ -574,6 +575,7 @@ def validate_tags(profile_id, tags): if issues: raise ValidationFailed(" ".join(issues)) + def validate_ipam_pool(pool_id, pool, ip_version): """ Validates and canonicalises an IPAM pool dict. 
Removes any fields that diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index 6ada8a6dac..12cc810042 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -34,7 +34,8 @@ _log = logging.getLogger(__name__) last_ppid = os.getppid() -default_logging(gevent_in_use=False) +default_logging(gevent_in_use=False, + syslog_executable_name="calico-felix-etcd") felix_sck = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) From 1f1b7f44715fcd5e121f45580ca46f031f2fb8fe Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 16:16:21 +0000 Subject: [PATCH 54/98] Remove fake stack and use function calls instead. --- calico/etcddriver/driver.py | 85 +++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 1a68cef197..a82c489443 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -367,13 +367,13 @@ def _process_snapshot_and_events(self, etcd_response, snapshot_index): :param snapshot_index: the etcd index of the response. """ self._hwms.start_tracking_deletions() - for snap_mod, snap_key, snap_value in parse_snapshot(etcd_response): + + def _handle_etcd_node(snap_mod, snap_key, snap_value): old_hwm = self._hwms.update_hwm(snap_key, snapshot_index) if snap_mod > old_hwm: # This specific key's HWM is newer than the previous # version we've seen, send an update. self._on_key_updated(snap_key, snap_value) - # After we process an update from the snapshot, process # several updates from the watcher queue (if there are # any). We limit the number to ensure that we always @@ -393,6 +393,8 @@ def _process_snapshot_and_events(self, etcd_response, snapshot_index): if self._stop_event.is_set(): _log.error("Stop event set, exiting") raise DriverShutdown() + parse_snapshot(etcd_response, _handle_etcd_node) + # Save occupancy by throwing away the deletion tracking metadata. self._hwms.stop_tracking_deletions() # Scan for deletions that happened before the snapshot. We effectively @@ -628,7 +630,7 @@ def watch_etcd(self, next_index, event_queue, stop_event): event_queue.put(None) -def parse_snapshot(resp): +def parse_snapshot(resp, callback): """ Generator: iteratively parses the response to the etcd snapshot. @@ -641,47 +643,48 @@ def parse_snapshot(resp): raise ResyncRequired("Read from etcd failed. HTTP status code %s", resp.status) parser = ijson.parse(resp) # urllib3 response is file-like. - stack = [] - frame = Node() - for prefix, event, value in parser: - if event == "start_map": - stack.append(frame) - frame = Node() - elif event == "map_key": - frame.current_key = value - elif event in ("string", "number"): - if frame.done: - continue - if frame.current_key == "modifiedIndex": - frame.modifiedIndex = value - elif frame.current_key == "key": - frame.key = value - elif frame.current_key == "value": - frame.value = value - elif frame.current_key == "errorCode": + + prefix, event, value = next(parser) + if event == "start_map": + _parse_dict(parser, callback) + else: + raise ResyncRequired("Bad response from etcd") + + +def _parse_dict(parser, callback): + # Expect a sequence of keys and values. 
+ mod_index = None + node_key = None + node_value = None + while True: + prefix, event, value = next(parser) + if event == "map_key": + map_key = value + prefix, event, value = next(parser) + if map_key == "modifiedIndex": + mod_index = value + elif map_key == "key": + node_key = value + elif map_key == "value": + node_value = value + elif map_key == "errorCode": raise ResyncRequired("Error from etcd, etcd error code %s", value) - if (frame.key is not None and - frame.value is not None and - frame.modifiedIndex is not None): - frame.done = True - yield frame.modifiedIndex, frame.key, frame.value - frame.current_key = None + elif map_key == "nodes": + while True: + prefix, event, value = next(parser) + if event == "start_map": + _parse_dict(parser, callback) + elif event == "end_array": + break + else: + raise ValueError("Unexpected: %s" % event) elif event == "end_map": - frame = stack.pop(-1) - - -class Node(object): - __slots__ = ("key", "value", "action", "current_key", "modifiedIndex", - "done") - - def __init__(self): - self.modifiedIndex = None - self.key = None - self.value = None - self.action = None - self.current_key = None - self.done = False + if (node_key is not None and + node_value is not None and + mod_index is not None): + callback(mod_index, node_key, node_value) + break class WatcherDied(Exception): From 999c2d917458c6041e8a5e06aebfb4f48c49c880 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 2 Nov 2015 17:02:26 +0000 Subject: [PATCH 55/98] Make URL calculation common; other minor cleanups. --- calico/etcddriver/driver.py | 42 ++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index a82c489443..8ccf544976 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -61,8 +61,8 @@ from calico.common import complete_logging from calico.monotonic import monotonic_time from calico.datamodel_v1 import ( - READY_KEY, CONFIG_DIR, dir_for_per_host_config, VERSION_DIR -) + READY_KEY, CONFIG_DIR, dir_for_per_host_config, VERSION_DIR, + ROOT_DIR) from calico.etcddriver.hwm import HighWaterTracker _log = logging.getLogger(__name__) @@ -214,6 +214,7 @@ def _resync_and_merge(self): self._process_snapshot_and_events(resp, snapshot_index) # We're now in-sync. Tell Felix. self._send_status(STATUS_IN_SYNC) + # Then switch to processing events only. self._process_events_only() except WriteFailed: _log.exception("Write to Felix failed; shutting down.") @@ -245,7 +246,7 @@ def _wait_for_ready(self): # Read failure here will be handled by outer loop. resp = self._resync_http_pool.request( "GET", - self._etcd_base_url + "/v2/keys" + READY_KEY, + self._calculate_url(READY_KEY), timeout=5, preload_content=True ) @@ -282,7 +283,7 @@ def _load_config(self, config_dir): # Read failure here will be handled by outer loop. 
         resp = self._resync_http_pool.request(
             "GET",
-            self._etcd_base_url + "/v2/keys" + config_dir,
+            self._calculate_url(config_dir),
             fields={
                 "recursive": "true",
             },
@@ -319,7 +320,7 @@ def _start_snapshot_request(self):
         _log.info("Loading snapshot headers...")
         resp = self._resync_http_pool.request(
             "GET",
-            self._etcd_base_url + "/v2/keys/calico/v1",
+            self._calculate_url(VERSION_DIR),
             fields={"recursive": "true"},
             timeout=120,
             preload_content=False
@@ -536,6 +537,9 @@ def _send_status(self, status):
             }
         )
 
+    def _calculate_url(self, etcd_key):
+        return self._etcd_base_url + "/v2/keys/" + etcd_key.strip("/")
+
     def watch_etcd(self, next_index, event_queue, stop_event):
         """
         Thread: etcd watcher thread.  Watches etcd for changes and
@@ -565,7 +569,7 @@ def watch_etcd(self, next_index, event_queue, stop_event):
                 _log.debug("Waiting on etcd index %s", next_index)
                 resp = http.request(
                     "GET",
-                    "http://localhost:4001/v2/keys/calico/v1",
+                    self._calculate_url(VERSION_DIR),
                     fields={"recursive": "true",
                             "wait": "true",
                             "waitIndex": next_index},
@@ -604,7 +608,7 @@ def watch_etcd(self, next_index, event_queue, stop_event):
                         _log.debug("Skipping non-delete to dir %s", key)
                         continue
                 else:
-                    if key == VERSION_DIR:
+                    if key.rstrip("/") in (VERSION_DIR, ROOT_DIR):
                         # Special case: if the whole keyspace is
                         # deleted, that implies the ready flag is gone
                         # too; resync rather than generating deletes
@@ -632,10 +636,8 @@ def watch_etcd(self, next_index, event_queue, stop_event):
 
 def parse_snapshot(resp, callback):
     """
-    Generator: iteratively parses the response to the etcd snapshot.
-
-    Generates tuples of the form (modifiedIndex, key, value) for each
-    leaf encountered in the snapshot.
+    Iteratively parses the response to the etcd snapshot, calling the
+    callback with each key/value pair found.
 
     :raises ResyncRequired if the snapshot contains an error response.
     """
@@ -646,13 +648,23 @@ def parse_snapshot(resp, callback):
 
     prefix, event, value = next(parser)
     if event == "start_map":
-        _parse_dict(parser, callback)
+        # As expected, response is a map.
+        _parse_map(parser, callback)
     else:
+        _log.error("Response from etcd did not contain a JSON map.")
         raise ResyncRequired("Bad response from etcd")
 
 
-def _parse_dict(parser, callback):
-    # Expect a sequence of keys and values.
+def _parse_map(parser, callback):
+    """
+    Searches the stream of JSON tokens for key/value pairs.
+
+    Calls itself recursively to handle subdirectories.
+
+    :param parser: iterator, returning JSON parse event tuples.
+    :param callback: callback to call when a key/value pair is found.
+    """
+    # Expect a sequence of keys and values terminated by an "end_map" event.
     mod_index = None
     node_key = None
     node_value = None
@@ -674,7 +686,7 @@ def _parse_map(parser, callback):
                 while True:
                     prefix, event, value = next(parser)
                     if event == "start_map":
-                        _parse_dict(parser, callback)
+                        _parse_map(parser, callback)
                     elif event == "end_array":
                         break
                     else:

From a853baa2ec02f45a577e7a94064fc87cac19d131 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Mon, 2 Nov 2015 17:35:48 +0000
Subject: [PATCH 56/98] Add comments to HWM tracker.

---
 calico/etcddriver/hwm.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py
index 20f0d4e35b..b4b17847b4 100644
--- a/calico/etcddriver/hwm.py
+++ b/calico/etcddriver/hwm.py
@@ -39,12 +39,42 @@
 class HighWaterTracker(object):
     """
     Tracks the highest etcd index for which we've seen a particular etcd key.
+ + This class is expected to be used as follows: + + Starting with a resync, while also merging events from our watch on etcd: + + * Call start_tracking_deletions() to enable resolution between events + and the snapshot. + * Repeatedly call update_hwm() and store_deletion(), feeding in the + data from the snapshot and event stream. + * At the end of the snapshot processing, call stop_tracking_deletions() + to discard the tracking metadata (which would otherwise grow + indefinitely). + * Call remove_old_keys() to find and delete any keys that have not been + seen since before the snapshot was started, and hence must have been + deleted before the snapshot was taken. + + While in sync: + + * feed in events with update_hwm() and store_deletion(). + + At any point, if a new resync is required restart from + "Call start_tracking_deletions()..." + """ def __init__(self): + # We use a trie to track the highest etcd index at which we've seen + # each key. The trie implementation forces a fixed character set; + # we explicitly allow the characters we expect and encode any others + # that we're not expecting. self._hwms = Trie(TRIE_CHARS) # Set to a Trie while we're tracking deletions. None otherwise. self._deletion_hwms = None + # Optimization: tracks the highest etcd index at which we've seen a + # deletion. This allows us to skip an expensive lookup in the + # _deletion_hwms trie for events that come after the deletion. self._latest_deletion = None def start_tracking_deletions(self): From 5f838f539936b8c46c4bfcdf1ddf82fcd2e161df Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 10:56:27 +0000 Subject: [PATCH 57/98] Add UTs for HWM tracking. --- calico/etcddriver/hwm.py | 3 + calico/etcddriver/test/test_hwm.py | 95 +++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py index b4b17847b4..0ffbe6336d 100644 --- a/calico/etcddriver/hwm.py +++ b/calico/etcddriver/hwm.py @@ -172,6 +172,9 @@ def remove_old_keys(self, hwm_limit): _log.info("Deleted %s old keys", len(old_keys)) return map(decode_key, old_keys) + def __len__(self): + return len(self._hwms) + def encode_key(key): """ diff --git a/calico/etcddriver/test/test_hwm.py b/calico/etcddriver/test/test_hwm.py index 64b9bca9bb..7384352ebf 100644 --- a/calico/etcddriver/test/test_hwm.py +++ b/calico/etcddriver/test/test_hwm.py @@ -23,12 +23,105 @@ from unittest import TestCase from mock import Mock, call, patch from calico.etcddriver import hwm +from calico.etcddriver.hwm import HighWaterTracker _log = logging.getLogger(__name__) class TestHighWaterTracker(TestCase): - pass + def setUp(self): + self.hwm = HighWaterTracker() + + def test_mainline(self): + # Test merging of updates between a snapshot with etcd_index 10 and + # updates coming in afterwards with indexes 11, 12, ... + + # We use prefix "/a/$" because $ is not allowed in the trie so it + # implicitly tests encoding/decoding is being properly applied. + + old_hwm = self.hwm.update_hwm("/a/$/c", 9) # Pre-snapshot + self.assertEqual(old_hwm, None) + old_hwm = self.hwm.update_hwm("/b/c/d", 9) # Pre-snapshot + self.assertEqual(old_hwm, None) + old_hwm = self.hwm.update_hwm("/j/c/d", 9) # Pre-snapshot + self.assertEqual(old_hwm, None) + self.assertEqual(len(self.hwm), 3) + + # While merging a snapshot we track deletions. + self.hwm.start_tracking_deletions() + + # Send in some keys from the snapshot. 
+        old_hwm = self.hwm.update_hwm("/a/$/c", 10)  # From snapshot
+        self.assertEqual(old_hwm, 9)
+        old_hwm = self.hwm.update_hwm("/a/$/d", 10)  # From snapshot
+        self.assertEqual(old_hwm, None)
+        old_hwm = self.hwm.update_hwm("/d/e/f", 10)  # From snapshot
+        self.assertEqual(old_hwm, None)
+        self.assertEqual(len(self.hwm), 5)
+
+        # This key is first seen in the event stream, so the snapshot version
+        # should be ignored.
+        old_hwm = self.hwm.update_hwm("/a/h/i", 11)  # From events
+        self.assertEqual(old_hwm, None)
+        old_hwm = self.hwm.update_hwm("/a/h/i", 10)  # From snapshot
+        self.assertEqual(old_hwm, 11)
+        old_hwm = self.hwm.update_hwm("/a/h/i", 12)  # From events
+        self.assertEqual(old_hwm, 11)  # Still 11, snapshot ignored.
+        self.assertEqual(len(self.hwm), 6)
+
+        # Then a whole subtree gets deleted by the events.
+        deleted_keys = self.hwm.store_deletion("/a/$", 13)
+        self.assertEqual(set(deleted_keys), set(["/a/$/c", "/a/$/d"]))
+        self.assertEqual(len(self.hwm), 4)
+
+        # But afterwards, we see a snapshot key within the subtree, it should
+        # be ignored.
+        old_hwm = self.hwm.update_hwm("/a/$/e", 10)
+        self.assertEqual(old_hwm, 13)  # Returns the etcd_index of the delete.
+        # Then a new update from the event stream, recreates the directory.
+        old_hwm = self.hwm.update_hwm("/a/$/f", 14)
+        self.assertEqual(old_hwm, None)
+        self.assertEqual(len(self.hwm), 5)
+        # And subsequent updates are processed ignoring the delete.
+        old_hwm = self.hwm.update_hwm("/a/$/f", 15)
+        self.assertEqual(old_hwm, 14)
+        # However, snapshot updates from within the deleted subtree are still
+        # ignored.
+        old_hwm = self.hwm.update_hwm("/a/$/e", 10)
+        self.assertEqual(old_hwm, 13)  # Returns the etcd_index of the delete.
+        old_hwm = self.hwm.update_hwm("/a/$/f", 10)
+        self.assertEqual(old_hwm, 13)  # Returns the etcd_index of the delete.
+        old_hwm = self.hwm.update_hwm("/a/$/g", 10)
+        self.assertEqual(old_hwm, 13)  # Returns the etcd_index of the delete.
+        self.assertEqual(len(self.hwm), 5)
+        # And subsequent updates are processed ignoring the delete.
+        old_hwm = self.hwm.update_hwm("/a/$/f", 16)
+        self.assertEqual(old_hwm, 15)
+
+        # End of snapshot: we stop tracking deletions, which should free up the
+        # resources.
+        self.hwm.stop_tracking_deletions()
+        self.assertEqual(self.hwm._deletion_hwms, None)
+
+        # Then, subsequent updates should be handled normally.
+        old_hwm = self.hwm.update_hwm("/a/$/f", 17)
+        self.assertEqual(old_hwm, 16)  # From previous event
+        old_hwm = self.hwm.update_hwm("/g/b/f", 18)
+        self.assertEqual(old_hwm, None)  # Seen for the first time.
+        old_hwm = self.hwm.update_hwm("/d/e/f", 19)
+        self.assertEqual(old_hwm, 10)  # From the snapshot.
+        self.assertEqual(len(self.hwm), 6)
+
+        # We should be able to find all the keys that weren't seen during
+        # the snapshot.
+        old_keys = self.hwm.remove_old_keys(10)
+        self.assertEqual(set(old_keys), set(["/b/c/d", "/j/c/d"]))
+        self.assertEqual(len(self.hwm), 4)
+
+        # They should now be gone from the index.
+        old_hwm = self.hwm.update_hwm("/b/c/d", 20)
+        self.assertEqual(old_hwm, None)
+        self.assertEqual(len(self.hwm), 5)
 
 
 class TestKeyEncoding(TestCase):

From ef80e3ae8afcac86b31c89704d2e68fd84d947e2 Mon Sep 17 00:00:00 2001
From: Shaun Crampton
Date: Tue, 3 Nov 2015 11:24:28 +0000
Subject: [PATCH 58/98] Factor out shared request logic in driver.
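
The resync and watcher threads were building their etcd GET requests
independently, each with its own URL construction, timeout and
cluster-ID check; that logic now lives in a single _etcd_request()
helper.  A sketch of the resulting call sites (illustrative; the
helper's actual signature is in the diff below):

    # Simple read, e.g. polling the Ready flag:
    resp = self._etcd_request(self._resync_http_pool, READY_KEY)

    # Recursive read of a directory, e.g. preloading config:
    resp = self._etcd_request(self._resync_http_pool, config_dir,
                              recursive=True)

    # Long-polling watch: wait_index turns the GET into a watch and
    # forces preload_content off so headers can be checked early:
    resp = self._etcd_request(http, VERSION_DIR, recursive=True,
                              wait_index=next_index, timeout=90)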
--- calico/etcddriver/driver.py | 71 ++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 8ccf544976..2aec12fcfb 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -244,13 +244,7 @@ def _wait_for_ready(self): ready = False while not ready: # Read failure here will be handled by outer loop. - resp = self._resync_http_pool.request( - "GET", - self._calculate_url(READY_KEY), - timeout=5, - preload_content=True - ) - self._check_cluster_id(resp) + resp = self._etcd_request(self._resync_http_pool, READY_KEY) try: etcd_resp = json.loads(resp.data) ready = etcd_resp["node"]["value"] == "true" @@ -281,16 +275,8 @@ def _load_config(self, config_dir): Loads all the config keys from the given etcd directory. """ # Read failure here will be handled by outer loop. - resp = self._resync_http_pool.request( - "GET", - self._calculate_url(config_dir), - fields={ - "recursive": "true", - }, - timeout=5, - preload_content=True - ) - self._check_cluster_id(resp) + resp = self._etcd_request(self._resync_http_pool, + config_dir, recursive=True) try: etcd_resp = json.loads(resp.data) if etcd_resp.get("errorCode") == 100: # Not found @@ -318,15 +304,12 @@ def _start_snapshot_request(self): :raises DriverShutdown if the etcd cluster ID changes. """ _log.info("Loading snapshot headers...") - resp = self._resync_http_pool.request( - "GET", - self._calculate_url(VERSION_DIR), - fields={"recursive": "true"}, - timeout=120, - preload_content=False - ) + resp = self._etcd_request(self._resync_http_pool, + VERSION_DIR, + recursive=True, + timeout=120, + preload_content=False) snapshot_index = int(resp.getheader("x-etcd-index", 1)) - self._check_cluster_id(resp) if not self._cluster_id: _log.error("Snapshot response did not contain cluster ID, " "resyncing to avoid inconsistency") @@ -335,6 +318,39 @@ def _start_snapshot_request(self): "watcher...", snapshot_index) return resp, snapshot_index + def _etcd_request(self, http_pool, key, timeout=5, wait_index=None, + recursive=False, preload_content=None): + """ + Make a request to etcd on the given HTTP pool for the given key. + + :param timeout: Read timeout for the request. + :param int wait_index: If set, issues a watch request. + :param recursive: True to request a recursive GET or watch. + + :return: The urllib3 Response object. + """ + fields = {} + if recursive: + _log.debug("Adding recursive=true to request") + fields["recursive"] = "true" + if wait_index is not None: + _log.debug("Request is a watch, adding wait* headers and forcing " + "preload_content to False") + fields["wait"] = "true" + fields["waitIndex"] = wait_index + preload_content = False + if preload_content is None: + preload_content = True + resp = http_pool.request( + "GET", + self._calculate_url(key), + fields=fields or None, + timeout=timeout, + preload_content=preload_content + ) + self._check_cluster_id(resp) + return resp + def _check_cluster_id(self, resp): """ Checks the x-etcd-cluster-id header for changes since the last call. 
@@ -567,6 +583,11 @@ def watch_etcd(self, next_index, event_queue, stop_event): http = HTTPConnectionPool("localhost", 4001, maxsize=1) try: _log.debug("Waiting on etcd index %s", next_index) + self._etcd_request(http, + VERSION_DIR, + recursive=True, + wait_index=next_index, + timeout=90) resp = http.request( "GET", self._calculate_url(VERSION_DIR), From 053d1352ae1af4a4ff2a7320ba5a08930889c05c Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 13:38:47 +0000 Subject: [PATCH 59/98] Fix hard-coded URL for etcd. --- calico/etcddriver/driver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 2aec12fcfb..29a07e01cb 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -482,7 +482,9 @@ def _handle_next_watcher_event(self): self._hwms.update_hwm(ev_key, ev_mod) self._on_key_updated(ev_key, ev_val) else: - # Deletion. + # Deletion. In case this is a directory deletion, we search the + # trie for anything that is under the deleted key and send + # individual deletions to Felix for each one. deleted_keys = self._hwms.store_deletion(ev_key, ev_mod) for child_key in deleted_keys: @@ -580,7 +582,7 @@ def watch_etcd(self, next_index, event_queue, stop_event): while not stop_event.is_set(): if not http: _log.info("No HTTP pool, creating one...") - http = HTTPConnectionPool("localhost", 4001, maxsize=1) + http = self.get_etcd_connection() try: _log.debug("Waiting on etcd index %s", next_index) self._etcd_request(http, From d53d97e0b2e8b5bc92bad4c9ecab84eac984232e Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 14:01:49 +0000 Subject: [PATCH 60/98] Add initial FV-level test for driver. --- calico/etcddriver/test/test_driver.py | 148 ++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 calico/etcddriver/test/test_driver.py diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py new file mode 100644 index 0000000000..c51c6838e2 --- /dev/null +++ b/calico/etcddriver/test/test_driver.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.test.test_driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tests for the etcd driver module. 
+""" + +import logging +from Queue import Queue, Empty +from unittest import TestCase + +from mock import Mock, call, patch + +from calico.etcddriver.driver import EtcdDriver +from calico.etcddriver.protocol import * + +_log = logging.getLogger(__name__) + + +FLUSH = object() + + +class StubMessageReader(MessageReader): + def __init__(self, sck): + super(StubMessageReader, self).__init__(sck) + self.queue = Queue() + + def send_msg(self, msg_type, fields=None): + msg = { + MSG_KEY_TYPE: msg_type + } + msg.update(fields or {}) + self.queue.put((msg_type, msg)) + + def send_timeout(self): + self.queue.put(None) + + def send_exception(self, exc): + self.queue.put(exc) + + def new_messages(self, timeout=None): + while True: + item = self.queue.get() + if item is None: + return # timeout + if isinstance(item, Exception): + raise item + else: + yield item + + +class StubMessageWriter(MessageWriter): + def __init__(self, sck): + super(StubMessageWriter, self).__init__(sck) + self.queue = Queue() + + def send_message(self, msg_type, fields=None, flush=True): + self.queue.put((msg_type, fields)) + if flush: + self.flush() + + def flush(self): + self.queue.put(FLUSH) + + +class TestEtcdDriverFV(TestCase): + """ + FV-level tests for the driver. These tests run a real copy of the driver + but they stub out the felix socket and requests to etcd. + """ + + def setUp(self): + sck = Mock() + self.msg_reader = StubMessageReader(sck) + self.msg_writer = StubMessageWriter(sck) + + self.driver = EtcdDriver(sck) + self.driver._msg_reader = self.msg_reader + self.driver._msg_writer = self.msg_writer + self.driver._etcd_request = Mock(spec=self.driver._etcd_request, + side_effect=self.mock_etcd_request) + + def mock_etcd_request(self, http_pool, key, timeout=5, wait_index=None, + recursive=False, preload_content=None): + if http_pool is self.driver._resync_http_pool: + _log.info("Resync thread issuing request for %s timeout=%s, " + "wait_index=%s, recursive=%s, preload=%s", key, timeout, + wait_index, recursive, preload_content) + else: + _log.info("Watcher thread issuing request for %s timeout=%s, " + "wait_index=%s, recursive=%s, preload=%s", key, timeout, + wait_index, recursive, preload_content) + return NotImplemented + + def test_start(self): + self.driver.start() + self.assert_no_msgs() + self.msg_reader.send_msg( + MSG_TYPE_INIT, + { + MSG_KEY_ETCD_URL: "http://localhost:4001", + MSG_KEY_HOSTNAME: "thehostname", + } + ) + self.assert_next_msg( + MSG_TYPE_STATUS, + {MSG_KEY_STATUS: STATUS_WAIT_FOR_READY} + ) + + def assert_next_msg(self, msg_type, fields=None): + mt, fs = self.msg_writer.queue.get(timeout=10) + self.assertEqual(msg_type, mt) + self.assertEqual(fields, fs) + + def assert_no_msgs(self): + try: + msg = self.msg_writer.queue.get(timeout=1) + except Empty: + pass + else: + self.fail("Message unexpectedly received: %s" % msg) + + def tearDown(self): + self.driver.stop() + self.msg_reader.send_timeout() + self.driver._reader_thread.join(2) + self.driver._resync_thread.join(2) + try: + self.driver._watcher_thread.join(2) + self.assertFalse(self.driver._watcher_thread.is_alive()) + except AttributeError: + pass + self.assertFalse(self.driver._reader_thread.is_alive()) + self.assertFalse(self.driver._resync_thread.is_alive()) From be9fe6cae2ee5d187df33e816b6f0157e711df37 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 17:20:52 +0000 Subject: [PATCH 61/98] Fix thread shutdown; make sure threads are daemons and join them correctly. 
--- calico/etcddriver/driver.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 29a07e01cb..18bfc341d8 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -82,8 +82,10 @@ def __init__(self, felix_sck): # watcher thread (which it manages). self._reader_thread = Thread(target=self._read_from_socket, name="reader-thread") + self._reader_thread.daemon = True self._resync_thread = Thread(target=self._resync_and_merge, name="resync-thread") + self._resync_thread.daemon = True self._watcher_thread = None # Created on demand self._watcher_stop_event = None @@ -118,7 +120,19 @@ def join(self, timeout=None): :returns True if the driver stopped, False on timeout. """ - return self._stop_event.wait(timeout=timeout) + self._stop_event.wait(timeout=timeout) + stopped = self._stop_event.is_set() + if stopped: + self._resync_thread.join(timeout=timeout) + stopped &= not self._resync_thread.is_alive() + self._reader_thread.join(timeout=timeout) + stopped &= not self._reader_thread.is_alive() + try: + self._watcher_thread.join(timeout=timeout) + stopped &= not self._watcher_thread.is_alive() + except AttributeError: + pass + return stopped def stop(self): _log.info("Stopping driver") From 725a9f43e9e7c0f02ce324973768efe1e111e705 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 17:22:03 +0000 Subject: [PATCH 62/98] Minor fixes: prevent blocking forever on events. --- calico/etcddriver/driver.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 18bfc341d8..37da0499a3 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -217,7 +217,12 @@ def _resync_and_merge(self): self._preload_config() # Now (on the first run through) wait for Felix to process the # config. - self._config_received.wait() + while not self._config_received.is_set(): + _log.info("Waiting for Felix to process the config...") + self._config_received.wait(1) + if self._stop_event.is_set(): + raise DriverShutdown() + _log.info("Felix sent us the config, continuing.") # Kick off the snapshot request as far as the headers. self._send_status(STATUS_RESYNC) resp, snapshot_index = self._start_snapshot_request() @@ -256,7 +261,7 @@ def _wait_for_ready(self): snapshot until that flag is set. """ ready = False - while not ready: + while not ready and not self._stop_event.is_set(): # Read failure here will be handled by outer loop. resp = self._etcd_request(self._resync_http_pool, READY_KEY) try: @@ -335,7 +340,8 @@ def _start_snapshot_request(self): def _etcd_request(self, http_pool, key, timeout=5, wait_index=None, recursive=False, preload_content=None): """ - Make a request to etcd on the given HTTP pool for the given key. + Make a request to etcd on the given HTTP pool for the given key + and check the cluster ID. :param timeout: Read timeout for the request. :param int wait_index: If set, issues a watch request. 
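The hunk above replaces a bare Event.wait() with a polling loop;
distilled to a runnable sketch, the pattern is:

    import threading

    class DriverShutdown(Exception):
        pass

    config_received = threading.Event()
    stop_event = threading.Event()

    def wait_for_config():
        # Poll with a short timeout rather than blocking indefinitely,
        # so the thread notices a shutdown request between waits.
        while not config_received.is_set():
            config_received.wait(1)
            if stop_event.is_set():
                raise DriverShutdown()

    config_received.set()
    wait_for_config()  # returns immediately once the event is set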
@@ -599,22 +605,11 @@ def watch_etcd(self, next_index, event_queue, stop_event): http = self.get_etcd_connection() try: _log.debug("Waiting on etcd index %s", next_index) - self._etcd_request(http, - VERSION_DIR, - recursive=True, - wait_index=next_index, - timeout=90) - resp = http.request( - "GET", - self._calculate_url(VERSION_DIR), - fields={"recursive": "true", - "wait": "true", - "waitIndex": next_index}, - timeout=90, - # Don't pre-load so we can check the cluster ID before - # we wait for the body. - preload_content=False, - ) + resp = self._etcd_request(http, + VERSION_DIR, + recursive=True, + wait_index=next_index, + timeout=90) if resp.status != 200: _log.warning("etcd watch returned bad HTTP status: %s", resp.status) @@ -678,12 +673,15 @@ def parse_snapshot(resp, callback): :raises ResyncRequired if the snapshot contains an error response. """ + _log.debug("Parsing snapshot response...") if resp.status != 200: raise ResyncRequired("Read from etcd failed. HTTP status code %s", resp.status) parser = ijson.parse(resp) # urllib3 response is file-like. prefix, event, value = next(parser) + _log.debug("Read first token from response %s, %s, %s", prefix, event, + value) if event == "start_map": # As expected, response is a map. _parse_map(parser, callback) @@ -707,6 +705,7 @@ def _parse_map(parser, callback): node_value = None while True: prefix, event, value = next(parser) + _log.debug("Parsing %s, %s, %s", prefix, event, value) if event == "map_key": map_key = value prefix, event, value = next(parser) From b93c1311e0074ab29703e63cee1c75bbd0d7cbc9 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 17:22:50 +0000 Subject: [PATCH 63/98] Record ready flag in HWM cache so that we spot if it disappears. --- calico/etcddriver/driver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 37da0499a3..393a2df9e3 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -267,9 +267,13 @@ def _wait_for_ready(self): try: etcd_resp = json.loads(resp.data) ready = etcd_resp["node"]["value"] == "true" + mod_idx = etcd_resp["node"]["modifiedIndex"] except (TypeError, ValueError, KeyError) as e: _log.warning("Failed to load Ready flag from etcd: %r", e) time.sleep(1) + else: + _log.info("Ready flag set to %s", etcd_resp["node"]["value"]) + self._hwms.update_hwm(READY_KEY, mod_idx) def _preload_config(self): """ From 99cec92e255e3e3360ffa718c5a48465c9ef1e44 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 3 Nov 2015 17:23:24 +0000 Subject: [PATCH 64/98] Add FV-level test for mainline resync processing in driver. --- calico/etcddriver/driver.py | 10 +- calico/etcddriver/test/test_driver.py | 388 ++++++++++++++++++++++++-- 2 files changed, 368 insertions(+), 30 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 393a2df9e3..b5ff70a26f 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -353,6 +353,15 @@ def _etcd_request(self, http_pool, key, timeout=5, wait_index=None, :return: The urllib3 Response object. 
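# A flat, self-contained sketch of the streaming parse shown above
# (the real parse_snapshot/_parse_map pair recurses into nested
# directories; this simplification handles a single level only):
import io
import ijson

SNAPSHOT = io.BytesIO(b'''{
    "action": "get",
    "node": {"key": "/calico/v1", "dir": true, "nodes": [
        {"key": "/calico/v1/adir/akey", "value": "a", "modifiedIndex": 8},
        {"key": "/calico/v1/adir/bkey", "value": "b", "modifiedIndex": 9}
    ]}
}''')

# Each child node is yielded as soon as its closing brace arrives, so
# a large snapshot never has to be held in memory as one document.
for node in ijson.items(SNAPSHOT, "node.nodes.item"):
    print((node["key"], node["modifiedIndex"], node["value"]))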
""" + resp = self._issue_etcd_request( + http_pool, key, timeout, wait_index, + recursive, preload_content + ) + self._check_cluster_id(resp) + return resp + + def _issue_etcd_request(self, http_pool, key, timeout=5, wait_index=None, + recursive=False, preload_content=None): fields = {} if recursive: _log.debug("Adding recursive=true to request") @@ -372,7 +381,6 @@ def _etcd_request(self, http_pool, key, timeout=5, wait_index=None, timeout=timeout, preload_content=preload_content ) - self._check_cluster_id(resp) return resp def _check_cluster_id(self, resp): diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index c51c6838e2..bfe06b549c 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -18,14 +18,18 @@ Tests for the etcd driver module. """ +import json import logging from Queue import Queue, Empty +import os from unittest import TestCase from mock import Mock, call, patch +from urllib3.exceptions import TimeoutError +from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR -from calico.etcddriver.driver import EtcdDriver +from calico.etcddriver.driver import EtcdDriver, DriverShutdown from calico.etcddriver.protocol import * _log = logging.getLogger(__name__) @@ -77,6 +81,137 @@ def flush(self): self.queue.put(FLUSH) +class StubEtcd(object): + def __init__(self): + self.request_queue = Queue() + self.response_queue = Queue() + self.headers = { + "x-etcd-cluster-id": "abcdefg" + } + + def request(self, key, **kwargs): + self.request_queue.put((key, kwargs)) + response = self.response_queue.get(30) + if isinstance(response, Exception): + raise response + else: + return response + + def get_open_request(self): + return self.request_queue.get(timeout=10) + + def assert_request(self, expected_key, **expected_args): + key, args = self.get_open_request() + default_args = {'wait_index': None, + 'preload_content': None, + 'recursive': False, + 'timeout': 5} + for k, v in default_args.iteritems(): + if k in args and args[k] == v: + del args[k] + if expected_key != key: + raise AssertionError("Expected request for %s but got %s" % + (expected_key, key)) + if expected_args != args: + raise AssertionError("Expected request args %s for %s but got %s" % + (expected_args, key, args)) + + def respond_with_exception(self, exc): + self.response_queue.put(exc) + + def respond_with_value(self, key, value, mod_index=None, + etcd_index=None, status=200, action="get"): + data = json.dumps({ + "action": action, + "node": { + "key": key, + "value": value, + "modifiedIndex": mod_index, + } + }) + self.respond_with_data(data, etcd_index, status) + + def respond_with_dir(self, key, children, mod_index=None, + etcd_index=None, status=200): + nodes = [{"key": k, "value": v, "modifiedIndex": mod_index} + for (k, v) in children.iteritems()] + data = json.dumps({ + "action": "get", + "node": { + "key": key, + "dir": True, + "modifiedIndex": mod_index, + "nodes": nodes + } + }) + self.respond_with_data(data, etcd_index, status) + + def respond_with_data(self, data, etcd_index, status): + headers = self.headers.copy() + if etcd_index is not None: + headers["x-etcd-index"] = str(etcd_index) + resp = MockResponse(status, data, headers) + self.response_queue.put(resp) + + def respond_with_stream(self, etcd_index, status=200): + headers = self.headers.copy() + if etcd_index is not None: + headers["x-etcd-index"] = str(etcd_index) + rh, wh = os.pipe() + # os.fdopen() is the standard way to wrap a pipe object but, on the + # read 
side, it seems to be impossible to prevent buffering. That's + # no good for us, where it can result in blocking the reader forever. + # Use our own, more basic, wrapper. + rf = FileWrapper(rh) + wf = FileWrapper(wh) + resp = MockResponse(status, rf, headers) + self.response_queue.put(resp) + return wf + + +class FileWrapper(object): + """ + Ultra low-level file-like wrapper. Avoids the buffering that is + baked into os.fdopen()'s file wrapper. + """ + def __init__(self, fd): + self.fd = fd + + def read(self, bufsize): + return os.read(self.fd, bufsize) + + def write(self, s): + while s: + bytes_written = os.write(self.fd, s) + s = s[bytes_written:] + + def __del__(self): + os.close(self.fd) + + +class MockResponse(object): + def __init__(self, status, data_or_exc, headers=None): + self.status = status + self._data_or_exc = data_or_exc + self.headers = headers or {} + + @property + def data(self): + if isinstance(self._data_or_exc, Exception): + raise self._data_or_exc + elif hasattr(self._data_or_exc, "read"): + return self._data_or_exc.read() + else: + return self._data_or_exc + + def read(self, *args): + return self._data_or_exc.read(*args) + + def getheader(self, header, default=None): + _log.debug("Asked for header %s", header) + return self.headers.get(header.lower(), default) + + class TestEtcdDriverFV(TestCase): """ FV-level tests for the driver. These tests run a real copy of the driver @@ -87,28 +222,24 @@ def setUp(self): sck = Mock() self.msg_reader = StubMessageReader(sck) self.msg_writer = StubMessageWriter(sck) + self.watcher_etcd = StubEtcd() + self.resync_etcd = StubEtcd() self.driver = EtcdDriver(sck) self.driver._msg_reader = self.msg_reader self.driver._msg_writer = self.msg_writer - self.driver._etcd_request = Mock(spec=self.driver._etcd_request, - side_effect=self.mock_etcd_request) + self.driver._issue_etcd_request = Mock( + spec=self.driver._issue_etcd_request, + side_effect=self.mock_etcd_request + ) - def mock_etcd_request(self, http_pool, key, timeout=5, wait_index=None, - recursive=False, preload_content=None): - if http_pool is self.driver._resync_http_pool: - _log.info("Resync thread issuing request for %s timeout=%s, " - "wait_index=%s, recursive=%s, preload=%s", key, timeout, - wait_index, recursive, preload_content) - else: - _log.info("Watcher thread issuing request for %s timeout=%s, " - "wait_index=%s, recursive=%s, preload=%s", key, timeout, - wait_index, recursive, preload_content) - return NotImplemented + self._logging_patch = patch("calico.etcddriver.driver." + "complete_logging", autospec=True) + self._logging_patch.start() - def test_start(self): + def test_mainline(self): self.driver.start() - self.assert_no_msgs() + # First message comes from Felix. self.msg_reader.send_msg( MSG_TYPE_INIT, { @@ -116,16 +247,189 @@ def test_start(self): MSG_KEY_HOSTNAME: "thehostname", } ) - self.assert_next_msg( + # Should trigger driver to start polling the ready flag. + self.assert_msg_to_felix( MSG_TYPE_STATUS, {MSG_KEY_STATUS: STATUS_WAIT_FOR_READY} ) + self.assert_flush_to_felix() + # Respond with ready == true. + self.resync_etcd.assert_request(READY_KEY) + self.resync_etcd.respond_with_value(READY_KEY, "true", mod_index=10) + # Then we should get the global config request. + self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) + self.resync_etcd.respond_with_dir(CONFIG_DIR, { + CONFIG_DIR + "/InterfacePrefix": "tap" + }) + # Followed by the per-host one... 
+ self.resync_etcd.assert_request("/calico/v1/host/thehostname/config", + recursive=True) + self.resync_etcd.respond_with_dir(CONFIG_DIR, { + "/calico/v1/host/thehostname/config/LogSeverityFile": "DEBUG" + }) + # Then the driver should send the config to Felix. + self.assert_msg_to_felix( + MSG_TYPE_CONFIG_LOADED, + { + MSG_KEY_GLOBAL_CONFIG: {"InterfacePrefix": "tap"}, + MSG_KEY_HOST_CONFIG: {"LogSeverityFile": "DEBUG"}, + } + ) + self.assert_flush_to_felix() + # We respond with the config message to trigger the start of the + # resync. + self.msg_reader.send_msg( + MSG_TYPE_CONFIG, + { + MSG_KEY_LOG_FILE: "/tmp/driver.log", + MSG_KEY_SEV_FILE: "DEBUG", + MSG_KEY_SEV_SCREEN: "DEBUG", + MSG_KEY_SEV_SYSLOG: "DEBUG", + } + ) + self.assert_msg_to_felix( + MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_RESYNC, + } + ) + self.assert_flush_to_felix() + # We should get a request to load the full snapshot. + self.resync_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=120, preload_content=False + ) + snap_stream = self.resync_etcd.respond_with_stream(etcd_index=10) + # And then the headers should trigger a request from the watcher + # including the etcd_index we sent even though we haven't sent a + # response body to the resync thread. + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=11 + ) + # Start sending the snapshot response: + snap_stream.write('''{ + "action": "get", + "node": { + "key": "/calico/v1", + "dir": true, + "nodes": [ + { + "key": "/calico/v1/adir", + "dir": true, + "nodes": [ + { + "key": "/calico/v1/adir/akey", + "value": "akey's value", + "modifiedIndex": 8 + }, + ''') + # Should generate a message to felix even though it's only seen part + # of the response... + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/akey", + MSG_KEY_VALUE: "akey's value", + }) + # Respond to the watcher, this should get merged into the event + # stream at some point later. + self.watcher_etcd.respond_with_value( + "/calico/v1/adir/bkey", + "b", + mod_index=12, + action="set" + ) + # Wait until the watcher makes its next request (with revved + # wait_index) to make sure it has queued its event to the resync + # thread. + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=13 + ) + # Write some more data to the resync thread, it should process that + # and the queued watcher event. + snap_stream.write(''' + { + "key": "/calico/v1/adir/ckey", + "value": "c", + "modifiedIndex": 8 + }, + ''') + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ckey", + MSG_KEY_VALUE: "c", + }) + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/bkey", + MSG_KEY_VALUE: "b", + }) + # Respond to the watcher with another event. + self.watcher_etcd.respond_with_value( + "/calico/v1/adir/dkey", + "d", + mod_index=13, + action="set" + ) + # Wait until the watcher makes its next request (with revved + # wait_index) to make sure it has queued its event to the resync + # thread. + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=14 + ) + # Send the resync thread some data that should be ignored due to the + # preceding event. + snap_stream.write(''' + { + "key": "/calico/v1/adir/bkey", + "value": "b", + "modifiedIndex": 9 + }, + ''') + # The resync event would be generated first but we should should only + # see the watcher event. 
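# The interleaving above hinges on the high-water-mark rule; a toy
# rendering of it (illustrative names, not the driver's real API):
hwm = {}

def deliver(key, mod_idx):
    if mod_idx > hwm.get(key, 0):
        hwm[key] = mod_idx
        return True   # newer than anything seen: forward to Felix
    return False      # stale snapshot entry: drop it

assert deliver("/calico/v1/adir/bkey", 12)      # watcher event wins
assert not deliver("/calico/v1/adir/bkey", 9)   # older snapshot line dropped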
+ self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/dkey", + MSG_KEY_VALUE: "d", + }) + # Finish the snapshot. + snap_stream.write(''' + { + "key": "/calico/v1/Ready", + "value": "true", + "modifiedIndex": 10 + }] + }] + } + } + ''') + # Should get the in-sync message. (No event for Ready flag due to + # HWM. + self.assert_msg_to_felix(MSG_TYPE_STATUS, { + MSG_KEY_STATUS: STATUS_IN_SYNC, + }) + self.assert_flush_to_felix() + # Now send a watcher event, which should go straight through. + self.watcher_etcd.respond_with_value( + "/calico/v1/adir/ekey", + "e", + mod_index=14, + action="set" + ) + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ekey", + MSG_KEY_VALUE: "e", + }) + self.assert_flush_to_felix() - def assert_next_msg(self, msg_type, fields=None): - mt, fs = self.msg_writer.queue.get(timeout=10) + def assert_msg_to_felix(self, msg_type, fields=None): + try: + mt, fs = self.msg_writer.queue.get(timeout=2) + except Empty: + self.fail("Expected %s message to felix but no message was sent" % + msg_type) self.assertEqual(msg_type, mt) self.assertEqual(fields, fs) + def assert_flush_to_felix(self): + self.assertEqual(self.msg_writer.queue.get(timeout=10), + FLUSH) + def assert_no_msgs(self): try: msg = self.msg_writer.queue.get(timeout=1) @@ -134,15 +438,41 @@ def assert_no_msgs(self): else: self.fail("Message unexpectedly received: %s" % msg) + def mock_etcd_request(self, http_pool, key, timeout=5, wait_index=None, + recursive=False, preload_content=None): + """ + Called from another thread when the driver makes an etcd request, + we queue the request via the correct stub, then block, waiting + for the main thread to tell us what to do. + """ + if http_pool is self.driver._resync_http_pool: + _log.info("Resync thread issuing request for %s timeout=%s, " + "wait_index=%s, recursive=%s, preload=%s", key, timeout, + wait_index, recursive, preload_content) + etcd_stub = self.resync_etcd + else: + _log.info("Watcher thread issuing request for %s timeout=%s, " + "wait_index=%s, recursive=%s, preload=%s", key, timeout, + wait_index, recursive, preload_content) + etcd_stub = self.watcher_etcd + + return etcd_stub.request(key, + timeout=timeout, + wait_index=wait_index, + recursive=recursive, + preload_content=preload_content) + def tearDown(self): - self.driver.stop() - self.msg_reader.send_timeout() - self.driver._reader_thread.join(2) - self.driver._resync_thread.join(2) try: - self.driver._watcher_thread.join(2) - self.assertFalse(self.driver._watcher_thread.is_alive()) - except AttributeError: - pass - self.assertFalse(self.driver._reader_thread.is_alive()) - self.assertFalse(self.driver._resync_thread.is_alive()) + # Request that the driver stops. + self.driver.stop() + # Make sure we don't block the driver from stopping. + self.msg_reader.send_timeout() + self.resync_etcd.respond_with_exception(TimeoutError()) + self.watcher_etcd.respond_with_exception(TimeoutError()) + # Wait for it to stop. + self.assertTrue(self.driver.join(1), "Driver failed to stop") + finally: + # Now the driver is stopped, it's safe to remove out patch of + # complete_logging() + self._logging_patch.stop() From 389456f5b80ba6d3c3488f1febeda2ea2dca4e03 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 09:38:06 +0000 Subject: [PATCH 65/98] Minor cleanups; get driver tests working with other tests. 
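The tearDown above unblocks each worker by feeding its queue an
exception to re-raise. The essential shape of that trick, with a
stdlib RuntimeError standing in for urllib3's TimeoutError:

    from Queue import Queue  # Python 2, as elsewhere in this tree
    from threading import Thread

    responses = Queue()

    def blocked_worker():
        item = responses.get()  # blocks, like a stubbed etcd request
        if isinstance(item, BaseException):
            return  # the real stub re-raises it to the caller

    t = Thread(target=blocked_worker)
    t.start()
    responses.put(RuntimeError("unblock"))  # stand-in for TimeoutError()
    t.join(2)
    assert not t.is_alive()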
--- calico/etcddriver/test/test_driver.py | 106 +++++++++++++++++++------- calico/test/test_etcdutils.py | 8 +- 2 files changed, 85 insertions(+), 29 deletions(-) diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index bfe06b549c..38a700b028 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -39,11 +39,18 @@ class StubMessageReader(MessageReader): + """ + Replacement for the Driver's MessageReader, which is how it reads + from Felix. + + Allows us to send messages as if we were Felix. + """ def __init__(self, sck): super(StubMessageReader, self).__init__(sck) self.queue = Queue() def send_msg(self, msg_type, fields=None): + """Called by the test to send the driver a message.""" msg = { MSG_KEY_TYPE: msg_type } @@ -51,12 +58,15 @@ def send_msg(self, msg_type, fields=None): self.queue.put((msg_type, msg)) def send_timeout(self): + """Called by the test to send the driver a timeout.""" self.queue.put(None) def send_exception(self, exc): + """Called by the test to raise an exception from the driver's read.""" self.queue.put(exc) def new_messages(self, timeout=None): + """Called by the driver to receive new messages.""" while True: item = self.queue.get() if item is None: @@ -68,6 +78,13 @@ def new_messages(self, timeout=None): class StubMessageWriter(MessageWriter): + """ + Replacement for the driver's MessageWriter, which it uses to send messages + to Felix. + + Buffers the messages and flush calls in a queue for the test to + interrogate. + """ def __init__(self, sck): super(StubMessageWriter, self).__init__(sck) self.queue = Queue() @@ -82,6 +99,11 @@ def flush(self): class StubEtcd(object): + """ + A fake connection to etcd. We hook the driver's _issue_etcd_request + method and block the relevant thread until the test calls one of the + respond_... methods. + """ def __init__(self): self.request_queue = Queue() self.response_queue = Queue() @@ -90,6 +112,10 @@ def __init__(self): } def request(self, key, **kwargs): + """ + Called from the driver to make a request. Blocks until the + test thread sends a response. + """ self.request_queue.put((key, kwargs)) response = self.response_queue.get(30) if isinstance(response, Exception): @@ -97,11 +123,17 @@ def request(self, key, **kwargs): else: return response - def get_open_request(self): + def get_next_request(self): + """ + Called from the test to get the next request from the driver. + """ return self.request_queue.get(timeout=10) def assert_request(self, expected_key, **expected_args): - key, args = self.get_open_request() + """ + Asserts the properies of the next request. + """ + key, args = self.get_next_request() default_args = {'wait_index': None, 'preload_content': None, 'recursive': False, @@ -117,10 +149,18 @@ def assert_request(self, expected_key, **expected_args): (expected_args, key, args)) def respond_with_exception(self, exc): + """ + Called from the test to raise an exception from the current/next + request. + """ self.response_queue.put(exc) def respond_with_value(self, key, value, mod_index=None, etcd_index=None, status=200, action="get"): + """ + Called from the test to return a simple single-key value to the + driver. + """ data = json.dumps({ "action": action, "node": { @@ -133,6 +173,10 @@ def respond_with_value(self, key, value, mod_index=None, def respond_with_dir(self, key, children, mod_index=None, etcd_index=None, status=200): + """ + Called from the test to return a directory of key/values (from a + recursive request). 
+ """ nodes = [{"key": k, "value": v, "modifiedIndex": mod_index} for (k, v) in children.iteritems()] data = json.dumps({ @@ -147,6 +191,10 @@ def respond_with_dir(self, key, children, mod_index=None, self.respond_with_data(data, etcd_index, status) def respond_with_data(self, data, etcd_index, status): + """ + Called from the test to return a raw response (e.g. to send + malformed JSON). + """ headers = self.headers.copy() if etcd_index is not None: headers["x-etcd-index"] = str(etcd_index) @@ -154,39 +202,43 @@ def respond_with_data(self, data, etcd_index, status): self.response_queue.put(resp) def respond_with_stream(self, etcd_index, status=200): + """ + Called from the test to respond with a stream, allowing the test to + send chunks of data in response. + """ headers = self.headers.copy() if etcd_index is not None: headers["x-etcd-index"] = str(etcd_index) - rh, wh = os.pipe() - # os.fdopen() is the standard way to wrap a pipe object but, on the - # read side, it seems to be impossible to prevent buffering. That's - # no good for us, where it can result in blocking the reader forever. - # Use our own, more basic, wrapper. - rf = FileWrapper(rh) - wf = FileWrapper(wh) - resp = MockResponse(status, rf, headers) + f = PipeFile() + resp = MockResponse(status, f, headers) self.response_queue.put(resp) - return wf - + return f -class FileWrapper(object): - """ - Ultra low-level file-like wrapper. Avoids the buffering that is - baked into os.fdopen()'s file wrapper. - """ - def __init__(self, fd): - self.fd = fd - def read(self, bufsize): - return os.read(self.fd, bufsize) - - def write(self, s): - while s: - bytes_written = os.write(self.fd, s) - s = s[bytes_written:] +class PipeFile(object): + def __init__(self): + self.queue = Queue() + self.buf = None + + def read(self, length): + data = "" + if not self.buf: + self.buf = self.queue.get() + while len(data) < length: + data += self.buf[:length - len(data)] + self.buf = self.buf[length - len(data):] + if not self.buf: + try: + self.buf = self.queue.get_nowait() + except Empty: + break + return data + + def write(self, data): + self.queue.put(data) def __del__(self): - os.close(self.fd) + self.queue.put("") class MockResponse(object): diff --git a/calico/test/test_etcdutils.py b/calico/test/test_etcdutils.py index 93bfe69296..40a1d628b0 100644 --- a/calico/test/test_etcdutils.py +++ b/calico/test/test_etcdutils.py @@ -21,9 +21,13 @@ import logging import types -import etcd from mock import Mock, patch, call -from calico.etcdutils import PathDispatcher, EtcdWatcher, delete_empty_parents +from calico.etcdutils import ( + PathDispatcher, EtcdWatcher, delete_empty_parents +) +# Since other tests patch the module table, make sure we have the same etcd +# module as the module under test. +from calico.etcdutils import etcd from calico.felix.test.base import BaseTestCase From dcd3df0ae3ac5ba9c3036c01a455b59dd6ec5df1 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 11:11:45 +0000 Subject: [PATCH 66/98] Add UTs for protocol.py read/write function. Minor fix to reader. 
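The reader fix is in the first hunk below: new_messages() passed a
hard-coded one-second timeout to select() instead of the caller's
value. The corrected guard, in isolation:

    import select
    import socket

    a, b = socket.socketpair()  # POSIX-only, purely for illustration
    b.sendall(b"ping")
    read_ready, _, _ = select.select([a], [], [], 5)  # caller's timeout
    if read_ready:
        print(a.recv(16384))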
--- calico/etcddriver/protocol.py | 2 +- calico/etcddriver/test/test_protocol.py | 217 ++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 calico/etcddriver/test/test_protocol.py diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index 35ebe58b85..f242becc6e 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -144,7 +144,7 @@ def new_messages(self, timeout=None): :raises SocketClosed if the socket is closed. """ if timeout is not None: - read_ready, _, _ = select.select([self._sck], [], [], 1) + read_ready, _, _ = select.select([self._sck], [], [], timeout) if not read_ready: return try: diff --git a/calico/etcddriver/test/test_protocol.py b/calico/etcddriver/test/test_protocol.py new file mode 100644 index 0000000000..427ae705f8 --- /dev/null +++ b/calico/etcddriver/test/test_protocol.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.test_protocol +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tests for Felix/etcddriver protocol read/write function. +""" + +import logging +import socket +from unittest import TestCase +import errno +from mock import Mock, call, patch +import msgpack +from calico.etcddriver.protocol import ( + MessageWriter, STATUS_RESYNC, MSG_KEY_STATUS, MSG_TYPE_STATUS, + MSG_KEY_TYPE, STATUS_IN_SYNC, MessageReader, + SocketClosed, WriteFailed) + +_log = logging.getLogger(__name__) + + +class StubWriterSocket(object): + def __init__(self): + self.chunks = [] + self.unpacker = msgpack.Unpacker() + self.exception = None + + def sendall(self, data): + if self.exception: + raise self.exception + self.chunks.append(data) + self.unpacker.feed(data) + + def next_msg(self): + return next(self.unpacker) + + +class TestMessageWriter(TestCase): + def setUp(self): + self.sck = StubWriterSocket() + self.writer = MessageWriter(self.sck) + self.unpacker = msgpack.Unpacker() + + def test_send_message(self): + self.writer.send_message(MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_RESYNC + }) + self.assert_message_sent({ + MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: STATUS_RESYNC + }) + self.assert_no_more_messages() + + def test_send_message_error(self): + self.sck.exception = socket.error() + self.assertRaises(WriteFailed, self.writer.send_message, + MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_RESYNC + }) + + def test_send_message_buffered(self): + # First message gets buffered. + self.writer.send_message(MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_RESYNC + }, + flush=False) + self.assert_no_more_messages() + + # Second message triggers a flush of both messages, in order. 
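# A sketch of the buffering contract these tests pin down; the
# 200-message threshold is inferred from test_eventual_flush below,
# and the class/attribute names are invented for the example:
import msgpack

class SketchWriter(object):
    FLUSH_THRESHOLD = 200

    def __init__(self, sck):
        self._sck = sck
        self._buf = []

    def send_message(self, msg, flush=True):
        self._buf.append(msgpack.dumps(msg))
        if flush or len(self._buf) > self.FLUSH_THRESHOLD:
            self.flush()

    def flush(self):
        if self._buf:
            self._sck.sendall(b"".join(self._buf))  # one write per batch
            self._buf = []

class ListSocket(object):
    def __init__(self):
        self.chunks = []
    def sendall(self, data):
        self.chunks.append(data)

sck = ListSocket()
writer = SketchWriter(sck)
writer.send_message({"type": "status"}, flush=False)
assert sck.chunks == []      # buffered, nothing on the wire yet
writer.send_message({"type": "status"})
assert len(sck.chunks) == 1  # both messages flushed in a single write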
+ self.writer.send_message(MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_IN_SYNC + }) + self.assert_message_sent({ + MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: STATUS_RESYNC + }) + self.assert_message_sent({ + MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: STATUS_IN_SYNC + }) + self.assert_no_more_messages() + + def test_eventual_flush(self): + # First 200 messages should be buffered. + for _ in xrange(200): + self.writer.send_message(MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_RESYNC + }, + flush=False) + self.assert_no_more_messages() + + # 201st message triggers them all to be sent. + self.writer.send_message(MSG_TYPE_STATUS, + { + MSG_KEY_STATUS: STATUS_RESYNC + }, + flush=False) + for _ in xrange(201): + self.assert_message_sent({ + MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: STATUS_RESYNC + }) + self.assert_no_more_messages() + + def assert_message_sent(self, msg): + try: + received_msg = self.sck.next_msg() + except StopIteration: + self.fail("No messages received") + self.assertEqual(received_msg, msg, + "Received incorrect message: %s " + "while expecting: %s" % (received_msg, msg)) + + def assert_no_more_messages(self): + try: + msg = self.sck.next_msg() + except StopIteration: + return + else: + self.fail("Unexpected message: %s" % msg) + + +class TestMessageReader(TestCase): + def setUp(self): + self.sck = Mock(spec=socket.socket) + self.reader = MessageReader(self.sck) + + @patch("select.select", autospec=True) + def test_mainline(self, m_select): + m_select.side_effect = iter([ + ([self.sck], [], []), + ([self.sck], [], []), + ]) + exp_msg = {MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: STATUS_RESYNC} + self.sck.recv.return_value = msgpack.dumps(exp_msg) + for _ in xrange(2): + msg_gen = self.reader.new_messages(timeout=1) + msg_type, msg = next(msg_gen) + self.assertEqual(msg_type, MSG_TYPE_STATUS) + self.assertEqual(msg, exp_msg) + self.assertEqual( + self.sck.recv.mock_calls, + [ + call(16384), + call(16384), + ] + ) + + @patch("select.select", autospec=True) + def test_retryable_error(self, m_select): + m_select.side_effect = iter([ + ([self.sck], [], []), + ([self.sck], [], []), + ([self.sck], [], []), + ([self.sck], [], []), + ]) + errors = [] + for no in [errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR]: + err = socket.error() + err.errno = no + errors.append(err) + exp_msg = {MSG_KEY_TYPE: MSG_TYPE_STATUS, + MSG_KEY_STATUS: STATUS_RESYNC} + self.sck.recv.side_effect = iter(errors + [msgpack.dumps(exp_msg)]) + for _ in errors: + msg_gen = self.reader.new_messages(timeout=1) + self.assertRaises(StopIteration, next, msg_gen) + msg_gen = self.reader.new_messages(timeout=1) + msg_type, msg = next(msg_gen) + self.assertEqual(msg_type, MSG_TYPE_STATUS) + self.assertEqual(msg, exp_msg) + + @patch("select.select", autospec=True) + def test_non_retryable_error(self, m_select): + m_select.side_effect = iter([ + ([self.sck], [], []), + ]) + err = socket.error() + err.errno = errno.E2BIG + self.sck.recv.side_effect = err + msg_gen = self.reader.new_messages(timeout=1) + self.assertRaises(socket.error, next, msg_gen) + + @patch("select.select", autospec=True) + def test_timeout(self, m_select): + m_select.side_effect = iter([ + ([], [], []), + ]) + msg_gen = self.reader.new_messages(timeout=1) + self.assertRaises(StopIteration, next, msg_gen) + self.assertFalse(self.sck.recv.called) + + @patch("select.select", autospec=True) + def test_shutdown(self, m_select): + self.sck.recv.return_value = "" + msg_gen = self.reader.new_messages() + self.assertRaises(SocketClosed, 
next, msg_gen) From b1e6662ebe4624c4b9db0acbdbb104cf4fb47870 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 11:56:23 +0000 Subject: [PATCH 67/98] Cover remaining lines in protocol.py. --- calico/etcddriver/test/test_protocol.py | 26 ++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/calico/etcddriver/test/test_protocol.py b/calico/etcddriver/test/test_protocol.py index 427ae705f8..ad12954d3f 100644 --- a/calico/etcddriver/test/test_protocol.py +++ b/calico/etcddriver/test/test_protocol.py @@ -77,9 +77,6 @@ def test_send_message_error(self): def test_send_message_buffered(self): # First message gets buffered. self.writer.send_message(MSG_TYPE_STATUS, - { - MSG_KEY_STATUS: STATUS_RESYNC - }, flush=False) self.assert_no_more_messages() @@ -89,8 +86,7 @@ def test_send_message_buffered(self): MSG_KEY_STATUS: STATUS_IN_SYNC }) self.assert_message_sent({ - MSG_KEY_TYPE: MSG_TYPE_STATUS, - MSG_KEY_STATUS: STATUS_RESYNC + MSG_KEY_TYPE: MSG_TYPE_STATUS }) self.assert_message_sent({ MSG_KEY_TYPE: MSG_TYPE_STATUS, @@ -121,6 +117,10 @@ def test_eventual_flush(self): }) self.assert_no_more_messages() + def test_flush_no_content(self): + self.writer.flush() + self.assertFalse(self.sck.chunks) + def assert_message_sent(self, msg): try: received_msg = self.sck.next_msg() @@ -166,6 +166,22 @@ def test_mainline(self, m_select): ] ) + @patch("select.select", autospec=True) + def test_partial_read(self, m_select): + m_select.side_effect = iter([ + ([self.sck], [], []), + ([self.sck], [], []), + ]) + exp_msg = {MSG_KEY_TYPE: MSG_TYPE_STATUS} + msg_bytes = msgpack.dumps(exp_msg) + self.sck.recv.side_effect = iter([ + msg_bytes[:len(msg_bytes)/2], + msg_bytes[len(msg_bytes)/2:], + ]) + self.assertRaises(StopIteration, next, self.reader.new_messages()) + self.assertEqual(next(self.reader.new_messages()), + (MSG_TYPE_STATUS, exp_msg)) + @patch("select.select", autospec=True) def test_retryable_error(self, m_select): m_select.side_effect = iter([ From bd8189b5ab5c656b5662854b399a3b1ff45126e4 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 14:02:46 +0000 Subject: [PATCH 68/98] Streamline driver test, avoid messy stack trace on success. 
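The previous patch's partial-read test leans on msgpack's incremental
Unpacker; its behaviour in isolation:

    import msgpack

    unpacker = msgpack.Unpacker()
    msg_bytes = msgpack.dumps({"type": "status"})

    unpacker.feed(msg_bytes[:len(msg_bytes) // 2])
    assert list(unpacker) == []  # half a message: nothing ready yet

    unpacker.feed(msg_bytes[len(msg_bytes) // 2:])
    assert list(unpacker) == [{"type": "status"}]  # message completes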
--- calico/etcddriver/test/test_driver.py | 63 +++++++++++++-------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index 38a700b028..e13d3b1036 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -22,14 +22,12 @@ import logging from Queue import Queue, Empty -import os from unittest import TestCase from mock import Mock, call, patch -from urllib3.exceptions import TimeoutError from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR -from calico.etcddriver.driver import EtcdDriver, DriverShutdown +from calico.etcddriver.driver import EtcdDriver from calico.etcddriver.protocol import * _log = logging.getLogger(__name__) @@ -71,7 +69,7 @@ def new_messages(self, timeout=None): item = self.queue.get() if item is None: return # timeout - if isinstance(item, Exception): + if isinstance(item, BaseException): raise item else: yield item @@ -118,7 +116,7 @@ def request(self, key, **kwargs): """ self.request_queue.put((key, kwargs)) response = self.response_queue.get(30) - if isinstance(response, Exception): + if isinstance(response, BaseException): raise response else: return response @@ -292,23 +290,14 @@ def setUp(self): def test_mainline(self): self.driver.start() # First message comes from Felix. - self.msg_reader.send_msg( - MSG_TYPE_INIT, - { - MSG_KEY_ETCD_URL: "http://localhost:4001", - MSG_KEY_HOSTNAME: "thehostname", - } - ) - # Should trigger driver to start polling the ready flag. - self.assert_msg_to_felix( - MSG_TYPE_STATUS, - {MSG_KEY_STATUS: STATUS_WAIT_FOR_READY} - ) - self.assert_flush_to_felix() - # Respond with ready == true. + self.send_init_msg() + # Should trigger driver to send a status and start polling the ready + # flag. + self.assert_status_message(STATUS_WAIT_FOR_READY) + # Respond to etcd request with ready == true. self.resync_etcd.assert_request(READY_KEY) self.resync_etcd.respond_with_value(READY_KEY, "true", mod_index=10) - # Then we should get the global config request. + # Then etcd should get the global config request. self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) self.resync_etcd.respond_with_dir(CONFIG_DIR, { CONFIG_DIR + "/InterfacePrefix": "tap" @@ -339,13 +328,7 @@ def test_mainline(self): MSG_KEY_SEV_SYSLOG: "DEBUG", } ) - self.assert_msg_to_felix( - MSG_TYPE_STATUS, - { - MSG_KEY_STATUS: STATUS_RESYNC, - } - ) - self.assert_flush_to_felix() + self.assert_status_message(STATUS_RESYNC) # We should get a request to load the full snapshot. self.resync_etcd.assert_request( VERSION_DIR, recursive=True, timeout=120, preload_content=False @@ -452,10 +435,7 @@ def test_mainline(self): ''') # Should get the in-sync message. (No event for Ready flag due to # HWM. - self.assert_msg_to_felix(MSG_TYPE_STATUS, { - MSG_KEY_STATUS: STATUS_IN_SYNC, - }) - self.assert_flush_to_felix() + self.assert_status_message(STATUS_IN_SYNC) # Now send a watcher event, which should go straight through. 
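# Why the stubs above now check BaseException rather than Exception:
# SystemExit (and KeyboardInterrupt) deliberately do not derive from
# Exception, so the old check would have treated an injected
# SystemExit as an ordinary queued message instead of raising it.
assert isinstance(SystemExit(), BaseException)
assert not isinstance(SystemExit(), Exception)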
self.watcher_etcd.respond_with_value( "/calico/v1/adir/ekey", @@ -469,6 +449,22 @@ def test_mainline(self): }) self.assert_flush_to_felix() + def assert_status_message(self, status): + self.assert_msg_to_felix( + MSG_TYPE_STATUS, + {MSG_KEY_STATUS: status} + ) + self.assert_flush_to_felix() + + def send_init_msg(self): + self.msg_reader.send_msg( + MSG_TYPE_INIT, + { + MSG_KEY_ETCD_URL: "http://localhost:4001", + MSG_KEY_HOSTNAME: "thehostname", + } + ) + def assert_msg_to_felix(self, msg_type, fields=None): try: mt, fs = self.msg_writer.queue.get(timeout=2) @@ -520,8 +516,9 @@ def tearDown(self): self.driver.stop() # Make sure we don't block the driver from stopping. self.msg_reader.send_timeout() - self.resync_etcd.respond_with_exception(TimeoutError()) - self.watcher_etcd.respond_with_exception(TimeoutError()) + # SystemExit kills (only) the thread silently. + self.resync_etcd.respond_with_exception(SystemExit()) + self.watcher_etcd.respond_with_exception(SystemExit()) # Wait for it to stop. self.assertTrue(self.driver.join(1), "Driver failed to stop") finally: From 513fd83a7cdf716e4bb807bdcd40cc7214d308a2 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 14:35:46 +0000 Subject: [PATCH 69/98] Make timeout=1 the default for MessageReader. Avoids accidentally blocking forever. --- calico/etcddriver/driver.py | 6 +++--- calico/etcddriver/protocol.py | 2 +- calico/etcddriver/test/test_protocol.py | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index b5ff70a26f..138393f276 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -147,7 +147,7 @@ def _read_from_socket(self): """ try: while not self._stop_event.is_set(): - for msg_type, msg in self._msg_reader.new_messages(): + for msg_type, msg in self._msg_reader.new_messages(timeout=1): if msg_type == MSG_TYPE_INIT: self._handle_init(msg) elif msg_type == MSG_TYPE_CONFIG: @@ -556,9 +556,9 @@ def get_etcd_connection(self): def _on_key_updated(self, key, value): """ - Called when we've worked out that a key ahs been updated/deleted. + Called when we've worked out that a key has been updated/deleted. - Deos any local processing and sends the update to Felix. + Does any local processing and sends the update to Felix. :param str key: The etcd key that has changed. :param str|NoneType value: the new value of the key (None indicates deletion). diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index f242becc6e..78b987b34c 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -133,7 +133,7 @@ def __init__(self, sck): self._sck = sck self._unpacker = msgpack.Unpacker() - def new_messages(self, timeout=None): + def new_messages(self, timeout=1): """ Generator: generates 0 or more tuples containing message type and message body (as a dict). 
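As the tearDown comment above notes, SystemExit raised inside a worker
thread ends just that thread, and the threading module swallows it
without printing a traceback; in isolation:

    import threading

    def doomed_worker():
        raise SystemExit()  # terminates only this thread, silently

    t = threading.Thread(target=doomed_worker)
    t.start()
    t.join(2)
    assert not t.is_alive()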
diff --git a/calico/etcddriver/test/test_protocol.py b/calico/etcddriver/test/test_protocol.py index ad12954d3f..14d36bf5ad 100644 --- a/calico/etcddriver/test/test_protocol.py +++ b/calico/etcddriver/test/test_protocol.py @@ -178,8 +178,9 @@ def test_partial_read(self, m_select): msg_bytes[:len(msg_bytes)/2], msg_bytes[len(msg_bytes)/2:], ]) - self.assertRaises(StopIteration, next, self.reader.new_messages()) - self.assertEqual(next(self.reader.new_messages()), + self.assertRaises(StopIteration, next, + self.reader.new_messages(timeout=None)) + self.assertEqual(next(self.reader.new_messages(timeout=None)), (MSG_TYPE_STATUS, exp_msg)) @patch("select.select", autospec=True) @@ -229,5 +230,5 @@ def test_timeout(self, m_select): @patch("select.select", autospec=True) def test_shutdown(self, m_select): self.sck.recv.return_value = "" - msg_gen = self.reader.new_messages() + msg_gen = self.reader.new_messages(timeout=None) self.assertRaises(SocketClosed, next, msg_gen) From 0096a1af06b7f91c841c6b8bfbe4c941201d8b0a Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 14:36:21 +0000 Subject: [PATCH 70/98] More driver FV tests covering various error cases. --- calico/etcddriver/test/test_driver.py | 208 ++++++++++++++++---------- 1 file changed, 132 insertions(+), 76 deletions(-) diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index e13d3b1036..b015e8f1b7 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -25,6 +25,7 @@ from unittest import TestCase from mock import Mock, call, patch +from urllib3.exceptions import TimeoutError from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR from calico.etcddriver.driver import EtcdDriver @@ -86,8 +87,11 @@ class StubMessageWriter(MessageWriter): def __init__(self, sck): super(StubMessageWriter, self).__init__(sck) self.queue = Queue() + self.exception = None def send_message(self, msg_type, fields=None, flush=True): + if self.exception: + raise self.exception self.queue.put((msg_type, fields)) if flush: self.flush() @@ -223,6 +227,8 @@ def read(self, length): if not self.buf: self.buf = self.queue.get() while len(data) < length: + if isinstance(self.buf, BaseException): + raise self.buf data += self.buf[:length - len(data)] self.buf = self.buf[length - len(data):] if not self.buf: @@ -287,82 +293,18 @@ def setUp(self): "complete_logging", autospec=True) self._logging_patch.start() - def test_mainline(self): - self.driver.start() - # First message comes from Felix. - self.send_init_msg() - # Should trigger driver to send a status and start polling the ready - # flag. - self.assert_status_message(STATUS_WAIT_FOR_READY) - # Respond to etcd request with ready == true. - self.resync_etcd.assert_request(READY_KEY) - self.resync_etcd.respond_with_value(READY_KEY, "true", mod_index=10) - # Then etcd should get the global config request. - self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) - self.resync_etcd.respond_with_dir(CONFIG_DIR, { - CONFIG_DIR + "/InterfacePrefix": "tap" - }) - # Followed by the per-host one... - self.resync_etcd.assert_request("/calico/v1/host/thehostname/config", - recursive=True) - self.resync_etcd.respond_with_dir(CONFIG_DIR, { - "/calico/v1/host/thehostname/config/LogSeverityFile": "DEBUG" - }) - # Then the driver should send the config to Felix. 
- self.assert_msg_to_felix( - MSG_TYPE_CONFIG_LOADED, - { - MSG_KEY_GLOBAL_CONFIG: {"InterfacePrefix": "tap"}, - MSG_KEY_HOST_CONFIG: {"LogSeverityFile": "DEBUG"}, - } - ) - self.assert_flush_to_felix() - # We respond with the config message to trigger the start of the - # resync. - self.msg_reader.send_msg( - MSG_TYPE_CONFIG, - { - MSG_KEY_LOG_FILE: "/tmp/driver.log", - MSG_KEY_SEV_FILE: "DEBUG", - MSG_KEY_SEV_SCREEN: "DEBUG", - MSG_KEY_SEV_SYSLOG: "DEBUG", - } - ) - self.assert_status_message(STATUS_RESYNC) - # We should get a request to load the full snapshot. - self.resync_etcd.assert_request( - VERSION_DIR, recursive=True, timeout=120, preload_content=False - ) - snap_stream = self.resync_etcd.respond_with_stream(etcd_index=10) - # And then the headers should trigger a request from the watcher - # including the etcd_index we sent even though we haven't sent a - # response body to the resync thread. - self.watcher_etcd.assert_request( - VERSION_DIR, recursive=True, timeout=90, wait_index=11 - ) - # Start sending the snapshot response: - snap_stream.write('''{ - "action": "get", - "node": { - "key": "/calico/v1", - "dir": true, - "nodes": [ - { - "key": "/calico/v1/adir", - "dir": true, - "nodes": [ - { - "key": "/calico/v1/adir/akey", - "value": "akey's value", - "modifiedIndex": 8 - }, - ''') - # Should generate a message to felix even though it's only seen part - # of the response... - self.assert_msg_to_felix(MSG_TYPE_UPDATE, { - MSG_KEY_KEY: "/calico/v1/adir/akey", - MSG_KEY_VALUE: "akey's value", - }) + def test_mainline_resync(self): + """ + Test of the mainline resync-and-merge processing. + + * Does the initial config handshake with Felix. + * Interleaves the snapshot response with updates via the watcher. + * Checks that the result is correctly merged. + """ + # Initial handshake. + self.start_driver_and_handshake() + # Check for etcd request and start the response. + snap_stream = self.start_snapshot_response() # Respond to the watcher, this should get merged into the event # stream at some point later. self.watcher_etcd.respond_with_value( @@ -449,6 +391,120 @@ def test_mainline(self): }) self.assert_flush_to_felix() + @patch("time.sleep", autospec=True) + def test_resync_pipe_write_fail(self, m_sleep): + """ + Test a read failure on the snapshot. + """ + # Start the driver, it will wait for a message from Felix. + self.driver.start() + # Queue up an error on the driver's next write. + self.msg_writer.exception = WriteFailed() + # Send init message from Felix to driver. + self.send_init_msg() + # Driver should die. + for _ in xrange(100): + # Need to time out the reader thread or it will block shutdown. + self.msg_reader.send_timeout() + if self.driver.join(timeout=0.01): + break + else: + self.fail("Driver failed to die.") + + @patch("time.sleep", autospec=True) + def test_resync_etcd_read_fail(self, m_sleep): + """ + Test a read failure on the snapshot. + """ + # Initial handshake. + self.start_driver_and_handshake() + # Start streaming some data. + snap_stream = self.start_snapshot_response() + # But then the read times out... + snap_stream.write(TimeoutError()) + # Triggering a restart of the resync loop. + self.assert_status_message(STATUS_WAIT_FOR_READY) + + def start_driver_and_handshake(self): + self.driver.start() + # First message comes from Felix. + self.send_init_msg() + # Should trigger driver to send a status and start polling the ready + # flag. + self.assert_status_message(STATUS_WAIT_FOR_READY) + # Respond to etcd request with ready == true. 
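# The read-failure test above relies on PipeFile accepting exceptions
# as well as data chunks; a reduced version of that injection
# mechanism (RuntimeError standing in for urllib3's TimeoutError):
from Queue import Queue  # Python 2, as elsewhere in this tree

class TinyPipe(object):
    def __init__(self):
        self.queue = Queue()

    def write(self, chunk_or_exc):
        self.queue.put(chunk_or_exc)

    def read(self, length):
        item = self.queue.get()
        if isinstance(item, BaseException):
            raise item  # surfaces on the *reading* thread
        return item[:length]

pipe = TinyPipe()
pipe.write("data")
pipe.write(RuntimeError("simulated mid-snapshot timeout"))
print(pipe.read(1024))  # -> "data"
try:
    pipe.read(1024)
except RuntimeError as e:
    print("reader saw: %s" % e)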
+ self.resync_etcd.assert_request(READY_KEY) + self.resync_etcd.respond_with_value(READY_KEY, "true", mod_index=10) + # Then etcd should get the global config request. + self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) + self.resync_etcd.respond_with_dir(CONFIG_DIR, { + CONFIG_DIR + "/InterfacePrefix": "tap" + }) + # Followed by the per-host one... + self.resync_etcd.assert_request("/calico/v1/host/thehostname/config", + recursive=True) + self.resync_etcd.respond_with_dir(CONFIG_DIR, { + "/calico/v1/host/thehostname/config/LogSeverityFile": "DEBUG" + }) + # Then the driver should send the config to Felix. + self.assert_msg_to_felix( + MSG_TYPE_CONFIG_LOADED, + { + MSG_KEY_GLOBAL_CONFIG: {"InterfacePrefix": "tap"}, + MSG_KEY_HOST_CONFIG: {"LogSeverityFile": "DEBUG"}, + } + ) + self.assert_flush_to_felix() + # We respond with the config message to trigger the start of the + # resync. + self.msg_reader.send_msg( + MSG_TYPE_CONFIG, + { + MSG_KEY_LOG_FILE: "/tmp/driver.log", + MSG_KEY_SEV_FILE: "DEBUG", + MSG_KEY_SEV_SCREEN: "DEBUG", + MSG_KEY_SEV_SYSLOG: "DEBUG", + } + ) + self.assert_status_message(STATUS_RESYNC) + + def start_snapshot_response(self): + # We should get a request to load the full snapshot. + self.resync_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=120, preload_content=False + ) + snap_stream = self.resync_etcd.respond_with_stream(etcd_index=10) + # And then the headers should trigger a request from the watcher + # including the etcd_index we sent even though we haven't sent a + # response body to the resync thread. + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=11 + ) + # Start sending the snapshot response: + snap_stream.write('''{ + "action": "get", + "node": { + "key": "/calico/v1", + "dir": true, + "nodes": [ + { + "key": "/calico/v1/adir", + "dir": true, + "nodes": [ + { + "key": "/calico/v1/adir/akey", + "value": "akey's value", + "modifiedIndex": 8 + }, + ''') + # Should generate a message to felix even though it's only seen part + # of the response... + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/akey", + MSG_KEY_VALUE: "akey's value", + }) + return snap_stream + def assert_status_message(self, status): self.assert_msg_to_felix( MSG_TYPE_STATUS, From 2a072c5cae1e3571c6ddc22cec500fef4d6f2ab3 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 16:31:31 +0000 Subject: [PATCH 71/98] Move stub code to own file. --- calico/etcddriver/test/stubs.py | 265 ++++++++++++++++++++++++++ calico/etcddriver/test/test_driver.py | 249 +----------------------- calico/felix/fetcd.py | 43 ++--- 3 files changed, 291 insertions(+), 266 deletions(-) create mode 100644 calico/etcddriver/test/stubs.py diff --git a/calico/etcddriver/test/stubs.py b/calico/etcddriver/test/stubs.py new file mode 100644 index 0000000000..bc84085068 --- /dev/null +++ b/calico/etcddriver/test/stubs.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.test.stubs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Stub objects used for testing driver/protocol code. +""" +import json + +import logging +from Queue import Queue, Empty + +from calico.etcddriver.protocol import ( + MessageReader, MessageWriter, MSG_KEY_TYPE +) + +_log = logging.getLogger(__name__) + + +# Singleton representing a flush in the stream of writes. +FLUSH = object() + + +class StubMessageReader(MessageReader): + """ + Replacement for the Driver's MessageReader, which is how it reads + from Felix. + + Allows us to send messages as if we were Felix. + """ + def __init__(self, sck): + super(StubMessageReader, self).__init__(sck) + self.queue = Queue() + + def send_msg(self, msg_type, fields=None): + """Called by the test to send the driver a message.""" + msg = { + MSG_KEY_TYPE: msg_type + } + msg.update(fields or {}) + self.queue.put((msg_type, msg)) + + def send_timeout(self): + """Called by the test to send the driver a timeout.""" + self.queue.put(None) + + def send_exception(self, exc): + """Called by the test to raise an exception from the driver's read.""" + self.queue.put(exc) + + def new_messages(self, timeout=None): + """Called by the driver to receive new messages.""" + while True: + item = self.queue.get() + if item is None: + return # timeout + if isinstance(item, BaseException): + raise item + else: + yield item + + +class StubMessageWriter(MessageWriter): + """ + Replacement for the driver's MessageWriter, which it uses to send messages + to Felix. + + Buffers the messages and flush calls in a queue for the test to + interrogate. + """ + def __init__(self, sck): + super(StubMessageWriter, self).__init__(sck) + self.queue = Queue() + self.exception = None + + def send_message(self, msg_type, fields=None, flush=True): + if self.exception: + raise self.exception + self.queue.put((msg_type, fields)) + if flush: + self.flush() + + def flush(self): + self.queue.put(FLUSH) + + +class PipeFile(object): + def __init__(self): + self.queue = Queue() + self.buf = None + + def read(self, length): + data = "" + if not self.buf: + self.buf = self.queue.get() + while len(data) < length: + if isinstance(self.buf, BaseException): + raise self.buf + data += self.buf[:length - len(data)] + self.buf = self.buf[length - len(data):] + if not self.buf: + try: + self.buf = self.queue.get_nowait() + except Empty: + break + return data + + def write(self, data): + self.queue.put(data) + + def __del__(self): + self.queue.put("") + + +class StubEtcd(object): + """ + A fake connection to etcd. We hook the driver's _issue_etcd_request + method and block the relevant thread until the test calls one of the + respond_... methods. + """ + def __init__(self): + self.request_queue = Queue() + self.response_queue = Queue() + self.headers = { + "x-etcd-cluster-id": "abcdefg" + } + + def request(self, key, **kwargs): + """ + Called from the driver to make a request. Blocks until the + test thread sends a response. + """ + self.request_queue.put((key, kwargs)) + response = self.response_queue.get(30) + if isinstance(response, BaseException): + raise response + else: + return response + + def get_next_request(self): + """ + Called from the test to get the next request from the driver. + """ + return self.request_queue.get(timeout=10) + + def assert_request(self, expected_key, **expected_args): + """ + Asserts the properies of the next request. 
+ """ + key, args = self.get_next_request() + default_args = {'wait_index': None, + 'preload_content': None, + 'recursive': False, + 'timeout': 5} + for k, v in default_args.iteritems(): + if k in args and args[k] == v: + del args[k] + if expected_key != key: + raise AssertionError("Expected request for %s but got %s" % + (expected_key, key)) + if expected_args != args: + raise AssertionError("Expected request args %s for %s but got %s" % + (expected_args, key, args)) + + def respond_with_exception(self, exc): + """ + Called from the test to raise an exception from the current/next + request. + """ + self.response_queue.put(exc) + + def respond_with_value(self, key, value, mod_index=None, + etcd_index=None, status=200, action="get"): + """ + Called from the test to return a simple single-key value to the + driver. + """ + data = json.dumps({ + "action": action, + "node": { + "key": key, + "value": value, + "modifiedIndex": mod_index, + } + }) + self.respond_with_data(data, etcd_index, status) + + def respond_with_dir(self, key, children, mod_index=None, + etcd_index=None, status=200): + """ + Called from the test to return a directory of key/values (from a + recursive request). + """ + nodes = [{"key": k, "value": v, "modifiedIndex": mod_index} + for (k, v) in children.iteritems()] + data = json.dumps({ + "action": "get", + "node": { + "key": key, + "dir": True, + "modifiedIndex": mod_index, + "nodes": nodes + } + }) + self.respond_with_data(data, etcd_index, status) + + def respond_with_data(self, data, etcd_index, status): + """ + Called from the test to return a raw response (e.g. to send + malformed JSON). + """ + headers = self.headers.copy() + if etcd_index is not None: + headers["x-etcd-index"] = str(etcd_index) + resp = MockResponse(status, data, headers) + self.response_queue.put(resp) + + def respond_with_stream(self, etcd_index, status=200): + """ + Called from the test to respond with a stream, allowing the test to + send chunks of data in response. + """ + headers = self.headers.copy() + if etcd_index is not None: + headers["x-etcd-index"] = str(etcd_index) + f = PipeFile() + resp = MockResponse(status, f, headers) + self.response_queue.put(resp) + return f + + +class MockResponse(object): + def __init__(self, status, data_or_exc, headers=None): + self.status = status + self._data_or_exc = data_or_exc + self.headers = headers or {} + + @property + def data(self): + if isinstance(self._data_or_exc, Exception): + raise self._data_or_exc + elif hasattr(self._data_or_exc, "read"): + return self._data_or_exc.read() + else: + return self._data_or_exc + + def read(self, *args): + return self._data_or_exc.read(*args) + + def getheader(self, header, default=None): + _log.debug("Asked for header %s", header) + return self.headers.get(header.lower(), default) \ No newline at end of file diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index b015e8f1b7..6620541af5 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -18,256 +18,23 @@ Tests for the etcd driver module. 
""" -import json - -import logging -from Queue import Queue, Empty +from Queue import Empty from unittest import TestCase -from mock import Mock, call, patch +from mock import Mock, patch + from urllib3.exceptions import TimeoutError -from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR +from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR from calico.etcddriver.driver import EtcdDriver from calico.etcddriver.protocol import * +from calico.etcddriver.test.stubs import ( + StubMessageReader, StubMessageWriter, StubEtcd, + FLUSH) _log = logging.getLogger(__name__) -FLUSH = object() - - -class StubMessageReader(MessageReader): - """ - Replacement for the Driver's MessageReader, which is how it reads - from Felix. - - Allows us to send messages as if we were Felix. - """ - def __init__(self, sck): - super(StubMessageReader, self).__init__(sck) - self.queue = Queue() - - def send_msg(self, msg_type, fields=None): - """Called by the test to send the driver a message.""" - msg = { - MSG_KEY_TYPE: msg_type - } - msg.update(fields or {}) - self.queue.put((msg_type, msg)) - - def send_timeout(self): - """Called by the test to send the driver a timeout.""" - self.queue.put(None) - - def send_exception(self, exc): - """Called by the test to raise an exception from the driver's read.""" - self.queue.put(exc) - - def new_messages(self, timeout=None): - """Called by the driver to receive new messages.""" - while True: - item = self.queue.get() - if item is None: - return # timeout - if isinstance(item, BaseException): - raise item - else: - yield item - - -class StubMessageWriter(MessageWriter): - """ - Replacement for the driver's MessageWriter, which it uses to send messages - to Felix. - - Buffers the messages and flush calls in a queue for the test to - interrogate. - """ - def __init__(self, sck): - super(StubMessageWriter, self).__init__(sck) - self.queue = Queue() - self.exception = None - - def send_message(self, msg_type, fields=None, flush=True): - if self.exception: - raise self.exception - self.queue.put((msg_type, fields)) - if flush: - self.flush() - - def flush(self): - self.queue.put(FLUSH) - - -class StubEtcd(object): - """ - A fake connection to etcd. We hook the driver's _issue_etcd_request - method and block the relevant thread until the test calls one of the - respond_... methods. - """ - def __init__(self): - self.request_queue = Queue() - self.response_queue = Queue() - self.headers = { - "x-etcd-cluster-id": "abcdefg" - } - - def request(self, key, **kwargs): - """ - Called from the driver to make a request. Blocks until the - test thread sends a response. - """ - self.request_queue.put((key, kwargs)) - response = self.response_queue.get(30) - if isinstance(response, BaseException): - raise response - else: - return response - - def get_next_request(self): - """ - Called from the test to get the next request from the driver. - """ - return self.request_queue.get(timeout=10) - - def assert_request(self, expected_key, **expected_args): - """ - Asserts the properies of the next request. 
- """ - key, args = self.get_next_request() - default_args = {'wait_index': None, - 'preload_content': None, - 'recursive': False, - 'timeout': 5} - for k, v in default_args.iteritems(): - if k in args and args[k] == v: - del args[k] - if expected_key != key: - raise AssertionError("Expected request for %s but got %s" % - (expected_key, key)) - if expected_args != args: - raise AssertionError("Expected request args %s for %s but got %s" % - (expected_args, key, args)) - - def respond_with_exception(self, exc): - """ - Called from the test to raise an exception from the current/next - request. - """ - self.response_queue.put(exc) - - def respond_with_value(self, key, value, mod_index=None, - etcd_index=None, status=200, action="get"): - """ - Called from the test to return a simple single-key value to the - driver. - """ - data = json.dumps({ - "action": action, - "node": { - "key": key, - "value": value, - "modifiedIndex": mod_index, - } - }) - self.respond_with_data(data, etcd_index, status) - - def respond_with_dir(self, key, children, mod_index=None, - etcd_index=None, status=200): - """ - Called from the test to return a directory of key/values (from a - recursive request). - """ - nodes = [{"key": k, "value": v, "modifiedIndex": mod_index} - for (k, v) in children.iteritems()] - data = json.dumps({ - "action": "get", - "node": { - "key": key, - "dir": True, - "modifiedIndex": mod_index, - "nodes": nodes - } - }) - self.respond_with_data(data, etcd_index, status) - - def respond_with_data(self, data, etcd_index, status): - """ - Called from the test to return a raw response (e.g. to send - malformed JSON). - """ - headers = self.headers.copy() - if etcd_index is not None: - headers["x-etcd-index"] = str(etcd_index) - resp = MockResponse(status, data, headers) - self.response_queue.put(resp) - - def respond_with_stream(self, etcd_index, status=200): - """ - Called from the test to respond with a stream, allowing the test to - send chunks of data in response. - """ - headers = self.headers.copy() - if etcd_index is not None: - headers["x-etcd-index"] = str(etcd_index) - f = PipeFile() - resp = MockResponse(status, f, headers) - self.response_queue.put(resp) - return f - - -class PipeFile(object): - def __init__(self): - self.queue = Queue() - self.buf = None - - def read(self, length): - data = "" - if not self.buf: - self.buf = self.queue.get() - while len(data) < length: - if isinstance(self.buf, BaseException): - raise self.buf - data += self.buf[:length - len(data)] - self.buf = self.buf[length - len(data):] - if not self.buf: - try: - self.buf = self.queue.get_nowait() - except Empty: - break - return data - - def write(self, data): - self.queue.put(data) - - def __del__(self): - self.queue.put("") - - -class MockResponse(object): - def __init__(self, status, data_or_exc, headers=None): - self.status = status - self._data_or_exc = data_or_exc - self.headers = headers or {} - - @property - def data(self): - if isinstance(self._data_or_exc, Exception): - raise self._data_or_exc - elif hasattr(self._data_or_exc, "read"): - return self._data_or_exc.read() - else: - return self._data_or_exc - - def read(self, *args): - return self._data_or_exc.read(*args) - - def getheader(self, header, default=None): - _log.debug("Asked for header %s", header) - return self.headers.get(header.lower(), default) - - class TestEtcdDriverFV(TestCase): """ FV-level tests for the driver. These tests run a real copy of the driver @@ -578,6 +345,6 @@ def tearDown(self): # Wait for it to stop. 
             self.assertTrue(self.driver.join(1), "Driver failed to stop")
         finally:
-            # Now the driver is stopped, it's safe to remove out patch of
+            # Now the driver is stopped, it's safe to remove our patch of
             # complete_logging()
             self._logging_patch.stop()

diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py
index 9e8cb4de0c..9f02a53e77 100644
--- a/calico/felix/fetcd.py
+++ b/calico/felix/fetcd.py
@@ -271,9 +271,17 @@ class _FelixEtcdWatcher(gevent.Greenlet):
     """
     Greenlet that communicates with the etcd driver over a socket.

-    * Handles initial configuration of the driver.
-    * Processes the initial config responses.
-    * Then fans out the stream of updates.
+    * Does the initial handshake with the driver, sending it the init
+      message.
+    * Receives the pre-loaded config from the driver and uses that
+      to do Felix's one-off configuration.
+    * Sends the relevant config back to the driver.
+    * Processes the event stream from the driver, sending it on to
+      the splitter.
+
+    This class is similar to the EtcdWatcher class in that it uses
+    a PathDispatcher to fan out updates but it doesn't own an etcd
+    connection of its own.
     """

     def __init__(self, config, etcd_api, status_reporter, hosts_ipset):
@@ -282,57 +290,45 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset):
         self._etcd_api = etcd_api
         self._status_reporter = status_reporter
         self.hosts_ipset = hosts_ipset
-
         # Whether we've been in sync with etcd at some point.
         self._been_in_sync = False
-
         # Keep track of the config loaded from etcd so we can spot if it
         # changes.
         self.last_global_config = None
         self.last_host_config = None
         self.my_config_dir = dir_for_per_host_config(self._config.HOSTNAME)
-
         # Events triggered by the EtcdAPI Actor to tell us to load the config
         # and start polling. These are one-way flags.
         self.load_config = Event()
         self.begin_polling = Event()
-
         # Event that we trigger once the config is loaded.
         self.configured = Event()
-
         # Polling state initialized at poll start time.
         self.splitter = None
-
         # Next-hop IP addresses of our hosts, if populated in etcd.
         self.ipv4_by_hostname = {}
-
         # Forces a resync after the current poll if set. Safe to set from
         # another thread. Automatically reset to False after the resync is
         # triggered.
         self.resync_requested = False
         self.dispatcher = PathDispatcher()
-
-        # Register for events when values change.
-        self._register_paths()
-
+        # The Popen object for the driver.
         self._driver_process = None
-
+        # Stats.
         self.read_count = 0
         self.last_rate_log_time = monotonic_time()
+        # Register for events when values change.
+        self._register_paths()

     def _register_paths(self):
         """
         Program the dispatcher with the paths we care about.
-
-        Since etcd gives us a single event for a recursive directory
-        deletion, we have to handle deletes for lots of directories that
-        we otherwise wouldn't care about.
         """
         reg = self.dispatcher.register
         # Profiles and their contents.
         reg(TAGS_KEY, on_set=self.on_tags_set, on_del=self.on_tags_delete)
         reg(RULES_KEY, on_set=self.on_rules_set, on_del=self.on_rules_delete)
-        # Hosts, workloads and endpoints.
+        # Hosts and endpoints.
         reg(HOST_IP_KEY,
            on_set=self.on_host_ip_set,
            on_del=self.on_host_ip_delete)
@@ -341,11 +337,8 @@ def _register_paths(self):
         reg(CIDR_V4_KEY,
            on_set=self.on_ipam_v4_pool_set,
            on_del=self.on_ipam_v4_pool_delete)
-        # Configuration keys.  If any of these is changed or set a resync is
-        # done, including a full reload of configuration.
If any field has - # actually changed (as opposed to being reset to the same value or - # explicitly set to the default, say), Felix terminates allowing the - # init daemon to restart it. + # Configuration keys. If any of these is changed or created, we'll + # restart to pick up the change. reg(CONFIG_PARAM_KEY, on_set=self._on_config_updated, on_del=self._on_config_updated) From dc4c8d582ec300704db5e04384d9f8aa723ce024 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 16:33:34 +0000 Subject: [PATCH 72/98] Remove now-unused parse_if_* functions. --- calico/felix/fetcd.py | 66 ------------------------------------------- 1 file changed, 66 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 9f02a53e77..4d22ac0856 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -897,20 +897,6 @@ def die_and_restart(): ) -def parse_if_endpoint(config, etcd_node): - combined_id = get_endpoint_id_from_key(etcd_node.key) - if combined_id: - # Got an endpoint. - if etcd_node.action == "delete": - _log.debug("Found deleted endpoint %s", combined_id) - endpoint = None - else: - endpoint = parse_endpoint(config, combined_id, etcd_node.value) - # EndpointId does the interning for us. - return combined_id, endpoint - return None, None - - def parse_endpoint(config, combined_id, raw_json): endpoint = safe_decode_json(raw_json, log_tag="endpoint %s" % combined_id.endpoint) @@ -925,19 +911,6 @@ def parse_endpoint(config, combined_id, raw_json): return endpoint -def parse_if_rules(etcd_node): - m = RULES_KEY_RE.match(etcd_node.key) - if m: - # Got some rules. - profile_id = m.group("profile_id") - if etcd_node.action == "delete": - rules = None - else: - rules = parse_rules(profile_id, etcd_node.value) - return intern(profile_id.encode("utf8")), rules - return None, None - - def parse_rules(profile_id, raw_json): rules = safe_decode_json(raw_json, log_tag="rules %s" % profile_id) try: @@ -950,19 +923,6 @@ def parse_rules(profile_id, raw_json): return rules -def parse_if_tags(etcd_node): - m = TAGS_KEY_RE.match(etcd_node.key) - if m: - # Got some tags. - profile_id = m.group("profile_id") - if etcd_node.action == "delete": - tags = None - else: - tags = parse_tags(profile_id, etcd_node.value) - return intern(profile_id.encode("utf8")), tags - return None, None - - def parse_tags(profile_id, raw_json): tags = safe_decode_json(raw_json, log_tag="tags %s" % profile_id) try: @@ -977,19 +937,6 @@ def parse_tags(profile_id, raw_json): return intern_list(tags) -def parse_if_host_ip(etcd_node): - m = HOST_IP_KEY_RE.match(etcd_node.key) - if m: - # Got some rules. - hostname = m.group("hostname") - if etcd_node.action == "delete": - ip = None - else: - ip = parse_host_ip(hostname, etcd_node.value) - return hostname, ip - return None, None - - def parse_host_ip(hostname, raw_value): if raw_value is None or validate_ip_addr(raw_value): return canonicalise_ip(raw_value, None) @@ -998,19 +945,6 @@ def parse_host_ip(hostname, raw_value): return None -def parse_if_ipam_v4_pool(etcd_node): - m = IPAM_V4_CIDR_KEY_RE.match(etcd_node.key) - if m: - # Got some rules. 
- pool_id = m.group("encoded_cidr") - if etcd_node.action == "delete": - pool = None - else: - pool = parse_ipam_pool(pool_id, etcd_node.value) - return pool_id, pool - return None, None - - def parse_ipam_pool(pool_id, raw_json): pool = safe_decode_json(raw_json, log_tag="ipam pool %s" % pool_id) try: From 69af1d459d592ba362fd15f678750ac56e950e17 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 17:54:28 +0000 Subject: [PATCH 73/98] Cover new config. --- calico/felix/test/data/felix_nolog.cfg | 1 + calico/felix/test/test_config.py | 1 + 2 files changed, 2 insertions(+) diff --git a/calico/felix/test/data/felix_nolog.cfg b/calico/felix/test/data/felix_nolog.cfg index a14188ff33..704229a038 100644 --- a/calico/felix/test/data/felix_nolog.cfg +++ b/calico/felix/test/data/felix_nolog.cfg @@ -1,3 +1,4 @@ [log] # Log file path. LogFilePath = none +EtcdDriverLogFilePath = none diff --git a/calico/felix/test/test_config.py b/calico/felix/test/test_config.py index f84ccaa460..7a1e85e9f6 100644 --- a/calico/felix/test/test_config.py +++ b/calico/felix/test/test_config.py @@ -118,6 +118,7 @@ def test_no_logfile(self): config.report_etcd_config({}, cfg_dict) self.assertEqual(config.LOGFILE, None) + self.assertEqual(config.DRIVERLOGFILE, None) def test_no_metadata(self): # Metadata can be excluded by explicitly saying "none" From d32dbf59b30226ffd21168ccb633e0bfac5276a7 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 17:56:07 +0000 Subject: [PATCH 74/98] Cover new EndpointManager function. --- calico/felix/endpoint.py | 7 +--- calico/felix/test/test_endpoint.py | 53 +++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/calico/felix/endpoint.py b/calico/felix/endpoint.py index 485f1f3732..bc6a6e57fc 100644 --- a/calico/felix/endpoint.py +++ b/calico/felix/endpoint.py @@ -102,12 +102,7 @@ def on_datamodel_in_sync(self): # DispatchChains actor. That is OK! The worst that can happen is # that a LocalEndpoint undoes part of our update and then goes on # to re-apply the update when it catches up to the snapshot. 
- local_ifaces = set() - for ep_id, ep in self.endpoints_by_id.iteritems(): - if (ep and - ep_id.host == self.config.HOSTNAME and - ep.get("name")): - local_ifaces.add(ep.get("name")) + local_ifaces = frozenset(self.endpoint_id_by_iface_name.keys()) self.dispatch_chains.apply_snapshot(local_ifaces, async=True) @actor_message() diff --git a/calico/felix/test/test_endpoint.py b/calico/felix/test/test_endpoint.py index b5d5329142..cb372a761c 100644 --- a/calico/felix/test/test_endpoint.py +++ b/calico/felix/test/test_endpoint.py @@ -20,7 +20,8 @@ """ from contextlib import nested import logging -from calico.felix.endpoint import EndpointManager +from neutron.common.constants import IPv4 +from calico.felix.endpoint import EndpointManager, LocalEndpoint from calico.felix.fetcd import EtcdAPI, EtcdStatusReporter from calico.felix.fiptables import IptablesUpdater from calico.felix.dispatch import DispatchChains @@ -40,6 +41,56 @@ _log = logging.getLogger(__name__) +ENDPOINT_ID = EndpointId("hostname", "b", "c", "d") + + +class TestEndpointManager(BaseTestCase): + def setUp(self): + super(TestEndpointManager, self).setUp() + self.m_config = Mock(spec=config.Config) + self.m_config.HOSTNAME = "hostname" + self.m_updater = Mock(spec=IptablesUpdater) + self.m_dispatch = Mock(spec=DispatchChains) + self.m_rules_mgr = Mock(spec=RulesManager) + self.m_status_reporter = Mock(spec=EtcdStatusReporter) + self.mgr = EndpointManager(self.m_config, "IPv4", self.m_updater, + self.m_dispatch, self.m_rules_mgr, + self.m_status_reporter) + + def test_create(self): + obj = self.mgr._create(ENDPOINT_ID) + self.assertTrue(isinstance(obj, LocalEndpoint)) + + def test_on_started(self): + ep = {"name": "tap1234"} + self.mgr.on_endpoint_update(ENDPOINT_ID, + ep, + async=True) + self.step_actor(self.mgr) + m_endpoint = Mock(spec=LocalEndpoint) + self.mgr._on_object_started(ENDPOINT_ID, m_endpoint) + self.assertEqual( + m_endpoint.on_endpoint_update.mock_calls, + [mock.call(ep, async=True)] + ) + + def test_on_datamodel_in_sync(self): + ep = {"name": "tap1234"} + self.mgr.on_endpoint_update(ENDPOINT_ID, + ep, + async=True) + self.step_actor(self.mgr) + self.mgr.on_datamodel_in_sync(async=True) + self.step_actor(self.mgr) + # Second call should have no effect. + self.mgr.on_datamodel_in_sync(async=True) + self.step_actor(self.mgr) + self.assertEqual( + self.m_dispatch.apply_snapshot.mock_calls, + [mock.call(frozenset(["tap1234"]), async=True)] + ) + + class TestLocalEndpoint(BaseTestCase): def setUp(self): super(TestLocalEndpoint, self).setUp() From 08d4a89f39210542495e752d50921f2a4dc260cf Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 17:56:18 +0000 Subject: [PATCH 75/98] Minor fix: removing chains should be synchronous. --- calico/felix/endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calico/felix/endpoint.py b/calico/felix/endpoint.py index bc6a6e57fc..3d6d43f9dc 100644 --- a/calico/felix/endpoint.py +++ b/calico/felix/endpoint.py @@ -486,7 +486,7 @@ def _update_chains(self): def _remove_chains(self): try: self.iptables_updater.delete_chains(chain_names(self._suffix), - async=True) + async=False) except FailedSystemCall: _log.exception("Failed to delete chains for %s", self) else: From 70a29797a092143c2f6eec400ef70ce3aa1d0cc0 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 4 Nov 2015 21:06:21 +0000 Subject: [PATCH 76/98] Prevent accidental start of a real LocalEndpoint during test. 
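
The EndpointManager under test inherits its reference-counting
behaviour from ReferenceManager, so feeding it an endpoint update
would normally spawn a real LocalEndpoint actor as a side effect.
To keep the unit tests hermetic, the fixture stubs out the hook that
creates the actor. A minimal sketch of the pattern (get_and_incref is
the ReferenceManager method being replaced; Mock comes from the mock
library already used by these tests):

    self.mgr = EndpointManager(self.m_config, "IPv4", self.m_updater,
                               self.m_dispatch, self.m_rules_mgr,
                               self.m_status_reporter)
    # Stub the ReferenceManager hook so that no real LocalEndpoint
    # (and hence no greenlet) is started during a test.
    self.mgr.get_and_incref = Mock()
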
--- calico/felix/test/test_endpoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/calico/felix/test/test_endpoint.py b/calico/felix/test/test_endpoint.py index cb372a761c..38f49d43c8 100644 --- a/calico/felix/test/test_endpoint.py +++ b/calico/felix/test/test_endpoint.py @@ -56,6 +56,7 @@ def setUp(self): self.mgr = EndpointManager(self.m_config, "IPv4", self.m_updater, self.m_dispatch, self.m_rules_mgr, self.m_status_reporter) + self.mgr.get_and_incref = Mock() def test_create(self): obj = self.mgr._create(ENDPOINT_ID) From 078dd9474a9a477050f94a84245cb1ed05d29357 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 10:38:12 +0000 Subject: [PATCH 77/98] Full coverage for EndpointManager. --- calico/felix/test/test_endpoint.py | 79 +++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/calico/felix/test/test_endpoint.py b/calico/felix/test/test_endpoint.py index 38f49d43c8..a6388b7af9 100644 --- a/calico/felix/test/test_endpoint.py +++ b/calico/felix/test/test_endpoint.py @@ -57,6 +57,7 @@ def setUp(self): self.m_dispatch, self.m_rules_mgr, self.m_status_reporter) self.mgr.get_and_incref = Mock() + self.mgr.decref = Mock() def test_create(self): obj = self.mgr._create(ENDPOINT_ID) @@ -83,14 +84,88 @@ def test_on_datamodel_in_sync(self): self.step_actor(self.mgr) self.mgr.on_datamodel_in_sync(async=True) self.step_actor(self.mgr) + self.assertEqual( + self.m_dispatch.apply_snapshot.mock_calls, + [mock.call(frozenset(["tap1234"]), async=True)] + ) # Second call should have no effect. + self.m_dispatch.apply_snapshot.reset_mock() self.mgr.on_datamodel_in_sync(async=True) self.step_actor(self.mgr) + self.assertEqual(self.m_dispatch.apply_snapshot.mock_calls, []) + + def test_endpoint_update_not_our_host(self): + ep = {"name": "tap1234"} + with mock.patch.object(self.mgr, "_is_starting_or_live") as m_sol: + self.mgr.on_endpoint_update(EndpointId("notus", "b", "c", "d"), + ep, + async=True) + self.step_actor(self.mgr) + self.assertFalse(m_sol.called) + + def test_endpoint_live_obj(self): + ep = {"name": "tap1234"} + # First send in an update to trigger creation. + self.mgr.on_endpoint_update(ENDPOINT_ID, ep, async=True) + self.step_actor(self.mgr) + self.assertEqual(self.mgr.get_and_incref.mock_calls, + [mock.call(ENDPOINT_ID)]) + m_endpoint = Mock(spec=LocalEndpoint) + self.mgr.objects_by_id[ENDPOINT_ID] = m_endpoint + # Then send a second update to check that it gets passed on to the + # LocalEndpoint. + with mock.patch.object(self.mgr, "_is_starting_or_live") as m_sol: + m_sol.return_value = True + self.mgr.on_endpoint_update(ENDPOINT_ID, ep, async=True) + self.step_actor(self.mgr) + self.assertEqual(m_sol.mock_calls, [mock.call(ENDPOINT_ID)]) + self.assertEqual(m_endpoint.on_endpoint_update.mock_calls, + [mock.call(ep, force_reprogram=False, + async=True)]) + self.assertTrue(ENDPOINT_ID in self.mgr.local_endpoint_ids) + # Finally, send in a deletion. 
+ m_endpoint.on_endpoint_update.reset_mock() + with mock.patch.object(self.mgr, "_is_starting_or_live") as m_sol: + m_sol.return_value = True + self.mgr.on_endpoint_update(ENDPOINT_ID, None, async=True) + self.step_actor(self.mgr) + self.assertEqual(m_endpoint.on_endpoint_update.mock_calls, + [mock.call(None, force_reprogram=False, + async=True)]) + self.assertEqual(self.mgr.decref.mock_calls, [mock.call(ENDPOINT_ID)]) + self.assertFalse(ENDPOINT_ID in self.mgr.local_endpoint_ids) + + def test_on_interface_update_unknown(self): + with mock.patch.object(self.mgr, "_is_starting_or_live") as m_sol: + self.mgr.on_interface_update("foo", True, async=True) + self.step_actor(self.mgr) + self.assertFalse(m_sol.called) + + def test_on_interface_update_known(self): + ep = {"name": "tap1234"} + m_endpoint = Mock(spec=LocalEndpoint) + self.mgr.objects_by_id[ENDPOINT_ID] = m_endpoint + with mock.patch.object(self.mgr, "_is_starting_or_live") as m_sol: + m_sol.return_value = True + self.mgr.on_endpoint_update(ENDPOINT_ID, ep, async=True) + self.mgr.on_interface_update("tap1234", True, async=True) + self.step_actor(self.mgr) self.assertEqual( - self.m_dispatch.apply_snapshot.mock_calls, - [mock.call(frozenset(["tap1234"]), async=True)] + m_endpoint.on_interface_update.mock_calls, + [mock.call(True, async=True)] ) + def test_on_interface_update_known_but_not_live(self): + ep = {"name": "tap1234"} + m_endpoint = Mock(spec=LocalEndpoint) + self.mgr.objects_by_id[ENDPOINT_ID] = m_endpoint + with mock.patch.object(self.mgr, "_is_starting_or_live") as m_sol: + m_sol.return_value = False + self.mgr.on_endpoint_update(ENDPOINT_ID, ep, async=True) + self.mgr.on_interface_update("tap1234", True, async=True) + self.step_actor(self.mgr) + self.assertEqual(m_endpoint.on_interface_update.mock_calls, []) + class TestLocalEndpoint(BaseTestCase): def setUp(self): From a24201e1f4924ac9e5c6b725d1a115d793b39c49 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 12:01:01 +0000 Subject: [PATCH 78/98] Add UTs for ProfileManager. --- calico/felix/profilerules.py | 12 ++- calico/felix/test/test_profilerules.py | 118 ++++++++++++++++++++++++- 2 files changed, 128 insertions(+), 2 deletions(-) diff --git a/calico/felix/profilerules.py b/calico/felix/profilerules.py index d07e31f631..ea1d746114 100644 --- a/calico/felix/profilerules.py +++ b/calico/felix/profilerules.py @@ -59,8 +59,17 @@ def _on_object_started(self, profile_id, active_profile): active_profile.on_profile_update(profile_or_none, async=True) def _maybe_start(self, obj_id, in_sync=False): + """ + Override: gates starting the ProfileRules on being in sync. + + :param obj_id: The ID of the object (profile) that we'd like to start. + :param in_sync: True if we know that this profile is in-sync even if + we might not have received the global in-sync message. + """ in_sync |= self._datamodel_in_sync if in_sync or obj_id in self.rules_by_profile_id: + # Either we're globally in-sync or we've explicitly heard about + # this profile so we know it is in sync. Defer to the superclass. 
_log.debug("Profile %s is in-sync, deferring to superclass.", obj_id) return super(RulesManager, self)._maybe_start(obj_id) @@ -224,7 +233,8 @@ def _finish_msg_batch(self, batch, results): self.id) else: self._dirty = False - elif not self._ipset_refs.ready: + else: + assert not self._ipset_refs.ready _log.info("Can't program rules %s yet, waiting on ipsets", self.id) diff --git a/calico/felix/test/test_profilerules.py b/calico/felix/test/test_profilerules.py index f477726ae9..8a680e24b0 100644 --- a/calico/felix/test/test_profilerules.py +++ b/calico/felix/test/test_profilerules.py @@ -20,7 +20,8 @@ """ import logging -from mock import Mock, call +from mock import Mock, call, patch +from calico.felix import refcount from calico.felix.fiptables import IptablesUpdater from calico.felix.futils import FailedSystemCall from calico.felix.ipsets import IpsetManager, TagIpset @@ -96,6 +97,116 @@ } +class TestRulesManager(BaseTestCase): + def setUp(self): + super(TestRulesManager, self).setUp() + self.m_updater = Mock(spec=IptablesUpdater) + self.m_ipset_mgr = Mock(spec=IpsetManager) + self.mgr = RulesManager(4, self.m_updater, self.m_ipset_mgr) + + def test_create(self): + pr = self.mgr._create("profile-id") + self.assertEqual(pr.id, "profile-id") + self.assertEqual(pr.ip_version, 4) + self.assertEqual(pr._iptables_updater, self.m_updater) + self.assertEqual(pr._ipset_mgr, self.m_ipset_mgr) + + def test_on_object_started_unknown(self): + m_pr = Mock(spec=ProfileRules) + self.mgr._on_object_started("profile-id", m_pr) + self.assertEqual( + m_pr.on_profile_update.mock_calls, + [call(None, async=True)] + ) + + def test_on_object_started(self): + m_pr = Mock(spec=ProfileRules) + self.mgr.rules_by_profile_id["profile-id"] = {"foo": "bar"} + self.mgr._on_object_started("profile-id", m_pr) + self.assertEqual( + m_pr.on_profile_update.mock_calls, + [call({"foo": "bar"}, async=True)] + ) + + def test_on_datamodel_in_sync(self): + with patch("calico.felix.refcount.ReferenceManager." + "_maybe_start_all", autospec=True) as m_start: + self.mgr.on_datamodel_in_sync(async=True) + self.mgr.on_datamodel_in_sync(async=True) + self.step_actor(self.mgr) + # Only the first datamodel_in_sync triggers maybe_start_all. + self.assertEqual(m_start.mock_calls, [call(self.mgr)]) + + def test_maybe_start_known_in_sync(self): + with patch("calico.felix.refcount." + "ReferenceManager._maybe_start") as m_maybe_start: + self.mgr._maybe_start("profile-id", in_sync=True) + self.assertEqual( + m_maybe_start.mock_calls, + [call("profile-id")] + ) + + def test_maybe_start_globally_in_sync(self): + with patch("calico.felix.refcount." + "ReferenceManager._maybe_start") as m_maybe_start: + self.mgr.on_datamodel_in_sync(async=True) + self.step_actor(self.mgr) + self.mgr._maybe_start("profile-id") + self.assertEqual( + m_maybe_start.mock_calls, + [call("profile-id")] + ) + + def test_maybe_start_not_in_sync(self): + with patch("calico.felix.refcount." + "ReferenceManager._maybe_start") as m_maybe_start: + self.mgr._maybe_start("profile-id", in_sync=False) + self.assertEqual(m_maybe_start.mock_calls, []) + + def test_on_rules_update_unknown(self): + with patch("calico.felix.refcount." + "ReferenceManager._maybe_start") as m_maybe_start: + self.mgr.on_rules_update("prof-id", {"foo": "bar"}, async=True) + self.step_actor(self.mgr) + # Nothing to try to start. + self.assertEqual(m_maybe_start.mock_calls, []) + + def test_on_rules_update_not_started(self): + with patch("calico.felix.refcount." 
+ "ReferenceManager._maybe_start") as m_maybe_start: + self.mgr.on_rules_update("prof-id", {"foo": "bar"}, async=True) + self.mgr.objects_by_id["prof-id"] = Mock() + self.step_actor(self.mgr) + # Should try to start the ProfileRules. + self.assertEqual(m_maybe_start.mock_calls, + [call("prof-id")]) + + def test_on_rules_update_started(self): + with patch("calico.felix.refcount." + "ReferenceManager._maybe_start") as m_maybe_start: + p = {"foo": "bar"} + self.mgr.on_rules_update("prof-id", p, async=True) + m_pr = Mock() + m_pr.ref_mgmt_state = refcount.LIVE + self.mgr.objects_by_id["prof-id"] = m_pr + self.step_actor(self.mgr) + self.assertEqual(m_pr.on_profile_update.mock_calls, + [call(p, force_reprogram=False, async=True)]) + # Already started so shouldn't try to start it. + self.assertEqual(m_maybe_start.mock_calls, []) + + def test_on_rules_delete(self): + with patch("calico.felix.refcount." + "ReferenceManager._maybe_start") as m_maybe_start: + self.mgr.on_rules_update("prof-id", None, async=True) + self.mgr.objects_by_id["prof-id"] = Mock() + self.step_actor(self.mgr) + # Even though we know it's gone, still try to start it. If it's + # referenced this will ensure that the chain is cleaned up. + self.assertEqual(m_maybe_start.mock_calls, + [call("prof-id")]) + + class TestProfileRules(BaseTestCase): def setUp(self): super(TestProfileRules, self).setUp() @@ -254,6 +365,11 @@ def test_early_unreferenced(self): self.m_ipt_updater.delete_chains.assert_called_once_with( set(['felix-p-prof1-i', 'felix-p-prof1-o']), async=False ) + # Further calls should be ignored + self.m_ipt_updater.reset_mock() + self.rules.on_unreferenced(async=True) + self.step_actor(self.rules) + self.assertFalse(self.m_ipt_updater.delete_chains.called) def test_unreferenced_after_creation(self): """ From 53b3040d64b483eb7cb88ca11d9140bb700969ed Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 15:17:49 +0000 Subject: [PATCH 79/98] Remove now-unused force_reprogram flag. Add UT for ipsets.py. --- calico/felix/ipsets.py | 8 +-- calico/felix/test/test_ipsets.py | 117 +++++++++++++++++++++++++++---- 2 files changed, 104 insertions(+), 21 deletions(-) diff --git a/calico/felix/ipsets.py b/calico/felix/ipsets.py index e779c128f6..36ace4aaf6 100644 --- a/calico/felix/ipsets.py +++ b/calico/felix/ipsets.py @@ -72,7 +72,6 @@ def __init__(self, ip_type, config): # index-update functions. We apply the updates in _finish_msg_batch(). # May include non-live tag IDs. self._dirty_tags = set() - self._force_reprogram = False self._datamodel_in_sync = False def _create(self, tag_id): @@ -108,9 +107,7 @@ def _update_active_ipset(self, tag_id): assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] members = frozenset(self.ip_owners_by_tag.get(tag_id, {}).iterkeys()) - active_ipset.replace_members(members, - force_reprogram=self._force_reprogram, - async=True) + active_ipset.replace_members(members, async=True) def _update_dirty_active_ipsets(self): """ @@ -433,7 +430,6 @@ def _finish_msg_batch(self, batch, results): """ super(IpsetManager, self)._finish_msg_batch(batch, results) self._update_dirty_active_ipsets() - self._force_reprogram = False class EndpointData(object): @@ -537,7 +533,7 @@ def replace_members(self, members, force_reprogram=False): """ Replace the members of this ipset with the supplied set. - :param set[str]|list[str] members: IP address strings. Must be a copy + :param set[str] members: IP address strings. Must be a copy (as this routine keeps a link to it). 
""" _log.info("Replacing members of ipset %s", self.name) diff --git a/calico/felix/test/test_ipsets.py b/calico/felix/test/test_ipsets.py index 146f88f29e..52d8dea18b 100644 --- a/calico/felix/test/test_ipsets.py +++ b/calico/felix/test/test_ipsets.py @@ -23,10 +23,13 @@ import logging from pprint import pformat from mock import * +from netaddr import IPAddress + from calico.datamodel_v1 import EndpointId -from calico.felix.futils import IPV4, FailedSystemCall -from calico.felix.ipsets import (EndpointData, IpsetManager, IpsetActor, - TagIpset, EMPTY_ENDPOINT_DATA, Ipset) +from calico.felix.futils import IPV4, FailedSystemCall, CommandOutput +from calico.felix.ipsets import (EndpointData, IpsetManager, IpsetActor, + TagIpset, EMPTY_ENDPOINT_DATA, Ipset, + list_ipset_names) from calico.felix.refcount import CREATED from calico.felix.test.base import BaseTestCase @@ -64,6 +67,26 @@ } EP_DATA_2_1 = EndpointData(["prof1"], ["10.0.0.1"]) +IPSET_LIST_OUTPUT = """Name: felix-v4-calico_net +Type: hash:ip +Revision: 2 +Header: family inet hashsize 1024 maxelem 1048576 +Size in memory: 16728 +References: 1 +Members: +10.1.0.28 +10.1.0.29 +10.1.0.19 + +Name: felix-v6-calico_net +Type: hash:ip +Revision: 2 +Header: family inet6 hashsize 1024 maxelem 1048576 +Size in memory: 16504 +References: 1 +Members: +""" + class TestIpsetManager(BaseTestCase): def setUp(self): @@ -106,6 +129,17 @@ def test_create(self): 'inet', 'hash:ip', max_elem=1234) + def test_maybe_start_gates_on_in_sync(self): + with patch("calico.felix.refcount.ReferenceManager." + "_maybe_start") as m_maybe_start: + self.mgr._maybe_start("tag-123") + self.assertFalse(m_maybe_start.called) + self.mgr.on_datamodel_in_sync(async=True) + self.step_mgr() + self.mgr._maybe_start("tag-123") + self.assertEqual(m_maybe_start.mock_calls, + [call("tag-123")]) + def test_tag_then_endpoint(self): # Send in the messages. self.mgr.on_tags_update("prof1", ["tag1"], async=True) @@ -113,6 +147,11 @@ def test_tag_then_endpoint(self): # Let the actor process them. self.step_mgr() self.assert_one_ep_one_tag() + # Undo our messages to check that the index is correctly updated, + self.mgr.on_tags_update("prof1", None, async=True) + self.mgr.on_endpoint_update(EP_ID_1_1, None, async=True) + self.step_mgr() + self.assert_index_empty() def test_endpoint_then_tag(self): # Send in the messages. @@ -141,6 +180,10 @@ def assert_one_ep_one_tag(self): } }) + def assert_index_empty(self): + self.assertEqual(self.mgr.endpoint_data_by_ep_id, {}) + self.assertEqual(self.mgr.ip_owners_by_tag, {}) + def test_change_ip(self): # Initial set-up. self.mgr.on_tags_update("prof1", ["tag1"], async=True) @@ -359,16 +402,20 @@ def test_cleanup(self, m_check_call, m_list_ipsets): call(["ipset", "destroy", "felix-v4-baz"]), ])) - # - # def test_finish_msg_batch_clears_reprogram_flag(self): - # # Apply a snapshot and step the actor for real, should clear the flag. 
- # self.mgr.apply_snapshot( - # {"prof1": ["A"]}, - # {EP_ID_1_1: EP_1_1}, - # async=True, - # ) - # self.step_mgr() - # self.assertFalse(self.mgr._force_reprogram) + def test_update_dirty(self): + self.mgr.on_datamodel_in_sync(async=True) + self.step_mgr() + self.mgr._dirty_tags.add("tag-123") + m_ipset = Mock(spec=TagIpset) + self.mgr.objects_by_id["tag-123"] = m_ipset + with patch.object(self.mgr, "_is_starting_or_live", + autospec=True) as m_sol: + m_sol.return_value = True + self.mgr._update_dirty_active_ipsets() + self.assertEqual( + m_ipset.replace_members.mock_calls, + [call(frozenset(), async=True)] + ) def _notify_ready(self, tags): for tag in tags: @@ -416,6 +463,7 @@ def setUp(self): self.ipset = Mock(spec=Ipset) self.ipset.max_elem = 1234 self.ipset.set_name = "felix-a_set_name" + self.ipset.temp_set_name = "felix-a_set_name-tmp" self.actor = IpsetActor(self.ipset) def test_sync_to_ipset(self): @@ -480,11 +528,44 @@ def test_sync_to_ipset(self): self.ipset.reset_mock() def test_members_too_big(self): - self.actor.members = ["1.2.3.4"] * 2000 - self.actor._sync_to_ipset() + members = set([str(IPAddress(x)) for x in range(2000)]) + self.actor.replace_members(members, async=True) + self.step_actor(self.actor) # Check we return early without updating programmed_members. self.assertEqual(self.actor.programmed_members, None) + def test_owned_ipset_names(self): + self.assertEqual(self.actor.owned_ipset_names(), + set(["felix-a_set_name", "felix-a_set_name-tmp"])) + + +class TestTagIpsetActor(BaseTestCase): + def setUp(self): + super(TestTagIpsetActor, self).setUp() + self.m_ipset = Mock(spec=Ipset) + self.m_ipset.max_elem = 1234 + self.m_ipset.set_name = "felix-a_set_name" + self.m_ipset.temp_set_name = "felix-a_set_name-tmp" + self.tag_ipset = TagIpset("tag-123", "IPv4", max_elem=1024) + self.tag_ipset._ipset = self.m_ipset + self.m_mgr = Mock() + self.tag_ipset._manager = self.m_mgr + self.tag_ipset._id = "tag-123" + + def test_lifecycle(self): + self.tag_ipset.replace_members(set(["1.2.3.4"]), async=True) + self.step_actor(self.tag_ipset) + self.assertEqual( + self.m_mgr.on_object_startup_complete.mock_calls, + [call("tag-123", self.tag_ipset, async=True)] + ) + self.tag_ipset.on_unreferenced(async=True) + self.step_actor(self.tag_ipset) + self.assertEqual( + self.m_mgr.on_object_cleanup_complete.mock_calls, + [call("tag-123", self.tag_ipset, async=True)] + ) + class TestIpset(BaseTestCase): def setUp(self): @@ -606,3 +687,9 @@ def test_delete(self, m_call_silent): call(["ipset", "destroy", "foo-tmp"]), ] ) + + @patch("calico.felix.futils.check_call", autospec=True) + def test_list_ipset_names(self, m_check_call): + m_check_call.return_value = CommandOutput(IPSET_LIST_OUTPUT, "") + self.assertEqual(list_ipset_names(), + ['felix-v4-calico_net', 'felix-v6-calico_net']) From fd60353e8b36db6cc2436bf38c50b3b33e511604 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 15:23:18 +0000 Subject: [PATCH 80/98] Exclude new test files from coverage. --- .coveragerc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.coveragerc b/.coveragerc index ac399fe492..c75ec63cfe 100644 --- a/.coveragerc +++ b/.coveragerc @@ -4,7 +4,9 @@ include = calico/openstack/* calico/*.py omit = + calico/test/* calico/felix/test/* calico/openstack/test/* + calico/etcddriver/test/* branch = True concurrency = eventlet From 249769836f0898f9490d6867e136d00f489f7dea Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 16:24:34 +0000 Subject: [PATCH 81/98] Additional UT for etcd driver. 
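
The new unit tests drive a real EtcdDriver with all of its I/O
replaced by the stubs from calico.etcddriver.test.stubs, along the
lines of the FV setUp below (a sketch using the names from the tests,
not new production code):

    self.driver = EtcdDriver(sck)
    self.msg_reader = StubMessageReader(sck)    # test -> driver
    self.msg_writer = StubMessageWriter(sck)    # driver -> test
    self.driver._msg_reader = self.msg_reader
    self.driver._msg_writer = self.msg_writer

Factoring the wait-for-config loop out into _wait_for_config() also
lets a test cover the shutdown path directly, e.g.:

    self.driver._stop_event.set()
    self.assertRaises(DriverShutdown, self.driver._wait_for_config)
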
--- calico/etcddriver/driver.py | 18 ++- calico/etcddriver/test/test_driver.py | 203 ++++++++++++++++++++++++-- 2 files changed, 199 insertions(+), 22 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 138393f276..fbb159ed2a 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -155,7 +155,8 @@ def _read_from_socket(self): elif msg_type == MSG_TYPE_RESYNC: self._handle_resync(msg) else: - _log.warning("Unexpected message from Felix") + _log.error("Unexpected message from Felix: %s", msg) + raise RuntimeError("Unexpected message from Felix") finally: _log.error("Reader thread shutting down, triggering stop event") self.stop() @@ -217,12 +218,7 @@ def _resync_and_merge(self): self._preload_config() # Now (on the first run through) wait for Felix to process the # config. - while not self._config_received.is_set(): - _log.info("Waiting for Felix to process the config...") - self._config_received.wait(1) - if self._stop_event.is_set(): - raise DriverShutdown() - _log.info("Felix sent us the config, continuing.") + self._wait_for_config() # Kick off the snapshot request as far as the headers. self._send_status(STATUS_RESYNC) resp, snapshot_index = self._start_snapshot_request() @@ -255,6 +251,14 @@ def _resync_and_merge(self): self._first_resync = False self._resync_requested = False + def _wait_for_config(self): + while not self._config_received.is_set(): + _log.info("Waiting for Felix to process the config...") + if self._stop_event.is_set(): + raise DriverShutdown() + self._config_received.wait(1) + _log.info("Felix sent us the config, continuing.") + def _wait_for_ready(self): """ Waits for the global Ready flag to be set. We don't load the first diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index 6620541af5..4d29050d23 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -19,14 +19,14 @@ Tests for the etcd driver module. """ from Queue import Empty -from unittest import TestCase - -from mock import Mock, patch +from unittest2 import TestCase, SkipTest +from mock import Mock, patch, call +from urllib3 import HTTPConnectionPool from urllib3.exceptions import TimeoutError from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR -from calico.etcddriver.driver import EtcdDriver +from calico.etcddriver.driver import EtcdDriver, DriverShutdown from calico.etcddriver.protocol import * from calico.etcddriver.test.stubs import ( StubMessageReader, StubMessageWriter, StubEtcd, @@ -43,12 +43,12 @@ class TestEtcdDriverFV(TestCase): def setUp(self): sck = Mock() - self.msg_reader = StubMessageReader(sck) - self.msg_writer = StubMessageWriter(sck) self.watcher_etcd = StubEtcd() self.resync_etcd = StubEtcd() self.driver = EtcdDriver(sck) + self.msg_reader = StubMessageReader(sck) + self.msg_writer = StubMessageWriter(sck) self.driver._msg_reader = self.msg_reader self.driver._msg_writer = self.msg_writer self.driver._issue_etcd_request = Mock( @@ -146,10 +146,96 @@ def test_mainline_resync(self): # HWM. self.assert_status_message(STATUS_IN_SYNC) # Now send a watcher event, which should go straight through. + self.send_watcher_event_and_assert_felix_msg(14) + + def test_second_resync(self): + try: + # Start by going through the first resync. + self.test_mainline_resync() + except AssertionError: + _log.exception("Mainline resync test failed") + raise SkipTest("Mainline resync test failed to initialise driver") + + # Felix sends a resync message. 
+ self.msg_reader.send_msg(MSG_TYPE_RESYNC, {}) + + # Wait for the watcher to make its request. + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=15 + ) + # Then for determinism, force it to die before it polls again. + self.driver._watcher_stop_event.set() + # The event from the watcher triggers the resync. + self.send_watcher_event_and_assert_felix_msg(15) + + # Back into wait-for-ready mode. + self.assert_status_message(STATUS_WAIT_FOR_READY) + # Re-do the config handshake. + self.do_handshake() + + # Check for etcd request and start the response. + snap_stream = self.start_snapshot_response(etcd_index=100) + # Respond to the watcher, this should get merged into the event + # stream at some point later. + self.watcher_etcd.respond_with_value( + "/calico/v1/adir/bkey", + "b", + mod_index=102, + action="set" + ) + # Wait until the watcher makes its next request (with revved + # wait_index) to make sure it has queued its event to the resync + # thread. + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=103 + ) + # Write some data for an unchanged key to the resync thread, which + # should be ignored. + snap_stream.write(''' + { + "key": "/calico/v1/adir/ckey", + "value": "c", + "modifiedIndex": 8 + }, + ''') + # But we should get the watcher update. + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/bkey", + MSG_KEY_VALUE: "b", + }) + # Finish the snapshot. + snap_stream.write(''' + { + "key": "/calico/v1/adir/dkey", + "value": "c", + "modifiedIndex": 8 + }, + { + "key": "/calico/v1/Ready", + "value": "true", + "modifiedIndex": 10 + }] + }] + } + } + ''') + # Should get a deletion for the keys that were missing in this + # snapshot. + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ekey", + MSG_KEY_VALUE: None, + }) + # Should get the in-sync message. (No event for Ready flag due to + # HWM. + self.assert_status_message(STATUS_IN_SYNC) + # Now send a watcher event, which should go straight through. + self.send_watcher_event_and_assert_felix_msg(104) + + def send_watcher_event_and_assert_felix_msg(self, etcd_index): self.watcher_etcd.respond_with_value( "/calico/v1/adir/ekey", "e", - mod_index=14, + mod_index=etcd_index, action="set" ) self.assert_msg_to_felix(MSG_TYPE_UPDATE, { @@ -192,13 +278,29 @@ def test_resync_etcd_read_fail(self, m_sleep): # Triggering a restart of the resync loop. self.assert_status_message(STATUS_WAIT_FOR_READY) - def start_driver_and_handshake(self): + @patch("time.sleep") + def test_bad_ready_key_retry(self, m_sleep): + self.start_driver_and_init() + # Respond to etcd request with a bad response + self.resync_etcd.assert_request(READY_KEY) + self.resync_etcd.respond_with_data("foobar", 123, 500) + # Then it should retry. + self.resync_etcd.assert_request(READY_KEY) + m_sleep.assert_called_once_with(1) + + def start_driver_and_init(self): self.driver.start() # First message comes from Felix. self.send_init_msg() # Should trigger driver to send a status and start polling the ready # flag. self.assert_status_message(STATUS_WAIT_FOR_READY) + + def start_driver_and_handshake(self): + self.start_driver_and_init() + self.do_handshake() + + def do_handshake(self): # Respond to etcd request with ready == true. 
self.resync_etcd.assert_request(READY_KEY) self.resync_etcd.respond_with_value(READY_KEY, "true", mod_index=10) @@ -235,17 +337,19 @@ def start_driver_and_handshake(self): ) self.assert_status_message(STATUS_RESYNC) - def start_snapshot_response(self): + def start_snapshot_response(self, etcd_index=10): # We should get a request to load the full snapshot. self.resync_etcd.assert_request( VERSION_DIR, recursive=True, timeout=120, preload_content=False ) - snap_stream = self.resync_etcd.respond_with_stream(etcd_index=10) + snap_stream = self.resync_etcd.respond_with_stream( + etcd_index=etcd_index + ) # And then the headers should trigger a request from the watcher # including the etcd_index we sent even though we haven't sent a # response body to the resync thread. self.watcher_etcd.assert_request( - VERSION_DIR, recursive=True, timeout=90, wait_index=11 + VERSION_DIR, recursive=True, timeout=90, wait_index=etcd_index+1 ) # Start sending the snapshot response: snap_stream.write('''{ @@ -261,9 +365,9 @@ def start_snapshot_response(self): { "key": "/calico/v1/adir/akey", "value": "akey's value", - "modifiedIndex": 8 + "modifiedIndex": %d }, - ''') + ''' % (etcd_index - 2)) # Should generate a message to felix even though it's only seen part # of the response... self.assert_msg_to_felix(MSG_TYPE_UPDATE, { @@ -294,8 +398,8 @@ def assert_msg_to_felix(self, msg_type, fields=None): except Empty: self.fail("Expected %s message to felix but no message was sent" % msg_type) - self.assertEqual(msg_type, mt) - self.assertEqual(fields, fs) + self.assertEqual(msg_type, mt, msg="Unexpected message: %s" % fs) + self.assertEqual(fields, fs, msg="Unexpected message: %s" % fs) def assert_flush_to_felix(self): self.assertEqual(self.msg_writer.queue.get(timeout=10), @@ -348,3 +452,72 @@ def tearDown(self): # Now the driver is stopped, it's safe to remove our patch of # complete_logging() self._logging_patch.stop() + + +class TestDriver(TestCase): + """ + Unit-test tests of the Driver. + """ + def setUp(self): + self.m_sck = Mock(spec=socket.socket) + self.driver = EtcdDriver(self.m_sck) + self.msg_reader = StubMessageReader(self.m_sck) + self.msg_writer = StubMessageWriter(self.m_sck) + self.driver._msg_reader = self.msg_reader + self.driver._msg_writer = self.msg_writer + + def test_read_bad_message(self): + self.msg_reader.send_msg("unknown", {}) + self.assertRaises(RuntimeError, self.driver._read_from_socket) + + def test_shutdown_before_config(self): + self.driver._stop_event.set() + self.assertRaises(DriverShutdown, self.driver._wait_for_config) + + def test_issue_etcd_request_basic_get(self): + # Initialise the etcd URL. + self.driver._handle_init({ + MSG_KEY_ETCD_URL: "http://localhost:4001/", + MSG_KEY_HOSTNAME: "ourhost", + }) + m_pool = Mock(spec=HTTPConnectionPool) + self.driver._issue_etcd_request(m_pool, "calico/v1/Ready") + self.assertEqual( + m_pool.request.mock_calls, + [call("GET", + "http://localhost:4001/v2/keys/calico/v1/Ready", + fields=None, + timeout=5, + preload_content=True)] + ) + + def test_issue_etcd_request_recursive_watch(self): + # Initialise the etcd URL. 
+ self.driver._handle_init({ + MSG_KEY_ETCD_URL: "http://localhost:4001/", + MSG_KEY_HOSTNAME: "ourhost", + }) + m_pool = Mock(spec=HTTPConnectionPool) + self.driver._issue_etcd_request(m_pool, "calico/v1", timeout=10, + wait_index=11, recursive=True) + self.assertEqual( + m_pool.request.mock_calls, + [call("GET", + "http://localhost:4001/v2/keys/calico/v1", + fields={"recursive": "true", + "wait": "true", + "waitIndex": 11}, + timeout=10, + preload_content=False)] + ) + + def test_cluster_id_check(self): + m_resp = Mock() + m_resp.getheader.return_value = "abcdef" + self.driver._check_cluster_id(m_resp) + m_resp = Mock() + m_resp.getheader.return_value = "ghijkl" + self.assertRaises(DriverShutdown, self.driver._check_cluster_id, + m_resp) + self.assertTrue(self.driver._stop_event.is_set()) + From aaea805220df3753380298b36a3b0cf49564349e Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 18:08:45 +0000 Subject: [PATCH 82/98] First batch of tests for new fetcd function. --- calico/felix/fetcd.py | 58 +++++----- calico/felix/test/test_fetcd.py | 187 +++++++++++++++++++++++--------- 2 files changed, 166 insertions(+), 79 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index 4d22ac0856..e7281d0e6a 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -161,8 +161,8 @@ def _periodically_resync(self): _log.debug("After jitter, next periodic resync will be in %.1f " "seconds.", sleep_time) gevent.sleep(sleep_time) - self.force_resync(reason="periodic resync", async=True) _stats.increment("Periodic resync") + self.force_resync(reason="periodic resync", async=True) @logging_exceptions def _periodically_report_status(self): @@ -316,6 +316,7 @@ def __init__(self, config, etcd_api, status_reporter, hosts_ipset): self._driver_process = None # Stats. self.read_count = 0 + self.msgs_processed = 0 self.last_rate_log_time = monotonic_time() # Register for events when values change. self._register_paths() @@ -354,30 +355,14 @@ def _run(self): _log.info("...load_config set. Starting driver read %s loop", self) # Start the driver process and wait for it to connect back to our # socket. - self._msg_reader, self._msg_writer = self.start_driver() + self._msg_reader, self._msg_writer = self._start_driver() # Loop reading from the socket and processing messages. - msgs_processed = 0 + self._loop_reading_from_driver() + + def _loop_reading_from_driver(self): while True: for msg_type, msg in self._msg_reader.new_messages(timeout=1): - # Optimization: put update first in the "switch" block because - # it's on the critical path. - if msg_type == MSG_TYPE_UPDATE: - _stats.increment("Update messages from driver") - self._on_update_from_driver(msg) - elif msg_type == MSG_TYPE_CONFIG_LOADED: - _stats.increment("Config loaded messages from driver") - self._on_config_loaded_from_driver(msg) - elif msg_type == MSG_TYPE_STATUS: - _stats.increment("Status messages from driver") - self._on_status_from_driver(msg) - else: - raise RuntimeError("Unexpected message %s" % msg) - msgs_processed += 1 - if msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: - # Yield to ensure that other actors make progress. - # Sleep must be non-zero to work around gevent - # issue where we could be immediately rescheduled. - gevent.sleep(0.000001) + self._dispatch_msg_from_driver(msg_type, msg) if self.resync_requested: _log.info("Resync requested, sending resync request to driver") self.resync_requested = False @@ -385,11 +370,32 @@ def _run(self): # Check that the driver hasn't died. 
The recv() call should # raise an exception when the buffer runs dry but this usually # gets hit first. - if self._driver_process.poll() is not None: + driver_rc = self._driver_process.poll() + if driver_rc is not None: _log.critical("Driver process died with RC = %s. Felix must " - "exit.", self._driver_process.poll()) + "exit.", driver_rc) die_and_restart() - _log.info("%s.loop() stopped due to self.stop == True", self) + + def _dispatch_msg_from_driver(self, msg_type, msg): + # Optimization: put update first in the "switch" block because + # it's on the critical path. + if msg_type == MSG_TYPE_UPDATE: + _stats.increment("Update messages from driver") + self._on_update_from_driver(msg) + elif msg_type == MSG_TYPE_CONFIG_LOADED: + _stats.increment("Config loaded messages from driver") + self._on_config_loaded_from_driver(msg) + elif msg_type == MSG_TYPE_STATUS: + _stats.increment("Status messages from driver") + self._on_status_from_driver(msg) + else: + raise RuntimeError("Unexpected message %s" % msg) + self.msgs_processed += 1 + if self.msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: + # Yield to ensure that other actors make progress. + # Sleep must be non-zero to work around gevent + # issue where we could be immediately rescheduled. + gevent.sleep(0.000001) def _on_update_from_driver(self, msg): """ @@ -507,7 +513,7 @@ def _on_status_from_driver(self, msg): self._status_reporter.clean_up_endpoint_statuses(async=True) self._update_hosts_ipset() - def start_driver(self): + def _start_driver(self): """ Starts the driver subprocess, connects to it over the socket and sends it the init message. diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 97b07afa34..b7a71e0349 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import subprocess from datetime import datetime import json import logging @@ -19,9 +20,13 @@ from etcd import EtcdResult, EtcdException import etcd from gevent.event import Event +import gevent from mock import Mock, call, patch, ANY from calico.datamodel_v1 import EndpointId +from calico.etcddriver.protocol import MessageReader, MessageWriter, \ + MSG_TYPE_CONFIG_LOADED, MSG_TYPE_STATUS, STATUS_RESYNC, MSG_KEY_STATUS, \ + MSG_TYPE_UPDATE from calico.felix.config import Config from calico.felix.futils import IPV4, IPV6 from calico.felix.ipsets import IpsetActor @@ -32,7 +37,6 @@ _log = logging.getLogger(__name__) - VALID_ENDPOINT = { "state": "active", "name": "tap1234", @@ -60,71 +64,84 @@ class TestEtcdAPI(BaseTestCase): - - @patch("calico.felix.fetcd._FelixEtcdWatcher", autospec=True) - @patch("gevent.spawn", autospec=True) - def test_create(self, m_spawn, m_etcd_watcher): - m_config = Mock(spec=Config) - m_config.ETCD_ADDR = ETCD_ADDRESS - m_hosts_ipset = Mock(spec=IpsetActor) - api = EtcdAPI(m_config, m_hosts_ipset) - m_etcd_watcher.assert_has_calls([ - call(m_config, m_hosts_ipset).link(api._on_worker_died), - call(m_config, m_hosts_ipset).start(), + def setUp(self): + super(TestEtcdAPI, self).setUp() + self.m_config = Mock(spec=Config) + self.m_config.ETCD_ADDR = ETCD_ADDRESS + self.m_hosts_ipset = Mock(spec=IpsetActor) + with patch("calico.felix.fetcd._FelixEtcdWatcher", + autospec=True) as m_etcd_watcher: + with patch("gevent.spawn", autospec=True) as m_spawn: + self.api = EtcdAPI(self.m_config, self.m_hosts_ipset) + self.m_spawn = m_spawn + self.m_etcd_watcher = m_etcd_watcher.return_value + self.m_etcd_watcher.load_config = Mock(spec=Event) + self.m_etcd_watcher.begin_polling = Mock(spec=Event) + self.m_etcd_watcher.configured = Mock(spec=Event) + + def test_create(self): + self.m_etcd_watcher.assert_has_calls([ + call.link(self.api._on_worker_died), + call.start(), ]) - m_spawn.assert_has_calls([ - call(api._periodically_resync), - call(api._periodically_resync).link_exception(api._on_worker_died) + self.m_spawn.assert_has_calls([ + call(self.api._periodically_resync), + call(self.api._periodically_resync).link_exception( + self.api._on_worker_died) ]) - @patch("calico.felix.fetcd._FelixEtcdWatcher", autospec=True) - @patch("gevent.spawn", autospec=True) @patch("gevent.sleep", autospec=True) - def test_periodic_resync_mainline(self, m_sleep, m_spawn, m_etcd_watcher): + def test_periodic_resync_mainline(self, m_sleep): + self.m_config.RESYNC_INTERVAL = 10 m_configured = Mock(spec=Event) - m_etcd_watcher.return_value.configured = m_configured - m_config = Mock(spec=Config) - m_config.ETCD_ADDR = ETCD_ADDRESS - m_hosts_ipset = Mock(spec=IpsetActor) - api = EtcdAPI(m_config, m_hosts_ipset) - m_config.RESYNC_INTERVAL = 10 - with patch.object(api, "force_resync") as m_force_resync: + self.m_etcd_watcher.configured = m_configured + with patch.object(self.api, "force_resync") as m_force_resync: m_force_resync.side_effect = ExpectedException() - self.assertRaises(ExpectedException, api._periodically_resync) + self.assertRaises(ExpectedException, + self.api._periodically_resync) m_configured.wait.assert_called_once_with() m_sleep.assert_called_once_with(ANY) sleep_time = m_sleep.call_args[0][0] self.assertTrue(sleep_time >= 10) self.assertTrue(sleep_time <= 12) - @patch("calico.felix.fetcd._FelixEtcdWatcher", autospec=True) - @patch("gevent.spawn", autospec=True) @patch("gevent.sleep", autospec=True) - def test_periodic_resync_disabled(self, m_sleep, m_spawn, m_etcd_watcher): - 
m_etcd_watcher.return_value.configured = Mock(spec=Event) - m_config = Mock(spec=Config) - m_config.ETCD_ADDR = ETCD_ADDRESS - m_hosts_ipset = Mock(spec=IpsetActor) - api = EtcdAPI(m_config, m_hosts_ipset) - m_config.RESYNC_INTERVAL = 0 - with patch.object(api, "force_resync") as m_force_resync: + def test_periodic_resync_disabled(self, m_sleep): + self.m_config.RESYNC_INTERVAL = 0 + self.m_etcd_watcher.configured = Mock(spec=Event) + with patch.object(self.api, "force_resync") as m_force_resync: m_force_resync.side_effect = Exception() - api._periodically_resync() - - @patch("calico.felix.fetcd._FelixEtcdWatcher", autospec=True) - @patch("gevent.spawn", autospec=True) - def test_force_resync(self, m_spawn, m_etcd_watcher): - m_config = Mock(spec=Config) - m_config.ETCD_ADDR = ETCD_ADDRESS - m_config.REPORT_ENDPOINT_STATUS = True - m_hosts_ipset = Mock(spec=IpsetActor) - api = EtcdAPI(m_config, m_hosts_ipset) - endpoint_id = EndpointId("foo", "bar", "baz", "biff") - with patch.object(api, "status_reporter") as m_status_rep: - api.force_resync(async=True) - self.step_actor(api) + self.api._periodically_resync() + + def test_force_resync(self): + self.m_config.REPORT_ENDPOINT_STATUS = True + with patch.object(self.api, "status_reporter") as m_status_rep: + self.api.force_resync(async=True) + self.step_actor(self.api) m_status_rep.resync.assert_called_once_with(async=True) - self.assertTrue(m_etcd_watcher.return_value.resync_requested) + self.assertTrue(self.m_etcd_watcher.resync_requested) + + def test_load_config(self): + result = self.api.load_config(async=True) + self.step_actor(self.api) + conf = result.get() + self.assertEqual(conf, self.m_etcd_watcher.configured) + self.m_etcd_watcher.load_config.set.assert_called_once_with() + + def test_start_watch(self): + m_splitter = Mock() + result = self.api.start_watch(m_splitter, async=True) + self.step_actor(self.api) + self.m_etcd_watcher.load_config.set.assert_called_once_with() + self.assertEqual(self.m_etcd_watcher.splitter, m_splitter) + self.m_etcd_watcher.begin_polling.set.assert_called_once_with() + + @patch("sys.exit", autospec=True) + def test_on_worker_died(self, m_exit): + glet = gevent.spawn(lambda: None) + glet.link(self.api._on_worker_died) + glet.join(1) + m_exit.assert_called_once_with(1) class ExpectedException(Exception): @@ -148,8 +165,72 @@ def setUp(self): self.m_hosts_ipset) self.m_splitter = Mock(spec=UpdateSplitter) self.watcher.splitter = self.m_splitter - self.client = Mock() - self.watcher.client = self.client + self.m_reader = Mock(spec=MessageReader) + self.m_writer = Mock(spec=MessageWriter) + self.watcher._msg_reader = self.m_reader + self.watcher._msg_writer = self.m_writer + self.m_driver_proc = Mock(spec=subprocess.Popen) + self.watcher._driver_process = self.m_driver_proc + + def test_run(self): + with patch.object(self.watcher.load_config, "wait") as m_wait: + with patch.object(self.watcher, "_start_driver") as m_start: + m_reader = Mock() + m_writer = Mock() + m_start.return_value = (m_reader, m_writer) + m_reader.new_messages.side_effect = ExpectedException() + self.assertRaises(ExpectedException, self.watcher._run) + self.assertEqual(m_wait.mock_calls, [call()]) + + @patch("calico.felix.fetcd.die_and_restart", autospec=True) + def test_read_loop(self, m_die): + self.m_reader.new_messages.side_effect = iter([ + iter([]), + iter([(MSG_TYPE_STATUS, {MSG_KEY_STATUS: STATUS_RESYNC})]) + ]) + self.m_driver_proc.poll.side_effect = iter([ + None, 1 + ]) + m_die.side_effect = ExpectedException() + with 
patch.object(self.watcher, "_dispatch_msg_from_driver") as m_disp: + self.assertRaises(ExpectedException, + self.watcher._loop_reading_from_driver) + self.assertEqual(m_disp.mock_calls, + [call(MSG_TYPE_STATUS, + {MSG_KEY_STATUS: STATUS_RESYNC})]) + + @patch("calico.felix.fetcd.die_and_restart", autospec=True) + def test_read_loop_resync(self, m_die): + self.m_reader.new_messages.side_effect = iter([iter([]), iter([])]) + self.m_driver_proc.poll.side_effect = iter([None, 1]) + self.watcher.resync_requested = True + m_die.side_effect = ExpectedException() + self.assertRaises(ExpectedException, + self.watcher._loop_reading_from_driver) + + def test_dispatch_from_driver(self): + for msg_type, expected_method in [ + (MSG_TYPE_UPDATE, "_on_update_from_driver"), + (MSG_TYPE_CONFIG_LOADED, "_on_config_loaded_from_driver"), + (MSG_TYPE_STATUS, "_on_status_from_driver"),]: + with patch.object(self.watcher, expected_method) as m_meth: + msg = Mock() + self.watcher._dispatch_msg_from_driver(msg_type, msg) + self.assertEqual(m_meth.mock_calls, [call(msg)]) + + def test_dispatch_from_driver_unexpected(self): + self.assertRaises(RuntimeError, + self.watcher._dispatch_msg_from_driver, + "unknown", {}) + + @patch("gevent.sleep") + def test_dispatch_yield(self, m_sleep): + for _ in xrange(399): + with patch.object(self.watcher, "_on_update_from_driver") as m_upd: + msg = Mock() + self.watcher._dispatch_msg_from_driver(MSG_TYPE_UPDATE, msg) + self.assertEqual(m_sleep.mock_calls, + [call(0.000001)]) def test_endpoint_set(self): self.dispatch("/calico/v1/host/h1/workload/o1/w1/endpoint/e1", From 526690858ab8edcd2e2144dfdaddc9c06415ef7b Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 18:46:27 +0000 Subject: [PATCH 83/98] Cover dispatch functions in fetcd. --- calico/felix/fetcd.py | 4 +- calico/felix/test/test_fetcd.py | 131 +++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 4 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index e7281d0e6a..cf06d16222 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -527,7 +527,8 @@ def _start_driver(self): try: os.unlink("/run/felix-driver.sck") except OSError: - pass + _log.debug("Failed to delete driver socket, assuming it " + "didn't exist.") update_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) update_socket.bind("/run/felix-driver.sck") @@ -543,6 +544,7 @@ def _start_driver(self): try: os.unlink("/run/felix-driver.sck") except OSError: + # Unexpected but carry on... _log.exception("Failed to unlink socket") else: _log.info("Unlinked server socket") diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index b7a71e0349..6bf8978aaa 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
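The read-loop tests above pin down the loop's contract: drain each batch of messages from the driver, dispatch them, then check whether the driver subprocess has died. A rough standalone sketch of that contract (the names and the stand-in die_and_restart() are assumptions):

    def die_and_restart():
        # Stand-in for the real helper, which restarts Felix.
        raise SystemExit(1)

    def loop_reading_from_driver(reader, dispatch, driver_process):
        while True:
            for msg_type, msg in reader.new_messages(timeout=1):
                dispatch(msg_type, msg)
            rc = driver_process.poll()
            if rc is not None:
                # Felix cannot usefully run without its etcd driver.
                die_and_restart()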
+import socket import subprocess from datetime import datetime import json @@ -26,7 +27,10 @@ from calico.datamodel_v1 import EndpointId from calico.etcddriver.protocol import MessageReader, MessageWriter, \ MSG_TYPE_CONFIG_LOADED, MSG_TYPE_STATUS, STATUS_RESYNC, MSG_KEY_STATUS, \ - MSG_TYPE_UPDATE + MSG_TYPE_UPDATE, MSG_KEY_KEY, MSG_KEY_VALUE, MSG_KEY_TYPE, \ + MSG_KEY_HOST_CONFIG, MSG_KEY_GLOBAL_CONFIG, MSG_TYPE_CONFIG, \ + MSG_KEY_LOG_FILE, MSG_KEY_SEV_FILE, MSG_KEY_SEV_SCREEN, MSG_KEY_SEV_SYSLOG, \ + STATUS_IN_SYNC from calico.felix.config import Config from calico.felix.futils import IPV4, IPV6 from calico.felix.ipsets import IpsetActor @@ -229,8 +233,129 @@ def test_dispatch_yield(self, m_sleep): with patch.object(self.watcher, "_on_update_from_driver") as m_upd: msg = Mock() self.watcher._dispatch_msg_from_driver(MSG_TYPE_UPDATE, msg) - self.assertEqual(m_sleep.mock_calls, - [call(0.000001)]) + self.assertEqual(m_sleep.mock_calls, [call(0.000001)]) + + def test_on_update_from_driver(self): + self.watcher.read_count = 999 + self.watcher.configured.set() + with patch.object(self.watcher, "begin_polling") as m_begin: + self.watcher._on_update_from_driver({ + MSG_KEY_TYPE: MSG_TYPE_UPDATE, + MSG_KEY_KEY: "/calico/v1/Ready", + MSG_KEY_VALUE: "true", + }) + m_begin.wait.assert_called_once_with() + + @patch("calico.felix.fetcd.die_and_restart", autospec=True) + def test_on_config_loaded(self, m_die): + self.m_config.DRIVERLOGFILE = "/tmp/driver.log" + global_config = {"InterfacePrefix": "tap"} + local_config = {"LogSeverityFile": "DEBUG"} + self.watcher._on_config_loaded_from_driver({ + MSG_KEY_GLOBAL_CONFIG: global_config, + MSG_KEY_HOST_CONFIG: local_config, + }) + self.assertTrue(self.watcher.configured.is_set()) + self.assertEqual( + self.m_config.report_etcd_config.mock_calls, + [call(local_config, global_config)] + ) + self.assertEqual( + self.m_writer.send_message.mock_calls, + [call(MSG_TYPE_CONFIG, + { + MSG_KEY_LOG_FILE: "/tmp/driver.log", + MSG_KEY_SEV_FILE: self.m_config.LOGLEVFILE, + MSG_KEY_SEV_SCREEN: self.m_config.LOGLEVSCR, + MSG_KEY_SEV_SYSLOG: self.m_config.LOGLEVSYS, + })] + ) + self.assertEqual(m_die.mock_calls, []) + + # Check a subsequent config change results in Felix dying. + global_config = {"InterfacePrefix": "not!tap"} + local_config = {"LogSeverityFile": "not!DEBUG"} + self.watcher._on_config_loaded_from_driver({ + MSG_KEY_GLOBAL_CONFIG: global_config, + MSG_KEY_HOST_CONFIG: local_config, + }) + self.assertEqual(m_die.mock_calls, [call()]) + + def test_on_status_from_driver(self): + self.watcher._on_status_from_driver({ + MSG_KEY_STATUS: STATUS_RESYNC + }) + self.assertFalse(self.watcher._been_in_sync) + + with patch.object(self.watcher, "begin_polling") as m_begin: + # Two calls but second should be ignored... 
+ self.watcher._on_status_from_driver({ + MSG_KEY_STATUS: STATUS_IN_SYNC + }) + self.watcher._on_status_from_driver({ + MSG_KEY_STATUS: STATUS_IN_SYNC + }) + m_begin.wait.assert_called_once_with() + self.assertTrue(self.watcher._been_in_sync) + self.assertEqual(self.m_splitter.on_datamodel_in_sync.mock_calls, + [call(async=True)]) + self.assertEqual(self.m_hosts_ipset.replace_members.mock_calls, + [call([], async=True)]) + + @patch("subprocess.Popen") + @patch("socket.socket") + @patch("os.unlink") + def test_start_driver(self, m_unlink, m_socket, m_popen): + m_sck = Mock() + m_socket.return_value = m_sck + m_conn = Mock() + m_sck.accept.return_value = m_conn, None + reader, writer = self.watcher._start_driver() + self.assertEqual(m_socket.mock_calls[0], call(socket.AF_UNIX, + socket.SOCK_STREAM)) + self.assertEqual(m_sck.bind.mock_calls, + [call("/run/felix-driver.sck")]) + self.assertEqual(m_sck.listen.mock_calls, [call(1)]) + self.assertEqual(m_popen.mock_calls[0], + call([ANY, "-m", "calico.etcddriver", + "/run/felix-driver.sck"])) + self.assertEqual(m_unlink.mock_calls, + [call("/run/felix-driver.sck")] * 2) + self.assertTrue(isinstance(reader, MessageReader)) + self.assertTrue(isinstance(writer, MessageWriter)) + + @patch("subprocess.Popen") + @patch("socket.socket") + @patch("os.unlink") + def test_start_driver_unlink_fail(self, m_unlink, m_socket, m_popen): + m_unlink.side_effect = OSError() + m_sck = Mock() + m_socket.return_value = m_sck + m_conn = Mock() + m_sck.accept.return_value = m_conn, None + reader, writer = self.watcher._start_driver() + self.assertTrue(isinstance(reader, MessageReader)) + self.assertTrue(isinstance(writer, MessageWriter)) + + def test_update_hosts_ipset_not_in_sync(self): + self.watcher._update_hosts_ipset() + self.assertEqual(self.m_hosts_ipset.mock_calls, []) + + @patch("calico.felix.fetcd.die_and_restart", autospec=True) + def test_config_set(self, m_die): + self.watcher.last_global_config = {} + self.dispatch("/calico/v1/config/InterfacePrefix", + "set", value="foo") + self.assertEqual(m_die.mock_calls, [call()]) + + @patch("calico.felix.fetcd.die_and_restart", autospec=True) + def test_host_config_set(self, m_die): + self.watcher.last_host_config = {} + self.dispatch("/calico/v1/host/notourhostname/config/InterfacePrefix", + "set", value="foo") + self.dispatch("/calico/v1/host/hostname/config/InterfacePrefix", + "set", value="foo") + self.assertEqual(m_die.mock_calls, [call()]) def test_endpoint_set(self): self.dispatch("/calico/v1/host/h1/workload/o1/w1/endpoint/e1", From 158a86a45afdb2e20d0b7e570c9452c5a6da5b40 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 5 Nov 2015 19:17:25 +0000 Subject: [PATCH 84/98] More coverage for fetcd.py. 
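The test_start_driver assertions above spell out the handshake between Felix and the etcd driver: unlink any stale socket, bind and listen on a Unix socket, spawn the driver with the socket path on its command line, then accept the driver's connection. Reconstructed as a sketch from those assertions (error handling and the second unlink trimmed):

    import os
    import socket
    import subprocess
    import sys

    SOCKET_PATH = "/run/felix-driver.sck"

    def start_driver():
        try:
            os.unlink(SOCKET_PATH)  # Remove a stale socket, if any.
        except OSError:
            pass
        server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        server.bind(SOCKET_PATH)
        server.listen(1)
        driver = subprocess.Popen([sys.executable, "-m", "calico.etcddriver",
                                   SOCKET_PATH])
        conn, _ = server.accept()  # Blocks until the driver connects.
        return driver, conn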
--- calico/felix/fetcd.py | 4 ---- calico/felix/test/test_fetcd.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index cf06d16222..bd90433c96 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -720,10 +720,6 @@ def _on_timer_pop(self): self._timer_scheduled = False self._reporting_allowed = True - @actor_message() - def mark_endpoint_dirty(self, endpoint_id): - self._mark_endpoint_dirty(endpoint_id) - def _mark_endpoint_dirty(self, endpoint_id): assert isinstance(endpoint_id, EndpointId) if endpoint_id in self._older_dirty_endpoints: diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 6bf8978aaa..2e91630e4a 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -510,6 +510,16 @@ def test_host_ip_invalid(self): async=True, ) + def test_ipam_pool_set(self): + self.dispatch("/calico/v1/ipam/v4/pool/1234", action="set", value="{}") + self.assertEqual(self.m_splitter.on_ipam_pool_update.mock_calls, + [call("1234", None, async=True)]) + + def test_ipam_pool_del(self): + self.dispatch("/calico/v1/ipam/v4/pool/1234", action="delete") + self.assertEqual(self.m_splitter.on_ipam_pool_update.mock_calls, + [call("1234", None, async=True)]) + @patch("os._exit", autospec=True) @patch("gevent.sleep", autospec=True) def test_die_and_restart(self, m_sleep, m_exit): @@ -692,6 +702,12 @@ def test_on_endpoint_status_mainline(self): self.assertEqual(self.rep._newer_dirty_endpoints, set()) self.assertEqual(self.rep._older_dirty_endpoints, set()) + def test_mark_endpoint_dirty_already_dirty(self): + endpoint_id = EndpointId("a", "b", "c", "d") + self.rep._older_dirty_endpoints.add(endpoint_id) + self.rep._mark_endpoint_dirty(endpoint_id) + self.assertFalse(endpoint_id in self.rep._newer_dirty_endpoints) + def test_on_endpoint_status_failure(self): # Send in an endpoint status update. endpoint_id = EndpointId("foo", "bar", "baz", "biff") @@ -825,6 +841,14 @@ def test_clean_up_endpoint_status(self): "anendpoint") ) + def test_clean_up_endpoint_status_etcd_error(self): + self.m_config.REPORT_ENDPOINT_STATUS = True + with patch.object(self.rep, "_attempt_cleanup") as m_clean: + m_clean.side_effect = EtcdException() + self.rep.clean_up_endpoint_statuses(async=True) + self.step_actor(self.rep) + self.assertTrue(self.rep._cleanup_pending) + def test_clean_up_endpoint_status_not_found(self): self.m_config.REPORT_ENDPOINT_STATUS = True self.m_client.read.side_effect = etcd.EtcdKeyNotFound() From 6a9b9a0b3d820ddbc471a5759f28c6c95e93718e Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 10:26:15 +0000 Subject: [PATCH 85/98] Additional coverage for etcdutils.py. 
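test_die_and_restart above patches os._exit and gevent.sleep, which pins down the helper's shape: pause briefly so a restart loop cannot spin hot, then hard-exit so the supervisor restarts Felix from scratch. A sketch under those assumptions (the exact delay and exit code are guesses, not taken from the patch):

    import os
    import gevent

    def die_and_restart():
        # A brief sleep rate-limits restarts if, say, config is churning.
        gevent.sleep(2)   # assumed delay
        os._exit(1)       # assumed exit code; skips cleanup deliberately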
--- calico/felix/test/base.py | 6 +- calico/felix/test/test_actor.py | 18 ++-- calico/test/lib.py | 17 +++- calico/test/test_etcdutils.py | 163 +++++++++++++++++++++++++++++++- 4 files changed, 186 insertions(+), 18 deletions(-) diff --git a/calico/felix/test/base.py b/calico/felix/test/base.py index f4bd319ff9..4ffbaefa54 100644 --- a/calico/felix/test/base.py +++ b/calico/felix/test/base.py @@ -73,4 +73,8 @@ def __eq__(self, other): return False def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, self.json_obj) \ No newline at end of file + return '%s(%r)' % (self.__class__.__name__, self.json_obj) + + +class ExpectedException(Exception): + pass \ No newline at end of file diff --git a/calico/felix/test/test_actor.py b/calico/felix/test/test_actor.py index 59fb3dbbe1..d56189426c 100644 --- a/calico/felix/test/test_actor.py +++ b/calico/felix/test/test_actor.py @@ -19,17 +19,17 @@ Tests of the Actor framework. """ -import logging -import itertools import gc +import itertools +import logging import sys -from gevent.event import AsyncResult import mock -from calico.felix.actor import actor_message, ResultOrExc, SplitBatchAndRetry -from calico.felix.test.base import BaseTestCase -from calico.felix import actor +from gevent.event import AsyncResult +from calico.felix import actor +from calico.felix.actor import actor_message, ResultOrExc, SplitBatchAndRetry +from calico.felix.test.base import BaseTestCase, ExpectedException # Logger log = logging.getLogger(__name__) @@ -184,7 +184,7 @@ def test_own_batch(self): def test_blocking_call(self): self._actor.start() # Really start it. self._actor.do_a(async=False) - self.assertRaises(ExpectedException, self._actor.do_exc, async=False) + self.assertRaises(ExpectedException, self._actor.do_exc, async=False) def test_same_actor_call(self): """ @@ -369,10 +369,6 @@ def on_unreferenced(self, async=None): return self.on_unref_result -class ExpectedException(Exception): - pass - - class FinishException(Exception): pass diff --git a/calico/test/lib.py b/calico/test/lib.py index aa079ac4bf..31cde72c77 100644 --- a/calico/test/lib.py +++ b/calico/test/lib.py @@ -87,7 +87,7 @@ class EtcdException(Exception): pass -class EtcdKeyNotFound(EtcdException): +class EtcdValueError(EtcdException, ValueError): pass @@ -95,11 +95,23 @@ class EtcdClusterIdChanged(EtcdException): pass +class EtcdKeyError(EtcdException): + pass + + +class EtcdKeyNotFound(EtcdKeyError): + pass + + class EtcdEventIndexCleared(EtcdException): pass -class EtcdValueError(EtcdException): +class EtcdConnectionFailed(EtcdException): + pass + + +class EtcdWatcherCleared(EtcdException): pass @@ -110,6 +122,7 @@ class EtcdDirNotEmpty(EtcdValueError): m_etcd.EtcdException = EtcdException m_etcd.EtcdKeyNotFound = EtcdKeyNotFound m_etcd.EtcdClusterIdChanged = EtcdClusterIdChanged +m_etcd.EtcdConnectionFailed = EtcdConnectionFailed m_etcd.EtcdEventIndexCleared = EtcdEventIndexCleared m_etcd.EtcdValueError = EtcdValueError m_etcd.EtcdDirNotEmpty = EtcdDirNotEmpty diff --git a/calico/test/test_etcdutils.py b/calico/test/test_etcdutils.py index 40a1d628b0..8f274a80eb 100644 --- a/calico/test/test_etcdutils.py +++ b/calico/test/test_etcdutils.py @@ -21,15 +21,20 @@ import logging import types + +from etcd import EtcdException from mock import Mock, patch, call +from urllib3.exceptions import ReadTimeoutError + from calico.etcdutils import ( - PathDispatcher, EtcdWatcher, delete_empty_parents + PathDispatcher, EtcdWatcher, delete_empty_parents, + EtcdClientOwner, ResyncRequired ) # Since other 
tests patch the module table, make sure we have the same etcd # module as the module under test. from calico.etcdutils import etcd -from calico.felix.test.base import BaseTestCase +from calico.felix.test.base import BaseTestCase, ExpectedException _log = logging.getLogger(__name__) @@ -97,6 +102,11 @@ def test_delete_empty_parents_other_exception(self): ] ) + def test_delete_empty_parents_bad_prefix(self): + self.assertRaises(ValueError, + delete_empty_parents, + Mock(), "/foo/bar/baz/biff", "/bar") + class _TestPathDispatcherBase(BaseTestCase): """ @@ -232,13 +242,93 @@ class TestDispatcherExpire(_TestPathDispatcherBase): expected_handlers = "delete" +class TestEtcdClientOwner(BaseTestCase): + @patch("etcd.Client", autospec=True) + def test_create(self, m_client_cls): + owner = EtcdClientOwner("localhost:1234") + m_client = m_client_cls.return_value + m_client.expected_cluster_id = "abcdef" + owner.reconnect() + self.assertEqual(m_client_cls.mock_calls, + [call(host="localhost", port=1234, + expected_cluster_id=None), + call().__nonzero__(), + call(host="localhost", port=1234, + expected_cluster_id="abcdef"),]) + + @patch("etcd.Client", autospec=True) + def test_create_default(self, m_client): + owner = EtcdClientOwner("localhost") + self.assertEqual(m_client.mock_calls, + [call(host="localhost", port=4001, + expected_cluster_id=None)]) + + class TestEtcdWatcher(BaseTestCase): def setUp(self): super(TestEtcdWatcher, self).setUp() - with patch("calico.etcdutils.EtcdWatcher.reconnect") as m_reconnect: - self.watcher = EtcdWatcher("foobar:4001", "/calico") + self.reconnect_patch = patch("calico.etcdutils.EtcdWatcher.reconnect") + self.m_reconnect = self.reconnect_patch.start() + self.watcher = EtcdWatcher("foobar:4001", "/calico") self.m_client = Mock() self.watcher.client = self.m_client + self.m_dispatcher = Mock(spec=PathDispatcher) + self.watcher.dispatcher = self.m_dispatcher + + @patch("time.sleep", autospec=True) + def test_mainline(self, m_sleep): + m_snap_response = Mock() + m_snap_response.etcd_index = 1 + m_poll_response = Mock() + m_poll_response.modifiedIndex = 2 + responses = [ + m_snap_response, m_poll_response, ResyncRequired(), # Loop 1 + EtcdException(), # Loop 2 + ExpectedException(), # Loop 3, Break out of loop. + ] + self.m_client.read.side_effect = iter(responses) + with patch.object(self.watcher, "_on_pre_resync", + autospec=True) as m_pre_r: + with patch.object(self.watcher, "_on_snapshot_loaded", + autospec=True) as m_snap_load: + self.assertRaises(ExpectedException, self.watcher.loop) + # _on_pre_resync() called once per loop. + self.assertEqual(m_pre_r.mock_calls, [call(), call(), call()]) + # The snapshot only loads successfully the first time. + self.assertEqual(m_snap_load.mock_calls, [call(m_snap_response)]) + self.assertEqual(self.m_dispatcher.handle_event.mock_calls, + [call(m_poll_response)]) + # Should sleep after exception. 
+ m_sleep.assert_called_once_with(1) + + def test_loop_stopped(self): + self.watcher._stopped = True + + with patch.object(self.watcher, "_on_pre_resync", + autospec=True) as m_pre_r: + self.watcher.loop() + self.assertFalse(m_pre_r.called) + + def test_register(self): + self.watcher.register_path("key", foo="bar") + self.assertEqual(self.m_dispatcher.register.mock_calls, + [call("key", foo="bar")]) + + @patch("time.sleep", autospec=True) + def test_wait_for_ready(self, m_sleep): + m_resp_1 = Mock() + m_resp_1.value = "false" + m_resp_2 = Mock() + m_resp_2.value = "true" + responses = [ + etcd.EtcdException(), + etcd.EtcdKeyNotFound(), + m_resp_1, + m_resp_2, + ] + self.m_client.read.side_effect = iter(responses) + self.watcher.wait_for_ready(1) + self.assertEqual(m_sleep.mock_calls, [call(1)] * 3) def test_load_initial_dump(self): m_response = Mock(spec=etcd.EtcdResult) @@ -256,3 +346,68 @@ def test_load_initial_dump(self): call("/calico", recursive=True), ]) self.assertEqual(self.watcher.next_etcd_index, 10001) + + def test_load_initial_dump_stopped(self): + self.watcher.stop() + self.m_client.read.side_effect = etcd.EtcdKeyNotFound() + self.assertRaises(etcd.EtcdKeyNotFound, self.watcher.load_initial_dump) + + def test_resync_set(self): + self.watcher.next_etcd_index = 1 + self.watcher.resync_after_current_poll = True + self.assertRaises(ResyncRequired, self.watcher.wait_for_etcd_event) + self.assertFalse(self.watcher.resync_after_current_poll) + + @patch("time.sleep", autospec=True) + def test_wait_for_etcd_event_conn_failed(self, m_sleep): + self.watcher.next_etcd_index = 1 + m_resp = Mock() + m_resp.modifiedIndex = 123 + read_timeout = etcd.EtcdConnectionFailed() + read_timeout.cause = ReadTimeoutError(Mock(), "", "") + other_error = etcd.EtcdConnectionFailed() + other_error.cause = ExpectedException() + responses = [ + read_timeout, + other_error, + m_resp, + ] + self.m_client.read.side_effect = iter(responses) + event = self.watcher.wait_for_etcd_event() + self.assertEqual(event, m_resp) + self.assertEqual(m_sleep.mock_calls, [call(1)]) + + def test_wait_for_etcd_event_cluster_id_changed(self): + self.watcher.next_etcd_index = 1 + responses = [ + etcd.EtcdClusterIdChanged(), + ] + self.m_client.read.side_effect = iter(responses) + self.assertRaises(ResyncRequired, self.watcher.wait_for_etcd_event) + + def test_wait_for_etcd_event_index_cleared(self): + self.watcher.next_etcd_index = 1 + responses = [ + etcd.EtcdEventIndexCleared(), + ] + self.m_client.read.side_effect = iter(responses) + self.assertRaises(ResyncRequired, self.watcher.wait_for_etcd_event) + + @patch("time.sleep", autospec=True) + def test_wait_for_etcd_event_unexpected_error(self, m_sleep): + self.watcher.next_etcd_index = 1 + responses = [ + etcd.EtcdException(), + ] + self.m_client.read.side_effect = iter(responses) + self.assertRaises(ResyncRequired, self.watcher.wait_for_etcd_event) + self.assertEqual(m_sleep.mock_calls, [call(1)]) + + def test_coverage(self): + # These methods are no-ops. + self.watcher._on_pre_resync() + self.watcher._on_snapshot_loaded(Mock()) + + def tearDown(self): + self.reconnect_patch.stop() + super(TestEtcdWatcher, self).tearDown() From d2dcb76858b3e09f9160bdb01056d19076128c7f Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 10:28:33 +0000 Subject: [PATCH 86/98] Add etcddriver to coveragerc includes. 
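The wait_for_etcd_event tests above fix the watcher's error-handling contract: a read timeout just polls again; another connection failure backs off for a second and retries; a changed cluster ID or a cleared event index forces a resync; any other EtcdException sleeps for a second and then forces a resync. A hypothetical helper expressing that decision table (the real logic is inlined in EtcdWatcher; ResyncRequired here is a stand-in for the one in calico.etcdutils):

    import time

    import etcd
    from urllib3.exceptions import ReadTimeoutError

    class ResyncRequired(Exception):
        pass

    def handle_watch_error(exc):
        if isinstance(exc, etcd.EtcdConnectionFailed):
            if isinstance(exc.cause, ReadTimeoutError):
                return  # Expected on a quiet cluster; just poll again.
            time.sleep(1)   # Unexpected connection error: back off, retry.
            return
        if isinstance(exc, (etcd.EtcdClusterIdChanged,
                            etcd.EtcdEventIndexCleared)):
            raise ResyncRequired()  # Our event stream is broken.
        time.sleep(1)  # Unknown etcd error: back off, then resync.
        raise ResyncRequired()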
--- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index c75ec63cfe..90a8c361a3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,6 @@ [run] include = + calico/etcddriver/* calico/felix/* calico/openstack/* calico/*.py From 0c326312e12b8da267645ddc634beb9fe20949c5 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 10:48:08 +0000 Subject: [PATCH 87/98] Cover __main__.py. --- calico/etcddriver/__main__.py | 16 +++--- calico/etcddriver/test/test_main.py | 89 +++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 calico/etcddriver/test/test_main.py diff --git a/calico/etcddriver/__main__.py b/calico/etcddriver/__main__.py index 12cc810042..f5714fafe0 100644 --- a/calico/etcddriver/__main__.py +++ b/calico/etcddriver/__main__.py @@ -28,14 +28,14 @@ import socket import sys -from calico.etcddriver.driver import EtcdDriver -from calico.common import default_logging +from calico.etcddriver import driver +from calico import common _log = logging.getLogger(__name__) last_ppid = os.getppid() -default_logging(gevent_in_use=False, - syslog_executable_name="calico-felix-etcd") +common.default_logging(gevent_in_use=False, + syslog_executable_name="calico-felix-etcd") felix_sck = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) @@ -45,15 +45,15 @@ _log.exception("Failed to connect to Felix") raise -driver = EtcdDriver(felix_sck) -driver.start() +etcd_driver = driver.EtcdDriver(felix_sck) +etcd_driver.start() -while not driver.join(timeout=1): +while not etcd_driver.join(timeout=1): parent_pid = os.getppid() # Defensive, just in case we don't get a socket error, check if the # parent PID has changed, indicating that Felix has died. if parent_pid == 1 or parent_pid != last_ppid: _log.critical("Process adopted, assuming felix has died") - driver.stop() + etcd_driver.stop() break _log.critical("Driver shutting down.") diff --git a/calico/etcddriver/test/test_main.py b/calico/etcddriver/test/test_main.py new file mode 100644 index 0000000000..e25d8b7b19 --- /dev/null +++ b/calico/etcddriver/test/test_main.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Metaswitch Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +calico.etcddriver.test.test_main +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Test __main__ module. 
+""" + +import logging +from unittest import TestCase + +import sys +from mock import Mock, call, patch + +_log = logging.getLogger(__name__) + + +class TestMain(TestCase): + def setUp(self): + assert "calico.etcddriver.__main__" not in sys.modules + + @patch("os.getppid", autospec=True) + @patch("socket.socket", autospec=True) + @patch("calico.common.default_logging", autospec=True) + @patch("calico.etcddriver.driver.EtcdDriver", autospec=True) + def test_mainline(self, m_driver_cls, m_logging, m_socket, m_ppid): + m_ppid.return_value = 123 + m_driver = m_driver_cls.return_value + m_driver.join.side_effect = iter([ + False, + True + ]) + self._import_main() + self.assertEqual(m_driver.mock_calls, + [call.start(), + call.join(timeout=1), + call.join(timeout=1)]) + self.assertEqual(m_logging.mock_calls, + [call(gevent_in_use=False, + syslog_executable_name="calico-felix-etcd")]) + + @patch("os.getppid", autospec=True) + @patch("socket.socket", autospec=True) + @patch("calico.common.default_logging", autospec=True) + @patch("calico.etcddriver.driver.EtcdDriver", autospec=True) + def test_reparent(self, m_driver_cls, m_logging, m_socket, m_ppid): + m_ppid.side_effect = iter([123, 123, 1]) + m_driver = m_driver_cls.return_value + m_driver.join.return_value = False + self._import_main() + self.assertEqual(m_driver.mock_calls, + [call.start(), + call.join(timeout=1), + call.join(timeout=1), + call.stop()]) + + @patch("os.getppid", autospec=True) + @patch("socket.socket", autospec=True) + @patch("calico.common.default_logging", autospec=True) + @patch("calico.etcddriver.driver.EtcdDriver", autospec=True) + def test_connection_failure(self, m_driver_cls, m_logging, m_socket, + m_ppid): + m_ppid.side_effect = iter([123, 123, 1]) + m_sck = m_socket.return_value + m_sck.connect.side_effect = RuntimeError() + self.assertRaises(RuntimeError, self._import_main) + + def _import_main(self): + import calico.etcddriver.__main__ as main + _ = main # Keep linter happy + + def tearDown(self): + try: + del sys.modules["calico.etcddriver.__main__"] + except KeyError: + pass From d5fb12919b87afe161b3aa547512f42c57ed2f9b Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 14:33:04 +0000 Subject: [PATCH 88/98] More coverage for driver.py and some minor fixes: * Only stop the watcher for resync when we've finished the previous resync. * Extra logging around join(). * Fix non-determinism in FV test. --- calico/etcddriver/driver.py | 113 +++++++----- calico/etcddriver/test/stubs.py | 7 +- calico/etcddriver/test/test_driver.py | 245 ++++++++++++++++++++++++-- 3 files changed, 302 insertions(+), 63 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index fbb159ed2a..9f99724309 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -30,11 +30,14 @@ * resolving directory deletions so that if a directory is deleted, it tells Felix about all the individual keys that are deleted. """ - +from functools import partial from httplib import HTTPException import logging from Queue import Queue, Empty import socket + +from ijson import IncompleteJSONError, JSONError + try: # simplejson is a faster drop-in replacement. 
import simplejson as json @@ -123,13 +126,22 @@ def join(self, timeout=None): self._stop_event.wait(timeout=timeout) stopped = self._stop_event.is_set() if stopped: + self._resync_thread.join(timeout=timeout) - stopped &= not self._resync_thread.is_alive() + resync_alive = self._resync_thread.is_alive() + stopped &= not resync_alive + _log.debug("Resync thread alive: %s", resync_alive) + self._reader_thread.join(timeout=timeout) - stopped &= not self._reader_thread.is_alive() + reader_alive = self._reader_thread.is_alive() + stopped &= not reader_alive + _log.debug("Reader thread alive: %s", reader_alive) + try: self._watcher_thread.join(timeout=timeout) - stopped &= not self._watcher_thread.is_alive() + watcher_alive = self._watcher_thread.is_alive() + stopped &= not watcher_alive + _log.debug("Watcher thread alive: %s", watcher_alive) except AttributeError: pass return stopped @@ -243,6 +255,9 @@ def _resync_and_merge(self): if monotonic_time() - loop_start < 1: _log.debug("May be tight looping, sleeping...") time.sleep(1) + except DriverShutdown: + _log.info("Driver shut down.") + return except: _log.exception("Unexpected exception; shutting down.") self.stop() @@ -420,33 +435,9 @@ def _process_snapshot_and_events(self, etcd_response, snapshot_index): :param snapshot_index: the etcd index of the response. """ self._hwms.start_tracking_deletions() - - def _handle_etcd_node(snap_mod, snap_key, snap_value): - old_hwm = self._hwms.update_hwm(snap_key, snapshot_index) - if snap_mod > old_hwm: - # This specific key's HWM is newer than the previous - # version we've seen, send an update. - self._on_key_updated(snap_key, snap_value) - # After we process an update from the snapshot, process - # several updates from the watcher queue (if there are - # any). We limit the number to ensure that we always - # finish the snapshot eventually. - for _ in xrange(100): - if not self._watcher_queue or self._watcher_queue.empty(): - # Don't block on the watcher if there's nothing to do. - break - try: - self._handle_next_watcher_event() - except WatcherDied: - # Continue processing to ensure that we make - # progress. - _log.warning("Watcher thread died, continuing " - "with snapshot") - break - if self._stop_event.is_set(): - _log.error("Stop event set, exiting") - raise DriverShutdown() - parse_snapshot(etcd_response, _handle_etcd_node) + parse_snapshot(etcd_response, + callback=partial(self._handle_etcd_node, + snapshot_index=snapshot_index)) # Save occupancy by throwing away the deletion tracking metadata. self._hwms.stop_tracking_deletions() @@ -455,6 +446,34 @@ def _handle_etcd_node(snap_mod, snap_key, snap_value): # sweeps the ones we didn't touch. self._scan_for_deletions(snapshot_index) + def _handle_etcd_node(self, snap_mod, snap_key, snap_value, + snapshot_index=None): + assert snapshot_index is not None + old_hwm = self._hwms.update_hwm(snap_key, snapshot_index) + if snap_mod > old_hwm: + # This specific key's HWM is newer than the previous + # version we've seen, send an update. + self._on_key_updated(snap_key, snap_value) + # After we process an update from the snapshot, process + # several updates from the watcher queue (if there are + # any). We limit the number to ensure that we always + # finish the snapshot eventually. + for _ in xrange(100): + if not self._watcher_queue or self._watcher_queue.empty(): + # Don't block on the watcher if there's nothing to do. 
+                    break
+            try:
+                self._handle_next_watcher_event(resync_in_progress=True)
+            except WatcherDied:
+                # Continue processing to ensure that we make
+                # progress.
+                _log.warning("Watcher thread died, continuing "
+                             "with snapshot")
+                break
+            if self._stop_event.is_set():
+                _log.error("Stop event set, exiting")
+                raise DriverShutdown()
+
     def _process_events_only(self):
         """
         Loops processing the event stream from the watcher thread and feeding
@@ -465,7 +484,7 @@ def _process_events_only(self):
         """
         _log.info("In sync, now processing events only...")
         while not self._stop_event.is_set():
-            self._handle_next_watcher_event()
+            self._handle_next_watcher_event(resync_in_progress=False)
             self._msg_writer.flush()

     def _scan_for_deletions(self, snapshot_index):
@@ -486,7 +505,7 @@ def _scan_for_deletions(self, snapshot_index):
             self._on_key_updated(ev_key, None)
         _log.info("Found %d deleted keys", len(deleted_keys))

-    def _handle_next_watcher_event(self):
+    def _handle_next_watcher_event(self, resync_in_progress):
         """
         Waits for an event on the watcher queue and sends it to Felix.

         :raises DriverShutdown:
@@ -497,7 +516,11 @@ def _handle_next_watcher_event(self):
         if self._watcher_queue is None:
             raise WatcherDied()
         while not self._stop_event.is_set():
-            if self._resync_requested and self._watcher_stop_event:
+            # To make sure we always make progress, only trigger a new resync
+            # if we're not in the middle of one.
+            if (not resync_in_progress and
+                    self._resync_requested and
+                    self._watcher_stop_event):
                 _log.info("Resync requested, triggering one.")
                 self._watcher_stop_event.set()
                 raise WatcherDied()
@@ -695,15 +718,19 @@ def parse_snapshot(resp, callback):
                         resp.status)

     parser = ijson.parse(resp)  # urllib3 response is file-like.
-    prefix, event, value = next(parser)
-    _log.debug("Read first token from response %s, %s, %s", prefix, event,
-               value)
-    if event == "start_map":
-        # As expected, response is a map.
-        _parse_map(parser, callback)
-    else:
-        _log.error("Response from etcd did non contain a JSON map.")
-        raise ResyncRequired("Bad response from etcd")
+    try:
+        prefix, event, value = next(parser)
+        _log.debug("Read first token from response %s, %s, %s", prefix, event,
+                   value)
+        if event == "start_map":
+            # As expected, response is a map.
+            _parse_map(parser, callback)
+        else:
+            _log.error("Response from etcd did not contain a JSON map.")
+            raise ResyncRequired("Bad response from etcd")
+    except JSONError:
+        _log.exception("Response from etcd contains bad JSON.")
+        raise ResyncRequired("Bad JSON from etcd")


 def _parse_map(parser, callback):
diff --git a/calico/etcddriver/test/stubs.py b/calico/etcddriver/test/stubs.py
index bc84085068..27cadd1010 100644
--- a/calico/etcddriver/test/stubs.py
+++ b/calico/etcddriver/test/stubs.py
@@ -101,6 +101,7 @@ class PipeFile(object):
     def __init__(self):
         self.queue = Queue()
         self.buf = None
+        self._finished = False

     def read(self, length):
         data = ""
@@ -120,9 +121,11 @@ def read(self, length):

     def write(self, data):
         self.queue.put(data)
+        if data == "" or isinstance(data, Exception):
+            self._finished = True

     def __del__(self):
-        self.queue.put("")
+        assert self._finished, "PipeFile wasn't correctly finished."


 class StubEtcd(object):
@@ -160,11 +163,13 @@ def assert_request(self, expected_key, **expected_args):
         """
         Asserts the properies of the next request.
""" + _log.info("Waiting for request for key %s") key, args = self.get_next_request() default_args = {'wait_index': None, 'preload_content': None, 'recursive': False, 'timeout': 5} + _log.info("Got request for key %s") for k, v in default_args.iteritems(): if k in args and args[k] == v: del args[k] diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index 4d29050d23..513ee6a064 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -18,15 +18,20 @@ Tests for the etcd driver module. """ +import json from Queue import Empty -from unittest2 import TestCase, SkipTest + +import time +from unittest import TestCase from mock import Mock, patch, call from urllib3 import HTTPConnectionPool from urllib3.exceptions import TimeoutError from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR -from calico.etcddriver.driver import EtcdDriver, DriverShutdown +from calico.etcddriver import driver +from calico.etcddriver.driver import EtcdDriver, DriverShutdown, ResyncRequired, \ + WatcherDied from calico.etcddriver.protocol import * from calico.etcddriver.test.stubs import ( StubMessageReader, StubMessageWriter, StubEtcd, @@ -35,6 +40,9 @@ _log = logging.getLogger(__name__) +patch.object = getattr(patch, "object") # Keep PyCharm linter happy. + + class TestEtcdDriverFV(TestCase): """ FV-level tests for the driver. These tests run a real copy of the driver @@ -105,7 +113,7 @@ def test_mainline_resync(self): }) # Respond to the watcher with another event. self.watcher_etcd.respond_with_value( - "/calico/v1/adir/dkey", + "/calico/v1/adir2/dkey", "d", mod_index=13, action="set" @@ -128,7 +136,7 @@ def test_mainline_resync(self): # The resync event would be generated first but we should should only # see the watcher event. self.assert_msg_to_felix(MSG_TYPE_UPDATE, { - MSG_KEY_KEY: "/calico/v1/adir/dkey", + MSG_KEY_KEY: "/calico/v1/adir2/dkey", MSG_KEY_VALUE: "d", }) # Finish the snapshot. @@ -142,22 +150,27 @@ def test_mainline_resync(self): } } ''') + snap_stream.write("") # Close the response. # Should get the in-sync message. (No event for Ready flag due to # HWM. self.assert_status_message(STATUS_IN_SYNC) # Now send a watcher event, which should go straight through. self.send_watcher_event_and_assert_felix_msg(14) - def test_second_resync(self): - try: - # Start by going through the first resync. - self.test_mainline_resync() - except AssertionError: - _log.exception("Mainline resync test failed") - raise SkipTest("Mainline resync test failed to initialise driver") + # Check the contents of the trie. + keys = set(self.driver._hwms._hwms.keys()) + self.assertEqual(keys, set([u'/calico/v1/Ready/', + u'/calico/v1/adir/akey/', + u'/calico/v1/adir/bkey/', + u'/calico/v1/adir/ckey/', + u'/calico/v1/adir2/dkey/', + u'/calico/v1/adir/ekey/'])) - # Felix sends a resync message. - self.msg_reader.send_msg(MSG_TYPE_RESYNC, {}) + def test_felix_triggers_resync(self): + self._run_initial_resync() + + # Send a resync request from Felix. + self.send_resync_and_wait_for_flag() # Wait for the watcher to make its request. self.watcher_etcd.assert_request( @@ -206,7 +219,7 @@ def test_second_resync(self): # Finish the snapshot. snap_stream.write(''' { - "key": "/calico/v1/adir/dkey", + "key": "/calico/v1/adir2/dkey", "value": "c", "modifiedIndex": 8 }, @@ -219,6 +232,7 @@ def test_second_resync(self): } } ''') + snap_stream.write("") # Close the response. # Should get a deletion for the keys that were missing in this # snapshot. 
self.assert_msg_to_felix(MSG_TYPE_UPDATE, { @@ -231,6 +245,97 @@ def test_second_resync(self): # Now send a watcher event, which should go straight through. self.send_watcher_event_and_assert_felix_msg(104) + def send_resync_and_wait_for_flag(self): + # Felix sends a resync message. + self.msg_reader.send_msg(MSG_TYPE_RESYNC, {}) + + # For determinism, wait for the message to be processed. + for _ in xrange(100): + if self.driver._resync_requested: + break + time.sleep(0.01) + else: + self.fail("Resync flag never got set.") + + def test_directory_deletion(self): + self._run_initial_resync() + # For coverage: Nothing happens for a while, poll times out. + self.watcher_etcd.respond_with_exception( + driver.ReadTimeoutError(Mock(), "", "") + ) + # For coverage: Then a set to a dir, which should be ignored. + self.watcher_etcd.respond_with_data(json.dumps({ + "action": "create", + "node": { + "key": "/calico/v1/foo", + "dir": True + } + }), 100, 200) + # Then a whole directory is deleted. + self.watcher_etcd.respond_with_value( + "/calico/v1/adir", + value=None, + action="delete", + mod_index=101, + status=300 # For coverage of warning log. + ) + # Should get individual deletes for each one then a flush. We're + # relying on the trie returning sorted results here. + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/akey", + MSG_KEY_VALUE: None, + }) + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/bkey", + MSG_KEY_VALUE: None, + }) + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ckey", + MSG_KEY_VALUE: None, + }) + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ekey", + MSG_KEY_VALUE: None, + }) + self.assert_flush_to_felix() + + # Check the contents of the trie. + keys = set(self.driver._hwms._hwms.keys()) + self.assertEqual(keys, set([u'/calico/v1/Ready/', + u'/calico/v1/adir2/dkey/'])) + + def _run_initial_resync(self): + try: + # Start by going through the first resync. + self.test_mainline_resync() + except AssertionError: + _log.exception("Mainline resync test failed, aborting test %s", + self.id()) + raise AssertionError("Mainline resync test failed to " + "initialise driver") + + def test_root_directory_deletion(self): + self._run_initial_resync() + # Delete the whole /calico/v1 dir. + self.watcher_etcd.respond_with_data(json.dumps({ + "action": "delete", + "node": { + "key": "/calico/v1/", + "dir": True + } + }), 100, 200) + + # Should trigger a resync. + self.assert_status_message(STATUS_WAIT_FOR_READY) + + def test_garbage_watcher_response(self): + self._run_initial_resync() + # Delete the whole /calico/v1 dir. + self.watcher_etcd.respond_with_data("{foobar", 100, 200) + + # Should trigger a resync. + self.assert_status_message(STATUS_WAIT_FOR_READY) + def send_watcher_event_and_assert_felix_msg(self, etcd_index): self.watcher_etcd.respond_with_value( "/calico/v1/adir/ekey", @@ -312,15 +417,14 @@ def do_handshake(self): # Followed by the per-host one... self.resync_etcd.assert_request("/calico/v1/host/thehostname/config", recursive=True) - self.resync_etcd.respond_with_dir(CONFIG_DIR, { - "/calico/v1/host/thehostname/config/LogSeverityFile": "DEBUG" - }) + self.resync_etcd.respond_with_data('{"errorCode": 100}', + 10, 404) # Then the driver should send the config to Felix. 
self.assert_msg_to_felix( MSG_TYPE_CONFIG_LOADED, { MSG_KEY_GLOBAL_CONFIG: {"InterfacePrefix": "tap"}, - MSG_KEY_HOST_CONFIG: {"LogSeverityFile": "DEBUG"}, + MSG_KEY_HOST_CONFIG: {}, } ) self.assert_flush_to_felix() @@ -377,6 +481,7 @@ def start_snapshot_response(self, etcd_index=10): return snap_stream def assert_status_message(self, status): + _log.info("Expecting %s status from driver...", status) self.assert_msg_to_felix( MSG_TYPE_STATUS, {MSG_KEY_STATUS: status} @@ -438,16 +543,18 @@ def mock_etcd_request(self, http_pool, key, timeout=5, wait_index=None, preload_content=preload_content) def tearDown(self): + _log.info("Tearing down test") try: # Request that the driver stops. self.driver.stop() # Make sure we don't block the driver from stopping. self.msg_reader.send_timeout() + # SystemExit kills (only) the thread silently. self.resync_etcd.respond_with_exception(SystemExit()) self.watcher_etcd.respond_with_exception(SystemExit()) # Wait for it to stop. - self.assertTrue(self.driver.join(1), "Driver failed to stop") + self.assertTrue(self.driver.join(0.1), "Driver failed to stop") finally: # Now the driver is stopped, it's safe to remove our patch of # complete_logging() @@ -521,3 +628,103 @@ def test_cluster_id_check(self): m_resp) self.assertTrue(self.driver._stop_event.is_set()) + def test_load_config_bad_data(self): + with patch.object(self.driver, "_etcd_request") as m_etcd_req: + m_resp = Mock() + m_resp.data = "{garbage" + m_etcd_req.return_value = m_resp + self.assertRaises(ResyncRequired, + self.driver._load_config, "/calico/v1/config") + + def test_start_snap_missing_cluster_id(self): + with patch.object(self.driver, "_etcd_request") as m_etcd_req: + m_resp = Mock() + m_resp.getheader.return_value = 123 + m_etcd_req.return_value = m_resp + self.assertRaises(ResyncRequired, + self.driver._start_snapshot_request) + + def test_cluster_id_missing(self): + m_resp = Mock() + m_resp.getheader.return_value = None + self.driver._check_cluster_id(m_resp) + self.assertEqual(m_resp.getheader.mock_calls, + [call("x-etcd-cluster-id")]) + + def test_watcher_dies_during_resync(self): + self.driver.stop() + with patch.object(self.driver, "_on_key_updated") as m_on_key: + with patch.object(self.driver, + "_handle_next_watcher_event") as m_handle: + m_queue = Mock() + m_queue.empty.return_value = False + m_handle.side_effect = WatcherDied() + self.driver._watcher_queue = m_queue + self.assertRaises(DriverShutdown, + self.driver._handle_etcd_node, + 123, "/calico/v1/foo", "bar", + snapshot_index=1000) + + def test_handle_next_watcher_died(self): + self.driver._watcher_queue = None + self.assertRaises(WatcherDied, self.driver._handle_next_watcher_event, + False) + + def test_handle_next_queue_empty(self): + m_queue = Mock() + m_queue.get.side_effect = iter([ + Empty(), + RuntimeError() + ]) + self.driver._watcher_queue = m_queue + self.assertRaises(RuntimeError, + self.driver._handle_next_watcher_event, + False) + + def test_handle_next_stopped(self): + self.driver._watcher_queue = Mock() + self.driver.stop() + self.assertRaises(DriverShutdown, + self.driver._handle_next_watcher_event, + False) + + def test_ready_key_set_to_false(self): + self.assertRaises(ResyncRequired, + self.driver._on_key_updated, READY_KEY, "false") + + def test_watch_etcd_error_from_etcd(self): + m_queue = Mock() + m_stop_ev = Mock() + m_stop_ev.is_set.return_value = False + with patch.object(self.driver, "get_etcd_connection") as m_get_conn: + with patch.object(self.driver, "_etcd_request") as m_req: + with 
patch.object(self.driver, "_check_cluster_id") as m_check: + m_resp = Mock() + m_resp.data = json.dumps({"errorCode": 100}) + m_req.side_effect = iter([ + m_resp, + AssertionError() + ]) + self.driver.watch_etcd(10, m_queue, m_stop_ev) + + def test_parse_snapshot_bad_status(self): + m_resp = Mock() + m_resp.status = 500 + self.assertRaises(ResyncRequired, driver.parse_snapshot, + m_resp, Mock()) + + def test_parse_snapshot_bad_data(self): + m_resp = Mock() + m_resp.status = 200 + m_resp.read.return_value = "[]" + self.assertRaises(ResyncRequired, driver.parse_snapshot, + m_resp, Mock()) + + def test_parse_snapshot_garbage_data(self): + m_resp = Mock() + m_resp.status = 200 + m_resp.read.return_value = "garbage" + self.assertRaises(ResyncRequired, driver.parse_snapshot, + m_resp, Mock()) + + From 486798be99a6875410ff039a92484d768a2eebe9 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 14:34:15 +0000 Subject: [PATCH 89/98] Convince PyCharm's linter that patch.object exists. Removes many warnings. --- calico/felix/test/base.py | 2 ++ calico/felix/test/test_endpoint.py | 2 +- calico/felix/test/test_fetcd.py | 2 ++ calico/felix/test/test_fiptables.py | 1 + calico/felix/test/test_ipsets.py | 1 + calico/test/test_etcdutils.py | 1 + 6 files changed, 8 insertions(+), 1 deletion(-) diff --git a/calico/felix/test/base.py b/calico/felix/test/base.py index 4ffbaefa54..972e6fc71a 100644 --- a/calico/felix/test/base.py +++ b/calico/felix/test/base.py @@ -26,6 +26,8 @@ import mock +mock.patch.object = getattr(mock.patch, "object") # Keep PyCharm linter happy. + _log = logging.getLogger(__name__) diff --git a/calico/felix/test/test_endpoint.py b/calico/felix/test/test_endpoint.py index a6388b7af9..c03de5ffdb 100644 --- a/calico/felix/test/test_endpoint.py +++ b/calico/felix/test/test_endpoint.py @@ -20,7 +20,6 @@ """ from contextlib import nested import logging -from neutron.common.constants import IPv4 from calico.felix.endpoint import EndpointManager, LocalEndpoint from calico.felix.fetcd import EtcdAPI, EtcdStatusReporter from calico.felix.fiptables import IptablesUpdater @@ -40,6 +39,7 @@ _log = logging.getLogger(__name__) +mock.patch.object = getattr(mock.patch, "object") # Keep PyCharm linter happy. ENDPOINT_ID = EndpointId("hostname", "b", "c", "d") diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 2e91630e4a..24fb3cd8ba 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -41,6 +41,8 @@ _log = logging.getLogger(__name__) +patch.object = getattr(patch, "object") # Keep PyCharm linter happy. + VALID_ENDPOINT = { "state": "active", "name": "tap1234", diff --git a/calico/felix/test/test_fiptables.py b/calico/felix/test/test_fiptables.py index 9cc0672c4c..22a20a3d94 100644 --- a/calico/felix/test/test_fiptables.py +++ b/calico/felix/test/test_fiptables.py @@ -31,6 +31,7 @@ _log = logging.getLogger(__name__) +patch.object = getattr(patch, "object") # Keep PyCharm linter happy. EXTRACT_UNREF_TESTS = [ ("""Chain INPUT (policy DROP) diff --git a/calico/felix/test/test_ipsets.py b/calico/felix/test/test_ipsets.py index 52d8dea18b..ee957370d9 100644 --- a/calico/felix/test/test_ipsets.py +++ b/calico/felix/test/test_ipsets.py @@ -37,6 +37,7 @@ # Logger _log = logging.getLogger(__name__) +patch.object = getattr(patch, "object") # Keep PyCharm linter happy. 
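The patch.object = getattr(patch, "object") line above is a no-op at runtime: mock attaches the object attribute to the patch function dynamically, so some static analysers cannot see it and flag every patch.object() call. Re-binding the attribute to itself makes it statically visible without changing behaviour; a minimal check of that assumption:

    from mock import patch

    before = patch.object
    patch.object = getattr(patch, "object")
    assert patch.object is before  # Same callable as before the assignment.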
EP_ID_1_1 = EndpointId("host1", "orch", "wl1_1", "ep1_1") EP_1_1 = { diff --git a/calico/test/test_etcdutils.py b/calico/test/test_etcdutils.py index 8f274a80eb..01354a9cf9 100644 --- a/calico/test/test_etcdutils.py +++ b/calico/test/test_etcdutils.py @@ -38,6 +38,7 @@ _log = logging.getLogger(__name__) +patch.object = getattr(patch, "object") # Keep PyCharm linter happy. SAME_AS_KEY = object() From 8e10582cb4c8f62631e00df18bc0467304148b74 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 15:25:32 +0000 Subject: [PATCH 90/98] More coverage for driver.py and hwm.py. --- calico/etcddriver/driver.py | 17 ++-- calico/etcddriver/hwm.py | 2 +- calico/etcddriver/protocol.py | 4 +- calico/etcddriver/test/stubs.py | 27 ++++--- calico/etcddriver/test/test_driver.py | 109 ++++++++++++++++++++++++-- calico/etcddriver/test/test_hwm.py | 10 ++- 6 files changed, 141 insertions(+), 28 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 9f99724309..3760be2bfe 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -126,7 +126,6 @@ def join(self, timeout=None): self._stop_event.wait(timeout=timeout) stopped = self._stop_event.is_set() if stopped: - self._resync_thread.join(timeout=timeout) resync_alive = self._resync_thread.is_alive() stopped &= not resync_alive @@ -673,12 +672,7 @@ def watch_etcd(self, next_index, event_queue, stop_event): is_dir = node.get("dir", False) value = node.get("value") if is_dir: - if action != "delete": - # Just ignore sets to directories, we only track - # leaves. - _log.debug("Skipping non-delete to dir %s", key) - continue - else: + if action == "delete": if key.rstrip("/") in (VERSION_DIR, ROOT_DIR): # Special case: if the whole keyspace is # deleted, that implies the ready flag is gone @@ -687,6 +681,11 @@ def watch_etcd(self, next_index, event_queue, stop_event): _log.warning("Whole %s deleted, resyncing", VERSION_DIR) break + else: + # Just ignore sets to directories, we only track + # leaves. + _log.debug("Skipping non-delete to dir %s", key) + continue modified_index = node["modifiedIndex"] except (KeyError, TypeError, ValueError): _log.exception("Unexpected format for etcd response: %r;" @@ -770,7 +769,9 @@ def _parse_map(parser, callback): break else: raise ValueError("Unexpected: %s" % event) - elif event == "end_map": + else: + assert event == "end_map", ("Unexpected JSON event %s %s %s" % + (prefix, event, value)) if (node_key is not None and node_value is not None and mod_index is not None): diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py index 0ffbe6336d..5d3d8ecf2d 100644 --- a/calico/etcddriver/hwm.py +++ b/calico/etcddriver/hwm.py @@ -119,7 +119,7 @@ def update_hwm(self, key, hwm): # We're tracking deletions, check that this key hasn't been # deleted. del_hwm = self._deletion_hwms.longest_prefix_value(key, None) - if del_hwm > hwm: + if hwm < del_hwm: _log.debug("Key %s previously deleted, skipping", key) return del_hwm try: diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index 78b987b34c..c2204c478c 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -167,4 +167,6 @@ def new_messages(self, timeout=1): # generate some messages. self._unpacker.feed(data) for msg in self._unpacker: - yield msg[MSG_KEY_TYPE], msg + _log.debug("Unpacked message: %s", msg) + # coverage.py doesn't fully support yield statements. 
+ yield msg[MSG_KEY_TYPE], msg # pragma: nocover diff --git a/calico/etcddriver/test/stubs.py b/calico/etcddriver/test/stubs.py index 27cadd1010..d886931164 100644 --- a/calico/etcddriver/test/stubs.py +++ b/calico/etcddriver/test/stubs.py @@ -157,13 +157,14 @@ def get_next_request(self): """ Called from the test to get the next request from the driver. """ - return self.request_queue.get(timeout=10) + return self.request_queue.get(timeout=1) def assert_request(self, expected_key, **expected_args): """ Asserts the properies of the next request. """ - _log.info("Waiting for request for key %s") + _log.info("Waiting for request for key %s, %s", + expected_key, expected_args) key, args = self.get_next_request() default_args = {'wait_index': None, 'preload_content': None, @@ -187,19 +188,18 @@ def respond_with_exception(self, exc): """ self.response_queue.put(exc) - def respond_with_value(self, key, value, mod_index=None, + def respond_with_value(self, key, value, dir=False, mod_index=None, etcd_index=None, status=200, action="get"): """ Called from the test to return a simple single-key value to the driver. """ + node = {"key": key, "value": value, "modifiedIndex": mod_index} + if dir: + node["dir"] = True data = json.dumps({ "action": action, - "node": { - "key": key, - "value": value, - "modifiedIndex": mod_index, - } + "node": node }) self.respond_with_data(data, etcd_index, status) @@ -209,8 +209,15 @@ def respond_with_dir(self, key, children, mod_index=None, Called from the test to return a directory of key/values (from a recursive request). """ - nodes = [{"key": k, "value": v, "modifiedIndex": mod_index} - for (k, v) in children.iteritems()] + nodes = [] + for k, v in children.iteritems(): + if v is not None: + nodes.append({"key": k, "value": v, + "modifiedIndex": mod_index}) + else: + nodes.append({"key": k, "dir": True, + "modifiedIndex": mod_index, + "nodes": []}) data = json.dumps({ "action": "get", "node": { diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index 513ee6a064..984e4a4a4c 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -22,16 +22,17 @@ from Queue import Empty import time +from StringIO import StringIO from unittest import TestCase from mock import Mock, patch, call from urllib3 import HTTPConnectionPool -from urllib3.exceptions import TimeoutError - +from urllib3.exceptions import TimeoutError, HTTPError from calico.datamodel_v1 import READY_KEY, CONFIG_DIR, VERSION_DIR from calico.etcddriver import driver -from calico.etcddriver.driver import EtcdDriver, DriverShutdown, ResyncRequired, \ - WatcherDied +from calico.etcddriver.driver import ( + EtcdDriver, DriverShutdown, ResyncRequired, WatcherDied, ijson +) from calico.etcddriver.protocol import * from calico.etcddriver.test.stubs import ( StubMessageReader, StubMessageWriter, StubEtcd, @@ -166,6 +167,61 @@ def test_mainline_resync(self): u'/calico/v1/adir2/dkey/', u'/calico/v1/adir/ekey/'])) + + def test_many_events_during_resync(self): + """ + Test of the mainline resync-and-merge processing. + + * Does the initial config handshake with Felix. + * Interleaves the snapshot response with updates via the watcher. + * Checks that the result is correctly merged. + """ + # Initial handshake. + self.start_driver_and_handshake() + + # Check for etcd request and start the response. + snap_stream = self.start_snapshot_response() + + # Respond to the watcher, this should get merged into the event + # stream at some point later. 
+ for ii in xrange(200): + self.watcher_etcd.respond_with_value( + "/calico/v1/adir/bkey", + "watch", + mod_index=11 + ii, + action="set" + ) + self.watcher_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=90, wait_index=12 + ii + ) + snap_stream.write(''' + { + "key": "/calico/v1/adir/bkey", + "value": "snap", + "modifiedIndex": 8 + }, + { + "key": "/calico/v1/Ready", + "value": "true", + "modifiedIndex": 10 + }] + }] + } + } + ''') + snap_stream.write("") + + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/bkey", + MSG_KEY_VALUE: "snap", + }) + for _ in xrange(200): + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/bkey", + MSG_KEY_VALUE: "watch", + }) + self.assert_status_message(STATUS_IN_SYNC) + def test_felix_triggers_resync(self): self._run_initial_resync() @@ -274,6 +330,7 @@ def test_directory_deletion(self): # Then a whole directory is deleted. self.watcher_etcd.respond_with_value( "/calico/v1/adir", + dir=True, value=None, action="delete", mod_index=101, @@ -412,7 +469,8 @@ def do_handshake(self): # Then etcd should get the global config request. self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) self.resync_etcd.respond_with_dir(CONFIG_DIR, { - CONFIG_DIR + "/InterfacePrefix": "tap" + CONFIG_DIR + "/InterfacePrefix": "tap", + CONFIG_DIR + "/Foo": None, # Directory }) # Followed by the per-host one... self.resync_etcd.assert_request("/calico/v1/host/thehostname/config", @@ -727,4 +785,45 @@ def test_parse_snapshot_garbage_data(self): self.assertRaises(ResyncRequired, driver.parse_snapshot, m_resp, Mock()) + def test_resync_driver_stopped(self): + self.driver._init_received.set() + with patch.object(self.driver, "get_etcd_connection") as m_get: + m_get.side_effect = DriverShutdown() + self.driver._resync_and_merge() + + @patch("time.sleep") + def test_resync_http_error(self, m_sleep): + self.driver._init_received.set() + with patch.object(self.driver, "get_etcd_connection") as m_get: + with patch("calico.etcddriver.driver.monotonic_time") as m_time: + m_time.side_effect = iter([ + 1, 10, RuntimeError() + ]) + m_get.side_effect = HTTPError() + self.assertRaises(RuntimeError, self.driver._resync_and_merge) + + def test_parse_snap_error_from_etcd(self): + parser = ijson.parse(StringIO(json.dumps({ + "errorCode": 100 + }))) + next(parser) + self.assertRaises(ResyncRequired, driver._parse_map, parser, None) + + def test_parse_snap_bad_data(self): + parser = ijson.parse(StringIO(json.dumps({ + "nodes": [ + "foo" + ] + }))) + next(parser) + self.assertRaises(ValueError, driver._parse_map, parser, None) + + def test_join_not_stopped(self): + with patch.object(self.driver._stop_event, "wait"): + self.assertFalse(self.driver.join()) + + def test_process_events_stopped(self): + self.driver._stop_event.set() + self.driver._process_events_only() + diff --git a/calico/etcddriver/test/test_hwm.py b/calico/etcddriver/test/test_hwm.py index 7384352ebf..a0dbca98c2 100644 --- a/calico/etcddriver/test/test_hwm.py +++ b/calico/etcddriver/test/test_hwm.py @@ -94,6 +94,9 @@ def test_mainline(self): old_hwm = self.hwm.update_hwm("/a/$/g", 10) self.assertEqual(old_hwm, 13) # Returns the etcd_index of the delete. self.assertEqual(len(self.hwm), 5) + # But ones outside the subtree ar not. + old_hwm = self.hwm.update_hwm("/f/g", 10) + self.assertEqual(old_hwm, None) # And subsequent updates are processed ignoring the delete. 
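# A toy version of the deletion tracking exercised here (the real
# tracker uses a prefix trie; a linear scan is enough to show the
# idea): a directory delete is recorded once, and later updates below
# that prefix are checked against it.
deletions = {}  # Deleted prefix -> etcd index of the delete.

def deleted_at(key):
    # Return the newest recorded delete that covers this key, if any.
    best = None
    for prefix, idx in deletions.items():
        if key.startswith(prefix) and (best is None or idx > best):
            best = idx
    return best

deletions["/a/"] = 13              # Whole /a/ subtree deleted at 13.
assert deleted_at("/a/b/c") == 13  # An update at index 10 is stale.
assert deleted_at("/f/g") is None  # Keys outside the subtree are not.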
old_hwm = self.hwm.update_hwm("/a/$/f", 16) self.assertEqual(old_hwm, 15) @@ -110,18 +113,18 @@ def test_mainline(self): self.assertEqual(old_hwm, None) # Seen for the first time. old_hwm = self.hwm.update_hwm("/d/e/f", 19) self.assertEqual(old_hwm, 10) # From the snapshot. - self.assertEqual(len(self.hwm), 6) + self.assertEqual(len(self.hwm), 7) # We should be able to find all the keys that weren't seen during # the snapshot. old_keys = self.hwm.remove_old_keys(10) self.assertEqual(set(old_keys), set(["/b/c/d", "/j/c/d"])) - self.assertEqual(len(self.hwm), 4) + self.assertEqual(len(self.hwm), 5) # They should now be gone from the index. old_hwm = self.hwm.update_hwm("/b/c/d", 20) self.assertEqual(old_hwm, None) - self.assertEqual(len(self.hwm), 5) + self.assertEqual(len(self.hwm), 6) class TestKeyEncoding(TestCase): @@ -132,6 +135,7 @@ def test_encode_key(self): self.assert_enc_dec("/:_-.~/foo", "/:_-.%7E/foo/") self.assert_enc_dec("/%/foo", "/%25/foo/") self.assert_enc_dec(u"/\u01b1/foo", "/%C6%B1/foo/") + self.assertEqual(hwm.encode_key("/foo/"), "/foo/") def assert_enc_dec(self, key, expected_encoding): encoded = hwm.encode_key(key) From ec4aa175d623dcf0f1e1e60e1c8d17dcabfba5e5 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Fri, 6 Nov 2015 16:03:28 +0000 Subject: [PATCH 91/98] Cover splitter.py. --- calico/felix/test/test_splitter.py | 135 +++++------------------------ 1 file changed, 22 insertions(+), 113 deletions(-) diff --git a/calico/felix/test/test_splitter.py b/calico/felix/test/test_splitter.py index ca8ce8c146..2b1c73e812 100644 --- a/calico/felix/test/test_splitter.py +++ b/calico/felix/test/test_splitter.py @@ -31,6 +31,7 @@ # A mocked config object for use in the UpdateSplitter. Config = collections.namedtuple('Config', ['STARTUP_CLEANUP_DELAY']) + class TestUpdateSplitter(BaseTestCase): """ Tests for the UpdateSplitter actor. @@ -55,118 +56,19 @@ def get_splitter(self): self.iptables_updaters, self.masq_manager ) - # - # def test_apply_whole_snapshot_clean(self): - # """ - # Test that a whole snapshot applies cleanly to all managers. - # """ - # # We apply a simple sentinel map. The exact map we use really shouldn't - # # matter here. We do, however, use different ones for rules, tags, and - # # endpoints. - # rules = {'profileA': ['first rule', 'second rule']} - # tags = {'profileA': ['first tag', 'second tag']} - # endpoints = {'endpointA': 'endpoint object'} - # ipv4_pools_by_id = {"10.0.0.1-5": {"cidr": "10.0.0.1/5", - # "masquerade": True}} - # s = self.get_splitter() - # - # # Apply the snapshot and let it run. - # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - # self.step_actor(s) - # - # # At this point, each of our managers should have been notified (one - # # call to apply_snapshot), but cleanup should not have occurred. - # for mgr in self.ipsets_mgrs: - # mgr.apply_snapshot.assertCalledOnceWith( - # tags, endpoints, async=True - # ) - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.rules_mgrs: - # mgr.apply_snapshot.assertCalledOnceWith(rules, async=True) - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.endpoint_mgrs: - # mgr.apply_snapshot.assertCalledOnceWith(endpoints, async=True) - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.iptables_updaters: - # self.assertEqual(mgr.cleanup.call_count, 0) - # self.masq_manager.apply_snapshot.assert_called_once_with( - # ipv4_pools_by_id, async=True) - # - # # If we spin the scheduler again, we should begin cleanup. 
- # # Warning: this might be a bit brittle, we may not be waiting long - # # enough here, at least on busy machines. - # gevent.sleep(0.1) - # self.step_actor(s) - # - # # Confirm that we cleaned up. Cleanup only affects the - # # iptables_updaters and the ipsets_managers, so confirm the other - # # managers got left alone. - # for mgr in self.ipsets_mgrs: - # mgr.cleanup.assertCalledOnceWith(async=False) - # for mgr in self.rules_mgrs: - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.endpoint_mgrs: - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.iptables_updaters: - # mgr.cleanup.assertCalledOnceWith(async=False) - # - # def test_repeated_snapshots_clean_up_only_once(self): - # """ - # Test that repeated snapshots only clean up once. - # """ - # # We apply a simple sentinel map. The exact map we use really shouldn't - # # matter here. We do, however, use different ones for rules, tags, and - # # endpoints. - # rules = {'profileA': ['first rule', 'second rule']} - # tags = {'profileA': ['first tag', 'second tag']} - # endpoints = {'endpointA': 'endpoint object'} - # ipv4_pools_by_id = {} - # s = self.get_splitter() - # - # # Apply three snapshots and let them run. Because of batching logic, - # # we should only need to spin the actor once. - # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - # s.apply_snapshot(rules, tags, endpoints, ipv4_pools_by_id, async=True) - # self.step_actor(s) - # - # # At this point, each of our managers should have been notified (one - # # call to apply_snapshot), but cleanup should not have occurred. - # for mgr in self.ipsets_mgrs: - # mgr.apply_snapshot.assertCalledWith( - # tags, endpoints, async=True - # ) - # self.assertEqual(mgr.apply_snapshot.call_count, 3) - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.rules_mgrs: - # mgr.apply_snapshot.assertCalledWith(rules, async=True) - # self.assertEqual(mgr.apply_snapshot.call_count, 3) - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.endpoint_mgrs: - # mgr.apply_snapshot.assertCalledWith(endpoints, async=True) - # self.assertEqual(mgr.apply_snapshot.call_count, 3) - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.iptables_updaters: - # self.assertEqual(mgr.cleanup.call_count, 0) - # self.assertEqual(self.masq_manager.apply_snapshot.call_count, 3) - # - # # If we spin the scheduler again, we should begin cleanup. - # # Warning: this might be a bit brittle, we may not be waiting long - # # enough here, at least on busy machines. - # gevent.sleep(0.1) - # self.step_actor(s) - # - # # Confirm that we cleaned up. Cleanup only affects the - # # iptables_updaters and the ipsets_managagers, so confirm the other - # # managers got left alone. 
- # for mgr in self.ipsets_mgrs: - # mgr.cleanup.assertCalledOnceWith(async=False) - # for mgr in self.rules_mgrs: - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.endpoint_mgrs: - # self.assertEqual(mgr.cleanup.call_count, 0) - # for mgr in self.iptables_updaters: - # mgr.cleanup.assertCalledOnceWith(async=False) + + def test_on_datamodel_in_sync(self): + s = self.get_splitter() + with mock.patch("gevent.spawn_later") as m_spawn: + s.on_datamodel_in_sync(async=True) + s.on_datamodel_in_sync(async=True) + self.step_actor(s) + self.assertTrue(s._cleanup_scheduled) + self.assertEqual(m_spawn.mock_calls, + [mock.call(0, mock.ANY)]) + for mgr in self.ipsets_mgrs + self.rules_mgrs + self.endpoint_mgrs: + self.assertEqual(mgr.on_datamodel_in_sync.mock_calls, + [mock.call(async=True), mock.call(async=True)]) def test_cleanup_give_up_on_exception(self): """ @@ -178,13 +80,20 @@ def test_cleanup_give_up_on_exception(self): # However, make sure that the first ipset manager and the first # iptables updater throw exceptions when called. self.ipsets_mgrs[0].cleanup.side_effect = RuntimeError('Bang!') - self.iptables_updaters[0].cleanup.side_effect = RuntimeError('Bang!') # Start the cleanup. result = s.trigger_cleanup(async=True) self.step_actor(s) self.assertRaises(RuntimeError, result.get) + def test_cleanup_mainline(self): + # No need to apply any data here. + s = self.get_splitter() + # Start the cleanup. + result = s.trigger_cleanup(async=True) + self.step_actor(s) + result.get() + def test_rule_updates_propagate(self): """ Test that the on_rules_update message propagates correctly. From 73f6c1e6d08ffd17f5ed49d091effa1f0682bc7c Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Mon, 9 Nov 2015 15:10:18 +0000 Subject: [PATCH 92/98] Update dependencies. --- felix_requirements.txt | 1 + rpm/calico.spec | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/felix_requirements.txt b/felix_requirements.txt index 260108f1fa..00019c577d 100644 --- a/felix_requirements.txt +++ b/felix_requirements.txt @@ -6,3 +6,4 @@ posix-spawn>=0.2.post6 datrie>=0.7 ijson>=2.2 msgpack-python>=0.3 +urllib3>=1.7.1 diff --git a/rpm/calico.spec b/rpm/calico.spec index e7a5b04417..293923b2c9 100644 --- a/rpm/calico.spec +++ b/rpm/calico.spec @@ -93,7 +93,7 @@ This package provides common files. %package felix Group: Applications/Engineering Summary: Project Calico virtual networking for cloud data centers -Requires: calico-common, conntrack-tools, ipset, iptables, net-tools, python-devel, python-netaddr, python-gevent +Requires: calico-common, conntrack-tools, ipset, iptables, net-tools, python-devel, python-netaddr, python-gevent, datrie, ijson, python-urllib3, python-msgpack %description felix From 025379c6ff145e39f18487104c38ae802fc7f3b2 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 10 Nov 2015 09:09:32 +0000 Subject: [PATCH 93/98] Add calico.etcddriver to list of tests in tox.ini. 
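The newly covered calico.etcddriver package leans on the streaming
ijson parser added to the requirements in the previous patch: the etcd
snapshot is decoded event by event rather than loaded as one document.
A minimal sketch of that parsing style (the JSON fragment here is
illustrative, not a real snapshot response):

    import ijson
    from StringIO import StringIO

    snapshot = StringIO('{"node": {"key": "/calico/v1/adir/akey", '
                        '"value": "a", "modifiedIndex": 8}}')
    for prefix, event, value in ijson.parse(snapshot):
        # Leaves arrive one at a time, so memory use stays flat no
        # matter how large the snapshot response is.
        if event in ("string", "number"):
            print prefix, "=", value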
--- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 4b5d94a677..3ddf2bc04b 100644 --- a/tox.ini +++ b/tox.ini @@ -18,6 +18,7 @@ commands = ./tox-cover.sh thread calico.test ./tox-cover.sh gevent calico.felix ./tox-cover.sh eventlet calico.openstack + ./tox-cover.sh thread calico.etcddriver coverage report -m [testenv:pypy] @@ -27,6 +28,7 @@ commands = commands = ./tox-cover.sh thread calico.test nosetests calico.felix + ./tox-cover.sh thread calico.etcddriver coverage report -m deps = nose From c7cc1be20d4ea4d0938b96609f2c50b6ebbeaf40 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 10 Nov 2015 09:20:40 +0000 Subject: [PATCH 94/98] Update changelogs. --- CHANGES.md | 3 +++ debian/changelog | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 16f4176731..57c8a21b5b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,8 @@ # Changelog +- Felix now parses the etcd snapshot in parallel with the event stream; + this dramatically increases scale when under load. + ## 1.2.0 - Add liveness reporting to Felix. Felix now reports its liveness into diff --git a/debian/changelog b/debian/changelog index 041d939056..ec6805db74 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ -calico (1.3.0~~smc.1-1) trusty; urgency=medium +calico (1.3.0~pre.1) trusty; urgency=medium - * Private pre-release build. + * Felix now parses the etcd snapshot in parallel with the event stream; + this dramatically increases scale when under load. -- Shaun Crampton Mon, 26 Oct 2015 13:41:00 +0100 From 2ca158908ec0d623cd686e621e82b7c4060bf8f0 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Tue, 10 Nov 2015 16:46:23 +0000 Subject: [PATCH 95/98] Rework driver tests to handle concurrent requests from old/new watcher. Improve shutdown mechanism. --- calico/etcddriver/test/stubs.py | 211 ++++++++++++++++++-------- calico/etcddriver/test/test_driver.py | 205 +++++++++++++++++-------- 2 files changed, 289 insertions(+), 127 deletions(-) diff --git a/calico/etcddriver/test/stubs.py b/calico/etcddriver/test/stubs.py index d886931164..ec95a2be2d 100644 --- a/calico/etcddriver/test/stubs.py +++ b/calico/etcddriver/test/stubs.py @@ -19,6 +19,7 @@ Stub objects used for testing driver/protocol code. """ import json +import threading import logging from Queue import Queue, Empty @@ -31,7 +32,15 @@ # Singleton representing a flush in the stream of writes. -FLUSH = object() +class Sigil(object): + def __init__(self, name): + self.name = name + + def __str__(self): + return "<%s>" % self.name + + +FLUSH = Sigil("FLUSH") class StubMessageReader(MessageReader): @@ -93,6 +102,9 @@ def send_message(self, msg_type, fields=None, flush=True): if flush: self.flush() + def next_msg(self): + return self.queue.get(timeout=1) + def flush(self): self.queue.put(FLUSH) @@ -128,65 +140,32 @@ def __del__(self): assert self._finished, "PipeFile wasn't correctly finished." -class StubEtcd(object): - """ - A fake connection to etcd. We hook the driver's _issue_etcd_request - method and block the relevant thread until the test calls one of the - respond_... methods. - """ - def __init__(self): - self.request_queue = Queue() - self.response_queue = Queue() - self.headers = { - "x-etcd-cluster-id": "abcdefg" - } - - def request(self, key, **kwargs): - """ - Called from the driver to make a request. Blocks until the - test thread sends a response. 
- """ - self.request_queue.put((key, kwargs)) - response = self.response_queue.get(30) - if isinstance(response, BaseException): - raise response - else: - return response - - def get_next_request(self): - """ - Called from the test to get the next request from the driver. - """ - return self.request_queue.get(timeout=1) +class StubRequest(object): + def __init__(self, stub_etcd, key, kwargs): + self.stub_etcd = stub_etcd + self.thread = threading.current_thread() + self.key = key + self.kwargs = kwargs + self.response = None + self.response_available = threading.Event() + self.pipe_file = None - def assert_request(self, expected_key, **expected_args): - """ - Asserts the properies of the next request. - """ - _log.info("Waiting for request for key %s, %s", - expected_key, expected_args) - key, args = self.get_next_request() - default_args = {'wait_index': None, - 'preload_content': None, - 'recursive': False, - 'timeout': 5} - _log.info("Got request for key %s") - for k, v in default_args.iteritems(): - if k in args and args[k] == v: - del args[k] - if expected_key != key: - raise AssertionError("Expected request for %s but got %s" % - (expected_key, key)) - if expected_args != args: - raise AssertionError("Expected request args %s for %s but got %s" % - (expected_args, key, args)) + def __str__(self): + return "Request" % (self.key, + self.kwargs, + self.thread) def respond_with_exception(self, exc): """ Called from the test to raise an exception from the current/next request. """ - self.response_queue.put(exc) + self.response = exc + self.on_response_avail() + + def on_response_avail(self): + self.response_available.set() + self.stub_etcd.on_req_closed(self) def respond_with_value(self, key, value, dir=False, mod_index=None, etcd_index=None, status=200, action="get"): @@ -234,24 +213,136 @@ def respond_with_data(self, data, etcd_index, status): Called from the test to return a raw response (e.g. to send malformed JSON). """ - headers = self.headers.copy() + headers = self.stub_etcd.headers.copy() if etcd_index is not None: headers["x-etcd-index"] = str(etcd_index) resp = MockResponse(status, data, headers) - self.response_queue.put(resp) + self.response = resp + self.on_response_avail() def respond_with_stream(self, etcd_index, status=200): """ Called from the test to respond with a stream, allowing the test to send chunks of data in response. """ - headers = self.headers.copy() + headers = self.stub_etcd.headers.copy() if etcd_index is not None: headers["x-etcd-index"] = str(etcd_index) - f = PipeFile() - resp = MockResponse(status, f, headers) - self.response_queue.put(resp) - return f + self.pipe_file = PipeFile() + resp = MockResponse(status, self.pipe_file, headers) + self.response = resp + self.response_available.set() # We leave the req open in StubEtcd. + return self.pipe_file + + def get_response(self): + if self.response_available.wait(30): + return self.response + else: + raise AssertionError("No response") + + def assert_request(self, expected_key, **expected_args): + """ + Asserts the properies of the next request. 
+ """ + default_args = {'wait_index': None, + 'preload_content': None, + 'recursive': False, + 'timeout': 5} + key = self.key + args = self.kwargs + for k, v in default_args.iteritems(): + if k in args and args[k] == v: + del args[k] + if expected_key != key: + raise AssertionError("Expected request for %s but got %s" % + (expected_key, key)) + if expected_args != args: + raise AssertionError("Expected request args %s for %s but got %s" % + (expected_args, key, args)) + + def stop(self): + if self.response_available.is_set(): + if self.pipe_file: + self.pipe_file.write(SystemExit()) + else: + self.respond_with_exception(SystemExit()) + + +class StubEtcd(object): + """ + A fake connection to etcd. We hook the driver's _issue_etcd_request + method and block the relevant thread until the test calls one of the + respond_... methods. + """ + def __init__(self): + self.request_queue = Queue() + self.response_queue = Queue() + self.headers = { + "x-etcd-cluster-id": "abcdefg" + } + self.lock = threading.Lock() + self.open_reqs = set() + + def request(self, key, **kwargs): + """ + Called from the driver to make a request. Blocks until the + test thread sends a response. + """ + _log.info("New request on thread %s: %s, %s", + threading.current_thread(), + key, kwargs) + request = StubRequest(self, key, kwargs) + with self.lock: + self.open_reqs.add(request) + rq = self.request_queue + if rq is None: + _log.warn("Request after shutdown: %s, %s", key, kwargs) + raise SystemExit() + else: + rq.put(request) + response = request.get_response() + if isinstance(response, BaseException): + raise response + else: + return response + + def get_next_request(self): + """ + Called from the test to get the next request from the driver. + """ + _log.info("Waiting for next request") + req = self.request_queue.get(timeout=1) + _log.info("Got request %s", req) + return req + + def assert_request(self, expected_key, **expected_args): + """ + Asserts the properies of the next request. + """ + req = self.request_queue.get(timeout=1) + req.assert_request(expected_key, **expected_args) + return req + + def on_req_closed(self, req): + with self.lock: + self.open_reqs.remove(req) + + def stop(self): + _log.info("Stopping stub etcd") + with self.lock: + _log.info("stop() got rq_lock") + while True: + try: + req = self.request_queue.get_nowait() + except Empty: + break + else: + self.open_reqs.add(req) + self.request_queue = None + for req in list(self.open_reqs): + _log.info("Aborting request %s", req) + req.stop() + _log.info("Stub etcd stopped; future requests should self-abort") class MockResponse(object): diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index 984e4a4a4c..a58e45da96 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -19,12 +19,13 @@ Tests for the etcd driver module. """ import json +import traceback from Queue import Empty -import time from StringIO import StringIO from unittest import TestCase +import sys from mock import Mock, patch, call from urllib3 import HTTPConnectionPool from urllib3.exceptions import TimeoutError, HTTPError @@ -80,10 +81,10 @@ def test_mainline_resync(self): # Initial handshake. self.start_driver_and_handshake() # Check for etcd request and start the response. - snap_stream = self.start_snapshot_response() + snap_stream, watcher_req = self.start_snapshot_response() # Respond to the watcher, this should get merged into the event # stream at some point later. 
- self.watcher_etcd.respond_with_value( + watcher_req.respond_with_value( "/calico/v1/adir/bkey", "b", mod_index=12, @@ -92,7 +93,7 @@ def test_mainline_resync(self): # Wait until the watcher makes its next request (with revved # wait_index) to make sure it has queued its event to the resync # thread. - self.watcher_etcd.assert_request( + watcher_req = self.watcher_etcd.assert_request( VERSION_DIR, recursive=True, timeout=90, wait_index=13 ) # Write some more data to the resync thread, it should process that @@ -113,7 +114,7 @@ def test_mainline_resync(self): MSG_KEY_VALUE: "b", }) # Respond to the watcher with another event. - self.watcher_etcd.respond_with_value( + watcher_req.respond_with_value( "/calico/v1/adir2/dkey", "d", mod_index=13, @@ -122,7 +123,7 @@ def test_mainline_resync(self): # Wait until the watcher makes its next request (with revved # wait_index) to make sure it has queued its event to the resync # thread. - self.watcher_etcd.assert_request( + watcher_req = self.watcher_etcd.assert_request( VERSION_DIR, recursive=True, timeout=90, wait_index=14 ) # Send the resync thread some data that should be ignored due to the @@ -156,7 +157,7 @@ def test_mainline_resync(self): # HWM. self.assert_status_message(STATUS_IN_SYNC) # Now send a watcher event, which should go straight through. - self.send_watcher_event_and_assert_felix_msg(14) + self.send_watcher_event_and_assert_felix_msg(14, req=watcher_req) # Check the contents of the trie. keys = set(self.driver._hwms._hwms.keys()) @@ -167,10 +168,9 @@ def test_mainline_resync(self): u'/calico/v1/adir2/dkey/', u'/calico/v1/adir/ekey/'])) - def test_many_events_during_resync(self): """ - Test of the mainline resync-and-merge processing. + Test many events during resync * Does the initial config handshake with Felix. * Interleaves the snapshot response with updates via the watcher. @@ -180,18 +180,18 @@ def test_many_events_during_resync(self): self.start_driver_and_handshake() # Check for etcd request and start the response. - snap_stream = self.start_snapshot_response() + snap_stream, watcher_req = self.start_snapshot_response() # Respond to the watcher, this should get merged into the event # stream at some point later. for ii in xrange(200): - self.watcher_etcd.respond_with_value( + watcher_req.respond_with_value( "/calico/v1/adir/bkey", "watch", mod_index=11 + ii, action="set" ) - self.watcher_etcd.assert_request( + watcher_req = self.watcher_etcd.assert_request( VERSION_DIR, recursive=True, timeout=90, wait_index=12 + ii ) snap_stream.write(''' @@ -225,39 +225,99 @@ def test_many_events_during_resync(self): def test_felix_triggers_resync(self): self._run_initial_resync() - # Send a resync request from Felix. - self.send_resync_and_wait_for_flag() - # Wait for the watcher to make its request. - self.watcher_etcd.assert_request( + watcher_req = self.watcher_etcd.assert_request( VERSION_DIR, recursive=True, timeout=90, wait_index=15 ) - # Then for determinism, force it to die before it polls again. - self.driver._watcher_stop_event.set() - # The event from the watcher triggers the resync. - self.send_watcher_event_and_assert_felix_msg(15) - # Back into wait-for-ready mode. + # Take a copy of the watcher stop event so that we don't race to read + # it. + watcher_stop_event = self.driver._watcher_stop_event + + # Send a resync request from Felix. + self.msg_reader.send_msg(MSG_TYPE_RESYNC, {}) + + # Respond to the watcher, this should trigger the resync. 
+ watcher_req.respond_with_value( + "/calico/v1/adir/ekey", + "e", + mod_index=15, + action="set" + ) + + # Resync thread should tell the watcher to die. + watcher_stop_event.wait(timeout=1) + + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ekey", + MSG_KEY_VALUE: "e", + }) + self.assert_flush_to_felix() self.assert_status_message(STATUS_WAIT_FOR_READY) + # Re-do the config handshake. self.do_handshake() - # Check for etcd request and start the response. - snap_stream = self.start_snapshot_response(etcd_index=100) + # We should get a request to load the full snapshot. + watcher_req = self.resync_etcd.assert_request( + VERSION_DIR, recursive=True, timeout=120, preload_content=False + ) + snap_stream = watcher_req.respond_with_stream( + etcd_index=100 + ) + + # There could be more than one watcher now so we need to be careful + # to respond to the right one... + watcher_req = self.watcher_etcd.get_next_request() + if watcher_req.kwargs["wait_index"] == 16: + # Old watcher thread + watcher_req.respond_with_value("/calico/v1/adir/ekey", "e", + mod_index=99) + watcher_req = self.watcher_etcd.get_next_request() + # watcher_req should be from the new watcher thread + self.assertEqual(watcher_req.kwargs["wait_index"], 101) + + # Start sending the snapshot response: + snap_stream.write('''{ + "action": "get", + "node": { + "key": "/calico/v1", + "dir": true, + "nodes": [ + { + "key": "/calico/v1/adir", + "dir": true, + "nodes": [ + { + "key": "/calico/v1/adir/akey", + "value": "akey's value", + "modifiedIndex": 98 + }, + ''') + # Should generate a message to felix even though it's only seen part + # of the response... + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/akey", + MSG_KEY_VALUE: "akey's value", + }) + # Respond to the watcher, this should get merged into the event # stream at some point later. - self.watcher_etcd.respond_with_value( + watcher_req.respond_with_value( "/calico/v1/adir/bkey", "b", mod_index=102, action="set" ) + # Wait until the watcher makes its next request (with revved # wait_index) to make sure it has queued its event to the resync - # thread. - self.watcher_etcd.assert_request( - VERSION_DIR, recursive=True, timeout=90, wait_index=103 - ) + # thread. Skip any events fro the old watcher. + watcher_req = self.watcher_etcd.get_next_request() + if watcher_req.kwargs["wait_index"] in (16, 100): + watcher_req = self.watcher_etcd.get_next_request() + self.assertFalse(watcher_req.kwargs["wait_index"] in (16, 100)) + # Write some data for an unchanged key to the resync thread, which # should be ignored. snap_stream.write(''' @@ -299,28 +359,18 @@ def test_felix_triggers_resync(self): # HWM. self.assert_status_message(STATUS_IN_SYNC) # Now send a watcher event, which should go straight through. - self.send_watcher_event_and_assert_felix_msg(104) - - def send_resync_and_wait_for_flag(self): - # Felix sends a resync message. - self.msg_reader.send_msg(MSG_TYPE_RESYNC, {}) - - # For determinism, wait for the message to be processed. - for _ in xrange(100): - if self.driver._resync_requested: - break - time.sleep(0.01) - else: - self.fail("Resync flag never got set.") + self.send_watcher_event_and_assert_felix_msg(104, req=watcher_req) def test_directory_deletion(self): self._run_initial_resync() # For coverage: Nothing happens for a while, poll times out. 
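# The injected timeout below is the watcher's normal idle case; the
# handling it expects reduces to roughly this (assumed shape, not the
# driver's exact code -- the URL and fields are illustrative):
from urllib3.exceptions import ReadTimeoutError

def poll_for_next_event(http, wait_index):
    # One long poll against etcd; a read timeout just means "no change
    # yet", so the caller loops and re-polls with the same index.
    try:
        return http.request("GET", "/v2/keys/calico/v1",
                            fields={"wait": "true",
                                    "recursive": "true",
                                    "waitIndex": str(wait_index)},
                            timeout=90)
    except ReadTimeoutError:
        return None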
- self.watcher_etcd.respond_with_exception( + watcher_req = self.watcher_etcd.get_next_request() + watcher_req.respond_with_exception( driver.ReadTimeoutError(Mock(), "", "") ) # For coverage: Then a set to a dir, which should be ignored. - self.watcher_etcd.respond_with_data(json.dumps({ + watcher_req = self.watcher_etcd.get_next_request() + watcher_req.respond_with_data(json.dumps({ "action": "create", "node": { "key": "/calico/v1/foo", @@ -328,7 +378,8 @@ def test_directory_deletion(self): } }), 100, 200) # Then a whole directory is deleted. - self.watcher_etcd.respond_with_value( + watcher_req = self.watcher_etcd.get_next_request() + watcher_req.respond_with_value( "/calico/v1/adir", dir=True, value=None, @@ -364,7 +415,7 @@ def test_directory_deletion(self): def _run_initial_resync(self): try: # Start by going through the first resync. - self.test_mainline_resync() + self.test_mainline_resync() # Returns open watcher req. except AssertionError: _log.exception("Mainline resync test failed, aborting test %s", self.id()) @@ -374,7 +425,8 @@ def _run_initial_resync(self): def test_root_directory_deletion(self): self._run_initial_resync() # Delete the whole /calico/v1 dir. - self.watcher_etcd.respond_with_data(json.dumps({ + watcher_req = self.watcher_etcd.get_next_request() + watcher_req.respond_with_data(json.dumps({ "action": "delete", "node": { "key": "/calico/v1/", @@ -388,13 +440,16 @@ def test_root_directory_deletion(self): def test_garbage_watcher_response(self): self._run_initial_resync() # Delete the whole /calico/v1 dir. - self.watcher_etcd.respond_with_data("{foobar", 100, 200) + watcher_req = self.watcher_etcd.get_next_request() + watcher_req.respond_with_data("{foobar", 100, 200) # Should trigger a resync. self.assert_status_message(STATUS_WAIT_FOR_READY) - def send_watcher_event_and_assert_felix_msg(self, etcd_index): - self.watcher_etcd.respond_with_value( + def send_watcher_event_and_assert_felix_msg(self, etcd_index, req=None): + if req is None: + req = self.watcher_etcd.get_next_request() + req.respond_with_value( "/calico/v1/adir/ekey", "e", mod_index=etcd_index, @@ -434,7 +489,7 @@ def test_resync_etcd_read_fail(self, m_sleep): # Initial handshake. self.start_driver_and_handshake() # Start streaming some data. - snap_stream = self.start_snapshot_response() + snap_stream, watcher_req = self.start_snapshot_response() # But then the read times out... snap_stream.write(TimeoutError()) # Triggering a restart of the resync loop. @@ -444,8 +499,8 @@ def test_resync_etcd_read_fail(self, m_sleep): def test_bad_ready_key_retry(self, m_sleep): self.start_driver_and_init() # Respond to etcd request with a bad response - self.resync_etcd.assert_request(READY_KEY) - self.resync_etcd.respond_with_data("foobar", 123, 500) + req = self.resync_etcd.assert_request(READY_KEY) + req.respond_with_data("foobar", 123, 500) # Then it should retry. self.resync_etcd.assert_request(READY_KEY) m_sleep.assert_called_once_with(1) @@ -464,18 +519,19 @@ def start_driver_and_handshake(self): def do_handshake(self): # Respond to etcd request with ready == true. - self.resync_etcd.assert_request(READY_KEY) - self.resync_etcd.respond_with_value(READY_KEY, "true", mod_index=10) + req = self.resync_etcd.assert_request(READY_KEY) + req.respond_with_value(READY_KEY, "true", mod_index=10) # Then etcd should get the global config request. 
- self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) - self.resync_etcd.respond_with_dir(CONFIG_DIR, { + req = self.resync_etcd.assert_request(CONFIG_DIR, recursive=True) + req.respond_with_dir(CONFIG_DIR, { CONFIG_DIR + "/InterfacePrefix": "tap", CONFIG_DIR + "/Foo": None, # Directory }) # Followed by the per-host one... - self.resync_etcd.assert_request("/calico/v1/host/thehostname/config", - recursive=True) - self.resync_etcd.respond_with_data('{"errorCode": 100}', + req = self.resync_etcd.assert_request( + "/calico/v1/host/thehostname/config", recursive=True + ) + req.respond_with_data('{"errorCode": 100}', 10, 404) # Then the driver should send the config to Felix. self.assert_msg_to_felix( @@ -501,16 +557,16 @@ def do_handshake(self): def start_snapshot_response(self, etcd_index=10): # We should get a request to load the full snapshot. - self.resync_etcd.assert_request( + req = self.resync_etcd.assert_request( VERSION_DIR, recursive=True, timeout=120, preload_content=False ) - snap_stream = self.resync_etcd.respond_with_stream( + snap_stream = req.respond_with_stream( etcd_index=etcd_index ) # And then the headers should trigger a request from the watcher # including the etcd_index we sent even though we haven't sent a # response body to the resync thread. - self.watcher_etcd.assert_request( + req = self.watcher_etcd.assert_request( VERSION_DIR, recursive=True, timeout=90, wait_index=etcd_index+1 ) # Start sending the snapshot response: @@ -536,7 +592,7 @@ def start_snapshot_response(self, etcd_index=10): MSG_KEY_KEY: "/calico/v1/adir/akey", MSG_KEY_VALUE: "akey's value", }) - return snap_stream + return snap_stream, req def assert_status_message(self, status): _log.info("Expecting %s status from driver...", status) @@ -557,7 +613,7 @@ def send_init_msg(self): def assert_msg_to_felix(self, msg_type, fields=None): try: - mt, fs = self.msg_writer.queue.get(timeout=2) + mt, fs = self.msg_writer.next_msg() except Empty: self.fail("Expected %s message to felix but no message was sent" % msg_type) @@ -609,10 +665,12 @@ def tearDown(self): self.msg_reader.send_timeout() # SystemExit kills (only) the thread silently. - self.resync_etcd.respond_with_exception(SystemExit()) - self.watcher_etcd.respond_with_exception(SystemExit()) + self.resync_etcd.stop() + self.watcher_etcd.stop() # Wait for it to stop. - self.assertTrue(self.driver.join(0.1), "Driver failed to stop") + if not self.driver.join(1): + dump_all_thread_stacks() + self.fail("Driver failed to stop") finally: # Now the driver is stopped, it's safe to remove our patch of # complete_logging() @@ -827,3 +885,16 @@ def test_process_events_stopped(self): self.driver._process_events_only() +def dump_all_thread_stacks(): + print >> sys.stderr, "\n*** STACKTRACE - START ***\n" + code = [] + for threadId, stack in sys._current_frames().items(): + code.append("\n# ThreadID: %s" % threadId) + for filename, lineno, name, line in traceback.extract_stack(stack): + code.append('File: "%s", line %d, in %s' % (filename, + lineno, name)) + if line: + code.append(" %s" % (line.strip())) + for line in code: + print >> sys.stderr, line + print >> sys.stderr, "\n*** STACKTRACE - END ***\n" \ No newline at end of file From d61de4a3aef32ea006f9bc9ccc30ffc0c7b981ec Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 12 Nov 2015 09:37:19 +0000 Subject: [PATCH 96/98] Code review markups for concurrent resync function. 
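One of the points covered here is the interleaving loop in
_handle_etcd_node: after each snapshot key, up to 100 queued watcher
events are drained, so neither input can starve the other while the
bound still guarantees the snapshot finishes.  Reduced to a standalone
sketch (the names here are illustrative):

    from Queue import Empty

    def process_snapshot(snapshot_items, watcher_queue, on_update,
                         limit=100):
        for key, value in snapshot_items:
            on_update(key, value)
            # Drain a bounded burst of watcher events between snapshot
            # keys; much lower limits starved the watcher in testing.
            for _ in xrange(limit):
                try:
                    event_key, event_value = watcher_queue.get_nowait()
                except Empty:
                    break
                on_update(event_key, event_value)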
--- calico/etcddriver/driver.py | 54 +++++++++++++++++++-------- calico/etcddriver/hwm.py | 39 ++++++++++++------- calico/etcddriver/protocol.py | 4 ++ calico/etcddriver/test/stubs.py | 3 +- calico/etcddriver/test/test_driver.py | 6 ++- calico/felix/felix.py | 2 +- calico/felix/fetcd.py | 22 ++++++----- calico/felix/test/test_fetcd.py | 1 + 8 files changed, 88 insertions(+), 43 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 3760be2bfe..114b5077c7 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -151,19 +151,20 @@ def stop(self): def _read_from_socket(self): """ - Thread: reader thread. Reads messages from Felix. - - So far, this means reading the init message and then dealing - with the exception if Felix dies. + Thread: reader thread. Reads messages from Felix and fans them out. """ try: while not self._stop_event.is_set(): for msg_type, msg in self._msg_reader.new_messages(timeout=1): if msg_type == MSG_TYPE_INIT: + # Init message, received at start of day. self._handle_init(msg) elif msg_type == MSG_TYPE_CONFIG: + # Config message, expected after we send the raw + # config to Felix. self._handle_config(msg) elif msg_type == MSG_TYPE_RESYNC: + # Request to do a resync. self._handle_resync(msg) else: _log.error("Unexpected message from Felix: %s", msg) @@ -227,8 +228,7 @@ def _resync_and_merge(self): self._send_status(STATUS_WAIT_FOR_READY) self._wait_for_ready() self._preload_config() - # Now (on the first run through) wait for Felix to process the - # config. + # Wait for config if we have not already received it. self._wait_for_config() # Kick off the snapshot request as far as the headers. self._send_status(STATUS_RESYNC) @@ -252,7 +252,7 @@ def _resync_and_merge(self): socket.error) as e: _log.error("Request to etcd failed: %r; resyncing.", e) if monotonic_time() - loop_start < 1: - _log.debug("May be tight looping, sleeping...") + _log.warning("May be tight looping, sleeping...") time.sleep(1) except DriverShutdown: _log.info("Driver shut down.") @@ -268,8 +268,7 @@ def _resync_and_merge(self): def _wait_for_config(self): while not self._config_received.is_set(): _log.info("Waiting for Felix to process the config...") - if self._stop_event.is_set(): - raise DriverShutdown() + self._check_stop_event() self._config_received.wait(1) _log.info("Felix sent us the config, continuing.") @@ -292,6 +291,12 @@ def _wait_for_ready(self): else: _log.info("Ready flag set to %s", etcd_resp["node"]["value"]) self._hwms.update_hwm(READY_KEY, mod_idx) + self._check_stop_event() + + def _check_stop_event(self): + if self._stop_event.is_set(): + _log.info("Told to stop, raising DriverShutdown.") + raise DriverShutdown() def _preload_config(self): """ @@ -447,16 +452,29 @@ def _process_snapshot_and_events(self, etcd_response, snapshot_index): def _handle_etcd_node(self, snap_mod, snap_key, snap_value, snapshot_index=None): + """ + Callback for use with parse_snapshot. Called once for each key/value + pair that is found. + + Handles the key/value itself and then checks for work from the + watcher. + + :param snap_mod: Modified index of the key. + :param snap_key: The key itself. + :param snap_value: The value attached to the key. + :param snapshot_index: Index of the snapshot as a whole. + """ assert snapshot_index is not None old_hwm = self._hwms.update_hwm(snap_key, snapshot_index) if snap_mod > old_hwm: # This specific key's HWM is newer than the previous # version we've seen, send an update. 
self._on_key_updated(snap_key, snap_value) - # After we process an update from the snapshot, process - # several updates from the watcher queue (if there are - # any). We limit the number to ensure that we always - # finish the snapshot eventually. + # After we process an update from the snapshot, process several + # updates from the watcher queue (if there are any). We limit the + # number to ensure that we always finish the snapshot eventually. + # The limit isn't too sensitive but values much lower than 100 seemed + # to starve the watcher in testing. for _ in xrange(100): if not self._watcher_queue or self._watcher_queue.empty(): # Don't block on the watcher if there's nothing to do. @@ -469,9 +487,7 @@ def _handle_etcd_node(self, snap_mod, snap_key, snap_value, _log.warning("Watcher thread died, continuing " "with snapshot") break - if self._stop_event.is_set(): - _log.error("Stop event set, exiting") - raise DriverShutdown() + self._check_stop_event() def _process_events_only(self): """ @@ -485,6 +501,7 @@ def _process_events_only(self): while not self._stop_event.is_set(): self._handle_next_watcher_event(resync_in_progress=False) self._msg_writer.flush() + self._check_stop_event() def _scan_for_deletions(self, snapshot_index): """ @@ -552,6 +569,8 @@ def _start_watcher(self, snapshot_index): """ Starts the watcher thread, creating its queue and event in the process. """ + # Defensive: stop the watcher if it's already running. + self._stop_watcher() self._watcher_queue = Queue() self._watcher_stop_event = Event() # Note: we pass the queue and event in as arguments so that the thread @@ -590,6 +609,9 @@ def _on_key_updated(self, key, value): deletion). """ if key == READY_KEY and value != "true": + # Special case: the global Ready flag has been unset, trigger a + # resync, which will poll the Ready flag until it is set to true + # again. _log.warning("Ready key no longer set to true, triggering resync.") raise ResyncRequired() self._msg_writer.send_message( diff --git a/calico/etcddriver/hwm.py b/calico/etcddriver/hwm.py index 5d3d8ecf2d..21da89bce2 100644 --- a/calico/etcddriver/hwm.py +++ b/calico/etcddriver/hwm.py @@ -30,8 +30,14 @@ _log = logging.getLogger(__name__) +# The trie implementation that we use requires us to specify the character set +# in advance... +# Symbols that are allowed in our etcd keys. TRIE_SYMBOLS = "/_-:." +# Chars we allow in the trie. In addition to alphanumerics and our +# white-listed symbols, we also use % for %-encoding of unexpected symbols. TRIE_CHARS = string.ascii_letters + string.digits + TRIE_SYMBOLS + "%" +# Regex that matches chars that are allowed in the trie. TRIE_CHARS_MATCH = re.compile(r'^[%s]+$' % re.escape(TRIE_CHARS)) @@ -100,7 +106,7 @@ def stop_tracking_deletions(self): self._deletion_hwms = None self._latest_deletion = None - def update_hwm(self, key, hwm): + def update_hwm(self, key, new_mod_idx): """ Updates the HWM for a key if the new value is greater than the old. If deletion tracking is enabled, resolves deletions so that updates @@ -110,29 +116,29 @@ def update_hwm(self, key, hwm): :return int|NoneType: the old HWM of the key (or the HWM at which it was deleted) or None if it did not previously exist. """ - _log.debug("Updating HWM for %s to %s", key, hwm) + _log.debug("Updating HWM for %s to %s", key, new_mod_idx) key = encode_key(key) if (self._deletion_hwms is not None and # Optimization: avoid expensive lookup if this update comes # after all deletions. 
- hwm < self._latest_deletion): + new_mod_idx < self._latest_deletion): # We're tracking deletions, check that this key hasn't been # deleted. del_hwm = self._deletion_hwms.longest_prefix_value(key, None) - if hwm < del_hwm: + if new_mod_idx < del_hwm: _log.debug("Key %s previously deleted, skipping", key) return del_hwm try: old_hwm = self._hwms[key] # Trie doesn't have get(). except KeyError: old_hwm = None - if old_hwm < hwm: # Works for None too. + if old_hwm < new_mod_idx: # Works for None too. _log.debug("Key %s HWM updated to %s, previous %s", - key, hwm, old_hwm) - self._hwms[key] = hwm + key, new_mod_idx, old_hwm) + self._hwms[key] = new_mod_idx return old_hwm - def store_deletion(self, key, hwm): + def store_deletion(self, key, deletion_mod_idx): """ Store that a given key (or directory) was deleted at a given HWM. :return: List of known keys that were deleted. This will be the @@ -140,10 +146,10 @@ def store_deletion(self, key, hwm): """ _log.debug("Key %s deleted", key) key = encode_key(key) - self._latest_deletion = max(hwm, self._latest_deletion) + self._latest_deletion = max(deletion_mod_idx, self._latest_deletion) if self._deletion_hwms is not None: _log.debug("Tracking deletion in deletions trie") - self._deletion_hwms[key] = hwm + self._deletion_hwms[key] = deletion_mod_idx deleted_keys = [] for child_key, child_mod in self._hwms.items(key): del self._hwms[child_key] @@ -193,10 +199,15 @@ def encode_key(key): here than to blow up. """ if key[-1] != "/": - key += "/" - key = unicode(urllib.quote(key.encode("utf8"), safe=TRIE_SYMBOLS)) - assert TRIE_CHARS_MATCH.match(key) - return key + suffixed_key = key + "/" + else: + suffixed_key = key + encoded_key = unicode(urllib.quote(suffixed_key.encode("utf8"), + safe=TRIE_SYMBOLS)) + assert TRIE_CHARS_MATCH.match(encoded_key), ( + "Key %r encoded to %r contained invalid chars" % (key, encoded_key) + ) + return encoded_key def decode_key(key): diff --git a/calico/etcddriver/protocol.py b/calico/etcddriver/protocol.py index c2204c478c..5fa5a80b3e 100644 --- a/calico/etcddriver/protocol.py +++ b/calico/etcddriver/protocol.py @@ -138,10 +138,14 @@ def new_messages(self, timeout=1): Generator: generates 0 or more tuples containing message type and message body (as a dict). + May generate 0 events in certain conditions even if there are + events available. (If the socket returns EAGAIN, for example.) + :param timeout: Maximum time to block waiting on the socket before giving up. No exception is raised upon timeout but 0 events are generated. :raises SocketClosed if the socket is closed. + :raises socket.error if an unexpected socket error occurs. 
""" if timeout is not None: read_ready, _, _ = select.select([self._sck], [], [], timeout) diff --git a/calico/etcddriver/test/stubs.py b/calico/etcddriver/test/stubs.py index ec95a2be2d..f43575e71d 100644 --- a/calico/etcddriver/test/stubs.py +++ b/calico/etcddriver/test/stubs.py @@ -235,7 +235,8 @@ def respond_with_stream(self, etcd_index, status=200): return self.pipe_file def get_response(self): - if self.response_available.wait(30): + self.response_available.wait(timeout=30) # returns None in Python 2.6 + if self.response_available.is_set(): return self.response else: raise AssertionError("No response") diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index a58e45da96..3b7dde2d1e 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -697,6 +697,10 @@ def test_shutdown_before_config(self): self.driver._stop_event.set() self.assertRaises(DriverShutdown, self.driver._wait_for_config) + def test_shutdown_before_ready(self): + self.driver._stop_event.set() + self.assertRaises(DriverShutdown, self.driver._wait_for_ready) + def test_issue_etcd_request_basic_get(self): # Initialise the etcd URL. self.driver._handle_init({ @@ -882,7 +886,7 @@ def test_join_not_stopped(self): def test_process_events_stopped(self): self.driver._stop_event.set() - self.driver._process_events_only() + self.assertRaises(DriverShutdown, self.driver._process_events_only) def dump_all_thread_stacks(): diff --git a/calico/felix/felix.py b/calico/felix/felix.py index 7e109e9c50..13aa1b53c7 100644 --- a/calico/felix/felix.py +++ b/calico/felix/felix.py @@ -214,7 +214,7 @@ def main(): try: config = Config(options.config_file) - except Exception as e: + except Exception: # Config loading error, and not just invalid parameters (from optparse) # as they generate a SystemExit. Attempt to open a log file, ignoring # any errors it gets, before we raise the exception. diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index bd90433c96..dac2ee0096 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -239,7 +239,9 @@ def start_watch(self, splitter): Starts watching etcd for changes. Implicitly loads the config if it hasn't been loaded yet. """ - self._watcher.load_config.set() + assert self._watcher.load_config.is_set(), ( + "load_config() should be called before start_watch()." + ) self._watcher.splitter = splitter self._watcher.begin_polling.set() @@ -271,7 +273,7 @@ class _FelixEtcdWatcher(gevent.Greenlet): """ Greenlet that communicates with the etcd driver over a socket. - * Does the initial handshake with the driver, sening it the init + * Does the initial handshake with the driver, sending it the init message. * Receives the pre-loaded config from the driver and uses that to do Felix's one-off configuration. @@ -490,14 +492,14 @@ def _on_status_from_driver(self, msg): The driver sends us status messages whenever its status changes. 
It moves through these states: - * wait-for-ready (waiting for the global ready flag to become set) - * resync (resyncing with etcd, processing a snapshot and any - concurrent events) - * in-sync (snapshot processsing complete, now processing only events - from etcd) + (1) wait-for-ready (waiting for the global ready flag to become set) + (2) resync (resyncing with etcd, processing a snapshot and any + concurrent events) + (3) in-sync (snapshot processsing complete, now processing only events + from etcd) - If it falls out of sync with etcd then it moves back into - wait-for-ready state and starts again. + If the driver falls out of sync with etcd then it will start again + from (1). If the status is in-sync, triggers the relevant processing. """ @@ -789,7 +791,7 @@ def _finish_msg_batch(self, batch, results): self._cleanup_pending): # Schedule a timer to stop our rate limiting or retry cleanup. timeout = self._config.ENDPOINT_REPORT_DELAY - timeout *= 0.9 + (random.random() * 0.2) # Jitter by +/- 10%. + timeout *= (0.9 + (random.random() * 0.2)) # Jitter by +/- 10%. gevent.spawn_later(timeout, self._on_timer_pop, async=True) diff --git a/calico/felix/test/test_fetcd.py b/calico/felix/test/test_fetcd.py index 24fb3cd8ba..4ee5279399 100644 --- a/calico/felix/test/test_fetcd.py +++ b/calico/felix/test/test_fetcd.py @@ -136,6 +136,7 @@ def test_load_config(self): def test_start_watch(self): m_splitter = Mock() + self.api.load_config(async=True) result = self.api.start_watch(m_splitter, async=True) self.step_actor(self.api) self.m_etcd_watcher.load_config.set.assert_called_once_with() From 2937a7d25e57e46d5b540e2ecbf9272631f67934 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 12 Nov 2015 17:48:54 +0000 Subject: [PATCH 97/98] Code review markups: bound size of watcher queue. --- calico/etcddriver/driver.py | 14 +++++++++++--- calico/felix/fetcd.py | 7 ++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index 114b5077c7..cccb42e92c 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -71,6 +71,12 @@ _log = logging.getLogger(__name__) +# Bound on the size of the queue between watcher and resync thread. In +# general, Felix and the resync thread process much more quickly than the +# watcher can read from etcd so this is defensive. +WATCHER_QUEUE_SIZE = 20000 + + class EtcdDriver(object): def __init__(self, felix_sck): # Wrap the socket with our protocol reader/writer objects. @@ -571,7 +577,7 @@ def _start_watcher(self, snapshot_index): """ # Defensive: stop the watcher if it's already running. self._stop_watcher() - self._watcher_queue = Queue() + self._watcher_queue = Queue(maxsize=WATCHER_QUEUE_SIZE) self._watcher_stop_event = Event() # Note: we pass the queue and event in as arguments so that the thread # will always access the current queue and event. If it used self.xyz @@ -698,8 +704,10 @@ def watch_etcd(self, next_index, event_queue, stop_event): if key.rstrip("/") in (VERSION_DIR, ROOT_DIR): # Special case: if the whole keyspace is # deleted, that implies the ready flag is gone - # too; resync rather than generating deletes - # for every key. + # too. Break out of the loop to trigger a + # resync. This avoids queuing up a bunch of + # events that would be discarded by the + # resync thread. 
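# For reference, the effect of the WATCHER_QUEUE_SIZE bound added in
# this patch (toy example, not driver code): a full queue makes the
# producer block or fail fast, applying backpressure to the watcher
# instead of letting memory grow while the resync thread is busy.
from Queue import Queue, Full

q = Queue(maxsize=2)
q.put("event-1")
q.put("event-2")
try:
    q.put_nowait("event-3")  # Queue is full...
except Full:
    pass                     # ...a blocking put() would wait here.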
_log.warning("Whole %s deleted, resyncing", VERSION_DIR) break diff --git a/calico/felix/fetcd.py b/calico/felix/fetcd.py index dac2ee0096..22f1bb55a3 100644 --- a/calico/felix/fetcd.py +++ b/calico/felix/fetcd.py @@ -394,9 +394,10 @@ def _dispatch_msg_from_driver(self, msg_type, msg): raise RuntimeError("Unexpected message %s" % msg) self.msgs_processed += 1 if self.msgs_processed % MAX_EVENTS_BEFORE_YIELD == 0: - # Yield to ensure that other actors make progress. - # Sleep must be non-zero to work around gevent - # issue where we could be immediately rescheduled. + # Yield to ensure that other actors make progress. (gevent only + # yields for us if the socket would block.) The sleep must be + # non-zero to work around gevent issue where we could be + # immediately rescheduled. gevent.sleep(0.000001) def _on_update_from_driver(self, msg): From 9c4cd35b110f2066ed6e7c93b3ce503f1ee2d30d Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Thu, 12 Nov 2015 18:39:18 +0000 Subject: [PATCH 98/98] Code review markups: don't stop the watcher unless we have to. --- calico/etcddriver/driver.py | 48 ++++++++++++------- calico/etcddriver/test/test_driver.py | 69 ++++++++++++++++++--------- 2 files changed, 79 insertions(+), 38 deletions(-) diff --git a/calico/etcddriver/driver.py b/calico/etcddriver/driver.py index cccb42e92c..07278b6a1d 100644 --- a/calico/etcddriver/driver.py +++ b/calico/etcddriver/driver.py @@ -36,7 +36,7 @@ from Queue import Queue, Empty import socket -from ijson import IncompleteJSONError, JSONError +from ijson import JSONError try: # simplejson is a faster drop-in replacement. @@ -97,6 +97,7 @@ def __init__(self, felix_sck): self._resync_thread.daemon = True self._watcher_thread = None # Created on demand self._watcher_stop_event = None + self._watcher_start_index = None # High-water mark cache. Owned by resync thread. self._hwms = HighWaterTracker() @@ -222,10 +223,6 @@ def _resync_and_merge(self): while not self._stop_event.is_set(): loop_start = monotonic_time() - # Only has an effect if it's running. Note: stopping the watcher - # is async (and may take a long time for its connection to time - # out). - self._stop_watcher() try: # Start with a fresh HTTP pool just in case it got into a bad # state. @@ -240,7 +237,7 @@ def _resync_and_merge(self): self._send_status(STATUS_RESYNC) resp, snapshot_index = self._start_snapshot_request() # Before reading from the snapshot, start the watcher thread. - self._start_watcher(snapshot_index) + self._ensure_watcher_running(snapshot_index) # Incrementally process the snapshot, merging in events from # the queue. self._process_snapshot_and_events(resp, snapshot_index) @@ -253,13 +250,22 @@ def _resync_and_merge(self): self.stop() except WatcherDied: _log.warning("Watcher died; resyncing.") + self._stop_watcher() # Clean up the event except (urllib3.exceptions.HTTPError, HTTPException, socket.error) as e: _log.error("Request to etcd failed: %r; resyncing.", e) + self._stop_watcher() if monotonic_time() - loop_start < 1: _log.warning("May be tight looping, sleeping...") time.sleep(1) + except ResyncRequested: + _log.info("Resync requested, looping to start a new resync. 
" + "Leaving watcher running if possible.") + except ResyncRequired: + _log.warn("Detected inconsistency requiring a full resync, " + "stopping watcher") + self._stop_watcher() except DriverShutdown: _log.info("Driver shut down.") return @@ -540,12 +546,9 @@ def _handle_next_watcher_event(self, resync_in_progress): while not self._stop_event.is_set(): # To make sure we always make progress, only trigger a new resync # if we're not in the middle of one. - if (not resync_in_progress and - self._resync_requested and - self._watcher_stop_event): + if not resync_in_progress and self._resync_requested: _log.info("Resync requested, triggering one.") - self._watcher_stop_event.set() - raise WatcherDied() + raise ResyncRequested() try: event = self._watcher_queue.get(timeout=1) except Empty: @@ -571,12 +574,21 @@ def _handle_next_watcher_event(self, resync_in_progress): for child_key in deleted_keys: self._on_key_updated(child_key, None) - def _start_watcher(self, snapshot_index): + def _ensure_watcher_running(self, snapshot_index): """ - Starts the watcher thread, creating its queue and event in the process. + Starts a new watcher from the given snapshot index, if needed. """ - # Defensive: stop the watcher if it's already running. - self._stop_watcher() + if (self._watcher_thread is not None and + self._watcher_thread.is_alive() and + self._watcher_stop_event is not None and + not self._watcher_stop_event.is_set() and + self._watcher_queue is not None and + self._watcher_start_index <= snapshot_index): + _log.info("Watcher is still alive and started from a valid index, " + "leaving it running") + return + + self._watcher_start_index = snapshot_index self._watcher_queue = Queue(maxsize=WATCHER_QUEUE_SIZE) self._watcher_stop_event = Event() # Note: we pass the queue and event in as arguments so that the thread @@ -665,7 +677,7 @@ def watch_etcd(self, next_index, event_queue, stop_event): _log.info("Watcher thread started") http = None try: - while not stop_event.is_set(): + while not self._stop_event.is_set() and not stop_event.is_set(): if not http: _log.info("No HTTP pool, creating one...") http = self.get_etcd_connection() @@ -819,3 +831,7 @@ class DriverShutdown(Exception): class ResyncRequired(Exception): pass + + +class ResyncRequested(Exception): + pass diff --git a/calico/etcddriver/test/test_driver.py b/calico/etcddriver/test/test_driver.py index 3b7dde2d1e..337c9632fe 100644 --- a/calico/etcddriver/test/test_driver.py +++ b/calico/etcddriver/test/test_driver.py @@ -19,6 +19,7 @@ Tests for the etcd driver module. """ import json +import threading import traceback from Queue import Empty @@ -168,6 +169,23 @@ def test_mainline_resync(self): u'/calico/v1/adir2/dkey/', u'/calico/v1/adir/ekey/'])) + def test_bad_data_triggers_resync(self): + # Initial handshake. + self.start_driver_and_handshake() + # Check for etcd request and start the response. + snap_stream, watcher_req = self.start_snapshot_response() + # Write some garbage to the stream, should trigger a resync. + watcher_stop_event = self.driver._watcher_stop_event + snap_stream.write(''' + { + "key + ''') + snap_stream.write("") + + watcher_stop_event.wait(1) + self.assertTrue(watcher_stop_event.is_set()) + self.assert_status_message(STATUS_WAIT_FOR_READY) + def test_many_events_during_resync(self): """ Test many events during resync @@ -230,10 +248,6 @@ def test_felix_triggers_resync(self): VERSION_DIR, recursive=True, timeout=90, wait_index=15 ) - # Take a copy of the watcher stop event so that we don't race to read - # it. 
- watcher_stop_event = self.driver._watcher_stop_event - # Send a resync request from Felix. self.msg_reader.send_msg(MSG_TYPE_RESYNC, {}) @@ -244,15 +258,12 @@ def test_felix_triggers_resync(self): mod_index=15, action="set" ) - - # Resync thread should tell the watcher to die. - watcher_stop_event.wait(timeout=1) - self.assert_msg_to_felix(MSG_TYPE_UPDATE, { MSG_KEY_KEY: "/calico/v1/adir/ekey", MSG_KEY_VALUE: "e", }) self.assert_flush_to_felix() + self.assert_status_message(STATUS_WAIT_FOR_READY) # Re-do the config handshake. @@ -266,16 +277,19 @@ def test_felix_triggers_resync(self): etcd_index=100 ) - # There could be more than one watcher now so we need to be careful - # to respond to the right one... - watcher_req = self.watcher_etcd.get_next_request() - if watcher_req.kwargs["wait_index"] == 16: - # Old watcher thread - watcher_req.respond_with_value("/calico/v1/adir/ekey", "e", - mod_index=99) - watcher_req = self.watcher_etcd.get_next_request() - # watcher_req should be from the new watcher thread - self.assertEqual(watcher_req.kwargs["wait_index"], 101) + watcher_req = self.watcher_etcd.assert_request(VERSION_DIR, + wait_index=16, + recursive=True, + timeout=90) + watcher_req.respond_with_value("/calico/v1/adir/ekey", "e", + mod_index=50, action="set") + + # Wait for next watcher event to make sure it has queued its request to + # the resync thread. + watcher_req = self.watcher_etcd.assert_request(VERSION_DIR, + wait_index=51, + recursive=True, + timeout=90) # Start sending the snapshot response: snap_stream.write('''{ @@ -300,6 +314,10 @@ def test_felix_triggers_resync(self): MSG_KEY_KEY: "/calico/v1/adir/akey", MSG_KEY_VALUE: "akey's value", }) + self.assert_msg_to_felix(MSG_TYPE_UPDATE, { + MSG_KEY_KEY: "/calico/v1/adir/ekey", + MSG_KEY_VALUE: "e", + }) # Respond to the watcher, this should get merged into the event # stream at some point later. @@ -313,10 +331,10 @@ def test_felix_triggers_resync(self): # Wait until the watcher makes its next request (with revved # wait_index) to make sure it has queued its event to the resync # thread. Skip any events fro the old watcher. - watcher_req = self.watcher_etcd.get_next_request() - if watcher_req.kwargs["wait_index"] in (16, 100): - watcher_req = self.watcher_etcd.get_next_request() - self.assertFalse(watcher_req.kwargs["wait_index"] in (16, 100)) + watcher_req = self.watcher_etcd.assert_request(VERSION_DIR, + wait_index=103, + recursive=True, + timeout=90) # Write some data for an unchanged key to the resync thread, which # should be ignored. @@ -888,6 +906,13 @@ def test_process_events_stopped(self): self.driver._stop_event.set() self.assertRaises(DriverShutdown, self.driver._process_events_only) + def test_watch_etcd_already_stopped(self): + stop_event = threading.Event() + stop_event.set() + m_queue = Mock() + self.driver.watch_etcd(10, m_queue, stop_event) + self.assertEqual(m_queue.put.mock_calls, [call(None)]) + def dump_all_thread_stacks(): print >> sys.stderr, "\n*** STACKTRACE - START ***\n"
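# Taken together, the last two tests pin down the watcher's shutdown
# contract.  Restated as a sketch (assumed shape; poll_once() stands in
# for one etcd long poll): the loop honours both the driver-wide stop
# event and its own per-watcher event, and it signals its exit by
# posting a None sentinel onto its queue.
def watch_loop(global_stop, watcher_stop, event_queue, poll_once):
    while not global_stop.is_set() and not watcher_stop.is_set():
        event = poll_once()
        if event is not None:
            event_queue.put(event)
    event_queue.put(None)  # Tells the resync thread this watcher died.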