Skip to content

Commit

Permalink
Don't fail under load
Browse files Browse the repository at this point in the history
Allow cleanup code in _claim_block_affinity to "fail" if it's safe to do so.

Fixes projectcalico#66
  • Loading branch information
tomdee committed Jan 5, 2016
1 parent 65aa946 commit fe34533
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
9 changes: 7 additions & 2 deletions calico_containers/pycalico/ipam.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,16 @@ def _claim_block_affinity(self, host, block_cidr):
return

# Some other host beat us to claiming this block. Clean up.
self.etcd_client.delete(key)
try:
self.etcd_client.delete(key)
except EtcdKeyNotFound:
# A race exists where another process on the same host could
# have already deleted the key. This is fine as long as the key
# no longer exists.
pass

# Throw a key error to let the caller know the block wasn't free
# after all.

raise HostAffinityClaimedError("Block %s already claimed by %s",
block_id, block.host_affinity)
# successfully created the block. Done.
Expand Down
44 changes: 43 additions & 1 deletion calico_containers/tests/unit/ipam_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from pycalico.ipam import (IPAMClient, BlockHandleReaderWriter,
CASError, NoFreeBlocksError, _block_datastore_key,
_handle_datastore_key)
_handle_datastore_key, HostAffinityClaimedError)
from pycalico.datastore_errors import PoolNotFound
from pycalico.block import AllocationBlock, AddressNotAssignedError, BLOCK_SIZE
from pycalico.handle import AllocationHandle, AddressCountTooLow
Expand Down Expand Up @@ -1312,6 +1312,48 @@ def test_claim_block_affinity_already_owned(self):
prevExist=False)])
self.m_etcd_client.read.assert_called_once_with(key, quorum=True)

def test_claim_block_affinity_already_deleted(self):
"""
Test _claim_block_affinity() when another process deletes the host
affinity under our feet.
This can occur when the block gets claimed by one host and a second
host has two competing processes trying to claim the block. They can
both try to clean up at the same time.
Order of events
1 Write host affinity
2 Try to write the new block, but this fails
3 Re-read the block, discover another host owns it
4 Delete key from 1 but find it's already removed.
"""

block = _test_block_empty_v4()
m_result0 = Mock(spec=EtcdResult)
m_result0.value = block.to_json()

block_our_host = AllocationBlock(BLOCK_V4_1, "test_host2")

# Reads at 3
self.m_etcd_client.read.return_value = m_result0

# Write at 1, 2
self.m_etcd_client.write.side_effect = [None, EtcdAlreadyExist()]

# Delete at 4
self.m_etcd_client.delete.side_effect = [EtcdKeyNotFound()]

with self.assertRaises(HostAffinityClaimedError):
self.client._claim_block_affinity(block_our_host.host_affinity,
block.cidr)

key = _block_datastore_key(block_our_host.cidr)
value = block_our_host.to_json()
self.m_etcd_client.write.assert_has_calls([call(ANY, ""),
call(key, value,
prevExist=False)])
self.m_etcd_client.read.assert_called_once_with(key, quorum=True)
self.m_etcd_client.delete.assert_called_once_with(ANY)

def test_new_affine_block_race(self):
"""
Test _new_affine_block when another host claims it between reading
Expand Down

0 comments on commit fe34533

Please sign in to comment.