replace the whole cib if crm_feature_set < 3.0.9

* Applying a diff does not work correctly with crm_feature_set < 3.0.9 so pcs replaces the whole cib instead in such a case. * Fixed cibs in test resources to have real combinations of validate-with and crm_feature_set.
neo502721 · Jan 12, 2018 · b4e5835 · b4e5835
1 parent 30f9e8c
commit b4e5835
Show file tree

Hide file tree

Showing 21 changed files with 366 additions and 92 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -22,10 +22,13 @@
 - Pcs now properly exits with code 1 when an error occurs in `pcs cluster node
   add-remote` and `pcs cluster node add-guest` commands ([rhbz#1464781])
 - Fixed a crash in the `pcs booth sync` command ([rhbz#1527530])
+- Always replace the whole CIB instead of applying a diff when
+  crm\_feature\_set <= 3.0.8 ([rhbz#1488044])
 
 [ghissue#153]: https://github.com/ClusterLabs/pcs/issues/153
 [rhbz#1421702]: https://bugzilla.redhat.com/show_bug.cgi?id=1421702
 [rhbz#1464781]: https://bugzilla.redhat.com/show_bug.cgi?id=1464781
+[rhbz#1488044]: https://bugzilla.redhat.com/show_bug.cgi?id=1488044
 [rhbz#1517333]: https://bugzilla.redhat.com/show_bug.cgi?id=1517333
 [rhbz#1522813]: https://bugzilla.redhat.com/show_bug.cgi?id=1522813
 [rhbz#1523378]: https://bugzilla.redhat.com/show_bug.cgi?id=1523378

diff --git a/pcs/cli/common/console_report.py b/pcs/cli/common/console_report.py
@@ -757,6 +757,15 @@ def joined_list(item_list, optional_transformations=None):
         .format(**info)
     ,
 
+    codes.CIB_PUSH_FORCED_FULL_DUE_TO_CRM_FEATURE_SET: lambda info:
+        (
+            "Replacing the whole CIB instead of applying a diff, a race "
+            "condition may happen if the CIB is pushed more than once "
+            "simultaneously. To fix this, upgrade pacemaker to get "
+            "crm_feature_set at least {required_set}, current is {current_set}."
+        ).format(**info)
+    ,
+
     codes.CIB_SAVE_TMP_ERROR: lambda info:
         "Unable to save CIB to a temporary file: {reason}"
         .format(**info)

diff --git a/pcs/cli/common/test/test_console_report.py b/pcs/cli/common/test/test_console_report.py
@@ -2018,6 +2018,7 @@ def test_success(self):
             }
         )
 
+
 class IdNotFound(NameBuildTest):
     code = codes.ID_NOT_FOUND
     def test_id(self):
@@ -2063,3 +2064,20 @@ def test_type_and_context(self):
                 "context_id": "C_ID",
             }
         )
+
+
+class CibPushForcedFullDueToCrmFeatureSet(NameBuildTest):
+    code = codes.CIB_PUSH_FORCED_FULL_DUE_TO_CRM_FEATURE_SET
+    def test_success(self):
+        self.assert_message_from_info(
+            (
+                "Replacing the whole CIB instead of applying a diff, a race "
+                "condition may happen if the CIB is pushed more than once "
+                "simultaneously. To fix this, upgrade pacemaker to get "
+                "crm_feature_set at least 3.0.9, current is 3.0.6."
+            ),
+            {
+                "required_set": "3.0.9",
+                "current_set": "3.0.6",
+            }
+        )
diff --git a/pcs/common/report_codes.py b/pcs/common/report_codes.py
@@ -71,6 +71,7 @@
 CIB_LOAD_ERROR_BAD_FORMAT = "CIB_LOAD_ERROR_BAD_FORMAT"
 CIB_LOAD_ERROR = "CIB_LOAD_ERROR"
 CIB_LOAD_ERROR_SCOPE_MISSING = "CIB_LOAD_ERROR_SCOPE_MISSING"
+CIB_PUSH_FORCED_FULL_DUE_TO_CRM_FEATURE_SET = "CIB_PUSH_FORCED_FULL_DUE_TO_CRM_FEATURE_SET"
 CIB_PUSH_ERROR = "CIB_PUSH_ERROR"
 CIB_SAVE_TMP_ERROR = "CIB_SAVE_TMP_ERROR"
 CIB_UPGRADE_FAILED = "CIB_UPGRADE_FAILED"

diff --git a/pcs/lib/cib/test/test_tools.py b/pcs/lib/cib/test/test_tools.py
@@ -13,11 +13,13 @@
     assert_raise_library_error,
     assert_report_item_list_equal,
 )
+from pcs.test.tools import fixture
 from pcs.test.tools.misc import get_test_resource as rc
 from pcs.test.tools.pcs_unittest import mock
 from pcs.test.tools.xml import get_xml_manipulation_creator_from_file
 
 from pcs.common import report_codes
+from pcs.common.tools import Version
 from pcs.lib.errors import ReportItemSeverity as severities
 
 from pcs.lib.cib import tools as lib
@@ -450,13 +452,68 @@ def test_no_revision(self):
 
     def test_with_revision(self):
         self.assertEqual(
-            (1, 2, 3),
+            Version(1, 2, 3),
             lib.get_pacemaker_version_by_which_cib_was_validated(
                 etree.XML('<cib validate-with="pacemaker-1.2.3"/>')
             )
         )
 
 
+class getCibCrmFeatureSet(TestCase):
+    def test_success(self):
+        self.assertEqual(
+            Version(3, 0, 9),
+            lib.get_cib_crm_feature_set(
+                etree.XML('<cib crm_feature_set="3.0.9" />')
+            )
+        )
+
+    def test_success_no_revision(self):
+        self.assertEqual(
+            Version(3, 1),
+            lib.get_cib_crm_feature_set(
+                etree.XML('<cib crm_feature_set="3.1" />')
+            )
+        )
+
+    def test_missing_attribute(self):
+        assert_raise_library_error(
+            lambda: lib.get_cib_crm_feature_set(
+                etree.XML("<cib />")
+            ),
+            fixture.error(
+                report_codes.CIB_LOAD_ERROR_BAD_FORMAT,
+                reason=(
+                    "the attribute 'crm_feature_set' of the element 'cib' is "
+                    "missing"
+                )
+            )
+        )
+
+    def test_missing_attribute_none(self):
+        self.assertEqual(
+            None,
+            lib.get_cib_crm_feature_set(
+                etree.XML('<cib />'),
+                none_if_missing=True
+            )
+        )
+
+    def test_invalid_version(self):
+        assert_raise_library_error(
+            lambda: lib.get_cib_crm_feature_set(
+                etree.XML('<cib crm_feature_set="3" />')
+            ),
+            fixture.error(
+                report_codes.CIB_LOAD_ERROR_BAD_FORMAT,
+                reason=(
+                    "the attribute 'crm_feature_set' of the element 'cib' has "
+                    "an invalid value: '3'"
+                )
+            )
+        )
+
+
 find_group = partial(lib.find_element_by_tag_and_id, "group")
 class FindTagWithId(TestCase):
     def test_returns_element_when_exists(self):

diff --git a/pcs/lib/cib/tools.py b/pcs/lib/cib/tools.py
@@ -245,31 +245,55 @@ def get_resources(tree):
     """
     return sections.get(tree, sections.RESOURCES)
 
-def get_pacemaker_version_by_which_cib_was_validated(cib):
-    """
-    Return version of pacemaker which validated specified cib as tree.
-    Version is returned as an instance of pcs.common.tools.Version.
-    Raises LibraryError on any failure.
-
-    cib -- cib etree
-    """
-    version = cib.get("validate-with")
+def _get_cib_version(cib, attribute, regexp, none_if_missing=False):
+    version = cib.get(attribute)
     if version is None:
+        if none_if_missing:
+            return None
         raise LibraryError(reports.cib_load_error_invalid_format(
-            "the attribute 'validate-with' of the element 'cib' is missing"
+            "the attribute '{0}' of the element 'cib' is missing".format(
+                attribute
+            )
         ))
-
-    regexp = re.compile(
-        r"pacemaker-(?P<major>\d+)\.(?P<minor>\d+)(\.(?P<rev>\d+))?"
-    )
     match = regexp.match(version)
     if not match:
         raise LibraryError(reports.cib_load_error_invalid_format(
-            "the attribute 'validate-with' of the element 'cib' has an invalid"
-            " value: '{0}'".format(version)
+            (
+                "the attribute '{0}' of the element 'cib' has an invalid"
+                " value: '{1}'"
+            ).format(attribute, version)
         ))
     return Version(
         int(match.group("major")),
         int(match.group("minor")),
         int(match.group("rev")) if match.group("rev") else None
     )
+
+def get_pacemaker_version_by_which_cib_was_validated(cib):
+    """
+    Return version of pacemaker which validated specified cib as tree.
+    Version is returned as an instance of pcs.common.tools.Version.
+    Raises LibraryError on any failure.
+
+    cib -- cib etree
+    """
+    return _get_cib_version(
+        cib,
+        "validate-with",
+        re.compile(r"pacemaker-(?P<major>\d+)\.(?P<minor>\d+)(\.(?P<rev>\d+))?")
+    )
+
+def get_cib_crm_feature_set(cib, none_if_missing=False):
+    """
+    Return crm_feature_set as pcs.common.tools.Version or raise LibraryError
+
+    etree cib -- cib etree
+    bool none_if_missing -- return None instead of raising when crm_feature_set
+        is missing
+    """
+    return _get_cib_version(
+        cib,
+        "crm_feature_set",
+        re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)(\.(?P<rev>\d+))?"),
+        none_if_missing=none_if_missing
+    )
diff --git a/pcs/lib/env.py b/pcs/lib/env.py
@@ -11,8 +11,10 @@
     NodeCommunicatorFactory,
     NodeTargetFactory
 )
+from pcs.common.tools import Version
 from pcs.lib import reports
 from pcs.lib.booth.env import BoothEnv
+from pcs.lib.cib.tools import get_cib_crm_feature_set
 from pcs.lib.pacemaker.env import PacemakerEnv
 from pcs.lib.cluster_conf_facade import ClusterConfFacade
 from pcs.lib.communication import qdevice
@@ -95,6 +97,7 @@ def __init__(
         self._cib_upgrade_reported = False
         self._cib_data_tmp_file = None
         self.__loaded_cib_diff_source = None
+        self.__loaded_cib_diff_source_feature_set = None
         self.__loaded_cib_to_modify = None
         self._communicator_factory = NodeCommunicatorFactory(
             LibCommunicatorLogger(self.logger, self.report_processor),
@@ -146,6 +149,14 @@ def get_cib(self, minimal_version=None):
                         reports.cib_upgrade_successful()
                     )
                 self._cib_upgrade_reported = True
+        self.__loaded_cib_diff_source_feature_set = (
+            get_cib_crm_feature_set(
+                self.__loaded_cib_to_modify,
+                none_if_missing=True
+            )
+            or
+            Version(0, 0, 0)
+        )
         return self.__loaded_cib_to_modify
 
     @property
@@ -179,10 +190,24 @@ def ensure_wait_satisfiable(self, wait):
 
     def push_cib(self, custom_cib=None, wait=False):
         if custom_cib is not None:
-            return self.push_cib_full(custom_cib, wait)
-        return self.push_cib_diff(wait)
+            return self._push_cib_full(custom_cib, wait)
+        # Push by diff works with crm_feature_set > 3.0.8, see
+        # https://bugzilla.redhat.com/show_bug.cgi?id=1488044 for details. We
+        # only check the version if a CIB has been loaded, otherwise the push
+        # fails anyway. By my testing it seems that only the source CIB's
+        # version matters.
+        if self.__loaded_cib_diff_source is not None:
+            if self.__loaded_cib_diff_source_feature_set < Version(3, 0, 9):
+                self.report_processor.process(
+                    reports.cib_push_forced_full_due_to_crm_feature_set(
+                        Version(3, 0, 9),
+                        self.__loaded_cib_diff_source_feature_set
+                    )
+                )
+                return self._push_cib_full(wait=wait)
+        return self._push_cib_diff(wait=wait)
 
-    def push_cib_full(self, custom_cib=None, wait=False):
+    def _push_cib_full(self, custom_cib=None, wait=False):
         if custom_cib is None and self.__loaded_cib_diff_source is None:
             raise AssertionError("CIB has not been loaded")
         if custom_cib is not None and self.__loaded_cib_diff_source is not None:
@@ -198,7 +223,7 @@ def push_cib_full(self, custom_cib=None, wait=False):
             wait
         )
 
-    def push_cib_diff(self, wait=False):
+    def _push_cib_diff(self, wait=False):
         if self.__loaded_cib_diff_source is None:
             raise AssertionError("CIB has not been loaded")
 
@@ -216,7 +241,6 @@ def __main_push_cib_diff(self, cmd_runner):
             self.__loaded_cib_diff_source,
             etree_to_str(self.__loaded_cib_to_modify)
         )
-
         if cib_diff_xml:
             push_cib_diff_xml(cmd_runner, cib_diff_xml)
 
@@ -225,6 +249,7 @@ def __do_push_cib(self, cmd_runner, push_strategy, wait):
         push_strategy()
         self._cib_upgrade_reported = False
         self.__loaded_cib_diff_source = None
+        self.__loaded_cib_diff_source_feature_set = None
         self.__loaded_cib_to_modify = None
         if self.is_cib_live and timeout is not False:
             wait_for_idle(cmd_runner, timeout)

diff --git a/pcs/lib/reports.py b/pcs/lib/reports.py
@@ -1406,6 +1406,21 @@ def cib_diff_error(reason, cib_old, cib_new):
         }
     )
 
+def cib_push_forced_full_due_to_crm_feature_set(required_set, current_set):
+    """
+    Pcs uses the old approach of pushing the CIB so race conditions may occur.
+
+    pcs.common.tools.Version required_set -- crm_feature_set required for diff
+    pcs.common.tools.Version current_set -- actual CIB crm_feature_set
+    """
+    return ReportItem.warning(
+        report_codes.CIB_PUSH_FORCED_FULL_DUE_TO_CRM_FEATURE_SET,
+        info={
+            "required_set": str(required_set),
+            "current_set": str(current_set),
+        }
+    )
+
 def cluster_state_cannot_load(reason):
     """
     cannot load cluster status from crm_mon, crm_mon exited with non-zero code