From 66f4f23cbf8c113863ea476b97a4cc976a677519 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 14 Feb 2019 11:43:47 -0600 Subject: [PATCH 1/5] Changed code to always print out all stderr files in node output directory to ensure all log files are reported for error dump. GH #6743 --- tests/Cluster.py | 19 ++++++------------- tests/TestHelper.py | 4 ++-- tests/nodeos_under_min_avail_ram.py | 2 +- tests/nodeos_voting_test.py | 2 +- tests/restart-scenarios-test.py | 2 +- 5 files changed, 11 insertions(+), 18 deletions(-) diff --git a/tests/Cluster.py b/tests/Cluster.py index 2a202be481d..1494ca13650 100644 --- a/tests/Cluster.py +++ b/tests/Cluster.py @@ -1289,19 +1289,16 @@ def __findFiles(path): match=re.match("stderr\..+\.txt", entry.name) if match: files.append(os.path.join(path, entry.name)) + files.sort() return files - def dumpErrorDetails(self, allStderrFiles=False): + def dumpErrorDetails(self): fileName=os.path.join(Cluster.__configDir + Cluster.nodeExtensionToName("bios"), "config.ini") Cluster.dumpErrorDetailImpl(fileName) path=Cluster.__dataDir + Cluster.nodeExtensionToName("bios") - if not allStderrFiles: - fileName=os.path.join(path, "stderr.txt") + fileNames=Cluster.__findFiles(path) + for fileName in fileNames: Cluster.dumpErrorDetailImpl(fileName) - else: - fileNames=Cluster.__findFiles(path) - for fileName in fileNames: - Cluster.dumpErrorDetailImpl(fileName) for i in range(0, len(self.nodes)): configLocation=Cluster.__configDir + Cluster.nodeExtensionToName(i) @@ -1310,13 +1307,9 @@ def dumpErrorDetails(self, allStderrFiles=False): fileName=os.path.join(configLocation, "genesis.json") Cluster.dumpErrorDetailImpl(fileName) path=Cluster.__dataDir + Cluster.nodeExtensionToName(i) - if not allStderrFiles: - fileName=os.path.join(path, "stderr.txt") + fileNames=Cluster.__findFiles(path) + for fileName in fileNames: Cluster.dumpErrorDetailImpl(fileName) - else: - fileNames=Cluster.__findFiles(path) - for fileName in fileNames: - Cluster.dumpErrorDetailImpl(fileName) if self.useBiosBootFile: Cluster.dumpErrorDetailImpl(Cluster.__bootlog) diff --git a/tests/TestHelper.py b/tests/TestHelper.py index d4e978872ce..1650597dee5 100644 --- a/tests/TestHelper.py +++ b/tests/TestHelper.py @@ -125,7 +125,7 @@ def printSystemInfo(prefix): @staticmethod # pylint: disable=too-many-arguments - def shutdown(cluster, walletMgr, testSuccessful=True, killEosInstances=True, killWallet=True, keepLogs=False, cleanRun=True, dumpErrorDetails=False, allStderrFiles=False): + def shutdown(cluster, walletMgr, testSuccessful=True, killEosInstances=True, killWallet=True, keepLogs=False, cleanRun=True, dumpErrorDetails=False): """Cluster and WalletMgr shutdown and cleanup.""" assert(cluster) assert(isinstance(cluster, Cluster)) @@ -145,7 +145,7 @@ def shutdown(cluster, walletMgr, testSuccessful=True, killEosInstances=True, kil Utils.Print("Test failed.") if not testSuccessful and dumpErrorDetails: cluster.reportStatus() - cluster.dumpErrorDetails(allStderrFiles=allStderrFiles) + cluster.dumpErrorDetails() if walletMgr: walletMgr.dumpErrorDetails() cluster.printBlockLogIfNeeded() diff --git a/tests/nodeos_under_min_avail_ram.py b/tests/nodeos_under_min_avail_ram.py index 121d5aded99..57f51beafe4 100755 --- a/tests/nodeos_under_min_avail_ram.py +++ b/tests/nodeos_under_min_avail_ram.py @@ -328,6 +328,6 @@ def setName(self, num): testSuccessful=True finally: - TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killWallet, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails, allStderrFiles=True) + TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killWallet, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails) exit(0) diff --git a/tests/nodeos_voting_test.py b/tests/nodeos_voting_test.py index d4781d0eefe..be77a815de9 100755 --- a/tests/nodeos_voting_test.py +++ b/tests/nodeos_voting_test.py @@ -246,6 +246,6 @@ def verifyProductionRounds(trans, node, prodsActive, rounds): testSuccessful=True finally: - TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) + TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killWallet, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails) exit(0) diff --git a/tests/restart-scenarios-test.py b/tests/restart-scenarios-test.py index 37dcc09792b..6b3c217d75d 100755 --- a/tests/restart-scenarios-test.py +++ b/tests/restart-scenarios-test.py @@ -140,6 +140,6 @@ testSuccessful=True finally: - TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killEosInstances, keepLogs, killAll, dumpErrorDetails) + TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killEosInstances, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails) exit(0) From 4c766aed7396ff3fa73d71536104d23cd49bd135 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 14 Feb 2019 11:58:42 -0600 Subject: [PATCH 2/5] Improved error reporting for buildkite error file dumping. GH #6743 --- tests/Cluster.py | 17 ++++++++--------- tests/TestHelper.py | 3 +++ tests/nodeos_forked_chain_test.py | 9 +++++++++ tests/testUtils.py | 2 ++ 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tests/Cluster.py b/tests/Cluster.py index 1494ca13650..3bc0f215566 100644 --- a/tests/Cluster.py +++ b/tests/Cluster.py @@ -32,7 +32,6 @@ class Cluster(object): __bootlog="eosio-ignition-wd/bootlog.txt" __configDir="etc/eosio/" __dataDir="var/lib/" - __fileDivider="=================================================================" # pylint: disable=too-many-arguments # walletd [True|False] Is keosd running. If not load the wallet plugin @@ -1272,7 +1271,7 @@ def relaunchEosInstances(self, cachePopen=False): @staticmethod def dumpErrorDetailImpl(fileName): - Utils.Print(Cluster.__fileDivider) + Utils.Print(Utils.FileDivider) Utils.Print("Contents of %s:" % (fileName)) if os.path.exists(fileName): with open(fileName, "r") as f: @@ -1460,8 +1459,8 @@ def getBlockLog(self, nodeExtension): def printBlockLog(self): blockLogBios=self.getBlockLog("bios") - Utils.Print(Cluster.__fileDivider) - Utils.Print("Block log from %s:\n%s" % (blockLogDir, json.dumps(blockLogBios, indent=1))) + Utils.Print(Utils.FileDivider) + Utils.Print("Block log from %s:\n%s" % ("bios", json.dumps(blockLogBios, indent=1))) if not hasattr(self, "nodes"): return @@ -1470,8 +1469,8 @@ def printBlockLog(self): for i in range(numNodes): node=self.nodes[i] blockLog=self.getBlockLog(i) - Utils.Print(Cluster.__fileDivider) - Utils.Print("Block log from %s:\n%s" % (blockLogDir, json.dumps(blockLog, indent=1))) + Utils.Print(Utils.FileDivider) + Utils.Print("Block log from node %s:\n%s" % (i, json.dumps(blockLog, indent=1))) def compareBlockLogs(self): @@ -1547,11 +1546,11 @@ def compareCommon(blockLogs, blockNameExtensions, first, last): if ret is not None: blockLogDir1=Cluster.__dataDir + Cluster.nodeExtensionToName(commonBlockNameExtensions[0]) + "/blocks/" blockLogDir2=Cluster.__dataDir + Cluster.nodeExtensionToName(commonBlockNameExtensions[i]) + "/blocks/" - Utils.Print(Cluster.__fileDivider) + Utils.Print(Utils.FileDivider) Utils.Print("Block log from %s:\n%s" % (blockLogDir1, json.dumps(commonBlockLogs[0], indent=1))) - Utils.Print(Cluster.__fileDivider) + Utils.Print(Utils.FileDivider) Utils.Print("Block log from %s:\n%s" % (blockLogDir2, json.dumps(commonBlockLogs[i], indent=1))) - Utils.Print(Cluster.__fileDivider) + Utils.Print(Utils.FileDivider) Utils.errorExit("Block logs do not match, difference description -> %s" % (ret)) return True diff --git a/tests/TestHelper.py b/tests/TestHelper.py index 1650597dee5..10b69fa334d 100644 --- a/tests/TestHelper.py +++ b/tests/TestHelper.py @@ -145,6 +145,9 @@ def shutdown(cluster, walletMgr, testSuccessful=True, killEosInstances=True, kil Utils.Print("Test failed.") if not testSuccessful and dumpErrorDetails: cluster.reportStatus() + Utils.Print(Utils.FileDivider) + psOut=Cluster.pgrepEosServers(timeout=60) + Utils.Print("pgrep output:\n%s" % (psOut)) cluster.dumpErrorDetails() if walletMgr: walletMgr.dumpErrorDetails() diff --git a/tests/nodeos_forked_chain_test.py b/tests/nodeos_forked_chain_test.py index de24f9b189f..f16bcf63762 100755 --- a/tests/nodeos_forked_chain_test.py +++ b/tests/nodeos_forked_chain_test.py @@ -433,4 +433,13 @@ def getMinHeadAndLib(prodNodes): finally: TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) + if not testSuccessful: + Print(Utils.FileDivider) + Print("Compare Blocklog") + cluster.compareBlockLogs() + Print(Utils.FileDivider) + Print("Compare Blocklog") + cluster.printBlockLog() + Print(Utils.FileDivider) + exit(0) diff --git a/tests/testUtils.py b/tests/testUtils.py index a8dbe0fd4d2..dc09eb34ae8 100755 --- a/tests/testUtils.py +++ b/tests/testUtils.py @@ -34,6 +34,8 @@ class Utils: EosBlockLogPath="programs/eosio-blocklog/eosio-blocklog" + FileDivider="=================================================================" + @staticmethod def Print(*args, **kwargs): stackDepth=len(inspect.stack())-2 From c6d796b438b4417097ccab1b8340a72c4e11eef8 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 14 Feb 2019 11:59:48 -0600 Subject: [PATCH 3/5] Fixed not giving nodeos enough time to process priority queue before shutting down. GH #6743 --- tests/Node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Node.py b/tests/Node.py index 11ed5696da5..0ba70c16cf7 100644 --- a/tests/Node.py +++ b/tests/Node.py @@ -1212,7 +1212,7 @@ def interruptAndVerifyExitStatus(self): assert self.popenProc is not None, "node: \"%s\" does not have a popenProc, this may be because it is only set after a relaunch." % (self.cmd) self.popenProc.send_signal(signal.SIGINT) try: - outs, _ = self.popenProc.communicate(timeout=1) + outs, _ = self.popenProc.communicate(timeout=15) assert self.popenProc.returncode == 0, "Expected terminating \"%s\" to have an exit status of 0, but got %d" % (self.cmd, self.popenProc.returncode) except subprocess.TimeoutExpired: Utils.errorExit("Terminate call failed on node: %s" % (self.cmd)) From f17a33e1563b55b19a57d0a9fa2726f33f4fad16 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 14 Feb 2019 12:02:01 -0600 Subject: [PATCH 4/5] Changed voting test to just indicate when a producer does not produce 12 blocks in a row, since there are valid situations for this to happen. GH #6743 --- tests/nodeos_voting_test.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/nodeos_voting_test.py b/tests/nodeos_voting_test.py index be77a815de9..ee728962b9f 100755 --- a/tests/nodeos_voting_test.py +++ b/tests/nodeos_voting_test.py @@ -97,6 +97,7 @@ def verifyProductionRounds(trans, node, prodsActive, rounds): Utils.Print("ADJUSTED %s blocks" % (invalidCount-1)) prodsSeen=None + reportFirstMissedBlock=False Utils.Print("Verify %s complete rounds of all producers producing" % (rounds)) for i in range(0, rounds): prodsSeen={} @@ -113,17 +114,19 @@ def verifyProductionRounds(trans, node, prodsActive, rounds): validBlockProducer(prodsActive, prodsSeen, blockNum, node1) blockProducer=node.getBlockProducerByNum(blockNum) if lastBlockProducer!=blockProducer: - printStr="" - newBlockNum=blockNum-18 - for l in range(0,36): - printStr+="%s" % (newBlockNum) - printStr+=":" - newBlockProducer=node.getBlockProducerByNum(newBlockNum) - printStr+="%s" % (newBlockProducer) - printStr+=" " - newBlockNum+=1 - Utils.cmdError("expected blockNum %s (started from %s) to be produced by %s, but produded by %s: round=%s, prod slot=%s, prod num=%s - %s" % (blockNum, startingFrom, lastBlockProducer, blockProducer, i, j, k, printStr)) - Utils.errorExit("Failed because of incorrect block producer order") + if not reportFirstMissedBlock: + printStr="" + newBlockNum=blockNum-18 + for l in range(0,36): + printStr+="%s" % (newBlockNum) + printStr+=":" + newBlockProducer=node.getBlockProducerByNum(newBlockNum) + printStr+="%s" % (newBlockProducer) + printStr+=" " + newBlockNum+=1 + Utils.Print("NOTE: expected blockNum %s (started from %s) to be produced by %s, but produded by %s: round=%s, prod slot=%s, prod num=%s - %s" % (blockNum, startingFrom, lastBlockProducer, blockProducer, i, j, k, printStr)) + reportFirstMissedBlock=True + break blockNum+=1 # make sure that we have seen all 21 producers From 1a37bd67ec900f54ba3fac2a12f0c2785c66318a Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 14 Feb 2019 14:19:26 -0600 Subject: [PATCH 5/5] Added test logging to issolate error. GH #6743 --- programs/nodeos/main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/nodeos/main.cpp b/programs/nodeos/main.cpp index b858db748dd..7867914bb24 100644 --- a/programs/nodeos/main.cpp +++ b/programs/nodeos/main.cpp @@ -152,5 +152,6 @@ int main(int argc, char** argv) return OTHER_FAIL; } + ilog("nodeos successfully exiting"); return SUCCESS; }