diff --git a/core/unix/inc/TUnixSystem.h b/core/unix/inc/TUnixSystem.h
index 42abc6beb88ff..d5f49c97100ff 100644
--- a/core/unix/inc/TUnixSystem.h
+++ b/core/unix/inc/TUnixSystem.h
@@ -31,11 +31,33 @@
 #include "TTimer.h"
 #endif
 
+#include <memory>
+#include <thread>
+
+int StackTraceExec(void *);
+
 typedef void (*SigHandler_t)(ESignals);
 
 
 class TUnixSystem : public TSystem {
 
+   friend int StackTraceExec(void *);
+
+private:
+   // State prepared in advance by Init()/CachePidInfo() for producing stack
+   // traces: fixed-size argument strings, two pipes and the helper thread.
+   struct StackTraceHelper_t {
+      static const int fStringLength = 255;       // size of each text buffer
+      char fShellExec[fStringLength];             // kStackArgv[0]
+      char fPidString[fStringLength];             // kStackArgv[1]
+      char fPidNum[fStringLength];                // kStackArgv[2]
+      int fParentToChild[2];                      // pipe carrying requests to the helper thread
+      int fChildToParent[2];                      // pipe carrying the helper thread's replies
+      std::unique_ptr<std::thread> fHelperThread; // thread running StackTraceHelperThread()
+   };
+
+   static StackTraceHelper_t fStackTraceHelper;
+
 protected:
    const char *FindDynamicLibrary(TString &lib, Bool_t quiet = kFALSE);
    const char *GetLinkedLibraries();
@@ -71,6 +93,12 @@ class TUnixSystem : public TSystem {
    static int UnixRecv(int sock, void *buf, int len, int flag);
    static int UnixSend(int sock, const void *buf, int len, int flag);
 
+   // added helper static members for stacktrace
+   static char *const kStackArgv[];
+   static char *const *GetStackArgv();
+   static void StackTraceHelperThread();
+   void CachePidInfo();
+
 public:
    TUnixSystem();
    virtual ~TUnixSystem();
@@ -121,6 +149,7 @@ class TUnixSystem : public TSystem {
    void Abort(int code = 0);
    int GetPid();
    void StackTrace();
+   static void StackTraceFromThread();
 
    //---- Directories ------------------------------------------
    int MakeDirectory(const char *name);
diff --git a/core/unix/src/TUnixSystem.cxx b/core/unix/src/TUnixSystem.cxx
index fc2df48f0bc8b..863f286166dbf 100644
--- a/core/unix/src/TUnixSystem.cxx
+++ b/core/unix/src/TUnixSystem.cxx
@@ -42,3 +42,11 @@
 
+#ifdef __linux__
+#include <sched.h>
+#include <sys/syscall.h>
+#endif
+
+#include <chrono>
+#include <poll.h>
+
 //#define G__OLDEXPAND
 
@@ -386,6 +394,104 @@ class TFdSet {
    ULong_t *GetBits() { return (ULong_t *)fds_bits; }
 };
 
+////////////////////////////////////////////////////////////////////////////////
+/// Write the whole NUL-terminated string `text` to `fd`, retrying interrupted
+/// and partial writes.  Returns 0 on success and -errno on failure.
+
+static int SignalSafeWrite(int fd, const char *text) {
+   const char *buffer = text;
+   size_t count = strlen(text);
+   ssize_t written = 0;
+   while (count) {
+      written = write(fd, buffer, count);
+      if (written == -1) {
+         if (errno == EINTR) { continue; }
+         else { return -errno; }
+      }
+      count -= written;
+      buffer += written;
+   }
+   return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Read exactly `len` bytes from `fd` into `inbuf`.
+///
+/// With a negative `timeout` (the default) the descriptor's blocking mode is
+/// left untouched and no deadline applies.  Otherwise the descriptor is put in
+/// non-blocking mode for the duration of the call and the read is abandoned
+/// once `timeout` seconds have elapsed.
+///
+/// Returns 0 on success, -ETIMEDOUT on timeout, -EPIPE if end-of-file is hit
+/// before `len` bytes arrived, and -errno for any other failure.
+
+static int SignalSafeRead(int fd, char *inbuf, size_t len, int timeout=-1) {
+   char *buf = inbuf;
+   size_t count = len;
+   ssize_t complete = 0;
+   const std::chrono::time_point<std::chrono::steady_clock> endTime =
+      std::chrono::steady_clock::now() + std::chrono::seconds(timeout);
+   int flags;
+   if (timeout < 0) {
+      flags = O_NONBLOCK; // Prevents us from trying to set / restore flags later.
+   } else if ((-1 == (flags = fcntl(fd, F_GETFL)))) {
+      return -errno;
+   }
+   const bool restoreFlags = ((flags & O_NONBLOCK) != O_NONBLOCK);
+   if (restoreFlags && (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK))) {
+      return -errno;
+   }
+   int result = 0;
+   while (count) {
+      if (timeout >= 0) {
+         struct pollfd pollInfo{fd, POLLIN, 0};
+         const int msRemaining = std::chrono::duration_cast<std::chrono::milliseconds>(endTime-std::chrono::steady_clock::now()).count();
+         if (msRemaining < 0) {
+            result = -ETIMEDOUT;
+            break;
+         }
+         if (msRemaining > 0) {
+            const int ready = poll(&pollInfo, 1, msRemaining);
+            if (ready == 0) {
+               result = -ETIMEDOUT;
+               break;
+            }
+            if ((ready == -1) && (errno != EINTR)) {
+               // A failing poll() must not be ignored: the read() below could
+               // then keep returning EAGAIN and spin until the deadline.
+               result = -errno;
+               break;
+            }
+         }
+      }
+      complete = read(fd, buf, count);
+      if (complete == -1) {
+         if ((errno == EINTR) || (errno == EAGAIN) || (errno == EWOULDBLOCK)) { continue; }
+         result = -errno;
+         break;
+      }
+      if (complete == 0) {
+         // End of file: `count` would never shrink, so looping on it would
+         // never terminate.
+         result = -EPIPE;
+         break;
+      }
+      count -= complete;
+      buf += complete;
+   }
+   if (restoreFlags) {
+      fcntl(fd, F_SETFL, flags);
+   }
+   return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Write `text` to standard error (file descriptor 2) via SignalSafeWrite().
+
+static int SignalSafeErrWrite(const char *text) {
+   return SignalSafeWrite(2, text);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 /// Unix signal handler.
 
@@ -569,6 +675,11 @@ TUnixSystem::~TUnixSystem()
 ////////////////////////////////////////////////////////////////////////////////
 /// Initialize Unix system interface.
 
+// Storage for the stack-trace state and the argument vector returned by
+// GetStackArgv(); the strings are filled in by Init() and CachePidInfo().
+TUnixSystem::StackTraceHelper_t TUnixSystem::fStackTraceHelper;
+char * const TUnixSystem::kStackArgv[] = {TUnixSystem::fStackTraceHelper.fShellExec, TUnixSystem::fStackTraceHelper.fPidString, TUnixSystem::fStackTraceHelper.fPidNum, nullptr};
+
 Bool_t TUnixSystem::Init()
 {
    if (TSystem::Init())
@@ -608,6 +719,36 @@ Bool_t TUnixSystem::Init()
    gRootDir = ROOTPREFIX;
 #endif
 
+   // Pre-format the strings used to launch the backtrace script, so that no
+   // formatting is needed when a stack trace is requested.  snprintf() is
+   // given the full buffer size, which makes "result >= size" an exact
+   // truncation test.  The normal path of Init() ends in "return kFALSE", so
+   // failures return kTRUE to be distinguishable from success.
+   int nchar = snprintf(fStackTraceHelper.fShellExec, fStackTraceHelper.fStringLength, "/bin/sh");
+   if ((nchar < 0) || (nchar >= fStackTraceHelper.fStringLength)) {
+      SignalSafeErrWrite("Unable to pre-allocate shell command path");
+      return kTRUE;
+   }
+
+#ifdef ROOTETCDIR
+   nchar = snprintf(fStackTraceHelper.fPidString, fStackTraceHelper.fStringLength, "%s/gdb-backtrace.sh", ROOTETCDIR);
+#else
+   // Guard against a null result (presumably ROOTSYS unset) reaching "%s".
+   const char *rootsys = gSystem->Getenv("ROOTSYS");
+   nchar = snprintf(fStackTraceHelper.fPidString, fStackTraceHelper.fStringLength, "%s/etc/gdb-backtrace.sh", rootsys ? rootsys : "");
+#endif
+   if ((nchar < 0) || (nchar >= fStackTraceHelper.fStringLength)) {
+      SignalSafeErrWrite("Unable to pre-allocate executable information");
+      return kTRUE;
+   }
+
+   fStackTraceHelper.fParentToChild[0] = -1;
+   fStackTraceHelper.fParentToChild[1] = -1;
+   fStackTraceHelper.fChildToParent[0] = -1;
+   fStackTraceHelper.fChildToParent[1] = -1;
+
+   CachePidInfo();
+
    return kFALSE;
 }
 
@@ -2154,10 +2295,8 @@ void TUnixSystem::StackTrace()
       }
       gdbscript += " ";
    }
-
    TString gdbmess = gEnv->GetValue("Root.StacktraceMessage", "");
    gdbmess = gdbmess.Strip();
-
    std::cout.flush();
    fflush(stdout);
 
@@ -2286,7 +2425,6 @@ void TUnixSystem::StackTrace()
       fprintf(f, "%s\n", gdbmess.Data());
       fclose(f);
    }
-
    // use gdb to get stack trace
 #ifdef R__MACOSX
    gdbscript += GetExePath();
@@ -3524,6 +3662,10 @@ static void sighandler(int sig)
 
 void TUnixSystem::DispatchSignals(ESignals sig)
 {
+   const char* signalname = "unknown";
+   // OS signal number to re-raise after a fatal signal; 0 means "not fatal".
+   // `sig` is an ESignals enumerator, so it is not passed to signal()/raise().
+   int fatalSignal = 0;
    switch (sig) {
    case kSigAlarm:
       DispatchTimers(kFALSE);
@@ -3532,8 +3674,17 @@ void TUnixSystem::DispatchSignals(ESignals sig)
       CheckChilds();
       break;
    case kSigBus:
+      signalname = "bus error";
+      fatalSignal = SIGBUS;
+      break;
    case kSigSegmentationViolation:
+      signalname = "segmentation violation";
+      fatalSignal = SIGSEGV;
+      break;
    case kSigIllegalInstruction:
+      signalname = "illegal instruction";
+      fatalSignal = SIGILL;
+      break;
    case kSigFloatingException:
       Break("TUnixSystem::DispatchSignals", "%s", UnixSigname(sig));
       StackTrace();
@@ -3558,6 +3709,22 @@ void TUnixSystem::DispatchSignals(ESignals sig)
       break;
    }
 
+   if (fatalSignal != 0)
+   {
+
+      SignalSafeErrWrite("\n\nA fatal system signal has occurred: ");
+      SignalSafeErrWrite(signalname);
+      SignalSafeErrWrite("\nThe following is the call stack containing the origin of the signal.\n"
+                         "NOTE:The first few functions on the stack are artifacts of processing the signal and can be ignored\n\n");
+
+      TUnixSystem::StackTraceFromThread();
+
+      // Restore the default action and re-deliver the signal, using the OS
+      // signal number recorded in the switch above.
+      signal(fatalSignal, SIG_DFL);
+      raise(fatalSignal);
+   }
+
    // check a-synchronous signals
    if (fSigcnt > 0 && fSignalHandler->GetSize() > 0)
       CheckSignals(kFALSE);
@@ -5148,3 +5315,195 @@ int TUnixSystem::GetProcInfo(ProcInfo_t *info) const
 
    return 0;
 }
+
+static void StackTraceFork();
+
+////////////////////////////////////////////////////////////////////////////////
+/// Reset SIGILL, SIGSEGV and SIGBUS to their default dispositions.
+
+void SetDefaultSignals() {
+   signal(SIGILL, SIG_DFL);
+   signal(SIGSEGV, SIG_DFL);
+   signal(SIGBUS, SIG_DFL);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Create a pipe whose descriptors are both close-on-exec.  pipe2() is only
+/// used on Linux; elsewhere the flag is set with fcntl() after a plain pipe().
+/// Returns 0 on success and -1 on failure.
+
+static int StackTracePipe(int fds[2])
+{
+#ifdef __linux__
+   return pipe2(fds, O_CLOEXEC);
+#else
+   if (-1 == pipe(fds)) {
+      return -1;
+   }
+   if ((-1 == fcntl(fds[0], F_SETFD, FD_CLOEXEC)) || (-1 == fcntl(fds[1], F_SETFD, FD_CLOEXEC))) {
+      close(fds[0]);
+      close(fds[1]);
+      fds[0] = -1; fds[1] = -1;
+      return -1;
+   }
+   return 0;
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Body of the helper thread started by CachePidInfo().  It reads
+/// one-character commands from the parent-to-child pipe:
+///   - '1': run StackTraceFork(), then echo the command back on the
+///          child-to-parent pipe;
+///   - '2': close the pipe ends in use and reload them from fStackTraceHelper;
+///   - '3': leave the loop, which ends the thread.
+/// A read failure or an unknown command aborts the process.
+
+void TUnixSystem::StackTraceHelperThread()
+{
+   int toParent = fStackTraceHelper.fChildToParent[1];
+   int fromParent = fStackTraceHelper.fParentToChild[0];
+   char buf[2]; buf[1] = '\0';
+   while(true) {
+      // Wait without a timeout.  This thread idles until a trace is requested;
+      // with a timeout, -ETIMEDOUT would take the error path below and
+      // abort() a healthy process after the first idle period.
+      int result = SignalSafeRead(fromParent, buf, 1);
+      if (result < 0) {
+         SetDefaultSignals();
+         close(toParent);
+         SignalSafeErrWrite("\n\nTraceback helper thread failed to read from parent: ");
+         SignalSafeErrWrite(strerror(-result));
+         SignalSafeErrWrite("\n");
+         ::abort();
+      }
+      if (buf[0] == '1') {
+         SetDefaultSignals();
+         StackTraceFork();
+         SignalSafeWrite(toParent, buf);
+      } else if (buf[0] == '2') {
+         close(toParent);
+         close(fromParent);
+         toParent = fStackTraceHelper.fChildToParent[1];
+         fromParent = fStackTraceHelper.fParentToChild[0];
+      } else if (buf[0] == '3') {
+         break;
+      } else {
+         SetDefaultSignals();
+         close(toParent);
+         SignalSafeErrWrite("\n\nTraceback helper thread got unknown command from parent: ");
+         SignalSafeErrWrite(buf);
+         SignalSafeErrWrite("\n");
+         ::abort();
+      }
+   }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Ask the helper thread for a stack trace and wait for its acknowledgement.
+/// The wait is limited to five minutes so that a helper which never answers
+/// cannot hang the caller forever.  Failures are reported on standard error.
+
+void TUnixSystem::StackTraceFromThread()
+{
+   int result = SignalSafeWrite(fStackTraceHelper.fParentToChild[1], "1");
+   if (result < 0) {
+      SignalSafeErrWrite("\n\nAttempt to request stacktrace failed: ");
+      SignalSafeErrWrite(strerror(-result));
+      SignalSafeErrWrite("\n");
+      return;
+   }
+   char buf[2]; buf[1] = '\0';
+   if ((result = SignalSafeRead(fStackTraceHelper.fChildToParent[0], buf, 1, 5*60)) < 0) {
+      SignalSafeErrWrite("\n\nWaiting for stacktrace completion failed: ");
+      SignalSafeErrWrite(strerror(-result));
+      SignalSafeErrWrite("\n");
+      return;
+   }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Start a child that runs StackTraceExec() and wait for it to exit.  On
+/// Linux the child is created with clone() on a small local stack and shares
+/// this process's memory (CLONE_VM); elsewhere fork() is used.
+
+void StackTraceFork()
+{
+   static const int stackSize = 4*1024;
+   char childStack[stackSize];
+   char *childStackPtr = childStack + stackSize;
+   int pid =
+#ifdef __linux__
+      clone(StackTraceExec, childStackPtr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
+#else
+      fork();
+   if (childStackPtr) {} // Suppress 'unused variable' warning on non-Linux
+   if (pid == 0) { StackTraceExec(nullptr); ::abort(); }
+#endif
+   if (pid == -1) {
+      SignalSafeErrWrite("(Attempt to perform stack dump failed.)\n");
+   } else {
+      int status;
+      if (waitpid(pid, &status, 0) == -1) {
+         SignalSafeErrWrite("(Failed to wait on stack dump output.)\n");
+      } else {}
+   }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Child entry point used by StackTraceFork(): exec "/bin/sh" with the
+/// argument vector from GetStackArgv().  Aborts if the exec returns.
+
+int StackTraceExec(void *)
+{
+   char *const *argv = TUnixSystem::GetStackArgv();
+#ifdef __linux__
+   syscall(SYS_execve, "/bin/sh", argv, __environ);
+#else
+   execv("/bin/sh", argv);
+#endif
+   ::abort();
+   return 1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Return kStackArgv, the argument vector built from fStackTraceHelper.
+
+char *const *TUnixSystem::GetStackArgv() {
+   return kStackArgv;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Store the current process id as text in fStackTraceHelper.fPidNum,
+/// replace both pipes with fresh close-on-exec ones and start a detached
+/// thread running StackTraceHelperThread().
+
+void TUnixSystem::CachePidInfo()
+{
+   const int nchar = snprintf(fStackTraceHelper.fPidNum, fStackTraceHelper.fStringLength, "%d", GetPid());
+   if ((nchar < 0) || (nchar >= fStackTraceHelper.fStringLength)) {
+      SignalSafeErrWrite("Unable to pre-allocate process id information");
+      return;
+   }
+
+   close(fStackTraceHelper.fChildToParent[0]);
+   close(fStackTraceHelper.fChildToParent[1]);
+   fStackTraceHelper.fChildToParent[0] = -1; fStackTraceHelper.fChildToParent[1] = -1;
+   close(fStackTraceHelper.fParentToChild[0]);
+   close(fStackTraceHelper.fParentToChild[1]);
+   fStackTraceHelper.fParentToChild[0] = -1; fStackTraceHelper.fParentToChild[1] = -1;
+
+   if (-1 == StackTracePipe(fStackTraceHelper.fChildToParent)) {
+      fprintf(stdout, "pipe fStackTraceHelper.fChildToParent failed\n");
+      return;
+   }
+   if (-1 == StackTracePipe(fStackTraceHelper.fParentToChild)){
+      close(fStackTraceHelper.fChildToParent[0]); close(fStackTraceHelper.fChildToParent[1]);
+      fStackTraceHelper.fChildToParent[0] = -1; fStackTraceHelper.fChildToParent[1] = -1;
+      fprintf(stdout, "pipe parentToChild failed\n");
+      return;
+   }
+
+   fStackTraceHelper.fHelperThread.reset(new std::thread(StackTraceHelperThread));
+   fStackTraceHelper.fHelperThread->detach();
+}