Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement signal handler to avoid async-unsafe functions #97

Closed
wants to merge 7 commits into from
23 changes: 23 additions & 0 deletions core/unix/inc/TUnixSystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,22 @@
#include "TTimer.h"
#endif

#include <thread>
#include <memory>

namespace std {
class thread;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need a forward dec'l if you already include

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And please use ROOT coding standards.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(by the way, is there a good online ref for these to link to?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Philippe - Zhe, can you run through this guide and try to get the code to compliance?

@pcanal - when contributing to CVMFS, they provide an automated tool (based on cpplint). I see you reference astyle for managing whitespace. Do you have anything similarly automated you use?

}

int global_stacktrace(void *);

typedef void (*SigHandler_t)(ESignals);


class TUnixSystem : public TSystem {

friend int global_stacktrace(void *);

protected:
const char *FindDynamicLibrary(TString &lib, Bool_t quiet = kFALSE);
const char *GetLinkedLibraries();
Expand Down Expand Up @@ -71,6 +82,17 @@ class TUnixSystem : public TSystem {
static int UnixRecv(int sock, void *buf, int len, int flag);
static int UnixSend(int sock, const void *buf, int len, int flag);

// added helper static members for stacktrace
static const int pidStringLength_ = 255;
static char pidString_[pidStringLength_];
static char * const pstackArgv_[];
static char *const *getPstackArgv();
static int parentToChild_[2];
static int childToParent_[2];
static std::unique_ptr<std::thread> helperThread_;
static void stacktraceHelperThread();
void cachePidInfo();

public:
TUnixSystem();
virtual ~TUnixSystem();
Expand Down Expand Up @@ -121,6 +143,7 @@ class TUnixSystem : public TSystem {
void Abort(int code = 0);
int GetPid();
void StackTrace();
static void stacktraceFromThread();

//---- Directories ------------------------------------------
int MakeDirectory(const char *name);
Expand Down
231 changes: 228 additions & 3 deletions core/unix/src/TUnixSystem.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -568,12 +568,68 @@ TUnixSystem::~TUnixSystem()

////////////////////////////////////////////////////////////////////////////////
/// Initialize Unix system interface.
extern "C"
{
static int full_write(int fd, const char *text)
{
const char *buffer = text;
size_t count = strlen(text);
ssize_t written = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since written is not used outside the loop, it ought to be declared inside.

while (count)
{
written = write(fd, buffer, count);
if (written == -1)
{
if (errno == EINTR) {continue;}
else {return -errno;}
}
count -= written;
buffer += written;
}
return 0;
}

static int full_read(int fd, char *inbuf, size_t len)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add in an optional timeout to this function so the thread requesting the stack trace is less likely to timeout.

{
char *buf = inbuf;
size_t count = len;
ssize_t complete = 0;
while (count)
{
complete = read(fd, buf, count);
if (complete == -1)
{
if (errno == EINTR) {continue;}
else {return -errno;}
}
count -= complete;
buf += complete;
}
return 0;
}

static int full_cerr_write(const char *text)
{
return full_write(2, text);
}

}

static char pshellExec[8] = "/bin/sh";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably less error-prone to do pshellExec[] instead of pshellExec[8]; let the compiler do the work for you.

char TUnixSystem::pidString_[TUnixSystem::pidStringLength_] = {};
static char pidNum[11] = {}; //--- The largest PID number could be converted to 10 characters.
char * const TUnixSystem::pstackArgv_[] = {pshellExec, TUnixSystem::pidString_, pidNum, nullptr};
int TUnixSystem::parentToChild_[2] = {-1, -1};
int TUnixSystem::childToParent_[2] = {-1, -1};
std::unique_ptr<std::thread> TUnixSystem::helperThread_;

Bool_t TUnixSystem::Init()
{
if (TSystem::Init())
return kTRUE;

cachePidInfo();

fReadmask = new TFdSet;
fWritemask = new TFdSet;
fReadready = new TFdSet;
Expand Down Expand Up @@ -2154,10 +2210,8 @@ void TUnixSystem::StackTrace()
}
gdbscript += " ";
}

TString gdbmess = gEnv->GetValue("Root.StacktraceMessage", "");
gdbmess = gdbmess.Strip();

std::cout.flush();
fflush(stdout);

Expand Down Expand Up @@ -2286,7 +2340,6 @@ void TUnixSystem::StackTrace()
fprintf(f, "%s\n", gdbmess.Data());
fclose(f);
}

// use gdb to get stack trace
#ifdef R__MACOSX
gdbscript += GetExePath();
Expand Down Expand Up @@ -3524,6 +3577,7 @@ static void sighandler(int sig)

void TUnixSystem::DispatchSignals(ESignals sig)
{
const char* signalname = "unknown";
switch (sig) {
case kSigAlarm:
DispatchTimers(kFALSE);
Expand All @@ -3532,8 +3586,14 @@ void TUnixSystem::DispatchSignals(ESignals sig)
CheckChilds();
break;
case kSigBus:
signalname = "bus error";
break;
case kSigSegmentationViolation:
signalname = "segmentation violation";
break;
case kSigIllegalInstruction:
signalname = "illegal instruction";
break;
case kSigFloatingException:
Break("TUnixSystem::DispatchSignals", "%s", UnixSigname(sig));
StackTrace();
Expand All @@ -3558,6 +3618,20 @@ void TUnixSystem::DispatchSignals(ESignals sig)
break;
}

if ((sig == kSigIllegalInstruction) || (sig == kSigSegmentationViolation) || (sig == kSigBus))
{

full_cerr_write("\n\nA fatal system signal has occurred: ");
full_cerr_write(signalname);
full_cerr_write("\nThe following is the call stack containing the origin of the signal.\n"
"NOTE:The first few functions on the stack are artifacts of processing the signal and can be ignored\n\n");

TUnixSystem::stacktraceFromThread();

signal(sig, SIG_DFL);
raise(sig);
}

// check a-synchronous signals
if (fSigcnt > 0 && fSignalHandler->GetSize() > 0)
CheckSignals(kFALSE);
Expand Down Expand Up @@ -5148,3 +5222,154 @@ int TUnixSystem::GetProcInfo(ProcInfo_t *info) const

return 0;
}

static void stacktrace_fork();

void TUnixSystem::stacktraceHelperThread()
{
int toParent = childToParent_[1];
int fromParent = parentToChild_[0];
char buf[2]; buf[1] = '\0';
while(true)
{
int result = full_read(fromParent, buf, 1);
if (result < 0)
{
close(toParent);
full_cerr_write("\n\nTraceback helper thread failed to read from parent: ");
full_cerr_write(strerror(-result));
full_cerr_write("\n");
::abort();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reset signal handler here to SIG_DFL. Otherwise, ::abort() will deadlock.

}
if (buf[0] == '1')
{
stacktrace_fork();
full_write(toParent, buf);
}
else if (buf[0] == '2')
{
close(toParent);
close(fromParent);
toParent = childToParent_[1];
fromParent = parentToChild_[0];
}
else if (buf[0] == '3')
{
break;
}
else
{
close(toParent);
full_cerr_write("\n\nTraceback helper thread got unknown command from parent: ");
full_cerr_write(buf);
full_cerr_write("\n");
::abort();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above - reset signal handler.

}
}
}

void TUnixSystem::stacktraceFromThread()
{
int result = full_write(parentToChild_[1], "1");
if (result < 0)
{
full_cerr_write("\n\nAttempt to request stacktrace failed: ");
full_cerr_write(strerror(-result));
full_cerr_write("\n");
return;
}
char buf[2]; buf[1] = '\0';
if ((result = full_read(childToParent_[0], buf, 1)) < 0)
{
full_cerr_write("\n\nWaiting for stacktrace completion failed: ");
full_cerr_write(strerror(-result));
full_cerr_write("\n");
return;
}

}

void stacktrace_fork()
{
char child_stack[4*1024];
char *child_stack_ptr = child_stack + 4*1024;
int pid =
#ifdef __linux__
clone(global_stacktrace, child_stack_ptr, CLONE_VM|CLONE_FS|SIGCHLD, nullptr);
#else
fork();
if (child_stack_ptr) {} // Suppress 'unused variable' warning on non-Linux
if (pid == 0) {global_stacktrace(nullptr); ::abort();}
#endif
if (pid == -1)
{
full_cerr_write("(Attempt to perform stack dump failed.)\n");
}
else
{
int status;
if (waitpid(pid, &status, 0) == -1)
{
full_cerr_write("(Failed to wait on stack dump output.)\n");
} else {}
}
}

int global_stacktrace(void * /*arg*/)
{
char *const *argv = TUnixSystem::getPstackArgv();
execv("/bin/sh", argv);
::abort();
return 1;
}

char *const *TUnixSystem::getPstackArgv() {
return pstackArgv_;
}

void TUnixSystem::cachePidInfo()
{
#ifdef ROOTETCDIR
if(sprintf(pidString_, "%s/gdb-backtrace.sh", ROOTETCDIR) >= pidStringLength_)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you mean to use snprintf here?

Regardless, I think it may be better to switch to std::string and std::stringstream to form this.

{
full_cerr_write("Unable to pre-allocate executable information");
exit(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not call exit() from a library - instead, make it so this function can fail as necessary.

}
#else
if(sprintf(pidString_, "%s/etc/gdb-backtrace.sh", Getenv("ROOTSYS")) >= pidStringLength_)
{
full_cerr_write("Unable to pre-allocate executable information");
exit(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use consistent indentation.

}
#endif

if(sprintf(pidNum, "%d", getpid()) >= pidStringLength_)
{
full_cerr_write("Unable to pre-allocate process id information");
exit(1);
}

close(childToParent_[0]);
close(childToParent_[1]);
childToParent_[0] = -1; childToParent_[1] = -1;
close(parentToChild_[0]);
close(parentToChild_[1]);
parentToChild_[0] = -1; parentToChild_[1] = -1;

if (-1 == pipe2(childToParent_, O_CLOEXEC))
{
fprintf(stdout, "pipe childToParent failed\n");
exit(1);
}

if (-1 == pipe2(parentToChild_, O_CLOEXEC))
{
close(childToParent_[0]); close(childToParent_[1]);
childToParent_[0] = -1; childToParent_[1] = -1;
fprintf(stdout, "pipe parentToChild failed\n");
exit(1);
}

helperThread_.reset(new std::thread(stacktraceHelperThread));
helperThread_->detach();
}