Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support tmpfs and a server queue for C++ compilers #10

Merged
merged 4 commits into from
Apr 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
RELEASE = v1.1.2
RELEASE = v1.2
BUILD_COMMIT := $(shell git rev-parse --short HEAD)
DATE := $(shell date -u '+%F %X UTC')
VERSION := ${RELEASE}, rev ${BUILD_COMMIT}, compiled at ${DATE}
Expand Down
9 changes: 6 additions & 3 deletions cmd/nocc-daemon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,20 @@ func main() {
}

if *checkServersAndExit {
if len(remoteNoccHosts) == 0 {
failedStart("no remote hosts set; you should set NOCC_SERVERS or NOCC_SERVERS_FILENAME")
}
if len(os.Args) == 3 { // nocc -check-servers {remoteHostPort}
remoteNoccHosts = []string{os.Args[2]}
}
if len(remoteNoccHosts) == 0 {
failedStart("no remote hosts set; you should set NOCC_SERVERS or NOCC_SERVERS_FILENAME")
}
client.RequestRemoteStatus(remoteNoccHosts)
os.Exit(0)
}

if *dumpServerLogsAndExit {
if len(os.Args) == 3 { // nocc -dump-server-logs {remoteHostPort}
remoteNoccHosts = []string{os.Args[2]}
}
if len(remoteNoccHosts) == 0 {
failedStart("no remote hosts set; you should set NOCC_SERVERS or NOCC_SERVERS_FILENAME")
}
Expand Down
55 changes: 30 additions & 25 deletions cmd/nocc-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"fmt"
"net"
"os"
"path"
"runtime"
"time"

"github.com/VKCOM/nocc/internal/common"
Expand All @@ -18,24 +18,29 @@ func failedStart(message string, err error) {
os.Exit(1)
}

// cleanupWorkingDir ensures that workingDir exists and is empty
// prepareEmptyDir ensures that serverDir exists and is empty
// it's executed on server launch
// as a consequence, all file caches are lost on restart
func cleanupWorkingDir(workingDir string) error {
oldWorkingDir := workingDir + ".old"

if err := os.RemoveAll(oldWorkingDir); err != nil {
failedStart("can't remove old working dir", err)
}
if _, err := os.Stat(workingDir); err == nil {
if err := os.Rename(workingDir, oldWorkingDir); err != nil {
failedStart("can't rename working dir %s to .old", err)
func prepareEmptyDir(parentDir *string, subdir string) string {
// if /tmp/nocc/cpp/src-cache already exists, it means, that it contains files from a previous launch
// to start up as quickly as possible, do the following:
// 1) rename it to /tmp/nocc/cpp/src-cache.old
// 2) clear it recursively in the background
serverDir := *parentDir + "/" + subdir
if _, err := os.Stat(serverDir); err == nil {
oldDirRenamed := fmt.Sprintf("%s.old.%d", serverDir, time.Now().Unix())
if err := os.Rename(serverDir, oldDirRenamed); err != nil {
failedStart("can't rename "+serverDir, err)
}
go func() {
_ = os.RemoveAll(oldDirRenamed)
}()
}
if err := os.MkdirAll(workingDir, os.ModePerm); err != nil {
return err

if err := os.MkdirAll(serverDir, os.ModePerm); err != nil {
failedStart("can't create "+serverDir, err)
}
return nil
return serverDir
}

// printDockerContainerIP is a dev/debug function called only when build special for local Docker, for local testing.
Expand All @@ -58,8 +63,10 @@ func main() {
"host", "")
listenPort := common.CmdEnvInt("Listening port, default 43210.", 43210,
"port", "")
workingDir := common.CmdEnvString("Directory for saving incoming files, default /tmp/nocc-server.", "/tmp/nocc-server",
"working-dir", "")
cppStoreDir := common.CmdEnvString("Directory for incoming C++ files and src cache, default /tmp/nocc/cpp.\nIt can be placed in tmpfs to speed up compilation", "/tmp/nocc/cpp",
"cpp-dir", "")
objStoreDir := common.CmdEnvString("Directory for resulting obj files and obj cache, default /tmp/nocc/obj.", "/tmp/nocc/obj",
"obj-dir", "")
logFileName := common.CmdEnvString("A filename to log, by default use stderr.", "",
"log-filename", "")
logVerbosity := common.CmdEnvInt("Logger verbosity level for INFO (-1 off, default 0, max 2).\nErrors are logged always.", 0,
Expand All @@ -70,6 +77,8 @@ func main() {
"obj-cache-limit", "")
statsdHostPort := common.CmdEnvString("Statsd udp address (host:port), omitted by default.\nIf omitted, stats won't be written.", "",
"statsd", "")
maxParallelCxx := common.CmdEnvInt("Max amount of C++ compiler processes launched in parallel, other ready sessions are waiting in a queue.\nBy default, it's a number of CPUs on the current machine.", int64(runtime.NumCPU()),
"max-parallel-cxx", "")

common.ParseCmdFlagsCombiningWithEnv()

Expand All @@ -78,10 +87,6 @@ func main() {
os.Exit(0)
}

if err = cleanupWorkingDir(*workingDir); err != nil {
failedStart("Can't create working directory "+*workingDir, err)
}

if err = server.MakeLoggerServer(*logFileName, *logVerbosity); err != nil {
failedStart("Can't init logger", err)
}
Expand All @@ -95,12 +100,12 @@ func main() {
failedStart("Failed to connect to statsd", err)
}

s.ActiveClients, err = server.MakeClientsStorage(path.Join(*workingDir, "clients"))
s.ActiveClients, err = server.MakeClientsStorage(prepareEmptyDir(cppStoreDir, "clients"))
if err != nil {
failedStart("Failed to init clients hashtable", err)
}

s.CxxLauncher, err = server.MakeCxxLauncher()
s.CxxLauncher, err = server.MakeCxxLauncher(*maxParallelCxx)
if err != nil {
failedStart("Failed to init cxx launcher", err)
}
Expand All @@ -110,17 +115,17 @@ func main() {
failedStart("Failed to init system headers hashtable", err)
}

s.SrcFileCache, err = server.MakeSrcFileCache(path.Join(*workingDir, "src-cache"), *srcCacheLimit)
s.SrcFileCache, err = server.MakeSrcFileCache(prepareEmptyDir(cppStoreDir, "src-cache"), *srcCacheLimit)
if err != nil {
failedStart("Failed to init src file cache", err)
}

s.ObjFileCache, err = server.MakeObjFileCache(path.Join(*workingDir, "obj-cache"), *objCacheLimit)
s.ObjFileCache, err = server.MakeObjFileCache(prepareEmptyDir(objStoreDir, "obj-cache"), prepareEmptyDir(objStoreDir, "cxx-out"), *objCacheLimit)
if err != nil {
failedStart("Failed to init obj file cache", err)
}

s.PchCompilation, err = server.MakePchCompilation(path.Join(*workingDir, "pch"))
s.PchCompilation, err = server.MakePchCompilation(prepareEmptyDir(cppStoreDir, "pch"))
if err != nil {
failedStart("Failed to init pch compilation", err)
}
Expand Down
16 changes: 1 addition & 15 deletions cmd/nocc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ char *format_time_to_log() {
static char time_buf[64];
time_t ts = time(nullptr);
tm *now = localtime(&ts);
sprintf(time_buf, "%d/%02d/%02d %02d:%02d:%02d", 1900 + now->tm_year, 1 + now->tm_mon, now->tm_mday, now->tm_hour, now->tm_min, now->tm_sec);
sprintf(time_buf, "%d-%02d-%02d %02d:%02d:%02d", 1900 + now->tm_year, 1 + now->tm_mon, now->tm_mday, now->tm_hour, now->tm_min, now->tm_sec);
return time_buf;
}

Expand Down Expand Up @@ -97,13 +97,6 @@ void __attribute__((noreturn)) execute_cxx_locally(const char *errToPrint, int e
exit(1);
}

void __attribute__((noreturn)) execute_distcc_locally() {
ARGV[0] = strdup("distcc");
execvp("distcc", ARGV + 0);
printf("could not run `distcc`, exit(1)\n");
exit(1);
}

void __attribute__((noreturn)) execute_go_nocc_instead_of_cpp() {
execv(NOCC_GO_EXECUTABLE, ARGV);
printf("could not run %s, exit(1)\n", NOCC_GO_EXECUTABLE);
Expand Down Expand Up @@ -279,13 +272,6 @@ int main(int argc, char *argv[]) {
exit(1);
}

// this possible fallback will be available for some time just in case
char *env_fallback_to_distcc = getenv("NOCC_FALLBACK_TO_DISTCC");
bool fallback_to_distcc = env_fallback_to_distcc != nullptr && env_fallback_to_distcc[0] == '1';
if (fallback_to_distcc) {
execute_distcc_locally();
}

if (ARGC == 2 && !strcmp(ARGV[1], "start")) {
int sockfd = connect_to_go_daemon_or_start_a_new_one();
exit(sockfd == -1 ? 1 : 0);
Expand Down
42 changes: 32 additions & 10 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,18 @@ When you launch lots of jobs like `make -j 600`, then `nocc-daemon` has to maint
All configuration on a server-side is done using command-line arguments.
For a server, they are more reliable than environment variables.

| Cmd argument | Description |
|--------------------------|-----------------------------------------------------------------------------------------|
| `-host {string}` | Binding address, default 0.0.0.0. |
| `-port {int}` | Listening port, default 43210. |
| `-working-dir {string}` | Directory for saving incoming files, default */tmp/nocc-server*. |
| `-log-filename {string}` | A filename to log, by default use stderr. |
| `-log-verbosity {int}` | Logger verbosity level for INFO (-1 off, default 0, max 2). Errors are logged always. |
| `-src-cache-limit {int}` | Header and source cache limit, in bytes, default 4G. |
| `-obj-cache-limit {int}` | Compiled obj cache limit, in bytes, default 16G. |
| `-statsd {string}` | Statsd udp address (host:port), omitted by default. If omitted, stats won't be written. |
| Cmd argument | Description |
|---------------------------|-----------------------------------------------------------------------------------------|
| `-host {string}` | Binding address, default 0.0.0.0. |
| `-port {int}` | Listening port, default 43210. |
| `-cpp-dir {string}` | Directory for incoming C++ files and src cache, default */tmp/nocc/cpp*. |
| `-obj-dir {string}` | Directory for resulting obj files and obj cache, default */tmp/nocc/obj*. |
| `-log-filename {string}` | A filename to log, by default use stderr. |
| `-log-verbosity {int}` | Logger verbosity level for INFO (-1 off, default 0, max 2). Errors are logged always. |
| `-src-cache-limit {int}` | Header and source cache limit, in bytes, default 4G. |
| `-obj-cache-limit {int}` | Compiled obj cache limit, in bytes, default 16G. |
| `-statsd {string}` | Statsd udp address (host:port), omitted by default. If omitted, stats won't be written. |
| `-max-parallel-cxx {int}` | Max amount of C++ compiler processes launched in parallel, default *nCPU*. |

All file caches are lost on restart, as references to files are kept in memory.
There is also an LRU expiration mechanism to fit cache limits.
Expand Down Expand Up @@ -75,6 +77,26 @@ A list of all written stats could be obtained [inside statsd.go](../internal/ser
They are quite intuitive, that's why we don't duplicate them here.


<p><br></p>

## Configuring nocc + tmpfs

The directory passed as `-cpp-dir` can be placed in **tmpfs**.
All operations with cpp files are performed in that directory:
* incoming files (h/cpp/etc.) are saved there mirroring client's file structure;
* src-cache is placed there;
* pch files are placed there;
* tmp files for preventing race conditions are also there, not in sys tmp dir.

So, if that directory is placed in tmpfs, the C++ compiler will take all files from memory (except for system headers),
which noticeably speeds up compilation.

When setting up limits to tmpfs in a system, ensure that it will fit `-src-cache-limit` plus some extra space.

Note, that placing `-obj-dir` in tmpfs is not recommended, because obj files are usually much heavier,
and they are just transparently streamed back from a hard disk in chunks.


<p><br></p>

## Other commands from a client
Expand Down
4 changes: 2 additions & 2 deletions internal/client/compile-remotely.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func CompileCppRemotely(daemon *Daemon, cwd string, invocation *Invocation, remo
return 0, nil, nil, err
}

logClient.Info(1, "remote", remote.remoteHostPort, "sessionID", invocation.sessionID, "waiting", len(fileIndexesToUpload), "uploads", invocation.cppInFile)
logClient.Info(1, "remote", remote.remoteHost, "sessionID", invocation.sessionID, "waiting", len(fileIndexesToUpload), "uploads", invocation.cppInFile)
logClient.Info(2, "checked", len(requiredFiles), "files whether upload is needed or they exist on remote")
invocation.summary.AddTiming("remote_session")

Expand All @@ -75,7 +75,7 @@ func CompileCppRemotely(daemon *Daemon, cwd string, invocation *Invocation, remo

// Now, we have a resulting .o file placed in a path determined by -o from command line.
if exitCode != 0 {
logClient.Info(0, "remote C++ compiler exited with code", exitCode, "sessionID", invocation.sessionID, invocation.cppInFile, remote.remoteHostPort)
logClient.Info(0, "remote C++ compiler exited with code", exitCode, "sessionID", invocation.sessionID, invocation.cppInFile, remote.remoteHost)
logClient.Info(1, "cxxExitCode:", exitCode, "sessionID", invocation.sessionID, "\ncxxStdout:", strings.TrimSpace(string(invocation.cxxStdout)), "\ncxxStderr:", strings.TrimSpace(string(invocation.cxxStderr)))
} else {
logClient.Info(2, "saved obj file to", invocation.objOutFile)
Expand Down
55 changes: 40 additions & 15 deletions internal/client/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
)

const (
timeoutForceInterruptInvocation = 5 * time.Minute
timeoutForceInterruptInvocation = 8 * time.Minute
)

// Daemon is created once, in a separate process `nocc-daemon`, which is listening for connections via unix socket.
Expand All @@ -37,6 +37,7 @@ type Daemon struct {

listener *DaemonUnixSockListener
remoteConnections []*RemoteConnection
allRemotesDelim string
localCxxThrottle chan struct{}

disableObjCache bool
Expand Down Expand Up @@ -77,31 +78,55 @@ func detectHostUserName() string {
return curUser.Username
}

func MakeDaemon(remoteNoccHosts []string, disableObjCache bool, disableOwnIncludes bool, localCxxQueueSize int64) (*Daemon, error) {
func MakeDaemon(remoteNoccHosts []string, disableObjCache bool, disableOwnIncludes bool, maxLocalCxxProcesses int64) (*Daemon, error) {
// send env NOCC_SERVERS on connect everywhere
// this is for debugging purpose: in production, all clients should have the same servers list
// to ensure this, just grep server logs: only one unique string should appear
allRemotesDelim := ""
for _, remoteHostPort := range remoteNoccHosts {
if allRemotesDelim != "" {
allRemotesDelim += ","
}
allRemotesDelim += ExtractRemoteHostWithoutPort(remoteHostPort)
}

// env NOCC_SERVERS and others are supposed to be the same between `nocc` invocations
// (in practice, this is true, as the first `nocc` invocation has no precedence over any other in a bunch)
daemon := &Daemon{
startTime: time.Now(),
quitChan: make(chan int),
clientID: detectClientID(),
hostUserName: detectHostUserName(),
remoteConnections: make([]*RemoteConnection, 0, len(remoteNoccHosts)),
localCxxThrottle: make(chan struct{}, localCxxQueueSize),
remoteConnections: make([]*RemoteConnection, len(remoteNoccHosts)),
allRemotesDelim: allRemotesDelim,
localCxxThrottle: make(chan struct{}, maxLocalCxxProcesses),
disableOwnIncludes: disableOwnIncludes,
disableObjCache: disableObjCache,
disableLocalCxx: localCxxQueueSize == 0,
disableLocalCxx: maxLocalCxxProcesses == 0,
activeInvocations: make(map[uint32]*Invocation, 300),
includesCache: make(map[string]*IncludesCache, 1),
}

for _, remoteHostPort := range remoteNoccHosts {
remote, err := MakeRemoteConnection(daemon, remoteHostPort, 1, 1)
if err != nil {
remote.isUnavailable = true
logClient.Error("error connecting to", remoteHostPort, err)
}
daemon.remoteConnections = append(daemon.remoteConnections, remote)
// connect to all remotes in parallel
wg := sync.WaitGroup{}
wg.Add(len(remoteNoccHosts))

ctxConnect, cancelFunc := context.WithTimeout(context.Background(), 5000*time.Millisecond)
defer cancelFunc()

for index, remoteHostPort := range remoteNoccHosts {
go func(index int, remoteHostPort string) {
remote, err := MakeRemoteConnection(daemon, remoteHostPort, ctxConnect)
if err != nil {
remote.isUnavailable = true
logClient.Error("error connecting to", remoteHostPort, err)
}

daemon.remoteConnections[index] = remote
wg.Done()
}(index, remoteHostPort)
}
wg.Wait()

return daemon, nil
}
Expand Down Expand Up @@ -202,10 +227,10 @@ func (daemon *Daemon) HandleInvocation(req DaemonSockRequest) DaemonSockResponse
}

remote := daemon.chooseRemoteConnectionForCppCompilation(invocation.cppInFile)
invocation.summary.remoteHostPort = remote.remoteHostPort
invocation.summary.remoteHost = remote.remoteHost

if remote.isUnavailable {
return daemon.FallbackToLocalCxx(req, fmt.Errorf("remote %s is unavailable", remote.remoteHostPort))
return daemon.FallbackToLocalCxx(req, fmt.Errorf("remote %s is unavailable", remote.remoteHost))
}

daemon.mu.Lock()
Expand Down Expand Up @@ -292,7 +317,7 @@ func (daemon *Daemon) PeriodicallyInterruptHangedInvocations() {
daemon.mu.Lock()
for _, invocation := range daemon.activeInvocations {
if time.Since(invocation.createTime) > timeoutForceInterruptInvocation {
invocation.ForceInterrupt(fmt.Errorf("interrupt sessionID %d after %d sec timeout", invocation.sessionID, int(time.Since(invocation.createTime).Seconds())))
invocation.ForceInterrupt(fmt.Errorf("interrupt sessionID %d (%s) after %d sec timeout", invocation.sessionID, invocation.summary.remoteHost, int(time.Since(invocation.createTime).Seconds())))
}
}
daemon.mu.Unlock()
Expand Down
2 changes: 1 addition & 1 deletion internal/client/files-receiving.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func receiveObjFileByChunks(stream pb.CompilationService_RecvCompiledObjStreamCl
return errWrite, false
}

fileTmp, errWrite := common.OpenTempFile(objOutFile, false)
fileTmp, errWrite := common.OpenTempFile(objOutFile)
if errWrite == nil {
_, errWrite = fileTmp.Write(firstChunk.ChunkBody)
}
Expand Down
4 changes: 2 additions & 2 deletions internal/client/invocation-summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ type invocationTimingItem struct {
// It's mostly for developing/debugging purposes: multiple nocc invocations are appended to a single log file,
// from which we can compute statistics, average and percentiles, either in total or partitioned by hosts.
type InvocationSummary struct {
remoteHostPort string
remoteHost string

nIncludes int
nFilesSent int
Expand All @@ -44,7 +44,7 @@ func (s *InvocationSummary) ToLogString(invocation *Invocation) string {

b := strings.Builder{}
fmt.Fprintf(&b, "cppInFile=%q, remote=%s, sessionID=%d, nIncludes=%d, nFilesSent=%d, nBytesSent=%d, nBytesReceived=%d, cxxDuration=%dms",
invocation.cppInFile, s.remoteHostPort, invocation.sessionID, s.nIncludes, s.nFilesSent, s.nBytesSent, s.nBytesReceived, invocation.cxxDuration)
invocation.cppInFile, s.remoteHost, invocation.sessionID, s.nIncludes, s.nFilesSent, s.nBytesSent, s.nBytesReceived, invocation.cxxDuration)

prevTime := invocation.createTime
fmt.Fprintf(&b, ", started=0ms")
Expand Down
Loading