Skip to content

Commit

Permalink
add process collector
Browse files Browse the repository at this point in the history
  • Loading branch information
RoadRunnr committed Dec 4, 2024
1 parent 9d381d9 commit 9ffd4d0
Show file tree
Hide file tree
Showing 14 changed files with 965 additions and 0 deletions.
21 changes: 21 additions & 0 deletions instrumentation/opentelemetry_process/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2016, Ilya Khaprov <[email protected]>.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
30 changes: 30 additions & 0 deletions instrumentation/opentelemetry_process/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# OpenTelemetry Process Instrumentation

NIF code is based on https://github.com/deadtrickster/prometheus_process_collector

OpenTelemetry instrumentation with metrics for the current state of CPU, memory, and file descriptor usage and the native thread count, as well as the process start time and uptime. Implements all process metrics from OpenTelemetry's semantic conventions and some more.

- FreeBSD;
- Linux - uses /proc;
- MacOS X (experimental).

After installing, setup the desired metrics in your application behaviour before your
top-level supervisor starts. Make sure the API and SDK applications are started before
your application.

```erlang
opentelemetry_process_metrics:setup(),
...
```

Build
-----

$ rebar3 compile

License
-----

FreeBSD-specific part uses copy-modified code from standard utils (limits and procstat) or standard API in some places.

MIT
80 changes: 80 additions & 0 deletions instrumentation/opentelemetry_process/c_src/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Based on c_src.mk from erlang.mk by Loic Hoguin <[email protected]>

# Directory layout: this Makefile lives in c_src/, one level below the
# project root.
CURDIR := $(shell pwd)
BASEDIR := $(abspath $(CURDIR)/..)

# The project name defaults to the name of the project root directory; it
# is used as the base name of the shared object.
PROJECT ?= $(notdir $(BASEDIR))
PROJECT := $(strip $(PROJECT))

# Ask the local Erlang installation for the ERTS and erl_interface
# include/library directories. Each can be overridden from the environment.
ERTS_INCLUDE_DIR ?= $(shell erl -noshell -eval "io:format(\"~s/erts-~s/include/\", [code:root_dir(), erlang:system_info(version)])." -s erlang halt)
ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -eval "io:format(\"~s\", [code:lib_dir(erl_interface, include)])." -s erlang halt)
ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -eval "io:format(\"~s\", [code:lib_dir(erl_interface, lib)])." -s erlang halt)

# Build products: the NIF shared object goes to priv/, the standalone
# memory-test binary to _build/.
C_SRC_DIR = $(CURDIR)
C_SRC_OUTPUT ?= $(CURDIR)/../priv/$(PROJECT).so
MEMTEST_OUTPUT ?= $(CURDIR)/../_build/memory_test

# Both targets only run commands and never create a file of the same name,
# so they are declared phony; otherwise a stray file called "clean" or
# "memory-test" would make the target look up to date.
.PHONY: memory-test clean

# System type and C compiler/flags.
#
# PSOURCES selects the platform-specific implementation file.
# CXXEFLAGS is only used by the memory-test target.

UNAME_SYS := $(shell uname -s)
ifeq ($(UNAME_SYS), Darwin)
CXX ?= c++
CXXFLAGS += -O3 -finline-functions
LDFLAGS += -flat_namespace -undefined suppress
PSOURCES = otel_process_info_macos.cc
else ifeq ($(UNAME_SYS), FreeBSD)
CXX ?= c++
CXXEFLAGS += -D__STANDALONE_TEST__ -std=c++11 -Wall
CXXFLAGS += -O3 -finline-functions
PSOURCES = otel_process_info_freebsd.cc
else ifeq ($(UNAME_SYS), Linux)
CXX ?= g++
CXXEFLAGS += -D__STANDALONE_TEST__ -std=c++11 -Wall
CXXFLAGS += -O3 -finline-functions
PSOURCES = otel_process_info_linux.cc
endif

SOURCES = otel_process_metrics_nif.cc $(PSOURCES)

CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) -std=c++11 -Wall
LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lei

# The link step below is driven by $(CC), so the C++ runtime has to be
# named explicitly; OpenBSD ships it under a different library name.
ifeq ($(UNAME_SYS), OpenBSD)
LDLIBS += -lestdc++
else
LDLIBS += -lstdc++
endif

LDFLAGS += -shared

# Verbosity.
#
# With V=0 each step prints a short " CPP file" / " LD file" line instead of
# the full command; with V unset or set to anything else the *_verbose
# variables expand to nothing and make echoes the full command.

cpp_verbose_0 = @echo " CPP " $(?F);
cpp_verbose = $(cpp_verbose_$(V))

link_verbose_0 = @echo " LD " $(@F);
link_verbose = $(link_verbose_$(V))

# One object file per source file, same base name.
OBJECTS = $(addsuffix .o, $(basename $(SOURCES)))

COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c

# Default goal: link all objects into the NIF shared object under priv/.
$(C_SRC_OUTPUT): $(OBJECTS)
	@mkdir -p $(BASEDIR)/priv/
	$(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT)

%.o: %.cc
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.cpp
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

# Build the standalone test driver together with the platform source and
# run it under valgrind; any reported error makes the target fail.
memory-test:
	@mkdir -p $(BASEDIR)/_build/
	$(CXX) $(CXXEFLAGS) $(LDLIBS) otel_process_info_mt.cc $(PSOURCES) -o $(MEMTEST_OUTPUT)
	valgrind --leak-check=full --error-exitcode=1 $(MEMTEST_OUTPUT) 2

# Remove the shared object and all object files.
clean:
	@rm -f $(C_SRC_OUTPUT) $(OBJECTS)
12 changes: 12 additions & 0 deletions instrumentation/opentelemetry_process/c_src/otel_exceptions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <stdexcept>

namespace Prometheus
{
    /// Exception type used to signal that process information could not be
    /// collected.
    ///
    /// It carries no detail beyond the fixed message "ProcessInfoException",
    /// which is what `what()` returns.
    class ProcessInfoException : public std::runtime_error
    {
    public:
        /// Builds the exception with its fixed message.
        ProcessInfoException()
            : std::runtime_error{"ProcessInfoException"}
        {
        }
    };
}
105 changes: 105 additions & 0 deletions instrumentation/opentelemetry_process/c_src/otel_process_info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#pragma once

#include <cstdint>
#include <cstdio>
#include <ctime>
#include <unistd.h>

#if defined(__FreeBSD__) || defined(__APPLE__)
#include <sys/sysctl.h>
#include <sys/user.h>
#endif

#ifdef __linux__
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <sys/resource.h>
#include <time.h>
#endif

#ifdef __APPLE__
#include <libproc.h>
#include <sys/proc_info.h>
#endif

#include "otel_exceptions.h"

#define UNUSED(x) (void)(x)

namespace Prometheus
{
    /**
     * Snapshot of resource usage for the current process.
     *
     * All public fields are filled in by the constructor. The portable part
     * (pid, descriptor limit, getrusage counters, current time) is
     * implemented here; get_fds_total() and set_proc_stat() are only
     * declared here and are defined in a separate, platform-specific source
     * file.
     *
     * The constructor throws ProcessInfoException when getrlimit() or
     * getrusage() fails; the platform-specific helpers may throw as well.
     */
    class ProcessInfo
    {
    private:
        /**
         * Reads the limit for the given RLIMIT_* resource.
         *
         * @param resource resource identifier passed straight to getrlimit().
         * @return the limit structure filled in by getrlimit().
         * @throws ProcessInfoException if getrlimit() reports an error.
         */
        static struct rlimit get_process_limit(int resource)
        {
            struct rlimit rlp;
            if (getrlimit(resource, &rlp))
            {
                throw ProcessInfoException();
            }

            return rlp;
        }

        /**
         * Fills the CPU time, page fault, I/O, signal and context switch
         * fields from getrusage(RUSAGE_SELF).
         *
         * @throws ProcessInfoException if getrusage() reports an error, so
         *         that the fields are never populated from an uninitialised
         *         structure.
         */
        void set_rusage()
        {
            struct rusage rusage;
            if (getrusage(RUSAGE_SELF, &rusage))
            {
                throw ProcessInfoException();
            }

            // timeval carries whole seconds plus microseconds; fold both
            // into fractional seconds.
            utime_seconds = rusage.ru_utime.tv_sec + rusage.ru_utime.tv_usec / 1000000.00;
            stime_seconds = rusage.ru_stime.tv_sec + rusage.ru_stime.tv_usec / 1000000.00;

            // ru_maxrss is documented in kilobytes on Linux and FreeBSD but
            // in bytes on macOS, so only scale it where it is not already
            // a byte count.
#ifdef __APPLE__
            max_rm_bytes = rusage.ru_maxrss;
#else
            max_rm_bytes = rusage.ru_maxrss * 1024;
#endif
            noio_pagefaults_total = rusage.ru_minflt;
            io_pagefaults_total = rusage.ru_majflt;
            swaps_total = rusage.ru_nswap;
            disk_reads_total = rusage.ru_inblock;
            disk_writes_total = rusage.ru_oublock;
            signals_delivered_total = rusage.ru_nsignals;
            voluntary_context_switches_total = rusage.ru_nvcsw;
            involuntary_context_switches_total = rusage.ru_nivcsw;
        }

        /**
         * Stores the current (soft) RLIMIT_NOFILE value in fds_limit.
         *
         * @throws ProcessInfoException if the limit cannot be read.
         */
        void set_fds_limit()
        {
            const struct rlimit fds_rlimit = get_process_limit(RLIMIT_NOFILE);
            fds_limit = fds_rlimit.rlim_cur;
        }

        // Defined per platform in a separate source file.
        int get_fds_total();
        void set_proc_stat();

    public:
        pid_t pid;                               // result of getpid()
        time_t now;                              // time the snapshot was taken
        int fds_total;                           // value returned by get_fds_total()
        uintmax_t fds_limit;                     // RLIMIT_NOFILE rlim_cur
        uintmax_t start_time_seconds;            // set by set_proc_stat()
        long uptime_seconds;                     // set by set_proc_stat()
        int threads_total;                       // set by set_proc_stat()
        unsigned long vm_bytes;                  // set by set_proc_stat()
        unsigned long rm_bytes;                  // set by set_proc_stat()
        double utime_seconds;                    // ru_utime as fractional seconds
        double stime_seconds;                    // ru_stime as fractional seconds
        long max_rm_bytes;                       // ru_maxrss converted to bytes
        long noio_pagefaults_total;              // ru_minflt
        long io_pagefaults_total;                // ru_majflt
        long swaps_total;                        // ru_nswap
        long disk_reads_total;                   // ru_inblock
        long disk_writes_total;                  // ru_oublock
        long signals_delivered_total;            // ru_nsignals
        long voluntary_context_switches_total;   // ru_nvcsw
        long involuntary_context_switches_total; // ru_nivcsw

        /**
         * Takes the snapshot.
         *
         * `pid` is assigned first and `now` is assigned before
         * set_proc_stat() runs, so the platform-specific helpers can rely on
         * both fields.
         *
         * @throws ProcessInfoException if any of the underlying queries
         *         fails.
         */
        ProcessInfo()
        {
            pid = getpid();

            fds_total = get_fds_total();
            set_fds_limit();
            set_rusage();
            std::time(&now);

            set_proc_stat();
        }
    };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "otel_process_info.h"
#include <cstdio>
#include <ctime>
#include <memory>

namespace Prometheus
{
static long pagesize(void)
{
uint pageSize;
size_t len = sizeof(pageSize);
if (sysctlbyname("vm.stats.vm.v_page_size", &pageSize, &len, NULL, 0) == -1)
{
long spageSize = sysconf(_SC_PAGESIZE);
if (spageSize == -1)
{
throw ProcessInfoException();
}
else
{
return spageSize;
}
}
else
{
return pageSize;
}
}

static std::unique_ptr<kinfo_proc> kinfo_getproc(pid_t pid)
{
int mib[4];
size_t len;

len = 0;
mib[0] = CTL_KERN;
mib[1] = KERN_PROC;
mib[2] = KERN_PROC_PID;
mib[3] = pid;
if (sysctl(mib, nitems(mib), NULL, &len, NULL, 0) < 0)
{
throw ProcessInfoException();
}
std::unique_ptr<kinfo_proc>
kipp {reinterpret_cast<kinfo_proc*>(new char[len])};

if (sysctl(mib, 4, kipp.get(), &len, NULL, 0) < 0)
{
throw ProcessInfoException();
}
if (len != sizeof(*kipp))
{
throw ProcessInfoException();
}
if (kipp->ki_structsize != sizeof(*kipp))
{
throw ProcessInfoException();
}
if (kipp->ki_pid != pid)
{
throw ProcessInfoException();
}

return kipp;
}

/*
from https://github.com/freebsd/freebsd/blob/9e0a154b0fd5fa9010238ac9497ec59f84167c92/lib/libutil/kinfo_getfile.c#L22-L51
I don't need unpacked structs here, just count. Hope it won't break someday.
*/
int ProcessInfo::get_fds_total()
{
int mib[4];
int error;
int count;
size_t len;
char *buf, *eb;
struct kinfo_file *kf;

// get size of all pids
len = 0;
mib[0] = CTL_KERN;
mib[1] = KERN_PROC;
mib[2] = KERN_PROC_FILEDESC;
mib[3] = pid;

error = sysctl(mib, nitems(mib), NULL, &len, NULL, 0);
if (error)
{
throw ProcessInfoException();
}

// allocate buf for pids
len = len * 4 / 3;
buf = (char*)malloc(len);
if (buf == NULL)
{
throw ProcessInfoException();
}

// fill buf with kinfo_files
error = sysctl(mib, nitems(mib), buf, &len, NULL, 0);
if (error)
{
free(buf);
throw ProcessInfoException();
}

// count structs in the buf
count = 0;
eb = buf + len;
while (buf < eb)
{
kf = (struct kinfo_file *)(uintptr_t)buf;
if (kf->kf_structsize == 0)
break;
buf += kf->kf_structsize;
count++;
}

free(buf - len);
return count;
}

void ProcessInfo::set_proc_stat()
{
auto proc = kinfo_getproc(pid);

start_time_seconds = proc->ki_start.tv_sec;
uptime_seconds = now - proc->ki_start.tv_sec;
threads_total = proc->ki_numthreads;
vm_bytes = proc->ki_size;
rm_bytes = proc->ki_rssize * pagesize();
}
}
Loading

0 comments on commit 9ffd4d0

Please sign in to comment.