diff --git a/instrumentation/opentelemetry_process/LICENSE b/instrumentation/opentelemetry_process/LICENSE new file mode 100644 index 00000000..ee08b93e --- /dev/null +++ b/instrumentation/opentelemetry_process/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016, Ilya Khaprov . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/instrumentation/opentelemetry_process/README.md b/instrumentation/opentelemetry_process/README.md new file mode 100644 index 00000000..f1773c1c --- /dev/null +++ b/instrumentation/opentelemetry_process/README.md @@ -0,0 +1,30 @@ +# OpenTelemetry Process Instrumentation + +NIF code is based on https://github.com/deadtrickster/prometheus_process_collector + +OpenTelemtry instrumentation with metrics of the current state of cpu, memory, file descriptor usage and native threads count as well as the process start and up times. Implements all process metrics from OpenTelemetry's semantic convertions and some more. + +- FreeBSD; +- Linux - uses /proc; +- MacOS X (expiremental). + +After installing, setup the desired metrics in your application behaviour before your +top-level supervisor starts. Make sure the API and SDK applications are started before +your application. + +```erlang +opentelemetry_process_metrics:setup(), +... +``` + +Build +----- + + $ rebar3 compile + +License +----- + +FreeBSD-specific part uses copy-modified code from standard utils (limits and procstat) or standard API in some places. + +MIT diff --git a/instrumentation/opentelemetry_process/c_src/Makefile b/instrumentation/opentelemetry_process/c_src/Makefile new file mode 100644 index 00000000..d70cacff --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/Makefile @@ -0,0 +1,80 @@ +# Based on c_src.mk from erlang.mk by Loic Hoguin + +CURDIR := $(shell pwd) +BASEDIR := $(abspath $(CURDIR)/..) + +PROJECT ?= $(notdir $(BASEDIR)) +PROJECT := $(strip $(PROJECT)) + +ERTS_INCLUDE_DIR ?= $(shell erl -noshell -eval "io:format(\"~s/erts-~s/include/\", [code:root_dir(), erlang:system_info(version)])." -s erlang halt) +ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -eval "io:format(\"~s\", [code:lib_dir(erl_interface, include)])." -s erlang halt) +ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -eval "io:format(\"~s\", [code:lib_dir(erl_interface, lib)])." -s erlang halt) + +C_SRC_DIR = $(CURDIR) +C_SRC_OUTPUT ?= $(CURDIR)/../priv/$(PROJECT).so +MEMTEST_OUTPUT ?= $(CURDIR)/../_build/memory_test + +.PHONY: memory-test + +# System type and C compiler/flags. + +UNAME_SYS := $(shell uname -s) +ifeq ($(UNAME_SYS), Darwin) + CXX ?= c++ + CXXFLAGS += -O3 -finline-functions + LDFLAGS += -flat_namespace -undefined suppress + PSOURCES = otel_process_info_macos.cc +else ifeq ($(UNAME_SYS), FreeBSD) + CXX ?= c++ + CXXEFLAGS += -D__STANDALONE_TEST__ -std=c++11 -Wall + CXXFLAGS += -O3 -finline-functions + PSOURCES = otel_process_info_freebsd.cc +else ifeq ($(UNAME_SYS), Linux) + CXX ?= g++ + CXXEFLAGS += -D__STANDALONE_TEST__ -std=c++11 -Wall + CXXFLAGS += -O3 -finline-functions + PSOURCES = otel_process_info_linux.cc +endif + +SOURCES = otel_process_metrics_nif.cc $(PSOURCES) + +CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) -std=c++11 -Wall +LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lei + +ifeq ($(UNAME_SYS), OpenBSD) + LDLIBS += -lestdc++ +else + LDLIBS += -lstdc++ +endif + +LDFLAGS += -shared + +# Verbosity. + +cpp_verbose_0 = @echo " CPP " $(?F); +cpp_verbose = $(cpp_verbose_$(V)) + +link_verbose_0 = @echo " LD " $(@F); +link_verbose = $(link_verbose_$(V)) + +OBJECTS = $(addsuffix .o, $(basename $(SOURCES))) + +COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c + +$(C_SRC_OUTPUT): $(OBJECTS) + @mkdir -p $(BASEDIR)/priv/ + $(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT) + +%.o: %.cc + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.cpp + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +memory-test: + @mkdir -p $(BASEDIR)/_build/ + $(CXX) $(CXXEFLAGS) $(LDLIBS) otel_process_info_mt.cc $(PSOURCES) -o $(MEMTEST_OUTPUT) + valgrind --leak-check=full --error-exitcode=1 $(MEMTEST_OUTPUT) 2 + +clean: + @rm -f $(C_SRC_OUTPUT) $(OBJECTS) diff --git a/instrumentation/opentelemetry_process/c_src/otel_exceptions.h b/instrumentation/opentelemetry_process/c_src/otel_exceptions.h new file mode 100644 index 00000000..4578a851 --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_exceptions.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace Prometheus +{ +class ProcessInfoException : public std::runtime_error +{ +public: + ProcessInfoException() : std::runtime_error("ProcessInfoException") {} +}; +} \ No newline at end of file diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_info.h b/instrumentation/opentelemetry_process/c_src/otel_process_info.h new file mode 100644 index 00000000..2f037777 --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_info.h @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include +#include + +#if defined(__FreeBSD__) || defined(__APPLE__) +#include +#include +#endif + +#ifdef __linux__ +#include +#include +#include +#include +#include +#endif + +#ifdef __APPLE__ +#include +#include +#endif + +#include "otel_exceptions.h" + +#define UNUSED(x) (void)(x) + +namespace Prometheus +{ + class ProcessInfo + { + private: + static struct rlimit get_process_limit(int resource) + { + struct rlimit rlp; + if (getrlimit(resource, &rlp)) + { + throw ProcessInfoException(); + } + + return rlp; + }; + + void set_rusage() + { + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + utime_seconds = rusage.ru_utime.tv_sec + rusage.ru_utime.tv_usec / 1000000.00; + stime_seconds = rusage.ru_stime.tv_sec + rusage.ru_stime.tv_usec / 1000000.00; + max_rm_bytes = rusage.ru_maxrss * 1024; + noio_pagefaults_total = rusage.ru_minflt; + io_pagefaults_total = rusage.ru_majflt; + swaps_total = rusage.ru_nswap; + disk_reads_total = rusage.ru_inblock; + disk_writes_total = rusage.ru_oublock; + signals_delivered_total = rusage.ru_nsignals; + voluntary_context_switches_total = rusage.ru_nvcsw; + involuntary_context_switches_total = rusage.ru_nivcsw; + }; + + void set_fds_limit() + { + const auto &fds_rlimit = get_process_limit(RLIMIT_NOFILE); + fds_limit = fds_rlimit.rlim_cur; + }; + + int get_fds_total(); + void set_proc_stat(); + public: + pid_t pid; + time_t now; + int fds_total; + uintmax_t fds_limit; + uintmax_t start_time_seconds; + long uptime_seconds; + int threads_total; + unsigned long vm_bytes; + unsigned long rm_bytes; + double utime_seconds; + double stime_seconds; + long max_rm_bytes; + long noio_pagefaults_total; + long io_pagefaults_total; + long swaps_total; + long disk_reads_total; + long disk_writes_total; + long signals_delivered_total; + long voluntary_context_switches_total; + long involuntary_context_switches_total; + + ProcessInfo() + { + pid = getpid(); + + fds_total = get_fds_total(); + set_fds_limit(); + set_rusage(); + std::time(&now); + + set_proc_stat(); + } + }; +} diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_info_freebsd.cc b/instrumentation/opentelemetry_process/c_src/otel_process_info_freebsd.cc new file mode 100644 index 00000000..1d242bfd --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_info_freebsd.cc @@ -0,0 +1,135 @@ +#include "otel_process_info.h" +#include +#include +#include + +namespace Prometheus +{ + static long pagesize(void) + { + uint pageSize; + size_t len = sizeof(pageSize); + if (sysctlbyname("vm.stats.vm.v_page_size", &pageSize, &len, NULL, 0) == -1) + { + long spageSize = sysconf(_SC_PAGESIZE); + if (spageSize == -1) + { + throw ProcessInfoException(); + } + else + { + return spageSize; + } + } + else + { + return pageSize; + } + } + + static std::unique_ptr kinfo_getproc(pid_t pid) + { + int mib[4]; + size_t len; + + len = 0; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = pid; + if (sysctl(mib, nitems(mib), NULL, &len, NULL, 0) < 0) + { + throw ProcessInfoException(); + } + std::unique_ptr + kipp {reinterpret_cast(new char[len])}; + + if (sysctl(mib, 4, kipp.get(), &len, NULL, 0) < 0) + { + throw ProcessInfoException(); + } + if (len != sizeof(*kipp)) + { + throw ProcessInfoException(); + } + if (kipp->ki_structsize != sizeof(*kipp)) + { + throw ProcessInfoException(); + } + if (kipp->ki_pid != pid) + { + throw ProcessInfoException(); + } + + return kipp; + } + + /* + from https://github.com/freebsd/freebsd/blob/9e0a154b0fd5fa9010238ac9497ec59f84167c92/lib/libutil/kinfo_getfile.c#L22-L51 + I don't need unpacked structs here, just count. Hope it won't break someday. + */ + int ProcessInfo::get_fds_total() + { + int mib[4]; + int error; + int count; + size_t len; + char *buf, *eb; + struct kinfo_file *kf; + + // get size of all pids + len = 0; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_FILEDESC; + mib[3] = pid; + + error = sysctl(mib, nitems(mib), NULL, &len, NULL, 0); + if (error) + { + throw ProcessInfoException(); + } + + // allocate buf for pids + len = len * 4 / 3; + buf = (char*)malloc(len); + if (buf == NULL) + { + throw ProcessInfoException(); + } + + // fill buf with kinfo_files + error = sysctl(mib, nitems(mib), buf, &len, NULL, 0); + if (error) + { + free(buf); + throw ProcessInfoException(); + } + + // count structs in the buf + count = 0; + eb = buf + len; + while (buf < eb) + { + kf = (struct kinfo_file *)(uintptr_t)buf; + if (kf->kf_structsize == 0) + break; + buf += kf->kf_structsize; + count++; + } + + free(buf - len); + return count; + } + + void ProcessInfo::set_proc_stat() + { + auto proc = kinfo_getproc(pid); + + start_time_seconds = proc->ki_start.tv_sec; + uptime_seconds = now - proc->ki_start.tv_sec; + threads_total = proc->ki_numthreads; + vm_bytes = proc->ki_size; + rm_bytes = proc->ki_rssize * pagesize(); + } +} diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_info_linux.cc b/instrumentation/opentelemetry_process/c_src/otel_process_info_linux.cc new file mode 100644 index 00000000..1a00edc8 --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_info_linux.cc @@ -0,0 +1,106 @@ +#include "otel_process_info.h" +#include +#include +#include +#include +#include +#include + +namespace Prometheus +{ + static long pagesize(void) + { + auto ret = sysconf(_SC_PAGESIZE); + if (ret == -1) + { + throw ProcessInfoException(); + } + else + { + return ret; + } + } + + static long clk_tck() + { + long result = sysconf(_SC_CLK_TCK); + + if (result == -1 || result == 0) + { + throw ProcessInfoException(); + } + + return result; + } + + static long system_boot_time() + { + std::ifstream file("/proc/stat"); + std::string line; + long boot_time; + + while (std::getline(file, line) && + !sscanf(line.c_str(), "btime %ld", &boot_time)) + { + } + + return boot_time; + } + + static std::vector read_proc_stat(pid_t pid) + { + static char stat_path[32]; + std::sprintf(stat_path, "/proc/%d/stat", pid); + std::ifstream file(stat_path); + std::string line; + std::getline(file, line); + std::istringstream ils(line); + return std::vector{std::istream_iterator{ils}, + std::istream_iterator{}}; + } + + static unsigned long get_process_stat(int index, const std::vector &stat) + { + return std::stoul(stat[index]); + } + + int ProcessInfo::get_fds_total() + { + static char fd_path[32]; + + std::sprintf(fd_path, "/proc/%d/fd", pid); + + DIR *dirp = opendir(fd_path); + if (dirp == NULL) + { + throw ProcessInfoException(); + } + + int file_total = 0; + struct dirent *entry; + while ((entry = readdir(dirp)) != NULL) + { + if (entry->d_type == DT_LNK) + { + file_total++; + } + } + + closedir(dirp); + return file_total; + } + + void ProcessInfo::set_proc_stat() + { + long ticks = clk_tck(); + long boot_time = system_boot_time(); + + const auto &stat = read_proc_stat(pid); + + start_time_seconds = get_process_stat(21, stat) / ticks + boot_time; + uptime_seconds = now - start_time_seconds; + threads_total = get_process_stat(19, stat); + vm_bytes = get_process_stat(22, stat); + rm_bytes = get_process_stat(23, stat) * pagesize(); + } +} diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_info_macos.cc b/instrumentation/opentelemetry_process/c_src/otel_process_info_macos.cc new file mode 100644 index 00000000..28a4b38e --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_info_macos.cc @@ -0,0 +1,72 @@ +#include "otel_process_info.h" +#include +#include +#include + +namespace Prometheus +{ + static std::unique_ptr kinfo_getproc(pid_t pid) + { + int mib[4]; + size_t len; + + len = 0; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = pid; + if (sysctl(mib, 4, NULL, &len, NULL, 0) < 0) + { + throw ProcessInfoException(); + } + std::unique_ptr + kipp {reinterpret_cast(new char[len])}; + + if (sysctl(mib, 4, kipp.get(), &len, NULL, 0) < 0) + { + throw ProcessInfoException(); + } + if (len != sizeof(*kipp)) + { + throw ProcessInfoException(); + } + + return kipp; + } + + static struct proc_taskinfo proc_pidtaskinfo(pid_t pid) + { + struct proc_taskinfo pti; + if (PROC_PIDTASKINFO_SIZE == proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &pti, PROC_PIDTASKINFO_SIZE)) + { + return pti; + } + else + { + throw ProcessInfoException(); + } + } + + int ProcessInfo::get_fds_total() + { + int total_size = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, 0, 0); + if (total_size < 0) + { + throw ProcessInfoException(); + } + + return total_size / PROC_PIDLISTFD_SIZE; + } + + void ProcessInfo::set_proc_stat() + { + auto proc = kinfo_getproc(pid); + const auto &pti = proc_pidtaskinfo(pid); + + start_time_seconds = proc->kp_proc.p_starttime.tv_sec; + uptime_seconds = now - start_time_seconds; + threads_total = pti.pti_threadnum; + vm_bytes = pti.pti_virtual_size; + rm_bytes = pti.pti_resident_size; + } +} diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_info_mt.cc b/instrumentation/opentelemetry_process/c_src/otel_process_info_mt.cc new file mode 100644 index 00000000..eae7bc11 --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_info_mt.cc @@ -0,0 +1,32 @@ +#include +#include +#include + +#include "otel_process_info.h" + +int main(int argc, char **argv) +{ + + if (argc != 2) + { + // printf("Usage: %s \n", argv[0]); + return 1; + } + + char *dummy; + unsigned long iterations = strtoul(argv[1], &dummy, 10); + + if (*dummy != '\0') + { + // printf("Iterations must be an integer\n"); + return 1; + }; + + while (iterations--) + { + auto process_info = std::unique_ptr(new Prometheus::ProcessInfo()); + std::cout << process_info->threads_total << std::endl; + } + + return 0; +} diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_metrics.h b/instrumentation/opentelemetry_process/c_src/otel_process_metrics.h new file mode 100644 index 00000000..541a4675 --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_metrics.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include +#include + +#if defined(__FreeBSD__) || defined(__APPLE__) +#include +#include +#endif + +#ifdef __linux__ +#include +#include +#include +#include +#include +#endif + +#ifdef __APPLE__ +#include +#include +#endif + +#define UNUSED(x) (void)(x) + +struct otel_process_info +{ + int fds_total; + rlim_t fds_limit; + time_t start_time_seconds; + long uptime_seconds; + int threads_total; + unsigned long vm_bytes; + unsigned long rm_bytes; + double utime_seconds; + double stime_seconds; + long max_rm_bytes; + long noio_pagefaults_total; + long io_pagefaults_total; + long swaps_total; + long disk_reads_total; + long disk_writes_total; + long signals_delivered_total; + long voluntary_context_switches_total; + long involuntary_context_switches_total; +}; + +#ifdef __linux__ + +struct kinfo_proc +{ + struct timeval ki_start; /* starting time */ + int ki_numthreads; /* XXXKSE number of threads in total */ + unsigned long ki_size; /* virtual size */ + unsigned long ki_rssize; /* resident size in pages */ + struct rusage ki_rusage; /* process rusage statistics */ +}; + +#endif + +#define PROCESS_INFO_COUNT 18 + +int fill_otel_process_info(pid_t pid, struct otel_process_info *otel_process_info); \ No newline at end of file diff --git a/instrumentation/opentelemetry_process/c_src/otel_process_metrics_nif.cc b/instrumentation/opentelemetry_process/c_src/otel_process_metrics_nif.cc new file mode 100644 index 00000000..b6501126 --- /dev/null +++ b/instrumentation/opentelemetry_process/c_src/otel_process_metrics_nif.cc @@ -0,0 +1,109 @@ +#include "erl_nif.h" +#include + +#include "otel_process_metrics.h" +#include "otel_process_info.h" + +namespace Prometheus +{ + +#define MAXBUFLEN 1024 + + static ERL_NIF_TERM ATOM_OK; + static ERL_NIF_TERM ATOM_ERROR; + static ERL_NIF_TERM ATOM_PROCESS_OPEN_FDS; + static ERL_NIF_TERM ATOM_PROCESS_MAX_FDS; + static ERL_NIF_TERM ATOM_PROCESS_START_TIME_SECONDS; + static ERL_NIF_TERM ATOM_PROCESS_UPTIME_SECONDS; + static ERL_NIF_TERM ATOM_PROCESS_THREADS_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_VIRTUAL_MEMORY_BYTES; + static ERL_NIF_TERM ATOM_PROCESS_RESIDENT_MEMORY_BYTES; + static ERL_NIF_TERM ATOM_PROCESS_UTIME_SECONDS; + static ERL_NIF_TERM ATOM_PROCESS_STIME_SECONDS; + static ERL_NIF_TERM ATOM_PROCESS_MAX_RESIDENT_MEMORY_BYTES; + static ERL_NIF_TERM ATOM_PROCESS_NOIO_PAGEFAULTS_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_IO_PAGEFAULTS_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_SWAPS_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_DISK_READS_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_DISK_WRITES_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_SIGNALS_DELIVERED_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_VOLUNTARY_CONTEXT_SWITCHES_TOTAL; + static ERL_NIF_TERM ATOM_PROCESS_INVOLUNTARY_CONTEXT_SWITCHES_TOTAL; + + static ERL_NIF_TERM process_info_plist[PROCESS_INFO_COUNT]; + +/* ERL_NIF_TERM */ +/* mk_error(ErlNifEnv* env, const char* mesg) */ +/* { */ +/* return enif_make_tuple2(env, ATOM_ERROR, mk_atom(env, mesg)); */ +/* } */ + + static ERL_NIF_TERM get_process_info(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) + { + ProcessInfo process_info; + + process_info_plist[0] = enif_make_tuple2(env, ATOM_PROCESS_OPEN_FDS, enif_make_int(env, process_info.fds_total)); + process_info_plist[1] = enif_make_tuple2(env, ATOM_PROCESS_MAX_FDS, enif_make_int(env, process_info.fds_limit)); + process_info_plist[2] = enif_make_tuple2(env, ATOM_PROCESS_START_TIME_SECONDS, enif_make_long(env, process_info.start_time_seconds)); + process_info_plist[3] = enif_make_tuple2(env, ATOM_PROCESS_UPTIME_SECONDS, enif_make_long(env, process_info.uptime_seconds)); + process_info_plist[4] = enif_make_tuple2(env, ATOM_PROCESS_THREADS_TOTAL, enif_make_int(env, process_info.threads_total)); + process_info_plist[5] = enif_make_tuple2(env, ATOM_PROCESS_VIRTUAL_MEMORY_BYTES, enif_make_ulong(env, process_info.vm_bytes)); + process_info_plist[6] = enif_make_tuple2(env, ATOM_PROCESS_RESIDENT_MEMORY_BYTES, enif_make_ulong(env, process_info.rm_bytes)); + process_info_plist[7] = enif_make_tuple2(env, ATOM_PROCESS_UTIME_SECONDS, enif_make_double(env, process_info.utime_seconds)); + process_info_plist[8] = enif_make_tuple2(env, ATOM_PROCESS_STIME_SECONDS, enif_make_double(env, process_info.stime_seconds)); + process_info_plist[9] = enif_make_tuple2(env, ATOM_PROCESS_MAX_RESIDENT_MEMORY_BYTES, enif_make_long(env, process_info.max_rm_bytes)); + process_info_plist[10] = enif_make_tuple2(env, ATOM_PROCESS_NOIO_PAGEFAULTS_TOTAL, enif_make_long(env, process_info.noio_pagefaults_total)); + process_info_plist[11] = enif_make_tuple2(env, ATOM_PROCESS_IO_PAGEFAULTS_TOTAL, enif_make_long(env, process_info.io_pagefaults_total)); + process_info_plist[12] = enif_make_tuple2(env, ATOM_PROCESS_SWAPS_TOTAL, enif_make_long(env, process_info.swaps_total)); + process_info_plist[13] = enif_make_tuple2(env, ATOM_PROCESS_DISK_READS_TOTAL, enif_make_long(env, process_info.disk_reads_total)); + process_info_plist[14] = enif_make_tuple2(env, ATOM_PROCESS_DISK_WRITES_TOTAL, enif_make_long(env, process_info.disk_writes_total)); + process_info_plist[15] = enif_make_tuple2(env, ATOM_PROCESS_SIGNALS_DELIVERED_TOTAL, enif_make_long(env, process_info.signals_delivered_total)); + process_info_plist[16] = enif_make_tuple2(env, ATOM_PROCESS_VOLUNTARY_CONTEXT_SWITCHES_TOTAL, enif_make_long(env, process_info.voluntary_context_switches_total)); + process_info_plist[17] = enif_make_tuple2(env, ATOM_PROCESS_INVOLUNTARY_CONTEXT_SWITCHES_TOTAL, enif_make_long(env, process_info.involuntary_context_switches_total)); + + return enif_make_list_from_array(env, process_info_plist, 18); + } +} + +static int on_load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) +{ + +#define ATOM(Id, Value) \ + { \ + Id = enif_make_atom(env, Value); \ + } + // initialize the atoms + using namespace Prometheus; + + ATOM(ATOM_OK, "ok"); + ATOM(ATOM_ERROR, "error"); + ATOM(ATOM_PROCESS_OPEN_FDS, "process_open_fds"); + ATOM(ATOM_PROCESS_MAX_FDS, "process_max_fds"); + ATOM(ATOM_PROCESS_START_TIME_SECONDS, "process_start_time_seconds"); + ATOM(ATOM_PROCESS_UPTIME_SECONDS, "process_uptime_seconds"); + ATOM(ATOM_PROCESS_THREADS_TOTAL, "process_threads_total"); + ATOM(ATOM_PROCESS_VIRTUAL_MEMORY_BYTES, "process_virtual_memory_bytes"); + ATOM(ATOM_PROCESS_RESIDENT_MEMORY_BYTES, "process_resident_memory_bytes"); + ATOM(ATOM_PROCESS_UTIME_SECONDS, "process_utime_seconds"); + ATOM(ATOM_PROCESS_STIME_SECONDS, "process_stime_seconds"); + ATOM(ATOM_PROCESS_MAX_RESIDENT_MEMORY_BYTES, "process_max_resident_memory_bytes"); + ATOM(ATOM_PROCESS_NOIO_PAGEFAULTS_TOTAL, "process_noio_pagefaults_total"); + ATOM(ATOM_PROCESS_IO_PAGEFAULTS_TOTAL, "process_io_pagefaults_total"); + ATOM(ATOM_PROCESS_SWAPS_TOTAL, "process_swaps_total"); + ATOM(ATOM_PROCESS_DISK_READS_TOTAL, "process_disk_reads_total"); + ATOM(ATOM_PROCESS_DISK_WRITES_TOTAL, "process_disk_writes_total"); + ATOM(ATOM_PROCESS_SIGNALS_DELIVERED_TOTAL, "process_signals_delivered_total"); + ATOM(ATOM_PROCESS_VOLUNTARY_CONTEXT_SWITCHES_TOTAL, "process_voluntary_context_switches_total"); + ATOM(ATOM_PROCESS_INVOLUNTARY_CONTEXT_SWITCHES_TOTAL, "process_involuntary_context_switches_total"); +#undef ATOM + + return 0; +} + +static ErlNifFunc nif_funcs[] = +{ + {"get_process_info", 0, Prometheus::get_process_info}}; + +extern "C" { + ERL_NIF_INIT(otel_process_metrics, nif_funcs, &on_load, NULL, NULL, NULL); +} diff --git a/instrumentation/opentelemetry_process/rebar.config b/instrumentation/opentelemetry_process/rebar.config new file mode 100644 index 00000000..962964db --- /dev/null +++ b/instrumentation/opentelemetry_process/rebar.config @@ -0,0 +1,16 @@ +%%-*-Erlang-*- +{erl_opts, [debug_info]}. +{deps, [ + {opentelemetry_api, "~> 1.3.0"}, + {opentelemetry_api_experimental, "~> 0.5.1"} + ]}. + +{pre_hooks, + [{"linux|darwin", compile, "make -C c_src"}, + {"freebsd", compile, "gmake -C c_src"}]}. +{post_hooks, + [{"linux|darwin", clean, "make -C c_src clean"}, + {"freebsd", clean, "gmake -C c_src clean"}]}. + +{plugins, [{rebar3_archive_plugin, "0.0.2"}, + rebar3_hex, rebar3_fmt]}. diff --git a/instrumentation/opentelemetry_process/src/opentelemetry_process.app.src b/instrumentation/opentelemetry_process/src/opentelemetry_process.app.src new file mode 100644 index 00000000..922ffccb --- /dev/null +++ b/instrumentation/opentelemetry_process/src/opentelemetry_process.app.src @@ -0,0 +1,16 @@ +{application, opentelemetry_process, + [{description, "OpenTelemetry process metrics " + "Exposes the current state of process metrics including cpu, memory, " + "file descriptor usage and native threads count as well as the process start and up times."}, + {vsn, semver}, + {registered, []}, + {applications, + [kernel, + stdlib + ]}, + {env,[]}, + {modules, []}, + + {licenses, ["MIT"]}, + {links, []} + ]}. diff --git a/instrumentation/opentelemetry_process/src/opentelemetry_process_metrics.erl b/instrumentation/opentelemetry_process/src/opentelemetry_process_metrics.erl new file mode 100644 index 00000000..5e801a65 --- /dev/null +++ b/instrumentation/opentelemetry_process/src/opentelemetry_process_metrics.erl @@ -0,0 +1,167 @@ +%% Copyright 2024, Travelping GmbH +%% +%% C/C++ code was taken from the prometheus_process_collector project which is covered by the +%% MIT License and Copyright (c) 2016, Ilya Khaprov . +%% + +-module(opentelemetry_process_metrics). + +-on_load(init/0). + +-export([setup/0, setup/1]). +-ignore_xref([setup/0, setup/1]). + +-export([process_metrics/1]). +-nifs([get_process_info/0]). + +-include_lib("opentelemetry_api_experimental/include/otel_meter.hrl"). + +-define(APPNAME, opentelemetry_process). +-define(LIBNAME, opentelemetry_process). + +%%%=================================================================== +%%% API +%%%=================================================================== + +setup() -> + setup(?current_meter). + +setup(Meter) -> + Instr = + [ + %% OTel process metrics defined by semantic conventions + + otel_meter:create_observable_counter( + Meter, 'process.cpu.time', + #{description => <<"Total CPU seconds broken down by different states.">>, + unit => 's'}), + otel_meter:create_observable_updowncounter( + Meter, 'process.memory.usage', + #{description => <<"The amount of physical memory in use.">>, + unit => 'By'}), + otel_meter:create_observable_updowncounter( + Meter, 'process.memory.virtual', + #{description => <<"The amount of committed virtual memory.">>, + unit => 'By'}), + otel_meter:create_observable_counter( + Meter, 'process.disk.io', + #{description => <<"Disk bytes transferred.">>, + unit => 'By'}), + otel_meter:create_observable_updowncounter( + Meter, 'process.thread.count', + #{description => <<"Process threads count.">>, + unit => '{thread}'}), + otel_meter:create_observable_updowncounter( + Meter, 'process.open_file_descriptor.count', + #{description => <<"Number of file descriptors in use by the process.">>, + unit => '{count}'}), + otel_meter:create_observable_counter( + Meter, 'process.context_switches', + #{description => <<"Number of times the process has been context switched.">>, + unit => '{count}'}), + otel_meter:create_observable_counter( + Meter, 'process.paging.faults', + #{description => <<"Number of times the process has been context switched.">>, + unit => '{fault}'}), + otel_meter:create_observable_gauge( + Meter, 'process.uptime', + #{description => <<"The time the process has been running.">>, + unit => 's'}), + + %% Extended process metrics + + otel_meter:create_observable_gauge( + Meter, 'process.open_file_descriptor.limit', + #{description => <<"Maximum number of open file descriptors.">>, + unit => '{count}'}), + otel_meter:create_observable_gauge( + Meter, 'process.start_time', + #{description => <<"Start time of the process since unix epoch in seconds.">>, + unit => 's'}), + otel_meter:create_observable_gauge( + Meter, 'process.memory.usage.max', + #{description => <<"Maximum resident set size used.">>, + unit => 'By'}), + otel_meter:create_observable_counter( + Meter, 'process.swap.count', + #{description => <<"Number of times a process was \"swapped\" out of main memory.">>, + unit => '{count}'}), + otel_meter:create_observable_counter( + Meter, 'process.signal.delivered.count', + #{description => <<"Number of signals delivered.">>, + unit => '{count}'}) + ], + otel_meter:register_callback(Meter, Instr, fun process_metrics/1, []), + ok. + +%%%=================================================================== +%%% NIF stubs +%%%=================================================================== + +init() -> + SoName = + case code:priv_dir(?APPNAME) of + {error, bad_name} -> + case filelib:is_dir(filename:join(["..", priv])) of + true -> + filename:join(["..", priv, ?LIBNAME]); + _ -> + filename:join([priv, ?LIBNAME]) + end; + Dir -> + filename:join(Dir, ?LIBNAME) + end, + erlang:load_nif(SoName, 0). + +-define(nif_stub,nif_stub_error(?LINE)). +nif_stub_error(Line) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,Line}). + +get_process_info() -> ?nif_stub. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +-define(gv(Key, PropList), proplists:get_value(Key, PropList)). + +process_metrics(_) -> + Data = get_process_info(), + + [ + {'process.cpu.time', + [{?gv(process_utime_seconds, Data), #{'cpu.mode' => user}}, + {?gv(process_stime_seconds, Data), #{'cpu.mode' => system}}]}, + {'process.memory.usage', + [{?gv(process_resident_memory_bytes, Data), #{}}]}, + {'process.memory.virtual', + [{?gv(process_virtual_memory_bytes, Data), #{}}]}, + {'process.disk.io', + [{?gv(process_disk_reads_total, Data), #{'disk.io.direction' => 'read'}}, + {?gv(process_disk_writes_total, Data), #{'disk.io.direction' => 'write'}}]}, + {'process.thread.count', + [{?gv(process_threads_total, Data), #{}}]}, + {'process.open_file_descriptor.count', + [{?gv(process_open_fds, Data), #{}}]}, + {'process.context_switches', + [{?gv(process_voluntary_context_switches_total, Data), + #{'process.context_switch_type' => 'voluntary'}}, + {?gv(process_involuntary_context_switches_total, Data), + #{'process.context_switch_type' => 'involuntary'}}]}, + {'process.paging.faults', + [{?gv(process_noio_pagefaults_total, Data), #{'process.paging.fault_type' => 'minor'}}, + {?gv(process_io_pagefaults_total, Data), #{'process.paging.fault_type' => 'major'}}]}, + {'process.uptime', + [{?gv(process_uptime_seconds, Data), #{}}]}, + + {'process.open_file_descriptor.limit', + [{?gv(process_max_fds, Data), #{}}]}, + {'process.start_time', + [{?gv(process_start_time_seconds, Data), #{}}]}, + {'process.memory.usage.max', + [{?gv(process_max_resident_memory_bytes, Data), #{}}]}, + {'process.swap.count', + [{?gv(process_swaps_total, Data), #{}}]}, + {'process.signal.delivered.count', + [{?gv(process_signals_delivered_total, Data), #{}}]} + ].