diff --git a/fw/mmap_buffer.c b/fw/mmap_buffer.c
new file mode 100644
index 000000000..ccbcb036d
--- /dev/null
+++ b/fw/mmap_buffer.c
@@ -0,0 +1,303 @@
+/**
+ *		Tempesta FW
+ *
+ * Handling of ring buffers mmap()ed to user space.
+ *
+ * Copyright (C) 2024 Tempesta Technologies, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "mmap_buffer.h"
+#include "lib/str.h"
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+
+/*
+ * We can't pass a TfwMmapBufferHolder pointer to the file operation handlers,
+ * so store the pointers here and look them up by file name in the open
+ * handler.
+ */
+#define MAX_HOLDERS 4
+static TfwMmapBufferHolder *holders[MAX_HOLDERS];
+static int holders_cnt;
+
+static int dev_file_open(struct inode *ino, struct file *filp);
+static int dev_file_close(struct inode *ino, struct file *filp);
+static int dev_file_mmap(struct file *filp, struct vm_area_struct *vma);
+static void dev_file_vm_close(struct vm_area_struct *vma);
+
+static const struct file_operations dev_fops = {
+        .open = dev_file_open,
+        .release = dev_file_close,
+        .mmap = dev_file_mmap,
+};
+
+static const struct vm_operations_struct dev_vm_ops = {
+        .close = dev_file_vm_close
+};
+
+void
+tfw_mmap_buffer_get_room(TfwMmapBufferHolder *holder,
+                         char **part1, unsigned int *size1,
+                         char **part2, unsigned int *size2)
+{
+        TfwMmapBuffer *buf = *this_cpu_ptr(holder->buf);
+        u64 head, tail;
+
+        *size2 = 0;
+
+        if (!atomic_read(&buf->is_ready)) {
+                *size1 = 0;
+                return;
+        }
+
+        head = buf->head % buf->size;
+        tail = smp_load_acquire(&buf->tail) % buf->size;
+
+        *part1 = buf->data + head;
+
+        if (head < tail) {
+                *size1 = tail - head - 1;
+                return;
+        }
+
+        /*
+         * One byte is always left unused to distinguish a full buffer from
+         * an empty one. When the tail is at position 0, the head must not
+         * wrap around to it, so only the contiguous region up to the last
+         * byte of the buffer is available and there is no second part.
+         */
+        if (unlikely(tail == 0)) {
+                *size1 = buf->size - head - 1;
+        } else {
+                *size1 = buf->size - head;
+                *part2 = buf->data;
+                *size2 = tail - 1;
+        }
+}
+
+void
+tfw_mmap_buffer_commit(TfwMmapBufferHolder *holder, unsigned int size)
+{
+        TfwMmapBuffer *buf = *this_cpu_ptr(holder->buf);
+
+        smp_store_release(&buf->head, buf->head + size);
+}
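+
+/*
+ * A writer-side usage sketch (illustrative only; "holder", "data" and "len"
+ * are assumed to exist at the call site, and a record that does not fit is
+ * simply dropped):
+ *
+ *	char *p1, *p2;
+ *	unsigned int s1, s2;
+ *
+ *	tfw_mmap_buffer_get_room(holder, &p1, &s1, &p2, &s2);
+ *	if (s1 + s2 < len)
+ *		return -ENOMEM;
+ *	memcpy(p1, data, min(len, s1));
+ *	if (len > s1)
+ *		memcpy(p2, data + s1, len - s1);
+ *	tfw_mmap_buffer_commit(holder, len);
+ */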
+
+static int
+dev_file_open(struct inode *ino, struct file *filp)
+{
+        TfwMmapBufferHolder *holder;
+        int i;
+
+        for (i = 0; i < holders_cnt; ++i) {
+                if (!strcmp(holders[i]->dev_name,
+                            (char *)filp->f_path.dentry->d_iname)) {
+                        holder = holders[i];
+                        goto found;
+                }
+        }
+
+        return -EINVAL;
+
+found:
+        if (atomic_read(&holder->is_freeing))
+                return -ENOENT;
+        /* Allow only one opener at a time; cmpxchg avoids a check-then-set
+         * race between concurrent open() calls.
+         */
+        if (atomic_cmpxchg(&holder->dev_is_opened, 0, 1))
+                return -EBUSY;
+
+        filp->private_data = holder;
+
+        return 0;
+}
+
+static int
+dev_file_close(struct inode *ino, struct file *filp)
+{
+        TfwMmapBufferHolder *holder = filp->private_data;
+
+        atomic_set(&holder->dev_is_opened, 0);
+        return 0;
+}
+
+/*
+ * This function handles the mapping of ring buffers into user space. Each
+ * buffer must be mapped by user space at an offset calculated as
+ * full_buffer_size * cpu_num, where full_buffer_size is the size of the
+ * buffer data plus TFW_MMAP_BUFFER_DATA_OFFSET, and cpu_num is the ordinal
+ * number of the CPU among the online ones. This allows determining which
+ * CPU's buffer should be mapped based on the offset.
+ */
+static int
+dev_file_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+        TfwMmapBufferHolder *holder = filp->private_data;
+        TfwMmapBuffer *buf, *this_buf = *this_cpu_ptr(holder->buf);
+        unsigned long pfn, size, buf_size, buf_pages;
+        int cpu_num, cpu_id;
+
+        buf_size = TFW_MMAP_BUFFER_FULL_SIZE(this_buf->size);
+        size = vma->vm_end - vma->vm_start;
+        if (size > buf_size)
+                return -EINVAL;
+
+        buf_pages = buf_size / PAGE_SIZE;
+
+#define NTH_ONLINE_CPU(n)				\
+({							\
+        int cpu, res = -1, i = 0;			\
+        for_each_online_cpu(cpu) {			\
+                if (i == n) {				\
+                        res = cpu;			\
+                        break;				\
+                }					\
+                ++i;					\
+        }						\
+        res;						\
+})
+
+        cpu_num = vma->vm_pgoff / buf_pages;
+        cpu_id = NTH_ONLINE_CPU(cpu_num);
+        if (cpu_id < 0)
+                return -EINVAL;
+
+        buf = *per_cpu_ptr(holder->buf, cpu_id);
+        pfn = page_to_pfn(virt_to_page(buf));
+
+        if (remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot))
+                return -EAGAIN;
+
+        vma->vm_ops = &dev_vm_ops;
+        /*
+         * Remember which buffer this VMA maps: the close handler may run on
+         * any CPU, so it can't rely on this_cpu_ptr().
+         */
+        vma->vm_private_data = buf;
+
+        /* The buffer becomes usable only once it is mapped in full. */
+        if (size == buf_size)
+                atomic_set(&buf->is_ready, 1);
+
+        return 0;
+
+#undef NTH_ONLINE_CPU
+}
+
+static void
+dev_file_vm_close(struct vm_area_struct *vma)
+{
+        TfwMmapBuffer *buf = vma->vm_private_data;
+
+        atomic_set(&buf->is_ready, 0);
+}
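+
+/*
+ * The matching user-space side might look as follows (an illustrative
+ * sketch; the device name is a placeholder, and full_size stands for the
+ * TFW_MMAP_BUFFER_FULL_SIZE of the buffer data size):
+ *
+ *	int fd = open("/dev/<buffer_name>", O_RDWR);
+ *	TfwMmapBuffer *buf = mmap(NULL, full_size,
+ *				  PROT_READ | PROT_WRITE, MAP_SHARED,
+ *				  fd, (off_t)full_size * cpu_num);
+ */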
+
+TfwMmapBufferHolder *
+tfw_mmap_buffer_create(const char *filename, unsigned int size)
+{
+        TfwMmapBufferHolder *holder;
+        unsigned int order;
+        int cpu;
+
+        if (size < TFW_MMAP_BUFFER_MIN_SIZE
+            || size > TFW_MMAP_BUFFER_MAX_SIZE
+            || !is_power_of_2(size))
+                return NULL;
+
+        if (filename && strlen(filename) >= TFW_MMAP_BUFFER_MAX_NAME_LEN)
+                return NULL;
+
+        /*
+         * Use kzalloc() so that pg[] entries of CPUs we never reach on the
+         * error path stay NULL, and size pg[] by nr_cpu_ids since CPU IDs
+         * are not necessarily contiguous.
+         */
+        holder = kzalloc(sizeof(TfwMmapBufferHolder)
+                         + sizeof(struct page *) * nr_cpu_ids,
+                         GFP_KERNEL);
+        if (!holder)
+                return NULL;
+
+        order = get_order(size);
+
+        holder->dev_major = -1;
+        holder->buf = __alloc_percpu_gfp(sizeof(TfwMmapBuffer *),
+                                         sizeof(u64), GFP_KERNEL);
+        if (!holder->buf) {
+                kfree(holder);
+                return NULL;
+        }
+        atomic_set(&holder->dev_is_opened, 0);
+        atomic_set(&holder->is_freeing, 0);
+
+        for_each_online_cpu(cpu) {
+                TfwMmapBuffer *buf, **bufp;
+
+                holder->pg[cpu] = alloc_pages_node(cpu_to_node(cpu),
+                                                   GFP_KERNEL, order);
+                if (holder->pg[cpu] == NULL)
+                        goto err;
+
+                buf = (TfwMmapBuffer *)page_address(holder->pg[cpu]);
+                buf->size = TFW_MMAP_BUFFER_DATA_SIZE(size);
+                buf->head = 0;
+                buf->tail = 0;
+                buf->cpu = cpu;
+                bufp = per_cpu_ptr(holder->buf, cpu);
+                *bufp = buf;
+                atomic_set(&buf->is_ready, 0);
+        }
+
+        if (filename) { /* do not create the file in unit tests */
+                if (holders_cnt >= MAX_HOLDERS)
+                        goto err;
+
+                holder->dev_major = register_chrdev(0, filename, &dev_fops);
+                if (holder->dev_major < 0) {
+                        T_WARN("Registering char device failed for %s\n",
+                               filename);
+                        goto err;
+                }
+
+                holder->dev_class = class_create(THIS_MODULE, filename);
+                device_create(holder->dev_class, NULL,
+                              MKDEV(holder->dev_major, 0), NULL, filename);
+                strscpy(holder->dev_name, filename, sizeof(holder->dev_name));
+                holders[holders_cnt++] = holder;
+        }
+
+        return holder;
+
+err:
+        tfw_mmap_buffer_free(holder);
+
+        return NULL;
+}
+
+void
+tfw_mmap_buffer_free(TfwMmapBufferHolder *holder)
+{
+        int cpu;
+
+        if (!holder)
+                return;
+
+        atomic_set(&holder->is_freeing, 1);
+
+        for_each_online_cpu(cpu) {
+                TfwMmapBuffer *buf = *per_cpu_ptr(holder->buf, cpu);
+
+                /* Notify user space that it has to close the file. */
+                if (buf)
+                        atomic_set(&buf->is_ready, 0);
+        }
+
+        /* Wait till user space closes the file. */
+        while (atomic_read(&holder->dev_is_opened))
+                schedule();
+
+        for_each_online_cpu(cpu) {
+                TfwMmapBuffer *buf = *per_cpu_ptr(holder->buf, cpu);
+
+                if (holder->pg[cpu]) {
+                        __free_pages(holder->pg[cpu],
+                                     get_order(TFW_MMAP_BUFFER_FULL_SIZE(buf->size)));
+                        holder->pg[cpu] = NULL;
+                }
+        }
+
+        if (holder->dev_major > 0) {
+                int i;
+
+                device_destroy(holder->dev_class, MKDEV(holder->dev_major, 0));
+                class_destroy(holder->dev_class);
+                unregister_chrdev(holder->dev_major, holder->dev_name);
+
+                /* Drop the stale pointer to avoid use-after-free in open(). */
+                for (i = 0; i < holders_cnt; ++i) {
+                        if (holders[i] == holder) {
+                                holders[i] = holders[--holders_cnt];
+                                break;
+                        }
+                }
+        }
+
+        free_percpu(holder->buf);
+        kfree(holder);
+}
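+
+/*
+ * Lifecycle sketch (illustrative only; the name and size are examples, and
+ * the size must be a power of two within the allowed range):
+ *
+ *	TfwMmapBufferHolder *h;
+ *
+ *	h = tfw_mmap_buffer_create("my_buffer_file", 256 * PAGE_SIZE);
+ *	if (!h)
+ *		return -ENOMEM;
+ *	...
+ *	tfw_mmap_buffer_free(h);
+ */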
diff --git a/fw/mmap_buffer.h b/fw/mmap_buffer.h
new file mode 100644
index 000000000..83df73e3e
--- /dev/null
+++ b/fw/mmap_buffer.h
@@ -0,0 +1,175 @@
+/**
+ *		Tempesta FW
+ *
+ * Tempesta ring buffers mmap()ed to user space.
+ * The overall concept is to implement a highly efficient, lock-free data
+ * transfer mechanism between the kernel and user space using per-CPU ring
+ * buffers. These buffers allow each CPU to handle its own data stream
+ * independently, minimizing contention and avoiding the overhead of
+ * traditional system calls or copying data between kernel and user space.
+ *
+ * Each CPU has its own ring buffer that is memory-mapped into user space.
+ * This design allows CPU-specific user-space threads to read data directly
+ * from the buffer without any need for synchronization with other CPUs. It
+ * reduces the complexity and overhead associated with locks or atomic
+ * operations across multiple CPUs.
+ *
+ * The communication between the kernel and user space is lockless. The kernel
+ * manages the write pointer (head), while user space manages the read pointer
+ * (tail). Each side only modifies its own pointer, preventing race conditions
+ * and eliminating the need for locking mechanisms.
+ *
+ * Since the ring buffers are memory-mapped into user space, data does not need
+ * to be copied between the kernel and user space. Instead, user-space threads
+ * can directly access the data in the kernel’s memory, greatly improving
+ * performance by avoiding the overhead of traditional system calls and memory
+ * copying.
+ *
+ * Motivation for not using existing kernel ring buffers
+ *
+ * While the Linux kernel provides several ring buffer implementations, none
+ * of them is a perfect fit for our use case. Below is an overview of the
+ * existing ring buffers, along with the reasons they were not chosen for our
+ * task:
+ * * relay (relayfs):
+ *	We need to handle records of varying length, which makes determining
+ *	the subbuffer size inefficient. Additionally, both relay_reserve() and
+ *	relay_write() require the length of data to be known in advance, which
+ *	would force us to traverse the data twice. Furthermore, while relay
+ *	provides a sleeping mechanism (allowing user space to use poll()),
+ *	kernel-side sleeping cannot be used in softirq context, which is a
+ *	limitation for our needs.
+ *
+ * * New generic ring buffer (still unmerged):
+ *	This implementation also involves sleepable functions, making it
+ *	incompatible with the softirq context. Moreover, it does not natively
+ *	support per-CPU mode, which would require us to implement this
+ *	functionality manually.
+ *
+ * * perf ring buffer:
+ *	This implementation also involves sleepable functions, making it
+ *	incompatible with the softirq context. It also looks hard to decouple
+ *	the necessary functionality from perf-specific mechanisms.
+ *
+ * * io_uring:
+ *	While very capable, io_uring introduces additional overhead with its
+ *	SQ and CQ mechanisms, which are not needed for our simpler use case.
+ *	Our goal is to minimize complexity, and io_uring adds unnecessary
+ *	layers of interaction.
+ *
+ * * packet_ring_buffer (used in packet mmap):
+ *	This buffer is specifically optimized for page-sized network frames
+ *	and is not designed for the generic transmission of smaller,
+ *	variable-length records. Our use case requires handling multiple
+ *	records per page, making this ring buffer inefficient.
+ *
+ * * tracefs ring buffer:
+ *	Like the packet_ring_buffer, this buffer is primarily designed for
+ *	page-level operations.
+ *
+ * * BPF ring buffer:
+ *	Involves sleepable functions, making it incompatible with the softirq
+ *	context. Also, bpf_ringbuf_reserve() requires the length of data to be
+ *	known in advance, which would force us to traverse the data twice.
+ *
+ * Copyright (C) 2024 Tempesta Technologies, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef __TFW_MMAP_BUFFER_H__
+#define __TFW_MMAP_BUFFER_H__
+
+#ifdef __KERNEL__
+
+#include "str.h"
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/percpu.h>
+
+#else /* __KERNEL__ */
+
+#include <stdint.h>
+
+#define u32 uint32_t
+#define u64 uint64_t
+
+#endif /* __KERNEL__ */
+
+#define TFW_MMAP_BUFFER_DATA_OFFSET	32
+#define TFW_MMAP_BUFFER_MIN_SIZE	PAGE_SIZE
+#define TFW_MMAP_BUFFER_MAX_SIZE	(PAGE_SIZE * 4096)
+
+#define TFW_MMAP_BUFFER_MAX_NAME_LEN	32
+
+#define TFW_MMAP_BUFFER_DATA_SIZE(size)	((size) - TFW_MMAP_BUFFER_DATA_OFFSET)
+#define TFW_MMAP_BUFFER_FULL_SIZE(size)	((size) + TFW_MMAP_BUFFER_DATA_OFFSET)
+
+typedef struct {
+        u64	head;	/* head offset at which the next write will happen */
+        u64	tail;	/* tail offset at which the next read will happen */
+        u32	size;	/* size of the ring buffer data in bytes */
+        u32	cpu;	/* ID of the CPU tied to this buffer */
+#ifdef __KERNEL__
+        /*
+         * is_ready indicates that the buffer is mapped to user space and is
+         * ready for both writing and reading. Resetting this field signals
+         * to user space that it should stop reading, unmap the buffer and
+         * close the file.
+         */
+        atomic_t	is_ready;
+#else
+        int		is_ready;
+#endif
+        char __attribute__((aligned(TFW_MMAP_BUFFER_DATA_OFFSET))) data[];
+} TfwMmapBuffer;
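+
+/*
+ * A minimal user-space reader sketch (illustrative only; consume() is a
+ * hypothetical record handler, wrap-around handling is omitted, and GCC/C11
+ * atomic builtins are assumed). The reader mirrors the kernel's protocol:
+ * it loads head with acquire semantics and publishes tail with release
+ * semantics:
+ *
+ *	while (__atomic_load_n(&buf->is_ready, __ATOMIC_ACQUIRE)) {
+ *		u64 head = __atomic_load_n(&buf->head, __ATOMIC_ACQUIRE);
+ *		u64 tail = buf->tail;
+ *
+ *		if (head == tail)
+ *			continue;
+ *		consume(buf->data + tail % buf->size, head - tail);
+ *		__atomic_store_n(&buf->tail, head, __ATOMIC_RELEASE);
+ *	}
+ */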
+
+#ifdef __KERNEL__
+
+typedef struct {
+        TfwMmapBuffer __percpu	**buf;
+        char			dev_name[TFW_MMAP_BUFFER_MAX_NAME_LEN];
+        atomic_t		dev_is_opened;
+        /*
+         * is_freeing indicates that the freeing process has started. It is
+         * needed to prevent the file from being reopened.
+         */
+        atomic_t		is_freeing;
+        int			dev_major;
+        struct class		*dev_class;
+        struct page		*pg[];
+} TfwMmapBufferHolder;
+
+/*
+ * tfw_mmap_buffer_get_room() returns pointers to, and sizes of, one or two
+ * contiguous memory regions available for writing in the buffer. The caller
+ * should write data to the first segment (part1) and then, if needed, to the
+ * second segment (part2). The internal state of the buffer (i.e. the head
+ * and tail positions) is not modified at this point. As a result, the
+ * writing process can be interrupted at any time, and this function can be
+ * called again to request space for another element without affecting
+ * previous calls.
+ *
+ * Once the data has been successfully written, tfw_mmap_buffer_commit() must
+ * be called with the actual size of the written data. This function updates
+ * the buffer's internal state to reflect the new data and makes the written
+ * space unavailable for further writing.
+ */
+void tfw_mmap_buffer_get_room(TfwMmapBufferHolder *holder,
+                              char **part1, unsigned int *size1,
+                              char **part2, unsigned int *size2);
+void tfw_mmap_buffer_commit(TfwMmapBufferHolder *holder, unsigned int size);
+TfwMmapBufferHolder *tfw_mmap_buffer_create(const char *filename,
+                                            unsigned int size);
+void tfw_mmap_buffer_free(TfwMmapBufferHolder *holder);
+
+#endif /* __KERNEL__ */
+
+#endif /* __TFW_MMAP_BUFFER_H__ */